Data Loading and Processing in Image Segmentation

Using tensorflow.image


import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

IMAGE_SIZE = 128
BATCH_SIZE = 86

def read_image(image_path, mask=False):
    image = tf.io.read_file(image_path)

    if mask:
        image = tf.image.decode_png(image, channels=1)
        image.set_shape([None, None, 1])
        # nearest-neighbour resizing so interpolation doesn't corrupt the label values
        image = tf.image.resize(images=image, size=[IMAGE_SIZE, IMAGE_SIZE], method="nearest")
        image = tf.cast(image, tf.int32)
    else:
        image = tf.image.decode_png(image, channels=3)
        image.set_shape([None, None, 3])
        image = tf.image.resize(images=image, size=[IMAGE_SIZE, IMAGE_SIZE])
        image = image / 255.

    return image

def load_data(image_path, mask_path):
    image = read_image(image_path)
    mask = read_image(mask_path, mask=True)
    return image, mask

def data_generator(image_list, mask_list, split='train'):
    dataset = tf.data.Dataset.from_tensor_slices((image_list, mask_list))
    dataset = dataset.shuffle(8 * BATCH_SIZE) if split == 'train' else dataset
    dataset = dataset.map(load_data, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

# `root` and `exts` (e.g. ('.png',)) are assumed to be defined elsewhere
input_data = os.path.join(root, 'train_images')
images = sorted(
    [
        os.path.join(input_data, fname)
        for fname in os.listdir(input_data)
        if fname.endswith(exts) and not fname.startswith(".")
    ]
)

target_data = os.path.join(root, 'train_masks')
masks = sorted(
    [
        os.path.join(target_data, fname)
        for fname in os.listdir(target_data)
        if fname.endswith(exts) and not fname.startswith(".")
    ]
)

print("Number of samples:", len(images), len(masks))
for input_path, target_path in zip(images[:10], masks[:10]):
    print(input_path[-32:], "|", target_path[-31:], '|', np.unique(cv2.imread(target_path)))

# the path lists must exist before building the dataset
train_dataset = data_generator(images, masks)
print("Train Dataset:", train_dataset)

In the approach above, what gets passed into data_generator are the file paths of the images and masks.
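
If a validation set is also needed, the same path lists can be reused together with the split argument (which skips shuffling); a minimal sketch, holding out a hypothetical last 100 pairs:

train_dataset = data_generator(images[:-100], masks[:-100])
val_dataset = data_generator(images[-100:], masks[-100:], split='val')  # no shuffling for validation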

Using cv2

This approach suits cases where there are not many images: they are read directly into an ndarray and kept in memory.


from glob import glob
import cv2
import imageio
import numpy as np

# SAMPLE, IMG_ROWS and IMG_COLS are assumed to be defined elsewhere
train_img_paths = sorted(glob('../output/kaggle/working/train/*.jpg'))[:SAMPLE]
train_mask_paths = sorted(glob('../output/kaggle/working/train_masks/*.gif'))[:SAMPLE]

# note: cv2.resize takes dsize as (width, height)
train_imgs = np.array([cv2.resize(imageio.imread(path), (IMG_ROWS, IMG_COLS))
                       for path in train_img_paths])

train_masks = np.array([cv2.resize(imageio.imread(path), (IMG_ROWS, IMG_COLS))
                        for path in train_mask_paths])

# binarize the masks: 0. for background, 1. for foreground
train_masks = train_masks.astype(np.float32)
train_masks[train_masks <= 127] = 0.
train_masks[train_masks > 127] = 1.
train_masks = np.reshape(train_masks, (*train_masks.shape, 1))
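
Once everything is in memory, the arrays can be passed straight to model.fit, or wrapped in a tf.data pipeline; a minimal sketch, assuming a BATCH_SIZE constant:

import tensorflow as tf

train_ds = (tf.data.Dataset
            .from_tensor_slices((train_imgs, train_masks))
            .shuffle(len(train_imgs))
            .batch(BATCH_SIZE)
            .prefetch(tf.data.AUTOTUNE))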

Using a generator

This approach is recommended, as it has a small memory footprint.

from glob import glob
import cv2
import imageio
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

# IMG_ROWS and IMG_COLS are assumed to be defined elsewhere
img_paths = sorted(glob('../output/kaggle/working/train/*.jpg'))[:500]
mask_paths = sorted(glob('../output/kaggle/working/train_masks/*.gif'))[:500]
train_img_files, val_img_files, train_mask_files, val_mask_files = train_test_split(
    img_paths, mask_paths, test_size=0.1)

def process_mask(masks):
    masks[masks <= 127] = 0.
    masks[masks > 127] = 1.
    masks = masks.astype(np.float32)
    masks = np.reshape(masks, (*masks.shape, 1))
    return masks

def train_img_mask_gen():
    for img_path, mask_path in zip(train_img_files, train_mask_files):
        img = cv2.resize(imageio.imread(img_path), (IMG_ROWS, IMG_COLS))
        img = img / 127.5  # note: this scales pixel values into [0, 2]
        mask = cv2.resize(imageio.imread(mask_path), (IMG_ROWS, IMG_COLS))
        mask = process_mask(mask)

        yield img, mask

def val_img_mask_gen():
    for img_path, mask_path in zip(val_img_files, val_mask_files):
        img = cv2.resize(imageio.imread(img_path), (IMG_ROWS, IMG_COLS))
        mask = cv2.resize(imageio.imread(mask_path), (IMG_ROWS, IMG_COLS))
        img = img / 127.5
        mask = process_mask(mask)
        yield img, mask

train_dataset = tf.data.Dataset.from_generator(
    train_img_mask_gen,
    output_signature=(
        tf.TensorSpec(shape=(IMG_ROWS, IMG_COLS, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(IMG_ROWS, IMG_COLS, 1), dtype=tf.float32))
)
val_dataset = tf.data.Dataset.from_generator(
    val_img_mask_gen,
    output_signature=(
        tf.TensorSpec(shape=(IMG_ROWS, IMG_COLS, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(IMG_ROWS, IMG_COLS, 1), dtype=tf.float32))
)
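
Note that from_generator yields one (img, mask) pair at a time, so the datasets above are still unbatched; a minimal sketch of the usual batching and prefetching before training, assuming a BATCH_SIZE constant and an already compiled model:

train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

model.fit(train_dataset, validation_data=val_dataset, epochs=10)
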
The approach above requires a generator function that takes no arguments. If you need to pass arguments in, you can create another function that returns a parameterless fun(). For example:

def img_masks_generator_from_files(img_files, mask_files, sample_weights=None, width=512, height=512, shuffle=True):

    if not sample_weights:
        sample_weights = [[1]] * len(img_files)  # give sample_weights a broadcastable shape when fed to a tf.Tensor

    sample_weights = np.array(sample_weights)

    if len(sample_weights.shape) == 1:
        sample_weights = sample_weights[..., np.newaxis]  # give sample_weights a broadcastable shape when fed to a tf.Tensor

    assert len(sample_weights.shape) == 2 and sample_weights.shape[-1] == 1

    if shuffle:
        indexes = np.arange(len(img_files))
        np.random.shuffle(indexes)
        img_files = [img_files[i] for i in indexes]
        mask_files = [mask_files[i] for i in indexes]
        sample_weights = [sample_weights[i] for i in indexes]

    def f():
        # img_masks_from_file (defined elsewhere) loads one (img, masks, sample_weight) triple
        for img, masks, sample_weight in zip(img_files, mask_files, sample_weights):
            yield img_masks_from_file(img, masks, sample_weight)

    return f

train_img_masks_gen = img_masks_generator_from_files(train_img_files, train_mask_files,
                                                     train_sample_weights, width=W, height=H)

train_img_masks_dataset = tf.data.Dataset.from_generator(
    train_img_masks_gen,
    output_signature=(
        tf.TensorSpec(shape=(W, H, 1), dtype=tf.float32),
        tf.TensorSpec(shape=(W, H, len(train_mask_files[0])), dtype=tf.int32),
        tf.TensorSpec(shape=(1,), dtype=tf.float32)
    )
)
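
img_masks_from_file is not shown above; the following is only a hypothetical sketch of what such a helper might look like so that it matches the TensorSpecs (grayscale input, stacked binary masks, a length-1 sample weight):

import cv2
import numpy as np

def img_masks_from_file(img_path, mask_paths, sample_weight, width=512, height=512):
    # hypothetical helper: grayscale image normalized to [0, 1], shape (height, width, 1)
    img = cv2.resize(cv2.imread(img_path, cv2.IMREAD_GRAYSCALE), (width, height))
    img = img[..., np.newaxis].astype(np.float32) / 255.0
    # one binary channel per mask file, shape (height, width, n_masks)
    masks = [cv2.resize(cv2.imread(p, cv2.IMREAD_GRAYSCALE), (width, height))
             for p in mask_paths]
    masks = (np.stack(masks, axis=-1) > 127).astype(np.int32)
    return img, masks, np.asarray(sample_weight, dtype=np.float32)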
Alternatively, you can pass an args argument directly when constructing the tf.data.Dataset:

ds_train = tf.data.Dataset.from_generator(
    noise_generator, args=['train', mode],
    output_types=tf.int32,
    output_shapes=(None, None, n_channels))

The official documentation describes args like this:

(Optional.) A tuple of tf.Tensor objects that will be evaluated and passed to generator as NumPy-array arguments.

For more on how to use tf.data.Dataset.from_generator, see the blog post. One sentence in it resolved my confusion:

we need to have a python generator function which generates one training pair needed for our model.

In other words, when we write the generator function, it should yield one training pair at a time, i.e. (X, y).
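
As a self-contained illustration of the args mechanism (noise_generator here is a made-up stand-in, not the one referenced above): each element of args is evaluated as a tensor and handed to the generator as a NumPy value, so a string argument arrives as bytes and has to be decoded:

import numpy as np
import tensorflow as tf

def noise_generator(split, n_channels):
    split = split.decode('utf-8')  # string args arrive as bytes
    count = 10 if split == 'train' else 2
    for _ in range(count):
        yield np.random.randint(0, 255, size=(4, 4, int(n_channels)), dtype=np.int32)

ds_train = tf.data.Dataset.from_generator(
    noise_generator, args=['train', 3],
    output_types=tf.int32,
    output_shapes=(None, None, 3))

for x in ds_train.take(1):
    print(x.shape)  # (4, 4, 3)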


Visualizing the images

import matplotlib.pyplot as plt

def visualize(**images):  # **images collects (name, image) keyword pairs; *images would collect a positional list
    """Plot images in one row."""
    n = len(images)
    plt.figure(figsize=(16, 5))
    for i, (name, image) in enumerate(images.items()):
        plt.subplot(1, n, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.title(' '.join(name.split('_')).title())
        plt.imshow(image)
    plt.show()

image, mask = next(iter(train_dataset.take(1)))  # one batch from train_dataset
# image, mask = list(train_dataset.take(1))
print(image.shape, mask.shape)

for (img, msk) in zip(image[:5], mask[:5]):
    print(msk.numpy().min(), msk.numpy().max())
    visualize(
        image=img.numpy(),
        gt_mask=msk.numpy(),
    )

When the mask annotation has more than two classes (not just 0 or 1), the following is a way to inspect the data (each class needs to be assigned a color):

# Visualization Utilities

import PIL
import numpy as np
import matplotlib.pyplot as plt

# there are 11 classes in the dataset: one class for each digit (0 to 9) plus the background class
n_classes = 11

# assign a random color for each class
colors = [tuple(np.random.randint(256, size=3) / 255.0) for i in range(n_classes)]

def fuse_with_pil(images):
    '''
    Creates a blank image and pastes input images

    Args:
      images (list of numpy arrays) - numpy array representations of the images to paste

    Returns:
      PIL Image object containing the images
    '''
    widths = (image.shape[1] for image in images)
    heights = (image.shape[0] for image in images)
    total_width = sum(widths)
    max_height = max(heights)

    new_im = PIL.Image.new('RGB', (total_width, max_height))

    x_offset = 0
    for im in images:
        pil_image = PIL.Image.fromarray(np.uint8(im))
        new_im.paste(pil_image, (x_offset, 0))
        x_offset += im.shape[1]

    return new_im


def give_color_to_annotation(annotation):
    '''
    Converts a 2-D annotation to a numpy array with shape (height, width, 3) where
    the third axis represents the color channel. The label values are multiplied by
    255 and placed in this axis to give color to the annotation

    Args:
      annotation (numpy array) - label map array

    Returns:
      the annotation array with an additional color channel/axis
    '''
    seg_img = np.zeros((annotation.shape[0], annotation.shape[1], 3)).astype('float')

    for c in range(n_classes):
        segc = (annotation == c)
        seg_img[:, :, 0] += segc * (colors[c][0] * 255.0)
        seg_img[:, :, 1] += segc * (colors[c][1] * 255.0)
        seg_img[:, :, 2] += segc * (colors[c][2] * 255.0)

    return seg_img


def show_annotation_and_prediction(image, annotation, prediction, iou_list, dice_score_list):
    '''
    Displays the images with the ground truth and predicted label maps. Also overlays the metrics.

    Args:
      image (numpy array) -- the input image
      annotation (numpy array) -- the ground truth label map
      prediction (numpy array) -- the predicted label map
      iou_list (list of floats) -- the IOU values for each class
      dice_score_list (list of floats) -- the Dice Score for each class
    '''
    new_ann = np.argmax(annotation, axis=2)
    true_img = give_color_to_annotation(new_ann)
    pred_img = give_color_to_annotation(prediction)

    # undo the [-1, 1] normalization and drop the channel axis of the grayscale input
    image = image + 1
    image = image * 127.5
    image = np.reshape(image, (image.shape[0], image.shape[1],))
    image = np.uint8(image)
    images = [image, np.uint8(pred_img), np.uint8(true_img)]

    metrics_by_id = [(idx, iou, dice_score) for idx, (iou, dice_score) in enumerate(zip(iou_list, dice_score_list)) if iou > 0.0 and idx < 10]
    metrics_by_id.sort(key=lambda tup: tup[1], reverse=True)  # sorts in place

    display_string_list = ["{}: IOU: {} Dice Score: {}".format(idx, iou, dice_score) for idx, iou, dice_score in metrics_by_id]
    display_string = "\n".join(display_string_list)

    plt.figure(figsize=(15, 4))

    for idx, im in enumerate(images):
        plt.subplot(1, 3, idx + 1)
        if idx == 1:
            plt.xlabel(display_string)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(im)


def show_annotation_and_image(image, annotation):
    '''
    Displays the image and its annotation side by side

    Args:
      image (numpy array) -- the input image
      annotation (numpy array) -- the label map
    '''
    new_ann = np.argmax(annotation, axis=2)
    seg_img = give_color_to_annotation(new_ann)

    image = image + 1
    image = image * 127.5
    image = np.reshape(image, (image.shape[0], image.shape[1],))

    image = np.uint8(image)
    images = [image, seg_img]

    fused_img = fuse_with_pil(images)
    plt.imshow(fused_img)


def list_show_annotation(dataset, num_images):
    '''
    Displays images and their annotations side by side

    Args:
      dataset (tf Dataset) -- batch of images and annotations
      num_images (int) -- number of images to display
    '''
    ds = dataset.unbatch()

    plt.figure(figsize=(20, 15))
    plt.title("Images And Annotations")
    plt.subplots_adjust(bottom=0.1, top=0.9, hspace=0.05)

    for idx, (image, annotation) in enumerate(ds.take(num_images)):
        plt.subplot(5, 5, idx + 1)
        plt.yticks([])
        plt.xticks([])
        show_annotation_and_image(image.numpy(), annotation.numpy())

# get 10 images from the training set
list_show_annotation(training_dataset, 10)

Setting up checkpoints for segmentation


class DisplayCallback(tf.keras.callbacks.Callback):  # display sample predictions every `epoch_interval` epochs
    def __init__(self, dataset, epoch_interval=5):
        super().__init__()
        self.dataset = dataset
        self.epoch_interval = epoch_interval

    def display(self, display_list, extra_title=''):
        plt.figure(figsize=(15, 15))
        title = ['Input Image', 'True Mask', 'Predicted Mask']

        if len(display_list) > len(title):
            title.append(extra_title)

        for i in range(len(display_list)):
            plt.subplot(1, len(display_list), i + 1)
            plt.title(title[i])
            plt.imshow(display_list[i])
            plt.axis('off')
        plt.show()

    def create_mask(self, pred_mask):
        pred_mask = (pred_mask > 0.5).astype("int32")
        return pred_mask[0]

    def show_predictions(self, dataset, num=1):
        for image, mask in dataset.take(num):
            pred_mask = self.model.predict(image)  # self.model is set by Keras when the callback is attached
            self.display([image[0], mask[0], self.create_mask(pred_mask)])

    def on_epoch_end(self, epoch, logs=None):
        if epoch and epoch % self.epoch_interval == 0:
            self.show_predictions(self.dataset)
            print('\nSample Prediction after epoch {}\n'.format(epoch + 1))

# `model` is assumed to be a compiled segmentation model
epochs = 30
model.fit(
    train_dataset,
    epochs=epochs,
    callbacks=[DisplayCallback(train_dataset)]
)

A regular checkpoint is also needed:

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, LearningRateScheduler

filepath_dice_coeff = "_val_loss.hdf5"  # save the best weights to a standalone hdf5 file
checkpointer = ModelCheckpoint(filepath_dice_coeff, monitor='val_loss', verbose=1,
                               save_best_only=True, mode='min')  # or monitor='val_dice_coeff' with mode='max'
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=30, min_lr=0.5e-6)
early_stop = EarlyStopping(monitor='val_loss', patience=50)

# seg_classi_model, train_data, val_data, train_imgs, valid_imgs and batch_size are assumed to be defined elsewhere
hist = seg_classi_model.fit(train_data,
                            steps_per_epoch=(train_imgs.shape[0] + batch_size - 1) // batch_size,
                            epochs=300,
                            callbacks=[checkpointer, lr_reducer, early_stop],
                            validation_data=val_data,
                            validation_steps=(valid_imgs.shape[0] + batch_size - 1) // batch_size)
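
Since save_best_only=True keeps only the best epoch's weights, it is common to restore them before the final evaluation; a minimal sketch:

# reload the best weights written by ModelCheckpoint
seg_classi_model.load_weights(filepath_dice_coeff)
seg_classi_model.evaluate(val_data,
                          steps=(valid_imgs.shape[0] + batch_size - 1) // batch_size)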