Data Loading and Processing in Image Segmentation

Using tensorflow.image


import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

IMAGE_SIZE = 128
BATCH_SIZE = 86

def read_image(image_path, mask=False):
    image = tf.io.read_file(image_path)

    if mask:
        image = tf.image.decode_png(image, channels=1)
        image.set_shape([None, None, 1])
        # nearest-neighbour resizing so interpolation doesn't corrupt the label values
        image = tf.image.resize(images=image, size=[IMAGE_SIZE, IMAGE_SIZE], method="nearest")
        image = tf.cast(image, tf.int32)
    else:
        image = tf.image.decode_png(image, channels=3)
        image.set_shape([None, None, 3])
        image = tf.image.resize(images=image, size=[IMAGE_SIZE, IMAGE_SIZE])
        image = image / 255.

    return image

def load_data(image_path, mask_path):
    image = read_image(image_path)
    mask = read_image(mask_path, mask=True)
    return image, mask

def data_generator(image_list, mask_list, split='train'):
    dataset = tf.data.Dataset.from_tensor_slices((image_list, mask_list))
    dataset = dataset.shuffle(8 * BATCH_SIZE) if split == 'train' else dataset
    dataset = dataset.map(load_data, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

# `root` and `exts` (e.g. ('.png',)) are assumed to be defined elsewhere
input_data = os.path.join(root, 'train_images')
images = sorted(
    [
        os.path.join(input_data, fname)
        for fname in os.listdir(input_data)
        if fname.endswith(exts) and not fname.startswith(".")
    ]
)

target_data = os.path.join(root, 'train_masks')
masks = sorted(
    [
        os.path.join(target_data, fname)
        for fname in os.listdir(target_data)
        if fname.endswith(exts) and not fname.startswith(".")
    ]
)

print("Number of samples:", len(images), len(masks))
for input_path, target_path in zip(images[:10], masks[:10]):
    print(input_path[-32:], "|", target_path[-31:], '|', np.unique(cv2.imread(target_path)))

# the path lists must exist before building the dataset
train_dataset = data_generator(images, masks)
print("Train Dataset:", train_dataset)

In the approach above, what gets passed into data_generator are the file paths of the images and masks.
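
If a validation set is also needed, the same path lists can be reused together with the split argument (which skips shuffling); a minimal sketch, holding out a hypothetical last 100 pairs:

train_dataset = data_generator(images[:-100], masks[:-100])
val_dataset = data_generator(images[-100:], masks[-100:], split='val')  # no shuffling for validation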

Using cv2

This approach suits cases where there are not many images: they are read directly into an ndarray and kept in memory.


from glob import glob
import cv2
import imageio
import numpy as np

# SAMPLE, IMG_ROWS and IMG_COLS are assumed to be defined elsewhere
train_img_paths = sorted(glob('../output/kaggle/working/train/*.jpg'))[:SAMPLE]
train_mask_paths = sorted(glob('../output/kaggle/working/train_masks/*.gif'))[:SAMPLE]

# note: cv2.resize takes dsize as (width, height)
train_imgs = np.array([cv2.resize(imageio.imread(path), (IMG_ROWS, IMG_COLS))
                       for path in train_img_paths])

train_masks = np.array([cv2.resize(imageio.imread(path), (IMG_ROWS, IMG_COLS))
                        for path in train_mask_paths])

# binarize the masks: 0. for background, 1. for foreground
train_masks = train_masks.astype(np.float32)
train_masks[train_masks <= 127] = 0.
train_masks[train_masks > 127] = 1.
train_masks = np.reshape(train_masks, (*train_masks.shape, 1))
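
Once everything is in memory, the arrays can be passed straight to model.fit, or wrapped in a tf.data pipeline; a minimal sketch, assuming a BATCH_SIZE constant:

import tensorflow as tf

train_ds = (tf.data.Dataset
            .from_tensor_slices((train_imgs, train_masks))
            .shuffle(len(train_imgs))
            .batch(BATCH_SIZE)
            .prefetch(tf.data.AUTOTUNE))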

Using a generator

This approach is recommended, as it has a small memory footprint.

from glob import glob
import cv2
import imageio
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

# IMG_ROWS and IMG_COLS are assumed to be defined elsewhere
img_paths = sorted(glob('../output/kaggle/working/train/*.jpg'))[:500]
mask_paths = sorted(glob('../output/kaggle/working/train_masks/*.gif'))[:500]
train_img_files, val_img_files, train_mask_files, val_mask_files = train_test_split(
    img_paths, mask_paths, test_size=0.1)

def process_mask(masks):
    masks[masks <= 127] = 0.
    masks[masks > 127] = 1.
    masks = masks.astype(np.float32)
    masks = np.reshape(masks, (*masks.shape, 1))
    return masks

def train_img_mask_gen():
    for img_path, mask_path in zip(train_img_files, train_mask_files):
        img = cv2.resize(imageio.imread(img_path), (IMG_ROWS, IMG_COLS))
        img = img / 127.5  # note: this scales pixel values into [0, 2]
        mask = cv2.resize(imageio.imread(mask_path), (IMG_ROWS, IMG_COLS))
        mask = process_mask(mask)

        yield img, mask

def val_img_mask_gen():
    for img_path, mask_path in zip(val_img_files, val_mask_files):
        img = cv2.resize(imageio.imread(img_path), (IMG_ROWS, IMG_COLS))
        mask = cv2.resize(imageio.imread(mask_path), (IMG_ROWS, IMG_COLS))
        img = img / 127.5
        mask = process_mask(mask)
        yield img, mask

train_dataset = tf.data.Dataset.from_generator(
    train_img_mask_gen,
    output_signature=(
        tf.TensorSpec(shape=(IMG_ROWS, IMG_COLS, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(IMG_ROWS, IMG_COLS, 1), dtype=tf.float32))
)
val_dataset = tf.data.Dataset.from_generator(
    val_img_mask_gen,
    output_signature=(
        tf.TensorSpec(shape=(IMG_ROWS, IMG_COLS, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(IMG_ROWS, IMG_COLS, 1), dtype=tf.float32))
)
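
Note that from_generator yields one (img, mask) pair at a time, so the datasets above are still unbatched; a minimal sketch of the usual batching and prefetching before training, assuming a BATCH_SIZE constant and an already compiled model:

train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

model.fit(train_dataset, validation_data=val_dataset, epochs=10)
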
The approach above requires a generator function that takes no arguments. If you need to pass arguments in, you can create another function that returns a parameterless fun(). For example:

def img_masks_generator_from_files(img_files, mask_files, sample_weights=None, width=512, height=512, shuffle=True):

    if not sample_weights:
        sample_weights = [[1]] * len(img_files)  # give sample_weights a broadcastable shape when fed to a tf.Tensor

    sample_weights = np.array(sample_weights)

    if len(sample_weights.shape) == 1:
        sample_weights = sample_weights[..., np.newaxis]  # give sample_weights a broadcastable shape when fed to a tf.Tensor

    assert len(sample_weights.shape) == 2 and sample_weights.shape[-1] == 1

    if shuffle:
        indexes = np.arange(len(img_files))
        np.random.shuffle(indexes)
        img_files = [img_files[i] for i in indexes]
        mask_files = [mask_files[i] for i in indexes]
        sample_weights = [sample_weights[i] for i in indexes]

    def f():
        # img_masks_from_file (defined elsewhere) loads one (img, masks, sample_weight) triple
        for img, masks, sample_weight in zip(img_files, mask_files, sample_weights):
            yield img_masks_from_file(img, masks, sample_weight)

    return f

train_img_masks_gen = img_masks_generator_from_files(train_img_files, train_mask_files,
                                                     train_sample_weights, width=W, height=H)

train_img_masks_dataset = tf.data.Dataset.from_generator(
    train_img_masks_gen,
    output_signature=(
        tf.TensorSpec(shape=(W, H, 1), dtype=tf.float32),
        tf.TensorSpec(shape=(W, H, len(train_mask_files[0])), dtype=tf.int32),
        tf.TensorSpec(shape=(1,), dtype=tf.float32)
    )
)
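
img_masks_from_file is not shown above; the following is only a hypothetical sketch of what such a helper might look like so that it matches the TensorSpecs (grayscale input, stacked binary masks, a length-1 sample weight):

import cv2
import numpy as np

def img_masks_from_file(img_path, mask_paths, sample_weight, width=512, height=512):
    # hypothetical helper: grayscale image normalized to [0, 1], shape (height, width, 1)
    img = cv2.resize(cv2.imread(img_path, cv2.IMREAD_GRAYSCALE), (width, height))
    img = img[..., np.newaxis].astype(np.float32) / 255.0
    # one binary channel per mask file, shape (height, width, n_masks)
    masks = [cv2.resize(cv2.imread(p, cv2.IMREAD_GRAYSCALE), (width, height))
             for p in mask_paths]
    masks = (np.stack(masks, axis=-1) > 127).astype(np.int32)
    return img, masks, np.asarray(sample_weight, dtype=np.float32)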
Alternatively, you can pass an args argument directly when constructing the tf.data.Dataset:

ds_train = tf.data.Dataset.from_generator(
    noise_generator, args=['train', mode],
    output_types=tf.int32,
    output_shapes=(None, None, n_channels))

The official documentation describes args like this:

(Optional.) A tuple of tf.Tensor objects that will be evaluated and passed to generator as NumPy-array arguments.

For more on how to use tf.data.Dataset.from_generator, see the blog post. One sentence in it resolved my confusion:

we need to have a python generator function which generates one training pair needed for our model.

In other words, when we write the generator function, it should yield one training pair at a time, i.e. (X, y).
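
As a self-contained illustration of the args mechanism (noise_generator here is a made-up stand-in, not the one referenced above): each element of args is evaluated as a tensor and handed to the generator as a NumPy value, so a string argument arrives as bytes and has to be decoded:

import numpy as np
import tensorflow as tf

def noise_generator(split, n_channels):
    split = split.decode('utf-8')  # string args arrive as bytes
    count = 10 if split == 'train' else 2
    for _ in range(count):
        yield np.random.randint(0, 255, size=(4, 4, int(n_channels)), dtype=np.int32)

ds_train = tf.data.Dataset.from_generator(
    noise_generator, args=['train', 3],
    output_types=tf.int32,
    output_shapes=(None, None, 3))

for x in ds_train.take(1):
    print(x.shape)  # (4, 4, 3)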


Visualizing the images

import matplotlib.pyplot as plt

def visualize(**images):  # **images collects (name, image) keyword pairs; *images would collect a positional list
    """Plot images in one row."""
    n = len(images)
    plt.figure(figsize=(16, 5))
    for i, (name, image) in enumerate(images.items()):
        plt.subplot(1, n, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.title(' '.join(name.split('_')).title())
        plt.imshow(image)
    plt.show()

image, mask = next(iter(train_dataset.take(1)))  # one batch from train_dataset
# image, mask = list(train_dataset.take(1))
print(image.shape, mask.shape)

for (img, msk) in zip(image[:5], mask[:5]):
    print(msk.numpy().min(), msk.numpy().max())
    visualize(
        image=img.numpy(),
        gt_mask=msk.numpy(),
    )

When the mask annotation has more than two classes (not just 0 or 1), the following is a way to inspect the data (each class needs to be assigned a color):

# Visualization Utilities

import PIL
import numpy as np
import matplotlib.pyplot as plt

# there are 11 classes in the dataset: one class for each digit (0 to 9) plus the background class
n_classes = 11

# assign a random color for each class
colors = [tuple(np.random.randint(256, size=3) / 255.0) for i in range(n_classes)]

def fuse_with_pil(images):
    '''
    Creates a blank image and pastes input images

    Args:
      images (list of numpy arrays) - numpy array representations of the images to paste

    Returns:
      PIL Image object containing the images
    '''
    widths = (image.shape[1] for image in images)
    heights = (image.shape[0] for image in images)
    total_width = sum(widths)
    max_height = max(heights)

    new_im = PIL.Image.new('RGB', (total_width, max_height))

    x_offset = 0
    for im in images:
        pil_image = PIL.Image.fromarray(np.uint8(im))
        new_im.paste(pil_image, (x_offset, 0))
        x_offset += im.shape[1]

    return new_im


def give_color_to_annotation(annotation):
    '''
    Converts a 2-D annotation to a numpy array with shape (height, width, 3) where
    the third axis represents the color channel. The label values are multiplied by
    255 and placed in this axis to give color to the annotation

    Args:
      annotation (numpy array) - label map array

    Returns:
      the annotation array with an additional color channel/axis
    '''
    seg_img = np.zeros((annotation.shape[0], annotation.shape[1], 3)).astype('float')

    for c in range(n_classes):
        segc = (annotation == c)
        seg_img[:, :, 0] += segc * (colors[c][0] * 255.0)
        seg_img[:, :, 1] += segc * (colors[c][1] * 255.0)
        seg_img[:, :, 2] += segc * (colors[c][2] * 255.0)

    return seg_img


def show_annotation_and_prediction(image, annotation, prediction, iou_list, dice_score_list):
    '''
    Displays the images with the ground truth and predicted label maps. Also overlays the metrics.

    Args:
      image (numpy array) -- the input image
      annotation (numpy array) -- the ground truth label map
      prediction (numpy array) -- the predicted label map
      iou_list (list of floats) -- the IOU values for each class
      dice_score_list (list of floats) -- the Dice Score for each class
    '''
    new_ann = np.argmax(annotation, axis=2)
    true_img = give_color_to_annotation(new_ann)
    pred_img = give_color_to_annotation(prediction)

    # undo the [-1, 1] normalization and drop the channel axis of the grayscale input
    image = image + 1
    image = image * 127.5
    image = np.reshape(image, (image.shape[0], image.shape[1],))
    image = np.uint8(image)
    images = [image, np.uint8(pred_img), np.uint8(true_img)]

    metrics_by_id = [(idx, iou, dice_score) for idx, (iou, dice_score) in enumerate(zip(iou_list, dice_score_list)) if iou > 0.0 and idx < 10]
    metrics_by_id.sort(key=lambda tup: tup[1], reverse=True)  # sorts in place

    display_string_list = ["{}: IOU: {} Dice Score: {}".format(idx, iou, dice_score) for idx, iou, dice_score in metrics_by_id]
    display_string = "\n".join(display_string_list)

    plt.figure(figsize=(15, 4))

    for idx, im in enumerate(images):
        plt.subplot(1, 3, idx + 1)
        if idx == 1:
            plt.xlabel(display_string)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(im)


def show_annotation_and_image(image, annotation):
    '''
    Displays the image and its annotation side by side

    Args:
      image (numpy array) -- the input image
      annotation (numpy array) -- the label map
    '''
    new_ann = np.argmax(annotation, axis=2)
    seg_img = give_color_to_annotation(new_ann)

    image = image + 1
    image = image * 127.5
    image = np.reshape(image, (image.shape[0], image.shape[1],))

    image = np.uint8(image)
    images = [image, seg_img]

    fused_img = fuse_with_pil(images)
    plt.imshow(fused_img)


def list_show_annotation(dataset, num_images):
    '''
    Displays images and their annotations side by side

    Args:
      dataset (tf Dataset) -- batch of images and annotations
      num_images (int) -- number of images to display
    '''
    ds = dataset.unbatch()

    plt.figure(figsize=(20, 15))
    plt.title("Images And Annotations")
    plt.subplots_adjust(bottom=0.1, top=0.9, hspace=0.05)

    for idx, (image, annotation) in enumerate(ds.take(num_images)):
        plt.subplot(5, 5, idx + 1)
        plt.yticks([])
        plt.xticks([])
        show_annotation_and_image(image.numpy(), annotation.numpy())

# get 10 images from the training set
list_show_annotation(training_dataset, 10)

Setting up checkpoints for segmentation


class DisplayCallback(tf.keras.callbacks.Callback):  # display sample predictions every `epoch_interval` epochs
    def __init__(self, dataset, epoch_interval=5):
        super().__init__()
        self.dataset = dataset
        self.epoch_interval = epoch_interval

    def display(self, display_list, extra_title=''):
        plt.figure(figsize=(15, 15))
        title = ['Input Image', 'True Mask', 'Predicted Mask']

        if len(display_list) > len(title):
            title.append(extra_title)

        for i in range(len(display_list)):
            plt.subplot(1, len(display_list), i + 1)
            plt.title(title[i])
            plt.imshow(display_list[i])
            plt.axis('off')
        plt.show()

    def create_mask(self, pred_mask):
        pred_mask = (pred_mask > 0.5).astype("int32")
        return pred_mask[0]

    def show_predictions(self, dataset, num=1):
        for image, mask in dataset.take(num):
            pred_mask = self.model.predict(image)  # self.model is set by Keras when the callback is attached
            self.display([image[0], mask[0], self.create_mask(pred_mask)])

    def on_epoch_end(self, epoch, logs=None):
        if epoch and epoch % self.epoch_interval == 0:
            self.show_predictions(self.dataset)
            print('\nSample Prediction after epoch {}\n'.format(epoch + 1))

# `model` is assumed to be a compiled segmentation model
epochs = 30
model.fit(
    train_dataset,
    epochs=epochs,
    callbacks=[DisplayCallback(train_dataset)]
)

A regular checkpoint is also needed:

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, LearningRateScheduler

filepath_dice_coeff = "_val_loss.hdf5"  # save the best weights to a standalone hdf5 file
checkpointer = ModelCheckpoint(filepath_dice_coeff, monitor='val_loss', verbose=1,
                               save_best_only=True, mode='min')  # or monitor='val_dice_coeff' with mode='max'
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=30, min_lr=0.5e-6)
early_stop = EarlyStopping(monitor='val_loss', patience=50)

# seg_classi_model, train_data, val_data, train_imgs, valid_imgs and batch_size are assumed to be defined elsewhere
hist = seg_classi_model.fit(train_data,
                            steps_per_epoch=(train_imgs.shape[0] + batch_size - 1) // batch_size,
                            epochs=300,
                            callbacks=[checkpointer, lr_reducer, early_stop],
                            validation_data=val_data,
                            validation_steps=(valid_imgs.shape[0] + batch_size - 1) // batch_size)
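
Since save_best_only=True keeps only the best epoch's weights, it is common to restore them before the final evaluation; a minimal sketch:

# reload the best weights written by ModelCheckpoint
seg_classi_model.load_weights(filepath_dice_coeff)
seg_classi_model.evaluate(val_data,
                          steps=(valid_imgs.shape[0] + batch_size - 1) // batch_size)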