0%

在图像分割任务中,通常会基于dice系数或者IoU来构造loss(例如 1 - dice,或者直接取负的dice),这里总结一下它们的使用方式:

二分类问题(mask只有0或者1)

这种task网络的最后一层通常会加sigmoid激活函数,比如unet实现中最后一层就是一个卷积层:

1
conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)
因此输出的feature map中每一个pixel的值就是0~1之间的值。

dice loss可以如下计算:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from keras import backend as K
from keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam

SMOOTH = 1.

def dice_coef(y_true, y_pred):
    """Return the smoothed Dice coefficient between two masks.

    Both tensors are flattened before the sums are taken, so one scalar is
    produced for all elements passed in.

    Args:
        y_true: ground-truth mask tensor.
        y_pred: predicted mask tensor (e.g. the sigmoid output of the net).

    Returns:
        Scalar tensor
        ``(2 * overlap + SMOOTH) / (sum(y_true) + sum(y_pred) + SMOOTH)``.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    # SMOOTH appears in both terms so the ratio stays defined for empty masks.
    numerator = 2. * overlap + SMOOTH
    denominator = K.sum(truth) + K.sum(pred) + SMOOTH
    return numerator / denominator

def iou_coef(y_true, y_pred, smooth=1):
    """Return the smoothed intersection-over-union, averaged over axis 0.

    Intersection and union are summed over axes 1, 2 and 3, then the
    resulting ratios are averaged along axis 0.

    Args:
        y_true: ground-truth mask tensor.
        y_pred: predicted mask tensor.
        smooth: constant added to numerator and denominator.

    Returns:
        Tensor holding the mean of ``(intersection + smooth) / (union + smooth)``.
    """
    # presumably inputs are (batch, height, width, channels) -- confirm with caller
    reduce_axes = [1, 2, 3]
    overlap = K.sum(K.abs(y_true * y_pred), axis=reduce_axes)
    total = K.sum(y_true, reduce_axes) + K.sum(y_pred, reduce_axes)
    union = total - overlap
    ratio = (overlap + smooth) / (union + smooth)
    return K.mean(ratio, axis=0)

def bce_dice_loss(y_true, y_pred):
    """Combine binary cross-entropy and the Dice coefficient into one loss.

    Args:
        y_true: ground-truth mask tensor.
        y_pred: predicted mask tensor.

    Returns:
        ``0.5 * binary_crossentropy - dice_coef`` for the given tensors.
    """
    bce_term = 0.5 * binary_crossentropy(y_true, y_pred)
    dice_term = dice_coef(y_true, y_pred)
    # Subtracting the Dice score rewards overlap; "+ (1 - dice_coef)" is an
    # alternative formulation of the same idea.
    return bce_term - dice_term


# Train with the combined BCE/Dice loss and report both parts as metrics.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss=bce_dice_loss,
    metrics=[binary_crossentropy, dice_coef],
)

多分类问题(mask有除了0和1以外的其他值)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def class_wise_metrics(y_true, y_pred):
    """Compute the IoU and Dice score of every class.

    Both inputs hold one integer class id per pixel. The number of classes is
    read from the module-level ``n_classes``.

    Args:
        y_true (array) - ground truth label map
        y_pred (array) - predicted label map

    Returns:
        Tuple ``(class_wise_iou, class_wise_dice_score)``: two lists with one
        entry per class id in ``range(n_classes)``.
    """
    # Keeps the denominators non-zero; a class absent from both maps scores 0.
    eps = 0.00001

    iou_per_class = []
    dice_per_class = []

    for label in range(n_classes):
        true_mask = y_true == label
        pred_mask = y_pred == label

        # Pixels carrying this label in both maps (true positives).
        overlap = np.sum(pred_mask * true_mask)
        # Pixel count of this label in the ground truth plus in the prediction.
        area_sum = np.sum(true_mask) + np.sum(pred_mask)

        iou_per_class.append(overlap / (area_sum - overlap + eps))
        dice_per_class.append(2 * (overlap / (area_sum + eps)))

    return iou_per_class, dice_per_class

在上述计算过程中需要注意的是,这里的y_true和y_pred都要先经过np.argmax(),把每个像素在各个类别上的分数(或one-hot编码)转换成类别编号:

1
2
3
4
5
6
7
results = model.predict(test_dataset, steps=test_steps)
print(results.shape)  # (192, 64, 84, 11)

# Collapse the per-class scores into one integer label per pixel.
results = np.argmax(results, axis=3)

integer_slider = 105  # index of the sample to evaluate

# Indexing a single sample drops the batch axis, so the class axis is the
# last one; axis=3 would be out of range here.
# assumes y_true_segments is one-hot with the same layout as the raw
# predictions -- TODO confirm
y_true_labels = np.argmax(y_true_segments[integer_slider], axis=-1)
iou, dice_score = class_wise_metrics(y_true_labels, results[integer_slider])

对Counter中的内容进行排序

1
2
3
4
5
6
7
8
9
10
11
12
13
from collections import Counter

x = Counter(a=5, b=3, c=7)

# Method 1: built-in helper, gives (key, count) pairs by descending count
x.most_common()  # [('c', 7), ('a', 5), ('b', 3)]

# Method 2: sort the keys by their count, gives the keys only
sorted(x, key=x.get, reverse=True)  # ['c', 'a', 'b']

# Method 3: sort the (key, count) pairs on the count
sorted(x.items(), key=lambda kv: kv[1], reverse=True)  # [('c', 7), ('a', 5), ('b', 3)]

一共三种方法,三种方法返回的都是排完序之后的list;区别在于第二种方法传入key=x.get,最终返回的list里只有key,而其他两种方法返回的是由(key, count)元组组成的list

PIL

1
2
3
4
5
6
7
8
9
10
11
from PIL import Image
import numpy as np

img = Image.open("1.jpg")
img.load()  # read the pixel data now rather than on first access

# np.asarray (the original "asarrary" was a typo and raises AttributeError)
array = np.asarray(img)

img.show()  # view the picture

img.save("./new_img.jpg")

PIL.Image包有很多其他的功能,比如:

1
2
# Build a Pillow image from an array
Image.fromarray(data, mode='RGB')  # with mode='L' the image has a single channel

cv2

1
2
3
4
5
6
import cv2

img = cv2.imread("./1.jpg")  # the returned image has its channels in BGR order
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("could not read image: ./1.jpg")

grey_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to greyscale
rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # convert to RGB channel order

cv2一个有用的method是resize

1
# The target size is passed as (width, height).
img = cv2.resize(raw_img, dsize=(width, height))

tensorflow

1
2
3
4
5
6
7
8
9
import tensorflow as tf

# Load an image as a PIL instance. target_size is a (height, width) pair and
# takes no channel entry.
img = tf.keras.preprocessing.image.load_img("./i.jpg", target_size=(32, 32))
img = tf.keras.preprocessing.image.img_to_array(img)

# Convert an array back into a PIL image instance
img = tf.keras.preprocessing.image.array_to_img(array)  # array is a 3D numpy array

tf.keras.utils.save_img(path, array)