import numpy as np
import theano
import theano.tensor as T
from theano.gradient import grad_not_implemented
def dice(y_pred, y_true, n_classes):
    """Symbolic per-class dice coefficient.

    DICE is 2 * |intersection| / (|pred| + |true|); a small epsilon in both
    numerator and denominator keeps the ratio defined (and equal to 1) when a
    class is absent from both maps.  Returns a vector of n_classes scores.
    """
    y_true = T.flatten(y_true)
    y_pred = T.argmax(y_pred, axis=1)
    eps = T.constant(1e-7)
    two = T.constant(2.)
    scores = T.zeros(n_classes)
    for label in range(n_classes):
        label_const = T.constant(label)
        true_mask = T.eq(y_true, label_const)
        pred_mask = T.eq(y_pred, label_const)
        overlap = T.sum(true_mask * pred_mask)
        size_sum = T.sum(true_mask) + T.sum(pred_mask)
        scores = T.set_subtensor(scores[label],
                                 (two * overlap + eps) / (size_sum + eps))
    return scores
def dice_npy(y_pred, y_true, n_classes):
    """Numpy per-class dice coefficient.

    y_pred: pseudo-probability array whose last axis indexes classes.
    y_true: integer label map (any shape; it is flattened).
    Returns an array of length n_classes with one dice score per class
    (background included); the 1e-7 terms avoid division by zero.
    """
    eps = np.float32(1e-7)
    labels_true = y_true.flatten()
    labels_pred = np.argmax(y_pred, -1)
    scores = np.zeros(n_classes)
    for cls in range(n_classes):
        true_mask = np.equal(labels_true, cls)
        pred_mask = np.equal(labels_pred, cls)
        numerator = 2. * np.sum(true_mask * pred_mask) + eps
        denominator = np.sum(true_mask) + np.sum(pred_mask) + eps
        scores[cls] = numerator / denominator
    return scores
if __name__ == "__main__":
    # Build two dummy images: img_gt is a 0/1 encoded label map, img_pred a
    # pseudo probability map as it would come out of a segmentation network.
    img_gt = np.zeros((100, 100), dtype=np.int32)
    # fill gt with some values
    img_gt[:, :65] = 1
    # channel 0 is a constant 0.4; channel 1 is 0.55 in the upper half, so
    # argmax predicts class 1 only where channel 1 wins
    img_pred = np.zeros((2, 100, 100), dtype=np.float32)
    img_pred[1, :50, :] = 0.55
    img_pred[0, :, :] = 0.4
    # reshape to (10000, 2): one row per pixel, one column per class — the
    # usual output layout of a segmentation network
    img_pred = np.transpose(img_pred, (1, 2, 0))
    img_pred = img_pred.reshape((10000, 2))

    # reference dice via medpy (foreground class only)
    import medpy.metric
    dc_medpy = np.float32(medpy.metric.dc(img_gt, img_pred.argmax(-1).reshape(100, 100)))

    # our numpy implementation; note this one also gives a background score
    dc_npy = np.float32(dice_npy(img_pred, img_gt, 2))

    # theano implementation (also scores background): symbolic variables,
    # symbolic dice, then a compiled function
    img_pred_sym = T.matrix()
    img_gt_sym = T.matrix()
    dc_sym_theano = dice(img_pred_sym, img_gt_sym, 2)
    import theano
    dc_fct_theano = theano.function([img_pred_sym, img_gt_sym], dc_sym_theano)
    dc_theano = dc_fct_theano(img_pred, img_gt.astype(np.float32))

    # all three agree on the foreground class
    assert dc_theano[1] == dc_npy[1] == dc_medpy

    # Gradient of the dice score w.r.t. the prediction.  The dice graph only
    # sees the inputs through argmax/eq, whose gradients are zero or
    # disconnected, so img_gt_sym does not appear in the *gradient* graph at
    # all.  theano.function therefore rejected it as an unused input;
    # on_unused_input='ignore' tells theano that this is intentional.
    grads = T.grad(T.mean(dc_sym_theano), img_pred_sym)
    grad_fn = theano.function([img_pred_sym, img_gt_sym], grads,
                              on_unused_input='ignore')
# Question: when I do grad_fn = theano.function([img_pred_sym, img_gt_sym], grads),
# Theano complains that img_gt_sym is not part of the computational graph, although
# it clearly is, as it is required for the computation of dc_sym_theano. Any suggestions?
class DiceOp(theano.Op):
    """Theano Op computing the per-class dice coefficient.

    Inputs: the softmax output (one row per pixel, one column per class) and
    the flattened ground-truth label vector.  Output: a vector of n dice
    scores, one per class.
    """

    # __props__ must be a tuple of attribute names; the original
    # tuple("n") happens to equal ("n",) but the literal is clearer.
    __props__ = ("n",)

    def __init__(self, n):
        # n: number of classes (compile-time constant)
        self.n = n
        theano.Op.__init__(self)

    def make_node(self, softmax_output, gt_flat):
        softmax_output = theano.tensor.as_tensor_variable(softmax_output)
        gt_flat = theano.tensor.as_tensor_variable(gt_flat)
        return theano.Apply(self, [softmax_output, gt_flat],
                            [T.vector(dtype=softmax_output.dtype)])

    def perform(self, node, inputs, output_storage, params=None):
        # perform() runs at execution time on concrete numpy arrays, so it
        # must do numerical numpy work.  The original built symbolic
        # expressions here (T.zeros/T.eq/T.set_subtensor), which is why the
        # compiled function returned a theano expression instead of numbers.
        out_pred = inputs[0].argmax(1)
        out_gt = inputs[1].flatten()
        result = np.zeros(self.n, dtype=inputs[0].dtype)
        for i in range(self.n):
            x_pred = np.equal(out_pred, i)
            x_gt = np.equal(out_gt, i)
            intersect = np.sum(x_gt * x_pred)
            denominator = np.sum(x_pred) + np.sum(x_gt)
            result[i] = 2. * intersect / (denominator + 1e-5)
        output_storage[0][0] = result

    def infer_shape(self, node, in_shapes):
        # one dice score per class; the original returned the number of
        # pixels (in_shapes[0][0]) here, which did not match the output
        return [(self.n,)]

    def grad(self, inp, grads):
        # NOTE(review): the dice score is piecewise constant in the softmax
        # output (it only sees argmax), so this is a surrogate gradient; the
        # formula is kept as in the original implementation.
        softmax_output = inp[0]
        gt_flat = inp[1]
        (g,) = grads
        d_softm = T.zeros(softmax_output.shape, softmax_output.dtype)
        out_pred = T.argmax(softmax_output, 1)
        for i in range(self.n):
            x_pred = T.eq(out_pred, i)
            x_gt = T.eq(gt_flat, i)
            intersect = T.sum(x_gt * x_pred)
            denominator = T.sum(x_pred) + T.sum(x_gt)
            d_softm = T.set_subtensor(
                d_softm[:, i],
                T.constant(2) *
                (x_gt * denominator - T.constant(2) * intersect * softmax_output[:, i]) /
                T.power(denominator, T.constant(2.)) * g[i])
        # no gradient w.r.t. the integer ground truth labels
        return [d_softm, grad_not_implemented(self, 1, gt_flat)]


def generate_dummy_image_and_seg():
    """Build a dummy 100x100 ground truth and a matching pseudo probability
    map; returns (img_gt, img_pred, img_pred flattened to (10000, 2))."""
    img_gt = np.zeros((100, 100))
    img_gt[:50, :] = 1
    img_pred = np.zeros((100, 100, 2))
    img_pred[:, 60:, 0] = 0.52
    img_pred[:, :60, 0] = 0.3
    img_pred[:, 60:, 1] = 0.48
    img_pred[:, :60, 1] = 0.7
    img_pred_flat = img_pred.reshape((-1, 2))
    return img_gt, img_pred, img_pred_flat
if __name__ == "__main__":
    import theano
    import medpy.metric

    img_gt, img_pred, img_pred_flat = generate_dummy_image_and_seg()
    # reference dice (foreground) via medpy; the original called metric.dc,
    # but no module was ever imported under the name "metric"
    dc_medpy = medpy.metric.dc(img_gt, img_pred.argmax(-1))

    # symbolic inputs and the custom dice op
    img_gt_sym = T.vector()
    img_pred_sym = T.matrix()
    dc_op = DiceOp(2)
    dc_theano_sym = dc_op(img_pred_sym, img_gt_sym)
    dc_theano_fct = theano.function([img_pred_sym, img_gt_sym], dc_theano_sym)
    dc = dc_theano_fct(img_pred_flat.astype(np.float32),
                       img_gt.flatten().astype(np.float32))
    dc_grad_from_op = theano.function([img_pred_sym, img_gt_sym],
                                      T.grad(T.mean(dc_theano_sym), img_pred_sym))
    dc_grad_from_op(img_pred_flat.astype(np.float32),
                    img_gt.flatten().astype(np.float32))
    # Original question, preserved: "The op works so far in this very basic
    # setting, but what is strange is that dc_theano_fct does not return a
    # numerical value but a theano expression. If I do dc.eval() it returns
    # what I would actually have expected. This is not how theano functions
    # usually work, at least in my experience."
import sys
caffe_root = '/home/yonatan/Downloads/caffe-segnet-cudnn5-master/python'
sys.path.insert(0, caffe_root)
import caffe
import numpy as np
class DiceLossLayer(caffe.Layer):
    """Caffe Python layer computing a dice-based loss (1 - summed dice).

    bottom[0]: per-class scores/probabilities, one channel per class.
    bottom[1]: integer label map.
    top[0]:    scalar loss.
    """

    # number of segmentation classes (including background); shared by
    # reshape() and dice_coef_multi_class() instead of two hard-coded 5s
    NUM_CLASSES = 5

    def setup(self, bottom, top):
        # the layer needs both the prediction blob and the label blob
        if len(bottom) != 2:
            raise Exception("Need two inputs to compute distance.")

    def reshape(self, bottom, top):
        # the prediction blob must carry NUM_CLASSES values per label element
        # (the original message "Inputs must have the same dimension." did
        # not describe this check)
        if bottom[0].count != self.NUM_CLASSES * bottom[1].count:
            raise Exception(
                "Prediction blob must have NUM_CLASSES times as many "
                "elements as the label blob.")
        # buffer reused in backward(); it gets a copy of the labels
        self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
        # loss output is scalar
        top[0].reshape(1)

    def forward(self, bottom, top):
        # keep a copy of the labels for the backward pass
        # NOTE(review): self.diff has bottom[0]'s shape; this assignment
        # relies on bottom[1].data broadcasting across the class channel —
        # confirm the blob shapes against the network definition.
        self.diff[...] = bottom[1].data
        top[0].data[...] = 1 - self.dice_coef_multi_class(bottom[0], bottom[1])

    def backward(self, top, propagate_down, bottom):
        if propagate_down[1]:
            # labels are not differentiable
            raise Exception("label not diff")
        elif propagate_down[0]:
            # NOTE(review): surrogate gradient; self.dice and self.sum are
            # the scalars stored by dice_coef_multi_class and self.diff holds
            # the labels — verify this formula against the intended loss.
            a = (-2. * self.diff + self.dice) / self.sum
            bottom[0].diff[...] = a
        else:
            raise Exception("no diff")

    # =============================
    def dice_coef_multi_class(self, y_pred, y_true):
        """Return the sum of per-class dice scores over NUM_CLASSES classes.

        Side effects: stores the summed denominators in self.sum and the
        summed dice in self.dice for use by backward().
        """
        n_classes = self.NUM_CLASSES
        y_true = y_true.data
        y_pred = y_pred.data
        # hard class decision per element (channel axis is 1); the original
        # expand_dims-then-flatten pair was a no-op and is dropped
        y_pred = np.ndarray.flatten(np.argmax(y_pred, 1))
        y_true = np.ndarray.flatten(y_true)
        dice = np.zeros(n_classes)
        self.sum = np.zeros([n_classes])
        for i in range(n_classes):
            y_true_i = np.equal(y_true, i)
            y_pred_i = np.equal(y_pred, i)
            # 1e-7 keeps empty classes from dividing by zero
            self.sum[i] = np.sum(y_true_i) + np.sum(y_pred_i) + np.float32(1e-7)
            dice[i] = (2. * np.sum(y_true_i * y_pred_i) + np.float32(1e-7)) / self.sum[i]
        self.sum = np.sum(self.sum)
        self.dice = np.sum(dice)
        return self.dice