Dear All,
I want to do multi-target regression, but I am currently stuck on a problem.
My setup is as follows:
I want to do pixel-wise 3D object coordinate estimation in an image. My image data is a combined color and depth (RGB-D) image.
Training is done patch-wise: each sample is a small image patch around a center pixel, together with the ground-truth label for that center pixel.
For the dataset I use two LMDB files: one contains the data for every image patch as float values, the other the ground-truth 3D coordinate label for the center pixel (see below for example data from both LMDBs, followed by a short read-back sketch).
As loss function I use Euclidean loss (Caffe's EuclideanLoss, i.e. 1/(2N) * sum_n ||prediction_n - label_n||^2). So far I have trained on very large datasets without success: the loss starts out very large but decreases quickly to around 0.1 in most cases; however, the estimated object coordinates are always incorrect.
In order to check for implementation mistakes, I trained on a very small dataset (120 training / 40 test patches) and tried to overfit the network to my data. After about 1000 iterations I reached relatively low loss values (about 0.03); however, when I estimate the object coordinate of the center pixel for a training patch, I get the same predicted values for every image/patch, which cannot be correct. (A minimal sketch of this prediction pass is included after the network definition below.)
Thank you in advance for any help.
Please find below samples of the LMDB data and my network setup.
Best wishes,
Manuel
Ground truth LMDB:
key: 00000000000000000000
label: 0
channels: 1
height: 1
width: 3
[[-0.0556 -0.23899999 0.156 ]]
..
Image-Data LMDB:
key: 00000000000000000000
label: 0
channels: 4
height: 127
width: 127
[[ 39.12133408 39.12133408 39.12133408 ..., -25.87866592
-28.87866592 -30.87866592]
[ 36.13559341 36.13559341 37.13559341 ..., -26.86440659
-28.86440659 -30.86440659]
[ 29.57616806 29.57616806 30.57616806 ..., -32.42383194
-33.42383194 -34.42383194]
...,
[ 101. 102. 104. ..., 93. 92. 92. ]
[ 102. 103. 105. ..., 93. 92. 91. ]
[ 103. 104. 106. ..., 93. 91. 90. ]]
[[ 34.12133408 31.12133408 31.12133408 ..., -40.87866592
-41.87866592 -40.87866592]
[ 34.13559341 30.13559341 29.13559341 ..., -38.86440659
-40.86440659 -39.86440659]
[ 23.57616806 21.57616806 20.57616806 ..., -46.42383194
-47.42383194 -48.42383194]
...,
[ 89. 91. 91. ..., 87. 86. 86. ]
[ 94. 95. 96. ..., 89. 87. 88. ]
[ 98. 100. 98. ..., 86. 86. 86. ]]
[[ 40.12133408 37.12133408 39.12133408 ..., -41.87866592 -42.87866592
-42.87866592]
[ 42.13559341 38.13559341 41.13559341 ..., -45.86440659 -47.86440659
-47.86440659]
[ 37.57616806 34.57616806 32.57616806 ..., -52.42383194 -53.42383194
-54.42383194]
...,
[ 90. 90. 89. ..., 91. 90. 89. ]
[ 93. 93. 93. ..., 92. 91. 90. ]
[ 94. 96. 95. ..., 90. 89. 90. ]]
[[ 121. 121. 129. ..., 1489. 1463. 1463.]
[ 130. 130. 130. ..., 1491. 1490. 1490.]
[ 130. 130. 121. ..., 1491. 1491. 1491.]
...,
[ -32. -33. -32. ..., -135. -135. -141.]
[ -39. -47. -40. ..., -141. -141. -141.]
[ -46. -52. -46. ..., -146. -141. -147.]]
...
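
For reference, entries like the ones above can be read back with the Python lmdb bindings and Caffe's Datum protobuf. This is only a minimal sketch (the LMDB paths are the training ones from the network definition below; everything else is generic pycaffe):

import lmdb
import caffe

# Open both LMDBs read-only; the paths match the data_param sources below.
data_env = lmdb.open("train_120-lmdb", readonly=True)
label_env = lmdb.open("train_120-objCoords-lmdb", readonly=True)

with data_env.begin() as data_txn, label_env.begin() as label_txn:
    for key, raw in data_txn.cursor():
        datum = caffe.proto.caffe_pb2.Datum()
        datum.ParseFromString(raw)
        patch = caffe.io.datum_to_array(datum)   # shape (channels, height, width)

        label_datum = caffe.proto.caffe_pb2.Datum()
        label_datum.ParseFromString(label_txn.get(key))  # same key in both LMDBs
        coord = caffe.io.datum_to_array(label_datum)     # shape (1, 1, 3)

        print("key:", key)
        print("channels: %d height: %d width: %d"
              % (datum.channels, datum.height, datum.width))
        print(coord)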
This is the network I use for training:
name: "Wohlhart_CVPR_15_Pose"
# Data-layer - TRAIN
layer {
name: "data"
type: "Data"
top: "data"
top: "ignorelabel1"
include {
phase: TRAIN
}
data_param {
source: "train_120-lmdb"
batch_size: 120
backend: LMDB
}
}
layer {
name: "labels"
type: "Data"
top: "labels"
top: "ignorelabel2"
include {
phase: TRAIN
}
data_param {
source: "train_120-objCoords-lmdb"
batch_size: 120
backend: LMDB
}
}
# Data-layer - TEST
layer {
name: "data"
type: "Data"
top: "data"
top: "ignorelabel1"
include {
phase: TEST
}
data_param {
source: "test_40-lmdb"
batch_size: 40
backend: LMDB
}
}
layer {
name: "labels"
type: "Data"
top: "labels"
top: "ignorelabel2"
include {
phase: TEST
}
data_param {
source: "test_40-objCoords-lmdb"
batch_size: 40
backend: LMDB
}
}
#----- Conv 1 ---------
layer {
bottom: "data"
top: "conv1"
name: "conv1"
type: "Convolution"
convolution_param {
num_output: 16
pad: 0
kernel_size: 8
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv1"
top: "conv1"
name: "relu1"
type: "ReLU"
}
layer {
bottom: "conv1"
top: "pool1"
name: "pool1"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
#----- Conv 2 ---------
layer {
bottom: "pool1"
top: "conv2"
name: "conv2"
type: "Convolution"
convolution_param {
num_output: 7
pad: 0
kernel_size: 5
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv2"
top: "conv2"
name: "relu2"
type: "ReLU"
}
layer {
bottom: "conv2"
top: "pool2"
name: "pool2"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
#----- FC 1 ---------
layer {
bottom: "pool2"
top: "fc1"
name: "fc1"
type: "InnerProduct"
inner_product_param {
num_output: 256
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
bottom: "fc1"
top: "fc1"
name: "relu3"
type: "ReLU"
}
#----- FC 2 ---------
layer {
bottom: "fc1"
top: "fc2"
name: "fc2"
type: "InnerProduct"
inner_product_param {
num_output: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
# ----- Loss --------
layer {
name: "test_loss"
type: "EuclideanLoss"
bottom: "fc2"
bottom: "labels"
top: "test_loss"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "EuclideanLoss"
bottom: "fc2"
bottom: "labels"
top: "loss"
}
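
For completeness, the prediction pass mentioned above could look like the following (a minimal pycaffe sketch; "deploy.prototxt" stands for a deploy variant of the network above with the Data and loss layers replaced by an input blob, and "snapshot.caffemodel" for the trained weights, both names being placeholders):

import numpy as np
import caffe

# Placeholder file names: a deploy version of the network above and a snapshot.
net = caffe.Net("deploy.prototxt", "snapshot.caffemodel", caffe.TEST)

# One 4-channel 127x127 patch as float32, same channel order as in the LMDB.
patch = np.zeros((4, 127, 127), dtype=np.float32)  # fill with real patch data

net.blobs["data"].reshape(1, 4, 127, 127)
net.blobs["data"].data[0] = patch
net.forward()

coord = net.blobs["fc2"].data[0]  # the 3 regressed object coordinate values
print(coord)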