I have a question regarding siamese networks. I want to have two pairs of siamese networks in parallel, and then the features will be concatenated before the fully connected layers/classifier. I am using Torch (Lua). The problem is that my network cannot learn anything. I ran it on a very simple task, but it gave me chance-level training accuracy. My expectation was that my model would learn the task and after some time start to overfit, but unfortunately the accuracy stays stuck around random chance. I was wondering if someone can help me find my problem. This is my code:
-- Branch 1: AlexNet-style convolutional encoder.
-- Maps a 3x224x224 image to a 512-d feature vector.
siamese = nn.Sequential()
siamese:add(nn.SpatialConvolution(3,96,11,11,4,4,2,2)) -- 224 -> 55
siamese:add(nn.SpatialBatchNormalization(96))
siamese:add(nn.ReLU(true))
siamese:add(nn.SpatialMaxPooling(3,3,2,2)) -- 55 -> 27
siamese:add(nn.SpatialConvolution(96,256,5,5,1,1,2,2)) -- 27 -> 27
siamese:add(nn.SpatialBatchNormalization(256))
siamese:add(nn.ReLU(true))
siamese:add(nn.SpatialMaxPooling(3,3,2,2)) -- 27 -> 13
siamese:add(nn.SpatialConvolution(256,384,3,3,1,1,1,1)) -- 13 -> 13
siamese:add(nn.SpatialBatchNormalization(384))
siamese:add(nn.ReLU(true))
siamese:add(nn.SpatialConvolution(384,256,3,3,1,1,1,1)) -- 13 -> 13
siamese:add(nn.SpatialBatchNormalization(256))
siamese:add(nn.ReLU(true))
siamese:add(nn.SpatialConvolution(256,256,3,3,1,1,1,1)) -- 13 -> 13
siamese:add(nn.SpatialBatchNormalization(256))
siamese:add(nn.ReLU(true))
siamese:add(nn.SpatialMaxPooling(3,3,2,2)) -- 13 -> 6
siamese:add(nn.View(-1):setNumInputDims(3))
siamese:add(nn.Linear(256*6*6, 512))
-- BUG FIX: the original line read `siameseclone('weight', ...)`, i.e. it was
-- missing the `:` and tried to call an undefined global `siameseclone`,
-- which fails with "attempt to call global 'siameseclone' (a nil value)".
-- Use the :clone() method with parameter sharing so this tower and its
-- twin share weights AND gradient buffers.
siamese_encoder = siamese:clone('weight','bias', 'gradWeight','gradBias')
-- Branch 2: same AlexNet-style encoder architecture (3x224x224 -> 512-d),
-- built from a compact spec table instead of one :add() call per layer.
siamese1 = nn.Sequential()

-- Each entry: {nIn, nOut, kW, kH, dW, dH, padW, padH, pool_after}
local conv_specs = {
   {  3,  96, 11, 11, 4, 4, 2, 2, true },  -- 224 -> 55, pool -> 27
   { 96, 256,  5,  5, 1, 1, 2, 2, true },  --  27 -> 27, pool -> 13
   {256, 384,  3,  3, 1, 1, 1, 1, false},  --  13 -> 13
   {384, 256,  3,  3, 1, 1, 1, 1, false},  --  13 -> 13
   {256, 256,  3,  3, 1, 1, 1, 1, true },  --  13 -> 13, pool -> 6
}

for _, spec in ipairs(conv_specs) do
   siamese1:add(nn.SpatialConvolution(spec[1], spec[2], spec[3], spec[4],
                                      spec[5], spec[6], spec[7], spec[8]))
   siamese1:add(nn.SpatialBatchNormalization(spec[2]))
   siamese1:add(nn.ReLU(true))
   if spec[9] then
      siamese1:add(nn.SpatialMaxPooling(3,3,2,2))
   end
end

siamese1:add(nn.View(-1):setNumInputDims(3))
siamese1:add(nn.Linear(256*6*6, 512))

-- Weight-sharing clone: the twin's parameters and gradient buffers alias
-- siamese1's storages, so both towers stay identical during training.
siamese_encoder1 = siamese1:clone('weight','bias', 'gradWeight','gradBias')
-- Assemble the four towers in parallel: each ParallelTable member receives
-- one element of the input table {input1, input2, input3, input4} and emits
-- a 512-d feature vector.
parallel_model = nn.ParallelTable()
parallel_model:add(siamese)
parallel_model:add(siamese_encoder)
parallel_model:add(siamese1)
parallel_model:add(siamese_encoder1)

-- BUG FIX: `net` was never constructed before `net:add(...)` in the original
-- code, which fails with "attempt to index global 'net' (a nil value)".
net = nn.Sequential()
net:add(parallel_model)
net:add(nn.JoinTable(2))             -- concat along the feature dim: 4 x 512 -> 2048
net:add(nn.Linear(512*4, 4096))
net:add(nn.BatchNormalization(4096))
net:add(nn.ReLU())
net:add(nn.Dropout(0.5))
net:add(nn.Linear(4096, 4096))
net:add(nn.BatchNormalization(4096))
net:add(nn.ReLU())
net:add(nn.Dropout(0.5))
net:add(nn.Linear(4096, opt.nClasses))
-- Parameter initialization callback for Module:apply().
-- Convolution layers get small zero-mean Gaussian weights and zero biases;
-- batch-normalization layers get unit-mean Gaussian scales and zero shifts.
-- All other module types (Linear, ReLU, pooling, ...) are left untouched.
local function weights_init(module)
   local layer_type = torch.type(module)
   if layer_type:find('Convolution') then
      module.weight:normal(0.0, 0.01)
      module.bias:fill(0)
   elseif layer_type:find('BatchNormalization') then
      -- BN layers created without affine params have no weight/bias; guard.
      if module.weight then module.weight:normal(1.0, 0.02) end
      if module.bias then module.bias:fill(0) end
   end
end
-- Re-initialize all parameters. Because the encoder clones were created with
-- shared storages ('weight','bias','gradWeight','gradBias'), writing new
-- values through siamese/siamese1 also updates their clones, so the sharing
-- survives this re-init.
-- NOTE(review): net:apply(weights_init) already traverses every submodule,
-- including siamese and siamese1 (they were :add()ed into net), so the first
-- two calls are redundant — harmless, but they do the same work twice.
siamese:apply(weights_init)
siamese1:apply(weights_init)
net:apply(weights_init)
-- another part of the code to forward the inputs and calculate training loss:
-- NOTE(review): no net:zeroGradParameters() and no parameter-update step are
-- visible in this snippet. If the surrounding training loop omits them, the
-- gradients accumulate across iterations / the weights never change, either
-- of which would explain chance-level accuracy — confirm against the full
-- training loop. Also verify that net:getParameters() (if used with optim)
-- is called AFTER the sharing clones were created, since flattening
-- parameters can break previously established storage sharing.
output = net:forward({input1,input2,input3,input4})
print (output)
err = criterion:forward(output, label)
local df_do = criterion:backward(output, label)
-- Backward through the whole net; df_do is dLoss/dOutput from the criterion.
net:backward({input1,input2,input3,input4}, df_do)
Any help will be much appreciated!!
Thanks in advance!!