I found that if I add the crop_size parameter to the data layer definition the script runs, but then all my images are cropped to 200x200, which is not what I want.
def rain_net(lmdb, batch_size):
    n = caffe.NetSpec()
    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
                             transform_param=dict(scale=1./255, crop_size=200), ntop=2)
When I set this crop_size to 224 it gives the same error as the one mentioned above (i.e. expecting size 200 but getting 224).
How can I make sure that Caffe accepts an image size of 224x224, and how do I define the number of channels (grayscale/RGB)? I would prefer to do this in Python, but if someone knows how to fix this in the prototxt that would be appreciated as well!
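To help diagnose this, here is how I check what dimensions the LMDB actually stores (a minimal sketch, assuming the py-lmdb package is installed and that the first record is representative of the rest). If the stored datums turn out to be 200x200, I suppose that would explain why crop_size=224 is rejected.

import lmdb
from caffe.proto import caffe_pb2

env = lmdb.open('/home/simon/Downloads/snn-rgc/rainnet/train', readonly=True)
with env.begin() as txn:
    # grab the first record and decode it as a Caffe Datum
    key, value = next(txn.cursor().iternext())
datum = caffe_pb2.Datum()
datum.ParseFromString(value)
print 'channels x height x width:', datum.channels, datum.height, datum.width
env.close()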
For your reference I have included my entire script below, including the data layer prototxt. Thank you very much in advance for your time.
Prototxt
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  transform_param {
    scale: 0.00392156862745
    crop_size: 224
  }
  data_param {
    source: "/home/simon/Downloads/snn-rgc/rainnet/train"
    batch_size: 2
    backend: LMDB
  }
}
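(Side note: if it turns out that the real fix is for the LMDB itself to contain 224x224 images, I assume I would have to rebuild it, roughly like the sketch below using the caffe.io helpers. The images list, the train_224 output path, and the map_size are placeholders for my real data; for grayscale I believe load_image(path, color=False) would give one channel instead of three.)

import lmdb
import numpy as np
import caffe

# placeholder list of (filepath, label) pairs
images = [('/path/to/img0.png', 0), ('/path/to/img1.png', 1)]

env = lmdb.open('/home/simon/Downloads/snn-rgc/rainnet/train_224', map_size=int(1e9))
with env.begin(write=True) as txn:
    for i, (path, label) in enumerate(images):
        img = caffe.io.load_image(path)                        # HxWx3 float in [0, 1]
        img = caffe.io.resize_image(img, (224, 224))           # force 224x224
        arr = (img * 255).astype(np.uint8).transpose(2, 0, 1)  # to CxHxW uint8
        datum = caffe.io.array_to_datum(arr, label)
        txn.put('%08d' % i, datum.SerializeToString())
env.close()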
Script
WORKING_ROOT_FOLDER = '/home/simon/Downloads/snn-rgc/rainnet/'
import caffe
#from caffe.draw import draw_net, draw_net_to_file
from caffe.proto import caffe_pb2
from caffe import layers as L, params as P
import numpy as np
from google.protobuf import text_format
import sys
caffe.set_mode_cpu()
CAFFE_MODE = caffe_pb2.SolverParameter.CPU
TRAIN_LMDB = WORKING_ROOT_FOLDER + 'train'
TEST_LMDB = WORKING_ROOT_FOLDER + 'test'
# TRAIN_LMDB = 'rainnet/train_lmdb'
# TEST_LMDB = 'rainnet/test_lmdb'
SOLVER_FILE = WORKING_ROOT_FOLDER + 'solver.prototxt'
SNAPSHOT_PREFIX = WORKING_ROOT_FOLDER + 'snap_'
TRAIN_PROTO = WORKING_ROOT_FOLDER + 'train.prototxt'
TEST_PROTO = WORKING_ROOT_FOLDER + 'test.prototxt'
def rain_net(lmdb, batch_size):
    n = caffe.NetSpec()
    # Quick fix: crop_size=200 is used because for some reason the
    # transform_param requires 200 instead of 224.
    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
                             transform_param=dict(scale=1./255, crop_size=200), ntop=2)
    n.conv1 = L.Convolution(n.data, kernel_size=5, stride=3, num_output=20,
                            weight_filler=dict(type='xavier'), bias_filler=dict(type='xavier'))
    n.relu1 = L.ReLU(n.conv1, in_place=True)
    n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.fc1 = L.InnerProduct(n.pool1, num_output=100, weight_filler=dict(type='xavier'))
    n.score = L.InnerProduct(n.fc1, num_output=2, weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)
    return n.to_proto()
with open(TRAIN_PROTO, 'w') as f:
    f.write(str(rain_net(TRAIN_LMDB, 2)))
with open(TEST_PROTO, 'w') as f:
    f.write(str(rain_net(TEST_LMDB, 5)))
net = caffe_pb2.NetParameter()
text_format.Merge(open(TRAIN_PROTO).read(), net)
# netimg = draw_net(net,'right-left')
### define solver
s = caffe_pb2.SolverParameter()
# Set a seed for reproducible experiments:
# this controls for randomization in training.
s.random_seed = 0xCAFFE
# Specify locations of the train and (maybe) test networks.
s.train_net = TRAIN_PROTO
s.test_net.append(TEST_PROTO)
s.test_interval = 100 # Test after every 100 training iterations.
s.test_iter.append(100) # Test on 100 batches each time we test.
s.max_iter = 100 # no. of times to update the net (training iterations)
# EDIT HERE to try different solvers
# solver types include "SGD", "Adam", and "Nesterov" among others.
s.type = "SGD"
# Set the initial learning rate for SGD.
s.base_lr = 0.01 # EDIT HERE to try different learning rates
# Set momentum to accelerate learning by
# taking weighted average of current and previous updates.
s.momentum = 0.9
# Set weight decay to regularize and prevent overfitting
s.weight_decay = 5e-4
# Set `lr_policy` to define how the learning rate changes during training.
# This is the same policy as our default LeNet.
s.lr_policy = 'inv'
s.gamma = 0.0001
s.power = 0.75
# EDIT HERE to try the fixed rate (and compare with adaptive solvers)
# `fixed` is the simplest policy that keeps the learning rate constant.
# s.lr_policy = 'fixed'
# Display the current training loss every 10 iterations.
s.display = 10
# Snapshots are files used to store networks we've trained.
# We'll snapshot every 200 iterations.
s.snapshot = 200
s.snapshot_prefix = SNAPSHOT_PREFIX
# Run the solver in CPU mode (CAFFE_MODE was set to CPU above).
s.solver_mode = CAFFE_MODE
# Write the solver to a temporary file and return its filename.
with open(SOLVER_FILE, 'w') as f:
    f.write(str(s))
### load the solver and create train and test nets
solver = None # ignore this workaround for lmdb data (can't instantiate two solvers on the same data)
solver = caffe.get_solver(SOLVER_FILE)
# ### solve
niter = 250 # EDIT HERE increase to train for longer
test_interval = niter / 10
# losses will also be stored in the log
train_loss = np.zeros(niter)
test_acc = np.zeros(int(np.ceil(niter / test_interval)))
# the main solver loop
for it in range(niter):
    solver.step(1)  # SGD by Caffe
    # store the train loss
    train_loss[it] = solver.net.blobs['loss'].data
    # run a full test every so often
    # (Caffe can also do this for us and write to a log, but we show here
    # how to do it directly in Python, where more complicated things are easier.)
    if it % test_interval == 0:
        print 'Iteration', it, 'testing...'
        correct = 0
        for test_it in range(100):
            solver.test_nets[0].forward()
            correct += sum(solver.test_nets[0].blobs['score'].data.argmax(1)
                           == solver.test_nets[0].blobs['label'].data)
        test_acc[it // test_interval] = correct / 500.0  # 100 test batches of 5 images each
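One more observation, in case it is relevant (a back-of-the-envelope sketch using what I understand to be Caffe's output-size arithmetic: floor for convolution, ceil for pooling; pool1_spatial_size is just a hypothetical helper I wrote for this check): the weight shape of fc1 is determined by the input size, so a net whose fc1 was sized for 200x200 inputs cannot simply be fed 224x224 images.

def pool1_spatial_size(h):
    # conv1: kernel 5, stride 3, no padding -> floor((h - 5) / 3) + 1
    conv = (h - 5) // 3 + 1
    # pool1: kernel 2, stride 2 -> ceil((conv - 2) / 2) + 1
    pool = (conv - 2 + 1) // 2 + 1
    return pool

for h in (200, 224):
    side = pool1_spatial_size(h)
    print 'input %dx%d -> fc1 sees 20x%dx%d = %d values' % (h, h, side, side, 20 * side * side)

For 200x200 inputs this gives fc1 an input of 20x33x33 values, while 224x224 would give 20x37x37, so the two sizes are not interchangeable once fc1's weights exist.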