# important training parameters (others are described later below)
# regularization factor
reg=.00001
# time (in samples) after which to start regularizing
reg_time=0
# learning rate
eta=.00001
# number of samples used for 2nd derivatives estimation
ndiaghessian=400
# calculates 2nd derivatives after every n samples
hessian_period=20000
# 0: fixed number of samples per iteration, 1: show all samples at least once
epoch_mode=0
# number of samples to show per iteration
epoch_size=10000
# number of training iterations
iterations=1000
# training data ################################################################
# root directory of the dataset
root=${HOME}/eblearn/demos/svhn/svhn/ToUse
# name of train dataset
train_dsname=svhn_yuv7_train
# name of validation dataset
val_dsname=svhn_yuv7_val
# training set files, built from ${root} and ${train_dsname}
train=${root}/${train_dsname}_data.mat
train_labels=${root}/${train_dsname}_labels.mat
train_classes=${root}/${train_dsname}_classes.mat
# uncomment to limit the number of training samples
# train_size = 10000
# validation set files, built from ${root} and ${val_dsname}
val=${root}/${val_dsname}_data.mat
val_labels=${root}/${val_dsname}_labels.mat
val_classes=${root}/${val_dsname}_classes.mat
# uncomment to limit the number of validation samples
# val_size = 200
# network high level parameters ################################################
# 1 is classification, 0 is regression
classification=1
# all modules with the same name share weights
shared=1
# input has 3 channels
input_thickness=3
# put norm before (bef) and/or after (aft) subsampling
aft=1
bef=0
# type of pooling to use
pooling=l4pool
# type of nonlinearity to use
nonlin=tanh
# type of shrink to use
shrink=lshrink0
learn_norm=0
learn_mean_norm=0
gaussian_coeff=4
# type of normalization
norm=snorm
# divisive norm or not
norm_div=0
norm_split=1
snorm_div=${norm_div}
snorm_split=${norm_split}
# l1 penalty switch: selects which penalty<N> entry below is used
l1=1
# expands to ${penalty0} or ${penalty1} depending on the value of l1
penalty=${penalty${l1}}
penalty0=
penalty1=l1penalty
l1penalty_threshold=0
l1penalty_coeff=.00001
# network architecture #########################################################
# We describe the following architecture in the section below
############################################################
#
#
#
# I ******* ******* *******
# M * * * * * *
# A * Y * * U * * V *
# G * * * * * *
# E ***.*** ***.*** ***.***
# . . .
# . . .
# _______ *****************************************
# F | * Convolution layer5x5 kernel * 16 feature maps
# E | *****************************************
# A | *****************************************
# T | *l4pooling layer 2x2 kernel, 2x2 stride* 16 feature maps
# U | ********.***********.********************
# R | . .
# E | ********.******* .
# | *conv layer 7x7* **********************
# E | ********.******* * l4pool 5x5 kernel * 512 feature maps
# X | ********.******* * 3x3 stride *
# T | * l4pool 2x2 * ***********.**********
# R | ********.******* .
# A | ********.*******************.************
# C | * merge layer(merges the two inputs)*
# T | *****************************************
# O | .
# R | .
# ______| $$$$$$$$$$$$.$$$$$$$$$$$
# | $ 20 hidden units $
# C | $ $
# L | $$$$$$$$$$$$.$$$$$$$$$$$
# A | .
# S | .
# S | $$$$$$$$.$$$$$$$
# I | $ 10 outputs $
# F | $ $
# I | $$$$$$$$$$$$$$$$
# E |
# R |
# ______|
#
############################################################
# for reference: approximately what arch expands to with the settings in this
# file (the ms2 pipes are not expanded here)
#arch = conv051,addc0,tanh,l4pool22,snorm5,ms2,merge2,linear6,addc6,tanh,linear7,addc7,tanh
# the network is the feature extractor followed by the classifier
arch= ${features},${classifier}
# feature extractors
# first convolution + subsampling stage, then the ms2 branches, then merge2
features=${c05sh},${s022_5},ms2,merge2
# ms2 is a pipe from a sequential layer to two parallel layers
# which would be joined back at the end of the pipe
# pipe0: second convolution followed by 2x2 subsampling
ms2_pipe0=${c17sh},${s122_7}
# pipe1: 5x5 subsampling with 3x3 stride only
ms2_pipe1=${s153_5}
# classifier
# two linear layers, each followed by a bias (addc) and the nonlinearity
classifier=${f6},${f7}
f6=linear6,addc6,${nonlin}
f7=linear7,addc7,${nonlin}
# features parameters ##########################################################
# convolution layers: convolution, bias (addc) and nonlinearity
c05sh=conv051,addc0,${nonlin}
c17sh=conv171,addc2,${nonlin}
# subsampling layers with norm
# Each layer is <norm before>,<pooling>,<norm after>. A norm slot expands to
# norm<flag><size>, where <flag> is the value of bef or aft and <size> is the
# trailing digit: the norm0<size> entries are empty (no normalization) and the
# norm1<size> entries name the normalization module.
s022_5=${norm${bef}5},${pooling}22,${norm${aft}5}
s153_5=${norm${bef}5},${pooling}53,${norm${aft}5}
s122_7=${norm${bef}7},${pooling}22,${norm${aft}7}
norm03=
norm05=
norm06=
# required by s122_7 whenever bef or aft is 0; was previously undefined
norm07=
norm09=
norm15=${norm}5
norm17=${norm}7
# l4 poolings
# 2x2 pooling with 2x2 stride; energy is set from the penalty variable
l4pool22_kernel=2x2
l4pool22_stride=2x2
l4pool22_energy=${penalty}
# 5x5 pooling with 3x3 stride; energy is set from the penalty variable
l4pool53_kernel=5x5
l4pool53_stride=3x3
l4pool53_energy=${penalty}
# contrast normalizations
# 5x5 normalization; all options follow the high level parameters
snorm5_kernel=5x5
snorm5_learn=${learn_mean_norm}
snorm5_gaussian_coeff=${gaussian_coeff}
snorm5_fsum_div=${norm_div}
snorm5_fsum_split=${norm_split}
# 7x7 normalization; same options as snorm5
snorm7_kernel=7x7
snorm7_learn=${learn_mean_norm}
snorm7_gaussian_coeff=${gaussian_coeff}
snorm7_fsum_div=${norm_div}
snorm7_fsum_split=${norm_split}
# convolutions
# first convolution: 5x5 kernel, 1x1 stride, connections given by table0
conv051_kernel=5x5
conv051_shared=${shared}
conv051_stride=1x1
conv051_table=${table0}
table0=table_3_16_connect_32_fanin_density_0.67_yuv_y8_u0_v0_yuv16_uv0.mat
# second convolution: 7x7 kernel, 1x1 stride, connections given by table1
conv171_kernel=7x7
conv171_shared=${shared}
conv171_stride=1x1
conv171_table=${table1}
table1=table_16_512_connect_1792_fanin_2_3_4_5_density_0.22_random.mat
# table1 file name fields:
# 16 input nodes
# 512 output nodes
# 1792 connected nodes
# fanin = number of inputs connected to each output
# if fanin is used, do not use density
# merging
# NOTE(review): the other merge keys use the merge2_ prefix; confirm that
# merge_padding (without the 2) is the key name the program actually reads
merge_padding=0
merge2_type=mflat
merge2_ins=4x4,4x4
merge2_strides=1x1,1x1
# classifiers parameters #######################################################
# presumably 512*4*4 + 16*4*4 = 8448 flattened features out of merge2 -- TODO
# confirm; must be updated if the feature extractor changes
linear6_in=8448
# number of hidden units
linear6_out=20
linear7_in=${linear6_out}
# NOTE(review): literal keyword, not a ${...} reference; presumably replaced
# by the number of classes at load time -- confirm
linear7_out=noutputs
# energies & answers ###########################################################
# trainer module name and the energy it uses
trainer=trainable_module1
trainable_module1_energy=l2_energy
answer=class_answer
# training params ##############################################################
# both regularization terms use the same factor, reg
reg_l1=${reg} # L1 regularization
reg_l2=${reg} # L2 regularization
inertia = 0.0 # gradient inertia
anneal_value = 0.0 # learning rate decay value
# key was misspelled "annea_period", so it did not match the anneal_* naming
anneal_period = 0 # period (in samples) at which to decay learning rate
gradient_threshold = 0.0
epoch_show_modulo=1000 # print message every n training samples
no_testing_test=0 # do not test on test set if 1
no_training_test=1 # do not test on training set if 1
test_only=0 # if 1, just test the data and return
sample_probabilities=0 # use probabilities to pick samples
hardest_focus=1 # 0: focus on easiest samples 1: focus on hardest ones
ignore_correct=1 # If 1, do not train on correctly classified samples
min_sample_weight=0 # minimum probability of each sample
per_class_norm=1 # normalize probability by class (1) or globally (0)
shuffle_passes=1 # shuffle samples between passes
balanced_training=1 # show each class the same amount of samples or not
random_class_order=1 # class order is randomized or not when balanced
target_factor=1
save_pickings=0 # save sample picking statistics
binary_target=0 # use only 1 output, -1 or +1
save_weights=1 # if 0, do not save weights after each iteration
save_confusion=0 # if 0, do not save confusion matrix after each iter
keep_outputs=0 # keep the predicted outputs in this iteration
fixed_randomization=1 # if 1, uses a fixed randomization seed
training_precision=double # training precision
# training display #############################################################
show_conf=1
# enable/disable all training display
show_train=1
# number of internal examples to display
show_train_ninternals=1
# show worst errors on training set
show_train_errors=0
# show worst errors on validation set
show_val_errors=1
# show worst correct answers on validation set
show_val_correct=1
# number of samples to show on height axis
show_hsample=5
# number of samples to show on width axis
show_wsample=18