I'm trying to get an intuition as to which parameters might be relaxed in spyking circus to speed up performance. Currently, for 1 hour of recording across 374 channels (385 total, with 11 reference channels marked as dead), the code has been running for at least 2 hours. The estimated time remaining for the main part of the algorithm ("here comes spyking circus") is another 4 hours. This is on a machine with 40 cores (38 being used by spyking circus) and 128 GB of memory, with the data stored on an SSD. The params file is below. I can certainly imagine scaling upward in terms of cores etc., but I wondered whether I could adjust these parameters to speed up the overall algorithm. Or is this the standard run time?
[data]
file_format = raw_binary # Can be raw_binary, openephys, hdf5, ... See >> spyking-circus help -i for more info
data_dtype = int16
sampling_rate = 30000
nb_channels = 385
stream_mode = None # None by default. Can be multi-files, or anything depending to the file format
mapping = /data2/part1/probe.prb # Mapping of the electrode (see http://spyking-circus.rtfd.org)
suffix = # Suffix to add to generated files, if needed
overwrite = True # Filter or remove artefacts on site (if write access is possible). Data are duplicated otherwise
parallel_hdf5 = True # Use the parallel HDF5 feature (if available)
output_dir = # By default, generated data are in the same folder as the data.
[detection]
radius = auto # Radius [in um] (if auto, read from the prb file)
N_t = 3 # Width of the templates [in ms]
spike_thresh = 4 # Threshold for spike detection
peaks = negative # Can be negative (default), positive or both
dead_channels = {0: [36,75,112,151,188,227,264,303,340,379,384]} # If not empty or specified in the probe, a dictionary {channel_group : [list_of_dead_ids]}
[filtering]
cut_off = 300, auto # Min and Max (auto=nyquist) cut off frequencies for the band pass butterworth filter [Hz]
filter = True # If True, then band-pass filtering is performed (see cut_off)
remove_median = False # If True, medians over all channels within shanks are subtracted (movement artifacts)
common_ground = # If you want to use channels as ground within shanks {channel_group : ground_channel}
[triggers]
trig_file = # External stimuli to be considered as putative artefacts [in trig units] (see documentation)
trig_windows = # The time windows of those external stimuli [in trig units]
trig_unit = ms # The unit in which times are expressed: can be ms or timestep
clean_artefact = False # If True, external artefacts induced by triggers will be suppressed from data
dead_file = # Portion of the signals that should be excluded from the analysis [in dead units]
dead_unit = ms # The unit in which times for dead regions are expressed: can be ms or timestep
ignore_times = False # If True, any spike in the dead regions will be ignored by the analysis
make_plots = # Generate sanity plots of the averaged artefacts [Nothing or None if no plots]
[whitening]
spatial = True # Perform spatial whitening
max_elts = 1000 # Max number of events per electrode (should be compatible with nb_elts)
nb_elts = 0.8 # Fraction of max_elts that should be obtained per electrode [0-1]
output_dim = 5 # Can be in percent of variance explained, or num of dimensions for PCA on waveforms
[clustering]
extraction = median-raw # Can be either median-raw (default) or mean-raw
sub_dim = 10 # Number of dimensions to keep for local PCA per electrode
max_elts = 10000 # Max number of events per electrode (should be compatible with nb_elts)
nb_elts = 0.8 # Fraction of max_elts that should be obtained per electrode [0-1]
nb_repeats = 3 # Number of passes used for the clustering
smart_search = True # Activate the smart search mode
merging_method = nd-bhatta # Method to perform local merges (distance, dip, folding, nd-folding, bhatta, nd-bhatta)
merging_param = default # Merging parameter (see docs) (3 if distance, 0.5 if dip, 1e-9 if folding, 2 if bhatta)
sensitivity = 3 # Single parameter for clustering sensitivity. The lower the more sensitive
cc_merge = 0.95 # If CC between two templates is higher, they are merged
dispersion = (5, 5) # Min and Max dispersion allowed for amplitudes [in MAD]
fine_amplitude = True # Optimize the amplitudes and compute a purity index for each template
make_plots = # Generate sanity plots of the clustering [Nothing or None if no plots]
[fitting]
amp_limits = (0.3, 5) # Amplitudes for the templates during spike detection [if not auto]
amp_auto = True # True if amplitudes are adjusted automatically for every template
collect_all = True # If True, one garbage template per electrode is created, to store unfitted spikes
ratio_thresh = 0.9 # Ratio of the spike_threshold used while fitting [0-1]. The lower the slower
mse_error = False # If True, RMS is collected over time, to assess quality of reconstruction
[merging]
erase_all = True # If False, a prompt will ask you to remerge if merging has already been done
cc_overlap = 0.75 # Only templates with CC higher than cc_overlap may be merged
cc_bin = 2 # Bin size for computing CC [in ms]
default_lag = 5 # Default length of the period to compute dip in the CC [ms]
auto_mode = 0.75 # Between 0 (aggressive) and 1 (no merging). If empty, GUI is launched
remove_noise = True # If True, meta merging will remove obvious noise templates (weak amplitudes)
noise_limit = 0.75 # Amplitude at which templates are classified as noise
sparsity_limit = 0 # Sparsity level (in percentage) for selecting templates as putative noise (in [0, 1])
time_rpv = 5 # Time [in ms] to consider for Refraction Period Violations (RPV) (0 to disable)
rpv_threshold = 0.02 # Percentage of RPV allowed while merging
merge_drifts = True # Try to automatically merge drifts, i.e. non overlapping spiking neurons
drift_limit = 1 # Distance for drifts. The higher, the more non-overlapping the activities should be
clean_merging = False # When templates are merged, automatically remove duplicated spikes (less than 0.5ms apart)
[converting]
erase_all = True # If False, a prompt will ask you to export if export has already been done
export_pcs = prompt # Can be prompt [default] or one of none, all, some
export_all = False # If True, unfitted spikes will be exported as the last Ne templates
sparse_export = True # For recent versions of phy, and large number of templates/channels
prelabelling = False # If True, putative labels (good, noise, best, mua) are pre-assigned to neurons
rpv_threshold = 0.05 # Percentage of RPV allowed while labelling neurons as good neurons
[validating]
nearest_elec = auto # Validation channel (e.g. electrode closest to the ground truth cell)
max_iter = 200 # Maximum number of iterations of the stochastic gradient descent (SGD)
learning_rate = 1.0e-3 # Initial learning rate which controls the step-size of the SGD
roc_sampling = 10 # Number of points to estimate the ROC curve of the BEER estimate
test_size = 0.3 # Portion of the dataset to include in the test split
radius_factor = 0.5 # Radius factor to modulate physical radius during validation
juxta_dtype = uint16 # Type of the juxtacellular data
juxta_thresh = 6 # Threshold for juxtacellular detection
juxta_valley = False # True if juxta-cellular spikes are negative peaks
juxta_spikes = # If none, spikes are automatically detected based on juxta_thresh
filter = True # Whether the juxta channel needs to be filtered or not
make_plots = png # Generate sanity plots of the validation [Nothing or None if no plots]
[extracting]
safety_time = 1 # Temporal zone around which spikes are isolated [in ms]
max_elts = 1000 # Max number of collected events per template
output_dim = 5 # Percentage of variance explained while performing PCA
cc_merge = 0.975 # If CC between two templates is higher, they are merged
noise_thr = 0.8 # Minimal amplitudes are such that amp*min(templates) < noise_thr*threshold
[noedits]
filter_done = True #!! AUTOMATICALLY EDITED: DO NOT MODIFY !!
artefacts_done = False # Will become True automatically after removing artefacts
median_done = False # Will become True automatically after removing common median
ground_done = False # Will become True automatically after removing common ground