Hi Oliver,
Thanks for the response! For
context, I am trying to cluster conformations from a coarse grained
simulation of 100 protein monomers (164 particles each) performed with HOOMD-Blue. The
Universe object is built from a .gsd file representing the trajectory
from this simulation. Presumably this isn't a standard application for
ENCORE's clustering algorithm. Since posting this question I was able to cluster the monomer structures by splitting each into their respective coordinate arrays and using RMSDs (calculated by passing arrays to mda.analysis.rms.rmsd) to guide the clustering, so I don't need to use ENCORE anymore. Maybe this stack trace will be helpful for someone else though!
To get the
error, I pulled a single frame from the trajectory using tools provided
by HOOMD and saved it as a new .gsd file. For what it's worth, the error does not change if a gsd file with more than one frame is used. I tried to cluster the
monomers within that file (I don't know if ENCORE or MDAnalysis would be
aware of whether or not atoms would be part of one monomer or another,
so I just gave it a shot) as follows:
>>>universe = mda.Universe("../single_frame_trajectory.gsd")
>>>
universe.atoms
<AtomGroup with 16400 atoms>
>>>
universe.residues
<ResidueGroup with 1 residue>
>>>
first_five = universe.atoms[:5]
>>>
first_five
<AtomGroup [<Atom 1: GLY of type GLY of resname 0, resid 0
and segid SYSTEM>, <Atom 2: MET of type MET of resname 0, resid 0
and segid SYSTEM>, <Atom 3: ALA of type ALA of resname 0, resid 0
and segid SYSTEM>, <Atom 4: SER of type SER of resname 0, resid 0
and segid SYSTEM>, <Atom 5: ASN of type ASN of resname 0, resid 0
and segid SYSTEM>]>
>>>
len(universe.trajectory)
1
>>>cc = encore.cluster(universe)
ZeroDivisionError Traceback (most recent call last)
<ipython-input-15-de3be95c15d2> in <module>
----> 1 cc = encore.cluster(universe)
~/miniconda3/lib/python3.8/site-packages/MDAnalysis/analysis/encore/clustering/cluster.py in cluster(ensembles, method, select, distance_matrix, allow_collapsed_result, ncores, **kwargs)
197 distance_matrix = []
198 for merged_ensemble in merged_ensembles:
--> 199 distance_matrix.append(get_distance_matrix(merged_ensemble,
200 select=select,
201 **kwargs))
~/miniconda3/lib/python3.8/site-packages/MDAnalysis/analysis/encore/confdistmatrix.py in get_distance_matrix(ensemble, select, load_matrix, save_matrix, superimpose, superimposition_subset, weights, n_jobs, max_nbytes, verbose, *conf_dist_args, **conf_dist_kwargs)
365 # Use superimposition subset, if necessary. If the pairwise alignment
366 # is not required, it will not be performed anyway.
--> 367 confdistmatrix = conformational_distance_matrix(ensemble,
368 conf_dist_function=set_rmsd_matrix_elements,
369 select=select,
~/miniconda3/lib/python3.8/site-packages/MDAnalysis/analysis/encore/confdistmatrix.py in conformational_distance_matrix(ensemble, conf_dist_function, select, superimposition_select, n_jobs, pairwise_align, weights, metadata, verbose, max_nbytes)
174 # fitter worker does.
175 indices = trm_indices((0, 0), (framesn - 1, framesn - 1))
--> 176 Parallel(n_jobs=n_jobs, verbose=verbose, require='sharedmem',
177 max_nbytes=max_nbytes)(delayed(conf_dist_function)(
178 np.int64(element),
~/miniconda3/lib/python3.8/site-packages/joblib/parallel.py in __call__(self, iterable)
1046 # remaining jobs.
1047 self._iterating = False
-> 1048 if self.dispatch_one_batch(iterator):
1049 self._iterating = self._original_iterator is not None
1050
~/miniconda3/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
864 return False
865 else:
--> 866 self._dispatch(tasks)
867 return True
868
~/miniconda3/lib/python3.8/site-packages/joblib/parallel.py in _dispatch(self, batch)
782 with self._lock:
783 job_idx = len(self._jobs)
--> 784 job = self._backend.apply_async(batch, callback=cb)
785 # A job can complete so quickly than its callback is
786 # called before we get here, causing self._jobs to
~/miniconda3/lib/python3.8/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~/miniconda3/lib/python3.8/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~/miniconda3/lib/python3.8/site-packages/joblib/parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/miniconda3/lib/python3.8/site-packages/joblib/parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/miniconda3/lib/python3.8/site-packages/MDAnalysis/analysis/encore/confdistmatrix.py in set_rmsd_matrix_elements(tasks, coords, rmsdmat, weights, fit_coords, fit_weights, *args, **kwargs)
235 sumweights = np.sum(weights)
236 subset_weights = np.asarray(fit_weights) / np.mean(fit_weights)
--> 237 com_i = np.average(fit_coords[i], axis=0,
238 weights=fit_weights)
239 translated_i = coords[i] - com_i
<__array_function__ internals> in average(*args, **kwargs)
~/miniconda3/lib/python3.8/site-packages/numpy/lib/function_base.py in average(a, axis, weights, returned)
420 scl = wgt.sum(axis=axis, dtype=result_dtype)
421 if np.any(scl == 0.0):
--> 422 raise ZeroDivisionError(
423 "Weights sum to zero, can't be normalized")
424
ZeroDivisionError: Weights sum to zero, can't be normalized
I also tried passing weights = None to ENCORE, which produced the following error:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-16-a698f8d90f8d> in <module>
----> 1 cc = encore.cluster(universe, weights = None)
~/miniconda3/lib/python3.8/site-packages/MDAnalysis/analysis/encore/clustering/cluster.py in cluster(ensembles, method, select, distance_matrix, allow_collapsed_result, ncores, **kwargs)
197 distance_matrix = []
198 for merged_ensemble in merged_ensembles:
--> 199 distance_matrix.append(get_distance_matrix(merged_ensemble,
200 select=select,
201 **kwargs))
~/miniconda3/lib/python3.8/site-packages/MDAnalysis/analysis/encore/confdistmatrix.py in get_distance_matrix(ensemble, select, load_matrix, save_matrix, superimpose, superimposition_subset, weights, n_jobs, max_nbytes, verbose, *conf_dist_args, **conf_dist_kwargs)
365 # Use superimposition subset, if necessary. If the pairwise alignment
366 # is not required, it will not be performed anyway.
--> 367 confdistmatrix = conformational_distance_matrix(ensemble,
368 conf_dist_function=set_rmsd_matrix_elements,
369 select=select,
~/miniconda3/lib/python3.8/site-packages/MDAnalysis/analysis/encore/confdistmatrix.py in conformational_distance_matrix(ensemble, conf_dist_function, select, superimposition_select, n_jobs, pairwise_align, weights, metadata, verbose, max_nbytes)
149 subset_weights = None
150 elif weights is None:
--> 151 weights = np.ones((ensemble.trajectory.timeseries(
152 ensemble.select_atoms(select))[0].shape[0])).astype(np.float64)
153 if pairwise_align:
IndexError: index 0 is out of bounds for axis 0 with size 0