As discussed in another thread on this forum, I was having trouble attaching trial-by-trial measures to my simulated PPC data using hddm.utils.post_pred_gen(m, append_data=True). Below is representative output:
| node | sample | (trial) | rt_sampled | index | subj_idx | rt | response | revNum |
|---|---|---|---|---|---|---|---|---|
| wfpt.10 | 0 | 1103 | 1.533658 | NaN | NaN | NaN | NaN | NaN |
| | | 1104 | 0.908446 | NaN | NaN | NaN | NaN | NaN |
| | | 1105 | -1.542960 | NaN | NaN | NaN | NaN | NaN |
| | | 1106 | 1.115957 | NaN | NaN | NaN | NaN | NaN |
| | | 1107 | 0.708157 | NaN | NaN | NaN | NaN | NaN |
| | | 1108 | 2.129046 | NaN | NaN | NaN | NaN | NaN |
| | | 1109 | 1.220014 | NaN | NaN | NaN | NaN | NaN |
| | | 1110 | 0.765557 | NaN | NaN | NaN | NaN | NaN |
| | | 1111 | 0.674214 | NaN | NaN | NaN | NaN | NaN |
| | | 1112 | 3.590046 | NaN | NaN | NaN | NaN | NaN |
The per-node output from post_pred_stats came out the same way, with all of the upper-boundary stats empty:

| node | sample | accuracy | mean_ub | std_ub | 10q_ub | 30q_ub | 50q_ub |
|---|---|---|---|---|---|---|---|
| wfpt.10 | 0 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| | 1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| wfpt.11 | 0 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| | 1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| wfpt.12 | 0 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| | 1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| wfpt.13 | 0 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| | 1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |

(further columns omitted)
Here's the full notebook workflow I ended up using, including a manual workaround:

# Import requisite packages
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import hddm
import pickle
from patsy import dmatrix
# Get around a problem with saving regression outputs in Python 3
def savePatch(self, fname):
    import pickle
    with open(fname, 'wb') as f:
        pickle.dump(self, f)

hddm.HDDM.savePatch = savePatch
# Load data from csv file into a pandas DataFrame
data = hddm.load_csv('../../data.csv')
# Idiot check: make sure you're working with the right dataset
data.head(10)
# Build and run regression model
mA = hddm.HDDMRegressor(data,
                        ["v ~ C(revNum, Treatment('Alone'))",
                         "a ~ C(revNum, Treatment('Alone'))",
                         "z ~ C(revNum, Treatment('Alone'))"],
                        group_only_regressors=True,
                        p_outlier=.05,
                        include={'z'})
mA.find_starting_values()
mA.sample(100, burn=20, dbname='mA.db', db='pickle')
# Save the fitted model using the patch defined above
mA.savePatch('mA')
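For completeness, reading the model back in later is just the mirror image of savePatch (a minimal sketch using plain pickle):

# Read the pickled model back in (counterpart to savePatch above)
with open('mA', 'rb') as f:
    mA = pickle.load(f)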
# Generate PPC simulated data
ppc_data = hddm.utils.post_pred_gen(mA, samples=2, append_data=True)
# Take a peek at the immediate output
# Some/all of the appended columns will only have values of NaN
ppc_data.head(10)
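To see the problem at a glance, you can also count the missing values per column (a quick one-liner sketch):

# Count missing values per column; the appended columns come back all-NaN here
ppc_data.isnull().sum()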
# Set these as the column numbers in your ppc_data
subj_idx_columnNum = 2   # i.e. subj_idx is third column (Python starts numbering at 0)
rt_columnNum = 3
response_columnNum = 4
revNum_columnNum = 5     # A trial-by-trial measure
# Not an efficient solution! ...but it works
# Depending on your simulation & dataset sizes, this will take a while...
for j in range(0, len(ppc_data)):
    # Get current row number of ppc_data
    rowNum = ppc_data.iloc[j].name[2]
    # Copies index numbers
    ppc_data.iloc[j, 1] = rowNum
    # Copies other columns
    ppc_data.iloc[j, subj_idx_columnNum] = data.iloc[rowNum].subj_idx
    ppc_data.iloc[j, rt_columnNum] = data.iloc[rowNum].rt
    ppc_data.iloc[j, response_columnNum] = data.iloc[rowNum].response
    ppc_data.iloc[j, revNum_columnNum] = data.iloc[rowNum].revNum
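If the loop is too slow for your dataset, a vectorized version of the same idea should be much faster (a sketch, assuming, as above, that the third index level holds the original row number and that data still has its default 0..n-1 integer index):

import numpy as np

# Pull the original row numbers out of the third index level
rowNums = np.asarray(ppc_data.index.get_level_values(2))
# Copy the index numbers and trial-by-trial columns over in one shot per column
ppc_data['index'] = rowNums
for col in ['subj_idx', 'rt', 'response', 'revNum']:
    ppc_data[col] = data[col].values[rowNums]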
# Take another peek at your PPC data
# Your appended columns should all be filled in now!
ppc_data.head(10)
# Download entire simulated dataset
# Ensure that all of your appended columns match your original data!
ppc_data.to_csv('PPC_sim_mA.csv')
# Take a look at each of your PPC nodes before looking at the aggregate stats
# This ensures that the appropriate stats were computed for each node/subject
ppc_compare = hddm.utils.post_pred_stats(data, ppc_data, call_compare=False)
print(ppc_compare)
# Once you're positive that everything's worked the way it was supposed to...
ppc_compare = hddm.utils.post_pred_stats(data, ppc_data)
print(ppc_compare)
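The payoff of attaching trial-by-trial measures is that you can now slice the PPC by condition. A minimal sketch using the filled-in revNum column (note that rt_sampled is sign-coded by response, hence the absolute values):

# Compare mean simulated vs. observed RTs within each revNum condition
for cond, chunk in ppc_data.groupby('revNum'):
    sim_rt = chunk['rt_sampled'].abs().mean()
    obs_rt = data.loc[data['revNum'] == cond, 'rt'].abs().mean()
    print(cond, sim_rt, obs_rt)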
For reference, here's the patched version of kabuki's post_pred_gen that I used (the only change from the original code is flagged in a comment below):

import pandas as pd
import numpy as np
import pymc as pm
import pymc.progressbar as pbar

def _parents_to_random_posterior_sample(bottom_node, pos=None):
    """Walks through parents and sets them to pos sample."""
    for i, parent in enumerate(bottom_node.extended_parents):
        if not isinstance(parent, pm.Node):  # Skip non-stochastic nodes
            continue

        if pos is None:
            # Set to random posterior position
            pos = np.random.randint(0, len(parent.trace()))

        assert len(parent.trace()) >= pos, "pos larger than posterior sample size"
        parent.value = parent.trace()[pos]

def _post_pred_generate(bottom_node, samples=500, data=None, append_data=True):
    """Generate posterior predictive data from a single observed node."""
    datasets = []

    ##############################
    # Sample and generate stats
    for sample in range(samples):
        _parents_to_random_posterior_sample(bottom_node)

        # Generate data from bottom node
        sampled_data = bottom_node.random()

        if append_data and data is not None:
            sampled_data.reset_index(inplace=True)  # Only modification of original kabuki code
            sampled_data = sampled_data.join(data.reset_index(), lsuffix='_sampled')
        datasets.append(sampled_data)

    return datasets

def post_pred_gen(model, groupby=None, samples=500, append_data=False, progress_bar=True):
    results = {}

    # Progress bar
    if progress_bar:
        n_iter = len(model.get_observeds())
        bar = pbar.progress_bar(n_iter)
        bar_iter = 0
    else:
        print("Sampling...")

    if groupby is None:
        iter_data = ((name, model.data.ix[obs['node'].value.index])
                     for name, obs in model.iter_observeds())
    else:
        iter_data = model.data.groupby(groupby)

    for name, data in iter_data:
        node = model.get_data_nodes(data.index)

        if progress_bar:
            bar_iter += 1
            bar.update(bar_iter)

        if node is None or not hasattr(node, 'random'):
            continue  # Skip

        ##############################
        # Sample and generate stats
        datasets = _post_pred_generate(node, samples=samples, data=data,
                                       append_data=append_data)
        results[name] = pd.concat(datasets, names=['sample'],
                                  keys=list(range(len(datasets))))

    if progress_bar:
        bar_iter += 1
        bar.update(bar_iter)

    return pd.concat(results, names=['node'])
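With those definitions in place, the patched function is a drop-in replacement for the library call (reusing the mA model from above):

# Call the patched post_pred_gen defined above instead of hddm.utils.post_pred_gen
ppc_data = post_pred_gen(mA, samples=2, append_data=True)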
Update: when I step through the iter_data loop by hand with my within-subjects model, get_data_nodes() raises:

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-58-842590cc16f1> in <module>()
      3 for name, data in iter_data:
      4     print(name,data)
----> 5     node = m_within_subj.get_data_nodes(data.index)

/opt/conda/lib/python3.6/site-packages/kabuki/hierarchical.py in get_data_nodes(self, idx)
    942
    943         if len(data_nodes) != 1:
--> 944             raise NotImplementedError("Supply a grouping so that at most 1 observed node codes for each group.")
    945
    946         return data_nodes[0]

NotImplementedError: Supply a grouping so that at most 1 observed node codes for each group.
I've tried a bit of debugging, but the issue appears to lie within get_data_nodes(), which is a little opaque to me.
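The error message itself points at the groupby argument, so I'd guess the workaround is something along these lines, grouping on a condition column like revNum, though I haven't confirmed it resolves the error:

# Untested guess based on the error message: supply a grouping so that each
# group is covered by exactly one observed node
ppc_data = post_pred_gen(m_within_subj, groupby=['revNum'], samples=2,
                         append_data=True)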
Thanks for any advice you might have!
-Ian
Follow-up, in case it's useful to others: newer versions of pandas have deprecated (and since removed) the .ix indexer that post_pred_gen relies on. Changing this line:

iter_data = ((name, model.data.ix[obs['node'].value.index]) for name, obs in model.iter_observeds())

to:

iter_data = ((name, model.data.iloc[obs['node'].value.index]) for name, obs in model.iter_observeds())
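One caveat on that translation: .ix was primarily label-based with a positional fallback, so .iloc only reproduces its behavior when model.data has the default 0..n-1 integer index. With a custom index, .loc should be the faithful substitute:

# Label-based variant; safer if model.data does not use a default integer index
iter_data = ((name, model.data.loc[obs['node'].value.index]) for name, obs in model.iter_observeds())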
-Ian