About Cross Validation Methods in HDDM

49 views
Skip to first unread message

abstract thoughts

unread,
Oct 1, 2021, 2:30:42 AM10/1/21
to hddm-users
Hi everyone,
I am Swarag T, a fourth-year BS-MS student at the Indian Institute of Science Education and Research, Pune.
I am quantifying the test error of my HDDM models. First, I split the entire data set at the level of condition using a binomial distribution. Then I used wfpt.wiener_like to estimate the likelihood of each data point. To compute the prediction, I compared the likelihood of the RT being positive versus negative: if the likelihood for the positive RT is higher, the predicted output is one. The regression coefficients are computed at the group level.
However, I am unable to understand what the base of wiener_like in the HDDM package is. It seems to return the logarithm of the likelihood. Can someone please explain this, and could you also point me to test-error quantification methods that others have used?
Thank you

abstract thoughts

unread,
Oct 1, 2021, 5:38:56 AM10/1/21
to hddm-users
I am attaching the code that I have used. Looking forward to hearing from you!

# Fold 1 setup: the trials in Model103R.data are scored with the posterior
# means taken from Model103L.nodes_db.
data = Model103R.data
# Sign-code the RTs: positive when response == 1, negative when response == 0
# (assumes `response` is coded 0/1 -- TODO confirm).  Taking abs() first keeps
# the assignment idempotent: if the RTs are already sign-coded (HDDM models may
# store them that way -- confirm), or this cell is simply run twice, the
# lower-boundary RTs are not flipped back to positive.
data.rt = (data.response - 0.5) * 2 * data.rt.abs()

n_sub = 15    # number of subjects; subject ids are assumed to run 1..n_sub
err = 0.001   # forwarded as the `err` argument of hddm.wfpt.wiener_like

post = Model103L.nodes_db

# (column in `para`, node name in `post`, looked up once per subject?)
# Per-subject nodes get the subject id appended to the node name; the
# regression coefficients are single nodes whose mean is repeated for every
# subject.  Column order matches the original script.
_para_spec = [
    ('t_mean', 't_subj.', True),
    ('z_mean', 'z_subj.', True),
    ('v_Intercept_mean', 'v_Intercept_subj.', True),
    ('v_C(Cat2)[T.B]', 'v_C(Cat2)[T.B]', False),
    ('v_C(Cat3)[T.C]', 'v_C(Cat3)[T.C]', False),
    ('a_Intercept_mean', 'a_Intercept_subj.', True),
    ('a_C(Cat1)[T.A]', 'a_C(Cat1)[T.A]', False),
    ('a_C(Cat3)[T.C]', 'a_C(Cat3)[T.C]', False),
    ('a_C(Cat1)[T.A]:C(Cat3)[T.C]', 'a_C(Cat1)[T.A]:C(Cat3)[T.C]', False),
]

# Row i of `para` holds the parameters of subject i + 1.  Whole columns are
# assigned at once instead of using chained indexing (`para[col][i] = ...`),
# which may write to a temporary copy and leave `para` unchanged.
para = pd.DataFrame()
for _column, _node, _per_subject in _para_spec:
    if _per_subject:
        _values = [post.loc[_node + str(sub), 'mean'] for sub in range(1, n_sub + 1)]
    else:
        _values = [post.loc[_node, 'mean']] * n_sub
    para[_column] = np.asarray(_values, dtype=float)

# Accumulators shared by every scoring pass in this script.
Per_SC1C2AD = {}   # per subject x condition accuracy, keyed by str(sub)+Cat1+Cat2+Cat3
log_like = []      # log-likelihood of each observed trial
Class = []         # filled in by the summary step
SI = []            # subject id of each scored trial
TN = []            # Trial_Number of each scored trial
Response = []      # observed response of each trial (0.0 / 1.0)
Prediction = []    # predicted response of each trial (0 / 1)

# Score every trial of `data`, one subject x condition cell at a time.
for sub in range(1, n_sub + 1):
    z = para.loc[sub - 1, 'z_mean']
    t = para.loc[sub - 1, 't_mean']
    for Cat1i in ['A', 'O']:
        for Cat2i in ['B', 'O']:
            for Cat3i in ['C', 'O']:
                # Rebuild drift rate and threshold from the regression terms:
                # intercept plus every coefficient whose level is active.
                v = (para.loc[sub - 1, 'v_Intercept_mean']
                     + (Cat2i == 'B') * para.loc[sub - 1, 'v_C(Cat2)[T.B]']
                     + (Cat3i == 'C') * para.loc[sub - 1, 'v_C(Cat3)[T.C]'])
                a = (para.loc[sub - 1, 'a_Intercept_mean']
                     + (Cat1i == 'A') * para.loc[sub - 1, 'a_C(Cat1)[T.A]']
                     + (Cat3i == 'C') * para.loc[sub - 1, 'a_C(Cat3)[T.C]']
                     + (Cat1i == 'A') * (Cat3i == 'C')
                     * para.loc[sub - 1, 'a_C(Cat1)[T.A]:C(Cat3)[T.C]'])

                # NOTE(review): the original filter compared against the
                # undefined names `selfl`, `good`, `AD` (a NameError) and a
                # column `AD`.  The loop variables are used instead, and the
                # third factor is read from `Cat3`, the name used in the
                # regression terms -- confirm the column name.
                gdata = data[(data.subj_idx == sub)
                             & (data.Cat1 == Cat1i)
                             & (data.Cat2 == Cat2i)
                             & (data.Cat3 == Cat3i)]

                cell_hits = []
                for rti, respi, TrialN in zip(gdata.rt, gdata.response,
                                              gdata.Trial_Number):
                    # Evaluate both boundaries at the same |RT|; the sign of
                    # the RT selects the boundary.  The return value is treated
                    # as a log-likelihood throughout this script.
                    rt_abs = abs(float(rti))
                    ll_upper = hddm.wfpt.wiener_like(np.array([rt_abs]), v, 0, a, z, 0, t, 0, err)
                    ll_lower = hddm.wfpt.wiener_like(np.array([-rt_abs]), v, 0, a, z, 0, t, 0, err)

                    # Observed response straight from the data (assumed 0/1),
                    # so the result does not depend on how `rt` was signed.
                    observed = float(respi)
                    # Predict 1 when the upper boundary is the more likely
                    # outcome.  Comparing the logs directly avoids the underflow
                    # of exp() turning two tiny likelihoods into a 0 == 0 tie.
                    predicted = int(ll_upper > ll_lower)

                    SI.append(sub)
                    TN.append(TrialN)
                    Response.append(observed)
                    Prediction.append(predicted)
                    log_like.append(ll_upper if observed == 1 else ll_lower)
                    # A hit is agreement of prediction and response; the
                    # product used before ignored correctly predicted 0s.
                    cell_hits.append(float(observed == predicted))

                # Store the mean accuracy of the cell (NaN for an empty cell).
                # NOTE: the key does not encode which pass produced it, so a
                # later pass writing the same key replaces this entry.
                Per_SC1C2AD[str(sub) + Cat1i + Cat2i + str(Cat3i)] = (
                    np.mean(cell_hits) if cell_hits else float('nan'))



# Fold 2 setup: the trials in Model103L.data are scored with the posterior
# means taken from Model103R.nodes_db.
data = Model103L.data
# Sign-code the RTs: positive when response == 1, negative when response == 0
# (assumes `response` is coded 0/1 -- TODO confirm).  Taking abs() first keeps
# the assignment idempotent: if the RTs are already sign-coded (HDDM models may
# store them that way -- confirm), or this cell is simply run twice, the
# lower-boundary RTs are not flipped back to positive.
data.rt = (data.response - 0.5) * 2 * data.rt.abs()

n_sub = 15    # number of subjects; subject ids are assumed to run 1..n_sub
err = 0.001   # forwarded as the `err` argument of hddm.wfpt.wiener_like

post = Model103R.nodes_db

# Not read anywhere in the visible part of the script; kept in case later
# code relies on it.
LL_hddm = np.zeros((200 * n_sub, 4))

# (column in `para`, node name in `post`, looked up once per subject?)
# Per-subject nodes get the subject id appended to the node name; the
# regression coefficients are single nodes whose mean is repeated for every
# subject.  Column order matches the original script.
_para_spec = [
    ('t_mean', 't_subj.', True),
    ('z_mean', 'z_subj.', True),
    ('v_Intercept_mean', 'v_Intercept_subj.', True),
    ('v_C(Cat2)[T.B]', 'v_C(Cat2)[T.B]', False),
    ('v_C(Cat3)[T.C]', 'v_C(Cat3)[T.C]', False),
    ('a_Intercept_mean', 'a_Intercept_subj.', True),
    ('a_C(Cat1)[T.A]', 'a_C(Cat1)[T.A]', False),
    ('a_C(Cat3)[T.C]', 'a_C(Cat3)[T.C]', False),
    ('a_C(Cat1)[T.A]:C(Cat3)[T.C]', 'a_C(Cat1)[T.A]:C(Cat3)[T.C]', False),
]

# Row i of `para` holds the parameters of subject i + 1.  Whole columns are
# assigned at once instead of using chained indexing (`para[col][i] = ...`),
# which may write to a temporary copy and leave `para` unchanged.
para = pd.DataFrame()
for _column, _node, _per_subject in _para_spec:
    if _per_subject:
        _values = [post.loc[_node + str(sub), 'mean'] for sub in range(1, n_sub + 1)]
    else:
        _values = [post.loc[_node, 'mean']] * n_sub
    para[_column] = np.asarray(_values, dtype=float)


# Score every trial of the current `data` with the current `para`, one
# subject x condition cell at a time, appending to the accumulator lists
# (SI, TN, Response, Prediction, log_like) created earlier in the script.
for sub in range(1, n_sub + 1):
    z = para.loc[sub - 1, 'z_mean']
    t = para.loc[sub - 1, 't_mean']
    for Cat1i in ['A', 'O']:
        for Cat2i in ['B', 'O']:
            for Cat3i in ['C', 'O']:
                # Rebuild drift rate and threshold from the regression terms:
                # intercept plus every coefficient whose level is active.
                v = (para.loc[sub - 1, 'v_Intercept_mean']
                     + (Cat2i == 'B') * para.loc[sub - 1, 'v_C(Cat2)[T.B]']
                     + (Cat3i == 'C') * para.loc[sub - 1, 'v_C(Cat3)[T.C]'])
                a = (para.loc[sub - 1, 'a_Intercept_mean']
                     + (Cat1i == 'A') * para.loc[sub - 1, 'a_C(Cat1)[T.A]']
                     + (Cat3i == 'C') * para.loc[sub - 1, 'a_C(Cat3)[T.C]']
                     + (Cat1i == 'A') * (Cat3i == 'C')
                     * para.loc[sub - 1, 'a_C(Cat1)[T.A]:C(Cat3)[T.C]'])

                # NOTE(review): the original filter compared against the
                # undefined names `selfl`, `good`, `AD` (a NameError) and a
                # column `AD`.  The loop variables are used instead, and the
                # third factor is read from `Cat3`, the name used in the
                # regression terms -- confirm the column name.
                gdata = data[(data.subj_idx == sub)
                             & (data.Cat1 == Cat1i)
                             & (data.Cat2 == Cat2i)
                             & (data.Cat3 == Cat3i)]

                cell_hits = []
                for rti, respi, TrialN in zip(gdata.rt, gdata.response,
                                              gdata.Trial_Number):
                    # Evaluate both boundaries at the same |RT|; the sign of
                    # the RT selects the boundary.  The return value is treated
                    # as a log-likelihood throughout this script.
                    rt_abs = abs(float(rti))
                    ll_upper = hddm.wfpt.wiener_like(np.array([rt_abs]), v, 0, a, z, 0, t, 0, err)
                    ll_lower = hddm.wfpt.wiener_like(np.array([-rt_abs]), v, 0, a, z, 0, t, 0, err)

                    # Observed response straight from the data (assumed 0/1),
                    # so the result does not depend on how `rt` was signed.
                    observed = float(respi)
                    # Predict 1 when the upper boundary is the more likely
                    # outcome.  Comparing the logs directly avoids the underflow
                    # of exp() turning two tiny likelihoods into a 0 == 0 tie.
                    predicted = int(ll_upper > ll_lower)

                    SI.append(sub)
                    TN.append(TrialN)
                    Response.append(observed)
                    Prediction.append(predicted)
                    log_like.append(ll_upper if observed == 1 else ll_lower)
                    # A hit is agreement of prediction and response; the
                    # product used before ignored correctly predicted 0s.
                    cell_hits.append(float(observed == predicted))

                # Store the mean accuracy of the cell (NaN for an empty cell).
                # NOTE: the key does not encode which pass produced it, so this
                # replaces any entry stored earlier under the same key.
                Per_SC1C2AD[str(sub) + Cat1i + Cat2i + str(Cat3i)] = (
                    np.mean(cell_hits) if cell_hits else float('nan'))
                
# Summarise all scored trials and export the per-trial and per-condition tables.

# `log_like` is built as a Python list; on a list, `log_like == float("-inf")`
# is just False, so the old `log_like[...] = 0` overwrote element 0 instead of
# masking.  Convert to an array so the boolean mask selects the -inf entries.
log_like = np.asarray(log_like, dtype=float)
# NOTE(review): replacing -inf by 0 scores a zero-likelihood trial as if its
# likelihood were 1, which inflates the summary below -- confirm this is the
# intended treatment (dropping or flooring such trials are alternatives).
log_like[log_like == float("-inf")] = 0
print(log_like)

# 1.0 where the prediction agrees with the observed response, else 0.0.  The
# product used before only counted trials where both were 1.
Class = (np.array(Response) == np.array(Prediction)).astype(float)
# Overall accuracy, then the geometric mean of the per-trial likelihoods
# (exp of the mean log-likelihood); NaN when no trial was scored.
print(np.mean(Class) if len(Class) else float('nan'))
print(np.exp(np.mean(log_like)) if len(log_like) else float('nan'))

Dict0 = {
    'subj_idx': SI,
    'Trial_Number': TN,
    'response': Response,
    'Prediction': Prediction,
    'log_like': log_like,
    'Correct_True': Class,
}
df20 = pd.DataFrame.from_dict(Dict0, orient='columns')
df20.to_csv('Test_Summary.csv')

# One row per subject x condition key, single column 'Accuracy'.
df222 = pd.DataFrame.from_dict(Per_SC1C2AD, orient='index', columns=['Accuracy'])
df222.to_csv('Cond_Wise.csv')
Reply all
Reply to author
Forward
0 new messages