from statsmodels.stats.anova import AnovaRM
import pandas as pd
# import dataset and try factorial
df = pd.read_csv('C://Users//L//Desktop//factorial_data.csv')
df
subject time_of_day difficulty RT
0 1 1 1 155
1 1 1 2 215
2 1 2 1 222
3 1 2 2 261
4 2 1 1 167
5 2 1 2 224
6 2 2 1 214
7 2 2 2 265
8 3 1 1 196
9 3 1 2 223
10 3 2 1 219
11 3 2 2 258
12 4 1 1 172
13 4 1 2 216
14 4 2 1 179
15 4 2 2 270
16 5 1 1 195
17 5 1 2 219
18 5 2 1 191
19 5 2 2 272aovrm2 = AnovaRM(df, 'RT', 'subject', within=['difficulty', 'time_of_day'])
fit = aovrm2.fit()
fit.summary()Just bumping this - can anybody help?
def fit(self):
"""estimate the model and compute the Anova table
Returns
-------
AnovaResults instance
"""
y = self.data[self.depvar].values
# Construct OLS endog and exog from string using patsy
within = ['C(%s, Sum)' % i for i in self.within]
subject = 'C(%s, Sum)' % self.subject
factors = within + [subject]
x = patsy.dmatrix('*'.join(factors), data=self.data)
term_slices = x.design_info.term_name_slices
for key in term_slices:
ind = np.array([False]*x.shape[1])
ind[term_slices[key]] = True
term_slices[key] = np.array(ind)
term_exclude = [':'.join(factors)]
ind = _not_slice(term_slices, term_exclude, x.shape[1])
x = x[:, ind]
# Fit OLS
model = OLS(y, x)
results = model.fit()
if model.rank < x.shape[1]:
raise ValueError('Independent variables are collinear.')
for i in term_exclude:
term_slices.pop(i)
for key in term_slices:
term_slices[key] = term_slices[key][ind]
params = results.params
df_resid = results.df_resid
ssr = results.ssr
anova_table = pd.DataFrame(
{'F Value': [], 'Num DF': [], 'Den DF': [], 'Pr > F': []})
for key in term_slices:
print(key)
if self.subject not in key and key != 'Intercept':
# Independent variables are orthogonal
ssr1, df_resid1 = _ssr_reduced_model(
y, x, term_slices, params, [key])
df1 = df_resid1 - df_resid
msm = (ssr1 - ssr) / df1
if (key == ':'.join(factors[:-1]) or
(key + ':' + subject not in term_slices)):
# Interaction effect ### Edit 04/10/2018
mse = ssr / df_resid
df2 = df_resid
type_3_SS_error = ssr ### Edit 04/10/2018
else:
# Simple main effect
ssr1, df_resid1 = _ssr_reduced_model(
y, x, term_slices, params,
[key + ':' + subject])
df2 = df_resid1 - df_resid
mse = (ssr1 - ssr) / df2
type_3_SS_error = ssr1 - ssr ### Edit 04/10/2018
F = msm / mse
p = stats.f.sf(F, df1, df2)
term = key.replace('C(', '').replace(', Sum)', '')
anova_table.loc[term, 'F Value'] = F
anova_table.loc[term, 'Num DF'] = df1
anova_table.loc[term, 'Den DF'] = df2
anova_table.loc[term, 'Pr > F'] = p
# ---------------- Edit 04/10/2018 ----------------- #
anova_table.loc[term, 'T3 SS'] = msm
anova_table.loc[term, 'T3 SS (error)'] = type_3_SS_error
# calculate effect sizes.
ss_total = np.var(y, ddof=1) * (len(y) - 1)
omega_squared = (msm - (df1 * mse)) / (ss_total + mse) # omega squared
eta_squared = msm / ss_total
partial_eta_squared = msm / (msm + type_3_SS_error)
anova_table.loc[term, 'eta^2'] = eta_squared
anova_table.loc[term, 'par.eta^2'] = partial_eta_squared
anova_table.loc[term, 'omega^2'] = omega_squared
# ---------------- Edit 04/10/2018 ---------------- #
#return AnovaResults(anova_table.iloc[:, [1, 2, 0, 3]])
return AnovaResults(anova_table.iloc[:, [4, 5, 1, 2, 0, 3, 6, 7, 8]])Hi, thanks a lot for the hint of where to look. I made a copy of the anova.py file with following edits. They seem to agree with SPSS in my example data, but haven't tested on other data yet:
On Thu, Oct 4, 2018 at 5:38 PM fffrost <lewis....@gmail.com> wrote:Hi, thanks a lot for the hint of where to look. I made a copy of the anova.py file with following edits. They seem to agree with SPSS in my example data, but haven't tested on other data yet:I just found this again while closing open windows on my computer.Can you create a test case for this comparing some examples to SPSS?I think it would be useful to add this to the statsmodels implementation (with small changes)