# Binomial-family GLM: response is the 'ca' column, regressors are the columns
# named in int_col_s (a list of the x variables).
mod1 = sm.GLM(
    df['ca'],
    df[int_col_s],
    family=sm.families.Binomial(),
)

# Fit requesting cov_type='cluster', with the Ticker column as group labels.
# NOTE(review): the traceback below shows this call raising MemoryError inside
# the link's numerical deriv2 (np.identity(n) with n = len of its input) --
# presumably n is the number of observations; confirm against the installed
# statsmodels version.
mod1.fit(
    disp=False,
    cov_type='cluster',
    cov_kwds={'groups': np.array(df.Ticker)},
)
# or
# Same model, requesting cov_type='HC1' with no extra covariance options.
mod1.fit(
    disp=False,
    cov_type='HC1',
    cov_kwds={},
)
MemoryError Traceback (most recent call last)
<ipython-input-207-98c3158770e6> in <module>()
1 ca_['CA_t'] = (ca_['CA']+1)/2
2 mod1 = sm.GLM(ca_['CA_t'], ca_[int_col_s], family=sm.families.Binomial())
----> 3 mod1.fit(disp=False, cov_type='cluster', cov_kwds=dict(groups=np.array(df.Ticker)))
C:\Users\chamar.stu\AppData\Local\Continuum\Anaconda2\lib\site-packages\statsmodels\genmod\generalized_linear_model.pyc in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
727 self.scale,
728 cov_type=cov_type, cov_kwds=cov_kwds,
--> 729 use_t=use_t)
730
731 history['iteration'] = iteration + 1
C:\Users\chamar.stu\AppData\Local\Continuum\Anaconda2\lib\site-packages\statsmodels\genmod\generalized_linear_model.pyc in __init__(self, model, params, normalized_cov_params, scale, cov_type, cov_kwds, use_t)
920 cov_kwds = {}
921 get_robustcov_results(self, cov_type=cov_type, use_self=True,
--> 922 use_t=use_t, **cov_kwds)
923
924 @cache_readonly
C:\Users\chamar.stu\AppData\Local\Continuum\Anaconda2\lib\site-packages\statsmodels\base\covtype.py in get_robustcov_results(self, cov_type, use_t, **kwds)
193 self.n_groups = n_groups = len(np.unique(groups))
194 res.cov_params_default = sw.cov_cluster(self, groups,
--> 195 use_correction=use_correction)
196
197 elif groups.ndim == 2:
C:\Users\chamar.stu\AppData\Local\Continuum\Anaconda2\lib\site-packages\statsmodels\stats\sandwich_covariance.pyc in cov_cluster(results, group, use_correction)
528 '''
529 #TODO: currently used version of groupsums requires 2d resid
--> 530 xu, hessian_inv = _get_sandwich_arrays(results)
531
532 if not hasattr(group, 'dtype') or group.dtype != np.dtype('int'):
C:\Users\chamar.stu\AppData\Local\Continuum\Anaconda2\lib\site-packages\statsmodels\stats\sandwich_covariance.pyc in _get_sandwich_arrays(results)
239 elif hasattr(results.model, 'score_obs'):
240 xu = results.model.score_obs(results.params)
--> 241 hessian_inv = np.linalg.inv(results.model.hessian(results.params))
242 else:
243 xu = results.model.wexog * results.wresid[:, None]
C:\Users\chamar.stu\AppData\Local\Continuum\Anaconda2\lib\site-packages\statsmodels\genmod\generalized_linear_model.pyc in hessian(self, params, scale, observed)
418 """
419
--> 420 factor = self.hessian_factor(params, scale=scale, observed=observed)
421 hess = -np.dot(self.exog.T * factor, self.exog)
422 return hess
C:\Users\chamar.stu\AppData\Local\Continuum\Anaconda2\lib\site-packages\statsmodels\genmod\generalized_linear_model.pyc in hessian_factor(self, params, scale, observed)
378 raise RuntimeError('something wrong')
379
--> 380 tmp = self.family.variance(mu) * self.family.link.deriv2(mu)
381 tmp += self.family.variance.deriv(mu) * self.family.link.deriv(mu)
382
C:\Users\chamar.stu\AppData\Local\Continuum\Anaconda2\lib\site-packages\statsmodels\genmod\families\links.pyc in deriv2(self, p)
69 from statsmodels.tools.numdiff import approx_fprime_cs
70 # TODO: workaround proplem with numdiff for 1d
---> 71 return np.diag(approx_fprime_cs(p, self.deriv))
72
73 def inverse_deriv(self, z):
C:\Users\chamar.stu\AppData\Local\Continuum\Anaconda2\lib\site-packages\statsmodels\tools\numdiff.pyc in approx_fprime_cs(x, f, epsilon, args, kwargs)
182 n = len(x)
183 epsilon = _get_epsilon(x, 1, epsilon, n)
--> 184 increments = np.identity(n) * 1j * epsilon
185 #TODO: see if this can be vectorized, but usually dim is small
186 partials = [f(x+ih, *args, **kwargs).imag / epsilon[i]
C:\Users\chamar.stu\AppData\Local\Continuum\Anaconda2\lib\site-packages\numpy\core\numeric.pyc in identity(n, dtype)
2306 """
2307 from numpy import eye
-> 2308 return eye(n, dtype=dtype)
2309
2310 def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
C:\Users\chamar.stu\AppData\Local\Continuum\Anaconda2\lib\site-packages\numpy\lib\twodim_base.pyc in eye(N, M, k, dtype)
231 if M is None:
232 M = N
--> 233 m = zeros((N, M), dtype=dtype)
234 if k >= M:
235 return m
MemoryError: