I'm trying to fit a GLM with a Tweedie family in statsmodels, but I keep getting the error below, and I can't see what I need to change in order to resolve it.
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\genmod\families\family.py:1427: RuntimeWarning: invalid value encountered in sqrt
endog * mu ** (1-p) / (1 - p) + mu ** (2 - p) / (2 - p))
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-282-bb94ac0700a8> in <module>
----> 1 pricing_model_comp_1_results = pricing_model_comp_1.fit()
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, full_output, disp, max_start_irls, **kwargs)
1010 return self._fit_irls(start_params=start_params, maxiter=maxiter,
1011 tol=tol, scale=scale, cov_type=cov_type,
-> 1012 cov_kwds=cov_kwds, use_t=use_t, **kwargs)
1013 else:
1014 self._optim_hessian = kwargs.get('optim_hessian')
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in _fit_irls(self, start_params, maxiter, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
1107 self.freq_weights, self.scale)
1108 if np.isnan(dev):
-> 1109 raise ValueError("The first guess on the deviance function "
1110 "returned a nan. This could be a boundary "
1111 " problem and should be reported.")
ValueError: The first guess on the deviance function returned a nan. This could be a boundary problem and should be reported.
Code below:
#modeling
import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy
# Model specification: `target` is regressed on the rating variables below.
# C(STATE) asks patsy to treat STATE as a categorical variable.
formula = 'target~ eff_year + C(STATE) + rba_model + driver_age_model + marital_status_model_S + \
marital_status_model_not_available + vehicle_age_model + length_ft_model + yrs_owned_model + \
cm_ded_model + majorvio + minorvio + atfault + DTMND_VEH_TYPE_CD_AH + DTMND_VEH_TYPE_CD_AN + \
DTMND_VEH_TYPE_CD_AU + DTMND_VEH_TYPE_CD_FW + DTMND_VEH_TYPE_CD_PC + DTMND_VEH_TYPE_CD_ST + \
DTMND_VEH_TYPE_CD_SU + DTMND_VEH_TYPE_CD_TC + DTMND_VEH_TYPE_CD_TH + DTMND_VEH_TYPE_CD_UT'
#turn formula into a matrix of data for the model
# NOTE: by default patsy drops every row that has a missing value in any
# formula variable, so y and x may contain fewer rows than `data`. With
# return_type='dataframe' they keep the index labels of the surviving rows.
y, x = patsy.dmatrices(formula, data, return_type='dataframe')

# Select the weights through y's index so they stay aligned row-for-row with
# the design matrices even when patsy dropped rows.
# NOTE(review): assumes `data` has a unique index -- confirm.
weight = data.loc[y.index, 'cmeu']

# The Tweedie deviance with 1 < var_power < 2 is only defined for a
# non-negative response; negative values produce NaN in the initial deviance
# ("The first guess on the deviance function returned a nan"). Fail early
# with an actionable message instead.
if (y < 0).any().any():
    raise ValueError("Tweedie GLM requires a non-negative response, but "
                     "'target' contains negative values.")
if weight.isna().any() or (weight < 0).any():
    raise ValueError("Weights in 'cmeu' must be non-missing and "
                     "non-negative.")

# GLM has no `weights` keyword: an unknown keyword is swallowed by **kwargs
# and the model is silently fitted unweighted. The supported arguments are
# `var_weights` and `freq_weights`.
# NOTE(review): var_weights is used on the assumption that 'cmeu' is an
# exposure-style (variance) weight; switch to freq_weights if it is a count
# of identical observations -- confirm.
# The link is passed as an instance; passing the link class is deprecated.
model_1 = sm.GLM(
    y,
    x,
    family=sm.families.Tweedie(link=sm.families.links.log(), var_power=1.5),
    var_weights=weight,
)
model_1_results = model_1.fit()