# I am fitting a SARIMAX model without exogenous variables. My data looks like this and does not contain any missing values:
y.head(5)
DateId
2017-09-08 139.0
2017-09-09 232.0
2017-09-10 10129.0
2017-09-11 61154.0
2017-09-12 22093.0
Name: NumberOfViewers, dtype: float32
#checking if any of the values are null
y.isnull().any().any()
False
# After some further exploration, I decided to use SARIMAX with freq=7 and to perform a grid search to find the best order parameters
import itertools
import math
import warnings

import statsmodels.api as sm
# Silence every warning from here on; this also hides any warnings raised
# later while models are being fitted.
warnings.filterwarnings("ignore")

# Candidate values shared by p, d and q. range(5) yields 0, 1, 2, 3, 4 --
# the upper bound 5 itself is excluded.
p = d = q = range(5)

# Every non-seasonal (p, d, q) triplet.
pdq = list(itertools.product(p, d, q))

# Every seasonal order: the same triplets with the seasonal period 7 appended.
seasonal_pdq = [triplet + (7,) for triplet in pdq]
# Grid search: fit one SARIMAX model for every (order, seasonal_order) pair
# on the first 70% of the series and record the AIC of each usable fit.
# The three lists are kept index-aligned: entry i of each one describes the
# same fitted model.
param_list = []
param_seasonal_list = []
results_aic_list = []

# The training slice does not depend on the loop variables, so build it once.
# Doing this outside the try block also means a problem with `y` or `index_`
# is raised immediately instead of being swallowed on every iteration.
train = y[: round(0.7 * len(index_))]

for param, param_seasonal in itertools.product(pdq, seasonal_pdq):
    try:
        mod = sm.tsa.statespace.SARIMAX(train,
                                        order=param,
                                        seasonal_order=param_seasonal,
                                        enforce_stationarity=False,
                                        enforce_invertibility=False)
        # disp=False matches the final fit and keeps the optimizer quiet.
        results = mod.fit(disp=False)
    except Exception as exc:
        # Best-effort search: a combination that cannot be built or fitted is
        # skipped, but reported rather than silently dropped. Catching
        # Exception (not a bare except) keeps Ctrl-C working.
        print('ARIMA{}x{} - skipped: {}'.format(param, param_seasonal, exc))
        continue

    aic = results.aic
    if not math.isfinite(aic):
        # min() does not order NaN reliably and -inf would always win, so a
        # non-finite AIC must not take part in the model selection.
        print('ARIMA{}x{} - skipped: non-finite AIC'.format(param, param_seasonal))
        continue

    param_list.append(param)
    param_seasonal_list.append(param_seasonal)
    results_aic_list.append(aic)
    print('ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, aic))
# Pick the model parameters with the smallest AIC and re-estimate that model
# on the same training slice (first 70% of the series).

# Only finite AIC values are eligible: a NaN compares false against
# everything, which makes min() order-dependent, and -inf would always win.
finite_idx = [i for i, aic in enumerate(results_aic_list) if math.isfinite(aic)]
if not finite_idx:
    raise ValueError("grid search produced no model with a finite AIC")

# min() returns the first minimal element, so ties resolve to the earliest
# fitted model, exactly as list.index(min(...)) would.
best_idx = min(finite_idx, key=results_aic_list.__getitem__)
order_best = param_list[best_idx]
seasonal_best = param_seasonal_list[best_idx]

mod = sm.tsa.statespace.SARIMAX(y[: round(0.7 * len(index_))],
                                order=order_best,
                                seasonal_order=seasonal_best,
                                enforce_stationarity=False,
                                enforce_invertibility=False)
fit_results = mod.fit(disp=False)
# Bare expression: displays the estimated parameters when run in a
# notebook/REPL cell; it has no effect in a plain script.
fit_results.params
# And here the problem starts. I can access the fit_results parameters and the BIC and AIC values; however, I cannot print the summary or explore the residuals with fit_results.plot_diagnostics(). For summary() I get the following message: "ValueError: On entry to DLASCL parameter number 5 had an illegal value".
# I tried different methods in fit(), as was suggested here, but that did not help. Where could my mistake be, or is this a bug?
# BTW, I specify the range for the grid search as (0, 5), but I only get results for the values 0 through 4.