Bonjour,
Je suis débutante en Python et j'essaie de faire une régression logistique avec Statsmodels. J'ai écrit le code ci-dessous, mais j'obtiens une erreur que je n'arrive pas à résoudre. Voilà mon code :
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import logit, probit, poisson, ols
from numpy import (genfromtxt)
# Path of the comma-separated data file to analyse.
fname = "C:/Users/lenovo/Desktop/table.csv"

# Parse the whole file into an array that is indexed below as [row, column].
my_data = genfromtxt(fname, delimiter=',')

# Expose columns 1 and 6 under the names referenced by the formula.
my_data_dict = {
    'x': my_data[:, 1],
    'y': my_data[:, 6],
}

# In this formula `x` (column 1) is the response and `y` (column 6) the regressor.
form = 'x ~ y'

# Build the logit model from the formula and estimate it in one step.
affair_mod = logit(form, my_data_dict).fit()
Voilà l'erreur que j'ai eue :
Thanks
Josef
Josef
Josef
#from scipy import stats
#import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import logit, probit, poisson, ols
# Logistic regression of column 1 on column 6 of the CSV file (numpy version).
fname = r"E:\Josef\work-oth2\tableFusion.csv"
my_data = np.genfromtxt(fname, delimiter=',')

# Pull out the two columns once, then keep only the rows where both are present.
y = my_data[:, 1]
x = my_data[:, 6]
mask = ~(np.isnan(y) | np.isnan(x))
# Shift the response down by one. Presumably it is coded 1/2 in the file while
# the logit model expects 0/1 -- TODO confirm against the data.
y = y[mask] - 1
x = x[mask]
my_data_dict = dict(
    y=y,
    x=x,
)

# C(x) tells the formula machinery to treat x as a categorical regressor.
form = 'y ~ C(x)'
# missing='drop' is kept as a safeguard; the mask above already removed nan rows.
affair_model = logit(form, my_data_dict, missing='drop')
affair_result = affair_model.fit()
# Parenthesized call form: valid on both Python 2 and Python 3.
print(affair_result.summary())
# and now with pandas
import pandas as pd
# Same logistic regression as above, reading the file with pandas instead.
# The file is read with explicit column names; only 'y' and 'x' enter the model.
names = ['var0', 'y', 'var2', 'var3', 'var4', 'var5', 'x']
pd_data = pd.read_csv(fname, delimiter=',', names=names)

# Drop observations with a missing value in any of the variables
# (one could instead drop only on the columns actually used).
# The explicit copy makes the in-place edit below act on an independent frame,
# which avoids pandas' SettingWithCopyWarning.
pd_data_clean = pd_data.dropna().copy()
# Shift the response down by one. Presumably it is coded 1/2 in the file while
# the logit model expects 0/1 -- TODO confirm against the data.
pd_data_clean['y'] -= 1

# C(x) tells the formula machinery to treat x as a categorical regressor.
form = 'y ~ C(x)'
affair_model = logit(form, pd_data_clean)
affair_result = affair_model.fit()
# Parenthesized call form: valid on both Python 2 and Python 3.
print(affair_result.summary())
-------------------------------
I hope this helps
Josef
JosefJosef
Josef