I learned a bit more about Stata programming (getting familiar with
lots of quotes).
I added a few more options to mat2nparray and renamed it because it has
different defaults.
It also writes e() into a dict and adds it to the bunch.
https://github.com/josef-pkt/statsmodels/commit/2a837a2fec26c0af1eb224972949c102fc19a615
My adjustments are for Stata 12, so I don't know if everything works
the same for Stata 11.
Also, I found
matrix params_table = r(table)'
which gets params, bse, t/z-values, ... for the last estimate.
The example file created for Cox regression is below. I added the do-file
as an example to tools (it also dumps a bunch of csv files).
I also have a script that extracts the data from a Stata plot into a
csv file, but that doesn't have a clean copyright since I adjusted a
downloaded ado file.
Josef
-------------------
import numpy as np
est = dict(
risk = 31938.09999990463,
N_fail = 75,
N_sub = 103,
df_m = 2,
rank = 2,
converged = 1,
r2_p = .0091231867926981,
chi2 = 5.443169407354162,
ll = -295.5935507329181,
ll_0 = -298.3151354365951,
N = 172,
cmdline = "stcox ",
datasignaturevars = "_t _t0 _d age posttran",
datasignature = "172:5:
3138366353:569466897",
cmd2 = "stcox",
marginsnotok = "CSNell DEViance DFBeta ESR LDisplace LMax
MGale SCAledsch SCHoenfeld SCores",
marginsprop = "addcons",
k_eform = "1",
footnote = "stcox_footnote",
predict = "stcox_p",
estat_cmd = "stcox_estat",
vce = "oim",
chi2type = "LR",
method = "breslow",
ties = "breslow",
t0 = "_t0",
depvar = "_t",
cmd = "cox",
crittype = "log likelihood",
properties = "b V",
)
# Parameter table as a 2 x 9 float array: one row per name in
# params_table_rownames, one column per name in params_table_colnames.
params_table = np.reshape(
    np.array([
        # row 0 (age): b, se, z, pvalue, ll, ul, df, crit, eform
        1.0323969785881,
        0.01505912144071,
        2.1857970773551,
        0.02883045469589,
        1.0032995597729,
        1.0623382727677,
        np.nan,
        1.9599639845401,
        1,
        # row 1 (posttran): b, se, z, pvalue, ll, ul, df, crit, eform
        0.96759949129124,
        0.29895753046714,
        -0.10660326750099,
        0.91510372626864,
        0.52808200941855,
        1.7729230665857,
        np.nan,
        1.9599639845401,
        1,
    ]),
    (2, 9),
)

# Labels for the columns and rows of params_table.
params_table_colnames = "b se z pvalue ll ul df crit eform".split()
params_table_rownames = "age posttran".split()
# 2 x 2 matrix stored under the name `cov`; rows and columns are labelled
# by cov_rownames and cov_colnames.
cov = np.reshape(
    np.array([
        0.00021276776001, -0.00082941216974,
        -0.00082941216974, 0.09546138521867,
    ]),
    (2, 2),
)

# Labels for the columns and rows of cov.
cov_colnames = "age posttran".split()
cov_rownames = "age posttran".split()
class Bunch(dict):
    """Dictionary whose items are also reachable as attributes.

    The instance's ``__dict__`` is bound to the instance itself, so
    ``bunch.name`` and ``bunch["name"]`` read and write the same entry.
    """

    def __init__(self, **kw):
        """Populate the mapping from the given keyword arguments."""
        dict.__init__(self, kw)
        # Alias attribute storage to the dict so both access styles agree.
        self.__dict__ = self
# Bundle the arrays, their row/column labels and every entry of `est` into
# a single Bunch so each value is available as an attribute or a key.
results = Bunch(
    params_table=params_table,
    params_table_colnames=params_table_colnames,
    params_table_rownames=params_table_rownames,
    cov=cov,
    cov_colnames=cov_colnames,
    cov_rownames=cov_rownames,
    **est
)
import numpy as np
est = dict(
risk = 31938.09999990463,
N_fail = 75,
N_sub = 103,
df_m = 2,
rank = 2,
converged = 1,
r2_p = .0091231867926981,
chi2 = 5.443169407354162,
ll = -295.5935507329181,
ll_0 = -298.3151354365951,
N = 172,
cmdline = "stcox ",
datasignaturevars = "_t _t0 _d age posttran",
datasignature = "172:5:
3138366353:569466897",
cmd2 = "stcox",
marginsnotok = "CSNell DEViance DFBeta ESR LDisplace LMax
MGale SCAledsch SCHoenfeld SCores",
marginsprop = "addcons",
k_eform = "1",
footnote = "stcox_footnote",
predict = "stcox_p",
estat_cmd = "stcox_estat",
vce = "oim",
chi2type = "LR",
method = "breslow",
ties = "breslow",
t0 = "_t0",
depvar = "_t",
cmd = "cox",
crittype = "log likelihood",
properties = "b V",
)
# Parameter table as a 2 x 9 float array: one row per name in
# params_table_rownames, one column per name in params_table_colnames.
params_table = np.reshape(
    np.array([
        # row 0 (age): b, se, z, pvalue, ll, ul, df, crit, eform
        1.0323969785881,
        0.01505912144071,
        2.1857970773551,
        0.02883045469589,
        1.0032995597729,
        1.0623382727677,
        np.nan,
        1.9599639845401,
        1,
        # row 1 (posttran): b, se, z, pvalue, ll, ul, df, crit, eform
        0.96759949129124,
        0.29895753046714,
        -0.10660326750099,
        0.91510372626864,
        0.52808200941855,
        1.7729230665857,
        np.nan,
        1.9599639845401,
        1,
    ]),
    (2, 9),
)

# Labels for the columns and rows of params_table.
params_table_colnames = "b se z pvalue ll ul df crit eform".split()
params_table_rownames = "age posttran".split()
# 2 x 2 matrix stored under the name `cov`; rows and columns are labelled
# by cov_rownames and cov_colnames.
cov = np.reshape(
    np.array([
        0.00021276776001, -0.00082941216974,
        -0.00082941216974, 0.09546138521867,
    ]),
    (2, 2),
)

# Labels for the columns and rows of cov.
cov_colnames = "age posttran".split()
cov_rownames = "age posttran".split()
class Bunch(dict):
    """Dictionary whose items are also reachable as attributes.

    The instance's ``__dict__`` is bound to the instance itself, so
    ``bunch.name`` and ``bunch["name"]`` read and write the same entry.
    """

    def __init__(self, **kw):
        """Populate the mapping from the given keyword arguments."""
        dict.__init__(self, kw)
        # Alias attribute storage to the dict so both access styles agree.
        self.__dict__ = self
# Bundle the arrays, their row/column labels and every entry of `est` into
# a single Bunch so each value is available as an attribute or a key.
results = Bunch(
    params_table=params_table,
    params_table_colnames=params_table_colnames,
    params_table_rownames=params_table_rownames,
    cov=cov,
    cov_colnames=cov_colnames,
    cov_rownames=cov_rownames,
    **est
)
-------------------------