Dear INLA-group,
I try to fit treatment-data for teeth to a survival model. When using the package "flexsurv" with a Gompertz distribution the fit followes nicely the original data. However, when I repeat the fit with INLA, the fit looks different. Right now I cannot decide whether I use INLA in a wrong way or the INLA-fit is wrong because of the "experimental" status of the Gompertz-distribution. The survival-analysis with INLA follows the instructions from the book "Bayesian inference with INLA" from
Virgilio Gómez-Rubio.
Here is a reproducible example:
library(survival)
library(flexsurv)
library(INLA)
library(parallel)
library(ggplot2)
# create random data
# parameters from the initial flexsurf fit on original data
# shape rate
# -0.2290890 -0.7998633
random_data<-data.frame(time = rgompertz(n = 1e3,
shape = -0.2290890,
rate = exp(-0.7998633)),
cens = 1)
# In my original data set, I have data for eight years. To adjust for this
# time-span all times longer than eight years will be censored
random_data[random_data$time > 8, 'time']<-8
random_data[random_data$time == 8, 'cens']<-0
# Kaplan-Meier plot of the random data
km<-survfit(Surv(time,cens)~1,
data=random_data)
plot(km)
# fit with flexsurv
flexsurv_model<-flexsurvreg(Surv(time,cens) ~ 1,
data = random_data,
dist = 'gompertz')
plot(flexsurv_model)
coef(flexsurv_model)
# shape rate
# -0.2391953 -0.7735919
# visual fit and recovered parameters are very close to the original data
# fit with INLA
sinla <- inla.surv(random_data$time, random_data$cens)
inla_model <- inla(sinla ~ 1,
data = random_data,
family = "gompertzsurv")
summary(inla_model)
# the recovered parameters look different from the original ones
# plot fit
# Grid of times
time <- seq(0.01, 8, by = 0.1)
# Marginal of alpha
alpha.marg <- inla_model$marginals.fixed[["(Intercept)"]]
# Set number of cores to use
options(mc.cores = 4)
# Compute post. marg. of survival function
surv.inla <- mclapply(times, function(t){
surv.marg <- inla.tmarginal(function(x) {exp(- exp(x) * t)}, alpha.marg)
surv.stats <- inla.zmarginal(surv.marg, silent = TRUE)
return(unlist(surv.stats[c("quant0.025", "mean", "quant0.975")]))
})
surv.inla <- as.data.frame(do.call(rbind, surv.inla))
surv.inla$times<-times
km_data<-data.frame(time=km$time,
surv=km$surv,
lb=km$lower,
ub=km$upper)
ggplot(km_data,aes(x=time))+
geom_line(aes(y=surv))+
geom_ribbon(aes(ymin=lb,ymax=ub),alpha=0.3)+
geom_ribbon(data=surv.inla, aes(ymin=quant0.025,ymax=quant0.975),alpha=0.3)+
geom_line(data=surv.inla, aes(y=mean))
# the plot also looks different from the original data
# crosscheck with new data according to the recovered parameters
random_data2<-data.frame(time = rgompertz(n = 1e3,
shape = 0.004,
rate = exp(-1.355)),
cens = 1)
# In my original data set, I have data for eight years. To adjust for this
# time-span all times longer than eight years will be censored
random_data2[random_data2$time > 8, 'time']<-8
random_data2[random_data2$time == 8, 'cens']<-0
# Kaplan-Meier plot of the random data
km2<-survfit(Surv(time,cens)~1,
data=random_data2)
plot(km2)
km2_data<-data.frame(time=km2$time,
surv=km2$surv,
lb=km2$lower,
ub=km2$upper)
# plot the inla-fit togegher with the new random data
ggplot(km2_data,aes(x=time))+
geom_line(aes(y=surv))+
geom_ribbon(aes(ymin=lb,ymax=ub),alpha=0.3)+
geom_ribbon(data=surv.inla, aes(ymin=quant0.025,ymax=quant0.975),alpha=0.3)+
geom_line(data=surv.inla, aes(y=mean))
# there is a nice fit of the inla-fit with the new random-data