Warning messages:
1: There were 6417 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help.
2: Examine the pairs() plot to diagnose sampling problems
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
# simulated data ----------------------------------------------------------
I <- 3 # number of groups
J <- 4 # number of replicates per group
set.seed(444)
### simulation of overall means ###
Mu.t1 <- 20 # overall mean at timepoint 1
Mu.t2 <- 5 # overall mean at timepoint 2
Mu <- c(Mu.t1, Mu.t2)
names(Mu) <- c("t1", "t2")
sigmab.t1 <- 8 # between standard deviation at timepoint 1
sigmab.t2 <- 1 # between standard deviation at timepoint 1
rho <- 0.2 # correlation between the two timepoints
Sigma <- rbind(
c(sigmab.t1^2, rho*sigmab.t1*sigmab.t2),
c(rho*sigmab.t1*sigmab.t2, sigmab.t2^2)
)
mu <- mvtnorm::rmvnorm(I, Mu, Sigma) # group means
### simulation within-groups ###
sigmaw.t1 <- 2
sigmaw.t2 <- 0.5
y.t1 <- c(sapply(mu[,"t1"], function(m) rnorm(J, m, sigmaw.t1)))
y.t2 <- c(sapply(mu[,"t2"], function(m) rnorm(J, m, sigmaw.t2)))
### constructs the dataset ####
Timepoint <- rep(c("t1", "t2"), each=I*J)
Group <- paste0("grp", rep(gl(I,J), times=2))
Repeat <- rep(1:J, times=2*I)
dat <- data.frame(
Timepoint=Timepoint,
Group=Group,
Repeat=Repeat,
y=c(y.t1,y.t2)
)
# STAN --------------------------------------------------------------------
dat <- transform(dat, timepoint=as.integer(Timepoint), group=as.integer(Group))
stancode <- 'data {
int<lower=1> N; // number of observations
real y[N]; // observations
int<lower=1> ngroups; // number of groups
int<lower=1> group[N]; // group indices
int<lower=1> timepoint[N]; // timepoint indices
}
parameters {
vector[2] Mu;
vector[2] mu[ngroups]; // group means
cholesky_factor_corr[2] L;
vector<lower=0>[2] sigma_b;
vector<lower=0>[2] sigma_w;
}
model {
sigma_w ~ gamma(1, 0.001);
for(k in 1:N){
y[k] ~ normal(mu[group[k], timepoint[k]], sigma_w[timepoint[k]]);
}
sigma_b ~ gamma(1, 0.01);
L ~ lkj_corr_cholesky(1);
Mu ~ normal(0, 25);
for(j in 1:ngroups){
mu[j] ~ multi_normal_cholesky(Mu, diag_pre_multiply(sigma_b, L));
}
}
generated quantities {
matrix[2,2] Omega;
matrix[2,2] Sigma;
real rho_b;
Omega <- multiply_lower_tri_self_transpose(L);
Sigma <- quad_form_diag(Omega, sigma_b);
rho_b <- Sigma[1,2]/(sigma_b[1]*sigma_b[2]);
}'
### compile Stan model
stanmodel <- stan_model(model_code = stancode, model_name="stanmodel")
### Stan data
standata <- list(y=dat$y, N=nrow(dat), ngroups=nlevels(dat$Group),
timepoint=dat$timepoint, group=dat$group)
### Stan initial values
estimates <- function(dat, perturb=FALSE){
if(perturb) dat$y <- dat$y + rnorm(length(dat$y), 0, 1)
mu <- matrix(aggregate(y~timepoint:group, data=dat, FUN=mean)$y, ncol=2, byrow=TRUE)
Mu <- colMeans(mu)
sigma_b <- sqrt(diag(var(mu)))
L <- t(chol(cor(mu)))
sigmaw1 <- mean(aggregate(y~Group, data=subset(dat, Timepoint=="t1"), FUN=sd)$y)
sigmaw2 <- mean(aggregate(y~Group, data=subset(dat, Timepoint=="t2"), FUN=sd)$y)
return(list(mu=mu, Mu=Mu, L=L, sigma_b=sigma_b, sigma_w = c(sigmaw1, sigmaw2)))
}
inits <- function(chain_id){
values <- estimates(dat, perturb = chain_id > 1)
return(values)
}
### run Stan
samples <- sampling(stanmodel, data = standata, init=inits,
iter = 20000, warmup = 10000, chains = 4,
control=list(adapt_delta=0.8))
### outputs
library(coda)
codasamples <- do.call(mcmc.list,
plyr::alply(rstan::extract(samples, permuted=FALSE,
pars = c("Mu", "sigma_b", "sigma_w", "rho_b")), 2, mcmc))
summary(codasamples)
Hello,In this blog post I show how to fit a certain model by several ways, including nlme::lme in R and JAGS. I wanted to add the STAN way but I get some warnings and I would like to understand.Warning messages:
1: There were 6417 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help.
2: Examine the pairs() plot to diagnose sampling problemsMy code is given below. The model is explained in the blog post. It is true that I get less "divergent transitions" when I increase adapt_delta . What does it mean ?
Even when I increase it to 0.98 I still get "divergent transitions". I did several attempts and I also got a warning suggesting to increase max_treedepth.
And I don't know what does that mean to "Examine the pairs() plot to diagnose sampling problems".
condition is not"n_divergent__", red points will be superimposed onto the smoothed density plots indicating which (if any) iterations encountered a divergent transition. Otherwise, yellow points indicate a transition that hit the maximum treedepth rather than terminated its evolution normally.The estimates of the between variances sigma_b[1,2] and look unstable: they are sensible to the choice of the prior. The estimates of the other parameters look quite good (similar to those obtained with lme or JAGS, and close to the true values used for simulating the data).The data are simulated. I get less "divergent transitions" when I use a larger number of groups (the "sample size" is rather small on my example : only three groups to estimate the between components).Should I use more informative priors on the between variances ? Is it usual to get such warnings with small sample sizes ?I would welcome any comments.
Even when I increase it to 0.98 I still get "divergent transitions". I did several attempts and I also got a warning suggesting to increase max_treedepth.max_treedepth specifies the maximum number of leapfrogs NUTS will take before to hit a U-turn. If it hits this limit then NUTS prematurely terminates and becomes a random walk. Making your sampling less efficient... and potentially (bias??) any posterior estimates.
I haven't really used multi_normal_cholesky's so I can't be sure whether you could use a non-centered parameterisation here. Maybe one of the others might help here.
Not sure this will help much but stan normally does a pretty good job with initialising parameters without you specifying the initials. You might want to see how well stan goes without setting them.
data {
int<lower=1> N; // number of observed points for an experiment. Note that here I consider all trajectories in terms of their positions. The reason is the initial regression analysis shows that the cell position is explained by the covariates.
int<lower=1> D; // D refers to dimension
int<lower=1> K; // number of states
matrix[N, D] trajectory; // cartesian coordinates of positions in time.
vector<lower=0>[D] alpha; // dirichlet distribution parameters
int<lower=1> counter_n; // where to start the markovian process
int<lower=1> b_final_f; // where to end the markovian process
int<lower=1> a_initial_f; // first position of each trajectory
real mu_0; // mean of obsreved increments in x, y, and z directions
real upsigma;
int<lower=1> nu;
}
parameters {
corr_matrix[D] Omega[K-1]; // correlation matrix (for Levy and persistent random walk)
simplex[K] prior_pi; // k is the number of observed models we consider.
simplex[K] P[K]; // transition matrix
vector<lower=0>[D] sigma_error[K]; // to model error of X, Y, Z variables.
vector[D] mu_model[K]; // For now, I consider three states available in the study. Each element of the mu_model is considered a matrix for K hidden states.
}
model {
matrix[K, K] P_transformed; //saving P as a matrix format.
vector[K] prior_pi_alternative[N];
matrix[K, N] models; // coordinates
row_vector[K] a;
matrix[D, D] Sigma_model[K]; // I chose 3 as because of the cartisian coordinate setups. First, persistent. Second, levy, and third, brownian.
for(k in 1:K)
P[k] ~ dirichlet(alpha);
for(m in 1:K){
for(n in 1:K){
P_transformed[m, n] = P[m, n];
}
}
prior_pi ~ dirichlet(alpha);
prior_pi_alternative[a_initial_f] = prior_pi;
for(l in counter_n: b_final_f){
a = prior_pi_alternative[l-1]' * P_transformed;
prior_pi_alternative[l] = a';
}
for(k in 1:K){
sigma_error[k] ~ cauchy(0,5); // half-Cauchy due to constraint
mu_model[k] ~ normal(mu_0, upsigma);
}
for(k in 1:(K-1)){
Omega[k] ~ lkj_corr(2.0); // regularize to unit correlation
}
for(k in 1:(K-1)){
for (d in 1:D){
Sigma_model[k, d, d] = sigma_error[k, d] * sigma_error[k, d] * Omega[k, d, d];
}
for (m in 1:(D-1)) {
for (n in (m+1):D) {
Sigma_model[k, m, n] = sigma_error[k, m] * sigma_error[k, n] * Omega[k, m, n];
Sigma_model[k, n, m] = Sigma_model[k, m, n];
}
}
}
for (m in 1:D)
Sigma_model[3, m,m] = sigma_error[3, m] * sigma_error[3, m];
for (m in 1:(D-1)) {
for (n in (m+1):D) {
Sigma_model[3, m, n] = 0;
Sigma_model[3, n, m] = 0;
}
}
for(n in 1:N) models[1, n] = log(prior_pi_alternative[n, 1]) + multi_student_t_lpdf(trajectory[n] | nu, mu_model[1], Sigma_model[1]);
for(n in 1:N) models[2, n] = log(prior_pi_alternative[n, 2]) + multi_normal_lpdf(trajectory[n] | mu_model[2], Sigma_model[2]);
for(n in 1:N) models[3, n] = log(prior_pi_alternative[n, 3]) + multi_normal_lpdf(trajectory[n] | mu_model[3], Sigma_model[3]);
for (k in 1:K)
increment_log_prob(log_sum_exp(models[k]));
}
I call it with using the following command
dat <- list(N = track.length[i]-1, D=3, trajectory= increment.trajectory, alpha=c(1, 1, 1), K=3, counter_n=2, a_initial_f=1, b_final_f=track.length[i]-1, mu_0= max(mu_0_prior), upsigma=10*max(sigma_0_prior), nu=track.length[i]-2)
fit <- stan(model_code = stanmodelcode, model_name = "example", data = dat, iter = 2012, chains = 3, verbose = TRUE, control = list(adapt_delta = 0.9999, stepsize = 0.001, max_treedepth =15))