data {
int<lower=1> N; // Number of observations
int<lower=0> P; // Number of fixed-effects predictors
int<lower=0> J; // Number of groups (Species)
int<lower=0> K; // Number of phylogeny-derived correlation matrix columns; it includes both nodes and tips
real Y[N]; // Response variable
matrix[N,P] X; // Fixed-effect predictor matrix
matrix[N,J] ZI; // Random-effect predictor matrix for across-species unique effects
matrix[N,K] ZII; // Random-effect predictor matrix for heritable between-species effects (i.e. phylogeny-derived)
vector[K] zeroes; // Dummy vector containing zeroes for MVN
corr_matrix[K] S; // Phylogenetic correlation matrix (tips + nodes)
}
parameters {
vector[P] beta; // Vector of fixed-effect estimates
vector[J] gammaSpp; // Among-species random-effect coefficients
real<lower=0> sigmaSpp; // Among-species random-effect standard deviation
vector[K] gammaPhy; // Phylogeny-derived random-effect coefficients
real<lower=0> sigmaPhy; // Standard deviation attributable to heritability among species (i.e. phylogeny-derived)
real<lower=0> sigmaY; // Error scale
}
transformed parameters {
vector[N] mu; // Linear mean predictor
cov_matrix[K] SigmaPhy; // Variance-covariance matrix for the multi-normal distribution
SigmaPhy = (sigmaPhy ^ 2) * S; // Operation depicted in eqn 22 from hadfield_jd_2010_j23_494.pdf; sigmaPhy is the standard deviation, needs to be squared
mu = X * beta + ZI * gammaSpp + ZII * gammaPhy;
}
model {
// Priors
gammaSpp ~ normal(0, sigmaSpp);
sigmaSpp ~ cauchy(0,5);
gammaPhy ~ multi_normal(zeroes, SigmaPhy);
sigmaPhy ~ cauchy(0,5);
beta ~ normal(0,5);
sigmaY ~ cauchy(0,5);
// Likelihood
Y ~ normal(mu, sigmaY);
}
data{
...
matrix[N,J] ZI; // Random-effect predictor matrix for across-species unique effects
matrix[N,K] ZII; // Random-effect predictor matrix for heritable between-species effects }
}
parameters {
...
}
transformed parameters {
...
mu = X * beta + ZI * gammaSpp + ZII * gammaPhy;
}
data{
...
int<lower = 1, upper = J> ZI[N]; // Random-effect index of group membership for across-species unique effects
int<lower = 1, upper = K> ZII[N]; // Random-effect index of group membership for heritable between-species effects }
}
parameters {
...
}
transformed parameters {
...
mu = X * beta + gammaSpp[ZI] + gammaPhy[ZII];
}
vignette("brms_phylogenetics")data {
int<lower=1> N; // Number of observations
int<lower=0> P; // Number of fixed-effects predictors
int<lower=0> J; // Number of groups (Species)
int<lower=0> K; // Number of phylogeny-derived correlation matrix columns; it includes tips only
real Y[N]; // Response variable
matrix[N,P] X; // Fixed-effect predictor matrix
int<lower=1,upper=J> ZI[N]; // Random-effect index of group membership for across-species unique effects
int<lower=1,upper=K> ZII[N]; // Random-effect index of group membership for heritable between-species effects (i.e. phylogeny-derived)
corr_matrix[K] S; // Phylogenetic correlation matrix (tips + nodes)
}
transformed data {
matrix[K,K] LPhy;
LPhy = cholesky_decompose(S);
}
parameters {
vector[P] beta; // Vector of fixed-effect estimates
vector[J] gammaSpp; // Among-species random-effect coefficients
real<lower=0> sigmaSpp; // Among-species random-effect standard deviation
vector[K] alphaPhy; // Phylogeny-derived random-effect coefficients (re-parametrised for MVN)
vector<lower=0>[K] sigmaPhy; // Standard deviation attributable to heritability among species (i.e. phylogeny-derived)
real<lower=0> sigmaY; // Error scale
}
transformed parameters {
vector[N] mu; // Linear mean predictor
vector[K] gammaPhy; // Phylogeny-derived random-effect coefficients
gammaPhy = sigmaPhy .* (LPhy * alphaPhy);
mu = X * beta + gammaSpp[ZI] + gammaPhy[ZII];
}
model {
// Priors
gammaSpp ~ normal(0, sigmaSpp);
sigmaSpp ~ cauchy(0, 5);
alphaPhy ~ normal(0, 5); // implies: gammaPhy ~ multi_normal_cholesky(zeroes, diag_matrix(sigmaPhy) * LPhy * LPhy' * diag_matrix(sigmaPhy)))
sigmaPhy ~ cauchy(0, 5);
beta ~ normal(0, 5);
sigmaY ~ cauchy(0, 5);
// Likelihood
Y ~ normal(mu, sigmaY);
}
transformed data {
matrix[K,K] LPhy;
LPhy = transpose(cholesky_decompose(inverse(S)));
}
--
You received this message because you are subscribed to the Google Groups "Stan users mailing list" group.
To unsubscribe from this group and stop receiving emails from it, send an email to stan-users+...@googlegroups.com.
To post to this group, send email to stan-...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
On Jan 25, 2017, at 8:52 PM, Paul Buerkner <paul.b...@gmail.com> wrote:
Andrew,
while working on the first paper for brms a year ago, I noticed that MCMCglmm might well be faster than Stan in terms of effective sample size. Not necessarily for phylogenetic models (I didn't test it for these models), but for multilevel models with only one or two varying intercepts this might happen. The models are not fully equivalent since MCMCglmm doesn't have the LKJ prior. For more complex models, however (e.g., when modelling varying slopes or multiple varying intercepts across different hierachical levels), the performance of MCMCglmm drops pretty fast to be sometimes more or less unusable unless one sets very strong priors. Stan, on the other hand, is very robust when scaling up model complexity, that's why I love it.
library(MCMCglmm)
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
load('./dataAndTree.RData')
# fixed-effect model matrix
X <- model.matrix(~ 1 + Pred1 + Pred2 + Pred3 + Pred4, data = data) # 5 in total, includes intercept (1)
P <- as.integer(ncol(X)) # 5
# among-species random-effect vector
ZI <- as.integer(as.factor(data$species)) # 284 species in total
J <- as.integer(length(unique(ZI))) # 284
# cholesky factor of phylogenetic vcv matrix
A <- ape::vcv(tree, corr = FALSE) # 284 x 284 covariance matrix
LPhy <- t(chol(A[sort(rownames(A)),sort(rownames(A))]))# phylogeny-derived relatedness random-effect vector
ZII <- as.integer(data$animal) # also 284
# number of tips (i.e. species) only
K <- as.integer(length(unique(ZII))) # also 284
stanData <- list(N = as.integer(nrow(data)),
P = P,
J = J,
K = K,
MSpp = 1L,
MPhy = 1L,
Y = data$Y,
X = X,
ZI = ZI,
ZII = ZII,
LPhy = LPhy)
nIter <- 1e4
nWarm <- 1e3
nThin <- 5
nChains <- 1
inits <- list(beta=c(0.1, -0.15, -0.8, -0.2, -0.2), sigmaY=1)
timingRawStan <- system.time({
stanFit <- stan(file = 'phyloLMM.stan', data = stanData, iter = nIter, thin = nThin, chains = nChains, warmup = nWarm, init = list(inits))
})
# compare with original MCMCglmm model
set.seed(10)
priorInverseWishart <- list(G = list(G1 = list(V = diag(1), n = 0.002), G2 = list(V = diag(1), n = 0.002)), R = list(V = diag(1), n = 0.002))
timingRawMCMCglmm <- system.time({
MCMCglmmFit <- MCMCglmm(Y ~ Pred1 + Pred2 + Pred3 + Pred4, random = ~animal + species, pedigree = tree, data = data, family = 'gaussian', nitt = nIter, thin = nThin, burnin = nWarm, prior = priorInverseWishart, pr = TRUE, verbose = TRUE)
})
> timingRawStan
user system elapsed
420.654 2.550 423.960
> timingRawMCMCglmm
user system elapsed
5.093 0.031 5.145
# from stan
> res <- summary(stanFit)[[1]]
> res[c('sigmaPhy[1]','sigmaSpp[1]','sigmaY'), 'mean'] ^ 2
sigmaPhy[1] sigmaSpp[1] sigmaY
0.47419640 0.05777246 0.12036096
# from MCMCglmm
> colMeans(MCMCglmmFit$VCV)
animal otlSpecies units
0.45447357 0.05864063 0.12024812
data {
int<lower=1> N; // Number of observations
int<lower=0> P; // Number of fixed-effects predictors
int<lower=0> J; // Number of groups (Species)
int<lower=0> K; // Number of phylogeny-derived correlation matrix columns; it includes tips only
int<lower=0> MSpp; // 1 (integer for random-effect row vector definition)
int<lower=0> MPhy; // 1 (integer for random-effect row vector definition)
real Y[N]; // Response variable
matrix[N,P] X; // Fixed-effect predictor matrix
int<lower=1,upper=J> ZI[N]; // Random-effect index of group membership for across-species unique effects
int<lower=1,upper=K> ZII[N]; // Random-effect index of group membership for heritable between-species effects (i.e. phylogeny-derived)
matrix[K,K] LPhy; // Cholesky factor of phylogenetic covariance matrix
}
parameters {
vector[P] beta; // Vector of fixed-effect estimates
vector[J] alphaSpp[MSpp]; // Unscaled among-species random-effect coefficients
vector<lower=0>[MSpp] sigmaSpp; // Among-species random-effect standard deviation
vector[K] alphaPhy[MPhy]; // Unscaled phylogeny-derived random-effect coefficients
vector<lower=0>[MPhy] sigmaPhy; // Standard deviation attributable to heritability among species (i.e. phylogeny-derived)
real<lower=0> sigmaY; // Residual sd
}
transformed parameters {
vector[N] mu; // Linear mean predictor
vector[J] gammaSpp; // Among-species random-effect coefficients
vector[K] gammaPhy; // Phylogeny-derived random-effect coefficients
gammaSpp = sigmaSpp[1] * (alphaSpp[1]);
gammaPhy = sigmaPhy[1] * (LPhy * alphaPhy[1]);
mu = X * beta + gammaSpp[ZI] + gammaPhy[ZII];
}
model {
// Priors
alphaSpp[1] ~ normal(0, 1);
sigmaSpp ~ cauchy(0, 2.5);
alphaPhy[1] ~ normal(0, 1);
sigmaPhy ~ cauchy(0, 2.5);
beta ~ normal(0, 1);
sigmaY ~ cauchy(0, 2.5);
// Likelihood
Y ~ normal(mu, sigmaY);
}
MCMCglmmFit <- MCMCglmm(Y ~ Pred1 + Pred2 + Pred3 + Pred4, random = ~animal + otlSpecies, pedigree = tree, data = data, family = 'gaussian', nitt = nIter, thin = nThin, burnin = nWarm, prior = priorInverseWishart, pr = TRUE, verbose = TRUE)
})
library(MCMCglmm)
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
rm(list=ls())
load('./dataAndTree.RData')
# fixed-effect model matrix
X <- model.matrix(~ 1 + Pred2 + Pred3 + Pred4 + Pred5, data = data) # 874 obs
P <- as.integer(ncol(X)) # 5 predictors
# among-species random-effect vector
ZI <- as.integer(as.factor(data$species)) # 1:284
J <- as.integer(length(unique(ZI))) # number of species, 284
# phylogenetic vcv matrix
A <- ape::vcv(tree, corr = FALSE)
LPhy <- t(chol(A[sort(rownames(A)), sort(rownames(A))])) # Cholesky factor
# phylogeny-derived relatedness random-effect vector
ZII <- as.integer(data$animal) # 1:284
# number of tips (i.e. species)
K <- as.integer(length(unique(ZII))) # number of tips, 284
stanData <- list(N = as.integer(nrow(data)),
P = P,
J = J,
K = K,
Y = data$Y,
X = X,
ZI = ZI,
ZII = ZII,
LPhy = LPhy)
nIter <- 5e4
nWarm <- nIter / 2
nThin <- round(nWarm / 6e3) # keep 6e3 samples
nChains <- 3
inits1 <- list(beta = c(0.1, -0.15, -0.8, -0.2, -0.2), sigmaY = 1)
inits2 <- list(beta = c(0.2, -0.3, -0.5, -0.1, -0.5), sigmaY = 1)
inits3 <- list(beta = c(0, 0.15, -1, -0.5, -0.3), sigmaY = 1)
timingRawStan <- system.time({
stanFit <- stan(file = 'phyloLMM.stan', data = stanData, iter = nIter, thin = nThin, chains = nChains, warmup = nWarm, init = list(inits1, inits2, inits3))
})
res <- summary(stanFit)[[1]]
res[which(rownames(res) %in% c('sigmaPhy','sigmaSpp','sigmaY')), 1] ^ 2
nEff <- res[, 'n_eff']summary(nEff)
# compare with original MCMCglmm modellibrary(parallel)set.seed(10)priorInverseWishart <- list(G = list(G1 = list(V = diag(1), n = 0.002), G2 = list(V = diag(1), n = 0.002)), R = list(V = diag(1), n = 0.002))timingRawMCMCglmm <- system.time({ MCMCglmmFit <- mclapply(1:3, function(i) { MCMCglmm(Y ~ Pred2 + Pred3 + Pred4 + Pred5, random = ~animal + species, pedigree = tree, data = data, family = 'gaussian', nitt = nIter, thin = nThin, burnin = nWarm, prior = priorInverseWishart, pr = TRUE, verbose = TRUE) }, mc.cores = 3)})
summary(as.numeric(sapply(MCMCglmmFit, function(m)summary(m)$solutions[,'eff.samp'])))
data {
int<lower=1> N; // Number of observations
int<lower=0> P; // Number of fixed-effects predictors
int<lower=0> J; // Number of groups (Species)
int<lower=0> K; // Number of phylogeny-derived correlation matrix columns; it includes tips only
real Y[N]; // Response variable
matrix[N,P] X; // Fixed-effect predictor matrix
int<lower=1,upper=J> ZI[N]; // Random-effect index of group membership for across-species unique effects
int<lower=1,upper=K> ZII[N]; // Random-effect index of group membership for heritable between-species effects (i.e. phylogeny-derived)
matrix[K,K] LPhy; // Cholesky factor of phylogenetic covariance matrix
}
parameters {
vector[P] beta; // Vector of fixed-effect estimates
vector[J] alphaSpp; // Unscaled among-species random-effect coefficients
vector[K] alphaPhy; // Unscaled phylogeny-derived random-effect coefficients
real<lower=0> sigmaSpp; // Among-species random-effect standard deviation
real<lower=0> sigmaPhy; // Standard deviation attributable to heritability among species (i.e. phylogeny-derived)
real<lower=0> sigmaY; // Model residual standard deviation
}
transformed parameters {
vector[N] mu; // Linear mean predictor
vector[J] gammaSpp; // Among-species random-effect coefficients
vector[K] gammaPhy; // Phylogeny-derived random-effect coefficients
gammaPhy = sigmaPhy * (LPhy * alphaPhy);
gammaSpp = sigmaSpp * (alphaSpp);
mu = X * beta + gammaSpp[ZI] + gammaPhy[ZII];
}
model {
// Priors
alphaSpp ~ normal(0, 1);
sigmaSpp ~ cauchy(0, 0.5);
alphaPhy ~ normal(0, 1);
sigmaPhy ~ cauchy(0.5, 1);
beta ~ normal(0, 1);
sigmaY ~ cauchy(0.1, 0.5);
// Likelihood
Y ~ normal(mu, sigmaY);
}> res <- summary(stanFit)[[1]]
> res[which(rownames(res) %in% c('sigmaPhy','sigmaSpp','sigmaY')), 1] ^ 2
sigmaSpp sigmaPhy sigmaY
0.05758587 0.47334377 0.12012333> timingRawStan
user system elapsed
4928.952 33.740 2507.232> nEff <- res[, 'n_eff']
> summary(nEff)
Min. 1st Qu. Median Mean 3rd Qu. Max.
12680 17200 17670 17660 18070 18750Chain 1, Iteration: 50000 / 50000 [100%] (Sampling)
Elapsed Time: 1062.82 seconds (Warm-up)
1432.62 seconds (Sampling)
2495.44 seconds (Total)
Chain 2, Iteration: 50000 / 50000 [100%] (Sampling)
Elapsed Time: 1022.18 seconds (Warm-up)
1443.64 seconds (Sampling)
2465.83 seconds (Total)
Chain 3, Iteration: 50000 / 50000 [100%] (Sampling)
Elapsed Time: 1053.64 seconds (Warm-up)
1434.36 seconds (Sampling)
2488 seconds (Total)
Inference for Stan model: phyloLMM.
3 chains, each with iter=50000; warmup=25000; thin=4;
post-warmup draws per chain=6250, total post-warmup draws=18750.> timingRawMCMCglmm
user system elapsed
2508.775 17.754 29.163> summary(as.numeric(sapply(MCMCglmmFit, function(m)summary(m)$solutions[,'eff.samp'])))
Min. 1st Qu. Median Mean 3rd Qu. Max.
3555 6092 6250 5965 6250 6666data {
int<lower=1> N; // Number of observations
int<lower=0> P; // Number of fixed-effects predictors
int<lower=0> J; // Number of groups (Species)
int<lower=0> K; // Number of phylogeny-derived correlation matrix columns; it includes tips only
real Y[N]; // Response variable
matrix[N,P] X; // Fixed-effect predictor matrix
int<lower=1,upper=J> ZI[N]; // Random-effect index of group membership for across-species unique effects
int<lower=1,upper=K> ZII[N]; // Random-effect index of group membership for heritable between-species effects (i.e. phylogeny-derived)
cov_matrix[K] A; // Phylogenetic vcv matrix (tips only)
}
transformed data {
matrix[K,K] t_LAinv;
t_LAinv = transpose(cholesky_decompose(inverse(A)));
}
parameters {
vector[P] beta; // Vector of fixed-effect estimates
vector[J] alphaSpp; // Unscaled among-species random-effect coefficients
vector[K] gammaPhy; // Phylogeny-derived random-effect coefficients
real<lower=0> SigmaSpp; // Among-species random-effect standard deviation
real<lower=0> SigmaPhy; // Variance attributable to heritability among species (i.e. phylogeny-derived)
real<lower=0> SigmaY; // Error scale
}
transformed parameters {
vector[N] mu; // Linear mean predictor
vector[J] gammaSpp; // Among-species random-effect coefficients
gammaSpp = sqrt(SigmaSpp) * (alphaSpp);
mu = X * beta + gammaSpp[ZI] + gammaPhy[ZII];
}
model {
vector[K] v;
real sumOfSquares;
v = t_LAinv * gammaPhy;
sumOfSquares = dot_product(v,v);
target += - 0.5 * N * log(SigmaPhy); // non-constant part of log(det(SigmaPhy * A)^-0.5)
target += - sumOfSquares / (2 * SigmaPhy); // log of kernel of multivariate normal
// Priors
alphaSpp ~ normal(0, 1);
SigmaSpp ~ scaled_inv_chi_square(2, 0.25);
SigmaPhy ~ scaled_inv_chi_square(2, 0.25);
beta ~ normal(0, 1);
SigmaY ~ scaled_inv_chi_square(2, 0.25);
// Likelihood
Y ~ normal(mu, sqrt(SigmaY));
}> timingRawStan
user system elapsed
11057.179 54.732 4017.181> res <- summary(stanFit)[[1]]
> res[which(rownames(res) %in% c('SigmaPhy','SigmaSpp','SigmaY')), 1]
SigmaSpp SigmaPhy SigmaY
3.416017e-02 1.000567e-13 2.957514e-01Inference for Stan model: workingExample-MCMCglmm-MatrixPhylo3.
3 chains, each with iter=50000; warmup=25000; thin=4;
post-warmup draws per chain=6250, total post-warmup draws=18750.
Now this is the effective sample size for 3 parallel chains in MCMCglmm using the same specs:> timingRawMCMCglmm
user system elapsed
2508.775 17.754 29.163
And the effective sample size is (not sure if that's how it should be calculated though):
> summary(as.numeric(sapply(MCMCglmmFit, function(m)summary(m)$solutions[,'eff.samp'])))
Min. 1st Qu. Median Mean 3rd Qu. Max.
3555 6092 6250 5965 6250 6666
So is this comparison correct and fair?
Stan was 4928.952/2508.775 = 1.964685 times slower than MCMCglmm, but at the same time had 17670/6250 = 2.8272 as many efficient samples if compared to MCMCglmm?Peter's alternative coding suggestion:
Finally, I tried to modify my stan code even further following Peter's suggestion. Unfortunately however, I do not get similar estimates for the variance components, and the code runs much slower. So, two questions about this one: (1) what did I code wrong? (2) Is it much slower because I coded it incorrectly, or is it because my working code (which is improved based on brms auto-generated stan code) above is faster already?
Here are the estimates for the standard deviation components: