### 8.4.1. Fitting the model
# Write model
sink("linreg.txt")
cat("
model {
# Priors
alpha ~ dnorm(0,0.001)
beta ~ dnorm(0,0.001)
sigma ~ dunif(0, 100)
# Likelihood
for (i in 1:n) {
y[i] ~ dnorm(mu[i], tau)
mu[i] <- alpha + beta*x[i]
}
# Derived quantities
tau <- 1/ (sigma * sigma)
p.decline <- 1-step(beta) # Probability of decline
# Assess model fit using a sums-of-squares-type discrepancy
for (i in 1:n) {
residual[i] <- y[i]-mu[i] # Residuals for observed data
predicted[i] <- mu[i] # Predicted values
sq[i] <- pow(residual[i], 2) # Squared residuals for observed data
# Generate replicate data and compute fit stats for them
y.new[i] ~ dnorm(mu[i], tau) # one new data set at each MCMC iteration
sq.new[i] <- pow(y.new[i]-predicted[i], 2) # Squared residuals for new data
}
fit <- sum(sq[]) # Sum of squared residuals for actual data set
fit.new <- sum(sq.new[]) # Sum of squared residuals for new data set
test <- step(fit.new - fit) # Test whether new data set more extreme
bpvalue <- mean(test) # Bayesian p-value
}
",fill=TRUE)
sink()
### 8.4.2. Goodness-of-Fit assessment in Bayesian analyses
plot(out$mean$predicted, out$mean$residual, main = "Residuals vs. predicted values",
las = 1, xlab = "Predicted values", ylab = "Residuals")
abline(h = 0)
lim <- c(0, 3200)
plot(out$sims.list$fit, out$sims.list$fit.new, main = "Graphical posterior predictive check",
las = 1, xlab = "SSQ for actual data set", ylab = "SSQ for ideal (new) data sets", xlim = lim, ylim = lim)
abline(0, 1)
mean(out$sims.list$fit.new > out$sims.list$fit) # Bayesian p-value
### 8.4.3. Forming predictions
predictions <- array(dim = c(length(x), length(out$sims.list$alpha)))
for(i in 1:length(x)){
predictions[i,] <- out$sims.list$alpha + out$sims.list$beta*i
}
LPB <- apply(predictions, 1, quantile, probs = 0.025) # Lower bound
UPB <- apply(predictions, 1, quantile, probs = 0.975) # Upper bound
# for 64 bits RStudio
Sys.setenv(R_ARCH = '/x86_64')
library(inline)
library(Rcpp)
library(rstan)
file.path(R.home(component = 'etc'), .Platform$r_arch, 'Makeconf')
set_cppo(mode = "fast")
m8_4_stan <- '
data{
int n;
real y[n];
real x[n];
}
parameters{
real alpha;
real beta;
real<lower=0, upper=100> sigma;
}
transformed parameters{
real mu[n];
for(i in 1:n){
mu[i] <- alpha + beta*x[i];
}
}
model{
//Priors
alpha ~ normal(0, 100);
beta ~normal(0, 100);
sigma ~ uniform(0, 100);
//Likelihood
for(i in 1:n){
y[i] ~ normal(mu[i], sigma);
}
}
generated quantities{
real tau;
real residual[n];
real predicted[n];
real sq[n];
//real y_new[n];
//real sq_new[n];
// Assess model fit using a sums of squares type discrepancy
for(i in 1:n){
residual[i] <- y[i] - mu[i];
predicted[i] <- mu[i];
sq[i] <- pow(residual[i], 2); // Squared residuals for observed data
// Generate replicate data and compute fit stats for them - how to do in Stan?
//y_new[i] ~ normal(mu[i], sigma); // One new data set at each MCMC iteration
//sq_new[i] <- pow(y_new[i] - predicted[i], 2); // Squared residuals for new data
}
tau <- 1/(sigma*sigma);
// compute fit statistics back in R
}
'
m8_4_data <- list(n = n, x = x, y = y)
system.time(m8_4_stanfit <- stan(model_code = m8_4_stan,
data = m8_4_data,
iter = 10000,
chains = 3,
thin = 1,
warmup = 5000)) # 30.7, 2.6, 36.3
print(m8_4_stanfit, dig = 3)
# Extract estimates
mu.est <- mean(extract(m8_4_stanfit, pars = 'mu', inc_warmup = FALSE))
sigma.est <- mean(extract(m8_4_stanfit, pars = 'sigma', inc_warmup = FALSE))
residual.est <- (extract(m8_4_stanfit, pars = 'residual', inc_warmup = FALSE))
mu_est
for(i in 1:n){
mu_est[i] <- mean(extract(m8_4_stanfit, pars = 'mu[i]', inc_warmup = FALSE)) # doesn't work - can't index mu
}
mu.1 <- (extract(m8_4_stanfit, pars = 'mu[1]', inc_warmup = FALSE))
# Generate new dataset from MCMC estimates
y.new <- rnorm(n, mu.1, sigma.est) # not sure how to do this for all values of mu (should be n = 16)
sq.new <- pow(Resid, 2)
I am currently going through Marc Kery's book, "Introduction to WinBUGS for Ecologists" and trying to translate all of the WinBUGS code into stan (using rstan).
I am wondering the best way to calculate these same parameters and how to derive the new data using rstan? I have the following rstan code that runs the model but I am not sure how to best generate the new data and extract the parameter estimates from the stan model fit to calculate the fit metrics. I'm not sure how much would be easier to do within stan (generated quantities) or back in R (I am functionally literate in R and illiterate in C++). Any suggestions would be appreciated. I hope this question is not too vague and that I didn't dump too much code in this post.
generated quantities{
real tau;
real residual[n];
real predicted[n];
real sq[n];
real y_new[n];
real sq_new[n];
// Assess model fit using a sums of squares type discrepancy
for(i in 1:n){
residual[i] <- y[i] - mu[i];
predicted[i] <- mu[i];
sq[i] <- pow(residual[i], 2); // Squared residuals for observed data
// Generate replicate data and compute fit stats for them - how to do in Stan?
y_new[i] ~ normal_rand(mu[i], sigma); // One new data set at each MCMC iteration DOES NOT ACTUALLY WORK YET
sq_new[i] <- pow(y_new[i] - predicted[i], 2); // Squared residuals for new data
}
tau <- 1/(sigma*sigma);
// compute fit statistics back in R
}
mu_est
for (i in 1:n) {
mu_est[i] <- mean(extract(m8_4_stanfit, pars = "mu[i]", inc_warmup = FALSE))
}mu_est <- rep(NA, n)
for (i in 1:n) {
mu_est[i] <- mean(extract(m8_4_stanfit, pars = paste0("mu[", i, "]"), inc_warmup = FALSE))
}posterior <- extract(m8_4_stanfit, inc_warmup = FALSE)
dim(posterior) # 5000 iterations, by 3 chains, by 20 parameters / transformed parameters / generated quantities
dimnames(posterior)[[3]]
[1] "alpha" "beta" "sigma" "mu[1]" "mu[2]" "mu[3]" "mu[4]" "mu[5]" "mu[6]" "mu[7]" "mu[8]" "mu[9]"
[13] "mu[10]" "mu[11]" "mu[12]" "mu[13]" "mu[14]" "mu[15]" "mu[16]" "lp__"
y_rep <- apply(posterior, MARGIN = 1:2, FUN = function(draw) {
rnorm(n, mean = draw[grepl("^mu", names(draw))], sd = draw["sigma"])
})
dim(y_rep) # 16 replicates by 5000 iterations by 3 chains
error_rep <- y - y_rep # same dimensions
mean(error_rep^2) # somewhat greater than sigma2
stripstan <- function( object , pars=NULL , permuted=TRUE , ... ) { result <- list() if ( is.null(pars) ) pseq <- 1:length(object@par_dims) else pseq <- pars for ( i in pseq ) { name <- names(object@par_dims)[i] result[[ name ]] <- extract( object , name , permuted=permuted , ... )[[1]] } result}
# for 64 bits RStudio
Sys.setenv(R_ARCH = '/x86_64')
library(inline)
library(Rcpp)
library(rstan)
file.path(R.home(component = 'etc'), .Platform$r_arch, 'Makeconf')
set_cppo(mode = "fast")
model_code <- '
data{
// Data that will be imported for use in the model should be defined here
}
parameters{
// parameters that will be used in the model should be defined here
// These will be saved and printed with the output
}
transformed parameters{
// Generation of derived parameters from parameters above should be defined here
// These will be saved and printed with the output
}
model{
// This is area for describing the model including the priors and likelihood
// This is the only place where sampling can be done within Stan
// All data and parameters used in this section must be defined above
}
generated quantities{
// This section is for derived parameters not used in the model
// If not used in the model it is more efficient to include them here rather than in the transformed parameters
// These values will be save and printed along with parameters and transformed parameters
}
'
stan_data <- list()
stanfit <- stan(file, model_name = "anon_model", model_code = "",
fit = NA, data = list(), pars = NA, chains = 4,
iter = 2000, warmup = floor(iter/2), thin = 1,
init = "random", seed = sample.int(.Machine$integer.max, 1),
sample_file, save_dso = TRUE,
verbose = FALSE, ...,
boost_lib = NULL,
eigen_lib = NULL)
print(stanfit)
posterior <- extract(stanfit)
y_rep <- apply(posterior, MARGIN = 1:2, FUN = function(draw) {
rnorm(n, mean = draw[grepl("^mu", names(draw))], sd = draw["sigma"])
})
I can understand what it does in the end but I can't figure it out enough to use it myself in other circumstances. For example, how would I go about creating a vector of the mean mu values if I wanted to make plots similar to Kery:
### 8.4.2. Goodness-of-Fit assessment in Bayesian analyses
plot(out$mean$predicted, out$mean$residual, main = "Residuals vs. predicted values",
las = 1, xlab = "Predicted values", ylab = "Residuals")
abline(h = 0)
lim <- c(0, 3200)
plot(out$sims.list$fit, out$sims.list$fit.new, main = "Graphical posterior predictive check",
las = 1, xlab = "SSQ for actual data set", ylab = "SSQ for ideal (new) data sets", xlim = lim, ylim = lim)
abline(0, 1)
mean(out$sims.list$fit.new > out$sims.list$fit) # Bayesian p-value
The current for of the extract function seems like it's difficult to manipulate and handle the posteriors for post processing, at least for novices such as myself.
Thanks again!
Dan
Thank you Ben and Bob, the general discussion is very helpful. I will be happy to share my rstan code for Kery's WinBUGS examples, although they may be better in combination with Shige since I am still quite a novice in R as well as BUGS and Stan.
I would also be happy to share my experiences to help you improve Stan and rstan for end users like myself.
There seems to be sufficient information for running all the models I've tried but more information for post processing would be very helpful. The only other thing that I can think of that has been confusing is lp_. There is a little about it in the manual but more information would be useful.
Back to my example, I may have been generous when I claimed functional literacy in R because I am not sure I fully grasp your method of extracting and using the posterior samples
y_rep <- apply(posterior, MARGIN = 1:2, FUN = function(draw) {
rnorm(n, mean = draw[grepl("^mu", names(draw))], sd = draw["sigma"])
})
I can understand what it does in the end but I can't figure it out enough to use it myself in other circumstances. For example, how would I go about creating a vector of the mean mu values if I wanted to make plots similar to Kery:
mu_means <- apply(posterior[,,grepl("^mu",dimnames(posterior)[[3]])], 3, mean)> as.matrix(mu_means)
[,1]
mu[1] 41.98327
mu[2] 40.31490
mu[3] 38.64653
mu[4] 36.97816
mu[5] 35.30979
mu[6] 33.64142
mu[7] 31.97306
mu[8] 30.30469
mu[9] 28.63632
mu[10] 26.96795
mu[11] 25.29958
mu[12] 23.63121
mu[13] 21.96284
mu[14] 20.29447
mu[15] 18.62611
mu[16] 16.95774
Should this go in what the R folks call a "vignette"?
I'd be happy to help with the structure, but
we'd have to design whether to go with Ben's
expert-level code, something simpler and more
loop oriented, or both. I'd love to see both,
with the simple version first, then the
bind-and-apply versions, because it'd help me
learn R better.
stan_apply <- function(fit, dimensions = c("parameters", "chains","iterations"), pars = fit@sim$pars_oi, inc_warmup = FALSE, FUN, ...) { dimensions <- match.arg(dimensions, several.ok = TRUE) if(length(dimensions) == 3) { stop("the length of 'dimensions' must be either 1 or 2") } MARGIN <- sapply(dimensions, switch, iterations = 1L, chains = 2L, parameters = 3L) return(apply(extract(fit, pars = pars, inc_warmup = inc_warmup), MARGIN = MARGIN, FUN = FUN, ...)) }> oxford <- stan_demo("oxford")> IQRs <- stan_apply(oxford, "parameters", FUN = IQR)> length(IQRs)[1] 246> head(IQRs) mu[1] mu[2] mu[3] mu[4] mu[5] mu[6] 0.9217132 0.6696871 0.7633586 0.6213006 0.5682908 1.1354033 > quantiles <- stan_apply(oxford, "parameters", FUN = quantile, probs = c(.25, .75))> dim(quantiles)[1] 2 246> diff(quantiles[,1:6]) mu[1] mu[2] mu[3] mu[4] mu[5] mu[6]75% 0.9217132 0.6696871 0.7633586 0.6213006 0.5682908 1.135403
The list of RStan functions is available by following
the "Index" link at the bottom of the rstan help
page. I had to hunt for that myself, still being a
novice R user.
> ls("package:rstan")
[1] "extract" "get_adaptation_info" "get_cppcode" "get_cppo" "get_cppo_mode"
[6] "get_cxxflags" "get_inits" "get_logposterior" "get_sampler_params" "get_seed"
[11] "get_seeds" "get_stancode" "get_stanmodel" "makeconf_path" "plot"
[16] "print" "read_rdump" "rstan_options" "sampling" "set_cppo"
[21] "sflist2stanfit" "show" "stan" "stanc" "stan_demo"
[26] "stan_model" "stan_rdump" "stan_version" "summary" "traceplot"
# Assess model fit using a sums-of-squares-type discrepancy
for (i in 1:n) {
residual[i] <- y[i]-mu[i] # Residuals for observed data
predicted[i] <- mu[i] # Predicted values
sq[i] <- pow(residual[i], 2) # Squared residuals for observed data
# Generate replicate data and compute fit stats for them
y.new[i] ~ dnorm(mu[i], tau) # one new data set at each MCMC iteration
sq.new[i] <- pow(y.new[i]-predicted[i], 2) # Squared residuals for new data
}
fit <- sum(sq[]) # Sum of squared residuals for actual data set
fit.new <- sum(sq.new[]) # Sum of squared residuals for new data set
test <- step(fit.new - fit) # Test whether new data set more extreme
bpvalue <- mean(test) # Bayesian p-value
# Extract Entire posterior (for all parameters) - 3 chains
posterior <- extract(m8_4_stanfit, inc_warmup = FALSE, permute = FALSE)
# Generate new data
y_rep <- apply(posterior, MARGIN = 1:2, FUN = function(draw) {
rnorm(n, mean = draw[grepl("^mu", names(draw))], sd = draw["sigma"])
})
dim(y_rep) # 16 replicates by 5000 iterations by 3 chains
error_rep <- y - y_rep # y = 16 replications
mean(error_rep^2) # far greater than sigma2 (92 vs true = 25) - also differs from estimated sigma2 of ~40
# alternative way to extract mu MCMC estimates
mu_rep <- apply(posterior, MARGIN = 1:2, FUN = function(draw) {
mu1 = draw[grepl("^mu", names(draw))]
})
# Fit statistics that are done within WinBUGS
residual <- y - y_rep
sq <- residual^2
sq.new <- (y_rep - mu_rep)^2
for(i in 1:5000){
fit[i] <- sum(sq[ , i, 1]) # Not sure how to get this to loop right for 3 chains - might be easier with permuted extract function
}
for(i in 1:5000) fit.new[i] <- sum(sq.new[, i, 1])
# Posterior predictive distributions and bayesian p-values
test <- ifelse(test = (fit.new - fit) > 0, yes=1, no = 0) # Test whether new data set more extreme
(bpvalue <- mean(test)) # Bayesian p-value = 0.0358 indicates poor fit (should be ~0.5)
plot(fit, fit.new)
abline(0, 1) # Also shows poor fit - bias of idealized (new) fit data being lower than fit data. The SSQ are also MUCH larger than those found in WinBUGS. Seems like I did something incorrectly since the summary output was the same for WinBUGS and Stan.
P.S. The full R files for the Stan and JAGS code would be attached but I get "An error (#340) occurred while communicating with the server" and I can't post on the google group with an attachment.
residual <- y - mu_rep # although mu_rep is not really a "replicate" of anything
sq <- residual^2
sq.new <- (y_rep - mu_rep)^2
dim(sq) # 16 observations by 5000 iterations by 3 chains
fit <- apply(sq, 2:3, sum) # yields a 5000 by 3 matrix
fit.new <- apply(sq.new, 2:3, sum)
bpvalue <- mean(sign(fit.new - fit)) # I got 0.085
transformed parameters{
real mu[n];
for(i in 1:n){
mu[i] <- alpha + beta*x[i];
}
}
model{
//Priors
alpha ~ normal(0, 100);
beta ~ normal(0, 100);
sigma ~ uniform(0, 100);
//Likelihood
for(i in 1:n){
y[i] ~ normal(mu[i], sigma);
}
model{
//Priors
alpha ~ normal(0, 100);
beta ~ normal(0, 100);
sigma ~ uniform(0, 100);
//Likelihood
for(i in 1:n){
y[i] ~ normal(alpha + beta*x[i], sigma);
}
I have another question about working with rstan output. Shige's question about Stan's trouble with large datasets made me want to try some examples (using a similar version of the code he had, I had the same problem of very small n_eff and huge Rhat when using a large dataset of n=100,000). I decided to try increasing the size of the dataset for another problem I was working on.
That way there is no mu since I am really only interested in alpha, beta, and sigma estimations. However, in more complex hierarchical models with lots of covariates, I find the code including mu to be more pleasing. My resulting question is, can only some of the parameters be printed?
model {
real mu;
//Priors
alpha ~ normal(0, 100);
beta ~ normal(0, 100);
sigma ~ uniform(0, 100);
//Likelihood
for(i in 1:n){
mu <- alpha + beta*x[i];
y[i] ~ normal(mu,sigma);
}
model {
vector[N] mu;
mu <- alpha + beta * x;
// priors
y ~ normal(mu,sigma);
}I'm not terribly familiar with working with S4 objects but I know head() and stanfit[1:3] wouldn't work.
You shouldn't have to know anything about S4 objects to use stan, but those functions indeed do not work. We are working in v1.0.3 and future versions to make more functions work as expected with stanfit objects.
On Sat, Oct 20, 2012 at 9:52 PM, Ben Goodrich <goodri...@gmail.com> wrote:You shouldn't have to know anything about S4 objects to use stan, but those functions indeed do not work. We are working in v1.0.3 and future versions to make more functions work as expected with stanfit objects.Ooh, something I could theoretically actually help out with. Depending on how ugly/not ugly the accessors are. Will have a look.
What sort of plans exist to get rstan into CRAN? It would sure help with popularizing Stan and to have packages depend upon it.