stopifnot(require(rstan))
schools_dat <- list(J = 8,
y = c(28, 8, -3, 7, -1, 1, 18, 12),
sigma = c(15, 10, 16, 11, 9, 11, 10, 18))
eight_schools <- stan(file = "http://wiki.stan.googlecode.com/git/rstangettingstarted/8schools.stan",
data = schools_dat, iter = 1, chains = 1) # just get it to compile the model
library(parallel) # or some other parallelizing package
chains <- 8
posterior <- mclapply(1:chains, mc.cores = chains, FUN = function(chain) { # or another parallelizing function
stan(fit = eight_schools, data = schools_dat, chains = 1, verbose = FALSE, refresh = -1, chain_id=chain)
})
posterior <- sflist2stanfit(posterior) # see below for function definition
print(posterior)
plot(posterior)
sflist2stanfit <- function(sflist) {
# merge a list of stanfit objects into one
# Args:
# sflist, a list of stanfit objects, each element of which
# should have equal length of `iter`, `warmup`, and `thin`.
# Returns:
# A new stanfit objects have all the chains in each element of sf_list.
# The date would be where the new object is created.
# Note:
# * method get_seed would not work well for this merged
# stanfit object. But all the information is still there.
# * When print function is called, the sampler is obtained
# only from the first chain.
#
sf_len <- length(sflist)
if (sf_len < 2) stop("'sflist' should have more than 1 elements")
if (!is.list(sflist) ||
any(sapply(sflist, function(x) !is(x, "stanfit")))) {
stop("'sflist' must be a list of 'stanfit' objects")
}
if (any(sapply(sflist, function(x) x@mode != 0))) {
stop("each 'stanfit' object in 'sflist' must contain samples")
}
for (i in 2:sf_len) {
if (!identical(sflist[[i]]@sim$pars_oi, sflist[[1]]@sim$pars_oi) ||
!identical(sflist[[i]]@sim$dims_oi, sflist[[1]]@sim$dims_oi))
stop("parameters in element ", i, " (stanfit object) is different from in element 1")
if (sflist[[i]]@sim$n_save[1] != sflist[[1]]@sim$n_save[1] ||
sflist[[i]]@sim$warmup2[1] != sflist[[1]]@sim$warmup2[1])
stop("all 'stanfit' objects should have equal length of iterations and warmup")
}
n_chains = sum(sapply(sflist, function(x) x@sim$chains))
sim = list(samples = do.call(c, lapply(sflist, function(x) x@sim$samples)),
chains = n_chains,
iter = sflist[[1]]@sim$iter,
thin = sflist[[1]]@sim$thin,
warmup = sflist[[1]]@sim$warmup,
n_save = rep(sflist[[1]]@sim$n_save[1], n_chains),
warmup2 = rep(sflist[[1]]@sim$warmup2[1], n_chains),
thin = rep(sflist[[1]]@sim$thin[1], n_chains),
permutation = do.call(c, lapply(sflist, function(x) x@sim$permutation)),
pars_oi = sflist[[1]]@sim$pars_oi,
dims_oi = sflist[[1]]@sim$dims_oi,
fnames_oi = sflist[[1]]@sim$fnames_oi,
n_flatnames = sflist[[1]]@sim$n_flatnames)
nfit <- new("stanfit",
model_name = sflist[[1]]@model_name,
model_pars = sflist[[1]]@model_pars,
par_dims = sflist[[1]]@par_dims,
mode = 0L,
sim = sim,
inits = do.call(c, lapply(sflist, function(x) x@inits)),
stan_args = do.call(c, lapply(sflist, function(x) x@stan_args)),
stanmodel = sflist[[1]]@stanmodel,
date = date(),
.MISC = new.env())
invisible(nfit)
}
Good news everyone (particularly people who are willing to help us test)! Jiqiang wrote a function that will be included in rstan 1.0.3 that facilitates running chains in parallel. In particular, you can do something like
stopifnot(require(rstan))
schools_dat <- list(J = 8,
y = c(28, 8, -3, 7, -1, 1, 18, 12),
sigma = c(15, 10, 16, 11, 9, 11, 10, 18))
eight_schools <- stan(file = "http://wiki.stan.googlecode.com/git/rstangettingstarted/8schools.stan",
data = schools_dat, iter = 1, chains = 1) # just get it to compile the model
library(
parallel) # or some other parallelizing package
chains <- 8
posterior <- mclapply(1:chains, mc.cores = chains, FUN = function(chain) { # or another parallelizing function
stan(fit = eight_schools, data = schools_dat, chains = 1, verbose = FALSE, refresh = -1)
})
posterior <- sflist2stanfit(posterior) # see below for function definition
print(posterior)
plot(posterior)
--
<sflist2stanfit.txt>
> install.packages("parallel")Installing package(s) into ‘/Library/Frameworks/R.framework/Versions/2.15/Resources/library’(as ‘lib’ is unspecified)Warning in install.packages :package ‘parallel’ is not available (for R version 2.15.1)
library(parallel)
--
posterior <- mclapply(1:chains, FUN = function(chain) do.call(stan, args) )
posterior <- mclapply(1:chains, FUN = function(chain) {
args$chain_id <- chain
do.call(stan, args)
})
--
I think what Ben was trying to say it is *not* possible to pass chain_id to stan(). Whatever you give as chain_id for stan() (as it has ... argument, so it would not complain), the real chain ids would be from 1 to the number of chains (parameter chains).
So it is important to have very different seeds for stan if run in parallel in rstan 1.0.2.
seed = sample.int(.Machine$integer.max, 1)
So I should let stan() honor user supplied chain_id in rstan 1.0.3. Then the best way would be to let all stan call have the same seed, but different chain_id's.
# compile modelfit.stan <- stan(model_code = m2.SEM, data = bdat, iter = 1, chains = 1, verbose = T)
# load snowlibrary(snow)# setup cluster with two corescl <- makeCluster(c("localhost","localhost"), type = "SOCK")
# create list with data, parameters to be traced, compiled model, and function for generating initial valuescldata <- list(data=bdat, params=params, fit = fit.stan, inits=its)clusterEvalQ(cl, library(rstan)) # loads rjags library on each nodeclusterExport(cl, "cldata") # loads cldata on each node
stanmod <- function(i) { stan(fit = cldata$fit, data = cldata$data, inits = cldata$inits(), iter = 700, warmup = 250, thin = 2, chains = 1, pars = cldata$params) }stan.out <- clusterApply(cl, 1:2, stanmod)stopCluster(cl)
For us Windows folks, it doesn't look like the parallel package works. Here is my attempt using snow.
stanmod <- function(i) {stan(fit = cldata$fit, data = cldata$data, inits = cldata$inits(),iter = 700, warmup = 250, thin = 2, chains = 1,pars = cldata$params)}stan.out <- clusterApply(cl, 1:2, stanmod)stopCluster(cl)
seed = random:::randomNumbers(1, min = -10^9, max = 10^9, col = 1)[1,1] + 10^9 + 1
I failed to mention something potentially important. In code such this:
On Friday, October 12, 2012 2:41:30 AM UTC-4, Sebastian Weber wrote:posterior <- mclapply(1:chains, FUN = function(chain) do.call(stan, args) )
It is important to pass chain_id=chain to Stan, as in
posterior <- mclapply(1:chains, FUN = function(chain) { # or another parallelizing function
stan(fit = eight_schools, data = schools_dat, chains = 1, verbose = FALSE,
refresh = ifelse(chain == 1, 200, -1), chain_id = chain)
})
--
Good news everyone (particularly people who are willing to help us test)! Jiqiang wrote a function that will be included in rstan 1.0.3 that facilitates running chains in parallel. In particular, you can do something like
stopifnot(require(rstan))
schools_dat <- list(J = 8,
y = c(28, 8, -3, 7, -1, 1, 18, 12),
sigma = c(15, 10, 16, 11, 9, 11, 10, 18))
eight_schools <- stan(file = "http://wiki.stan.googlecode.com/git/rstangettingstarted/8schools.stan",
data = schools_dat, iter = 1, chains = 1) # just get it to compile the model
library(
parallel) # or some other parallelizing package
chains <- 8
posterior <- mclapply(1:chains, mc.cores = chains, FUN = function(chain) { # or another parallelizing function
stan(fit = eight_schools, data = schools_dat, chains = 1, verbose = FALSE, refresh = -1, chain_id=chain)
})
posterior <- sflist2stanfit(posterior) # see below for function definition
print(posterior)
plot(posterior)
I got the following error when running the 8 school examples on a MacBook:posterior <- sflist2stanfit(posterior) # see below for function definitionError in FUN(X[[1L]], ...) :no slot of name "mode" for this object of class "stanfit"
--
if set to TRUE then the computation is first divided to (at most) as many jobs are there are cores and then the jobs are started, each job possibly covering more than one value. If set to FALSE then one job is forked for each value of X. The former is better for short computations or large number of values in X, the latter is better for jobs that have high variance of completion time and not too many values of X compared to mc.cores. |
Good news everyone (particularly people who are willing to help us test)! Jiqiang wrote a function that will be included in rstan 1.0.3 that facilitates running chains in parallel. In particular, you can do something like
stopifnot(require(rstan))
schools_dat <- list(J = 8,
y = c(28, 8, -3, 7, -1, 1, 18, 12),
sigma = c(15, 10, 16, 11, 9, 11, 10, 18))
eight_schools <- stan(file = "http://wiki.stan.googlecode.com/git/rstangettingstarted/8schools.stan",
data = schools_dat, iter = 1, chains = 1) # just get it to compile the model
library(parallel) # or some other parallelizing package
chains <- 8
posterior <- mclapply(1:chains, mc.cores = chains, FUN = function(chain) { # or another parallelizing function
stan(fit = eight_schools, data = schools_dat, chains = 1, verbose = FALSE, refresh = -1, chain_id=chain)
})
posterior <- sflist2stanfit(posterior) # see below for function definition
print(posterior)
plot(posterior)
On Oct 12, 2012, at 12:14 AM, Ben Goodrich wrote:
Good news everyone (particularly people who are willing to help us test)! Jiqiang wrote a function that will be included in rstan 1.0.3 that facilitates running chains in parallel. In particular, you can do something like
stopifnot(require(rstan))
schools_dat <- list(J = 8,
y = c(28, 8, -3, 7, -1, 1, 18, 12),
sigma = c(15, 10, 16, 11, 9, 11, 10, 18))
eight_schools <- stan(file = "http://wiki.stan.googlecode.com/git/rstangettingstarted/8schools.stan",
data = schools_dat, iter = 1, chains = 1) # just get it to compile the model
When using this method to sample in parallel, does each chain use the same initial values? If the initial values are set in running this single iteration, and that iteration is passed to the parallel operation, would each chain would follow from the same set of initial values? What if, in the example above, iter=0?
initial_values <- list(something, different, for, each, chain)
posterior <- sflist2stanfit(mclapply(1:5, mc.cores = 5, FUN = function(i) {
stan(fit = eight_schools, seed = 12345, chain_id = i, chains = 1, inits = initial_values[[i]])
}))