library(rstan)model = " // Multivariate mixture of Gaussiansdata {int<lower=2> n;int<lower=2> k;int<lower=2> d;row_vector[d] x[n];}parameters {simplex[k] rho;row_vector[d] mu[k];corr_matrix[d] lambda[k];}model {rho ~ dirichlet(rep_vector(1.0, k));for (j in 1:k) {mu[j] ~ normal(0, 1);lambda[k] ~ lkj_corr(1);}for (i in 1:n) {real p[k];for (j in 1:k) {p[j] = log(rho[j]) + multi_normal_lpdf(x[i] | mu[j], lambda[j]);}target += log_sum_exp(p);}}"d = 2library(mvtnorm) # for rmvnormx1 = rmvnorm(200, c(-7, 6), sigma = diag(d))x2 = rmvnorm(100, c(6, -7), sigma = diag(d))x = rbind(x1, x2)n = dim(x)[1]k = 3data = list(x = x, n = n, d = d, k = k)pars = c("rho", "mu", "lambda")identities = list(diag(rep(1, d), diag(rep(1, d))))init = list(list(simplex = rep(1/k, k),mu = matrix(0, nrow = k, ncol = d),lambda = identities))samples = stan(model_code = model,data = data,pars = pars,iter = 100,chains = 1,thin = 1,seed = 1337,init = init)print(samples)
...Chain 1, Iteration: 100 / 100 [100%] (Sampling)Elapsed Time: 2.80285 seconds (Warm-up)3.98537 seconds (Sampling)6.78822 seconds (Total)The following numerical problems occured the indicated number of times after warmup on chain 1countException thrown at line 19: lkj_corr_log: y is not positive definite. 12When a numerical problem occurs, the Hamiltonian proposal gets rejected.If the number in the 'count' column is small, do not ask about this message on stan-users.Inference for Stan model: 23b4bc39bbc3c199b918718a2a7e8c32.1 chains, each with iter=100; warmup=50; thin=1;post-warmup draws per chain=50, total post-warmup draws=50.mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhatrho[1] 0.33 0.00 0.02 0.30 0.32 0.33 0.34 0.37 37 1.00rho[2] 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.02 17 1.02rho[3] 0.66 0.00 0.02 0.63 0.65 0.66 0.68 0.70 50 0.99mu[1,1] 5.97 0.01 0.09 5.82 5.89 5.97 6.02 6.16 50 1.02mu[1,2] -7.09 0.01 0.09 -7.24 -7.17 -7.11 -7.03 -6.93 50 1.04mu[2,1] 0.02 0.21 0.95 -1.78 -0.47 -0.07 0.80 1.43 21 1.01mu[2,2] -0.11 0.14 0.72 -1.45 -0.57 -0.13 0.24 1.07 27 1.04mu[3,1] -7.01 0.01 0.08 -7.17 -7.06 -7.01 -6.95 -6.85 50 1.01mu[3,2] 5.99 0.01 0.07 5.88 5.95 5.98 6.03 6.13 50 1.01...
library(rstan)model = " // Multivariate mixture of Gaussiansdata {int<lower=2> n;int<lower=2> k;int<lower=2> d;row_vector[d] x[n];}parameters {simplex[k] rho;row_vector[d] mu[k];corr_matrix[d] lambda[k];}transformed parameters {cov_matrix[d] omega[k];for (j in 1:k) {omega[k] = lambda[k]; // omega = f(lambda), with f(X) = X}}model {rho ~ dirichlet(rep_vector(1.0, k));for (j in 1:k) {mu[j] ~ normal(0, 1);lambda[k] ~ lkj_corr(1);}for (i in 1:n) {real p[k];for (j in 1:k) {p[j] = log(rho[j]) + multi_normal_lpdf(x[i] | mu[j], omega[j]);}target += log_sum_exp(p);}}"d = 2library(mvtnorm) # for rmvnormx1 = rmvnorm(200, c(-7, 6), sigma = diag(d))x2 = rmvnorm(100, c(6, -7), sigma = diag(d))x = rbind(x1, x2)n = dim(x)[1]k = 3data = list(x = x, n = n, d = d, k = k)pars = c("rho", "mu", "lambda")identities = list(diag(rep(1, d), diag(rep(1, d))))init = list(list(simplex = rep(1/k, k),mu = matrix(0, nrow = k, ncol = d),lambda = identities))samples = stan(model_code = model,data = data,pars = pars,iter = 100,chains = 1,thin = 1,seed = 1337,init = init)print(samples)
...[595] "Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:"[596] "validate transformed params: omega[k0__] is not symmetric. omega[k0__][1,2] = nan, but omega[k0__][2,1] = nan"[597] "If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,"[598] "but if this warning occurs often then your model may be either severely ill-conditioned or misspecified."[599] "Rejecting initial value:"[600] " Error evaluating the log probability at the initial value."[601] "Initialization between (-2, 2) failed after 100 attempts. "[602] " Try specifying initial values, reducing ranges of constrained values, or reparameterizing the model."[603] "Error in eval(ei, envir) : "error occurred during calling the sampler; sampling not doneStan model '5244928831b01aff407c40bddf406215' does not contain samples.Stan model '5244928831b01aff407c40bddf406215' does not contain samples.Stan model '5244928831b01aff407c40bddf406215' does not contain samples.
init = list(list(simplex = rep(1/k, k),mu = matrix(0, nrow = k, ncol = d),lambda = identities,omega = identities))
I'm trying to implement a mixture of multivariate Gaussians in Stan. I'm using rstan version 2.12.1. I have simulated bimodal data for the two dimensional case, with means at (-7, 6) and (6, -7), and I'm using a LKJ prior on the covariance matrices (lambda[k]) of the mixture components. It works perfectly. But when I try to transform lambda[k] (thereby using a more complicated prior on the covariance matrices of the mixture), I'm running into trouble. Instead of using lambda[k] I'd like to use omega[k] = f(lambda[k]), for a given function f (which could involve other random variables). I'm running into trouble even when f(X) = X. Here's the code for the mixture of multivariate Gaussians before adding f (this code works perfectly):
transformed parameters {
matrix[d,d] omega[k]; for (j in 1:k) { omega[k] = f(lambda[k]); }
transformed parameters {cov_matrix[d] omega[k];
for (j in 1:k) {
omega[k] = lambda[k]; // omega = f(lambda), with f(X) = X}}
SAMPLING FOR MODEL 'a9cdf93d5bc3b2355caa9fa0aa8f3b52' NOW (CHAIN 1).[1] "Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:"[2] "Exception thrown at line 32: multi_normal_log: Covariance matrix is not symmetric. Covariance matrix[1,2] = nan, but Covariance matrix[2,1] = nan"[3] "If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,"[4] "but if this warning occurs often then your model may be either severely ill-conditioned or misspecified."[5] "Rejecting initial value:"[6] " Error evaluating the log probability at the initial value."[7] "Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:"[8] "Exception thrown at line 32: multi_normal_log: Covariance matrix is not symmetric. Covariance matrix[1,2] = nan, but Covariance matrix[2,1] = nan"[9] "If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,"[10] "but if this warning occurs often then your model may be either severely ill-conditioned or misspecified."[11] "Rejecting initial value:"[12] " Error evaluating the log probability at the initial value."[13] "Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:"[14] "Exception thrown at line 32: multi_normal_log: Covariance matrix is not symmetric. Covariance matrix[1,2] = nan, but Covariance matrix[2,1] = nan"[15] "If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,"[16] "but if this warning occurs often then your model may be either severely ill-conditioned or misspecified."...
error occurred during calling the sampler; sampling not done
Stan model 'a9cdf93d5bc3b2355caa9fa0aa8f3b52' does not contain samples.
row_vector[d] z;z = x[i] - mu[j];p[j] = log(rho[j]) - log_determinant(omega[k])/2 - multiply(z, multiply(omega[j], z'))/2;
transformed parameters {cov_matrix[d] omega[k];
for (j in 1:k) {
omega[k] = inverse(lambda[k]); // omega = f(lambda), with f(X) = X^{-1}}}
...[589] "Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:"[590] "Exception thrown at line 34: multiply: A[1] is nan, but must not be nan!"[591] "If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,"[592] "but if this warning occurs often then your model may be either severely ill-conditioned or misspecified."[593] "Rejecting initial value:"[594] " Error evaluating the log probability at the initial value."
[595] "Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:"
[596] "Exception thrown at line 34: multiply: A[1] is nan, but must not be nan!"
[597] "If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,"[598] "but if this warning occurs often then your model may be either severely ill-conditioned or misspecified."[599] "Rejecting initial value:"[600] " Error evaluating the log probability at the initial value."[601] "Initialization between (-2, 2) failed after 100 attempts. "[602] " Try specifying initial values, reducing ranges of constrained values, or reparameterizing the model."[603] "Error in eval(ei, envir) : "error occurred during calling the sampler; sampling not done
Stan model 'a2d1881831356711c29d61facd9e5a3a' does not contain samples.
library(rstan)model = " // Multivariate mixture of Gaussiansdata {int<lower=2> n;int<lower=2> k;int<lower=2> d;row_vector[d] x[n];}parameters {simplex[k] rho;row_vector[d] mu[k];corr_matrix[d] lambda[k];}transformed parameters {
matrix[d, d] omega[k];
for (j in 1:k) {
omega[k] = inverse(lambda[k]); // omega = f(lambda), with f(X) = X^{-1}
}}model {rho ~ dirichlet(rep_vector(1.0, k));
for (j in 1:k) {
mu[j] ~ normal(0, 1);lambda[k] ~ lkj_corr(1);}for (i in 1:n) {
real p[k];
for (j in 1:k) {
row_vector[d] z;z = x[i] - mu[j];p[j] = log(rho[j]) + log_determinant(omega[k])/2 - multiply(z, multiply(omega[j], z'))/2;
--
You received this message because you are subscribed to the Google Groups "Stan users mailing list" group.
To unsubscribe from this group and stop receiving emails from it, send an email to stan-users+...@googlegroups.com.
To post to this group, send email to stan-...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
fit = mgfit(x)pred = predict(fit, y)
# load datasetx = iris[, 1:4]# split into test set and train setx = x[sample(length(x))]n = dim(x)[1]d = dim(x)[2]n.train = floor(n/2)x.train = x[1:n.train, ]x.validate = x[-(1:n.train), ]x.test = x.validaten.test = dim(x.test)[1]# hold out 1/2 entries sporadically in test setfor (j in 1:d) {x.test[sample(n.test, floor(n.test/2)), j] = NA}x.validate[!is.na(x.test)] = NA# fit mixture of Gaussiansfit = mgfit(x.train)# predict into test setx.pred = predict(fit, x.test)# print RMSEs of model, compared to baselinecat('RMSEs:\n')cat('VARIABLE MODEL BASE\n')for (j in 1:d) {mask = is.na(x.test[, j])model = x.pred[mask, j]truth = x.validate[mask, j]base = mean(x.train[, j])model.rmse = sqrt(mean(truth - model)^2)base.rmse = sqrt(mean(truth - base)^2)cat(sprintf("%s %.4f %.4f\n", colnames(x)[j], model.rmse, base.rmse))}
...RMSEs:VARIABLE MODEL BASESepal.Length 0.2311 1.0938Petal.Length 1.1687 2.5691Petal.Width 0.8141 1.2664Sepal.Width 0.1416 0.2485
# mgfit/predict v0.1: model and predict variables using a mixture of Gaussians# Copyright (c) 2016, Lloyd T. Elliott# All rights reserved.# Redistribution and use in source and binary forms, with or without# modification, are permitted provided that the following conditions are met:# 1. Redistributions of source code must retain the above copyright notice, this# list of conditions and the following disclaimer.# 2. Redistributions in binary form must reproduce the above copyright notice,# this list of conditions and the following disclaimer in the documentation# and/or other materials provided with the distribution.# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# fit a model
mgfit = function(x, # datasetk = 7, # number of components to use in mixtureseed = NULL, # random seed for stan (NULL = generate random seed)quiet = TRUE, # suppress all output (currently unimplemented)iter = 1000, # number of iterationsthin = 10, # amount of thinning (collected = iter/thin)burnin = .75 # proportion of initial iterations to throw away as burnin) {if (is.null(seed)) {seed = sample(.Machine$integer.max, 1)
}model = "// Multivariate mixture of Gaussiansdata {int<lower=2> n;int<lower=2> k;int<lower=2> d;row_vector[d] x[n];}parameters {simplex[k] rho;row_vector[d] mu[k];corr_matrix[d] lambda[k];
vector<lower=0>[d] h[k];
}transformed parameters {cov_matrix[d] omega[k];for (j in 1:k) {
vector[d] al;for (i in 1:d) {al[i] = 1.0/h[j][i];}omega[j] = diag_matrix(al) * inverse(lambda[j]) * diag_matrix(al);
}}model {rho ~ dirichlet(rep_vector(1.0, k));for (j in 1:k) {mu[j] ~ normal(0, 1);
lambda[j] ~ lkj_corr(1);h[j] ~ exponential(1);
}for (i in 1:n) {real p[k];for (j in 1:k) {row_vector[d] z;z = x[i] - mu[j];
p[j] = log(rho[j]) + multi_normal_prec_lpdf(x[i] | mu[j], omega[j]);}target += log_sum_exp(p);}}"library(rstan)n = dim(x)[1]d = dim(x)[2]# scale training setmeans = colMeans(x)stds = apply(x, 2, sd)x = t(apply(x, 1, function(v) { return((v - means)/stds) }))pars = c("rho", "mu", "lambda", "omega", "h")identities = list()ones = list()
for (j in 1:k) {
identities[[j]] = diag(rep(1, d))ones[[j]] = rep(1, d)}init = list(list(rho = rep(1/k, k),
mu = matrix(0, nrow = k, ncol = d),
lambda = identities,omega = identities,h = ones))
data = list(x = x, n = n, d = d, k = k)
samples = stan(model_code = model,data = data,pars = pars,
chains = 1,iter = iter,thin = thin,seed = seed,init = init)result = structure(list(k = k,d = d,samples = samples,means = means,stds = stds,count = iter/thin,burnin = burnin), class = "mg")return(result)}
# helper functionsmvnpdf = function(x, mu, logdet, omega) {v = matrix(x - mu)return(-logdet/2 - (t(v) %*% omega %*% v)/2)}logsumexp = function(x) {m = max(x)return(log(sum(exp(x - m))) + m)}
# perform predictions based on model
predict.mg = function(mg, x) {# scale test setx = t(apply(x, 1, function(v) { return((v - mg$means)/mg$stds) }))k = mg$kd = mg$dn = dim(x)[1]z = matrix(0, nrow = n, ncol = d)count = mg$countburnin = mg$burninsamples = fit$samples@sim$samples[[1]]denom = 0for (iter in ceiling(count * burnin):count) {denom = denom + 1# extract sample from stan's tracerho = rep(NA, k)mus = list()lambdas = list()
for (j in 1:k) {
token = sprintf("rho[%d]", j)rho[j] = unlist(samples[token])[iter]mu = matrix(NA, nrow = d, ncol = 1)for (i in 1:d) {token = sprintf("mu[%d,%d]", j, i)mu[i, 1] = unlist(samples[token])[iter]}omega = matrix(NA, nrow = d, ncol = d)for (i1 in 1:d) {for (i2 in 1:d) {token = sprintf("omega[%d,%d,%d]", j, i1, i2)omega[i1, i2] = unlist(samples[token])[iter]}}mus[[j]] = mulambdas[[j]] = solve(omega)}transforms = list()
for (i in 1:n) {
# find the missingness patternpattern = is.na(x[i, ])x1 = matrix(x[i, pattern])x2 = matrix(x[i, !pattern])
if (sum(pattern) == d) {# fully observed row: nothing to predictnext}if (sum(pattern) == 0) {# completely missing row: predict the meannext}
p = rep(0, k)
for (j in 1:k) {
key = sprintf("%s%d", paste(sprintf("%d", pattern), collapse = ""), j)if (is.null(transforms[[key]])) {# we haven't seen this pattern: calculate the conditionalslambda22 = lambdas[[j]][!pattern, !pattern]omega22 = solve(lambda22)if (is.matrix(lambda22)) {logdet22 = unlist(determinant(lambda22, logarithm = TRUE))[[1]]} else {logdet22 = log(lambda22)}lambda12 = lambdas[[j]][pattern, !pattern]transform = lambda12 %*% solve(lambda22)info = list(omega22 = omega22,logdet22 = logdet22,transform = transform)transforms[[key]] = info} else {# we've already seen this pattern: retreive the conditionalsinfo = transforms[[key]]omega22 = info$omega22logdet22 = info$logdet22}# compute posterior component assignmentmu2 = mus[[j]][!pattern, 1]p[j] = log(rho[j]) + mvnpdf(x2, mu2, logdet22, omega22)}# normalize posterior component assignmentsp = exp(p - logsumexp(p))
for (j in 1:k) {
# average the conditional predictionskey = sprintf("%s%d", paste(sprintf("%d", pattern), collapse = ""), j)transform = transforms[[key]]$transformmu1 = matrix(mus[[j]][pattern, 1])mu2 = matrix(mus[[j]][!pattern, 1])inc = p[j] * (mu1 + transform %*% (x2 - mu2))z[i, pattern] = z[i, pattern] + inc}}}z = z/denom# transform the predictions back to the original spacez = t(apply(z, 1, function(v) { return(v * mg$stds + mg$means) }))# remove observed values from predictionsz[!is.na(x)] = NAreturn(z)}
vector[d] al;for (i in 1:d) {al[i] = 1.0/h[j][i];}