Folks,
I am playing with seemingly unrelated regressions (SUR; p. 71 of the 2.7 manual) using the Grunfeld investment data available in the R systemfit package. I use both the multi_normal and multi_normal_cholesky approaches provided in the manual to model two time series.
The first approach goes well. The second approach, with LKJ priors, produces Rhat NaNs on some correlation coefficients. I would like to find out how that can be fixed.
Here is the data code:
library(systemfit)
library(rstan)
data("GrunfeldGreene")

## helper: center and scale a numeric vector
standardize <- function(v) (v - mean(v)) / sd(v)

## split out the two firms of interest
ge   <- subset(GrunfeldGreene, firm == "General Electric")
west <- subset(GrunfeldGreene, firm == "Westinghouse")

N <- nrow(ge)  ## number of observations per firm

## standardized predictors for each firm
ge$value_std     <- standardize(ge$value)
west$value_std   <- standardize(west$value)
ge$capital_std   <- standardize(ge$capital)
west$capital_std <- standardize(west$capital)

## responses: one column per firm
y <- cbind(ge$invest, west$invest)

## design matrices, intercept column first
x1 <- cbind(rep(1, N), ge$value_std, ge$capital_std)      ## GE
x2 <- cbind(rep(1, N), west$value_std, west$capital_std)  ## Westinghouse

K <- ncol(y)   ## number of time series / equations
J <- ncol(x1)  ## number of predictors per equation

data_ge_west <- list(y = y, x1 = x1, x2 = x2, N = N, K = K, J = J)
Code for the multi_normal approach that works well:
data {
int<lower=1> K;   // number of equations / outcome series
int<lower=1> J;   // number of predictors per equation (incl. intercept)
int<lower=0> N;   // number of observations
vector[J] x1[N];  // predictors for series 1 (GE)
vector[J] x2[N];  // predictors for series 2 (Westinghouse)
vector[K] y[N];   // joint outcomes, one K-vector per observation
}
parameters {
matrix[K,J] beta;    // regression coefficients, one row per equation
cov_matrix[K] Sigma; // residual covariance across equations
}
model {
vector[K] mu[N];
// Each equation has its own design matrix, so the mean is built row by
// row rather than with a single matrix-vector product.
for (n in 1:N) {
// mu[n] <- beta * x[n];
mu[n][1] <- beta[1] * x1[n];  // beta[1] is a row_vector[J]: dot product with x1[n]
mu[n][2] <- beta[2] * x2[n];
}
y ~ multi_normal(mu, Sigma);  // implicit flat priors on beta and Sigma
}
The results of this approach with default warmups, iterations, chains, and thinning:
4 chains, each with iter=2000; warmup=1000; thin=1;
post-warmup draws per chain=1000, total post-warmup draws=4000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
beta[1,1] 102.16 0.19 8.08 85.52 97.01 102.31 107.19 118.29 1749 1
beta[1,2] 17.88 0.25 7.97 2.86 12.61 17.58 22.88 34.69 1049 1
beta[1,3] 32.89 0.20 8.50 14.59 27.77 33.15 38.40 48.90 1727 1
beta[2,1] 42.87 0.07 2.88 37.06 41.03 42.84 44.65 48.64 1815 1
beta[2,2] 13.66 0.11 4.03 5.52 11.10 13.68 16.30 21.77 1278 1
beta[2,3] 2.71 0.14 4.28 -5.92 0.02 2.83 5.43 10.86 984 1
Sigma[1,1] 1317.66 21.91 687.11 558.48 865.51 1136.43 1570.10 3039.40 983 1
Sigma[1,2] 364.40 7.25 216.43 116.93 224.89 309.80 446.86 907.93 892 1
Sigma[2,1] 364.40 7.25 216.43 116.93 224.89 309.80 446.86 907.93 892 1
Sigma[2,2] 167.72 2.45 85.06 72.43 111.66 146.81 200.49 382.09 1202 1
lp__ -111.92 0.08 2.57 -118.14 -113.31 -111.45 -110.04 -108.25 976 1
Code for the LKJ prior approach:
data {
  int<lower=1> K;   // number of equations / outcome series
  int<lower=1> J;   // number of predictors per equation (incl. intercept)
  int<lower=0> N;   // number of observations
  vector[J] x1[N];  // predictors for series 1 (GE)
  vector[J] x2[N];  // predictors for series 2 (Westinghouse)
  vector[K] y[N];   // joint outcomes, one K-vector per observation
}
parameters {
  matrix[K,J] beta;                 // regression coefficients, one row per equation
  cholesky_factor_corr[K] L_Omega;  // Cholesky factor of residual correlation
  vector<lower=0>[K] L_sigma;       // residual scales
}
model {
  vector[K] mu[N];
  matrix[K,K] L_Sigma;
  // Cholesky factor of the covariance: diag(L_sigma) applied on the right
  L_Sigma <- diag_post_multiply(L_Omega, L_sigma);
  to_vector(beta) ~ normal(0, 50);
  L_Omega ~ lkj_corr_cholesky(2);
  L_sigma ~ cauchy(0, 2.5);
  // likelihood: each equation has its own design matrix
  for (n in 1:N) {
    // mu[n] <- beta * x[n];
    mu[n][1] <- beta[1] * x1[n];
    mu[n][2] <- beta[2] * x2[n];
  }
  y ~ multi_normal_cholesky(mu, L_Sigma);
}
generated quantities {
  // Recover the correlation and covariance matrices so the fit is directly
  // comparable with the multi_normal parameterization.
  //
  // NOTE on the Rhat NaNs: L_Omega[1,1] == 1 and L_Omega[1,2] == 0 in EVERY
  // draw, by construction of a correlation Cholesky factor (unit diagonal,
  // upper triangle zero). Rhat of a constant is NaN; this is expected and
  // harmless. Monitor Omega[1,2] (the actual correlation) and Sigma instead.
  matrix[K,K] Omega;  // residual correlation matrix
  matrix[K,K] Sigma;  // residual covariance matrix
  Omega <- multiply_lower_tri_self_transpose(L_Omega);
  Sigma <- quad_form_diag(Omega, L_sigma);
}
Results of this approach:
4 chains, each with iter=2000; warmup=1000; thin=1;
post-warmup draws per chain=1000, total post-warmup draws=4000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
beta[1,1] 100.64 0.15 6.51 87.87 96.33 100.62 104.92 113.46 1786 1
beta[1,2] 16.13 0.17 6.52 3.79 11.92 15.97 20.18 29.53 1507 1
beta[1,3] 33.62 0.16 6.68 19.53 29.46 33.85 38.29 46.14 1819 1
beta[2,1] 42.42 0.06 2.37 37.90 40.80 42.44 43.96 47.19 1793 1
beta[2,2] 12.94 0.09 3.41 6.01 10.77 12.98 15.21 19.61 1444 1
beta[2,3] 3.49 0.10 3.65 -3.85 1.10 3.44 5.91 10.57 1406 1
L_Omega[1,1] 1.00 0.00 0.00 1.00 1.00 1.00 1.00 1.00 4000 NaN
L_Omega[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4000 NaN
L_Omega[2,1] 0.27 0.00 0.06 0.15 0.23 0.27 0.31 0.39 2190 1
L_Omega[2,2] 0.96 0.00 0.02 0.92 0.95 0.96 0.97 0.99 2057 1
L_sigma[1] 28.97 0.13 5.26 20.93 25.13 28.31 31.87 41.41 1705 1
L_sigma[2] 7.30 0.04 1.44 5.08 6.30 7.09 8.08 10.64 1380 1
lp__ -131.08 0.07 2.35 -136.51 -132.44 -130.72 -129.34 -127.55 1302 1
Thanks for all your help.
Raghu