# Author: Kyle Foreman
# kforeman@post.harvard.edu
# Date: 27 Nov 2013
# Purpose: demonstrate sparse MVN methods for Stan
# Notes: see original Scotland Lip Cancer example at
#        http://www.openbugs.info/Manuals/GeoBUGS/Examples/Scotland.html

# libraries
library(rstan)
set_cppo('fast')

# load data
load('scotland_lip_cancer.RData')
# N = number of observations
# O = observed lip cancer cases
# E = expected lip cancer cases
# x = covariate (percent pop involved in agriculture)
# A = adjacency matrix
# D = diagonal matrix of neighbor counts, constructed from A
D <- diag(rowSums(A))

# how many samples to draw?
n_samples <- 1e3

# model CAR using multi_normal_prec()
mvn_prec_code <- "
    data {
        int N;                      # number of regions
        int O[N];                   # observed cases
        vector[N] E;                # expected cases
        vector[N] x;                # covariate
        matrix[N,N] A;              # adjacency matrix
        matrix[N,N] D;              # diagonal matrix (sum_cij)
    }
    transformed data {
        # zeros for MVN prior
        vector[N] zeros;
        zeros <- rep_vector(0.0, N);
    }
    parameters {
        real alpha0;                # intercept
        real alpha1;                # coefficient on covariate
        vector[N] beta1;            # area-specific random effect (CAR)
        real<lower=0> tau;          # precision of CAR
        real<lower=0,upper=1> p;    # strength of spatial correlation
    }
    model {
        # tmp variables
        vector[N] beta1_stz;        # beta1 with sum-to-zero constraint
        vector[N] log_mu;           # prediction of log(mu)
        matrix[N,N] Tau;            # precision matrix for CAR

        # priors on model parameters
        # no explicit prior on alpha0 (improper flat prior)
        alpha1 ~ normal(0.0, sqrt(1e5));
        tau ~ gamma(0.5, 0.0005);

        # precision matrix for CAR
        Tau <- tau * (D - p * A);

        # CAR model using precision version of MVN
        beta1 ~ multi_normal_prec(zeros, Tau);

        # impose sum-to-zero constraint
        beta1_stz <- beta1 - mean(beta1);

        # calculate prediction
        log_mu <- log(E) + alpha0 + (alpha1 * x) + beta1_stz;

        # likelihood
        O ~ poisson_log(log_mu);
    }
"
mvn_prec_data <- list(N=N, O=O, E=E, x=x, A=A, D=D)
mvn_prec_init <- list(
    list(
        alpha0 = 0.0,
        alpha1 = 0.0,
        beta1  = rep(0, N),
        tau    = 1.0,
        p      = 0.99
    )
)
mvn_prec_fit <- stan(
    model_code = mvn_prec_code,
    data       = mvn_prec_data,
    init       = mvn_prec_init,
    chains     = 1,
    iter       = n_samples
)

# create sparse versions of A & D
# D is just its diagonal
D_sparse <- diag(D)
# A becomes two vectors listing the non-zero (adjacent) pairs
A_sparse <- which(A == 1, arr.ind=TRUE)
A_sparse <- A_sparse[A_sparse[,1] < A_sparse[,2],]    # keep each pair once (A is symmetric)
A_N <- dim(A_sparse)[1]
A1 <- A_sparse[,1]
A2 <- A_sparse[,2]

# so, about that determinant...
# the multivariate normal likelihood requires the determinant of Tau, where
# Tau = tau * (D - p * A), so det(Tau) = tau^N * det(D - p * A)
# the det(D - p * A) factor is quite slow to calculate
# there are sparse matrix algorithms that could work, but they're a bit cumbersome
# since the determinant is a relatively small contributor to the likelihood,
# we can live with an approximation: evaluate it on a grid of values of p
# and linearly interpolate the rest
# det(D - p * A) is a function of p alone, since D & A are constants,
# and p is constrained to [0,1], so a grid of 1000 values covers it
p_samples <- 1e3
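# quick sanity check (illustrative addition, not part of the original model
# code): confirm that log(det(D - p * A)) is smooth enough in p for linear
# interpolation on this grid to be accurate; note that at p = 1 the matrix
# D - A is the graph Laplacian, which is singular, so we only test on [0, 0.99]
ldet_exact <- function(p_val) as.numeric(determinant(D - p_val * A, logarithm=TRUE)$modulus)
p_grid    <- seq(0, 1, length.out=p_samples + 1)
ldet_grid <- sapply(p_grid, ldet_exact)
p_check   <- seq(0.005, 0.99, length.out=20)
max(abs(approx(p_grid, ldet_grid, xout=p_check)$y - sapply(p_check, ldet_exact)))
# the maximum absolute error should be negligible relative to the log-likelihood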
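# a second check (also illustrative, not part of the original model code): the
# sparse model below computes beta1' * (D - p * A) * beta1 using only D_sparse,
# A1, and A2; verify in R that this matches the dense quadratic form for an
# arbitrary test vector and value of p
beta_check <- rnorm(N)
p_val      <- 0.5
dense_qf   <- as.numeric(t(beta_check) %*% (D - p_val * A) %*% beta_check)
bD <- sum(beta_check^2 * D_sparse)                # beta' * D * beta
bA <- 2 * sum(beta_check[A1] * beta_check[A2])    # beta' * A * beta (each pair stored once)
all.equal(dense_qf, bD - p_val * bA)              # should be TRUE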
# code for the sparse version
sparse_code <- "
    data {
        int N;                      # number of regions
        int O[N];                   # observed cases
        vector[N] E;                # expected cases
        vector[N] x;                # covariate
        int A_N;                    # number of adjacent region pairs
        int A1[A_N];                # first half of adjacency pairs
        int A2[A_N];                # second half of adjacency pairs
        vector[N] D_sparse;         # diagonal (sum_cij)
        int p_samples;              # number of grid points for the determinant
    }
    transformed data {
        # calculate log(determinant(D - p * A)) at a grid of values of p
        vector[p_samples + 1] ldet_DpA;
        {
            matrix[N,N] DpA;        # D - p * A (i.e. Tau / tau)
            DpA <- diag_matrix(D_sparse);
            # fill in the off-diagonal of D - p * A for each value of p;
            # each adjacency pair is stored once, so set both symmetric entries
            for (i in 1:(p_samples + 1)) {
                for (a in 1:A_N) {
                    DpA[A1[a], A2[a]] <- -1.0 * ((i - 1.0) / p_samples);
                    DpA[A2[a], A1[a]] <- -1.0 * ((i - 1.0) / p_samples);
                }
                # store the sampled log(determinant(D - p * A))
                ldet_DpA[i] <- log_determinant(DpA);
            }
        }
    }
    parameters {
        real alpha0;                # intercept
        real alpha1;                # coefficient on covariate
        vector[N] beta1;            # area-specific random effect (CAR)
        real<lower=0> tau;          # precision of CAR
        real<lower=0,upper=1> p;    # strength of spatial correlation
    }
    model {
        # tmp variables for MVN likelihood
        row_vector[N] beta1t_D;     # beta1' * D
        row_vector[N] beta1t_A;     # beta1' * A
        real sigma_sq;              # 1 / tau
        # other tmp variables
        vector[N] beta1_stz;        # beta1 with sum-to-zero constraint
        vector[N] log_mu;           # prediction of log(mu)

        # priors on model parameters
        # no explicit prior on alpha0 (improper flat prior)
        alpha1 ~ normal(0.0, sqrt(1e5));
        tau ~ gamma(0.5, 0.0005);

        # CAR model using sparse MVN
        # (beta1' * Tau * beta1) equals tau * ((beta1' * D * beta1) - p * (beta1' * A * beta1)),
        # and each of those terms can exploit the sparse representation

        # sigma_sq
        sigma_sq <- 1 / tau;

        # find beta1' * D
        beta1t_D <- (beta1 .* D_sparse)';

        # find beta1' * A
        beta1t_A <- rep_vector(0.0, N)';    # initialize vector
        for (i in 1:A_N) {
            beta1t_A[A1[i]] <- beta1t_A[A1[i]] + beta1[A2[i]];
            beta1t_A[A2[i]] <- beta1t_A[A2[i]] + beta1[A1[i]];
        }

        # incorporate log probability of MVN(0.0, Tau)
        increment_log_prob(-0.5 / sigma_sq * (beta1t_D * beta1 - p * (beta1t_A * beta1)));
        increment_log_prob(-0.5 * N * log(sigma_sq));
        # hacky interpolated log(determinant(D - p * A))
        {
            int idx;
            real ldet_left;
            real ldet_right;
            idx <- 0;
            while (floor(p * p_samples) >= idx) {
                idx <- idx + 1;
            }
            ldet_left <- ldet_DpA[idx];
            ldet_right <- ldet_DpA[idx + 1];
            increment_log_prob(0.5 * (ldet_left + (ldet_right - ldet_left) * (p * p_samples - floor(p * p_samples))));
        }

        # impose sum-to-zero constraint
        beta1_stz <- beta1 - mean(beta1);

        # calculate prediction
        log_mu <- log(E) + alpha0 + (alpha1 * x) + beta1_stz;

        # data likelihood
        O ~ poisson_log(log_mu);
    }
"
sparse_data <- list(N=N, O=O, E=E, x=x, A_N=A_N, A1=A1, A2=A2,
                    D_sparse=D_sparse, p_samples=p_samples)
sparse_init <- mvn_prec_init
sparse_fit <- stan(
    model_code = sparse_code,
    data       = sparse_data,
    init       = sparse_init,
    chains     = 1,
    iter       = n_samples
)

# compare outputs to ensure we're getting the same result
print(sparse_fit)
print(mvn_prec_fit)
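# (illustrative extension, not in the original script: compare the posterior
#  means of the shared parameters side by side instead of eyeballing the two
#  full print summaries; uses rstan's summary() method for stanfit objects)
pars <- c('alpha0', 'alpha1', 'tau', 'p')
dense_means  <- summary(mvn_prec_fit, pars=pars)$summary[, 'mean']
sparse_means <- summary(sparse_fit, pars=pars)$summary[, 'mean']
round(cbind(dense=dense_means, sparse=sparse_means,
            diff=dense_means - sparse_means), 3)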