Hello, folks.
I have been trying to replicate the gmultmix N-mixture models in JAGS, for a removal design with both primary and secondary periods. I may have succeeded in the optimal case where there are no missing data (thanks to Richard Chandler for the idea of the sequential binomial trials for the removal part), but things rapidly fall apart once I introduce a missing data point or two (either for the catch data or covariates). Does anyone have any idea of how to get the code fully generalizable to allow missing data? And any suggestions to optimize the code for speed? It is darn slow... which is not surprising, I'm fairly new to WinBUGS and brand-new with JAGS.
Thanks in advance for any advice or pointers.
cheers, Jessi Brown
Code appended below:
# my attempt at making JAGS code for
# the Chandler et al. 2011 temporary
# emigration N-mixture models
# removal design with secondary samples
library(unmarked)
library(rjags)
load.module("glm")
# generating some data
# Data dimensions
nSites <- 200
nSecondary <- 3
nPrimary <- 6
# Parameters
expit <- function(x) {
1/(1+exp(-x))
}
# dummy variable to assign sites to one of two areas
area <- round(runif(nSites, 0, 1))
# super-population calculation
lambda0 <- log(200) #5.298317
lambda1 <- log(100)-log(200) # area1 sites are less well-populated, -0.6931472
lambda <- exp(lambda0 + lambda1*area) # mean abundance in primary period 1
# linear availability for capture; max at -2 temp
temp <- array(NA, c(nSites, nPrimary))
for(t in 1:nPrimary){
temp[,t] <- runif(nSites, -2, 2)
}
phi0 <- -1.5459
phi1 <- -1.1422
phi <- expit(phi0 + phi1*temp) # availability for capture
# unimodal detection probability; max at 0 time, each secondary sample
# follows in sequence
time <- array(NA, c(nSites, nSecondary, nPrimary))
for(t in 1:nPrimary){
time[,1,t] <- runif(nSites, -2, 2)
time[,2,t] <- time[,1,t] + abs(rnorm(nSites, 0.1, 0.05))
time[,3,t] <- time[,2,t] + abs(rnorm(nSites, 0.1, 0.05))
}
time2 <- time*time
p0 <- -2.1969
p1 <- 0
p2 <- -0.95069
p <- expit(p0 + p1*time + p2*time*time) # detection probability
# Super-population size
set.seed(458)
M <- rpois(nSites, lambda)
# Population size during each primary sampling period
N <- matrix(NA, nSites, nPrimary)
# Observed counts
y <- array(NA, c(nSites, nSecondary, nPrimary))
for(i in 1:nSites) {
for(j in 1:nPrimary) {
N[i,] <- rbinom(nPrimary, M[i], phi[i,j])
for(k in 1:nSecondary) {
y[i,1,] <- rbinom(nPrimary, N[i,j], p[i,k,j])
Nleft1 <- N[i,j] - y[i,1,j]
y[i,2,] <- rbinom(nPrimary, Nleft1, p[i,k,j])
Nleft2 <- Nleft1 - y[i,2,j]
y[i,3,] <- rbinom(nPrimary, Nleft2, p[i,k,j])
}
}
}
plot(1:nPrimary, colSums(N), xlab="Primary period", ylab="Population available", type="o")
#############################################################
#
# on to JAGS model
# attempted removal model
#
#############################################################
sink(file="temp_em_covs.txt")
cat("
model {
lambda.int ~ dnorm(0, 0.001)
lambda.beta1 ~ dnorm(0, 0.001)
phi.int ~ dnorm(0, 0.001)
phi.beta1 ~ dnorm(0, 0.001)
p.int ~ dnorm(0, 0.001)
p.beta1 ~ dnorm(0, 0.001)
p.beta2 ~ dnorm(0, 0.001)
for(i in 1:nSites) {
M[i] ~ dpois(lambda[i])
log(lambda[i]) <-
lambda.int + lambda.beta1*area[i]
for(j in 1:nPrimary) {
N[i,j] ~ dbin(phi[i,j], M[i])
logit(phi[i,j]) <-
phi.int + phi.beta1*temp[i,j]
for(k in 1:nSecondary) {
logit(p[i,k,j]) <-
p.int + p.beta1*time[i,k,j] + p.beta2*time2[i,k,j]
}
y[i,1,j] ~ dbin(p[i,1,j], N[i,j])
y[i,2,j] ~ dbin(p[i,2,j], N[i,j]-y[i,1,j])
y[i,3,j] ~ dbin(p[i,3,j], N[i,j]-y[i,1,j]-y[i,2,j])
}
}
}
", fill=TRUE)
sink()
# Format data and create function to initiate parameters
Mst <- apply(y, 1, function(x) {
if(all(
is.na(x)))
return(2)
else
return(sum(x, na.rm = TRUE) + 30)
})
dat.cov <- list(nSites=nSites, nPrimary=nPrimary, nSecondary=nSecondary, y=y, area=area, temp=temp, time=time, time2=time2)
init.cov <- function() list(
lambda.int=rnorm(1), lambda.beta1=rnorm(1),
phi.int=rnorm(1), phi.beta1=rnorm(1),
p.int=rnorm(1), p.beta1=rnorm(1), p.beta2=rnorm(1), M=Mst)
pars.cov <- c("
lambda.int", "lambda.beta1", "
phi.int", "phi.beta1", "
p.int", "p.beta1", "p.beta2")
# Compile the model.
jm.cov <- jags.model("temp_em_covs.txt", dat.cov, init.cov, n.chains=2, n.adapt=5000)
# seems very sensitive to the initial values for Mst
# Draw samples from the posterior
jc.cov <- coda.samples(jm.cov, pars.cov, n.iter=10000)
# View the Markov chains
plot(jc.cov, ask=T)
summary(jc.cov) # coefficients generally in ballpark especially when run longer
# MCMC diagnostics
gelman.diag(jc.cov) # Should be ~ <1.1
autocorr.plot(jc.cov) # If samples are autocorrelated, you need more iters
crosscorr(jc.cov) # Correlation among parameters
# try putting an NA in or two; uncomment to choose which data gets NA
# "fix" by just putting a zero back in
# time[1,1,5] <- NA
y[1,1:3,5] <- NA
# temp[1,5] <- 0
Mst <- apply(y, 1, function(x) {
if(all(
is.na(x)))
return(2)
else
return(sum(x, na.rm = TRUE) + 30)
})
dat.cov <- list(nSites=nSites, nPrimary=nPrimary, nSecondary=nSecondary, y=y, area=area, temp=temp, time=time, time2=time2)
init.cov <- function() list(
lambda.int=rnorm(1), lambda.beta1=rnorm(1),
phi.int=rnorm(1), phi.beta1=rnorm(1),
p.int=rnorm(1), p.beta1=rnorm(1), p.beta2=rnorm(1), M=Mst)
pars.cov <- c("
lambda.int", "lambda.beta1", "
phi.int", "phi.beta1", "
p.int", "p.beta1", "p.beta2")
# Compile the model.
jm.cov <- jags.model("temp_em_covs.txt", dat.cov, init.cov, n.chains=2, n.adapt=5000)
# apparently can't have NAs in the time or temp covariate
# sometimes will run if NAs are in the response array y