Hi all,
Imagine you had data like this, where Y is a set of rates calculated previously, and X is a covariate that you suspect drives the rate. You could regress the natural log of the pre-calculated rates (no uncertainty specified) against the covariate to see if there is a meaningful relationship. The regression would give you a line like the red line and a credible interval like the red ribbon. In contrast, if you did the same regression but also specified the uncertainty in Y, say using JAGS (code below), the regression would give you a line like the red line and a credible interval like the gray ribbon.

I am wondering if there is a way to explicitly incorporate the uncertainty in each Y observation when doing regression using INLA. Maybe not? Maybe the way to do that with INLA would be using a Monte Carlo approach, where you draw samples from the 20 Y-distributions repeatedly, each time doing a regression in INLA, and accumulate posteriors for the covariate effect estimate? Will you get the same answer, more or less?
Thanks,
Tim
----------------------------------
# libraries
library(ggplot2)
library(rjags)
library(jagsUI)
library(MCMCvis)
library(dplyr)
# make data
dat1 <- data.frame(x = 1:20) %>%
mutate(y = exp(1 + (0.08 * x))) %>%
mutate(y = rpois(20, y)) %>%
mutate(lcl=y * 0.85, ucl=y * 1.3) %>%
mutate(log_y=log(y),
log_prec_y=
1/(((log(ucl)-log(lcl))/(1.96*2))^2))
n1 <- length(unique(dat1$x))
# plot data
plot_grid(
ggplot(dat1, aes(x=x, y=y, ymin=lcl, ymax=ucl)) +
geom_linerange() + geom_point(),
ggplot(dat1, aes(x=x, y=log_y,
ymin=log_y - sqrt(1/log_prec_y),
ymax=log_y + sqrt(1/log_prec_y))) +
geom_linerange() + geom_point()
)
# bundle data for jags
jags_data <- list(log_y = dat1$log_y,
log_prec_y = dat1$log_prec_y,
x1 = dat1$x,
n1 = n1)
jags_data2 <- list(log_y = dat1$log_y,
x1 = dat1$x,
n1 = n1)
# model with uncertainty in Y
sink("linreg.txt")
cat("
model {
# priors
alpha ~ dnorm(0, 0.001)
beta ~ dnorm(0, 0.001)
# likelihood
for(i in 1:n1) {
log_y[i] ~ dnorm(mu[i], log_prec_y[i])
mu[i] <- alpha + beta * x1[i]
}
}
", fill=TRUE)
sink()
# model with no uncertainty in Y
sink("linreg2.txt")
cat("
model {
# priors
alpha ~ dnorm(0,0.001)
beta ~ dnorm(0,0.001)
sigma ~ dunif(0, 100)
# Likelihood
for (i in 1:n1) {
log_y[i] ~ dnorm(mu[i], tau)
mu[i] <- alpha + beta * x1[i]
}
# derived
tau <- 1 / (sigma * sigma)
}
", fill=TRUE)
sink()
# inits function
inits <- function(){ list(alpha=rnorm(1), beta=rnorm(1))}
inits2 <- function(){ list(alpha=rnorm(1), beta=rnorm(1),
sigma=runif(1, 0, 100))}
# set up and run first model
out1 <- jags(data=jags_data,
inits=inits,
parameters.to.save=c("alpha","beta", "mu"),
model.file="linreg.txt",
n.chains = 3, n.adapt = 1000, n.iter = 200000,
n.burnin = 100000, n.thin = 10)
# first model summary
MCMCsummary(out1, round=3)
quants <- out1$summary
quants <- quants[grep("mu", row.names(quants)), c("2.5%", "50%", "97.5%")]
dat1 <- dat1 %>% mutate(mu=exp(quants[,2]),
mu_lcl=exp(quants[,1]),
mu_ucl=exp(quants[,3]))
# set up and run second model
out2 <- jags(data=jags_data2,
inits=inits2,
parameters.to.save=c("alpha", "beta", "mu"),
model.file="linreg2.txt",
n.chains = 3, n.adapt = 1000, n.iter = 200000,
n.burnin = 100000, n.thin = 10)
# second model summary
MCMCsummary(out2, round=3)
quants2 <- out2$summary
quants2 <- quants2[grep("mu", row.names(quants2)), c("2.5%", "50%", "97.5%")]
dat1 <- dat1 %>% mutate(mu_2=exp(quants2[,2]),
mu_lcl_2=exp(quants2[,1]),
mu_ucl_2=exp(quants2[,3]))
# plot stuff
ggplot(data = dat1,
aes(x = x, y = mu)) +
geom_line(col="darkred") +
geom_line(aes(y=mu_2), col="gray30", lty=2) +
geom_ribbon(aes(ymin = mu_lcl, ymax = mu_ucl),
alpha = 0.2, fill="darkred") +
geom_ribbon(aes(ymin = mu_lcl_2, ymax = mu_ucl_2),
alpha = 0.4, fill="gray50") +
geom_point(aes(y=y)) +
geom_linerange(aes(ymin = lcl, ymax = ucl)) +
labs(x = "Covariate",y = "Rate") +
theme_minimal()