Thanks all - particularly Haavard for making those changes to code, I really appreciate it. I've just tried it out with xbinomial and it works great :)
My use-case is that I have vaccination status (binary variable) for a spatially referenced population and am interested in identifying "cold spots" of vaccination. Some individuals don't have a vaccination card, but we do have a separate measure of "confidence" (i.e. probability) that they are vaccinated, hence the need for q-values. But a more common use-case I would have thought would be prevalence estimates where there is imperfect test sensitivity, meaning observed values are Binomial/Bernoulli with probability p*sensitivity, where p is the true prevalence. If different tests are used in different groups then q would have to be variable (although probably limited to a small number of levels).
For what it's worth, I think I also got the same approach working using inlabru following the guidance above. I've pasted my code below, cribbing off the examples in the inlabru tutorials. I'd be interested to know if this is/isn't correct for future reference.
Best wishes,
Bob
# setup
library(INLA)
library(inlabru)
library(tidyverse)
library(magrittr)
library(ggplot2)
# ------------------------------------------------------------------
# GENERATE TRUE PREVALENCE SURFACE
# define boundaries and create fine mesh
bnd = inla.mesh.segment(cbind(c(0, 10, 10, 0),
c(0, 0, 10, 10)))
mesh_fine <- inla.mesh.2d(boundary = bnd, max.edge = 0.2)
# specify true SPDE model
spde_fine <- inla.spde2.pcmatern(mesh_fine,
prior.sigma = c(1, 0.01),
prior.range = c(1, 0.01))
# get true precision matrix from model
true_range <- 4
true_sigma <- 1
true_Q <- inla.spde.precision(spde_fine,
theta = log(c(true_range, true_sigma)))
# convert to true SD
true_sd <- diag(inla.qinv(true_Q))^0.5
# sample the true field
true_field <- inla.qsample(1, true_Q)[,1]
# get true field into dataframe and apply logistic transform to get true
# prevalence
truth <- expand.grid(lon = seq(0, 10, length = 100),
lat = seq(0, 10, length = 100)) %>%
dplyr::mutate(true_field <- inla.mesh.project(mesh_fine,
loc = cbind(lon, lat),
field = true_field),
true_prev = plogis(true_field))
# change format
coordinates(truth) <- c("lon", "lat")
truth <- as(truth, "SpatialPixelsDataFrame")
# plot true prevalence surface
pl_truth <- ggplot() +
gg(truth, mapping = aes_string("lon", "lat", fill = "true_prev")) +
coord_equal() +
scale_fill_distiller(palette = "RdBu") +
ggtitle("True prevalence")
pl_truth
# ------------------------------------------------------------------
# DRAW OBSERVED DATA
# choose sampling locations uniformly within domain and get true prevalence at
# these locations
n <- 200
mydata <- data.frame(lon = runif(n, 0, 10),
lat = runif(n, 0, 10)) %>%
dplyr::mutate(true_field = inla.mesh.project(mesh_fine,
loc = cbind(lon, lat),
field = true_field),
true_prev = plogis(true_field))
# contrived example of test sensitivity increasing from 0 to 1 as we move from
# left to right of domain. Draw binary observations from this
# prevalence*sensitivity combo
mydata <- mydata %>%
dplyr::mutate(sens = lon / 10,
pos = rbinom(n, 1, prob = true_prev*sens))
coordinates(mydata) <- c("lon", "lat")
# plot observations against true prevalence surface
pl_truth +
geom_point(aes(x = lon, y = lat, col = as.factor(pos)),
data = as.data.frame(mydata))
# ------------------------------------------------------------------
# FIT MODEL AND VISUALISE RESULTS
# create course mesh
mesh <- inla.mesh.2d(boundary = bnd, max.edge = 0.5)
#plot(mesh)
# specify SPDE model on mesh
spde <- inla.spde2.pcmatern(mesh,
prior.sigma = c(10, 0.01),
prior.range = c(1, 0.01))
# specify model components
cmp <- ~ mySPDE(main = coordinates, model = spde) + Intercept(1)
# specify model formula
#form <- pos ~ mySPDE + Intercept
form <- pos ~ qlogis(sens * plogis(mySPDE + Intercept))
# fit model
t0 <- Sys.time()
fit <- bru(components = cmp,
data = mydata,
family = "binomial",
Ntrials = 1,
formula = form)
Sys.time() - t0
# predict on grid
t0 <- Sys.time()
pix <- pixels(mesh, nx = 200, ny = 200)
pred <- predict(fit, pix,
~ plogis(mySPDE + Intercept))
Sys.time() - t0
# plot fitted surface against data
pl_est <- ggplot() +
gg(pred, mapping = aes_string("x", "y", fill = "mean")) +
geom_point(aes(x = lon, y = lat, col = as.factor(pos)),
data = as.data.frame(mydata)) +
coord_equal() +
scale_fill_distiller(palette = "RdBu") +
ggtitle("Estimated prevalence")
pl_est
# plot SD of posterior. Should be a general trend of higher uncertainty towards
# left of domain
pred$sd <- sqrt(pred$var)
pl_sd <- ggplot() +
gg(pred, mapping = aes_string("x", "y", fill = "sd")) +
geom_point(aes(x = lon, y = lat, col = as.factor(pos)),
data = as.data.frame(mydata)) +
coord_equal() +
scale_fill_distiller(palette = "RdBu") +
ggtitle("SD of estimated prevalence")
pl_sd