I have been working on specifying a SPDE model, to analyse factors explaining per-pixel rates of a degradation index, in a pre-defined study region (approx. 350km x 170km). Based on yearly degradation maps, I calculated the overall pixel rate, and built a model with Gaussian family, specifying priors with 'inla.spde2.pcmatern' (see code below).
Looking at plots of the random field compared to the model predictions, the two are essentially indistinguishable, suggesting the random effect is taking over (I think).
I did some posterior predictive model checking, and the model seemed to be overfitting quite drastically:
Interestingly, when I checked the model using cross-validation based on CPO and PIT, the histogram appears to show a rather uniform distribution:
I tried setting the alpha parameter to 2/3, as suggested in the same threat, but to no avail.
The code (my apologies, not sure how to format this as code):
-----------------------------------------------------------------------
deg500 <- a raster stack, containing 20 rasters with 500m pixel resolution
load("BG_raw_spdf.RData")) a SpatialPointsDataFrame, containing the covariates
annRain <- yearly rainfall for the time period
studyArea <- shapefile for the study area
### ----------------------------------------------------------------------------
time <- 1:nlayers(deg500)
# Compute the overall slope of degradation, accounting for rainfall in a given year
slowfun <- function(y) {
NA
} else {
m = lm(y ~ time + annRain$mean); summary(m)$coefficients[2] # get time
}
}
deg500_rate <- calc(deg500, slowfun)
### Basic INLA model predicting rate from various covariates:
### ----------------------------------------------------------------------------
## Add the rate to the raw_spdf:
deg500_rate_spdf <- as(deg500_rate, "SpatialPointsDataFrame")
deg500_rate_spdf <- deg500_rate_spdf[!
is.na(deg500_rate_spdf@data$layer), ]
xy_raw <- apply(round(coordinates(raw_spdf), 6), 1, paste, collapse = "_")
xy_rate <- apply(round(coordinates(deg500_rate_spdf), 6), 1, paste, collapse = "_")
raw_spdf <- raw_spdf[order(xy_raw), ]
deg500_rate_spdf <- deg500_rate_spdf[xy_rate %in% xy_raw, ]
deg500_rate_spdf <- deg500_rate_spdf[order(xy_rate[xy_rate %in% xy_raw]), ]
raw_spdf$deg_rate <- deg500_rate_spdf$layer * 10 # Small decimal numbers will use more memory to get to integer. Also easier to read, adn index is arbitrary anyways
## Transform and scale covariates:
raw_spdf$lpop <- log(raw_spdf$pop + 1) # log(x+1) for right skewed data containing zeros
raw_spdf$ltlu <- log(raw_spdf$tlu + 1)
raw_spdf$pop_s <- c(scale(raw_spdf$lpop))
raw_spdf$rain_s <- c(scale(raw_spdf$rain))
raw_spdf$tlu_s <- c(scale(raw_spdf$ltlu))
## Make a mesh:
max.edge = diff(range(raw_spdf@coords[, 2]))/15
bound.outer = diff(range(raw_spdf@coords[, 2]))/3
mesh <- inla.mesh.2d(
boundary = studyArea,
loc = coordinates(raw_spdf),
max.edge = c(1,5)*max.edge, # For filling in area
cutoff = max.edge/5,
offset = c(max.edge, bound.outer), # Expanding outside actual locations. Determine inner and outer offset
)
# and the spde:
A = inla.spde.make.A(mesh=mesh, loc=data.matrix(coordinates(raw_spdf))) # Combine actual points with mesh
# covariates:
Xcov = data.frame(intercept=1,
pop_s = raw_spdf$pop_s,
rain_s = raw_spdf$rain_s,
tlu_s = raw_spdf$tlu_s,
CA1 = (raw_spdf$CA == 1) * 1, # CCRO
CA2 = (raw_spdf$CA == 2) * 1, # NP
CA3 = (raw_spdf$CA == 3) * 1) # WMA
Xcov = as.matrix(Xcov)
colnames(Xcov)
stck <- inla.stack(tag='est', # stack is INLA data structure
data=list(deg_rate=raw_spdf$deg_rate),
effects=list(
s=1:mesh$n, # spatial random effect
iidx=1:nrow(raw_spdf), # iid random effect
Xcov=Xcov),
A = list(A, 1, 1)
## define the model:
# - sd(raw_spdf@coords[, 2])/10
# - 0.5*diff(range(raw_spdf@coords[, 2]))
spde = inla.spde2.pcmatern(mesh,
alpha = 3/2,
prior.range = c(1.6, 0.5),
prior.sigma = c(0.08, 0.1))
hyper.iid = list(prec = list(prior = 'pc.prec', param = c(0.08, 0.1)))
formula = deg_rate ~ -1 + Xcov +
f(s, model=spde) + # spatial random effect
f(iidx, model="iid", hyper=hyper.iid) # iid random effect
## make some linear combinations, for effect plots:
pop_lc <- inla.make.lincombs(Xcov1 = rep(1, 100),
Xcov2 = seq(min(raw_spdf$pop_s), max(raw_spdf$pop_s), len = 100))
names(pop_lc) <- paste0("pop", 1:100)
rain_lc <- inla.make.lincombs(Xcov1 = rep(1, 100),
Xcov3 = seq(min(raw_spdf$rain_s), max(raw_spdf$rain_s), len = 100))
names(rain_lc) <- paste0("rain", 1:100)
tlu_lc <- inla.make.lincombs(Xcov1 = rep(1, 100),
Xcov4 = seq(min(raw_spdf$tlu_s), max(raw_spdf$tlu_s), len = 100))
names(tlu_lc) <- paste0("tlu", 1:100)
CA_lc <- inla.make.lincombs(Xcov1 = rep(1, 4),
Xcov5 = c(0,1,0,0),
Xcov6 = c(0,0,1,0),
Xcov7 = c(0,0,0,1))
names(CA_lc) <- paste0("CA", 1:4)
all_lc <- c(pop_lc, rain_lc, tlu_lc, CA_lc)
res <- inla(formula, data=inla.stack.data(stck),
control.predictor=list(A = inla.stack.A(stck), compute=TRUE),
family = family,
lincomb = all_lc,
control.compute = list(cpo = TRUE),
control.inla = list(int.strategy='eb'), # empirical bayes (cheap computation for testing)
verbose = TRUE)
summary(res)
# ### ----------------------------------------------------------------------------
# ### MODEL DIAGNOSTICS
# ### ----------------------------------------------------------------------------
# # Make plot of predictions ---------------------------------------------
index.val <- inla.stack.index(stck,"est")$data
post.mean.val <- res$summary.linear.predictor[index.val,"mean"]
post.sd.val <- res$summary.linear.predictor[index.val,"sd"]
raw_spdf$predicted <- res$summary.linear.predictor[index.val,"mean"]
raw_spdf$pred_sd <- res$summary.linear.predictor[index.val,"sd"]
mapdata <-data.frame(raw_spdf)
pred_mean <- ggplot() +
geom_point(data = mapdata, aes(x=x, y=y, col = predicted)) +
coord_equal() +
theme_minimal()
pred_sd <- ggplot() +
geom_point(data = mapdata, aes(x=x, y=y, col = pred_sd), cex = 0.5) +
coord_equal() +
theme_minimal()
pred_comb <- gridExtra::grid.arrange(pred_mean, pred_sd, nrow = 1)
# # Visualize the random field, the unexplained spatial error -----------------------------
projector <- inla.mesh.projector(mesh)
projection <- inla.mesh.project(projector,
res[["summary.random"]]$s$mean)
INLAutils::ggplot_projection_shapefile(projection, projector) +
geom_polygon(data = studyArea, aes(long, lat, group = group), fill = NA, col = 'black') +
coord_equal()
# Posterior predictive check ----------------------------------------------------------
INLAutils::ggplot_inla_residuals(res, raw_spdf$deg_rate, binwidth = 0.1)
# Cross-validation model checking ----------------------------------------------------------
sum(res$cpo$failure) # Check if numerical problems occurred when calculating CPO
hist(res$cpo$pit, main="", breaks = 10, xlab = "PIT")
qqplot(qunif(ppoints(length(res$cpo$pit))), res$cpo$pit, main = "Q-Q plot for Unif(0,1)", xlab = "Theoretical
Quantiles", ylab = "Sample Quantiles")
qqline(res$cpo$pit, distribution = function(p) qunif(p), prob = c(0.1, 0.9), col = "red")