I am fitting a model to observe & predict the spatial effect on a variable of interest computed as longitude & latitude point data. Each point has a varied sample size ranging from 100s to 1000s.
I initially thought I could incorporate sample size as a covariate in the model,
[formula = y ~ -1 +Intercept + samplesize + f(spatial.field, model = SPDE)]
but I soon realised that this is wrong as any covariates incorporated in the model must have values at the unsampled coordinates to enable predictions to be made.
I have also considered duplicating every data entry by its respective sample size and then jittering the overlapped points, but given the sample size numbers this would be too computationally intensive.
Is there an alternative way to incorporate sample size into my analysis so that it controls the weight/intensity of each observation point?
Any help is highly appreciated.
Reproducible sample code of model without sample size:
# Sample data
library(data.table)
dat <- data.table(ADMIN = c("China", "China", "China", "China", "China"),
entry_id = c("1", "2", "3", "4", "5"),
y = c("1", "10", "20", "70", "90"),
sample_size = c("20", "100", "1000", "500", "250"),
lon = c("118.1568", "118.066", "118.0356", "113.3613", "112.9701"),
lat = c("26.53895", "22.89347", "23.13714", "23.60236", "23.62635"))
dat$y <- as.numeric(dat$y)
# Obtain country polygon
library(rnaturalearth)
library(rnaturalearthdata)
library(rnaturalearthhires)
library(sf)
countries <- countries10
countries <- st_as_sf(countries)
countries <- countries[c("ADMIN", "geometry")]
dat <- merge(dat, countries, by = c("ADMIN"))
# Extract coordinates of sampled locations
Sampled <- data.frame(lon = dat$lon, lat = dat$lat)
# Create Raster of all polygon locations
library(raster)
library(stars)
Raster <-st_rasterize(st_as_sf(dat$geometry))
write_stars(Raster, "Raster.tif")
Raster <- raster('Raster.tif')
# Extract coordinates of unsampled locations
Unsampled <- rasterToPoints(Raster, spatial = TRUE)
Unsampled <- data.frame(lon = Unsampled$x, lat = Unsampled$y)
# Extract coordinates of country boundary
library(INLA)
Boundary <- as(dat$geometry[[1]], "Spatial") %>% inla.sp2segment()
# Mesh construction
max.edge <- diff(range(as.numeric(Sampled[,1])))/(3*5)
bound.outer <- diff(range(as.numeric(Sampled[,1])))/3
Mesh <- inla.mesh.2d(boundary = Boundary,
loc = Sampled,
max.edge = c(1,3)*max.edge,
offset = c(max.edge, bound.outer),
cutoff = 0.65)
# Plot mesh
plot(Mesh,asp = 1, main = "")
# Define Projector Matrices
Aest <- inla.spde.make.A(mesh = Mesh, loc = cbind(Sampled$lon, Sampled$lat))
Apre <- inla.spde.make.A(mesh = Mesh, loc = cbind(Unsampled$lon, Unsampled$lat))
# Define SPDE
SPDE <- inla.spde2.matern(mesh = Mesh, alpha = 2)
# Define Spatial Index
SIndex <- inla.spde.make.index(name = "spatial.field",
n.spde = SPDE$n.spde,
n.group = 1,
n.repl = 1)
# Define stack
Stackest1 <- inla.stack(data = list(y = dat$y),
A = list(Aest, 1),
effects = list(spatial.field = SIndex, Intercept = rep(1, nrow(Sampled))),
tag = "est")
Stackpre1 <- inla.stack(data = list(y = NA),
A = list(Apre, 1),
effects = list(spatial.field = SIndex, Intercept = rep(1, nrow(Unsampled))),
tag = "pre")
Stack1 <- inla.stack(Stackest1, Stackpre1)
# Formula
Formula1 <- (y ~ -1 + Intercept + f(spatial.field, model = SPDE))
# Fit model
Model1 <- inla(Formula1,
family = "gaussian",
data = inla.stack.data(Stack1),
control.predictor = list(A = inla.stack.A(Stack1), compute = TRUE),
control.compute = list(cpo = TRUE, dic = TRUE, waic = TRUE,
return.marginals.predictor = TRUE, mlik = TRUE, config = TRUE))
# Define index to easily extract predictions
Index <- inla.stack.index(stack = Stack1, tag = "pre")$data
Mean_Frequency <- Model1$summary.fitted.values[Index, "mean"]
# Raster predictions
MeanRaster1 <- rasterize(x = Unsampled,
y = Raster,
field = Mean_Frequency,
fun = mean,
crs = CRS("+init=epsg:4326 +proj=longlat +ellps=WGS84
+datum=WGS84 +no_defs +towgs84=0,0,0"))
# Create color palette
library(viridis)
library(leaflet)
Pal <- colorNumeric("viridis", domain = c(0, 100),
na.color = "transparent", reverse = TRUE)
# Plot predicted mean
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addRasterImage(MeanRaster1, colors = Pal, opacity = 0.5) %>%
addLegend("bottomright", pal = Pal, values = values(MeanRaster1), title = "Mean") %>%
addScaleBar(position = c("bottomleft"))