Hi everyone!
As part of my PhD, I have been developing a point process model to relate the location of penguin nests to fine scale spatial covariates such as guano area and terrain (slope, aspect, etc.). Before including any covariates, I built an LGCP model that just accounted for spatial autocorrelation among points and predicted an abundance that was close to my input data (about 200,000 points) and the intensity plot looked good too.
However, when I try to include covariates (with a finer subdivided mesh), the predictions became very large and implausible (millions or infinite penguins!) and sometimes causes the model to crash. The intercept and fixed-effects slopes are also very strange. Each of my covariates are included as rasters at 2 x 2 m resolution. The guano area raster is the proportion of a pixel covered in guano, 1 being fully covered. The guano area is very spatially defined where most of the points (but not all) are contained within it. I have also standardised each covariate to have a mean of 0 and a standard deviation of 1 and ensured they have the same crs (EPSG:3031). I have been using an HPC server to run my r script which has restricted me to R 4.3.2-foss-2023a, with INLA 23.04.24 and inlabru 2.13.0.9008. I have included my code below, including mesh construction, SPDE priors, and fitting LGCP models. Happy to provide more information, my email is: alexandr...@pg.canterbury.ac.nz
Very new to this method and any advice would be greatly appreciated!
Alexandra
# load packages
library(sf)
library(fmesher)
library(ggplot2)
library(INLA)
library(ggpubr)
library(inlabru) # for lgcp()
library(dplyr)
library(purrr)
library(tidyr)
library(terra) # for rasters
# read in points from xy csv
Crozier_xy <- read.csv("Crozier_Points_2020_3031.csv")
# convert to sf object and check CRS
sf_Crozier <- st_as_sf(Crozier_xy, coords = c("x", "y"), crs = 3031)
st_crs(sf_Crozier)
# import Crozier coastline boundary
sf_Crozier_boundary <- st_read("Crozier_boundary.shp")
# transform shapefile so it has the right crs code
sf_Crozier_boundary <- st_transform(sf_Crozier_boundary, crs = st_crs(sf_Crozier))
st_crs(sf_Crozier_boundary)
# mesh parameters
# this is 1/15 study size using x range
# x range is longer than y here
Crozier_max.edge <- diff(range(st_coordinates(sf_Crozier)[,1]))/(3*5)
print(Crozier_max.edge)
# ~150 metres
# expand outer layer
Crozier_bound.outer = diff(range(st_coordinates(sf_Crozier)[,1]))/5
print(Crozier_bound.outer)
# ~450 metres
# create Crozier mesh
Crozier_mesh <- fm_mesh_2d(boundary = sf_Crozier_boundary,
max.edge = c(1,5)*Crozier_max.edge,
offset = c(Crozier_max.edge, Crozier_bound.outer),
cutoff = Crozier_max.edge/10,
crs = st_crs(sf_Crozier))
print(Crozier_mesh$n)
# null LGCP
# define the SPDE priors (matern)
matern <- inla.spde2.pcmatern(mesh = Crozier_mesh,
prior.range = c(250, 0.5), # distance decay in metres
prior.sigma = c(1, 0.5)) # amount of spatial variation
# spatial SPDE effect and Intercept
null_cmp <- geometry ~
Intercept(1) +
mySmooth(geometry, model = matern) # random effect
# import guano area shapefile for samplers area
sf_Crozier_guano <- st_read("Crozier_2020_1_3031_guano.shp")
# ensure shapefile has right crs code
sf_Crozier_guano <- st_transform(sf_Crozier_guano, crs = st_crs(sf_Crozier))
# add 100 metre buffer
sf_Crozier_guano_buffered <- st_buffer(sf_Crozier_guano, dist = 100)
sf_Crozier_guano_buffered <- st_sf(geometry = st_union(sf_Crozier_guano_buffered))
crs(sf_Crozier_guano_buffered)
# fit null lgcp model
null_model <- lgcp(null_cmp, # formula
data = sf_Crozier, # locations
samplers = sf_Crozier_guano_buffered,
domain = list(geometry = Crozier_mesh), # mesh
options = list(
control.inla = list(verbose = TRUE),
control.compute = list(dic = TRUE, waic = TRUE, cpo = TRUE)
)
)
# takes less than ~2 mins to run
summary(null_model)
# predicting intensity
# predict the spatial intensity surface
# need to sum over prediction grid
# make prediction grid as sf points
grid_pts <- fm_pixels(
Crozier_mesh,
format = "sf",
mask = sf_Crozier_guano_buffered
)
# predict intensity per m2
lambda <- predict(
null_model,
grid_pts, # use this instead of fm_pixels
~ exp(mySmooth + Intercept)
)
# cell spacing from the point coordinates
coords <- st_coordinates(grid_pts)
dx <- median(diff(sort(unique(coords[,1]))))
dy <- median(diff(sort(unique(coords[,2]))))
cell_area <- dx * dy # m2 per pixel
# multiply and sum
total_pred <- sum(lambda$mean * cell_area)
print(total_pred)
# Build LGCP model with covarites
# use continuous guano raster
percent_guano_raster <- rast("CrozierGuano_2m.tif")
# change guano crs to match
crs(percent_guano_raster) <- "EPSG:3031"
crs(percent_guano_raster)
res(percent_guano_raster) # 2m x 2m
# 2m terrain rasters clipped by mesh
slope_raster <- rast("Crozier_terrain_mesh/Cape_Crozier_slope.tif")
aspect_raster <- rast("Crozier_terrain_mesh/Cape_Crozier_aspect_corrected.tif")
roughness_raster <- rast("Crozier_terrain_mesh/Cape_Crozier_Roughness.tif")
TRI_raster <- rast("Crozier_terrain_mesh/Cape_Crozier_TRI.tif")
# change corrected aspect crs to match
crs(aspect_raster) <- "EPSG:3031"
crs(aspect_raster)
# match guano area extent to terrain rasters
percent_guano_raster <- extend(percent_guano_raster, slope_raster)
percent_guano_raster[values(percent_guano_raster) > 1] <- 0
percent_guano_raster[is.na(percent_guano_raster)] <- 0
# scale (mean of 0 sd of 1)
standardize <- function(r) {
m <- global(r, fun = "mean", na.rm = TRUE)[1, 1]
s <- global(r, fun = "sd", na.rm = TRUE)[1, 1]
(r - m) / s
}
# apply to continuous variables
percent_guano_raster <- standardize(percent_guano_raster)
slope_raster <- standardize(slope_raster)
aspect_raster <- standardize(aspect_raster)
roughness_raster <- standardize(roughness_raster)
TRI_raster <- standardize(TRI_raster)
# check for misalignment
covariate_plot <- c(percent_guano_raster, slope_raster, aspect_raster, roughness_raster, TRI_raster)
plot(covariate_plot)
# check for collinearity between covariates
cov_stack <- c(percent_guano_raster, slope_raster, aspect_raster, roughness_raster, TRI_raster)
cov_values <- as.data.frame(cov_stack, na.rm = TRUE)
cor(cov_values)
# subdivide mesh
# splits triangles into subtriangles
mesh_sub <- fm_subdivide(Crozier_mesh,3) # try 1/9 of max.edge
print(mesh_sub$n)
# update model formula to include covariates
matern <- inla.spde2.pcmatern(mesh = mesh_sub,
prior.range = c(250, 0.5), # distance decay in metres
prior.sigma = c(1, 0.5)) # amount of spatial variation
# trial
Full_cmp <- geometry ~
Intercept(1) +
percentguano(percent_guano_raster, model = "linear") +
slope(slope_raster, model = "linear") +
mySmooth(geometry, model = matern) # random effect
Full_model <- lgcp(Full_cmp, # formula
data = sf_Crozier, # locations
samplers = sf_Crozier_guano_buffered, # sample area
domain = list(geometry = mesh_sub), # mesh
options = list(
control.inla = list(verbose = TRUE),
control.compute = list(dic = TRUE, waic = TRUE, cpo = TRUE))
)
summary(Full_model)
# need to sum over prediction grid
# make prediction grid as sf points
grid_pts <- fm_pixels(
mesh_sub,
format = "sf",
mask = sf_Crozier_guano_buffered
)
# predict intensity per m2
lambda <- predict(
Full_model,
grid_pts, # use this instead of fm_pixels
~ exp(mySmooth + Intercept + percentguano + slope)
)
# cell spacing from the point coordinates
coords <- st_coordinates(grid_pts)
dx <- median(diff(sort(unique(coords[,1]))))
dy <- median(diff(sort(unique(coords[,2]))))
print(dx)
print(dy)
cell_area <- dx * dy # m2 per pixel
print(cell_area)
# multiply and sum
total_pred <- sum(lambda$mean * cell_area)
print(total_pred)
# terrain only
Terrain_cmp <- geometry ~
Intercept(1) +
slope(slope_raster, model = "linear") +
aspect(aspect_raster, model = "linear") +
mySmooth(geometry, model = matern) # random effect
Terrain_model <- lgcp(Full_cmp, # formula
data = sf_Crozier, # locations
samplers = sf_Crozier_guano_buffered, # sample area
domain = list(geometry = mesh_sub), # mesh
options = list(
control.inla = list(verbose = TRUE),
control.compute = list(dic = TRUE, waic = TRUE, cpo = TRUE))
)
summary(Terrain_model)
# need to sum over prediction grid
# make prediction grid as sf points
grid_pts <- fm_pixels(
mesh_sub,
format = "sf",
mask = sf_Crozier_guano_buffered
)
# predict intensity per m2
lambda <- predict(
Terrain_model,
grid_pts, # use this instead of fm_pixels
~ exp(mySmooth + Intercept + slope + aspect)
)
# cell spacing from the point coordinates
coords <- st_coordinates(grid_pts)
dx <- median(diff(sort(unique(coords[,1]))))
dy <- median(diff(sort(unique(coords[,2]))))
print(dx)
print(dy)
cell_area <- dx * dy # m2 per pixel
print(cell_area)
# multiply and sum
total_pred <- sum(lambda$mean * cell_area)
print(total_pred)