library(tidyverse)
library(INLA)
library(rnaturalearth)
library(inlabru)
library(gridExtra)
library(sf)
#Read in data
select(-X1)%>%mutate(x15=round(x15))
#You should be able to ignore the warnings safely
#Get wisconsin sf object, extract coordinates, and project them directly on the globe
wisconsin_sf<-st_as_sf(ne_download(scale = 50, type = "states"))%>%
filter(code_hasc=="US.WI")
coords<-st_coordinates(wisconsin_sf)
coords_cartesion = inla.mesh.map(coords, projection = "longlat")
# Construct mesh for the state
boundary.loc <- SpatialPoints(as.matrix(coords_cartesion))
boundary <- list(
inla.nonconvex.hull(coordinates(boundary.loc), 0.012),
inla.nonconvex.hull(coordinates(boundary.loc), 0.0265))
mesh <- inla.mesh.2d(boundary=boundary,
max.edge=c(0.0044, 0.0266),
min.angle=c(20, 18),
max.n=c(48000, 16000), ## Safeguard against large meshes.
max.n.strict=c(128000, 128000), ## Don't build a huge mesh!
cutoff=0.001, ## Filter away adjacent points.
offset=c(0.012, 0.0265)) ## Offset for extra boundaries, if needed.
#Review the plot. It's larger than it needs to be--I'll fix that later.
ggplot()+
gg(mesh)+#This uses inlabru package
geom_point(aes(x=coords_cartesion[,1],y=coords_cartesion[,2]))+
labs(title="Constrained refined Delaunay triangulation",
x="3D Cartesian X",y="3D Cartesian Y")+
theme_bw()
#Let's make a mesh and an index
spde <- inla.spde2.matern(mesh=mesh, alpha=2)
s_index = inla.spde.make.index(name = "spatial.field",
spde$n.spde)
#Identify lat/lon in the training set, convert them to cartesian, make A Matrix, make train stack
coords_train <- as.matrix(INLA_train_masked%>%select(x16,x17))
coords_cartesian_train = inla.mesh.map(coords_train, projection = "longlat")
A_matrix_train = inla.spde.make.A(mesh=mesh, loc=coords_cartesian_train)
train_stack <- inla.stack(data = list(x15 = INLA_train_masked$x15),
A = list(A_matrix_train, 1),
effects = list(c(s_index, list(intercept = 1)),
list(cov = INLA_train_masked%>%select(-c(x15:x17)))), #Remove lat, lon, response
tag = "train")
#Same as above, but now for the test set
coords_test <- as.matrix(INLA_test_masked%>%select(x16,x17))
coords_cartesion_test = inla.mesh.map(coords_test, projection = "longlat")
A_matrix_test = inla.spde.make.A(mesh=mesh, loc=coords_cartesion_test)
test_stack <- inla.stack(data = list(x15 = NA),
A = list(A_matrix_test, 1),
effects = list(c(s_index, list(intercept = 1)),
list(cov = INLA_test_masked%>%select(-c(x15:x17)))), #Remove lat, lon, response
tag = "test")
#Combine train/test stacks
INLA_stacks <- inla.stack(train_stack, test_stack)
#Model Formula. Basically: x15 ~ x1+...+X14+spatial.field (x16 is lat, x17 is lon, so leave those off)
inla_formula<-str_c("-1","intercept",paste0(names(INLA_train_masked)[-c(15:17)],collapse = "+"),"f(spatial.field, model=spde)",sep = "+")%>%str_c("x15~",.)%>%as.formula()
#Model. I have four logical processors, and that usually takes around 45 seconds to run this block
INLA_model <- inla(inla_formula,
family = "zeroinflatedpoisson1",
data = inla.stack.data(INLA_stacks, spde=spde),
control.predictor=list(A=inla.stack.A(INLA_stacks),
compute=TRUE),
control.compute=list(config = TRUE)) #to try inlabru predict function
#link=c(rep(1,1000),rep(NA,1000)) does not seem to change the outcome
#Store theta
zero_prob<-summary(INLA_model)[["hyperpar"]][1,1]
one_m_zero_prob<-(1-zero_prob)
#Extract "naive" fitted values for training set, create various summaries
train_index = inla.stack.index(INLA_stacks, "train")$data
train_fit<-(INLA_model$summary.fitted.values$mean[train_index])*one_m_zero_prob
cor(INLA_train_masked$x15,
train_fit)
train_fit%>%summary
table(round(train_fit))
grid.arrange(ggplot()+geom_histogram(aes(x=train_fit),binwidth = 1),
ggplot()+geom_histogram(data=INLA_train_masked,aes(x=x15),binwidth = 1))
#Extract "naive" fitted values for testing set, create various summaries
test_index = inla.stack.index(INLA_stacks, "test")$data
test_fit<-(INLA_model$summary.fitted.values$mean[test_index])*one_m_zero_prob
cor(INLA_test_masked$x15,
test_fit)
test_fit%>%summary
table(round(test_fit))
grid.arrange(ggplot()+geom_histogram(aes(x=test_fit),binwidth = 1),
ggplot()+geom_histogram(data=INLA_test_masked,aes(x=x15),binwidth = 1))