Plotting Variance For All Species in A For Loop

Andrew Pettit

unread,

May 8, 2024, 10:02:43 PMMay 8

to distance-sampling

Hello everyone,

I created a density surface model for all 15 of my study species with the code below. I have been trying to plot the variance for each species in a similar fashion but have been running into a multitude of errors. Is it possible to plot variance using a for loop like I did here? If anyone has any insight or examples that could help, please let me know.

Code:

obsdata<-read.csv("distdata_final.csv")
segdata<-read.csv("segdata_final.csv")
preddata<-read.csv("prediction_data.csv")
obs<-read.csv("sightings.csv",sep = ";", header = TRUE) %>%
clean_names() %>%
na.omit()

management_area <- st_read("Study area.shp") #WGS84
study_area <- st_read("Study_area.shp") #UTM ZONE 36S
#transects_sf <- st_read("line_data.shp")

# Reproject the geodetic CRS to the projected CRS
management_area_reprojected <- st_transform(management_area, crs = st_crs(study_area))

#Remove points outside of management area
# Convert preddata to a spatial object
preddata_sf <- st_as_sf(preddata, coords=c("X", "Y"))
obsdata_sf <- st_as_sf(obsdata, coords = c("X","Y"))

# Check the CRS of both objects
st_crs(preddata_sf) <- st_crs(management_area_reprojected)

# Perform the intersection
preddata_sf <- st_intersection(preddata_sf, management_area_reprojected)

# Extracting coordinates
coords_obsdata <- data.frame(st_coordinates(obsdata_sf))
coords_preddata <- data.frame(st_coordinates(preddata_sf))

# Adding coordinates back to the data frames
obsdata_sf$X <- coords_obsdata$X
obsdata_sf$Y <- coords_obsdata$Y

preddata_sf$X <- coords_preddata$X

preddata_sf$Y <- coords_preddata$Y

# Initialize an empty data frame to store AICc values
aic_table <- data.frame(
Species = character(),
Model = character(),
AICc = numeric(),
stringsAsFactors = FALSE
)

# Initialize a named list to store ggplot objects
plot_list <- list()

species_list <- c("Aardvark", "Bushbuck", "Bushpig", "Dikdik",
"Eland", "Elephant", "Giraffe",
"Greater Kudu", "Hyena", "Impala", "Lesser Kudu",
"Mongoose", "Warthog", "Zebra")

# Loop over each species to fit DSMs and compare
for (species_name in species_list) {

# Clear predictions from preddata_sf for the next iteration
preddata_sf$Prediction <- NULL

# Clear the prediction grid to ensure it starts fresh for each species
if (exists("cropped_grid")) {
rm(cropped_grid)
}

# Filter observations for the current species
obs_species <- obsdata_sf %>% filter(species == species_name)

# Print the number of observations for the current species
print(paste("Number of observations for", species_name, ":", nrow(obs_species)))

# Calculate the detection function for the current species
detection_function <- ds(obs_species$distance, truncation = list(left = "0%", right = "10%"), key = "hn", adjustment = "cos")

# Fit DSM with HFI (Human Footprint Index)
dsm_with_HFI <- dsm(count~ s(X, Y, k = 10) + s(HFI,k=10) + s(River,k=10) + s(EVI,k=10) + s(TRI,k=10), detection_function, segdata, obs_species, family = tw(), convert.units = 1/1000, method="REML")

# Print the summary of the model with HFI
print(summary(dsm_with_HFI))

# Fit DSM without HFI
dsm_without_HFI <- dsm(count~ s(X, Y, k = 10) + s(River,k=10) + s(EVI,k=10) + s(TRI,k=10), detection_function, segdata, obs_species, family = tw(), convert.units = 1/1000, method="REML")

# Print the summary of the model without HFI
print(summary(dsm_without_HFI))

# Calculate AICc for each model
AICc_with_HFI <- AICc(dsm_with_HFI)
AICc_without_HFI <- AICc(dsm_without_HFI)

# Append to the AICc table
aic_table <- rbind(
aic_table,
data.frame(Species = species_name, Model = "with_HFI", AICc = AICc_with_HFI),
data.frame(Species = species_name, Model = "without_HFI", AICc = AICc_without_HFI)
)

# Choose the best model based on AIC
best_model <- if (AICc_with_HFI < AICc_without_HFI) dsm_with_HFI else dsm_without_HFI

# Predict densities using the best model
predictions <- predict(best_model, preddata_sf, preddata_sf$Area)

# Assign predictions back to the spatial dataframe for visualization
preddata_sf$Prediction <- predictions

# Create the visualization for the current species
plot_title <- paste(species_name, "") # This creates a unique title for each species

# Create a prediction grid
prediction_grid <- st_make_grid(study_area, cellsize = c(1000, 1000))
prediction_grid_sf <- st_sf(geometry = prediction_grid)
cropped_grid <- st_intersection(prediction_grid_sf, management_area_reprojected)

# Join predictions to the grid
cropped_grid <- st_join(cropped_grid, preddata_sf, join = st_nearest_feature)

min_value <- min(cropped_grid$Prediction, na.rm = TRUE)
max_value <- max(cropped_grid$Prediction, na.rm = TRUE)
#breaks <- pretty(seq(min_value, max_value), n = 5)

# Calculate a set of pretty breaks for the full range of your data
#full_breaks <- pretty(c(min_value, max_value))
# Calculate the breaks as proportional to the maximum value
breaks <- c(0, max_value / 4, max_value / 2, 3 * max_value / 4, max_value)

# Plotting
p <- ggplot() +
geom_sf(data = cropped_grid, aes(fill = Prediction), color = NA) +
scale_fill_viridis_c(option = "viridis", direction = 1,
name = "Density/km²",
labels = scales::comma,
limits = range(breaks), # Use the range of selected breaks
breaks = breaks # Use the selected breaks
) +
labs(title = plot_title) +
theme_void() +
theme(
plot.title = element_text(size = 32, face = "bold", vjust = -3.5), # Increase title font size
legend.title = element_text(size = 22), # Increase legend title font size
legend.text = element_text(size = 16), # Increase legend text font size
legend.key.size = unit(2, "lines"), # Increase the size of the legend keys
#panel.border = element_rect(colour = "black", fill=NA, size=1), # Add border around each plot
legend.position = c(0.85, 0.45) # You will need to adjust these numbers
#legend.margin = margin(t = 10, b = 10, unit = "pt") # Use margin and spacing to adjust the exact position
)
theme(legend.position = "right")

plot_list[[species_name]] <- p

# Save the plot or print it
#ggsave(paste0(species_name, "_DSM.png"), plot = p, width = 10, height = 7)

# Clear predictions from preddata_sf for the next iteration
preddata_sf$Prediction <- NULL
}

# After the loop, arrange the plots in a 5x3 grid
combined_plot <- plot_grid(plotlist = plot_list, ncol = 3, nrow = 5)

print(combined_plot)

#print(aic_table)

I have attached my DSM output for all of my species. If anyone has any insight, please let me know! Thank you!

Thanks,

Andrew

DSM_plots.png

Eric Rexstad

unread,

May 9, 2024, 2:49:31 AMMay 9

to Andrew Pettit, distance-sampling

Andrew

Thanks for your question and your code and plot.

I confess that I'm not adept at handling spatial data, but perhaps someone on the list has those skills. From first principles, I wouldn't expect inherent problems in mapping uncertainty. You note that you are getting errors when applying similar code when mapping uncertainty. Can you share with us what you are trying to plot (CV might be more sensible to plot that cell-specific variances) and what those errors are?

From: distance...@googlegroups.com <distance...@googlegroups.com> on behalf of Andrew Pettit <andrew...@ucsb.edu>
Sent: 09 May 2024 03:02
To: distance-sampling <distance...@googlegroups.com>
Subject: [distance-sampling] Plotting Variance For All Species in A For Loop

--
You received this message because you are subscribed to the Google Groups "distance-sampling" group.
To unsubscribe from this group and stop receiving emails from it, send an email to distance-sampl...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/distance-sampling/a3178205-9f7c-442f-9b16-18fce7849231n%40googlegroups.com.

Eric Rexstad

unread,

May 11, 2024, 3:07:01 AMMay 11

to Andrew Pettit, distance-sampling

Andrew

Working with the code you provided, I've modified the code you provided to produce a tandem plot of cell-specific abundance and cell-specific CV, shown below.

Don't know whether the code will work in your multi-species context, but it might give you some ideas.

library(dsm)

library(sf)

load(url("https://examples.distancesampling.org/dsm-line-dolphins/mexdolphins-extra.rda"))

data(mexdolphins)

predsf <- st_as_sf(pred.polys)

proj4string(survey.area) <- CRS("+proj=longlat +datum=WGS84")

area.sf <- st_as_sf(survey.area)

st_crs(area.sf) <- "WGS84"

area.sf.proj <- st_transform(area.sf, crs = st_crs(predsf))

# Convert preddata to a spatial object

preddata_sf <- st_as_sf(preddata, coords=c("x", "y"))

st_crs(preddata_sf) <- st_crs(area.sf.proj)

# Perform the intersection

preddata_sf <- st_intersection(preddata_sf, area.sf.proj)

coords_preddata <- data.frame(st_coordinates(preddata_sf))

preddata_sf$x <- coords_preddata$X

preddata_sf$y <- coords_preddata$Y 

segdata_sf <- st_as_sf(segdata, coords = c("x","y"))

st_crs(segdata_sf) <- st_crs(area.sf.proj)

# to outline study area for plot of abundance

survey.area <- data.frame(survey.area@polygons[[1]]@Polygons[[1]]@coords)

names(survey.area) <- c("x", "y")

library(ggplot2)

# study area outline and segment centres

plot_segments <- ggplot() +

  geom_sf(data = area.sf.proj, fill="lightblue", color = "blue", linewidth=.1) +

  geom_sf(data=segdata_sf, fill=NA, color="black", linewidth=.3) +

  labs(title="1996 SE Fisheries Science Center Gulf of Mexico cruise",

       subtitle = "Points are segment centres") +

  scale_fill_viridis_c(option = "magma", guide = "none")

plot_segments

# detection function and spatial model

library(Distance)

detfc.hr.null <- ds(distdata, max(distdata$distance), key="hr", adjustment=NULL)

dsm.xy.depth <- dsm(count~s(x,y,k=10) + s(depth,k=20), detfc.hr.null, segdata, obsdata, method="REML")

summary(dsm.xy.depth)

plot(dsm.xy.depth, select=2)

dsm.xy.depth.pred <- predict(dsm.xy.depth, preddata, preddata$area)

preddata.var <- split(preddata, 1:nrow(preddata))

dsm.xy.depth.var <- dsm_var_gam(dsm.xy.depth, pred.data=preddata.var,off.set=preddata$area)

summary(dsm.xy.depth.var)

#================

# attach prediction and cv to the predata sf object

preddata_sf$Prediction <- dsm.xy.depth.pred

preddata_sf$CV <- sqrt(dsm.xy.depth.var$pred.var)/preddata_sf$Prediction

prediction_grid <- st_make_grid(area.sf.proj, cellsize = c(9000,9000))

prediction_grid_sf <- st_sf(geometry = prediction_grid)

cropped_grid <- st_join(prediction_grid_sf, preddata_sf, join = st_nearest_feature)

cropped_grid <- st_intersection(cropped_grid, area.sf.proj)

pred <- ggplot() +

  geom_sf(data = cropped_grid, aes(fill = Prediction), color = NA) +

  geom_sf(data=segdata_sf, fill=NA, color="white", linewidth=.001) +

  labs(title="Spotted dolphins, Gulf of Mexico, abundance estimates",

       subtitle = "Bivariate smooth of location, smooth of depth, quasipoisson") +

  scale_fill_viridis_c(option = "viridis", direction = 1)

pred

CV <- ggplot() +

  geom_sf(data = cropped_grid, aes(fill = CV), color = NA) +

  geom_sf(data=segdata_sf, fill=NA, color="white", linewidth=.001) +

  labs(title="Spotted dolphins, Gulf of Mexico, uncertainty (CV)",

       subtitle = "Bivariate smooth of location, smooth of depth, quasipoisson") +

  scale_fill_viridis_c(option = "viridis", direction = 1)

CV

library(patchwork)

pred / CV

From: distance...@googlegroups.com <distance...@googlegroups.com> on behalf of Andrew Pettit <andrew...@ucsb.edu>
Sent: 09 May 2024 03:02
To: distance-sampling <distance...@googlegroups.com>
Subject: [distance-sampling] Plotting Variance For All Species in A For Loop

--

Reply all

Reply to author

Forward