Plotting Variance For All Species in A For Loop

15 views
Skip to first unread message

Andrew Pettit

unread,
May 8, 2024, 10:02:43 PMMay 8
to distance-sampling
Hello everyone, 
I created a density surface model for all 15 of my study species with the code below. I have been trying to plot the variance for each species in a similar fashion but have been running into a multitude of errors. Is it possible to plot variance using a for loop like I did here? If anyone has any insight or examples that could help, please let me know. 

Code: 

obsdata<-read.csv("distdata_final.csv")
segdata<-read.csv("segdata_final.csv")
preddata<-read.csv("prediction_data.csv")
obs<-read.csv("sightings.csv",sep = ";", header = TRUE) %>%
  clean_names() %>%
  na.omit()

management_area <- st_read("Study area.shp") #WGS84
study_area <- st_read("Study_area.shp") #UTM ZONE 36S
#transects_sf <- st_read("line_data.shp")

# Reproject the geodetic CRS to the projected CRS
management_area_reprojected <- st_transform(management_area, crs = st_crs(study_area))


#Remove points outside of management area
# Convert preddata to a spatial object
preddata_sf <- st_as_sf(preddata, coords=c("X", "Y"))
obsdata_sf <- st_as_sf(obsdata, coords = c("X","Y"))

# Check the CRS of both objects
st_crs(preddata_sf) <- st_crs(management_area_reprojected)

# Perform the intersection
preddata_sf <- st_intersection(preddata_sf, management_area_reprojected)

# Extracting coordinates
coords_obsdata <- data.frame(st_coordinates(obsdata_sf))
coords_preddata <- data.frame(st_coordinates(preddata_sf))

# Adding coordinates back to the data frames
obsdata_sf$X <- coords_obsdata$X
obsdata_sf$Y <- coords_obsdata$Y

preddata_sf$X <- coords_preddata$X
preddata_sf$Y <- coords_preddata$Y 

# Initialize an empty data frame to store AICc values
aic_table <- data.frame(
  Species = character(),
  Model = character(),
  AICc = numeric(),
  stringsAsFactors = FALSE
)

# Initialize a named list to store ggplot objects
plot_list <- list()

species_list <- c("Aardvark", "Bushbuck", "Bushpig", "Dikdik",
                  "Eland", "Elephant", "Giraffe",
                  "Greater Kudu", "Hyena", "Impala", "Lesser Kudu",
                  "Mongoose", "Warthog", "Zebra")

# Loop over each species to fit DSMs and compare
for (species_name in species_list) {
 
  # Clear predictions from preddata_sf for the next iteration
  preddata_sf$Prediction <- NULL

  # Clear the prediction grid to ensure it starts fresh for each species
  if (exists("cropped_grid")) {
    rm(cropped_grid)
  }
 
   # Filter observations for the current species
  obs_species <- obsdata_sf %>% filter(species == species_name)
 
  # Print the number of observations for the current species
  print(paste("Number of observations for", species_name, ":", nrow(obs_species)))
 
   # Calculate the detection function for the current species
  detection_function <- ds(obs_species$distance, truncation = list(left = "0%", right = "10%"), key = "hn", adjustment = "cos")
 
  # Fit DSM with HFI (Human Footprint Index)
  dsm_with_HFI <- dsm(count~ s(X, Y, k = 10) + s(HFI,k=10) + s(River,k=10) + s(EVI,k=10) + s(TRI,k=10), detection_function, segdata, obs_species, family = tw(), convert.units = 1/1000, method="REML")
 
  # Print the summary of the model with HFI
  print(summary(dsm_with_HFI))
 
  # Fit DSM without HFI
  dsm_without_HFI <- dsm(count~ s(X, Y, k = 10) + s(River,k=10) + s(EVI,k=10) + s(TRI,k=10), detection_function, segdata, obs_species, family = tw(), convert.units = 1/1000, method="REML")
 
  # Print the summary of the model without HFI
  print(summary(dsm_without_HFI))
 
  # Calculate AICc for each model
  AICc_with_HFI <- AICc(dsm_with_HFI)
  AICc_without_HFI <- AICc(dsm_without_HFI)
 
  # Append to the AICc table
  aic_table <- rbind(
    aic_table,
    data.frame(Species = species_name, Model = "with_HFI", AICc = AICc_with_HFI),
    data.frame(Species = species_name, Model = "without_HFI", AICc = AICc_without_HFI)
  )
 
  # Choose the best model based on AIC
  best_model <- if (AICc_with_HFI < AICc_without_HFI) dsm_with_HFI else dsm_without_HFI
 
  # Predict densities using the best model
  predictions <- predict(best_model, preddata_sf, preddata_sf$Area)
 
  # Assign predictions back to the spatial dataframe for visualization
  preddata_sf$Prediction <- predictions
 
  # Create the visualization for the current species
plot_title <- paste(species_name, "") # This creates a unique title for each species

# Create a prediction grid
prediction_grid <- st_make_grid(study_area, cellsize = c(1000, 1000))
prediction_grid_sf <- st_sf(geometry = prediction_grid)
cropped_grid <- st_intersection(prediction_grid_sf, management_area_reprojected)

# Join predictions to the grid
cropped_grid <- st_join(cropped_grid, preddata_sf, join = st_nearest_feature)

min_value <- min(cropped_grid$Prediction, na.rm = TRUE)
max_value <- max(cropped_grid$Prediction, na.rm = TRUE)
#breaks <- pretty(seq(min_value, max_value), n = 5)

# Calculate a set of pretty breaks for the full range of your data
#full_breaks <- pretty(c(min_value, max_value))
# Calculate the breaks as proportional to the maximum value
  breaks <- c(0, max_value / 4, max_value / 2, 3 * max_value / 4, max_value)
 
# Plotting
  p <- ggplot() +
    geom_sf(data = cropped_grid, aes(fill = Prediction), color = NA) +
    scale_fill_viridis_c(option = "viridis", direction = 1,
                         name = "Density/km²",
                         labels = scales::comma,
                         limits = range(breaks),  # Use the range of selected breaks
                         breaks = breaks  # Use the selected breaks
    ) +
    labs(title = plot_title) +
    theme_void() +
    theme(
      plot.title = element_text(size = 32, face = "bold",  vjust = -3.5), # Increase title font size
      legend.title = element_text(size = 22), # Increase legend title font size
      legend.text = element_text(size = 16), # Increase legend text font size
      legend.key.size = unit(2, "lines"), # Increase the size of the legend keys
      #panel.border = element_rect(colour = "black", fill=NA, size=1), # Add border around each plot
      legend.position = c(0.85, 0.45)  # You will need to adjust these numbers
      #legend.margin = margin(t = 10, b = 10, unit = "pt")    # Use margin and spacing to adjust the exact position
    )
    theme(legend.position = "right")
 
  plot_list[[species_name]] <- p

# Save the plot or print it
#ggsave(paste0(species_name, "_DSM.png"), plot = p, width = 10, height = 7)
 
  # Clear predictions from preddata_sf for the next iteration
  preddata_sf$Prediction <- NULL
}

 # After the loop, arrange the plots in a 5x3 grid
combined_plot <- plot_grid(plotlist = plot_list, ncol = 3, nrow = 5)

print(combined_plot)

#print(aic_table)

I have attached my DSM output for all of my species. If anyone has any insight, please let me know! Thank you! 

Thanks, 
Andrew
DSM_plots.png

Eric Rexstad

unread,
May 9, 2024, 2:49:31 AMMay 9
to Andrew Pettit, distance-sampling
Andrew

Thanks for your question and your code and plot.

I confess that I'm not adept at handling spatial data, but perhaps someone on the list has those skills. From first principles, I wouldn't expect inherent problems in mapping uncertainty. You note that you are getting errors when applying similar code when mapping uncertainty. Can you share with us what you are trying to plot (CV might be more sensible to plot that cell-specific variances) and what those errors are?


From: distance...@googlegroups.com <distance...@googlegroups.com> on behalf of Andrew Pettit <andrew...@ucsb.edu>
Sent: 09 May 2024 03:02
To: distance-sampling <distance...@googlegroups.com>
Subject: [distance-sampling] Plotting Variance For All Species in A For Loop
 
--
You received this message because you are subscribed to the Google Groups "distance-sampling" group.
To unsubscribe from this group and stop receiving emails from it, send an email to distance-sampl...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/distance-sampling/a3178205-9f7c-442f-9b16-18fce7849231n%40googlegroups.com.

Eric Rexstad

unread,
May 11, 2024, 3:07:01 AMMay 11
to Andrew Pettit, distance-sampling
Andrew

Working with the code you provided, I've modified the code you provided to produce a tandem plot of cell-specific abundance and cell-specific CV, shown below.
Don't know whether the code will work in your multi-species context, but it might give you some ideas.

library(dsm)
library(sf)
data(mexdolphins)
predsf <- st_as_sf(pred.polys)

proj4string(survey.area) <- CRS("+proj=longlat +datum=WGS84")
area.sf <- st_as_sf(survey.area)
st_crs(area.sf) <- "WGS84"
area.sf.proj <- st_transform(area.sf, crs = st_crs(predsf))

# Convert preddata to a spatial object
preddata_sf <- st_as_sf(preddata, coords=c("x", "y"))
st_crs(preddata_sf) <- st_crs(area.sf.proj)
# Perform the intersection
preddata_sf <- st_intersection(preddata_sf, area.sf.proj)

coords_preddata <- data.frame(st_coordinates(preddata_sf))

preddata_sf$x <- coords_preddata$X
preddata_sf$y <- coords_preddata$Y 

segdata_sf <- st_as_sf(segdata, coords = c("x","y"))
st_crs(segdata_sf) <- st_crs(area.sf.proj)

# to outline study area for plot of abundance
survey.area <- data.frame(survey.area@polygons[[1]]@Polygons[[1]]@coords)
names(survey.area) <- c("x", "y")

library(ggplot2)
# study area outline and segment centres
plot_segments <- ggplot() +
  geom_sf(data = area.sf.proj, fill="lightblue", color = "blue", linewidth=.1) +
  geom_sf(data=segdata_sf, fill=NA, color="black", linewidth=.3) +
  labs(title="1996 SE Fisheries Science Center Gulf of Mexico cruise",
       subtitle = "Points are segment centres") +
  scale_fill_viridis_c(option = "magma", guide = "none")
plot_segments

# detection function and spatial model
library(Distance)
detfc.hr.null <- ds(distdata, max(distdata$distance), key="hr", adjustment=NULL)
dsm.xy.depth <- dsm(count~s(x,y,k=10) + s(depth,k=20), detfc.hr.null, segdata, obsdata, method="REML")
summary(dsm.xy.depth)
plot(dsm.xy.depth, select=2)
dsm.xy.depth.pred <- predict(dsm.xy.depth, preddata, preddata$area)
preddata.var <- split(preddata, 1:nrow(preddata))
dsm.xy.depth.var <- dsm_var_gam(dsm.xy.depth, pred.data=preddata.var,off.set=preddata$area)
summary(dsm.xy.depth.var)
#================

# attach prediction and cv to the predata sf object
preddata_sf$Prediction <- dsm.xy.depth.pred
preddata_sf$CV <- sqrt(dsm.xy.depth.var$pred.var)/preddata_sf$Prediction

prediction_grid <- st_make_grid(area.sf.proj, cellsize = c(9000,9000))
prediction_grid_sf <- st_sf(geometry = prediction_grid)
cropped_grid <- st_join(prediction_grid_sf, preddata_sf, join = st_nearest_feature)
cropped_grid <- st_intersection(cropped_grid, area.sf.proj)

pred <- ggplot() +
  geom_sf(data = cropped_grid, aes(fill = Prediction), color = NA) +
  geom_sf(data=segdata_sf, fill=NA, color="white", linewidth=.001) +
  labs(title="Spotted dolphins, Gulf of Mexico, abundance estimates",
       subtitle = "Bivariate smooth of location, smooth of depth, quasipoisson") +
  scale_fill_viridis_c(option = "viridis", direction = 1)
pred
CV <- ggplot() +
  geom_sf(data = cropped_grid, aes(fill = CV), color = NA) +
  geom_sf(data=segdata_sf, fill=NA, color="white", linewidth=.001) +
  labs(title="Spotted dolphins, Gulf of Mexico, uncertainty (CV)",
       subtitle = "Bivariate smooth of location, smooth of depth, quasipoisson") +
  scale_fill_viridis_c(option = "viridis", direction = 1)
CV

library(patchwork)
pred / CV

From: distance...@googlegroups.com <distance...@googlegroups.com> on behalf of Andrew Pettit <andrew...@ucsb.edu>
Sent: 09 May 2024 03:02
To: distance-sampling <distance...@googlegroups.com>
Subject: [distance-sampling] Plotting Variance For All Species in A For Loop
 
--
Reply all
Reply to author
Forward
0 new messages