I'm simulating some data and analyzing it in unmarked, and I noticed
that the analysis is often (but not always), extremely sensitive to the
distance breaks in the analysis. Below is some example code. When
binning the data into bins that seem perfectly sensible to me, the
analysis fails miserably, but if I use the same distance breaks that are
used in most of the vignette examples (i.e. c(0,5,10,15,20)), then the
estimates are quite sensible. What's going on?
E.g.
library('unmarked')
y
<- data.frame(transect = factor(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10,
10)),distance=c(5.54,2.8,1.87,11.43,4.16,12.3,2.37,3.65,11.05,10.94,4.62,13.61,11.48,4.9,8.26,4.1,1.49,1.28,9.15,0.4,4.2,0.2,5.81,1.86,12.9,4.27,5.96,4.67,4.22,10.22,6.22,8.37,9.62,1.04,0.6,7.04,7.55,5.46,4.55,1.17,5.48,0.8,9.86,3.95,13.09,9.55,10.69,13.07,5.47,10.13,7.8,0.9,8.98,1.83,5.11,12.45,4.15,11.97,9.52,2.79,14.45,8.22,12.47,3.94,12.48,6.52,11.71,3.6,12.3,7.17,2.58,10.75,5.38,13.26,11.82,3.51,5.59,7.53,10.94,4.11,0.15,14.11,3.32,4.63,13.52,0.45,0.22,9.07,2.18,3.16,6.62,0.41,8.54,8.53,1.14,5.16,9.6,6.51,4.22,3.47,6.28,6.44,3.13,10.58,3.2,0.61,14.48,5.84,5.44,9.31,2.66,5.76,0.89,6.93,1.42,14.14,0.82,1.94,7.03,12.11,5.3,2.1,11.14,9.83,0.86,10,0.2,11.47,7.7,2.85,1.12,8.52,2.87,4.86,3.55,12.44,1.32))
# number of distance-break categories
cats <- 5
# 2 different options for distance breaks. The first doesn't work for me. The 2nd does.
breaks <- c(0, round((max(y$distance)+1)/cats)*(1:cats))
breaks <- c(0,5,10,15,20)
# format the data
unmk.dat <- formatDistData(distData = y,
distCol="distance",
transectNameCol="transect",
dist.breaks = breaks)
umf <- unmarkedFrameDS(y=unmk.dat,
siteCovs=NULL,
survey="line",
dist.breaks=breaks,
tlength=rep(100,10),
unitsIn="m")
# Analyze data
hn_Null <- distsamp(formula = ~1~1,
data = umf,
keyfun="halfnorm",
output="density",
unitsOut="ha")
# extract density estimate & calculate bias
unmarkeddensity <- backTransform(hn_Null, type="state")
unmarkeddensity_bias <- (unmarkeddensity@estimate - 60)/60
# extract estimate of detection parameter & calculate bias
unmarkedsigma <- backTransform(hn_Null, type='det')
unmarkedsigma_bias <- (unmarkedsigma@estimate - 10)/10
unmarked.estimates <- data.frame(Item = c("density", "sigma"),
True = c(60, 10),
Estimated = c(unmarkeddensity@estimate, unmarkedsigma@estimate),
Bias = c(unmarkeddensity_bias, unmarkedsigma_bias))
unmarked.estimates