Gdistsamp: ‘non-finite finite-difference value’ error when trying to run basic models with only one observation covariate; using starting values not helping

208 views
Skip to first unread message

Bryan Tarbox

unread,
Feb 12, 2020, 3:15:11 PM2/12/20
to unmarked
Hi,

So I am trying to build my model for gdistsamp very slowly, one step at a time, but getting hung up in the very beginning when testing whether individual observation covariates (e.g., observer, wind) should be included or not.

I have multiple years of data and am using the ‘stacked’ approach suggested in this group.

>ydatEAME<-formatDistData(EAME,distCol="Distance",transectNameCol = "UniqueID",dist.breaks=c(0,25,50,75,100,125,150,250))

>yearlycovs←data.frame(observer=dat_noDups$Observer,noise=dat_noDups$Noise,wind=dat_noDups$Wind,date=dat_noDups$Julian,time=dat_noDups$Time,year=dat_noDups$Year)

>umfEAME<-unmarkedFrameGDS(y=as.matrix(ydatEAME),yearlySiteCovs = yearlycovs,numPrimary=1,dist.breaks=c(0,25,50,75,100,125,150,250),survey="point",unitsIn="m")

> summary(umfEAME)
unmarkedFrame Object

5266 sites
Maximum number of observations per site: 7 
Mean number of observations per site: 7 
Number of primary survey periods: 1 
Number of secondary survey periods: 1 
Sites with at least one detection: 846 

Tabulation of y observations:
    0     1     2     3     4     5 
35664  1029   144    23     1     1 

Yearly-site-level covariates:
    observer        noise               wind               date              time            year    
 B      : 621   Min.   :-1.64975   Min.   :-1.69971   Min.   :-1.4756   Min.   :-1.74110   2013:551  
 F      : 531   1st Qu.:-0.50942   1st Qu.:-0.75413   1st Qu.:-0.6776   1st Qu.:-0.80900   2014:778  
 A      : 498   Median :-0.50942   Median : 0.19145   Median :-0.2217   Median : 0.04086   2015:797  
 D      : 489   Mean   : 0.02134   Mean   : 0.04636   Mean   :-0.0306   Mean   : 0.05476   2016:858  
 L      : 484   3rd Qu.: 0.63091   3rd Qu.: 1.13703   3rd Qu.: 0.2343   3rd Qu.: 0.90442   2017:860  
 E      : 396   Max.   : 2.91158   Max.   : 2.08261   Max.   : 3.7682   Max.   : 2.41223   2018:855  
 (Other):2247   


> modSel(fitList(ex_NullEAME,hn_NullEAME,hz_NullEAME))
            nPars      AIC delta   AICwt cumltvWt
hz_NullEAME     3 11093.65  0.00 9.9e-01     0.99
hn_NullEAME     2 11103.22  9.57 8.3e-03     1.00
ex_NullEAME     2 11136.20 42.55 5.7e-10     1.00

> hz_NullEAME

Call:
gdistsamp(lambdaformula = ~1, phiformula = ~1, pformula = ~1, 
    data = umfEAME, keyfun = "hazard")

Abundance:
 Estimate     SE     z  P(>|z|)
   -0.645 0.0504 -12.8 1.36e-37

Detection:
 Estimate     SE   z P(>|z|)
     4.94 0.0446 111       0

Hazard-rate(scale):
 Estimate     SE    z  P(>|z|)
     1.04 0.0979 10.6 3.74e-26

AIC: 11093.65 


#Select covariates to use for model building

>hz_noiseEAME <-gdistsamp(lambdaformula = ~1,phiformula = ~1,pformula=~noise,data=umfEAME,keyfun="hazard")

>hz_windEAME <-gdistsamp(lambdaformula = ~1,phiformula = ~1,pformula=~wind,data=umfEAME,keyfun="hazard")

>hz_dateEAME <-gdistsamp(lambdaformula = ~1,phiformula = ~1,pformula=~date,data=umfEAME,keyfun="hazard")

>hz_timeEAME <-gdistsamp(lambdaformula = ~1,phiformula = ~1,pformula=~time,data=umfEAME,keyfun="hazard")


> hz_observerEAME <-gdistsamp(lambdaformula = ~1,phiformula = ~1,pformula=~observer,data=umfEAME,keyfun="hazard")
Error in optim(starts, nll, method = method, hessian = se, ...) : 
  non-finite finite-difference value [23]


#Using hn model coefs as starts for hz model

> hn_observerEAME <-gdistsamp(lambdaformula = ~1,phiformula = ~1,pformula=~observer,data=umfEAME,keyfun="halfnorm")

#it runs, but produces NaNs
> hn_observerEAME

Call:
gdistsamp(lambdaformula = ~1, phiformula = ~1, pformula = ~observer, 
    data = umfEAME, keyfun = "halfnorm")

Abundance:
 Estimate     SE     z  P(>|z|)
   -0.431 0.0436 -9.87 5.41e-23

Detection:
            Estimate     SE       z  P(>|z|)
(Intercept)  4.99363 0.0644 77.5437 0.00e+00
observerB   -0.12249 0.0749 -1.6361 1.02e-01
observerC   -0.40827 0.1143 -3.5710 3.56e-04
observerD   -0.44144 0.0758 -5.8206 5.87e-09
observerE   -0.17877 0.0813 -2.1998 2.78e-02
observerF   -0.31643 0.0746 -4.2418 2.22e-05
observerG   -0.11730 0.1063 -1.1036 2.70e-01
observerH   -0.59352 0.1111 -5.3398 9.30e-08
observerI   -0.56821 0.0983 -5.7811 7.42e-09
observerJ   -0.09223 0.1022 -0.9025 3.67e-01
observerK   -0.07276 0.1060 -0.6863 4.93e-01
observerL   -0.41438 0.0757 -5.4728 4.43e-08
observerM   -0.51080 0.0995 -5.1321 2.86e-07
observerN   -0.38090 0.1031 -3.6939 2.21e-04
observerO    0.00216 0.1285  0.0168 9.87e-01
observerP    5.90508    NaN     NaN      NaN
observerQ   -0.49692 0.1605 -3.0955 1.97e-03
observerR   -0.09244 0.0879 -1.0521 2.93e-01
observerS   -0.25768 0.2053 -1.2551 2.09e-01
observerT   -0.18080 0.1129 -1.6018 1.09e-01
observerU   -0.07745 0.1163 -0.6657 5.06e-01

AIC: 10910.51 
Warning message:
In sqrt(diag(vcov(obj))) : NaNs produced


#use hn coefs as starting values to re-run hn, but with 0 for the NaN parameter (Observer P)

> starts_hno <- c(-0.430808559,4.993632411,-0.122491346,-0.408265972,-0.441435301,-0.178765446,-0.316430640,-0.117295767,-0.593523082,-0.568205051,-0.092234024,-0.072759791,-0.414384242,-0.510802402,-0.380903437,0.002164097,0,-0.496918611,-0.092438860,-0.257685056,-0.180799229,-0.077453170)

> hn_observerEAME <-gdistsamp(lambdaformula = ~1,phiformula = ~1,pformula=~observer,starts=starts_hno,data=umfEAME,keyfun="halfnorm")
> hn_observerEAME

Call:
gdistsamp(lambdaformula = ~1, phiformula = ~1, pformula = ~observer, 
    data = umfEAME, keyfun = "halfnorm", starts = starts_hno)

Abundance:
 Estimate     SE     z  P(>|z|)
   -0.431 0.0436 -9.87 5.43e-23

Detection:
            Estimate      SE      z  P(>|z|)
(Intercept)  4.99362  0.0644 77.545 0.00e+00
observerB   -0.12248  0.0749 -1.636 1.02e-01
observerC   -0.40826  0.1143 -3.571 3.56e-04
observerD   -0.44144  0.0758 -5.821 5.86e-09
observerE   -0.17876  0.0813 -2.200 2.78e-02
observerF   -0.31641  0.0746 -4.242 2.22e-05
observerG   -0.11730  0.1063 -1.104 2.70e-01
observerH   -0.59352  0.1111 -5.340 9.30e-08
observerI   -0.56820  0.0983 -5.781 7.42e-09
observerJ   -0.09229  0.1022 -0.903 3.66e-01
observerK   -0.07274  0.1060 -0.686 4.93e-01
observerL   -0.41436  0.0757 -5.473 4.43e-08
observerM   -0.51079  0.0995 -5.132 2.87e-07
observerN   -0.38090  0.1031 -3.694 2.21e-04
observerO    0.00218  0.1285  0.017 9.86e-01
observerP    5.76115 41.7764  0.138 8.90e-01
observerQ   -0.49691  0.1605 -3.095 1.97e-03
observerR   -0.09244  0.0879 -1.052 2.93e-01
observerS   -0.25780  0.2053 -1.256 2.09e-01
observerT   -0.18081  0.1129 -1.602 1.09e-01
observerU   -0.07744  0.1163 -0.666 5.06e-01

AIC: 10910.51 


#now using hn coefs as starting values for hz

#there should be 23 parameters: lambda, p(int), 20 observers, and the hz scale parameter – right?

>starts_hzo <- c(0,0,-0.122484281,-0.408257265,-0.441444384,-0.178763362,-0.316413521,-0.117300457,-0.593524883,-0.568199542,-0.092285078,-0.072742405,-0.414364150,-0.510787767,-0.380898808,0.002180646,5.761151003,-0.496911128,-0.092438420,-0.257796680,-0.180813516,-0.077442439,0)


> hz_observerEAME <-gdistsamp(lambdaformula = ~1,phiformula = ~1,pformula=~observer,starts=starts_hzo,data=umfEAME,keyfun="hazard")
Error in optim(starts, nll, method = method, hessian = se, ...) : 
  non-finite finite-difference value [23]


#trying it using the coef function directly in starts definition

> starts_hzo <- c(coef(hn_observerEAME),0)
> hz_observerEAME <-gdistsamp(lambdaformula = ~1,phiformula = ~1,pformula=~observer,starts=starts_hzo,data=umfEAME,keyfun="hazard")
Error in optim(starts, nll, method = method, hessian = se, ...) : 
  initial value in 'vmmin' is not finite


#for reference:

> coef(hn_observerEAME)
 lambda(Int)       p(Int) p(observerB) p(observerC) p(observerD) p(observerE) p(observerF) p(observerG) p(observerH) 
-0.430791554  4.993622423 -0.122484281 -0.408257265 -0.441444384 -0.178763362 -0.316413521 -0.117300457 -0.593524883 
p(observerI) p(observerJ) p(observerK) p(observerL) p(observerM) p(observerN) p(observerO) p(observerP) p(observerQ) 
-0.568199542 -0.092285078 -0.072742405 -0.414364150 -0.510787767 -0.380898808  0.002180646  5.761151003 -0.496911128 
p(observerR) p(observerS) p(observerT) p(observerU) 
-0.092438420 -0.257796680 -0.180813516 -0.077442439 


#switching the placement of the 0 around

> starts_hzo <- c(0,coef(hn_observerEAME))
> hz_observerEAME <-gdistsamp(lambdaformula = ~1,phiformula = ~1,pformula=~observer,starts=starts_hzo,data=umfEAME,keyfun="hazard")
Error in optim(starts, nll, method = method, hessian = se, ...) : 
  non-finite finite-difference value [23]


I’ve also tried putting two 0s before or after coef(model_name), which doesn’t make a difference. I tried just using the hn coefs (with no 0s added) and that crashes R, right after spitting out some out of bounds type error (I’m assuming because I’m not including enough parameters in the starts).

I’ve also tried using 1, 2 and 4 instead of 0s because supposedly 0 is not always a good starting value for lambda or phi…  but that makes no difference.

I’ve also tried setting mixture to P and se to FALSE, based on recommendations on this forum, to no avail.

I realize 20 different observers is a lot, but there’s not really any way around that, short of just throwing away data. This is another organization’s data (not my own), and there’s no information with regard to observer skill by which I could lump observers, unfortunately.

Any thoughts on how to resolve these errors?

Thank you!
Bryan Tarbox

Ken Kellner

unread,
Feb 12, 2020, 3:40:50 PM2/12/20
to unmarked
Hi Bryan,

I would guess that the strange coefficient estimates for observer P reflects something strange about the data for observer P. Have you looked at the breakdown of the number of observations for each observer to see if there is anything unusual? Maybe P had a very small number of observations relative to the others or their actual observations had unusual values.

I would also try running the model without the data from that observer to see if runs without issues.

Ken

Adrienne Beatrice

unread,
Feb 13, 2020, 2:59:09 PM2/13/20
to unmarked
I have been having a similar issue, where my start values seem to be causing the same error.
Error in optim(starts, nll, method = method, hessian = se, ...) : 
  non-finite finite-difference value [23]

Were you able to solve the issue Bryan?
Or does anyone else have suggestions as to to how to modify the start values so that it is able to compute a finite value?

-Adrienne

Bryan Tarbox

unread,
Feb 13, 2020, 3:02:47 PM2/13/20
to unmarked
Hi Ken,

Thanks for the feedback. As far as I can tell, there's nothing anomalous about Observer P.

Histogram of observations of Eastern Meadowlark by each Observer:

Observer_Hist.jpeg

Box plot of Distances at which Eastern Meadowlark was observed by each Observer:

ObserverxDist_Boxplot.jpeg

What I do notice is that there are 21 observers, but for some reason the halfnormal model (which I'm using coefs from as starting values for the hazard model) removes Observer A... is it just using A as the intercept? Doesn't really make sense to me...


> str(yearlycovs)
'data.frame':	5266 obs. of  6 variables:
 $ observer: Factor w/ 21 levels "A","B","C","D",..: 16 16 16 16 16 6 6 6 6 6 ...
 $ noise   : num  -0.509 -0.509 0.631 1.771 1.771 ...
 $ wind    : num  -1.7 -1.7 -0.754 -0.754 -0.754 ...
 $ date    : num  -0.678 -0.678 -0.678 -0.564 -0.564 ...
 $ time    : num  -1.481 -1.385 -1.248 0.301 0.397 ...
 $ year    : Factor w/ 7 levels "2013","2014",..: 1 1 1 1 1 1 1 1 1 1 ...

For the record, I'm also having the same issues with the covariate of year. Interestingly, year and observer are the only two factor covariates, the rest are numeric:


> str(umfEAME)
Formal class 'unmarkedFrameGDS' [package "unmarked"] with 11 slots
  ..@ dist.breaks   : num [1:8] 0 25 50 75 100 125 150 250
  ..@ tlength       : num [1:5266] 1 1 1 1 1 1 1 1 1 1 ...
  ..@ survey        : chr "point"
  ..@ unitsIn       : chr "m"
  ..@ numPrimary    : num 1
  ..@ yearlySiteCovs:'data.frame':	5266 obs. of  6 variables:
  .. ..$ observer: Factor w/ 21 levels "A","B","C","D",..: 16 16 16 16 16 6 6 6 6 6 ...
  .. ..$ noise   : num [1:5266] -0.509 -0.509 0.631 1.771 1.771 ...
  .. ..$ wind    : num [1:5266] -1.7 -1.7 -0.754 -0.754 -0.754 ...
  .. ..$ date    : num [1:5266] -0.678 -0.678 -0.678 -0.564 -0.564 ...
  .. ..$ time    : num [1:5266] -1.481 -1.385 -1.248 0.301 0.397 ...
  .. ..$ year    : Factor w/ 7 levels "2013","2014",..: 1 1 1 1 1 1 1 1 1 1 ...
  ..@ y             : num [1:5266, 1:7] 0 0 0 0 0 0 0 0 0 0 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:5266] "13coaustin001" "13coaustin002" "13coaustin003" "13coaustin049" ...
  .. .. ..$ : chr [1:7] "[0,25]" "(25,50]" "(50,75]" "(75,100]" ...
  ..@ obsCovs       : NULL
  ..@ siteCovs      : NULL
  ..@ mapInfo       : NULL
  ..@ obsToY        : num [1, 1:7] 1 1 1 1 1 1 1


I've considered trying the models without Observer P, but the structure of the data set makes that a bit of an effort, so I just wanted to figure out if there was a genuine reason to think that would help (before wasting time + throwing away data). Based on the graphs I shared here, I don't really see why Observer P in particular would be the problem, but maybe I'm missing something?

Thank you,
Bryan

Bryan Tarbox

unread,
Feb 13, 2020, 3:48:02 PM2/13/20
to unmarked
Just wanted to update that using the coefficients from the halfnormal year model allowed the hazard year model to work just fine.

> hn_yearEAME

Call:
gdistsamp(lambdaformula = ~1, phiformula = ~1, pformula = ~year, 
    data = umfEAME, keyfun = "halfnorm")

Abundance:
 Estimate     SE     z  P(>|z|)
   -0.465 0.0472 -9.86 6.47e-23

Detection:
            Estimate     SE     z  P(>|z|)
(Intercept)    5.208 0.0849 61.38 0.00e+00
year2014      -0.623 0.0861 -7.24 4.64e-13
year2015      -0.421 0.0856 -4.91 8.93e-07
year2016      -0.373 0.0851 -4.38 1.17e-05
year2017      -0.278 0.0857 -3.25 1.17e-03
year2018      -0.446 0.0849 -5.25 1.53e-07
year2019      -0.604 0.0890 -6.79 1.12e-11

AIC: 11000.95 

starts_hzy <- c(-0.4652879,5.2083353,-0.6233272,-0.4206767,-0.3727187,-0.2780598,-0.4458392,-0.6044468,0)


>
hz_yearEAME Call: gdistsamp(lambdaformula = ~1, phiformula = ~1, pformula = ~year, data = umfEAME, keyfun = "hazard", starts = starts_hzy) Abundance: Estimate SE z P(>|z|) -0.542 0.0563 -9.62 6.75e-22 Detection: Estimate SE z P(>|z|) (Intercept) 5.215 0.0690 75.54 0.00e+00 year2014 -0.700 0.0973 -7.19 6.31e-13 year2015 -0.428 0.0839 -5.10 3.48e-07 year2016 -0.350 0.0760 -4.61 4.10e-06 year2017 -0.237 0.0752 -3.16 1.59e-03 year2018 -0.431 0.0807 -5.34 9.14e-08 year2019 -0.611 0.0935 -6.53 6.59e-11 Hazard-rate(scale): Estimate SE z P(>|z|) 0.881 0.0972 9.06 1.27e-19 AIC: 10992.18


Bryan Tarbox

unread,
Feb 13, 2020, 3:49:10 PM2/13/20
to unmarked
But still no luck with the observer model. Tried replicating the process I used for year exactly, just to double check:

> starts_hzo <- c(-0.430791554,4.993622423,-0.122484281,-0.408257265,-0.441444384,-0.178763362,-0.316413521,-0.117300457,-0.593524883,-0.568199542,-0.092285078,-0.072742405,-0.414364150,-0.510787767,-0.380898808,0.002180646,5.761151003,-0.496911128,-0.092438420,-0.257796680,-0.180813516,-0.077442439,0)

Bryan Tarbox

unread,
Feb 25, 2020, 1:31:11 PM2/25/20
to unmarked
So changing the hazard scale starting value from 0 to 1 fixed my problem.

> starts_hzo <- c(-0.430808559,4.993632411,-0.122491346,-0.408265972,-0.441435301,-0.178765446,-0.316430640,-0.117295767,-0.593523082,-0.568205051,-0.092234024,-0.072759791,-0.414384242,-0.510802402,-0.380903437,0.002164097,0,-0.496918611,-0.092438860,-0.257685056,-0.180799229,-0.077453170,1)
> hz_observerEAME <-gdistsamp(lambdaformula = ~1,phiformula = ~1,pformula=~observer,starts=starts_hzo,data=umfEAME,keyfun="hazard")
> summary(hz_observerEAME)

Call: gdistsamp(lambdaformula = ~1, phiformula = ~1, pformula = ~observer, data = umfEAME, keyfun = "hazard", starts = starts_hzo) Abundance (log-scale): Estimate SE z P(>|z|) -0.415 0.069 -6.02 1.78e-09 Detection (log-scale): Estimate SE z P(>|z|) (Intercept) 4.9616 0.0835 59.444 0.00e+00 observerB -0.2220 0.1008 -2.201 2.77e-02 observerC -0.4482 0.1601 -2.799 5.12e-03 observerD -0.7492 0.1333 -5.622 1.89e-08 observerE -0.2600 0.1058 -2.458 1.40e-02 observerF -0.4504 0.1072 -4.202 2.65e-05 observerG -0.1457 0.1190 -1.225 2.21e-01 observerH -0.6987 0.1536 -4.550 5.37e-06 observerI -0.6967 0.1497 -4.655 3.24e-06 observerJ -0.0516 0.1077 -0.479 6.32e-01 observerK -0.1462 0.1266 -1.155 2.48e-01 observerL -0.6127 0.1236 -4.957 7.18e-07 observerM -0.7688 0.1703 -4.514 6.35e-06 observerN -0.4480 0.1355 -3.306 9.45e-04 observerO -0.0737 0.1532 -0.481 6.30e-01 observerP 1.6096 8.7830 0.183 8.55e-01 observerQ -0.5098 0.2041 -2.498 1.25e-02 observerR -0.0844 0.0920 -0.918 3.59e-01 observerS -0.3500 0.2879 -1.216 2.24e-01 observerT -0.2294 0.1422 -1.613 1.07e-01 observerU -0.0203 0.1206 -0.168 8.67e-01 Hazard-rate(scale) (log-scale): Estimate SE z P(>|z|) 0.725 0.122 5.93 2.96e-09 AIC: 10901.26 Number of sites: 5266 optim convergence code: 0 optim iterations: 116 Bootstrap iterations: 0

Reply all
Reply to author
Forward
0 new messages