Aggregate simulated daily values to match monthly data

34 views
Skip to first unread message

Debbie Shackleton

unread,
Jun 21, 2022, 9:37:31 AM6/21/22
to nimble-users
Hello,

I am having an issue trying to fit an SIR-style DE model to data. The problem is that the simulation needs to run on a daily resolution, but I only have monthly data to fit it to. I am trying to convince Nimble to first aggregate the simulated daily infection values according to their month before sampling, but the only way I have managed to think to achieve this gives me a BUGS error. It feels a very inelegant solution anyhow, so I'm sure there is a better way! 

Here is a simplified stand-alone version of the model to demonstrate the issue and my (poor) attempt. I've also left in a commented-out version which calibrates against simulated daily data which functions well - so I've left that in for reference. 

library(deSolve)
library(nimble)
library(dplyr)

######################################## FUNCTIONS ##############################################
# SIR equations for NIMBLE
SIR_equations <- function(t, z, theta) {
  H           <- 10000       # Population size
  S           <- z[1]
  I           <- z[2]
  R           <- z[3]
  alpha     <- theta[1]
  beta        <- theta[2]
  lambda = beta * (I / H)
  dS = -lambda * S
  dI = lambda * S - alpha * I
  dR = alpha * I
  returnList  <- list(c(dS = dS, dI = dI, dR = dR))
  return(returnList)
}


ODEfun <- function(z_init, theta) {
  as.matrix(ode(
    y = z_init,
    times = 0:999,
    func = SIR_equations,
    parms = theta
  ))[-1,-1]
}

nimODE <-
  nimbleRcall(function(z_init = double(1),
                       theta = double(1)) {
   
  }, Rfun = 'ODEfun', returnType = double(2))


##################################### SIMULATE DUMMY DATA ############################################
# Parameters which will be estimated
theta <- c(alpha = 0.005, beta = 0.03)

# Number of unknown parameters in this model
num_params <- 2

# Initial Conditions
H  <- 10000       # Population size
I0 <- 10
R0 <- 0
S0 <- H - I0 - R0

# Initial values
z_init <- c(S = S0,
            I = I0,
            R = R0)

# Time series
num_days <- 1000 - 1
ts       <- seq(0, num_days)


# Analyse DEs
SIR_values <-
  as.matrix(ode(
    y = z_init,
    times = ts,
    SIR_equations,
    parms = theta
  ))
df <- data.frame(SIR_values)

# Create some noise
sigma <- 10
noise <- replicate(3, rnorm(nrow(SIR_values), 0, sd = sigma))

noisy_SIR          <- SIR_values[, 2:4] + noise
noisy_SIR[noisy_SIR < 0] <- 0
colnames(noisy_SIR) <- c("S_noise", "I_noise", "R_noise")
daily_infection_data <- unlist(noisy_SIR[-c(1),][,"I_noise"])
plot(daily_infection_data)

#Simulate Monthly Data
monthly_infection_data <- as.data.frame(noisy_SIR) %>%
  mutate(day = 1:nrow(noisy_SIR)) %>%
  mutate(date = as.Date(day, origin = "2000/01/01")) %>%
  mutate(month = lubridate::month(date)) %>%
  mutate(year = lubridate::year(date)) %>%
  group_by(year, month) %>%
  summarise(I_noise = sum(I_noise)) %>%
  ungroup() %>%
  select(I_noise) %>%
  pull()

##An inelegant way to instruct the model which days go in which month
day_month_mapper <- data.frame(day = 1:num_days) %>%
  mutate(date = as.Date(day, origin = "2000/01/01")) %>%
  mutate(month = lubridate::month(date)) %>%
  mutate(year = lubridate::year(date)) %>%
  mutate(month_num = month + 12*(year-2000)) %>%
  select(day,month_num)

month_start_dates <- vector()
month_end_dates <- vector()

for(i in 1:33){
  month <- day_month_mapper %>% filter(month_num == i)
  month_start_dates[i] <- head(month$day,1)
  month_end_dates[i] <- tail(month$day,1)
}
############################################### NIMBLE MODEL ##########################################
num_chains     <- 4
n_months <- 33

Model <- nimbleCode({
  for (i in 1:n_months) {
    start_date = month_start_dates[i]
    end_date = month_end_dates[i]
    z_month = sum(z[start_date:end_date,2]) #Aggregate daily infection values to month
    y[i] ~ dnorm(z_month, sd = sigma)
  }
  z[1:n, 1:3] <- nimODE(z_init = z_init[1:3], theta = theta[1:2])
  sigma    ~ T(dnorm(0, 1), 0,)      # sigma = 10
  theta[1] ~ T(dnorm(0, sd = 5), 0,) # alpha = 0.2
  theta[2] ~ T(dnorm(0, sd = 5), 0,) # beta = 0.5
})
# #Model for daily data
# Model <- nimbleCode({
#   for (i in 1:n) {
#     y[i] ~ dnorm(z[i, 2], sd = sigma)
#   }
#   z[1:n, 1:3] <- nimODE(z_init = z_init[1:3], theta = theta[1:2])
#   sigma    ~ T(dnorm(0, 1), 0,)      # sigma = 10
#   theta[1] ~ T(dnorm(0, sd = 5), 0,) # alpha = 0.2
#   theta[2] ~ T(dnorm(0, sd = 5), 0,) # beta = 0.5
# })

Consts      <- list(n = num_days, n_months = 33, month_start_dates = month_start_dates, month_end_dates = month_end_dates)
# Consts      <- list(n = num_days)
nimbleData  <- list(y = monthly_infection_data, z_init = z_init)
# nimbleData  <- list(y = daily_infection_data, z_init = z_init)
inits       <- list(theta = c(0.1, 0.1), sigma = 1)
nimbleModel <-
  nimbleModel(
    code = Model,
    name = 'nimbleModel',
    constants = Consts,
    data = nimbleData,
    inits = inits
  )

MCMCconfig          <-
  configureMCMC(nimbleModel, monitors = c("theta", "sigma"))
MCMCconfig$removeSamplers("theta")
MCMCconfig$addSampler(target = "theta", type = 'AF_slice')

modelMCMC           <- buildMCMC(MCMCconfig)
compiled_model      <-
  compileNimble(nimbleModel, showCompilerOutput = TRUE)
compiled_model_MCMC <-
  compileNimble(modelMCMC, project = nimbleModel, showCompilerOutput = TRUE)

results <-
  runMCMC(
    compiled_model_MCMC,
    niter = 5000,
    nburnin = 2000,
    nchains = num_chains,
    thin = 10,
    inits = inits,
    progressBar = T,
    samplesAsCodaMCMC = T
  )

Any ideas as to how I can achieve this would be very much appreciated!

Thanks,
Debbie

David Pleydell

unread,
Jun 21, 2022, 1:02:11 PM6/21/22
to nimble-users
Hi Debbie

One problem with this line  z_month = sum(z[start_date:end_date,2])is that the same individuals can contribute multiple times to the aggregate, so z_month will be a biased estimate of how many cases there actually were. One option would be to try to correct this bias by dividing by the expected number of days each individual will be infected.  

A further potential problem (from a biological perspective) is in your observation model y[i] ~ dnorm(z_month, sd = sigma)- where your expected value is all aggregated cases (including it's bias) but that you may observe a y > aggregate(I), which may not make much sense biologically. Usually what people do is to have an "observation model", which models which proportion of the cases that are actually observed, in which case E[y] = aggregate(I) * detectionProbability.  Also, a normal likelihood does not prevent you simulating -ve values for y when z_month is close to zero.

One alternative to help overcome the first problem is to switch to a stochastic model. Using something like the tau-leap method you can track when individuals enter and leave each compartment. We did this recently in a spatial SIR model for African Swine Fever spread among wild boar, and used 10-day aggregates of observed cases (in each pixel) in our likelihood calculations (we used a Poisson likelihood, with the expected value obtained from Monte Carlo simulation). Here's a temporary link to the paper, which has just been accepted for publication in a special issue of Epidemics. Normally you should be able to access our code via the links in the paper. Feel free to shoot me a message if you need some pointers navigating the code to the parts which are relevant to you.

Hope this helps, or at least provides food for thought.

Cheers
David

David Pleydell

unread,
Jun 22, 2022, 4:47:19 AM6/22/22
to nimble-users
Another option is to add a 4th compartment to the SIR model just to track the cumulative incidence, i.e. dCI = lambda * S, then at the end of each month calculate the number of new cases as the difference in CI at the end and start of the month. 

Cheers
David

Chris Paciorek

unread,
Jun 22, 2022, 11:58:48 AM6/22/22
to Debbie Shackleton, nimble-users
Also a quick note in terms of the model coding of your original code (without delving into David's comment and how the model should be defined statistically), it won't work to do this:

for (i in 1:n_months) {
    start_date = month_start_dates[i]
    end_date = month_end_dates[i]
    z_month = sum(z[start_date:end_date,2]) #Aggregate daily infection values to month
    y[i] ~ dnorm(z_month, sd = sigma)
  }

because you are defining start_date, end_date, and z_month multiple times, once for each iteration of the loop. When writing model code you are defining a graphical model, and NIMBLE will return an error if you try to define a node (e.g, start_date) multiple times.

You should be able to do this:

for (i in 1:n_months) {
     y[i] ~ dnorm(sum(z[month_start_dates[i]:month_end_dates[i],2]), sd = sigma)
  }



--
You received this message because you are subscribed to the Google Groups "nimble-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to nimble-users...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/nimble-users/8a9cc6e8-35d2-4ee5-91ee-de11d79314fcn%40googlegroups.com.

David Pleydell

unread,
Jun 27, 2022, 4:18:42 AM6/27/22
to nimble-users
For anyone reading through this thread, and interested in how we fitted the mentioned African Swine Fever spatial spread model, a pre-proof version of the paper is now available online https://doi.org/10.1016/j.epidem.2022.100596

Cheers
David

Debbie Shackleton

unread,
Aug 4, 2022, 12:47:33 PM8/4/22
to David Pleydell, nimble-users
Hi David,

Thanks so much for taking the time to send this. I took some time to look through it, but eventually had to conclude it was beyond my fairly rudimentary understanding of statistics! I therefore thought I would start with what seemed like a more basic solution to achieve integer simulation values in order to use a binomial distribution to map cases to infections. My theory was that I could just swap the ODE solver to a discrete tau-leaping algorithm (I went for the package adaptivetau). However, while I am able to simulate data successfully using the ssa.adaptivetau function, when I use it as a NimbleFunction, I start getting input errors (despite the fact that the inputs should be exactly the same?) I was wondering if you could think of any fundamental reasons why this approach wouldn't work (both technically in Nimble and mathematically)? The problem appears to be with the rate function, which is used as an argument within the NimbleFunction. I think maybe I need to declare it as an argument within the nimbleRcall() function, but I'm not sure if that's possible? Can you think of an alternative solution? (Hopefully this makes sense with the code!). As always, your input is much appreciated!

Here is the code:
library(nimble)
library(dplyr)
library(adaptivetau)

##################################### FUNCTIONS #################################################
#Tau-leap rate function
rate_function <- function(x, p, t) {
  return(c(x["I"]*p$e, #Bacteria Birth
           0.33, #Bacteria death
           p$mu * (x["S"] + x["I"]), # Human birth
           p$mu * x["S"], #Susceptible death
           p$a * (x["B"] / (x["B"] + 10**6)) * x["S"], # Infection
           p$r * x["I"])) #Infective death
}

#Convert from who-knows-what timescale to daily
get_daily <- function(results,num_days){
  df <- as.data.frame(results)
  df$day <- ceiling(df$time)
  daily <- data.frame(day = 1:num_days, S = NA, I = NA, B = NA)  
  for(i in 1:num_days){
    day <- df %>%
      filter(day == i)
    if(nrow(day) > 0){
      daily[i,2:4] <- tail(day[,2:4],1)
    } else {
      daily[i,2:4] <- daily[i-1,2:4]
    }
  }
  return(daily)
}


rate_function <- function(x, p, t) {
  return(c(x["I"]*p$e, #Bacteria Birth
           0.33, #Bacteria death
           p$mu * (x["S"] + x["I"]), # Human birth
           p$mu * x["S"], #Susceptible death
           p$a * (x["B"] / (x["B"] + 10**6)) * x["S"], # Infection
           p$r * x["I"])) #Infective death
}
 
##Simulate Data
SIB_daily <- function(z_init, theta){
  #Transition matrix
  transitions = ssa.maketrans(names(z_init),
                                 rbind('B', +1), # Bacteria birth
                                 rbind('B', -1), # Bacteria death
                                 rbind('S', +1), # Human birth
                                 rbind('S', -1), #Susceptible death
                                 rbind('S', -1, 'I', +1), #Infection
                                 rbind('I', -1) #Infective death
  )

 
  num_days = 300
  theta_list <- as.list(theta)
  results =ssa.adaptivetau(z_init, transitions, rate_function, theta_list, tf= num_days)
  daily = get_daily(results,num_days)
  return(daily)
}

#Remove intial values and day column for Nimble
SIB_daily_nim <- function(z_init, theta){
  SIB_daily_nim <- SIB_daily(z_init,theta)[-1,2:4]
  return(as.matrix(SIB_daily_nim))
}

nimODE <- nimbleRcall(function(z_init = integer(1), theta = double(1)){}, Rfun = 'SIB_daily_nim',
                      returnType = integer(2))

##################################### INITIAL SET UP ###########################################
#Parameters
theta <- c(
  a = 1,
  e = 10,
  r = 0.2,
  mu = 0.0001
)


#Set initial values
N = 10000
I0 = 10
B0 = 0
S0 = N - I0
num_days = 300
z_init <- c(S = S0, I = I0, B = B0)

#Transitions
SIB_daily_sim <- SIB_daily(z_init, theta) #Simulate 'true' values
daily_cases <- rbinom(n = nrow(SIB_daily_sim), size = SIB_daily_sim$I, p = 0.3)[-1] #Simulate reported cases


###################################### NIMBLE MODEL #################################################
Model <- nimbleCode({
  for(i in 1:n){
    y[i] ~ dbin(prob = p, size = z[i,2])
  }
  z[1:n,1:3] <- nimODE(z_init=z_init[1:3],theta=theta[1:4])
  ## Priors
  p ~ dbeta(shape1 = 1.3, shape2 = 5) #P
  theta[1] ~ T(dnorm(0,5),0,) #dgamma(3,0.6)        #a    
  theta[2] ~ T(dnorm(0,5),0,) #dgamma(10,1)         #e
  theta[3] ~ T(dnorm(0,5),0,) #dgamma(3, 0.06)      #r
  theta[4] ~ T(dnorm(0,0.1),0,)
})
Consts <- list(n = num_days - 1)
nimbleData <- list(y=daily_cases,z_init=z_init)
inits <- list(theta=c(1,1,1,0.001),p=0.3)
nimbleModel <- nimbleModel(code=Model, name='nimbleModel', constants = Consts, data = nimbleData,inits = inits)
MCMCconfig <- configureMCMC(nimbleModel,monitors=c("theta","p"))

modelMCMC <- buildMCMC(MCMCconfig)
compiled_model <- compileNimble(nimbleModel, showCompilerOutput = TRUE)
compiled_model_MCMC <- compileNimble(modelMCMC, project = nimbleModel)
results <- runMCMC(compiled_model_MCMC, niter = 50000, nburnin = 30000, nchains = 1, thin=10,
                   inits=inits, progressBar = T, samplesAsCodaMCMC = T)



Thank you!
Debbie

--
You received this message because you are subscribed to the Google Groups "nimble-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to nimble-users...@googlegroups.com.
Reply all
Reply to author
Forward
0 new messages