Hi Perry and Daniel
Unfortunately, the code seems to get 'stuck' at the initial number of knots simulated and does not explore different knot values? I also note warnings of LogProb of -Inf for certain knot values which have been set to zero and so should not be in the model in the first place so I'm not sure why the MCMC algorithm is determining its log probability?
I'm assuming somewhere in the algorithm this is leading to the probability of any proposed changes to the number of knots always being zero but I'm not sure what exactly could be causing this? Please can you assist me in determining how the code below needs to be adjusted to work for this example.
Thank you
Best regards
Kavir
## R CODE:
## ----------------------------------------------------------------------------
## ADAPTIVE BAYESIAN REGRESSION SPLINES CUSTOM SAMPLER
## Attempt to implement BARS using nimble
## ----------------------------------------------------------------------------
## LOAD PACKAGES ----------------
suppressPackageStartupMessages({
library(data.table)
library(plotly)
library(splines2)
library(ModelMatrixModel)
library(nimble)
})
## SIM DATA ----------------
set.seed(123)
dt <- data.table(x = seq(0.1, 10, 0.1))
betas <- c(20, 30, 50)
dt[, y := nsp(x, knots = 5, intercept = T)%*%betas + rnorm(100, 0, 0.5)
]
plot_ly(dt) %>%
add_markers(x = ~x, y = ~y)
## P-BARS MODEL ----------------
## Specify nsp code in R
Build_nsp <- function(x, K) {
nsp(x, knots = K, intercept = T)
}
Rnsp <- nimbleRcall(function(x = double(1), K = double(1)) {},
Rfun = 'Build_nsp',
returnType = double(2))
## Produce the fit from the selected spline
calc_fit <- nimbleFunction(
run = function(x = double(1), K = double(1), betas = double(2), KNum = integer(0)) {
p <- KNum + 2
B <- Rnsp(x, K[1:KNum])
fitArray <- B %*% betas[1:p, KNum]
returnType(double(1))
return(fitArray[, 1])
}
)
## Specify model code
BARS_code <- nimbleCode({
Alpha[1:Kmax] <- nimRep(0.1, Kmax)
Theta[1:Kmax] ~ ddirch(Alpha[1:Kmax])
KNum ~ dcat(Theta[1:Kmax])
K[1] ~ dunif(x_min, x_max)
for (i in 2:Kmax){
K[i] ~ dunif(K[i-1], x_max)
}
for (j in 1:Kmax) {
betas[1, j] ~ dnorm(0, sd = 1)
for (i in 2:(Kmax+2)) {
betas[i, j] ~ dnorm(betas[i-1, j], sd = 1)
}
}
fit[1:N] <- calc_fit(x[1:N], K[1:Kmax], betas[1:(Kmax+2), 1:Kmax], KNum)
for (i in 1:N) {
y[i] ~ dnorm(fit[i], sd = 1)
}
})
## Prepare required params for the model
BARS_const <- list(N = dim(dt)[1] ,
Kmax = 5 ,
x_min = min(dt$x) ,
x_max = max(dt$x)
)
BARS_data <- list(x = dt$x,
y = dt$y)
BARS_inits <- function(BARS_const) {
KNum = sample(1:BARS_const$Kmax, 1)
K = sort(runif(BARS_const$Kmax, BARS_const$x_min, BARS_const$x_max))
K[(KNum+1):BARS_const$Kmax] <- 0
betas = matrix(rnorm((BARS_const$Kmax+2)*BARS_const$Kmax), ncol = BARS_const$Kmax)
z = matrix(0, ncol = BARS_const$Kmax, nrow = BARS_const$Kmax+2)
z[, KNum] <- 1
betas <- betas*z
return(list(Theta = rep(1 / BARS_const$Kmax, BARS_const$Kmax) ,
KNum = KNum ,
K = K ,
betas = betas
) )
}
## Specify the Bars Model
BARS_model <- nimbleModel(code = BARS_code ,
constants = BARS_const ,
data = BARS_data ,
inits = BARS_inits(BARS_const))
## Check all variables have been initialised
BARS_model$initializeInfo()
## Compile model in Cpp
CBARS_model <- compileNimble(BARS_model)
## Configure the base MCMC process
BARS_Conf <- configureMCMC(BARS_model,
monitors = c("KNum", "K", "betas", "fit"))
BARS_Conf$getSamplers()
## CUSTOM KNUM SAMPLER ------------------
## Specify a custom sampler of the number of knots which will then require an
## associated proposal distribution for knot locations and coefficients
Sampler_BARSRJ_Knots <- nimbleFunction(
name = 'Sampler_BARSRJ_Knots',
contains = sampler_BASE,
setup = function(model, mvSaved, target, control) {
x_min <- control$x_min
x_max <- control$x_max
Kmax <- control$Kmax
## Select all possible parameters that can be effected
## for specifying the nodes to be saved in the last step
calcNodes <- model$getDependencies(c("KNum", "K", "betas"))
## Due to dependency on the previous knot we need to remove the knot
## not being used in the model from the dependency structure
Current_KNum <- model[[target]]
KnotNodes <- paste0("K[1:", Current_KNum, "]")
BetaNodes <- paste0("betas[1:", Kmax+2, ", ", Current_KNum,"]")
CalcCurrentNodes <- model$getDependencies(c("KNum", KnotNodes, BetaNodes))
CalcCurrentNodes <- CalcCurrentNodes[CalcCurrentNodes != paste0("K[", Current_KNum+1, "]")]
## Need to determine required knot selection at this step
## cannot make use of r functions in run code like paste0
if (Current_KNum == 1) {
## If we at the minimum number of knots we cannot reduce
CalcReducedNodes <- CalcCurrentNodes
} else {
## Otherwise we can remove a knot
ReducedKnotNodes <- paste0("K[1:", Current_KNum-1, "]")
ReducedBetaNodes <- paste0("betas[1:", Kmax+2, ", ", Current_KNum-1,"]")
CalcReducedNodes <- model$getDependencies(c("KNum", ReducedKnotNodes, ReducedBetaNodes))
CalcReducedNodes <- CalcReducedNodes[CalcReducedNodes != paste0("K[", Current_KNum, "]")]
}
if (Current_KNum == Kmax) {
## If we at the max number of knots we cannot increase
CalcAddNodes <- CalcCurrentNodes
} else {
## Otherwise we can remove a knot
AddKnotNodes <- paste0("K[1:", Current_KNum+1, "]")
AddBetaNodes <- paste0("betas[1:", Kmax+2, ", ", Current_KNum+1,"]")
CalcAddNodes <- model$getDependencies(c("KNum", AddKnotNodes, AddBetaNodes))
CalcAddNodes <- CalcAddNodes[CalcAddNodes != paste0("K[", Current_KNum+2, "]")]
}
},
run = function() {
# Current_KNum <- model$KNum
## Determine proposed change to current num knots
if (Current_KNum == Kmax) {
## If the current number of knots is the maximum allowed
## number then propose removing a knot
K_delta <- -1
} else {
if (Current_KNum == 1) {
## If the current number of knots is 1
## then propose adding a knot
K_delta <- 1
} else {
## Randomly add or remove a knot
if (runif(1)>0.5) {
K_delta <- 1
} else {
K_delta <- -1
}
}
}
## calculated proposed number of knots
Proposed_KNum <- Current_KNum+K_delta
if (K_delta == 1) {
## Logic for when we adding a new knot
currentLogProb <- model$getLogProb(CalcCurrentNodes)
## Update number of knots
model[[target]] <<- Proposed_KNum
## In all instances this will be the 2nd or
## beyond knot as there is a min of 1 knot
## as such we can always use the previous knot
## value as a lower bound
Proposed_knot <- runif(1, model$K[Proposed_KNum-1], x_max)
model$K[Proposed_KNum] <<- Proposed_knot
## Propose values for the associated set of
## coefficients to be activated.
Current_Coef <- model$betas[, Current_KNum]
Proposed_Coef <- rnorm(Kmax+2, 0, 1)
model$betas[, Proposed_KNum] <<- Proposed_Coef
model$betas[, Current_KNum] <<- 0
## Determine associated probability of proposed inclusions
## noting that the existing parameter set is being removed
## Note at each step we are running the MH algorithm for a specific variable
## while keeping all others constant!!
logProbForwardProposal <- dunif(Proposed_knot, model$K[Proposed_KNum-1], x_max, log = TRUE) +
sum(dnorm(Proposed_Coef, 0, 1, log = TRUE)) - sum(dnorm(Current_Coef, 0, 1, log = TRUE))
proposalLogProb <- model$calculate(CalcAddNodes)
logAcceptanceProb <- proposalLogProb - currentLogProb - logProbForwardProposal
} else {
## Logic for when we remove a knot
currentLogProb <- model$getLogProb(CalcCurrentNodes)
## Update number of knots
model[[target]] <<- Proposed_KNum
## Define logic to remove the last knot
Current_knot <- model$K[Current_KNum]
model$K[Current_KNum] <<- 0
## Amend coefficient values
Current_Coef <- model$betas[, Current_KNum]
Proposed_Coef <- rnorm(Kmax+2, 0, 1)
model$betas[, Proposed_KNum] <<- Proposed_Coef
model$betas[, Current_KNum] <<- 0
logProbReverseProposal <- dunif(Current_knot, model$K[Current_KNum-1], x_max, log = TRUE) -
sum(dnorm(Proposed_Coef, 0, 1, log = TRUE)) + sum(dnorm(Current_Coef, 0, 1, log = TRUE))
proposalLogProb <- model$calculate(CalcReducedNodes)
logAcceptanceProb <- proposalLogProb - currentLogProb + logProbReverseProposal
}
accept <- decide(logAcceptanceProb)
if(accept) { copy(from = model, to = mvSaved, row = 1, nodes = calcNodes, logProb = TRUE)
} else { copy(from = mvSaved, to = model, row = 1, nodes = calcNodes, logProb = TRUE) }
},
methods = list(
reset = function() { }
)
)
## Next we need to define the toggle logic
## Here we can try to leverage the existing rj method of only
## sampling if current value != fixed value (0)
## Note that each target node of the knots and betas will need to be looped through this logic
BARS_RJ_toggled <- nimbleFunction(
name = 'BARS_RJ_toggled',
contains = sampler_BASE,
setup = function(model, mvSaved, target, control) {
## control list extraction
fixedValue <- 0
samplerType <- if(!is.null(control$samplerType)) control$samplerType else stop('must provide \'samplerType\' control list element to RJ_toggled sampler')
## nested function and function list definitions
samplerFL <- nimbleFunctionList(sampler_BASE)
samplerFL[[1]] <- samplerType$buildSampler(model = model, mvSaved = mvSaved)
},
run = function() {
if(model[[target]] != fixedValue)
samplerFL[[1]]$run()
},
methods = list(
reset = function() {
samplerFL[[1]]$reset()
}
)
)
## Configure number of knot sampler
BARS_Conf$removeSamplers("KNum")
BARS_Conf$addSampler(type = "Sampler_BARSRJ_Knots",
target = "KNum",
control = list(x_min = BARS_const$x_min,
x_max = BARS_const$x_max,
Kmax = BARS_const$Kmax
))
## Check samplers
BARS_Conf$getSamplers()
## Configuration function for knots and coefficients
## to be toggled in and out of the model
configureBARSRJ_toggled <- function(conf, targetNodes) {
model <- conf$model
nNodes <- length(targetNodes)
for(i in 1:nNodes) {
nodeAsScalar <- model$expandNodeNames(targetNodes[i], returnScalarComponents = TRUE)
for(j in 1:length(nodeAsScalar)) {
currentConf <- conf$getSamplers(nodeAsScalar[j])
## Add sampler for the coefficient variable (when is in the model)
conf$removeSamplers(nodeAsScalar[j])
conf$addSampler(type = BARS_RJ_toggled,
target = nodeAsScalar[j],
control = list(samplerType = currentConf[[1]]))
}
}
return(invisible(NULL))
}
## Configure the parameters to be toggled in and out
## of the sampling process
configureBARSRJ_toggled(BARS_Conf, c("K", "betas"))
## Check samplers
BARS_Conf$getSamplers()
## Build the MCMC object
nimBarsRJMCMC <- buildMCMC(BARS_Conf)
## Compile the Sampler
CnimBarsRJMCMC <- compileNimble(nimBarsRJMCMC)
BARS_model$KNum
BARS_model$K
RJBarssamples <- runMCMC(CnimBarsRJMCMC,
niter = 1e4,
nchains = 4
)
## NB! PROBLEM: WARNINGS PROVIDED OF -INF FOR KNOT LOCATIONS > CURRENT NUMBER OF KNOTS
## THE SAMPLING IS NOT MOVING BETWEEN DIFFERENT NUMBER OF KNOTS??
nim_rjmcmc_samples <- data.table::rbindlist(lapply(1:length(RJBarssamples),
function(i) {as.data.table(RJBarssamples[[i]])[, Chain := i] }))
summary(nim_rjmcmc_samples$KNum)
Bars_fit <- colMeans(nim_rjmcmc_samples[, .SD, .SDcols = patterns('fit')])
plot_ly(dt) %>%
add_markers(x = ~x, y = ~y, name = "Simulated data") %>%
add_lines(x = ~x, y = ~Bars_fit, name = "Nimble Bars fit")