library(R2jags)
set.seed(1)
J <- 2
n <- 5000
X <- cbind(1, matrix(runif(n*J, -1, 1), ncol=2))
b <- 1:3
p <- plogis(X %*% b)
y <- rbinom(n, 1, p)
M <- function() {
for(i in 1:n) {
y[i] ~ dbern(p[i])
logit(p[i]) <- inprod(X[i, ], b)
}
for(j in 1:(J+1)) {
b[j] ~ dnorm(0, 0.0001)
}
for (t in 1:length(thr)) {
sens[t] <- sum((p > thr[t]) && (y==1))/n1
spec[t] <- sum((p < thr[t]) && (y==0))/n0
fpr[t] <- 1 - spec[t]
fnr[t] <- 1 - sens[t]
}
auc <- sum((sens[2:length(sens)]+sens[1:(length(sens)-1)])/2 *
-(fpr[2:length(fpr)] - fpr[1:(length(fpr)-1)]))
}
j <- jags(list(X=X, y=y, n=n, J=J, n1=sum(y), n0=sum(!y), thr=seq(0, 1, 0.05)),
NULL, c('b', 'sens', 'spec', 'fpr', 'fnr', 'auc'), M, 3, 1000)
n <- 5000X <- runif(n, -1, 1)a <- 0.5b <- 1.2p <- plogis(a + b * X)y <- rbinom(n, 1, p)thr <- seq(0, 1, 0.05)n_threshold <- length(seq(0, 1, 0.05))
data <- list( n = n, variable = X, y = y, thr = thr, n_threshold = n_threshold )model <-" data{ int<lower=1> n; real variable[n]; int n_threshold; real thr[n_threshold]; int<lower=0, upper=1> y[n]; } parameters{ real a; real b; } model{ real logit_p[n]; real p[n]; real sens[n_threshold]; real spec[n_threshold]; real fpr[n_threshold]; real auc;
for(i in 1:n) { logit_p[i] <- a + b * variable[i]; p[i] <- inv_logit(logit_p[i]); } y ~ bernoulli(p);
a ~ normal(0, 10); b ~ normal(0, 10);
for (t in 1:n_threshold) { sens[t] <- sum((p > thr[t]) && (y==1))/cumulative_sum(y); spec[t] <- sum((p < thr[t]) && (y==0))/cumulative_sum(!y); fpr[t] <- 1 - spec[t] } auc <- sum((sens[2:length(sens)]+sens[1:(length(sens)-1)])/2 * -(fpr[2:length(fpr)] - fpr[1:(length(fpr)-1)]))}"j <- stan(model_code=model, data = data, iter=2000, chains = 3)
binary infix operator >= with functional interpretation logical_gt requires arguments or primitive type (int or real), found left type=real[], right arg type=real; No matches for:
logical_gt(real[], real)
I don't see why you needed to declare this is only an estimate of AUC.
As far as I'm aware you can only ever "estimate" it.
Despite this, like WAIC and other measures, these approximations are useful for examining model performance.
Hi Bob, we use it for binary classification problems along with cross validating to assess model performance for binary problems.
I'm curious to know what measures you would recommend for binary issues though.
Thanks, this is good to know.I'll have a look at how to code up the log loss. I haven't used it before.
-1 / length(y) * sum(y * log(yhat) + (1 - y) * log(1 - yhat))