Phil
`````
#' RMSD statistics to quantify category-level DIF
#'
#'
#' @param dat complete dataset
#' @param group group membership vector
#' @param pooled_mod a multiple-group model (used to compute the model-implied
#' probability in the goodness-of-fit test)
RMSD_DIF <- function(dat, group, pooled_mod){
stopifnot(nrow(dat) == length(group))
which.groups <- extract.mirt(pooled_mod, 'groupNames')
ret <- vector('list', length(which.groups))
names(ret) <- which.groups
for(which.group in which.groups){
smod <- extract.group(pooled_mod, which.group)
nfact <- extract.mirt(smod, 'nfact')
stopifnot(nfact == 1L)
sv <- mod2values(smod)
sv$est <- FALSE
# make wider grid for better numerical integration
Theta <- matrix(seq(-6,6,length.out = 201))
mod_g <- mirt(subset(dat, group == which.group), nfact,
itemtype = extract.mirt(smod, 'itemtype'),
pars = sv, technical = list(storeEtable=TRUE, customTheta=Theta))
Etable <- mod_g@Internals$Etable[[1]]$r1
# standard normal dist for theta
f_theta <- dnorm(Theta)
f_theta <- as.vector(f_theta / sum(f_theta))
itemloc <- extract.mirt(mod_g, 'itemloc')
which.items <- 1L:ncol(dat)
ret2 <- vector('list', ncol(dat))
names(ret2) <- extract.mirt(mod_g, 'itemnames')
for(i in seq_len(length(which.items))){
pick <- itemloc[which.items[i]]:(itemloc[which.items[i]+1L] - 1L)
O <- Etable[ ,pick]
P_o <- O / rowSums(O)
item <- extract.item(smod, which.items[i])
P_e <- probtrace(item, Theta)
ret2[[i]] <- sqrt(colSums((P_o - P_e)^2 * f_theta))
}
class(ret2) <- 'mirt_list'
ret[[which.group]] <- ret2
}
ret
}
#----------------------------------------------------------------------
#----- generate some data
set.seed(12345)
a <- a2 <- matrix(abs(rnorm(15,1,.3)), ncol=1)
d <- d2 <- matrix(rnorm(15,0,.7),ncol=1)
# item 1 has DIF
d2[1] <- d[1] - .5
a2[1] <- a[1] + 1
itemtype <- rep('2PL', nrow(a))
N <- 1000
dataset1 <- simdata(a, d, N, itemtype)
dataset2 <- simdata(a2, d2, N, itemtype)
dat <- rbind(dataset1, dataset2)
group <- c(rep('D1', N), rep('D2', N))
#-----
# fully pooled model
pooled_mod <- multipleGroup(dat, 1, group=group, invariance = colnames(dat))
coef(pooled_mod, simplify=TRUE)
RMSD_DIF(dat, group=group, pooled_mod)
# more freely estimated model (item 1 has 2 parameters estimated)
MGmod <- multipleGroup(dat, 1, group=group,
invariance = colnames(dat)[-1])
coef(MGmod, simplify=TRUE)
# RMSD in item.1 now reduced (MG model accounts for DIF)
RMSD_DIF(dat, group=group, MGmod)
#################
# polytomous example
set.seed(12345)
a <- a2 <- matrix(rlnorm(20,.2,.3))
# for the graded model, ensure that there is enough space between the intercepts,
# otherwise closer categories will not be selected often (minimum distance of 0.3 here)
diffs <- t(apply(matrix(runif(20*4, .3, 1), 20), 1, cumsum))
diffs <- -(diffs - rowMeans(diffs))
d <- d2 <- diffs + rnorm(20)
# item 1 has slope + dif for first intercept parameter
d2[1] <- d[1] - .5
a2[1] <- a[1] + 1
itemtype <- rep('graded', nrow(a))
N <- 1000
dataset1 <- simdata(a, d, N, itemtype)
dataset2 <- simdata(a2, d2, N, itemtype)
dat <- rbind(dataset1, dataset2)
group <- c(rep('D1', N), rep('D2', N))
#-----
# fully pooled model
pooled_mod <- multipleGroup(dat, 1, group=group, invariance = colnames(dat))
coef(pooled_mod, simplify=TRUE)
RMSD_DIF(dat, group=group, pooled_mod)
# more freely estimated model (item 1 has more parameters estimated)
MGmod <- multipleGroup(dat, 1, group=group,
invariance = colnames(dat)[-1])
coef(MGmod, simplify=TRUE)
# RMSD in first two categories of item.1 now reduced (MG model accounts for DIF)
RMSD_DIF(dat, group=group, MGmod)
````