# Data ----
# lavaan provides cfa(), lavPredict(), parameterestimates() and the
# HolzingerSwineford1939 dataset; data.table provides setDT().
library(lavaan)
library(data.table)

HolzingerSwineford1939

# 1. CFA on participants who completed all cognitive tests ----
# lavaan expects one formula per line. The first loading is freed (NA*x1)
# and the factor is identified by fixing its variance to 1 instead.
G_factor_model <- '
  G =~ NA*x1 + x4 + x5 + x6 + x9
  G ~~ 1*G
'
fit <- cfa(G_factor_model, data = HolzingerSwineford1939, meanstructure = TRUE)
parameterestimates(fit)
FactorScores <- lavPredict(fit, method = "ml")

# 2. CFA on participants who completed all cognitive tests except x1 ----
# Drop column 7 (x1), then add x1 back as an all-missing numeric column so the
# variable still exists for the fixed-parameter model below.
HolzingerSwineford1939_no_x1 <- setDT(as.data.frame(HolzingerSwineford1939[, -7]))
HolzingerSwineford1939_no_x1$x1 <- NA_real_ # blank column

# Every parameter is fixed to a hard-coded value; presumably these are the
# estimates from the model in step 1 -- verify against parameterestimates(fit).
G_factor_model_fixed <- '
  G =~ 0.480*x1 + 0.990*x4 + 1.102*x5 + 0.913*x6 + 0.276*x9
  # fix variance
  G ~~ 1*G
  x1 ~~ 1.128*x1
  # fix intercepts
  x1 ~ 4.936 * 1
  x4 ~ 3.061 * 1
  x5 ~ 4.341* 1
  x6 ~ 2.186 * 1
  x9 ~ 5.374 * 1
'
# NOTE(review): x1 is entirely missing in this dataset; confirm that lavaan
# accepts an all-NA observed variable with missing = "ml".
fit_fixed <- cfa(
  G_factor_model_fixed,
  data = HolzingerSwineford1939_no_x1,
  meanstructure = TRUE,
  missing = "ml"
)
FactorScores_fixed <- lavPredict(fit_fixed, method = "ml")

# Compare factor scores
cor(FactorScores, FactorScores_fixed)

# Working copy of the dataset used by the following sections
dt <- HolzingerSwineford1939
# Single-factor model: the first loading is freed (NA*x1) and the variance of
# G is fixed to 1.
model <- 'G =~ NA*x1 + x4 + x5 + x6 + x9
G ~~ 1*G'

# Fit the CFA to the full dataset, requesting FIML handling of missing data
fit_fiml <- cfa(model = model, data = dt, missing = "fiml")

# Copy the data and blank out variable x1 for the first 100 rows
dtWithNA <- dt
dtWithNA[seq_len(100), "x1"] <- NA_real_
nrow(dt) # 301

# Score the partially missing data with the model fitted on the full data
FactorScores_missing <- lavPredict(
  object = fit_fiml,
  newdata = dtWithNA,
  method = "ML"
)

# Correlate with the factor scores computed earlier in the script
cor(x = FactorScores, y = FactorScores_missing)
# "When missing values were present, the FIML method proceeds by eliminating
# those rows and columns of the predicted covariance matrix corresponding to
# the position of the missing values. Elements of the predicted mean vector
# (or threshold matrix) are also removed, so the likelihood calculation is
# performed only for those values present in the data."
## Two models ----
dt <- HolzingerSwineford1939

# Copy of the data with x1 set to missing for the first 100 rows
dtWithNA <- dt
dtWithNA[1:100, "x1"] <- NA_real_
nrow(dt) # 301

# Copy of the data with column 7 (x1) removed
dt_no_x1 <- dt[-7]

# lavaan expects one formula per line. In both models the first loading is
# freed (NA*) and the variance of G is fixed to 1.
G_factor_model <- '
  G =~ NA*x1 + x4 + x5 + x6 + x9
  G ~~ 1*G
'
G_factor_model_no_x1 <- '
  G =~ NA*x4 + x5 + x6 + x9
  G ~~ 1*G
'

# Both models are fitted on the complete data; the second does not use x1.
fit <- cfa(G_factor_model, data = dt, missing = "ml")
fit_no_x1 <- cfa(G_factor_model_no_x1, data = dt, missing = "ml")

# Factor scores - complete dataset, dataset without x1, dataset where
# x1[1:100] = NA
dtWithNA <- setDT(dtWithNA)
# Extract x1 by name so the check prints a plain vector (after setDT, numeric
# column indexing returns a one-column table instead).
dtWithNA[["x1"]][1:100] # x1 is column 7
# [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
# [32] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
# [63] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
# [94] NA NA NA NA NA NA NA
FactorScores <- lavPredict(fit, newdata = dt, method = "ml")
FactorScores_no_x1 <- lavPredict(fit_no_x1, newdata = dt_no_x1, method = "ml")
FactorScores_NA <- lavPredict(fit, newdata = dtWithNA, method = "ml")

# Keeping x1 in the CFA model gives individuals with missing x1 (rows 1:100)
# factor scores that are closer to the scores they would have obtained with
# complete data than dropping x1 from the model does.
cor(FactorScores[1:100], FactorScores_NA[1:100])
# [1] 0.9981
cor(FactorScores[1:100], FactorScores_no_x1[1:100])
# [1] 0.9975