This is a little strange. I ran 100 simulated datasets, and Mplus, lavaan, and psych::polychoric all produced identical results on those.
So why does the dataset above give different estimates across the three programs? My code is below.
Hao
# Load necessary packages
library(dplyr)
library(lavaan)
library(MplusAutomation)
library(mvtnorm)
#' Simulate one bivariate ordinal dataset and compare polychoric correlation
#' estimates from Mplus (WLSMV sample statistics), lavaan::lavCor(), and
#' psych::polychoric().
#'
#' @param sampleSize Number of observations to draw.
#' @param cor True latent correlation of the bivariate normal variables.
#' @param thresh1 Numeric vector of cut points used to discretize variable 1.
#' @param thresh2 Numeric vector of cut points used to discretize variable 2.
#' @param seed RNG seed; also used to name the Mplus data/input files.
#' @return A one-row data.frame with columns mplus_poly, lav_poly, and
#'   psych_poly, each rounded to 3 decimals so they are directly comparable.
generate_mplus_model <- function(sampleSize, cor, thresh1, thresh2, seed = 1) {
  set.seed(seed)

  # Draw bivariate normal data with the target correlation, then discretize
  # each variable at its thresholds to obtain ordinal categories (1, 2, ...).
  corMat <- matrix(c(1, cor, cor, 1), 2, 2)
  Data <- as.data.frame(rmvnorm(sampleSize, sigma = corMat))
  Data[, 1] <- as.numeric(cut(Data[, 1], breaks = c(-Inf, thresh1, Inf)))
  Data[, 2] <- as.numeric(cut(Data[, 2], breaks = c(-Inf, thresh2, Inf)))

  # Fit an unrestricted WLSMV model in Mplus; the requested sample statistics
  # contain the polychoric correlation matrix.
  mod <- mplusObject(
    TITLE = "Poly;",
    VARIABLE = "categorical are all;",
    ANALYSIS = "ESTIMATOR IS WLSMV;",
    OUTPUT = "sampstat; standardized;",
    usevariables = colnames(Data),
    rdata = Data
  )
  m_basic_fit <- mplusModeler(
    mod,
    dataout = paste0("dataset_", seed, ".dat"),
    modelout = paste0("model_", seed, "_basic_Lab1.inp"),
    check = TRUE,
    run = TRUE,
    hashfilename = FALSE  # spell out FALSE; F is a reassignable binding
  )

  # Renamed from `summary`, which shadowed base::summary().
  samp_stats <- get_sampstat(m_basic_fit)

  # The same polychoric correlation, estimated by lavaan and psych.
  # `ordered = TRUE` written in full; `order = T` relied on partial matching.
  lavCor_mat <- lavCor(Data, ordered = TRUE)
  psych_mat <- psych::polychoric(Data)$rho

  # Round all three estimates identically (the original left the Mplus value
  # unrounded, which made small discrepancies look larger than they are).
  data.frame(
    "mplus_poly" = round(samp_stats$correlations.vardiag[2, 1], 3),
    "lav_poly" = round(lavCor_mat[2, 1], 3),
    "psych_poly" = round(psych_mat[2, 1], 3)
  )
}
# Generate 100 datasets, estimate the polychoric correlation with each of the
# three programs, and collect the estimates into one data frame (one row per
# seed). `lapply` + `bind_rows` replaces the grow-a-list loop, and the
# previous `%>%` pipeline, which failed because dplyr was never loaded.
results <- bind_rows(lapply(seq_len(100), function(i) {
  generate_mplus_model(
    sampleSize = 1794,
    cor = 0.97,
    thresh1 = c(-2.285, -1.818, -0.930, 0.194),
    thresh2 = c(-2.267, -1.949, -0.988, 0.163),
    seed = i
  )
}))