Hi Yves,
Thank you for the reply again. I am simulating some data and getting errors with your suggestions below is the simulated data I use and the errors/warnings im getting. I wanted to bring it to your attention, it could be totally my error as well.
------------------- simulated data------------------------------
# Load necessary libraries
library(MASS)
library(lavaan)
set.seed(12345)
n <- 20000
# 1. Single latent factor
F1 <- rnorm(n, mean=0, sd=1)
# 2. Exogenous variables
X1 <- rnorm(n, mean=50, sd=10)
X2 <- rnorm(n, mean=100, sd=15)
D1 <- rbinom(n, size=1, prob=0.5)
D2 <- rbinom(n, size=1, prob=0.5)
D3 <- rbinom(n, size=1, prob=0.5)
# Function to create one ordinal indicator
generate_ordered <- function(F, lambda, tau1, tau2) {
eta <- lambda * F
prob1 <- pnorm(tau1 - eta)
prob2 <- pnorm(tau2 - eta) - prob1
prob3 <- 1 - pnorm(tau2 - eta)
probs <- cbind(prob1, prob2, prob3)
apply(probs, 1, function(p) sample(1:3, size=1, prob=p))
}
# Define loadings and thresholds for each indicator
lambda <- c(1.0, 0.8, 0.6, 0.9)
tau1 <- c(-0.5, -0.4, -0.3, -0.2)
tau2 <- c(0.5, 0.6, 0.7, 0.8)
# Generate exactly four ordinal indicators
Y1 <- generate_ordered(F1, lambda[1], tau1[1], tau2[1])
Y2 <- generate_ordered(F1, lambda[2], tau1[2], tau2[2])
Y3 <- generate_ordered(F1, lambda[3], tau1[3], tau2[3])
Y4 <- generate_ordered(F1, lambda[4], tau1[4], tau2[4])
# 3. Outcome "wage"
intercept <- 10
beta_F <- 2.0
beta_X1 <- 0.5
beta_X2 <- -0.3
beta_D1 <- 1.0
beta_D2 <- -1.5
beta_D3 <- 0.8
error <- rnorm(n, mean=0, sd=5)
wage <- intercept + beta_F*F1 + beta_X1*X1 + beta_X2*X2 +
beta_D1*D1 + beta_D2*D2 + beta_D3*D3 + error
# Combine into data frame
simulated_data <- data.frame(
wage, X1, X2, D1, D2, D3,
Y1, Y2, Y3, Y4
)
simulated_data[,c("Y1", "Y2", "Y3", "Y4")] <- simulated_data[,c("Y1", "Y2", "Y3", "Y4")] - 1
head(simulated_data)
------------------- Estimating SAM with Dummy Variables------------------------------
model <- '
# Measurement model (ordinal indicators loading on F)
F =~ Y1 + Y2 + Y3 + Y4
wage ~ F + X1 + X2 + D1 + D2 + D3
'
fit.lsam <- sam(model, data = simulated_data, sam.method = "local", mm.args = list(estimator = "DWLS"), ordered = c("Y1", "Y2", "Y3", "Y4"))
Warning: lavaan->muthen1984():
trouble constructing W matrix; used generalized inverse for A11 submatrix
------------------- Estimating SAM with Dummy Variables fixed.x=T------------------------------
model <- '
# Measurement model (ordinal indicators loading on F)
F =~ Y1 + Y2 + Y3 + Y4
wage ~ F + X1 + X2 + D1 + D2 + D3
'
fit.lsam <- sam(model, data = simulated_data, sam.method = "local", struc.args = list(fixed.x = TRUE), mm.args = list(estimator = "DWLS"), ordered = c("Y1", "Y2", "Y3", "Y4"))
Error: lavaan->lav_samplestats_from_moments():
the (D)WLS estimator is only available with full data or with a user-provided WLS.V
------------------- Estimating SAM without Dummy Variables------------------------------
model <- '
# Measurement model (ordinal indicators loading on F)
F =~ Y1 + Y2 + Y3 + Y4
wage ~ F + X1 + X2
'
fit.lsam <- sam(model, data = simulated_data, sam.method = "local", mm.args = list(estimator = "DWLS"), ordered = c("Y1", "Y2", "Y3", "Y4"))
RESULT: Works fine no errors
Side note: recovers the parameters well
If I estimate the "S" using a heterogenous Correlation matrix would this work? For example, to deal with the dummy variables
N <- nrow(Data)
S <- hetcor(Data) * (N - 1L)/N # adjust for biased sample covariance matrix
Var.eta <- M %*% (S - Theta) %*% t(M)