Thanks, Yves and Felipe!
Yves: I didn't know about the use of the model-implied (co)variance matrix, so it's good to know! Thanks for sharing!
Felipe: That's a great idea to calculate the coefficients manually! However, the function didn't return the predicted values based only on the path coefficients, so I wrote another one instead. The full example is below. Maybe someone else might find this useful in the future.
Thank you both again! Have a nice weekend!
rm(list=ls())
library("lavaan")
set.seed(9000)
popModel <- '
Y ~ 2*1 + 0.5*X
'
D <- simulateData(model=popModel, model.type="sem", sample.nobs=1000)
# regression model only
fit1 <- sem(model=model1,data=D,meanstructure=TRUE)
subset(parameterestimates(fit1),lhs=="Y" & op!="~~")
# lhs op rhs est se z pvalue ci.lower ci.upper
# 1 Y ~ X 0.524 0.031 16.672 0 0.463 0.586
# 4 Y ~1 2.030 0.032 63.511 0 1.968 2.093
# with residual covariance
model2 <- '
Y ~ X
Y ~~ 0.7*X
'
fit2 <- sem(model=model2,data=D,meanstructure=TRUE)
subset(parameterestimates(fit2),lhs=="Y" & op!="~~")
# lhs op rhs est se z pvalue ci.lower ci.upper
# 1 Y ~ X -0.153 0.044 -3.508 0 -0.239 -0.068
# 5 Y ~1 2.039 0.039 52.697 0 1.963 2.114
# this is correct
lavPredictY(object=fit1,newdata=D[1:2,],ynames="Y",xnames="X")[,"Y"]
# [1] 1.407290 1.668423
# this is incorrect
lavPredictY(object=fit2,newdata=D[1:2,],ynames="Y",xnames="X")[,"Y"]
# [1] 1.407290 1.668423
# conditional mean prediction from implied (!) mu and sigma
predictions_by_hand <- function(fit, newdata) {
imp <- lavInspect(fit, "implied")
sigma <- imp$cov; mu <- imp$mean
muY <- mu["Y"]; muX <- mu["X"]
Sxx <- sigma["X","X"]; Syx <- sigma["Y","X"]
x_new <- newdata$X
beta_cond <- Syx / Sxx
muY + beta_cond * (x_new - muX)
}
# this is correct
predictions_by_hand(fit1, D[1:2,])
# [1] 1.407290 1.668423
# this is incorrect
predictions_by_hand(fit2, D[1:2,])
# [1] 1.407290 1.668423
# manually calculate predicted values using path coefficients
predictions_by_hand_2 <- function(fit, newdata, dv) {
fit.coef <- subset(parameterEstimates(fit), op=="~" & lhs==dv)
fit.coef.intercept <- subset(parameterEstimates(fit), op=="~1" & lhs==dv)
as.numeric(
fit.coef.intercept[,"est"] +
(as.matrix(newdata[,fit.coef[,"rhs"],drop=FALSE]) %*%
unlist(fit.coef[,"est"]))[,1])
}
# these are correct
predictions_by_hand_2(fit1, D[1:2,], dv="Y")
# 1.407290 1.668423
predictions_by_hand_2(fit2, D[1:2,], dv="Y")
# [1] 2.220626 2.144333