MCFAsim <- function(N_2=100, # number of clusters (level 2)
N_1=10, # number of observations(level 1)
MU=0, # grand average
n.items=4, # number of items to be generated
item.letter="x", # letter that identifies items of interest
cluster="ID", # name of the grouping variable
# COVARIANCE MATRICES
matrices.type="simulated", # simulated or empirical
std=TRUE, # standardized cov matrices?
# 2.1. if empirical, specify the DATASET and the cluster variable
mydata=NA, # dataset (data.frame)
# 2.2. if simulated, specify the LOADINGS in the one-level model(s)
level2.model="WLb =~ 0.6*x1 + 0.6*x2 + 0.6*x3 + 0.6*x4",
level1.model=level2.model, # default: the same as level 1 (conf. construct)
# MCFA MODEL
twolevels.model='level: 1
WLw =~ a*x1 + b*x2 + c*x3 + d*x4
level: 2
WLb =~ a*x1 + b*x2 + c*x3 + d*x4',
corrs=FALSE # want to save also correlations between simulated variables?
){
if(matrices.type=="simulated"){
# generate data on lv2
myData <- lavaan::simulateData(level2.model,model.type="cfa",
std.lv=TRUE,sample.nobs=N_2)
b.cov <- lavaan::inspectSampleCov(level2.model,myData)$'cov' # cov matrix on lv2
# generate data on lv1
myData <- lavaan::simulateData(level1.model,model.type="cfa",
std.lv=TRUE,sample.nobs=N_1)
w.cov <- lavaan::inspectSampleCov(level2.model,myData)$'cov' # covariance matrix on lv1
}else if(matrices.type=="empirical"){
# take cov matrices from observed data using the specified MCFA model
b.cov <- lavaan::inspectSampleCov(twolevels.model,mydata,cluster="ID")$ID$`cov`
w.cov <- lavaan::inspectSampleCov(twolevels.model,mydata,cluster="ID")$`within`$`cov`
}
# standardized cov matrices?
if(std==TRUE){
b.cov <- cov2cor(b.cov)
w.cov <- cov2cor(w.cov)
}
# Simulating clusters' means (lv2) = grandAverage + BLUP
U <- MASS::mvrnorm(n=N_2,mu=rep(0,n.items),Sigma=b.cov) # BLUPs
cMeans <- MU + U # simulated clusters' means (1 X cluster X item)
# Simulating within-cluster scores (lv1) = clusters' means + residual deviance
dataList <- list()
for (cl in 1:nrow(cMeans)){
e <- MASS::mvrnorm(n=N_1,mu=rep(0,n.items),Sigma=w.cov) # Residuals
for (i in 1:nrow(e)){ e[i,] <- e[i,] + cMeans[cl,] } # within-cluster scores
dataList[[cl]] <- e
}
# Merging lv1 and lv2 data in a multilevel dataset
simulated <- data.frame(ID=rep(1:N_2,N_1),TIME=rep(rep(NA,N_1),N_2)) # empty dataset
simulated <- simulated[order(simulated$ID),] # ordered by ID
simulated$TIME=rep(1:N_1,N_2)
for(i in 1:n.items){ simulated <- cbind(simulated,rep(NA,nrow(simulated))) } # empty items columns
colnames(simulated)[3:ncol(simulated)] <- paste(item.letter,seq(1,n.items,1),sep="") # item names
for(cl in 1:length(dataList)){ # from list to df
simulated[simulated$ID==cl,3:ncol(simulated)] <- dataList[[cl]]
}
# parameters estimation
results <- try(lavaan::cfa(twolevels.model,data=simulated,cluster=cluster,
std.lv=TRUE),silent=TRUE) # CFA
parameters <- data.frame(matrix(nrow=0,ncol=n.items*6+5+3+6)) # empty dataset
if(class(results)!="try-error"){ error = FALSE # errors check
if(lavaan::lavInspect(results,"converged")==TRUE){ convergence = TRUE # convergence check
if(any(as.vector(lavaan::lavInspect(results,"est")[["ID"]][["theta"]])<0) & # negative var check
any(as.vector(lavaan::lavInspect(results,"est")[["within"]][["theta"]])<0)){
neg.Variance = TRUE } else { neg.Variance = FALSE }
parameters <- rbind(parameters,
# model info (ncol=5)
c(N_2,N_1,matrices.type,lavaan::lavInspect(results,"fit")[c("npar")],std,
# converg. (ncol=3)
error,convergence,neg.Variance,
# lv1 & lv2 loadings - default type="std.all" (ncol=n.items*2)
c(round(lavaan::standardizedsolution(results)[
lavaan::standardizedsolution(results)$op=="=~","est.std"],3)),
# fit indices (ncol=6)
round(lavaan::lavInspect(results,"fit")[c("chisq","df","rmsea","cfi",
"srmr_within","srmr_between")],3),
# lv1&lv2 loadings CI (ncol=n.items*4)
c(round(lavaan::standardizedsolution(results)[
lavaan::standardizedsolution(results)$op=="=~","ci.lower"],3)),
c(round(lavaan::standardizedsolution(results)[
lavaan::standardizedsolution(results)$op=="=~","ci.upper"],3))
# correlations between observed variables
))
}else{ convergence = FALSE # not converged
parameters <- rbind(parameters,
c(N_2,N_1,matrices.type,NA,std, # model info (ncol=5)
error,convergence,NA, # converg. (ncol=3)
rep(NA,n.items*6+6))) # loadings and fit indices (ncol=n.items*4+4)
}
}else{ error=TRUE # error
parameters <- rbind(parameters,
c(N_2,N_1,matrices.type,NA,std, # model info (ncol=2)
error,NA,NA, # converg. (ncol=3)
rep(NA,n.items*6+6))) # loadings and fit indices (ncol=n.items*4+4)
}
colnames(parameters) <- c("N_2","N_1","matrices.type","npar","std","error","converged","negVariance",
paste(paste(item.letter,1:n.items,sep=""),rep("w",n.items),sep=""),
paste(paste(item.letter,1:n.items,sep=""),rep("b",n.items),sep=""),
"chisq","df","rmsea","cfi","srmr_between","srmr_within",
paste(paste(item.letter,1:n.items,sep=""),rep("w.CI.low",n.items),sep=""),
paste(paste(item.letter,1:n.items,sep=""),rep("b.CI.low",n.items),sep=""),
paste(paste(item.letter,1:n.items,sep=""),rep("w.CI.up",n.items),sep=""),
paste(paste(item.letter,1:n.items,sep=""),rep("b.CI.up",n.items),sep=""))
if(corrs==TRUE){ # should add n.items*(n-1)/2 columns
corrS <- cor(simulated[,3:ncol(simulated)],simulated[,3:ncol(simulated)],use="complete.obs",method="pearson")
corr.lab <- matrix(apply(expand.grid(rownames(corrS),rownames(corrS)),1,paste,collapse="."),
nrow=n.items,ncol=n.items)
corrS <- round(corrS[lower.tri(corrS, diag = FALSE)],2)
names(corrS) <- corr.lab[lower.tri(corr.lab, diag = FALSE)]
parameters <- cbind(parameters,t(corrS))
}
return(parameters)
}