a16_CLMM_conditional.Rmd
Generated data using conditional model - other CLMM Vignettes were using marginal data generation.
The conditional data generation does NOT allow you to just fit a model without random intercepts. This is distinct from the marginal models! With marginal models only the SE is affected; with conditional models, you can’t ignore the subject effects or else you bias the estimates. So of course the fixed effects only model clm()
won’t handle missing data correctly, because it can’t handle complete data correctly!
clm gee clmm0|1 -1.503910258 -1.504229339 -2.029189347
1|2 0.012948946 0.012290884 0.009714282
2|3 1.497568976 1.496833165 2.035309535
-0.001289427 0.005188065 -0.001165598
GroupGroup_2 -0.004865262 0.022942622 -0.007264689
TimeTime_2 -0.022885972 -0.011805839 -0.031622008
TimeTime_3 0.011694160 0.003421927 0.014869004
TimeTime_4 :TimeTime_2 0.202491944 -0.202849533 0.270631754
GroupGroup_2:TimeTime_3 0.500235452 -0.501617735 0.675335162
GroupGroup_2:TimeTime_4 0.748884352 -0.749870769 1.012121149
GroupGroup_2>
# Marginal means:
> cbind('clm' = colMeans(out.emms.clm),
+ 'clmm' = colMeans(out.emms.clmm))
clm clmm1,] 0.001289427 0.001165598
[2,] -0.201202517 -0.269466156
[3,] -0.498946025 -0.674169564
[4,] -0.747594924 -1.010955551 [
rm(list = ls())
gc()
library(SPR)
library(ordinal)
library(multgee)
library(emmeans)
# Initialize output:
out.clm <- vector()
out.clmm <- vector()
out.gee <- vector()
out.emms.clm <- vector()
out.emms.clmm <- vector()
number.repl <- 100
#--------------------------
for (repl in 1:number.repl) {
set.seed(6252021 + repl)
sim.out <- SPR::sim_dat_ord_logistic_conditional(N = 300,
number.groups = 2 ,
number.timepoints = 4,
reg.formula = formula( ~ Time + Group + Time*Group),
Beta = 1,
thresholds = c(-2, 0, 2),
subject.var = 2,
cond.mcar = F,
Covariate = F)
dat <- sim.out$dat
# Visualization:
# Quick barplot, base R plot functions
# barplot(100*table(dat$Y_comp)/sum(table(dat$Y_comp)),
# ylim = c(0, 100), ylab = 'Percentage',
# col = 'grey', main = 'Ordinal')
#------------
# CLM
mod.clm <- ordinal::clm(as.factor(Y_comp) ~ Group + Time + Group*Time,
nAGQ = 5, data= dat)
# CLMM - Random Effect
mod.clmm <- ordinal::clmm(as.factor(Y_comp) ~ Group + Time + Group*Time + (1|USUBJID),
nAGQ = 5, data= dat)
# Generalized Estimating Equations:
dat.gee <- dat
tmp <- order(dat.gee$id.geepack)
dat.gee <- dat.gee[tmp, ]
mod.gee <- multgee::ordLORgee(formula = Y_comp ~ Time + Group + Time*Group,
data = dat.gee,
id = id.geepack,
repeated = Time, LORstr = "uniform")
# Marginal Means
# emmeans::emmeans(mod.gee, pairwise ~ Group | Time) NOT AVAILABLE
emm.clm <- emmeans::emmeans(mod.clm, pairwise ~ Group | Time)
emm.clmm <- emmeans::emmeans(mod.clmm, pairwise ~ Group | Time)
# Output
out.gee <- rbind(out.gee, coef(mod.gee))
out.clm <- rbind(out.clm, coef(mod.clm))
out.clmm <- rbind(out.clmm, coef(mod.clmm))
out.emms.clm <- rbind(out.emms.clm,
as.data.frame(emm.clm$contrast)$estimate)
out.emms.clmm <- rbind(out.emms.clmm,
as.data.frame(emm.clmm$contrast)$estimate)
cat(paste0('Replication: ', repl, '\n'))
}# end replications
sim.out$Beta
sim.out$thresholds
# All parameters
cbind('clm' = colMeans(out.clm),
'gee' = colMeans(out.gee),
'clmm' = colMeans(out.clmm))
# Marginal means:
cbind('clm' = colMeans(out.emms.clm),
'clmm' = colMeans(out.emms.clmm))
How does the conditional model handle drop-out? Results of simulation study show that it behaves the way you would expect - the CLMM returns unbiased estimates under MAR drop-out, and biased estimates under MNAR.
> tab.out[tab.out$Param_type == 'GroupGroup_2:TimeTime_4', ]
Data Param_type Estimate Bias41 Complete GroupGroup_2:TimeTime_4 1.0121211 0.01212115
42 MCAR GroupGroup_2:TimeTime_4 1.0204622 0.02046216
43 MAR GroupGroup_2:TimeTime_4 1.0163423 0.01634231
44 MNAR GroupGroup_2:TimeTime_4 0.8633878 -0.13661223
rm(list = ls())
gc()
library(SPR)
library(ordinal)
library(emmeans)
# Initialize output:
score.names <- c('Y_comp', 'Y_mcar', 'Y_mar', 'Y_mnar')
out <- data.frame('Param_type' = NA,
'Data' = NA,
'Estimate' = NA,
'Gen_param' = NA)
number.repl <- 100
#--------------------------
st <- Sys.time()
for (repl in 1:number.repl) {
set.seed(6252021 + repl)
sim.out <- SPR::sim_dat_ord_logistic_conditional(
N = 300,
number.groups = 2 ,
number.timepoints = 4,
reg.formula = formula( ~ Time + Group + Time*Group),
Beta = 1,
thresholds = c(-2, 0, 2),
subject.var = 2,
cond.mcar = F,
Covariate = F)
dat <- sim.out$dat
dat <- SPR::dropout(dat = dat,
type_dropout = c('mcar', 'mar', 'mnar'),
prop.miss = 0.3)
aggregate(Y_mar ~ Time, FUN = function(x) sum(!is.na(x)),
na.action = na.pass, data = dat)
for (score in score.names) {
# formula:
mf <- as.formula(paste0('ordered(', score,') ~ Group + Time + Group*Time + (1|USUBJID)'))
# CLMM - Random Effect
mod.clmm <- ordinal::clmm(mf, nAGQ = 5, data= dat)
tmp <- data.frame(
'Param_type' = names(coef(mod.clmm)),
'Data' = score,
'Estimate' = coef(mod.clmm),
'Gen_param' = c(sim.out$thresholds, as.vector(sim.out$Beta)[-1]))
# drop the intercept from Beta!
out <- rbind.data.frame(out, tmp)
# Marginal Means
emm.clmm <-
suppressMessages(
emmeans::emmeans(mod.clmm, pairwise ~ Group | Time)
)
emm <- as.data.frame(emm.clmm$contrasts)
# Output
tmp <- data.frame(
'Param_type' = paste0('emm_', emm$Time),
'Data' = score,
'Estimate' = emm$estimate,
'Gen_param' = as.vector(-1*sim.out$Beta[5:8,]))
out <- rbind.data.frame(out, tmp)
}# score loop
cat(paste0('Replication: ', repl, '\n'))
}# end replications
# Runtime:
et <- Sys.time()
et - st
#---------------------------------------------------------------
# Data Mgmt
out$Data <- ifelse(out$Data == 'Y_comp', 'Complete',
ifelse(out$Data == 'Y_mcar', 'MCAR',
ifelse(out$Data == 'Y_mar', 'MAR',
ifelse(out$Data == 'Y_mnar', 'MNAR', NA))))
out$Data <- factor(out$Data, levels = c('Complete', 'MCAR', 'MAR', 'MNAR'))
#-------------------------------------------------------------
# Output
tab.out <- aggregate(
cbind(Estimate,'Bias' = Estimate - Gen_param) ~ Data + Param_type,
FUN = function(x) mean(x, na.rm = T),
na.action = na.pass,
data = out)
tab.out
tab.out[tab.out$Param_type == 'emm_Time_4', ]
tab.out[tab.out$Param_type == 'GroupGroup_2:TimeTime_4', ]
# g2g