Generated data using conditional model - other CLMM Vignettes were using marginal data generation.

Data Generation

  1. Generate longitudinal ordinal data
  2. Logistic specification
  3. Conditional specification - random intercepts

Simulation Study 1, Conditional Data Generation

The conditional data generation does NOT allow you to just fit a model without random intercepts. This is distinct from the marginal models! With marginal models only the SE is affected; with conditional models, you can’t ignore the subject effects or else you bias the estimates. So of course the fixed effects only model clm() won’t handle missing data correctly, because it can’t handle complete data correctly!

Spoiler: Results, Simulation Study 1

                            clm          gee         clmm
0|1                     -1.503910258 -1.504229339 -2.029189347
1|2                      0.012948946  0.012290884  0.009714282
2|3                      1.497568976  1.496833165  2.035309535
GroupGroup_2            -0.001289427  0.005188065 -0.001165598
TimeTime_2              -0.004865262  0.022942622 -0.007264689
TimeTime_3              -0.022885972 -0.011805839 -0.031622008
TimeTime_4               0.011694160  0.003421927  0.014869004
GroupGroup_2:TimeTime_2  0.202491944 -0.202849533  0.270631754
GroupGroup_2:TimeTime_3  0.500235452 -0.501617735  0.675335162
GroupGroup_2:TimeTime_4  0.748884352 -0.749870769  1.012121149
> 
  
 # Marginal means:
> cbind('clm' = colMeans(out.emms.clm), 
+       'clmm' = colMeans(out.emms.clmm))
              clm         clmm
[1,]  0.001289427  0.001165598
[2,] -0.201202517 -0.269466156
[3,] -0.498946025 -0.674169564
[4,] -0.747594924 -1.010955551 

Simulation Study 1 code

rm(list = ls())
gc()

library(SPR)
library(ordinal)
library(multgee)
library(emmeans)

# Initialize output:
out.clm <- vector()
out.clmm <- vector()
out.gee <- vector()
out.emms.clm <- vector()
out.emms.clmm <- vector()
number.repl <- 100


#--------------------------
for (repl in 1:number.repl) {

  set.seed(6252021 + repl)

  sim.out <- SPR::sim_dat_ord_logistic_conditional(N = 300,
                                                   number.groups = 2 ,
                                                   number.timepoints = 4,
                                                   reg.formula = formula( ~ Time + Group + Time*Group),
                                                   Beta = 1,
                                                   thresholds = c(-2, 0, 2),
                                                   subject.var = 2,
                                                   cond.mcar = F,
                                                   Covariate = F)


  dat <- sim.out$dat



# Visualization:
  # Quick barplot, base R plot functions
  # barplot(100*table(dat$Y_comp)/sum(table(dat$Y_comp)),
  #       ylim = c(0, 100), ylab = 'Percentage',
  #       col = 'grey', main = 'Ordinal')



#------------
  # CLM
  mod.clm <- ordinal::clm(as.factor(Y_comp) ~ Group + Time + Group*Time,
                          nAGQ = 5, data= dat)


  # CLMM - Random Effect
  mod.clmm <- ordinal::clmm(as.factor(Y_comp) ~ Group + Time + Group*Time + (1|USUBJID),
                            nAGQ = 5, data= dat)


  # Generalized Estimating Equations:
  dat.gee <- dat
  tmp <- order(dat.gee$id.geepack)
  dat.gee <- dat.gee[tmp, ]
  mod.gee <- multgee::ordLORgee(formula = Y_comp ~ Time + Group + Time*Group,
                                data = dat.gee,
                                id = id.geepack,
                                repeated = Time, LORstr = "uniform")


  # Marginal Means
  # emmeans::emmeans(mod.gee, pairwise ~ Group | Time) NOT AVAILABLE
  emm.clm <- emmeans::emmeans(mod.clm, pairwise ~ Group | Time)
  emm.clmm <- emmeans::emmeans(mod.clmm, pairwise ~ Group | Time)


  # Output
  out.gee <- rbind(out.gee, coef(mod.gee))
  out.clm <- rbind(out.clm, coef(mod.clm))
  out.clmm <- rbind(out.clmm, coef(mod.clmm))
  out.emms.clm  <- rbind(out.emms.clm,
                         as.data.frame(emm.clm$contrast)$estimate)
  out.emms.clmm <- rbind(out.emms.clmm,
                         as.data.frame(emm.clmm$contrast)$estimate)



  cat(paste0('Replication: ', repl, '\n'))

}# end replications

sim.out$Beta
sim.out$thresholds
# All parameters
cbind('clm' = colMeans(out.clm), 
      'gee' = colMeans(out.gee), 
      'clmm' = colMeans(out.clmm))

# Marginal means:
cbind('clm' = colMeans(out.emms.clm), 
      'clmm' = colMeans(out.emms.clmm))

Simulation Study 2, Drop-out

How does the conditional model handle drop-out? Results of simulation study show that it behaves the way you would expect - the CLMM returns unbiased estimates under MAR drop-out, and biased estimates under MNAR.

Spoiler: results of simulation study 2

> tab.out[tab.out$Param_type == 'GroupGroup_2:TimeTime_4', ]
       Data              Param_type  Estimate        Bias
41 Complete GroupGroup_2:TimeTime_4 1.0121211  0.01212115
42     MCAR GroupGroup_2:TimeTime_4 1.0204622  0.02046216
43      MAR GroupGroup_2:TimeTime_4 1.0163423  0.01634231
44     MNAR GroupGroup_2:TimeTime_4 0.8633878 -0.13661223

Simulation Study code

rm(list = ls())
gc()

library(SPR)
library(ordinal)
library(emmeans)

# Initialize output:
score.names <- c('Y_comp', 'Y_mcar', 'Y_mar', 'Y_mnar')

out <- data.frame('Param_type' = NA,
                  'Data' = NA,
                  'Estimate' = NA,
                  'Gen_param' = NA)

number.repl <- 100


#--------------------------
  st <- Sys.time()
for (repl in 1:number.repl) {

  set.seed(6252021 + repl)

  sim.out <- SPR::sim_dat_ord_logistic_conditional(
    N = 300,
    number.groups = 2 ,
    number.timepoints = 4,
    reg.formula = formula( ~ Time + Group + Time*Group),
    Beta = 1,
    thresholds = c(-2, 0, 2),
    subject.var = 2,
    cond.mcar = F,
    Covariate = F)


  dat <- sim.out$dat

  dat <- SPR::dropout(dat = dat,
                      type_dropout  = c('mcar', 'mar', 'mnar'),
                      prop.miss = 0.3)


  aggregate(Y_mar ~ Time, FUN = function(x) sum(!is.na(x)),
            na.action = na.pass, data = dat)

  for (score in score.names) {

    # formula:
    mf <- as.formula(paste0('ordered(', score,') ~ Group + Time + Group*Time + (1|USUBJID)'))

    # CLMM - Random Effect
    mod.clmm <- ordinal::clmm(mf, nAGQ = 5, data= dat)

    tmp <- data.frame(
      'Param_type' = names(coef(mod.clmm)),
      'Data' = score,
      'Estimate' = coef(mod.clmm),
      'Gen_param' = c(sim.out$thresholds, as.vector(sim.out$Beta)[-1]))
    # drop the intercept from Beta!

    out <- rbind.data.frame(out, tmp)

    # Marginal Means
    emm.clmm <-
      suppressMessages(
        emmeans::emmeans(mod.clmm, pairwise ~ Group | Time)
      )

    emm <- as.data.frame(emm.clmm$contrasts)

    # Output
    tmp <- data.frame(
      'Param_type' = paste0('emm_', emm$Time),
      'Data' = score,
      'Estimate' = emm$estimate,
      'Gen_param' = as.vector(-1*sim.out$Beta[5:8,]))

    out <- rbind.data.frame(out, tmp)


  }# score loop

  cat(paste0('Replication: ', repl, '\n'))

}# end replications

# Runtime:
et <- Sys.time()
et - st

#---------------------------------------------------------------
# Data Mgmt
out$Data <- ifelse(out$Data == 'Y_comp', 'Complete',
       ifelse(out$Data == 'Y_mcar', 'MCAR',
              ifelse(out$Data == 'Y_mar', 'MAR',
                    ifelse(out$Data == 'Y_mnar', 'MNAR', NA))))

out$Data <- factor(out$Data, levels = c('Complete', 'MCAR', 'MAR', 'MNAR'))
#-------------------------------------------------------------
# Output
tab.out <- aggregate(
  cbind(Estimate,'Bias' = Estimate - Gen_param) ~ Data + Param_type,
  FUN = function(x)  mean(x, na.rm = T),
  na.action = na.pass,
  data = out)

tab.out
tab.out[tab.out$Param_type == 'emm_Time_4', ]
tab.out[tab.out$Param_type == 'GroupGroup_2:TimeTime_4', ]
# g2g