Data Generation

  1. Generate longitudinal ordinal data
  2. Logistic specification
  3. Marginal model - ordinal GEE should work.

Results of Simulation Study, Missing Data

The results are unclear currently - 6/25/21. Why would the performance be this high?

> tab.out[tab.out$Param_type == 'GroupGroup_2:TimeTime_4', ]
   Approach     Data              Param_type   Estimate         Bias
49      clm Complete GroupGroup_2:TimeTime_4  0.9933811 -0.006618925
50      gee Complete GroupGroup_2:TimeTime_4 -0.9951194 -1.995119386
51      clm     MCAR GroupGroup_2:TimeTime_4  0.9780867 -0.021913267
52      gee     MCAR GroupGroup_2:TimeTime_4 -0.9814151 -1.981415127
53      clm      MAR GroupGroup_2:TimeTime_4  0.9536493 -0.046350734
54      gee      MAR GroupGroup_2:TimeTime_4 -0.9557070 -1.955707037
55      clm     MNAR GroupGroup_2:TimeTime_4  0.9028185 -0.097181478
56      gee     MNAR GroupGroup_2:TimeTime_4 -0.9177229 -1.917722939

Simulation Study Code

rm(list = ls())
gc()

library(SPR)
library(ordinal)
library(multgee)

# Initialize output:
score.names <- c('Y_comp', 'Y_mcar', 'Y_mar', 'Y_mnar')

out <- data.frame('Approach' = NA,
                  'Param_type' = NA,
                  'Data' = NA,
                  'Estimate' = NA,
                  'Gen_param' = NA)

number.repl <- 100


#--------------------------
  st <- Sys.time()
for (repl in 1:number.repl) {

  set.seed(6252021 + repl)

  sim.out <- SPR::sim_dat_ord_logistic(
    N = 300,
    number.groups = 2 ,
    number.timepoints = 4,
    reg.formula = formula( ~ Time + Group + Time*Group),
    Beta = 1,
    thresholds = c(-1, 0, 1),
    corr = 'ar1',
    cor.value = 0.4)


  dat <- sim.out$dat

  dat <- SPR::dropout(dat = dat,
                      type_dropout  = c('mcar', 'mar', 'mnar'),
                      prop.miss = 0.3)

# # Check your data:
#   aggregate(Y_mar ~ Time, FUN = function(x) sum(!is.na(x)),
#             na.action = na.pass, data = dat)
#
#   # Quick barplot, base R plot functions
# barplot(100*table(dat$Y_mar)/sum(table(dat$Y_mar)),
#         ylim = c(0, 100), ylab = 'Percentage',
#         col = 'grey', main = 'Ordinal')
# Check the correlations:
# polychor(x = dat$Y_comp[dat$Time == 'Time_1'], y = dat$Y_comp[dat$Time == 'Time_2'])
# polychor(x = dat$Y_comp[dat$Time == 'Time_2'], y = dat$Y_comp[dat$Time == 'Time_3'])
# polychor(x = dat$Y_comp[dat$Time == 'Time_3'], y = dat$Y_comp[dat$Time == 'Time_4'])


  for (score in score.names) {

    # formula:
    mf <- as.formula(paste0('ordered(', score,') ~ Group + Time + Group*Time'))

    # CLM - no random effect
    mod.clm <- ordinal::clm(mf, nAGQ = 5, data= dat)

    # Generalized Estimating Equations:
    dat.gee <- dat
      tmp <- order(dat.gee$id.geepack)
    dat.gee <- dat.gee[tmp, ]
    mod.gee <- ordLORgee(formula = mf,
                     data = dat.gee,
                     id = id.geepack,
                     repeated = Time, LORstr = "uniform")

  # Output:
    tmp <- data.frame(
      'Approach' = 'clm',
      'Param_type' = names(coef(mod.clm)),
      'Data' = score,
      'Estimate' = coef(mod.clm),
      'Gen_param' = c(sim.out$thresholds, as.vector(sim.out$Beta)[-1]))
    # drop the intercept from Beta!

    out <- rbind.data.frame(out, tmp)

    # Output
    tmp <- data.frame(
      'Approach' = 'gee',
      'Param_type' = names(coef(mod.gee)),
      'Data' = score,
      'Estimate' = coef(mod.gee),
      'Gen_param' = c(sim.out$thresholds, as.vector(sim.out$Beta)[-1]))

    out <- rbind.data.frame(out, tmp)

      # Conditional Model fit to Marginal Data is SLOW!
    # skip for now
    # mff <- as.formula(paste0('ordered(', score,') ~ Group + Time + Group*Time + (1|USUBJID)'))
    # mod.clmm <- ordinal::clmm(mff, nAGQ = 5, data= dat)
    # sigma2 <- ordinal::VarCorr(mod.clmm)
    #   sigma2 <- as.numeric(as.data.frame(sigma2))
    #   denom.logit <- sqrt( (sigma2 + (pi^2/3))/(pi^2/3) )


  }# score loop

  cat(paste0('Replication: ', repl, '\n'))

}# end replications

# Runtime:
et <- Sys.time()
et - st

#---------------------------------------------------------------
# Data Mgmt
out$Data <- ifelse(out$Data == 'Y_comp', 'Complete',
       ifelse(out$Data == 'Y_mcar', 'MCAR',
              ifelse(out$Data == 'Y_mar', 'MAR',
                    ifelse(out$Data == 'Y_mnar', 'MNAR', NA))))

out$Data <- factor(out$Data, levels = c('Complete', 'MCAR', 'MAR', 'MNAR'))
#-------------------------------------------------------------
# Output
tab.out <- aggregate(
  cbind(Estimate,'Bias' = Estimate - Gen_param) ~ Approach + Data + Param_type,
  FUN = function(x)  mean(x, na.rm = T),
  na.action = na.pass,
  data = out)

tab.out
tab.out[tab.out$Param_type == 'GroupGroup_2:TimeTime_4', ]
# ???

Ordinal Probit Specification

Generate longitudinal ordinal data, using probit specification.

Unclear how to fit a longitudinal model to this data.


library(SPR)
library(MASS)
library(polycor)


  out <- sim_dat_ord(N = 10000,
                 number.groups = 2 ,
                 number.timepoints = 4,
                 reg.formula = formula( ~ Time + Group + Time*Group),
                 Beta = 0.5,
                 thresholds = c(0.25, 0.50, 0.75),
                 corr = 'ar1',
                 cor.value = 0.4)


  dat <- out$dat
  str(dat)

  dat <- dropout(dat = dat,
                 type_dropout  = c('mcar'),
                 prop.miss = 0.3)

# Ordinal
mod.ord1 <- MASS:::polr(as.factor(Y_comp) ~ Group + Time + Group*Time , data= dat, method = 'probit', Hess = T)
mod.ord2 <- MASS:::polr(as.factor(Y_mcar) ~ Group + Time + Group*Time , data= dat, method = 'probit', Hess = T)

out$Beta
matrix(mod.ord1$coefficients, ncol = 1, dimnames = list(names(mod.ord1$coefficients)))
matrix(mod.ord2$coefficients, ncol = 1, dimnames = list(names(mod.ord2$coefficients)))
out$thresholds
mod.ord1$zeta
mod.ord2$zeta