a15_CLMM_logistic.Rmd
The results are unclear currently - 6/25/21. Why would the performance be this high?
> tab.out[tab.out$Param_type == 'GroupGroup_2:TimeTime_4', ]
Approach Data Param_type Estimate Bias49 clm Complete GroupGroup_2:TimeTime_4 0.9933811 -0.006618925
50 gee Complete GroupGroup_2:TimeTime_4 -0.9951194 -1.995119386
51 clm MCAR GroupGroup_2:TimeTime_4 0.9780867 -0.021913267
52 gee MCAR GroupGroup_2:TimeTime_4 -0.9814151 -1.981415127
53 clm MAR GroupGroup_2:TimeTime_4 0.9536493 -0.046350734
54 gee MAR GroupGroup_2:TimeTime_4 -0.9557070 -1.955707037
55 clm MNAR GroupGroup_2:TimeTime_4 0.9028185 -0.097181478
56 gee MNAR GroupGroup_2:TimeTime_4 -0.9177229 -1.917722939
rm(list = ls())
gc()
library(SPR)
library(ordinal)
library(multgee)
# Initialize output:
score.names <- c('Y_comp', 'Y_mcar', 'Y_mar', 'Y_mnar')
out <- data.frame('Approach' = NA,
'Param_type' = NA,
'Data' = NA,
'Estimate' = NA,
'Gen_param' = NA)
number.repl <- 100
#--------------------------
st <- Sys.time()
for (repl in 1:number.repl) {
set.seed(6252021 + repl)
sim.out <- SPR::sim_dat_ord_logistic(
N = 300,
number.groups = 2 ,
number.timepoints = 4,
reg.formula = formula( ~ Time + Group + Time*Group),
Beta = 1,
thresholds = c(-1, 0, 1),
corr = 'ar1',
cor.value = 0.4)
dat <- sim.out$dat
dat <- SPR::dropout(dat = dat,
type_dropout = c('mcar', 'mar', 'mnar'),
prop.miss = 0.3)
# # Check your data:
# aggregate(Y_mar ~ Time, FUN = function(x) sum(!is.na(x)),
# na.action = na.pass, data = dat)
#
# # Quick barplot, base R plot functions
# barplot(100*table(dat$Y_mar)/sum(table(dat$Y_mar)),
# ylim = c(0, 100), ylab = 'Percentage',
# col = 'grey', main = 'Ordinal')
# Check the correlations:
# polychor(x = dat$Y_comp[dat$Time == 'Time_1'], y = dat$Y_comp[dat$Time == 'Time_2'])
# polychor(x = dat$Y_comp[dat$Time == 'Time_2'], y = dat$Y_comp[dat$Time == 'Time_3'])
# polychor(x = dat$Y_comp[dat$Time == 'Time_3'], y = dat$Y_comp[dat$Time == 'Time_4'])
for (score in score.names) {
# formula:
mf <- as.formula(paste0('ordered(', score,') ~ Group + Time + Group*Time'))
# CLM - no random effect
mod.clm <- ordinal::clm(mf, nAGQ = 5, data= dat)
# Generalized Estimating Equations:
dat.gee <- dat
tmp <- order(dat.gee$id.geepack)
dat.gee <- dat.gee[tmp, ]
mod.gee <- ordLORgee(formula = mf,
data = dat.gee,
id = id.geepack,
repeated = Time, LORstr = "uniform")
# Output:
tmp <- data.frame(
'Approach' = 'clm',
'Param_type' = names(coef(mod.clm)),
'Data' = score,
'Estimate' = coef(mod.clm),
'Gen_param' = c(sim.out$thresholds, as.vector(sim.out$Beta)[-1]))
# drop the intercept from Beta!
out <- rbind.data.frame(out, tmp)
# Output
tmp <- data.frame(
'Approach' = 'gee',
'Param_type' = names(coef(mod.gee)),
'Data' = score,
'Estimate' = coef(mod.gee),
'Gen_param' = c(sim.out$thresholds, as.vector(sim.out$Beta)[-1]))
out <- rbind.data.frame(out, tmp)
# Conditional Model fit to Marginal Data is SLOW!
# skip for now
# mff <- as.formula(paste0('ordered(', score,') ~ Group + Time + Group*Time + (1|USUBJID)'))
# mod.clmm <- ordinal::clmm(mff, nAGQ = 5, data= dat)
# sigma2 <- ordinal::VarCorr(mod.clmm)
# sigma2 <- as.numeric(as.data.frame(sigma2))
# denom.logit <- sqrt( (sigma2 + (pi^2/3))/(pi^2/3) )
}# score loop
cat(paste0('Replication: ', repl, '\n'))
}# end replications
# Runtime:
et <- Sys.time()
et - st
#---------------------------------------------------------------
# Data Mgmt
out$Data <- ifelse(out$Data == 'Y_comp', 'Complete',
ifelse(out$Data == 'Y_mcar', 'MCAR',
ifelse(out$Data == 'Y_mar', 'MAR',
ifelse(out$Data == 'Y_mnar', 'MNAR', NA))))
out$Data <- factor(out$Data, levels = c('Complete', 'MCAR', 'MAR', 'MNAR'))
#-------------------------------------------------------------
# Output
tab.out <- aggregate(
cbind(Estimate,'Bias' = Estimate - Gen_param) ~ Approach + Data + Param_type,
FUN = function(x) mean(x, na.rm = T),
na.action = na.pass,
data = out)
tab.out
tab.out[tab.out$Param_type == 'GroupGroup_2:TimeTime_4', ]
# ???
Generate longitudinal ordinal data, using probit specification.
Unclear how to fit a longitudinal model to this data.
library(SPR)
library(MASS)
library(polycor)
out <- sim_dat_ord(N = 10000,
number.groups = 2 ,
number.timepoints = 4,
reg.formula = formula( ~ Time + Group + Time*Group),
Beta = 0.5,
thresholds = c(0.25, 0.50, 0.75),
corr = 'ar1',
cor.value = 0.4)
dat <- out$dat
str(dat)
dat <- dropout(dat = dat,
type_dropout = c('mcar'),
prop.miss = 0.3)
# Ordinal
mod.ord1 <- MASS:::polr(as.factor(Y_comp) ~ Group + Time + Group*Time , data= dat, method = 'probit', Hess = T)
mod.ord2 <- MASS:::polr(as.factor(Y_mcar) ~ Group + Time + Group*Time , data= dat, method = 'probit', Hess = T)
out$Beta
matrix(mod.ord1$coefficients, ncol = 1, dimnames = list(names(mod.ord1$coefficients)))
matrix(mod.ord2$coefficients, ncol = 1, dimnames = list(names(mod.ord2$coefficients)))
out$thresholds
mod.ord1$zeta
mod.ord2$zeta