a05_Multinomial.Rmd
Review and run the code below to better understand the data distribution.
N = 1e4 #this should be divisible by however many groups you use!
number.groups <- 2
number.timepoints <- 1
set.seed(02032021)
dat <- data.frame(
'USUBJID' = rep(paste0('Subject_', formatC(1:N, width = 4, flag = '0')), length.out= N*number.timepoints),
'Group' = rep(paste0('Group_', 1:number.groups), length.out = N*number.timepoints),
stringsAsFactors=F)
# Create Beta parameters for these design matrix:
X <- model.matrix( ~ Group , data = dat)
k <- 5 # Number of categories in the nominal item
# Create Beta
Beta <- matrix(0, nrow = ncol(X), ncol = k - 1, dimnames=list(colnames(X), paste0('param', 1:(k-1))))
Beta[1, ] <- c(0.2, 0.8, 0.4, 0.6)
Beta[2, ] <- c(0.2, -1.0, -0.6, 0.8)
# Matrix multiply:
XB <- X %*% Beta
sum.expXB <- apply(exp(XB), 1, sum)
p <- exp(XB)/(1 + sum.expXB)
param0 <- 1 - rowSums(p)
p <- cbind(param0, p)
out <- vector()
for(i in 1:nrow(p)){
out <- c(out,
sample(x = c('A', 'B', 'C', 'D', 'E'), size = 1, prob = p[i, ])
)
} #end loop
dat$Y_nom <- out
#----
# Compare observed proportions to the probabilities:
prop.table(table(out))
colMeans(p)
# Plot data:
barplot(100*table(dat$Y_nom)/sum(table(dat$Y_nom)), ylim = c(0, 100), ylab = 'Percentage', col = 'grey', main = 'Nominal')
# Fit Models:
library(nnet)
dat$Y_nom_factor <- as.factor(dat$Y_nom)
mod <- nnet:::multinom(Y_nom_factor ~ Group, data = dat)
summary(mod)
coef(mod)
t(Beta)
# What are the estimates?
# Compute the odds ratios
# Cross tabs
tab <- addmargins(xtabs(~ Y_nom + Group, data = dat), 1)
coef(mod)
# Odds of selecting B over reference category A
G1 <- tab['B', 'Group_1'] /tab['A', 'Group_1']
G2 <- tab['B', 'Group_2'] /tab['A', 'Group_2']
G2/G1
exp(coef(mod)['B','GroupGroup_2'])
# Odds of selecting C over reference category A
G1 <- tab['C', 'Group_1'] /tab['A', 'Group_1']
G2 <- tab['C', 'Group_2'] /tab['A', 'Group_2']
G2/G1
exp(coef(mod)['C','GroupGroup_2'])
# Odds of selecting D over reference category A
G1 <- tab['D', 'Group_1'] /tab['A', 'Group_1']
G2 <- tab['D', 'Group_2'] /tab['A', 'Group_2']
G2/G1
exp(coef(mod)['D','GroupGroup_2'])
# Odds of selecting C over reference category A
G1 <- tab['E', 'Group_1'] /tab['A', 'Group_1']
G2 <- tab['E', 'Group_2'] /tab['A', 'Group_2']
G2/G1
exp(coef(mod)['E','GroupGroup_2'])
# Custom written estimation routine:
multinom_loglike <- function(vP, X, Y, data, k){
vP <- matrix(vP, nrow = ncol(X), ncol = k - 1)
Y <- model.matrix( ~ - 1 + Y, data = data)
XB <- X %*% vP
sum.expXB <- apply(exp(XB), 1, sum)
p <- exp(XB)/(1 + sum.expXB)
param0 <- 1 - rowSums(p)
p <- cbind(param0, p)
# Loglikelihood:
loglike <- Y*log(p)
loglike <- -1*loglike # optimization will minimize function
loglike <- sum(loglike)
return(loglike)
}
# optimize
vP <- Beta
out <- optim(par = vP,
fn = multinom_loglike,
method = 'BFGS',
hessian = T,
X = X,
Y = dat$Y_nom_factor,
data = dat,
k = k)
# Compare Generating Parameter to nnet package estimate and the custom code estimate
cbind.data.frame('Gen_param' = t(Beta),
'nnet R package' = coef(mod),
'Custom code' = t(out$par))
# Compare SE:
SE <- sqrt(diag(solve(out$hessian)))
cbind.data.frame('nnet R package SE' = summary(mod)$standard.errors,
'Custom code SE' = matrix(SE, ncol = 2, byrow = T))
Conduct a simulation study with a unique set of conditions and compute the Type I error and Power. Remember to use the save()
and load()
functions. Use the R code provided below.
rm(list = ls())
gc()
library(nnet)
N = 100
number.groups <- 2
number.timepoints <- 1
set.seed(2032021)
dat <- data.frame(
'USUBJID' = rep(paste0('Subject_', formatC(1:N, width = 4, flag = '0')), length.out= N*number.timepoints),
'Group' = rep(paste0('Group_', 1:number.groups), length.out = N*number.timepoints),
stringsAsFactors=F)
# Create Beta parameters for these design matrix:
X <- model.matrix( ~ Group , data = dat)
k <- 5 # Number of categories in the nominal item
# Create Beta
Beta <- matrix(0, nrow = ncol(X), ncol = k - 1, dimnames=list(colnames(X), paste0('param', 1:(k-1))))
Beta[1, ] <- c(0.2, 0.8, 0.4, 0.6) # Intercepts
# Beta values will determine whether you're
# evaluating Type I error or Power:
#Beta[2, ] <- 0 # Type I error
Beta[2, ] <- c(0.2, -1.0, -0.6, 0.8) # Power
# Matrix multiply:
XB <- X %*% Beta
sum.expXB <- apply(exp(XB), 1, sum)
p <- exp(XB)/(1 + sum.expXB)
param0 <- 1 - rowSums(p)
p <- cbind(param0, p)
##
out <- vector()
for(repl in 1:1000){
Y <- vector()
for(i in 1:nrow(p)){
Y <- c(Y, sample(x = c('A', 'B', 'C', 'D', 'E'), size = 1, prob = p[i, ]))
} #end loop
dat$Y_nom <- Y
# Fit Models:
mod0 <- nnet:::multinom(Y_nom ~ 1, data = dat, trace = F)
mod1 <- nnet:::multinom(Y_nom ~ Group, data = dat, trace = F)
tmp <- anova(mod0, mod1)
out <- c(out, tmp$`Pr(Chi)`[2])
cat('Replication: ', repl, '\n')
} #end loop
mean(out < 0.05)