a03_Vignette_Test_Retest_Reliability.Rmd
Recently, the FDA has recommended ICC(A,1). Refer to this article: https://link.springer.com/article/10.1007/s11136-018-2076-0
COA34::compute_iccA1
is just a wrapper for the icc
function in the irr
R package. This R function just makes it easier to compute the values and output the tables. The underlying code is available here: https://github.com/cran/irr/blob/master/R/icc.R
Figure out what the generating value is - is it the residual correlation? Sim study needed!
Explore further how MAR/MNAR drop-out severely attenuates the ICC(A,1) value
set.seed(12162021)
sim.out <- COA34::sim_pro_dat_v2(N=1000,
number.timepoints = 3,
Beta.PRO = NULL,
number.of.anchor.groups = 5,
polychor.value = 0.7,
corr = 'ar1',
cor.value = 0.8,
#var.values = c(7.136))
var.values = c(2))
dat <- sim.out$dat
# Simulate IRT items
# use the scores generated as the theta in the IRT model
sim.out2 <- COA34::sim_irt_item(dat = dat, J = 5, K = 4, latent.variable = 'Y_comp')
str(sim.out2)
#> List of 3
#> $ dat :'data.frame': 3000 obs. of 22 variables:
#> ..$ USUBJID : chr [1:3000] "Subject_0001" "Subject_0001" "Subject_0001" "Subject_0002" ...
#> ..$ Time : Factor w/ 3 levels "Time_1","Time_2",..: 1 2 3 1 2 3 1 2 3 1 ...
#> ..$ PGIS : num [1:3000] 3 4 3 0 0 0 0 0 1 1 ...
#> ..$ PGIS_bl : num [1:3000] 3 3 3 0 0 0 0 0 0 1 ...
#> ..$ PGIS_delta: num [1:3000] 0 1 0 0 0 0 0 0 1 0 ...
#> ..$ ag : num [1:3000] 0 1 0 0 0 0 0 0 1 0 ...
#> ..$ XB : num [1:3000] 4 4.25 4 1 1 1 1 1 2 2 ...
#> ..$ error : num [1:3000] -0.336 -0.509 0.122 0.825 1.212 ...
#> ..$ Y_comp : num [1:3000] 3.66 3.74 4.12 1.82 2.21 ...
#> ..$ Val_1 : int [1:3000] 2 NA NA 0 NA NA 0 NA NA 0 ...
#> ..$ Val_2 : num [1:3000] 6.38 NA NA 3.87 NA ...
#> ..$ Val_3 : num [1:3000] 3.73 NA NA 4.57 NA ...
#> ..$ Val_4 : num [1:3000] 0.555 NA NA 1.558 NA ...
#> ..$ Val_5 : int [1:3000] 4 NA NA 4 NA NA 0 NA NA 2 ...
#> ..$ Y_mcar : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#> ..$ Y_mar : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#> ..$ Y_mnar : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#> ..$ Item_1 : int [1:3000] 2 2 2 1 1 1 0 1 0 1 ...
#> ..$ Item_2 : int [1:3000] 3 1 3 0 2 3 0 0 0 0 ...
#> ..$ Item_3 : int [1:3000] 2 2 2 1 1 3 0 0 0 0 ...
#> ..$ Item_4 : int [1:3000] 3 3 1 0 0 2 2 0 1 0 ...
#> ..$ Item_5 : int [1:3000] 2 2 2 1 0 1 0 0 1 1 ...
#> $ item.param : num [1:5, 1:4] 2 2 2 2 2 -2 -2 -2 -2 -2 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:5] "Item_1" "Item_2" "Item_3" "Item_4" ...
#> .. ..$ : chr [1:4] "slope" "intercept_1" "intercept_2" "intercept_3"
#> $ item.responses:'data.frame': 3000 obs. of 5 variables:
#> ..$ Item_1: int [1:3000] 2 2 2 1 1 1 0 1 0 1 ...
#> ..$ Item_2: int [1:3000] 3 1 3 0 2 3 0 0 0 0 ...
#> ..$ Item_3: int [1:3000] 2 2 2 1 1 3 0 0 0 0 ...
#> ..$ Item_4: int [1:3000] 3 3 1 0 0 2 2 0 1 0 ...
#> ..$ Item_5: int [1:3000] 2 2 2 1 0 1 0 0 1 1 ...
dat <- sim.out2$dat
# Score the PRO - just take a simple sum score here:
dat$PRO.score <- apply(dat[, grep('Item', colnames(dat))], 1, sum)
# Create the same PRO score, but with MAR drop-out:
dat$PRO.score_mar <- dat$PRO.score
dat$PRO.score_mar[is.na(dat$Y_mar)] <- NA
# Note that you've just set the PRO score to missing wherever the Y_mar variable is missing
# Now for the other missing types:
dat$PRO.score_mcar <- dat$PRO.score_mnar <- dat$PRO.score
dat$PRO.score_mcar[is.na(dat$Y_mcar)] <- NA
dat$PRO.score_mnar[is.na(dat$Y_mnar)] <- NA
aggregate(cbind(PRO.score, PRO.score_mcar, PRO.score_mar, PRO.score_mnar) ~ Time,
function(x) mean(x, na.rm = T),
data = dat,
na.action = na.pass)
#> Time PRO.score PRO.score_mcar PRO.score_mar PRO.score_mnar
#> 1 Time_1 7.445 7.445000 7.445 7.445
#> 2 Time_2 7.492 7.486667 5.912 5.748
#> 3 Time_3 7.520 7.736000 5.210 4.204
# You already have the PGIS_bl and PGIS_delta in the generated data,
# you wouldn't have that in a real dataset, so drop that first
dat <- dat[, !(colnames(dat) %in% c('PGIS_bl', 'PGIS_delta'))]
# This makes this more realistic
# use the function below:
dat <- COA34::compute_anchor_delta(dat = dat,
subject.id = 'USUBJID',
time.var = 'Time',
anchor = 'PGIS')
dat <- COA34::compute_change_score(dat = dat,
subject.id = 'USUBJID',
time.var = 'Time',
score = c('PRO.score', 'PRO.score_mcar',
'PRO.score_mar', 'PRO.score_mnar'))
# The function creates variables with the same name plus "_delta":
str(dat)
#> 'data.frame': 3000 obs. of 34 variables:
#> $ USUBJID : chr "Subject_0001" "Subject_0001" "Subject_0001" "Subject_0002" ...
#> $ Time : Factor w/ 3 levels "Time_1","Time_2",..: 1 2 3 1 2 3 1 2 3 1 ...
#> $ PGIS : num 3 4 3 0 0 0 0 0 1 1 ...
#> $ ag : num 0 1 0 0 0 0 0 0 1 0 ...
#> $ XB : num 4 4.25 4 1 1 1 1 1 2 2 ...
#> $ error : num -0.336 -0.509 0.122 0.825 1.212 ...
#> $ Y_comp : num 3.66 3.74 4.12 1.82 2.21 ...
#> $ Val_1 : int 2 NA NA 0 NA NA 0 NA NA 0 ...
#> $ Val_2 : num 6.38 NA NA 3.87 NA ...
#> $ Val_3 : num 3.73 NA NA 4.57 NA ...
#> $ Val_4 : num 0.555 NA NA 1.558 NA ...
#> $ Val_5 : int 4 NA NA 4 NA NA 0 NA NA 2 ...
#> $ Y_mcar : num 3.66 3.74 NA 1.82 2.21 ...
#> $ Y_mar : num 3.66 3.74 NA 1.82 2.21 ...
#> $ Y_mnar : num 3.66 3.74 NA 1.82 2.21 ...
#> $ Item_1 : int 2 2 2 1 1 1 0 1 0 1 ...
#> $ Item_2 : int 3 1 3 0 2 3 0 0 0 0 ...
#> $ Item_3 : int 2 2 2 1 1 3 0 0 0 0 ...
#> $ Item_4 : int 3 3 1 0 0 2 2 0 1 0 ...
#> $ Item_5 : int 2 2 2 1 0 1 0 0 1 1 ...
#> $ PRO.score : int 12 10 10 3 4 10 2 1 2 2 ...
#> $ PRO.score_mar : int 12 10 NA 3 4 10 2 1 2 2 ...
#> $ PRO.score_mnar : int 12 10 NA 3 4 10 2 1 2 2 ...
#> $ PRO.score_mcar : int 12 10 NA 3 4 10 2 1 NA 2 ...
#> $ PGIS_bl : num 3 3 3 0 0 0 0 0 0 1 ...
#> $ PGIS_delta : num 0 1 0 0 0 0 0 0 1 0 ...
#> $ PRO.score_bl : int 12 12 12 3 3 3 2 2 2 2 ...
#> $ PRO.score_mcar_bl : int 12 12 12 3 3 3 2 2 2 2 ...
#> $ PRO.score_mar_bl : int 12 12 12 3 3 3 2 2 2 2 ...
#> $ PRO.score_mnar_bl : int 12 12 12 3 3 3 2 2 2 2 ...
#> $ PRO.score_delta : int 0 -2 -2 0 1 7 0 -1 0 0 ...
#> $ PRO.score_mcar_delta: int 0 -2 NA 0 1 7 0 -1 NA 0 ...
#> $ PRO.score_mar_delta : int 0 -2 NA 0 1 7 0 -1 0 0 ...
#> $ PRO.score_mnar_delta: int 0 -2 NA 0 1 7 0 -1 0 0 ...
Compute the ICC(A,1) values and output them to a table:
icc <- COA34::compute_iccA1(dat = dat,
PRO.score = c('PRO.score',
'PRO.score_mcar',
'PRO.score_mar',
'PRO.score_mnar'),
time.var = 'Time',
subject.id = 'USUBJID',
anchor = 'PGIS_delta',
stable.score = 0,
first.timepoint = 'Time_1',
second.timepoint = 'Time_2')
Print the table out:
library(R2Word)
R2Word::dump_df_mat_to_file(out = icc$icc.A1,
decimals = c(2, 0, 0),
table.title = 'ICC(A,1) - Vignette Illustration',
table.footnote = '**All data simulated',
file.name = 'trtr_vig_iccA1',
print.dir = print.dir)
ICC(A,1) - Vignette Illustration | ||||
PRO Score |
ICC(A,1) |
N |
Anchor |
Stable Anchor Score |
PRO.score |
0.80 |
438 |
PGIS_delta |
0 |
PRO.score_mcar |
0.79 |
328 |
PGIS_delta |
0 |
PRO.score_mar |
0.65 |
314 |
PGIS_delta |
0 |
PRO.score_mnar |
0.63 |
315 |
PGIS_delta |
0 |
**All data simulated |
If you’re putting together shell tables, use the function shell_table
, specifying that you want columns 2 and 3 to be converted to x instead of the numeric values.
st <- R2Word::shell_table(out = icc$icc.A1, decimals = c(2, 0), cols = c(2, 3))
R2Word::dump_df_mat_to_file(out = st,
decimals = 0,
table.title = 'ICC(A,1) - Vignette Illustration',
table.footnote = '**All data simulated',
file.name = 'trtr_vig_iccA1_shell_tables',
print.dir = print.dir)
ICC(A,1) - Vignette Illustration | ||||
PRO Score |
ICC(A,1) |
N |
Anchor |
Stable Anchor Score |
PRO.score |
x.xx |
xxx |
PGIS_delta |
0 |
PRO.score_mcar |
x.xx |
xxx |
PGIS_delta |
0 |
PRO.score_mar |
x.xx |
xxx |
PGIS_delta |
0 |
PRO.score_mnar |
x.xx |
xxx |
PGIS_delta |
0 |
**All data simulated |