Evaluate Test-Retest Reliability

Recently, the FDA has recommended ICC(A,1). Refer to this article: https://link.springer.com/article/10.1007/s11136-018-2076-0


COA34::compute_iccA1 is just a wrapper for the icc function in the irr R package. This R function just makes it easier to compute the values and output the tables. The underlying code is available here: https://github.com/cran/irr/blob/master/R/icc.R

TODO

  1. Figure out what the generating value is - is it the residual correlation? Sim study needed!

  2. Explore further how MAR/MNAR drop-out severely attenuates the ICC(A,1) value

Required R packages


library(COA34)
# In addition, you'll need the following
library(ggplot2)
library(grid)
library(gridExtra)

Generate data


  set.seed(12162021)

  sim.out <- COA34::sim_pro_dat_v2(N=1000,
                            number.timepoints = 3,
                            Beta.PRO = NULL,
                            number.of.anchor.groups = 5,
                            polychor.value = 0.7,
                            corr = 'ar1',
                            cor.value = 0.8,
                            #var.values = c(7.136))
                            var.values = c(2))
  dat <- sim.out$dat

 

Implement drop-out


dat <- COA34::dropout(dat = dat,
               type_dropout  = c('mcar', 'mar', 'mnar'),
               prop.miss = 0.5,
               stochastic.component = 0.2)

Simulate Items


 # Simulate IRT items
 # use the scores generated as the theta in the IRT model 
  sim.out2 <- COA34::sim_irt_item(dat = dat, J = 5, K = 4, latent.variable = 'Y_comp')
    str(sim.out2)
#> List of 3
#>  $ dat           :'data.frame':  3000 obs. of  22 variables:
#>   ..$ USUBJID   : chr [1:3000] "Subject_0001" "Subject_0001" "Subject_0001" "Subject_0002" ...
#>   ..$ Time      : Factor w/ 3 levels "Time_1","Time_2",..: 1 2 3 1 2 3 1 2 3 1 ...
#>   ..$ PGIS      : num [1:3000] 3 4 3 0 0 0 0 0 1 1 ...
#>   ..$ PGIS_bl   : num [1:3000] 3 3 3 0 0 0 0 0 0 1 ...
#>   ..$ PGIS_delta: num [1:3000] 0 1 0 0 0 0 0 0 1 0 ...
#>   ..$ ag        : num [1:3000] 0 1 0 0 0 0 0 0 1 0 ...
#>   ..$ XB        : num [1:3000] 4 4.25 4 1 1 1 1 1 2 2 ...
#>   ..$ error     : num [1:3000] -0.336 -0.509 0.122 0.825 1.212 ...
#>   ..$ Y_comp    : num [1:3000] 3.66 3.74 4.12 1.82 2.21 ...
#>   ..$ Val_1     : int [1:3000] 2 NA NA 0 NA NA 0 NA NA 0 ...
#>   ..$ Val_2     : num [1:3000] 6.38 NA NA 3.87 NA ...
#>   ..$ Val_3     : num [1:3000] 3.73 NA NA 4.57 NA ...
#>   ..$ Val_4     : num [1:3000] 0.555 NA NA 1.558 NA ...
#>   ..$ Val_5     : int [1:3000] 4 NA NA 4 NA NA 0 NA NA 2 ...
#>   ..$ Y_mcar    : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#>   ..$ Y_mar     : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#>   ..$ Y_mnar    : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#>   ..$ Item_1    : int [1:3000] 2 2 2 1 1 1 0 1 0 1 ...
#>   ..$ Item_2    : int [1:3000] 3 1 3 0 2 3 0 0 0 0 ...
#>   ..$ Item_3    : int [1:3000] 2 2 2 1 1 3 0 0 0 0 ...
#>   ..$ Item_4    : int [1:3000] 3 3 1 0 0 2 2 0 1 0 ...
#>   ..$ Item_5    : int [1:3000] 2 2 2 1 0 1 0 0 1 1 ...
#>  $ item.param    : num [1:5, 1:4] 2 2 2 2 2 -2 -2 -2 -2 -2 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:5] "Item_1" "Item_2" "Item_3" "Item_4" ...
#>   .. ..$ : chr [1:4] "slope" "intercept_1" "intercept_2" "intercept_3"
#>  $ item.responses:'data.frame':  3000 obs. of  5 variables:
#>   ..$ Item_1: int [1:3000] 2 2 2 1 1 1 0 1 0 1 ...
#>   ..$ Item_2: int [1:3000] 3 1 3 0 2 3 0 0 0 0 ...
#>   ..$ Item_3: int [1:3000] 2 2 2 1 1 3 0 0 0 0 ...
#>   ..$ Item_4: int [1:3000] 3 3 1 0 0 2 2 0 1 0 ...
#>   ..$ Item_5: int [1:3000] 2 2 2 1 0 1 0 0 1 1 ...
    dat <- sim.out2$dat

# Score the PRO - just take a simple sum score here:
  dat$PRO.score <- apply(dat[, grep('Item', colnames(dat))], 1, sum)
  
# Create the same PRO score, but with MAR drop-out:
  dat$PRO.score_mar <- dat$PRO.score
  dat$PRO.score_mar[is.na(dat$Y_mar)] <- NA
# Note that you've just set the PRO score to missing wherever the Y_mar variable is missing
  
# Now for the other missing types:
    dat$PRO.score_mcar <- dat$PRO.score_mnar <- dat$PRO.score
    dat$PRO.score_mcar[is.na(dat$Y_mcar)] <- NA
    dat$PRO.score_mnar[is.na(dat$Y_mnar)] <- NA

  aggregate(cbind(PRO.score, PRO.score_mcar, PRO.score_mar, PRO.score_mnar) ~ Time,
                  function(x) mean(x, na.rm = T), 
                  data = dat, 
                  na.action = na.pass)
#>     Time PRO.score PRO.score_mcar PRO.score_mar PRO.score_mnar
#> 1 Time_1     7.445       7.445000         7.445          7.445
#> 2 Time_2     7.492       7.486667         5.912          5.748
#> 3 Time_3     7.520       7.736000         5.210          4.204

Compute PGIS delta


# You already have the PGIS_bl and PGIS_delta in the generated data,
# you wouldn't have that in a real dataset, so drop that first
dat <- dat[, !(colnames(dat) %in% c('PGIS_bl', 'PGIS_delta'))]
# This makes this more realistic
# use the function below:

dat <- COA34::compute_anchor_delta(dat = dat,
                                   subject.id = 'USUBJID',
                                   time.var = 'Time',
                                   anchor = 'PGIS')

Compute PRO Score delta

dat <- COA34::compute_change_score(dat = dat,
                                   subject.id = 'USUBJID',
                                   time.var = 'Time',
                                   score = c('PRO.score', 'PRO.score_mcar', 
                                             'PRO.score_mar', 'PRO.score_mnar'))

# The function creates variables with the same name plus "_delta":
str(dat)
#> 'data.frame':    3000 obs. of  34 variables:
#>  $ USUBJID             : chr  "Subject_0001" "Subject_0001" "Subject_0001" "Subject_0002" ...
#>  $ Time                : Factor w/ 3 levels "Time_1","Time_2",..: 1 2 3 1 2 3 1 2 3 1 ...
#>  $ PGIS                : num  3 4 3 0 0 0 0 0 1 1 ...
#>  $ ag                  : num  0 1 0 0 0 0 0 0 1 0 ...
#>  $ XB                  : num  4 4.25 4 1 1 1 1 1 2 2 ...
#>  $ error               : num  -0.336 -0.509 0.122 0.825 1.212 ...
#>  $ Y_comp              : num  3.66 3.74 4.12 1.82 2.21 ...
#>  $ Val_1               : int  2 NA NA 0 NA NA 0 NA NA 0 ...
#>  $ Val_2               : num  6.38 NA NA 3.87 NA ...
#>  $ Val_3               : num  3.73 NA NA 4.57 NA ...
#>  $ Val_4               : num  0.555 NA NA 1.558 NA ...
#>  $ Val_5               : int  4 NA NA 4 NA NA 0 NA NA 2 ...
#>  $ Y_mcar              : num  3.66 3.74 NA 1.82 2.21 ...
#>  $ Y_mar               : num  3.66 3.74 NA 1.82 2.21 ...
#>  $ Y_mnar              : num  3.66 3.74 NA 1.82 2.21 ...
#>  $ Item_1              : int  2 2 2 1 1 1 0 1 0 1 ...
#>  $ Item_2              : int  3 1 3 0 2 3 0 0 0 0 ...
#>  $ Item_3              : int  2 2 2 1 1 3 0 0 0 0 ...
#>  $ Item_4              : int  3 3 1 0 0 2 2 0 1 0 ...
#>  $ Item_5              : int  2 2 2 1 0 1 0 0 1 1 ...
#>  $ PRO.score           : int  12 10 10 3 4 10 2 1 2 2 ...
#>  $ PRO.score_mar       : int  12 10 NA 3 4 10 2 1 2 2 ...
#>  $ PRO.score_mnar      : int  12 10 NA 3 4 10 2 1 2 2 ...
#>  $ PRO.score_mcar      : int  12 10 NA 3 4 10 2 1 NA 2 ...
#>  $ PGIS_bl             : num  3 3 3 0 0 0 0 0 0 1 ...
#>  $ PGIS_delta          : num  0 1 0 0 0 0 0 0 1 0 ...
#>  $ PRO.score_bl        : int  12 12 12 3 3 3 2 2 2 2 ...
#>  $ PRO.score_mcar_bl   : int  12 12 12 3 3 3 2 2 2 2 ...
#>  $ PRO.score_mar_bl    : int  12 12 12 3 3 3 2 2 2 2 ...
#>  $ PRO.score_mnar_bl   : int  12 12 12 3 3 3 2 2 2 2 ...
#>  $ PRO.score_delta     : int  0 -2 -2 0 1 7 0 -1 0 0 ...
#>  $ PRO.score_mcar_delta: int  0 -2 NA 0 1 7 0 -1 NA 0 ...
#>  $ PRO.score_mar_delta : int  0 -2 NA 0 1 7 0 -1 0 0 ...
#>  $ PRO.score_mnar_delta: int  0 -2 NA 0 1 7 0 -1 0 0 ...

Compute Test-Retest Reliability

Compute the ICC(A,1) values and output them to a table:


icc <- COA34::compute_iccA1(dat = dat,
                            PRO.score = c('PRO.score', 
                                          'PRO.score_mcar', 
                                          'PRO.score_mar', 
                                          'PRO.score_mnar'),
                            time.var = 'Time',
                            subject.id = 'USUBJID',
                            anchor = 'PGIS_delta',
                            stable.score = 0, 
                            first.timepoint = 'Time_1', 
                            second.timepoint = 'Time_2')

Print the table out:

library(R2Word)

R2Word::dump_df_mat_to_file(out = icc$icc.A1,
                            decimals = c(2, 0, 0), 
                            table.title = 'ICC(A,1) - Vignette Illustration', 
                            table.footnote = '**All data simulated',
                            file.name = 'trtr_vig_iccA1', 
                            print.dir = print.dir)

Shell Tables

If you’re putting together shell tables, use the function shell_table, specifying that you want columns 2 and 3 to be converted to x instead of the numeric values.


st <- R2Word::shell_table(out = icc$icc.A1, decimals = c(2, 0), cols = c(2, 3))
                           
R2Word::dump_df_mat_to_file(out = st,
                            decimals = 0, 
                            table.title = 'ICC(A,1) - Vignette Illustration', 
                            table.footnote = '**All data simulated',
                            file.name = 'trtr_vig_iccA1_shell_tables', 
                            print.dir = print.dir)

TODO: What is the generating value of ICC(A,1)?

Is it the correlation between the two timepoints? That is equal to 0.80.


sim.out$cor.mat
#>      [,1] [,2] [,3]
#> [1,] 1.00  0.8 0.64
#> [2,] 0.80  1.0 0.80
#> [3,] 0.64  0.8 1.00