Item Response Distribution • COA34

Functions to Compute Item Response Distributions

The purpose of this is to show some basic functionality using base R functions. Additional illustrations are available here: https://cjangelo.github.io/R2Word/articles/introduction.html

These sources are common tables that are rendered as output.

A much more sophisticated collection of functions and examples is available in the gtsummary R package.

Required R packages

library(COA34)
library(R2Word)

Generate data


  set.seed(12162021)

  sim.out <- COA34::sim_pro_dat_v2(N=1000,
                            number.timepoints = 3,
                            Beta.PRO = NULL,
                            number.of.anchor.groups = 5,
                            polychor.value = 0.7,
                            corr = 'ar1',
                            cor.value = 0.8,
                            #var.values = c(7.136))
                            var.values = c(2))
  dat <- sim.out$dat

Implement drop-out


dat <- COA34::dropout(dat = dat,
               type_dropout  = c('mcar', 'mar', 'mnar'),
               prop.miss = 0.5,
               stochastic.component = 0.2)

Simulate Items


 # Simulate IRT items
 # use the scores generated as the theta in the IRT model 
  sim.out2 <- COA34::sim_irt_item(dat = dat, J = 5, K = 4, latent.variable = 'Y_comp')
    str(sim.out2)
#> List of 3
#>  $ dat           :'data.frame':  3000 obs. of  22 variables:
#>   ..$ USUBJID   : chr [1:3000] "Subject_0001" "Subject_0001" "Subject_0001" "Subject_0002" ...
#>   ..$ Time      : Factor w/ 3 levels "Time_1","Time_2",..: 1 2 3 1 2 3 1 2 3 1 ...
#>   ..$ PGIS      : num [1:3000] 3 4 3 0 0 0 0 0 1 1 ...
#>   ..$ PGIS_bl   : num [1:3000] 3 3 3 0 0 0 0 0 0 1 ...
#>   ..$ PGIS_delta: num [1:3000] 0 1 0 0 0 0 0 0 1 0 ...
#>   ..$ ag        : num [1:3000] 0 1 0 0 0 0 0 0 1 0 ...
#>   ..$ XB        : num [1:3000] 4 4.25 4 1 1 1 1 1 2 2 ...
#>   ..$ error     : num [1:3000] -0.336 -0.509 0.122 0.825 1.212 ...
#>   ..$ Y_comp    : num [1:3000] 3.66 3.74 4.12 1.82 2.21 ...
#>   ..$ Val_1     : int [1:3000] 2 NA NA 0 NA NA 0 NA NA 0 ...
#>   ..$ Val_2     : num [1:3000] 6.38 NA NA 3.87 NA ...
#>   ..$ Val_3     : num [1:3000] 3.73 NA NA 4.57 NA ...
#>   ..$ Val_4     : num [1:3000] 0.555 NA NA 1.558 NA ...
#>   ..$ Val_5     : int [1:3000] 4 NA NA 4 NA NA 0 NA NA 2 ...
#>   ..$ Y_mcar    : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#>   ..$ Y_mar     : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#>   ..$ Y_mnar    : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#>   ..$ Item_1    : int [1:3000] 2 2 2 1 1 1 0 1 0 1 ...
#>   ..$ Item_2    : int [1:3000] 3 1 3 0 2 3 0 0 0 0 ...
#>   ..$ Item_3    : int [1:3000] 2 2 2 1 1 3 0 0 0 0 ...
#>   ..$ Item_4    : int [1:3000] 3 3 1 0 0 2 2 0 1 0 ...
#>   ..$ Item_5    : int [1:3000] 2 2 2 1 0 1 0 0 1 1 ...
#>  $ item.param    : num [1:5, 1:4] 2 2 2 2 2 -2 -2 -2 -2 -2 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:5] "Item_1" "Item_2" "Item_3" "Item_4" ...
#>   .. ..$ : chr [1:4] "slope" "intercept_1" "intercept_2" "intercept_3"
#>  $ item.responses:'data.frame':  3000 obs. of  5 variables:
#>   ..$ Item_1: int [1:3000] 2 2 2 1 1 1 0 1 0 1 ...
#>   ..$ Item_2: int [1:3000] 3 1 3 0 2 3 0 0 0 0 ...
#>   ..$ Item_3: int [1:3000] 2 2 2 1 1 3 0 0 0 0 ...
#>   ..$ Item_4: int [1:3000] 3 3 1 0 0 2 2 0 1 0 ...
#>   ..$ Item_5: int [1:3000] 2 2 2 1 0 1 0 0 1 1 ...
    dat <- sim.out2$dat

# Score the PRO - just take a simple sum score here:
  dat$PRO.score <- apply(dat[, grep('Item', colnames(dat))], 1, sum)
  
# Create the same PRO score, but with MAR drop-out:
  dat$PRO.score_mar <- dat$PRO.score
  dat$PRO.score_mar[is.na(dat$Y_mar)] <- NA
# Note that you've just set the PRO score to missing wherever the Y_mar variable is missing
  
# Now for the other missing types:
    dat$PRO.score_mcar <- dat$PRO.score_mnar <- dat$PRO.score
    dat$PRO.score_mcar[is.na(dat$Y_mcar)] <- NA
    dat$PRO.score_mnar[is.na(dat$Y_mnar)] <- NA

  aggregate(cbind(PRO.score, PRO.score_mcar, PRO.score_mar, PRO.score_mnar) ~ Time,
                  function(x) mean(x, na.rm = T), 
                  data = dat, 
                  na.action = na.pass)
#>     Time PRO.score PRO.score_mcar PRO.score_mar PRO.score_mnar
#> 1 Time_1     7.445       7.445000         7.445          7.445
#> 2 Time_2     7.492       7.486667         5.912          5.748
#> 3 Time_3     7.520       7.736000         5.210          4.204

Examples

Two useful base R functions (actually in stats package) are aggregate() and xtabs().

Item Response Distributions:

out1 <- do.call(data.frame, stats::aggregate(PRO.score_mar ~ Time,
        FUN = function(x) c(sum(!is.na(x)),
                            mean(x, na.rm = T),
                            sd(x, na.rm = T),
                            quantile(x, na.rm = T)),
        data = dat, na.action = na.pass))
# note: be careful of NA handling
colnames(out1) <- c('Time', 'N', 'Mean', 'SD', '0%', '25%', '50%', '75%', '100%')

R2Word::dump_df_mat_to_file(out = out1,
                            decimals = c(0, 2, 3, 2, 2, 2, 2, 2), 
                            table.title = 'Item Response Distribution - Vignette Illustration', 
                            table.footnote = '**All data simulated',
                            file.name = 'ird1', 
                            print.dir = print.dir)

Item Response Distribution - Vignette Illustration
Time_1	1000	7.45	4.230	0.00	4.00	7.00	11.00	15.00
Time_2	750	5.91	3.575	0.00	3.00	6.00	9.00	15.00
Time_3	500	5.21	3.617	0.00	2.00	5.00	8.00	15.00
**All data simulated

Cross-Tabs:

out2 <- stats::xtabs( ~  PGIS_delta + Time, data = dat)
out2 <- addmargins(out2, margin = 1)
df <- as.data.frame.matrix(out2)
df <- cbind.data.frame('PGIS_delta' = rownames(out2), df)

R2Word::dump_df_mat_to_file(out = df,
                            decimals = 0,
                            table.title = 'Cross-Table, Vignette Illustration',
                            table.footnote = '**All data simulated',
                            file.name = 'ct',
                            print.dir = print.dir)

Cross-Table, Vignette Illustration
-4	0	0	4
-3	0	11	50
-2	0	67	84
-1	0	207	206
0	1000	438	321
1	0	188	180
2	0	65	110
3	0	22	37
4	0	2	8
Sum	1000	1000	1000
**All data simulated

If you’re using a loop, use as.formula() with paste0():

x <- 'PGIS_delta'
y <- 'Time'
stats::xtabs( as.formula(paste0('~ ',  x, ' + ', y)), data = dat)
#>           Time
#> PGIS_delta Time_1 Time_2 Time_3
#>         -4      0      0      4
#>         -3      0     11     50
#>         -2      0     67     84
#>         -1      0    207    206
#>         0    1000    438    321
#>         1       0    188    180
#>         2       0     65    110
#>         3       0     22     37
#>         4       0      2      8

More examples…

Additional examples available here: https://cjangelo.github.io/R2Word/articles/introduction.html

A great library: library(dplyr). Example:

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
dplyr::count(dat, Time, PGIS_bl, PGIS_delta, .drop = F)
#>      Time PGIS_bl PGIS_delta   n
#> 1  Time_1       0          0 229
#> 2  Time_1       1          0 202
#> 3  Time_1       2          0 198
#> 4  Time_1       3          0 193
#> 5  Time_1       4          0 178
#> 6  Time_2       0          0 129
#> 7  Time_2       0          1  64
#> 8  Time_2       0          2  25
#> 9  Time_2       0          3   9
#> 10 Time_2       0          4   2
#> 11 Time_2       1         -1  63
#> 12 Time_2       1          0  66
#> 13 Time_2       1          1  41
#> 14 Time_2       1          2  19
#> 15 Time_2       1          3  13
#> 16 Time_2       2         -2  28
#> 17 Time_2       2         -1  44
#> 18 Time_2       2          0  72
#> 19 Time_2       2          1  33
#> 20 Time_2       2          2  21
#> 21 Time_2       3         -3   7
#> 22 Time_2       3         -2  24
#> 23 Time_2       3         -1  52
#> 24 Time_2       3          0  60
#> 25 Time_2       3          1  50
#> 26 Time_2       4         -3   4
#> 27 Time_2       4         -2  15
#> 28 Time_2       4         -1  48
#> 29 Time_2       4          0 111
#> 30 Time_3       0          0 106
#> 31 Time_3       0          1  48
#> 32 Time_3       0          2  49
#> 33 Time_3       0          3  18
#> 34 Time_3       0          4   8
#> 35 Time_3       1         -1  61
#> 36 Time_3       1          0  54
#> 37 Time_3       1          1  40
#> 38 Time_3       1          2  28
#> 39 Time_3       1          3  19
#> 40 Time_3       2         -2  32
#> 41 Time_3       2         -1  47
#> 42 Time_3       2          0  50
#> 43 Time_3       2          1  36
#> 44 Time_3       2          2  33
#> 45 Time_3       3         -3  24
#> 46 Time_3       3         -2  25
#> 47 Time_3       3         -1  52
#> 48 Time_3       3          0  36
#> 49 Time_3       3          1  56
#> 50 Time_3       4         -4   4
#> 51 Time_3       4         -3  26
#> 52 Time_3       4         -2  27
#> 53 Time_3       4         -1  46
#> 54 Time_3       4          0  75

Item Response Distribution - Vignette Illustration
Time	N	Mean	SD	0%	25%	50%	75%	100%
Time_1	1000	7.45	4.230	0.00	4.00	7.00	11.00	15.00
Time_2	750	5.91	3.575	0.00	3.00	6.00	9.00	15.00
Time_3	500	5.21	3.617	0.00	2.00	5.00	8.00	15.00
**All data simulated

Cross-Table, Vignette Illustration
PGIS_delta	Time_1	Time_2	Time_3
-4	0	0	4
-3	0	11	50
-2	0	67	84
-1	0	207	206
0	1000	438	321
1	0	188	180
2	0	65	110
3	0	22	37
4	0	2	8
Sum	1000	1000	1000
**All data simulated