a02_Vignette_Item_Resp_Dist.Rmd
The purpose of this is to show some basic functionality using base R functions. Additional illustrations are available here: https://cjangelo.github.io/R2Word/articles/introduction.html
These sources are common tables that are rendered as output.
A much more sophisticated collection of functions and examples is available in the gtsummary
R package.
set.seed(12162021)
sim.out <- COA34::sim_pro_dat_v2(N=1000,
number.timepoints = 3,
Beta.PRO = NULL,
number.of.anchor.groups = 5,
polychor.value = 0.7,
corr = 'ar1',
cor.value = 0.8,
#var.values = c(7.136))
var.values = c(2))
dat <- sim.out$dat
# Simulate IRT items
# use the scores generated as the theta in the IRT model
sim.out2 <- COA34::sim_irt_item(dat = dat, J = 5, K = 4, latent.variable = 'Y_comp')
str(sim.out2)
#> List of 3
#> $ dat :'data.frame': 3000 obs. of 22 variables:
#> ..$ USUBJID : chr [1:3000] "Subject_0001" "Subject_0001" "Subject_0001" "Subject_0002" ...
#> ..$ Time : Factor w/ 3 levels "Time_1","Time_2",..: 1 2 3 1 2 3 1 2 3 1 ...
#> ..$ PGIS : num [1:3000] 3 4 3 0 0 0 0 0 1 1 ...
#> ..$ PGIS_bl : num [1:3000] 3 3 3 0 0 0 0 0 0 1 ...
#> ..$ PGIS_delta: num [1:3000] 0 1 0 0 0 0 0 0 1 0 ...
#> ..$ ag : num [1:3000] 0 1 0 0 0 0 0 0 1 0 ...
#> ..$ XB : num [1:3000] 4 4.25 4 1 1 1 1 1 2 2 ...
#> ..$ error : num [1:3000] -0.336 -0.509 0.122 0.825 1.212 ...
#> ..$ Y_comp : num [1:3000] 3.66 3.74 4.12 1.82 2.21 ...
#> ..$ Val_1 : int [1:3000] 2 NA NA 0 NA NA 0 NA NA 0 ...
#> ..$ Val_2 : num [1:3000] 6.38 NA NA 3.87 NA ...
#> ..$ Val_3 : num [1:3000] 3.73 NA NA 4.57 NA ...
#> ..$ Val_4 : num [1:3000] 0.555 NA NA 1.558 NA ...
#> ..$ Val_5 : int [1:3000] 4 NA NA 4 NA NA 0 NA NA 2 ...
#> ..$ Y_mcar : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#> ..$ Y_mar : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#> ..$ Y_mnar : num [1:3000] 3.66 3.74 NA 1.82 2.21 ...
#> ..$ Item_1 : int [1:3000] 2 2 2 1 1 1 0 1 0 1 ...
#> ..$ Item_2 : int [1:3000] 3 1 3 0 2 3 0 0 0 0 ...
#> ..$ Item_3 : int [1:3000] 2 2 2 1 1 3 0 0 0 0 ...
#> ..$ Item_4 : int [1:3000] 3 3 1 0 0 2 2 0 1 0 ...
#> ..$ Item_5 : int [1:3000] 2 2 2 1 0 1 0 0 1 1 ...
#> $ item.param : num [1:5, 1:4] 2 2 2 2 2 -2 -2 -2 -2 -2 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:5] "Item_1" "Item_2" "Item_3" "Item_4" ...
#> .. ..$ : chr [1:4] "slope" "intercept_1" "intercept_2" "intercept_3"
#> $ item.responses:'data.frame': 3000 obs. of 5 variables:
#> ..$ Item_1: int [1:3000] 2 2 2 1 1 1 0 1 0 1 ...
#> ..$ Item_2: int [1:3000] 3 1 3 0 2 3 0 0 0 0 ...
#> ..$ Item_3: int [1:3000] 2 2 2 1 1 3 0 0 0 0 ...
#> ..$ Item_4: int [1:3000] 3 3 1 0 0 2 2 0 1 0 ...
#> ..$ Item_5: int [1:3000] 2 2 2 1 0 1 0 0 1 1 ...
dat <- sim.out2$dat
# Score the PRO - just take a simple sum score here:
dat$PRO.score <- apply(dat[, grep('Item', colnames(dat))], 1, sum)
# Create the same PRO score, but with MAR drop-out:
dat$PRO.score_mar <- dat$PRO.score
dat$PRO.score_mar[is.na(dat$Y_mar)] <- NA
# Note that you've just set the PRO score to missing wherever the Y_mar variable is missing
# Now for the other missing types:
dat$PRO.score_mcar <- dat$PRO.score_mnar <- dat$PRO.score
dat$PRO.score_mcar[is.na(dat$Y_mcar)] <- NA
dat$PRO.score_mnar[is.na(dat$Y_mnar)] <- NA
aggregate(cbind(PRO.score, PRO.score_mcar, PRO.score_mar, PRO.score_mnar) ~ Time,
function(x) mean(x, na.rm = T),
data = dat,
na.action = na.pass)
#> Time PRO.score PRO.score_mcar PRO.score_mar PRO.score_mnar
#> 1 Time_1 7.445 7.445000 7.445 7.445
#> 2 Time_2 7.492 7.486667 5.912 5.748
#> 3 Time_3 7.520 7.736000 5.210 4.204
Two useful base R functions (actually in stats package) are aggregate()
and xtabs()
.
out1 <- do.call(data.frame, stats::aggregate(PRO.score_mar ~ Time,
FUN = function(x) c(sum(!is.na(x)),
mean(x, na.rm = T),
sd(x, na.rm = T),
quantile(x, na.rm = T)),
data = dat, na.action = na.pass))
# note: be careful of NA handling
colnames(out1) <- c('Time', 'N', 'Mean', 'SD', '0%', '25%', '50%', '75%', '100%')
R2Word::dump_df_mat_to_file(out = out1,
decimals = c(0, 2, 3, 2, 2, 2, 2, 2),
table.title = 'Item Response Distribution - Vignette Illustration',
table.footnote = '**All data simulated',
file.name = 'ird1',
print.dir = print.dir)
Item Response Distribution - Vignette Illustration | ||||||||
Time |
N |
Mean |
SD |
0% |
25% |
50% |
75% |
100% |
Time_1 |
1000 |
7.45 |
4.230 |
0.00 |
4.00 |
7.00 |
11.00 |
15.00 |
Time_2 |
750 |
5.91 |
3.575 |
0.00 |
3.00 |
6.00 |
9.00 |
15.00 |
Time_3 |
500 |
5.21 |
3.617 |
0.00 |
2.00 |
5.00 |
8.00 |
15.00 |
**All data simulated |
out2 <- stats::xtabs( ~ PGIS_delta + Time, data = dat)
out2 <- addmargins(out2, margin = 1)
df <- as.data.frame.matrix(out2)
df <- cbind.data.frame('PGIS_delta' = rownames(out2), df)
R2Word::dump_df_mat_to_file(out = df,
decimals = 0,
table.title = 'Cross-Table, Vignette Illustration',
table.footnote = '**All data simulated',
file.name = 'ct',
print.dir = print.dir)
Cross-Table, Vignette Illustration | |||
PGIS_delta |
Time_1 |
Time_2 |
Time_3 |
-4 |
0 |
0 |
4 |
-3 |
0 |
11 |
50 |
-2 |
0 |
67 |
84 |
-1 |
0 |
207 |
206 |
0 |
1000 |
438 |
321 |
1 |
0 |
188 |
180 |
2 |
0 |
65 |
110 |
3 |
0 |
22 |
37 |
4 |
0 |
2 |
8 |
Sum |
1000 |
1000 |
1000 |
**All data simulated |
If you’re using a loop, use as.formula() with paste0():
x <- 'PGIS_delta'
y <- 'Time'
stats::xtabs( as.formula(paste0('~ ', x, ' + ', y)), data = dat)
#> Time
#> PGIS_delta Time_1 Time_2 Time_3
#> -4 0 0 4
#> -3 0 11 50
#> -2 0 67 84
#> -1 0 207 206
#> 0 1000 438 321
#> 1 0 188 180
#> 2 0 65 110
#> 3 0 22 37
#> 4 0 2 8
Additional examples available here: https://cjangelo.github.io/R2Word/articles/introduction.html
A great library: library(dplyr)
. Example:
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
dplyr::count(dat, Time, PGIS_bl, PGIS_delta, .drop = F)
#> Time PGIS_bl PGIS_delta n
#> 1 Time_1 0 0 229
#> 2 Time_1 1 0 202
#> 3 Time_1 2 0 198
#> 4 Time_1 3 0 193
#> 5 Time_1 4 0 178
#> 6 Time_2 0 0 129
#> 7 Time_2 0 1 64
#> 8 Time_2 0 2 25
#> 9 Time_2 0 3 9
#> 10 Time_2 0 4 2
#> 11 Time_2 1 -1 63
#> 12 Time_2 1 0 66
#> 13 Time_2 1 1 41
#> 14 Time_2 1 2 19
#> 15 Time_2 1 3 13
#> 16 Time_2 2 -2 28
#> 17 Time_2 2 -1 44
#> 18 Time_2 2 0 72
#> 19 Time_2 2 1 33
#> 20 Time_2 2 2 21
#> 21 Time_2 3 -3 7
#> 22 Time_2 3 -2 24
#> 23 Time_2 3 -1 52
#> 24 Time_2 3 0 60
#> 25 Time_2 3 1 50
#> 26 Time_2 4 -3 4
#> 27 Time_2 4 -2 15
#> 28 Time_2 4 -1 48
#> 29 Time_2 4 0 111
#> 30 Time_3 0 0 106
#> 31 Time_3 0 1 48
#> 32 Time_3 0 2 49
#> 33 Time_3 0 3 18
#> 34 Time_3 0 4 8
#> 35 Time_3 1 -1 61
#> 36 Time_3 1 0 54
#> 37 Time_3 1 1 40
#> 38 Time_3 1 2 28
#> 39 Time_3 1 3 19
#> 40 Time_3 2 -2 32
#> 41 Time_3 2 -1 47
#> 42 Time_3 2 0 50
#> 43 Time_3 2 1 36
#> 44 Time_3 2 2 33
#> 45 Time_3 3 -3 24
#> 46 Time_3 3 -2 25
#> 47 Time_3 3 -1 52
#> 48 Time_3 3 0 36
#> 49 Time_3 3 1 56
#> 50 Time_3 4 -4 4
#> 51 Time_3 4 -3 26
#> 52 Time_3 4 -2 27
#> 53 Time_3 4 -1 46
#> 54 Time_3 4 0 75