forked from aalfons/simulations-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patherror_demonstration.R
More file actions
122 lines (95 loc) · 4.52 KB
/
error_demonstration.R
File metadata and controls
122 lines (95 loc) · 4.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# ------------------------------------
# Author: Andreas Alfons
# Erasmus University Rotterdam
# Floris van den Doel
# Erasmus University Rotterdam
# ------------------------------------
# load packages, but functions should still be called with package::function()
library("parallel")
library("simstudy")
library("ccaPP")
source("functions/block_correlation_matrix_fixed.R")
source("functions/probabilities.R")
# control parameters for data generation
n <- 1000 # number of observations
num_scales <- 4 # number of scales
p <- 5 # number of items per scale
prob <- c(0.05, 0.25, 0.4, 0.25, 0.05) # probabilities of response categories
L <- length(prob) # number of response categories
rho_b <- 0.4 # target correlation between scales
rho_w_seq <- seq(0.4, 0.8, 0.1) # target correlation within scales
R <- 10 # number of simulation runs
seed <- 20230111 # seed of the random number generator
scale_estimator <- "IQR" # type of scale estimator used
true_sd <- likert_sd(prob) # get the true variance of each variable
# define matrix with probabilities of response categories per item
prob_mat <- matrix(prob, nrow = num_scales*p, ncol = L, byrow = TRUE)
# control parameters for random respondents
epsilons <- seq(0, 0.3, by = 0.05)
epsilon_max <- max(epsilons)
# it is very easy to use parallel computing on Unix systems, but not on Windows
if (.Platform$OS.type == "windows") {
n_cores <- 1 # use only one CPU core
} else {
n_cores <- 2 # number of CPU cores to be used
RNGkind("L'Ecuyer-CMRG") # use parallel random number streams
}
# loop over different within correlation levels
results_list_rho <- parallel::mclapply(rho_w_seq, function(rho_w) {
# define correlation matrix
Rho <- cor_mat_block(num_scales, p, rho_w, rho_b)
# define matrix with variance of each variable on the diagonal
V <- matrix(0, ncol = p*num_scales, nrow = p*num_scales)
diag(V) <- true_sd
# Multiply it with standard deviation matrices to get Sigma
Sigma <- V %*% Rho %*% V
# run simulation
cat(paste(Sys.time(), ": starting ...\n"))
set.seed(seed)
results_list <- parallel::mclapply(seq_len(R), function(r) {
# print simulation run
cat(paste(Sys.time(), sprintf(": run = %d\n", r)))
# initialize data table
initial <- simstudy::genData(n)
# generate correlated rating-scale items
data <- simstudy::genOrdCat(initial, baseprobs = prob_mat, prefix = "item",
corMatrix = Rho)
# drop ID and convert to integer matrix
data <- as.matrix(data[, -1])
storage.mode(data) <- "integer"
# generate probabilities of being a random respondents
careless_probabilities <- runif(n)
# order observations according to probabilities of being random respondents,
# which makes it easier to keep previous careless respondents the same as the
# contamination level increases (for maximum comparability)
order <- order(careless_probabilities)
data <- data[order, ]
careless_probabilities <- careless_probabilities[order]
# generate random responses to be used for careless respondents
n_careless_max <- sum(careless_probabilities < epsilon_max)
data_careless <- replicate(p, sample.int(L, n_careless_max, replace = TRUE))
# loop over contamination levels
results_r <- lapply(epsilons, function(epsilon) {
# turn selected observations into careless respondents: since the
# observations are sorted according to the probability of being careless,
# this keeps previous careless respondents the same as the contamination
# level increases
if (epsilon > 0) {
careless <- which(careless_probabilities < epsilon)
data[careless, ] <- data_careless[careless, ]
}
# Create a scale matrix with variance on the diagonal
scale_estimate = matrix(0, nrow = p*num_scales, ncol = p*num_scales)
for (i in 1:(p*num_scales)) {
scale_estimate[i, i] <- robustbase::Qn(data[, i])
}
scale_estimate
})
# combine results from current simulation run into data frame
do.call(rbind, results_r)
}, mc.cores = n_cores)
# combine results into data frame
results <- do.call(rbind, results_list)
}, mc.cores = n_cores)
# combine results into dataframe
results_rho <-do.call(rbind, results_list_rho)