-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathREADME.Rmd
More file actions
114 lines (90 loc) · 3.55 KB
/
README.Rmd
File metadata and controls
114 lines (90 loc) · 3.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
---
output: github_document
---
```{r, echo = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "README-"
)
```
[](https://cran.r-project.org/package=robStepSplitReg)
[](https://cran.r-project.org/package=robStepSplitReg)
[](https://cran.r-project.org/package=robStepSplitReg)
robStepSplitReg
================
This package provides functions for performing robust stepwise split regularized regression.
---------------------------------------------------------------------------------------------
### Installation
You can install the **stable** version on [R CRAN](https://cran.r-project.org/package=robStepSplitReg).
```{r installation, eval = FALSE}
install.packages("robStepSplitReg", dependencies = TRUE)
```
You can install the **development** version from [GitHub](https://github.com/AnthonyChristidis/robStepSplitReg)
``` r
library(devtools)
devtools::install_github("AnthonyChristidis/robStepSplitReg")
```
### Usage
``` r
# Required library
library(mvnfast)
# Simulation parameters
n <- 50
p <- 500
rho <- 0.8
p.active <- 100
snr <- 3
contamination.prop <- 0.2
# Setting the seed
set.seed(0)
# Simulation of beta vector
true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active))
# Simulation of uncontaminated data
sigma.mat <- matrix(0, nrow = p, ncol = p)
sigma.mat[1:p.active, 1:p.active] <- rho
diag(sigma.mat) <- 1
x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat)
sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr))
y <- x %*% true.beta + rnorm(n, 0, sigma)
# Contamination of data
contamination_indices <- 1:floor(n*contamination.prop)
k_lev <- 2
k_slo <- 100
x_train <- x
y_train <- y
beta_cont <- true.beta
beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo)
beta_cont[true.beta==0] <- k_slo*max(abs(true.beta))
for(cont_id in contamination_indices){
a <- runif(p, min = -1, max = 1)
a <- a - as.numeric((1/p)*t(a) %*% rep(1, p))
x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) +
k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a))
y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont
}
# Ensemble models
ensemble_fit <- robStepSplitReg(x_train, y_train,
n_models = 5,
model_saturation = c("fixed", "p-value")[1],
alpha = 0.05, model_size = 25,
robust = TRUE,
compute_coef = TRUE,
pense_alpha = 1/4, pense_cv_k = 5, pense_cv_repl = 1,
cl = NULL)
# Ensemble coefficients
ensemble_coefs <- coef(ensemble_fit, group_index = 1:ensemble_fit$n_models)
sens_ensemble <- sum(which((ensemble_coefs[-1]!=0)) <= p.active)/p.active
spec_ensemble <- sum(which((ensemble_coefs[-1]!=0)) <= p.active)/sum(ensemble_coefs[-1]!=0)
# Simulation of test data
m <- 2e3
x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat)
y_test <- x_test %*% true.beta + rnorm(m, 0, sigma)
# Prediction of test samples
ensemble_preds <- predict(ensemble_fit, newx = x_test,
group_index = 1:ensemble_fit$n_models,
dynamic = FALSE)
mspe_ensemble <- mean((y_test - ensemble_preds)^2)/sigma^2
```
### License
This package is free and open source software, licensed under GPL (>= 2).