Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ export(learner_surv_cox)
export(learner_surv_rf)
export(learner_svm)
export(learner_xgboost)
export(metalearner_discrete)
export(metalearner_nnls)
export(ml_model)
export(naivebayes)
Expand Down
23 changes: 18 additions & 5 deletions R/superlearner.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# Utility to ensure that the Dmat argument of quadprog::solve.QP is positive
# definite. solve.QP requires a strictly positive definite quadratic term, but
# the cross-product t(pred) %*% pred is only positive *semi*-definite when
# columns of pred are collinear (e.g. two learners with identical
# predictions). We project the matrix onto the nearest positive definite
# matrix by flooring its eigenvalues at sqrt(.Machine$double.eps).
#
# pred:    numeric matrix of learner predictions (observations x learners)
# returns: symmetric positive definite matrix of dimension ncol(pred)
make_dmat_pos_definite <- function(pred) {
  # crossprod(pred) is the idiomatic (and faster) form of t(pred) %*% pred
  Dmat <- crossprod(pred)
  # symmetric = TRUE: the cross-product is symmetric by construction, so we
  # can skip the numeric symmetry probe and are guaranteed a real
  # eigendecomposition with orthonormal eigenvectors
  eig <- eigen(Dmat, symmetric = TRUE)
  tau <- pmax(eig$values, sqrt(.Machine$double.eps))
  # Reconstruct V diag(tau) V'. The elementwise product tau * t(eig$vectors)
  # multiplies row i of t(V) by tau[i], i.e. diag(tau) %*% t(V), without
  # calling diag() (which misbehaves for scalar input).
  eig$vectors %*% (tau * t(eig$vectors))
}

#' @export
metalearner_nnls <- function(y, pred, method = "quadprog", ...) {
if (NCOL(pred) == 1) {
Expand All @@ -12,7 +24,7 @@ metalearner_nnls <- function(y, pred, method = "quadprog", ...) {
} else {
opt <- tryCatch(
quadprog::solve.QP(
Dmat = t(pred) %*% pred,
Dmat = make_dmat_pos_definite(pred),
Amat = diag(nrow = ncol(pred)),
dvec = t(pred) %*% y
),
Expand All @@ -38,12 +50,12 @@ metalearner_convexcomb <- function(y, pred, ...) {
A <- cbind(1, A)
b <- c(1, rep(0, ncol(pred)))
opt <- quadprog::solve.QP(
Dmat = t(pred) %*% pred,
Dmat = make_dmat_pos_definite(pred),
Amat = A,
dvec = t(pred) %*% y,
bvec = b,
meq = 1
)
)
coefs[idx] <- opt$solution
if (any(is.na(coefs))) coefs[is.na(coefs)] <- 0
if (all(coefs == 0)) coefs[1] <- 1
Expand All @@ -68,9 +80,10 @@ metalearner_convexcomb <- function(y, pred, ...) {
## }


#' @export
metalearner_discrete <- function(y, pred, risk, ...) {
  # Discrete super learner: put all weight on the single learner with the
  # lowest cross-validated risk (ties and duplicated learners are resolved by
  # taking the first minimizer).
  #
  # y:    response vector (unused; kept for the common meta-learner interface)
  # pred: matrix of cross-validated predictions, one column per learner
  # risk: numeric vector of cross-validated risks, one per learner
  # returns: weight vector with a single 1 and 0 elsewhere
  weights <- rep(0, NCOL(pred))
  # Bug fix: mask missing *risks*, not the freshly initialized weights (which
  # can never contain NA, making the previous guard a no-op). This keeps
  # which.min well-defined even when some -- or all -- risks are NA.
  risk[is.na(risk)] <- Inf
  weights[which.min(risk)[1]] <- 1
  return(weights)
}
Expand Down Expand Up @@ -126,7 +139,7 @@ get_learner_names <- function(model.list, name.prefix) {
#' [future.apply::future_lapply].
#' @references Luedtke & van der Laan (2016) Super-Learning of an Optimal
#' Dynamic Treatment Rule, The International Journal of Biostatistics.
#' @aliases superlearner metalearner_nnls
#' @aliases superlearner metalearner_nnls metalearner_discrete
#' @seealso [predict.superlearner] [weights.superlearner] [score.superlearner]
#' @examples
#' sim1 <- function(n = 5e2) {
Expand Down
71 changes: 70 additions & 1 deletion inst/tinytest/test_superlearner.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
sim1 <- function(n = 5e2) {
x1 <- rnorm(n, sd = 2)
x2 <- rnorm(n)
y <- x1 + cos(x1) + rnorm(n, sd = 0.5**.5)
y <- x1 + cos(x1) + x2 + x1 * x2 + rnorm(n, sd = 0.5**.5)
yb <- as.numeric(y > 0)
d <- data.frame(y, yb, x1, x2)
d
Expand Down Expand Up @@ -89,3 +89,72 @@ test_score.superlearner <- function() {
expect_equal(score(sl), sl$model.score)
}
test_score.superlearner()


test_metalearners <- function() {
  # Edge case: two learners with identical predictions. The fixed seed makes
  # the simulated data and the cross-validation folds reproducible across the
  # three superlearner runs below.
  set.seed(400)
  dat <- sim1()
  learners <- list(
    learner_glm(y ~ I(x1 ** 2) + x2),
    learner_glm(y ~ x1 + x2),
    learner_glm(y ~ x1 + x2)
  )
  set.seed(1)
  sl_quadprog <- superlearner(learners, data = dat, nfolds = 2)
  set.seed(1)
  sl_nnls <- superlearner(learners, data = dat, nfolds = 2,
    meta.learner = targeted:::metalearner_nnls2
  )
  set.seed(1)
  sl_convex <- superlearner(learners, data = dat, nfolds = 2,
    meta.learner = targeted:::metalearner_convexcomb
  )

  # quadprog::solve.QP spreads the weight of the duplicated learner evenly
  # across both copies, while nnls::nnls puts all of it on a single copy; the
  # total weight of the pair must agree between the two solvers.
  expect_equal(
    sum(sl_quadprog$weights[c(2, 3)]), sum(sl_nnls$weights[c(2, 3)])
  )
  expect_true(0 %in% sl_nnls$weights[c(2, 3)])

  # The convex-combination meta-learner must return weights summing to one
  # and treat the duplicated learners symmetrically.
  expect_equal(sum(sl_convex$weights), 1)
  expect_equal(sl_convex$weights[2], sl_convex$weights[3], tol = 1e-4)

  # The discrete meta-learner selects exactly one learner: all weight goes to
  # the learner with the smallest cross-validated risk.
  set.seed(1)
  sl_discrete <- superlearner(learners, data = dat, nfolds = 2,
    meta.learner = targeted:::metalearner_discrete
  )
  expect_equal(sum(sl_discrete$weights == 0), 2)
  expect_equal(sum(sl_discrete$weights), 1)
  expect_equal(sl_discrete$weights[which.min(sl_discrete$model.score)][[1]], 1)

  # The discrete meta-learner can also be selected by name.
  set.seed(1)
  sl_discrete_char <- superlearner(learners, data = dat, nfolds = 2,
    meta.learner = "discrete"
  )
  expect_equal(sl_discrete_char$weights, sl_discrete$weights)

  # Without duplicated learners, nnls::nnls and quadprog::solve.QP must reach
  # the same solution.
  dat <- sim1(n = 200)
  learners <- list(
    learner_glm(y ~ x1 * x2 + cos(x1)),
    learner_glm(y ~ x1 * x2)
  )
  set.seed(1)
  sl_quadprog <- superlearner(learners, data = dat, nfolds = 2)
  set.seed(1)
  sl_nnls <- superlearner(learners, data = dat, nfolds = 2,
    meta.learner = targeted:::metalearner_nnls2
  )
  # Equal model scores imply equal weights.
  expect_equal(sl_quadprog$score, sl_nnls$score)
  expect_equal(sl_quadprog$weights, sl_nnls$weights)
}
test_metalearners()
1 change: 1 addition & 0 deletions man/superlearner.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading