From 86e025f78eee87c0a0aee96e6c351f0e403cd8cc Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Tue, 29 Jul 2025 14:49:33 -0400 Subject: [PATCH 01/17] run styler to format all code --- R/1.1_MLE_Point.R | 126 +++++----- R/1.2_MLE_Var.R | 100 ++++---- R/1_CallMLE.R | 97 ++++---- R/2.1_DR_Point.R | 131 +++++----- R/2.2.1_Hessian_RD.R | 135 +++++------ R/2.2.1_Hessian_RR.R | 149 ++++++------ R/2.2_DR_Var_RD.R | 243 +++++++++---------- R/2.2_DR_Var_RR.R | 227 +++++++++--------- R/2_CallDR.R | 70 +++--- R/MyFunc.R | 145 ++++++------ R/Print.R | 87 +++---- R/ValidCheck.R | 84 ++++--- R/brm.R | 40 ++-- R/getProbScalarRD.R | 201 ++++++++-------- R/getProbScalarRR.R | 238 ++++++++++--------- R/main.R | 380 ++++++++++++++++-------------- R/predict.brm.R | 136 +++++------ tests/testthat/test-brm.R | 42 ++-- tests/testthat/test-mat_vec_mul.R | 12 +- 19 files changed, 1353 insertions(+), 1290 deletions(-) diff --git a/R/1.1_MLE_Point.R b/R/1.1_MLE_Point.R index 354b75f..ec171aa 100644 --- a/R/1.1_MLE_Point.R +++ b/R/1.1_MLE_Point.R @@ -1,63 +1,67 @@ +max.likelihood <- function(param, y, x, va, vb, alpha.start, beta.start, weights, + max.step, thres, pa, pb) { + startpars <- c(alpha.start, beta.start) -max.likelihood = function(param, y, x, va, vb, alpha.start, beta.start, weights, - max.step, thres, pa, pb) { - - startpars = c(alpha.start, beta.start) - - getProb = if (param == "RR") getProbRR else getProbRD - - ## negative log likelihood function - neg.log.likelihood = function(pars) { - alpha = pars[1:pa] - beta = pars[(pa + 1):(pa + pb)] - p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) - p0 = p0p1[, 1]; p1 = p0p1[, 2] - - return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] + - (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) - sum((1 - y[x == - 1]) * log(1 - p1[x == 1]) * weights[x == 1] + (y[x == 1]) * log(p1[x == - 1]) * weights[x == 1])) - } - - neg.log.likelihood.alpha = function(alpha){ - p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) - p0 = p0p1[,1]; p1 = p0p1[,2] - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weights[x==0] + - (y[x==0])*log(p0[x==0])*weights[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weights[x==1] + - (y[x==1])*log(p1[x==1])*weights[x==1])) - } - - neg.log.likelihood.beta = function(beta){ - p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) - p0 = p0p1[,1]; p1 = p0p1[,2] - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weights[x==0] + - (y[x==0])*log(p0[x==0])*weights[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weights[x==1] + - (y[x==1])*log(p1[x==1])*weights[x==1])) - } - - - ## Optimization - - Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres) - alpha = alpha.start; beta = beta.start - diff = thres + 1; step = 0 - while(diff > thres & step < max.step){ - step = step + 1 - opt1 = stats::optim(alpha,neg.log.likelihood.alpha,control=list(maxit=max(100,max.step/10))) - diff1 = Diff(opt1$par,alpha) - alpha = opt1$par - opt2 = stats::optim(beta,neg.log.likelihood.beta,control=list(maxit=max(100,max.step/10))) - diff = max(diff1,Diff(opt2$par,beta)) - beta = opt2$par - } - - opt = list(par = c(alpha,beta), convergence = (step < max.step), - value = neg.log.likelihood(c(alpha,beta)), step = step) - - return(opt) -} + getProb <- if (param == "RR") getProbRR else getProbRD + + ## negative log likelihood function + neg.log.likelihood <- function(pars) { + alpha <- pars[1:pa] + beta <- pars[(pa + 1):(pa + pb)] + p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) - sum((1 - y[x == + 1]) * log(1 - p1[x == 1]) * weights[x == 1] + (y[x == 1]) * log(p1[x == + 1]) * weights[x == 1])) + } + + neg.log.likelihood.alpha <- function(alpha) { + p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weights[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weights[x == 1])) + } + + neg.log.likelihood.beta <- function(beta) { + p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weights[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weights[x == 1])) + } + + + ## Optimization + + Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres) + alpha <- alpha.start + beta <- beta.start + diff <- thres + 1 + step <- 0 + while (diff > thres & step < max.step) { + step <- step + 1 + opt1 <- stats::optim(alpha, neg.log.likelihood.alpha, control = list(maxit = max(100, max.step / 10))) + diff1 <- Diff(opt1$par, alpha) + alpha <- opt1$par + opt2 <- stats::optim(beta, neg.log.likelihood.beta, control = list(maxit = max(100, max.step / 10))) + diff <- max(diff1, Diff(opt2$par, beta)) + beta <- opt2$par + } + + opt <- list( + par = c(alpha, beta), convergence = (step < max.step), + value = neg.log.likelihood(c(alpha, beta)), step = step + ) + + return(opt) +} diff --git a/R/1.2_MLE_Var.R b/R/1.2_MLE_Var.R index d45f7ad..b02a0b2 100644 --- a/R/1.2_MLE_Var.R +++ b/R/1.2_MLE_Var.R @@ -1,52 +1,48 @@ - - -### variance calculation - -var.mle.rr = function(x, alpha.ml, beta.ml, va, vb, weights) { - - p0p1 = getProbRR(va %*% alpha.ml, vb %*% beta.ml) - n = dim(va)[1] - pA = rep(NA, n) # P(Y=1|A,V); here A = X - pA[x == 0] = p0p1[x == 0, 1] - pA[x == 1] = p0p1[x == 1, 2] - - expect.dl.by.dpsi0.squared = (pA)/(1 - pA) - dpsi0.by.dphi = (1 - p0p1[, 1]) * (1 - p0p1[, 2])/((1 - p0p1[, 1]) + (1 - - p0p1[, 2])) - dpsi0.by.dtheta = -(1 - p0p1[, 1])/((1 - p0p1[, 1]) + (1 - p0p1[, 2])) - tmp = cbind((dpsi0.by.dtheta + x) * va, dpsi0.by.dphi * vb) - ## since dtheta.by.dalpha = va, and dphi.by.dbeta = vb - fisher.info = (t(expect.dl.by.dpsi0.squared * weights * tmp) %*% tmp) - return(solve(fisher.info)) -} - - - - -### variance calculation - -var.mle.rd = function(x, alpha.ml, beta.ml, va, vb, weights) { - - p0p1 = getProbRD(va %*% alpha.ml, vb %*% beta.ml) - # p0p1 = cbind(p0, p1): n * 2 matrix - p0 = p0p1[, 1] - p1 = p0p1[, 2] - n = nrow(va) - pA = p0 # P(Y=1|A,V); here A = X - pA[x == 1] = p1[x == 1] - s0 = p0 * (1 - p0) - s1 = p1 * (1 - p1) - sA = pA * (1 - pA) - - rho = as.vector(tanh(va %*% alpha.ml)) #estimated risk differences - - expect.dl.by.dpA.squared = 1/sA - dp0.by.dphi = s0 * s1/(s0 + s1) - dp0.by.drho = -s0/(s0 + s1) - drho.by.dalpha = (1 - rho^2) * va - dphi.by.dbeta = vb - - tmp = cbind((dp0.by.drho + x) * drho.by.dalpha, dp0.by.dphi * dphi.by.dbeta) - fisher.info = (t(expect.dl.by.dpA.squared * weights * tmp) %*% tmp) - return(solve(fisher.info)) -} +### variance calculation + +var.mle.rr <- function(x, alpha.ml, beta.ml, va, vb, weights) { + p0p1 <- getProbRR(va %*% alpha.ml, vb %*% beta.ml) + n <- dim(va)[1] + pA <- rep(NA, n) # P(Y=1|A,V); here A = X + pA[x == 0] <- p0p1[x == 0, 1] + pA[x == 1] <- p0p1[x == 1, 2] + + expect.dl.by.dpsi0.squared <- (pA) / (1 - pA) + dpsi0.by.dphi <- (1 - p0p1[, 1]) * (1 - p0p1[, 2]) / ((1 - p0p1[, 1]) + (1 - + p0p1[, 2])) + dpsi0.by.dtheta <- -(1 - p0p1[, 1]) / ((1 - p0p1[, 1]) + (1 - p0p1[, 2])) + tmp <- cbind((dpsi0.by.dtheta + x) * va, dpsi0.by.dphi * vb) + ## since dtheta.by.dalpha = va, and dphi.by.dbeta = vb + fisher.info <- (t(expect.dl.by.dpsi0.squared * weights * tmp) %*% tmp) + return(solve(fisher.info)) +} + + + + +### variance calculation + +var.mle.rd <- function(x, alpha.ml, beta.ml, va, vb, weights) { + p0p1 <- getProbRD(va %*% alpha.ml, vb %*% beta.ml) + # p0p1 = cbind(p0, p1): n * 2 matrix + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + n <- nrow(va) + pA <- p0 # P(Y=1|A,V); here A = X + pA[x == 1] <- p1[x == 1] + s0 <- p0 * (1 - p0) + s1 <- p1 * (1 - p1) + sA <- pA * (1 - pA) + + rho <- as.vector(tanh(va %*% alpha.ml)) # estimated risk differences + + expect.dl.by.dpA.squared <- 1 / sA + dp0.by.dphi <- s0 * s1 / (s0 + s1) + dp0.by.drho <- -s0 / (s0 + s1) + drho.by.dalpha <- (1 - rho^2) * va + dphi.by.dbeta <- vb + + tmp <- cbind((dp0.by.drho + x) * drho.by.dalpha, dp0.by.dphi * dphi.by.dbeta) + fisher.info <- (t(expect.dl.by.dpA.squared * weights * tmp) %*% tmp) + return(solve(fisher.info)) +} diff --git a/R/1_CallMLE.R b/R/1_CallMLE.R index aa82343..7f7088b 100644 --- a/R/1_CallMLE.R +++ b/R/1_CallMLE.R @@ -1,47 +1,50 @@ - -MLEst = function(param, y, x, va, vb, weights, max.step, thres, alpha.start, - beta.start, pa, pb) { - - ## starting values for parameter optimization - if (is.null(alpha.start)) - alpha.start = rep(0, pa) - if (is.null(beta.start)) - beta.start = rep(0, pb) - - if (param == "OR") { - fit = stats::glm(y ~ vb - 1 + x * va - va - x, family = "binomial", - weights = weights, start = c(beta.start, alpha.start)) - - point.temp = summary(fit)$coefficients[, 1] - index = c((pb + 1):(pa + pb), 1:pb) - point.est = point.temp[index] - - cov = stats::vcov(fit)[index, index] - - converged = fit$converged - - } else { - - ### point estimate - mle = max.likelihood(param, y, x, va, vb, alpha.start, beta.start, - weights, max.step, thres, pa, pb) - point.est = mle$par - converged = mle$convergence - # print(point.est) - alpha.ml = point.est[1:pa] - beta.ml = point.est[(pa + 1):(pa + pb)] - - ### Computing Fisher Information: - if (param == "RR") - cov = var.mle.rr(x, alpha.ml, beta.ml, va, vb, weights) - if (param == "RD") - cov = var.mle.rd(x, alpha.ml, beta.ml, va, vb, weights) - sd.est = sqrt(diag(cov)) - - } - - name = paste(c(rep("alpha", pa), rep("beta", pb)), c(1:pa, 1:pb)) - sol = WrapResults(point.est, cov, param, name, va, vb, converged) - return(sol) - -} +MLEst <- function( + param, y, x, va, vb, weights, max.step, thres, alpha.start, + beta.start, pa, pb) { + ## starting values for parameter optimization + if (is.null(alpha.start)) { + alpha.start <- rep(0, pa) + } + if (is.null(beta.start)) { + beta.start <- rep(0, pb) + } + + if (param == "OR") { + fit <- stats::glm(y ~ vb - 1 + x * va - va - x, + family = "binomial", + weights = weights, start = c(beta.start, alpha.start) + ) + + point.temp <- summary(fit)$coefficients[, 1] + index <- c((pb + 1):(pa + pb), 1:pb) + point.est <- point.temp[index] + + cov <- stats::vcov(fit)[index, index] + + converged <- fit$converged + } else { + ### point estimate + mle <- max.likelihood( + param, y, x, va, vb, alpha.start, beta.start, + weights, max.step, thres, pa, pb + ) + point.est <- mle$par + converged <- mle$convergence + # print(point.est) + alpha.ml <- point.est[1:pa] + beta.ml <- point.est[(pa + 1):(pa + pb)] + + ### Computing Fisher Information: + if (param == "RR") { + cov <- var.mle.rr(x, alpha.ml, beta.ml, va, vb, weights) + } + if (param == "RD") { + cov <- var.mle.rd(x, alpha.ml, beta.ml, va, vb, weights) + } + sd.est <- sqrt(diag(cov)) + } + + name <- paste(c(rep("alpha", pa), rep("beta", pb)), c(1:pa, 1:pb)) + sol <- WrapResults(point.est, cov, param, name, va, vb, converged) + return(sol) +} diff --git a/R/2.1_DR_Point.R b/R/2.1_DR_Point.R index bee2cce..aaed64a 100644 --- a/R/2.1_DR_Point.R +++ b/R/2.1_DR_Point.R @@ -1,65 +1,66 @@ - - -dr.estimate.onestep = function(param, y, x, va, vb, alpha.start, beta, pscore, - wt, weights, max.step, thres, message) { - - startpars = c(alpha.start) # pars only contain alpha - ## DR estimation equation^2 - if (param == "RR") { - dr.objective = function(pars) { - p0 = getProbRR(mat_vec_mul(va,startpars), mat_vec_mul(vb, beta))[, 1] - H.alpha = as.vector(y * exp(-x * (va %*% pars))) - tmp = mat_vec_mul(t(va), (wt * (x - pscore) * (H.alpha - p0) * weights)) - return(sum(tmp^2)) - } - } - if (param == "RD") { - dr.objective = function(pars) { - p0 = getProbRD(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] - H.alpha = y - x * tanh(mat_vec_mul(va, pars)) - tmp = mat_vec_mul(t((H.alpha - p0) * (x - pscore)) , (va * wt * weights)) - return(sum(tmp^2)) - } - } - - opt <- stats::optim(startpars, dr.objective, control=list(reltol=thres)) - opt$convergence = (opt$convergence == 0) # change cf. optim() - - return(opt) -} - - - -dr.estimate.noiterate = function(param, y, x, va, vb, vc, alpha.ml, beta.ml, - gamma, optimal, weights, max.step, thres, alpha.start, message) { - - pscore = as.vector(expit(mat_vec_mul(vc, gamma))) - - if (optimal == TRUE) { - if (param == "RR") { - p0 = getProbRR(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] - wt = as.vector(1/(1 - p0 + (1 - pscore) * (exp(mat_vec_mul(-va, alpha.ml)) - - 1))) - } - if (param == "RD") { - p0 = getProbRD(mat_vec_mul(va,alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] - rho = as.vector(tanh(mat_vec_mul(va, alpha.ml))) - wt = (1 - rho) * (1 + rho)/(p0 * (1 - p0) + rho * (1 - pscore) * - (1 - 2 * p0 - rho)) - } - } else { - wt = rep(1, length(pscore)) - } - - if (is.null(alpha.start)) - alpha.start = alpha.ml - - alpha.dr.opt = dr.estimate.onestep(param, y, x, va, vb, alpha.start, beta.ml, - pscore, wt, weights, max.step, thres, message) - - # if(MESSAGE){ print(paste('DR One Step: ',' Alpha: - # ',paste(round(alpha.dr,5),collapse=', '),' Beta: - # ',paste(round(beta.ml,5),collapse=', '))) } - - return(alpha.dr.opt) -} +dr.estimate.onestep <- function( + param, y, x, va, vb, alpha.start, beta, pscore, + wt, weights, max.step, thres, message) { + startpars <- c(alpha.start) # pars only contain alpha + ## DR estimation equation^2 + if (param == "RR") { + dr.objective <- function(pars) { + p0 <- getProbRR(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] + H.alpha <- as.vector(y * exp(-x * (va %*% pars))) + tmp <- mat_vec_mul(t(va), (wt * (x - pscore) * (H.alpha - p0) * weights)) + return(sum(tmp^2)) + } + } + if (param == "RD") { + dr.objective <- function(pars) { + p0 <- getProbRD(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] + H.alpha <- y - x * tanh(mat_vec_mul(va, pars)) + tmp <- mat_vec_mul(t((H.alpha - p0) * (x - pscore)), (va * wt * weights)) + return(sum(tmp^2)) + } + } + + opt <- stats::optim(startpars, dr.objective, control = list(reltol = thres)) + opt$convergence <- (opt$convergence == 0) # change cf. optim() + + return(opt) +} + + + +dr.estimate.noiterate <- function( + param, y, x, va, vb, vc, alpha.ml, beta.ml, + gamma, optimal, weights, max.step, thres, alpha.start, message) { + pscore <- as.vector(expit(mat_vec_mul(vc, gamma))) + + if (optimal == TRUE) { + if (param == "RR") { + p0 <- getProbRR(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] + wt <- as.vector(1 / (1 - p0 + (1 - pscore) * (exp(mat_vec_mul(-va, alpha.ml)) - + 1))) + } + if (param == "RD") { + p0 <- getProbRD(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] + rho <- as.vector(tanh(mat_vec_mul(va, alpha.ml))) + wt <- (1 - rho) * (1 + rho) / (p0 * (1 - p0) + rho * (1 - pscore) * + (1 - 2 * p0 - rho)) + } + } else { + wt <- rep(1, length(pscore)) + } + + if (is.null(alpha.start)) { + alpha.start <- alpha.ml + } + + alpha.dr.opt <- dr.estimate.onestep( + param, y, x, va, vb, alpha.start, beta.ml, + pscore, wt, weights, max.step, thres, message + ) + + # if(MESSAGE){ print(paste('DR One Step: ',' Alpha: + # ',paste(round(alpha.dr,5),collapse=', '),' Beta: + # ',paste(round(beta.ml,5),collapse=', '))) } + + return(alpha.dr.opt) +} diff --git a/R/2.2.1_Hessian_RD.R b/R/2.2.1_Hessian_RD.R index 63a3dfb..6e5ddb0 100644 --- a/R/2.2.1_Hessian_RD.R +++ b/R/2.2.1_Hessian_RD.R @@ -1,66 +1,69 @@ -Hessian2RD = function(y, x, va, vb, alpha.ml, beta.ml, cnt) { - # calculating the Hessian using the second derivative have to do so - # because under mis-specification of models Hessian no longer equals the - # square of the first order derivatives - - p0p1 = getProbRD(va %*% alpha.ml, vb %*% beta.ml) - # p0p1 = cbind(p0, p1): n * 2 matrix - p0 = p0p1[, 1] - p1 = p0p1[, 2] - n = nrow(va) - pA = p0 - pA[x == 1] = p1[x == 1] - s0 = p0 * (1 - p0) - s1 = p1 * (1 - p1) - sA = pA * (1 - pA) - - rho = as.vector(tanh(va %*% alpha.ml)) #estimated risk differences - - ### First order derivatives ### - - dl.by.dpA = (y - pA)/sA - dp0.by.dphi = s0 * s1/(s0 + s1) - dp0.by.drho = -s0/(s0 + s1) - drho.by.dalpha = va * (1 - rho^2) - dphi.by.dbeta = vb - - dpA.by.drho = dp0.by.drho + x - dpA.by.dalpha = drho.by.dalpha * dpA.by.drho - dpA.by.dphi = dp0.by.dphi - dpA.by.dbeta = dphi.by.dbeta * dpA.by.dphi - - ### Second order derivatives ### - - d2l.by.dpA.2 = -(y - pA)^2/sA^2 - d2pA.by.drho.2 = s0 * s1 * (2 - 2 * p0 - 2 * p1)/(s0 + s1)^3 - d2pA.by.dphi.drho = (s0 * (1 - 2 * p1) - s1 * (1 - 2 * p0)) * s0 * s1/(s0 + - s1)^3 - d2pA.by.dphi.2 = (s0^2 * (1 - 2 * p1) + s1^2 * (1 - 2 * p0)) * s0 * s1/(s0 + - s1)^3 - - d2rho.by.dalpha.2 = -2 * t(va * rho) %*% drho.by.dalpha - - ### Compute elements of the Hessian matrix ### - - d2l.by.dalpha.2 = t(dpA.by.dalpha * d2l.by.dpA.2 * cnt) %*% dpA.by.dalpha + - t(drho.by.dalpha * dl.by.dpA * d2pA.by.drho.2 * cnt) %*% drho.by.dalpha - - 2 * t(va * rho * dl.by.dpA * dpA.by.drho * cnt) %*% drho.by.dalpha - - d2l.by.dalpha.dbeta = t(dpA.by.dalpha * d2l.by.dpA.2 * cnt) %*% dpA.by.dbeta + - t(drho.by.dalpha * dl.by.dpA * d2pA.by.dphi.drho * cnt) %*% dphi.by.dbeta - d2l.by.dbeta.dalpha = t(d2l.by.dalpha.dbeta) - - d2l.by.dbeta.2 = t(dpA.by.dbeta * d2l.by.dpA.2 * cnt) %*% dpA.by.dbeta + - t(dphi.by.dbeta * dl.by.dpA * d2pA.by.dphi.2 * cnt) %*% dphi.by.dbeta - - hessian = -rbind(cbind(d2l.by.dalpha.2, d2l.by.dalpha.dbeta), cbind(d2l.by.dbeta.dalpha, - d2l.by.dbeta.2)) - ### NB Note the extra minus sign here - - return(list(hessian = hessian, p0 = p0, p1 = p1, pA = pA, s0 = s0, s1 = s1, - sA = sA, rho = rho, dl.by.dpA = dl.by.dpA, dp0.by.dphi = dp0.by.dphi, - dp0.by.drho = dp0.by.drho, drho.by.dalpha = drho.by.dalpha, dphi.by.dbeta = dphi.by.dbeta, - dpA.by.drho = dpA.by.drho, dpA.by.dalpha = dpA.by.dalpha, dpA.by.dphi = dpA.by.dphi, - dpA.by.dbeta = dpA.by.dbeta)) - -} +Hessian2RD <- function(y, x, va, vb, alpha.ml, beta.ml, cnt) { + # calculating the Hessian using the second derivative have to do so + # because under mis-specification of models Hessian no longer equals the + # square of the first order derivatives + + p0p1 <- getProbRD(va %*% alpha.ml, vb %*% beta.ml) + # p0p1 = cbind(p0, p1): n * 2 matrix + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + n <- nrow(va) + pA <- p0 + pA[x == 1] <- p1[x == 1] + s0 <- p0 * (1 - p0) + s1 <- p1 * (1 - p1) + sA <- pA * (1 - pA) + + rho <- as.vector(tanh(va %*% alpha.ml)) # estimated risk differences + + ### First order derivatives ### + + dl.by.dpA <- (y - pA) / sA + dp0.by.dphi <- s0 * s1 / (s0 + s1) + dp0.by.drho <- -s0 / (s0 + s1) + drho.by.dalpha <- va * (1 - rho^2) + dphi.by.dbeta <- vb + + dpA.by.drho <- dp0.by.drho + x + dpA.by.dalpha <- drho.by.dalpha * dpA.by.drho + dpA.by.dphi <- dp0.by.dphi + dpA.by.dbeta <- dphi.by.dbeta * dpA.by.dphi + + ### Second order derivatives ### + + d2l.by.dpA.2 <- -(y - pA)^2 / sA^2 + d2pA.by.drho.2 <- s0 * s1 * (2 - 2 * p0 - 2 * p1) / (s0 + s1)^3 + d2pA.by.dphi.drho <- (s0 * (1 - 2 * p1) - s1 * (1 - 2 * p0)) * s0 * s1 / (s0 + + s1)^3 + d2pA.by.dphi.2 <- (s0^2 * (1 - 2 * p1) + s1^2 * (1 - 2 * p0)) * s0 * s1 / (s0 + + s1)^3 + + d2rho.by.dalpha.2 <- -2 * t(va * rho) %*% drho.by.dalpha + + ### Compute elements of the Hessian matrix ### + + d2l.by.dalpha.2 <- t(dpA.by.dalpha * d2l.by.dpA.2 * cnt) %*% dpA.by.dalpha + + t(drho.by.dalpha * dl.by.dpA * d2pA.by.drho.2 * cnt) %*% drho.by.dalpha - + 2 * t(va * rho * dl.by.dpA * dpA.by.drho * cnt) %*% drho.by.dalpha + + d2l.by.dalpha.dbeta <- t(dpA.by.dalpha * d2l.by.dpA.2 * cnt) %*% dpA.by.dbeta + + t(drho.by.dalpha * dl.by.dpA * d2pA.by.dphi.drho * cnt) %*% dphi.by.dbeta + d2l.by.dbeta.dalpha <- t(d2l.by.dalpha.dbeta) + + d2l.by.dbeta.2 <- t(dpA.by.dbeta * d2l.by.dpA.2 * cnt) %*% dpA.by.dbeta + + t(dphi.by.dbeta * dl.by.dpA * d2pA.by.dphi.2 * cnt) %*% dphi.by.dbeta + + hessian <- -rbind(cbind(d2l.by.dalpha.2, d2l.by.dalpha.dbeta), cbind( + d2l.by.dbeta.dalpha, + d2l.by.dbeta.2 + )) + ### NB Note the extra minus sign here + + return(list( + hessian = hessian, p0 = p0, p1 = p1, pA = pA, s0 = s0, s1 = s1, + sA = sA, rho = rho, dl.by.dpA = dl.by.dpA, dp0.by.dphi = dp0.by.dphi, + dp0.by.drho = dp0.by.drho, drho.by.dalpha = drho.by.dalpha, dphi.by.dbeta = dphi.by.dbeta, + dpA.by.drho = dpA.by.drho, dpA.by.dalpha = dpA.by.dalpha, dpA.by.dphi = dpA.by.dphi, + dpA.by.dbeta = dpA.by.dbeta + )) +} diff --git a/R/2.2.1_Hessian_RR.R b/R/2.2.1_Hessian_RR.R index 3d4dbdc..93b7bd4 100644 --- a/R/2.2.1_Hessian_RR.R +++ b/R/2.2.1_Hessian_RR.R @@ -1,73 +1,76 @@ -Hessian2RR = function(y, x, va, vb, alpha.ml, beta.ml, weights) { - # calculating the Hessian using the second derivative have to do so - # because under mis-specification of models Hessian no longer equals the - # square of the first order derivatives - - p0p1 = getProbRR(va %*% alpha.ml, vb %*% beta.ml) - # p0p1 = cbind(p0, p1): n * 2 matrix - p0 = p0p1[, 1] - p1 = p0p1[, 2] - n = nrow(va) - pA = p0 - pA[x == 1] = p1[x == 1] - - - ### Building blocks - - dpsi0.by.dtheta = -(1 - p0)/(1 - p0 + 1 - p1) - dpsi0.by.dphi = (1 - p0) * (1 - p1)/(1 - p0 + 1 - p1) - - dtheta.by.dalpha = va - dphi.by.dbeta = vb - - dl.by.dpsi0 = (y - pA)/(1 - pA) - d2l.by.dpsi0.2 = (y - 1) * pA/((1 - pA)^2) - - - - ###### d2l.by.dalpha.2 - - d2psi0.by.dtheta.2 = ((p0 - p1) * dpsi0.by.dtheta - (1 - p0) * p1)/((1 - - p0 + 1 - p1)^2) - - d2l.by.dtheta.2 = d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x)^2 + dl.by.dpsi0 * - d2psi0.by.dtheta.2 - - d2l.by.dalpha.2 = t(dtheta.by.dalpha * d2l.by.dtheta.2 * weights) %*% - dtheta.by.dalpha - - - ###### d2l.by.dalpha.dbeta - - d2psi0.by.dtheta.dphi = (1 - p0) * (1 - p1) * (p0 - p1)/(1 - p0 + 1 - - p1)^3 - - d2l.by.dtheta.dphi = d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x) * dpsi0.by.dphi + - dl.by.dpsi0 * d2psi0.by.dtheta.dphi - - d2l.by.dalpha.dbeta = t(dtheta.by.dalpha * d2l.by.dtheta.dphi * weights) %*% - dphi.by.dbeta - d2l.by.dbeta.dalpha = t(d2l.by.dalpha.dbeta) - # d2l.by.dalpha.dbeta is symmetric itself if (because) va=vb - - - #### d2l.by.dbeta2 - - d2psi0.by.dphi.2 = (-(p0 * (1 - p1)^2 + p1 * (1 - p0)^2)/(1 - p0 + 1 - - p1)^2) * dpsi0.by.dphi - - d2l.by.dphi.2 = d2l.by.dpsi0.2 * (dpsi0.by.dphi)^2 + dl.by.dpsi0 * d2psi0.by.dphi.2 - - d2l.by.dbeta.2 = t(dphi.by.dbeta * d2l.by.dphi.2 * weights) %*% dphi.by.dbeta - - - - hessian = -rbind(cbind(d2l.by.dalpha.2, d2l.by.dalpha.dbeta), cbind(d2l.by.dbeta.dalpha, - d2l.by.dbeta.2)) - ### NB Note the extra minus sign here - - return(list(hessian = hessian, p0 = p0, p1 = p1, pA = pA, dpsi0.by.dtheta = dpsi0.by.dtheta, - dpsi0.by.dphi = dpsi0.by.dphi, dtheta.by.dalpha = dtheta.by.dalpha, - dphi.by.dbeta = dphi.by.dbeta, dl.by.dpsi0 = dl.by.dpsi0)) - -} +Hessian2RR <- function(y, x, va, vb, alpha.ml, beta.ml, weights) { + # calculating the Hessian using the second derivative have to do so + # because under mis-specification of models Hessian no longer equals the + # square of the first order derivatives + + p0p1 <- getProbRR(va %*% alpha.ml, vb %*% beta.ml) + # p0p1 = cbind(p0, p1): n * 2 matrix + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + n <- nrow(va) + pA <- p0 + pA[x == 1] <- p1[x == 1] + + + ### Building blocks + + dpsi0.by.dtheta <- -(1 - p0) / (1 - p0 + 1 - p1) + dpsi0.by.dphi <- (1 - p0) * (1 - p1) / (1 - p0 + 1 - p1) + + dtheta.by.dalpha <- va + dphi.by.dbeta <- vb + + dl.by.dpsi0 <- (y - pA) / (1 - pA) + d2l.by.dpsi0.2 <- (y - 1) * pA / ((1 - pA)^2) + + + + ###### d2l.by.dalpha.2 + + d2psi0.by.dtheta.2 <- ((p0 - p1) * dpsi0.by.dtheta - (1 - p0) * p1) / ((1 - + p0 + 1 - p1)^2) + + d2l.by.dtheta.2 <- d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x)^2 + dl.by.dpsi0 * + d2psi0.by.dtheta.2 + + d2l.by.dalpha.2 <- t(dtheta.by.dalpha * d2l.by.dtheta.2 * weights) %*% + dtheta.by.dalpha + + + ###### d2l.by.dalpha.dbeta + + d2psi0.by.dtheta.dphi <- (1 - p0) * (1 - p1) * (p0 - p1) / (1 - p0 + 1 - + p1)^3 + + d2l.by.dtheta.dphi <- d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x) * dpsi0.by.dphi + + dl.by.dpsi0 * d2psi0.by.dtheta.dphi + + d2l.by.dalpha.dbeta <- t(dtheta.by.dalpha * d2l.by.dtheta.dphi * weights) %*% + dphi.by.dbeta + d2l.by.dbeta.dalpha <- t(d2l.by.dalpha.dbeta) + # d2l.by.dalpha.dbeta is symmetric itself if (because) va=vb + + + #### d2l.by.dbeta2 + + d2psi0.by.dphi.2 <- (-(p0 * (1 - p1)^2 + p1 * (1 - p0)^2) / (1 - p0 + 1 - + p1)^2) * dpsi0.by.dphi + + d2l.by.dphi.2 <- d2l.by.dpsi0.2 * (dpsi0.by.dphi)^2 + dl.by.dpsi0 * d2psi0.by.dphi.2 + + d2l.by.dbeta.2 <- t(dphi.by.dbeta * d2l.by.dphi.2 * weights) %*% dphi.by.dbeta + + + + hessian <- -rbind(cbind(d2l.by.dalpha.2, d2l.by.dalpha.dbeta), cbind( + d2l.by.dbeta.dalpha, + d2l.by.dbeta.2 + )) + ### NB Note the extra minus sign here + + return(list( + hessian = hessian, p0 = p0, p1 = p1, pA = pA, dpsi0.by.dtheta = dpsi0.by.dtheta, + dpsi0.by.dphi = dpsi0.by.dphi, dtheta.by.dalpha = dtheta.by.dalpha, + dphi.by.dbeta = dphi.by.dbeta, dl.by.dpsi0 = dl.by.dpsi0 + )) +} diff --git a/R/2.2_DR_Var_RD.R b/R/2.2_DR_Var_RD.R index 97730f0..621423d 100644 --- a/R/2.2_DR_Var_RD.R +++ b/R/2.2_DR_Var_RD.R @@ -1,120 +1,123 @@ - -## Sandwich estimator for variance of RD - -var.rd.dr = function(y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, - optimal, weights) { - ######################################## - - pscore = as.vector(expit(vc %*% gamma)) - n = length(pscore) - - ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing - ### the Hessian: - - Hrd = Hessian2RD(y, x, va, vb, alpha.ml, beta.ml, weights) - hessian = Hrd$hessian - p0 = Hrd$p0; p1 = Hrd$p1; pA = Hrd$pA - s0 = Hrd$s0; s1 = Hrd$s1; sA = Hrd$sA - rho = Hrd$rho - dl.by.dpA = Hrd$dl.by.dpA - dp0.by.dphi = Hrd$dp0.by.dphi - dp0.by.drho = Hrd$dp0.by.drho - drho.by.dalpha = Hrd$drho.by.dalpha - dphi.by.dbeta = Hrd$dphi.by.dbeta - dpA.by.drho = Hrd$dpA.by.drho - dpA.by.dalpha = Hrd$dpA.by.dalpha - dpA.by.dphi = Hrd$dpA.by.dphi - dpA.by.dbeta = Hrd$dpA.by.dbeta - - - ############# extra building blocks ########################## - - H.alpha = y - x * as.vector(tanh(va %*% alpha.dr)) - - ############# Calculation of optimal vector (used in several places below) ## - - if (optimal == TRUE) { - wt = (1 - rho^2)/(pscore * s0 + (1 - pscore) * s1) - } else { - wt = rep(1, n) - } - - - ### 2. -E[dU.by.dalphaml.betaml] #################################### - - dU.by.dp0 = -va * wt * (x - pscore) # n by 2 - dp0.by.dalpha.beta = cbind(drho.by.dalpha * dp0.by.drho, dphi.by.dbeta * - dp0.by.dphi) # n by 4 - - dU.by.dwt = va * (x - pscore) * (H.alpha - p0) # n by 2 - - if (optimal == TRUE) { - esA = pscore * s0 + (1 - pscore) * s1 # E[s_{1-A}]... - dwt.by.drho = (-2 * rho * esA - (1 - rho^2) * (1 - pscore) * (1 - - 2 * p1))/esA^2 - dwt.by.dp0 = -(1 - rho^2) * (2 * pscore * rho + 1 - 2 * p1)/esA^2 - - dwt.by.dalpha = drho.by.dalpha * (dwt.by.drho + dwt.by.dp0 * dp0.by.drho) - dwt.by.dbeta = dphi.by.dbeta * dwt.by.dp0 * dp0.by.dphi - - dwt.by.dalpha.beta = cbind(dwt.by.dalpha, dwt.by.dbeta) # n by 4 - } else { - dwt.by.dalpha.beta = matrix(0, n, ncol(va) + ncol(vb)) - } - - dU.by.dalpha.ml.beta.ml = t(dU.by.dp0 * weights) %*% (dp0.by.dalpha.beta) + - t(dU.by.dwt * weights) %*% dwt.by.dalpha.beta - - - ### 3. tau = -E[dU/dalpha.dr] ######################################## (This - ### is the bread of the sandwich estimate) - - dU.by.dH = va * wt * (x - pscore) # n by 2 - rho.dr = as.vector(tanh(va %*% alpha.dr)) - dH.by.dalpha.dr = -va * x * (1 - rho.dr^2) - - tau = -t(dU.by.dH * weights) %*% dH.by.dalpha.dr/sum(weights) # 2 by 2 - - - ### 4. E[d(prop score score equation)/dgamma] - - dpscore.by.dgamma = vc * pscore * (1 - pscore) # n by 2 - part4 = -t(vc * weights) %*% dpscore.by.dgamma # 2 by 2 - - - ### 5. E[dU/dgamma] - - dU.by.dpscore = -va * wt * (H.alpha - p0) # n by 2 - - if (optimal == TRUE) { - dwt.by.dpscore = -(1 - rho^2) * (s0 - s1)/esA^2 - dwt.by.dgamma = dpscore.by.dgamma * dwt.by.dpscore # n by 2 - } else { - dwt.by.dgamma = matrix(0, n, ncol(vc)) - } - - dU.by.dgamma = t(dU.by.dpscore * weights) %*% dpscore.by.dgamma + t(dU.by.dwt * - weights) %*% dwt.by.dgamma # 2 by 2 - - - - ############################################################################# Assembling semi-parametric variance matrix - - U = va * wt * (x - pscore) * (H.alpha - p0) # n by 2 - - S = cbind(dpA.by.dalpha, dpA.by.dbeta) * dl.by.dpA - - pscore.score = vc * (x - pscore) - - Utilde = U - t(dU.by.dalpha.ml.beta.ml %*% (-solve(hessian)) %*% t(S)) - - t(dU.by.dgamma %*% (solve(part4)) %*% t(pscore.score)) # n by 2 - USigma = t(Utilde * weights) %*% Utilde/sum(weights) - - - - ################################### Asymptotic var matrix for alpha.dr - - alpha.dr.variance = solve(tau) %*% USigma %*% solve(tau)/sum(weights) - return(alpha.dr.variance) - -} +## Sandwich estimator for variance of RD + +var.rd.dr <- function( + y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, + optimal, weights) { + ######################################## + + pscore <- as.vector(expit(vc %*% gamma)) + n <- length(pscore) + + ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing + ### the Hessian: + + Hrd <- Hessian2RD(y, x, va, vb, alpha.ml, beta.ml, weights) + hessian <- Hrd$hessian + p0 <- Hrd$p0 + p1 <- Hrd$p1 + pA <- Hrd$pA + s0 <- Hrd$s0 + s1 <- Hrd$s1 + sA <- Hrd$sA + rho <- Hrd$rho + dl.by.dpA <- Hrd$dl.by.dpA + dp0.by.dphi <- Hrd$dp0.by.dphi + dp0.by.drho <- Hrd$dp0.by.drho + drho.by.dalpha <- Hrd$drho.by.dalpha + dphi.by.dbeta <- Hrd$dphi.by.dbeta + dpA.by.drho <- Hrd$dpA.by.drho + dpA.by.dalpha <- Hrd$dpA.by.dalpha + dpA.by.dphi <- Hrd$dpA.by.dphi + dpA.by.dbeta <- Hrd$dpA.by.dbeta + + + ############# extra building blocks ########################## + + H.alpha <- y - x * as.vector(tanh(va %*% alpha.dr)) + + ############# Calculation of optimal vector (used in several places below) ## + + if (optimal == TRUE) { + wt <- (1 - rho^2) / (pscore * s0 + (1 - pscore) * s1) + } else { + wt <- rep(1, n) + } + + + ### 2. -E[dU.by.dalphaml.betaml] #################################### + + dU.by.dp0 <- -va * wt * (x - pscore) # n by 2 + dp0.by.dalpha.beta <- cbind(drho.by.dalpha * dp0.by.drho, dphi.by.dbeta * + dp0.by.dphi) # n by 4 + + dU.by.dwt <- va * (x - pscore) * (H.alpha - p0) # n by 2 + + if (optimal == TRUE) { + esA <- pscore * s0 + (1 - pscore) * s1 # E[s_{1-A}]... + dwt.by.drho <- (-2 * rho * esA - (1 - rho^2) * (1 - pscore) * (1 - + 2 * p1)) / esA^2 + dwt.by.dp0 <- -(1 - rho^2) * (2 * pscore * rho + 1 - 2 * p1) / esA^2 + + dwt.by.dalpha <- drho.by.dalpha * (dwt.by.drho + dwt.by.dp0 * dp0.by.drho) + dwt.by.dbeta <- dphi.by.dbeta * dwt.by.dp0 * dp0.by.dphi + + dwt.by.dalpha.beta <- cbind(dwt.by.dalpha, dwt.by.dbeta) # n by 4 + } else { + dwt.by.dalpha.beta <- matrix(0, n, ncol(va) + ncol(vb)) + } + + dU.by.dalpha.ml.beta.ml <- t(dU.by.dp0 * weights) %*% (dp0.by.dalpha.beta) + + t(dU.by.dwt * weights) %*% dwt.by.dalpha.beta + + + ### 3. tau = -E[dU/dalpha.dr] ######################################## (This + ### is the bread of the sandwich estimate) + + dU.by.dH <- va * wt * (x - pscore) # n by 2 + rho.dr <- as.vector(tanh(va %*% alpha.dr)) + dH.by.dalpha.dr <- -va * x * (1 - rho.dr^2) + + tau <- -t(dU.by.dH * weights) %*% dH.by.dalpha.dr / sum(weights) # 2 by 2 + + + ### 4. E[d(prop score score equation)/dgamma] + + dpscore.by.dgamma <- vc * pscore * (1 - pscore) # n by 2 + part4 <- -t(vc * weights) %*% dpscore.by.dgamma # 2 by 2 + + + ### 5. E[dU/dgamma] + + dU.by.dpscore <- -va * wt * (H.alpha - p0) # n by 2 + + if (optimal == TRUE) { + dwt.by.dpscore <- -(1 - rho^2) * (s0 - s1) / esA^2 + dwt.by.dgamma <- dpscore.by.dgamma * dwt.by.dpscore # n by 2 + } else { + dwt.by.dgamma <- matrix(0, n, ncol(vc)) + } + + dU.by.dgamma <- t(dU.by.dpscore * weights) %*% dpscore.by.dgamma + t(dU.by.dwt * + weights) %*% dwt.by.dgamma # 2 by 2 + + + + ############################################################################# Assembling semi-parametric variance matrix + + U <- va * wt * (x - pscore) * (H.alpha - p0) # n by 2 + + S <- cbind(dpA.by.dalpha, dpA.by.dbeta) * dl.by.dpA + + pscore.score <- vc * (x - pscore) + + Utilde <- U - t(dU.by.dalpha.ml.beta.ml %*% (-solve(hessian)) %*% t(S)) - + t(dU.by.dgamma %*% (solve(part4)) %*% t(pscore.score)) # n by 2 + USigma <- t(Utilde * weights) %*% Utilde / sum(weights) + + + + ################################### Asymptotic var matrix for alpha.dr + + alpha.dr.variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights) + return(alpha.dr.variance) +} diff --git a/R/2.2_DR_Var_RR.R b/R/2.2_DR_Var_RR.R index 66277f6..8708cc0 100644 --- a/R/2.2_DR_Var_RR.R +++ b/R/2.2_DR_Var_RR.R @@ -1,113 +1,114 @@ - -## Sandwich estimator for variance of RR - -var.rr.dr = function(y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, - optimal, weights) { - ######################################## - - pscore = as.vector(expit(vc %*% gamma)) - n = length(pscore) - - ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing - ### the Hessian: - - Hrr = Hessian2RR(y, x, va, vb, alpha.ml, beta.ml, weights) - hessian = Hrr$hessian - p0 = Hrr$p0; p1 = Hrr$p1; pA = Hrr$pA - dpsi0.by.dtheta = Hrr$dpsi0.by.dtheta - dpsi0.by.dphi = Hrr$dpsi0.by.dphi - dtheta.by.dalpha = Hrr$dtheta.by.dalpha - dphi.by.dbeta = Hrr$dphi.by.dbeta - dl.by.dpsi0 = Hrr$dl.by.dpsi0 - - ############# extra building blocks ########################## - - H.alpha = y * exp(-x * (as.vector(va %*% alpha.dr))) - - ############# Calculation of optimal vector (used in several places below) ## - - if (optimal == TRUE) { - theta = as.vector(va %*% alpha.ml) # avoid n by 1 matrix - dtheta.by.dalpha.beta = cbind(va, matrix(0, n, length(beta.ml))) - wt = 1/(1 - p0 + (1 - pscore) * (exp(-theta) - 1)) - } else { - wt = rep(1, n) - } - - - ### 2. -E[dU.by.dalphaml.betaml] #################################### - - dU.by.dp0 = -va * wt * (x - pscore) # n by 2 - dp0.by.dpsi0 = p0 - dpsi0.by.dalpha.beta = cbind(dpsi0.by.dtheta * dtheta.by.dalpha, dpsi0.by.dphi * - dphi.by.dbeta) # n by 4 - # 4 = 2 (alpha) + 2 (beta) - dp0.by.dalpha.beta = dpsi0.by.dalpha.beta * dp0.by.dpsi0 # n by 4 - - dU.by.dwt = va * (x - pscore) * (H.alpha - p0) # n by 2 - dwt.by.dwti = -wt^2 # n - # wti is short for wt_inv - dU.by.dwti = dU.by.dwt * dwt.by.dwti # n by 2 - if (optimal == TRUE) { - dwti.by.dalpha.beta = -dp0.by.dalpha.beta - (1 - pscore) * exp(-theta) * - dtheta.by.dalpha.beta # n by 4 - } else { - dwti.by.dalpha.beta = matrix(0, n, ncol(va) + ncol(vb)) - } - - dU.by.dalpha.ml.beta.ml = t(dU.by.dp0 * weights) %*% dp0.by.dalpha.beta + - t(dU.by.dwti * weights) %*% dwti.by.dalpha.beta - - - ### 3. tau = -E[dU/dalpha.dr] ######################################## (This - ### is the bread of the sandwich estimate) - - dU.by.dH = va * wt * (x - pscore) # n by 2 - dH.by.dalpha.dr = -va * x * H.alpha # n by 2 - - tau = -t(dU.by.dH * weights) %*% dH.by.dalpha.dr/sum(weights) # 2 by 2 - - - ### 4. E[d(prop score score equation)/dgamma] - - dpscore.by.dgamma = vc * pscore * (1 - pscore) # n by 2 - part4 = -t(vc * weights) %*% dpscore.by.dgamma # 2 by 2 - - - ### 5. E[dU/dgamma] - - dU.by.dpscore = -va * wt * (H.alpha - p0) # n by 2 - - if (optimal == TRUE) { - dwti.by.dpscore = 1 - exp(-theta) # n - dwti.by.dgamma = dpscore.by.dgamma * dwti.by.dpscore # n by 2 - } else { - dwti.by.dgamma = matrix(0, n, ncol(vc)) - } - - dU.by.dgamma = t(dU.by.dpscore * weights) %*% dpscore.by.dgamma + t(dU.by.dwti * - weights) %*% dwti.by.dgamma # 2 by 2 - - - - ############################################################################# Assembling semi-parametric variance matrix - - U = va * wt * (x - pscore) * (H.alpha - p0) # n by 2 - - S = cbind(dl.by.dpsi0 * (dpsi0.by.dtheta + x) * dtheta.by.dalpha, dl.by.dpsi0 * - dpsi0.by.dphi * dphi.by.dbeta) - pscore.score = vc * (x - pscore) - - Utilde = U - t(dU.by.dalpha.ml.beta.ml %*% (-solve(hessian)) %*% t(S)) - - t(dU.by.dgamma %*% (solve(part4)) %*% t(pscore.score)) # n by 2 - USigma = t(Utilde * weights) %*% Utilde/sum(weights) - - - - ################################### Asymptotic var matrix for alpha.dr - - alpha.dr.variance = solve(tau) %*% USigma %*% solve(tau)/sum(weights) - - return(alpha.dr.variance) - -} +## Sandwich estimator for variance of RR + +var.rr.dr <- function( + y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, + optimal, weights) { + ######################################## + + pscore <- as.vector(expit(vc %*% gamma)) + n <- length(pscore) + + ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing + ### the Hessian: + + Hrr <- Hessian2RR(y, x, va, vb, alpha.ml, beta.ml, weights) + hessian <- Hrr$hessian + p0 <- Hrr$p0 + p1 <- Hrr$p1 + pA <- Hrr$pA + dpsi0.by.dtheta <- Hrr$dpsi0.by.dtheta + dpsi0.by.dphi <- Hrr$dpsi0.by.dphi + dtheta.by.dalpha <- Hrr$dtheta.by.dalpha + dphi.by.dbeta <- Hrr$dphi.by.dbeta + dl.by.dpsi0 <- Hrr$dl.by.dpsi0 + + ############# extra building blocks ########################## + + H.alpha <- y * exp(-x * (as.vector(va %*% alpha.dr))) + + ############# Calculation of optimal vector (used in several places below) ## + + if (optimal == TRUE) { + theta <- as.vector(va %*% alpha.ml) # avoid n by 1 matrix + dtheta.by.dalpha.beta <- cbind(va, matrix(0, n, length(beta.ml))) + wt <- 1 / (1 - p0 + (1 - pscore) * (exp(-theta) - 1)) + } else { + wt <- rep(1, n) + } + + + ### 2. -E[dU.by.dalphaml.betaml] #################################### + + dU.by.dp0 <- -va * wt * (x - pscore) # n by 2 + dp0.by.dpsi0 <- p0 + dpsi0.by.dalpha.beta <- cbind(dpsi0.by.dtheta * dtheta.by.dalpha, dpsi0.by.dphi * + dphi.by.dbeta) # n by 4 + # 4 = 2 (alpha) + 2 (beta) + dp0.by.dalpha.beta <- dpsi0.by.dalpha.beta * dp0.by.dpsi0 # n by 4 + + dU.by.dwt <- va * (x - pscore) * (H.alpha - p0) # n by 2 + dwt.by.dwti <- -wt^2 # n + # wti is short for wt_inv + dU.by.dwti <- dU.by.dwt * dwt.by.dwti # n by 2 + if (optimal == TRUE) { + dwti.by.dalpha.beta <- -dp0.by.dalpha.beta - (1 - pscore) * exp(-theta) * + dtheta.by.dalpha.beta # n by 4 + } else { + dwti.by.dalpha.beta <- matrix(0, n, ncol(va) + ncol(vb)) + } + + dU.by.dalpha.ml.beta.ml <- t(dU.by.dp0 * weights) %*% dp0.by.dalpha.beta + + t(dU.by.dwti * weights) %*% dwti.by.dalpha.beta + + + ### 3. tau = -E[dU/dalpha.dr] ######################################## (This + ### is the bread of the sandwich estimate) + + dU.by.dH <- va * wt * (x - pscore) # n by 2 + dH.by.dalpha.dr <- -va * x * H.alpha # n by 2 + + tau <- -t(dU.by.dH * weights) %*% dH.by.dalpha.dr / sum(weights) # 2 by 2 + + + ### 4. E[d(prop score score equation)/dgamma] + + dpscore.by.dgamma <- vc * pscore * (1 - pscore) # n by 2 + part4 <- -t(vc * weights) %*% dpscore.by.dgamma # 2 by 2 + + + ### 5. E[dU/dgamma] + + dU.by.dpscore <- -va * wt * (H.alpha - p0) # n by 2 + + if (optimal == TRUE) { + dwti.by.dpscore <- 1 - exp(-theta) # n + dwti.by.dgamma <- dpscore.by.dgamma * dwti.by.dpscore # n by 2 + } else { + dwti.by.dgamma <- matrix(0, n, ncol(vc)) + } + + dU.by.dgamma <- t(dU.by.dpscore * weights) %*% dpscore.by.dgamma + t(dU.by.dwti * + weights) %*% dwti.by.dgamma # 2 by 2 + + + + ############################################################################# Assembling semi-parametric variance matrix + + U <- va * wt * (x - pscore) * (H.alpha - p0) # n by 2 + + S <- cbind(dl.by.dpsi0 * (dpsi0.by.dtheta + x) * dtheta.by.dalpha, dl.by.dpsi0 * + dpsi0.by.dphi * dphi.by.dbeta) + pscore.score <- vc * (x - pscore) + + Utilde <- U - t(dU.by.dalpha.ml.beta.ml %*% (-solve(hessian)) %*% t(S)) - + t(dU.by.dgamma %*% (solve(part4)) %*% t(pscore.score)) # n by 2 + USigma <- t(Utilde * weights) %*% Utilde / sum(weights) + + + + ################################### Asymptotic var matrix for alpha.dr + + alpha.dr.variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights) + + return(alpha.dr.variance) +} diff --git a/R/2_CallDR.R b/R/2_CallDR.R index 6ca8816..113bd52 100644 --- a/R/2_CallDR.R +++ b/R/2_CallDR.R @@ -1,31 +1,39 @@ - -DREst = function(param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, - weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message) { - - dr.est = dr.estimate.noiterate(param, y, x, va, vb, vc, alpha.ml, beta.ml, - gamma, optimal, weights, max.step, thres, alpha.start, message) - point.est = dr.est$par - converged = dr.est$convergence - - if (param == "RR") - alpha.cov = var.rr.dr(y, x, va, vb, vc, point.est, alpha.ml, beta.ml, - gamma, optimal, weights) - if (param == "RD") - alpha.cov = var.rd.dr(y, x, va, vb, vc, point.est, alpha.ml, beta.ml, - gamma, optimal, weights) - - pa = dim(va)[2] - pb = dim(vb)[2] - pc = dim(vc)[2] - name = paste(c(rep("alpha", pa), rep("beta", pb), rep("gamma", pc)), - c(1:pa, 1:pb, 1:pc)) - point.est = c(point.est, beta.ml, gamma) - cov = matrix(NA,pa+pb+pc, pa+pb+pc) - cov[1:pa,1:pa] = alpha.cov - cov[(pa+1):(pa+pb),(pa+1):(pa+pb)] = beta.cov - cov[(pa+pb+1):(pa+pb+pc),(pa+pb+1):(pa+pb+pc)] = gamma.cov - - sol = WrapResults(point.est, cov, param, name, va, vb, converged) - return(sol) - -} +DREst <- function( + param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, + weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message) { + dr.est <- dr.estimate.noiterate( + param, y, x, va, vb, vc, alpha.ml, beta.ml, + gamma, optimal, weights, max.step, thres, alpha.start, message + ) + point.est <- dr.est$par + converged <- dr.est$convergence + + if (param == "RR") { + alpha.cov <- var.rr.dr( + y, x, va, vb, vc, point.est, alpha.ml, beta.ml, + gamma, optimal, weights + ) + } + if (param == "RD") { + alpha.cov <- var.rd.dr( + y, x, va, vb, vc, point.est, alpha.ml, beta.ml, + gamma, optimal, weights + ) + } + + pa <- dim(va)[2] + pb <- dim(vb)[2] + pc <- dim(vc)[2] + name <- paste( + c(rep("alpha", pa), rep("beta", pb), rep("gamma", pc)), + c(1:pa, 1:pb, 1:pc) + ) + point.est <- c(point.est, beta.ml, gamma) + cov <- matrix(NA, pa + pb + pc, pa + pb + pc) + cov[1:pa, 1:pa] <- alpha.cov + cov[(pa + 1):(pa + pb), (pa + 1):(pa + pb)] <- beta.cov + cov[(pa + pb + 1):(pa + pb + pc), (pa + pb + 1):(pa + pb + pc)] <- gamma.cov + + sol <- WrapResults(point.est, cov, param, name, va, vb, converged) + return(sol) +} diff --git a/R/MyFunc.R b/R/MyFunc.R index 2c95ee1..7462499 100644 --- a/R/MyFunc.R +++ b/R/MyFunc.R @@ -1,71 +1,74 @@ -logit = function(prob) { - log(prob) - log(1 - prob) -} - -expit = function(logodds) { - 1/(1 + exp(-logodds)) -} - - -getlogop = function(p0, p1) { - log(p0) + log(p1) - log(1 - p0) - log(1 - p1) -} - -getlogrr = function(p0, p1) { - log(p1) - log(p0) -} - -getatanhrd = function(p0, p1) { - atanh(p1 - p0) -} - - -## Function for checking if two things are equal within numerical precision -same = function(x, y, tolerance = .Machine$double.eps^0.5) { - abs(x - y) < tolerance -} - - -## Functions for wrapping estimation results into a nice format -WrapResults = function(point.est, cov, param, name, va, vb, converged) { - - se.est = sqrt(diag(cov)) - - conf.lower = point.est + stats::qnorm(0.025) * se.est - conf.upper = point.est + stats::qnorm(0.975) * se.est - p.temp = stats::pnorm(point.est/se.est, 0, 1) - p.value = 2 * pmin(p.temp, 1 - p.temp) - - names(point.est) = names(se.est) = rownames(cov) = colnames(cov) = names(conf.lower) = names(conf.upper) = names(p.value) = name - - coefficients = cbind(point.est, se.est, conf.lower, conf.upper, p.value) - - linear.predictors = va %*% point.est[1:ncol(va)] - if(param=="RR") param.est = exp(linear.predictors) - if(param=="RD") param.est = linear.predictors - if(param=="OR") param.est = expit(linear.predictors) - - sol = list(param = param, point.est = point.est, se.est = se.est, cov = cov, - conf.lower = conf.lower, conf.upper = conf.upper, p.value = p.value, - coefficients = coefficients, param.est = param.est, va = va, vb = vb, - converged = converged) - class(sol) = c("brm", "list") - attr(sol, "hidden") = c("param", "se.est", "cov", "conf.lower", "conf.upper", - "p.value","coefficients", "param.est", "va", "vb", "converged") - - return(sol) - -} - - -## This function is useful for finding limits on the boundary -## It gives 0.5*exp(x)*(-1+sqrt(1+4exp(-x))) -## This is bounded between 0 and 1, and takes value (-1+sqrt(5))/2 at x=0 (Some relation to golden ratio) -## Limits are 0 and 1 as x goes to -infty and +infty respectively -## The function will never return NaN given a numerical input - -getPrbAux = function(x) { - ifelse((x < 17) & (x > (-500)), - 0.5 * exp(x) * (-1 + (1 + 4 * exp(-x))^0.5), - ifelse(x<0, 0, 1)) -} +logit <- function(prob) { + log(prob) - log(1 - prob) +} + +expit <- function(logodds) { + 1 / (1 + exp(-logodds)) +} + + +getlogop <- function(p0, p1) { + log(p0) + log(p1) - log(1 - p0) - log(1 - p1) +} + +getlogrr <- function(p0, p1) { + log(p1) - log(p0) +} + +getatanhrd <- function(p0, p1) { + atanh(p1 - p0) +} + + +## Function for checking if two things are equal within numerical precision +same <- function(x, y, tolerance = .Machine$double.eps^0.5) { + abs(x - y) < tolerance +} + + +## Functions for wrapping estimation results into a nice format +WrapResults <- function(point.est, cov, param, name, va, vb, converged) { + se.est <- sqrt(diag(cov)) + + conf.lower <- point.est + stats::qnorm(0.025) * se.est + conf.upper <- point.est + stats::qnorm(0.975) * se.est + p.temp <- stats::pnorm(point.est / se.est, 0, 1) + p.value <- 2 * pmin(p.temp, 1 - p.temp) + + names(point.est) <- names(se.est) <- rownames(cov) <- colnames(cov) <- names(conf.lower) <- names(conf.upper) <- names(p.value) <- name + + coefficients <- cbind(point.est, se.est, conf.lower, conf.upper, p.value) + + linear.predictors <- va %*% point.est[1:ncol(va)] + if (param == "RR") param.est <- exp(linear.predictors) + if (param == "RD") param.est <- linear.predictors + if (param == "OR") param.est <- expit(linear.predictors) + + sol <- list( + param = param, point.est = point.est, se.est = se.est, cov = cov, + conf.lower = conf.lower, conf.upper = conf.upper, p.value = p.value, + coefficients = coefficients, param.est = param.est, va = va, vb = vb, + converged = converged + ) + class(sol) <- c("brm", "list") + attr(sol, "hidden") <- c( + "param", "se.est", "cov", "conf.lower", "conf.upper", + "p.value", "coefficients", "param.est", "va", "vb", "converged" + ) + + return(sol) +} + + +## This function is useful for finding limits on the boundary +## It gives 0.5*exp(x)*(-1+sqrt(1+4exp(-x))) +## This is bounded between 0 and 1, and takes value (-1+sqrt(5))/2 at x=0 (Some relation to golden ratio) +## Limits are 0 and 1 as x goes to -infty and +infty respectively +## The function will never return NaN given a numerical input + +getPrbAux <- function(x) { + ifelse((x < 17) & (x > (-500)), + 0.5 * exp(x) * (-1 + (1 + 4 * exp(-x))^0.5), + ifelse(x < 0, 0, 1) + ) +} diff --git a/R/Print.R b/R/Print.R index 4b5fe02..6baab87 100644 --- a/R/Print.R +++ b/R/Print.R @@ -1,41 +1,46 @@ - -#' Ancillary function for printing -#' -#' @param x a list obtained with the function 'brm' -#' -#' @param ... additional arguments affecting the output -#' -#' @export - -print.brm = function(x, ...) { - hid = attr(x, "hidden") - nhid = which(!names(x) %in% hid) - - if (x$param == "RR") { - cat("Parameter of interest: (conditional) relative risk;", "\n", "nuisance parameter: odds product.", - "\n\n", sep = "") - cat("Target model: log(RR) = alpha * va", "\n") - cat("Nuisance model: log(OP) = beta * vb", "\n\n") - } - if (x$param == "RD") { - cat("Parameter of interest: (conditional) risk difference;", "\n", - "nuisance parameter: odds product.", "\n\n", sep = "") - cat("Target model: log(RD) = alpha * va", "\n") - cat("Nuisance model: log(OP) = beta * vb", "\n\n") - } - if (x$param == "OR") { - cat("Parameter of interest: (conditional) odds ratio;", "\n", "nuisance parameter: baseline risk.", - "\n\n", sep = "") - cat("Target model: log(OR) = alpha * va", "\n") - cat("Nuisance model: log(p0) = beta * vb", "\n\n") - } - - - for (i in nhid) { - x[[i]] = round(x[[i]], 3) - } - - print(x[nhid], 3) - - cat("See the element '$coefficients' for more information.\n") -} +#' Ancillary function for printing +#' +#' @param x a list obtained with the function 'brm' +#' +#' @param ... additional arguments affecting the output +#' +#' @export + +print.brm <- function(x, ...) { + hid <- attr(x, "hidden") + nhid <- which(!names(x) %in% hid) + + if (x$param == "RR") { + cat("Parameter of interest: (conditional) relative risk;", "\n", "nuisance parameter: odds product.", + "\n\n", + sep = "" + ) + cat("Target model: log(RR) = alpha * va", "\n") + cat("Nuisance model: log(OP) = beta * vb", "\n\n") + } + if (x$param == "RD") { + cat("Parameter of interest: (conditional) risk difference;", "\n", + "nuisance parameter: odds product.", "\n\n", + sep = "" + ) + cat("Target model: log(RD) = alpha * va", "\n") + cat("Nuisance model: log(OP) = beta * vb", "\n\n") + } + if (x$param == "OR") { + cat("Parameter of interest: (conditional) odds ratio;", "\n", "nuisance parameter: baseline risk.", + "\n\n", + sep = "" + ) + cat("Target model: log(OR) = alpha * va", "\n") + cat("Nuisance model: log(p0) = beta * vb", "\n\n") + } + + + for (i in nhid) { + x[[i]] <- round(x[[i]], 3) + } + + print(x[nhid], 3) + + cat("See the element '$coefficients' for more information.\n") +} diff --git a/R/ValidCheck.R b/R/ValidCheck.R index 3b74d4c..8814c2c 100644 --- a/R/ValidCheck.R +++ b/R/ValidCheck.R @@ -1,36 +1,48 @@ - -ValidCheck = function(param, y, x, va, vb, vc, weights, subset, est.method, - optimal, max.step, thres, alpha.start, beta.start) { - if (!is.character(param)) - stop("Parameter must be a character") - if (!(param %in% c("RD", "RR", "OR"))) - stop("Parameter can only take RR, RD or OR") - - if (sum(is.na(y)) + sum(is.na(x)) + sum(is.na(va)) + sum(is.na(vb)) + - sum(is.na(vc)) + sum(is.na(weights)) > 0) - warning("Observations with missing values will be removed.") - if (!(all(y %in% c(0, 1)))) - stop("y values must be either 0 or 1.") - if (!(all(x %in% c(0, 1)))) - stop("x values must be either 0 or 1.") - if (!identical(length(y), length(x), dim(va)[1], dim(vb)[1], dim(vc)[1])) - stop("y, x and v must have the same length (dimension)") - - if(!is.numeric(weights)) - stop("weights must either be NULL or take numerical values") - if(!is.numeric(subset)) - stop("subset must either be NULL or take numerical values") - if (!(est.method %in% c("MLE", "DR"))) - stop("Must use MLE or DR for estimation") - if (!is.logical(optimal)) - stop("optimal must be a logical variable") - if(!is.numeric(max.step) & !is.null(max.step)) - stop("max.step must be a number") - if(!is.numeric(thres)) - stop("thres must be a number") - if(!is.null(alpha.start) & length(alpha.start) != dim(va)[2]) - stop("length of alpha.start must match the dimension of va") - if(!is.null(beta.start) & length(beta.start) != dim(vb)[2]) - stop("length of beta.start must match the dimension of vb") - -} +ValidCheck <- function(param, y, x, va, vb, vc, weights, subset, est.method, + optimal, max.step, thres, alpha.start, beta.start) { + if (!is.character(param)) { + stop("Parameter must be a character") + } + if (!(param %in% c("RD", "RR", "OR"))) { + stop("Parameter can only take RR, RD or OR") + } + + if (sum(is.na(y)) + sum(is.na(x)) + sum(is.na(va)) + sum(is.na(vb)) + + sum(is.na(vc)) + sum(is.na(weights)) > 0) { + warning("Observations with missing values will be removed.") + } + if (!(all(y %in% c(0, 1)))) { + stop("y values must be either 0 or 1.") + } + if (!(all(x %in% c(0, 1)))) { + stop("x values must be either 0 or 1.") + } + if (!identical(length(y), length(x), dim(va)[1], dim(vb)[1], dim(vc)[1])) { + stop("y, x and v must have the same length (dimension)") + } + + if (!is.numeric(weights)) { + stop("weights must either be NULL or take numerical values") + } + if (!is.numeric(subset)) { + stop("subset must either be NULL or take numerical values") + } + if (!(est.method %in% c("MLE", "DR"))) { + stop("Must use MLE or DR for estimation") + } + if (!is.logical(optimal)) { + stop("optimal must be a logical variable") + } + if (!is.numeric(max.step) & !is.null(max.step)) { + stop("max.step must be a number") + } + if (!is.numeric(thres)) { + stop("thres must be a number") + } + if (!is.null(alpha.start) & length(alpha.start) != dim(va)[2]) { + stop("length of alpha.start must match the dimension of va") + } + if (!is.null(beta.start) & length(beta.start) != dim(vb)[2]) { + stop("length of beta.start must match the dimension of vb") + } +} diff --git a/R/brm.R b/R/brm.R index 3373435..9505ba9 100644 --- a/R/brm.R +++ b/R/brm.R @@ -1,20 +1,20 @@ -#' @title Fitting Binary Regression Models -#' -#' @description The function \code{brm} in this package provides an alternative to generalized linear models for fitting binary regression models, in which both the response \eqn{y} and the primary exposure of interest \eqn{x} are binary. This is especially useful if the interest lies in estimating the association between \eqn{x} and \eqn{y}, and how that association varies as a function of (other) covariates \eqn{v}. -#' -#' Unlike \code{glm}, which uses a single link function for the outcome, \code{brm} separates the nuisance model from the target model. This separation provides opportunities to choose nuisance models independently of the target model. To see why this is important, we may contrast it with the use of a GLM to model the log relative risk. In this setting one might use a Poisson regression (with interaction term) \eqn{log P(y = 1|x, va, vb) = \alpha * x * va + \beta * vb} (though such a model ignores the fact that \eqn{y} is binary); here \eqn{va} and \eqn{vb} are subsets of \eqn{v}. Such a Poisson model can be seen as a combination of two parts: a target model \eqn{log RR(va) = \alpha * va} and a nuisance model \eqn{log P(y = 1|x = 0, vb) = \beta * vb}. However, this nuisance model is variation dependent of the target model so that predicted probabilities may go outside of \eqn{[0,1]}. Furthermore, one cannot solve this problem under a GLM framework as with a GLM, the target model and nuisance model are determined \emph{simultaneously} through a link function. -#' -#' More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP). If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}). -#' -#' \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. \code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation. -#' -#' \code{brm} also provides doubly robust fitting as an option such that the estimates for \eqn{\alpha} are still consistent and asymptotically normal even when the nuisance model is misspecified, provided that we have a correctly specified logistic model for the exposure probability \eqn{P(x=1|v)}. Such doubly robust estimation is only possible for the Relative Risk and Risk Difference, but not the Odds Ratio. -#' -#' See Richardson et al. (2017) for more details. -#' -#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017). -#' -#' -#' @docType package -#' @name brm-package -NULL \ No newline at end of file +#' @title Fitting Binary Regression Models +#' +#' @description The function \code{brm} in this package provides an alternative to generalized linear models for fitting binary regression models, in which both the response \eqn{y} and the primary exposure of interest \eqn{x} are binary. This is especially useful if the interest lies in estimating the association between \eqn{x} and \eqn{y}, and how that association varies as a function of (other) covariates \eqn{v}. +#' +#' Unlike \code{glm}, which uses a single link function for the outcome, \code{brm} separates the nuisance model from the target model. This separation provides opportunities to choose nuisance models independently of the target model. To see why this is important, we may contrast it with the use of a GLM to model the log relative risk. In this setting one might use a Poisson regression (with interaction term) \eqn{log P(y = 1|x, va, vb) = \alpha * x * va + \beta * vb} (though such a model ignores the fact that \eqn{y} is binary); here \eqn{va} and \eqn{vb} are subsets of \eqn{v}. Such a Poisson model can be seen as a combination of two parts: a target model \eqn{log RR(va) = \alpha * va} and a nuisance model \eqn{log P(y = 1|x = 0, vb) = \beta * vb}. However, this nuisance model is variation dependent of the target model so that predicted probabilities may go outside of \eqn{[0,1]}. Furthermore, one cannot solve this problem under a GLM framework as with a GLM, the target model and nuisance model are determined \emph{simultaneously} through a link function. +#' +#' More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP). If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}). +#' +#' \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. \code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation. +#' +#' \code{brm} also provides doubly robust fitting as an option such that the estimates for \eqn{\alpha} are still consistent and asymptotically normal even when the nuisance model is misspecified, provided that we have a correctly specified logistic model for the exposure probability \eqn{P(x=1|v)}. Such doubly robust estimation is only possible for the Relative Risk and Risk Difference, but not the Odds Ratio. +#' +#' See Richardson et al. (2017) for more details. +#' +#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017). +#' +#' +#' @docType package +#' @name brm-package +NULL diff --git a/R/getProbScalarRD.R b/R/getProbScalarRD.R index 2dfcada..5125695 100644 --- a/R/getProbScalarRD.R +++ b/R/getProbScalarRD.R @@ -1,99 +1,102 @@ - -#' Calculate risks from arctanh RD and log OP -#' -#' @param atanhrd arctanh of risk difference -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. -#' -#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} -#' -#' @examples getProbScalarRD(0,0) -#' -#' set.seed(0) -#' logrr = rnorm(10,0,1) -#' logop = rnorm(10,0,1) -#' probs = mapply(getProbScalarRD, logrr, logop) -#' rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -#' probs -#' -#' @export - - -getProbScalarRD = function(atanhrd, logop) { - - if(length(atanhrd) == 2){ - logop = atanhrd[2] - atanhrd = atanhrd[1] - } - - if (logop > 350) { - if (atanhrd < 0) { - p0 = 1 - p1 = p0 + tanh(atanhrd) - } else { - p1 = 1 - p0 = p1 - tanh(atanhrd) - } - } else { - ## not on boundary logop = 0; solving linear equations - if (same(logop, 0)) { - p0 = 0.5 * (1 - tanh(atanhrd)) - } else { - p0 = (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * - (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * - (1 - tanh(atanhrd)) * (1 - exp(logop))))/(2 * (exp(logop) - - 1)) - } - p1 = p0 + tanh(atanhrd) - } - return(c(p0, p1)) -} - -#' Calculate risks from arctanh RD and log OP (vectorised) -#' -#' @param atanhrd arctanh of risk difference -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. -#' -#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns -#' -#' @examples getProbRD(0,0) -#' -#' set.seed(0) -#' logrr = rnorm(10,0,1) -#' logop = rnorm(10,0,1) -#' probs = getProbRD(logrr, logop) -#' colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -#' probs -#' -#' @export -getProbRD = function(atanhrd, logop) { - if(is.matrix(atanhrd) && ncol(atanhrd) == 2){ - logop = atanhrd[,2] - atanhrd = atanhrd[,1] - } else if(length(logop)==1 && is.na(logop) && length(atanhrd) == 2){ - logop = atanhrd[2] - atanhrd = atanhrd[1] - } - p0 <- ifelse (logop > 350, - ifelse(atanhrd < 0, - 1, - 1 - tanh(atanhrd)), - ## not on boundary logop = 0; solving linear equations - ifelse(same(logop, 0), - 0.5 * (1 - tanh(atanhrd)), - (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * (1 - tanh(atanhrd)) * (1 - exp(logop))))/(2 * (exp(logop) - 1)))) - p1 <- ifelse (logop > 350, - ifelse(atanhrd < 0, - 1 + tanh(atanhrd), - 1), - ## not on boundary logop = 0 - p0 + tanh(atanhrd)) - cbind(p0,p1) -} +#' Calculate risks from arctanh RD and log OP +#' +#' @param atanhrd arctanh of risk difference +#' +#' @param logop log of odds product +#' +#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. +#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. +#' +#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} +#' +#' @examples getProbScalarRD(0, 0) +#' +#' set.seed(0) +#' logrr <- rnorm(10, 0, 1) +#' logop <- rnorm(10, 0, 1) +#' probs <- mapply(getProbScalarRD, logrr, logop) +#' rownames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") +#' probs +#' +#' @export + + +getProbScalarRD <- function(atanhrd, logop) { + if (length(atanhrd) == 2) { + logop <- atanhrd[2] + atanhrd <- atanhrd[1] + } + + if (logop > 350) { + if (atanhrd < 0) { + p0 <- 1 + p1 <- p0 + tanh(atanhrd) + } else { + p1 <- 1 + p0 <- p1 - tanh(atanhrd) + } + } else { + ## not on boundary logop = 0; solving linear equations + if (same(logop, 0)) { + p0 <- 0.5 * (1 - tanh(atanhrd)) + } else { + p0 <- (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * + (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * + (1 - tanh(atanhrd)) * (1 - exp(logop)))) / (2 * (exp(logop) - + 1)) + } + p1 <- p0 + tanh(atanhrd) + } + return(c(p0, p1)) +} + +#' Calculate risks from arctanh RD and log OP (vectorised) +#' +#' @param atanhrd arctanh of risk difference +#' +#' @param logop log of odds product +#' +#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. +#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. +#' +#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns +#' +#' @examples getProbRD(0, 0) +#' +#' set.seed(0) +#' logrr <- rnorm(10, 0, 1) +#' logop <- rnorm(10, 0, 1) +#' probs <- getProbRD(logrr, logop) +#' colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") +#' probs +#' +#' @export +getProbRD <- function(atanhrd, logop) { + if (is.matrix(atanhrd) && ncol(atanhrd) == 2) { + logop <- atanhrd[, 2] + atanhrd <- atanhrd[, 1] + } else if (length(logop) == 1 && is.na(logop) && length(atanhrd) == 2) { + logop <- atanhrd[2] + atanhrd <- atanhrd[1] + } + p0 <- ifelse(logop > 350, + ifelse(atanhrd < 0, + 1, + 1 - tanh(atanhrd) + ), + ## not on boundary logop = 0; solving linear equations + ifelse(same(logop, 0), + 0.5 * (1 - tanh(atanhrd)), + (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * (1 - tanh(atanhrd)) * (1 - exp(logop)))) / (2 * (exp(logop) - 1)) + ) + ) + p1 <- ifelse(logop > 350, + ifelse(atanhrd < 0, + 1 + tanh(atanhrd), + 1 + ), + ## not on boundary logop = 0 + p0 + tanh(atanhrd) + ) + cbind(p0, p1) +} diff --git a/R/getProbScalarRR.R b/R/getProbScalarRR.R index 6949c62..afd1354 100644 --- a/R/getProbScalarRR.R +++ b/R/getProbScalarRR.R @@ -1,117 +1,121 @@ - - -#' Calculate risks from log RR and log OP -#' -#' @param logrr log of relative risk -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' -#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} -#' -#' @examples getProbScalarRR(0,0) -#' -#' set.seed(0) -#' logrr = rnorm(10,0,1) -#' logop = rnorm(10,0,1) -#' probs = mapply(getProbScalarRR, logrr, logop) -#' rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -#' probs -#' -#' @export - -getProbScalarRR = function(logrr, logop = NA) { - - if(length(logrr) == 2){ - logop = logrr[2] - logrr = logrr[1] - } - - if ((logop < (-12)) || (logop > 12) || (logrr < (-12)) || (logrr > 12)) { - # on the boundary South edge: large -ve logrr or (large -ve logop and -ve - # logrr) - if ((logrr < (-12)) || ((logop < (-12)) && (logrr < 0))) { - p0 = getPrbAux(logop - logrr) - p1 = 0 - } else { - if ((logrr > 12) || ((logop < (-12)) && (logrr > 0))) { - # West edge: large +ve logrr or (large -ve logop and +ve logrr) - p0 = 0 - p1 = getPrbAux(logrr + logop) - } else { - # North or East edges (=logop is large +ve) - p0 = min(exp(-logrr), 1) - p1 = min(exp(logrr), 1) - } - } - } else { - # not on the boundary logop = 0; solving linear equations logop not 0; - # solving a quadratic equation - if (same(logop, 0)) { - p0 = 1/(1 + exp(logrr)) - } else { - p0 = (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + - 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop))))/(2 * exp(logrr) * - (1 - exp(logop))) - } - p1 = exp(logrr) * p0 - } - return(c(p0, p1)) -} - -#' Calculate risks from log RR and log OP (vectorised) -#' -#' @param logrr log of relative risk -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' -#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns -#' -#' @examples getProbRR(0,0) -#' -#' set.seed(0) -#' logrr = rnorm(10,0,1) -#' logop = rnorm(10,0,1) -#' probs = getProbRR(logrr, logop) -#' colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -#' probs -#' -#' @export -getProbRR = function(logrr, logop = NA) { - if(is.matrix(logrr) && ncol(logrr) == 2){ - logop = logrr[,2] - logrr = logrr[,1] - } else if(length(logop)==1 && is.na(logop) && length(logrr) == 2){ - logop = logrr[2] - logrr = logrr[1] - } - p0 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12), - ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve - ## logrr) - ifelse ((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)), - getPrbAux(logop-logrr), - ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)), - ## West edge: large +ve logrr or (large -ve logop and +ve logrr) - 0, - pmin(exp(-logrr), 1))), - ## not on the boundary logop = 0; solving linear equations logop not 0; - ## solving a quadratic equation - ifelse(same(logop, 0), - 1/(1 + exp(logrr)), - (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop))))/(2 * exp(logrr) * (1 - exp(logop))))) - p1 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12), - ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve - ## logrr - ifelse ((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)), - 0, - ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)), - ## West edge: large +ve logrr or (large -ve logop and +ve logrr) - getPrbAux(logop + logrr), - pmin(exp(logrr), 1))), - ## not on the boundary logop = 0 - exp(logrr) * p0) - cbind(p0,p1) -} +#' Calculate risks from log RR and log OP +#' +#' @param logrr log of relative risk +#' +#' @param logop log of odds product +#' +#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. +#' +#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} +#' +#' @examples getProbScalarRR(0, 0) +#' +#' set.seed(0) +#' logrr <- rnorm(10, 0, 1) +#' logop <- rnorm(10, 0, 1) +#' probs <- mapply(getProbScalarRR, logrr, logop) +#' rownames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") +#' probs +#' +#' @export + +getProbScalarRR <- function(logrr, logop = NA) { + if (length(logrr) == 2) { + logop <- logrr[2] + logrr <- logrr[1] + } + + if ((logop < (-12)) || (logop > 12) || (logrr < (-12)) || (logrr > 12)) { + # on the boundary South edge: large -ve logrr or (large -ve logop and -ve + # logrr) + if ((logrr < (-12)) || ((logop < (-12)) && (logrr < 0))) { + p0 <- getPrbAux(logop - logrr) + p1 <- 0 + } else { + if ((logrr > 12) || ((logop < (-12)) && (logrr > 0))) { + # West edge: large +ve logrr or (large -ve logop and +ve logrr) + p0 <- 0 + p1 <- getPrbAux(logrr + logop) + } else { + # North or East edges (=logop is large +ve) + p0 <- min(exp(-logrr), 1) + p1 <- min(exp(logrr), 1) + } + } + } else { + # not on the boundary logop = 0; solving linear equations logop not 0; + # solving a quadratic equation + if (same(logop, 0)) { + p0 <- 1 / (1 + exp(logrr)) + } else { + p0 <- (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + + 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop)))) / (2 * exp(logrr) * + (1 - exp(logop))) + } + p1 <- exp(logrr) * p0 + } + return(c(p0, p1)) +} + +#' Calculate risks from log RR and log OP (vectorised) +#' +#' @param logrr log of relative risk +#' +#' @param logop log of odds product +#' +#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. +#' +#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns +#' +#' @examples getProbRR(0, 0) +#' +#' set.seed(0) +#' logrr <- rnorm(10, 0, 1) +#' logop <- rnorm(10, 0, 1) +#' probs <- getProbRR(logrr, logop) +#' colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") +#' probs +#' +#' @export +getProbRR <- function(logrr, logop = NA) { + if (is.matrix(logrr) && ncol(logrr) == 2) { + logop <- logrr[, 2] + logrr <- logrr[, 1] + } else if (length(logop) == 1 && is.na(logop) && length(logrr) == 2) { + logop <- logrr[2] + logrr <- logrr[1] + } + p0 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12), + ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve + ## logrr) + ifelse((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)), + getPrbAux(logop - logrr), + ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)), + ## West edge: large +ve logrr or (large -ve logop and +ve logrr) + 0, + pmin(exp(-logrr), 1) + ) + ), + ## not on the boundary logop = 0; solving linear equations logop not 0; + ## solving a quadratic equation + ifelse(same(logop, 0), + 1 / (1 + exp(logrr)), + (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop)))) / (2 * exp(logrr) * (1 - exp(logop))) + ) + ) + p1 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12), + ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve + ## logrr + ifelse((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)), + 0, + ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)), + ## West edge: large +ve logrr or (large -ve logop and +ve logrr) + getPrbAux(logop + logrr), + pmin(exp(logrr), 1) + ) + ), + ## not on the boundary logop = 0 + exp(logrr) * p0 + ) + cbind(p0, p1) +} diff --git a/R/main.R b/R/main.R index 7bdfa58..c9e218b 100644 --- a/R/main.R +++ b/R/main.R @@ -1,181 +1,199 @@ -#' Fitting Binary Regression Models -#' -#' @description \code{brm} is used to estimate the association between two binary variables, and how that varies as a function of other covariates. -#' -#' @param param The measure of association. Can take value 'RD' (risk difference), -#' 'RR' (relative risk) or 'OR' (odds ratio) -#' -#' @param y The response vector. Should only take values 0 or 1. -#' -#' @param x The exposure vector. Should only take values 0 or 1. -#' -#' @param va The covariates matrix for the target model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. -#' -#' @param vb The covariates matrix for the nuisance model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. (If not specified, defaults to va.) -#' -#' @param vc The covariates matrix for the probability of exposure, often called the propensity score. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. By default we fit a logistic regression model for the propensity score. (If not specified, defaults to va.) -#' -#' @param weights An optional vector of 'prior weights' to be used in the fitting process. Should be NULL or a numeric vector. -#' -#' @param subset An optional vector specifying a subset of observations to be used in the fitting process. -#' -#' @param est.method The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation). -#' -#' @param optimal Use the optimal weighting function for the doubly robust estimator? Ignored if the estimation method is 'MLE'. The default is TRUE. -#' -#' @param max.step The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000. -#' -#' @param thres Threshold for judging convergence. The default is 1e-6. -#' -#' @param alpha.start Starting values for the parameters in the target model. -#' -#' @param beta.start Starting values for the parameters in the nuisance model. -#' -#' @param message Show optimization details? Ignored if the estimation method is 'MLE'. The default is FALSE. -#' -#' @details \code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation. -#'If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}. -#'If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}. -#'If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}. -#'For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}. -#'For OR, the nuisance model is for the baseline risk: \eqn{logit(P(y=1|x=0,vb)) = \beta*vb.} -#' In each case the nuisance model is variation independent of the target model, which ensures that the predicted probabilities lie in \eqn{[0,1]}. -#' See Richardson et al. (2016+) for more details. -#' -#' If \code{est.method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details. -#' -#' When estimating RR and RD, \code{est.method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency. -#' -#' For the doubly robust estimation method, MLE is used to obtain preliminary estimates of \eqn{\alpha}, \eqn{\beta} and \eqn{\gamma}. The estimate of \eqn{\alpha} is then updated by solving a doubly-robust estimating equation. (The estimate for \eqn{\beta} remains the MLE.) -#' -#' @return A list consisting of -#' \item{param}{the measure of association.} -#' -#' \item{point.est}{ the point estimates.} -#' -#' \item{se.est}{the standard error estimates.} -#' -#' \item{cov}{estimate of the covariance matrix for the estimates.} -#' -#' \item{conf.lower}{ the lower limit of the 95\% (marginal) confidence interval.} -#' -#' \item{conf.upper}{ the upper limit of the 95\% (marginal) confidence interval.} -#' -#' \item{p.value}{the two sided p-value for testing zero coefficients.} -#' -#' \item{coefficients}{ the matrix summarizing key information: point estimate, 95\% confidence interval and p-value.} -#' -#' \item{param.est}{the fitted RR/RD/OR.} -#' -#' \item{va}{ the matrix of covariates for the target model.} -#' -#' \item{vb}{ the matrix of covariates for the nuisance model.} -#' -#' \item{converged}{ Did the maximization process converge? } -#' -#' @author Linbo Wang , Mark Clements , Thomas Richardson -#' -#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017). -#' -#' Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robust estimation in a semiparametric odds ratio model." Biometrika 97.1 (2010): 171-180. -#' -#' @seealso \code{getProbScalarRD}, \code{getProbRD} (vectorised), \code{getProbScalarRR} and \code{getProbRR} (vectorised) for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); -#' -#' \code{predict.blm} for obtaining fitted probabilities from \code{brm} fits. -#' -#' @examples -#' set.seed(0) -#' n = 100 -#' alpha.true = c(0,-1) -#' beta.true = c(-0.5,1) -#' gamma.true = c(0.1,-0.5) -#' params.true = list(alpha.true=alpha.true, beta.true=beta.true, -#' gamma.true=gamma.true) -#' v.1 = rep(1,n) # intercept term -#' v.2 = runif(n,-2,2) -#' v = cbind(v.1,v.2) -#' pscore.true = exp(v %*% gamma.true) / (1+exp(v %*% gamma.true)) -#' p0p1.true = getProbRR(v %*% alpha.true,v %*% beta.true) -#' x = rbinom(n, 1, pscore.true) -#' pA.true = p0p1.true[,1] -#' pA.true[x==1] = p0p1.true[x==1,2] -#' y = rbinom(n, 1, pA.true) -#' -#' fit.mle = brm(y,x,v,v,'RR','MLE',v,TRUE) -#' fit.drw = brm(y,x,v,v,'RR','DR',v,TRUE) -#' fit.dru = brm(y,x,v,v,'RR','DR',v,FALSE) -#' -#' fit.mle2 = brm(y,x,~v.2, ~v.2, 'RR','MLE', ~v.2,TRUE) # same as fit.mle -#' -#' @export - - -brm = function(y, x, va, vb = NULL, param, est.method = "MLE", vc = NULL, - optimal = TRUE, weights = NULL, subset = NULL, max.step = NULL, thres = 1e-8, - alpha.start = NULL, beta.start = NULL, message = FALSE) { - - # default param = 'RR'; est.method = 'MLE'; va = v; vb = v; vc = v; - # weights = NULL; subset = NULL; optimal = TRUE; max.step = NULL; - # thres = 1e-06; alpha.start = NULL; beta.start = NULL - - if (is.null(vb)) - vb = va - if (is.null(vc)) - vc = va - - if(class(va)[1] == "formula") va = stats::model.matrix(va) - if(class(vb)[1] == "formula") vb = stats::model.matrix(vb) - if(class(vc)[1] == "formula") vc = stats::model.matrix(vc) - - if(is.vector(va)) va = as.matrix(va, ncol = 1) - if(is.vector(vb)) vb = as.matrix(vb, ncol = 1) - if(is.vector(vc)) vc = as.matrix(vc, ncol = 1) - - if (is.null(weights)) - weights = rep(1, length(y)) - if (is.null(subset)) - subset = 1:length(y) - - ValidCheck(param, y, x, va, vb, vc, weights, subset, est.method, optimal, - max.step, thres, alpha.start, beta.start) - - data = cbind(y,x,va,vb,vc,weights)[subset,] - subset = subset[rowSums(is.na(data)) == 0] - y = y[subset]; x = x[subset]; va = va[subset,,drop=FALSE]; vb = vb[subset,,drop=FALSE] - vc = vc[subset,,drop=FALSE]; weights = weights[subset] - - pa = dim(va)[2] - pb = dim(vb)[2] - if (is.null(max.step)) max.step = min(pa * 20, 1000) - - if (est.method == "MLE"){ - sol = MLEst(param, y, x, va, vb, weights, max.step, thres, alpha.start, - beta.start, pa, pb) - } - if (est.method == "DR") { - if (param == "OR") { - cat("No doubly robust estimation methods for OR (with propensity score models) are available. Please refer to Tchetgen Tchetgen et al. (2010) for an alternative doubly robust estimation method. \n") - return() - } - if (is.null(alpha.start) | is.null(beta.start)){ - sol = MLEst(param, y, x, va, vb, weights, max.step, thres, - alpha.start, beta.start, pa, pb) - alpha.ml = sol$point.est[1:pa] - beta.ml = sol$point.est[(pa+1):(pa+pb)] - beta.cov = sol$cov[(pa+1):(pa+pb),(pa+1):(pa+pb)] - alpha.start = alpha.ml - }else{ - alpha.ml = alpha.start; beta.ml = beta.start - beta.cov = matrix(NA,pb,pb) - } - - gamma.fit = stats::glm(x~vc-1, weight = weights, family="binomial") - gamma = gamma.fit$coefficients - gamma.cov = summary(gamma.fit)$cov.unscaled - sol = DREst(param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, - weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message) - } - - return(sol) - -} +#' Fitting Binary Regression Models +#' +#' @description \code{brm} is used to estimate the association between two binary variables, and how that varies as a function of other covariates. +#' +#' @param param The measure of association. Can take value 'RD' (risk difference), +#' 'RR' (relative risk) or 'OR' (odds ratio) +#' +#' @param y The response vector. Should only take values 0 or 1. +#' +#' @param x The exposure vector. Should only take values 0 or 1. +#' +#' @param va The covariates matrix for the target model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. +#' +#' @param vb The covariates matrix for the nuisance model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. (If not specified, defaults to va.) +#' +#' @param vc The covariates matrix for the probability of exposure, often called the propensity score. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. By default we fit a logistic regression model for the propensity score. (If not specified, defaults to va.) +#' +#' @param weights An optional vector of 'prior weights' to be used in the fitting process. Should be NULL or a numeric vector. +#' +#' @param subset An optional vector specifying a subset of observations to be used in the fitting process. +#' +#' @param est.method The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation). +#' +#' @param optimal Use the optimal weighting function for the doubly robust estimator? Ignored if the estimation method is 'MLE'. The default is TRUE. +#' +#' @param max.step The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000. +#' +#' @param thres Threshold for judging convergence. The default is 1e-6. +#' +#' @param alpha.start Starting values for the parameters in the target model. +#' +#' @param beta.start Starting values for the parameters in the nuisance model. +#' +#' @param message Show optimization details? Ignored if the estimation method is 'MLE'. The default is FALSE. +#' +#' @details \code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation. +#' If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}. +#' If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}. +#' If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}. +#' For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}. +#' For OR, the nuisance model is for the baseline risk: \eqn{logit(P(y=1|x=0,vb)) = \beta*vb.} +#' In each case the nuisance model is variation independent of the target model, which ensures that the predicted probabilities lie in \eqn{[0,1]}. +#' See Richardson et al. (2016+) for more details. +#' +#' If \code{est.method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details. +#' +#' When estimating RR and RD, \code{est.method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency. +#' +#' For the doubly robust estimation method, MLE is used to obtain preliminary estimates of \eqn{\alpha}, \eqn{\beta} and \eqn{\gamma}. The estimate of \eqn{\alpha} is then updated by solving a doubly-robust estimating equation. (The estimate for \eqn{\beta} remains the MLE.) +#' +#' @return A list consisting of +#' \item{param}{the measure of association.} +#' +#' \item{point.est}{ the point estimates.} +#' +#' \item{se.est}{the standard error estimates.} +#' +#' \item{cov}{estimate of the covariance matrix for the estimates.} +#' +#' \item{conf.lower}{ the lower limit of the 95\% (marginal) confidence interval.} +#' +#' \item{conf.upper}{ the upper limit of the 95\% (marginal) confidence interval.} +#' +#' \item{p.value}{the two sided p-value for testing zero coefficients.} +#' +#' \item{coefficients}{ the matrix summarizing key information: point estimate, 95\% confidence interval and p-value.} +#' +#' \item{param.est}{the fitted RR/RD/OR.} +#' +#' \item{va}{ the matrix of covariates for the target model.} +#' +#' \item{vb}{ the matrix of covariates for the nuisance model.} +#' +#' \item{converged}{ Did the maximization process converge? } +#' +#' @author Linbo Wang , Mark Clements , Thomas Richardson +#' +#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017). +#' +#' Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robust estimation in a semiparametric odds ratio model." Biometrika 97.1 (2010): 171-180. +#' +#' @seealso \code{getProbScalarRD}, \code{getProbRD} (vectorised), \code{getProbScalarRR} and \code{getProbRR} (vectorised) for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); +#' +#' \code{predict.blm} for obtaining fitted probabilities from \code{brm} fits. +#' +#' @examples +#' set.seed(0) +#' n <- 100 +#' alpha.true <- c(0, -1) +#' beta.true <- c(-0.5, 1) +#' gamma.true <- c(0.1, -0.5) +#' params.true <- list( +#' alpha.true = alpha.true, beta.true = beta.true, +#' gamma.true = gamma.true +#' ) +#' v.1 <- rep(1, n) # intercept term +#' v.2 <- runif(n, -2, 2) +#' v <- cbind(v.1, v.2) +#' pscore.true <- exp(v %*% gamma.true) / (1 + exp(v %*% gamma.true)) +#' p0p1.true <- getProbRR(v %*% alpha.true, v %*% beta.true) +#' x <- rbinom(n, 1, pscore.true) +#' pA.true <- p0p1.true[, 1] +#' pA.true[x == 1] <- p0p1.true[x == 1, 2] +#' y <- rbinom(n, 1, pA.true) +#' +#' fit.mle <- brm(y, x, v, v, "RR", "MLE", v, TRUE) +#' fit.drw <- brm(y, x, v, v, "RR", "DR", v, TRUE) +#' fit.dru <- brm(y, x, v, v, "RR", "DR", v, FALSE) +#' +#' fit.mle2 <- brm(y, x, ~v.2, ~v.2, "RR", "MLE", ~v.2, TRUE) # same as fit.mle +#' +#' @export + + +brm <- function( + y, x, va, vb = NULL, param, est.method = "MLE", vc = NULL, + optimal = TRUE, weights = NULL, subset = NULL, max.step = NULL, thres = 1e-8, + alpha.start = NULL, beta.start = NULL, message = FALSE) { + # default param = 'RR'; est.method = 'MLE'; va = v; vb = v; vc = v; + # weights = NULL; subset = NULL; optimal = TRUE; max.step = NULL; + # thres = 1e-06; alpha.start = NULL; beta.start = NULL + + if (is.null(vb)) { + vb <- va + } + if (is.null(vc)) { + vc <- va + } + + if (class(va)[1] == "formula") va <- stats::model.matrix(va) + if (class(vb)[1] == "formula") vb <- stats::model.matrix(vb) + if (class(vc)[1] == "formula") vc <- stats::model.matrix(vc) + + if (is.vector(va)) va <- as.matrix(va, ncol = 1) + if (is.vector(vb)) vb <- as.matrix(vb, ncol = 1) + if (is.vector(vc)) vc <- as.matrix(vc, ncol = 1) + + if (is.null(weights)) { + weights <- rep(1, length(y)) + } + if (is.null(subset)) { + subset <- 1:length(y) + } + + ValidCheck( + param, y, x, va, vb, vc, weights, subset, est.method, optimal, + max.step, thres, alpha.start, beta.start + ) + + data <- cbind(y, x, va, vb, vc, weights)[subset, ] + subset <- subset[rowSums(is.na(data)) == 0] + y <- y[subset] + x <- x[subset] + va <- va[subset, , drop = FALSE] + vb <- vb[subset, , drop = FALSE] + vc <- vc[subset, , drop = FALSE] + weights <- weights[subset] + + pa <- dim(va)[2] + pb <- dim(vb)[2] + if (is.null(max.step)) max.step <- min(pa * 20, 1000) + + if (est.method == "MLE") { + sol <- MLEst( + param, y, x, va, vb, weights, max.step, thres, alpha.start, + beta.start, pa, pb + ) + } + if (est.method == "DR") { + if (param == "OR") { + cat("No doubly robust estimation methods for OR (with propensity score models) are available. Please refer to Tchetgen Tchetgen et al. (2010) for an alternative doubly robust estimation method. \n") + return() + } + if (is.null(alpha.start) | is.null(beta.start)) { + sol <- MLEst( + param, y, x, va, vb, weights, max.step, thres, + alpha.start, beta.start, pa, pb + ) + alpha.ml <- sol$point.est[1:pa] + beta.ml <- sol$point.est[(pa + 1):(pa + pb)] + beta.cov <- sol$cov[(pa + 1):(pa + pb), (pa + 1):(pa + pb)] + alpha.start <- alpha.ml + } else { + alpha.ml <- alpha.start + beta.ml <- beta.start + beta.cov <- matrix(NA, pb, pb) + } + + gamma.fit <- stats::glm(x ~ vc - 1, weight = weights, family = "binomial") + gamma <- gamma.fit$coefficients + gamma.cov <- summary(gamma.fit)$cov.unscaled + sol <- DREst( + param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, + weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message + ) + } + + return(sol) +} diff --git a/R/predict.brm.R b/R/predict.brm.R index e98bc0a..a2151d1 100644 --- a/R/predict.brm.R +++ b/R/predict.brm.R @@ -1,71 +1,65 @@ -#' Fitted probabilities from \code{brm} fits -#' -#' @description Calculate fitted probabilities from a fitted binary regression model object. -#' -#' @param object A fitted object from function \code{brm}. -#' -#' @param va.new An optional covariate matrix to make predictions with. If omitted, the original matrix va is used. -#' -#' @param vb.new An optional covariate matrix to make predictions with. If vb.new is omitted but va.new is not, then vb.new is set to be equal to va.new. If both vb.new and va.new are omitted, then the original matrix vb is used. -#' -#' @param x.new An optional vector of x. -#' -#' @param ... affecting the predictions produced. -#' -#' @return If x.new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb). -#' -#' If x.new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x.new,va,vb). -#' -#' @export - - -predict.brm = function(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) { - - va = object$va - vb = object$vb - - if(is.null(vb.new)){ - if(is.null(va.new)){ - vb.new = vb - }else{ - vb.new = va.new - } - } - if(is.null(va.new)) va.new = va - - n = nrow(va.new) - pa = ncol(va.new) - pb = ncol(vb.new) - alpha.est = object$point.est[1:pa] - beta.est = object$point.est[(pa+1):(pa+pb)] - - linear.predictors = cbind(va.new %*% alpha.est, vb.new %*% beta.est) - if(object$param=="RR") - p0p1 = getProbRR(linear.predictors) - if(object$param=="RD") - p0p1 = getProbRD(linear.predictors) - if(object$param=="OR"){ - p0 = expit(linear.predictors[,2]) - or = exp(linear.predictors[,1]) - odds1 = or * (p0 / (1-p0)) - p1 = odds1 / (1+odds1) - p0p1 = cbind(p0, p1) - } - colnames(p0p1) = c("p0", "p1") - - if(!is.null(x.new)){ - px = rep(NA, n) - px[x.new == 0] = p0p1[x.new == 0, 1] - px[x.new == 1] = p0p1[x.new == 1, 2] - return(px) - }else{ - return(p0p1) - } - -} - - - - - - +#' Fitted probabilities from \code{brm} fits +#' +#' @description Calculate fitted probabilities from a fitted binary regression model object. +#' +#' @param object A fitted object from function \code{brm}. +#' +#' @param va.new An optional covariate matrix to make predictions with. If omitted, the original matrix va is used. +#' +#' @param vb.new An optional covariate matrix to make predictions with. If vb.new is omitted but va.new is not, then vb.new is set to be equal to va.new. If both vb.new and va.new are omitted, then the original matrix vb is used. +#' +#' @param x.new An optional vector of x. +#' +#' @param ... affecting the predictions produced. +#' +#' @return If x.new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb). +#' +#' If x.new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x.new,va,vb). +#' +#' @export + + +predict.brm <- function(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) { + va <- object$va + vb <- object$vb + + if (is.null(vb.new)) { + if (is.null(va.new)) { + vb.new <- vb + } else { + vb.new <- va.new + } + } + if (is.null(va.new)) va.new <- va + + n <- nrow(va.new) + pa <- ncol(va.new) + pb <- ncol(vb.new) + alpha.est <- object$point.est[1:pa] + beta.est <- object$point.est[(pa + 1):(pa + pb)] + + linear.predictors <- cbind(va.new %*% alpha.est, vb.new %*% beta.est) + if (object$param == "RR") { + p0p1 <- getProbRR(linear.predictors) + } + if (object$param == "RD") { + p0p1 <- getProbRD(linear.predictors) + } + if (object$param == "OR") { + p0 <- expit(linear.predictors[, 2]) + or <- exp(linear.predictors[, 1]) + odds1 <- or * (p0 / (1 - p0)) + p1 <- odds1 / (1 + odds1) + p0p1 <- cbind(p0, p1) + } + colnames(p0p1) <- c("p0", "p1") + + if (!is.null(x.new)) { + px <- rep(NA, n) + px[x.new == 0] <- p0p1[x.new == 0, 1] + px[x.new == 1] <- p0p1[x.new == 1, 2] + return(px) + } else { + return(p0p1) + } +} diff --git a/tests/testthat/test-brm.R b/tests/testthat/test-brm.R index 5d9ba31..8c170c0 100644 --- a/tests/testthat/test-brm.R +++ b/tests/testthat/test-brm.R @@ -1,26 +1,28 @@ # sanity checks for now set.seed(0) -n = 100 -alpha.true = c(0,-1) -beta.true = c(-0.5, 1) -gamma.true = c(0.1,-0.5) -params.true = list(alpha.true = alpha.true, - beta.true = beta.true, - gamma.true = gamma.true) -v.1 = rep(1, n) # intercept term -v.2 = runif(n,-2, 2) -v = cbind(v.1, v.2) -pscore.true = exp(v %*% gamma.true) / (1 + exp(v %*% gamma.true)) -p0p1.true = getProbRR(v %*% alpha.true, v %*% beta.true) -x = rbinom(n, 1, pscore.true) -pA.true = p0p1.true[, 1] -pA.true[x == 1] = p0p1.true[x == 1, 2] -y = rbinom(n, 1, pA.true) +n <- 100 +alpha.true <- c(0, -1) +beta.true <- c(-0.5, 1) +gamma.true <- c(0.1, -0.5) +params.true <- list( + alpha.true = alpha.true, + beta.true = beta.true, + gamma.true = gamma.true +) +v.1 <- rep(1, n) # intercept term +v.2 <- runif(n, -2, 2) +v <- cbind(v.1, v.2) +pscore.true <- exp(v %*% gamma.true) / (1 + exp(v %*% gamma.true)) +p0p1.true <- getProbRR(v %*% alpha.true, v %*% beta.true) +x <- rbinom(n, 1, pscore.true) +pA.true <- p0p1.true[, 1] +pA.true[x == 1] <- p0p1.true[x == 1, 2] +y <- rbinom(n, 1, pA.true) test_that("example runs correctly with MLE", { - r = brm(y, x, v, v, 'RR', 'MLE', v, TRUE) - expected_result = c( + r <- brm(y, x, v, v, "RR", "MLE", v, TRUE) + expected_result <- c( "alpha 1" = -0.78035527, "alpha 2" = -1.63433532, "beta 1" = -1.06854529, @@ -30,8 +32,8 @@ test_that("example runs correctly with MLE", { }) test_that("example runs correctly with DR", { - r = brm(y, x, v, v, 'RR', 'DR', v, TRUE) - expected_result = c( + r <- brm(y, x, v, v, "RR", "DR", v, TRUE) + expected_result <- c( "alpha 1" = -0.73984903, "alpha 2" = -1.33843580, "beta 1" = -1.06854529, diff --git a/tests/testthat/test-mat_vec_mul.R b/tests/testthat/test-mat_vec_mul.R index 73ee871..106accd 100644 --- a/tests/testthat/test-mat_vec_mul.R +++ b/tests/testthat/test-mat_vec_mul.R @@ -1,8 +1,8 @@ test_that("mat_vec_mul works", { - m <- matrix(c(1,2,3,1,2,3), 2, 3) - v <- c(1,2,3) - r <- c(13,13) - dim(r) <- c(2,1) - - expect_equal(r, mat_vec_mul(m,v)) + m <- matrix(c(1, 2, 3, 1, 2, 3), 2, 3) + v <- c(1, 2, 3) + r <- c(13, 13) + dim(r) <- c(2, 1) + + expect_equal(r, mat_vec_mul(m, v)) }) From 644911a9f75bea7a607617fbcb38afc90f85d477 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Tue, 29 Jul 2025 16:20:27 -0400 Subject: [PATCH 02/17] remove cpp artifact --- .gitignore | 1 + src/RcppExports.cpp | 35 ----------------------------------- 2 files changed, 1 insertion(+), 35 deletions(-) delete mode 100644 src/RcppExports.cpp diff --git a/.gitignore b/.gitignore index 2263322..3ecce47 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ docker rstudio *.o *.so +RcppExports.cpp diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp deleted file mode 100644 index fa7583d..0000000 --- a/src/RcppExports.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand -// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -#include -#include - -using namespace Rcpp; - -#ifdef RCPP_USE_GLOBAL_ROSTREAM -Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); -Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); -#endif - -// mat_vec_mul -arma::mat mat_vec_mul(const arma::mat& m, const arma::colvec& v); -RcppExport SEXP _brm_mat_vec_mul(SEXP mSEXP, SEXP vSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat& >::type m(mSEXP); - Rcpp::traits::input_parameter< const arma::colvec& >::type v(vSEXP); - rcpp_result_gen = Rcpp::wrap(mat_vec_mul(m, v)); - return rcpp_result_gen; -END_RCPP -} - -static const R_CallMethodDef CallEntries[] = { - {"_brm_mat_vec_mul", (DL_FUNC) &_brm_mat_vec_mul, 2}, - {NULL, NULL, 0} -}; - -RcppExport void R_init_brm(DllInfo *dll) { - R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); - R_useDynamicSymbols(dll, FALSE); -} From d6d18c3bf1761f7ad0dbbafc00cb1f508783d6fc Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Tue, 29 Jul 2025 16:20:49 -0400 Subject: [PATCH 03/17] basic action --- .github/workflows/check.yml | 50 +++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 .github/workflows/check.yml diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml new file mode 100644 index 0000000..d6c9134 --- /dev/null +++ b/.github/workflows/check.yml @@ -0,0 +1,50 @@ +--- +name: Check + +on: + pull_request: + types: + - opened + - synchronize + - reopened + - ready_for_review + branches: + - master + - development + - cklamann/github-actions + push: + branches: + - master + - development + - cklamann/github-actions + + workflow_dispatch: + +# when ready to release can use https://github.dev/insightsengineering/r.pkg.template/tree/main/.github/workflows +jobs: + + style: + name: Style Check + uses: insightsengineering/r.pkg.template/.github/workflows/style.yaml@main + with: + auto-update: false + + unit-test: + runs-on: ubuntu-latest + container: + image: ghcr.io/insightsengineering/rstudio + steps: + - + name: Checkout repo + uses: actions/checkout@v4.1.1 + - + name: Build + run: | + Rscript -e "devtools::install()" + - + name: Test + run: | + Rscript -e "testthat::test_local()" + + + From 8f0c025cc86fb70d54c7f99ef89f7986741dd6ad Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Mon, 11 Aug 2025 11:36:07 -0400 Subject: [PATCH 04/17] pass in token to stylecheck action --- .github/workflows/check.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index d6c9134..fb786a2 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -24,8 +24,11 @@ on: jobs: style: + if: github.event_name == 'pull_request' name: Style Check uses: insightsengineering/r.pkg.template/.github/workflows/style.yaml@main + secrets: + REPO_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: auto-update: false From 5dd64cd883ce6506eaf209f909d7b9c63e9ae3bc Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Mon, 11 Aug 2025 11:38:28 -0400 Subject: [PATCH 05/17] remove conditional for testing --- .github/workflows/check.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index fb786a2..a53ecf4 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -24,7 +24,6 @@ on: jobs: style: - if: github.event_name == 'pull_request' name: Style Check uses: insightsengineering/r.pkg.template/.github/workflows/style.yaml@main secrets: From 5c46cfb103d95a1a62957c6a82aaa42c1eea23d6 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Mon, 11 Aug 2025 11:43:22 -0400 Subject: [PATCH 06/17] remove testing config --- .github/workflows/check.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index a53ecf4..12b061b 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -11,19 +11,16 @@ on: branches: - master - development - - cklamann/github-actions push: branches: - master - development - - cklamann/github-actions workflow_dispatch: -# when ready to release can use https://github.dev/insightsengineering/r.pkg.template/tree/main/.github/workflows jobs: - style: + if: github.event_name == 'pull_request' name: Style Check uses: insightsengineering/r.pkg.template/.github/workflows/style.yaml@main secrets: From 8658365f6691cefb091ec1a120a07f2bda54c005 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Fri, 3 Oct 2025 11:27:09 -0400 Subject: [PATCH 07/17] remove unused getProbScalar methods --- R/getProbScalarRD.R | 52 --------------------------------------- R/getProbScalarRR.R | 59 --------------------------------------------- R/main.R | 2 +- 3 files changed, 1 insertion(+), 112 deletions(-) diff --git a/R/getProbScalarRD.R b/R/getProbScalarRD.R index 5125695..8b8cc91 100644 --- a/R/getProbScalarRD.R +++ b/R/getProbScalarRD.R @@ -1,55 +1,3 @@ -#' Calculate risks from arctanh RD and log OP -#' -#' @param atanhrd arctanh of risk difference -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. -#' -#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} -#' -#' @examples getProbScalarRD(0, 0) -#' -#' set.seed(0) -#' logrr <- rnorm(10, 0, 1) -#' logop <- rnorm(10, 0, 1) -#' probs <- mapply(getProbScalarRD, logrr, logop) -#' rownames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") -#' probs -#' -#' @export - - -getProbScalarRD <- function(atanhrd, logop) { - if (length(atanhrd) == 2) { - logop <- atanhrd[2] - atanhrd <- atanhrd[1] - } - - if (logop > 350) { - if (atanhrd < 0) { - p0 <- 1 - p1 <- p0 + tanh(atanhrd) - } else { - p1 <- 1 - p0 <- p1 - tanh(atanhrd) - } - } else { - ## not on boundary logop = 0; solving linear equations - if (same(logop, 0)) { - p0 <- 0.5 * (1 - tanh(atanhrd)) - } else { - p0 <- (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * - (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * - (1 - tanh(atanhrd)) * (1 - exp(logop)))) / (2 * (exp(logop) - - 1)) - } - p1 <- p0 + tanh(atanhrd) - } - return(c(p0, p1)) -} - #' Calculate risks from arctanh RD and log OP (vectorised) #' #' @param atanhrd arctanh of risk difference diff --git a/R/getProbScalarRR.R b/R/getProbScalarRR.R index afd1354..f8be76c 100644 --- a/R/getProbScalarRR.R +++ b/R/getProbScalarRR.R @@ -6,65 +6,6 @@ #' #' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. #' -#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} -#' -#' @examples getProbScalarRR(0, 0) -#' -#' set.seed(0) -#' logrr <- rnorm(10, 0, 1) -#' logop <- rnorm(10, 0, 1) -#' probs <- mapply(getProbScalarRR, logrr, logop) -#' rownames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") -#' probs -#' -#' @export - -getProbScalarRR <- function(logrr, logop = NA) { - if (length(logrr) == 2) { - logop <- logrr[2] - logrr <- logrr[1] - } - - if ((logop < (-12)) || (logop > 12) || (logrr < (-12)) || (logrr > 12)) { - # on the boundary South edge: large -ve logrr or (large -ve logop and -ve - # logrr) - if ((logrr < (-12)) || ((logop < (-12)) && (logrr < 0))) { - p0 <- getPrbAux(logop - logrr) - p1 <- 0 - } else { - if ((logrr > 12) || ((logop < (-12)) && (logrr > 0))) { - # West edge: large +ve logrr or (large -ve logop and +ve logrr) - p0 <- 0 - p1 <- getPrbAux(logrr + logop) - } else { - # North or East edges (=logop is large +ve) - p0 <- min(exp(-logrr), 1) - p1 <- min(exp(logrr), 1) - } - } - } else { - # not on the boundary logop = 0; solving linear equations logop not 0; - # solving a quadratic equation - if (same(logop, 0)) { - p0 <- 1 / (1 + exp(logrr)) - } else { - p0 <- (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + - 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop)))) / (2 * exp(logrr) * - (1 - exp(logop))) - } - p1 <- exp(logrr) * p0 - } - return(c(p0, p1)) -} - -#' Calculate risks from log RR and log OP (vectorised) -#' -#' @param logrr log of relative risk -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' #' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns #' #' @examples getProbRR(0, 0) diff --git a/R/main.R b/R/main.R index c9e218b..45416e8 100644 --- a/R/main.R +++ b/R/main.R @@ -79,7 +79,7 @@ #' #' Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robust estimation in a semiparametric odds ratio model." Biometrika 97.1 (2010): 171-180. #' -#' @seealso \code{getProbScalarRD}, \code{getProbRD} (vectorised), \code{getProbScalarRR} and \code{getProbRR} (vectorised) for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); +#' @seealso \code{getProbRD}, and \code{getProbRR} for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); #' #' \code{predict.blm} for obtaining fitted probabilities from \code{brm} fits. #' From 2df666bbc47786b7374e8c328945e6bbc0d139d6 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Fri, 3 Oct 2025 12:10:32 -0400 Subject: [PATCH 08/17] rename files for their functions, use snake case --- NAMESPACE | 10 ++-- R/{2_CallDR.R => dr_est.R} | 10 ++-- R/{2.1_DR_Point.R => dr_point.R} | 14 ++--- R/{getProbScalarRD.R => get_prob_rd.R} | 6 +-- R/{getProbScalarRR.R => get_prob_rr.R} | 10 ++-- R/{2.2.1_Hessian_RD.R => hessian_2_rd.R} | 4 +- R/{2.2.1_Hessian_RR.R => hessian_2_rr.R} | 4 +- R/main.R | 14 ++--- R/{1_CallMLE.R => mle_est.R} | 10 ++-- R/{1.1_MLE_Point.R => mle_point.R} | 4 +- R/{1.2_MLE_Var.R => mle_var.R} | 8 +-- R/{predict.brm.R => predict_brm.R} | 6 +-- R/{Print.R => print_brm.R} | 2 +- R/{MyFunc.R => util.R} | 4 +- R/{ValidCheck.R => valid_check.R} | 2 +- R/{2.2_DR_Var_RD.R => var_rd_dr.R} | 4 +- R/{2.2_DR_Var_RR.R => var_rr_dr.R} | 4 +- man/brm-package.Rd | 10 ++-- man/brm.Rd | 62 +++++++++++----------- man/getProbRR.Rd | 33 ------------ man/getProbScalarRD.Rd | 34 ------------ man/{getProbRD.Rd => get_prob_rd.Rd} | 20 +++---- man/{getProbScalarRR.Rd => get_prob_rr.Rd} | 20 +++---- man/{predict.brm.Rd => predict_brm.Rd} | 8 +-- man/{print.brm.Rd => print_brm.Rd} | 8 +-- tests/testthat/test-brm.R | 2 +- 26 files changed, 123 insertions(+), 190 deletions(-) rename R/{2_CallDR.R => dr_est.R} (84%) rename R/{2.1_DR_Point.R => dr_point.R} (79%) rename R/{getProbScalarRD.R => get_prob_rd.R} (92%) rename R/{getProbScalarRR.R => get_prob_rr.R} (91%) rename R/{2.2.1_Hessian_RD.R => hessian_2_rd.R} (95%) rename R/{2.2.1_Hessian_RR.R => hessian_2_rr.R} (94%) rename R/{1_CallMLE.R => mle_est.R} (82%) rename R/{1.1_MLE_Point.R => mle_point.R} (93%) rename R/{1.2_MLE_Var.R => mle_var.R} (84%) rename R/{predict.brm.R => predict_brm.R} (92%) rename R/{Print.R => print_brm.R} (97%) rename R/{MyFunc.R => util.R} (95%) rename R/{ValidCheck.R => valid_check.R} (95%) rename R/{2.2_DR_Var_RD.R => var_rd_dr.R} (97%) rename R/{2.2_DR_Var_RR.R => var_rr_dr.R} (97%) delete mode 100644 man/getProbRR.Rd delete mode 100644 man/getProbScalarRD.Rd rename man/{getProbRD.Rd => get_prob_rd.Rd} (65%) rename man/{getProbScalarRR.Rd => get_prob_rr.Rd} (54%) rename man/{predict.brm.Rd => predict_brm.Rd} (84%) rename man/{print.brm.Rd => print_brm.Rd} (71%) diff --git a/NAMESPACE b/NAMESPACE index 9d02f22..29a1604 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,10 @@ # Generated by roxygen2: do not edit by hand -S3method(predict,brm) -S3method(print,brm) export(brm) -export(getProbRD) -export(getProbRR) -export(getProbScalarRD) -export(getProbScalarRR) +export(get_prob_rd) +export(get_prob_rr) +export(predict_brm) +export(print_brm) exportPattern("ˆ[[:alpha:]]+") importFrom(Rcpp,evalCpp) useDynLib(brm) diff --git a/R/2_CallDR.R b/R/dr_est.R similarity index 84% rename from R/2_CallDR.R rename to R/dr_est.R index 113bd52..27d087a 100644 --- a/R/2_CallDR.R +++ b/R/dr_est.R @@ -1,7 +1,7 @@ -DREst <- function( +dr_est <- function( param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message) { - dr.est <- dr.estimate.noiterate( + dr.est <- dr_estimate_noiterate( param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, weights, max.step, thres, alpha.start, message ) @@ -9,13 +9,13 @@ DREst <- function( converged <- dr.est$convergence if (param == "RR") { - alpha.cov <- var.rr.dr( + alpha.cov <- var_rr_dr( y, x, va, vb, vc, point.est, alpha.ml, beta.ml, gamma, optimal, weights ) } if (param == "RD") { - alpha.cov <- var.rd.dr( + alpha.cov <- var_rd_dr( y, x, va, vb, vc, point.est, alpha.ml, beta.ml, gamma, optimal, weights ) @@ -34,6 +34,6 @@ DREst <- function( cov[(pa + 1):(pa + pb), (pa + 1):(pa + pb)] <- beta.cov cov[(pa + pb + 1):(pa + pb + pc), (pa + pb + 1):(pa + pb + pc)] <- gamma.cov - sol <- WrapResults(point.est, cov, param, name, va, vb, converged) + sol <- wrap_results(point.est, cov, param, name, va, vb, converged) return(sol) } diff --git a/R/2.1_DR_Point.R b/R/dr_point.R similarity index 79% rename from R/2.1_DR_Point.R rename to R/dr_point.R index aaed64a..3fa97e0 100644 --- a/R/2.1_DR_Point.R +++ b/R/dr_point.R @@ -1,11 +1,11 @@ -dr.estimate.onestep <- function( +dr_estimate_onestep <- function( param, y, x, va, vb, alpha.start, beta, pscore, wt, weights, max.step, thres, message) { startpars <- c(alpha.start) # pars only contain alpha ## DR estimation equation^2 if (param == "RR") { dr.objective <- function(pars) { - p0 <- getProbRR(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] + p0 <- get_prob_rr(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] H.alpha <- as.vector(y * exp(-x * (va %*% pars))) tmp <- mat_vec_mul(t(va), (wt * (x - pscore) * (H.alpha - p0) * weights)) return(sum(tmp^2)) @@ -13,7 +13,7 @@ dr.estimate.onestep <- function( } if (param == "RD") { dr.objective <- function(pars) { - p0 <- getProbRD(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] + p0 <- get_prob_rd(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] H.alpha <- y - x * tanh(mat_vec_mul(va, pars)) tmp <- mat_vec_mul(t((H.alpha - p0) * (x - pscore)), (va * wt * weights)) return(sum(tmp^2)) @@ -28,19 +28,19 @@ dr.estimate.onestep <- function( -dr.estimate.noiterate <- function( +dr_estimate_noiterate <- function( param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, weights, max.step, thres, alpha.start, message) { pscore <- as.vector(expit(mat_vec_mul(vc, gamma))) if (optimal == TRUE) { if (param == "RR") { - p0 <- getProbRR(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] + p0 <- get_prob_rr(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] wt <- as.vector(1 / (1 - p0 + (1 - pscore) * (exp(mat_vec_mul(-va, alpha.ml)) - 1))) } if (param == "RD") { - p0 <- getProbRD(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] + p0 <- get_prob_rd(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] rho <- as.vector(tanh(mat_vec_mul(va, alpha.ml))) wt <- (1 - rho) * (1 + rho) / (p0 * (1 - p0) + rho * (1 - pscore) * (1 - 2 * p0 - rho)) @@ -53,7 +53,7 @@ dr.estimate.noiterate <- function( alpha.start <- alpha.ml } - alpha.dr.opt <- dr.estimate.onestep( + alpha.dr.opt <- dr_estimate_onestep( param, y, x, va, vb, alpha.start, beta.ml, pscore, wt, weights, max.step, thres, message ) diff --git a/R/getProbScalarRD.R b/R/get_prob_rd.R similarity index 92% rename from R/getProbScalarRD.R rename to R/get_prob_rd.R index 8b8cc91..4bc99c0 100644 --- a/R/getProbScalarRD.R +++ b/R/get_prob_rd.R @@ -9,17 +9,17 @@ #' #' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns #' -#' @examples getProbRD(0, 0) +#' @examples get_prob_rd(0, 0) #' #' set.seed(0) #' logrr <- rnorm(10, 0, 1) #' logop <- rnorm(10, 0, 1) -#' probs <- getProbRD(logrr, logop) +#' probs <- get_prob_rd(logrr, logop) #' colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") #' probs #' #' @export -getProbRD <- function(atanhrd, logop) { +get_prob_rd <- function(atanhrd, logop) { if (is.matrix(atanhrd) && ncol(atanhrd) == 2) { logop <- atanhrd[, 2] atanhrd <- atanhrd[, 1] diff --git a/R/getProbScalarRR.R b/R/get_prob_rr.R similarity index 91% rename from R/getProbScalarRR.R rename to R/get_prob_rr.R index f8be76c..40097e2 100644 --- a/R/getProbScalarRR.R +++ b/R/get_prob_rr.R @@ -8,17 +8,17 @@ #' #' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns #' -#' @examples getProbRR(0, 0) +#' @examples get_prob_rr(0, 0) #' #' set.seed(0) #' logrr <- rnorm(10, 0, 1) #' logop <- rnorm(10, 0, 1) -#' probs <- getProbRR(logrr, logop) +#' probs <- get_prob_rr(logrr, logop) #' colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") #' probs #' #' @export -getProbRR <- function(logrr, logop = NA) { +get_prob_rr <- function(logrr, logop = NA) { if (is.matrix(logrr) && ncol(logrr) == 2) { logop <- logrr[, 2] logrr <- logrr[, 1] @@ -30,7 +30,7 @@ getProbRR <- function(logrr, logop = NA) { ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve ## logrr) ifelse((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)), - getPrbAux(logop - logrr), + get_prb_aux(logop - logrr), ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)), ## West edge: large +ve logrr or (large -ve logop and +ve logrr) 0, @@ -51,7 +51,7 @@ getProbRR <- function(logrr, logop = NA) { 0, ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)), ## West edge: large +ve logrr or (large -ve logop and +ve logrr) - getPrbAux(logop + logrr), + get_prb_aux(logop + logrr), pmin(exp(logrr), 1) ) ), diff --git a/R/2.2.1_Hessian_RD.R b/R/hessian_2_rd.R similarity index 95% rename from R/2.2.1_Hessian_RD.R rename to R/hessian_2_rd.R index 6e5ddb0..fec319d 100644 --- a/R/2.2.1_Hessian_RD.R +++ b/R/hessian_2_rd.R @@ -1,9 +1,9 @@ -Hessian2RD <- function(y, x, va, vb, alpha.ml, beta.ml, cnt) { +hessian_2_rd <- function(y, x, va, vb, alpha.ml, beta.ml, cnt) { # calculating the Hessian using the second derivative have to do so # because under mis-specification of models Hessian no longer equals the # square of the first order derivatives - p0p1 <- getProbRD(va %*% alpha.ml, vb %*% beta.ml) + p0p1 <- get_prob_rd(va %*% alpha.ml, vb %*% beta.ml) # p0p1 = cbind(p0, p1): n * 2 matrix p0 <- p0p1[, 1] p1 <- p0p1[, 2] diff --git a/R/2.2.1_Hessian_RR.R b/R/hessian_2_rr.R similarity index 94% rename from R/2.2.1_Hessian_RR.R rename to R/hessian_2_rr.R index 93b7bd4..cdfce4c 100644 --- a/R/2.2.1_Hessian_RR.R +++ b/R/hessian_2_rr.R @@ -1,9 +1,9 @@ -Hessian2RR <- function(y, x, va, vb, alpha.ml, beta.ml, weights) { +hessian_2_rr <- function(y, x, va, vb, alpha.ml, beta.ml, weights) { # calculating the Hessian using the second derivative have to do so # because under mis-specification of models Hessian no longer equals the # square of the first order derivatives - p0p1 <- getProbRR(va %*% alpha.ml, vb %*% beta.ml) + p0p1 <- get_prob_rr(va %*% alpha.ml, vb %*% beta.ml) # p0p1 = cbind(p0, p1): n * 2 matrix p0 <- p0p1[, 1] p1 <- p0p1[, 2] diff --git a/R/main.R b/R/main.R index 45416e8..c6d96b6 100644 --- a/R/main.R +++ b/R/main.R @@ -79,9 +79,9 @@ #' #' Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robust estimation in a semiparametric odds ratio model." Biometrika 97.1 (2010): 171-180. #' -#' @seealso \code{getProbRD}, and \code{getProbRR} for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); +#' @seealso \code{get_prob_rd}, and \code{get_prob_rr} for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); #' -#' \code{predict.blm} for obtaining fitted probabilities from \code{brm} fits. +#' \code{predict.brm} for obtaining fitted probabilities from \code{brm} fits. #' #' @examples #' set.seed(0) @@ -97,7 +97,7 @@ #' v.2 <- runif(n, -2, 2) #' v <- cbind(v.1, v.2) #' pscore.true <- exp(v %*% gamma.true) / (1 + exp(v %*% gamma.true)) -#' p0p1.true <- getProbRR(v %*% alpha.true, v %*% beta.true) +#' p0p1.true <- get_prob_rr(v %*% alpha.true, v %*% beta.true) #' x <- rbinom(n, 1, pscore.true) #' pA.true <- p0p1.true[, 1] #' pA.true[x == 1] <- p0p1.true[x == 1, 2] @@ -142,7 +142,7 @@ brm <- function( subset <- 1:length(y) } - ValidCheck( + valid_check( param, y, x, va, vb, vc, weights, subset, est.method, optimal, max.step, thres, alpha.start, beta.start ) @@ -161,7 +161,7 @@ brm <- function( if (is.null(max.step)) max.step <- min(pa * 20, 1000) if (est.method == "MLE") { - sol <- MLEst( + sol <- mle_est( param, y, x, va, vb, weights, max.step, thres, alpha.start, beta.start, pa, pb ) @@ -172,7 +172,7 @@ brm <- function( return() } if (is.null(alpha.start) | is.null(beta.start)) { - sol <- MLEst( + sol <- mle_est( param, y, x, va, vb, weights, max.step, thres, alpha.start, beta.start, pa, pb ) @@ -189,7 +189,7 @@ brm <- function( gamma.fit <- stats::glm(x ~ vc - 1, weight = weights, family = "binomial") gamma <- gamma.fit$coefficients gamma.cov <- summary(gamma.fit)$cov.unscaled - sol <- DREst( + sol <- dr_est( param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message ) diff --git a/R/1_CallMLE.R b/R/mle_est.R similarity index 82% rename from R/1_CallMLE.R rename to R/mle_est.R index 7f7088b..016554a 100644 --- a/R/1_CallMLE.R +++ b/R/mle_est.R @@ -1,4 +1,4 @@ -MLEst <- function( +mle_est <- function( param, y, x, va, vb, weights, max.step, thres, alpha.start, beta.start, pa, pb) { ## starting values for parameter optimization @@ -24,7 +24,7 @@ MLEst <- function( converged <- fit$converged } else { ### point estimate - mle <- max.likelihood( + mle <- mle_point( param, y, x, va, vb, alpha.start, beta.start, weights, max.step, thres, pa, pb ) @@ -36,15 +36,15 @@ MLEst <- function( ### Computing Fisher Information: if (param == "RR") { - cov <- var.mle.rr(x, alpha.ml, beta.ml, va, vb, weights) + cov <- mle_var_rr(x, alpha.ml, beta.ml, va, vb, weights) } if (param == "RD") { - cov <- var.mle.rd(x, alpha.ml, beta.ml, va, vb, weights) + cov <- mle_var_rd(x, alpha.ml, beta.ml, va, vb, weights) } sd.est <- sqrt(diag(cov)) } name <- paste(c(rep("alpha", pa), rep("beta", pb)), c(1:pa, 1:pb)) - sol <- WrapResults(point.est, cov, param, name, va, vb, converged) + sol <- wrap_results(point.est, cov, param, name, va, vb, converged) return(sol) } diff --git a/R/1.1_MLE_Point.R b/R/mle_point.R similarity index 93% rename from R/1.1_MLE_Point.R rename to R/mle_point.R index ec171aa..0f6708d 100644 --- a/R/1.1_MLE_Point.R +++ b/R/mle_point.R @@ -1,8 +1,8 @@ -max.likelihood <- function(param, y, x, va, vb, alpha.start, beta.start, weights, +mle_point <- function(param, y, x, va, vb, alpha.start, beta.start, weights, max.step, thres, pa, pb) { startpars <- c(alpha.start, beta.start) - getProb <- if (param == "RR") getProbRR else getProbRD + getProb <- if (param == "RR") get_prob_rr else get_prob_rd ## negative log likelihood function neg.log.likelihood <- function(pars) { diff --git a/R/1.2_MLE_Var.R b/R/mle_var.R similarity index 84% rename from R/1.2_MLE_Var.R rename to R/mle_var.R index b02a0b2..ecc37cb 100644 --- a/R/1.2_MLE_Var.R +++ b/R/mle_var.R @@ -1,7 +1,7 @@ ### variance calculation -var.mle.rr <- function(x, alpha.ml, beta.ml, va, vb, weights) { - p0p1 <- getProbRR(va %*% alpha.ml, vb %*% beta.ml) +mle_var_rr <- function(x, alpha.ml, beta.ml, va, vb, weights) { + p0p1 <- get_prob_rr(va %*% alpha.ml, vb %*% beta.ml) n <- dim(va)[1] pA <- rep(NA, n) # P(Y=1|A,V); here A = X pA[x == 0] <- p0p1[x == 0, 1] @@ -22,8 +22,8 @@ var.mle.rr <- function(x, alpha.ml, beta.ml, va, vb, weights) { ### variance calculation -var.mle.rd <- function(x, alpha.ml, beta.ml, va, vb, weights) { - p0p1 <- getProbRD(va %*% alpha.ml, vb %*% beta.ml) +mle_var_rd <- function(x, alpha.ml, beta.ml, va, vb, weights) { + p0p1 <- get_prob_rd(va %*% alpha.ml, vb %*% beta.ml) # p0p1 = cbind(p0, p1): n * 2 matrix p0 <- p0p1[, 1] p1 <- p0p1[, 2] diff --git a/R/predict.brm.R b/R/predict_brm.R similarity index 92% rename from R/predict.brm.R rename to R/predict_brm.R index a2151d1..6b2afa8 100644 --- a/R/predict.brm.R +++ b/R/predict_brm.R @@ -19,7 +19,7 @@ #' @export -predict.brm <- function(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) { +predict_brm <- function(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) { va <- object$va vb <- object$vb @@ -40,10 +40,10 @@ predict.brm <- function(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) linear.predictors <- cbind(va.new %*% alpha.est, vb.new %*% beta.est) if (object$param == "RR") { - p0p1 <- getProbRR(linear.predictors) + p0p1 <- get_prob_rr(linear.predictors) } if (object$param == "RD") { - p0p1 <- getProbRD(linear.predictors) + p0p1 <- get_prob_rd(linear.predictors) } if (object$param == "OR") { p0 <- expit(linear.predictors[, 2]) diff --git a/R/Print.R b/R/print_brm.R similarity index 97% rename from R/Print.R rename to R/print_brm.R index 6baab87..80eb466 100644 --- a/R/Print.R +++ b/R/print_brm.R @@ -6,7 +6,7 @@ #' #' @export -print.brm <- function(x, ...) { +print_brm <- function(x, ...) { hid <- attr(x, "hidden") nhid <- which(!names(x) %in% hid) diff --git a/R/MyFunc.R b/R/util.R similarity index 95% rename from R/MyFunc.R rename to R/util.R index 7462499..b88b073 100644 --- a/R/MyFunc.R +++ b/R/util.R @@ -27,7 +27,7 @@ same <- function(x, y, tolerance = .Machine$double.eps^0.5) { ## Functions for wrapping estimation results into a nice format -WrapResults <- function(point.est, cov, param, name, va, vb, converged) { +wrap_results <- function(point.est, cov, param, name, va, vb, converged) { se.est <- sqrt(diag(cov)) conf.lower <- point.est + stats::qnorm(0.025) * se.est @@ -66,7 +66,7 @@ WrapResults <- function(point.est, cov, param, name, va, vb, converged) { ## Limits are 0 and 1 as x goes to -infty and +infty respectively ## The function will never return NaN given a numerical input -getPrbAux <- function(x) { +get_prb_aux <- function(x) { ifelse((x < 17) & (x > (-500)), 0.5 * exp(x) * (-1 + (1 + 4 * exp(-x))^0.5), ifelse(x < 0, 0, 1) diff --git a/R/ValidCheck.R b/R/valid_check.R similarity index 95% rename from R/ValidCheck.R rename to R/valid_check.R index 8814c2c..6daed6d 100644 --- a/R/ValidCheck.R +++ b/R/valid_check.R @@ -1,4 +1,4 @@ -ValidCheck <- function(param, y, x, va, vb, vc, weights, subset, est.method, +valid_check <- function(param, y, x, va, vb, vc, weights, subset, est.method, optimal, max.step, thres, alpha.start, beta.start) { if (!is.character(param)) { stop("Parameter must be a character") diff --git a/R/2.2_DR_Var_RD.R b/R/var_rd_dr.R similarity index 97% rename from R/2.2_DR_Var_RD.R rename to R/var_rd_dr.R index 621423d..2c8b3c2 100644 --- a/R/2.2_DR_Var_RD.R +++ b/R/var_rd_dr.R @@ -1,6 +1,6 @@ ## Sandwich estimator for variance of RD -var.rd.dr <- function( +var_rd_dr <- function( y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, optimal, weights) { ######################################## @@ -11,7 +11,7 @@ var.rd.dr <- function( ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing ### the Hessian: - Hrd <- Hessian2RD(y, x, va, vb, alpha.ml, beta.ml, weights) + Hrd <- hessian_2_rd(y, x, va, vb, alpha.ml, beta.ml, weights) hessian <- Hrd$hessian p0 <- Hrd$p0 p1 <- Hrd$p1 diff --git a/R/2.2_DR_Var_RR.R b/R/var_rr_dr.R similarity index 97% rename from R/2.2_DR_Var_RR.R rename to R/var_rr_dr.R index 8708cc0..6558119 100644 --- a/R/2.2_DR_Var_RR.R +++ b/R/var_rr_dr.R @@ -1,6 +1,6 @@ ## Sandwich estimator for variance of RR -var.rr.dr <- function( +var_rr_dr <- function( y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, optimal, weights) { ######################################## @@ -11,7 +11,7 @@ var.rr.dr <- function( ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing ### the Hessian: - Hrr <- Hessian2RR(y, x, va, vb, alpha.ml, beta.ml, weights) + Hrr <- hessian_2_rr(y, x, va, vb, alpha.ml, beta.ml, weights) hessian <- Hrr$hessian p0 <- Hrr$p0 p1 <- Hrr$p1 diff --git a/man/brm-package.Rd b/man/brm-package.Rd index 761054f..6f9265b 100644 --- a/man/brm-package.Rd +++ b/man/brm-package.Rd @@ -9,12 +9,12 @@ The function \code{brm} in this package provides an alternative to generalized l Unlike \code{glm}, which uses a single link function for the outcome, \code{brm} separates the nuisance model from the target model. This separation provides opportunities to choose nuisance models independently of the target model. To see why this is important, we may contrast it with the use of a GLM to model the log relative risk. In this setting one might use a Poisson regression (with interaction term) \eqn{log P(y = 1|x, va, vb) = \alpha * x * va + \beta * vb} (though such a model ignores the fact that \eqn{y} is binary); here \eqn{va} and \eqn{vb} are subsets of \eqn{v}. Such a Poisson model can be seen as a combination of two parts: a target model \eqn{log RR(va) = \alpha * va} and a nuisance model \eqn{log P(y = 1|x = 0, vb) = \beta * vb}. However, this nuisance model is variation dependent of the target model so that predicted probabilities may go outside of \eqn{[0,1]}. Furthermore, one cannot solve this problem under a GLM framework as with a GLM, the target model and nuisance model are determined \emph{simultaneously} through a link function. - More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP). If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}). - - \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. \code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation. - + More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP). If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}). + + \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. \code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation. + \code{brm} also provides doubly robust fitting as an option such that the estimates for \eqn{\alpha} are still consistent and asymptotically normal even when the nuisance model is misspecified, provided that we have a correctly specified logistic model for the exposure probability \eqn{P(x=1|v)}. Such doubly robust estimation is only possible for the Relative Risk and Risk Difference, but not the Odds Ratio. - + See Richardson et al. (2017) for more details. } \references{ diff --git a/man/brm.Rd b/man/brm.Rd index 6454307..74b5c32 100644 --- a/man/brm.Rd +++ b/man/brm.Rd @@ -56,7 +56,7 @@ brm( } \value{ A list consisting of - \item{param}{the measure of association.} + \item{param}{the measure of association.} \item{point.est}{ the point estimates.} @@ -84,13 +84,13 @@ A list consisting of \code{brm} is used to estimate the association between two binary variables, and how that varies as a function of other covariates. } \details{ -\code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation. -If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}. -If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}. -If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}. -For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}. +\code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation. +If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}. +If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}. +If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}. +For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}. For OR, the nuisance model is for the baseline risk: \eqn{logit(P(y=1|x=0,vb)) = \beta*vb.} -In each case the nuisance model is variation independent of the target model, which ensures that the predicted probabilities lie in \eqn{[0,1]}. +In each case the nuisance model is variation independent of the target model, which ensures that the predicted probabilities lie in \eqn{[0,1]}. See Richardson et al. (2016+) for more details. If \code{est.method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details. @@ -101,27 +101,29 @@ For the doubly robust estimation method, MLE is used to obtain preliminary estim } \examples{ set.seed(0) -n = 100 -alpha.true = c(0,-1) -beta.true = c(-0.5,1) -gamma.true = c(0.1,-0.5) -params.true = list(alpha.true=alpha.true, beta.true=beta.true, - gamma.true=gamma.true) -v.1 = rep(1,n) # intercept term -v.2 = runif(n,-2,2) -v = cbind(v.1,v.2) -pscore.true = exp(v \%*\% gamma.true) / (1+exp(v \%*\% gamma.true)) -p0p1.true = getProbRR(v \%*\% alpha.true,v \%*\% beta.true) -x = rbinom(n, 1, pscore.true) -pA.true = p0p1.true[,1] -pA.true[x==1] = p0p1.true[x==1,2] -y = rbinom(n, 1, pA.true) - -fit.mle = brm(y,x,v,v,'RR','MLE',v,TRUE) -fit.drw = brm(y,x,v,v,'RR','DR',v,TRUE) -fit.dru = brm(y,x,v,v,'RR','DR',v,FALSE) - -fit.mle2 = brm(y,x,~v.2, ~v.2, 'RR','MLE', ~v.2,TRUE) # same as fit.mle +n <- 100 +alpha.true <- c(0, -1) +beta.true <- c(-0.5, 1) +gamma.true <- c(0.1, -0.5) +params.true <- list( + alpha.true = alpha.true, beta.true = beta.true, + gamma.true = gamma.true +) +v.1 <- rep(1, n) # intercept term +v.2 <- runif(n, -2, 2) +v <- cbind(v.1, v.2) +pscore.true <- exp(v \%*\% gamma.true) / (1 + exp(v \%*\% gamma.true)) +p0p1.true <- get_prob_rr(v \%*\% alpha.true, v \%*\% beta.true) +x <- rbinom(n, 1, pscore.true) +pA.true <- p0p1.true[, 1] +pA.true[x == 1] <- p0p1.true[x == 1, 2] +y <- rbinom(n, 1, pA.true) + +fit.mle <- brm(y, x, v, v, "RR", "MLE", v, TRUE) +fit.drw <- brm(y, x, v, v, "RR", "DR", v, TRUE) +fit.dru <- brm(y, x, v, v, "RR", "DR", v, FALSE) + +fit.mle2 <- brm(y, x, ~v.2, ~v.2, "RR", "MLE", ~v.2, TRUE) # same as fit.mle } \references{ @@ -130,9 +132,9 @@ Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimatio Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robust estimation in a semiparametric odds ratio model." Biometrika 97.1 (2010): 171-180. } \seealso{ -\code{getProbScalarRD}, \code{getProbRD} (vectorised), \code{getProbScalarRR} and \code{getProbRR} (vectorised) for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); +\code{get_prob_rd}, and \code{get_prob_rr} for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); -\code{predict.blm} for obtaining fitted probabilities from \code{brm} fits. +\code{predict.brm} for obtaining fitted probabilities from \code{brm} fits. } \author{ Linbo Wang , Mark Clements , Thomas Richardson diff --git a/man/getProbRR.Rd b/man/getProbRR.Rd deleted file mode 100644 index 838daee..0000000 --- a/man/getProbRR.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getProbScalarRR.R -\name{getProbRR} -\alias{getProbRR} -\title{Calculate risks from log RR and log OP (vectorised)} -\usage{ -getProbRR(logrr, logop = NA) -} -\arguments{ -\item{logrr}{log of relative risk} - -\item{logop}{log of odds product} -} -\value{ -a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns -} -\description{ -Calculate risks from log RR and log OP (vectorised) -} -\details{ -The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -} -\examples{ -getProbRR(0,0) - -set.seed(0) -logrr = rnorm(10,0,1) -logop = rnorm(10,0,1) -probs = getProbRR(logrr, logop) -colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -probs - -} diff --git a/man/getProbScalarRD.Rd b/man/getProbScalarRD.Rd deleted file mode 100644 index d997cb6..0000000 --- a/man/getProbScalarRD.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getProbScalarRD.R -\name{getProbScalarRD} -\alias{getProbScalarRD} -\title{Calculate risks from arctanh RD and log OP} -\usage{ -getProbScalarRD(atanhrd, logop) -} -\arguments{ -\item{atanhrd}{arctanh of risk difference} - -\item{logop}{log of odds product} -} -\value{ -a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} -} -\description{ -Calculate risks from arctanh RD and log OP -} -\details{ -The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. -} -\examples{ -getProbScalarRD(0,0) - -set.seed(0) -logrr = rnorm(10,0,1) -logop = rnorm(10,0,1) -probs = mapply(getProbScalarRD, logrr, logop) -rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -probs - -} diff --git a/man/getProbRD.Rd b/man/get_prob_rd.Rd similarity index 65% rename from man/getProbRD.Rd rename to man/get_prob_rd.Rd index 14e7292..fb193b8 100644 --- a/man/getProbRD.Rd +++ b/man/get_prob_rd.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getProbScalarRD.R -\name{getProbRD} -\alias{getProbRD} +% Please edit documentation in R/get_prob_rd.R +\name{get_prob_rd} +\alias{get_prob_rd} \title{Calculate risks from arctanh RD and log OP (vectorised)} \usage{ -getProbRD(atanhrd, logop) +get_prob_rd(atanhrd, logop) } \arguments{ \item{atanhrd}{arctanh of risk difference} @@ -18,17 +18,17 @@ a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns Calculate risks from arctanh RD and log OP (vectorised) } \details{ -The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. +The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. } \examples{ -getProbRD(0,0) +get_prob_rd(0, 0) set.seed(0) -logrr = rnorm(10,0,1) -logop = rnorm(10,0,1) -probs = getProbRD(logrr, logop) -colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") +logrr <- rnorm(10, 0, 1) +logop <- rnorm(10, 0, 1) +probs <- get_prob_rd(logrr, logop) +colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") probs } diff --git a/man/getProbScalarRR.Rd b/man/get_prob_rr.Rd similarity index 54% rename from man/getProbScalarRR.Rd rename to man/get_prob_rr.Rd index be6e84f..29f729a 100644 --- a/man/getProbScalarRR.Rd +++ b/man/get_prob_rr.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getProbScalarRR.R -\name{getProbScalarRR} -\alias{getProbScalarRR} +% Please edit documentation in R/get_prob_rr.R +\name{get_prob_rr} +\alias{get_prob_rr} \title{Calculate risks from log RR and log OP} \usage{ -getProbScalarRR(logrr, logop = NA) +get_prob_rr(logrr, logop = NA) } \arguments{ \item{logrr}{log of relative risk} @@ -12,7 +12,7 @@ getProbScalarRR(logrr, logop = NA) \item{logop}{log of odds product} } \value{ -a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} +a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns } \description{ Calculate risks from log RR and log OP @@ -21,13 +21,13 @@ Calculate risks from log RR and log OP The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. } \examples{ -getProbScalarRR(0,0) +get_prob_rr(0, 0) set.seed(0) -logrr = rnorm(10,0,1) -logop = rnorm(10,0,1) -probs = mapply(getProbScalarRR, logrr, logop) -rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") +logrr <- rnorm(10, 0, 1) +logop <- rnorm(10, 0, 1) +probs <- get_prob_rr(logrr, logop) +colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") probs } diff --git a/man/predict.brm.Rd b/man/predict_brm.Rd similarity index 84% rename from man/predict.brm.Rd rename to man/predict_brm.Rd index a83cdaa..73a6ae1 100644 --- a/man/predict.brm.Rd +++ b/man/predict_brm.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/predict.brm.R -\name{predict.brm} -\alias{predict.brm} +% Please edit documentation in R/predict_brm.R +\name{predict_brm} +\alias{predict_brm} \title{Fitted probabilities from \code{brm} fits} \usage{ -\method{predict}{brm}(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) +predict_brm(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) } \arguments{ \item{object}{A fitted object from function \code{brm}.} diff --git a/man/print.brm.Rd b/man/print_brm.Rd similarity index 71% rename from man/print.brm.Rd rename to man/print_brm.Rd index 2430c01..6bd07b5 100644 --- a/man/print.brm.Rd +++ b/man/print_brm.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Print.R -\name{print.brm} -\alias{print.brm} +% Please edit documentation in R/print_brm.R +\name{print_brm} +\alias{print_brm} \title{Ancillary function for printing} \usage{ -\method{print}{brm}(x, ...) +print_brm(x, ...) } \arguments{ \item{x}{a list obtained with the function 'brm'} diff --git a/tests/testthat/test-brm.R b/tests/testthat/test-brm.R index 8c170c0..891c761 100644 --- a/tests/testthat/test-brm.R +++ b/tests/testthat/test-brm.R @@ -14,7 +14,7 @@ v.1 <- rep(1, n) # intercept term v.2 <- runif(n, -2, 2) v <- cbind(v.1, v.2) pscore.true <- exp(v %*% gamma.true) / (1 + exp(v %*% gamma.true)) -p0p1.true <- getProbRR(v %*% alpha.true, v %*% beta.true) +p0p1.true <- get_prob_rr(v %*% alpha.true, v %*% beta.true) x <- rbinom(n, 1, pscore.true) pA.true <- p0p1.true[, 1] pA.true[x == 1] <- p0p1.true[x == 1, 2] From 59c153b2c54a8f60685b34a10d098d15b6da6bfa Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Fri, 3 Oct 2025 12:25:33 -0400 Subject: [PATCH 09/17] move valid_check and print_brm into utils --- R/print_brm.R | 46 ----------------------- R/util.R | 97 ++++++++++++++++++++++++++++++++++++++++++++++++ R/valid_check.R | 48 ------------------------ man/print_brm.Rd | 2 +- 4 files changed, 98 insertions(+), 95 deletions(-) delete mode 100644 R/print_brm.R delete mode 100644 R/valid_check.R diff --git a/R/print_brm.R b/R/print_brm.R deleted file mode 100644 index 80eb466..0000000 --- a/R/print_brm.R +++ /dev/null @@ -1,46 +0,0 @@ -#' Ancillary function for printing -#' -#' @param x a list obtained with the function 'brm' -#' -#' @param ... additional arguments affecting the output -#' -#' @export - -print_brm <- function(x, ...) { - hid <- attr(x, "hidden") - nhid <- which(!names(x) %in% hid) - - if (x$param == "RR") { - cat("Parameter of interest: (conditional) relative risk;", "\n", "nuisance parameter: odds product.", - "\n\n", - sep = "" - ) - cat("Target model: log(RR) = alpha * va", "\n") - cat("Nuisance model: log(OP) = beta * vb", "\n\n") - } - if (x$param == "RD") { - cat("Parameter of interest: (conditional) risk difference;", "\n", - "nuisance parameter: odds product.", "\n\n", - sep = "" - ) - cat("Target model: log(RD) = alpha * va", "\n") - cat("Nuisance model: log(OP) = beta * vb", "\n\n") - } - if (x$param == "OR") { - cat("Parameter of interest: (conditional) odds ratio;", "\n", "nuisance parameter: baseline risk.", - "\n\n", - sep = "" - ) - cat("Target model: log(OR) = alpha * va", "\n") - cat("Nuisance model: log(p0) = beta * vb", "\n\n") - } - - - for (i in nhid) { - x[[i]] <- round(x[[i]], 3) - } - - print(x[nhid], 3) - - cat("See the element '$coefficients' for more information.\n") -} diff --git a/R/util.R b/R/util.R index b88b073..2e09c7e 100644 --- a/R/util.R +++ b/R/util.R @@ -72,3 +72,100 @@ get_prb_aux <- function(x) { ifelse(x < 0, 0, 1) ) } + + +valid_check <- function(param, y, x, va, vb, vc, weights, subset, est.method, + optimal, max.step, thres, alpha.start, beta.start) { + if (!is.character(param)) { + stop("Parameter must be a character") + } + if (!(param %in% c("RD", "RR", "OR"))) { + stop("Parameter can only take RR, RD or OR") + } + + if (sum(is.na(y)) + sum(is.na(x)) + sum(is.na(va)) + sum(is.na(vb)) + + sum(is.na(vc)) + sum(is.na(weights)) > 0) { + warning("Observations with missing values will be removed.") + } + if (!(all(y %in% c(0, 1)))) { + stop("y values must be either 0 or 1.") + } + if (!(all(x %in% c(0, 1)))) { + stop("x values must be either 0 or 1.") + } + if (!identical(length(y), length(x), dim(va)[1], dim(vb)[1], dim(vc)[1])) { + stop("y, x and v must have the same length (dimension)") + } + + if (!is.numeric(weights)) { + stop("weights must either be NULL or take numerical values") + } + if (!is.numeric(subset)) { + stop("subset must either be NULL or take numerical values") + } + if (!(est.method %in% c("MLE", "DR"))) { + stop("Must use MLE or DR for estimation") + } + if (!is.logical(optimal)) { + stop("optimal must be a logical variable") + } + if (!is.numeric(max.step) & !is.null(max.step)) { + stop("max.step must be a number") + } + if (!is.numeric(thres)) { + stop("thres must be a number") + } + if (!is.null(alpha.start) & length(alpha.start) != dim(va)[2]) { + stop("length of alpha.start must match the dimension of va") + } + if (!is.null(beta.start) & length(beta.start) != dim(vb)[2]) { + stop("length of beta.start must match the dimension of vb") + } +} + +#' Ancillary function for printing +#' +#' @param x a list obtained with the function 'brm' +#' +#' @param ... additional arguments affecting the output +#' +#' @export + +print_brm <- function(x, ...) { + hid <- attr(x, "hidden") + nhid <- which(!names(x) %in% hid) + + if (x$param == "RR") { + cat("Parameter of interest: (conditional) relative risk;", "\n", "nuisance parameter: odds product.", + "\n\n", + sep = "" + ) + cat("Target model: log(RR) = alpha * va", "\n") + cat("Nuisance model: log(OP) = beta * vb", "\n\n") + } + if (x$param == "RD") { + cat("Parameter of interest: (conditional) risk difference;", "\n", + "nuisance parameter: odds product.", "\n\n", + sep = "" + ) + cat("Target model: log(RD) = alpha * va", "\n") + cat("Nuisance model: log(OP) = beta * vb", "\n\n") + } + if (x$param == "OR") { + cat("Parameter of interest: (conditional) odds ratio;", "\n", "nuisance parameter: baseline risk.", + "\n\n", + sep = "" + ) + cat("Target model: log(OR) = alpha * va", "\n") + cat("Nuisance model: log(p0) = beta * vb", "\n\n") + } + + + for (i in nhid) { + x[[i]] <- round(x[[i]], 3) + } + + print(x[nhid], 3) + + cat("See the element '$coefficients' for more information.\n") +} diff --git a/R/valid_check.R b/R/valid_check.R deleted file mode 100644 index 6daed6d..0000000 --- a/R/valid_check.R +++ /dev/null @@ -1,48 +0,0 @@ -valid_check <- function(param, y, x, va, vb, vc, weights, subset, est.method, - optimal, max.step, thres, alpha.start, beta.start) { - if (!is.character(param)) { - stop("Parameter must be a character") - } - if (!(param %in% c("RD", "RR", "OR"))) { - stop("Parameter can only take RR, RD or OR") - } - - if (sum(is.na(y)) + sum(is.na(x)) + sum(is.na(va)) + sum(is.na(vb)) + - sum(is.na(vc)) + sum(is.na(weights)) > 0) { - warning("Observations with missing values will be removed.") - } - if (!(all(y %in% c(0, 1)))) { - stop("y values must be either 0 or 1.") - } - if (!(all(x %in% c(0, 1)))) { - stop("x values must be either 0 or 1.") - } - if (!identical(length(y), length(x), dim(va)[1], dim(vb)[1], dim(vc)[1])) { - stop("y, x and v must have the same length (dimension)") - } - - if (!is.numeric(weights)) { - stop("weights must either be NULL or take numerical values") - } - if (!is.numeric(subset)) { - stop("subset must either be NULL or take numerical values") - } - if (!(est.method %in% c("MLE", "DR"))) { - stop("Must use MLE or DR for estimation") - } - if (!is.logical(optimal)) { - stop("optimal must be a logical variable") - } - if (!is.numeric(max.step) & !is.null(max.step)) { - stop("max.step must be a number") - } - if (!is.numeric(thres)) { - stop("thres must be a number") - } - if (!is.null(alpha.start) & length(alpha.start) != dim(va)[2]) { - stop("length of alpha.start must match the dimension of va") - } - if (!is.null(beta.start) & length(beta.start) != dim(vb)[2]) { - stop("length of beta.start must match the dimension of vb") - } -} diff --git a/man/print_brm.Rd b/man/print_brm.Rd index 6bd07b5..6c04d12 100644 --- a/man/print_brm.Rd +++ b/man/print_brm.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/print_brm.R +% Please edit documentation in R/util.R \name{print_brm} \alias{print_brm} \title{Ancillary function for printing} From 7b0eabacff18a296fbf7661c00c6fd9f53bfb7a2 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Fri, 3 Oct 2025 13:38:24 -0400 Subject: [PATCH 10/17] replace dots with underscores in variable names --- R/dr_est.R | 32 +++++------ R/dr_point.R | 48 ++++++++-------- R/hessian_2_rd.R | 64 ++++++++++----------- R/hessian_2_rr.R | 66 +++++++++++----------- R/main.R | 114 +++++++++++++++++++------------------- R/mle_est.R | 37 ++++++------- R/mle_point.R | 26 ++++----- R/mle_var.R | 40 ++++++------- R/predict_brm.R | 48 ++++++++-------- R/util.R | 52 ++++++++--------- R/var_rd_dr.R | 94 +++++++++++++++---------------- R/var_rr_dr.R | 92 +++++++++++++++--------------- man/brm.Rd | 66 +++++++++++----------- man/predict_brm.Rd | 12 ++-- tests/testthat/test-brm.R | 36 ++++++------ 15 files changed, 413 insertions(+), 414 deletions(-) diff --git a/R/dr_est.R b/R/dr_est.R index 27d087a..cf6e149 100644 --- a/R/dr_est.R +++ b/R/dr_est.R @@ -1,22 +1,22 @@ dr_est <- function( - param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, - weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message) { - dr.est <- dr_estimate_noiterate( - param, y, x, va, vb, vc, alpha.ml, beta.ml, - gamma, optimal, weights, max.step, thres, alpha.start, message + param, y, x, va, vb, vc, alpha_ml, beta_ml, gamma, optimal, + weights, max_step, thres, alpha_start, beta_cov, gamma_cov, message) { + dr_est <- dr_estimate_noiterate( + param, y, x, va, vb, vc, alpha_ml, beta_ml, + gamma, optimal, weights, max_step, thres, alpha_start, message ) - point.est <- dr.est$par - converged <- dr.est$convergence + point_est <- dr_est$par + converged <- dr_est$convergence if (param == "RR") { - alpha.cov <- var_rr_dr( - y, x, va, vb, vc, point.est, alpha.ml, beta.ml, + alpha_cov <- var_rr_dr( + y, x, va, vb, vc, point_est, alpha_ml, beta_ml, gamma, optimal, weights ) } if (param == "RD") { - alpha.cov <- var_rd_dr( - y, x, va, vb, vc, point.est, alpha.ml, beta.ml, + alpha_cov <- var_rd_dr( + y, x, va, vb, vc, point_est, alpha_ml, beta_ml, gamma, optimal, weights ) } @@ -28,12 +28,12 @@ dr_est <- function( c(rep("alpha", pa), rep("beta", pb), rep("gamma", pc)), c(1:pa, 1:pb, 1:pc) ) - point.est <- c(point.est, beta.ml, gamma) + point_est <- c(point_est, beta_ml, gamma) cov <- matrix(NA, pa + pb + pc, pa + pb + pc) - cov[1:pa, 1:pa] <- alpha.cov - cov[(pa + 1):(pa + pb), (pa + 1):(pa + pb)] <- beta.cov - cov[(pa + pb + 1):(pa + pb + pc), (pa + pb + 1):(pa + pb + pc)] <- gamma.cov + cov[1:pa, 1:pa] <- alpha_cov + cov[(pa + 1):(pa + pb), (pa + 1):(pa + pb)] <- beta_cov + cov[(pa + pb + 1):(pa + pb + pc), (pa + pb + 1):(pa + pb + pc)] <- gamma_cov - sol <- wrap_results(point.est, cov, param, name, va, vb, converged) + sol <- wrap_results(point_est, cov, param, name, va, vb, converged) return(sol) } diff --git a/R/dr_point.R b/R/dr_point.R index 3fa97e0..375e517 100644 --- a/R/dr_point.R +++ b/R/dr_point.R @@ -1,26 +1,26 @@ dr_estimate_onestep <- function( - param, y, x, va, vb, alpha.start, beta, pscore, - wt, weights, max.step, thres, message) { - startpars <- c(alpha.start) # pars only contain alpha + param, y, x, va, vb, alpha_start, beta, pscore, + wt, weights, max_step, thres, message) { + startpars <- c(alpha_start) # pars only contain alpha ## DR estimation equation^2 if (param == "RR") { - dr.objective <- function(pars) { + dr_objective <- function(pars) { p0 <- get_prob_rr(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] - H.alpha <- as.vector(y * exp(-x * (va %*% pars))) - tmp <- mat_vec_mul(t(va), (wt * (x - pscore) * (H.alpha - p0) * weights)) + H_alpha <- as.vector(y * exp(-x * (va %*% pars))) + tmp <- mat_vec_mul(t(va), (wt * (x - pscore) * (H_alpha - p0) * weights)) return(sum(tmp^2)) } } if (param == "RD") { - dr.objective <- function(pars) { + dr_objective <- function(pars) { p0 <- get_prob_rd(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] - H.alpha <- y - x * tanh(mat_vec_mul(va, pars)) - tmp <- mat_vec_mul(t((H.alpha - p0) * (x - pscore)), (va * wt * weights)) + H_alpha <- y - x * tanh(mat_vec_mul(va, pars)) + tmp <- mat_vec_mul(t((H_alpha - p0) * (x - pscore)), (va * wt * weights)) return(sum(tmp^2)) } } - opt <- stats::optim(startpars, dr.objective, control = list(reltol = thres)) + opt <- stats::optim(startpars, dr_objective, control = list(reltol = thres)) opt$convergence <- (opt$convergence == 0) # change cf. optim() return(opt) @@ -29,19 +29,19 @@ dr_estimate_onestep <- function( dr_estimate_noiterate <- function( - param, y, x, va, vb, vc, alpha.ml, beta.ml, - gamma, optimal, weights, max.step, thres, alpha.start, message) { + param, y, x, va, vb, vc, alpha_ml, beta_ml, + gamma, optimal, weights, max_step, thres, alpha_start, message) { pscore <- as.vector(expit(mat_vec_mul(vc, gamma))) if (optimal == TRUE) { if (param == "RR") { - p0 <- get_prob_rr(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] - wt <- as.vector(1 / (1 - p0 + (1 - pscore) * (exp(mat_vec_mul(-va, alpha.ml)) - + p0 <- get_prob_rr(mat_vec_mul(va, alpha_ml), mat_vec_mul(vb, beta_ml))[, 1] + wt <- as.vector(1 / (1 - p0 + (1 - pscore) * (exp(mat_vec_mul(-va, alpha_ml)) - 1))) } if (param == "RD") { - p0 <- get_prob_rd(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] - rho <- as.vector(tanh(mat_vec_mul(va, alpha.ml))) + p0 <- get_prob_rd(mat_vec_mul(va, alpha_ml), mat_vec_mul(vb, beta_ml))[, 1] + rho <- as.vector(tanh(mat_vec_mul(va, alpha_ml))) wt <- (1 - rho) * (1 + rho) / (p0 * (1 - p0) + rho * (1 - pscore) * (1 - 2 * p0 - rho)) } @@ -49,18 +49,18 @@ dr_estimate_noiterate <- function( wt <- rep(1, length(pscore)) } - if (is.null(alpha.start)) { - alpha.start <- alpha.ml + if (is.null(alpha_start)) { + alpha_start <- alpha_ml } - alpha.dr.opt <- dr_estimate_onestep( - param, y, x, va, vb, alpha.start, beta.ml, - pscore, wt, weights, max.step, thres, message + alpha_dr_opt <- dr_estimate_onestep( + param, y, x, va, vb, alpha_start, beta_ml, + pscore, wt, weights, max_step, thres, message ) # if(MESSAGE){ print(paste('DR One Step: ',' Alpha: - # ',paste(round(alpha.dr,5),collapse=', '),' Beta: - # ',paste(round(beta.ml,5),collapse=', '))) } + # ',paste(round(alpha_dr,5),collapse=', '),' Beta: + # ',paste(round(beta_ml,5),collapse=', '))) } - return(alpha.dr.opt) + return(alpha_dr_opt) } diff --git a/R/hessian_2_rd.R b/R/hessian_2_rd.R index fec319d..ba4cab3 100644 --- a/R/hessian_2_rd.R +++ b/R/hessian_2_rd.R @@ -1,9 +1,9 @@ -hessian_2_rd <- function(y, x, va, vb, alpha.ml, beta.ml, cnt) { +hessian_2_rd <- function(y, x, va, vb, alpha_ml, beta_ml, cnt) { # calculating the Hessian using the second derivative have to do so # because under mis-specification of models Hessian no longer equals the # square of the first order derivatives - p0p1 <- get_prob_rd(va %*% alpha.ml, vb %*% beta.ml) + p0p1 <- get_prob_rd(va %*% alpha_ml, vb %*% beta_ml) # p0p1 = cbind(p0, p1): n * 2 matrix p0 <- p0p1[, 1] p1 <- p0p1[, 2] @@ -14,56 +14,56 @@ hessian_2_rd <- function(y, x, va, vb, alpha.ml, beta.ml, cnt) { s1 <- p1 * (1 - p1) sA <- pA * (1 - pA) - rho <- as.vector(tanh(va %*% alpha.ml)) # estimated risk differences + rho <- as.vector(tanh(va %*% alpha_ml)) # estimated risk differences ### First order derivatives ### - dl.by.dpA <- (y - pA) / sA - dp0.by.dphi <- s0 * s1 / (s0 + s1) - dp0.by.drho <- -s0 / (s0 + s1) - drho.by.dalpha <- va * (1 - rho^2) - dphi.by.dbeta <- vb + dl_by_dpA <- (y - pA) / sA + dp0_by_dphi <- s0 * s1 / (s0 + s1) + dp0_by_drho <- -s0 / (s0 + s1) + drho_by_dalpha <- va * (1 - rho^2) + dphi_by_dbeta <- vb - dpA.by.drho <- dp0.by.drho + x - dpA.by.dalpha <- drho.by.dalpha * dpA.by.drho - dpA.by.dphi <- dp0.by.dphi - dpA.by.dbeta <- dphi.by.dbeta * dpA.by.dphi + dpA_by_drho <- dp0_by_drho + x + dpA_by_dalpha <- drho_by_dalpha * dpA_by_drho + dpA_by_dphi <- dp0_by_dphi + dpA_by_dbeta <- dphi_by_dbeta * dpA_by_dphi ### Second order derivatives ### - d2l.by.dpA.2 <- -(y - pA)^2 / sA^2 - d2pA.by.drho.2 <- s0 * s1 * (2 - 2 * p0 - 2 * p1) / (s0 + s1)^3 - d2pA.by.dphi.drho <- (s0 * (1 - 2 * p1) - s1 * (1 - 2 * p0)) * s0 * s1 / (s0 + + d2l_by_dpA_2 <- -(y - pA)^2 / sA^2 + d2pA_by_drho_2 <- s0 * s1 * (2 - 2 * p0 - 2 * p1) / (s0 + s1)^3 + d2pA_by_dphi_drho <- (s0 * (1 - 2 * p1) - s1 * (1 - 2 * p0)) * s0 * s1 / (s0 + s1)^3 - d2pA.by.dphi.2 <- (s0^2 * (1 - 2 * p1) + s1^2 * (1 - 2 * p0)) * s0 * s1 / (s0 + + d2pA_by_dphi_2 <- (s0^2 * (1 - 2 * p1) + s1^2 * (1 - 2 * p0)) * s0 * s1 / (s0 + s1)^3 - d2rho.by.dalpha.2 <- -2 * t(va * rho) %*% drho.by.dalpha + d2rho_by_dalpha_2 <- -2 * t(va * rho) %*% drho_by_dalpha ### Compute elements of the Hessian matrix ### - d2l.by.dalpha.2 <- t(dpA.by.dalpha * d2l.by.dpA.2 * cnt) %*% dpA.by.dalpha + - t(drho.by.dalpha * dl.by.dpA * d2pA.by.drho.2 * cnt) %*% drho.by.dalpha - - 2 * t(va * rho * dl.by.dpA * dpA.by.drho * cnt) %*% drho.by.dalpha + d2l_by_dalpha_2 <- t(dpA_by_dalpha * d2l_by_dpA_2 * cnt) %*% dpA_by_dalpha + + t(drho_by_dalpha * dl_by_dpA * d2pA_by_drho_2 * cnt) %*% drho_by_dalpha - + 2 * t(va * rho * dl_by_dpA * dpA_by_drho * cnt) %*% drho_by_dalpha - d2l.by.dalpha.dbeta <- t(dpA.by.dalpha * d2l.by.dpA.2 * cnt) %*% dpA.by.dbeta + - t(drho.by.dalpha * dl.by.dpA * d2pA.by.dphi.drho * cnt) %*% dphi.by.dbeta - d2l.by.dbeta.dalpha <- t(d2l.by.dalpha.dbeta) + d2l_by_dalpha_dbeta <- t(dpA_by_dalpha * d2l_by_dpA_2 * cnt) %*% dpA_by_dbeta + + t(drho_by_dalpha * dl_by_dpA * d2pA_by_dphi_drho * cnt) %*% dphi_by_dbeta + d2l_by_dbeta_dalpha <- t(d2l_by_dalpha_dbeta) - d2l.by.dbeta.2 <- t(dpA.by.dbeta * d2l.by.dpA.2 * cnt) %*% dpA.by.dbeta + - t(dphi.by.dbeta * dl.by.dpA * d2pA.by.dphi.2 * cnt) %*% dphi.by.dbeta + d2l_by_dbeta_2 <- t(dpA_by_dbeta * d2l_by_dpA_2 * cnt) %*% dpA_by_dbeta + + t(dphi_by_dbeta * dl_by_dpA * d2pA_by_dphi_2 * cnt) %*% dphi_by_dbeta - hessian <- -rbind(cbind(d2l.by.dalpha.2, d2l.by.dalpha.dbeta), cbind( - d2l.by.dbeta.dalpha, - d2l.by.dbeta.2 + hessian <- -rbind(cbind(d2l_by_dalpha_2, d2l_by_dalpha_dbeta), cbind( + d2l_by_dbeta_dalpha, + d2l_by_dbeta_2 )) ### NB Note the extra minus sign here return(list( hessian = hessian, p0 = p0, p1 = p1, pA = pA, s0 = s0, s1 = s1, - sA = sA, rho = rho, dl.by.dpA = dl.by.dpA, dp0.by.dphi = dp0.by.dphi, - dp0.by.drho = dp0.by.drho, drho.by.dalpha = drho.by.dalpha, dphi.by.dbeta = dphi.by.dbeta, - dpA.by.drho = dpA.by.drho, dpA.by.dalpha = dpA.by.dalpha, dpA.by.dphi = dpA.by.dphi, - dpA.by.dbeta = dpA.by.dbeta + sA = sA, rho = rho, dl_by_dpA = dl_by_dpA, dp0_by_dphi = dp0_by_dphi, + dp0_by_drho = dp0_by_drho, drho_by_dalpha = drho_by_dalpha, dphi_by_dbeta = dphi_by_dbeta, + dpA_by_drho = dpA_by_drho, dpA_by_dalpha = dpA_by_dalpha, dpA_by_dphi = dpA_by_dphi, + dpA_by_dbeta = dpA_by_dbeta )) } diff --git a/R/hessian_2_rr.R b/R/hessian_2_rr.R index cdfce4c..d605cfc 100644 --- a/R/hessian_2_rr.R +++ b/R/hessian_2_rr.R @@ -1,9 +1,9 @@ -hessian_2_rr <- function(y, x, va, vb, alpha.ml, beta.ml, weights) { +hessian_2_rr <- function(y, x, va, vb, alpha_ml, beta_ml, weights) { # calculating the Hessian using the second derivative have to do so # because under mis-specification of models Hessian no longer equals the # square of the first order derivatives - p0p1 <- get_prob_rr(va %*% alpha.ml, vb %*% beta.ml) + p0p1 <- get_prob_rr(va %*% alpha_ml, vb %*% beta_ml) # p0p1 = cbind(p0, p1): n * 2 matrix p0 <- p0p1[, 1] p1 <- p0p1[, 2] @@ -14,63 +14,63 @@ hessian_2_rr <- function(y, x, va, vb, alpha.ml, beta.ml, weights) { ### Building blocks - dpsi0.by.dtheta <- -(1 - p0) / (1 - p0 + 1 - p1) - dpsi0.by.dphi <- (1 - p0) * (1 - p1) / (1 - p0 + 1 - p1) + dpsi0_by_dtheta <- -(1 - p0) / (1 - p0 + 1 - p1) + dpsi0_by_dphi <- (1 - p0) * (1 - p1) / (1 - p0 + 1 - p1) - dtheta.by.dalpha <- va - dphi.by.dbeta <- vb + dtheta_by_dalpha <- va + dphi_by_dbeta <- vb - dl.by.dpsi0 <- (y - pA) / (1 - pA) - d2l.by.dpsi0.2 <- (y - 1) * pA / ((1 - pA)^2) + dl_by_dpsi0 <- (y - pA) / (1 - pA) + d2l_by_dpsi0_2 <- (y - 1) * pA / ((1 - pA)^2) - ###### d2l.by.dalpha.2 + ###### d2l_by_dalpha_2 - d2psi0.by.dtheta.2 <- ((p0 - p1) * dpsi0.by.dtheta - (1 - p0) * p1) / ((1 - + d2psi0_by_dtheta_2 <- ((p0 - p1) * dpsi0_by_dtheta - (1 - p0) * p1) / ((1 - p0 + 1 - p1)^2) - d2l.by.dtheta.2 <- d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x)^2 + dl.by.dpsi0 * - d2psi0.by.dtheta.2 + d2l_by_dtheta_2 <- d2l_by_dpsi0_2 * (dpsi0_by_dtheta + x)^2 + dl_by_dpsi0 * + d2psi0_by_dtheta_2 - d2l.by.dalpha.2 <- t(dtheta.by.dalpha * d2l.by.dtheta.2 * weights) %*% - dtheta.by.dalpha + d2l_by_dalpha_2 <- t(dtheta_by_dalpha * d2l_by_dtheta_2 * weights) %*% + dtheta_by_dalpha - ###### d2l.by.dalpha.dbeta + ###### d2l_by_dalpha_dbeta - d2psi0.by.dtheta.dphi <- (1 - p0) * (1 - p1) * (p0 - p1) / (1 - p0 + 1 - + d2psi0_by_dtheta_dphi <- (1 - p0) * (1 - p1) * (p0 - p1) / (1 - p0 + 1 - p1)^3 - d2l.by.dtheta.dphi <- d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x) * dpsi0.by.dphi + - dl.by.dpsi0 * d2psi0.by.dtheta.dphi + d2l_by_dtheta_dphi <- d2l_by_dpsi0_2 * (dpsi0_by_dtheta + x) * dpsi0_by_dphi + + dl_by_dpsi0 * d2psi0_by_dtheta_dphi - d2l.by.dalpha.dbeta <- t(dtheta.by.dalpha * d2l.by.dtheta.dphi * weights) %*% - dphi.by.dbeta - d2l.by.dbeta.dalpha <- t(d2l.by.dalpha.dbeta) - # d2l.by.dalpha.dbeta is symmetric itself if (because) va=vb + d2l_by_dalpha_dbeta <- t(dtheta_by_dalpha * d2l_by_dtheta_dphi * weights) %*% + dphi_by_dbeta + d2l_by_dbeta_dalpha <- t(d2l_by_dalpha_dbeta) + # d2l_by_dalpha_dbeta is symmetric itself if (because) va=vb - #### d2l.by.dbeta2 + #### d2l_by_dbeta2 - d2psi0.by.dphi.2 <- (-(p0 * (1 - p1)^2 + p1 * (1 - p0)^2) / (1 - p0 + 1 - - p1)^2) * dpsi0.by.dphi + d2psi0_by_dphi_2 <- (-(p0 * (1 - p1)^2 + p1 * (1 - p0)^2) / (1 - p0 + 1 - + p1)^2) * dpsi0_by_dphi - d2l.by.dphi.2 <- d2l.by.dpsi0.2 * (dpsi0.by.dphi)^2 + dl.by.dpsi0 * d2psi0.by.dphi.2 + d2l_by_dphi_2 <- d2l_by_dpsi0_2 * (dpsi0_by_dphi)^2 + dl_by_dpsi0 * d2psi0_by_dphi_2 - d2l.by.dbeta.2 <- t(dphi.by.dbeta * d2l.by.dphi.2 * weights) %*% dphi.by.dbeta + d2l_by_dbeta_2 <- t(dphi_by_dbeta * d2l_by_dphi_2 * weights) %*% dphi_by_dbeta - hessian <- -rbind(cbind(d2l.by.dalpha.2, d2l.by.dalpha.dbeta), cbind( - d2l.by.dbeta.dalpha, - d2l.by.dbeta.2 + hessian <- -rbind(cbind(d2l_by_dalpha_2, d2l_by_dalpha_dbeta), cbind( + d2l_by_dbeta_dalpha, + d2l_by_dbeta_2 )) ### NB Note the extra minus sign here return(list( - hessian = hessian, p0 = p0, p1 = p1, pA = pA, dpsi0.by.dtheta = dpsi0.by.dtheta, - dpsi0.by.dphi = dpsi0.by.dphi, dtheta.by.dalpha = dtheta.by.dalpha, - dphi.by.dbeta = dphi.by.dbeta, dl.by.dpsi0 = dl.by.dpsi0 + hessian = hessian, p0 = p0, p1 = p1, pA = pA, dpsi0_by_dtheta = dpsi0_by_dtheta, + dpsi0_by_dphi = dpsi0_by_dphi, dtheta_by_dalpha = dtheta_by_dalpha, + dphi_by_dbeta = dphi_by_dbeta, dl_by_dpsi0 = dl_by_dpsi0 )) } diff --git a/R/main.R b/R/main.R index c6d96b6..5873fe6 100644 --- a/R/main.R +++ b/R/main.R @@ -19,17 +19,17 @@ #' #' @param subset An optional vector specifying a subset of observations to be used in the fitting process. #' -#' @param est.method The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation). +#' @param est_method The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation). #' #' @param optimal Use the optimal weighting function for the doubly robust estimator? Ignored if the estimation method is 'MLE'. The default is TRUE. #' -#' @param max.step The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000. +#' @param max_step The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000. #' #' @param thres Threshold for judging convergence. The default is 1e-6. #' -#' @param alpha.start Starting values for the parameters in the target model. +#' @param alpha_start Starting values for the parameters in the target model. #' -#' @param beta.start Starting values for the parameters in the nuisance model. +#' @param beta_start Starting values for the parameters in the nuisance model. #' #' @param message Show optimization details? Ignored if the estimation method is 'MLE'. The default is FALSE. #' @@ -42,30 +42,30 @@ #' In each case the nuisance model is variation independent of the target model, which ensures that the predicted probabilities lie in \eqn{[0,1]}. #' See Richardson et al. (2016+) for more details. #' -#' If \code{est.method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details. +#' If \code{est_method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details. #' -#' When estimating RR and RD, \code{est.method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency. +#' When estimating RR and RD, \code{est_method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency. #' #' For the doubly robust estimation method, MLE is used to obtain preliminary estimates of \eqn{\alpha}, \eqn{\beta} and \eqn{\gamma}. The estimate of \eqn{\alpha} is then updated by solving a doubly-robust estimating equation. (The estimate for \eqn{\beta} remains the MLE.) #' #' @return A list consisting of #' \item{param}{the measure of association.} #' -#' \item{point.est}{ the point estimates.} +#' \item{point_est}{ the point estimates.} #' -#' \item{se.est}{the standard error estimates.} +#' \item{se_est}{the standard error estimates.} #' #' \item{cov}{estimate of the covariance matrix for the estimates.} #' -#' \item{conf.lower}{ the lower limit of the 95\% (marginal) confidence interval.} +#' \item{conf_lower}{ the lower limit of the 95\% (marginal) confidence interval.} #' -#' \item{conf.upper}{ the upper limit of the 95\% (marginal) confidence interval.} +#' \item{conf_upper}{ the upper limit of the 95\% (marginal) confidence interval.} #' -#' \item{p.value}{the two sided p-value for testing zero coefficients.} +#' \item{p_value}{the two sided p-value for testing zero coefficients.} #' #' \item{coefficients}{ the matrix summarizing key information: point estimate, 95\% confidence interval and p-value.} #' -#' \item{param.est}{the fitted RR/RD/OR.} +#' \item{param_est}{the fitted RR/RD/OR.} #' #' \item{va}{ the matrix of covariates for the target model.} #' @@ -81,44 +81,44 @@ #' #' @seealso \code{get_prob_rd}, and \code{get_prob_rr} for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); #' -#' \code{predict.brm} for obtaining fitted probabilities from \code{brm} fits. +#' \code{predict_brm} for obtaining fitted probabilities from \code{brm} fits. #' #' @examples #' set.seed(0) #' n <- 100 -#' alpha.true <- c(0, -1) -#' beta.true <- c(-0.5, 1) -#' gamma.true <- c(0.1, -0.5) -#' params.true <- list( -#' alpha.true = alpha.true, beta.true = beta.true, -#' gamma.true = gamma.true +#' alpha_true <- c(0, -1) +#' beta_true <- c(-0.5, 1) +#' gamma_true <- c(0.1, -0.5) +#' params_true <- list( +#' alpha_true = alpha_true, beta_true = beta_true, +#' gamma_true = gamma_true #' ) #' v.1 <- rep(1, n) # intercept term #' v.2 <- runif(n, -2, 2) #' v <- cbind(v.1, v.2) -#' pscore.true <- exp(v %*% gamma.true) / (1 + exp(v %*% gamma.true)) -#' p0p1.true <- get_prob_rr(v %*% alpha.true, v %*% beta.true) -#' x <- rbinom(n, 1, pscore.true) -#' pA.true <- p0p1.true[, 1] -#' pA.true[x == 1] <- p0p1.true[x == 1, 2] -#' y <- rbinom(n, 1, pA.true) +#' pscore_true <- exp(v %*% gamma_true) / (1 + exp(v %*% gamma_true)) +#' p0p1.true <- get_prob_rr(v %*% alpha_true, v %*% beta_true) +#' x <- rbinom(n, 1, pscore_true) +#' pA_true <- p0p1.true[, 1] +#' pA_true[x == 1] <- p0p1.true[x == 1, 2] +#' y <- rbinom(n, 1, pA_true) #' -#' fit.mle <- brm(y, x, v, v, "RR", "MLE", v, TRUE) -#' fit.drw <- brm(y, x, v, v, "RR", "DR", v, TRUE) -#' fit.dru <- brm(y, x, v, v, "RR", "DR", v, FALSE) +#' fit_mle <- brm(y, x, v, v, "RR", "MLE", v, TRUE) +#' fit_drw <- brm(y, x, v, v, "RR", "DR", v, TRUE) +#' fit_dru <- brm(y, x, v, v, "RR", "DR", v, FALSE) #' -#' fit.mle2 <- brm(y, x, ~v.2, ~v.2, "RR", "MLE", ~v.2, TRUE) # same as fit.mle +#' fit_mle2 <- brm(y, x, ~v.2, ~v.2, "RR", "MLE", ~v.2, TRUE) # same as fit_mle #' #' @export brm <- function( - y, x, va, vb = NULL, param, est.method = "MLE", vc = NULL, - optimal = TRUE, weights = NULL, subset = NULL, max.step = NULL, thres = 1e-8, - alpha.start = NULL, beta.start = NULL, message = FALSE) { - # default param = 'RR'; est.method = 'MLE'; va = v; vb = v; vc = v; - # weights = NULL; subset = NULL; optimal = TRUE; max.step = NULL; - # thres = 1e-06; alpha.start = NULL; beta.start = NULL + y, x, va, vb = NULL, param, est_method = "MLE", vc = NULL, + optimal = TRUE, weights = NULL, subset = NULL, max_step = NULL, thres = 1e-8, + alpha_start = NULL, beta_start = NULL, message = FALSE) { + # default param = 'RR'; est_method = 'MLE'; va = v; vb = v; vc = v; + # weights = NULL; subset = NULL; optimal = TRUE; max_step = NULL; + # thres = 1e-06; alpha_start = NULL; beta_start = NULL if (is.null(vb)) { vb <- va @@ -143,8 +143,8 @@ brm <- function( } valid_check( - param, y, x, va, vb, vc, weights, subset, est.method, optimal, - max.step, thres, alpha.start, beta.start + param, y, x, va, vb, vc, weights, subset, est_method, optimal, + max_step, thres, alpha_start, beta_start ) data <- cbind(y, x, va, vb, vc, weights)[subset, ] @@ -158,40 +158,40 @@ brm <- function( pa <- dim(va)[2] pb <- dim(vb)[2] - if (is.null(max.step)) max.step <- min(pa * 20, 1000) + if (is.null(max_step)) max_step <- min(pa * 20, 1000) - if (est.method == "MLE") { + if (est_method == "MLE") { sol <- mle_est( - param, y, x, va, vb, weights, max.step, thres, alpha.start, - beta.start, pa, pb + param, y, x, va, vb, weights, max_step, thres, alpha_start, + beta_start, pa, pb ) } - if (est.method == "DR") { + if (est_method == "DR") { if (param == "OR") { cat("No doubly robust estimation methods for OR (with propensity score models) are available. Please refer to Tchetgen Tchetgen et al. (2010) for an alternative doubly robust estimation method. \n") return() } - if (is.null(alpha.start) | is.null(beta.start)) { + if (is.null(alpha_start) | is.null(beta_start)) { sol <- mle_est( - param, y, x, va, vb, weights, max.step, thres, - alpha.start, beta.start, pa, pb + param, y, x, va, vb, weights, max_step, thres, + alpha_start, beta_start, pa, pb ) - alpha.ml <- sol$point.est[1:pa] - beta.ml <- sol$point.est[(pa + 1):(pa + pb)] - beta.cov <- sol$cov[(pa + 1):(pa + pb), (pa + 1):(pa + pb)] - alpha.start <- alpha.ml + alpha_ml <- sol$point_est[1:pa] + beta_ml <- sol$point_est[(pa + 1):(pa + pb)] + beta_cov <- sol$cov[(pa + 1):(pa + pb), (pa + 1):(pa + pb)] + alpha_start <- alpha_ml } else { - alpha.ml <- alpha.start - beta.ml <- beta.start - beta.cov <- matrix(NA, pb, pb) + alpha_ml <- alpha_start + beta_ml <- beta_start + beta_cov <- matrix(NA, pb, pb) } - gamma.fit <- stats::glm(x ~ vc - 1, weight = weights, family = "binomial") - gamma <- gamma.fit$coefficients - gamma.cov <- summary(gamma.fit)$cov.unscaled + gamma_fit <- stats::glm(x ~ vc - 1, weight = weights, family = "binomial") + gamma <- gamma_fit$coefficients + gamma_cov <- summary(gamma_fit)$cov.unscaled sol <- dr_est( - param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, - weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message + param, y, x, va, vb, vc, alpha_ml, beta_ml, gamma, optimal, + weights, max_step, thres, alpha_start, beta_cov, gamma_cov, message ) } diff --git a/R/mle_est.R b/R/mle_est.R index 016554a..a59a603 100644 --- a/R/mle_est.R +++ b/R/mle_est.R @@ -1,23 +1,23 @@ mle_est <- function( - param, y, x, va, vb, weights, max.step, thres, alpha.start, - beta.start, pa, pb) { + param, y, x, va, vb, weights, max_step, thres, alpha_start, + beta_start, pa, pb) { ## starting values for parameter optimization - if (is.null(alpha.start)) { - alpha.start <- rep(0, pa) + if (is.null(alpha_start)) { + alpha_start <- rep(0, pa) } - if (is.null(beta.start)) { - beta.start <- rep(0, pb) + if (is.null(beta_start)) { + beta_start <- rep(0, pb) } if (param == "OR") { fit <- stats::glm(y ~ vb - 1 + x * va - va - x, family = "binomial", - weights = weights, start = c(beta.start, alpha.start) + weights = weights, start = c(beta_start, alpha_start) ) - point.temp <- summary(fit)$coefficients[, 1] + point_temp <- summary(fit)$coefficients[, 1] index <- c((pb + 1):(pa + pb), 1:pb) - point.est <- point.temp[index] + point_est <- point_temp[index] cov <- stats::vcov(fit)[index, index] @@ -25,26 +25,25 @@ mle_est <- function( } else { ### point estimate mle <- mle_point( - param, y, x, va, vb, alpha.start, beta.start, - weights, max.step, thres, pa, pb + param, y, x, va, vb, alpha_start, beta_start, + weights, max_step, thres, pa, pb ) - point.est <- mle$par + point_est <- mle$par converged <- mle$convergence - # print(point.est) - alpha.ml <- point.est[1:pa] - beta.ml <- point.est[(pa + 1):(pa + pb)] + # print(point_est) + alpha_ml <- point_est[1:pa] + beta_ml <- point_est[(pa + 1):(pa + pb)] ### Computing Fisher Information: if (param == "RR") { - cov <- mle_var_rr(x, alpha.ml, beta.ml, va, vb, weights) + cov <- mle_var_rr(x, alpha_ml, beta_ml, va, vb, weights) } if (param == "RD") { - cov <- mle_var_rd(x, alpha.ml, beta.ml, va, vb, weights) + cov <- mle_var_rd(x, alpha_ml, beta_ml, va, vb, weights) } - sd.est <- sqrt(diag(cov)) } name <- paste(c(rep("alpha", pa), rep("beta", pb)), c(1:pa, 1:pb)) - sol <- wrap_results(point.est, cov, param, name, va, vb, converged) + sol <- wrap_results(point_est, cov, param, name, va, vb, converged) return(sol) } diff --git a/R/mle_point.R b/R/mle_point.R index 0f6708d..6847bae 100644 --- a/R/mle_point.R +++ b/R/mle_point.R @@ -1,11 +1,11 @@ -mle_point <- function(param, y, x, va, vb, alpha.start, beta.start, weights, - max.step, thres, pa, pb) { - startpars <- c(alpha.start, beta.start) +mle_point <- function(param, y, x, va, vb, alpha_start, beta_start, weights, + max_step, thres, pa, pb) { + startpars <- c(alpha_start, beta_start) getProb <- if (param == "RR") get_prob_rr else get_prob_rd ## negative log likelihood function - neg.log.likelihood <- function(pars) { + neg_log_likelihood <- function(pars) { alpha <- pars[1:pa] beta <- pars[(pa + 1):(pa + pb)] p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) @@ -18,7 +18,7 @@ mle_point <- function(param, y, x, va, vb, alpha.start, beta.start, weights, 1]) * weights[x == 1])) } - neg.log.likelihood.alpha <- function(alpha) { + neg_log_likelihood_alpha <- function(alpha) { p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) p0 <- p0p1[, 1] p1 <- p0p1[, 2] @@ -29,7 +29,7 @@ mle_point <- function(param, y, x, va, vb, alpha.start, beta.start, weights, (y[x == 1]) * log(p1[x == 1]) * weights[x == 1])) } - neg.log.likelihood.beta <- function(beta) { + neg_log_likelihood_beta <- function(beta) { p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) p0 <- p0p1[, 1] p1 <- p0p1[, 2] @@ -44,23 +44,23 @@ mle_point <- function(param, y, x, va, vb, alpha.start, beta.start, weights, ## Optimization Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres) - alpha <- alpha.start - beta <- beta.start + alpha <- alpha_start + beta <- beta_start diff <- thres + 1 step <- 0 - while (diff > thres & step < max.step) { + while (diff > thres & step < max_step) { step <- step + 1 - opt1 <- stats::optim(alpha, neg.log.likelihood.alpha, control = list(maxit = max(100, max.step / 10))) + opt1 <- stats::optim(alpha, neg_log_likelihood_alpha, control = list(maxit = max(100, max_step / 10))) diff1 <- Diff(opt1$par, alpha) alpha <- opt1$par - opt2 <- stats::optim(beta, neg.log.likelihood.beta, control = list(maxit = max(100, max.step / 10))) + opt2 <- stats::optim(beta, neg_log_likelihood_beta, control = list(maxit = max(100, max_step / 10))) diff <- max(diff1, Diff(opt2$par, beta)) beta <- opt2$par } opt <- list( - par = c(alpha, beta), convergence = (step < max.step), - value = neg.log.likelihood(c(alpha, beta)), step = step + par = c(alpha, beta), convergence = (step < max_step), + value = neg_log_likelihood(c(alpha, beta)), step = step ) return(opt) diff --git a/R/mle_var.R b/R/mle_var.R index ecc37cb..d60aa4a 100644 --- a/R/mle_var.R +++ b/R/mle_var.R @@ -1,20 +1,20 @@ ### variance calculation -mle_var_rr <- function(x, alpha.ml, beta.ml, va, vb, weights) { - p0p1 <- get_prob_rr(va %*% alpha.ml, vb %*% beta.ml) +mle_var_rr <- function(x, alpha_ml, beta_ml, va, vb, weights) { + p0p1 <- get_prob_rr(va %*% alpha_ml, vb %*% beta_ml) n <- dim(va)[1] pA <- rep(NA, n) # P(Y=1|A,V); here A = X pA[x == 0] <- p0p1[x == 0, 1] pA[x == 1] <- p0p1[x == 1, 2] - expect.dl.by.dpsi0.squared <- (pA) / (1 - pA) - dpsi0.by.dphi <- (1 - p0p1[, 1]) * (1 - p0p1[, 2]) / ((1 - p0p1[, 1]) + (1 - + expect_dl_by_dpsi0_squared <- (pA) / (1 - pA) + dpsi0_by_dphi <- (1 - p0p1[, 1]) * (1 - p0p1[, 2]) / ((1 - p0p1[, 1]) + (1 - p0p1[, 2])) - dpsi0.by.dtheta <- -(1 - p0p1[, 1]) / ((1 - p0p1[, 1]) + (1 - p0p1[, 2])) - tmp <- cbind((dpsi0.by.dtheta + x) * va, dpsi0.by.dphi * vb) - ## since dtheta.by.dalpha = va, and dphi.by.dbeta = vb - fisher.info <- (t(expect.dl.by.dpsi0.squared * weights * tmp) %*% tmp) - return(solve(fisher.info)) + dpsi0_by_dtheta <- -(1 - p0p1[, 1]) / ((1 - p0p1[, 1]) + (1 - p0p1[, 2])) + tmp <- cbind((dpsi0_by_dtheta + x) * va, dpsi0_by_dphi * vb) + ## since dtheta_by_dalpha = va, and dphi_by_dbeta = vb + fisher_info <- (t(expect_dl_by_dpsi0_squared * weights * tmp) %*% tmp) + return(solve(fisher_info)) } @@ -22,8 +22,8 @@ mle_var_rr <- function(x, alpha.ml, beta.ml, va, vb, weights) { ### variance calculation -mle_var_rd <- function(x, alpha.ml, beta.ml, va, vb, weights) { - p0p1 <- get_prob_rd(va %*% alpha.ml, vb %*% beta.ml) +mle_var_rd <- function(x, alpha_ml, beta_ml, va, vb, weights) { + p0p1 <- get_prob_rd(va %*% alpha_ml, vb %*% beta_ml) # p0p1 = cbind(p0, p1): n * 2 matrix p0 <- p0p1[, 1] p1 <- p0p1[, 2] @@ -34,15 +34,15 @@ mle_var_rd <- function(x, alpha.ml, beta.ml, va, vb, weights) { s1 <- p1 * (1 - p1) sA <- pA * (1 - pA) - rho <- as.vector(tanh(va %*% alpha.ml)) # estimated risk differences + rho <- as_vector(tanh(va %*% alpha_ml)) # estimated risk differences - expect.dl.by.dpA.squared <- 1 / sA - dp0.by.dphi <- s0 * s1 / (s0 + s1) - dp0.by.drho <- -s0 / (s0 + s1) - drho.by.dalpha <- (1 - rho^2) * va - dphi.by.dbeta <- vb + expect_dl_by_dpA_squared <- 1 / sA + dp0_by_dphi <- s0 * s1 / (s0 + s1) + dp0_by_drho <- -s0 / (s0 + s1) + drho_by_dalpha <- (1 - rho^2) * va + dphi_by_dbeta <- vb - tmp <- cbind((dp0.by.drho + x) * drho.by.dalpha, dp0.by.dphi * dphi.by.dbeta) - fisher.info <- (t(expect.dl.by.dpA.squared * weights * tmp) %*% tmp) - return(solve(fisher.info)) + tmp <- cbind((dp0_by_drho + x) * drho_by_dalpha, dp0_by_dphi * dphi_by_dbeta) + fisher_info <- (t(expect_dl_by_dpA_squared * weights * tmp) %*% tmp) + return(solve(fisher_info)) } diff --git a/R/predict_brm.R b/R/predict_brm.R index 6b2afa8..2773dd3 100644 --- a/R/predict_brm.R +++ b/R/predict_brm.R @@ -4,60 +4,60 @@ #' #' @param object A fitted object from function \code{brm}. #' -#' @param va.new An optional covariate matrix to make predictions with. If omitted, the original matrix va is used. +#' @param va_new An optional covariate matrix to make predictions with. If omitted, the original matrix va is used. #' -#' @param vb.new An optional covariate matrix to make predictions with. If vb.new is omitted but va.new is not, then vb.new is set to be equal to va.new. If both vb.new and va.new are omitted, then the original matrix vb is used. +#' @param vb_new An optional covariate matrix to make predictions with. If vb_new is omitted but va_new is not, then vb_new is set to be equal to va_new. If both vb_new and va_new are omitted, then the original matrix vb is used. #' -#' @param x.new An optional vector of x. +#' @param x_new An optional vector of x. #' #' @param ... affecting the predictions produced. #' -#' @return If x.new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb). +#' @return If x_new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb). #' -#' If x.new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x.new,va,vb). +#' If x_new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x_new,va,vb). #' #' @export -predict_brm <- function(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) { +predict_brm <- function(object, x_new = NULL, va_new = NULL, vb_new = NULL, ...) { va <- object$va vb <- object$vb - if (is.null(vb.new)) { - if (is.null(va.new)) { - vb.new <- vb + if (is.null(vb_new)) { + if (is.null(va_new)) { + vb_new <- vb } else { - vb.new <- va.new + vb_new <- va_new } } - if (is.null(va.new)) va.new <- va + if (is.null(va_new)) va_new <- va - n <- nrow(va.new) - pa <- ncol(va.new) - pb <- ncol(vb.new) - alpha.est <- object$point.est[1:pa] - beta.est <- object$point.est[(pa + 1):(pa + pb)] + n <- nrow(va_new) + pa <- ncol(va_new) + pb <- ncol(vb_new) + alpha_est <- object$point_est[1:pa] + beta_est <- object$point_est[(pa + 1):(pa + pb)] - linear.predictors <- cbind(va.new %*% alpha.est, vb.new %*% beta.est) + linear_predictors <- cbind(va_new %*% alpha_est, vb_new %*% beta_est) if (object$param == "RR") { - p0p1 <- get_prob_rr(linear.predictors) + p0p1 <- get_prob_rr(linear_predictors) } if (object$param == "RD") { - p0p1 <- get_prob_rd(linear.predictors) + p0p1 <- get_prob_rd(linear_predictors) } if (object$param == "OR") { - p0 <- expit(linear.predictors[, 2]) - or <- exp(linear.predictors[, 1]) + p0 <- expit(linear_predictors[, 2]) + or <- exp(linear_predictors[, 1]) odds1 <- or * (p0 / (1 - p0)) p1 <- odds1 / (1 + odds1) p0p1 <- cbind(p0, p1) } colnames(p0p1) <- c("p0", "p1") - if (!is.null(x.new)) { + if (!is.null(x_new)) { px <- rep(NA, n) - px[x.new == 0] <- p0p1[x.new == 0, 1] - px[x.new == 1] <- p0p1[x.new == 1, 2] + px[x_new == 0] <- p0p1[x_new == 0, 1] + px[x_new == 1] <- p0p1[x_new == 1, 2] return(px) } else { return(p0p1) diff --git a/R/util.R b/R/util.R index 2e09c7e..f870042 100644 --- a/R/util.R +++ b/R/util.R @@ -27,33 +27,33 @@ same <- function(x, y, tolerance = .Machine$double.eps^0.5) { ## Functions for wrapping estimation results into a nice format -wrap_results <- function(point.est, cov, param, name, va, vb, converged) { - se.est <- sqrt(diag(cov)) +wrap_results <- function(point_est, cov, param, name, va, vb, converged) { + se_est <- sqrt(diag(cov)) - conf.lower <- point.est + stats::qnorm(0.025) * se.est - conf.upper <- point.est + stats::qnorm(0.975) * se.est - p.temp <- stats::pnorm(point.est / se.est, 0, 1) - p.value <- 2 * pmin(p.temp, 1 - p.temp) + conf_lower <- point_est + stats::qnorm(0.025) * se_est + conf_upper <- point_est + stats::qnorm(0.975) * se_est + p_temp <- stats::pnorm(point_est / se_est, 0, 1) + p_value <- 2 * pmin(p_temp, 1 - p_temp) - names(point.est) <- names(se.est) <- rownames(cov) <- colnames(cov) <- names(conf.lower) <- names(conf.upper) <- names(p.value) <- name + names(point_est) <- names(se_est) <- rownames(cov) <- colnames(cov) <- names(conf_lower) <- names(conf_upper) <- names(p_value) <- name - coefficients <- cbind(point.est, se.est, conf.lower, conf.upper, p.value) + coefficients <- cbind(point_est, se_est, conf_lower, conf_upper, p_value) - linear.predictors <- va %*% point.est[1:ncol(va)] - if (param == "RR") param.est <- exp(linear.predictors) - if (param == "RD") param.est <- linear.predictors - if (param == "OR") param.est <- expit(linear.predictors) + linear_predictors <- va %*% point_est[1:ncol(va)] + if (param == "RR") param_est <- exp(linear_predictors) + if (param == "RD") param_est <- linear_predictors + if (param == "OR") param_est <- expit(linear_predictors) sol <- list( - param = param, point.est = point.est, se.est = se.est, cov = cov, - conf.lower = conf.lower, conf.upper = conf.upper, p.value = p.value, - coefficients = coefficients, param.est = param.est, va = va, vb = vb, + param = param, point_est = point_est, se_est = se_est, cov = cov, + conf_lower = conf_lower, conf_upper = conf_upper, p_value = p_value, + coefficients = coefficients, param_est = param_est, va = va, vb = vb, converged = converged ) class(sol) <- c("brm", "list") attr(sol, "hidden") <- c( - "param", "se.est", "cov", "conf.lower", "conf.upper", - "p.value", "coefficients", "param.est", "va", "vb", "converged" + "param", "se_est", "cov", "conf_lower", "conf_upper", + "p_value", "coefficients", "param_est", "va", "vb", "converged" ) return(sol) @@ -74,8 +74,8 @@ get_prb_aux <- function(x) { } -valid_check <- function(param, y, x, va, vb, vc, weights, subset, est.method, - optimal, max.step, thres, alpha.start, beta.start) { +valid_check <- function(param, y, x, va, vb, vc, weights, subset, est_method, + optimal, max_step, thres, alpha_start, beta_start) { if (!is.character(param)) { stop("Parameter must be a character") } @@ -103,23 +103,23 @@ valid_check <- function(param, y, x, va, vb, vc, weights, subset, est.method, if (!is.numeric(subset)) { stop("subset must either be NULL or take numerical values") } - if (!(est.method %in% c("MLE", "DR"))) { + if (!(est_method %in% c("MLE", "DR"))) { stop("Must use MLE or DR for estimation") } if (!is.logical(optimal)) { stop("optimal must be a logical variable") } - if (!is.numeric(max.step) & !is.null(max.step)) { - stop("max.step must be a number") + if (!is.numeric(max_step) & !is.null(max_step)) { + stop("max_step must be a number") } if (!is.numeric(thres)) { stop("thres must be a number") } - if (!is.null(alpha.start) & length(alpha.start) != dim(va)[2]) { - stop("length of alpha.start must match the dimension of va") + if (!is.null(alpha_start) & length(alpha_start) != dim(va)[2]) { + stop("length of alpha_start must match the dimension of va") } - if (!is.null(beta.start) & length(beta.start) != dim(vb)[2]) { - stop("length of beta.start must match the dimension of vb") + if (!is.null(beta_start) & length(beta_start) != dim(vb)[2]) { + stop("length of beta_start must match the dimension of vb") } } diff --git a/R/var_rd_dr.R b/R/var_rd_dr.R index 2c8b3c2..9403a3f 100644 --- a/R/var_rd_dr.R +++ b/R/var_rd_dr.R @@ -1,17 +1,17 @@ ## Sandwich estimator for variance of RD var_rd_dr <- function( - y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, + y, x, va, vb, vc, alpha_dr, alpha_ml, beta_ml, gamma, optimal, weights) { ######################################## pscore <- as.vector(expit(vc %*% gamma)) n <- length(pscore) - ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing + ### 1. - E[dS/d(alpha_ml,beta_ml)] ############################## Computing ### the Hessian: - Hrd <- hessian_2_rd(y, x, va, vb, alpha.ml, beta.ml, weights) + Hrd <- hessian_2_rd(y, x, va, vb, alpha_ml, beta_ml, weights) hessian <- Hrd$hessian p0 <- Hrd$p0 p1 <- Hrd$p1 @@ -20,20 +20,20 @@ var_rd_dr <- function( s1 <- Hrd$s1 sA <- Hrd$sA rho <- Hrd$rho - dl.by.dpA <- Hrd$dl.by.dpA - dp0.by.dphi <- Hrd$dp0.by.dphi - dp0.by.drho <- Hrd$dp0.by.drho - drho.by.dalpha <- Hrd$drho.by.dalpha - dphi.by.dbeta <- Hrd$dphi.by.dbeta - dpA.by.drho <- Hrd$dpA.by.drho - dpA.by.dalpha <- Hrd$dpA.by.dalpha - dpA.by.dphi <- Hrd$dpA.by.dphi - dpA.by.dbeta <- Hrd$dpA.by.dbeta + dl_by_dpA <- Hrd$dl_by_dpA + dp0_by_dphi <- Hrd$dp0_by_dphi + dp0_by_drho <- Hrd$dp0_by_drho + drho_by_dalpha <- Hrd$drho_by_dalpha + dphi_by_dbeta <- Hrd$dphi_by_dbeta + dpA_by_drho <- Hrd$dpA_by_drho + dpA_by_dalpha <- Hrd$dpA_by_dalpha + dpA_by_dphi <- Hrd$dpA_by_dphi + dpA_by_dbeta <- Hrd$dpA_by_dbeta ############# extra building blocks ########################## - H.alpha <- y - x * as.vector(tanh(va %*% alpha.dr)) + H_alpha <- y - x * as.vector(tanh(va %*% alpha_dr)) ############# Calculation of optimal vector (used in several places below) ## @@ -44,80 +44,80 @@ var_rd_dr <- function( } - ### 2. -E[dU.by.dalphaml.betaml] #################################### + ### 2. -E[dU_by_dalphaml_betaml] #################################### - dU.by.dp0 <- -va * wt * (x - pscore) # n by 2 - dp0.by.dalpha.beta <- cbind(drho.by.dalpha * dp0.by.drho, dphi.by.dbeta * - dp0.by.dphi) # n by 4 + dU_by_dp0 <- -va * wt * (x - pscore) # n by 2 + dp0_by_dalpha_beta <- cbind(drho_by_dalpha * dp0_by_drho, dphi_by_dbeta * + dp0_by_dphi) # n by 4 - dU.by.dwt <- va * (x - pscore) * (H.alpha - p0) # n by 2 + dU_by_dwt <- va * (x - pscore) * (H_alpha - p0) # n by 2 if (optimal == TRUE) { esA <- pscore * s0 + (1 - pscore) * s1 # E[s_{1-A}]... - dwt.by.drho <- (-2 * rho * esA - (1 - rho^2) * (1 - pscore) * (1 - + dwt_by_drho <- (-2 * rho * esA - (1 - rho^2) * (1 - pscore) * (1 - 2 * p1)) / esA^2 - dwt.by.dp0 <- -(1 - rho^2) * (2 * pscore * rho + 1 - 2 * p1) / esA^2 + dwt_by_dp0 <- -(1 - rho^2) * (2 * pscore * rho + 1 - 2 * p1) / esA^2 - dwt.by.dalpha <- drho.by.dalpha * (dwt.by.drho + dwt.by.dp0 * dp0.by.drho) - dwt.by.dbeta <- dphi.by.dbeta * dwt.by.dp0 * dp0.by.dphi + dwt_by_dalpha <- drho_by_dalpha * (dwt_by_drho + dwt_by_dp0 * dp0_by_drho) + dwt_by_dbeta <- dphi_by_dbeta * dwt_by_dp0 * dp0_by_dphi - dwt.by.dalpha.beta <- cbind(dwt.by.dalpha, dwt.by.dbeta) # n by 4 + dwt_by_dalpha_beta <- cbind(dwt_by_dalpha, dwt_by_dbeta) # n by 4 } else { - dwt.by.dalpha.beta <- matrix(0, n, ncol(va) + ncol(vb)) + dwt_by_dalpha_beta <- matrix(0, n, ncol(va) + ncol(vb)) } - dU.by.dalpha.ml.beta.ml <- t(dU.by.dp0 * weights) %*% (dp0.by.dalpha.beta) + - t(dU.by.dwt * weights) %*% dwt.by.dalpha.beta + dU_by_dalpha_ml_beta_ml <- t(dU_by_dp0 * weights) %*% (dp0_by_dalpha_beta) + + t(dU_by_dwt * weights) %*% dwt_by_dalpha_beta - ### 3. tau = -E[dU/dalpha.dr] ######################################## (This + ### 3. tau = -E[dU/dalpha_dr] ######################################## (This ### is the bread of the sandwich estimate) - dU.by.dH <- va * wt * (x - pscore) # n by 2 - rho.dr <- as.vector(tanh(va %*% alpha.dr)) - dH.by.dalpha.dr <- -va * x * (1 - rho.dr^2) + dU_by_dH <- va * wt * (x - pscore) # n by 2 + rho_dr <- as.vector(tanh(va %*% alpha_dr)) + dH_by_dalpha_dr <- -va * x * (1 - rho_dr^2) - tau <- -t(dU.by.dH * weights) %*% dH.by.dalpha.dr / sum(weights) # 2 by 2 + tau <- -t(dU_by_dH * weights) %*% dH_by_dalpha_dr / sum(weights) # 2 by 2 ### 4. E[d(prop score score equation)/dgamma] - dpscore.by.dgamma <- vc * pscore * (1 - pscore) # n by 2 - part4 <- -t(vc * weights) %*% dpscore.by.dgamma # 2 by 2 + dpscore_by_dgamma <- vc * pscore * (1 - pscore) # n by 2 + part4 <- -t(vc * weights) %*% dpscore_by_dgamma # 2 by 2 ### 5. E[dU/dgamma] - dU.by.dpscore <- -va * wt * (H.alpha - p0) # n by 2 + dU_by_dpscore <- -va * wt * (H_alpha - p0) # n by 2 if (optimal == TRUE) { - dwt.by.dpscore <- -(1 - rho^2) * (s0 - s1) / esA^2 - dwt.by.dgamma <- dpscore.by.dgamma * dwt.by.dpscore # n by 2 + dwt_by_dpscore <- -(1 - rho^2) * (s0 - s1) / esA^2 + dwt_by_dgamma <- dpscore_by_dgamma * dwt_by_dpscore # n by 2 } else { - dwt.by.dgamma <- matrix(0, n, ncol(vc)) + dwt_by_dgamma <- matrix(0, n, ncol(vc)) } - dU.by.dgamma <- t(dU.by.dpscore * weights) %*% dpscore.by.dgamma + t(dU.by.dwt * - weights) %*% dwt.by.dgamma # 2 by 2 + dU_by_dgamma <- t(dU_by_dpscore * weights) %*% dpscore_by_dgamma + t(dU_by_dwt * + weights) %*% dwt_by_dgamma # 2 by 2 ############################################################################# Assembling semi-parametric variance matrix - U <- va * wt * (x - pscore) * (H.alpha - p0) # n by 2 + U <- va * wt * (x - pscore) * (H_alpha - p0) # n by 2 - S <- cbind(dpA.by.dalpha, dpA.by.dbeta) * dl.by.dpA + S <- cbind(dpA_by_dalpha, dpA_by_dbeta) * dl_by_dpA - pscore.score <- vc * (x - pscore) + pscore_score <- vc * (x - pscore) - Utilde <- U - t(dU.by.dalpha.ml.beta.ml %*% (-solve(hessian)) %*% t(S)) - - t(dU.by.dgamma %*% (solve(part4)) %*% t(pscore.score)) # n by 2 + Utilde <- U - t(dU_by_dalpha_ml_beta_ml %*% (-solve(hessian)) %*% t(S)) - + t(dU_by_dgamma %*% (solve(part4)) %*% t(pscore_score)) # n by 2 USigma <- t(Utilde * weights) %*% Utilde / sum(weights) - ################################### Asymptotic var matrix for alpha.dr + ################################### Asymptotic var matrix for alpha_dr - alpha.dr.variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights) - return(alpha.dr.variance) + alpha_dr_variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights) + return(alpha_dr_variance) } diff --git a/R/var_rr_dr.R b/R/var_rr_dr.R index 6558119..7c6a3cd 100644 --- a/R/var_rr_dr.R +++ b/R/var_rr_dr.R @@ -1,114 +1,114 @@ ## Sandwich estimator for variance of RR var_rr_dr <- function( - y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, + y, x, va, vb, vc, alpha_dr, alpha_ml, beta_ml, gamma, optimal, weights) { ######################################## pscore <- as.vector(expit(vc %*% gamma)) n <- length(pscore) - ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing + ### 1. - E[dS/d(alpha_ml,beta_ml)] ############################## Computing ### the Hessian: - Hrr <- hessian_2_rr(y, x, va, vb, alpha.ml, beta.ml, weights) + Hrr <- hessian_2_rr(y, x, va, vb, alpha_ml, beta_ml, weights) hessian <- Hrr$hessian p0 <- Hrr$p0 p1 <- Hrr$p1 pA <- Hrr$pA - dpsi0.by.dtheta <- Hrr$dpsi0.by.dtheta - dpsi0.by.dphi <- Hrr$dpsi0.by.dphi - dtheta.by.dalpha <- Hrr$dtheta.by.dalpha - dphi.by.dbeta <- Hrr$dphi.by.dbeta - dl.by.dpsi0 <- Hrr$dl.by.dpsi0 + dpsi0_by_dtheta <- Hrr$dpsi0_by_dtheta + dpsi0_by_dphi <- Hrr$dpsi0_by_dphi + dtheta_by_dalpha <- Hrr$dtheta_by_dalpha + dphi_by_dbeta <- Hrr$dphi_by_dbeta + dl_by_dpsi0 <- Hrr$dl_by_dpsi0 ############# extra building blocks ########################## - H.alpha <- y * exp(-x * (as.vector(va %*% alpha.dr))) + H_alpha <- y * exp(-x * (as.vector(va %*% alpha_dr))) ############# Calculation of optimal vector (used in several places below) ## if (optimal == TRUE) { - theta <- as.vector(va %*% alpha.ml) # avoid n by 1 matrix - dtheta.by.dalpha.beta <- cbind(va, matrix(0, n, length(beta.ml))) + theta <- as.vector(va %*% alpha_ml) # avoid n by 1 matrix + dtheta_by_dalpha_beta <- cbind(va, matrix(0, n, length(beta_ml))) wt <- 1 / (1 - p0 + (1 - pscore) * (exp(-theta) - 1)) } else { wt <- rep(1, n) } - ### 2. -E[dU.by.dalphaml.betaml] #################################### + ### 2. -E[dU_by_dalphaml_betaml] #################################### - dU.by.dp0 <- -va * wt * (x - pscore) # n by 2 - dp0.by.dpsi0 <- p0 - dpsi0.by.dalpha.beta <- cbind(dpsi0.by.dtheta * dtheta.by.dalpha, dpsi0.by.dphi * - dphi.by.dbeta) # n by 4 + dU_by_dp0 <- -va * wt * (x - pscore) # n by 2 + dp0_by_dpsi0 <- p0 + dpsi0_by_dalpha_beta <- cbind(dpsi0_by_dtheta * dtheta_by_dalpha, dpsi0_by_dphi * + dphi_by_dbeta) # n by 4 # 4 = 2 (alpha) + 2 (beta) - dp0.by.dalpha.beta <- dpsi0.by.dalpha.beta * dp0.by.dpsi0 # n by 4 + dp0_by_dalpha_beta <- dpsi0_by_dalpha_beta * dp0_by_dpsi0 # n by 4 - dU.by.dwt <- va * (x - pscore) * (H.alpha - p0) # n by 2 - dwt.by.dwti <- -wt^2 # n + dU_by_dwt <- va * (x - pscore) * (H_alpha - p0) # n by 2 + dwt_by_dwti <- -wt^2 # n # wti is short for wt_inv - dU.by.dwti <- dU.by.dwt * dwt.by.dwti # n by 2 + dU_by_dwti <- dU_by_dwt * dwt_by_dwti # n by 2 if (optimal == TRUE) { - dwti.by.dalpha.beta <- -dp0.by.dalpha.beta - (1 - pscore) * exp(-theta) * - dtheta.by.dalpha.beta # n by 4 + dwti_by_dalpha_beta <- -dp0_by_dalpha_beta - (1 - pscore) * exp(-theta) * + dtheta_by_dalpha_beta # n by 4 } else { - dwti.by.dalpha.beta <- matrix(0, n, ncol(va) + ncol(vb)) + dwti_by_dalpha_beta <- matrix(0, n, ncol(va) + ncol(vb)) } - dU.by.dalpha.ml.beta.ml <- t(dU.by.dp0 * weights) %*% dp0.by.dalpha.beta + - t(dU.by.dwti * weights) %*% dwti.by.dalpha.beta + dU_by_dalpha_ml_beta_ml <- t(dU_by_dp0 * weights) %*% dp0_by_dalpha_beta + + t(dU_by_dwti * weights) %*% dwti_by_dalpha_beta - ### 3. tau = -E[dU/dalpha.dr] ######################################## (This + ### 3. tau = -E[dU/dalpha_dr] ######################################## (This ### is the bread of the sandwich estimate) - dU.by.dH <- va * wt * (x - pscore) # n by 2 - dH.by.dalpha.dr <- -va * x * H.alpha # n by 2 + dU_by_dH <- va * wt * (x - pscore) # n by 2 + dH_by_dalpha_dr <- -va * x * H_alpha # n by 2 - tau <- -t(dU.by.dH * weights) %*% dH.by.dalpha.dr / sum(weights) # 2 by 2 + tau <- -t(dU_by_dH * weights) %*% dH_by_dalpha_dr / sum(weights) # 2 by 2 ### 4. E[d(prop score score equation)/dgamma] - dpscore.by.dgamma <- vc * pscore * (1 - pscore) # n by 2 - part4 <- -t(vc * weights) %*% dpscore.by.dgamma # 2 by 2 + dpscore_by_dgamma <- vc * pscore * (1 - pscore) # n by 2 + part4 <- -t(vc * weights) %*% dpscore_by_dgamma # 2 by 2 ### 5. E[dU/dgamma] - dU.by.dpscore <- -va * wt * (H.alpha - p0) # n by 2 + dU_by_dpscore <- -va * wt * (H_alpha - p0) # n by 2 if (optimal == TRUE) { - dwti.by.dpscore <- 1 - exp(-theta) # n - dwti.by.dgamma <- dpscore.by.dgamma * dwti.by.dpscore # n by 2 + dwti_by_dpscore <- 1 - exp(-theta) # n + dwti_by_dgamma <- dpscore_by_dgamma * dwti_by_dpscore # n by 2 } else { - dwti.by.dgamma <- matrix(0, n, ncol(vc)) + dwti_by_dgamma <- matrix(0, n, ncol(vc)) } - dU.by.dgamma <- t(dU.by.dpscore * weights) %*% dpscore.by.dgamma + t(dU.by.dwti * - weights) %*% dwti.by.dgamma # 2 by 2 + dU_by_dgamma <- t(dU_by_dpscore * weights) %*% dpscore_by_dgamma + t(dU_by_dwti * + weights) %*% dwti_by_dgamma # 2 by 2 ############################################################################# Assembling semi-parametric variance matrix - U <- va * wt * (x - pscore) * (H.alpha - p0) # n by 2 + U <- va * wt * (x - pscore) * (H_alpha - p0) # n by 2 - S <- cbind(dl.by.dpsi0 * (dpsi0.by.dtheta + x) * dtheta.by.dalpha, dl.by.dpsi0 * - dpsi0.by.dphi * dphi.by.dbeta) - pscore.score <- vc * (x - pscore) + S <- cbind(dl_by_dpsi0 * (dpsi0_by_dtheta + x) * dtheta_by_dalpha, dl_by_dpsi0 * + dpsi0_by_dphi * dphi_by_dbeta) + pscore_score <- vc * (x - pscore) - Utilde <- U - t(dU.by.dalpha.ml.beta.ml %*% (-solve(hessian)) %*% t(S)) - - t(dU.by.dgamma %*% (solve(part4)) %*% t(pscore.score)) # n by 2 + Utilde <- U - t(dU_by_dalpha_ml_beta_ml %*% (-solve(hessian)) %*% t(S)) - + t(dU_by_dgamma %*% (solve(part4)) %*% t(pscore_score)) # n by 2 USigma <- t(Utilde * weights) %*% Utilde / sum(weights) - ################################### Asymptotic var matrix for alpha.dr + ################################### Asymptotic var matrix for alpha_dr - alpha.dr.variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights) + alpha_dr_variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights) - return(alpha.dr.variance) + return(alpha_dr_variance) } diff --git a/man/brm.Rd b/man/brm.Rd index 74b5c32..db9e623 100644 --- a/man/brm.Rd +++ b/man/brm.Rd @@ -10,15 +10,15 @@ brm( va, vb = NULL, param, - est.method = "MLE", + est_method = "MLE", vc = NULL, optimal = TRUE, weights = NULL, subset = NULL, - max.step = NULL, + max_step = NULL, thres = 1e-08, - alpha.start = NULL, - beta.start = NULL, + alpha_start = NULL, + beta_start = NULL, message = FALSE ) } @@ -34,7 +34,7 @@ brm( \item{param}{The measure of association. Can take value 'RD' (risk difference), 'RR' (relative risk) or 'OR' (odds ratio)} -\item{est.method}{The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation).} +\item{est_method}{The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation).} \item{vc}{The covariates matrix for the probability of exposure, often called the propensity score. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. By default we fit a logistic regression model for the propensity score. (If not specified, defaults to va.)} @@ -44,13 +44,13 @@ brm( \item{subset}{An optional vector specifying a subset of observations to be used in the fitting process.} -\item{max.step}{The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000.} +\item{max_step}{The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000.} \item{thres}{Threshold for judging convergence. The default is 1e-6.} -\item{alpha.start}{Starting values for the parameters in the target model.} +\item{alpha_start}{Starting values for the parameters in the target model.} -\item{beta.start}{Starting values for the parameters in the nuisance model.} +\item{beta_start}{Starting values for the parameters in the nuisance model.} \item{message}{Show optimization details? Ignored if the estimation method is 'MLE'. The default is FALSE.} } @@ -58,21 +58,21 @@ brm( A list consisting of \item{param}{the measure of association.} -\item{point.est}{ the point estimates.} +\item{point_est}{ the point estimates.} -\item{se.est}{the standard error estimates.} +\item{se_est}{the standard error estimates.} \item{cov}{estimate of the covariance matrix for the estimates.} -\item{conf.lower}{ the lower limit of the 95\% (marginal) confidence interval.} +\item{conf_lower}{ the lower limit of the 95\% (marginal) confidence interval.} -\item{conf.upper}{ the upper limit of the 95\% (marginal) confidence interval.} +\item{conf_upper}{ the upper limit of the 95\% (marginal) confidence interval.} -\item{p.value}{the two sided p-value for testing zero coefficients.} +\item{p_value}{the two sided p-value for testing zero coefficients.} \item{coefficients}{ the matrix summarizing key information: point estimate, 95\% confidence interval and p-value.} -\item{param.est}{the fitted RR/RD/OR.} +\item{param_est}{the fitted RR/RD/OR.} \item{va}{ the matrix of covariates for the target model.} @@ -93,37 +93,37 @@ For OR, the nuisance model is for the baseline risk: \eqn{logit(P(y=1|x=0,vb)) = In each case the nuisance model is variation independent of the target model, which ensures that the predicted probabilities lie in \eqn{[0,1]}. See Richardson et al. (2016+) for more details. -If \code{est.method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details. +If \code{est_method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details. -When estimating RR and RD, \code{est.method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency. +When estimating RR and RD, \code{est_method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency. For the doubly robust estimation method, MLE is used to obtain preliminary estimates of \eqn{\alpha}, \eqn{\beta} and \eqn{\gamma}. The estimate of \eqn{\alpha} is then updated by solving a doubly-robust estimating equation. (The estimate for \eqn{\beta} remains the MLE.) } \examples{ set.seed(0) n <- 100 -alpha.true <- c(0, -1) -beta.true <- c(-0.5, 1) -gamma.true <- c(0.1, -0.5) -params.true <- list( - alpha.true = alpha.true, beta.true = beta.true, - gamma.true = gamma.true +alpha_true <- c(0, -1) +beta_true <- c(-0.5, 1) +gamma_true <- c(0.1, -0.5) +params_true <- list( + alpha_true = alpha_true, beta_true = beta_true, + gamma_true = gamma_true ) v.1 <- rep(1, n) # intercept term v.2 <- runif(n, -2, 2) v <- cbind(v.1, v.2) -pscore.true <- exp(v \%*\% gamma.true) / (1 + exp(v \%*\% gamma.true)) -p0p1.true <- get_prob_rr(v \%*\% alpha.true, v \%*\% beta.true) -x <- rbinom(n, 1, pscore.true) -pA.true <- p0p1.true[, 1] -pA.true[x == 1] <- p0p1.true[x == 1, 2] -y <- rbinom(n, 1, pA.true) +pscore_true <- exp(v \%*\% gamma_true) / (1 + exp(v \%*\% gamma_true)) +p0p1.true <- get_prob_rr(v \%*\% alpha_true, v \%*\% beta_true) +x <- rbinom(n, 1, pscore_true) +pA_true <- p0p1.true[, 1] +pA_true[x == 1] <- p0p1.true[x == 1, 2] +y <- rbinom(n, 1, pA_true) -fit.mle <- brm(y, x, v, v, "RR", "MLE", v, TRUE) -fit.drw <- brm(y, x, v, v, "RR", "DR", v, TRUE) -fit.dru <- brm(y, x, v, v, "RR", "DR", v, FALSE) +fit_mle <- brm(y, x, v, v, "RR", "MLE", v, TRUE) +fit_drw <- brm(y, x, v, v, "RR", "DR", v, TRUE) +fit_dru <- brm(y, x, v, v, "RR", "DR", v, FALSE) -fit.mle2 <- brm(y, x, ~v.2, ~v.2, "RR", "MLE", ~v.2, TRUE) # same as fit.mle +fit_mle2 <- brm(y, x, ~v.2, ~v.2, "RR", "MLE", ~v.2, TRUE) # same as fit_mle } \references{ @@ -134,7 +134,7 @@ Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robu \seealso{ \code{get_prob_rd}, and \code{get_prob_rr} for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); -\code{predict.brm} for obtaining fitted probabilities from \code{brm} fits. +\code{predict_brm} for obtaining fitted probabilities from \code{brm} fits. } \author{ Linbo Wang , Mark Clements , Thomas Richardson diff --git a/man/predict_brm.Rd b/man/predict_brm.Rd index 73a6ae1..04b70d6 100644 --- a/man/predict_brm.Rd +++ b/man/predict_brm.Rd @@ -4,23 +4,23 @@ \alias{predict_brm} \title{Fitted probabilities from \code{brm} fits} \usage{ -predict_brm(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) +predict_brm(object, x_new = NULL, va_new = NULL, vb_new = NULL, ...) } \arguments{ \item{object}{A fitted object from function \code{brm}.} -\item{x.new}{An optional vector of x.} +\item{x_new}{An optional vector of x.} -\item{va.new}{An optional covariate matrix to make predictions with. If omitted, the original matrix va is used.} +\item{va_new}{An optional covariate matrix to make predictions with. If omitted, the original matrix va is used.} -\item{vb.new}{An optional covariate matrix to make predictions with. If vb.new is omitted but va.new is not, then vb.new is set to be equal to va.new. If both vb.new and va.new are omitted, then the original matrix vb is used.} +\item{vb_new}{An optional covariate matrix to make predictions with. If vb_new is omitted but va_new is not, then vb_new is set to be equal to va_new. If both vb_new and va_new are omitted, then the original matrix vb is used.} \item{...}{affecting the predictions produced.} } \value{ -If x.new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb). +If x_new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb). -If x.new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x.new,va,vb). +If x_new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x_new,va,vb). } \description{ Calculate fitted probabilities from a fitted binary regression model object. diff --git a/tests/testthat/test-brm.R b/tests/testthat/test-brm.R index 891c761..451c572 100644 --- a/tests/testthat/test-brm.R +++ b/tests/testthat/test-brm.R @@ -2,23 +2,23 @@ set.seed(0) n <- 100 -alpha.true <- c(0, -1) -beta.true <- c(-0.5, 1) -gamma.true <- c(0.1, -0.5) -params.true <- list( - alpha.true = alpha.true, - beta.true = beta.true, - gamma.true = gamma.true +alpha_true <- c(0, -1) +beta_true <- c(-0.5, 1) +gamma_true <- c(0.1, -0.5) +params_true <- list( + alpha_true = alpha_true, + beta_true = beta_true, + gamma_true = gamma_true ) -v.1 <- rep(1, n) # intercept term -v.2 <- runif(n, -2, 2) -v <- cbind(v.1, v.2) -pscore.true <- exp(v %*% gamma.true) / (1 + exp(v %*% gamma.true)) -p0p1.true <- get_prob_rr(v %*% alpha.true, v %*% beta.true) -x <- rbinom(n, 1, pscore.true) -pA.true <- p0p1.true[, 1] -pA.true[x == 1] <- p0p1.true[x == 1, 2] -y <- rbinom(n, 1, pA.true) +v_1 <- rep(1, n) # intercept term +v_2 <- runif(n, -2, 2) +v <- cbind(v_1, v_2) +pscore_true <- exp(v %*% gamma_true) / (1 + exp(v %*% gamma_true)) +p0p1_true <- get_prob_rr(v %*% alpha_true, v %*% beta_true) +x <- rbinom(n, 1, pscore_true) +pA_true <- p0p1_true[, 1] +pA_true[x == 1] <- p0p1_true[x == 1, 2] +y <- rbinom(n, 1, pA_true) test_that("example runs correctly with MLE", { r <- brm(y, x, v, v, "RR", "MLE", v, TRUE) @@ -28,7 +28,7 @@ test_that("example runs correctly with MLE", { "beta 1" = -1.06854529, "beta 2" = 0.02441527 ) - expect_equal(r$point.est, expected_result) + expect_equal(r$point_est, expected_result) }) test_that("example runs correctly with DR", { @@ -41,5 +41,5 @@ test_that("example runs correctly with DR", { "gamma 1" = 0.16779440, "gamma 2" = -0.47170547 ) - expect_equal(r$point.est, expected_result) + expect_equal(r$point_est, expected_result) }) From c67bf3c20161a42c6f0e137704d5cc4e579182d7 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Fri, 3 Oct 2025 13:40:55 -0400 Subject: [PATCH 11/17] styler --- R/mle_point.R | 2 +- R/util.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/mle_point.R b/R/mle_point.R index 6847bae..cc28893 100644 --- a/R/mle_point.R +++ b/R/mle_point.R @@ -1,5 +1,5 @@ mle_point <- function(param, y, x, va, vb, alpha_start, beta_start, weights, - max_step, thres, pa, pb) { + max_step, thres, pa, pb) { startpars <- c(alpha_start, beta_start) getProb <- if (param == "RR") get_prob_rr else get_prob_rd diff --git a/R/util.R b/R/util.R index f870042..ce9c537 100644 --- a/R/util.R +++ b/R/util.R @@ -75,7 +75,7 @@ get_prb_aux <- function(x) { valid_check <- function(param, y, x, va, vb, vc, weights, subset, est_method, - optimal, max_step, thres, alpha_start, beta_start) { + optimal, max_step, thres, alpha_start, beta_start) { if (!is.character(param)) { stop("Parameter must be a character") } From 2b54cbcdc817cb7ce28b405a92a255271411fa22 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Fri, 14 Nov 2025 10:38:53 -0500 Subject: [PATCH 12/17] remove unused util functions --- R/util.R | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/R/util.R b/R/util.R index ce9c537..dce7971 100644 --- a/R/util.R +++ b/R/util.R @@ -1,25 +1,7 @@ -logit <- function(prob) { - log(prob) - log(1 - prob) -} - expit <- function(logodds) { 1 / (1 + exp(-logodds)) } - -getlogop <- function(p0, p1) { - log(p0) + log(p1) - log(1 - p0) - log(1 - p1) -} - -getlogrr <- function(p0, p1) { - log(p1) - log(p0) -} - -getatanhrd <- function(p0, p1) { - atanh(p1 - p0) -} - - ## Function for checking if two things are equal within numerical precision same <- function(x, y, tolerance = .Machine$double.eps^0.5) { abs(x - y) < tolerance From 564b179c345462e5045023bd0a3f391cd6488851 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Fri, 14 Nov 2025 10:39:08 -0500 Subject: [PATCH 13/17] test util functions --- tests/testthat/test-util.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/testthat/test-util.R diff --git a/tests/testthat/test-util.R b/tests/testthat/test-util.R new file mode 100644 index 0000000..c1ae694 --- /dev/null +++ b/tests/testthat/test-util.R @@ -0,0 +1,21 @@ +set.seed(0) + +test_that("expit function works", { + p <- runif(0,1) + l <- log(p) - log(1 - p) + i <- expit(l) + expect_equal(i, 1/l) +}) + +test_that("same function works", { + expect_true(same(5,5)) + expect_false(same(5,5 + .Machine$double.eps^0.5)) + expect_true(same(-5,5, 10.1)) + expect_false(same(1,2,.5)) +}) + +test_that("get_prob_aux function works", { + expect_equal(get_prb_aux(-501), 0) + expect_equal(get_prb_aux(20), 1) + expect_equal(get_prb_aux(0), .5 * (sqrt(5) - 1)) +}) From a88fa53e04ad8c94235f4f9387f66054a1e9727a Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Fri, 14 Nov 2025 13:47:42 -0500 Subject: [PATCH 14/17] add get_prob_rd tests --- tests/testthat/test-get_probs.R | 49 +++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/testthat/test-get_probs.R diff --git a/tests/testthat/test-get_probs.R b/tests/testthat/test-get_probs.R new file mode 100644 index 0000000..491225c --- /dev/null +++ b/tests/testthat/test-get_probs.R @@ -0,0 +1,49 @@ +set.seed(0) + +test_that("get_prob_rr", { + test_that("works when logrr < -12", { + logrr <- c(-15, -15) + logop <- c(-15, -15) + p0 <- get_prb_aux(logop - logrr) + p1 <- 0 + expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1)) + }) + + test_that("works when logrr = 0, logop = 0", { + p0 <- c(0.5, 0.5) + p1 <- c(0.5, 0.5) + expect_equal(get_prob_rr(c(0, 0), c(0, 0)), cbind(p0, p1)) + }) + + test_that("works when logrr > 12, 12 > logop > -12", { + logrr <- c(13, 13) + logop <- c(5, 5) + p0 <- c(0, 0) + p1 <- get_prb_aux(logop + logrr) + expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1)) + }) + + test_that("works when logrr < 12, logop > 12", { + logrr <- c(11, 11) + logop <- c(13, 13) + p0 <- exp(-logrr) + p1 <- get_prb_aux(logop + logrr) + expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1)) + }) + + test_that("works when logrr < -12, logop > 12", { + logrr <- c(-11, -11) + logop <- c(13, 13) + p0 <- c(1, 1) + p1 <- exp(logrr) + expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1)) + }) + + test_that("works when 12 > logrr > -12, 12 > logop > -12", { + logrr <- c(5, 5) + logop <- c(5, 5) + p0 <- (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop)))) / (2 * exp(logrr) * (1 - exp(logop))) + p1 <- exp(logrr) * p0 + expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1)) + }) +}) From 65fa6b75bc75b727b5cb7c57a89bb29ed8f7d281 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Fri, 14 Nov 2025 14:09:10 -0500 Subject: [PATCH 15/17] add tests for get_prob_rr --- tests/testthat/test-get_probs.R | 40 ++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-get_probs.R b/tests/testthat/test-get_probs.R index 491225c..237e5e6 100644 --- a/tests/testthat/test-get_probs.R +++ b/tests/testthat/test-get_probs.R @@ -1,6 +1,7 @@ -set.seed(0) test_that("get_prob_rr", { + expect_true(exists("get_prob_rr")) # prevent "empty test" notification + test_that("works when logrr < -12", { logrr <- c(-15, -15) logop <- c(-15, -15) @@ -47,3 +48,40 @@ test_that("get_prob_rr", { expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1)) }) }) + + +test_that("get_probs_rd", { + expect_true(exists("get_prob_rd")) # prevent "empty test" notification + + test_that("works when logop > 350, atanhrd > 0", { + logop <- c(351, 351) + atanhrd <- c(1, 1) + p0 <- 1 - tanh(atanhrd) + p1 <- c(1, 1) + expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1)) + }) + + test_that("works when logop > 350, atanhrd < 0", { + logop <- c(351, 351) + atanhrd <- c(-1, -1) + p0 <- c(1, 1) + p1 <- 1 + tanh(atanhrd) + expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1)) + }) + + test_that("works when logop == 0", { + logop <- c(0, 0) + atanhrd <- runif(2, -50, 50) + p0 <- 0.5 * (1 - tanh(atanhrd)) + p1 <- p0 + tanh(atanhrd) + expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1)) + }) + + test_that("works when logop != 0 & logop < 350", { + logop <- c(349, 349) + atanhrd <- runif(2, -50, 50) + p0 <- (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * (1 - tanh(atanhrd)) * (1 - exp(logop)))) / (2 * (exp(logop) - 1)) + p1 <- p0 + tanh(atanhrd) + expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1)) + }) +}) From fa771537fd707e787e12e2a1e9bc5932885d82f9 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Tue, 18 Nov 2025 10:34:21 -0500 Subject: [PATCH 16/17] style tests --- tests/testthat/test-get_probs.R | 1 - tests/testthat/test-util.R | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-get_probs.R b/tests/testthat/test-get_probs.R index 237e5e6..14703ce 100644 --- a/tests/testthat/test-get_probs.R +++ b/tests/testthat/test-get_probs.R @@ -1,4 +1,3 @@ - test_that("get_prob_rr", { expect_true(exists("get_prob_rr")) # prevent "empty test" notification diff --git a/tests/testthat/test-util.R b/tests/testthat/test-util.R index c1ae694..1a0db71 100644 --- a/tests/testthat/test-util.R +++ b/tests/testthat/test-util.R @@ -1,17 +1,17 @@ set.seed(0) test_that("expit function works", { - p <- runif(0,1) + p <- runif(0, 1) l <- log(p) - log(1 - p) i <- expit(l) - expect_equal(i, 1/l) + expect_equal(i, 1 / l) }) test_that("same function works", { - expect_true(same(5,5)) - expect_false(same(5,5 + .Machine$double.eps^0.5)) - expect_true(same(-5,5, 10.1)) - expect_false(same(1,2,.5)) + expect_true(same(5, 5)) + expect_false(same(5, 5 + .Machine$double.eps^0.5)) + expect_true(same(-5, 5, 10.1)) + expect_false(same(1, 2, .5)) }) test_that("get_prob_aux function works", { From 5aea183f658e307175a6ef4ec0cef505f17a1ca7 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Tue, 18 Nov 2025 11:25:23 -0500 Subject: [PATCH 17/17] upgrade styler and restyle --- R/dr_est.R | 5 +++-- R/dr_point.R | 11 ++++++----- R/hessian_2_rr.R | 2 -- R/main.R | 7 ++++--- R/mle_est.R | 5 +++-- R/mle_var.R | 2 -- R/var_rd_dr.R | 7 +++---- R/var_rr_dr.R | 7 +++---- 8 files changed, 22 insertions(+), 24 deletions(-) diff --git a/R/dr_est.R b/R/dr_est.R index cf6e149..3cb5b74 100644 --- a/R/dr_est.R +++ b/R/dr_est.R @@ -1,6 +1,7 @@ dr_est <- function( - param, y, x, va, vb, vc, alpha_ml, beta_ml, gamma, optimal, - weights, max_step, thres, alpha_start, beta_cov, gamma_cov, message) { + param, y, x, va, vb, vc, alpha_ml, beta_ml, gamma, optimal, + weights, max_step, thres, alpha_start, beta_cov, gamma_cov, message +) { dr_est <- dr_estimate_noiterate( param, y, x, va, vb, vc, alpha_ml, beta_ml, gamma, optimal, weights, max_step, thres, alpha_start, message diff --git a/R/dr_point.R b/R/dr_point.R index 375e517..a00d971 100644 --- a/R/dr_point.R +++ b/R/dr_point.R @@ -1,6 +1,7 @@ dr_estimate_onestep <- function( - param, y, x, va, vb, alpha_start, beta, pscore, - wt, weights, max_step, thres, message) { + param, y, x, va, vb, alpha_start, beta, pscore, + wt, weights, max_step, thres, message +) { startpars <- c(alpha_start) # pars only contain alpha ## DR estimation equation^2 if (param == "RR") { @@ -27,10 +28,10 @@ dr_estimate_onestep <- function( } - dr_estimate_noiterate <- function( - param, y, x, va, vb, vc, alpha_ml, beta_ml, - gamma, optimal, weights, max_step, thres, alpha_start, message) { + param, y, x, va, vb, vc, alpha_ml, beta_ml, + gamma, optimal, weights, max_step, thres, alpha_start, message +) { pscore <- as.vector(expit(mat_vec_mul(vc, gamma))) if (optimal == TRUE) { diff --git a/R/hessian_2_rr.R b/R/hessian_2_rr.R index d605cfc..bfb4d14 100644 --- a/R/hessian_2_rr.R +++ b/R/hessian_2_rr.R @@ -24,7 +24,6 @@ hessian_2_rr <- function(y, x, va, vb, alpha_ml, beta_ml, weights) { d2l_by_dpsi0_2 <- (y - 1) * pA / ((1 - pA)^2) - ###### d2l_by_dalpha_2 d2psi0_by_dtheta_2 <- ((p0 - p1) * dpsi0_by_dtheta - (1 - p0) * p1) / ((1 - @@ -61,7 +60,6 @@ hessian_2_rr <- function(y, x, va, vb, alpha_ml, beta_ml, weights) { d2l_by_dbeta_2 <- t(dphi_by_dbeta * d2l_by_dphi_2 * weights) %*% dphi_by_dbeta - hessian <- -rbind(cbind(d2l_by_dalpha_2, d2l_by_dalpha_dbeta), cbind( d2l_by_dbeta_dalpha, d2l_by_dbeta_2 diff --git a/R/main.R b/R/main.R index 5873fe6..2635363 100644 --- a/R/main.R +++ b/R/main.R @@ -113,9 +113,10 @@ brm <- function( - y, x, va, vb = NULL, param, est_method = "MLE", vc = NULL, - optimal = TRUE, weights = NULL, subset = NULL, max_step = NULL, thres = 1e-8, - alpha_start = NULL, beta_start = NULL, message = FALSE) { + y, x, va, vb = NULL, param, est_method = "MLE", vc = NULL, + optimal = TRUE, weights = NULL, subset = NULL, max_step = NULL, thres = 1e-8, + alpha_start = NULL, beta_start = NULL, message = FALSE +) { # default param = 'RR'; est_method = 'MLE'; va = v; vb = v; vc = v; # weights = NULL; subset = NULL; optimal = TRUE; max_step = NULL; # thres = 1e-06; alpha_start = NULL; beta_start = NULL diff --git a/R/mle_est.R b/R/mle_est.R index a59a603..8b538fe 100644 --- a/R/mle_est.R +++ b/R/mle_est.R @@ -1,6 +1,7 @@ mle_est <- function( - param, y, x, va, vb, weights, max_step, thres, alpha_start, - beta_start, pa, pb) { + param, y, x, va, vb, weights, max_step, thres, alpha_start, + beta_start, pa, pb +) { ## starting values for parameter optimization if (is.null(alpha_start)) { alpha_start <- rep(0, pa) diff --git a/R/mle_var.R b/R/mle_var.R index d60aa4a..f51e300 100644 --- a/R/mle_var.R +++ b/R/mle_var.R @@ -18,8 +18,6 @@ mle_var_rr <- function(x, alpha_ml, beta_ml, va, vb, weights) { } - - ### variance calculation mle_var_rd <- function(x, alpha_ml, beta_ml, va, vb, weights) { diff --git a/R/var_rd_dr.R b/R/var_rd_dr.R index 9403a3f..6f75d08 100644 --- a/R/var_rd_dr.R +++ b/R/var_rd_dr.R @@ -1,8 +1,9 @@ ## Sandwich estimator for variance of RD var_rd_dr <- function( - y, x, va, vb, vc, alpha_dr, alpha_ml, beta_ml, gamma, - optimal, weights) { + y, x, va, vb, vc, alpha_dr, alpha_ml, beta_ml, gamma, + optimal, weights +) { ######################################## pscore <- as.vector(expit(vc %*% gamma)) @@ -101,7 +102,6 @@ var_rd_dr <- function( weights) %*% dwt_by_dgamma # 2 by 2 - ############################################################################# Assembling semi-parametric variance matrix U <- va * wt * (x - pscore) * (H_alpha - p0) # n by 2 @@ -115,7 +115,6 @@ var_rd_dr <- function( USigma <- t(Utilde * weights) %*% Utilde / sum(weights) - ################################### Asymptotic var matrix for alpha_dr alpha_dr_variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights) diff --git a/R/var_rr_dr.R b/R/var_rr_dr.R index 7c6a3cd..feac259 100644 --- a/R/var_rr_dr.R +++ b/R/var_rr_dr.R @@ -1,8 +1,9 @@ ## Sandwich estimator for variance of RR var_rr_dr <- function( - y, x, va, vb, vc, alpha_dr, alpha_ml, beta_ml, gamma, - optimal, weights) { + y, x, va, vb, vc, alpha_dr, alpha_ml, beta_ml, gamma, + optimal, weights +) { ######################################## pscore <- as.vector(expit(vc %*% gamma)) @@ -91,7 +92,6 @@ var_rr_dr <- function( weights) %*% dwti_by_dgamma # 2 by 2 - ############################################################################# Assembling semi-parametric variance matrix U <- va * wt * (x - pscore) * (H_alpha - p0) # n by 2 @@ -105,7 +105,6 @@ var_rr_dr <- function( USigma <- t(Utilde * weights) %*% Utilde / sum(weights) - ################################### Asymptotic var matrix for alpha_dr alpha_dr_variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights)