diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
new file mode 100644
index 0000000..12b061b
--- /dev/null
+++ b/.github/workflows/check.yml
@@ -0,0 +1,49 @@
+---
+name: Check
+
+on:
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - reopened
+      - ready_for_review
+    branches:
+      - master
+      - development
+  push:
+    branches:
+      - master
+      - development
+
+  workflow_dispatch:
+
+jobs:
+  style:
+    if: github.event_name == 'pull_request'
+    name: Style Check
+    uses: insightsengineering/r.pkg.template/.github/workflows/style.yaml@main
+    secrets:
+      REPO_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+    with:
+      auto-update: false
+
+  unit-test:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/insightsengineering/rstudio
+    steps:
+        -
+          name: Checkout repo
+          uses: actions/checkout@v4.1.1
+        -
+          name: Build
+          run: |
+            Rscript -e "devtools::install()"
+        -
+          name: Test
+          run: |
+            Rscript -e "testthat::test_local()"
+
+
+
diff --git a/.gitignore b/.gitignore
index 2263322..3ecce47 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ docker
 rstudio
 *.o
 *.so
+RcppExports.cpp
diff --git a/NAMESPACE b/NAMESPACE
index 9d02f22..29a1604 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,12 +1,10 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(predict,brm)
-S3method(print,brm)
 export(brm)
-export(getProbRD)
-export(getProbRR)
-export(getProbScalarRD)
-export(getProbScalarRR)
+export(get_prob_rd)
+export(get_prob_rr)
+export(predict_brm)
+export(print_brm)
 exportPattern("ˆ[[:alpha:]]+")
 importFrom(Rcpp,evalCpp)
 useDynLib(brm)
diff --git a/R/1.1_MLE_Point.R b/R/1.1_MLE_Point.R
deleted file mode 100644
index 354b75f..0000000
--- a/R/1.1_MLE_Point.R
+++ /dev/null
@@ -1,63 +0,0 @@
-
-max.likelihood = function(param, y, x, va, vb, alpha.start, beta.start, weights,
-                          max.step, thres, pa, pb) {
-
-    startpars = c(alpha.start, beta.start)
-
-    getProb = if (param == "RR") getProbRR else getProbRD
-
-    ## negative log likelihood function
-    neg.log.likelihood = function(pars) {
-        alpha = pars[1:pa]
-        beta = pars[(pa + 1):(pa + pb)]
-        p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta))
-        p0 = p0p1[, 1];   p1 = p0p1[, 2]
-
-        return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] +
-                        (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) - sum((1 - y[x ==
-                                                                                          1]) * log(1 - p1[x == 1]) * weights[x == 1] + (y[x == 1]) * log(p1[x ==
-                                                                                                                                                                 1]) * weights[x == 1]))
-    }
-
-    neg.log.likelihood.alpha = function(alpha){
-        p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta))
-        p0    = p0p1[,1];  p1 = p0p1[,2]
-
-        return(-sum((1-y[x==0])*log(1-p0[x==0])*weights[x==0] +
-                        (y[x==0])*log(p0[x==0])*weights[x==0]) -
-                   sum((1-y[x==1])*log(1-p1[x==1])*weights[x==1] +
-                           (y[x==1])*log(p1[x==1])*weights[x==1]))
-    }
-
-    neg.log.likelihood.beta = function(beta){
-        p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta))
-        p0    = p0p1[,1];  p1 = p0p1[,2]
-
-        return(-sum((1-y[x==0])*log(1-p0[x==0])*weights[x==0] +
-                        (y[x==0])*log(p0[x==0])*weights[x==0]) -
-                   sum((1-y[x==1])*log(1-p1[x==1])*weights[x==1] +
-                           (y[x==1])*log(p1[x==1])*weights[x==1]))
-    }
-
-
-    ## Optimization
-
-    Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres)
-    alpha = alpha.start; beta = beta.start
-    diff = thres + 1; step = 0
-    while(diff > thres & step < max.step){
-        step = step + 1
-        opt1 = stats::optim(alpha,neg.log.likelihood.alpha,control=list(maxit=max(100,max.step/10)))
-        diff1 = Diff(opt1$par,alpha)
-        alpha = opt1$par
-        opt2 = stats::optim(beta,neg.log.likelihood.beta,control=list(maxit=max(100,max.step/10)))
-        diff  = max(diff1,Diff(opt2$par,beta))
-        beta = opt2$par
-    }
-
-    opt = list(par = c(alpha,beta), convergence = (step < max.step),
-               value = neg.log.likelihood(c(alpha,beta)), step = step)
-
-    return(opt)
-}
-
diff --git a/R/1.2_MLE_Var.R b/R/1.2_MLE_Var.R
deleted file mode 100644
index d45f7ad..0000000
--- a/R/1.2_MLE_Var.R
+++ /dev/null
@@ -1,52 +0,0 @@
-
-
-### variance calculation
-
-var.mle.rr = function(x, alpha.ml, beta.ml, va, vb, weights) {
-    
-    p0p1 = getProbRR(va %*% alpha.ml, vb %*% beta.ml)
-    n = dim(va)[1]
-    pA = rep(NA, n)     # P(Y=1|A,V); here A = X
-    pA[x == 0] = p0p1[x == 0, 1]
-    pA[x == 1] = p0p1[x == 1, 2]
-    
-    expect.dl.by.dpsi0.squared = (pA)/(1 - pA)
-    dpsi0.by.dphi = (1 - p0p1[, 1]) * (1 - p0p1[, 2])/((1 - p0p1[, 1]) + (1 - 
-        p0p1[, 2]))
-    dpsi0.by.dtheta = -(1 - p0p1[, 1])/((1 - p0p1[, 1]) + (1 - p0p1[, 2]))
-    tmp = cbind((dpsi0.by.dtheta + x) * va, dpsi0.by.dphi * vb)
-    ## since dtheta.by.dalpha = va, and dphi.by.dbeta = vb
-    fisher.info = (t(expect.dl.by.dpsi0.squared * weights * tmp) %*% tmp)
-    return(solve(fisher.info))
-}
-
-
-
-
-### variance calculation
-
-var.mle.rd = function(x, alpha.ml, beta.ml, va, vb, weights) {
-    
-    p0p1 = getProbRD(va %*% alpha.ml, vb %*% beta.ml)
-    # p0p1 = cbind(p0, p1): n * 2 matrix
-    p0 = p0p1[, 1]
-    p1 = p0p1[, 2]
-    n = nrow(va)
-    pA = p0             # P(Y=1|A,V); here A = X
-    pA[x == 1] = p1[x == 1]
-    s0 = p0 * (1 - p0)
-    s1 = p1 * (1 - p1)
-    sA = pA * (1 - pA)
-    
-    rho = as.vector(tanh(va %*% alpha.ml))  #estimated risk differences
-    
-    expect.dl.by.dpA.squared = 1/sA
-    dp0.by.dphi = s0 * s1/(s0 + s1)
-    dp0.by.drho = -s0/(s0 + s1)
-    drho.by.dalpha = (1 - rho^2) * va
-    dphi.by.dbeta = vb
-    
-    tmp = cbind((dp0.by.drho + x) * drho.by.dalpha, dp0.by.dphi * dphi.by.dbeta)
-    fisher.info = (t(expect.dl.by.dpA.squared * weights * tmp) %*% tmp)
-    return(solve(fisher.info))
-} 
diff --git a/R/1_CallMLE.R b/R/1_CallMLE.R
deleted file mode 100644
index aa82343..0000000
--- a/R/1_CallMLE.R
+++ /dev/null
@@ -1,47 +0,0 @@
-
-MLEst = function(param, y, x, va, vb, weights, max.step, thres, alpha.start, 
-    beta.start, pa, pb) {
-    
-    ## starting values for parameter optimization
-    if (is.null(alpha.start)) 
-        alpha.start = rep(0, pa)
-    if (is.null(beta.start)) 
-        beta.start = rep(0, pb)
-    
-    if (param == "OR") {
-        fit = stats::glm(y ~ vb - 1 + x * va - va - x, family = "binomial",
-                         weights = weights, start = c(beta.start, alpha.start))
-        
-        point.temp = summary(fit)$coefficients[, 1]
-        index = c((pb + 1):(pa + pb), 1:pb)
-        point.est = point.temp[index]
-        
-        cov = stats::vcov(fit)[index, index]
-        
-        converged = fit$converged
-        
-    } else {
-        
-        ### point estimate
-        mle = max.likelihood(param, y, x, va, vb, alpha.start, beta.start, 
-            weights, max.step, thres, pa, pb)
-        point.est = mle$par
-        converged = mle$convergence
-        # print(point.est)
-        alpha.ml = point.est[1:pa]
-        beta.ml = point.est[(pa + 1):(pa + pb)]
-        
-        ### Computing Fisher Information:
-        if (param == "RR") 
-            cov = var.mle.rr(x, alpha.ml, beta.ml, va, vb, weights)
-        if (param == "RD") 
-            cov = var.mle.rd(x, alpha.ml, beta.ml, va, vb, weights)
-        sd.est = sqrt(diag(cov))
-        
-    }
-    
-    name = paste(c(rep("alpha", pa), rep("beta", pb)), c(1:pa, 1:pb))
-    sol = WrapResults(point.est, cov, param, name, va, vb, converged)
-    return(sol)
-    
-} 
diff --git a/R/2.1_DR_Point.R b/R/2.1_DR_Point.R
deleted file mode 100644
index bee2cce..0000000
--- a/R/2.1_DR_Point.R
+++ /dev/null
@@ -1,65 +0,0 @@
-
-
-dr.estimate.onestep = function(param, y, x, va, vb, alpha.start, beta, pscore,
-    wt, weights, max.step, thres, message) {
-
-    startpars = c(alpha.start)  # pars only contain alpha
-    ## DR estimation equation^2
-    if (param == "RR") {
-        dr.objective = function(pars) {
-            p0 = getProbRR(mat_vec_mul(va,startpars), mat_vec_mul(vb, beta))[, 1]
-            H.alpha = as.vector(y * exp(-x * (va %*% pars)))
-            tmp = mat_vec_mul(t(va), (wt * (x - pscore) * (H.alpha - p0) * weights))
-            return(sum(tmp^2))
-        }
-    }
-    if (param == "RD") {
-        dr.objective = function(pars) {
-             p0 = getProbRD(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1]
-            H.alpha = y - x * tanh(mat_vec_mul(va, pars))
-            tmp = mat_vec_mul(t((H.alpha - p0) * (x - pscore)) , (va * wt * weights))
-            return(sum(tmp^2))
-        }
-    }
-
-        opt <- stats::optim(startpars, dr.objective, control=list(reltol=thres))
-        opt$convergence = (opt$convergence == 0) # change cf. optim()
-
-    return(opt)
-}
-
-
-
-dr.estimate.noiterate = function(param, y, x, va, vb, vc, alpha.ml, beta.ml,
-    gamma, optimal, weights, max.step, thres, alpha.start, message) {
-
-    pscore = as.vector(expit(mat_vec_mul(vc, gamma)))
-
-    if (optimal == TRUE) {
-        if (param == "RR") {
-            p0 = getProbRR(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml))[, 1]
-            wt = as.vector(1/(1 - p0 + (1 - pscore) * (exp(mat_vec_mul(-va, alpha.ml)) -
-                1)))
-        }
-        if (param == "RD") {
-            p0 = getProbRD(mat_vec_mul(va,alpha.ml), mat_vec_mul(vb, beta.ml))[, 1]
-            rho = as.vector(tanh(mat_vec_mul(va, alpha.ml)))
-            wt = (1 - rho) * (1 + rho)/(p0 * (1 - p0) + rho * (1 - pscore) *
-                (1 - 2 * p0 - rho))
-        }
-    } else {
-        wt = rep(1, length(pscore))
-    }
-
-    if (is.null(alpha.start))
-        alpha.start = alpha.ml
-
-    alpha.dr.opt = dr.estimate.onestep(param, y, x, va, vb, alpha.start, beta.ml,
-        pscore, wt, weights, max.step, thres, message)
-
-    # if(MESSAGE){ print(paste('DR One Step: ',' Alpha:
-    # ',paste(round(alpha.dr,5),collapse=', '),' Beta:
-    # ',paste(round(beta.ml,5),collapse=', '))) }
-
-    return(alpha.dr.opt)
-}
diff --git a/R/2.2.1_Hessian_RD.R b/R/2.2.1_Hessian_RD.R
deleted file mode 100644
index 63a3dfb..0000000
--- a/R/2.2.1_Hessian_RD.R
+++ /dev/null
@@ -1,66 +0,0 @@
-Hessian2RD = function(y, x, va, vb, alpha.ml, beta.ml, cnt) {
-    # calculating the Hessian using the second derivative have to do so
-    # because under mis-specification of models Hessian no longer equals the
-    # square of the first order derivatives
-    
-    p0p1 = getProbRD(va %*% alpha.ml, vb %*% beta.ml)
-    # p0p1 = cbind(p0, p1): n * 2 matrix
-    p0 = p0p1[, 1]
-    p1 = p0p1[, 2]
-    n = nrow(va)
-    pA = p0
-    pA[x == 1] = p1[x == 1]
-    s0 = p0 * (1 - p0)
-    s1 = p1 * (1 - p1)
-    sA = pA * (1 - pA)
-    
-    rho = as.vector(tanh(va %*% alpha.ml))  #estimated risk differences
-    
-    ### First order derivatives ###
-    
-    dl.by.dpA = (y - pA)/sA
-    dp0.by.dphi = s0 * s1/(s0 + s1)
-    dp0.by.drho = -s0/(s0 + s1)
-    drho.by.dalpha = va * (1 - rho^2)
-    dphi.by.dbeta = vb
-    
-    dpA.by.drho = dp0.by.drho + x
-    dpA.by.dalpha = drho.by.dalpha * dpA.by.drho
-    dpA.by.dphi = dp0.by.dphi
-    dpA.by.dbeta = dphi.by.dbeta * dpA.by.dphi
-    
-    ### Second order derivatives ###
-    
-    d2l.by.dpA.2 = -(y - pA)^2/sA^2
-    d2pA.by.drho.2 = s0 * s1 * (2 - 2 * p0 - 2 * p1)/(s0 + s1)^3
-    d2pA.by.dphi.drho = (s0 * (1 - 2 * p1) - s1 * (1 - 2 * p0)) * s0 * s1/(s0 + 
-        s1)^3
-    d2pA.by.dphi.2 = (s0^2 * (1 - 2 * p1) + s1^2 * (1 - 2 * p0)) * s0 * s1/(s0 + 
-        s1)^3
-    
-    d2rho.by.dalpha.2 = -2 * t(va * rho) %*% drho.by.dalpha
-    
-    ### Compute elements of the Hessian matrix ###
-    
-    d2l.by.dalpha.2 = t(dpA.by.dalpha * d2l.by.dpA.2 * cnt) %*% dpA.by.dalpha + 
-        t(drho.by.dalpha * dl.by.dpA * d2pA.by.drho.2 * cnt) %*% drho.by.dalpha - 
-        2 * t(va * rho * dl.by.dpA * dpA.by.drho * cnt) %*% drho.by.dalpha
-    
-    d2l.by.dalpha.dbeta = t(dpA.by.dalpha * d2l.by.dpA.2 * cnt) %*% dpA.by.dbeta + 
-        t(drho.by.dalpha * dl.by.dpA * d2pA.by.dphi.drho * cnt) %*% dphi.by.dbeta
-    d2l.by.dbeta.dalpha = t(d2l.by.dalpha.dbeta)
-    
-    d2l.by.dbeta.2 = t(dpA.by.dbeta * d2l.by.dpA.2 * cnt) %*% dpA.by.dbeta + 
-        t(dphi.by.dbeta * dl.by.dpA * d2pA.by.dphi.2 * cnt) %*% dphi.by.dbeta
-    
-    hessian = -rbind(cbind(d2l.by.dalpha.2, d2l.by.dalpha.dbeta), cbind(d2l.by.dbeta.dalpha, 
-        d2l.by.dbeta.2))
-    ### NB Note the extra minus sign here
-    
-    return(list(hessian = hessian, p0 = p0, p1 = p1, pA = pA, s0 = s0, s1 = s1, 
-        sA = sA, rho = rho, dl.by.dpA = dl.by.dpA, dp0.by.dphi = dp0.by.dphi, 
-        dp0.by.drho = dp0.by.drho, drho.by.dalpha = drho.by.dalpha, dphi.by.dbeta = dphi.by.dbeta, 
-        dpA.by.drho = dpA.by.drho, dpA.by.dalpha = dpA.by.dalpha, dpA.by.dphi = dpA.by.dphi, 
-        dpA.by.dbeta = dpA.by.dbeta))
-    
-} 
diff --git a/R/2.2.1_Hessian_RR.R b/R/2.2.1_Hessian_RR.R
deleted file mode 100644
index 3d4dbdc..0000000
--- a/R/2.2.1_Hessian_RR.R
+++ /dev/null
@@ -1,73 +0,0 @@
-Hessian2RR = function(y, x, va, vb, alpha.ml, beta.ml, weights) {
-    # calculating the Hessian using the second derivative have to do so
-    # because under mis-specification of models Hessian no longer equals the
-    # square of the first order derivatives
-    
-    p0p1 = getProbRR(va %*% alpha.ml, vb %*% beta.ml)
-    # p0p1 = cbind(p0, p1): n * 2 matrix
-    p0 = p0p1[, 1]
-    p1 = p0p1[, 2]
-    n = nrow(va)
-    pA = p0
-    pA[x == 1] = p1[x == 1]
-    
-    
-    ### Building blocks
-    
-    dpsi0.by.dtheta = -(1 - p0)/(1 - p0 + 1 - p1)
-    dpsi0.by.dphi = (1 - p0) * (1 - p1)/(1 - p0 + 1 - p1)
-    
-    dtheta.by.dalpha = va
-    dphi.by.dbeta = vb
-    
-    dl.by.dpsi0 = (y - pA)/(1 - pA)
-    d2l.by.dpsi0.2 = (y - 1) * pA/((1 - pA)^2)
-    
-    
-    
-    ###### d2l.by.dalpha.2
-    
-    d2psi0.by.dtheta.2 = ((p0 - p1) * dpsi0.by.dtheta - (1 - p0) * p1)/((1 - 
-        p0 + 1 - p1)^2)
-    
-    d2l.by.dtheta.2 = d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x)^2 + dl.by.dpsi0 * 
-        d2psi0.by.dtheta.2
-    
-    d2l.by.dalpha.2 = t(dtheta.by.dalpha * d2l.by.dtheta.2 * weights) %*% 
-        dtheta.by.dalpha
-    
-    
-    ###### d2l.by.dalpha.dbeta
-    
-    d2psi0.by.dtheta.dphi = (1 - p0) * (1 - p1) * (p0 - p1)/(1 - p0 + 1 - 
-        p1)^3
-    
-    d2l.by.dtheta.dphi = d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x) * dpsi0.by.dphi + 
-        dl.by.dpsi0 * d2psi0.by.dtheta.dphi
-    
-    d2l.by.dalpha.dbeta = t(dtheta.by.dalpha * d2l.by.dtheta.dphi * weights) %*% 
-        dphi.by.dbeta
-    d2l.by.dbeta.dalpha = t(d2l.by.dalpha.dbeta)
-    # d2l.by.dalpha.dbeta is symmetric itself if (because) va=vb
-    
-    
-    #### d2l.by.dbeta2
-    
-    d2psi0.by.dphi.2 = (-(p0 * (1 - p1)^2 + p1 * (1 - p0)^2)/(1 - p0 + 1 - 
-        p1)^2) * dpsi0.by.dphi
-    
-    d2l.by.dphi.2 = d2l.by.dpsi0.2 * (dpsi0.by.dphi)^2 + dl.by.dpsi0 * d2psi0.by.dphi.2
-    
-    d2l.by.dbeta.2 = t(dphi.by.dbeta * d2l.by.dphi.2 * weights) %*% dphi.by.dbeta
-    
-    
-    
-    hessian = -rbind(cbind(d2l.by.dalpha.2, d2l.by.dalpha.dbeta), cbind(d2l.by.dbeta.dalpha, 
-        d2l.by.dbeta.2))
-    ### NB Note the extra minus sign here
-    
-    return(list(hessian = hessian, p0 = p0, p1 = p1, pA = pA, dpsi0.by.dtheta = dpsi0.by.dtheta, 
-        dpsi0.by.dphi = dpsi0.by.dphi, dtheta.by.dalpha = dtheta.by.dalpha, 
-        dphi.by.dbeta = dphi.by.dbeta, dl.by.dpsi0 = dl.by.dpsi0))
-    
-} 
diff --git a/R/2.2_DR_Var_RD.R b/R/2.2_DR_Var_RD.R
deleted file mode 100644
index 97730f0..0000000
--- a/R/2.2_DR_Var_RD.R
+++ /dev/null
@@ -1,120 +0,0 @@
-
-## Sandwich estimator for variance of RD
-
-var.rd.dr = function(y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, 
-    optimal, weights) {
-    ######################################## 
-    
-    pscore = as.vector(expit(vc %*% gamma))
-    n = length(pscore)
-    
-    ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing
-    ### the Hessian:
-    
-    Hrd = Hessian2RD(y, x, va, vb, alpha.ml, beta.ml, weights)
-    hessian = Hrd$hessian
-    p0  = Hrd$p0; p1 = Hrd$p1; pA = Hrd$pA
-    s0  = Hrd$s0; s1 = Hrd$s1; sA = Hrd$sA
-    rho = Hrd$rho
-    dl.by.dpA      = Hrd$dl.by.dpA
-    dp0.by.dphi    = Hrd$dp0.by.dphi
-    dp0.by.drho    = Hrd$dp0.by.drho
-    drho.by.dalpha = Hrd$drho.by.dalpha
-    dphi.by.dbeta  = Hrd$dphi.by.dbeta
-    dpA.by.drho    = Hrd$dpA.by.drho
-    dpA.by.dalpha  = Hrd$dpA.by.dalpha
-    dpA.by.dphi    = Hrd$dpA.by.dphi
-    dpA.by.dbeta   = Hrd$dpA.by.dbeta
-    
-    
-    ############# extra building blocks ##########################
-    
-    H.alpha = y - x * as.vector(tanh(va %*% alpha.dr))
-    
-    ############# Calculation of optimal vector (used in several places below) ##
-    
-    if (optimal == TRUE) {
-        wt = (1 - rho^2)/(pscore * s0 + (1 - pscore) * s1)
-    } else {
-        wt = rep(1, n)
-    }
-    
-    
-    ### 2. -E[dU.by.dalphaml.betaml] ####################################
-    
-    dU.by.dp0 = -va * wt * (x - pscore)  # n by 2
-    dp0.by.dalpha.beta = cbind(drho.by.dalpha * dp0.by.drho, dphi.by.dbeta * 
-        dp0.by.dphi)  # n by 4
-    
-    dU.by.dwt = va * (x - pscore) * (H.alpha - p0)  # n by 2
-    
-    if (optimal == TRUE) {
-        esA = pscore * s0 + (1 - pscore) * s1  # E[s_{1-A}]...
-        dwt.by.drho = (-2 * rho * esA - (1 - rho^2) * (1 - pscore) * (1 - 
-            2 * p1))/esA^2
-        dwt.by.dp0 = -(1 - rho^2) * (2 * pscore * rho + 1 - 2 * p1)/esA^2
-        
-        dwt.by.dalpha = drho.by.dalpha * (dwt.by.drho + dwt.by.dp0 * dp0.by.drho)
-        dwt.by.dbeta = dphi.by.dbeta * dwt.by.dp0 * dp0.by.dphi
-        
-        dwt.by.dalpha.beta = cbind(dwt.by.dalpha, dwt.by.dbeta)  # n by 4    
-    } else {
-        dwt.by.dalpha.beta = matrix(0, n, ncol(va) + ncol(vb))
-    }
-    
-    dU.by.dalpha.ml.beta.ml = t(dU.by.dp0 * weights) %*% (dp0.by.dalpha.beta) + 
-        t(dU.by.dwt * weights) %*% dwt.by.dalpha.beta
-    
-    
-    ### 3. tau = -E[dU/dalpha.dr] ######################################## (This
-    ### is the bread of the sandwich estimate)
-    
-    dU.by.dH = va * wt * (x - pscore)  # n by 2
-    rho.dr = as.vector(tanh(va %*% alpha.dr))
-    dH.by.dalpha.dr = -va * x * (1 - rho.dr^2)
-    
-    tau = -t(dU.by.dH * weights) %*% dH.by.dalpha.dr/sum(weights)  # 2 by 2
-    
-    
-    ### 4. E[d(prop score score equation)/dgamma]
-    
-    dpscore.by.dgamma = vc * pscore * (1 - pscore)  # n by 2   
-    part4 = -t(vc * weights) %*% dpscore.by.dgamma  # 2 by 2
-    
-    
-    ### 5. E[dU/dgamma]
-    
-    dU.by.dpscore = -va * wt * (H.alpha - p0)  # n by 2
-    
-    if (optimal == TRUE) {
-        dwt.by.dpscore = -(1 - rho^2) * (s0 - s1)/esA^2
-        dwt.by.dgamma = dpscore.by.dgamma * dwt.by.dpscore  # n by 2
-    } else {
-        dwt.by.dgamma = matrix(0, n, ncol(vc))
-    }
-    
-    dU.by.dgamma = t(dU.by.dpscore * weights) %*% dpscore.by.dgamma + t(dU.by.dwt * 
-        weights) %*% dwt.by.dgamma  # 2 by 2
-    
-    
-    
-    ############################################################################# Assembling semi-parametric variance matrix
-    
-    U = va * wt * (x - pscore) * (H.alpha - p0)  # n by 2
-    
-    S = cbind(dpA.by.dalpha, dpA.by.dbeta) * dl.by.dpA
-    
-    pscore.score = vc * (x - pscore)
-    
-    Utilde = U - t(dU.by.dalpha.ml.beta.ml %*% (-solve(hessian)) %*% t(S)) - 
-        t(dU.by.dgamma %*% (solve(part4)) %*% t(pscore.score))  # n by 2
-    USigma = t(Utilde * weights) %*% Utilde/sum(weights)
-    
-    
-    
-    ################################### Asymptotic var matrix for alpha.dr
-    
-    alpha.dr.variance = solve(tau) %*% USigma %*% solve(tau)/sum(weights)
-    return(alpha.dr.variance)
-    
-} 
diff --git a/R/2.2_DR_Var_RR.R b/R/2.2_DR_Var_RR.R
deleted file mode 100644
index 66277f6..0000000
--- a/R/2.2_DR_Var_RR.R
+++ /dev/null
@@ -1,113 +0,0 @@
-
-## Sandwich estimator for variance of RR
-
-var.rr.dr = function(y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, 
-    optimal, weights) {
-    ######################################## 
-    
-    pscore = as.vector(expit(vc %*% gamma))
-    n = length(pscore)
-    
-    ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing
-    ### the Hessian:
-    
-    Hrr = Hessian2RR(y, x, va, vb, alpha.ml, beta.ml, weights)
-    hessian = Hrr$hessian
-    p0 = Hrr$p0; p1  = Hrr$p1; pA = Hrr$pA
-    dpsi0.by.dtheta  = Hrr$dpsi0.by.dtheta
-    dpsi0.by.dphi    = Hrr$dpsi0.by.dphi
-    dtheta.by.dalpha = Hrr$dtheta.by.dalpha 
-    dphi.by.dbeta    = Hrr$dphi.by.dbeta
-    dl.by.dpsi0      = Hrr$dl.by.dpsi0
-    
-    ############# extra building blocks ##########################
-    
-    H.alpha = y * exp(-x * (as.vector(va %*% alpha.dr)))
-    
-    ############# Calculation of optimal vector (used in several places below) ##
-    
-    if (optimal == TRUE) {
-        theta = as.vector(va %*% alpha.ml)  # avoid n by 1 matrix
-        dtheta.by.dalpha.beta = cbind(va, matrix(0, n, length(beta.ml)))
-        wt = 1/(1 - p0 + (1 - pscore) * (exp(-theta) - 1))
-    } else {
-        wt = rep(1, n)
-    }
-    
-    
-    ### 2. -E[dU.by.dalphaml.betaml] ####################################
-    
-    dU.by.dp0 = -va * wt * (x - pscore)  # n by 2
-    dp0.by.dpsi0 = p0
-    dpsi0.by.dalpha.beta = cbind(dpsi0.by.dtheta * dtheta.by.dalpha, dpsi0.by.dphi * 
-        dphi.by.dbeta)  # n by 4
-    # 4 = 2 (alpha) + 2 (beta)
-    dp0.by.dalpha.beta = dpsi0.by.dalpha.beta * dp0.by.dpsi0  # n by 4
-    
-    dU.by.dwt = va * (x - pscore) * (H.alpha - p0)  # n by 2
-    dwt.by.dwti = -wt^2  # n
-    # wti is short for wt_inv
-    dU.by.dwti = dU.by.dwt * dwt.by.dwti  # n by 2
-    if (optimal == TRUE) {
-        dwti.by.dalpha.beta = -dp0.by.dalpha.beta - (1 - pscore) * exp(-theta) * 
-            dtheta.by.dalpha.beta  # n by 4    
-    } else {
-        dwti.by.dalpha.beta = matrix(0, n, ncol(va) + ncol(vb))
-    }
-    
-    dU.by.dalpha.ml.beta.ml = t(dU.by.dp0 * weights) %*% dp0.by.dalpha.beta + 
-        t(dU.by.dwti * weights) %*% dwti.by.dalpha.beta
-    
-    
-    ### 3. tau = -E[dU/dalpha.dr] ######################################## (This
-    ### is the bread of the sandwich estimate)
-    
-    dU.by.dH = va * wt * (x - pscore)  # n by 2
-    dH.by.dalpha.dr = -va * x * H.alpha  # n by 2 
-    
-    tau = -t(dU.by.dH * weights) %*% dH.by.dalpha.dr/sum(weights)  # 2 by 2
-    
-    
-    ### 4. E[d(prop score score equation)/dgamma]
-    
-    dpscore.by.dgamma = vc * pscore * (1 - pscore)  # n by 2   
-    part4 = -t(vc * weights) %*% dpscore.by.dgamma  # 2 by 2
-    
-    
-    ### 5. E[dU/dgamma]
-    
-    dU.by.dpscore = -va * wt * (H.alpha - p0)  # n by 2
-    
-    if (optimal == TRUE) {
-        dwti.by.dpscore = 1 - exp(-theta)  # n
-        dwti.by.dgamma = dpscore.by.dgamma * dwti.by.dpscore  # n by 2
-    } else {
-        dwti.by.dgamma = matrix(0, n, ncol(vc))
-    }
-    
-    dU.by.dgamma = t(dU.by.dpscore * weights) %*% dpscore.by.dgamma + t(dU.by.dwti * 
-        weights) %*% dwti.by.dgamma  # 2 by 2
-    
-    
-    
-    ############################################################################# Assembling semi-parametric variance matrix
-    
-    U = va * wt * (x - pscore) * (H.alpha - p0)  # n by 2
-    
-    S = cbind(dl.by.dpsi0 * (dpsi0.by.dtheta + x) * dtheta.by.dalpha, dl.by.dpsi0 * 
-        dpsi0.by.dphi * dphi.by.dbeta)
-    pscore.score = vc * (x - pscore)
-    
-    Utilde = U - t(dU.by.dalpha.ml.beta.ml %*% (-solve(hessian)) %*% t(S)) - 
-        t(dU.by.dgamma %*% (solve(part4)) %*% t(pscore.score))  # n by 2
-    USigma = t(Utilde * weights) %*% Utilde/sum(weights)
-    
-    
-    
-    ################################### Asymptotic var matrix for alpha.dr
-    
-    alpha.dr.variance = solve(tau) %*% USigma %*% solve(tau)/sum(weights)
-    
-    return(alpha.dr.variance)
-    
-} 
diff --git a/R/2_CallDR.R b/R/2_CallDR.R
deleted file mode 100644
index 6ca8816..0000000
--- a/R/2_CallDR.R
+++ /dev/null
@@ -1,31 +0,0 @@
-
-DREst = function(param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, 
-    weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message) {
-    
-    dr.est = dr.estimate.noiterate(param, y, x, va, vb, vc, alpha.ml, beta.ml, 
-        gamma, optimal, weights, max.step, thres, alpha.start, message)
-    point.est = dr.est$par
-    converged = dr.est$convergence
-    
-    if (param == "RR") 
-        alpha.cov = var.rr.dr(y, x, va, vb, vc, point.est, alpha.ml, beta.ml, 
-                              gamma, optimal, weights)
-    if (param == "RD") 
-        alpha.cov = var.rd.dr(y, x, va, vb, vc, point.est, alpha.ml, beta.ml,
-                              gamma, optimal, weights)
-    
-    pa = dim(va)[2]
-    pb = dim(vb)[2]
-    pc = dim(vc)[2]
-    name = paste(c(rep("alpha", pa), rep("beta", pb), rep("gamma", pc)),
-                 c(1:pa, 1:pb, 1:pc))
-    point.est = c(point.est, beta.ml, gamma)
-    cov = matrix(NA,pa+pb+pc, pa+pb+pc)
-    cov[1:pa,1:pa] = alpha.cov
-    cov[(pa+1):(pa+pb),(pa+1):(pa+pb)] = beta.cov
-    cov[(pa+pb+1):(pa+pb+pc),(pa+pb+1):(pa+pb+pc)] = gamma.cov
-                 
-    sol = WrapResults(point.est, cov, param, name, va, vb, converged)
-    return(sol)
-    
-} 
diff --git a/R/MyFunc.R b/R/MyFunc.R
deleted file mode 100644
index 2c95ee1..0000000
--- a/R/MyFunc.R
+++ /dev/null
@@ -1,71 +0,0 @@
-logit = function(prob) {
-    log(prob) - log(1 - prob)
-}
-
-expit = function(logodds) {
-    1/(1 + exp(-logodds))
-}
-
-
-getlogop = function(p0, p1) {
-    log(p0) + log(p1) - log(1 - p0) - log(1 - p1)
-}
-
-getlogrr = function(p0, p1) {
-    log(p1) - log(p0)
-}
-
-getatanhrd = function(p0, p1) {
-    atanh(p1 - p0)
-}
-
-
-## Function for checking if two things are equal within numerical precision
-same = function(x, y, tolerance = .Machine$double.eps^0.5) {
-    abs(x - y) < tolerance
-}
-
-
-## Functions for wrapping estimation results into a nice format
-WrapResults = function(point.est, cov, param, name, va, vb, converged) {
-    
-    se.est = sqrt(diag(cov))
-    
-    conf.lower = point.est + stats::qnorm(0.025) * se.est
-    conf.upper = point.est + stats::qnorm(0.975) * se.est
-    p.temp = stats::pnorm(point.est/se.est, 0, 1)
-    p.value = 2 * pmin(p.temp, 1 - p.temp)
-    
-    names(point.est) = names(se.est) = rownames(cov) = colnames(cov) = names(conf.lower) = names(conf.upper) = names(p.value) = name
-    
-    coefficients = cbind(point.est, se.est, conf.lower, conf.upper, p.value)
-    
-    linear.predictors = va %*% point.est[1:ncol(va)]
-    if(param=="RR") param.est = exp(linear.predictors)
-    if(param=="RD") param.est = linear.predictors
-    if(param=="OR") param.est = expit(linear.predictors)
-    
-    sol = list(param = param, point.est = point.est, se.est = se.est, cov = cov, 
-        conf.lower = conf.lower, conf.upper = conf.upper, p.value = p.value,
-        coefficients = coefficients, param.est = param.est, va = va, vb = vb, 
-        converged = converged)
-    class(sol) = c("brm", "list")
-    attr(sol, "hidden") = c("param", "se.est", "cov", "conf.lower", "conf.upper", 
-        "p.value","coefficients", "param.est", "va", "vb", "converged")
-    
-    return(sol)
-    
-}
-
-
-## This function is useful for finding limits on the boundary 
-## It gives 0.5*exp(x)*(-1+sqrt(1+4exp(-x))) 
-## This is bounded between 0 and 1, and takes value (-1+sqrt(5))/2 at x=0 (Some relation to golden ratio) 
-## Limits are 0 and 1 as x goes to -infty and +infty respectively 
-## The function will never return NaN given a numerical input
-
-getPrbAux = function(x) {
-    ifelse((x < 17) & (x > (-500)),
-           0.5 * exp(x) * (-1 + (1 + 4 * exp(-x))^0.5),
-           ifelse(x<0, 0, 1))
-}
diff --git a/R/Print.R b/R/Print.R
deleted file mode 100644
index 4b5fe02..0000000
--- a/R/Print.R
+++ /dev/null
@@ -1,41 +0,0 @@
-
-#' Ancillary function for printing
-#'
-#' @param x a list obtained with the function 'brm'
-#' 
-#' @param ... additional arguments affecting the output
-#' 
-#' @export
-
-print.brm = function(x, ...) {
-    hid = attr(x, "hidden")
-    nhid = which(!names(x) %in% hid)
-    
-    if (x$param == "RR") {
-        cat("Parameter of interest: (conditional) relative risk;", "\n", "nuisance parameter: odds product.", 
-            "\n\n", sep = "")
-        cat("Target model:   log(RR) = alpha * va", "\n")
-        cat("Nuisance model: log(OP) = beta * vb", "\n\n")
-    }
-    if (x$param == "RD") {
-        cat("Parameter of interest: (conditional) risk difference;", "\n", 
-            "nuisance parameter: odds product.", "\n\n", sep = "")
-        cat("Target model:   log(RD) = alpha * va", "\n")
-        cat("Nuisance model: log(OP) = beta * vb", "\n\n")
-    }
-    if (x$param == "OR") {
-        cat("Parameter of interest: (conditional) odds ratio;", "\n", "nuisance parameter: baseline risk.", 
-            "\n\n", sep = "")
-        cat("Target model:   log(OR) = alpha * va", "\n")
-        cat("Nuisance model: log(p0) = beta * vb", "\n\n")
-    }
-    
-    
-    for (i in nhid) {
-        x[[i]] = round(x[[i]], 3)
-    }
-    
-    print(x[nhid], 3)
-    
-    cat("See the element '$coefficients' for more information.\n")
-}
diff --git a/R/ValidCheck.R b/R/ValidCheck.R
deleted file mode 100644
index 3b74d4c..0000000
--- a/R/ValidCheck.R
+++ /dev/null
@@ -1,36 +0,0 @@
-
-ValidCheck = function(param, y, x, va, vb, vc, weights, subset, est.method, 
-                      optimal, max.step, thres, alpha.start, beta.start) {
-    if (!is.character(param)) 
-        stop("Parameter must be a character")
-    if (!(param %in% c("RD", "RR", "OR"))) 
-        stop("Parameter can only take RR, RD or OR")
-    
-    if (sum(is.na(y)) + sum(is.na(x)) + sum(is.na(va)) + sum(is.na(vb)) + 
-        sum(is.na(vc)) + sum(is.na(weights)) > 0) 
-        warning("Observations with missing values will be removed.")
-    if (!(all(y %in% c(0, 1)))) 
-        stop("y values must be either 0 or 1.")
-    if (!(all(x %in% c(0, 1)))) 
-        stop("x values must be either 0 or 1.")
-    if (!identical(length(y), length(x), dim(va)[1], dim(vb)[1], dim(vc)[1])) 
-        stop("y, x and v must have the same length (dimension)")
-    
-    if(!is.numeric(weights))
-        stop("weights must either be NULL or take numerical values")
-    if(!is.numeric(subset))
-        stop("subset must either be NULL or take numerical values")
-    if (!(est.method %in% c("MLE", "DR"))) 
-        stop("Must use MLE or DR for estimation")
-    if (!is.logical(optimal)) 
-        stop("optimal must be a logical variable")
-    if(!is.numeric(max.step) & !is.null(max.step))
-        stop("max.step must be a number")
-    if(!is.numeric(thres))
-        stop("thres must be a number")
-    if(!is.null(alpha.start) & length(alpha.start) != dim(va)[2])
-        stop("length of alpha.start must match the dimension of va")
-    if(!is.null(beta.start) & length(beta.start) != dim(vb)[2])
-        stop("length of beta.start must match the dimension of vb")
-    
-} 
diff --git a/R/brm.R b/R/brm.R
index 3373435..9505ba9 100644
--- a/R/brm.R
+++ b/R/brm.R
@@ -1,20 +1,20 @@
-#' @title  Fitting Binary Regression Models
-#'
-#' @description The function \code{brm} in this package provides an alternative to generalized linear models for fitting binary regression models, in which both the response \eqn{y} and the primary exposure of interest \eqn{x} are binary. This is  especially useful if the interest lies in estimating the association between \eqn{x} and \eqn{y}, and how that association varies as a function of (other) covariates \eqn{v}.
-#' 
-#' Unlike \code{glm}, which uses a single link function for the outcome, \code{brm} separates the nuisance model from the target model. This separation provides opportunities to choose nuisance models independently of the target model. To see why this is important, we may contrast it with the use of a GLM to model the log relative risk. In this setting one might use a Poisson regression (with interaction term) \eqn{log P(y = 1|x, va, vb) = \alpha * x * va + \beta * vb} (though such a model ignores the fact that \eqn{y} is binary); here \eqn{va} and \eqn{vb} are subsets of \eqn{v}. Such a Poisson model can be seen as a combination of two parts: a target model \eqn{log RR(va) = \alpha * va} and a nuisance model \eqn{log P(y = 1|x = 0, vb) = \beta *  vb}. However, this nuisance model is variation dependent of the target model so that predicted probabilities may go outside of \eqn{[0,1]}. Furthermore, one cannot solve this problem under a GLM framework as with a GLM, the target model and nuisance model are determined  \emph{simultaneously} through a link function.
-#' 
-#'   More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP).  If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}). 
-#'   
-#'    \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. \code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation. 
-#'    
-#'    \code{brm} also provides doubly robust fitting as an option such that the estimates for \eqn{\alpha} are still consistent and asymptotically normal even when the nuisance model is misspecified, provided that we have a correctly specified logistic model for the exposure probability \eqn{P(x=1|v)}. Such doubly robust estimation is only possible for the Relative Risk and Risk Difference, but not the Odds Ratio.
-#'    
-#'   See Richardson et al. (2017) for more details.
-#'
-#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017).
-#' 
-#'
-#' @docType package
-#' @name brm-package
-NULL
\ No newline at end of file
+#' @title  Fitting Binary Regression Models
+#'
+#' @description The function \code{brm} in this package provides an alternative to generalized linear models for fitting binary regression models, in which both the response \eqn{y} and the primary exposure of interest \eqn{x} are binary. This is  especially useful if the interest lies in estimating the association between \eqn{x} and \eqn{y}, and how that association varies as a function of (other) covariates \eqn{v}.
+#'
+#' Unlike \code{glm}, which uses a single link function for the outcome, \code{brm} separates the nuisance model from the target model. This separation provides opportunities to choose nuisance models independently of the target model. To see why this is important, we may contrast it with the use of a GLM to model the log relative risk. In this setting one might use a Poisson regression (with interaction term) \eqn{log P(y = 1|x, va, vb) = \alpha * x * va + \beta * vb} (though such a model ignores the fact that \eqn{y} is binary); here \eqn{va} and \eqn{vb} are subsets of \eqn{v}. Such a Poisson model can be seen as a combination of two parts: a target model \eqn{log RR(va) = \alpha * va} and a nuisance model \eqn{log P(y = 1|x = 0, vb) = \beta *  vb}. However, this nuisance model is variation dependent of the target model so that predicted probabilities may go outside of \eqn{[0,1]}. Furthermore, one cannot solve this problem under a GLM framework as with a GLM, the target model and nuisance model are determined  \emph{simultaneously} through a link function.
+#'
+#'   More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP).  If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}).
+#'
+#'    \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. \code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation.
+#'
+#'    \code{brm} also provides doubly robust fitting as an option such that the estimates for \eqn{\alpha} are still consistent and asymptotically normal even when the nuisance model is misspecified, provided that we have a correctly specified logistic model for the exposure probability \eqn{P(x=1|v)}. Such doubly robust estimation is only possible for the Relative Risk and Risk Difference, but not the Odds Ratio.
+#'
+#'   See Richardson et al. (2017) for more details.
+#'
+#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017).
+#'
+#'
+#' @docType package
+#' @name brm-package
+NULL
diff --git a/R/dr_est.R b/R/dr_est.R
new file mode 100644
index 0000000..3cb5b74
--- /dev/null
+++ b/R/dr_est.R
@@ -0,0 +1,40 @@
+dr_est <- function(
+  param, y, x, va, vb, vc, alpha_ml, beta_ml, gamma, optimal,
+  weights, max_step, thres, alpha_start, beta_cov, gamma_cov, message
+) {
+  dr_est <- dr_estimate_noiterate(
+    param, y, x, va, vb, vc, alpha_ml, beta_ml,
+    gamma, optimal, weights, max_step, thres, alpha_start, message
+  )
+  point_est <- dr_est$par
+  converged <- dr_est$convergence
+
+  if (param == "RR") {
+    alpha_cov <- var_rr_dr(
+      y, x, va, vb, vc, point_est, alpha_ml, beta_ml,
+      gamma, optimal, weights
+    )
+  }
+  if (param == "RD") {
+    alpha_cov <- var_rd_dr(
+      y, x, va, vb, vc, point_est, alpha_ml, beta_ml,
+      gamma, optimal, weights
+    )
+  }
+
+  pa <- dim(va)[2]
+  pb <- dim(vb)[2]
+  pc <- dim(vc)[2]
+  name <- paste(
+    c(rep("alpha", pa), rep("beta", pb), rep("gamma", pc)),
+    c(1:pa, 1:pb, 1:pc)
+  )
+  point_est <- c(point_est, beta_ml, gamma)
+  cov <- matrix(NA, pa + pb + pc, pa + pb + pc)
+  cov[1:pa, 1:pa] <- alpha_cov
+  cov[(pa + 1):(pa + pb), (pa + 1):(pa + pb)] <- beta_cov
+  cov[(pa + pb + 1):(pa + pb + pc), (pa + pb + 1):(pa + pb + pc)] <- gamma_cov
+
+  sol <- wrap_results(point_est, cov, param, name, va, vb, converged)
+  return(sol)
+}
diff --git a/R/dr_point.R b/R/dr_point.R
new file mode 100644
index 0000000..a00d971
--- /dev/null
+++ b/R/dr_point.R
@@ -0,0 +1,67 @@
+dr_estimate_onestep <- function(
+  param, y, x, va, vb, alpha_start, beta, pscore,
+  wt, weights, max_step, thres, message
+) {
+  startpars <- c(alpha_start) # pars only contain alpha
+  ## DR estimation equation^2
+  if (param == "RR") {
+    dr_objective <- function(pars) {
+      p0 <- get_prob_rr(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1]
+      H_alpha <- as.vector(y * exp(-x * (va %*% pars)))
+      tmp <- mat_vec_mul(t(va), (wt * (x - pscore) * (H_alpha - p0) * weights))
+      return(sum(tmp^2))
+    }
+  }
+  if (param == "RD") {
+    dr_objective <- function(pars) {
+      p0 <- get_prob_rd(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1]
+      H_alpha <- y - x * tanh(mat_vec_mul(va, pars))
+      tmp <- mat_vec_mul(t((H_alpha - p0) * (x - pscore)), (va * wt * weights))
+      return(sum(tmp^2))
+    }
+  }
+
+  opt <- stats::optim(startpars, dr_objective, control = list(reltol = thres))
+  opt$convergence <- (opt$convergence == 0) # change cf. optim()
+
+  return(opt)
+}
+
+
+dr_estimate_noiterate <- function(
+  param, y, x, va, vb, vc, alpha_ml, beta_ml,
+  gamma, optimal, weights, max_step, thres, alpha_start, message
+) {
+  pscore <- as.vector(expit(mat_vec_mul(vc, gamma)))
+
+  if (optimal == TRUE) {
+    if (param == "RR") {
+      p0 <- get_prob_rr(mat_vec_mul(va, alpha_ml), mat_vec_mul(vb, beta_ml))[, 1]
+      wt <- as.vector(1 / (1 - p0 + (1 - pscore) * (exp(mat_vec_mul(-va, alpha_ml)) -
+        1)))
+    }
+    if (param == "RD") {
+      p0 <- get_prob_rd(mat_vec_mul(va, alpha_ml), mat_vec_mul(vb, beta_ml))[, 1]
+      rho <- as.vector(tanh(mat_vec_mul(va, alpha_ml)))
+      wt <- (1 - rho) * (1 + rho) / (p0 * (1 - p0) + rho * (1 - pscore) *
+        (1 - 2 * p0 - rho))
+    }
+  } else {
+    wt <- rep(1, length(pscore))
+  }
+
+  if (is.null(alpha_start)) {
+    alpha_start <- alpha_ml
+  }
+
+  alpha_dr_opt <- dr_estimate_onestep(
+    param, y, x, va, vb, alpha_start, beta_ml,
+    pscore, wt, weights, max_step, thres, message
+  )
+
+  # if(MESSAGE){ print(paste('DR One Step: ',' Alpha:
+  # ',paste(round(alpha_dr,5),collapse=', '),' Beta:
+  # ',paste(round(beta_ml,5),collapse=', '))) }
+
+  return(alpha_dr_opt)
+}
diff --git a/R/getProbScalarRD.R b/R/getProbScalarRD.R
deleted file mode 100644
index 2dfcada..0000000
--- a/R/getProbScalarRD.R
+++ /dev/null
@@ -1,99 +0,0 @@
-
-#' Calculate risks from arctanh RD and log OP
-#' 
-#' @param atanhrd arctanh of risk difference
-#' 
-#' @param logop log of odds product
-#' 
-#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. 
-#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. 
-#' 
-#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))}
-#' 
-#' @examples getProbScalarRD(0,0)
-#' 
-#' set.seed(0)
-#' logrr = rnorm(10,0,1)
-#' logop = rnorm(10,0,1)
-#' probs = mapply(getProbScalarRD, logrr, logop)
-#' rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)")
-#' probs
-#' 
-#' @export
-
-
-getProbScalarRD = function(atanhrd, logop) {
-    
-    if(length(atanhrd) == 2){
-        logop   = atanhrd[2]
-        atanhrd = atanhrd[1]
-    }
-    
-    if (logop > 350) {
-        if (atanhrd < 0) {
-            p0 = 1
-            p1 = p0 + tanh(atanhrd)
-        } else {
-            p1 = 1
-            p0 = p1 - tanh(atanhrd)
-        }
-    } else {
-        ## not on boundary logop = 0; solving linear equations
-        if (same(logop, 0)) {
-            p0 = 0.5 * (1 - tanh(atanhrd))
-        } else {
-            p0 = (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * 
-                (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * 
-                (1 - tanh(atanhrd)) * (1 - exp(logop))))/(2 * (exp(logop) - 
-                1))
-        }
-        p1 = p0 + tanh(atanhrd)
-    }
-    return(c(p0, p1))
-} 
-
-#' Calculate risks from arctanh RD and log OP (vectorised)
-#' 
-#' @param atanhrd arctanh of risk difference
-#' 
-#' @param logop log of odds product
-#' 
-#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. 
-#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. 
-#' 
-#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns
-#' 
-#' @examples getProbRD(0,0)
-#' 
-#' set.seed(0)
-#' logrr = rnorm(10,0,1)
-#' logop = rnorm(10,0,1)
-#' probs = getProbRD(logrr, logop)
-#' colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)")
-#' probs
-#' 
-#' @export
-getProbRD = function(atanhrd, logop) {
-    if(is.matrix(atanhrd) && ncol(atanhrd) == 2){
-        logop = atanhrd[,2]
-        atanhrd = atanhrd[,1]
-    } else if(length(logop)==1 && is.na(logop) && length(atanhrd) == 2){
-        logop = atanhrd[2]
-        atanhrd = atanhrd[1]
-    }
-    p0 <- ifelse (logop > 350,
-                  ifelse(atanhrd < 0,
-                         1,
-                         1 - tanh(atanhrd)),
-                  ## not on boundary logop = 0; solving linear equations
-                  ifelse(same(logop, 0),
-                         0.5 * (1 - tanh(atanhrd)),
-                         (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * (1 - tanh(atanhrd)) * (1 - exp(logop))))/(2 * (exp(logop) - 1))))
-    p1 <- ifelse (logop > 350,
-                  ifelse(atanhrd < 0,
-                         1 + tanh(atanhrd),
-                         1),
-                  ## not on boundary logop = 0
-                  p0 + tanh(atanhrd))
-    cbind(p0,p1)
-}
diff --git a/R/getProbScalarRR.R b/R/getProbScalarRR.R
deleted file mode 100644
index 6949c62..0000000
--- a/R/getProbScalarRR.R
+++ /dev/null
@@ -1,117 +0,0 @@
-
-
-#' Calculate risks from log RR and log OP
-#'
-#' @param logrr log of relative risk
-#' 
-#' @param logop log of odds product
-#'
-#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. 
-#' 
-#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))}
-#' 
-#' @examples getProbScalarRR(0,0)
-#' 
-#' set.seed(0)
-#' logrr = rnorm(10,0,1)
-#' logop = rnorm(10,0,1)
-#' probs = mapply(getProbScalarRR, logrr, logop)
-#' rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)")
-#' probs
-#' 
-#' @export
-
-getProbScalarRR = function(logrr, logop = NA) {
-    
-    if(length(logrr) == 2){
-        logop = logrr[2]
-        logrr = logrr[1]
-    }
-    
-    if ((logop < (-12)) || (logop > 12) || (logrr < (-12)) || (logrr > 12)) {
-        # on the boundary South edge: large -ve logrr or (large -ve logop and -ve
-        # logrr)
-        if ((logrr < (-12)) || ((logop < (-12)) && (logrr < 0))) {
-            p0 = getPrbAux(logop - logrr)
-            p1 = 0
-        } else {
-            if ((logrr > 12) || ((logop < (-12)) && (logrr > 0))) {
-                # West edge: large +ve logrr or (large -ve logop and +ve logrr)
-                p0 = 0
-                p1 = getPrbAux(logrr + logop)
-            } else {
-                # North or East edges (=logop is large +ve)
-                p0 = min(exp(-logrr), 1)
-                p1 = min(exp(logrr), 1)
-            }
-        }
-    } else {
-        # not on the boundary logop = 0; solving linear equations logop not 0;
-        # solving a quadratic equation
-        if (same(logop, 0)) {
-            p0 = 1/(1 + exp(logrr))
-        } else {
-            p0 = (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + 
-                1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop))))/(2 * exp(logrr) * 
-                (1 - exp(logop)))
-        }
-        p1 = exp(logrr) * p0
-    }
-    return(c(p0, p1))
-} 
-
-#' Calculate risks from log RR and log OP (vectorised)
-#'
-#' @param logrr log of relative risk
-#' 
-#' @param logop log of odds product
-#'
-#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. 
-#' 
-#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns
-#' 
-#' @examples getProbRR(0,0)
-#' 
-#' set.seed(0)
-#' logrr = rnorm(10,0,1)
-#' logop = rnorm(10,0,1)
-#' probs = getProbRR(logrr, logop)
-#' colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)")
-#' probs
-#' 
-#' @export
-getProbRR = function(logrr, logop = NA) {
-    if(is.matrix(logrr) && ncol(logrr) == 2){
-        logop = logrr[,2]
-        logrr = logrr[,1]
-    } else if(length(logop)==1 && is.na(logop) && length(logrr) == 2){
-        logop = logrr[2]
-        logrr = logrr[1]
-    }
-    p0 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12),
-                 ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve
-                 ## logrr)
-                 ifelse ((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)),
-                         getPrbAux(logop-logrr),
-                         ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)),
-                                ## West edge: large +ve logrr or (large -ve logop and +ve logrr)
-                                0,
-                                pmin(exp(-logrr), 1))),
-                 ## not on the boundary logop = 0; solving linear equations logop not 0;
-                 ## solving a quadratic equation
-          ifelse(same(logop, 0),
-                 1/(1 + exp(logrr)),
-                 (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop))))/(2 * exp(logrr) * (1 - exp(logop)))))
-    p1 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12),
-                 ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve
-                 ## logrr
-                 ifelse ((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)),
-                         0,
-                         ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)),
-                                ## West edge: large +ve logrr or (large -ve logop and +ve logrr)
-                                getPrbAux(logop + logrr),
-                                pmin(exp(logrr), 1))),
-                 ## not on the boundary logop = 0
-                 exp(logrr) * p0)
-    cbind(p0,p1)
-} 
diff --git a/R/get_prob_rd.R b/R/get_prob_rd.R
new file mode 100644
index 0000000..4bc99c0
--- /dev/null
+++ b/R/get_prob_rd.R
@@ -0,0 +1,50 @@
+#' Calculate risks from arctanh RD and log OP (vectorised)
+#'
+#' @param atanhrd arctanh of risk difference
+#'
+#' @param logop log of odds product
+#'
+#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}.
+#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}.
+#'
+#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns
+#'
+#' @examples get_prob_rd(0, 0)
+#'
+#' set.seed(0)
+#' logrr <- rnorm(10, 0, 1)
+#' logop <- rnorm(10, 0, 1)
+#' probs <- get_prob_rd(logrr, logop)
+#' colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)")
+#' probs
+#'
+#' @export
+get_prob_rd <- function(atanhrd, logop) {
+  if (is.matrix(atanhrd) && ncol(atanhrd) == 2) {
+    logop <- atanhrd[, 2]
+    atanhrd <- atanhrd[, 1]
+  } else if (length(logop) == 1 && is.na(logop) && length(atanhrd) == 2) {
+    logop <- atanhrd[2]
+    atanhrd <- atanhrd[1]
+  }
+  p0 <- ifelse(logop > 350,
+    ifelse(atanhrd < 0,
+      1,
+      1 - tanh(atanhrd)
+    ),
+    ## not on boundary logop = 0; solving linear equations
+    ifelse(same(logop, 0),
+      0.5 * (1 - tanh(atanhrd)),
+      (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * (1 - tanh(atanhrd)) * (1 - exp(logop)))) / (2 * (exp(logop) - 1))
+    )
+  )
+  p1 <- ifelse(logop > 350,
+    ifelse(atanhrd < 0,
+      1 + tanh(atanhrd),
+      1
+    ),
+    ## not on boundary logop = 0
+    p0 + tanh(atanhrd)
+  )
+  cbind(p0, p1)
+}
diff --git a/R/get_prob_rr.R b/R/get_prob_rr.R
new file mode 100644
index 0000000..40097e2
--- /dev/null
+++ b/R/get_prob_rr.R
@@ -0,0 +1,62 @@
+#' Calculate risks from log RR and log OP
+#'
+#' @param logrr log of relative risk
+#'
+#' @param logop log of odds product
+#'
+#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}.
+#'
+#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns
+#'
+#' @examples get_prob_rr(0, 0)
+#'
+#' set.seed(0)
+#' logrr <- rnorm(10, 0, 1)
+#' logop <- rnorm(10, 0, 1)
+#' probs <- get_prob_rr(logrr, logop)
+#' colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)")
+#' probs
+#'
+#' @export
+get_prob_rr <- function(logrr, logop = NA) {
+  if (is.matrix(logrr) && ncol(logrr) == 2) {
+    logop <- logrr[, 2]
+    logrr <- logrr[, 1]
+  } else if (length(logop) == 1 && is.na(logop) && length(logrr) == 2) {
+    logop <- logrr[2]
+    logrr <- logrr[1]
+  }
+  p0 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12),
+    ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve
+    ## logrr)
+    ifelse((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)),
+      get_prb_aux(logop - logrr),
+      ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)),
+        ## West edge: large +ve logrr or (large -ve logop and +ve logrr)
+        0,
+        pmin(exp(-logrr), 1)
+      )
+    ),
+    ## not on the boundary logop = 0; solving linear equations logop not 0;
+    ## solving a quadratic equation
+    ifelse(same(logop, 0),
+      1 / (1 + exp(logrr)),
+      (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop)))) / (2 * exp(logrr) * (1 - exp(logop)))
+    )
+  )
+  p1 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12),
+    ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve
+    ## logrr
+    ifelse((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)),
+      0,
+      ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)),
+        ## West edge: large +ve logrr or (large -ve logop and +ve logrr)
+        get_prb_aux(logop + logrr),
+        pmin(exp(logrr), 1)
+      )
+    ),
+    ## not on the boundary logop = 0
+    exp(logrr) * p0
+  )
+  cbind(p0, p1)
+}
diff --git a/R/hessian_2_rd.R b/R/hessian_2_rd.R
new file mode 100644
index 0000000..ba4cab3
--- /dev/null
+++ b/R/hessian_2_rd.R
@@ -0,0 +1,69 @@
+hessian_2_rd <- function(y, x, va, vb, alpha_ml, beta_ml, cnt) {
+  # calculating the Hessian using the second derivative have to do so
+  # because under mis-specification of models Hessian no longer equals the
+  # square of the first order derivatives
+
+  p0p1 <- get_prob_rd(va %*% alpha_ml, vb %*% beta_ml)
+  # p0p1 = cbind(p0, p1): n * 2 matrix
+  p0 <- p0p1[, 1]
+  p1 <- p0p1[, 2]
+  n <- nrow(va)
+  pA <- p0
+  pA[x == 1] <- p1[x == 1]
+  s0 <- p0 * (1 - p0)
+  s1 <- p1 * (1 - p1)
+  sA <- pA * (1 - pA)
+
+  rho <- as.vector(tanh(va %*% alpha_ml)) # estimated risk differences
+
+  ### First order derivatives ###
+
+  dl_by_dpA <- (y - pA) / sA
+  dp0_by_dphi <- s0 * s1 / (s0 + s1)
+  dp0_by_drho <- -s0 / (s0 + s1)
+  drho_by_dalpha <- va * (1 - rho^2)
+  dphi_by_dbeta <- vb
+
+  dpA_by_drho <- dp0_by_drho + x
+  dpA_by_dalpha <- drho_by_dalpha * dpA_by_drho
+  dpA_by_dphi <- dp0_by_dphi
+  dpA_by_dbeta <- dphi_by_dbeta * dpA_by_dphi
+
+  ### Second order derivatives ###
+
+  d2l_by_dpA_2 <- -(y - pA)^2 / sA^2
+  d2pA_by_drho_2 <- s0 * s1 * (2 - 2 * p0 - 2 * p1) / (s0 + s1)^3
+  d2pA_by_dphi_drho <- (s0 * (1 - 2 * p1) - s1 * (1 - 2 * p0)) * s0 * s1 / (s0 +
+    s1)^3
+  d2pA_by_dphi_2 <- (s0^2 * (1 - 2 * p1) + s1^2 * (1 - 2 * p0)) * s0 * s1 / (s0 +
+    s1)^3
+
+  d2rho_by_dalpha_2 <- -2 * t(va * rho) %*% drho_by_dalpha
+
+  ### Compute elements of the Hessian matrix ###
+
+  d2l_by_dalpha_2 <- t(dpA_by_dalpha * d2l_by_dpA_2 * cnt) %*% dpA_by_dalpha +
+    t(drho_by_dalpha * dl_by_dpA * d2pA_by_drho_2 * cnt) %*% drho_by_dalpha -
+    2 * t(va * rho * dl_by_dpA * dpA_by_drho * cnt) %*% drho_by_dalpha
+
+  d2l_by_dalpha_dbeta <- t(dpA_by_dalpha * d2l_by_dpA_2 * cnt) %*% dpA_by_dbeta +
+    t(drho_by_dalpha * dl_by_dpA * d2pA_by_dphi_drho * cnt) %*% dphi_by_dbeta
+  d2l_by_dbeta_dalpha <- t(d2l_by_dalpha_dbeta)
+
+  d2l_by_dbeta_2 <- t(dpA_by_dbeta * d2l_by_dpA_2 * cnt) %*% dpA_by_dbeta +
+    t(dphi_by_dbeta * dl_by_dpA * d2pA_by_dphi_2 * cnt) %*% dphi_by_dbeta
+
+  hessian <- -rbind(cbind(d2l_by_dalpha_2, d2l_by_dalpha_dbeta), cbind(
+    d2l_by_dbeta_dalpha,
+    d2l_by_dbeta_2
+  ))
+  ### NB Note the extra minus sign here
+
+  return(list(
+    hessian = hessian, p0 = p0, p1 = p1, pA = pA, s0 = s0, s1 = s1,
+    sA = sA, rho = rho, dl_by_dpA = dl_by_dpA, dp0_by_dphi = dp0_by_dphi,
+    dp0_by_drho = dp0_by_drho, drho_by_dalpha = drho_by_dalpha, dphi_by_dbeta = dphi_by_dbeta,
+    dpA_by_drho = dpA_by_drho, dpA_by_dalpha = dpA_by_dalpha, dpA_by_dphi = dpA_by_dphi,
+    dpA_by_dbeta = dpA_by_dbeta
+  ))
+}
diff --git a/R/hessian_2_rr.R b/R/hessian_2_rr.R
new file mode 100644
index 0000000..bfb4d14
--- /dev/null
+++ b/R/hessian_2_rr.R
@@ -0,0 +1,74 @@
+hessian_2_rr <- function(y, x, va, vb, alpha_ml, beta_ml, weights) {
+  # calculating the Hessian using the second derivative have to do so
+  # because under mis-specification of models Hessian no longer equals the
+  # square of the first order derivatives
+
+  p0p1 <- get_prob_rr(va %*% alpha_ml, vb %*% beta_ml)
+  # p0p1 = cbind(p0, p1): n * 2 matrix
+  p0 <- p0p1[, 1]
+  p1 <- p0p1[, 2]
+  n <- nrow(va)
+  pA <- p0
+  pA[x == 1] <- p1[x == 1]
+
+
+  ### Building blocks
+
+  dpsi0_by_dtheta <- -(1 - p0) / (1 - p0 + 1 - p1)
+  dpsi0_by_dphi <- (1 - p0) * (1 - p1) / (1 - p0 + 1 - p1)
+
+  dtheta_by_dalpha <- va
+  dphi_by_dbeta <- vb
+
+  dl_by_dpsi0 <- (y - pA) / (1 - pA)
+  d2l_by_dpsi0_2 <- (y - 1) * pA / ((1 - pA)^2)
+
+
+  ###### d2l_by_dalpha_2
+
+  d2psi0_by_dtheta_2 <- ((p0 - p1) * dpsi0_by_dtheta - (1 - p0) * p1) / ((1 -
+    p0 + 1 - p1)^2)
+
+  d2l_by_dtheta_2 <- d2l_by_dpsi0_2 * (dpsi0_by_dtheta + x)^2 + dl_by_dpsi0 *
+    d2psi0_by_dtheta_2
+
+  d2l_by_dalpha_2 <- t(dtheta_by_dalpha * d2l_by_dtheta_2 * weights) %*%
+    dtheta_by_dalpha
+
+
+  ###### d2l_by_dalpha_dbeta
+
+  d2psi0_by_dtheta_dphi <- (1 - p0) * (1 - p1) * (p0 - p1) / (1 - p0 + 1 -
+    p1)^3
+
+  d2l_by_dtheta_dphi <- d2l_by_dpsi0_2 * (dpsi0_by_dtheta + x) * dpsi0_by_dphi +
+    dl_by_dpsi0 * d2psi0_by_dtheta_dphi
+
+  d2l_by_dalpha_dbeta <- t(dtheta_by_dalpha * d2l_by_dtheta_dphi * weights) %*%
+    dphi_by_dbeta
+  d2l_by_dbeta_dalpha <- t(d2l_by_dalpha_dbeta)
+  # d2l_by_dalpha_dbeta is symmetric itself if (because) va=vb
+
+
+  #### d2l_by_dbeta2
+
+  d2psi0_by_dphi_2 <- (-(p0 * (1 - p1)^2 + p1 * (1 - p0)^2) / (1 - p0 + 1 -
+    p1)^2) * dpsi0_by_dphi
+
+  d2l_by_dphi_2 <- d2l_by_dpsi0_2 * (dpsi0_by_dphi)^2 + dl_by_dpsi0 * d2psi0_by_dphi_2
+
+  d2l_by_dbeta_2 <- t(dphi_by_dbeta * d2l_by_dphi_2 * weights) %*% dphi_by_dbeta
+
+
+  hessian <- -rbind(cbind(d2l_by_dalpha_2, d2l_by_dalpha_dbeta), cbind(
+    d2l_by_dbeta_dalpha,
+    d2l_by_dbeta_2
+  ))
+  ### NB Note the extra minus sign here
+
+  return(list(
+    hessian = hessian, p0 = p0, p1 = p1, pA = pA, dpsi0_by_dtheta = dpsi0_by_dtheta,
+    dpsi0_by_dphi = dpsi0_by_dphi, dtheta_by_dalpha = dtheta_by_dalpha,
+    dphi_by_dbeta = dphi_by_dbeta, dl_by_dpsi0 = dl_by_dpsi0
+  ))
+}
diff --git a/R/main.R b/R/main.R
index 7bdfa58..2635363 100644
--- a/R/main.R
+++ b/R/main.R
@@ -1,181 +1,200 @@
-#' Fitting Binary Regression Models
-#'
-#' @description  \code{brm} is used to estimate the association between two binary variables, and how that varies as a function of other covariates.
-#'
-#' @param param The measure of association. Can take value 'RD' (risk difference),
-#'  'RR' (relative risk) or 'OR' (odds ratio)
-#'  
-#' @param y The response vector. Should only take values 0 or 1.
-#' 
-#' @param x The exposure vector. Should only take values 0 or 1.
-#' 
-#' @param va The covariates matrix for the target model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix.
-#' 
-#' @param vb The covariates matrix for the nuisance model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. (If not specified, defaults to va.)
-#' 
-#' @param vc The covariates matrix for the probability of exposure, often called the propensity score. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. By default we fit a logistic regression model for the propensity score. (If not specified, defaults to va.)
-#' 
-#' @param weights An optional vector of 'prior weights' to be used in the fitting process. Should be NULL or a numeric vector.
-#' 
-#' @param subset An optional vector specifying a subset of observations to be used in the fitting process.
-#' 
-#' @param est.method The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation).
-#' 
-#' @param optimal   Use the optimal weighting function for the doubly robust estimator? Ignored if the estimation method is 'MLE'. The default is TRUE.
-#' 
-#' @param max.step  The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000.
-#' 
-#' @param thres Threshold for judging convergence. The default is 1e-6.
-#' 
-#' @param alpha.start   Starting values for the parameters in the target model.
-#' 
-#' @param beta.start    Starting values for the parameters in the nuisance model.
-#'
-#' @param message   Show optimization details? Ignored if the estimation method is 'MLE'. The default is FALSE.
-#' 
-#' @details  \code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation. 
-#'If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}. 
-#'If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}. 
-#'If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}. 
-#'For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}. 
-#'For OR, the nuisance model is for the baseline risk: \eqn{logit(P(y=1|x=0,vb)) = \beta*vb.}
-#' In each case the nuisance model is variation independent of the target model, which  ensures that the predicted probabilities lie in \eqn{[0,1]}. 
-#' See Richardson et al. (2016+) for more details.
-#'
-#' If \code{est.method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details.
-#' 
-#' When estimating RR and RD, \code{est.method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency.
-#' 
-#' For the doubly robust estimation method, MLE is used to obtain preliminary estimates of \eqn{\alpha}, \eqn{\beta} and \eqn{\gamma}. The estimate of \eqn{\alpha} is then updated by solving a doubly-robust estimating equation. (The estimate for \eqn{\beta} remains the MLE.)
-#' 
-#' @return A list consisting of
-#'  \item{param}{the measure of association.} 
-#'
-#' \item{point.est}{ the point estimates.}
-#' 
-#' \item{se.est}{the standard error estimates.}
-#' 
-#' \item{cov}{estimate of the covariance matrix for the estimates.}
-#' 
-#' \item{conf.lower}{ the lower limit of the 95\% (marginal) confidence interval.}
-#' 
-#' \item{conf.upper}{ the upper limit of the 95\% (marginal) confidence interval.}
-#' 
-#' \item{p.value}{the two sided p-value for testing zero coefficients.}
-#' 
-#' \item{coefficients}{ the matrix summarizing key information: point estimate, 95\% confidence interval and p-value.}
-#' 
-#' \item{param.est}{the fitted RR/RD/OR.}
-#' 
-#' \item{va}{   the matrix of covariates for the target model.}
-#' 
-#' \item{vb}{   the matrix of covariates for the nuisance model.}
-#' 
-#' \item{converged}{  Did the maximization process converge? }
-#' 
-#' @author Linbo Wang <linbo.wang@utoronto.ca>, Mark Clements <mark.clements@ki.se>, Thomas Richardson <thomasr@uw.edu>
-#' 
-#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017).
-#' 
-#' Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robust estimation in a semiparametric odds ratio model." Biometrika 97.1 (2010): 171-180.
-#' 
-#' @seealso \code{getProbScalarRD}, \code{getProbRD} (vectorised), \code{getProbScalarRR} and \code{getProbRR} (vectorised) for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP);
-#' 
-#' \code{predict.blm} for obtaining fitted probabilities from \code{brm} fits.
-#' 
-#' @examples 
-#' set.seed(0)
-#' n           = 100
-#' alpha.true  = c(0,-1)
-#' beta.true   = c(-0.5,1)
-#' gamma.true  = c(0.1,-0.5)
-#' params.true = list(alpha.true=alpha.true, beta.true=beta.true, 
-#'      gamma.true=gamma.true)
-#' v.1         = rep(1,n)       # intercept term
-#' v.2         = runif(n,-2,2) 
-#' v           = cbind(v.1,v.2)
-#' pscore.true = exp(v %*% gamma.true) / (1+exp(v %*% gamma.true))
-#' p0p1.true   = getProbRR(v %*% alpha.true,v %*% beta.true)
-#' x           = rbinom(n, 1, pscore.true)  
-#' pA.true       = p0p1.true[,1]
-#' pA.true[x==1] = p0p1.true[x==1,2]
-#' y = rbinom(n, 1, pA.true)
-#' 
-#' fit.mle = brm(y,x,v,v,'RR','MLE',v,TRUE)
-#' fit.drw = brm(y,x,v,v,'RR','DR',v,TRUE)
-#' fit.dru = brm(y,x,v,v,'RR','DR',v,FALSE)
-#'
-#' fit.mle2 = brm(y,x,~v.2, ~v.2, 'RR','MLE', ~v.2,TRUE) # same as fit.mle
-#'
-#' @export
-
-
-brm = function(y, x, va, vb = NULL, param, est.method = "MLE", vc = NULL, 
-    optimal = TRUE, weights = NULL, subset = NULL, max.step = NULL, thres = 1e-8, 
-    alpha.start = NULL, beta.start = NULL, message = FALSE) {
-    
-    # default param = 'RR'; est.method = 'MLE'; va = v; vb = v; vc = v;
-    # weights = NULL; subset = NULL; optimal = TRUE; max.step = NULL; 
-    # thres = 1e-06; alpha.start = NULL; beta.start = NULL
-    
-    if (is.null(vb)) 
-        vb = va
-    if (is.null(vc)) 
-        vc = va
-    
-    if(class(va)[1] == "formula") va = stats::model.matrix(va)
-    if(class(vb)[1] == "formula") vb = stats::model.matrix(vb)
-    if(class(vc)[1] == "formula") vc = stats::model.matrix(vc)
-    
-    if(is.vector(va)) va = as.matrix(va, ncol = 1)
-    if(is.vector(vb)) vb = as.matrix(vb, ncol = 1)
-    if(is.vector(vc)) vc = as.matrix(vc, ncol = 1)
-    
-    if (is.null(weights)) 
-        weights = rep(1, length(y))
-    if (is.null(subset))
-        subset = 1:length(y)
-    
-    ValidCheck(param, y, x, va, vb, vc, weights, subset, est.method, optimal,
-               max.step, thres, alpha.start, beta.start)
-    
-    data = cbind(y,x,va,vb,vc,weights)[subset,]
-    subset = subset[rowSums(is.na(data)) == 0]
-    y = y[subset];  x = x[subset];  va = va[subset,,drop=FALSE];    vb = vb[subset,,drop=FALSE]
-    vc = vc[subset,,drop=FALSE];   weights = weights[subset]
-    
-    pa = dim(va)[2]
-    pb = dim(vb)[2]
-    if (is.null(max.step)) max.step = min(pa * 20, 1000)
-    
-    if (est.method == "MLE"){
-        sol = MLEst(param, y, x, va, vb, weights, max.step, thres, alpha.start, 
-                    beta.start, pa, pb)
-    }
-    if (est.method == "DR") {
-        if (param == "OR") {
-            cat("No doubly robust estimation methods for OR (with propensity score models) are available. Please refer to Tchetgen Tchetgen et al. (2010) for an alternative doubly robust estimation method. \n")
-            return()
-        }
-        if (is.null(alpha.start) | is.null(beta.start)){
-            sol = MLEst(param, y, x, va, vb, weights, max.step, thres, 
-                        alpha.start, beta.start, pa, pb)
-            alpha.ml = sol$point.est[1:pa]
-            beta.ml = sol$point.est[(pa+1):(pa+pb)]
-            beta.cov = sol$cov[(pa+1):(pa+pb),(pa+1):(pa+pb)]
-            alpha.start = alpha.ml
-        }else{
-            alpha.ml = alpha.start; beta.ml = beta.start
-            beta.cov = matrix(NA,pb,pb)
-        } 
-        
-        gamma.fit = stats::glm(x~vc-1, weight = weights, family="binomial")
-        gamma = gamma.fit$coefficients
-        gamma.cov = summary(gamma.fit)$cov.unscaled
-        sol = DREst(param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, 
-            weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message)
-    }
-    
-    return(sol)
-    
-} 
+#' Fitting Binary Regression Models
+#'
+#' @description  \code{brm} is used to estimate the association between two binary variables, and how that varies as a function of other covariates.
+#'
+#' @param param The measure of association. Can take value 'RD' (risk difference),
+#'  'RR' (relative risk) or 'OR' (odds ratio)
+#'
+#' @param y The response vector. Should only take values 0 or 1.
+#'
+#' @param x The exposure vector. Should only take values 0 or 1.
+#'
+#' @param va The covariates matrix for the target model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix.
+#'
+#' @param vb The covariates matrix for the nuisance model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. (If not specified, defaults to va.)
+#'
+#' @param vc The covariates matrix for the probability of exposure, often called the propensity score. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. By default we fit a logistic regression model for the propensity score. (If not specified, defaults to va.)
+#'
+#' @param weights An optional vector of 'prior weights' to be used in the fitting process. Should be NULL or a numeric vector.
+#'
+#' @param subset An optional vector specifying a subset of observations to be used in the fitting process.
+#'
+#' @param est_method The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation).
+#'
+#' @param optimal   Use the optimal weighting function for the doubly robust estimator? Ignored if the estimation method is 'MLE'. The default is TRUE.
+#'
+#' @param max_step  The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000.
+#'
+#' @param thres Threshold for judging convergence. The default is 1e-6.
+#'
+#' @param alpha_start   Starting values for the parameters in the target model.
+#'
+#' @param beta_start    Starting values for the parameters in the nuisance model.
+#'
+#' @param message   Show optimization details? Ignored if the estimation method is 'MLE'. The default is FALSE.
+#'
+#' @details  \code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation.
+#' If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}.
+#' If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}.
+#' If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}.
+#' For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}.
+#' For OR, the nuisance model is for the baseline risk: \eqn{logit(P(y=1|x=0,vb)) = \beta*vb.}
+#' In each case the nuisance model is variation independent of the target model, which  ensures that the predicted probabilities lie in \eqn{[0,1]}.
+#' See Richardson et al. (2016+) for more details.
+#'
+#' If \code{est_method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details.
+#'
+#' When estimating RR and RD, \code{est_method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency.
+#'
+#' For the doubly robust estimation method, MLE is used to obtain preliminary estimates of \eqn{\alpha}, \eqn{\beta} and \eqn{\gamma}. The estimate of \eqn{\alpha} is then updated by solving a doubly-robust estimating equation. (The estimate for \eqn{\beta} remains the MLE.)
+#'
+#' @return A list consisting of
+#'  \item{param}{the measure of association.}
+#'
+#' \item{point_est}{ the point estimates.}
+#'
+#' \item{se_est}{the standard error estimates.}
+#'
+#' \item{cov}{estimate of the covariance matrix for the estimates.}
+#'
+#' \item{conf_lower}{ the lower limit of the 95\% (marginal) confidence interval.}
+#'
+#' \item{conf_upper}{ the upper limit of the 95\% (marginal) confidence interval.}
+#'
+#' \item{p_value}{the two sided p-value for testing zero coefficients.}
+#'
+#' \item{coefficients}{ the matrix summarizing key information: point estimate, 95\% confidence interval and p-value.}
+#'
+#' \item{param_est}{the fitted RR/RD/OR.}
+#'
+#' \item{va}{   the matrix of covariates for the target model.}
+#'
+#' \item{vb}{   the matrix of covariates for the nuisance model.}
+#'
+#' \item{converged}{  Did the maximization process converge? }
+#'
+#' @author Linbo Wang <linbo.wang@utoronto.ca>, Mark Clements <mark.clements@ki.se>, Thomas Richardson <thomasr@uw.edu>
+#'
+#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017).
+#'
+#' Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robust estimation in a semiparametric odds ratio model." Biometrika 97.1 (2010): 171-180.
+#'
+#' @seealso \code{get_prob_rd}, and \code{get_prob_rr} for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP);
+#'
+#' \code{predict_brm} for obtaining fitted probabilities from \code{brm} fits.
+#'
+#' @examples
+#' set.seed(0)
+#' n <- 100
+#' alpha_true <- c(0, -1)
+#' beta_true <- c(-0.5, 1)
+#' gamma_true <- c(0.1, -0.5)
+#' params_true <- list(
+#'   alpha_true = alpha_true, beta_true = beta_true,
+#'   gamma_true = gamma_true
+#' )
+#' v.1 <- rep(1, n) # intercept term
+#' v.2 <- runif(n, -2, 2)
+#' v <- cbind(v.1, v.2)
+#' pscore_true <- exp(v %*% gamma_true) / (1 + exp(v %*% gamma_true))
+#' p0p1.true <- get_prob_rr(v %*% alpha_true, v %*% beta_true)
+#' x <- rbinom(n, 1, pscore_true)
+#' pA_true <- p0p1.true[, 1]
+#' pA_true[x == 1] <- p0p1.true[x == 1, 2]
+#' y <- rbinom(n, 1, pA_true)
+#'
+#' fit_mle <- brm(y, x, v, v, "RR", "MLE", v, TRUE)
+#' fit_drw <- brm(y, x, v, v, "RR", "DR", v, TRUE)
+#' fit_dru <- brm(y, x, v, v, "RR", "DR", v, FALSE)
+#'
+#' fit_mle2 <- brm(y, x, ~v.2, ~v.2, "RR", "MLE", ~v.2, TRUE) # same as fit_mle
+#'
+#' @export
+
+
+brm <- function(
+  y, x, va, vb = NULL, param, est_method = "MLE", vc = NULL,
+  optimal = TRUE, weights = NULL, subset = NULL, max_step = NULL, thres = 1e-8,
+  alpha_start = NULL, beta_start = NULL, message = FALSE
+) {
+  # default param = 'RR'; est_method = 'MLE'; va = v; vb = v; vc = v;
+  # weights = NULL; subset = NULL; optimal = TRUE; max_step = NULL;
+  # thres = 1e-06; alpha_start = NULL; beta_start = NULL
+
+  if (is.null(vb)) {
+    vb <- va
+  }
+  if (is.null(vc)) {
+    vc <- va
+  }
+
+  if (class(va)[1] == "formula") va <- stats::model.matrix(va)
+  if (class(vb)[1] == "formula") vb <- stats::model.matrix(vb)
+  if (class(vc)[1] == "formula") vc <- stats::model.matrix(vc)
+
+  if (is.vector(va)) va <- as.matrix(va, ncol = 1)
+  if (is.vector(vb)) vb <- as.matrix(vb, ncol = 1)
+  if (is.vector(vc)) vc <- as.matrix(vc, ncol = 1)
+
+  if (is.null(weights)) {
+    weights <- rep(1, length(y))
+  }
+  if (is.null(subset)) {
+    subset <- 1:length(y)
+  }
+
+  valid_check(
+    param, y, x, va, vb, vc, weights, subset, est_method, optimal,
+    max_step, thres, alpha_start, beta_start
+  )
+
+  data <- cbind(y, x, va, vb, vc, weights)[subset, ]
+  subset <- subset[rowSums(is.na(data)) == 0]
+  y <- y[subset]
+  x <- x[subset]
+  va <- va[subset, , drop = FALSE]
+  vb <- vb[subset, , drop = FALSE]
+  vc <- vc[subset, , drop = FALSE]
+  weights <- weights[subset]
+
+  pa <- dim(va)[2]
+  pb <- dim(vb)[2]
+  if (is.null(max_step)) max_step <- min(pa * 20, 1000)
+
+  if (est_method == "MLE") {
+    sol <- mle_est(
+      param, y, x, va, vb, weights, max_step, thres, alpha_start,
+      beta_start, pa, pb
+    )
+  }
+  if (est_method == "DR") {
+    if (param == "OR") {
+      cat("No doubly robust estimation methods for OR (with propensity score models) are available. Please refer to Tchetgen Tchetgen et al. (2010) for an alternative doubly robust estimation method. \n")
+      return()
+    }
+    if (is.null(alpha_start) | is.null(beta_start)) {
+      sol <- mle_est(
+        param, y, x, va, vb, weights, max_step, thres,
+        alpha_start, beta_start, pa, pb
+      )
+      alpha_ml <- sol$point_est[1:pa]
+      beta_ml <- sol$point_est[(pa + 1):(pa + pb)]
+      beta_cov <- sol$cov[(pa + 1):(pa + pb), (pa + 1):(pa + pb)]
+      alpha_start <- alpha_ml
+    } else {
+      alpha_ml <- alpha_start
+      beta_ml <- beta_start
+      beta_cov <- matrix(NA, pb, pb)
+    }
+
+    gamma_fit <- stats::glm(x ~ vc - 1, weight = weights, family = "binomial")
+    gamma <- gamma_fit$coefficients
+    gamma_cov <- summary(gamma_fit)$cov.unscaled
+    sol <- dr_est(
+      param, y, x, va, vb, vc, alpha_ml, beta_ml, gamma, optimal,
+      weights, max_step, thres, alpha_start, beta_cov, gamma_cov, message
+    )
+  }
+
+  return(sol)
+}
diff --git a/R/mle_est.R b/R/mle_est.R
new file mode 100644
index 0000000..8b538fe
--- /dev/null
+++ b/R/mle_est.R
@@ -0,0 +1,50 @@
+mle_est <- function(
+  param, y, x, va, vb, weights, max_step, thres, alpha_start,
+  beta_start, pa, pb
+) {
+  ## starting values for parameter optimization
+  if (is.null(alpha_start)) {
+    alpha_start <- rep(0, pa)
+  }
+  if (is.null(beta_start)) {
+    beta_start <- rep(0, pb)
+  }
+
+  if (param == "OR") {
+    fit <- stats::glm(y ~ vb - 1 + x * va - va - x,
+      family = "binomial",
+      weights = weights, start = c(beta_start, alpha_start)
+    )
+
+    point_temp <- summary(fit)$coefficients[, 1]
+    index <- c((pb + 1):(pa + pb), 1:pb)
+    point_est <- point_temp[index]
+
+    cov <- stats::vcov(fit)[index, index]
+
+    converged <- fit$converged
+  } else {
+    ### point estimate
+    mle <- mle_point(
+      param, y, x, va, vb, alpha_start, beta_start,
+      weights, max_step, thres, pa, pb
+    )
+    point_est <- mle$par
+    converged <- mle$convergence
+    # print(point_est)
+    alpha_ml <- point_est[1:pa]
+    beta_ml <- point_est[(pa + 1):(pa + pb)]
+
+    ### Computing Fisher Information:
+    if (param == "RR") {
+      cov <- mle_var_rr(x, alpha_ml, beta_ml, va, vb, weights)
+    }
+    if (param == "RD") {
+      cov <- mle_var_rd(x, alpha_ml, beta_ml, va, vb, weights)
+    }
+  }
+
+  name <- paste(c(rep("alpha", pa), rep("beta", pb)), c(1:pa, 1:pb))
+  sol <- wrap_results(point_est, cov, param, name, va, vb, converged)
+  return(sol)
+}
diff --git a/R/mle_point.R b/R/mle_point.R
new file mode 100644
index 0000000..cc28893
--- /dev/null
+++ b/R/mle_point.R
@@ -0,0 +1,67 @@
+mle_point <- function(param, y, x, va, vb, alpha_start, beta_start, weights,
+                      max_step, thres, pa, pb) {
+  startpars <- c(alpha_start, beta_start)
+
+  getProb <- if (param == "RR") get_prob_rr else get_prob_rd
+
+  ## negative log likelihood function
+  neg_log_likelihood <- function(pars) {
+    alpha <- pars[1:pa]
+    beta <- pars[(pa + 1):(pa + pb)]
+    p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta))
+    p0 <- p0p1[, 1]
+    p1 <- p0p1[, 2]
+
+    return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] +
+      (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) - sum((1 - y[x ==
+      1]) * log(1 - p1[x == 1]) * weights[x == 1] + (y[x == 1]) * log(p1[x ==
+      1]) * weights[x == 1]))
+  }
+
+  neg_log_likelihood_alpha <- function(alpha) {
+    p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta))
+    p0 <- p0p1[, 1]
+    p1 <- p0p1[, 2]
+
+    return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] +
+      (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) -
+      sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weights[x == 1] +
+        (y[x == 1]) * log(p1[x == 1]) * weights[x == 1]))
+  }
+
+  neg_log_likelihood_beta <- function(beta) {
+    p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta))
+    p0 <- p0p1[, 1]
+    p1 <- p0p1[, 2]
+
+    return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] +
+      (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) -
+      sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weights[x == 1] +
+        (y[x == 1]) * log(p1[x == 1]) * weights[x == 1]))
+  }
+
+
+  ## Optimization
+
+  Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres)
+  alpha <- alpha_start
+  beta <- beta_start
+  diff <- thres + 1
+  step <- 0
+  while (diff > thres & step < max_step) {
+    step <- step + 1
+    opt1 <- stats::optim(alpha, neg_log_likelihood_alpha, control = list(maxit = max(100, max_step / 10)))
+    diff1 <- Diff(opt1$par, alpha)
+    alpha <- opt1$par
+    opt2 <- stats::optim(beta, neg_log_likelihood_beta, control = list(maxit = max(100, max_step / 10)))
+    diff <- max(diff1, Diff(opt2$par, beta))
+    beta <- opt2$par
+  }
+
+  opt <- list(
+    par = c(alpha, beta), convergence = (step < max_step),
+    value = neg_log_likelihood(c(alpha, beta)), step = step
+  )
+
+  return(opt)
+}
diff --git a/R/mle_var.R b/R/mle_var.R
new file mode 100644
index 0000000..f51e300
--- /dev/null
+++ b/R/mle_var.R
@@ -0,0 +1,46 @@
+### variance calculation
+
+mle_var_rr <- function(x, alpha_ml, beta_ml, va, vb, weights) {
+  p0p1 <- get_prob_rr(va %*% alpha_ml, vb %*% beta_ml)
+  n <- dim(va)[1]
+  pA <- rep(NA, n) # P(Y=1|A,V); here A = X
+  pA[x == 0] <- p0p1[x == 0, 1]
+  pA[x == 1] <- p0p1[x == 1, 2]
+
+  expect_dl_by_dpsi0_squared <- (pA) / (1 - pA)
+  dpsi0_by_dphi <- (1 - p0p1[, 1]) * (1 - p0p1[, 2]) / ((1 - p0p1[, 1]) + (1 -
+    p0p1[, 2]))
+  dpsi0_by_dtheta <- -(1 - p0p1[, 1]) / ((1 - p0p1[, 1]) + (1 - p0p1[, 2]))
+  tmp <- cbind((dpsi0_by_dtheta + x) * va, dpsi0_by_dphi * vb)
+  ## since dtheta_by_dalpha = va, and dphi_by_dbeta = vb
+  fisher_info <- (t(expect_dl_by_dpsi0_squared * weights * tmp) %*% tmp)
+  return(solve(fisher_info))
+}
+
+
+### variance calculation
+
+mle_var_rd <- function(x, alpha_ml, beta_ml, va, vb, weights) {
+  p0p1 <- get_prob_rd(va %*% alpha_ml, vb %*% beta_ml)
+  # p0p1 = cbind(p0, p1): n * 2 matrix
+  p0 <- p0p1[, 1]
+  p1 <- p0p1[, 2]
+  n <- nrow(va)
+  pA <- p0 # P(Y=1|A,V); here A = X
+  pA[x == 1] <- p1[x == 1]
+  s0 <- p0 * (1 - p0)
+  s1 <- p1 * (1 - p1)
+  sA <- pA * (1 - pA)
+
+  rho <- as_vector(tanh(va %*% alpha_ml)) # estimated risk differences
+
+  expect_dl_by_dpA_squared <- 1 / sA
+  dp0_by_dphi <- s0 * s1 / (s0 + s1)
+  dp0_by_drho <- -s0 / (s0 + s1)
+  drho_by_dalpha <- (1 - rho^2) * va
+  dphi_by_dbeta <- vb
+
+  tmp <- cbind((dp0_by_drho + x) * drho_by_dalpha, dp0_by_dphi * dphi_by_dbeta)
+  fisher_info <- (t(expect_dl_by_dpA_squared * weights * tmp) %*% tmp)
+  return(solve(fisher_info))
+}
diff --git a/R/predict.brm.R b/R/predict.brm.R
deleted file mode 100644
index e98bc0a..0000000
--- a/R/predict.brm.R
+++ /dev/null
@@ -1,71 +0,0 @@
-#'  Fitted probabilities from \code{brm} fits
-#' 
-#' @description  Calculate fitted probabilities from a fitted binary regression model object.
-#' 
-#' @param object    A fitted object from function \code{brm}.
-#' 
-#' @param va.new    An optional covariate matrix to make predictions with. If omitted, the original matrix va is used.
-#' 
-#' @param vb.new    An optional covariate matrix to make predictions with. If vb.new is omitted but va.new is not, then vb.new is set to be equal to va.new. If both vb.new and va.new are omitted, then the original matrix vb is used.
-#' 
-#' @param x.new     An optional vector of x.
-#' 
-#' @param ...    affecting the predictions produced.
-#' 
-#' @return If x.new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb).
-#' 
-#' If x.new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x.new,va,vb).
-#' 
-#' @export
-
-
-predict.brm = function(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) {
-    
-    va = object$va
-    vb = object$vb
-    
-    if(is.null(vb.new)){
-        if(is.null(va.new)){
-            vb.new = vb
-        }else{
-            vb.new = va.new
-        }
-    }
-    if(is.null(va.new)) va.new = va
-    
-    n  = nrow(va.new)
-    pa = ncol(va.new)
-    pb = ncol(vb.new)
-    alpha.est = object$point.est[1:pa]
-    beta.est  = object$point.est[(pa+1):(pa+pb)]
-    
-    linear.predictors = cbind(va.new %*% alpha.est, vb.new %*% beta.est)
-    if(object$param=="RR") 
-        p0p1 = getProbRR(linear.predictors)
-    if(object$param=="RD") 
-        p0p1 = getProbRD(linear.predictors)
-    if(object$param=="OR"){
-        p0 = expit(linear.predictors[,2])
-        or = exp(linear.predictors[,1])
-        odds1 = or * (p0 / (1-p0))
-        p1 = odds1 / (1+odds1)
-        p0p1 = cbind(p0, p1)
-    }
-    colnames(p0p1) = c("p0", "p1")
-    
-    if(!is.null(x.new)){
-        px = rep(NA, n)     
-        px[x.new == 0] = p0p1[x.new == 0, 1]
-        px[x.new == 1] = p0p1[x.new == 1, 2]
-        return(px)
-    }else{
-        return(p0p1)
-    }
-    
-} 
-
-
-
-
-
-
diff --git a/R/predict_brm.R b/R/predict_brm.R
new file mode 100644
index 0000000..2773dd3
--- /dev/null
+++ b/R/predict_brm.R
@@ -0,0 +1,65 @@
+#'  Fitted probabilities from \code{brm} fits
+#'
+#' @description  Calculate fitted probabilities from a fitted binary regression model object.
+#'
+#' @param object    A fitted object from function \code{brm}.
+#'
+#' @param va_new    An optional covariate matrix to make predictions with. If omitted, the original matrix va is used.
+#'
+#' @param vb_new    An optional covariate matrix to make predictions with. If vb_new is omitted but va_new is not, then vb_new is set to be equal to va_new. If both vb_new and va_new are omitted, then the original matrix vb is used.
+#'
+#' @param x_new     An optional vector of x.
+#'
+#' @param ...    affecting the predictions produced.
+#'
+#' @return If x_new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb).
+#'
+#' If x_new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x_new,va,vb).
+#'
+#' @export
+
+
+predict_brm <- function(object, x_new = NULL, va_new = NULL, vb_new = NULL, ...) {
+  va <- object$va
+  vb <- object$vb
+
+  if (is.null(vb_new)) {
+    if (is.null(va_new)) {
+      vb_new <- vb
+    } else {
+      vb_new <- va_new
+    }
+  }
+  if (is.null(va_new)) va_new <- va
+
+  n <- nrow(va_new)
+  pa <- ncol(va_new)
+  pb <- ncol(vb_new)
+  alpha_est <- object$point_est[1:pa]
+  beta_est <- object$point_est[(pa + 1):(pa + pb)]
+
+  linear_predictors <- cbind(va_new %*% alpha_est, vb_new %*% beta_est)
+  if (object$param == "RR") {
+    p0p1 <- get_prob_rr(linear_predictors)
+  }
+  if (object$param == "RD") {
+    p0p1 <- get_prob_rd(linear_predictors)
+  }
+  if (object$param == "OR") {
+    p0 <- expit(linear_predictors[, 2])
+    or <- exp(linear_predictors[, 1])
+    odds1 <- or * (p0 / (1 - p0))
+    p1 <- odds1 / (1 + odds1)
+    p0p1 <- cbind(p0, p1)
+  }
+  colnames(p0p1) <- c("p0", "p1")
+
+  if (!is.null(x_new)) {
+    px <- rep(NA, n)
+    px[x_new == 0] <- p0p1[x_new == 0, 1]
+    px[x_new == 1] <- p0p1[x_new == 1, 2]
+    return(px)
+  } else {
+    return(p0p1)
+  }
+}
diff --git a/R/util.R b/R/util.R
new file mode 100644
index 0000000..dce7971
--- /dev/null
+++ b/R/util.R
@@ -0,0 +1,153 @@
+expit <- function(logodds) {
+  1 / (1 + exp(-logodds))
+}
+
+## Function for checking if two things are equal within numerical precision
+same <- function(x, y, tolerance = .Machine$double.eps^0.5) {
+  abs(x - y) < tolerance
+}
+
+
+## Functions for wrapping estimation results into a nice format
+wrap_results <- function(point_est, cov, param, name, va, vb, converged) {
+  se_est <- sqrt(diag(cov))
+
+  conf_lower <- point_est + stats::qnorm(0.025) * se_est
+  conf_upper <- point_est + stats::qnorm(0.975) * se_est
+  p_temp <- stats::pnorm(point_est / se_est, 0, 1)
+  p_value <- 2 * pmin(p_temp, 1 - p_temp)
+
+  names(point_est) <- names(se_est) <- rownames(cov) <- colnames(cov) <- names(conf_lower) <- names(conf_upper) <- names(p_value) <- name
+
+  coefficients <- cbind(point_est, se_est, conf_lower, conf_upper, p_value)
+
+  linear_predictors <- va %*% point_est[1:ncol(va)]
+  if (param == "RR") param_est <- exp(linear_predictors)
+  if (param == "RD") param_est <- linear_predictors
+  if (param == "OR") param_est <- expit(linear_predictors)
+
+  sol <- list(
+    param = param, point_est = point_est, se_est = se_est, cov = cov,
+    conf_lower = conf_lower, conf_upper = conf_upper, p_value = p_value,
+    coefficients = coefficients, param_est = param_est, va = va, vb = vb,
+    converged = converged
+  )
+  class(sol) <- c("brm", "list")
+  attr(sol, "hidden") <- c(
+    "param", "se_est", "cov", "conf_lower", "conf_upper",
+    "p_value", "coefficients", "param_est", "va", "vb", "converged"
+  )
+
+  return(sol)
+}
+
+
+## This function is useful for finding limits on the boundary
+## It gives 0.5*exp(x)*(-1+sqrt(1+4exp(-x)))
+## This is bounded between 0 and 1, and takes value (-1+sqrt(5))/2 at x=0 (Some relation to golden ratio)
+## Limits are 0 and 1 as x goes to -infty and +infty respectively
+## The function will never return NaN given a numerical input
+
+get_prb_aux <- function(x) {
+  ifelse((x < 17) & (x > (-500)),
+    0.5 * exp(x) * (-1 + (1 + 4 * exp(-x))^0.5),
+    ifelse(x < 0, 0, 1)
+  )
+}
+
+
+valid_check <- function(param, y, x, va, vb, vc, weights, subset, est_method,
+                        optimal, max_step, thres, alpha_start, beta_start) {
+  if (!is.character(param)) {
+    stop("Parameter must be a character")
+  }
+  if (!(param %in% c("RD", "RR", "OR"))) {
+    stop("Parameter can only take RR, RD or OR")
+  }
+
+  if (sum(is.na(y)) + sum(is.na(x)) + sum(is.na(va)) + sum(is.na(vb)) +
+    sum(is.na(vc)) + sum(is.na(weights)) > 0) {
+    warning("Observations with missing values will be removed.")
+  }
+  if (!(all(y %in% c(0, 1)))) {
+    stop("y values must be either 0 or 1.")
+  }
+  if (!(all(x %in% c(0, 1)))) {
+    stop("x values must be either 0 or 1.")
+  }
+  if (!identical(length(y), length(x), dim(va)[1], dim(vb)[1], dim(vc)[1])) {
+    stop("y, x and v must have the same length (dimension)")
+  }
+
+  if (!is.numeric(weights)) {
+    stop("weights must either be NULL or take numerical values")
+  }
+  if (!is.numeric(subset)) {
+    stop("subset must either be NULL or take numerical values")
+  }
+  if (!(est_method %in% c("MLE", "DR"))) {
+    stop("Must use MLE or DR for estimation")
+  }
+  if (!is.logical(optimal)) {
+    stop("optimal must be a logical variable")
+  }
+  if (!is.numeric(max_step) & !is.null(max_step)) {
+    stop("max_step must be a number")
+  }
+  if (!is.numeric(thres)) {
+    stop("thres must be a number")
+  }
+  if (!is.null(alpha_start) & length(alpha_start) != dim(va)[2]) {
+    stop("length of alpha_start must match the dimension of va")
+  }
+  if (!is.null(beta_start) & length(beta_start) != dim(vb)[2]) {
+    stop("length of beta_start must match the dimension of vb")
+  }
+}
+
+#' Ancillary function for printing
+#'
+#' @param x a list obtained with the function 'brm'
+#'
+#' @param ... additional arguments affecting the output
+#'
+#' @export
+
+print_brm <- function(x, ...) {
+  hid <- attr(x, "hidden")
+  nhid <- which(!names(x) %in% hid)
+
+  if (x$param == "RR") {
+    cat("Parameter of interest: (conditional) relative risk;", "\n", "nuisance parameter: odds product.",
+      "\n\n",
+      sep = ""
+    )
+    cat("Target model:   log(RR) = alpha * va", "\n")
+    cat("Nuisance model: log(OP) = beta * vb", "\n\n")
+  }
+  if (x$param == "RD") {
+    cat("Parameter of interest: (conditional) risk difference;", "\n",
+      "nuisance parameter: odds product.", "\n\n",
+      sep = ""
+    )
+    cat("Target model:   log(RD) = alpha * va", "\n")
+    cat("Nuisance model: log(OP) = beta * vb", "\n\n")
+  }
+  if (x$param == "OR") {
+    cat("Parameter of interest: (conditional) odds ratio;", "\n", "nuisance parameter: baseline risk.",
+      "\n\n",
+      sep = ""
+    )
+    cat("Target model:   log(OR) = alpha * va", "\n")
+    cat("Nuisance model: log(p0) = beta * vb", "\n\n")
+  }
+
+
+  for (i in nhid) {
+    x[[i]] <- round(x[[i]], 3)
+  }
+
+  print(x[nhid], 3)
+
+  cat("See the element '$coefficients' for more information.\n")
+}
diff --git a/R/var_rd_dr.R b/R/var_rd_dr.R
new file mode 100644
index 0000000..6f75d08
--- /dev/null
+++ b/R/var_rd_dr.R
@@ -0,0 +1,122 @@
+## Sandwich estimator for variance of RD
+
+var_rd_dr <- function(
+  y, x, va, vb, vc, alpha_dr, alpha_ml, beta_ml, gamma,
+  optimal, weights
+) {
+  ########################################
+
+  pscore <- as.vector(expit(vc %*% gamma))
+  n <- length(pscore)
+
+  ### 1. - E[dS/d(alpha_ml,beta_ml)] ############################## Computing
+  ### the Hessian:
+
+  Hrd <- hessian_2_rd(y, x, va, vb, alpha_ml, beta_ml, weights)
+  hessian <- Hrd$hessian
+  p0 <- Hrd$p0
+  p1 <- Hrd$p1
+  pA <- Hrd$pA
+  s0 <- Hrd$s0
+  s1 <- Hrd$s1
+  sA <- Hrd$sA
+  rho <- Hrd$rho
+  dl_by_dpA <- Hrd$dl_by_dpA
+  dp0_by_dphi <- Hrd$dp0_by_dphi
+  dp0_by_drho <- Hrd$dp0_by_drho
+  drho_by_dalpha <- Hrd$drho_by_dalpha
+  dphi_by_dbeta <- Hrd$dphi_by_dbeta
+  dpA_by_drho <- Hrd$dpA_by_drho
+  dpA_by_dalpha <- Hrd$dpA_by_dalpha
+  dpA_by_dphi <- Hrd$dpA_by_dphi
+  dpA_by_dbeta <- Hrd$dpA_by_dbeta
+
+
+  ############# extra building blocks ##########################
+
+  H_alpha <- y - x * as.vector(tanh(va %*% alpha_dr))
+
+  ############# Calculation of optimal vector (used in several places below) ##
+
+  if (optimal == TRUE) {
+    wt <- (1 - rho^2) / (pscore * s0 + (1 - pscore) * s1)
+  } else {
+    wt <- rep(1, n)
+  }
+
+
+  ### 2. -E[dU_by_dalphaml_betaml] ####################################
+
+  dU_by_dp0 <- -va * wt * (x - pscore) # n by 2
+  dp0_by_dalpha_beta <- cbind(drho_by_dalpha * dp0_by_drho, dphi_by_dbeta *
+    dp0_by_dphi) # n by 4
+
+  dU_by_dwt <- va * (x - pscore) * (H_alpha - p0) # n by 2
+
+  if (optimal == TRUE) {
+    esA <- pscore * s0 + (1 - pscore) * s1 # E[s_{1-A}]...
+    dwt_by_drho <- (-2 * rho * esA - (1 - rho^2) * (1 - pscore) * (1 -
+      2 * p1)) / esA^2
+    dwt_by_dp0 <- -(1 - rho^2) * (2 * pscore * rho + 1 - 2 * p1) / esA^2
+
+    dwt_by_dalpha <- drho_by_dalpha * (dwt_by_drho + dwt_by_dp0 * dp0_by_drho)
+    dwt_by_dbeta <- dphi_by_dbeta * dwt_by_dp0 * dp0_by_dphi
+
+    dwt_by_dalpha_beta <- cbind(dwt_by_dalpha, dwt_by_dbeta) # n by 4
+  } else {
+    dwt_by_dalpha_beta <- matrix(0, n, ncol(va) + ncol(vb))
+  }
+
+  dU_by_dalpha_ml_beta_ml <- t(dU_by_dp0 * weights) %*% (dp0_by_dalpha_beta) +
+    t(dU_by_dwt * weights) %*% dwt_by_dalpha_beta
+
+
+  ### 3. tau = -E[dU/dalpha_dr] ######################################## (This
+  ### is the bread of the sandwich estimate)
+
+  dU_by_dH <- va * wt * (x - pscore) # n by 2
+  rho_dr <- as.vector(tanh(va %*% alpha_dr))
+  dH_by_dalpha_dr <- -va * x * (1 - rho_dr^2)
+
+  tau <- -t(dU_by_dH * weights) %*% dH_by_dalpha_dr / sum(weights) # 2 by 2
+
+
+  ### 4. E[d(prop score score equation)/dgamma]
+
+  dpscore_by_dgamma <- vc * pscore * (1 - pscore) # n by 2
+  part4 <- -t(vc * weights) %*% dpscore_by_dgamma # 2 by 2
+
+
+  ### 5. E[dU/dgamma]
+
+  dU_by_dpscore <- -va * wt * (H_alpha - p0) # n by 2
+
+  if (optimal == TRUE) {
+    dwt_by_dpscore <- -(1 - rho^2) * (s0 - s1) / esA^2
+    dwt_by_dgamma <- dpscore_by_dgamma * dwt_by_dpscore # n by 2
+  } else {
+    dwt_by_dgamma <- matrix(0, n, ncol(vc))
+  }
+
+  dU_by_dgamma <- t(dU_by_dpscore * weights) %*% dpscore_by_dgamma + t(dU_by_dwt *
+    weights) %*% dwt_by_dgamma # 2 by 2
+
+
+  ############################################################################# Assembling semi-parametric variance matrix
+
+  U <- va * wt * (x - pscore) * (H_alpha - p0) # n by 2
+
+  S <- cbind(dpA_by_dalpha, dpA_by_dbeta) * dl_by_dpA
+
+  pscore_score <- vc * (x - pscore)
+
+  Utilde <- U - t(dU_by_dalpha_ml_beta_ml %*% (-solve(hessian)) %*% t(S)) -
+    t(dU_by_dgamma %*% (solve(part4)) %*% t(pscore_score)) # n by 2
+  USigma <- t(Utilde * weights) %*% Utilde / sum(weights)
+
+
+  ################################### Asymptotic var matrix for alpha_dr
+
+  alpha_dr_variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights)
+  return(alpha_dr_variance)
+}
diff --git a/R/var_rr_dr.R b/R/var_rr_dr.R
new file mode 100644
index 0000000..feac259
--- /dev/null
+++ b/R/var_rr_dr.R
@@ -0,0 +1,113 @@
+## Sandwich estimator for variance of RR
+
+var_rr_dr <- function(
+  y, x, va, vb, vc, alpha_dr, alpha_ml, beta_ml, gamma,
+  optimal, weights
+) {
+  ########################################
+
+  pscore <- as.vector(expit(vc %*% gamma))
+  n <- length(pscore)
+
+  ### 1. - E[dS/d(alpha_ml,beta_ml)] ############################## Computing
+  ### the Hessian:
+
+  Hrr <- hessian_2_rr(y, x, va, vb, alpha_ml, beta_ml, weights)
+  hessian <- Hrr$hessian
+  p0 <- Hrr$p0
+  p1 <- Hrr$p1
+  pA <- Hrr$pA
+  dpsi0_by_dtheta <- Hrr$dpsi0_by_dtheta
+  dpsi0_by_dphi <- Hrr$dpsi0_by_dphi
+  dtheta_by_dalpha <- Hrr$dtheta_by_dalpha
+  dphi_by_dbeta <- Hrr$dphi_by_dbeta
+  dl_by_dpsi0 <- Hrr$dl_by_dpsi0
+
+  ############# extra building blocks ##########################
+
+  H_alpha <- y * exp(-x * (as.vector(va %*% alpha_dr)))
+
+  ############# Calculation of optimal vector (used in several places below) ##
+
+  if (optimal == TRUE) {
+    theta <- as.vector(va %*% alpha_ml) # avoid n by 1 matrix
+    dtheta_by_dalpha_beta <- cbind(va, matrix(0, n, length(beta_ml)))
+    wt <- 1 / (1 - p0 + (1 - pscore) * (exp(-theta) - 1))
+  } else {
+    wt <- rep(1, n)
+  }
+
+
+  ### 2. -E[dU_by_dalphaml_betaml] ####################################
+
+  dU_by_dp0 <- -va * wt * (x - pscore) # n by 2
+  dp0_by_dpsi0 <- p0
+  dpsi0_by_dalpha_beta <- cbind(dpsi0_by_dtheta * dtheta_by_dalpha, dpsi0_by_dphi *
+    dphi_by_dbeta) # n by 4
+  # 4 = 2 (alpha) + 2 (beta)
+  dp0_by_dalpha_beta <- dpsi0_by_dalpha_beta * dp0_by_dpsi0 # n by 4
+
+  dU_by_dwt <- va * (x - pscore) * (H_alpha - p0) # n by 2
+  dwt_by_dwti <- -wt^2 # n
+  # wti is short for wt_inv
+  dU_by_dwti <- dU_by_dwt * dwt_by_dwti # n by 2
+  if (optimal == TRUE) {
+    dwti_by_dalpha_beta <- -dp0_by_dalpha_beta - (1 - pscore) * exp(-theta) *
+      dtheta_by_dalpha_beta # n by 4
+  } else {
+    dwti_by_dalpha_beta <- matrix(0, n, ncol(va) + ncol(vb))
+  }
+
+  dU_by_dalpha_ml_beta_ml <- t(dU_by_dp0 * weights) %*% dp0_by_dalpha_beta +
+    t(dU_by_dwti * weights) %*% dwti_by_dalpha_beta
+
+
+  ### 3. tau = -E[dU/dalpha_dr] ######################################## (This
+  ### is the bread of the sandwich estimate)
+
+  dU_by_dH <- va * wt * (x - pscore) # n by 2
+  dH_by_dalpha_dr <- -va * x * H_alpha # n by 2
+
+  tau <- -t(dU_by_dH * weights) %*% dH_by_dalpha_dr / sum(weights) # 2 by 2
+
+
+  ### 4. E[d(prop score score equation)/dgamma]
+
+  dpscore_by_dgamma <- vc * pscore * (1 - pscore) # n by 2
+  part4 <- -t(vc * weights) %*% dpscore_by_dgamma # 2 by 2
+
+
+  ### 5. E[dU/dgamma]
+
+  dU_by_dpscore <- -va * wt * (H_alpha - p0) # n by 2
+
+  if (optimal == TRUE) {
+    dwti_by_dpscore <- 1 - exp(-theta) # n
+    dwti_by_dgamma <- dpscore_by_dgamma * dwti_by_dpscore # n by 2
+  } else {
+    dwti_by_dgamma <- matrix(0, n, ncol(vc))
+  }
+
+  dU_by_dgamma <- t(dU_by_dpscore * weights) %*% dpscore_by_dgamma + t(dU_by_dwti *
+    weights) %*% dwti_by_dgamma # 2 by 2
+
+
+  ############################################################################# Assembling semi-parametric variance matrix
+
+  U <- va * wt * (x - pscore) * (H_alpha - p0) # n by 2
+
+  S <- cbind(dl_by_dpsi0 * (dpsi0_by_dtheta + x) * dtheta_by_dalpha, dl_by_dpsi0 *
+    dpsi0_by_dphi * dphi_by_dbeta)
+  pscore_score <- vc * (x - pscore)
+
+  Utilde <- U - t(dU_by_dalpha_ml_beta_ml %*% (-solve(hessian)) %*% t(S)) -
+    t(dU_by_dgamma %*% (solve(part4)) %*% t(pscore_score)) # n by 2
+  USigma <- t(Utilde * weights) %*% Utilde / sum(weights)
+
+
+  ################################### Asymptotic var matrix for alpha_dr
+
+  alpha_dr_variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights)
+
+  return(alpha_dr_variance)
+}
diff --git a/man/brm-package.Rd b/man/brm-package.Rd
index 761054f..6f9265b 100644
--- a/man/brm-package.Rd
+++ b/man/brm-package.Rd
@@ -9,12 +9,12 @@ The function \code{brm} in this package provides an alternative to generalized l
 
 Unlike \code{glm}, which uses a single link function for the outcome, \code{brm} separates the nuisance model from the target model. This separation provides opportunities to choose nuisance models independently of the target model. To see why this is important, we may contrast it with the use of a GLM to model the log relative risk. In this setting one might use a Poisson regression (with interaction term) \eqn{log P(y = 1|x, va, vb) = \alpha * x * va + \beta * vb} (though such a model ignores the fact that \eqn{y} is binary); here \eqn{va} and \eqn{vb} are subsets of \eqn{v}. Such a Poisson model can be seen as a combination of two parts: a target model \eqn{log RR(va) = \alpha * va} and a nuisance model \eqn{log P(y = 1|x = 0, vb) = \beta *  vb}. However, this nuisance model is variation dependent of the target model so that predicted probabilities may go outside of \eqn{[0,1]}. Furthermore, one cannot solve this problem under a GLM framework as with a GLM, the target model and nuisance model are determined  \emph{simultaneously} through a link function.
 
-  More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP).  If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}). 
-  
-   \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. \code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation. 
-   
+  More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP).  If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}).
+
+   \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. \code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation.
+
    \code{brm} also provides doubly robust fitting as an option such that the estimates for \eqn{\alpha} are still consistent and asymptotically normal even when the nuisance model is misspecified, provided that we have a correctly specified logistic model for the exposure probability \eqn{P(x=1|v)}. Such doubly robust estimation is only possible for the Relative Risk and Risk Difference, but not the Odds Ratio.
-   
+
   See Richardson et al. (2017) for more details.
 }
 \references{
diff --git a/man/brm.Rd b/man/brm.Rd
index 6454307..db9e623 100644
--- a/man/brm.Rd
+++ b/man/brm.Rd
@@ -10,15 +10,15 @@ brm(
   va,
   vb = NULL,
   param,
-  est.method = "MLE",
+  est_method = "MLE",
   vc = NULL,
   optimal = TRUE,
   weights = NULL,
   subset = NULL,
-  max.step = NULL,
+  max_step = NULL,
   thres = 1e-08,
-  alpha.start = NULL,
-  beta.start = NULL,
+  alpha_start = NULL,
+  beta_start = NULL,
   message = FALSE
 )
 }
@@ -34,7 +34,7 @@ brm(
 \item{param}{The measure of association. Can take value 'RD' (risk difference),
 'RR' (relative risk) or 'OR' (odds ratio)}
 
-\item{est.method}{The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation).}
+\item{est_method}{The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation).}
 
 \item{vc}{The covariates matrix for the probability of exposure, often called the propensity score. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. By default we fit a logistic regression model for the propensity score. (If not specified, defaults to va.)}
 
@@ -44,35 +44,35 @@ brm(
 
 \item{subset}{An optional vector specifying a subset of observations to be used in the fitting process.}
 
-\item{max.step}{The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000.}
+\item{max_step}{The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000.}
 
 \item{thres}{Threshold for judging convergence. The default is 1e-6.}
 
-\item{alpha.start}{Starting values for the parameters in the target model.}
+\item{alpha_start}{Starting values for the parameters in the target model.}
 
-\item{beta.start}{Starting values for the parameters in the nuisance model.}
+\item{beta_start}{Starting values for the parameters in the nuisance model.}
 
 \item{message}{Show optimization details? Ignored if the estimation method is 'MLE'. The default is FALSE.}
 }
 \value{
 A list consisting of
- \item{param}{the measure of association.} 
+ \item{param}{the measure of association.}
 
-\item{point.est}{ the point estimates.}
+\item{point_est}{ the point estimates.}
 
-\item{se.est}{the standard error estimates.}
+\item{se_est}{the standard error estimates.}
 
 \item{cov}{estimate of the covariance matrix for the estimates.}
 
-\item{conf.lower}{ the lower limit of the 95\% (marginal) confidence interval.}
+\item{conf_lower}{ the lower limit of the 95\% (marginal) confidence interval.}
 
-\item{conf.upper}{ the upper limit of the 95\% (marginal) confidence interval.}
+\item{conf_upper}{ the upper limit of the 95\% (marginal) confidence interval.}
 
-\item{p.value}{the two sided p-value for testing zero coefficients.}
+\item{p_value}{the two sided p-value for testing zero coefficients.}
 
 \item{coefficients}{ the matrix summarizing key information: point estimate, 95\% confidence interval and p-value.}
 
-\item{param.est}{the fitted RR/RD/OR.}
+\item{param_est}{the fitted RR/RD/OR.}
 
 \item{va}{   the matrix of covariates for the target model.}
 
@@ -84,44 +84,46 @@ A list consisting of
 \code{brm} is used to estimate the association between two binary variables, and how that varies as a function of other covariates.
 }
 \details{
-\code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation. 
-If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}. 
-If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}. 
-If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}. 
-For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}. 
+\code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation.
+If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}.
+If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}.
+If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}.
+For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}.
 For OR, the nuisance model is for the baseline risk: \eqn{logit(P(y=1|x=0,vb)) = \beta*vb.}
-In each case the nuisance model is variation independent of the target model, which  ensures that the predicted probabilities lie in \eqn{[0,1]}. 
+In each case the nuisance model is variation independent of the target model, which  ensures that the predicted probabilities lie in \eqn{[0,1]}.
 See Richardson et al. (2016+) for more details.
 
-If \code{est.method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details.
+If \code{est_method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details.
 
-When estimating RR and RD, \code{est.method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency.
+When estimating RR and RD, \code{est_method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency.
 
 For the doubly robust estimation method, MLE is used to obtain preliminary estimates of \eqn{\alpha}, \eqn{\beta} and \eqn{\gamma}. The estimate of \eqn{\alpha} is then updated by solving a doubly-robust estimating equation. (The estimate for \eqn{\beta} remains the MLE.)
 }
 \examples{
 set.seed(0)
-n           = 100
-alpha.true  = c(0,-1)
-beta.true   = c(-0.5,1)
-gamma.true  = c(0.1,-0.5)
-params.true = list(alpha.true=alpha.true, beta.true=beta.true, 
-     gamma.true=gamma.true)
-v.1         = rep(1,n)       # intercept term
-v.2         = runif(n,-2,2) 
-v           = cbind(v.1,v.2)
-pscore.true = exp(v \%*\% gamma.true) / (1+exp(v \%*\% gamma.true))
-p0p1.true   = getProbRR(v \%*\% alpha.true,v \%*\% beta.true)
-x           = rbinom(n, 1, pscore.true)  
-pA.true       = p0p1.true[,1]
-pA.true[x==1] = p0p1.true[x==1,2]
-y = rbinom(n, 1, pA.true)
-
-fit.mle = brm(y,x,v,v,'RR','MLE',v,TRUE)
-fit.drw = brm(y,x,v,v,'RR','DR',v,TRUE)
-fit.dru = brm(y,x,v,v,'RR','DR',v,FALSE)
-
-fit.mle2 = brm(y,x,~v.2, ~v.2, 'RR','MLE', ~v.2,TRUE) # same as fit.mle
+n <- 100
+alpha_true <- c(0, -1)
+beta_true <- c(-0.5, 1)
+gamma_true <- c(0.1, -0.5)
+params_true <- list(
+  alpha_true = alpha_true, beta_true = beta_true,
+  gamma_true = gamma_true
+)
+v.1 <- rep(1, n) # intercept term
+v.2 <- runif(n, -2, 2)
+v <- cbind(v.1, v.2)
+pscore_true <- exp(v \%*\% gamma_true) / (1 + exp(v \%*\% gamma_true))
+p0p1.true <- get_prob_rr(v \%*\% alpha_true, v \%*\% beta_true)
+x <- rbinom(n, 1, pscore_true)
+pA_true <- p0p1.true[, 1]
+pA_true[x == 1] <- p0p1.true[x == 1, 2]
+y <- rbinom(n, 1, pA_true)
+
+fit_mle <- brm(y, x, v, v, "RR", "MLE", v, TRUE)
+fit_drw <- brm(y, x, v, v, "RR", "DR", v, TRUE)
+fit_dru <- brm(y, x, v, v, "RR", "DR", v, FALSE)
+
+fit_mle2 <- brm(y, x, ~v.2, ~v.2, "RR", "MLE", ~v.2, TRUE) # same as fit_mle
 
 }
 \references{
@@ -130,9 +132,9 @@ Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimatio
 Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robust estimation in a semiparametric odds ratio model." Biometrika 97.1 (2010): 171-180.
 }
 \seealso{
-\code{getProbScalarRD}, \code{getProbRD} (vectorised), \code{getProbScalarRR} and \code{getProbRR} (vectorised) for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP);
+\code{get_prob_rd}, and \code{get_prob_rr} for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP);
 
-\code{predict.blm} for obtaining fitted probabilities from \code{brm} fits.
+\code{predict_brm} for obtaining fitted probabilities from \code{brm} fits.
 }
 \author{
 Linbo Wang <linbo.wang@utoronto.ca>, Mark Clements <mark.clements@ki.se>, Thomas Richardson <thomasr@uw.edu>
diff --git a/man/getProbRR.Rd b/man/getProbRR.Rd
deleted file mode 100644
index 838daee..0000000
--- a/man/getProbRR.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/getProbScalarRR.R
-\name{getProbRR}
-\alias{getProbRR}
-\title{Calculate risks from log RR and log OP (vectorised)}
-\usage{
-getProbRR(logrr, logop = NA)
-}
-\arguments{
-\item{logrr}{log of relative risk}
-
-\item{logop}{log of odds product}
-}
-\value{
-a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns
-}
-\description{
-Calculate risks from log RR and log OP (vectorised)
-}
-\details{
-The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}.
-}
-\examples{
-getProbRR(0,0)
-
-set.seed(0)
-logrr = rnorm(10,0,1)
-logop = rnorm(10,0,1)
-probs = getProbRR(logrr, logop)
-colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)")
-probs
-
-}
diff --git a/man/getProbScalarRD.Rd b/man/getProbScalarRD.Rd
deleted file mode 100644
index d997cb6..0000000
--- a/man/getProbScalarRD.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/getProbScalarRD.R
-\name{getProbScalarRD}
-\alias{getProbScalarRD}
-\title{Calculate risks from arctanh RD and log OP}
-\usage{
-getProbScalarRD(atanhrd, logop)
-}
-\arguments{
-\item{atanhrd}{arctanh of risk difference}
-
-\item{logop}{log of odds product}
-}
-\value{
-a vector \eqn{(P(y=1|x=0),P(y=1|x=1))}
-}
-\description{
-Calculate risks from arctanh RD and log OP
-}
-\details{
-The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. 
-The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}.
-}
-\examples{
-getProbScalarRD(0,0)
-
-set.seed(0)
-logrr = rnorm(10,0,1)
-logop = rnorm(10,0,1)
-probs = mapply(getProbScalarRD, logrr, logop)
-rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)")
-probs
-
-}
diff --git a/man/getProbRD.Rd b/man/get_prob_rd.Rd
similarity index 65%
rename from man/getProbRD.Rd
rename to man/get_prob_rd.Rd
index 14e7292..fb193b8 100644
--- a/man/getProbRD.Rd
+++ b/man/get_prob_rd.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/getProbScalarRD.R
-\name{getProbRD}
-\alias{getProbRD}
+% Please edit documentation in R/get_prob_rd.R
+\name{get_prob_rd}
+\alias{get_prob_rd}
 \title{Calculate risks from arctanh RD and log OP (vectorised)}
 \usage{
-getProbRD(atanhrd, logop)
+get_prob_rd(atanhrd, logop)
 }
 \arguments{
 \item{atanhrd}{arctanh of risk difference}
@@ -18,17 +18,17 @@ a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns
 Calculate risks from arctanh RD and log OP (vectorised)
 }
 \details{
-The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. 
+The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}.
 The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}.
 }
 \examples{
-getProbRD(0,0)
+get_prob_rd(0, 0)
 
 set.seed(0)
-logrr = rnorm(10,0,1)
-logop = rnorm(10,0,1)
-probs = getProbRD(logrr, logop)
-colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)")
+logrr <- rnorm(10, 0, 1)
+logop <- rnorm(10, 0, 1)
+probs <- get_prob_rd(logrr, logop)
+colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)")
 probs
 
 }
diff --git a/man/getProbScalarRR.Rd b/man/get_prob_rr.Rd
similarity index 54%
rename from man/getProbScalarRR.Rd
rename to man/get_prob_rr.Rd
index be6e84f..29f729a 100644
--- a/man/getProbScalarRR.Rd
+++ b/man/get_prob_rr.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/getProbScalarRR.R
-\name{getProbScalarRR}
-\alias{getProbScalarRR}
+% Please edit documentation in R/get_prob_rr.R
+\name{get_prob_rr}
+\alias{get_prob_rr}
 \title{Calculate risks from log RR and log OP}
 \usage{
-getProbScalarRR(logrr, logop = NA)
+get_prob_rr(logrr, logop = NA)
 }
 \arguments{
 \item{logrr}{log of relative risk}
@@ -12,7 +12,7 @@ getProbScalarRR(logrr, logop = NA)
 \item{logop}{log of odds product}
 }
 \value{
-a vector \eqn{(P(y=1|x=0),P(y=1|x=1))}
+a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns
 }
 \description{
 Calculate risks from log RR and log OP
@@ -21,13 +21,13 @@ Calculate risks from log RR and log OP
 The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}.
 }
 \examples{
-getProbScalarRR(0,0)
+get_prob_rr(0, 0)
 
 set.seed(0)
-logrr = rnorm(10,0,1)
-logop = rnorm(10,0,1)
-probs = mapply(getProbScalarRR, logrr, logop)
-rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)")
+logrr <- rnorm(10, 0, 1)
+logop <- rnorm(10, 0, 1)
+probs <- get_prob_rr(logrr, logop)
+colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)")
 probs
 
 }
diff --git a/man/predict.brm.Rd b/man/predict.brm.Rd
deleted file mode 100644
index a83cdaa..0000000
--- a/man/predict.brm.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/predict.brm.R
-\name{predict.brm}
-\alias{predict.brm}
-\title{Fitted probabilities from \code{brm} fits}
-\usage{
-\method{predict}{brm}(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...)
-}
-\arguments{
-\item{object}{A fitted object from function \code{brm}.}
-
-\item{x.new}{An optional vector of x.}
-
-\item{va.new}{An optional covariate matrix to make predictions with. If omitted, the original matrix va is used.}
-
-\item{vb.new}{An optional covariate matrix to make predictions with. If vb.new is omitted but va.new is not, then vb.new is set to be equal to va.new. If both vb.new and va.new are omitted, then the original matrix vb is used.}
-
-\item{...}{affecting the predictions produced.}
-}
-\value{
-If x.new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb).
-
-If x.new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x.new,va,vb).
-}
-\description{
-Calculate fitted probabilities from a fitted binary regression model object.
-}
diff --git a/man/predict_brm.Rd b/man/predict_brm.Rd
new file mode 100644
index 0000000..04b70d6
--- /dev/null
+++ b/man/predict_brm.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/predict_brm.R
+\name{predict_brm}
+\alias{predict_brm}
+\title{Fitted probabilities from \code{brm} fits}
+\usage{
+predict_brm(object, x_new = NULL, va_new = NULL, vb_new = NULL, ...)
+}
+\arguments{
+\item{object}{A fitted object from function \code{brm}.}
+
+\item{x_new}{An optional vector of x.}
+
+\item{va_new}{An optional covariate matrix to make predictions with. If omitted, the original matrix va is used.}
+
+\item{vb_new}{An optional covariate matrix to make predictions with. If vb_new is omitted but va_new is not, then vb_new is set to be equal to va_new. If both vb_new and va_new are omitted, then the original matrix vb is used.}
+
+\item{...}{affecting the predictions produced.}
+}
+\value{
+If x_new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb).
+
+If x_new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x_new,va,vb).
+}
+\description{
+Calculate fitted probabilities from a fitted binary regression model object.
+}
diff --git a/man/print.brm.Rd b/man/print_brm.Rd
similarity index 71%
rename from man/print.brm.Rd
rename to man/print_brm.Rd
index 2430c01..6c04d12 100644
--- a/man/print.brm.Rd
+++ b/man/print_brm.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/Print.R
-\name{print.brm}
-\alias{print.brm}
+% Please edit documentation in R/util.R
+\name{print_brm}
+\alias{print_brm}
 \title{Ancillary function for printing}
 \usage{
-\method{print}{brm}(x, ...)
+print_brm(x, ...)
 }
 \arguments{
 \item{x}{a list obtained with the function 'brm'}
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
deleted file mode 100644
index fa7583d..0000000
--- a/src/RcppExports.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-// Generated by using Rcpp::compileAttributes() -> do not edit by hand
-// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
-
-#include <RcppArmadillo.h>
-#include <Rcpp.h>
-
-using namespace Rcpp;
-
-#ifdef RCPP_USE_GLOBAL_ROSTREAM
-Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
-Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
-#endif
-
-// mat_vec_mul
-arma::mat mat_vec_mul(const arma::mat& m, const arma::colvec& v);
-RcppExport SEXP _brm_mat_vec_mul(SEXP mSEXP, SEXP vSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< const arma::mat& >::type m(mSEXP);
-    Rcpp::traits::input_parameter< const arma::colvec& >::type v(vSEXP);
-    rcpp_result_gen = Rcpp::wrap(mat_vec_mul(m, v));
-    return rcpp_result_gen;
-END_RCPP
-}
-
-static const R_CallMethodDef CallEntries[] = {
-    {"_brm_mat_vec_mul", (DL_FUNC) &_brm_mat_vec_mul, 2},
-    {NULL, NULL, 0}
-};
-
-RcppExport void R_init_brm(DllInfo *dll) {
-    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
-    R_useDynamicSymbols(dll, FALSE);
-}
diff --git a/tests/testthat/test-brm.R b/tests/testthat/test-brm.R
index 5d9ba31..451c572 100644
--- a/tests/testthat/test-brm.R
+++ b/tests/testthat/test-brm.R
@@ -1,37 +1,39 @@
 # sanity checks for now
 
 set.seed(0)
-n = 100
-alpha.true = c(0,-1)
-beta.true = c(-0.5, 1)
-gamma.true = c(0.1,-0.5)
-params.true = list(alpha.true = alpha.true,
-                   beta.true = beta.true,
-                   gamma.true = gamma.true)
-v.1 = rep(1, n) # intercept term
-v.2 = runif(n,-2, 2)
-v = cbind(v.1, v.2)
-pscore.true = exp(v %*% gamma.true) / (1 + exp(v %*% gamma.true))
-p0p1.true = getProbRR(v %*% alpha.true, v %*% beta.true)
-x = rbinom(n, 1, pscore.true)
-pA.true = p0p1.true[, 1]
-pA.true[x == 1] = p0p1.true[x == 1, 2]
-y = rbinom(n, 1, pA.true)
+n <- 100
+alpha_true <- c(0, -1)
+beta_true <- c(-0.5, 1)
+gamma_true <- c(0.1, -0.5)
+params_true <- list(
+  alpha_true = alpha_true,
+  beta_true = beta_true,
+  gamma_true = gamma_true
+)
+v_1 <- rep(1, n) # intercept term
+v_2 <- runif(n, -2, 2)
+v <- cbind(v_1, v_2)
+pscore_true <- exp(v %*% gamma_true) / (1 + exp(v %*% gamma_true))
+p0p1_true <- get_prob_rr(v %*% alpha_true, v %*% beta_true)
+x <- rbinom(n, 1, pscore_true)
+pA_true <- p0p1_true[, 1]
+pA_true[x == 1] <- p0p1_true[x == 1, 2]
+y <- rbinom(n, 1, pA_true)
 
 test_that("example runs correctly with MLE", {
-  r = brm(y, x, v, v, 'RR', 'MLE', v, TRUE)
-  expected_result = c(
+  r <- brm(y, x, v, v, "RR", "MLE", v, TRUE)
+  expected_result <- c(
     "alpha 1" = -0.78035527,
     "alpha 2" = -1.63433532,
     "beta 1" = -1.06854529,
     "beta 2" = 0.02441527
   )
-  expect_equal(r$point.est, expected_result)
+  expect_equal(r$point_est, expected_result)
 })
 
 test_that("example runs correctly with DR", {
-  r = brm(y, x, v, v, 'RR', 'DR', v, TRUE)
-  expected_result = c(
+  r <- brm(y, x, v, v, "RR", "DR", v, TRUE)
+  expected_result <- c(
     "alpha 1" = -0.73984903,
     "alpha 2" = -1.33843580,
     "beta 1" = -1.06854529,
@@ -39,5 +41,5 @@ test_that("example runs correctly with DR", {
     "gamma 1" = 0.16779440,
     "gamma 2" = -0.47170547
   )
-  expect_equal(r$point.est, expected_result)
+  expect_equal(r$point_est, expected_result)
 })
diff --git a/tests/testthat/test-get_probs.R b/tests/testthat/test-get_probs.R
new file mode 100644
index 0000000..14703ce
--- /dev/null
+++ b/tests/testthat/test-get_probs.R
@@ -0,0 +1,86 @@
+test_that("get_prob_rr", {
+  expect_true(exists("get_prob_rr")) # prevent "empty test" notification
+
+  test_that("works when logrr < -12", {
+    logrr <- c(-15, -15)
+    logop <- c(-15, -15)
+    p0 <- get_prb_aux(logop - logrr)
+    p1 <- 0
+    expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1))
+  })
+
+  test_that("works when logrr = 0, logop = 0", {
+    p0 <- c(0.5, 0.5)
+    p1 <- c(0.5, 0.5)
+    expect_equal(get_prob_rr(c(0, 0), c(0, 0)), cbind(p0, p1))
+  })
+
+  test_that("works when logrr > 12, 12 > logop > -12", {
+    logrr <- c(13, 13)
+    logop <- c(5, 5)
+    p0 <- c(0, 0)
+    p1 <- get_prb_aux(logop + logrr)
+    expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1))
+  })
+
+  test_that("works when logrr < 12, logop > 12", {
+    logrr <- c(11, 11)
+    logop <- c(13, 13)
+    p0 <- exp(-logrr)
+    p1 <- get_prb_aux(logop + logrr)
+    expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1))
+  })
+
+  test_that("works when logrr < -12, logop > 12", {
+    logrr <- c(-11, -11)
+    logop <- c(13, 13)
+    p0 <- c(1, 1)
+    p1 <- exp(logrr)
+    expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1))
+  })
+
+  test_that("works when 12 > logrr > -12, 12 > logop > -12", {
+    logrr <- c(5, 5)
+    logop <- c(5, 5)
+    p0 <- (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop)))) / (2 * exp(logrr) * (1 - exp(logop)))
+    p1 <- exp(logrr) * p0
+    expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1))
+  })
+})
+
+
+test_that("get_probs_rd", {
+  expect_true(exists("get_prob_rd")) # prevent "empty test" notification
+
+  test_that("works when logop > 350, atanhrd > 0", {
+    logop <- c(351, 351)
+    atanhrd <- c(1, 1)
+    p0 <- 1 - tanh(atanhrd)
+    p1 <- c(1, 1)
+    expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1))
+  })
+
+  test_that("works when logop > 350, atanhrd < 0", {
+    logop <- c(351, 351)
+    atanhrd <- c(-1, -1)
+    p0 <- c(1, 1)
+    p1 <- 1 + tanh(atanhrd)
+    expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1))
+  })
+
+  test_that("works when logop == 0", {
+    logop <- c(0, 0)
+    atanhrd <- runif(2, -50, 50)
+    p0 <- 0.5 * (1 - tanh(atanhrd))
+    p1 <- p0 + tanh(atanhrd)
+    expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1))
+  })
+
+  test_that("works when logop != 0 & logop < 350", {
+    logop <- c(349, 349)
+    atanhrd <- runif(2, -50, 50)
+    p0 <- (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * (1 - tanh(atanhrd)) * (1 - exp(logop)))) / (2 * (exp(logop) - 1))
+    p1 <- p0 + tanh(atanhrd)
+    expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1))
+  })
+})
diff --git a/tests/testthat/test-mat_vec_mul.R b/tests/testthat/test-mat_vec_mul.R
index 73ee871..106accd 100644
--- a/tests/testthat/test-mat_vec_mul.R
+++ b/tests/testthat/test-mat_vec_mul.R
@@ -1,8 +1,8 @@
 test_that("mat_vec_mul works", {
-  m <- matrix(c(1,2,3,1,2,3), 2, 3)
-  v <- c(1,2,3)
-  r <- c(13,13)
-  dim(r) <- c(2,1)
-  
-  expect_equal(r, mat_vec_mul(m,v))
+  m <- matrix(c(1, 2, 3, 1, 2, 3), 2, 3)
+  v <- c(1, 2, 3)
+  r <- c(13, 13)
+  dim(r) <- c(2, 1)
+
+  expect_equal(r, mat_vec_mul(m, v))
 })
diff --git a/tests/testthat/test-util.R b/tests/testthat/test-util.R
new file mode 100644
index 0000000..1a0db71
--- /dev/null
+++ b/tests/testthat/test-util.R
@@ -0,0 +1,21 @@
+set.seed(0)
+
+test_that("expit function works", {
+  p <- runif(0, 1)
+  l <- log(p) - log(1 - p)
+  i <- expit(l)
+  expect_equal(i, 1 / l)
+})
+
+test_that("same function works", {
+  expect_true(same(5, 5))
+  expect_false(same(5, 5 + .Machine$double.eps^0.5))
+  expect_true(same(-5, 5, 10.1))
+  expect_false(same(1, 2, .5))
+})
+
+test_that("get_prob_aux function works", {
+  expect_equal(get_prb_aux(-501), 0)
+  expect_equal(get_prb_aux(20), 1)
+  expect_equal(get_prb_aux(0), .5 * (sqrt(5) - 1))
+})