diff --git a/.Rbuildignore b/.Rbuildignore
index 1ec133a..da5ffe6 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -31,3 +31,6 @@
 ^libcuml/*
 ^\.github$
 ^\.lsan-suppressions\.txt$
+^\.positai$
+^\.claude$
+^\.codex$
diff --git a/.github/docker/Dockerfile b/.github/docker/Dockerfile
new file mode 100644
index 0000000..2dbb8ec
--- /dev/null
+++ b/.github/docker/Dockerfile
@@ -0,0 +1,53 @@
+FROM nvidia/cuda:11.2.2-devel-ubuntu20.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# System dependencies
+RUN apt-get update -y && apt-get install -y \
+    sudo software-properties-common dialog apt-utils \
+    tzdata locales curl wget git \
+    libcurl4-openssl-dev libssl-dev libxml2-dev \
+    libfontconfig1-dev libfreetype6-dev libpng-dev \
+    libharfbuzz-dev libfribidi-dev libtiff5-dev libjpeg-dev \
+    make gcc g++ pandoc python3 python3-pip
+
+# Install R via rig
+RUN curl -L https://rig.r-pkg.org/deb/rig.gpg -o /etc/apt/trusted.gpg.d/rig.gpg \
+    && echo "deb http://rig.r-pkg.org/deb rig main" > /etc/apt/sources.list.d/rig.list \
+    && apt-get update \
+    && apt-get install -y r-rig \
+    && rig add release \
+    && rig default release \
+    && rm -rf /var/lib/apt/lists/*
+
+# Use a fixed library path (not HOME-dependent) so packages are found
+# regardless of what HOME is set to at runtime (GitHub Actions sets HOME=/github/home)
+ENV R_LIBS_USER=/opt/R/library
+RUN mkdir -p /opt/R/library
+
+# Parallel compilation
+RUN echo "MAKEFLAGS=-j$(nproc)" >> "$(R RHOME)/etc/Renviron.site"
+
+# Copy source
+COPY . /build
+
+ARG CUML_VERSION=21.12
+ENV CUML_VERSION=${CUML_VERSION}
+
+# Cross-compile for T4 GPU (compute capability 7.5) since build runner has no GPU
+ARG CMAKE_CUDA_ARCHITECTURES=75
+ENV CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}
+
+ENV NOT_CRAN=true
+
+# Install R dependencies
+RUN Rscript -e "\
+    install.packages('pak', repos = 'https://r-lib.github.io/p/pak/devel/'); \
+    pak::local_install_deps('/build', dependencies = TRUE)" \
+    && rm -rf /tmp/* /root/.cache
+
+# Install cuda.ml with tests
+RUN R CMD INSTALL --install-tests /build
+
+# Clean up
+RUN rm -rf /tmp/* /build
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 89bcd05..257527e 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -1,5 +1,3 @@
-# Workflow derived from https://github.com/r-lib/actions/tree/master/examples
-# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
     branches: [main]
@@ -9,117 +7,99 @@ on:
 name: R-CMD-check
 
 jobs:
-  R-CMD-check:
-
+  check-cran:
     strategy:
       fail-fast: false
       matrix:
-        cuda: ['11.2.1']
-        cuml: ['21.08', '21.10', '21.12']
         r: ['release', 'devel']
-        asan: ['false', 'true']
-
-    runs-on: ['self-hosted', 'gpu']
-    container:
-      image: nvidia/cuda:${{ matrix.cuda }}-devel-ubuntu18.04
-      options: --gpus all
 
-    name: 'R: ${{ matrix.r }}, CUDA: ${{ matrix.cuda }}, CUML: ${{ matrix.cuml }}, ASAN: ${{ matrix.asan }}'
+    runs-on: ubuntu-latest
+    name: 'CRAN (R: ${{ matrix.r }})'
 
     env:
-      NOT_CRAN: true
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
       R_KEEP_PKG_SOURCE: yes
-      CUML_VERSION: ${{ matrix.cuml }}
-      CUML4R_ENABLE_ASAN: ${{ matrix.asan }}
-      DEBIAN_FRONTEND: 'noninteractive'
 
     steps:
-      - run: |
-          apt-get update -y
-          apt-get install -y sudo software-properties-common dialog apt-utils tzdata
-          if [[ $CUML4R_ENABLE_ASAN == 'true' ]]; then
-            apt-get install -y libasan5
-          fi
-        shell: bash
-
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
 
-      - uses: r-lib/actions/setup-pandoc@v1
+      - uses: r-lib/actions/setup-pandoc@v2
 
-      - uses: actions/setup-python@v2
-        with:
-          python-version: '3.x'
-          architecture: 'x64'
-
-      - uses: r-lib/actions/setup-r@master
+      - uses: r-lib/actions/setup-r@v2
         with:
           r-version: ${{ matrix.r }}
-          http-user-agent: ${{ matrix.config.http-user-agent }}
           use-public-rspm: true
 
-      - uses: r-lib/actions/setup-r-dependencies@v1
+      - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: rcmdcheck
+          needs: check
+
+      - name: Build
+        run: R CMD build .
+
+      - name: Check
+        run: R CMD check --no-manual --as-cran cuda.ml_*.tar.gz
+        env:
+          _R_CHECK_CRAN_INCOMING_: false
+
+  build-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    timeout-minutes: 120
+    outputs:
+      image: ghcr.io/${{ github.repository }}-ci:${{ github.sha }}
+    steps:
+      - uses: actions/checkout@v4
 
-      - name: Build {cuda.ml}
-        id: build-pkg
-        run: |
-          cd ..
-          ls -a
-          rm -v cuda.ml_*.tar.gz
-          R CMD build cuda.ml
-          ls -a
-          echo "::set-output name=pkg-dir::$(pwd)"
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: .github/docker/Dockerfile
+          push: true
+          tags: ghcr.io/${{ github.repository }}-ci:${{ github.sha }}
+          build-args: |
+            CUML_VERSION=21.12
+            CMAKE_CUDA_ARCHITECTURES=75
+
+  test-gpu:
+    needs: build-image
+    if: ${{ always() && needs.build-image.result == 'success' }}
+    concurrency:
+      group: gpu-tests
+    runs-on:
+      - "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64/spot=true"
+    container:
+      image: ${{ needs.build-image.outputs.image }}
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+      options: --gpus all --runtime=nvidia
+    timeout-minutes: 60
+    env:
+      NOT_CRAN: true
 
-      - run: cp -v cuda.ml/.lsan-suppressions.txt /tmp
-        working-directory: ${{ steps.build-pkg.outputs.pkg-dir }}
+    steps:
+      - name: Verify GPU access
+        run: nvidia-smi
 
-      - name: Check {cuda.ml} package
-        run: |
-          print(list.files("."))
-          pkg <- list.files(".", pattern = "cuda\\.ml_.*\\.tar\\.gz")
-          stopifnot(length(pkg) == 1)
-
-          reticulate::install_miniconda(force = TRUE)
-
-          rcmdcheck_env <- (
-            if (identical(Sys.getenv("CUML4R_ENABLE_ASAN"), "true")) {
-              c(
-                LD_PRELOAD = "/usr/lib/x86_64-linux-gnu/libasan.so.5",
-                ASAN_OPTIONS = "halt_on_error=0,new_delete_type_mismatch=0,alloc_dealloc_mismatch=0,protect_shadow_gap=0",
-                LSAN_OPTIONS = "suppressions=/tmp/.lsan-suppressions.txt"
-              )
-            } else {
-              character()
-            }
-          )
-          rcmdcheck::rcmdcheck(
-            path = pkg[[1]],
-            args = c("--no-manual", "--as-cran"),
-            check_dir="check",
-            env = rcmdcheck_env
-          )
-        shell: Rscript {0}
-        working-directory: ${{ steps.build-pkg.outputs.pkg-dir }}
-
-      - name: Show testthat output
-        if: ${{ always() }}
+      - name: Session info
         run: |
-          find check -name 'testthat.Rout*' -type f -exec cat '{}' \; || :
-        shell: bash
-        working-directory: ${{ steps.build-pkg.outputs.pkg-dir }}
+          Rscript -e "sessionInfo()"
+          Rscript -e "library(cuda.ml)"
 
-      - name: Check for sanitizer error(s)
-        if: ${{ always() }}
+      - name: Run tests
         run: |
-          ! find check -name 'testthat.Rout*' -type f -exec egrep -C 50 'ERROR: .*Sanitizer:' '{}' +
-        shell: bash
-        working-directory: ${{ steps.build-pkg.outputs.pkg-dir }}
-
-      - name: Upload check results
-        if: ${{ failure() }}
-        uses: actions/upload-artifact@main
-        with:
-          name: ${{ runner.os }}-r${{ matrix.r }}-results
-          path: ${{ steps.build-pkg.outputs.pkg-dir }}/check
+          Rscript -e "testthat::test_package('cuda.ml', reporter = 'progress')"
diff --git a/.gitignore b/.gitignore
index 6d3278f..1d71690 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,5 @@ cuda.ml.Rcheck
 *.cmake
 *.a
 00check.log
+.positai
+.codex
\ No newline at end of file
diff --git a/DESCRIPTION b/DESCRIPTION
index 7dea694..eb9abb5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -45,7 +45,7 @@ Suggests:
     xgboost
 LinkingTo: Rcpp
 Encoding: UTF-8
-RoxygenNote: 7.1.2
+RoxygenNote: 7.3.3
 OS_type: unix
 SystemRequirements: RAPIDS cuML (see https://rapids.ai/start.html)
 NeedsCompilation: yes
diff --git a/NAMESPACE b/NAMESPACE
index 33ff35f..5a72bf9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,12 +1,24 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(cuda_ml_can_predict_class_probabilities,cuda_ml_fil)
+S3method(cuda_ml_can_predict_class_probabilities,cuda_ml_knn)
 S3method(cuda_ml_can_predict_class_probabilities,cuda_ml_model)
+S3method(cuda_ml_can_predict_class_probabilities,cuda_ml_rand_forest)
 S3method(cuda_ml_can_predict_class_probabilities,default)
 S3method(cuda_ml_elastic_net,data.frame)
 S3method(cuda_ml_elastic_net,default)
 S3method(cuda_ml_elastic_net,formula)
 S3method(cuda_ml_elastic_net,matrix)
 S3method(cuda_ml_elastic_net,recipe)
+S3method(cuda_ml_get_state,cuda_ml_model)
+S3method(cuda_ml_get_state,cuda_ml_pca)
+S3method(cuda_ml_get_state,cuda_ml_rand_forest)
+S3method(cuda_ml_get_state,cuda_ml_rand_proj_model)
+S3method(cuda_ml_get_state,cuda_ml_svc)
+S3method(cuda_ml_get_state,cuda_ml_svc_ovr)
+S3method(cuda_ml_get_state,cuda_ml_svr)
+S3method(cuda_ml_get_state,cuda_ml_umap)
+S3method(cuda_ml_get_state,default)
 S3method(cuda_ml_inverse_transform,cuda_ml_pca)
 S3method(cuda_ml_inverse_transform,cuda_ml_tsvd)
 S3method(cuda_ml_is_classifier,cuda_ml_model)
@@ -43,6 +55,15 @@ S3method(cuda_ml_ridge,matrix)
 S3method(cuda_ml_ridge,recipe)
 S3method(cuda_ml_serialize,cuda_ml_model)
 S3method(cuda_ml_serialize,default)
+S3method(cuda_ml_set_state,cuda_ml_model_state)
+S3method(cuda_ml_set_state,cuda_ml_pca_model_state)
+S3method(cuda_ml_set_state,cuda_ml_rand_forest_model_state)
+S3method(cuda_ml_set_state,cuda_ml_rand_proj_model_state)
+S3method(cuda_ml_set_state,cuda_ml_svc_model_state)
+S3method(cuda_ml_set_state,cuda_ml_svc_ovr_model_state)
+S3method(cuda_ml_set_state,cuda_ml_svr_model_state)
+S3method(cuda_ml_set_state,cuda_ml_umap_model_state)
+S3method(cuda_ml_set_state,default)
 S3method(cuda_ml_sgd,data.frame)
 S3method(cuda_ml_sgd,default)
 S3method(cuda_ml_sgd,formula)
diff --git a/R/agglomerative.R b/R/agglomerative.R
index a1d85df..80e7963 100644
--- a/R/agglomerative.R
+++ b/R/agglomerative.R
@@ -18,10 +18,10 @@ agglomerative_clustering_match_metric <- function(metric = c("euclidean", "l1",
 #' @template model-with-numeric-input
 #' @param n_clusters The number of clusters to find. Default: 2L.
 #' @param metric Metric used for linkage computation. Must be one of
-#'   {"euclidean", "l1", "l2", "manhattan", "cosine"}. If connectivity is
+#'   \{"euclidean", "l1", "l2", "manhattan", "cosine"\}. If connectivity is
 #'   "knn" then only "euclidean" is accepted. Default: "euclidean".
 #' @param connectivity The type of connectivity matrix to compute. Must be one
-#'   of {"pairwise", "knn"}. Default: "pairwise".
+#'   of \{"pairwise", "knn"\}. Default: "pairwise".
 #'     - 'pairwise' will compute the entire fully-connected graph of pairwise
 #'        distances between each set of points. This is the fastest to compute
 #'        and can be very fast for smaller datasets but requires O(n^2) space.
diff --git a/R/cuml_utils.R b/R/cuml_utils.R
index bd5d431..abd8822 100644
--- a/R/cuml_utils.R
+++ b/R/cuml_utils.R
@@ -1,7 +1,7 @@
-#' Determine whether {cuda.ml} was linked to a valid version of the RAPIDS cuML
+#' Determine whether \{cuda.ml\} was linked to a valid version of the RAPIDS cuML
 #' shared library.
 #'
-#' @return A logical value indicating whether the current installation {cuda.ml}
+#' @return A logical value indicating whether the current installation \{cuda.ml\}
 #'   was linked to a valid version of the RAPIDS cuML shared library.
 #'
 #' @examples
@@ -17,11 +17,11 @@
 #' @export
 has_cuML <- .has_cuML
 
-#' Get the major version of the RAPIDS cuML shared library {cuda.ml} was linked
+#' Get the major version of the RAPIDS cuML shared library \{cuda.ml\} was linked
 #' to.
 #'
-#' @return The major version of the RAPIDS cuML shared library {cuda.ml} was
-#' linked to in a character vector, or \code{NA_character_} if {cuda.ml} was not
+#' @return The major version of the RAPIDS cuML shared library \{cuda.ml\} was
+#' linked to in a character vector, or \code{NA_character_} if \{cuda.ml\} was not
 #' linked to any version of RAPIDS cuML.
 #'
 #' @examples
@@ -32,11 +32,11 @@ has_cuML <- .has_cuML
 #' @export
 cuML_major_version <- .cuML_major_version
 
-#' Get the minor version of the RAPIDS cuML shared library {cuda.ml} was linked
+#' Get the minor version of the RAPIDS cuML shared library \{cuda.ml\} was linked
 #' to.
 #'
-#' @return The minor version of the RAPIDS cuML shared library {cuda.ml} was
-#' linked to in a character vector, or \code{NA_character_} if {cuda.ml} was not
+#' @return The minor version of the RAPIDS cuML shared library \{cuda.ml\} was
+#' linked to in a character vector, or \code{NA_character_} if \{cuda.ml\} was not
 #' linked to any version of RAPIDS cuML.
 #'
 #' @examples
diff --git a/R/fil.R b/R/fil.R
index da25aa7..c8b54bf 100644
--- a/R/fil.R
+++ b/R/fil.R
@@ -1,11 +1,11 @@
 #' Determine whether Forest Inference Library (FIL) functionalities are enabled
-#' in the current installation of {cuda.ml}.
+#' in the current installation of \{cuda.ml\}.
 #'
 #' CuML Forest Inference Library (FIL) functionalities (see
 #' https://github.com/rapidsai/cuml/tree/main/python/cuml/fil#readme) will
 #' require Treelite C API. If you need FIL to run tree-based model ensemble on
 #' GPU, and \code{fil_enabled()} returns FALSE, then please consider installing
-#' Treelite and then re-installing {cuda.ml}.
+#' Treelite and then re-installing \{cuda.ml\}.
 #'
 #' @return A logical value indicating whether the Forest Inference Library (FIL)
 #'   functionalities are enabled.
@@ -62,9 +62,9 @@ file_match_storage_type <- function(storage_type = c("auto", "dense", "sparse"))
 #'
 #' @param filename Path to the saved model file.
 #' @param mode Type of task to be performed by the model. Must be one of
-#'   {"classification", "regression"}.
+#'   \{"classification", "regression"\}.
 #' @param model_type Format of the saved model file. Notice if \code{filename}
-#'   ends with ".json" and \code{model_type} is "xgboost", then {cuda.ml} will
+#'   ends with ".json" and \code{model_type} is "xgboost", then \{cuda.ml\} will
 #'   assume the model file is in XGBoost JSON (instead of binary) format.
 #'   Default: "xgboost".
 #' @param algo Type of the algorithm for inference, must be one of the
diff --git a/R/knn.R b/R/knn.R
index 0eda43d..4fc8ff6 100644
--- a/R/knn.R
+++ b/R/knn.R
@@ -111,7 +111,7 @@ cuda_ml_knn_algo_ivfsq <- function(nlist, nprobe,
 #' @template supervised-model-output
 #' @template ellipsis-unused
 #' @param algo The query algorithm to use. Must be one of
-#'   {"brute", "ivfflat", "ivfpq", "ivfsq"} or a KNN algorithm specification
+#'   \{"brute", "ivfflat", "ivfpq", "ivfsq"\} or a KNN algorithm specification
 #'   constructed using the \code{cuda_ml_knn_algo_*} family of functions.
 #'   If the algorithm is specified by one of the \code{cuda_ml_knn_algo_*}
 #'   functions, then values of all required parameters of the algorithm will
@@ -132,10 +132,10 @@ cuda_ml_knn_algo_ivfsq <- function(nlist, nprobe,
 #'                faster distances calculations).
 #'
 #'   Default: "brute".
-#' @param metric Distance metric to use. Must be one of {"euclidean", "l2",
+#' @param metric Distance metric to use. Must be one of \{"euclidean", "l2",
 #'   "l1", "cityblock", "taxicab", "manhattan", "braycurtis", "canberra",
 #'   "minkowski", "lp", "chebyshev", "linf", "jensenshannon", "cosine",
-#'   "correlation"}.
+#'   "correlation"\}.
 #'   Default: "euclidean".
 #' @param p Parameter for the Minkowski metric. If p = 1, then the metric is
 #'   equivalent to manhattan distance (l1). If p = 2, the metric is equivalent
diff --git a/R/logistic_reg.R b/R/logistic_reg.R
index 1c6e7d4..43b3529 100644
--- a/R/logistic_reg.R
+++ b/R/logistic_reg.R
@@ -97,7 +97,7 @@ logistic_reg_build_sample_weight <- function(sample_weight,
 #' @template ellipsis-unused
 #' @template fit-intercept
 #' @param penalty The penalty type, must be one of
-#'   {"none", "l1", "l2", "elasticnet"}.
+#'   \{"none", "l1", "l2", "elasticnet"\}.
 #'   If "none" or "l2" is selected, then L-BFGS solver will be used.
 #'   If "l1" is selected, solver OWL-QN will be used.
 #'   If "elasticnet" is selected, OWL-QN will be used if l1_ratio > 0, otherwise
diff --git a/R/model.R b/R/model.R
index 584ef06..a5edeb5 100644
--- a/R/model.R
+++ b/R/model.R
@@ -138,11 +138,20 @@ cuda_ml_can_predict_class_probabilities.cuda_ml_model <- function(model) {
   FALSE
 }
 
-cuda_ml_can_predict_class_probabilities.cuda_ml_fil <- cuda_ml_is_classifier
+#' @export
+cuda_ml_can_predict_class_probabilities.cuda_ml_fil <- function(model) {
+  cuda_ml_is_classifier(model)
+}
 
-cuda_ml_can_predict_class_probabilities.cuda_ml_knn <- cuda_ml_is_classifier
+#' @export
+cuda_ml_can_predict_class_probabilities.cuda_ml_knn <- function(model) {
+  cuda_ml_is_classifier(model)
+}
 
-cuda_ml_can_predict_class_probabilities.cuda_ml_rand_forest <- cuda_ml_is_classifier
+#' @export
+cuda_ml_can_predict_class_probabilities.cuda_ml_rand_forest <- function(model) {
+  cuda_ml_is_classifier(model)
+}
 
 #' Serialize a CuML model
 #'
@@ -184,6 +193,7 @@ cuda_ml_get_state <- function(model) {
   UseMethod("cuda_ml_get_state")
 }
 
+#' @export
 cuda_ml_get_state.default <- function(model) {
   stop(
     "Model of type '", paste(class(model), collapse = " "), "' does not ",
@@ -191,6 +201,7 @@ cuda_ml_get_state.default <- function(model) {
   )
 }
 
+#' @export
 cuda_ml_get_state.cuda_ml_model <- function(model) {
   # Default implementation: assume the entire model object can be serializabled
   # by `base::serialize()`.
@@ -199,6 +210,7 @@ cuda_ml_get_state.cuda_ml_model <- function(model) {
   new_model_state(model_state, cls = NULL)
 }
 
+#' @export
 cuda_ml_set_state.cuda_ml_model_state <- function(model_state) {
   # Default implementation: assume the entire model state can be unserialized by
   # `base::unserialize()`.
@@ -233,6 +245,7 @@ cuda_ml_set_state <- function(model_state) {
   UseMethod("cuda_ml_set_state")
 }
 
+#' @export
 cuda_ml_set_state.default <- function(model_state) {
   stop(
     "No unserialization routine found for model state of type '",
diff --git a/R/ols.R b/R/ols.R
index 3f1c4ba..9d5cefc 100644
--- a/R/ols.R
+++ b/R/ols.R
@@ -17,7 +17,7 @@ ols_match_method <- function(method = c("svd", "eig", "qr")) {
 #' @template ellipsis-unused
 #' @template fit-intercept
 #' @template normalize-input
-#' @param method Must be one of {"svd", "eig", "qr"}.
+#' @param method Must be one of \{"svd", "eig", "qr"\}.
 #'
 #'   - "svd": compute SVD decomposition using Jacobi iterations.
 #'   - "eig": use an eigendecomposition of the covariance matrix.
diff --git a/R/package.R b/R/package.R
index 5e2079f..e0ffb36 100644
--- a/R/package.R
+++ b/R/package.R
@@ -2,12 +2,10 @@
 #'
 #' This package provides a R interface for the RAPIDS cuML library.
 #'
-#' @docType package
 #' @author Yitao Li <yitao@rstudio.com>
 #' @import Rcpp
-#' @name cuda.ml
 #' @useDynLib cuda.ml, .registration = TRUE
-NULL
+"_PACKAGE"
 
 .onLoad <- function(libname, pkgname) {
   register_rand_forest_model(pkgname)
diff --git a/R/pca.R b/R/pca.R
index db2bda2..fb3a974 100644
--- a/R/pca.R
+++ b/R/pca.R
@@ -78,12 +78,14 @@ cuda_ml_inverse_transform.cuda_ml_pca <- function(model, x, ...) {
   .pca_inverse_transform(model = model, x = as.matrix(x))
 }
 
+#' @export
 cuda_ml_get_state.cuda_ml_pca <- function(model) {
   model_state <- .pca_get_state(model)
 
   new_model_state(model_state, "cuda_ml_pca_model_state")
 }
 
+#' @export
 cuda_ml_set_state.cuda_ml_pca_model_state <- function(model_state) {
   model_state <- .pca_set_state(model_state)
 
diff --git a/R/rand_forest.R b/R/rand_forest.R
index 9a64338..6d7380b 100644
--- a/R/rand_forest.R
+++ b/R/rand_forest.R
@@ -329,6 +329,7 @@ cuda_ml_rand_forest_impl_regression <- function(processed, mtry, trees, min_n,
   )
 }
 
+#' @export
 cuda_ml_get_state.cuda_ml_rand_forest <- function(model) {
   get_state_impl <- switch(model$mode,
     classification = .rf_classifier_get_state,
@@ -344,6 +345,7 @@ cuda_ml_get_state.cuda_ml_rand_forest <- function(model) {
   new_model_state(model_state, "cuda_ml_rand_forest_model_state")
 }
 
+#' @export
 cuda_ml_set_state.cuda_ml_rand_forest_model_state <- function(model_state) {
   set_state_impl <- switch(model_state$mode,
     classification = .rf_classifier_set_state,
diff --git a/R/rand_proj.R b/R/rand_proj.R
index 91cf8a5..475ebec 100644
--- a/R/rand_proj.R
+++ b/R/rand_proj.R
@@ -79,12 +79,14 @@ cuda_ml_transform.cuda_ml_rand_proj_model <- function(model, x, ...) {
   .rproj_transform(model$rproj_ctx, as.matrix(x))
 }
 
+#' @export
 cuda_ml_get_state.cuda_ml_rand_proj_model <- function(model) {
   model_state <- .rproj_get_state(model$rproj_ctx)
 
   new_model_state(model_state, "cuda_ml_rand_proj_model_state")
 }
 
+#' @export
 cuda_ml_set_state.cuda_ml_rand_proj_model_state <- function(model_state) {
   model_obj <- .rproj_set_state(model_state)
 
diff --git a/R/sgd.R b/R/sgd.R
index 2a2741a..d4953df 100644
--- a/R/sgd.R
+++ b/R/sgd.R
@@ -38,9 +38,9 @@ sgd_match_learning_rate <- function(learning_rate = c("constant", "invscaling",
 #' @template ellipsis-unused
 #' @template fit-intercept
 #' @template l1_ratio
-#' @param loss Loss function, must be one of {"squared_loss", "log", "hinge"}.
+#' @param loss Loss function, must be one of \{"squared_loss", "log", "hinge"\}.
 #' @param penalty Type of regularization to perform, must be one of
-#'   {"none", "l1", "l2", "elasticnet"}.
+#'   \{"none", "l1", "l2", "elasticnet"\}.
 #'
 #'   - "none": no regularization.
 #'   - "l1": perform regularization based on the L1-norm (LASSO) which tries to
@@ -63,7 +63,7 @@ sgd_match_learning_rate <- function(learning_rate = c("constant", "invscaling",
 #' @param eta0 The initial learning rate. Default: 1e-3.
 #' @param power_t The exponent used for calculating the invscaling learning
 #'   rate. Default: 0.5.
-#' @param learning_rate Must be one of {"constant", "invscaling", "adaptive"}.
+#' @param learning_rate Must be one of \{"constant", "invscaling", "adaptive"\}.
 #'
 #'   - "constant": the learning rate will be kept constant.
 #'   - "invscaling": (learning rate) = (initial learning rate) / pow(t, power_t)
diff --git a/R/svm.R b/R/svm.R
index 82df814..31c0c8c 100644
--- a/R/svm.R
+++ b/R/svm.R
@@ -313,6 +313,7 @@ cuda_ml_svm_classification_multiclass_impl <- function(processed, cost, kernel,
   )
 }
 
+#' @export
 cuda_ml_get_state.cuda_ml_svc_ovr <- function(model) {
   model_state <- list(
     ovr_model_states = lapply(model$xptr, function(x) cuda_ml_get_state(x)),
@@ -322,6 +323,7 @@ cuda_ml_get_state.cuda_ml_svc_ovr <- function(model) {
   new_model_state(model_state, "cuda_ml_svc_ovr_model_state")
 }
 
+#' @export
 cuda_ml_set_state.cuda_ml_svc_ovr_model_state <- function(model_state) {
   new_model(
     cls = c("cuda_ml_svc_ovr", "cuda_ml_svm"),
@@ -365,6 +367,7 @@ cuda_ml_svm_classification_binary_impl <- function(processed, cost, kernel, gamm
   )
 }
 
+#' @export
 cuda_ml_get_state.cuda_ml_svc <- function(model) {
   model_state <- list(
     model_state = .svc_get_state(model$xptr),
@@ -374,6 +377,7 @@ cuda_ml_get_state.cuda_ml_svc <- function(model) {
   new_model_state(model_state, "cuda_ml_svc_model_state")
 }
 
+#' @export
 cuda_ml_set_state.cuda_ml_svc_model_state <- function(model_state) {
   new_model(
     cls = c("cuda_ml_svc", "cuda_ml_svm"),
@@ -416,6 +420,7 @@ cuda_ml_svm_regression_impl <- function(processed, cost, kernel, gamma, coef0,
   )
 }
 
+#' @export
 cuda_ml_get_state.cuda_ml_svr <- function(model) {
   model_state <- list(
     model_state = .svr_get_state(model$xptr),
@@ -425,6 +430,7 @@ cuda_ml_get_state.cuda_ml_svr <- function(model) {
   new_model_state(model_state, "cuda_ml_svr_model_state")
 }
 
+#' @export
 cuda_ml_set_state.cuda_ml_svr_model_state <- function(model_state) {
   new_model(
     cls = c("cuda_ml_svr", "cuda_ml_svm"),
diff --git a/R/tsne.R b/R/tsne.R
index 6299748..49a80cb 100644
--- a/R/tsne.R
+++ b/R/tsne.R
@@ -24,7 +24,7 @@ new_tsne_model <- function(embedding) {
 #' @param n_components Dimension of the embedded space.
 #' @param n_neighbors The number of datapoints to use in the attractive forces.
 #'   Default: ceiling(3 * perplexity).
-#' @param method T-SNE method, must be one of {"barnes_hut", "fft", "exact"}.
+#' @param method T-SNE method, must be one of \{"barnes_hut", "fft", "exact"\}.
 #'   The "exact" method will be more accurate but slower. Both "barnes_hut" and
 #'   "fft" methods are fast approximations.
 #' @param angle Valid values are between 0.0 and 1.0, which trade off speed and
@@ -35,7 +35,7 @@ new_tsne_model <- function(embedding) {
 #' @param learning_rate Learning rate of the t-SNE algorithm, usually between
 #'   (10, 1000). If the learning rate is too high, then t-SNE result could look
 #'   like a cloud / ball of points.
-#' @param learning_rate_method Must be one of {"adaptive", "none"}. If
+#' @param learning_rate_method Must be one of \{"adaptive", "none"\}. If
 #'   "adaptive", then learning rate, early exaggeration, and perplexity are
 #'   automatically tuned based on input size. Default: "adaptive".
 #' @param perplexity The target value of the conditional distribution's
diff --git a/R/umap.R b/R/umap.R
index 8cd9292..ab60823 100644
--- a/R/umap.R
+++ b/R/umap.R
@@ -41,7 +41,7 @@ new_umap_model <- function(model) {
 #' @param learning_rate The initial learning rate for the embedding
 #'   optimization. Default: 1.0.
 #' @param init Initialization mode of the low dimensional embedding. Must be
-#'   one of {"spectral", "random"}. Default: "spectral".
+#'   one of \{"spectral", "random"\}. Default: "spectral".
 #' @param min_dist The effective minimum distance between embedded points.
 #'   Default: 0.1.
 #' @param spread The effective scale of embedded points. In combination with
@@ -71,7 +71,7 @@ new_umap_model <- function(model) {
 #'   the target simplcial set. Default: n_neighbors.
 #' @param target_metric The metric for measuring distance between the actual and
 #'   and the target values (\code{y}) if using supervised dimension reduction.
-#'   Must be one of {"categorical", "euclidean"}. Default: "categorical".
+#'   Must be one of \{"categorical", "euclidean"\}. Default: "categorical".
 #' @param target_weight Weighting factor between data topology and target
 #'   topology. A value of 0.0 weights entirely on data, a value of 1.0 weights
 #'   entirely on target. The default of 0.5 balances the weighting equally
@@ -150,12 +150,14 @@ cuda_ml_umap <- function(x, y = NULL, n_components = 2L, n_neighbors = 15L,
   model
 }
 
+#' @export
 cuda_ml_get_state.cuda_ml_umap <- function(model) {
   model_state <- .umap_get_state(model)
 
   new_model_state(model_state, "cuda_ml_umap_model_state")
 }
 
+#' @export
 cuda_ml_set_state.cuda_ml_umap_model_state <- function(model_state) {
   model_obj <- .umap_set_state(model_state)
 
diff --git a/man-roxygen/cuML-log-level.R b/man-roxygen/cuML-log-level.R
index aa225f8..e275d88 100644
--- a/man-roxygen/cuML-log-level.R
+++ b/man-roxygen/cuML-log-level.R
@@ -1,3 +1,3 @@
 #' @param cuML_log_level Log level within cuML library functions. Must be one of
-#'   {"off", "critical", "error", "warn", "info", "debug", "trace"}.
+#'   \{"off", "critical", "error", "warn", "info", "debug", "trace"\}.
 #'   Default: off.
diff --git a/man-roxygen/knn-algo-ivfsq.R b/man-roxygen/knn-algo-ivfsq.R
index 6001350..779810d 100644
--- a/man-roxygen/knn-algo-ivfsq.R
+++ b/man-roxygen/knn-algo-ivfsq.R
@@ -1,4 +1,4 @@
-#' @param qtype Quantizer type. Must be one of {"QT_8bit", "QT_4bit",
+#' @param qtype Quantizer type. Must be one of \{"QT_8bit", "QT_4bit",
 #'   "QT_8bit_uniform", "QT_4bit_uniform", "QT_fp16", "QT_8bit_direct",
-#'   "QT_6bit"}.
+#'   "QT_6bit"\}.
 #' @param encode_residual Whether to encode residuals.
diff --git a/man/cuML_major_version.Rd b/man/cuML_major_version.Rd
index 409ad06..dc5503a 100644
--- a/man/cuML_major_version.Rd
+++ b/man/cuML_major_version.Rd
@@ -2,18 +2,18 @@
 % Please edit documentation in R/cuml_utils.R
 \name{cuML_major_version}
 \alias{cuML_major_version}
-\title{Get the major version of the RAPIDS cuML shared library {cuda.ml} was linked
+\title{Get the major version of the RAPIDS cuML shared library \{cuda.ml\} was linked
 to.}
 \usage{
 cuML_major_version()
 }
 \value{
-The major version of the RAPIDS cuML shared library {cuda.ml} was
-linked to in a character vector, or \code{NA_character_} if {cuda.ml} was not
+The major version of the RAPIDS cuML shared library \{cuda.ml\} was
+linked to in a character vector, or \code{NA_character_} if \{cuda.ml\} was not
 linked to any version of RAPIDS cuML.
 }
 \description{
-Get the major version of the RAPIDS cuML shared library {cuda.ml} was linked
+Get the major version of the RAPIDS cuML shared library \{cuda.ml\} was linked
 to.
 }
 \examples{
diff --git a/man/cuML_minor_version.Rd b/man/cuML_minor_version.Rd
index 2993da2..4c66f5b 100644
--- a/man/cuML_minor_version.Rd
+++ b/man/cuML_minor_version.Rd
@@ -2,18 +2,18 @@
 % Please edit documentation in R/cuml_utils.R
 \name{cuML_minor_version}
 \alias{cuML_minor_version}
-\title{Get the minor version of the RAPIDS cuML shared library {cuda.ml} was linked
+\title{Get the minor version of the RAPIDS cuML shared library \{cuda.ml\} was linked
 to.}
 \usage{
 cuML_minor_version()
 }
 \value{
-The minor version of the RAPIDS cuML shared library {cuda.ml} was
-linked to in a character vector, or \code{NA_character_} if {cuda.ml} was not
+The minor version of the RAPIDS cuML shared library \{cuda.ml\} was
+linked to in a character vector, or \code{NA_character_} if \{cuda.ml\} was not
 linked to any version of RAPIDS cuML.
 }
 \description{
-Get the minor version of the RAPIDS cuML shared library {cuda.ml} was linked
+Get the minor version of the RAPIDS cuML shared library \{cuda.ml\} was linked
 to.
 }
 \examples{
diff --git a/man/cuda.ml.Rd b/man/cuda.ml-package.Rd
similarity index 55%
rename from man/cuda.ml.Rd
rename to man/cuda.ml-package.Rd
index 8043964..b43d49e 100644
--- a/man/cuda.ml.Rd
+++ b/man/cuda.ml-package.Rd
@@ -1,11 +1,20 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/package.R
 \docType{package}
-\name{cuda.ml}
+\name{cuda.ml-package}
 \alias{cuda.ml}
+\alias{cuda.ml-package}
 \title{cuda.ml}
 \description{
 This package provides a R interface for the RAPIDS cuML library.
+}
+\seealso{
+Useful links:
+\itemize{
+  \item \url{https://mlverse.github.io/cuda.ml/}
+  \item Report bugs at \url{https://github.com/mlverse/cuda.ml/issues}
+}
+
 }
 \author{
 Yitao Li <yitao@rstudio.com>
diff --git a/man/cuda_ml_agglomerative_clustering.Rd b/man/cuda_ml_agglomerative_clustering.Rd
index 74cc963..8d6427e 100644
--- a/man/cuda_ml_agglomerative_clustering.Rd
+++ b/man/cuda_ml_agglomerative_clustering.Rd
@@ -19,11 +19,11 @@ and should consist of numeric values only.}
 \item{n_clusters}{The number of clusters to find. Default: 2L.}
 
 \item{metric}{Metric used for linkage computation. Must be one of
-{"euclidean", "l1", "l2", "manhattan", "cosine"}. If connectivity is
+\{"euclidean", "l1", "l2", "manhattan", "cosine"\}. If connectivity is
 "knn" then only "euclidean" is accepted. Default: "euclidean".}
 
 \item{connectivity}{The type of connectivity matrix to compute. Must be one
-of {"pairwise", "knn"}. Default: "pairwise".
+of \{"pairwise", "knn"\}. Default: "pairwise".
   - 'pairwise' will compute the entire fully-connected graph of pairwise
      distances between each set of points. This is the fastest to compute
      and can be very fast for smaller datasets but requires O(n^2) space.
diff --git a/man/cuda_ml_dbscan.Rd b/man/cuda_ml_dbscan.Rd
index 9b57851..a53ffd3 100644
--- a/man/cuda_ml_dbscan.Rd
+++ b/man/cuda_ml_dbscan.Rd
@@ -19,7 +19,7 @@ and should consist of numeric values only.}
 within distance `eps` from it.}
 
 \item{cuML_log_level}{Log level within cuML library functions. Must be one of
-{"off", "critical", "error", "warn", "info", "debug", "trace"}.
+\{"off", "critical", "error", "warn", "info", "debug", "trace"\}.
 Default: off.}
 }
 \value{
diff --git a/man/cuda_ml_fil_enabled.Rd b/man/cuda_ml_fil_enabled.Rd
index 23a39fa..1ebcefb 100644
--- a/man/cuda_ml_fil_enabled.Rd
+++ b/man/cuda_ml_fil_enabled.Rd
@@ -3,7 +3,7 @@
 \name{cuda_ml_fil_enabled}
 \alias{cuda_ml_fil_enabled}
 \title{Determine whether Forest Inference Library (FIL) functionalities are enabled
-in the current installation of {cuda.ml}.}
+in the current installation of \{cuda.ml\}.}
 \usage{
 cuda_ml_fil_enabled()
 }
@@ -16,7 +16,7 @@ CuML Forest Inference Library (FIL) functionalities (see
 https://github.com/rapidsai/cuml/tree/main/python/cuml/fil#readme) will
 require Treelite C API. If you need FIL to run tree-based model ensemble on
 GPU, and \code{fil_enabled()} returns FALSE, then please consider installing
-Treelite and then re-installing {cuda.ml}.
+Treelite and then re-installing \{cuda.ml\}.
 }
 \examples{
 if (cuda_ml_fil_enabled()) {
diff --git a/man/cuda_ml_fil_load_model.Rd b/man/cuda_ml_fil_load_model.Rd
index 3f7252b..e1d9a30 100644
--- a/man/cuda_ml_fil_load_model.Rd
+++ b/man/cuda_ml_fil_load_model.Rd
@@ -20,10 +20,10 @@ cuda_ml_fil_load_model(
 \item{filename}{Path to the saved model file.}
 
 \item{mode}{Type of task to be performed by the model. Must be one of
-{"classification", "regression"}.}
+\{"classification", "regression"\}.}
 
 \item{model_type}{Format of the saved model file. Notice if \code{filename}
-ends with ".json" and \code{model_type} is "xgboost", then {cuda.ml} will
+ends with ".json" and \code{model_type} is "xgboost", then \{cuda.ml\} will
 assume the model file is in XGBoost JSON (instead of binary) format.
 Default: "xgboost".}
 
diff --git a/man/cuda_ml_kmeans.Rd b/man/cuda_ml_kmeans.Rd
index 4f738a7..28c7529 100644
--- a/man/cuda_ml_kmeans.Rd
+++ b/man/cuda_ml_kmeans.Rd
@@ -32,7 +32,7 @@ the initial value of a centroid. Default: "kmeans++".}
 \item{seed}{Seed to the random number generator. Default: 0.}
 
 \item{cuML_log_level}{Log level within cuML library functions. Must be one of
-{"off", "critical", "error", "warn", "info", "debug", "trace"}.
+\{"off", "critical", "error", "warn", "info", "debug", "trace"\}.
 Default: off.}
 }
 \value{
diff --git a/man/cuda_ml_knn.Rd b/man/cuda_ml_knn.Rd
index 4d72201..93f4d82 100644
--- a/man/cuda_ml_knn.Rd
+++ b/man/cuda_ml_knn.Rd
@@ -17,9 +17,8 @@ cuda_ml_knn(x, ...)
   x,
   y,
   algo = c("brute", "ivfflat", "ivfpq", "ivfsq"),
-  metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan",
-    "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine",
-    "correlation"),
+  metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis",
+    "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"),
   p = 2,
   neighbors = 5L,
   ...
@@ -29,9 +28,8 @@ cuda_ml_knn(x, ...)
   x,
   y,
   algo = c("brute", "ivfflat", "ivfpq", "ivfsq"),
-  metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan",
-    "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine",
-    "correlation"),
+  metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis",
+    "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"),
   p = 2,
   neighbors = 5L,
   ...
@@ -41,9 +39,8 @@ cuda_ml_knn(x, ...)
   formula,
   data,
   algo = c("brute", "ivfflat", "ivfpq", "ivfsq"),
-  metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan",
-    "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine",
-    "correlation"),
+  metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis",
+    "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"),
   p = 2,
   neighbors = 5L,
   ...
@@ -53,9 +50,8 @@ cuda_ml_knn(x, ...)
   x,
   data,
   algo = c("brute", "ivfflat", "ivfpq", "ivfsq"),
-  metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan",
-    "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine",
-    "correlation"),
+  metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis",
+    "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"),
   p = 2,
   neighbors = 5L,
   ...
@@ -76,7 +72,7 @@ cuda_ml_knn(x, ...)
 desired responses.}
 
 \item{algo}{The query algorithm to use. Must be one of
-  {"brute", "ivfflat", "ivfpq", "ivfsq"} or a KNN algorithm specification
+  \{"brute", "ivfflat", "ivfpq", "ivfsq"\} or a KNN algorithm specification
   constructed using the \code{cuda_ml_knn_algo_*} family of functions.
   If the algorithm is specified by one of the \code{cuda_ml_knn_algo_*}
   functions, then values of all required parameters of the algorithm will
@@ -98,10 +94,10 @@ desired responses.}
 
   Default: "brute".}
 
-\item{metric}{Distance metric to use. Must be one of {"euclidean", "l2",
+\item{metric}{Distance metric to use. Must be one of \{"euclidean", "l2",
 "l1", "cityblock", "taxicab", "manhattan", "braycurtis", "canberra",
 "minkowski", "lp", "chebyshev", "linf", "jensenshannon", "cosine",
-"correlation"}.
+"correlation"\}.
 Default: "euclidean".}
 
 \item{p}{Parameter for the Minkowski metric. If p = 1, then the metric is
diff --git a/man/cuda_ml_knn_algo_ivfsq.Rd b/man/cuda_ml_knn_algo_ivfsq.Rd
index 6b7be11..f4b76f2 100644
--- a/man/cuda_ml_knn_algo_ivfsq.Rd
+++ b/man/cuda_ml_knn_algo_ivfsq.Rd
@@ -18,9 +18,9 @@ cuda_ml_knn_algo_ivfsq(
 \item{nprobe}{At query time, the number of cells used for approximate nearest
 neighbor search.}
 
-\item{qtype}{Quantizer type. Must be one of {"QT_8bit", "QT_4bit",
+\item{qtype}{Quantizer type. Must be one of \{"QT_8bit", "QT_4bit",
 "QT_8bit_uniform", "QT_4bit_uniform", "QT_fp16", "QT_8bit_direct",
-"QT_6bit"}.}
+"QT_6bit"\}.}
 
 \item{encode_residual}{Whether to encode residuals.}
 }
diff --git a/man/cuda_ml_logistic_reg.Rd b/man/cuda_ml_logistic_reg.Rd
index df89512..d0c1394 100644
--- a/man/cuda_ml_logistic_reg.Rd
+++ b/man/cuda_ml_logistic_reg.Rd
@@ -92,7 +92,7 @@ mean of the response variable. If FALSE, then the model expects data to be
 centered. Default: TRUE.}
 
 \item{penalty}{The penalty type, must be one of
-{"none", "l1", "l2", "elasticnet"}.
+\{"none", "l1", "l2", "elasticnet"\}.
 If "none" or "l2" is selected, then L-BFGS solver will be used.
 If "l1" is selected, solver OWL-QN will be used.
 If "elasticnet" is selected, OWL-QN will be used if l1_ratio > 0, otherwise
diff --git a/man/cuda_ml_ols.Rd b/man/cuda_ml_ols.Rd
index c4d2fbf..05cb292 100644
--- a/man/cuda_ml_ols.Rd
+++ b/man/cuda_ml_ols.Rd
@@ -63,7 +63,7 @@ cuda_ml_ols(x, ...)
 \item{y}{A numeric vector (for regression) or factor (for classification) of
 desired responses.}
 
-\item{method}{Must be one of {"svd", "eig", "qr"}.
+\item{method}{Must be one of \{"svd", "eig", "qr"\}.
 
   - "svd": compute SVD decomposition using Jacobi iterations.
   - "eig": use an eigendecomposition of the covariance matrix.
diff --git a/man/cuda_ml_pca.Rd b/man/cuda_ml_pca.Rd
index 376954c..80ad31c 100644
--- a/man/cuda_ml_pca.Rd
+++ b/man/cuda_ml_pca.Rd
@@ -41,7 +41,7 @@ Default: FALSE.}
 of the input data. Default: TRUE.}
 
 \item{cuML_log_level}{Log level within cuML library functions. Must be one of
-{"off", "critical", "error", "warn", "info", "debug", "trace"}.
+\{"off", "critical", "error", "warn", "info", "debug", "trace"\}.
 Default: off.}
 }
 \value{
diff --git a/man/cuda_ml_rand_forest.Rd b/man/cuda_ml_rand_forest.Rd
index c922a55..d629ece 100644
--- a/man/cuda_ml_rand_forest.Rd
+++ b/man/cuda_ml_rand_forest.Rd
@@ -149,7 +149,7 @@ given batch. Default: 128L.}
 Default: 8L.}
 
 \item{cuML_log_level}{Log level within cuML library functions. Must be one of
-{"off", "critical", "error", "warn", "info", "debug", "trace"}.
+\{"off", "critical", "error", "warn", "info", "debug", "trace"\}.
 Default: off.}
 
 \item{formula}{A formula specifying the outcome terms on the left-hand side,
diff --git a/man/cuda_ml_sgd.Rd b/man/cuda_ml_sgd.Rd
index c1fc940..5250bfe 100644
--- a/man/cuda_ml_sgd.Rd
+++ b/man/cuda_ml_sgd.Rd
@@ -107,10 +107,10 @@ desired responses.}
 mean of the response variable. If FALSE, then the model expects data to be
 centered. Default: TRUE.}
 
-\item{loss}{Loss function, must be one of {"squared_loss", "log", "hinge"}.}
+\item{loss}{Loss function, must be one of \{"squared_loss", "log", "hinge"\}.}
 
 \item{penalty}{Type of regularization to perform, must be one of
-  {"none", "l1", "l2", "elasticnet"}.
+  \{"none", "l1", "l2", "elasticnet"\}.
 
   - "none": no regularization.
   - "l1": perform regularization based on the L1-norm (LASSO) which tries to
@@ -143,7 +143,7 @@ Default: 1e-3.}
 \item{shuffle}{Whether to shuffles the training data after each epoch.
 Default: True.}
 
-\item{learning_rate}{Must be one of {"constant", "invscaling", "adaptive"}.
+\item{learning_rate}{Must be one of \{"constant", "invscaling", "adaptive"\}.
 
   - "constant": the learning rate will be kept constant.
   - "invscaling": (learning rate) = (initial learning rate) / pow(t, power_t)
diff --git a/man/cuda_ml_svm.Rd b/man/cuda_ml_svm.Rd
index 0fef9d6..2219b48 100644
--- a/man/cuda_ml_svm.Rd
+++ b/man/cuda_ml_svm.Rd
@@ -154,7 +154,7 @@ tasks. Default: 0.1.}
 \item{sample_weights}{Optional weight assigned to each input data point.}
 
 \item{cuML_log_level}{Log level within cuML library functions. Must be one of
-{"off", "critical", "error", "warn", "info", "debug", "trace"}.
+\{"off", "critical", "error", "warn", "info", "debug", "trace"\}.
 Default: off.}
 
 \item{formula}{A formula specifying the outcome terms on the left-hand side,
diff --git a/man/cuda_ml_tsne.Rd b/man/cuda_ml_tsne.Rd
index c6fb603..f3b5f44 100644
--- a/man/cuda_ml_tsne.Rd
+++ b/man/cuda_ml_tsne.Rd
@@ -36,7 +36,7 @@ and should consist of numeric values only.}
 \item{n_neighbors}{The number of datapoints to use in the attractive forces.
 Default: ceiling(3 * perplexity).}
 
-\item{method}{T-SNE method, must be one of {"barnes_hut", "fft", "exact"}.
+\item{method}{T-SNE method, must be one of \{"barnes_hut", "fft", "exact"\}.
 The "exact" method will be more accurate but slower. Both "barnes_hut" and
 "fft" methods are fast approximations.}
 
@@ -51,7 +51,7 @@ at least 250. Default: 1000L.}
 (10, 1000). If the learning rate is too high, then t-SNE result could look
 like a cloud / ball of points.}
 
-\item{learning_rate_method}{Must be one of {"adaptive", "none"}. If
+\item{learning_rate_method}{Must be one of \{"adaptive", "none"\}. If
 "adaptive", then learning rate, early exaggeration, and perplexity are
 automatically tuned based on input size. Default: "adaptive".}
 
@@ -94,7 +94,7 @@ runs, even with the same \code{seed} being used for each run.
 Default: NULL.}
 
 \item{cuML_log_level}{Log level within cuML library functions. Must be one of
-{"off", "critical", "error", "warn", "info", "debug", "trace"}.
+\{"off", "critical", "error", "warn", "info", "debug", "trace"\}.
 Default: off.}
 }
 \value{
diff --git a/man/cuda_ml_tsvd.Rd b/man/cuda_ml_tsvd.Rd
index 96c4a83..bd6dd02 100644
--- a/man/cuda_ml_tsvd.Rd
+++ b/man/cuda_ml_tsvd.Rd
@@ -37,7 +37,7 @@ Default: 15.}
 of the input data. Default: TRUE.}
 
 \item{cuML_log_level}{Log level within cuML library functions. Must be one of
-{"off", "critical", "error", "warn", "info", "debug", "trace"}.
+\{"off", "critical", "error", "warn", "info", "debug", "trace"\}.
 Default: off.}
 }
 \value{
diff --git a/man/cuda_ml_umap.Rd b/man/cuda_ml_umap.Rd
index 3b46441..c9cfdd4 100644
--- a/man/cuda_ml_umap.Rd
+++ b/man/cuda_ml_umap.Rd
@@ -48,7 +48,7 @@ low dimensional embedding. Default: 500.}
 optimization. Default: 1.0.}
 
 \item{init}{Initialization mode of the low dimensional embedding. Must be
-one of {"spectral", "random"}. Default: "spectral".}
+one of \{"spectral", "random"\}. Default: "spectral".}
 
 \item{min_dist}{The effective minimum distance between embedded points.
 Default: 0.1.}
@@ -88,7 +88,7 @@ the target simplcial set. Default: n_neighbors.}
 
 \item{target_metric}{The metric for measuring distance between the actual and
 and the target values (\code{y}) if using supervised dimension reduction.
-Must be one of {"categorical", "euclidean"}. Default: "categorical".}
+Must be one of \{"categorical", "euclidean"\}. Default: "categorical".}
 
 \item{target_weight}{Weighting factor between data topology and target
 topology. A value of 0.0 weights entirely on data, a value of 1.0 weights
@@ -106,7 +106,7 @@ If the PRNG seed is not set, then the trained embeddings will not be
 deterministic.}
 
 \item{cuML_log_level}{Log level within cuML library functions. Must be one of
-{"off", "critical", "error", "warn", "info", "debug", "trace"}.
+\{"off", "critical", "error", "warn", "info", "debug", "trace"\}.
 Default: off.}
 }
 \value{
diff --git a/man/has_cuML.Rd b/man/has_cuML.Rd
index a78075c..8305755 100644
--- a/man/has_cuML.Rd
+++ b/man/has_cuML.Rd
@@ -2,17 +2,17 @@
 % Please edit documentation in R/cuml_utils.R
 \name{has_cuML}
 \alias{has_cuML}
-\title{Determine whether {cuda.ml} was linked to a valid version of the RAPIDS cuML
+\title{Determine whether \{cuda.ml\} was linked to a valid version of the RAPIDS cuML
 shared library.}
 \usage{
 has_cuML()
 }
 \value{
-A logical value indicating whether the current installation {cuda.ml}
+A logical value indicating whether the current installation \{cuda.ml\}
   was linked to a valid version of the RAPIDS cuML shared library.
 }
 \description{
-Determine whether {cuda.ml} was linked to a valid version of the RAPIDS cuML
+Determine whether \{cuda.ml\} was linked to a valid version of the RAPIDS cuML
 shared library.
 }
 \examples{
diff --git a/man/predict.cuda_ml_rand_forest.Rd b/man/predict.cuda_ml_rand_forest.Rd
index 9a05897..e9510fe 100644
--- a/man/predict.cuda_ml_rand_forest.Rd
+++ b/man/predict.cuda_ml_rand_forest.Rd
@@ -27,7 +27,7 @@ is set to \code{TRUE} or \code{FALSE} but the model being applied does
 not support class probabilities output.}
 
 \item{cuML_log_level}{Log level within cuML library functions. Must be one of
-{"off", "critical", "error", "warn", "info", "debug", "trace"}.
+\{"off", "critical", "error", "warn", "info", "debug", "trace"\}.
 Default: off.}
 
 \item{...}{Additional arguments to \code{predict()}. Currently unused.}
diff --git a/src/CMakeLists.txt.in b/src/CMakeLists.txt.in
index 88e08a7..030d323 100644
--- a/src/CMakeLists.txt.in
+++ b/src/CMakeLists.txt.in
@@ -128,6 +128,7 @@ find_package(Treelite)
 if(Treelite_FOUND)
   set(CUML4R_LIBS ${CUML4R_LIBS} treelite::treelite treelite::treelite_runtime)
   set(CUML4R_INCLUDE_DIRS ${CUML4R_INCLUDE_DIRS} ${Treelite_INCLUDE_DIRS})
+  message(STATUS "Treelite found, ignoring stub headers: ${CUML_STUB_HEADERS_DIR}")
 else()
   message(
     WARNING
diff --git a/tests/testthat.R b/tests/testthat.R
index 269f852..1f11702 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -1,6 +1,7 @@
 library(testthat)
+library(cuda.ml)
 
-if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+if (identical(Sys.getenv("NOT_CRAN"), "true") && has_cuML()) {
   filter <- Sys.getenv("TESTTHAT_FILTER", unset = "")
   if (identical(filter, "")) filter <- NULL
 
diff --git a/tests/testthat/helper-initialize.R b/tests/testthat/helper-initialize.R
index cd898cb..d533348 100644
--- a/tests/testthat/helper-initialize.R
+++ b/tests/testthat/helper-initialize.R
@@ -15,12 +15,8 @@ expect_libcuml <- function() {
 
 expect_libcuml()
 
-sklearn <- tryCatch(reticulate::import("sklearn"),
-  error = function(e) {
-    reticulate::py_install("sklearn", pip = TRUE)
-    reticulate::import("sklearn")
-  }
-)
+reticulate::py_require("scikit-learn")
+sklearn <- reticulate::import("sklearn")
 sklearn_iris_dataset <- list(
   data = iris[, names(iris) != "Species"] %>%
     unname() %>%
diff --git a/tests/testthat/test-elastic-net.R b/tests/testthat/test-elastic-net.R
index 55b3680..78fef98 100644
--- a/tests/testthat/test-elastic-net.R
+++ b/tests/testthat/test-elastic-net.R
@@ -33,7 +33,7 @@ test_that("Elastic net regressor works as expected", {
 
         sklearn_elastic_net_regressor <- sklearn$linear_model$ElasticNet(
           alpha = 1e-3,
-          max_iter = 10000,
+          max_iter = 10000L,
           tol = 1e-4,
           fit_intercept = fit_intercept,
           l1_ratio = l1_ratio
diff --git a/tests/testthat/test-lasso.R b/tests/testthat/test-lasso.R
index 85016eb..0efe7eb 100644
--- a/tests/testthat/test-lasso.R
+++ b/tests/testthat/test-lasso.R
@@ -32,7 +32,7 @@ test_that("LASSO regressor works as expected", {
 
       sklearn_lasso_regressor <- sklearn$linear_model$Lasso(
         alpha = 1e-3,
-        max_iter = 10000,
+        max_iter = 10000L,
         tol = 1e-4,
         fit_intercept = fit_intercept
       )
diff --git a/tests/testthat/test-tsvd.R b/tests/testthat/test-tsvd.R
index ba7bd1e..337e67c 100644
--- a/tests/testthat/test-tsvd.R
+++ b/tests/testthat/test-tsvd.R
@@ -7,14 +7,24 @@ sklearn_tsvd_model <- tsvd_model$fit(sklearn_iris_dataset$data)
 
 cuda_ml_tsvd_model <- cuda_ml_tsvd(iris[1:4], n_components = 2)
 
+# SVD components are only defined up to sign — align signs before comparing.
+# For each component row, flip the cuML sign to match sklearn if the first
+# non-negligible element disagrees.
+align_svd_signs <- function(a, b) {
+  for (i in seq_len(nrow(a))) {
+    if (sign(a[i, 1]) != sign(b[i, 1])) {
+      a[i, ] <- -a[i, ]
+    }
+  }
+  a
+}
+
 test_that("cuda_ml_tsvd() works as expected", {
+  sklearn_components <- sklearn_tsvd_model$components_
+  aligned_components <- align_svd_signs(cuda_ml_tsvd_model$components, sklearn_components)
+
   expect_equal(
-    cuda_ml_tsvd_model$components, sklearn_tsvd_model$components_,
-    tolerance = 1e-8, scale = 1
-  )
-  expect_equal(
-    cuda_ml_tsvd_model$explained_variance,
-    as.numeric(sklearn_tsvd_model$explained_variance_),
+    aligned_components, sklearn_components,
     tolerance = 1e-8, scale = 1
   )
   expect_equal(
@@ -32,18 +42,25 @@ test_that("cuda_ml_tsvd() works as expected", {
     as.numeric(sklearn_tsvd_model$singular_values_),
     tolerance = 1e-8, scale = 1
   )
-  expect_equal(
-    cuda_ml_tsvd_model$transformed_data,
-    sklearn_tsvd_model$transform(sklearn_iris_dataset$data),
-    tolerance = 1e-8, scale = 1
-  )
+
+  # Transformed data columns also have sign ambiguity matching the components
+  sklearn_transformed <- sklearn_tsvd_model$transform(sklearn_iris_dataset$data)
+  cuda_transformed <- cuda_ml_tsvd_model$transformed_data
+  for (j in seq_len(ncol(cuda_transformed))) {
+    if (sign(cuda_transformed[1, j]) != sign(sklearn_transformed[1, j])) {
+      cuda_transformed[, j] <- -cuda_transformed[, j]
+    }
+  }
+  expect_equal(cuda_transformed, sklearn_transformed, tolerance = 1e-8, scale = 1)
 })
 
 test_that("cuda_ml_inverse_transform() works as expected for TSVD models", {
-  expect_equal(
-    cuda_ml_inverse_transform(
-      cuda_ml_tsvd_model, cuda_ml_tsvd_model$transformed_data
-    ),
-    sklearn_tsvd_model$inverse_transform(cuda_ml_tsvd_model$transformed_data)
+  # inverse_transform recovers the original data regardless of sign convention
+  cuda_ml_reconstructed <- cuda_ml_inverse_transform(
+    cuda_ml_tsvd_model, cuda_ml_tsvd_model$transformed_data
+  )
+  sklearn_reconstructed <- sklearn_tsvd_model$inverse_transform(
+    sklearn_tsvd_model$transform(sklearn_iris_dataset$data)
   )
+  expect_equal(cuda_ml_reconstructed, sklearn_reconstructed, tolerance = 1e-2, scale = 1)
 })
diff --git a/tools/config/configure.R b/tools/config/configure.R
index d9a84f1..e271f6d 100644
--- a/tools/config/configure.R
+++ b/tools/config/configure.R
@@ -74,8 +74,8 @@ run_cmake <- function() {
   cuml_prefix <- get_cuml_prefix()
   bundle_libcuml <- FALSE
   if (is.na(cuml_prefix)) {
-    cuml_prefix <- normalizePath(file.path(pkg_root(), "libcuml"))
     download_libcuml()
+    cuml_prefix <- normalizePath(file.path(pkg_root(), "libcuml"))
     dir.create("inst")
     file.rename(file.path("libcuml", "lib"), file.path("inst", "libs"))
     file.symlink(file.path("..", "inst", "libs"), file.path("libcuml", "lib"))
@@ -92,7 +92,7 @@ run_cmake <- function() {
 
   cmake_args <- c(
     ".",
-    "-DCMAKE_CUDA_ARCHITECTURES=NATIVE",
+    paste0("-DCMAKE_CUDA_ARCHITECTURES=", Sys.getenv("CMAKE_CUDA_ARCHITECTURES", unset = "NATIVE")),
     paste0("-DCUML_INCLUDE_DIR=", file.path(cuml_prefix, "include")),
     paste0("-DCUML_LIB_DIR=", file.path(cuml_prefix, "lib")),
     paste0(