1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@
^src/CMakeLists\.txt$
^src/CMakeCache\.txt$
^src/CMakeFiles/*
^src/\.cmake-build/*
^src/_deps/*
^src/eval_gpu_archs*
^src/*\.o$
4 changes: 3 additions & 1 deletion DESCRIPTION
@@ -46,5 +46,7 @@ LinkingTo: Rcpp
Encoding: UTF-8
RoxygenNote: 7.3.3
OS_type: unix
SystemRequirements: RAPIDS cuML (see https://rapids.ai/start.html)
SystemRequirements: NVIDIA GPU and driver, CUDA Toolkit with nvcc, and uv or
Python/pip for automatic RAPIDS cuML bootstrap. Alternatively, an existing
RAPIDS cuML installation can be provided with CUML_PREFIX.
NeedsCompilation: yes
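The `CUML_PREFIX` route named in `SystemRequirements` expects a specific prefix layout. A minimal sketch of that layout, assuming a hypothetical `/opt/rapids` install (adjust the path to the real RAPIDS installation):

```shell
# Hypothetical prefix; adjust to the real RAPIDS installation.
CUML_PREFIX=/opt/rapids

# A usable prefix must provide the cuML headers and the shared library:
for p in include/cuml lib/libcuml++.so; do
  echo "required: $CUML_PREFIX/$p"
done
```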
13 changes: 11 additions & 2 deletions R/cuml_utils.R
@@ -4,14 +4,23 @@
#' @return A logical value indicating whether the current installation of
#'   \{cuda.ml\} was linked to a valid version of the RAPIDS cuML shared
#'   library.
#'
#' @details
#' If this returns \code{FALSE}, \pkg{cuda.ml} was installed in stub-only mode.
#' On a GPU machine, verify that \code{nvidia-smi} and \code{nvcc --version}
#' both work, then reinstall \pkg{cuda.ml}. During installation, \pkg{cuda.ml}
#' can bootstrap RAPIDS cuML from pip wheels with \code{uv} or Python/pip. If
#' RAPIDS cuML is already installed, set \code{CUML_PREFIX} to a prefix
#' containing \code{include/cuml} and \code{lib/libcuml++.so} before
#' reinstalling.
#'
#' @examples
#'
#' library(cuda.ml)
#'
#' if (!has_cuML()) {
#' warning(
#' "Please install the RAPIDS cuML shared library first, and then re-",
#' "install {cuda.ml}."
#' "This installation was built without RAPIDS cuML. Verify `nvidia-smi` ",
#' "and `nvcc --version`, then reinstall {cuda.ml}."
#' )
#' }
#' @export
1 change: 1 addition & 0 deletions R/knn.R
@@ -72,6 +72,7 @@ cuda_ml_knn_algo_ivfpq <- function(nlist, nprobe, m, n_bits,
nlist = as.integer(nlist),
nprobe = as.integer(nprobe),
M = as.integer(m),
n_bits = as.integer(n_bits),
usePrecomputedTables = as.logical(use_precomputed_tables)
)
)
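With the added line, `n_bits` now reaches the underlying cuML IVF-PQ index instead of being silently dropped. A hedged usage sketch (argument values are illustrative; the signature is read off the hunk header above, and `use_precomputed_tables` is assumed to have a usable default elsewhere in the function):

```r
# Sketch: 1024 inverted lists, 20 probed at query time, 8 subquantizers,
# and 8 bits per subquantizer code -- n_bits is now honored.
algo <- cuda_ml_knn_algo_ivfpq(
  nlist = 1024L, nprobe = 20L, m = 8L, n_bits = 8L,
  use_precomputed_tables = FALSE
)
```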
38 changes: 25 additions & 13 deletions R/package.R
@@ -2,6 +2,23 @@
#'
#' This package provides an R interface to the RAPIDS cuML library.
#'
#' @section Installation:
#' A functional GPU installation requires an NVIDIA GPU with a working driver,
#' a CUDA Toolkit installation that provides \code{nvcc}, and normal R package
#' build tools. During installation, \pkg{cuda.ml} first looks for an existing
#' RAPIDS installation through \code{CUML_PREFIX} or \code{CUDA_PATH}. If none
#' is found, it can bootstrap RAPIDS cuML from pip wheels with \code{uv} or
#' Python/pip and link against the resulting local prefix.
#'
#' On machines without a usable NVIDIA driver/GPU and \code{nvcc}, including
#' CRAN check machines, \pkg{cuda.ml} may install in stub-only mode. In that
#' mode \code{has_cuML()} returns \code{FALSE}, and cuML-backed algorithms are
#' unavailable until the system prerequisites are installed and \pkg{cuda.ml}
#' is reinstalled.
#'
#' Useful environment variables include \code{CUDA_HOME}, \code{CUML_PREFIX},
#' \code{CUML_BOOTSTRAP}, and \code{CUML_BOOTSTRAP_CACHE}.
#'
#' @author Yitao Li <yitao@rstudio.com>
#' @import Rcpp
#' @useDynLib cuda.ml, .registration = TRUE
@@ -17,20 +34,15 @@
if (!has_cuML()) {
packageStartupMessage(
"
The current installation of {", pkgname, "} will not function as expected
because it was not linked with a valid version of the RAPIDS cuML shared
library.
The current installation of {", pkgname, "} was built without a usable
RAPIDS cuML shared library.

To fix this, ensure `nvidia-smi` and `nvcc --version` both work, then
reinstall {", pkgname, "}. During installation, {", pkgname, "} can
bootstrap RAPIDS cuML from pip wheels with `uv` or Python/pip.

To fix this issue, please follow https://rapids.ai/start.html#get-rapids
to install the RAPIDS cuML shared library from Conda and ensure the
'CUML_PREFIX' env variable is set to a valid RAPIDS conda env directory
(e.g., '/home/user/anaconda3/envs/rapids-21.06', '/usr', or similar)
during the installation of {", pkgname, "} or alternatively, follow
https://github.com/yitao-li/cuml-installation-notes#build-from-source-without-conda-and-without-multi-gpu-support
or
https://github.com/yitao-li/cuml-installation-notes#build-from-source-without-conda-and-with-multi-gpu-support
or similar to build and install RAPIDS cuML library from source, and
then re-install {", pkgname, "}.\n\n
If RAPIDS is already installed, set `CUML_PREFIX` to a prefix containing
include/cuml and lib/libcuml++.so before reinstalling.\n\n
"
)
}
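The environment variables documented in the new `@section` can be combined; a sketch of a non-bootstrapping install against an existing RAPIDS prefix (all paths hypothetical; `CUML_PREFIX` must contain `include/cuml` and `lib/libcuml++.so`, and `CUDA_HOME` must contain `bin/nvcc`):

```shell
# Hypothetical paths; adjust to the local toolkit and RAPIDS install.
export CUDA_HOME=/usr/local/cuda   # CUDA Toolkit prefix with bin/nvcc
export CUML_PREFIX=/opt/rapids     # existing RAPIDS cuML prefix
export CUML_BOOTSTRAP=0            # never fall back to the pip bootstrap

# Show the knobs the configure step would see:
env | grep -E '^(CUDA_HOME|CUML_)' | sort
```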
8 changes: 8 additions & 0 deletions R/rand_forest.R
@@ -331,6 +331,14 @@ cuda_ml_rand_forest_impl_regression <- function(processed, mtry, trees, min_n,

#' @export
cuda_ml_get_state.cuda_ml_rand_forest <- function(model) {
if (!cuda_ml_fil_enabled()) {
stop(
"Random forest serialization requires Treelite/FIL support, but FIL is ",
"disabled in this cuda.ml build.",
call. = FALSE
)
}

get_state_impl <- switch(model$mode,
classification = .rf_classifier_get_state,
regression = .rf_regressor_get_state
16 changes: 16 additions & 0 deletions R/rand_proj.R
@@ -5,6 +5,22 @@ new_rproj_model <- function(rproj_ctx) {
model
}

cuda_ml_rand_proj_available <- function() {
tryCatch(
{
.rproj_johnson_lindenstrauss_min_dim(2L, 0.5)
TRUE
},
error = function(e) {
if (grepl("random projection support is not available", e$message)) {
FALSE
} else {
stop(e)
}
}
)
}
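A sketch of how the probe above might be used by downstream code (the internal `.rproj_johnson_lindenstrauss_min_dim` call and its two arguments are taken from the probe itself; the guard pattern is the point, not the specific call):

```r
# Sketch: guard optional random-projection functionality at run time
# rather than failing deep inside a model fit.
if (cuda_ml_rand_proj_available()) {
  min_dim <- .rproj_johnson_lindenstrauss_min_dim(1000L, 0.1)
} else {
  message("random projection support is not available in this build")
}
```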

#' Random projection for dimensionality reduction.
#'
#' Generate a random projection matrix for dimensionality reduction, and
14 changes: 14 additions & 0 deletions R/tsvd.R
@@ -48,11 +48,25 @@ cuda_ml_tsvd <- function(x,
transform_input = transform_input,
verbosity = cuML_log_level
)
model <- tsvd_flip_signs(model)
class(model) <- c("cuda_ml_tsvd", class(model))

model
}

tsvd_flip_signs <- function(model) {
signs <- apply(model$components, 1L, function(x) {
if (x[[which.max(abs(x))]] < 0) -1 else 1
})

model$components <- sweep(model$components, 1L, signs, `*`)
if (!is.null(model$transformed_data)) {
model$transformed_data <- sweep(model$transformed_data, 2L, signs, `*`)
}

model
}
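Because `tsvd_flip_signs()` is base R only, its sign convention can be checked on a toy model; a sketch (the list layout mimics the `components` and `transformed_data` fields used above):

```r
# Toy check of the sign convention: after flipping, the largest-magnitude
# entry in each row of `components` is non-negative.
toy <- list(
  components = rbind(c(-0.8, 0.6), c(0.3, -0.95)),
  transformed_data = matrix(1, nrow = 1L, ncol = 2L)
)
flipped <- tsvd_flip_signs(toy)
# Both rows are negated here (their max-|x| entries, -0.8 and -0.95, are
# negative), and the columns of transformed_data are negated to match.
```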

#' @export
cuda_ml_transform.cuda_ml_tsvd <- function(model, x, ...) {
.tsvd_transform(model = model, x = as.matrix(x))
148 changes: 75 additions & 73 deletions README.Rmd
@@ -176,115 +176,117 @@ about the MNIST dataset:

## Installation

In order for {cuda.ml} to work as expected, the C++/CUDA source code of
{cuda.ml} must be linked with CUDA runtime and a valid copy of the RAPIDS cuML
library.
For a fully functional installation, {cuda.ml} needs:

Before installing {cuda.ml} itself, it may be worthwhile to take a quick look
through the sub-sections below on how to properly setup all of {cuda.ml}'s
required runtime dependencies.
- an NVIDIA GPU with a working NVIDIA driver;
- a CUDA Toolkit installation that provides `nvcc`;
- normal R package build tools; and
- either `uv` or Python with `pip`.

### Quick note on installing the RAPIDS cuML library:
When those prerequisites are present, {cuda.ml} can bootstrap RAPIDS cuML from
pip wheels during installation. You do not need conda, and you usually do not
need to set `CUML_PREFIX` manually.

Although Conda is the only officially supported distribution channel at the
moment for RAPIDS cuML (i.e., see https://rapids.ai/start.html#get-rapids),
you can still build and install this library from source without relying on
Conda.
See https://github.com/yitao-li/cuml-installation-notes for build-from-source
instructions.
On a fresh Ubuntu installation, install the R, build, and Python prerequisites:

### Quick install instructions for Ubuntu 20.04:

#### Install deps:
```bash
sudo apt install -y cmake ccache libblas3 liblapack3
```

```bash
sudo apt update
sudo apt install -y r-base-dev build-essential git cmake \
  python3 python3-pip python3-venv ubuntu-drivers-common
```

Install the NVIDIA driver, reboot, and verify that the driver can see your GPU:

### Install CUDA
(consult https://developer.nvidia.com/cuda-downloads for other platforms)
```bash
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget https://developer.download.nvidia.com/compute/cuda/11.4.2/local_installers/cuda-repo-ubuntu2004-11-4-local_11.4.2-470.57.02-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu2004-11-4-local_11.4.2-470.57.02-1_amd64.deb
sudo apt-key add /var/cuda-repo-ubuntu2004-11-4-local/7fa2af80.pub
sudo apt-get update
sudo apt-get -y install cuda
sudo ubuntu-drivers install
sudo reboot

nvidia-smi
```
### Add CUDA executables to path
(nvcc is needed for building the C++/CUDA source code of {cuda.ml})

Install a CUDA Toolkit that includes `nvcc`. Use NVIDIA's CUDA Linux
installation guide for your Ubuntu release to add the CUDA apt repository, then:

```bash
echo "export PATH=$PATH:/usr/local/cuda/bin" >> ~/.bashrc
source ~/.bashrc
sudo apt update
sudo apt install -y cuda-toolkit

nvcc --version
```

### Install Miniconda:
If the toolkit is installed but `nvcc` is not on `PATH`, set `CUDA_HOME` to the
toolkit prefix before installing {cuda.ml}, for example:

```bash
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
chmod +x Miniconda3-latest-Linux-x86_64.sh
./Miniconda3-latest-Linux-x86_64.sh -b
# consult https://rapids.ai/start.html for alternatives
export CUDA_HOME=/usr/local/cuda
```

### Create and configure the conda env
```
# This is a relatively big download, may take a while
~/miniconda3/bin/conda create -n rapids-21.08 -c rapidsai -c nvidia -c conda-forge \
rapids-blazing=21.08 python=3.8 cudatoolkit=11.2
```
Then install {cuda.ml}:

### Install cmake
CUDA dependencies require a relatively recent version of CMake, so you need to install it manually
```bash
wget https://github.com/Kitware/CMake/releases/download/v3.22.0/cmake-3.22.0.tar.gz
tar -xzf cmake-3.22.0.tar.gz
cd cmake-3.22.0
./bootstrap && make -j8 && sudo make install
cd ..
```

``` r
install.packages("cuda.ml")
```

### Activate the conda env:
```bash
. ~/miniconda3/bin/activate
conda activate rapids-21.08
```

And verify that the installed package was linked with real cuML:

``` r
library(cuda.ml)
has_cuML()
```

### Consider adjusting `LD_LIBRARY_PATH`
If this returns `TRUE`, {cuda.ml} is using RAPIDS cuML. If it returns `FALSE`,
the package installed in stub-only mode; check the install output for the first
missing prerequisite.

The subsequent steps may (or may not) fail without the following:
### What happens during installation

```bash
export LD_LIBRARY_PATH=~/miniconda3/envs/rapids-21.08/lib
```
The configure script first looks for an existing RAPIDS installation through
`CUML_PREFIX` or `CUDA_PATH`. If no existing installation is found, and a
working NVIDIA driver/GPU plus `nvcc` are available, it bootstraps RAPIDS cuML
from pip wheels into a cache directory and links {cuda.ml} against that prefix.

If you get some error indicating a GLIBC version mismatch in the subsequent
steps, then please try adjusting `LD_LIBRARY_PATH` as a workaround.
The bootstrap prefers `uv` when available, then reticulate's managed `uv`, then
`python -m pip`, `python3 -m pip`, `pip`, and `pip3`.
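The fallback order above can be emulated from the shell; a sketch that probes for the first available tool (tool names come from the paragraph above; the real configure script may differ in detail, and `python -m pip` is approximated here by probing the `python` binary):

```shell
# Emulate the documented probe order, most to least preferred.
chosen=none
for cand in uv python3 python pip pip3; do
  if command -v "$cand" >/dev/null 2>&1; then
    chosen=$cand
    break
  fi
done
echo "would bootstrap with: $chosen"
```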

Useful environment variables:

### Consider enabling ccache
- `CUDA_HOME`: CUDA Toolkit prefix containing `bin/nvcc`.
- `CUML_PREFIX`: existing RAPIDS prefix containing `include/cuml` and
`lib/libcuml++.so`.
- `CUML_BOOTSTRAP=0`: disable automatic RAPIDS pip bootstrap.
- `CUML_BOOTSTRAP_CACHE`: cache directory for bootstrapped RAPIDS headers and
libraries.
- `CUML_PIP_VERSION`: RAPIDS pip wheel version to install.

To speed up recompilation times during development, set this env var:
```bash
echo "export CUML4R_ENABLE_CCACHE=1" >> ~/.bashrc
. ~/.bashrc
```
### CRAN and machines without GPUs

### Install {cuda.ml} the R package:
On CRAN, or on machines without a usable NVIDIA GPU/driver and `nvcc`, {cuda.ml}
can still install in stub-only mode. In that mode `has_cuML()` returns `FALSE`
and cuML-backed algorithms are not usable until the system prerequisites are
installed and {cuda.ml} is reinstalled.

You can install the released version of {cuda.ml} from
[CRAN](https://CRAN.R-project.org) with:
### Manual RAPIDS installations

``` r
install.packages("cuda.ml")
```
If you already have RAPIDS cuML from pip, conda, or a source build, set
`CUML_PREFIX` to a prefix containing `include/cuml` and `lib/libcuml++.so`
before installing {cuda.ml}. In this case the automatic bootstrap is skipped.

And the development version from [GitHub](https://github.com/) with:
### Development version

Install the development version from [GitHub](https://github.com/) with:

``` r
# install.packages("devtools")
devtools::install_github("mlverse/cuda.ml")
```

To speed up recompilation times during development, set this env var:

```bash
echo "export CUML4R_ENABLE_CCACHE=1" >> ~/.bashrc
. ~/.bashrc
```


## Appendix
