From 9f7140b28275385578e1224258f8a98f15f50e5e Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Fri, 24 Apr 2026 11:08:52 -0400 Subject: [PATCH 1/7] Update roxygen --- .Rbuildignore | 3 +++ .gitignore | 2 ++ DESCRIPTION | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index 1ec133a..da5ffe6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -31,3 +31,6 @@ ^libcuml/* ^\.github$ ^\.lsan-suppressions\.txt$ +^\.positai$ +^\.claude$ +^\.codex$ diff --git a/.gitignore b/.gitignore index 6d3278f..1d71690 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ cuda.ml.Rcheck *.cmake *.a 00check.log +.positai +.codex \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 7dea694..eb9abb5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,7 +45,7 @@ Suggests: xgboost LinkingTo: Rcpp Encoding: UTF-8 -RoxygenNote: 7.1.2 +RoxygenNote: 7.3.3 OS_type: unix SystemRequirements: RAPIDS cuML (see https://rapids.ai/start.html) NeedsCompilation: yes From 95a688129a851caa99db11d6f02c5e98cbe5a8b9 Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Fri, 24 Apr 2026 11:22:19 -0400 Subject: [PATCH 2/7] export S3 methods --- NAMESPACE | 18 ++++++++++++++++++ R/model.R | 4 ++++ R/pca.R | 2 ++ R/rand_forest.R | 2 ++ R/rand_proj.R | 2 ++ R/svm.R | 6 ++++++ R/umap.R | 2 ++ man/cuda.ml.Rd | 12 ------------ 8 files changed, 36 insertions(+), 12 deletions(-) delete mode 100644 man/cuda.ml.Rd diff --git a/NAMESPACE b/NAMESPACE index 33ff35f..5a409d0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,15 @@ S3method(cuda_ml_elastic_net,default) S3method(cuda_ml_elastic_net,formula) S3method(cuda_ml_elastic_net,matrix) S3method(cuda_ml_elastic_net,recipe) +S3method(cuda_ml_get_state,cuda_ml_model) +S3method(cuda_ml_get_state,cuda_ml_pca) +S3method(cuda_ml_get_state,cuda_ml_rand_forest) +S3method(cuda_ml_get_state,cuda_ml_rand_proj_model) +S3method(cuda_ml_get_state,cuda_ml_svc) +S3method(cuda_ml_get_state,cuda_ml_svc_ovr) 
+S3method(cuda_ml_get_state,cuda_ml_svr) +S3method(cuda_ml_get_state,cuda_ml_umap) +S3method(cuda_ml_get_state,default) S3method(cuda_ml_inverse_transform,cuda_ml_pca) S3method(cuda_ml_inverse_transform,cuda_ml_tsvd) S3method(cuda_ml_is_classifier,cuda_ml_model) @@ -43,6 +52,15 @@ S3method(cuda_ml_ridge,matrix) S3method(cuda_ml_ridge,recipe) S3method(cuda_ml_serialize,cuda_ml_model) S3method(cuda_ml_serialize,default) +S3method(cuda_ml_set_state,cuda_ml_model_state) +S3method(cuda_ml_set_state,cuda_ml_pca_model_state) +S3method(cuda_ml_set_state,cuda_ml_rand_forest_model_state) +S3method(cuda_ml_set_state,cuda_ml_rand_proj_model_state) +S3method(cuda_ml_set_state,cuda_ml_svc_model_state) +S3method(cuda_ml_set_state,cuda_ml_svc_ovr_model_state) +S3method(cuda_ml_set_state,cuda_ml_svr_model_state) +S3method(cuda_ml_set_state,cuda_ml_umap_model_state) +S3method(cuda_ml_set_state,default) S3method(cuda_ml_sgd,data.frame) S3method(cuda_ml_sgd,default) S3method(cuda_ml_sgd,formula) diff --git a/R/model.R b/R/model.R index 584ef06..da70baf 100644 --- a/R/model.R +++ b/R/model.R @@ -184,6 +184,7 @@ cuda_ml_get_state <- function(model) { UseMethod("cuda_ml_get_state") } +#' @export cuda_ml_get_state.default <- function(model) { stop( "Model of type '", paste(class(model), collapse = " "), "' does not ", @@ -191,6 +192,7 @@ cuda_ml_get_state.default <- function(model) { ) } +#' @export cuda_ml_get_state.cuda_ml_model <- function(model) { # Default implementation: assume the entire model object can be serializabled # by `base::serialize()`. @@ -199,6 +201,7 @@ cuda_ml_get_state.cuda_ml_model <- function(model) { new_model_state(model_state, cls = NULL) } +#' @export cuda_ml_set_state.cuda_ml_model_state <- function(model_state) { # Default implementation: assume the entire model state can be unserialized by # `base::unserialize()`. 
@@ -233,6 +236,7 @@ cuda_ml_set_state <- function(model_state) { UseMethod("cuda_ml_set_state") } +#' @export cuda_ml_set_state.default <- function(model_state) { stop( "No unserialization routine found for model state of type '", diff --git a/R/pca.R b/R/pca.R index db2bda2..fb3a974 100644 --- a/R/pca.R +++ b/R/pca.R @@ -78,12 +78,14 @@ cuda_ml_inverse_transform.cuda_ml_pca <- function(model, x, ...) { .pca_inverse_transform(model = model, x = as.matrix(x)) } +#' @export cuda_ml_get_state.cuda_ml_pca <- function(model) { model_state <- .pca_get_state(model) new_model_state(model_state, "cuda_ml_pca_model_state") } +#' @export cuda_ml_set_state.cuda_ml_pca_model_state <- function(model_state) { model_state <- .pca_set_state(model_state) diff --git a/R/rand_forest.R b/R/rand_forest.R index 9a64338..6d7380b 100644 --- a/R/rand_forest.R +++ b/R/rand_forest.R @@ -329,6 +329,7 @@ cuda_ml_rand_forest_impl_regression <- function(processed, mtry, trees, min_n, ) } +#' @export cuda_ml_get_state.cuda_ml_rand_forest <- function(model) { get_state_impl <- switch(model$mode, classification = .rf_classifier_get_state, @@ -344,6 +345,7 @@ cuda_ml_get_state.cuda_ml_rand_forest <- function(model) { new_model_state(model_state, "cuda_ml_rand_forest_model_state") } +#' @export cuda_ml_set_state.cuda_ml_rand_forest_model_state <- function(model_state) { set_state_impl <- switch(model_state$mode, classification = .rf_classifier_set_state, diff --git a/R/rand_proj.R b/R/rand_proj.R index 91cf8a5..475ebec 100644 --- a/R/rand_proj.R +++ b/R/rand_proj.R @@ -79,12 +79,14 @@ cuda_ml_transform.cuda_ml_rand_proj_model <- function(model, x, ...) 
{ .rproj_transform(model$rproj_ctx, as.matrix(x)) } +#' @export cuda_ml_get_state.cuda_ml_rand_proj_model <- function(model) { model_state <- .rproj_get_state(model$rproj_ctx) new_model_state(model_state, "cuda_ml_rand_proj_model_state") } +#' @export cuda_ml_set_state.cuda_ml_rand_proj_model_state <- function(model_state) { model_obj <- .rproj_set_state(model_state) diff --git a/R/svm.R b/R/svm.R index 82df814..31c0c8c 100644 --- a/R/svm.R +++ b/R/svm.R @@ -313,6 +313,7 @@ cuda_ml_svm_classification_multiclass_impl <- function(processed, cost, kernel, ) } +#' @export cuda_ml_get_state.cuda_ml_svc_ovr <- function(model) { model_state <- list( ovr_model_states = lapply(model$xptr, function(x) cuda_ml_get_state(x)), @@ -322,6 +323,7 @@ cuda_ml_get_state.cuda_ml_svc_ovr <- function(model) { new_model_state(model_state, "cuda_ml_svc_ovr_model_state") } +#' @export cuda_ml_set_state.cuda_ml_svc_ovr_model_state <- function(model_state) { new_model( cls = c("cuda_ml_svc_ovr", "cuda_ml_svm"), @@ -365,6 +367,7 @@ cuda_ml_svm_classification_binary_impl <- function(processed, cost, kernel, gamm ) } +#' @export cuda_ml_get_state.cuda_ml_svc <- function(model) { model_state <- list( model_state = .svc_get_state(model$xptr), @@ -374,6 +377,7 @@ cuda_ml_get_state.cuda_ml_svc <- function(model) { new_model_state(model_state, "cuda_ml_svc_model_state") } +#' @export cuda_ml_set_state.cuda_ml_svc_model_state <- function(model_state) { new_model( cls = c("cuda_ml_svc", "cuda_ml_svm"), @@ -416,6 +420,7 @@ cuda_ml_svm_regression_impl <- function(processed, cost, kernel, gamma, coef0, ) } +#' @export cuda_ml_get_state.cuda_ml_svr <- function(model) { model_state <- list( model_state = .svr_get_state(model$xptr), @@ -425,6 +430,7 @@ cuda_ml_get_state.cuda_ml_svr <- function(model) { new_model_state(model_state, "cuda_ml_svr_model_state") } +#' @export cuda_ml_set_state.cuda_ml_svr_model_state <- function(model_state) { new_model( cls = c("cuda_ml_svr", "cuda_ml_svm"), diff --git 
a/R/umap.R b/R/umap.R index 8cd9292..f325aad 100644 --- a/R/umap.R +++ b/R/umap.R @@ -150,12 +150,14 @@ cuda_ml_umap <- function(x, y = NULL, n_components = 2L, n_neighbors = 15L, model } +#' @export cuda_ml_get_state.cuda_ml_umap <- function(model) { model_state <- .umap_get_state(model) new_model_state(model_state, "cuda_ml_umap_model_state") } +#' @export cuda_ml_set_state.cuda_ml_umap_model_state <- function(model_state) { model_obj <- .umap_set_state(model_state) diff --git a/man/cuda.ml.Rd b/man/cuda.ml.Rd deleted file mode 100644 index 8043964..0000000 --- a/man/cuda.ml.Rd +++ /dev/null @@ -1,12 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/package.R -\docType{package} -\name{cuda.ml} -\alias{cuda.ml} -\title{cuda.ml} -\description{ -This package provides a R interface for the RAPIDS cuML library. -} -\author{ -Yitao Li -} From 4eced56808c0deeeb61337ef69d0c4d18c072182 Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Fri, 24 Apr 2026 11:22:51 -0400 Subject: [PATCH 3/7] roxygen updates --- R/package.R | 4 +--- man/cuda.ml-package.Rd | 21 +++++++++++++++++++++ man/cuda_ml_knn.Rd | 20 ++++++++------------ 3 files changed, 30 insertions(+), 15 deletions(-) create mode 100644 man/cuda.ml-package.Rd diff --git a/R/package.R b/R/package.R index 5e2079f..e0ffb36 100644 --- a/R/package.R +++ b/R/package.R @@ -2,12 +2,10 @@ #' #' This package provides a R interface for the RAPIDS cuML library. 
#' -#' @docType package #' @author Yitao Li #' @import Rcpp -#' @name cuda.ml #' @useDynLib cuda.ml, .registration = TRUE -NULL +"_PACKAGE" .onLoad <- function(libname, pkgname) { register_rand_forest_model(pkgname) diff --git a/man/cuda.ml-package.Rd b/man/cuda.ml-package.Rd new file mode 100644 index 0000000..b43d49e --- /dev/null +++ b/man/cuda.ml-package.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/package.R +\docType{package} +\name{cuda.ml-package} +\alias{cuda.ml} +\alias{cuda.ml-package} +\title{cuda.ml} +\description{ +This package provides a R interface for the RAPIDS cuML library. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://mlverse.github.io/cuda.ml/} + \item Report bugs at \url{https://github.com/mlverse/cuda.ml/issues} +} + +} +\author{ +Yitao Li +} diff --git a/man/cuda_ml_knn.Rd b/man/cuda_ml_knn.Rd index 4d72201..a0ffa76 100644 --- a/man/cuda_ml_knn.Rd +++ b/man/cuda_ml_knn.Rd @@ -17,9 +17,8 @@ cuda_ml_knn(x, ...) x, y, algo = c("brute", "ivfflat", "ivfpq", "ivfsq"), - metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", - "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", - "correlation"), + metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis", + "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"), p = 2, neighbors = 5L, ... @@ -29,9 +28,8 @@ cuda_ml_knn(x, ...) x, y, algo = c("brute", "ivfflat", "ivfpq", "ivfsq"), - metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", - "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", - "correlation"), + metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis", + "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"), p = 2, neighbors = 5L, ... @@ -41,9 +39,8 @@ cuda_ml_knn(x, ...) 
formula, data, algo = c("brute", "ivfflat", "ivfpq", "ivfsq"), - metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", - "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", - "correlation"), + metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis", + "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"), p = 2, neighbors = 5L, ... @@ -53,9 +50,8 @@ cuda_ml_knn(x, ...) x, data, algo = c("brute", "ivfflat", "ivfpq", "ivfsq"), - metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", - "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", - "correlation"), + metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis", + "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"), p = 2, neighbors = 5L, ... From 0358231eb5e69ed1bd130e75c43abc70b3fe41e3 Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Fri, 24 Apr 2026 11:37:38 -0400 Subject: [PATCH 4/7] update maintainer --- DESCRIPTION | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index eb9abb5..3983761 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,15 +10,15 @@ Authors@R: comment = c(ORCID = "0000-0002-1261-905X")), person(given = "Tomasz", family = "Kalinowski", - role = c("cph", "ctb"), - email = "tomasz.kalinowski@rstudio.com"), + role = c("cre", "cph", "ctb"), + email = "tomasz.kalinowski@posit.co"), person(given = "Daniel", family = "Falbel", - role = c("aut", "cre", "cph"), - email = "daniel@rstudio.com"), + role = c("aut", "cph"), + email = "daniel@posit.co"), person(given = "RStudio", role = c("cph", "fnd"))) -Maintainer: Daniel Falbel +Maintainer: Tomasz Kalinowski Description: R interface for RAPIDS cuML (), a suite of GPU-accelerated machine learning libraries powered by CUDA (). 
From 7b61eb7eb2671bd13de8f70e5f846e21feefd6f0 Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Tue, 28 Apr 2026 15:26:03 -0400 Subject: [PATCH 5/7] WIP - updates for cuda 12 --- .Rbuildignore | 1 + DESCRIPTION | 4 +- R/cuml_utils.R | 13 +- R/package.R | 38 ++- README.Rmd | 148 +++++------ README.md | 120 +++++---- man/cuda.ml-package.Rd | 19 ++ man/has_cuML.Rd | 13 +- src/CMakeLists.txt.in | 38 ++- src/agglomerative_clustering.cu | 20 +- src/async_utils.cuh | 28 +-- src/cd_fit_impl.cu | 9 +- src/cuml_utils.cpp | 3 - src/dbscan.cu | 15 +- src/device_allocator.cu | 6 + src/device_allocator.h | 6 + src/fil.cu | 5 +- src/fil_utils.cu | 4 + src/fil_utils.h | 4 + src/handle_utils.cu | 7 + src/handle_utils.h | 2 +- src/kmeans.cu | 24 +- src/knn.cu | 32 ++- src/lm.cu | 1 - src/lm_predict.cu | 1 - src/ols_fit_impl.cu | 7 + src/pca.cu | 20 +- src/pinned_host_vector.h | 28 +-- src/preprocessor.h | 8 + src/qn.cu | 46 +++- src/random_forest_classifier.cu | 32 ++- src/random_forest_regressor.cu | 28 ++- src/random_projection.cpp | 25 +- src/random_projection.cu | 5 +- src/ridge_fit_impl.cu | 7 + src/stream_allocator.cu | 2 - src/stubs/treelite/c_api.h | 2 + src/svm_classifier.cu | 15 +- src/svm_regressor.cu | 8 +- src/svm_serde.cu | 59 ++++- src/svm_serde.h | 21 +- src/tsne.cu | 9 +- src/tsvd.cu | 24 +- src/umap.cu | 58 ++++- src/warn_cuml_missing.h | 9 +- tests/testthat/helper-initialize.R | 2 + tools/config/Makefile.cmake.in | 8 + tools/config/cleanup.R | 2 +- tools/config/configure.R | 97 ++++++-- tools/config/utils/bootstrap.R | 379 +++++++++++++++++++++++++++++ tools/config/utils/cuml.R | 99 ++++++-- tools/config/utils/nvcc.R | 102 +++++++- 52 files changed, 1315 insertions(+), 348 deletions(-) create mode 100644 tools/config/Makefile.cmake.in create mode 100644 tools/config/utils/bootstrap.R diff --git a/.Rbuildignore b/.Rbuildignore index da5ffe6..3e514cb 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,6 +10,7 @@ ^src/CMakeLists\.txt$ ^src/CMakeCache\.txt$ 
^src/CMakeFiles/* +^src/\.cmake-build/* ^src/_deps/* ^src/eval_gpu_archs* ^src/*\.o$ diff --git a/DESCRIPTION b/DESCRIPTION index 3983761..16d9c54 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,5 +47,7 @@ LinkingTo: Rcpp Encoding: UTF-8 RoxygenNote: 7.3.3 OS_type: unix -SystemRequirements: RAPIDS cuML (see https://rapids.ai/start.html) +SystemRequirements: NVIDIA GPU and driver, CUDA Toolkit with nvcc, and uv or + Python/pip for automatic RAPIDS cuML bootstrap. Alternatively, an existing + RAPIDS cuML installation can be provided with CUML_PREFIX. NeedsCompilation: yes diff --git a/R/cuml_utils.R b/R/cuml_utils.R index bd5d431..103383f 100644 --- a/R/cuml_utils.R +++ b/R/cuml_utils.R @@ -4,14 +4,23 @@ #' @return A logical value indicating whether the current installation {cuda.ml} #' was linked to a valid version of the RAPIDS cuML shared library. #' +#' @details +#' If this returns \code{FALSE}, \pkg{cuda.ml} was installed in stub-only mode. +#' On a GPU machine, verify that \code{nvidia-smi} and \code{nvcc --version} +#' both work, then reinstall \pkg{cuda.ml}. During installation, \pkg{cuda.ml} +#' can bootstrap RAPIDS cuML from pip wheels with \code{uv} or Python/pip. If +#' RAPIDS cuML is already installed, set \code{CUML_PREFIX} to a prefix +#' containing \code{include/cuml} and \code{lib/libcuml++.so} before +#' reinstalling. +#' #' @examples #' #' library(cuda.ml) #' #' if (!has_cuML()) { #' warning( -#' "Please install the RAPIDS cuML shared library first, and then re-", -#' "install {cuda.ml}." +#' "This installation was built without RAPIDS cuML. Verify `nvidia-smi` ", +#' "and `nvcc --version`, then reinstall {cuda.ml}." #' ) #' } #' @export diff --git a/R/package.R b/R/package.R index e0ffb36..5de704c 100644 --- a/R/package.R +++ b/R/package.R @@ -2,6 +2,23 @@ #' #' This package provides a R interface for the RAPIDS cuML library. 
#' +#' @section Installation: +#' A functional GPU installation requires an NVIDIA GPU with a working driver, +#' a CUDA Toolkit installation that provides \code{nvcc}, and normal R package +#' build tools. During installation, \pkg{cuda.ml} first looks for an existing +#' RAPIDS installation through \code{CUML_PREFIX} or \code{CUDA_PATH}. If none +#' is found, it can bootstrap RAPIDS cuML from pip wheels with \code{uv} or +#' Python/pip and link against the resulting local prefix. +#' +#' On machines without a usable NVIDIA driver/GPU and \code{nvcc}, including +#' CRAN check machines, \pkg{cuda.ml} may install in stub-only mode. In that +#' mode \code{has_cuML()} returns \code{FALSE}, and cuML-backed algorithms are +#' unavailable until the system prerequisites are installed and \pkg{cuda.ml} +#' is reinstalled. +#' +#' Useful environment variables include \code{CUDA_HOME}, \code{CUML_PREFIX}, +#' \code{CUML_BOOTSTRAP}, and \code{CUML_BOOTSTRAP_CACHE}. +#' #' @author Yitao Li #' @import Rcpp #' @useDynLib cuda.ml, .registration = TRUE @@ -17,20 +34,15 @@ if (!has_cuML()) { packageStartupMessage( " - The current installation of {", pkgname, "} will not function as expected - because it was not linked with a valid version of the RAPIDS cuML shared - library. + The current installation of {", pkgname, "} was built without a usable + RAPIDS cuML shared library. + + To fix this, ensure `nvidia-smi` and `nvcc --version` both work, then + reinstall {", pkgname, "}. During installation, {", pkgname, "} can + bootstrap RAPIDS cuML from pip wheels with `uv` or Python/pip. 
- To fix this issue, please follow https://rapids.ai/start.html#get-rapids - to install the RAPIDS cuML shared library from Conda and ensure the - 'CUML_PREFIX' env variable is set to a valid RAPIDS conda env directory - (e.g., '/home/user/anaconda3/envs/rapids-21.06', '/usr', or similar) - during the installation of {", pkgname, "} or alternatively, follow - https://github.com/yitao-li/cuml-installation-notes#build-from-source-without-conda-and-without-multi-gpu-support - or - https://github.com/yitao-li/cuml-installation-notes#build-from-source-without-conda-and-with-multi-gpu-support - or similar to build and install RAPIDS cuML library from source, and - then re-install {", pkgname, "}.\n\n + If RAPIDS is already installed, set `CUML_PREFIX` to a prefix containing + include/cuml and lib/libcuml++.so before reinstalling.\n\n " ) } diff --git a/README.Rmd b/README.Rmd index f8efc88..8089726 100644 --- a/README.Rmd +++ b/README.Rmd @@ -176,115 +176,117 @@ about the MNIST dataset: ## Installation -In order for {cuda.ml} to work as expected, the C++/CUDA source code of -{cuda.ml} must be linked with CUDA runtime and a valid copy of the RAPIDS cuML -library. +For a fully functional installation, {cuda.ml} needs: -Before installing {cuda.ml} itself, it may be worthwhile to take a quick look -through the sub-sections below on how to properly setup all of {cuda.ml}'s -required runtime dependencies. +- an NVIDIA GPU with a working NVIDIA driver; +- a CUDA Toolkit installation that provides `nvcc`; +- normal R package build tools; and +- either `uv` or Python with `pip`. -### Quick note on installing the RAPIDS cuML library: +When those prerequisites are present, {cuda.ml} can bootstrap RAPIDS cuML from +pip wheels during installation. You do not need conda, and you usually do not +need to set `CUML_PREFIX` manually. 
-Although Conda is the only officially supported distribution channel at the -moment for RAPIDS cuML (i.e., see https://rapids.ai/start.html#get-rapids), -you can still build and install this library from source without relying on -Conda. -See https://github.com/yitao-li/cuml-installation-notes for build-from-source -instructions. +On a new Ubuntu installation, install R/build/Python prerequisites: -### Quick install instructions for Ubuntu 20-04: - -#### Install deps: -``` -sudo apt install -y cmake ccache libblas3 liblapack3 +```bash +sudo apt update +sudo apt install -y r-base-dev build-essential git cmake \ + python3 python3-pip python3-venv ubuntu-drivers-common ``` +Install the NVIDIA driver, reboot, and verify that the driver can see your GPU: -### Install CUDA -(consult https://developer.nvidia.com/cuda-downloads for other platforms) ```bash -wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin -sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 -wget https://developer.download.nvidia.com/compute/cuda/11.4.2/local_installers/cuda-repo-ubuntu2004-11-4-local_11.4.2-470.57.02-1_amd64.deb -sudo dpkg -i cuda-repo-ubuntu2004-11-4-local_11.4.2-470.57.02-1_amd64.deb -sudo apt-key add /var/cuda-repo-ubuntu2004-11-4-local/7fa2af80.pub -sudo apt-get update -sudo apt-get -y install cuda +sudo ubuntu-drivers install +sudo reboot + +nvidia-smi ``` -### Add CUDA executables to path -(nvcc is needed for building the C++/CUDA source code of {cuda.ml}) + +Install a CUDA Toolkit that includes `nvcc`. 
Use NVIDIA's CUDA Linux +installation guide for your Ubuntu release to add the CUDA apt repository, then: + ```bash -echo "export PATH=$PATH:/usr/local/cuda/bin" >> ~/.bashrc -source ~/.bashrc +sudo apt update +sudo apt install -y cuda-toolkit + +nvcc --version ``` -### Install Miniconda: +If the toolkit is installed but `nvcc` is not on `PATH`, set `CUDA_HOME` to the +toolkit prefix before installing {cuda.ml}, for example: + ```bash -wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -chmod +x Miniconda3-latest-Linux-x86_64.sh -./Miniconda3-latest-Linux-x86_64.sh -b -# consult https://rapids.ai/start.html for alternatives +export CUDA_HOME=/usr/local/cuda ``` -### Create and configure the conda env -``` -# This is a relatively big download, may take a while -~/miniconda3/bin/conda create -n rapids-21.08 -c rapidsai -c nvidia -c conda-forge \ - rapids-blazing=21.08 python=3.8 cudatoolkit=11.2 -``` +Then install {cuda.ml}: -### Install cmake -CUDA dependencies require a relatively recent version of CMake, so you need to install it manually -```bash -wget https://github.com/Kitware/CMake/releases/download/v3.22.0/cmake-3.22.0.tar.gz -cd cmake-3.22.0 -./bootstrap && make -j8 && sudo make install -cd .. +``` r +install.packages("cuda.ml") ``` -### Activate the conda env: -```bash -. ~/miniconda3/bin/activate -conda activate rapids-21.08 +And verify that the installed package was linked with real cuML: + +``` r +library(cuda.ml) +has_cuML() ``` -### Consider adjusting `LD_LIBRARY_PATH` +If this returns `TRUE`, {cuda.ml} is using RAPIDS cuML. If it returns `FALSE`, +the package installed in stub-only mode; check the install output for the first +missing prerequisite. 
-The subsequent steps may (or may not) fail without the following: +### What happens during installation -```bash -export LD_LIBRARY_PATH=~/miniconda3/envs/rapids-21.08/lib -``` +The configure script first looks for an existing RAPIDS installation through +`CUML_PREFIX` or `CUDA_PATH`. If no existing installation is found, and a +working NVIDIA driver/GPU plus `nvcc` are available, it bootstraps RAPIDS cuML +from pip wheels into a cache directory and links {cuda.ml} against that prefix. -If you get some error indicating a GLIBC version mismatch in the subsequent -steps, then please try adjusting `LD_LIBRARY_PATH` as a workaround. +The bootstrap prefers `uv` when available, then reticulate's managed `uv`, then +`python -m pip`, `python3 -m pip`, `pip`, and `pip3`. +Useful environment variables: -### Consider enabling ccache +- `CUDA_HOME`: CUDA Toolkit prefix containing `bin/nvcc`. +- `CUML_PREFIX`: existing RAPIDS prefix containing `include/cuml` and + `lib/libcuml++.so`. +- `CUML_BOOTSTRAP=0`: disable automatic RAPIDS pip bootstrap. +- `CUML_BOOTSTRAP_CACHE`: cache directory for bootstrapped RAPIDS headers and + libraries. +- `CUML_PIP_VERSION`: RAPIDS pip wheel version to install. -To speed up recompilation times during development, set this env var: -```bash -echo "export CUML4R_ENABLE_CCACHE=1" >> ~/.bashrc -. ~/.bashrc -``` +### CRAN and machines without GPUs -### Install {cuda.ml} the R package: +On CRAN, or on machines without a usable NVIDIA GPU/driver and `nvcc`, {cuda.ml} +can still install in stub-only mode. In that mode `has_cuML()` returns `FALSE` +and cuML-backed algorithms are not usable until the system prerequisites are +installed and {cuda.ml} is reinstalled. 
-You can install the released version of {cuda.ml} from -[CRAN](https://CRAN.R-project.org) with: +### Manual RAPIDS installations -``` r -install.packages("cuda.ml") -``` +If you already have RAPIDS cuML from pip, conda, or a source build, set +`CUML_PREFIX` to a prefix containing `include/cuml` and `lib/libcuml++.so` +before installing {cuda.ml}. In this case the automatic bootstrap is skipped. -And the development version from [GitHub](https://github.com/) with: +### Development version + +Install the development version from [GitHub](https://github.com/) with: ``` r # install.packages("devtools") devtools::install_github("mlverse/cuda.ml") ``` +To speed up recompilation times during development, set this env var: + +```bash +echo "export CUML4R_ENABLE_CCACHE=1" >> ~/.bashrc +. ~/.bashrc +``` + ## Appendix diff --git a/README.md b/README.md index 52460e0..277195c 100644 --- a/README.md +++ b/README.md @@ -263,110 +263,100 @@ From this type of visualization, we can qualitatively understand the following a ## Installation -In order for {cuda.ml} to work as expected, the C++/CUDA source code of {cuda.ml} must be linked with CUDA runtime and a valid copy of the RAPIDS cuML library. +For a fully functional installation, {cuda.ml} needs: -Before installing {cuda.ml} itself, it may be worthwhile to take a quick look through the sub-sections below on how to properly setup all of {cuda.ml}'s required runtime dependencies. +- an NVIDIA GPU with a working NVIDIA driver; +- a CUDA Toolkit installation that provides `nvcc`; +- normal R package build tools; and +- either `uv` or Python with `pip`. -### Quick note on installing the RAPIDS cuML library: +When those prerequisites are present, {cuda.ml} can bootstrap RAPIDS cuML from pip wheels during installation. You do not need conda, and you usually do not need to set `CUML_PREFIX` manually. 
-Although Conda is the only officially supported distribution channel at the moment for RAPIDS cuML (i.e., see ), you can still build and install this library from source without relying on Conda. See for build-from-source instructions. - -### Quick install instructions for Ubuntu 20-04: - -#### Install deps: - - sudo apt install -y cmake ccache libblas3 liblapack3 - -### Install CUDA - -(consult for other platforms) +On a new Ubuntu installation, install R/build/Python prerequisites: ``` bash -wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin -sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 -wget https://developer.download.nvidia.com/compute/cuda/11.4.2/local_installers/cuda-repo-ubuntu2004-11-4-local_11.4.2-470.57.02-1_amd64.deb -sudo dpkg -i cuda-repo-ubuntu2004-11-4-local_11.4.2-470.57.02-1_amd64.deb -sudo apt-key add /var/cuda-repo-ubuntu2004-11-4-local/7fa2af80.pub -sudo apt-get update -sudo apt-get -y install cuda +sudo apt update +sudo apt install -y r-base-dev build-essential git cmake \ + python3 python3-pip python3-venv ubuntu-drivers-common ``` -### Add CUDA executables to path - -(nvcc is needed for building the C++/CUDA source code of {cuda.ml}) +Install the NVIDIA driver, reboot, and verify that the driver can see your GPU: ``` bash -echo "export PATH=$PATH:/usr/local/cuda/bin" >> ~/.bashrc -source ~/.bashrc +sudo ubuntu-drivers install +sudo reboot + +nvidia-smi ``` -### Install Miniconda: +Install a CUDA Toolkit that includes `nvcc`. 
Use NVIDIA's CUDA Linux installation guide for your Ubuntu release to add the CUDA apt repository, then: ``` bash -wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -chmod +x Miniconda3-latest-Linux-x86_64.sh -./Miniconda3-latest-Linux-x86_64.sh -b -# consult https://rapids.ai/start.html for alternatives -``` +sudo apt update +sudo apt install -y cuda-toolkit -### Create and configure the conda env +nvcc --version +``` - # This is a relatively big download, may take a while - ~/miniconda3/bin/conda create -n rapids-21.08 -c rapidsai -c nvidia -c conda-forge \ - rapids-blazing=21.08 python=3.8 cudatoolkit=11.2 +If the toolkit is installed but `nvcc` is not on `PATH`, set `CUDA_HOME` to the toolkit prefix before installing {cuda.ml}, for example: -### Install cmake +``` bash +export CUDA_HOME=/usr/local/cuda +``` -CUDA dependencies require a relatively recent version of CMake, so you need to install it manually +Then install {cuda.ml}: -``` bash -wget https://github.com/Kitware/CMake/releases/download/v3.22.0/cmake-3.22.0.tar.gz -cd cmake-3.22.0 -./bootstrap && make -j8 && sudo make install -cd .. +``` r +install.packages("cuda.ml") ``` -### Activate the conda env: +And verify that the installed package was linked with real cuML: -``` bash -. ~/miniconda3/bin/activate -conda activate rapids-21.08 +``` r +library(cuda.ml) +has_cuML() ``` -### Consider adjusting `LD_LIBRARY_PATH` +If this returns `TRUE`, {cuda.ml} is using RAPIDS cuML. If it returns `FALSE`, the package installed in stub-only mode; check the install output for the first missing prerequisite. -The subsequent steps may (or may not) fail without the following: +### What happens during installation -``` bash -export LD_LIBRARY_PATH=~/miniconda3/envs/rapids-21.08/lib -``` +The configure script first looks for an existing RAPIDS installation through `CUML_PREFIX` or `CUDA_PATH`. 
If no existing installation is found, and a working NVIDIA driver/GPU plus `nvcc` are available, it bootstraps RAPIDS cuML from pip wheels into a cache directory and links {cuda.ml} against that prefix. -If you get some error indicating a GLIBC version mismatch in the subsequent steps, then please try adjusting `LD_LIBRARY_PATH` as a workaround. +The bootstrap prefers `uv` when available, then reticulate's managed `uv`, then `python -m pip`, `python3 -m pip`, `pip`, and `pip3`. -### Consider enabling ccache +Useful environment variables: -To speed up recompilation times during development, set this env var: +- `CUDA_HOME`: CUDA Toolkit prefix containing `bin/nvcc`. +- `CUML_PREFIX`: existing RAPIDS prefix containing `include/cuml` and `lib/libcuml++.so`. +- `CUML_BOOTSTRAP=0`: disable automatic RAPIDS pip bootstrap. +- `CUML_BOOTSTRAP_CACHE`: cache directory for bootstrapped RAPIDS headers and libraries. +- `CUML_PIP_VERSION`: RAPIDS pip wheel version to install. -``` bash -echo "export CUML4R_ENABLE_CCACHE=1" >> ~/.bashrc -. ~/.bashrc -``` +### CRAN and machines without GPUs -### Install {cuda.ml} the R package: +On CRAN, or on machines without a usable NVIDIA GPU/driver and `nvcc`, {cuda.ml} can still install in stub-only mode. In that mode `has_cuML()` returns `FALSE` and cuML-backed algorithms are not usable until the system prerequisites are installed and {cuda.ml} is reinstalled. -You can install the released version of {cuda.ml} from [CRAN](https://CRAN.R-project.org) with: +### Manual RAPIDS installations -``` r -install.packages("cuda.ml") -``` +If you already have RAPIDS cuML from pip, conda, or a source build, set `CUML_PREFIX` to a prefix containing `include/cuml` and `lib/libcuml++.so` before installing {cuda.ml}. In this case the automatic bootstrap is skipped. 
-And the development version from [GitHub](https://github.com/) with: +### Development version + +Install the development version from [GitHub](https://github.com/) with: ``` r # install.packages("devtools") devtools::install_github("mlverse/cuda.ml") ``` +To speed up recompilation times during development, set this env var: + +``` bash +echo "export CUML4R_ENABLE_CCACHE=1" >> ~/.bashrc +. ~/.bashrc +``` + ## Appendix
Inspect MNIST images diff --git a/man/cuda.ml-package.Rd b/man/cuda.ml-package.Rd index b43d49e..8502f4d 100644 --- a/man/cuda.ml-package.Rd +++ b/man/cuda.ml-package.Rd @@ -8,6 +8,25 @@ \description{ This package provides a R interface for the RAPIDS cuML library. } +\section{Installation}{ + +A functional GPU installation requires an NVIDIA GPU with a working driver, +a CUDA Toolkit installation that provides \code{nvcc}, and normal R package +build tools. During installation, \pkg{cuda.ml} first looks for an existing +RAPIDS installation through \code{CUML_PREFIX} or \code{CUDA_PATH}. If none +is found, it can bootstrap RAPIDS cuML from pip wheels with \code{uv} or +Python/pip and link against the resulting local prefix. + +On machines without a usable NVIDIA driver/GPU and \code{nvcc}, including +CRAN check machines, \pkg{cuda.ml} may install in stub-only mode. In that +mode \code{has_cuML()} returns \code{FALSE}, and cuML-backed algorithms are +unavailable until the system prerequisites are installed and \pkg{cuda.ml} +is reinstalled. + +Useful environment variables include \code{CUDA_HOME}, \code{CUML_PREFIX}, +\code{CUML_BOOTSTRAP}, and \code{CUML_BOOTSTRAP_CACHE}. +} + \seealso{ Useful links: \itemize{ diff --git a/man/has_cuML.Rd b/man/has_cuML.Rd index a78075c..b6a7fe9 100644 --- a/man/has_cuML.Rd +++ b/man/has_cuML.Rd @@ -15,14 +15,23 @@ A logical value indicating whether the current installation {cuda.ml} Determine whether {cuda.ml} was linked to a valid version of the RAPIDS cuML shared library. } +\details{ +If this returns \code{FALSE}, \pkg{cuda.ml} was installed in stub-only mode. +On a GPU machine, verify that \code{nvidia-smi} and \code{nvcc --version} +both work, then reinstall \pkg{cuda.ml}. During installation, \pkg{cuda.ml} +can bootstrap RAPIDS cuML from pip wheels with \code{uv} or Python/pip. 
If +RAPIDS cuML is already installed, set \code{CUML_PREFIX} to a prefix +containing \code{include/cuml} and \code{lib/libcuml++.so} before +reinstalling. +} \examples{ library(cuda.ml) if (!has_cuML()) { warning( - "Please install the RAPIDS cuML shared library first, and then re-", - "install {cuda.ml}." + "This installation was built without RAPIDS cuML. Verify `nvidia-smi` ", + "and `nvcc --version`, then reinstall {cuda.ml}." ) } } diff --git a/src/CMakeLists.txt.in b/src/CMakeLists.txt.in index 88e08a7..0149509 100644 --- a/src/CMakeLists.txt.in +++ b/src/CMakeLists.txt.in @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR) -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") @@ -17,6 +17,7 @@ FetchContent_Declare( rapids-cmake GIT_REPOSITORY https://github.com/rapidsai/rapids-cmake.git GIT_TAG origin/branch-21.10 + UPDATE_DISCONNECTED TRUE ) FetchContent_MakeAvailable(rapids-cmake) include(rapids-cuda) @@ -34,7 +35,7 @@ endif(DEFINED ENV{CUML4R_ENABLE_CCACHE}) if(DEFINED CUML_INCLUDE_DIR) # CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES is needed so that cuda_runtime.h is found # CUML_INCLUDE_DIR is needed so that kmeans/kmeans_c.h is found - set(CUML4R_INCLUDE_DIRS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUML_INCLUDE_DIR}) + set(CUML4R_INCLUDE_DIRS ${CUML_INCLUDE_DIR} ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) else() message(FATAL_ERROR "CUML_INCLUDE_DIR not specified.") endif(DEFINED CUML_INCLUDE_DIR) @@ -50,6 +51,20 @@ include_directories(@RCPP_INCLUDE_DIR@) include_directories(${TREELITE_C_API_INCLUDE_DIR}) +set(CUML4R_RPROJ_C_API_FOUND FALSE) +foreach(CUML4R_INC_DIR IN LISTS CUML4R_INCLUDE_DIRS) + if(EXISTS "${CUML4R_INC_DIR}/cuml/random_projection/rproj_c.h") + set(CUML4R_RPROJ_C_API_FOUND TRUE) + endif() +endforeach() +if(NOT CUML4R_RPROJ_C_API_FOUND) + message( + STATUS + "cuML random projection C API headers were not found; random projection support will 
 be disabled." ) + add_definitions(-DCUML4R_RPROJ_C_API_MISSING) +endif(NOT CUML4R_RPROJ_C_API_FOUND) + if(DEFINED ENV{CUML4R_ENABLE_ASAN}) if($ENV{CUML4R_ENABLE_ASAN} MATCHES "true") add_compile_options(-fno-omit-frame-pointer -fsanitize-recover=address) @@ -121,24 +136,25 @@ add_library( # Need to set linker language to CUDA to link the CUDA Runtime set_target_properties(cuda.ml PROPERTIES LINKER_LANGUAGE "CUDA") set_target_properties(cuda.ml PROPERTIES PREFIX "") +target_compile_options( + cuda.ml + PRIVATE + $<$<COMPILE_LANGUAGE:CXX>:-Wno-deprecated-declarations> + $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-deprecated-declarations> +) set(CUML4R_LIBS cuda.ml PRIVATE cuml++ cublas cusolver cudart cusparse) -find_package(Treelite) +find_package(Treelite QUIET) if(Treelite_FOUND) set(CUML4R_LIBS ${CUML4R_LIBS} treelite::treelite treelite::treelite_runtime) set(CUML4R_INCLUDE_DIRS ${CUML4R_INCLUDE_DIRS} ${Treelite_INCLUDE_DIRS}) else() message( - WARNING - " - Unable to locate 'TreeLite' using CMake. Forest Inference Library (FIL) - functionalities from {cuda.ml} will be disabled! - - Please install the treelite C API and re-install {cuda.ml} if you want to - enable FIL functionalities. - " + STATUS + "Treelite was not found; Forest Inference Library (FIL) support will be disabled." 
) + add_definitions(-DCUML4R_TREELITE_C_API_MISSING) set( CUML4R_INCLUDE_DIRS ${CUML4R_INCLUDE_DIRS} ${CUML_STUB_HEADERS_DIR} ) diff --git a/src/agglomerative_clustering.cu b/src/agglomerative_clustering.cu index d5bb06f..cf24067 100644 --- a/src/agglomerative_clustering.cu +++ b/src/agglomerative_clustering.cu @@ -6,9 +6,9 @@ #include "preprocessor.h" #include "stream_allocator.h" -#include -#include #include +#include +#include #include @@ -38,9 +38,18 @@ __host__ Rcpp::List agglomerative_clustering(Rcpp::NumericMatrix const& x, async_copy(stream_view.value(), h_x.cbegin(), h_x.cend(), d_x.begin()); // single-linkage hierarchical clustering output - auto out = std::make_unique>(); thrust::device_vector d_labels(n_samples); thrust::device_vector d_children((n_samples - 1) * 2); + +#if CUML_VERSION_MAJOR >= 24 + ML::linkage::single_linkage( + handle, /*X=*/d_x.data().get(), /*n_rows=*/n_samples, + /*n_cols=*/n_features, /*n_clusters=*/n_clusters, + /*metric=*/static_cast(metric), + /*children=*/d_children.data().get(), /*labels=*/d_labels.data().get(), + /*use_knn=*/!pairwise_conn, /*c=*/n_neighbors); +#else + auto out = std::make_unique>(); out->labels = d_labels.data().get(); out->children = d_children.data().get(); @@ -56,6 +65,7 @@ __host__ Rcpp::List agglomerative_clustering(Rcpp::NumericMatrix const& x, /*metric=*/static_cast(metric), /*c=*/n_neighbors, n_clusters); } +#endif CUDA_RT_CALL(cudaStreamSynchronize(stream_view.value())); @@ -69,7 +79,11 @@ __host__ Rcpp::List agglomerative_clustering(Rcpp::NumericMatrix const& x, CUDA_RT_CALL(cudaStreamSynchronize(stream_view.value())); +#if CUML_VERSION_MAJOR >= 24 + result["n_clusters"] = n_clusters; +#else result["n_clusters"] = out->n_clusters; +#endif result["children"] = Rcpp::transpose(Rcpp::IntegerMatrix(2, n_samples - 1, h_children.begin())); result["labels"] = Rcpp::IntegerVector(h_labels.cbegin(), h_labels.cend()); diff --git a/src/async_utils.cuh b/src/async_utils.cuh index ed9050a..2a6d0a4 100644 --- 
a/src/async_utils.cuh +++ b/src/async_utils.cuh @@ -4,33 +4,21 @@ #include "cuda_utils.h" #include "preprocessor.h" -#include "unique_marker.cuh" -#include -#include - -#include +#include +#include namespace cuml4r { -// To ensure the correct async behavior, an `AsyncCopyCtx` object must be -// destroyed after the stream associated with the copy operation is -// synchronized, not before. -struct AsyncCopyCtx { - thrust::system::cuda::unique_eager_event event; - unique_marker marker; -}; +struct AsyncCopyCtx {}; // perform a copy operation that is asynchronous with respect to the host // and synchronous with respect to the stream specified -template -__host__ CUML4R_NODISCARD auto async_copy(cudaStream_t stream, Args&&... args) { - auto e = thrust::async::copy(std::forward(args)...); - auto& s = e.stream(); - unique_marker m; - CUDA_RT_CALL(cudaEventRecord(m.get(), s.get())); - CUDA_RT_CALL(cudaStreamWaitEvent(stream, m.get(), cudaEventWaitDefault)); - return AsyncCopyCtx{std::move(e), std::move(m)}; +template +__host__ CUML4R_NODISCARD auto async_copy( + cudaStream_t stream, InputIt first, InputIt last, OutputIt result) { + thrust::copy(thrust::cuda::par.on(stream), first, last, result); + return AsyncCopyCtx{}; } } // namespace cuml4r diff --git a/src/cd_fit_impl.cu b/src/cd_fit_impl.cu index c6e052e..6c8e67d 100644 --- a/src/cd_fit_impl.cu +++ b/src/cd_fit_impl.cu @@ -1,6 +1,8 @@ #include "lm_params.h" +#include "preprocessor.h" #include +#include namespace cuml4r { namespace detail { @@ -14,8 +16,13 @@ __host__ void cd_fit_impl(raft::handle_t& handle, lm::Params const& params, /*labels=*/params.d_labels, /*coef=*/params.d_coef, /*intercept=*/params.intercept, /*fit_intercept=*/params.fit_intercept, +#if (CUML4R_LIBCUML_VERSION(CUML_VERSION_MAJOR, CUML_VERSION_MINOR) < \ + CUML4R_LIBCUML_VERSION(24, 0)) /*normalize=*/params.normalize_input, epochs, loss, alpha, - l1_ratio, shuffle, tol); +#else + epochs, +#endif + loss, alpha, l1_ratio, shuffle, tol); } } // 
namespace detail diff --git a/src/cuml_utils.cpp b/src/cuml_utils.cpp index 4f07355..86a06db 100644 --- a/src/cuml_utils.cpp +++ b/src/cuml_utils.cpp @@ -4,9 +4,6 @@ #include -static_assert(CUML_VERSION_MAJOR == 21, - "{cuda.ml} currently only supports linking to RAPIDS cuML 21.x!"); - #endif #include diff --git a/src/dbscan.cu b/src/dbscan.cu index e385995..7c3a534 100644 --- a/src/dbscan.cu +++ b/src/dbscan.cu @@ -5,9 +5,9 @@ #include "preprocessor.h" #include "stream_allocator.h" -#include -#include #include +#include +#include #include @@ -41,10 +41,21 @@ __host__ Rcpp::List dbscan(Rcpp::NumericMatrix const& x, int const min_pts, ML::Dbscan::fit(handle, /*input=*/d_src_data.data().get(), /*n_rows=*/n_samples, /*n_cols=*/n_features, eps, min_pts, +#if (CUML4R_LIBCUML_VERSION(CUML_VERSION_MAJOR, CUML_VERSION_MINOR) >= \ + CUML4R_LIBCUML_VERSION(24, 0)) + /*metric=*/ML::distance::DistanceType::L2SqrtUnexpanded, + /*labels=*/d_labels.data().get(), + /*core_sample_indices=*/nullptr, /*sample_weight=*/nullptr, + max_bytes_per_batch, /*eps_nn_method=*/ML::Dbscan::BRUTE_FORCE, + /*verbosity=*/static_cast( + verbosity), + /*opg=*/false); +#else /*metric=*/raft::distance::L2SqrtUnexpanded, /*labels=*/d_labels.data().get(), /*core_sample_indices=*/nullptr, max_bytes_per_batch, /*verbosity=*/verbosity, /*opg=*/false); +#endif CUDA_RT_CALL(cudaStreamSynchronize(stream_view.value())); diff --git a/src/device_allocator.cu b/src/device_allocator.cu index fe13909..139bc2d 100644 --- a/src/device_allocator.cu +++ b/src/device_allocator.cu @@ -2,6 +2,10 @@ #include "device_allocator.h" +#include + +#if CUML_VERSION_MAJOR < 24 + #include namespace { @@ -19,6 +23,8 @@ __host__ std::shared_ptr getDeviceAllocator() { } // namespace cuml4r +#endif + #else #include "warn_cuml_missing.h" diff --git a/src/device_allocator.h b/src/device_allocator.h index 124c3b1..098639d 100644 --- a/src/device_allocator.h +++ b/src/device_allocator.h @@ -2,6 +2,10 @@ #ifdef HAS_CUML +#include + +#if 
CUML_VERSION_MAJOR < 24 + #include namespace raft { @@ -20,6 +24,8 @@ std::shared_ptr getDeviceAllocator(); } // namespace cuml4r +#endif + #else #include "warn_cuml_missing.h" diff --git a/src/fil.cu b/src/fil.cu index 1545177..b6d07e9 100644 --- a/src/fil.cu +++ b/src/fil.cu @@ -8,8 +8,9 @@ #include "stream_allocator.h" #include "treelite_utils.cuh" +#ifndef CUML4R_TREELITE_C_API_MISSING + #include -#include #include #include @@ -172,3 +173,5 @@ __host__ Rcpp::NumericMatrix fil_predict( } } // namespace cuml4r + +#endif diff --git a/src/fil_utils.cu b/src/fil_utils.cu index e36d501..32ea9fb 100644 --- a/src/fil_utils.cu +++ b/src/fil_utils.cu @@ -1,5 +1,7 @@ #include "fil_utils.h" +#ifndef CUML4R_TREELITE_C_API_MISSING + namespace cuml4r { namespace fil { @@ -19,3 +21,5 @@ __host__ forest_uptr make_forest(raft::handle_t const& handle, } // namespace fil } // namespace cuml4r + +#endif diff --git a/src/fil_utils.h b/src/fil_utils.h index a5702d0..4973bd4 100644 --- a/src/fil_utils.h +++ b/src/fil_utils.h @@ -1,5 +1,7 @@ #pragma once +#ifndef CUML4R_TREELITE_C_API_MISSING + #include #include @@ -26,3 +28,5 @@ forest_uptr make_forest(raft::handle_t const& handle, } // namespace fil } // namespace cuml4r + +#endif diff --git a/src/handle_utils.cu b/src/handle_utils.cu index 9c61b7b..28d36c2 100644 --- a/src/handle_utils.cu +++ b/src/handle_utils.cu @@ -3,6 +3,9 @@ #ifdef HAS_CUML +#include +#include + namespace cuml4r { namespace handle_utils { @@ -11,7 +14,11 @@ __host__ void initializeHandle(raft::handle_t& handle, if (stream_view.value() == 0) { stream_view = stream_allocator::getOrCreateStream(); } +#if CUML_VERSION_MAJOR >= 24 + raft::resource::set_cuda_stream(handle, stream_view); +#else handle.set_stream(stream_view.value()); +#endif } } // namespace handle_utils diff --git a/src/handle_utils.h b/src/handle_utils.h index f00d622..bba115a 100644 --- a/src/handle_utils.h +++ b/src/handle_utils.h @@ -2,7 +2,7 @@ #ifdef HAS_CUML -#include +#include #include 
namespace cuml4r { diff --git a/src/kmeans.cu b/src/kmeans.cu index a3357b7..ad0c0b7 100644 --- a/src/kmeans.cu +++ b/src/kmeans.cu @@ -6,9 +6,9 @@ #include "preprocessor.h" #include "stream_allocator.h" -#include -#include #include +#include +#include #include @@ -35,8 +35,15 @@ __host__ Rcpp::List kmeans(Rcpp::NumericMatrix const& x, int const k, params.inertia_check = true; } params.init = static_cast(init_method); +#if (CUML4R_LIBCUML_VERSION(CUML_VERSION_MAJOR, CUML_VERSION_MINOR) >= \ + CUML4R_LIBCUML_VERSION(24, 0)) + params.rng_state = raft::random::RngState( + seed, raft::random::GeneratorType::GenPhilox); + params.verbosity = static_cast(verbosity); +#else params.seed = seed; params.verbosity = verbosity; +#endif auto stream_view = stream_allocator::getOrCreateStream(); raft::handle_t handle; @@ -53,7 +60,7 @@ __host__ Rcpp::List kmeans(Rcpp::NumericMatrix const& x, int const k, // kmeans outputs thrust::device_vector d_pred_centroids(n_centroid_values); - AsyncCopyCtx centroids_h2d; + CUML4R_MAYBE_UNUSED AsyncCopyCtx centroids_h2d; if (params.init == ML::kmeans::KMeansParams::InitMethod::Array) { auto const m_centroids = Matrix<>(centroids, /*transpose=*/false); auto const& h_centroids = m_centroids.values; @@ -64,9 +71,20 @@ __host__ Rcpp::List kmeans(Rcpp::NumericMatrix const& x, int const k, double inertia = 0; int n_iter = 0; +#if (CUML4R_LIBCUML_VERSION(CUML_VERSION_MAJOR, CUML_VERSION_MINOR) >= \ + CUML4R_LIBCUML_VERSION(24, 0)) + ML::kmeans::fit(handle, params, d_src_data.data().get(), n_samples, + n_features, /*sample_weight=*/nullptr, + d_pred_centroids.data().get(), inertia, n_iter); + ML::kmeans::predict(handle, params, d_pred_centroids.data().get(), + d_src_data.data().get(), n_samples, n_features, + /*sample_weight=*/nullptr, /*normalize_weights=*/false, + d_pred_labels.data().get(), inertia); +#else ML::kmeans::fit_predict(handle, params, d_src_data.data().get(), n_samples, n_features, 0, d_pred_centroids.data().get(), 
d_pred_labels.data().get(), inertia, n_iter); +#endif CUDA_RT_CALL(cudaStreamSynchronize(stream_view.value())); diff --git a/src/knn.cu b/src/knn.cu index 13894d8..3775f95 100644 --- a/src/knn.cu +++ b/src/knn.cu @@ -8,7 +8,6 @@ #include "random_forest.cuh" #include "stream_allocator.h" -#include #include #include #include @@ -21,7 +20,16 @@ #include #include -#if CUML_VERSION_MAJOR == 21 +#if (CUML4R_LIBCUML_VERSION(CUML_VERSION_MAJOR, CUML_VERSION_MINOR) >= \ + CUML4R_LIBCUML_VERSION(24, 0)) + +using knnIndex = ML::knnIndex; +using knnIndexParam = ML::knnIndexParam; +using IVFFlatParam = ML::IVFFlatParam; +using IVFPQParam = ML::IVFPQParam; +using knnDistanceType = ML::distance::DistanceType; + +#elif CUML_VERSION_MAJOR == 21 #if CUML4R_CONCAT(0x, CUML_VERSION_MINOR) >= 0x08 #include @@ -32,6 +40,7 @@ using QuantizerType = raft::spatial::knn::QuantizerType; using IVFFlatParam = raft::spatial::knn::IVFFlatParam; using IVFPQParam = raft::spatial::knn::IVFPQParam; using IVFSQParam = raft::spatial::knn::IVFSQParam; +using knnDistanceType = raft::distance::DistanceType; #else @@ -41,6 +50,7 @@ using QuantizerType = ML::QuantizerType; using IVFFlatParam = ML::IVFFlatParam; using IVFPQParam = ML::IVFPQParam; using IVFSQParam = ML::IVFSQParam; +using knnDistanceType = raft::distance::DistanceType; #endif #endif @@ -66,6 +76,8 @@ constexpr auto kMetric = "metric"; constexpr auto kNumSamples = "n_samples"; constexpr auto kNumDims = "n_dims"; +#if (CUML4R_LIBCUML_VERSION(CUML_VERSION_MAJOR, CUML_VERSION_MINOR) < \ + CUML4R_LIBCUML_VERSION(24, 0)) std::unordered_map const kQuantizerTypes{ {"QT_8bit", QuantizerType::QT_8bit}, {"QT_4bit", QuantizerType::QT_4bit}, @@ -74,6 +86,7 @@ std::unordered_map const kQuantizerTypes{ {"QT_fp16", QuantizerType::QT_fp16}, {"QT_8bit_direct", QuantizerType::QT_8bit_direct}, {"QT_6bit", QuantizerType::QT_6bit}}; +#endif // Additional info for setting KNN params struct ParamsDetails { @@ -105,8 +118,7 @@ class PredictionCtx { 
nFeatures_(x.ncol()), modelKnnIndex_(Rcpp::XPtr(static_cast(model[kIndex]))), modelAlgoType_(static_cast(Rcpp::as(model[kAlgo]))), - modelDistType_(static_cast( - Rcpp::as(model[kMetric]))), + modelDistType_(static_cast(Rcpp::as(model[kMetric]))), modelP_(Rcpp::as(model[kP])), modelNSamples_(Rcpp::as(model[kNumSamples])), modelNDims_(Rcpp::as(model[kNumDims])), @@ -167,7 +179,7 @@ class PredictionCtx { // attributes from the KNN model object Rcpp::XPtr const modelKnnIndex_; Algo const modelAlgoType_; - raft::distance::DistanceType const modelDistType_; + knnDistanceType const modelDistType_; float const modelP_; int const modelNSamples_; int const modelNDims_; @@ -278,6 +290,11 @@ __host__ std::unique_ptr build_ivfpq_algo_params( __host__ std::unique_ptr build_ivfsq_algo_params( Rcpp::List params, bool const automated) { +#if (CUML4R_LIBCUML_VERSION(CUML_VERSION_MAJOR, CUML_VERSION_MINOR) >= \ + CUML4R_LIBCUML_VERSION(24, 0)) + Rcpp::stop("IVFSQ KNN is unsupported by this cuML version"); + return nullptr; +#else if (automated) { params[kNumLists] = 8; params[kNumProbes] = 2; @@ -299,6 +316,7 @@ __host__ std::unique_ptr build_ivfsq_algo_params( algo_params->encodeResidual = Rcpp::as(params[kEncodeResidual]); return algo_params; +#endif } __host__ std::unique_ptr build_algo_params( @@ -324,7 +342,7 @@ __host__ std::unique_ptr build_algo_params( __host__ std::unique_ptr build_knn_index( raft::handle_t& handle, float* const d_input, int const n_samples, int const n_features, Algo const algo_type, - raft::distance::DistanceType const dist_type, float const p, + knnDistanceType const dist_type, float const p, Rcpp::List const& algo_params) { std::unique_ptr knn_index(nullptr); @@ -360,7 +378,7 @@ __host__ Rcpp::List knn_fit(Rcpp::NumericMatrix const& x, int const algo, int const metric, float const p, Rcpp::List const& algo_params) { auto const algo_type = static_cast(algo); - auto const dist_type = static_cast(metric); + auto const dist_type = static_cast(metric); auto 
const input_m = Matrix(x, /*transpose=*/false); int const n_samples = input_m.numRows; int const n_features = input_m.numCols; diff --git a/src/lm.cu b/src/lm.cu index de8b0b8..f6c3e93 100644 --- a/src/lm.cu +++ b/src/lm.cu @@ -8,7 +8,6 @@ #include "preprocessor.h" #include "stream_allocator.h" -#include #include #include diff --git a/src/lm_predict.cu b/src/lm_predict.cu index d8f5531..1a592c4 100644 --- a/src/lm_predict.cu +++ b/src/lm_predict.cu @@ -6,7 +6,6 @@ #include "preprocessor.h" #include "stream_allocator.h" -#include #include #include diff --git a/src/ols_fit_impl.cu b/src/ols_fit_impl.cu index d507b99..96b2b0c 100644 --- a/src/ols_fit_impl.cu +++ b/src/ols_fit_impl.cu @@ -1,6 +1,8 @@ #include "lm_params.h" +#include "preprocessor.h" #include +#include namespace cuml4r { namespace detail { @@ -14,7 +16,12 @@ __host__ void ols_fit_impl(raft::handle_t& handle, lm::Params const& params, /*coef=*/params.d_coef, /*intercept=*/params.intercept, /*fit_intercept=*/params.fit_intercept, +#if (CUML4R_LIBCUML_VERSION(CUML_VERSION_MAJOR, CUML_VERSION_MINOR) < \ + CUML4R_LIBCUML_VERSION(24, 0)) /*normalize=*/params.normalize_input, algo); +#else + algo); +#endif } } // namespace detail diff --git a/src/pca.cu b/src/pca.cu index 3901667..592a674 100644 --- a/src/pca.cu +++ b/src/pca.cu @@ -6,9 +6,9 @@ #include "preprocessor.h" #include "stream_allocator.h" -#include #include #include +#include #include @@ -123,7 +123,13 @@ __host__ Rcpp::List pca_fit_transform(Rcpp::NumericMatrix const& x, /*singular_vals=*/d_singular_vals.data().get(), /*mu=*/d_mu.data().get(), /*noise_vars=*/d_noise_vars.data().get(), - /*prms=*/*params); + /*prms=*/*params +#if (CUML4R_LIBCUML_VERSION(CUML_VERSION_MAJOR, CUML_VERSION_MINOR) >= \ + CUML4R_LIBCUML_VERSION(24, 0)) + , + /*flip_signs_based_on_U=*/true +#endif + ); } else { ML::pcaFit(handle, /*input=*/d_input.data().get(), @@ -133,7 +139,13 @@ __host__ Rcpp::List pca_fit_transform(Rcpp::NumericMatrix const& x, 
/*singular_vals=*/d_singular_vals.data().get(), /*mu=*/d_mu.data().get(), /*noise_vars=*/d_noise_vars.data().get(), - /*prms=*/*params); + /*prms=*/*params +#if (CUML4R_LIBCUML_VERSION(CUML_VERSION_MAJOR, CUML_VERSION_MINOR) >= \ + CUML4R_LIBCUML_VERSION(24, 0)) + , + /*flip_signs_based_on_U=*/true +#endif + ); } CUDA_RT_CALL(cudaStreamSynchronize(stream_view.value())); @@ -149,7 +161,7 @@ __host__ Rcpp::List pca_fit_transform(Rcpp::NumericMatrix const& x, pinned_host_vector h_mu(n_cols); pinned_host_vector h_noise_vars(1); - AsyncCopyCtx transformed_data_d2h; + CUML4R_MAYBE_UNUSED AsyncCopyCtx transformed_data_d2h; if (transform_input) { transformed_data_d2h = async_copy(stream_view.value(), d_transformed_data.cbegin(), diff --git a/src/pinned_host_vector.h b/src/pinned_host_vector.h index a0d6359..772787c 100644 --- a/src/pinned_host_vector.h +++ b/src/pinned_host_vector.h @@ -2,39 +2,15 @@ #ifdef HAS_CUML -#include -#include - -#include +#include namespace cuml4r { template -using pinned_host_vector = - thrust::host_vector>; +using pinned_host_vector = std::vector; } // namespace cuml4r -namespace Rcpp { -namespace traits { - -template