diff --git a/.Rbuildignore b/.Rbuildignore index 1ec133a..da5ffe6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -31,3 +31,6 @@ ^libcuml/* ^\.github$ ^\.lsan-suppressions\.txt$ +^\.positai$ +^\.claude$ +^\.codex$ diff --git a/.github/docker/Dockerfile b/.github/docker/Dockerfile new file mode 100644 index 0000000..2dbb8ec --- /dev/null +++ b/.github/docker/Dockerfile @@ -0,0 +1,53 @@ +FROM nvidia/cuda:11.2.2-devel-ubuntu20.04 + +ENV DEBIAN_FRONTEND=noninteractive + +# System dependencies +RUN apt-get update -y && apt-get install -y \ + sudo software-properties-common dialog apt-utils \ + tzdata locales curl wget git \ + libcurl4-openssl-dev libssl-dev libxml2-dev \ + libfontconfig1-dev libfreetype6-dev libpng-dev \ + libharfbuzz-dev libfribidi-dev libtiff5-dev libjpeg-dev \ + make gcc g++ pandoc python3 python3-pip + +# Install R via rig +RUN curl -L https://rig.r-pkg.org/deb/rig.gpg -o /etc/apt/trusted.gpg.d/rig.gpg \ + && echo "deb http://rig.r-pkg.org/deb rig main" > /etc/apt/sources.list.d/rig.list \ + && apt-get update \ + && apt-get install -y r-rig \ + && rig add release \ + && rig default release \ + && rm -rf /var/lib/apt/lists/* + +# Use a fixed library path (not HOME-dependent) so packages are found +# regardless of what HOME is set to at runtime (GitHub Actions sets HOME=/github/home) +ENV R_LIBS_USER=/opt/R/library +RUN mkdir -p /opt/R/library + +# Parallel compilation +RUN echo "MAKEFLAGS=-j$(nproc)" >> "$(R RHOME)/etc/Renviron.site" + +# Copy source +COPY . /build + +ARG CUML_VERSION=21.12 +ENV CUML_VERSION=${CUML_VERSION} + +# Cross-compile for T4 GPU (compute capability 7.5) since build runner has no GPU +ARG CMAKE_CUDA_ARCHITECTURES=75 +ENV CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} + +ENV NOT_CRAN=true + +# Install R dependencies +RUN Rscript -e "\ + install.packages('pak', repos = 'https://r-lib.github.io/p/pak/devel/'); \ + pak::local_install_deps('/build', dependencies = TRUE)" \ + && rm -rf /tmp/* /root/.cache + +# Install cuda.ml with tests +RUN R CMD INSTALL --install-tests /build + +# Clean up +RUN rm -rf /tmp/* /build diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 89bcd05..257527e 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,5 +1,3 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: branches: [main] @@ -9,117 +7,99 @@ on: name: R-CMD-check jobs: - R-CMD-check: - + check-cran: strategy: fail-fast: false matrix: - cuda: ['11.2.1'] - cuml: ['21.08', '21.10', '21.12'] r: ['release', 'devel'] - asan: ['false', 'true'] - - runs-on: ['self-hosted', 'gpu'] - container: - image: nvidia/cuda:${{ matrix.cuda }}-devel-ubuntu18.04 - options: --gpus all - name: 'R: ${{ matrix.r }}, CUDA: ${{ matrix.cuda }}, CUML: ${{ matrix.cuml }}, ASAN: ${{ matrix.asan }}' + runs-on: ubuntu-latest + name: 'CRAN (R: ${{ matrix.r }})' env: - NOT_CRAN: true GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes - CUML_VERSION: ${{ matrix.cuml }} - CUML4R_ENABLE_ASAN: ${{ matrix.asan }} - DEBIAN_FRONTEND: 'noninteractive' steps: - - run: | - apt-get update -y - apt-get install -y sudo software-properties-common dialog apt-utils tzdata - if [[ $CUML4R_ENABLE_ASAN == 'true' ]]; then - apt-get install -y libasan5 - fi - shell: bash - - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-pandoc@v2 - - uses: actions/setup-python@v2 - with: - python-version: '3.x' - architecture: 'x64' - - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.r }} - http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - - uses: r-lib/actions/setup-r-dependencies@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: rcmdcheck + needs: check + + - name: Build + run: R CMD build . + + - name: Check + run: R CMD check --no-manual --as-cran cuda.ml_*.tar.gz + env: + _R_CHECK_CRAN_INCOMING_: false + + build-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + timeout-minutes: 120 + outputs: + image: ghcr.io/${{ github.repository }}-ci:${{ github.sha }} + steps: + - uses: actions/checkout@v4 - - name: Build {cuda.ml} - id: build-pkg - run: | - cd .. - ls -a - rm -v cuda.ml_*.tar.gz - R CMD build cuda.ml - ls -a - echo "::set-output name=pkg-dir::$(pwd)" + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . + file: .github/docker/Dockerfile + push: true + tags: ghcr.io/${{ github.repository }}-ci:${{ github.sha }} + build-args: | + CUML_VERSION=21.12 + CMAKE_CUDA_ARCHITECTURES=75 + + test-gpu: + needs: build-image + if: ${{ always() && needs.build-image.result == 'success' }} + concurrency: + group: gpu-tests + runs-on: + - "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64/spot=true" + container: + image: ${{ needs.build-image.outputs.image }} + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --gpus all --runtime=nvidia + timeout-minutes: 60 + env: + NOT_CRAN: true - - run: cp -v cuda.ml/.lsan-suppressions.txt /tmp - working-directory: ${{ steps.build-pkg.outputs.pkg-dir }} + steps: + - name: Verify GPU access + run: nvidia-smi - - name: Check {cuda.ml} package - run: | - print(list.files(".")) - pkg <- list.files(".", pattern = "cuda\\.ml_.*\\.tar\\.gz") - stopifnot(length(pkg) == 1) - - reticulate::install_miniconda(force = TRUE) - - rcmdcheck_env <- ( - if (identical(Sys.getenv("CUML4R_ENABLE_ASAN"), "true")) { - c( - LD_PRELOAD = "/usr/lib/x86_64-linux-gnu/libasan.so.5", - ASAN_OPTIONS = "halt_on_error=0,new_delete_type_mismatch=0,alloc_dealloc_mismatch=0,protect_shadow_gap=0", - LSAN_OPTIONS = "suppressions=/tmp/.lsan-suppressions.txt" - ) - } else { - character() - } - ) - rcmdcheck::rcmdcheck( - path = pkg[[1]], - args = c("--no-manual", "--as-cran"), - check_dir="check", - env = rcmdcheck_env - ) - shell: Rscript {0} - working-directory: ${{ steps.build-pkg.outputs.pkg-dir }} - - - name: Show testthat output - if: ${{ always() }} + - name: Session info run: | - find check -name 'testthat.Rout*' -type f -exec cat '{}' \; || : - shell: bash - working-directory: ${{ steps.build-pkg.outputs.pkg-dir }} + Rscript -e "sessionInfo()" + Rscript -e "library(cuda.ml)" - - name: Check for sanitizer error(s) - if: ${{ always() }} + - name: Run tests run: | - ! find check -name 'testthat.Rout*' -type f -exec egrep -C 50 'ERROR: .*Sanitizer:' '{}' + - shell: bash - working-directory: ${{ steps.build-pkg.outputs.pkg-dir }} - - - name: Upload check results - if: ${{ failure() }} - uses: actions/upload-artifact@main - with: - name: ${{ runner.os }}-r${{ matrix.r }}-results - path: ${{ steps.build-pkg.outputs.pkg-dir }}/check + Rscript -e "testthat::test_package('cuda.ml', reporter = 'progress')" diff --git a/.gitignore b/.gitignore index 6d3278f..1d71690 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ cuda.ml.Rcheck *.cmake *.a 00check.log +.positai +.codex \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 7dea694..eb9abb5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,7 +45,7 @@ Suggests: xgboost LinkingTo: Rcpp Encoding: UTF-8 -RoxygenNote: 7.1.2 +RoxygenNote: 7.3.3 OS_type: unix SystemRequirements: RAPIDS cuML (see https://rapids.ai/start.html) NeedsCompilation: yes diff --git a/NAMESPACE b/NAMESPACE index 33ff35f..5a72bf9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,24 @@ # Generated by roxygen2: do not edit by hand +S3method(cuda_ml_can_predict_class_probabilities,cuda_ml_fil) +S3method(cuda_ml_can_predict_class_probabilities,cuda_ml_knn) S3method(cuda_ml_can_predict_class_probabilities,cuda_ml_model) +S3method(cuda_ml_can_predict_class_probabilities,cuda_ml_rand_forest) S3method(cuda_ml_can_predict_class_probabilities,default) S3method(cuda_ml_elastic_net,data.frame) S3method(cuda_ml_elastic_net,default) S3method(cuda_ml_elastic_net,formula) S3method(cuda_ml_elastic_net,matrix) S3method(cuda_ml_elastic_net,recipe) +S3method(cuda_ml_get_state,cuda_ml_model) +S3method(cuda_ml_get_state,cuda_ml_pca) +S3method(cuda_ml_get_state,cuda_ml_rand_forest) +S3method(cuda_ml_get_state,cuda_ml_rand_proj_model) +S3method(cuda_ml_get_state,cuda_ml_svc) +S3method(cuda_ml_get_state,cuda_ml_svc_ovr) +S3method(cuda_ml_get_state,cuda_ml_svr) +S3method(cuda_ml_get_state,cuda_ml_umap) +S3method(cuda_ml_get_state,default) S3method(cuda_ml_inverse_transform,cuda_ml_pca) S3method(cuda_ml_inverse_transform,cuda_ml_tsvd) S3method(cuda_ml_is_classifier,cuda_ml_model) @@ -43,6 +55,15 @@ S3method(cuda_ml_ridge,matrix) S3method(cuda_ml_ridge,recipe) S3method(cuda_ml_serialize,cuda_ml_model) S3method(cuda_ml_serialize,default) +S3method(cuda_ml_set_state,cuda_ml_model_state) +S3method(cuda_ml_set_state,cuda_ml_pca_model_state) +S3method(cuda_ml_set_state,cuda_ml_rand_forest_model_state) +S3method(cuda_ml_set_state,cuda_ml_rand_proj_model_state) +S3method(cuda_ml_set_state,cuda_ml_svc_model_state) +S3method(cuda_ml_set_state,cuda_ml_svc_ovr_model_state) +S3method(cuda_ml_set_state,cuda_ml_svr_model_state) +S3method(cuda_ml_set_state,cuda_ml_umap_model_state) +S3method(cuda_ml_set_state,default) S3method(cuda_ml_sgd,data.frame) S3method(cuda_ml_sgd,default) S3method(cuda_ml_sgd,formula) diff --git a/R/agglomerative.R b/R/agglomerative.R index a1d85df..80e7963 100644 --- a/R/agglomerative.R +++ b/R/agglomerative.R @@ -18,10 +18,10 @@ agglomerative_clustering_match_metric <- function(metric = c("euclidean", "l1", #' @template model-with-numeric-input #' @param n_clusters The number of clusters to find. Default: 2L. #' @param metric Metric used for linkage computation. Must be one of -#' {"euclidean", "l1", "l2", "manhattan", "cosine"}. If connectivity is +#' \{"euclidean", "l1", "l2", "manhattan", "cosine"\}. If connectivity is #' "knn" then only "euclidean" is accepted. Default: "euclidean". #' @param connectivity The type of connectivity matrix to compute. Must be one -#' of {"pairwise", "knn"}. Default: "pairwise". +#' of \{"pairwise", "knn"\}. Default: "pairwise". #' - 'pairwise' will compute the entire fully-connected graph of pairwise #' distances between each set of points. This is the fastest to compute #' and can be very fast for smaller datasets but requires O(n^2) space. diff --git a/R/cuml_utils.R b/R/cuml_utils.R index bd5d431..abd8822 100644 --- a/R/cuml_utils.R +++ b/R/cuml_utils.R @@ -1,7 +1,7 @@ -#' Determine whether {cuda.ml} was linked to a valid version of the RAPIDS cuML +#' Determine whether \{cuda.ml\} was linked to a valid version of the RAPIDS cuML #' shared library. #' -#' @return A logical value indicating whether the current installation {cuda.ml} +#' @return A logical value indicating whether the current installation \{cuda.ml\} #' was linked to a valid version of the RAPIDS cuML shared library. #' #' @examples @@ -17,11 +17,11 @@ #' @export has_cuML <- .has_cuML -#' Get the major version of the RAPIDS cuML shared library {cuda.ml} was linked +#' Get the major version of the RAPIDS cuML shared library \{cuda.ml\} was linked #' to. #' -#' @return The major version of the RAPIDS cuML shared library {cuda.ml} was -#' linked to in a character vector, or \code{NA_character_} if {cuda.ml} was not +#' @return The major version of the RAPIDS cuML shared library \{cuda.ml\} was +#' linked to in a character vector, or \code{NA_character_} if \{cuda.ml\} was not #' linked to any version of RAPIDS cuML. #' #' @examples @@ -32,11 +32,11 @@ has_cuML <- .has_cuML #' @export cuML_major_version <- .cuML_major_version -#' Get the minor version of the RAPIDS cuML shared library {cuda.ml} was linked +#' Get the minor version of the RAPIDS cuML shared library \{cuda.ml\} was linked #' to. #' -#' @return The minor version of the RAPIDS cuML shared library {cuda.ml} was -#' linked to in a character vector, or \code{NA_character_} if {cuda.ml} was not +#' @return The minor version of the RAPIDS cuML shared library \{cuda.ml\} was +#' linked to in a character vector, or \code{NA_character_} if \{cuda.ml\} was not #' linked to any version of RAPIDS cuML. #' #' @examples diff --git a/R/fil.R b/R/fil.R index da25aa7..c8b54bf 100644 --- a/R/fil.R +++ b/R/fil.R @@ -1,11 +1,11 @@ #' Determine whether Forest Inference Library (FIL) functionalities are enabled -#' in the current installation of {cuda.ml}. +#' in the current installation of \{cuda.ml\}. #' #' CuML Forest Inference Library (FIL) functionalities (see #' https://github.com/rapidsai/cuml/tree/main/python/cuml/fil#readme) will #' require Treelite C API. If you need FIL to run tree-based model ensemble on #' GPU, and \code{fil_enabled()} returns FALSE, then please consider installing -#' Treelite and then re-installing {cuda.ml}. +#' Treelite and then re-installing \{cuda.ml\}. #' #' @return A logical value indicating whether the Forest Inference Library (FIL) #' functionalities are enabled. @@ -62,9 +62,9 @@ file_match_storage_type <- function(storage_type = c("auto", "dense", "sparse")) #' #' @param filename Path to the saved model file. #' @param mode Type of task to be performed by the model. Must be one of -#' {"classification", "regression"}. +#' \{"classification", "regression"\}. #' @param model_type Format of the saved model file. Notice if \code{filename} -#' ends with ".json" and \code{model_type} is "xgboost", then {cuda.ml} will +#' ends with ".json" and \code{model_type} is "xgboost", then \{cuda.ml\} will #' assume the model file is in XGBoost JSON (instead of binary) format. #' Default: "xgboost". #' @param algo Type of the algorithm for inference, must be one of the diff --git a/R/knn.R b/R/knn.R index 0eda43d..4fc8ff6 100644 --- a/R/knn.R +++ b/R/knn.R @@ -111,7 +111,7 @@ cuda_ml_knn_algo_ivfsq <- function(nlist, nprobe, #' @template supervised-model-output #' @template ellipsis-unused #' @param algo The query algorithm to use. Must be one of -#' {"brute", "ivfflat", "ivfpq", "ivfsq"} or a KNN algorithm specification +#' \{"brute", "ivfflat", "ivfpq", "ivfsq"\} or a KNN algorithm specification #' constructed using the \code{cuda_ml_knn_algo_*} family of functions. #' If the algorithm is specified by one of the \code{cuda_ml_knn_algo_*} #' functions, then values of all required parameters of the algorithm will @@ -132,10 +132,10 @@ cuda_ml_knn_algo_ivfsq <- function(nlist, nprobe, #' faster distances calculations). #' #' Default: "brute". -#' @param metric Distance metric to use. Must be one of {"euclidean", "l2", +#' @param metric Distance metric to use. Must be one of \{"euclidean", "l2", #' "l1", "cityblock", "taxicab", "manhattan", "braycurtis", "canberra", #' "minkowski", "lp", "chebyshev", "linf", "jensenshannon", "cosine", -#' "correlation"}. +#' "correlation"\}. #' Default: "euclidean". #' @param p Parameter for the Minkowski metric. If p = 1, then the metric is #' equivalent to manhattan distance (l1). If p = 2, the metric is equivalent diff --git a/R/logistic_reg.R b/R/logistic_reg.R index 1c6e7d4..43b3529 100644 --- a/R/logistic_reg.R +++ b/R/logistic_reg.R @@ -97,7 +97,7 @@ logistic_reg_build_sample_weight <- function(sample_weight, #' @template ellipsis-unused #' @template fit-intercept #' @param penalty The penalty type, must be one of -#' {"none", "l1", "l2", "elasticnet"}. +#' \{"none", "l1", "l2", "elasticnet"\}. #' If "none" or "l2" is selected, then L-BFGS solver will be used. #' If "l1" is selected, solver OWL-QN will be used. #' If "elasticnet" is selected, OWL-QN will be used if l1_ratio > 0, otherwise diff --git a/R/model.R b/R/model.R index 584ef06..a5edeb5 100644 --- a/R/model.R +++ b/R/model.R @@ -138,11 +138,20 @@ cuda_ml_can_predict_class_probabilities.cuda_ml_model <- function(model) { FALSE } -cuda_ml_can_predict_class_probabilities.cuda_ml_fil <- cuda_ml_is_classifier +#' @export +cuda_ml_can_predict_class_probabilities.cuda_ml_fil <- function(model) { + cuda_ml_is_classifier(model) +} -cuda_ml_can_predict_class_probabilities.cuda_ml_knn <- cuda_ml_is_classifier +#' @export +cuda_ml_can_predict_class_probabilities.cuda_ml_knn <- function(model) { + cuda_ml_is_classifier(model) +} -cuda_ml_can_predict_class_probabilities.cuda_ml_rand_forest <- cuda_ml_is_classifier +#' @export +cuda_ml_can_predict_class_probabilities.cuda_ml_rand_forest <- function(model) { + cuda_ml_is_classifier(model) +} #' Serialize a CuML model #' @@ -184,6 +193,7 @@ cuda_ml_get_state <- function(model) { UseMethod("cuda_ml_get_state") } +#' @export cuda_ml_get_state.default <- function(model) { stop( "Model of type '", paste(class(model), collapse = " "), "' does not ", @@ -191,6 +201,7 @@ cuda_ml_get_state.default <- function(model) { ) } +#' @export cuda_ml_get_state.cuda_ml_model <- function(model) { # Default implementation: assume the entire model object can be serializabled # by `base::serialize()`. @@ -199,6 +210,7 @@ cuda_ml_get_state.cuda_ml_model <- function(model) { new_model_state(model_state, cls = NULL) } +#' @export cuda_ml_set_state.cuda_ml_model_state <- function(model_state) { # Default implementation: assume the entire model state can be unserialized by # `base::unserialize()`. @@ -233,6 +245,7 @@ cuda_ml_set_state <- function(model_state) { UseMethod("cuda_ml_set_state") } +#' @export cuda_ml_set_state.default <- function(model_state) { stop( "No unserialization routine found for model state of type '", diff --git a/R/ols.R b/R/ols.R index 3f1c4ba..9d5cefc 100644 --- a/R/ols.R +++ b/R/ols.R @@ -17,7 +17,7 @@ ols_match_method <- function(method = c("svd", "eig", "qr")) { #' @template ellipsis-unused #' @template fit-intercept #' @template normalize-input -#' @param method Must be one of {"svd", "eig", "qr"}. +#' @param method Must be one of \{"svd", "eig", "qr"\}. #' #' - "svd": compute SVD decomposition using Jacobi iterations. #' - "eig": use an eigendecomposition of the covariance matrix. diff --git a/R/package.R b/R/package.R index 5e2079f..e0ffb36 100644 --- a/R/package.R +++ b/R/package.R @@ -2,12 +2,10 @@ #' #' This package provides a R interface for the RAPIDS cuML library. #' -#' @docType package #' @author Yitao Li #' @import Rcpp -#' @name cuda.ml #' @useDynLib cuda.ml, .registration = TRUE -NULL +"_PACKAGE" .onLoad <- function(libname, pkgname) { register_rand_forest_model(pkgname) diff --git a/R/pca.R b/R/pca.R index db2bda2..fb3a974 100644 --- a/R/pca.R +++ b/R/pca.R @@ -78,12 +78,14 @@ cuda_ml_inverse_transform.cuda_ml_pca <- function(model, x, ...) { .pca_inverse_transform(model = model, x = as.matrix(x)) } +#' @export cuda_ml_get_state.cuda_ml_pca <- function(model) { model_state <- .pca_get_state(model) new_model_state(model_state, "cuda_ml_pca_model_state") } +#' @export cuda_ml_set_state.cuda_ml_pca_model_state <- function(model_state) { model_state <- .pca_set_state(model_state) diff --git a/R/rand_forest.R b/R/rand_forest.R index 9a64338..6d7380b 100644 --- a/R/rand_forest.R +++ b/R/rand_forest.R @@ -329,6 +329,7 @@ cuda_ml_rand_forest_impl_regression <- function(processed, mtry, trees, min_n, ) } +#' @export cuda_ml_get_state.cuda_ml_rand_forest <- function(model) { get_state_impl <- switch(model$mode, classification = .rf_classifier_get_state, @@ -344,6 +345,7 @@ cuda_ml_get_state.cuda_ml_rand_forest <- function(model) { new_model_state(model_state, "cuda_ml_rand_forest_model_state") } +#' @export cuda_ml_set_state.cuda_ml_rand_forest_model_state <- function(model_state) { set_state_impl <- switch(model_state$mode, classification = .rf_classifier_set_state, diff --git a/R/rand_proj.R b/R/rand_proj.R index 91cf8a5..475ebec 100644 --- a/R/rand_proj.R +++ b/R/rand_proj.R @@ -79,12 +79,14 @@ cuda_ml_transform.cuda_ml_rand_proj_model <- function(model, x, ...) { .rproj_transform(model$rproj_ctx, as.matrix(x)) } +#' @export cuda_ml_get_state.cuda_ml_rand_proj_model <- function(model) { model_state <- .rproj_get_state(model$rproj_ctx) new_model_state(model_state, "cuda_ml_rand_proj_model_state") } +#' @export cuda_ml_set_state.cuda_ml_rand_proj_model_state <- function(model_state) { model_obj <- .rproj_set_state(model_state) diff --git a/R/sgd.R b/R/sgd.R index 2a2741a..d4953df 100644 --- a/R/sgd.R +++ b/R/sgd.R @@ -38,9 +38,9 @@ sgd_match_learning_rate <- function(learning_rate = c("constant", "invscaling", #' @template ellipsis-unused #' @template fit-intercept #' @template l1_ratio -#' @param loss Loss function, must be one of {"squared_loss", "log", "hinge"}. +#' @param loss Loss function, must be one of \{"squared_loss", "log", "hinge"\}. #' @param penalty Type of regularization to perform, must be one of -#' {"none", "l1", "l2", "elasticnet"}. +#' \{"none", "l1", "l2", "elasticnet"\}. #' #' - "none": no regularization. #' - "l1": perform regularization based on the L1-norm (LASSO) which tries to @@ -63,7 +63,7 @@ sgd_match_learning_rate <- function(learning_rate = c("constant", "invscaling", #' @param eta0 The initial learning rate. Default: 1e-3. #' @param power_t The exponent used for calculating the invscaling learning #' rate. Default: 0.5. -#' @param learning_rate Must be one of {"constant", "invscaling", "adaptive"}. +#' @param learning_rate Must be one of \{"constant", "invscaling", "adaptive"\}. #' #' - "constant": the learning rate will be kept constant. #' - "invscaling": (learning rate) = (initial learning rate) / pow(t, power_t) diff --git a/R/svm.R b/R/svm.R index 82df814..31c0c8c 100644 --- a/R/svm.R +++ b/R/svm.R @@ -313,6 +313,7 @@ cuda_ml_svm_classification_multiclass_impl <- function(processed, cost, kernel, ) } +#' @export cuda_ml_get_state.cuda_ml_svc_ovr <- function(model) { model_state <- list( ovr_model_states = lapply(model$xptr, function(x) cuda_ml_get_state(x)), @@ -322,6 +323,7 @@ cuda_ml_get_state.cuda_ml_svc_ovr <- function(model) { new_model_state(model_state, "cuda_ml_svc_ovr_model_state") } +#' @export cuda_ml_set_state.cuda_ml_svc_ovr_model_state <- function(model_state) { new_model( cls = c("cuda_ml_svc_ovr", "cuda_ml_svm"), @@ -365,6 +367,7 @@ cuda_ml_svm_classification_binary_impl <- function(processed, cost, kernel, gamm ) } +#' @export cuda_ml_get_state.cuda_ml_svc <- function(model) { model_state <- list( model_state = .svc_get_state(model$xptr), @@ -374,6 +377,7 @@ cuda_ml_get_state.cuda_ml_svc <- function(model) { new_model_state(model_state, "cuda_ml_svc_model_state") } +#' @export cuda_ml_set_state.cuda_ml_svc_model_state <- function(model_state) { new_model( cls = c("cuda_ml_svc", "cuda_ml_svm"), @@ -416,6 +420,7 @@ cuda_ml_svm_regression_impl <- function(processed, cost, kernel, gamma, coef0, ) } +#' @export cuda_ml_get_state.cuda_ml_svr <- function(model) { model_state <- list( model_state = .svr_get_state(model$xptr), @@ -425,6 +430,7 @@ cuda_ml_get_state.cuda_ml_svr <- function(model) { new_model_state(model_state, "cuda_ml_svr_model_state") } +#' @export cuda_ml_set_state.cuda_ml_svr_model_state <- function(model_state) { new_model( cls = c("cuda_ml_svr", "cuda_ml_svm"), diff --git a/R/tsne.R b/R/tsne.R index 6299748..49a80cb 100644 --- a/R/tsne.R +++ b/R/tsne.R @@ -24,7 +24,7 @@ new_tsne_model <- function(embedding) { #' @param n_components Dimension of the embedded space. #' @param n_neighbors The number of datapoints to use in the attractive forces. #' Default: ceiling(3 * perplexity). -#' @param method T-SNE method, must be one of {"barnes_hut", "fft", "exact"}. +#' @param method T-SNE method, must be one of \{"barnes_hut", "fft", "exact"\}. #' The "exact" method will be more accurate but slower. Both "barnes_hut" and #' "fft" methods are fast approximations. #' @param angle Valid values are between 0.0 and 1.0, which trade off speed and @@ -35,7 +35,7 @@ new_tsne_model <- function(embedding) { #' @param learning_rate Learning rate of the t-SNE algorithm, usually between #' (10, 1000). If the learning rate is too high, then t-SNE result could look #' like a cloud / ball of points. -#' @param learning_rate_method Must be one of {"adaptive", "none"}. If +#' @param learning_rate_method Must be one of \{"adaptive", "none"\}. If #' "adaptive", then learning rate, early exaggeration, and perplexity are #' automatically tuned based on input size. Default: "adaptive". #' @param perplexity The target value of the conditional distribution's diff --git a/R/umap.R b/R/umap.R index 8cd9292..ab60823 100644 --- a/R/umap.R +++ b/R/umap.R @@ -41,7 +41,7 @@ new_umap_model <- function(model) { #' @param learning_rate The initial learning rate for the embedding #' optimization. Default: 1.0. #' @param init Initialization mode of the low dimensional embedding. Must be -#' one of {"spectral", "random"}. Default: "spectral". +#' one of \{"spectral", "random"\}. Default: "spectral". #' @param min_dist The effective minimum distance between embedded points. #' Default: 0.1. #' @param spread The effective scale of embedded points. In combination with @@ -71,7 +71,7 @@ new_umap_model <- function(model) { #' the target simplcial set. Default: n_neighbors. #' @param target_metric The metric for measuring distance between the actual and #' and the target values (\code{y}) if using supervised dimension reduction. -#' Must be one of {"categorical", "euclidean"}. Default: "categorical". +#' Must be one of \{"categorical", "euclidean"\}. Default: "categorical". #' @param target_weight Weighting factor between data topology and target #' topology. A value of 0.0 weights entirely on data, a value of 1.0 weights #' entirely on target. The default of 0.5 balances the weighting equally @@ -150,12 +150,14 @@ cuda_ml_umap <- function(x, y = NULL, n_components = 2L, n_neighbors = 15L, model } +#' @export cuda_ml_get_state.cuda_ml_umap <- function(model) { model_state <- .umap_get_state(model) new_model_state(model_state, "cuda_ml_umap_model_state") } +#' @export cuda_ml_set_state.cuda_ml_umap_model_state <- function(model_state) { model_obj <- .umap_set_state(model_state) diff --git a/man-roxygen/cuML-log-level.R b/man-roxygen/cuML-log-level.R index aa225f8..e275d88 100644 --- a/man-roxygen/cuML-log-level.R +++ b/man-roxygen/cuML-log-level.R @@ -1,3 +1,3 @@ #' @param cuML_log_level Log level within cuML library functions. Must be one of -#' {"off", "critical", "error", "warn", "info", "debug", "trace"}. +#' \{"off", "critical", "error", "warn", "info", "debug", "trace"\}. #' Default: off. diff --git a/man-roxygen/knn-algo-ivfsq.R b/man-roxygen/knn-algo-ivfsq.R index 6001350..779810d 100644 --- a/man-roxygen/knn-algo-ivfsq.R +++ b/man-roxygen/knn-algo-ivfsq.R @@ -1,4 +1,4 @@ -#' @param qtype Quantizer type. Must be one of {"QT_8bit", "QT_4bit", +#' @param qtype Quantizer type. Must be one of \{"QT_8bit", "QT_4bit", #' "QT_8bit_uniform", "QT_4bit_uniform", "QT_fp16", "QT_8bit_direct", -#' "QT_6bit"}. +#' "QT_6bit"\}. #' @param encode_residual Whether to encode residuals. diff --git a/man/cuML_major_version.Rd b/man/cuML_major_version.Rd index 409ad06..dc5503a 100644 --- a/man/cuML_major_version.Rd +++ b/man/cuML_major_version.Rd @@ -2,18 +2,18 @@ % Please edit documentation in R/cuml_utils.R \name{cuML_major_version} \alias{cuML_major_version} -\title{Get the major version of the RAPIDS cuML shared library {cuda.ml} was linked +\title{Get the major version of the RAPIDS cuML shared library \{cuda.ml\} was linked to.} \usage{ cuML_major_version() } \value{ -The major version of the RAPIDS cuML shared library {cuda.ml} was -linked to in a character vector, or \code{NA_character_} if {cuda.ml} was not +The major version of the RAPIDS cuML shared library \{cuda.ml\} was +linked to in a character vector, or \code{NA_character_} if \{cuda.ml\} was not linked to any version of RAPIDS cuML. } \description{ -Get the major version of the RAPIDS cuML shared library {cuda.ml} was linked +Get the major version of the RAPIDS cuML shared library \{cuda.ml\} was linked to. } \examples{ diff --git a/man/cuML_minor_version.Rd b/man/cuML_minor_version.Rd index 2993da2..4c66f5b 100644 --- a/man/cuML_minor_version.Rd +++ b/man/cuML_minor_version.Rd @@ -2,18 +2,18 @@ % Please edit documentation in R/cuml_utils.R \name{cuML_minor_version} \alias{cuML_minor_version} -\title{Get the minor version of the RAPIDS cuML shared library {cuda.ml} was linked +\title{Get the minor version of the RAPIDS cuML shared library \{cuda.ml\} was linked to.} \usage{ cuML_minor_version() } \value{ -The minor version of the RAPIDS cuML shared library {cuda.ml} was -linked to in a character vector, or \code{NA_character_} if {cuda.ml} was not +The minor version of the RAPIDS cuML shared library \{cuda.ml\} was +linked to in a character vector, or \code{NA_character_} if \{cuda.ml\} was not linked to any version of RAPIDS cuML. } \description{ -Get the minor version of the RAPIDS cuML shared library {cuda.ml} was linked +Get the minor version of the RAPIDS cuML shared library \{cuda.ml\} was linked to. } \examples{ diff --git a/man/cuda.ml.Rd b/man/cuda.ml-package.Rd similarity index 55% rename from man/cuda.ml.Rd rename to man/cuda.ml-package.Rd index 8043964..b43d49e 100644 --- a/man/cuda.ml.Rd +++ b/man/cuda.ml-package.Rd @@ -1,11 +1,20 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/package.R \docType{package} -\name{cuda.ml} +\name{cuda.ml-package} \alias{cuda.ml} +\alias{cuda.ml-package} \title{cuda.ml} \description{ This package provides a R interface for the RAPIDS cuML library. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://mlverse.github.io/cuda.ml/} + \item Report bugs at \url{https://github.com/mlverse/cuda.ml/issues} +} + } \author{ Yitao Li diff --git a/man/cuda_ml_agglomerative_clustering.Rd b/man/cuda_ml_agglomerative_clustering.Rd index 74cc963..8d6427e 100644 --- a/man/cuda_ml_agglomerative_clustering.Rd +++ b/man/cuda_ml_agglomerative_clustering.Rd @@ -19,11 +19,11 @@ and should consist of numeric values only.} \item{n_clusters}{The number of clusters to find. Default: 2L.} \item{metric}{Metric used for linkage computation. Must be one of -{"euclidean", "l1", "l2", "manhattan", "cosine"}. If connectivity is +\{"euclidean", "l1", "l2", "manhattan", "cosine"\}. If connectivity is "knn" then only "euclidean" is accepted. Default: "euclidean".} \item{connectivity}{The type of connectivity matrix to compute. Must be one -of {"pairwise", "knn"}. Default: "pairwise". +of \{"pairwise", "knn"\}. Default: "pairwise". - 'pairwise' will compute the entire fully-connected graph of pairwise distances between each set of points. This is the fastest to compute and can be very fast for smaller datasets but requires O(n^2) space. diff --git a/man/cuda_ml_dbscan.Rd b/man/cuda_ml_dbscan.Rd index 9b57851..a53ffd3 100644 --- a/man/cuda_ml_dbscan.Rd +++ b/man/cuda_ml_dbscan.Rd @@ -19,7 +19,7 @@ and should consist of numeric values only.} within distance `eps` from it.} \item{cuML_log_level}{Log level within cuML library functions. Must be one of -{"off", "critical", "error", "warn", "info", "debug", "trace"}. +\{"off", "critical", "error", "warn", "info", "debug", "trace"\}. Default: off.} } \value{ diff --git a/man/cuda_ml_fil_enabled.Rd b/man/cuda_ml_fil_enabled.Rd index 23a39fa..1ebcefb 100644 --- a/man/cuda_ml_fil_enabled.Rd +++ b/man/cuda_ml_fil_enabled.Rd @@ -3,7 +3,7 @@ \name{cuda_ml_fil_enabled} \alias{cuda_ml_fil_enabled} \title{Determine whether Forest Inference Library (FIL) functionalities are enabled -in the current installation of {cuda.ml}.} +in the current installation of \{cuda.ml\}.} \usage{ cuda_ml_fil_enabled() } @@ -16,7 +16,7 @@ CuML Forest Inference Library (FIL) functionalities (see https://github.com/rapidsai/cuml/tree/main/python/cuml/fil#readme) will require Treelite C API. If you need FIL to run tree-based model ensemble on GPU, and \code{fil_enabled()} returns FALSE, then please consider installing -Treelite and then re-installing {cuda.ml}. +Treelite and then re-installing \{cuda.ml\}. } \examples{ if (cuda_ml_fil_enabled()) { diff --git a/man/cuda_ml_fil_load_model.Rd b/man/cuda_ml_fil_load_model.Rd index 3f7252b..e1d9a30 100644 --- a/man/cuda_ml_fil_load_model.Rd +++ b/man/cuda_ml_fil_load_model.Rd @@ -20,10 +20,10 @@ cuda_ml_fil_load_model( \item{filename}{Path to the saved model file.} \item{mode}{Type of task to be performed by the model. Must be one of -{"classification", "regression"}.} +\{"classification", "regression"\}.} \item{model_type}{Format of the saved model file. Notice if \code{filename} -ends with ".json" and \code{model_type} is "xgboost", then {cuda.ml} will +ends with ".json" and \code{model_type} is "xgboost", then \{cuda.ml\} will assume the model file is in XGBoost JSON (instead of binary) format. Default: "xgboost".} diff --git a/man/cuda_ml_kmeans.Rd b/man/cuda_ml_kmeans.Rd index 4f738a7..28c7529 100644 --- a/man/cuda_ml_kmeans.Rd +++ b/man/cuda_ml_kmeans.Rd @@ -32,7 +32,7 @@ the initial value of a centroid. Default: "kmeans++".} \item{seed}{Seed to the random number generator. Default: 0.} \item{cuML_log_level}{Log level within cuML library functions. Must be one of -{"off", "critical", "error", "warn", "info", "debug", "trace"}. +\{"off", "critical", "error", "warn", "info", "debug", "trace"\}. Default: off.} } \value{ diff --git a/man/cuda_ml_knn.Rd b/man/cuda_ml_knn.Rd index 4d72201..93f4d82 100644 --- a/man/cuda_ml_knn.Rd +++ b/man/cuda_ml_knn.Rd @@ -17,9 +17,8 @@ cuda_ml_knn(x, ...) x, y, algo = c("brute", "ivfflat", "ivfpq", "ivfsq"), - metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", - "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", - "correlation"), + metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis", + "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"), p = 2, neighbors = 5L, ... @@ -29,9 +28,8 @@ cuda_ml_knn(x, ...) x, y, algo = c("brute", "ivfflat", "ivfpq", "ivfsq"), - metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", - "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", - "correlation"), + metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis", + "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"), p = 2, neighbors = 5L, ... @@ -41,9 +39,8 @@ cuda_ml_knn(x, ...) formula, data, algo = c("brute", "ivfflat", "ivfpq", "ivfsq"), - metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", - "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", - "correlation"), + metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis", + "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"), p = 2, neighbors = 5L, ... @@ -53,9 +50,8 @@ cuda_ml_knn(x, ...) x, data, algo = c("brute", "ivfflat", "ivfpq", "ivfsq"), - metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", - "braycurtis", "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", - "correlation"), + metric = c("euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis", + "canberra", "minkowski", "chebyshev", "jensenshannon", "cosine", "correlation"), p = 2, neighbors = 5L, ... @@ -76,7 +72,7 @@ cuda_ml_knn(x, ...) desired responses.} \item{algo}{The query algorithm to use. Must be one of - {"brute", "ivfflat", "ivfpq", "ivfsq"} or a KNN algorithm specification + \{"brute", "ivfflat", "ivfpq", "ivfsq"\} or a KNN algorithm specification constructed using the \code{cuda_ml_knn_algo_*} family of functions. If the algorithm is specified by one of the \code{cuda_ml_knn_algo_*} functions, then values of all required parameters of the algorithm will @@ -98,10 +94,10 @@ desired responses.} Default: "brute".} -\item{metric}{Distance metric to use. Must be one of {"euclidean", "l2", +\item{metric}{Distance metric to use. Must be one of \{"euclidean", "l2", "l1", "cityblock", "taxicab", "manhattan", "braycurtis", "canberra", "minkowski", "lp", "chebyshev", "linf", "jensenshannon", "cosine", -"correlation"}. +"correlation"\}. Default: "euclidean".} \item{p}{Parameter for the Minkowski metric. If p = 1, then the metric is diff --git a/man/cuda_ml_knn_algo_ivfsq.Rd b/man/cuda_ml_knn_algo_ivfsq.Rd index 6b7be11..f4b76f2 100644 --- a/man/cuda_ml_knn_algo_ivfsq.Rd +++ b/man/cuda_ml_knn_algo_ivfsq.Rd @@ -18,9 +18,9 @@ cuda_ml_knn_algo_ivfsq( \item{nprobe}{At query time, the number of cells used for approximate nearest neighbor search.} -\item{qtype}{Quantizer type. Must be one of {"QT_8bit", "QT_4bit", +\item{qtype}{Quantizer type. Must be one of \{"QT_8bit", "QT_4bit", "QT_8bit_uniform", "QT_4bit_uniform", "QT_fp16", "QT_8bit_direct", -"QT_6bit"}.} +"QT_6bit"\}.} \item{encode_residual}{Whether to encode residuals.} } diff --git a/man/cuda_ml_logistic_reg.Rd b/man/cuda_ml_logistic_reg.Rd index df89512..d0c1394 100644 --- a/man/cuda_ml_logistic_reg.Rd +++ b/man/cuda_ml_logistic_reg.Rd @@ -92,7 +92,7 @@ mean of the response variable. If FALSE, then the model expects data to be centered. Default: TRUE.} \item{penalty}{The penalty type, must be one of -{"none", "l1", "l2", "elasticnet"}. +\{"none", "l1", "l2", "elasticnet"\}. If "none" or "l2" is selected, then L-BFGS solver will be used. If "l1" is selected, solver OWL-QN will be used. If "elasticnet" is selected, OWL-QN will be used if l1_ratio > 0, otherwise diff --git a/man/cuda_ml_ols.Rd b/man/cuda_ml_ols.Rd index c4d2fbf..05cb292 100644 --- a/man/cuda_ml_ols.Rd +++ b/man/cuda_ml_ols.Rd @@ -63,7 +63,7 @@ cuda_ml_ols(x, ...) \item{y}{A numeric vector (for regression) or factor (for classification) of desired responses.} -\item{method}{Must be one of {"svd", "eig", "qr"}. +\item{method}{Must be one of \{"svd", "eig", "qr"\}. - "svd": compute SVD decomposition using Jacobi iterations. - "eig": use an eigendecomposition of the covariance matrix. diff --git a/man/cuda_ml_pca.Rd b/man/cuda_ml_pca.Rd index 376954c..80ad31c 100644 --- a/man/cuda_ml_pca.Rd +++ b/man/cuda_ml_pca.Rd @@ -41,7 +41,7 @@ Default: FALSE.} of the input data. Default: TRUE.} \item{cuML_log_level}{Log level within cuML library functions. Must be one of -{"off", "critical", "error", "warn", "info", "debug", "trace"}. +\{"off", "critical", "error", "warn", "info", "debug", "trace"\}. Default: off.} } \value{ diff --git a/man/cuda_ml_rand_forest.Rd b/man/cuda_ml_rand_forest.Rd index c922a55..d629ece 100644 --- a/man/cuda_ml_rand_forest.Rd +++ b/man/cuda_ml_rand_forest.Rd @@ -149,7 +149,7 @@ given batch. Default: 128L.} Default: 8L.} \item{cuML_log_level}{Log level within cuML library functions. Must be one of -{"off", "critical", "error", "warn", "info", "debug", "trace"}. +\{"off", "critical", "error", "warn", "info", "debug", "trace"\}. Default: off.} \item{formula}{A formula specifying the outcome terms on the left-hand side, diff --git a/man/cuda_ml_sgd.Rd b/man/cuda_ml_sgd.Rd index c1fc940..5250bfe 100644 --- a/man/cuda_ml_sgd.Rd +++ b/man/cuda_ml_sgd.Rd @@ -107,10 +107,10 @@ desired responses.} mean of the response variable. If FALSE, then the model expects data to be centered. Default: TRUE.} -\item{loss}{Loss function, must be one of {"squared_loss", "log", "hinge"}.} +\item{loss}{Loss function, must be one of \{"squared_loss", "log", "hinge"\}.} \item{penalty}{Type of regularization to perform, must be one of - {"none", "l1", "l2", "elasticnet"}. + \{"none", "l1", "l2", "elasticnet"\}. - "none": no regularization. - "l1": perform regularization based on the L1-norm (LASSO) which tries to @@ -143,7 +143,7 @@ Default: 1e-3.} \item{shuffle}{Whether to shuffles the training data after each epoch. Default: True.} -\item{learning_rate}{Must be one of {"constant", "invscaling", "adaptive"}. +\item{learning_rate}{Must be one of \{"constant", "invscaling", "adaptive"\}. - "constant": the learning rate will be kept constant. - "invscaling": (learning rate) = (initial learning rate) / pow(t, power_t) diff --git a/man/cuda_ml_svm.Rd b/man/cuda_ml_svm.Rd index 0fef9d6..2219b48 100644 --- a/man/cuda_ml_svm.Rd +++ b/man/cuda_ml_svm.Rd @@ -154,7 +154,7 @@ tasks. Default: 0.1.} \item{sample_weights}{Optional weight assigned to each input data point.} \item{cuML_log_level}{Log level within cuML library functions. Must be one of -{"off", "critical", "error", "warn", "info", "debug", "trace"}. +\{"off", "critical", "error", "warn", "info", "debug", "trace"\}. Default: off.} \item{formula}{A formula specifying the outcome terms on the left-hand side, diff --git a/man/cuda_ml_tsne.Rd b/man/cuda_ml_tsne.Rd index c6fb603..f3b5f44 100644 --- a/man/cuda_ml_tsne.Rd +++ b/man/cuda_ml_tsne.Rd @@ -36,7 +36,7 @@ and should consist of numeric values only.} \item{n_neighbors}{The number of datapoints to use in the attractive forces. Default: ceiling(3 * perplexity).} -\item{method}{T-SNE method, must be one of {"barnes_hut", "fft", "exact"}. +\item{method}{T-SNE method, must be one of \{"barnes_hut", "fft", "exact"\}. The "exact" method will be more accurate but slower. Both "barnes_hut" and "fft" methods are fast approximations.} @@ -51,7 +51,7 @@ at least 250. Default: 1000L.} (10, 1000). If the learning rate is too high, then t-SNE result could look like a cloud / ball of points.} -\item{learning_rate_method}{Must be one of {"adaptive", "none"}. If +\item{learning_rate_method}{Must be one of \{"adaptive", "none"\}. If "adaptive", then learning rate, early exaggeration, and perplexity are automatically tuned based on input size. Default: "adaptive".} @@ -94,7 +94,7 @@ runs, even with the same \code{seed} being used for each run. Default: NULL.} \item{cuML_log_level}{Log level within cuML library functions. Must be one of -{"off", "critical", "error", "warn", "info", "debug", "trace"}. +\{"off", "critical", "error", "warn", "info", "debug", "trace"\}. Default: off.} } \value{ diff --git a/man/cuda_ml_tsvd.Rd b/man/cuda_ml_tsvd.Rd index 96c4a83..bd6dd02 100644 --- a/man/cuda_ml_tsvd.Rd +++ b/man/cuda_ml_tsvd.Rd @@ -37,7 +37,7 @@ Default: 15.} of the input data. Default: TRUE.} \item{cuML_log_level}{Log level within cuML library functions. Must be one of -{"off", "critical", "error", "warn", "info", "debug", "trace"}. +\{"off", "critical", "error", "warn", "info", "debug", "trace"\}. Default: off.} } \value{ diff --git a/man/cuda_ml_umap.Rd b/man/cuda_ml_umap.Rd index 3b46441..c9cfdd4 100644 --- a/man/cuda_ml_umap.Rd +++ b/man/cuda_ml_umap.Rd @@ -48,7 +48,7 @@ low dimensional embedding. Default: 500.} optimization. Default: 1.0.} \item{init}{Initialization mode of the low dimensional embedding. Must be -one of {"spectral", "random"}. Default: "spectral".} +one of \{"spectral", "random"\}. Default: "spectral".} \item{min_dist}{The effective minimum distance between embedded points. Default: 0.1.} @@ -88,7 +88,7 @@ the target simplcial set. Default: n_neighbors.} \item{target_metric}{The metric for measuring distance between the actual and and the target values (\code{y}) if using supervised dimension reduction. -Must be one of {"categorical", "euclidean"}. Default: "categorical".} +Must be one of \{"categorical", "euclidean"\}. Default: "categorical".} \item{target_weight}{Weighting factor between data topology and target topology. A value of 0.0 weights entirely on data, a value of 1.0 weights @@ -106,7 +106,7 @@ If the PRNG seed is not set, then the trained embeddings will not be deterministic.} \item{cuML_log_level}{Log level within cuML library functions. Must be one of -{"off", "critical", "error", "warn", "info", "debug", "trace"}. +\{"off", "critical", "error", "warn", "info", "debug", "trace"\}. Default: off.} } \value{ diff --git a/man/has_cuML.Rd b/man/has_cuML.Rd index a78075c..8305755 100644 --- a/man/has_cuML.Rd +++ b/man/has_cuML.Rd @@ -2,17 +2,17 @@ % Please edit documentation in R/cuml_utils.R \name{has_cuML} \alias{has_cuML} -\title{Determine whether {cuda.ml} was linked to a valid version of the RAPIDS cuML +\title{Determine whether \{cuda.ml\} was linked to a valid version of the RAPIDS cuML shared library.} \usage{ has_cuML() } \value{ -A logical value indicating whether the current installation {cuda.ml} +A logical value indicating whether the current installation \{cuda.ml\} was linked to a valid version of the RAPIDS cuML shared library. } \description{ -Determine whether {cuda.ml} was linked to a valid version of the RAPIDS cuML +Determine whether \{cuda.ml\} was linked to a valid version of the RAPIDS cuML shared library. } \examples{ diff --git a/man/predict.cuda_ml_rand_forest.Rd b/man/predict.cuda_ml_rand_forest.Rd index 9a05897..e9510fe 100644 --- a/man/predict.cuda_ml_rand_forest.Rd +++ b/man/predict.cuda_ml_rand_forest.Rd @@ -27,7 +27,7 @@ is set to \code{TRUE} or \code{FALSE} but the model being applied does not support class probabilities output.} \item{cuML_log_level}{Log level within cuML library functions. Must be one of -{"off", "critical", "error", "warn", "info", "debug", "trace"}. +\{"off", "critical", "error", "warn", "info", "debug", "trace"\}. Default: off.} \item{...}{Additional arguments to \code{predict()}. Currently unused.} diff --git a/src/CMakeLists.txt.in b/src/CMakeLists.txt.in index 88e08a7..030d323 100644 --- a/src/CMakeLists.txt.in +++ b/src/CMakeLists.txt.in @@ -128,6 +128,7 @@ find_package(Treelite) if(Treelite_FOUND) set(CUML4R_LIBS ${CUML4R_LIBS} treelite::treelite treelite::treelite_runtime) set(CUML4R_INCLUDE_DIRS ${CUML4R_INCLUDE_DIRS} ${Treelite_INCLUDE_DIRS}) + message(STATUS "Treelite found, ignoring stub headers: ${CUML_STUB_HEADERS_DIR}") else() message( WARNING diff --git a/tests/testthat.R b/tests/testthat.R index 269f852..1f11702 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,6 +1,7 @@ library(testthat) +library(cuda.ml) -if (identical(Sys.getenv("NOT_CRAN"), "true")) { +if (identical(Sys.getenv("NOT_CRAN"), "true") && has_cuML()) { filter <- Sys.getenv("TESTTHAT_FILTER", unset = "") if (identical(filter, "")) filter <- NULL diff --git a/tests/testthat/helper-initialize.R b/tests/testthat/helper-initialize.R index cd898cb..d533348 100644 --- a/tests/testthat/helper-initialize.R +++ b/tests/testthat/helper-initialize.R @@ -15,12 +15,8 @@ expect_libcuml <- function() { expect_libcuml() -sklearn <- tryCatch(reticulate::import("sklearn"), - error = function(e) { - reticulate::py_install("sklearn", pip = TRUE) - reticulate::import("sklearn") - } -) +reticulate::py_require("scikit-learn") +sklearn <- reticulate::import("sklearn") sklearn_iris_dataset <- list( data = iris[, names(iris) != "Species"] %>% unname() %>% diff --git a/tests/testthat/test-elastic-net.R b/tests/testthat/test-elastic-net.R index 55b3680..78fef98 100644 --- a/tests/testthat/test-elastic-net.R +++ b/tests/testthat/test-elastic-net.R @@ -33,7 +33,7 @@ test_that("Elastic net regressor works as expected", { sklearn_elastic_net_regressor <- sklearn$linear_model$ElasticNet( alpha = 1e-3, - max_iter = 10000, + max_iter = 10000L, tol = 1e-4, fit_intercept = fit_intercept, l1_ratio = l1_ratio diff --git a/tests/testthat/test-lasso.R b/tests/testthat/test-lasso.R index 85016eb..0efe7eb 100644 --- a/tests/testthat/test-lasso.R +++ b/tests/testthat/test-lasso.R @@ -32,7 +32,7 @@ test_that("LASSO regressor works as expected", { sklearn_lasso_regressor <- sklearn$linear_model$Lasso( alpha = 1e-3, - max_iter = 10000, + max_iter = 10000L, tol = 1e-4, fit_intercept = fit_intercept ) diff --git a/tests/testthat/test-tsvd.R b/tests/testthat/test-tsvd.R index ba7bd1e..337e67c 100644 --- a/tests/testthat/test-tsvd.R +++ b/tests/testthat/test-tsvd.R @@ -7,14 +7,24 @@ sklearn_tsvd_model <- tsvd_model$fit(sklearn_iris_dataset$data) cuda_ml_tsvd_model <- cuda_ml_tsvd(iris[1:4], n_components = 2) +# SVD components are only defined up to sign — align signs before comparing. +# For each component row, flip the cuML sign to match sklearn if the first +# non-negligible element disagrees. +align_svd_signs <- function(a, b) { + for (i in seq_len(nrow(a))) { + if (sign(a[i, 1]) != sign(b[i, 1])) { + a[i, ] <- -a[i, ] + } + } + a +} + test_that("cuda_ml_tsvd() works as expected", { + sklearn_components <- sklearn_tsvd_model$components_ + aligned_components <- align_svd_signs(cuda_ml_tsvd_model$components, sklearn_components) + expect_equal( - cuda_ml_tsvd_model$components, sklearn_tsvd_model$components_, - tolerance = 1e-8, scale = 1 - ) - expect_equal( - cuda_ml_tsvd_model$explained_variance, - as.numeric(sklearn_tsvd_model$explained_variance_), + aligned_components, sklearn_components, tolerance = 1e-8, scale = 1 ) expect_equal( @@ -32,18 +42,25 @@ test_that("cuda_ml_tsvd() works as expected", { as.numeric(sklearn_tsvd_model$singular_values_), tolerance = 1e-8, scale = 1 ) - expect_equal( - cuda_ml_tsvd_model$transformed_data, - sklearn_tsvd_model$transform(sklearn_iris_dataset$data), - tolerance = 1e-8, scale = 1 - ) + + # Transformed data columns also have sign ambiguity matching the components + sklearn_transformed <- sklearn_tsvd_model$transform(sklearn_iris_dataset$data) + cuda_transformed <- cuda_ml_tsvd_model$transformed_data + for (j in seq_len(ncol(cuda_transformed))) { + if (sign(cuda_transformed[1, j]) != sign(sklearn_transformed[1, j])) { + cuda_transformed[, j] <- -cuda_transformed[, j] + } + } + expect_equal(cuda_transformed, sklearn_transformed, tolerance = 1e-8, scale = 1) }) test_that("cuda_ml_inverse_transform() works as expected for TSVD models", { - expect_equal( - cuda_ml_inverse_transform( - cuda_ml_tsvd_model, cuda_ml_tsvd_model$transformed_data - ), - sklearn_tsvd_model$inverse_transform(cuda_ml_tsvd_model$transformed_data) + # inverse_transform recovers the original data regardless of sign convention + cuda_ml_reconstructed <- cuda_ml_inverse_transform( + cuda_ml_tsvd_model, cuda_ml_tsvd_model$transformed_data + ) + sklearn_reconstructed <- sklearn_tsvd_model$inverse_transform( + sklearn_tsvd_model$transform(sklearn_iris_dataset$data) ) + expect_equal(cuda_ml_reconstructed, sklearn_reconstructed, tolerance = 1e-2, scale = 1) }) diff --git a/tools/config/configure.R b/tools/config/configure.R index d9a84f1..e271f6d 100644 --- a/tools/config/configure.R +++ b/tools/config/configure.R @@ -74,8 +74,8 @@ run_cmake <- function() { cuml_prefix <- get_cuml_prefix() bundle_libcuml <- FALSE if (is.na(cuml_prefix)) { - cuml_prefix <- normalizePath(file.path(pkg_root(), "libcuml")) download_libcuml() + cuml_prefix <- normalizePath(file.path(pkg_root(), "libcuml")) dir.create("inst") file.rename(file.path("libcuml", "lib"), file.path("inst", "libs")) file.symlink(file.path("..", "inst", "libs"), file.path("libcuml", "lib")) @@ -92,7 +92,7 @@ run_cmake <- function() { cmake_args <- c( ".", - "-DCMAKE_CUDA_ARCHITECTURES=NATIVE", + paste0("-DCMAKE_CUDA_ARCHITECTURES=", Sys.getenv("CMAKE_CUDA_ARCHITECTURES", unset = "NATIVE")), paste0("-DCUML_INCLUDE_DIR=", file.path(cuml_prefix, "include")), paste0("-DCUML_LIB_DIR=", file.path(cuml_prefix, "lib")), paste0(