Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions c/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ add_library(
src/neighbors/refine.cpp
src/neighbors/tiered_index.cpp
src/neighbors/all_neighbors.cpp
src/preprocessing/pca.cpp
src/preprocessing/quantize/binary.cpp
src/preprocessing/quantize/pq.cpp
src/preprocessing/quantize/scalar.cpp
Expand Down
1 change: 1 addition & 0 deletions c/include/cuvs/core/all.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <cuvs/neighbors/mg_ivf_pq.h>
#endif

#include <cuvs/preprocessing/pca.h>
#include <cuvs/preprocessing/quantize/binary.h>
#include <cuvs/preprocessing/quantize/pq.h>
#include <cuvs/preprocessing/quantize/scalar.h>
219 changes: 219 additions & 0 deletions c/include/cuvs/preprocessing/pca.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <cuvs/core/c_api.h>
#include <dlpack/dlpack.h>
#include <stdbool.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
* @defgroup preprocessing_c_pca C API for PCA (Principal Component Analysis)
Comment thread
lowener marked this conversation as resolved.
* @{
*/

/**
* @brief Solver algorithm for PCA eigen decomposition.
*/
enum cuvsPcaSolver {
/** Covariance + divide-and-conquer eigen decomposition */
CUVS_PCA_COV_EIG_DQ = 0,
/** Covariance + Jacobi eigen decomposition */
CUVS_PCA_COV_EIG_JACOBI = 1
};

/**
* @brief Parameters for PCA decomposition.
*/
struct cuvsPcaParams {
/** Number of principal components to keep. */
int n_components;

/**
* If false, data passed to fit are overwritten and running fit(X).transform(X) will
* not yield the expected results; use fit_transform(X) instead.
*/
bool copy;

/**
* When true the component vectors are multiplied by the square root of n_samples and then
* divided by the singular values to ensure uncorrelated outputs with unit component-wise
* variances.
*/
bool whiten;

/** Solver algorithm to use. */
enum cuvsPcaSolver algorithm;

/** Tolerance for singular values (used by Jacobi solver). */
float tol;

/** Number of iterations for the power method (Jacobi solver). */
int n_iterations;
};

typedef struct cuvsPcaParams* cuvsPcaParams_t;

/**
* @brief Allocate PCA params and populate with default values.
*
* @param[out] params cuvsPcaParams_t to allocate
* @return cuvsError_t
*/
cuvsError_t cuvsPcaParamsCreate(cuvsPcaParams_t* params);

/**
* @brief De-allocate PCA params.
*
* @param[in] params cuvsPcaParams_t to de-allocate
* @return cuvsError_t
*/
cuvsError_t cuvsPcaParamsDestroy(cuvsPcaParams_t params);

/**
* @brief Perform PCA fit operation.
*
* Computes the principal components, explained variances, singular values, and column means
* from the input data.
*
* @code {.c}
* #include <cuvs/core/c_api.h>
* #include <cuvs/preprocessing/pca.h>
*
* // Create cuvsResources_t
* cuvsResources_t res;
* cuvsResourcesCreate(&res);
*
* // Create PCA params
* cuvsPcaParams_t params;
* cuvsPcaParamsCreate(&params);
* params->n_components = 2;
*
* // Assume populated DLManagedTensor objects (col-major, float32, device memory)
* DLManagedTensor input; // [n_rows x n_cols]
* DLManagedTensor components; // [n_components x n_cols]
* DLManagedTensor explained_var; // [n_components]
* DLManagedTensor explained_var_ratio; // [n_components]
* DLManagedTensor singular_vals; // [n_components]
* DLManagedTensor mu; // [n_cols]
* DLManagedTensor noise_vars; // [1] (scalar)
*
* cuvsPcaFit(res, params, &input, &components, &explained_var,
* &explained_var_ratio, &singular_vals, &mu, &noise_vars, false);
*
* // Cleanup
* cuvsPcaParamsDestroy(params);
* cuvsResourcesDestroy(res);
* @endcode
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] params PCA parameters
* @param[inout] input input data [n_rows x n_cols] (col-major, float32, device)
* @param[out] components principal components [n_components x n_cols] (col-major, float32, device)
* @param[out] explained_var explained variances [n_components] (float32, device)
* @param[out] explained_var_ratio explained variance ratios [n_components] (float32, device)
* @param[out] singular_vals singular values [n_components] (float32, device)
* @param[out] mu column means [n_cols] (float32, device)
* @param[out] noise_vars noise variance [1] (float32, device)
* @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false)
* @return cuvsError_t
*/
cuvsError_t cuvsPcaFit(cuvsResources_t res,
cuvsPcaParams_t params,
DLManagedTensor* input,
DLManagedTensor* components,
DLManagedTensor* explained_var,
DLManagedTensor* explained_var_ratio,
DLManagedTensor* singular_vals,
DLManagedTensor* mu,
DLManagedTensor* noise_vars,
bool flip_signs_based_on_U);

/**
* @brief Perform PCA fit and transform in a single operation.
*
* Computes the principal components and transforms the input data into the eigenspace.
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] params PCA parameters
* @param[inout] input input data [n_rows x n_cols] (col-major, float32, device)
* @param[out] trans_input transformed data [n_rows x n_components] (col-major, float32, device)
* @param[out] components principal components [n_components x n_cols] (col-major, float32, device)
* @param[out] explained_var explained variances [n_components] (float32, device)
* @param[out] explained_var_ratio explained variance ratios [n_components] (float32, device)
* @param[out] singular_vals singular values [n_components] (float32, device)
* @param[out] mu column means [n_cols] (float32, device)
* @param[out] noise_vars noise variance [1] (float32, device)
* @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false)
* @return cuvsError_t
*/
cuvsError_t cuvsPcaFitTransform(cuvsResources_t res,
cuvsPcaParams_t params,
DLManagedTensor* input,
DLManagedTensor* trans_input,
DLManagedTensor* components,
DLManagedTensor* explained_var,
DLManagedTensor* explained_var_ratio,
DLManagedTensor* singular_vals,
DLManagedTensor* mu,
DLManagedTensor* noise_vars,
bool flip_signs_based_on_U);

/**
* @brief Perform PCA transform operation.
*
* Transforms the input data into the eigenspace using previously computed principal components.
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] params PCA parameters
* @param[inout] input data to transform [n_rows x n_cols] (col-major, float32, device)
* @param[in] components principal components [n_components x n_cols] (col-major, float32, device)
* @param[in] singular_vals singular values [n_components] (float32, device)
* @param[in] mu column means [n_cols] (float32, device)
* @param[out] trans_input transformed data [n_rows x n_components] (col-major, float32, device)
* @return cuvsError_t
*/
cuvsError_t cuvsPcaTransform(cuvsResources_t res,
cuvsPcaParams_t params,
DLManagedTensor* input,
DLManagedTensor* components,
DLManagedTensor* singular_vals,
DLManagedTensor* mu,
DLManagedTensor* trans_input);

/**
* @brief Perform PCA inverse transform operation.
*
* Transforms data from the eigenspace back to the original space.
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] params PCA parameters
* @param[in] trans_input transformed data [n_rows x n_components] (col-major, float32, device)
* @param[in] components principal components [n_components x n_cols] (col-major, float32, device)
* @param[in] singular_vals singular values [n_components] (float32, device)
* @param[in] mu column means [n_cols] (float32, device)
* @param[out] output reconstructed data [n_rows x n_cols] (col-major, float32, device)
* @return cuvsError_t
*/
cuvsError_t cuvsPcaInverseTransform(cuvsResources_t res,
cuvsPcaParams_t params,
DLManagedTensor* trans_input,
DLManagedTensor* components,
DLManagedTensor* singular_vals,
DLManagedTensor* mu,
DLManagedTensor* output);

/**
* @}
*/

#ifdef __cplusplus
}
#endif
Loading
Loading