-
Notifications
You must be signed in to change notification settings - Fork 182
PCA C and Python API #1987
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
PCA C and Python API #1987
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
3fdc31b
pca-c-api
aamijar eb42865
Merge branch 'main' into pca-c-api
aamijar 27f73ac
enum prefix with CUVS
aamijar f8eda8a
Merge branch 'main' into pca-c-api
aamijar 9e0d06a
Merge branch 'main' into pca-c-api
aamijar b01371a
address comments
aamijar c0c3a78
Merge branch 'main' into pca-c-api
aamijar 0184ff2
add Python PCA API
aamijar e828244
fix python api
aamijar 96acd11
Merge branch 'main' into pca-c-api
aamijar File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,219 @@ | ||
| /* | ||
| * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <cuvs/core/c_api.h> | ||
| #include <dlpack/dlpack.h> | ||
| #include <stdbool.h> | ||
| #include <stdint.h> | ||
|
|
||
| #ifdef __cplusplus | ||
| extern "C" { | ||
| #endif | ||
|
|
||
| /** | ||
| * @defgroup preprocessing_c_pca C API for PCA (Principal Component Analysis) | ||
| * @{ | ||
| */ | ||
|
|
||
| /** | ||
| * @brief Solver algorithm for PCA eigen decomposition. | ||
| */ | ||
| enum cuvsPcaSolver { | ||
| /** Covariance + divide-and-conquer eigen decomposition */ | ||
| CUVS_PCA_COV_EIG_DQ = 0, | ||
| /** Covariance + Jacobi eigen decomposition */ | ||
| CUVS_PCA_COV_EIG_JACOBI = 1 | ||
| }; | ||
|
|
||
| /** | ||
| * @brief Parameters for PCA decomposition. | ||
| */ | ||
| struct cuvsPcaParams { | ||
| /** Number of principal components to keep. */ | ||
| int n_components; | ||
|
|
||
| /** | ||
| * If false, data passed to fit are overwritten and running fit(X).transform(X) will | ||
| * not yield the expected results; use fit_transform(X) instead. | ||
| */ | ||
| bool copy; | ||
|
|
||
| /** | ||
| * When true the component vectors are multiplied by the square root of n_samples and then | ||
| * divided by the singular values to ensure uncorrelated outputs with unit component-wise | ||
| * variances. | ||
| */ | ||
| bool whiten; | ||
|
|
||
| /** Solver algorithm to use. */ | ||
| enum cuvsPcaSolver algorithm; | ||
|
|
||
| /** Tolerance for singular values (used by Jacobi solver). */ | ||
| float tol; | ||
|
|
||
| /** Number of iterations for the power method (Jacobi solver). */ | ||
| int n_iterations; | ||
| }; | ||
|
|
||
| typedef struct cuvsPcaParams* cuvsPcaParams_t; | ||
|
|
||
| /** | ||
| * @brief Allocate PCA params and populate with default values. | ||
| * | ||
| * @param[out] params cuvsPcaParams_t to allocate | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsPcaParamsCreate(cuvsPcaParams_t* params); | ||
|
|
||
| /** | ||
| * @brief De-allocate PCA params. | ||
| * | ||
| * @param[in] params cuvsPcaParams_t to de-allocate | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsPcaParamsDestroy(cuvsPcaParams_t params); | ||
|
|
||
| /** | ||
| * @brief Perform PCA fit operation. | ||
| * | ||
| * Computes the principal components, explained variances, singular values, and column means | ||
| * from the input data. | ||
| * | ||
| * @code {.c} | ||
| * #include <cuvs/core/c_api.h> | ||
| * #include <cuvs/preprocessing/pca.h> | ||
| * | ||
| * // Create cuvsResources_t | ||
| * cuvsResources_t res; | ||
| * cuvsResourcesCreate(&res); | ||
| * | ||
| * // Create PCA params | ||
| * cuvsPcaParams_t params; | ||
| * cuvsPcaParamsCreate(¶ms); | ||
| * params->n_components = 2; | ||
| * | ||
| * // Assume populated DLManagedTensor objects (col-major, float32, device memory) | ||
| * DLManagedTensor input; // [n_rows x n_cols] | ||
| * DLManagedTensor components; // [n_components x n_cols] | ||
| * DLManagedTensor explained_var; // [n_components] | ||
| * DLManagedTensor explained_var_ratio; // [n_components] | ||
| * DLManagedTensor singular_vals; // [n_components] | ||
| * DLManagedTensor mu; // [n_cols] | ||
| * DLManagedTensor noise_vars; // [1] (scalar) | ||
| * | ||
| * cuvsPcaFit(res, params, &input, &components, &explained_var, | ||
| * &explained_var_ratio, &singular_vals, &mu, &noise_vars, false); | ||
| * | ||
| * // Cleanup | ||
| * cuvsPcaParamsDestroy(params); | ||
| * cuvsResourcesDestroy(res); | ||
| * @endcode | ||
| * | ||
| * @param[in] res cuvsResources_t opaque C handle | ||
| * @param[in] params PCA parameters | ||
| * @param[inout] input input data [n_rows x n_cols] (col-major, float32, device) | ||
| * @param[out] components principal components [n_components x n_cols] (col-major, float32, device) | ||
| * @param[out] explained_var explained variances [n_components] (float32, device) | ||
| * @param[out] explained_var_ratio explained variance ratios [n_components] (float32, device) | ||
| * @param[out] singular_vals singular values [n_components] (float32, device) | ||
| * @param[out] mu column means [n_cols] (float32, device) | ||
| * @param[out] noise_vars noise variance [1] (float32, device) | ||
| * @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false) | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsPcaFit(cuvsResources_t res, | ||
| cuvsPcaParams_t params, | ||
| DLManagedTensor* input, | ||
| DLManagedTensor* components, | ||
| DLManagedTensor* explained_var, | ||
| DLManagedTensor* explained_var_ratio, | ||
| DLManagedTensor* singular_vals, | ||
| DLManagedTensor* mu, | ||
| DLManagedTensor* noise_vars, | ||
| bool flip_signs_based_on_U); | ||
|
|
||
| /** | ||
| * @brief Perform PCA fit and transform in a single operation. | ||
| * | ||
| * Computes the principal components and transforms the input data into the eigenspace. | ||
| * | ||
| * @param[in] res cuvsResources_t opaque C handle | ||
| * @param[in] params PCA parameters | ||
| * @param[inout] input input data [n_rows x n_cols] (col-major, float32, device) | ||
| * @param[out] trans_input transformed data [n_rows x n_components] (col-major, float32, device) | ||
| * @param[out] components principal components [n_components x n_cols] (col-major, float32, device) | ||
| * @param[out] explained_var explained variances [n_components] (float32, device) | ||
| * @param[out] explained_var_ratio explained variance ratios [n_components] (float32, device) | ||
| * @param[out] singular_vals singular values [n_components] (float32, device) | ||
| * @param[out] mu column means [n_cols] (float32, device) | ||
| * @param[out] noise_vars noise variance [1] (float32, device) | ||
| * @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false) | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsPcaFitTransform(cuvsResources_t res, | ||
| cuvsPcaParams_t params, | ||
| DLManagedTensor* input, | ||
| DLManagedTensor* trans_input, | ||
| DLManagedTensor* components, | ||
| DLManagedTensor* explained_var, | ||
| DLManagedTensor* explained_var_ratio, | ||
| DLManagedTensor* singular_vals, | ||
| DLManagedTensor* mu, | ||
| DLManagedTensor* noise_vars, | ||
| bool flip_signs_based_on_U); | ||
|
|
||
| /** | ||
| * @brief Perform PCA transform operation. | ||
| * | ||
| * Transforms the input data into the eigenspace using previously computed principal components. | ||
| * | ||
| * @param[in] res cuvsResources_t opaque C handle | ||
| * @param[in] params PCA parameters | ||
| * @param[inout] input data to transform [n_rows x n_cols] (col-major, float32, device) | ||
| * @param[in] components principal components [n_components x n_cols] (col-major, float32, device) | ||
| * @param[in] singular_vals singular values [n_components] (float32, device) | ||
| * @param[in] mu column means [n_cols] (float32, device) | ||
| * @param[out] trans_input transformed data [n_rows x n_components] (col-major, float32, device) | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsPcaTransform(cuvsResources_t res, | ||
| cuvsPcaParams_t params, | ||
| DLManagedTensor* input, | ||
| DLManagedTensor* components, | ||
| DLManagedTensor* singular_vals, | ||
| DLManagedTensor* mu, | ||
| DLManagedTensor* trans_input); | ||
|
|
||
| /** | ||
| * @brief Perform PCA inverse transform operation. | ||
| * | ||
| * Transforms data from the eigenspace back to the original space. | ||
| * | ||
| * @param[in] res cuvsResources_t opaque C handle | ||
| * @param[in] params PCA parameters | ||
| * @param[in] trans_input transformed data [n_rows x n_components] (col-major, float32, device) | ||
| * @param[in] components principal components [n_components x n_cols] (col-major, float32, device) | ||
| * @param[in] singular_vals singular values [n_components] (float32, device) | ||
| * @param[in] mu column means [n_cols] (float32, device) | ||
| * @param[out] output reconstructed data [n_rows x n_cols] (col-major, float32, device) | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsPcaInverseTransform(cuvsResources_t res, | ||
| cuvsPcaParams_t params, | ||
| DLManagedTensor* trans_input, | ||
| DLManagedTensor* components, | ||
| DLManagedTensor* singular_vals, | ||
| DLManagedTensor* mu, | ||
| DLManagedTensor* output); | ||
|
|
||
| /** | ||
| * @} | ||
| */ | ||
|
|
||
| #ifdef __cplusplus | ||
| } | ||
| #endif | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.