Skip to content

Commit 3f95d4a

Browse files
authored
All-neighbors API docs (rapidsai#944)
docs for all-neighbors API Authors: - Jinsol Park (https://github.com/jinsolp) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: rapidsai#944
1 parent b0f45bb commit 3f95d4a

5 files changed

Lines changed: 113 additions & 58 deletions

File tree

cpp/include/cuvs/neighbors/all_neighbors.hpp

Lines changed: 53 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
#include <variant>
2323

2424
namespace cuvs::neighbors::all_neighbors {
25+
/**
26+
* @defgroup all_neighbors_cpp_params The all-neighbors algorithm parameters.
27+
* @{
28+
*/
2529

2630
/**
2731
* @brief Parameters used to build an all-neighbors knn graph (find nearest neighbors for all the
@@ -43,20 +47,21 @@ using GraphBuildParams =
4347
std::variant<graph_build_params::ivf_pq_params, graph_build_params::nn_descent_params>;
4448

4549
/**
46-
* @brief Parameters used to build an all-neighbors graph (find nearest neighbors for all the
47-
* training vectors)
48-
*
49-
* graph_build_params: graph building parameters for the given graph building algorithm. defaults
50-
* to ivfpq.
51-
* n_nearest_clusters: number of nearest clusters each data point will be assigned to in
52-
* the batching algorithm
53-
* n_clusters: number of total clusters (aka batches) to split the data into. If set to 1, algorithm
54-
* creates an all-neighbors graph without batching
55-
* metric: metric type
50+
* @brief Parameters used to build an all-neighbors graph (find nearest neighbors for all the
51+
* training vectors).
52+
* For scalability, the all-neighbors graph construction algorithm partitions a set of training
53+
* vectors into overlapping clusters, computes a local knn graph on each cluster, and merges the
54+
* local graphs into a single global graph.
55+
* Device memory usage and accuracy can be configured by changing the `overlap_factor` and
56+
* `n_clusters`.
57+
* The algorithm used to build each local graph is also configurable.
5658
*
5759
*/
5860
struct all_neighbors_params {
5961
/** Parameters for knn graph building algorithm
62+
* Approximate nearest neighbors methods are used to build the knn graph. Currently supported
63+
* options are 'IVF-PQ' and 'NN Descent'. IVF-PQ is more accurate, but slower compared to NN
64+
* Descent.
6065
*
6166
* Set ivf_pq_params, or nn_descent_params to select the graph build
6267
* algorithm and control their parameters.
@@ -74,35 +79,55 @@ struct all_neighbors_params {
7479
GraphBuildParams graph_build_params;
7580

7681
/**
77-
* Usage of n_nearest_clusters and n_clusters
82+
* Number of nearest clusters each data point will be assigned to in the batching algorithm.
83+
* Start with `overlap_factor = 2` and gradually increase (2->3->4 ...) for better accuracy at the
84+
* cost of device memory usage.
85+
*/
86+
size_t overlap_factor = 2;
87+
88+
/**
89+
* Number of total clusters (aka batches) to split the data into. If set to 1, algorithm creates
90+
* an all-neighbors graph without batching.
91+
* Start with `n_clusters = 4` and increase (4->8->16 ...) for less device memory usage at the
92+
* cost of accuracy. This is independent from `overlap_factor` as long as `overlap_factor` <
93+
* `n_clusters`.
7894
*
79-
* The ratio of n_nearest_clusters / n_clusters determines device memory usage.
80-
* Approximately (n_nearest_clusters / n_clusters) * num_rows_in_entire_data number of rows will
95+
* The ratio of `overlap_factor / n_clusters` determines device memory usage.
96+
* Approximately `(overlap_factor / n_clusters) * num_rows_in_entire_data` number of rows will
8197
* be put on device memory at once.
82-
* E.g. between (n_nearest_clusters / n_clusters) = 2/10 and 2/20, the latter will use less device
98+
* E.g. between `(overlap_factor / n_clusters)` = 2/10 and 2/20, the latter will use less device
8399
* memory.
84100
*
85-
* Larger n_nearest_clusters results in better accuracy of the final all-neighbors knn
86-
* graph. E.g. With the similar device memory usages, (n_nearest_clusters / n_clusters) = 4/20
101+
* Larger `overlap_factor` results in better accuracy of the final all-neighbors knn
102+
* graph. E.g. While using similar device memory, `(overlap_factor / n_clusters)` = 4/20
87103
* will have better accuracy than 2/10 at the cost of performance.
104+
*
88105
*/
89-
size_t n_nearest_clusters = 2;
90-
size_t n_clusters = 1; // defaults to not batching
106+
size_t n_clusters = 1; // defaults to not batching
107+
108+
/** Metric used. */
91109
cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded;
92110
};
93111

112+
/** @} */
113+
114+
/**
115+
* @defgroup all_neighbors_cpp_build The all-neighbors knn graph build
116+
* @{
117+
*/
118+
94119
/**
95120
* @brief Builds an approximate all-neighbors knn graph (find nearest neighbors for all the
96121
* training vectors)
97122
*
98123
* Usage example:
99124
* @code{.cpp}
100-
* using namespace cuvs::neighbors;
101-
* // use default index parameters
102-
* all_neighbors::all_neighbors_params params;
125+
* using namespace cuvs::neighbors;
126+
* // use default index parameters
127+
* all_neighbors::all_neighbors_params params;
103128
* auto indices = raft::make_device_matrix<int64_t, int64_t>(handle, n_row, k);
104129
* auto distances = raft::make_device_matrix<float, int64_t>(handle, n_row, k);
105-
* all_neighbors::build(res, params, dataset, indices.view(), distances.view());
130+
* all_neighbors::build(handle, params, dataset, indices.view(), distances.view());
106131
* @endcode
107132
*
108133
* @param[in] handle raft::resources is an object managing resources
@@ -127,12 +152,12 @@ void build(
127152
*
128153
* Usage example:
129154
* @code{.cpp}
130-
* using namespace cuvs::neighbors;
131-
* // use default index parameters
132-
* all_neighbors::all_neighbors_params params;
155+
* using namespace cuvs::neighbors;
156+
* // use default index parameters
157+
* all_neighbors::all_neighbors_params params;
133158
* auto indices = raft::make_device_matrix<int64_t, int64_t>(handle, n_row, k);
134159
* auto distances = raft::make_device_matrix<float, int64_t>(handle, n_row, k);
135-
* all_neighbors::build(res, params, dataset, indices.view(), distances.view());
160+
* all_neighbors::build(handle, params, dataset, indices.view(), distances.view());
136161
* @endcode
137162
*
138163
* @param[in] handle raft::resources is an object managing resources
@@ -149,4 +174,6 @@ void build(
149174
raft::device_matrix_view<const float, int64_t, row_major> dataset,
150175
raft::device_matrix_view<int64_t, int64_t, row_major> indices,
151176
std::optional<raft::device_matrix_view<float, int64_t, row_major>> distances = std::nullopt);
177+
178+
/** @} */
152179
} // namespace cuvs::neighbors::all_neighbors

cpp/src/neighbors/all_neighbors/all_neighbors_batched.cuh

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ void get_centroids_on_data_subsample(raft::resources const& res,
7171
template <typename T, typename IdxT>
7272
void single_gpu_assign_clusters(
7373
raft::resources const& res,
74-
size_t n_nearest_clusters,
74+
size_t overlap_factor,
7575
size_t n_clusters,
7676
size_t n_rows_per_batch,
7777
size_t base_row_offset,
@@ -89,10 +89,10 @@ void single_gpu_assign_clusters(
8989
auto dataset_batch_d =
9090
raft::make_device_matrix<T, IdxT, raft::row_major>(res, n_rows_per_batch, num_cols);
9191

92-
auto nearest_clusters_idx_d = raft::make_device_matrix<IdxT, int64_t, raft::row_major>(
93-
res, n_rows_per_batch, n_nearest_clusters);
94-
auto nearest_clusters_dist_d = raft::make_device_matrix<T, int64_t, raft::row_major>(
95-
res, n_rows_per_batch, n_nearest_clusters);
92+
auto nearest_clusters_idx_d =
93+
raft::make_device_matrix<IdxT, int64_t, raft::row_major>(res, n_rows_per_batch, overlap_factor);
94+
auto nearest_clusters_dist_d =
95+
raft::make_device_matrix<T, int64_t, raft::row_major>(res, n_rows_per_batch, overlap_factor);
9696

9797
std::optional<raft::device_vector_view<const T, int64_t>> norms_view;
9898
cuvs::neighbors::brute_force::index<T> brute_force_index(res, centroids, norms_view, metric);
@@ -111,21 +111,21 @@ void single_gpu_assign_clusters(
111111
raft::make_const_mdspan(dataset_batch_d.view()),
112112
nearest_clusters_idx_d.view(),
113113
nearest_clusters_dist_d.view());
114-
raft::copy(global_nearest_cluster.data_handle() + row_offset * n_nearest_clusters,
114+
raft::copy(global_nearest_cluster.data_handle() + row_offset * overlap_factor,
115115
nearest_clusters_idx_d.data_handle(),
116-
n_rows_of_current_batch * n_nearest_clusters,
116+
n_rows_of_current_batch * overlap_factor,
117117
resource::get_cuda_stream(res));
118118
}
119119
}
120120

121121
/**
122-
* Assign each data point to top n_nearest_clusters number of clusters. Loads the data in batches
122+
* Assign each data point to top overlap_factor number of clusters. Loads the data in batches
123123
* onto device for efficiency. Arguments:
124124
* - [in] res: raft resource
125125
* - [in] params: params for graph building
126126
* - [in] dataset [num_rows x num_cols]: entire dataset located on host memory
127127
* - [in] centroids [n_clusters x num_cols] : centroid vectors
128-
* - [out] global_nearest_cluster [num_rows X n_nearest_clusters] : top n_nearest_clusters closest
128+
* - [out] global_nearest_cluster [num_rows X overlap_factor] : top overlap_factor closest
129129
* clusters for each data point
130130
*/
131131
template <typename T, typename IdxT>
@@ -169,7 +169,7 @@ void assign_clusters(raft::resources const& res,
169169
size_t base_row_offset_for_this_rank = n_rows_per_cluster * base_cluster_idx;
170170

171171
single_gpu_assign_clusters(dev_res,
172-
params.n_nearest_clusters,
172+
params.overlap_factor,
173173
n_clusters_for_this_rank,
174174
n_rows_per_cluster,
175175
base_row_offset_for_this_rank,
@@ -180,7 +180,7 @@ void assign_clusters(raft::resources const& res,
180180
}
181181
} else {
182182
single_gpu_assign_clusters(res,
183-
params.n_nearest_clusters,
183+
params.overlap_factor,
184184
params.n_clusters,
185185
n_rows_per_cluster,
186186
0,
@@ -195,9 +195,9 @@ void assign_clusters(raft::resources const& res,
195195
* Getting data indices that belong to cluster
196196
* Arguments:
197197
* - [in] res: raft resource
198-
* - [in] global_nearest_cluster [num_rows X n_nearest_clusters] : top n_nearest_clusters closest
198+
* - [in] global_nearest_cluster [num_rows X overlap_factor] : top overlap_factor closest
199199
* clusters for each data point
200-
* - [out] inverted_indices [num_rows x n_nearest_clusters sized vector] : vector for data indices
200+
* - [out] inverted_indices [num_rows x overlap_factor sized vector] : vector for data indices
201201
* for each cluster
202202
* - [out] cluster_sizes [n_cluster] : cluster size for each cluster
203203
* - [out] cluster_offsets [n_cluster] : offset in inverted_indices for each cluster
@@ -210,17 +210,17 @@ void get_inverted_indices(raft::resources const& res,
210210
raft::host_vector_view<IdxT, IdxT> cluster_offsets)
211211
{
212212
// build sparse inverted indices and get number of data points for each cluster
213-
size_t num_rows = global_nearest_cluster.extent(0);
214-
size_t n_nearest_clusters = global_nearest_cluster.extent(1);
215-
size_t n_clusters = cluster_sizes.extent(0);
213+
size_t num_rows = global_nearest_cluster.extent(0);
214+
size_t overlap_factor = global_nearest_cluster.extent(1);
215+
size_t n_clusters = cluster_sizes.extent(0);
216216

217217
auto local_offsets = raft::make_host_vector<IdxT>(n_clusters);
218218

219219
std::fill(cluster_sizes.data_handle(), cluster_sizes.data_handle() + n_clusters, 0);
220220
std::fill(local_offsets.data_handle(), local_offsets.data_handle() + n_clusters, 0);
221221

222222
for (size_t i = 0; i < num_rows; i++) {
223-
for (size_t j = 0; j < n_nearest_clusters; j++) {
223+
for (size_t j = 0; j < overlap_factor; j++) {
224224
IdxT cluster_id = global_nearest_cluster(i, j);
225225
cluster_sizes(cluster_id) += 1;
226226
}
@@ -231,7 +231,7 @@ void get_inverted_indices(raft::resources const& res,
231231
cluster_offsets(i) = cluster_offsets(i - 1) + cluster_sizes(i - 1);
232232
}
233233
for (size_t i = 0; i < num_rows; i++) {
234-
for (size_t j = 0; j < n_nearest_clusters; j++) {
234+
for (size_t j = 0; j < overlap_factor; j++) {
235235
IdxT cluster_id = global_nearest_cluster(i, j);
236236
inverted_indices(cluster_offsets(cluster_id) + local_offsets(cluster_id)) = i;
237237
local_offsets(cluster_id) += 1;
@@ -389,20 +389,19 @@ void batch_build(
389389
size_t num_cols = static_cast<size_t>(dataset.extent(1));
390390
size_t k = indices.extent(1);
391391

392-
RAFT_EXPECTS(params.n_clusters > params.n_nearest_clusters,
393-
"n_nearest_clusters should be smaller than n_clusters. We recommend starting from "
394-
"n_nearest_clusters=2 and gradually increase it for better knn graph recall.");
392+
RAFT_EXPECTS(params.n_clusters > params.overlap_factor,
393+
"overlap_factor should be smaller than n_clusters. We recommend starting from "
394+
"overlap_factor=2 and gradually increase it for better knn graph recall.");
395395

396396
auto centroids = raft::make_device_matrix<T, IdxT>(handle, params.n_clusters, num_cols);
397397
get_centroids_on_data_subsample<T, IdxT>(handle, params.metric, dataset, centroids.view());
398398

399-
auto global_nearest_cluster =
400-
raft::make_host_matrix<IdxT, IdxT>(num_rows, params.n_nearest_clusters);
399+
auto global_nearest_cluster = raft::make_host_matrix<IdxT, IdxT>(num_rows, params.overlap_factor);
401400
assign_clusters<T, IdxT>(
402401
handle, params, dataset, centroids.view(), global_nearest_cluster.view());
403402

404403
auto inverted_indices =
405-
raft::make_host_vector<IdxT, IdxT, raft::row_major>(num_rows * params.n_nearest_clusters);
404+
raft::make_host_vector<IdxT, IdxT, raft::row_major>(num_rows * params.overlap_factor);
406405
auto cluster_sizes = raft::make_host_vector<IdxT, IdxT, raft::row_major>(params.n_clusters);
407406
auto cluster_offsets = raft::make_host_vector<IdxT, IdxT, raft::row_major>(params.n_clusters);
408407
get_inverted_indices(handle,

cpp/tests/neighbors/all_neighbors.cuh

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, const AllNeighborsInputs&
5858
os << "dataset shape=" << p.n_rows << "x" << p.dim << ", k=" << p.k
5959
<< ", metric=" << static_cast<int>(std::get<1>(p.build_algo_metric_recall))
6060
<< ", clusters=" << std::get<0>(p.cluster_nearestcluster)
61-
<< ", num nearest clusters=" << std::get<1>(p.cluster_nearestcluster) << std::endl;
61+
<< ", overlap_factor=" << std::get<1>(p.cluster_nearestcluster) << std::endl;
6262
return os;
6363
}
6464

@@ -73,9 +73,9 @@ void get_graphs(raft::resources& handle,
7373
size_t queries_size)
7474
{
7575
all_neighbors_params params;
76-
params.n_clusters = std::get<0>(ps.cluster_nearestcluster);
77-
params.n_nearest_clusters = std::get<1>(ps.cluster_nearestcluster);
78-
params.metric = std::get<1>(ps.build_algo_metric_recall);
76+
params.n_clusters = std::get<0>(ps.cluster_nearestcluster);
77+
params.overlap_factor = std::get<1>(ps.cluster_nearestcluster);
78+
params.metric = std::get<1>(ps.build_algo_metric_recall);
7979

8080
auto build_algo = std::get<0>(ps.build_algo_metric_recall);
8181

@@ -91,8 +91,7 @@ void get_graphs(raft::resources& handle,
9191
ivfq_build_params.build_params.metric = params.metric;
9292
// heuristically good ivfpq n_lists
9393
ivfq_build_params.build_params.n_lists = std::max(
94-
5u,
95-
static_cast<uint32_t>(ps.n_rows * params.n_nearest_clusters / (5000 * params.n_clusters)));
94+
5u, static_cast<uint32_t>(ps.n_rows * params.overlap_factor / (5000 * params.n_clusters)));
9695
params.graph_build_params = ivfq_build_params;
9796
}
9897

@@ -209,7 +208,7 @@ const std::vector<AllNeighborsInputs> inputsSingle =
209208
std::make_tuple(NN_DESCENT, cuvs::distance::DistanceType::L2SqrtExpanded, 0.9),
210209
std::make_tuple(NN_DESCENT, cuvs::distance::DistanceType::CosineExpanded, 0.9),
211210
std::make_tuple(NN_DESCENT, cuvs::distance::DistanceType::InnerProduct, 0.8)},
212-
{std::make_tuple(1lu, 2lu)}, // min_recall, n_clusters, num_nearest_cluster
211+
{std::make_tuple(1lu, 2lu)}, // n_clusters, overlap_factor
213212
{5000, 7151}, // n_rows
214213
{64, 137}, // dim
215214
{16, 23}, // graph_degree
@@ -227,7 +226,7 @@ const std::vector<AllNeighborsInputs> inputsBatch =
227226
std::make_tuple(4lu, 2lu),
228227
std::make_tuple(7lu, 2lu),
229228
std::make_tuple(10lu, 2lu),
230-
}, // min_recall, n_clusters, num_nearest_cluster
229+
}, // n_clusters, overlap_factor
231230
{5000, 7151}, // n_rows
232231
{64, 137}, // dim
233232
{16, 23}, // graph_degree

docs/source/cpp_api/neighbors.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Nearest Neighbors
99
:maxdepth: 2
1010
:caption: Contents:
1111

12+
neighbors_all_neighbors.rst
1213
neighbors_bruteforce.rst
1314
neighbors_cagra.rst
1415
neighbors_dynamic_batching.rst
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
All-neighbors
2+
=============
3+
4+
All-neighbors allows building an approximate all-neighbors knn graph. Given a full dataset, it finds nearest neighbors for all the training vectors in the dataset.
5+
6+
.. role:: py(code)
7+
:language: c++
8+
:class: highlight
9+
10+
``#include <cuvs/neighbors/all_neighbors.hpp>``
11+
12+
namespace *cuvs::neighbors::all_neighbors*
13+
14+
All neighbors knn graph build parameters
15+
----------------------
16+
17+
.. doxygengroup:: all_neighbors_cpp_params
18+
:project: cuvs
19+
:members:
20+
:content-only:
21+
22+
23+
Build
24+
-----------
25+
26+
.. doxygengroup:: all_neighbors_cpp_build
27+
:project: cuvs
28+
:members:
29+
:content-only:

0 commit comments

Comments
 (0)