@@ -71,7 +71,7 @@ void get_centroids_on_data_subsample(raft::resources const& res,
7171template <typename T, typename IdxT>
7272void single_gpu_assign_clusters (
7373 raft::resources const & res,
74- size_t n_nearest_clusters ,
74+ size_t overlap_factor ,
7575 size_t n_clusters,
7676 size_t n_rows_per_batch,
7777 size_t base_row_offset,
@@ -89,10 +89,10 @@ void single_gpu_assign_clusters(
8989 auto dataset_batch_d =
9090 raft::make_device_matrix<T, IdxT, raft::row_major>(res, n_rows_per_batch, num_cols);
9191
92- auto nearest_clusters_idx_d = raft::make_device_matrix<IdxT, int64_t , raft::row_major>(
93- res, n_rows_per_batch, n_nearest_clusters );
94- auto nearest_clusters_dist_d = raft::make_device_matrix<T, int64_t , raft::row_major>(
95- res, n_rows_per_batch, n_nearest_clusters );
92+ auto nearest_clusters_idx_d =
93+ raft::make_device_matrix<IdxT, int64_t , raft::row_major>( res, n_rows_per_batch, overlap_factor );
94+ auto nearest_clusters_dist_d =
95+ raft::make_device_matrix<T, int64_t , raft::row_major>( res, n_rows_per_batch, overlap_factor );
9696
9797 std::optional<raft::device_vector_view<const T, int64_t >> norms_view;
9898 cuvs::neighbors::brute_force::index<T> brute_force_index (res, centroids, norms_view, metric);
@@ -111,21 +111,21 @@ void single_gpu_assign_clusters(
111111 raft::make_const_mdspan (dataset_batch_d.view ()),
112112 nearest_clusters_idx_d.view (),
113113 nearest_clusters_dist_d.view ());
114- raft::copy (global_nearest_cluster.data_handle () + row_offset * n_nearest_clusters ,
114+ raft::copy (global_nearest_cluster.data_handle () + row_offset * overlap_factor ,
115115 nearest_clusters_idx_d.data_handle (),
116- n_rows_of_current_batch * n_nearest_clusters ,
116+ n_rows_of_current_batch * overlap_factor ,
117117 resource::get_cuda_stream (res));
118118 }
119119}
120120
121121/* *
122- * Assign each data point to top n_nearest_clusters number of clusters. Loads the data in batches
122+ * Assign each data point to its overlap_factor nearest clusters. Loads the data in batches
123123 * onto device for efficiency. Arguments:
124124 * - [in] res: raft resource
125125 * - [in] params: params for graph building
126126 * - [in] dataset [num_rows x num_cols]: entire dataset located on host memory
127127 * - [in] centroids [n_clusters x num_cols] : centroid vectors
128- * - [out] global_nearest_cluster [num_rows X n_nearest_clusters ] : top n_nearest_clusters closest
128+ * - [out] global_nearest_cluster [num_rows X overlap_factor ] : top overlap_factor closest
129129 * clusters for each data point
130130 */
131131template <typename T, typename IdxT>
@@ -169,7 +169,7 @@ void assign_clusters(raft::resources const& res,
169169 size_t base_row_offset_for_this_rank = n_rows_per_cluster * base_cluster_idx;
170170
171171 single_gpu_assign_clusters (dev_res,
172- params.n_nearest_clusters ,
172+ params.overlap_factor ,
173173 n_clusters_for_this_rank,
174174 n_rows_per_cluster,
175175 base_row_offset_for_this_rank,
@@ -180,7 +180,7 @@ void assign_clusters(raft::resources const& res,
180180 }
181181 } else {
182182 single_gpu_assign_clusters (res,
183- params.n_nearest_clusters ,
183+ params.overlap_factor ,
184184 params.n_clusters ,
185185 n_rows_per_cluster,
186186 0 ,
@@ -195,9 +195,9 @@ void assign_clusters(raft::resources const& res,
195195 * Getting data indices that belong to cluster
196196 * Arguments:
197197 * - [in] res: raft resource
198- * - [in] global_nearest_cluster [num_rows X n_nearest_clusters ] : top n_nearest_clusters closest
198+ * - [in] global_nearest_cluster [num_rows X overlap_factor ] : top overlap_factor closest
199199 * clusters for each data point
200- * - [out] inverted_indices [num_rows x n_nearest_clusters sized vector] : vector for data indices
200+ * - [out] inverted_indices [num_rows x overlap_factor sized vector] : vector for data indices
201201 * for each cluster
202202 * - [out] cluster_sizes [n_cluster] : cluster size for each cluster
203203 * - [out] cluster_offsets [n_cluster] : offset in inverted_indices for each cluster
@@ -210,17 +210,17 @@ void get_inverted_indices(raft::resources const& res,
210210 raft::host_vector_view<IdxT, IdxT> cluster_offsets)
211211{
212212 // build sparse inverted indices and get number of data points for each cluster
213- size_t num_rows = global_nearest_cluster.extent (0 );
214- size_t n_nearest_clusters = global_nearest_cluster.extent (1 );
215- size_t n_clusters = cluster_sizes.extent (0 );
213+ size_t num_rows = global_nearest_cluster.extent (0 );
214+ size_t overlap_factor = global_nearest_cluster.extent (1 );
215+ size_t n_clusters = cluster_sizes.extent (0 );
216216
217217 auto local_offsets = raft::make_host_vector<IdxT>(n_clusters);
218218
219219 std::fill (cluster_sizes.data_handle (), cluster_sizes.data_handle () + n_clusters, 0 );
220220 std::fill (local_offsets.data_handle (), local_offsets.data_handle () + n_clusters, 0 );
221221
222222 for (size_t i = 0 ; i < num_rows; i++) {
223- for (size_t j = 0 ; j < n_nearest_clusters ; j++) {
223+ for (size_t j = 0 ; j < overlap_factor ; j++) {
224224 IdxT cluster_id = global_nearest_cluster (i, j);
225225 cluster_sizes (cluster_id) += 1 ;
226226 }
@@ -231,7 +231,7 @@ void get_inverted_indices(raft::resources const& res,
231231 cluster_offsets (i) = cluster_offsets (i - 1 ) + cluster_sizes (i - 1 );
232232 }
233233 for (size_t i = 0 ; i < num_rows; i++) {
234- for (size_t j = 0 ; j < n_nearest_clusters ; j++) {
234+ for (size_t j = 0 ; j < overlap_factor ; j++) {
235235 IdxT cluster_id = global_nearest_cluster (i, j);
236236 inverted_indices (cluster_offsets (cluster_id) + local_offsets (cluster_id)) = i;
237237 local_offsets (cluster_id) += 1 ;
@@ -389,20 +389,19 @@ void batch_build(
389389 size_t num_cols = static_cast <size_t >(dataset.extent (1 ));
390390 size_t k = indices.extent (1 );
391391
392- RAFT_EXPECTS (params.n_clusters > params.n_nearest_clusters ,
393- " n_nearest_clusters should be smaller than n_clusters. We recommend starting from "
394- " n_nearest_clusters =2 and gradually increase it for better knn graph recall." );
392+ RAFT_EXPECTS (params.n_clusters > params.overlap_factor ,
393+ " overlap_factor should be smaller than n_clusters. We recommend starting from "
394+ " overlap_factor =2 and gradually increase it for better knn graph recall." );
395395
396396 auto centroids = raft::make_device_matrix<T, IdxT>(handle, params.n_clusters , num_cols);
397397 get_centroids_on_data_subsample<T, IdxT>(handle, params.metric , dataset, centroids.view ());
398398
399- auto global_nearest_cluster =
400- raft::make_host_matrix<IdxT, IdxT>(num_rows, params.n_nearest_clusters );
399+ auto global_nearest_cluster = raft::make_host_matrix<IdxT, IdxT>(num_rows, params.overlap_factor );
401400 assign_clusters<T, IdxT>(
402401 handle, params, dataset, centroids.view (), global_nearest_cluster.view ());
403402
404403 auto inverted_indices =
405- raft::make_host_vector<IdxT, IdxT, raft::row_major>(num_rows * params.n_nearest_clusters );
404+ raft::make_host_vector<IdxT, IdxT, raft::row_major>(num_rows * params.overlap_factor );
406405 auto cluster_sizes = raft::make_host_vector<IdxT, IdxT, raft::row_major>(params.n_clusters );
407406 auto cluster_offsets = raft::make_host_vector<IdxT, IdxT, raft::row_major>(params.n_clusters );
408407 get_inverted_indices (handle,
0 commit comments