33//! This module implements HNSW as a proper PostgreSQL index access method,
44//! storing the graph structure in PostgreSQL pages for persistence.
55
6+ use pgrx:: pg_sys:: {
7+ self , bytea, BlockNumber , Buffer , Cost , Datum , IndexAmRoutine , IndexBuildResult ,
8+ IndexBulkDeleteCallback , IndexBulkDeleteResult , IndexInfo , IndexPath , IndexScanDesc ,
9+ IndexUniqueCheck , IndexVacuumInfo , ItemPointer , ItemPointerData , NodeTag , Page , PageHeaderData ,
10+ PlannerInfo , Relation , ScanDirection , ScanKey , Selectivity , Size , TIDBitmap ,
11+ } ;
612use pgrx:: prelude:: * ;
7- use pgrx:: pg_sys:: { self , Relation , IndexInfo , IndexBuildResult , IndexVacuumInfo ,
8- IndexBulkDeleteResult , IndexBulkDeleteCallback , PlannerInfo , IndexPath ,
9- Cost , Selectivity , IndexScanDesc , ScanDirection , TIDBitmap , ScanKey ,
10- IndexUniqueCheck , ItemPointer , Datum , Buffer , BlockNumber , Page ,
11- IndexAmRoutine , NodeTag , bytea, ItemPointerData , PageHeaderData , Size } ;
1213use pgrx:: Internal ;
13- use std:: ptr;
1414use std:: mem:: size_of;
15+ use std:: ptr;
1516
16- use crate :: distance:: { DistanceMetric , distance } ;
17+ use crate :: distance:: { distance , DistanceMetric } ;
1718use crate :: index:: HnswConfig ;
1819
1920// ============================================================================
@@ -31,11 +32,11 @@ const HNSW_PAGE_DELETED: u8 = 2;
3132
3233/// Maximum neighbors per node (aligned with default M)
3334#[ allow( dead_code) ]
34- const MAX_NEIGHBORS_L0 : usize = 32 ; // 2*M for layer 0
35+ const MAX_NEIGHBORS_L0 : usize = 32 ; // 2*M for layer 0
3536#[ allow( dead_code) ]
36- const MAX_NEIGHBORS : usize = 16 ; // M for other layers
37+ const MAX_NEIGHBORS : usize = 16 ; // M for other layers
3738#[ allow( dead_code) ]
38- const MAX_LAYERS : usize = 16 ; // Maximum graph layers
39+ const MAX_LAYERS : usize = 16 ; // Maximum graph layers
3940
4041/// P_NEW equivalent for allocating new pages
4142const P_NEW_BLOCK : BlockNumber = pg_sys:: InvalidBlockNumber ;
@@ -73,10 +74,10 @@ impl Default for HnswMetaPage {
7374 ef_construction : 64 ,
7475 entry_point : pg_sys:: InvalidBlockNumber ,
7576 max_layer : 0 ,
76- metric : 0 , // L2 by default
77+ metric : 0 , // L2 by default
7778 _padding : 0 ,
7879 node_count : 0 ,
79- next_block : 1 , // First node page
80+ next_block : 1 , // First node page
8081 }
8182 }
8283}
@@ -89,7 +90,7 @@ struct HnswNodePageHeader {
8990 #[ allow( dead_code) ]
9091 max_layer : u8 ,
9192 _padding : [ u8 ; 2 ] ,
92- item_id : ItemPointerData , // TID of the heap tuple
93+ item_id : ItemPointerData , // TID of the heap tuple
9394}
9495
9596/// Neighbor entry in the graph
@@ -137,7 +138,8 @@ unsafe fn get_meta_page(index_rel: Relation) -> (Page, Buffer) {
137138unsafe fn get_or_create_meta_page ( index_rel : Relation , for_write : bool ) -> ( Page , Buffer ) {
138139 // Check if the relation has any blocks
139140 // Use MAIN_FORKNUM (0) for the main relation fork
140- let nblocks = pg_sys:: RelationGetNumberOfBlocksInFork ( index_rel, pg_sys:: ForkNumber :: MAIN_FORKNUM ) ;
141+ let nblocks =
142+ pg_sys:: RelationGetNumberOfBlocksInFork ( index_rel, pg_sys:: ForkNumber :: MAIN_FORKNUM ) ;
141143
142144 let buffer = if nblocks == 0 {
143145 // New index - allocate first page using P_NEW (InvalidBlockNumber)
@@ -166,7 +168,8 @@ unsafe fn read_metadata(page: Page) -> HnswMetaPage {
166168/// Write metadata to page
167169unsafe fn write_metadata ( page : Page , meta : & HnswMetaPage ) {
168170 let header = page as * mut PageHeaderData ;
169- let data_ptr = ( header as * mut u8 ) . add ( std:: mem:: size_of :: < PageHeaderData > ( ) ) as * mut HnswMetaPage ;
171+ let data_ptr =
172+ ( header as * mut u8 ) . add ( std:: mem:: size_of :: < PageHeaderData > ( ) ) as * mut HnswMetaPage ;
170173 ptr:: write ( data_ptr, * meta) ;
171174}
172175
@@ -259,7 +262,11 @@ unsafe fn calculate_distance(
259262// Access Method Callbacks
260263// ============================================================================
261264
262- /// Build callback - builds the index from scratch
265+ /// Build callback - builds the HNSW index from scratch
266+ ///
267+ /// Extracts vector dimensions from the indexed column's type modifier.
268+ /// The column must be declared with explicit dimensions, e.g., `ruvector(384)`.
269+ /// Returns an error if dimensions are not specified.
263270#[ pg_guard]
264271unsafe extern "C" fn hnsw_build (
265272 _heap : Relation ,
@@ -268,8 +275,32 @@ unsafe extern "C" fn hnsw_build(
268275) -> * mut IndexBuildResult {
269276 pgrx:: log!( "HNSW: Starting index build" ) ;
270277
271- // Parse index options
272- let dimensions = 128 ; // TODO: Extract from index definition
278+ // Extract dimensions from the indexed column's type modifier
279+ // When user defines ruvector(384), typmod = 384
280+ let dimensions = {
281+ // RelationGetDescr(index) -> (*index).rd_att
282+ let index_desc = ( * index) . rd_att ;
283+ if index_desc. is_null ( ) || ( * index_desc) . natts < 1 {
284+ pgrx:: error!( "HNSW: Cannot build index - no indexed columns found" ) ;
285+ }
286+
287+ // TupleDescAttr(desc, 0) -> (*desc).attrs.as_ptr().add(0)
288+ let attr = ( * index_desc) . attrs . as_ptr ( ) . add ( 0 ) ;
289+ let typmod = ( * attr) . atttypmod ;
290+
291+ if typmod > 0 {
292+ typmod as u32
293+ } else {
294+ // typmod = -1 means dimensions not specified in type declaration
295+ // This happens with: CREATE TABLE t (v ruvector) instead of ruvector(384)
296+ pgrx:: error!(
297+ "HNSW: Vector column must have dimensions specified. \
298+ Use ruvector(dimensions) instead of ruvector, e.g., ruvector(384)"
299+ ) ;
300+ }
301+ } ;
302+
303+ pgrx:: log!( "HNSW: Building index with {} dimensions" , dimensions) ;
273304 let config = HnswConfig :: default ( ) ;
274305
275306 // Initialize metadata page
@@ -298,7 +329,10 @@ unsafe extern "C" fn hnsw_build(
298329 // This is a simplified version - full implementation would use IndexBuildHeapScan
299330 let tuple_count = 0.0 ;
300331
301- pgrx:: log!( "HNSW: Index build complete, {} tuples indexed" , tuple_count as u64 ) ;
332+ pgrx:: log!(
333+ "HNSW: Index build complete, {} tuples indexed" ,
334+ tuple_count as u64
335+ ) ;
302336
303337 // Return build result
304338 let mut result = PgBox :: < IndexBuildResult > :: alloc0 ( ) ;
@@ -308,15 +342,38 @@ unsafe extern "C" fn hnsw_build(
308342}
309343
310344/// Build empty index callback
345+ ///
346+ /// Creates an empty HNSW index with proper dimensions from the column's type modifier.
311347#[ pg_guard]
312348unsafe extern "C" fn hnsw_buildempty ( index : Relation ) {
313349 pgrx:: log!( "HNSW: Building empty index" ) ;
314350
351+ // Extract dimensions from the indexed column's type modifier
352+ let dimensions = {
353+ // RelationGetDescr(index) -> (*index).rd_att
354+ let index_desc = ( * index) . rd_att ;
355+ if !index_desc. is_null ( ) && ( * index_desc) . natts >= 1 {
356+ // TupleDescAttr(desc, 0) -> (*desc).attrs.as_ptr().add(0)
357+ let attr = ( * index_desc) . attrs . as_ptr ( ) . add ( 0 ) ;
358+ let typmod = ( * attr) . atttypmod ;
359+ if typmod > 0 {
360+ typmod as u32
361+ } else {
362+ 0
363+ }
364+ } else {
365+ 0
366+ }
367+ } ;
368+
315369 // Initialize metadata page only
316370 let ( page, buffer) = get_or_create_meta_page ( index, true ) ;
317371 pg_sys:: PageInit ( page, pg_sys:: BLCKSZ as Size , 0 ) ;
318372
319- let meta = HnswMetaPage :: default ( ) ;
373+ let meta = HnswMetaPage {
374+ dimensions,
375+ ..Default :: default ( )
376+ } ;
320377 write_metadata ( page, & meta) ;
321378
322379 pg_sys:: MarkBufferDirty ( buffer) ;
@@ -410,12 +467,12 @@ unsafe extern "C" fn hnsw_costestimate(
410467
411468 // Total cost is O(log n) for HNSW
412469 let log_tuples = tuples. max ( 1.0 ) . ln ( ) ;
413- * index_total_cost = log_tuples * 10.0 ; // Scale factor for page accesses
470+ * index_total_cost = log_tuples * 10.0 ; // Scale factor for page accesses
414471
415472 // HNSW provides good selectivity for top-k queries
416- * index_selectivity = 0.01 ; // Typically returns ~1% of tuples
417- * index_correlation = 0.0 ; // No correlation with physical order
418- * index_pages = ( tuples / 100.0 ) . max ( 1.0 ) ; // Rough estimate
473+ * index_selectivity = 0.01 ; // Typically returns ~1% of tuples
474+ * index_correlation = 0.0 ; // No correlation with physical order
475+ * index_pages = ( tuples / 100.0 ) . max ( 1.0 ) ; // Rough estimate
419476}
420477
421478/// Get tuple callback (for index scans)
@@ -480,10 +537,7 @@ unsafe extern "C" fn hnsw_canreturn(_index: Relation, attno: ::std::os::raw::c_i
480537
481538/// Options callback - parse index options
482539#[ pg_guard]
483- unsafe extern "C" fn hnsw_options (
484- _reloptions : Datum ,
485- _validate : bool ,
486- ) -> * mut bytea {
540+ unsafe extern "C" fn hnsw_options ( _reloptions : Datum , _validate : bool ) -> * mut bytea {
487541 pgrx:: log!( "HNSW: Parsing options" ) ;
488542
489543 // TODO: Parse m, ef_construction, metric from reloptions
@@ -501,14 +555,14 @@ static HNSW_AM_HANDLER: IndexAmRoutine = IndexAmRoutine {
501555 type_ : NodeTag :: T_IndexAmRoutine ,
502556
503557 // Index structure capabilities
504- amstrategies : 1 , // One strategy: nearest neighbor
505- amsupport : 1 , // One support function: distance
558+ amstrategies : 1 , // One strategy: nearest neighbor
559+ amsupport : 1 , // One support function: distance
506560 amoptsprocnum : 0 ,
507561 amcanorder : false ,
508- amcanorderbyop : true , // Supports ORDER BY with distance operators
562+ amcanorderbyop : true , // Supports ORDER BY with distance operators
509563 amcanbackward : false ,
510564 amcanunique : false ,
511- amcanmulticol : false , // Single column only (vector)
565+ amcanmulticol : false , // Single column only (vector)
512566 amoptionalkey : true ,
513567 amsearcharray : false ,
514568 amsearchnulls : false ,
0 commit comments