diff --git a/src/db.rs b/src/db.rs index cbcea94..5cb7284 100644 --- a/src/db.rs +++ b/src/db.rs @@ -261,9 +261,9 @@ impl Database { } } - /// Deletes all hash index sidecar files except those belonging to the given snapshots. + /// Keeps the `keep` most recent hash index files, deletes the rest. #[cfg(feature = "hash-idx")] - pub fn retain_hash_indexes(&self, keep: &[&ReadTransaction]) { + pub fn prune_hash_indexes(&self, keep: usize) { let db_path = match &self.path { Some(p) => p, None => return, @@ -277,10 +277,7 @@ impl Database { let prefix = format!("{}.", stem); let suffix = ".hidx.sqlite"; - let keep_offsets: std::collections::HashSet<u64> = keep.iter() - .map(|tx| tx.root_offset()) - .collect(); - + let mut index_files: Vec<(u64, std::path::PathBuf)> = Vec::new(); if let Ok(entries) = std::fs::read_dir(parent) { for entry in entries.flatten() { let name = entry.file_name(); @@ -291,14 +288,20 @@ impl Database { if let Some(rest) = name_str.strip_prefix(&prefix) { if let Some(offset_str) = rest.strip_suffix(suffix) { if let Ok(offset) = offset_str.parse::<u64>() { - if !keep_offsets.contains(&offset) { - let _ = std::fs::remove_file(entry.path()); - } + index_files.push((offset, entry.path())); } } } } } + + // Sort by offset descending (most recent first) + index_files.sort_by(|a, b| b.0.cmp(&a.0)); + + // Delete everything after the first `keep` + for (_, path) in index_files.into_iter().skip(keep) { + let _ = std::fs::remove_file(path); + } } pub fn begin_write(&self) -> Result> { diff --git a/src/lib.rs b/src/lib.rs index dfd649c..a86a78b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,6 +38,7 @@ pub type Result = core::result::Result; pub struct Configuration { pub cache_size: usize, pub auto_hash_index: bool, + pub hash_index_pruning: Option<usize>, _marker: PhantomData, } @@ -102,6 +103,7 @@ impl Configuration { Self { cache_size: DEFAULT_CACHE_SIZE, auto_hash_index: false, + hash_index_pruning: Some(120), _marker: PhantomData, } } @@ -115,6 +117,11 @@ impl 
Configuration { self.auto_hash_index = enabled; self } + + pub fn with_hash_index_pruning(mut self, keep: Option<usize>) -> Self { + self.hash_index_pruning = keep; + self + } } pub trait NodeHasher: Clone { diff --git a/src/tx.rs b/src/tx.rs index c354e0e..74393bf 100644 --- a/src/tx.rs +++ b/src/tx.rs @@ -193,6 +193,11 @@ impl ReadTransaction { // Auto-load the index we just built let _ = self.load_hash_index(); + // Auto-prune old indexes + if let Some(keep) = self.db.config.hash_index_pruning { + self.db.prune_hash_indexes(keep); + } + Ok(()) } diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 1b76c2f..9ce9670 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1574,6 +1574,49 @@ fn hash_index_reset_deletes_all_indexes() { let _ = std::fs::remove_file(&dir); } +#[cfg(feature = "hash-idx")] +#[test] +fn hash_index_prune_keeps_n_most_recent() { + let dir = std::env::temp_dir().join("spacedb_hidx_test_prune"); + let _ = std::fs::remove_file(&dir); + let db_path = dir.to_str().unwrap(); + + let db = Database::open(db_path).unwrap(); + let stem = std::path::Path::new(db_path).file_stem().unwrap().to_str().unwrap(); + let parent = std::path::Path::new(db_path).parent().unwrap(); + + let count_indexes = || -> usize { + std::fs::read_dir(parent).unwrap() + .filter_map(|e| e.ok()) + .filter(|e| { + let name = e.file_name().to_str().unwrap_or("").to_string(); + name.starts_with(&format!("{}.", stem)) && name.ends_with(".hidx.sqlite") + }) + .count() + }; + + // Create 4 snapshots, each with a hash index + for i in 0u8..4 { + let mut key = [0u8; 32]; + key[0] = i; + db.begin_write().unwrap() + .insert(key, vec![i]).unwrap() + .commit().unwrap(); + db.begin_read().unwrap().build_hash_index().unwrap(); + } + assert_eq!(count_indexes(), 4, "should have 4 index files"); + + // Prune to keep 2 most recent + db.prune_hash_indexes(2); + assert_eq!(count_indexes(), 2, "should have 2 index files after prune"); + + // Prune to keep 0 + 
db.prune_hash_indexes(0); + assert_eq!(count_indexes(), 0, "should have 0 index files after prune(0)"); + + let _ = std::fs::remove_file(&dir); +} + #[cfg(feature = "hash-idx")] #[test] fn hash_index_fingerprint_mismatch_after_rollback_and_new_writes() {