Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,9 @@ impl<H: NodeHasher> Database<H> {
}
}

/// Deletes all hash index sidecar files except those belonging to the given snapshots.
/// Keeps the `keep` most recent hash index files, deletes the rest.
#[cfg(feature = "hash-idx")]
pub fn retain_hash_indexes(&self, keep: &[&ReadTransaction<H>]) {
pub fn prune_hash_indexes(&self, keep: usize) {
let db_path = match &self.path {
Some(p) => p,
None => return,
Expand All @@ -277,10 +277,7 @@ impl<H: NodeHasher> Database<H> {
let prefix = format!("{}.", stem);
let suffix = ".hidx.sqlite";

let keep_offsets: std::collections::HashSet<u64> = keep.iter()
.map(|tx| tx.root_offset())
.collect();

let mut index_files: Vec<(u64, std::path::PathBuf)> = Vec::new();
if let Ok(entries) = std::fs::read_dir(parent) {
for entry in entries.flatten() {
let name = entry.file_name();
Expand All @@ -291,14 +288,20 @@ impl<H: NodeHasher> Database<H> {
if let Some(rest) = name_str.strip_prefix(&prefix) {
if let Some(offset_str) = rest.strip_suffix(suffix) {
if let Ok(offset) = offset_str.parse::<u64>() {
if !keep_offsets.contains(&offset) {
let _ = std::fs::remove_file(entry.path());
}
index_files.push((offset, entry.path()));
}
}
}
}
}

// Sort by offset descending (most recent first)
index_files.sort_by(|a, b| b.0.cmp(&a.0));

// Delete everything after the first `keep`
for (_, path) in index_files.into_iter().skip(keep) {
let _ = std::fs::remove_file(path);
}
}

pub fn begin_write(&self) -> Result<WriteTransaction<'_, H>> {
Expand Down
7 changes: 7 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub type Result<T> = core::result::Result<T, Error>;
pub struct Configuration<Hasher: NodeHasher> {
pub cache_size: usize,
pub auto_hash_index: bool,
pub hash_index_pruning: Option<usize>,
_marker: PhantomData<Hasher>,
}

Expand Down Expand Up @@ -102,6 +103,7 @@ impl<Hasher: NodeHasher> Configuration<Hasher> {
Self {
cache_size: DEFAULT_CACHE_SIZE,
auto_hash_index: false,
hash_index_pruning: Some(120),
_marker: PhantomData,
}
}
Expand All @@ -115,6 +117,11 @@ impl<Hasher: NodeHasher> Configuration<Hasher> {
self.auto_hash_index = enabled;
self
}

pub fn with_hash_index_pruning(mut self, keep: Option<usize>) -> Self {
self.hash_index_pruning = keep;
self
}
}

pub trait NodeHasher: Clone {
Expand Down
5 changes: 5 additions & 0 deletions src/tx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@ impl<H: NodeHasher> ReadTransaction<H> {
// Auto-load the index we just built
let _ = self.load_hash_index();

// Auto-prune old indexes
if let Some(keep) = self.db.config.hash_index_pruning {
self.db.prune_hash_indexes(keep);
}

Ok(())
}

Expand Down
43 changes: 43 additions & 0 deletions tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1574,6 +1574,49 @@ fn hash_index_reset_deletes_all_indexes() {
let _ = std::fs::remove_file(&dir);
}

#[cfg(feature = "hash-idx")]
#[test]
fn hash_index_prune_keeps_n_most_recent() {
    let dir = std::env::temp_dir().join("spacedb_hidx_test_prune");
    let db_path = dir.to_str().unwrap();

    let stem = std::path::Path::new(db_path).file_stem().unwrap().to_str().unwrap();
    let parent = std::path::Path::new(db_path).parent().unwrap();
    let prefix = format!("{}.", stem);
    let suffix = ".hidx.sqlite";

    // Paths of this test's hash-index sidecar files currently on disk.
    let index_paths = || -> Vec<std::path::PathBuf> {
        std::fs::read_dir(parent).unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| {
                let name = e.file_name();
                let name = name.to_str().unwrap_or("");
                name.starts_with(&prefix) && name.ends_with(suffix)
            })
            .map(|e| e.path())
            .collect()
    };

    // Clean up residue from any previous (possibly aborted) run: the db file
    // itself AND any stale sidecar index files. Stale sidecars would
    // otherwise inflate the count assertions below.
    let _ = std::fs::remove_file(&dir);
    for p in index_paths() {
        let _ = std::fs::remove_file(p);
    }

    let db = Database::open(db_path).unwrap();

    // Create 4 snapshots, each with its own hash index sidecar.
    for i in 0u8..4 {
        let mut key = [0u8; 32];
        key[0] = i;
        db.begin_write().unwrap()
            .insert(key, vec![i]).unwrap()
            .commit().unwrap();
        db.begin_read().unwrap().build_hash_index().unwrap();
    }
    assert_eq!(index_paths().len(), 4, "should have 4 index files");

    // Prune to keep the 2 most recent.
    db.prune_hash_indexes(2);
    assert_eq!(index_paths().len(), 2, "should have 2 index files after prune");

    // Prune to keep 0 — every sidecar is deleted.
    db.prune_hash_indexes(0);
    assert_eq!(index_paths().len(), 0, "should have 0 index files after prune(0)");

    let _ = std::fs::remove_file(&dir);
}

#[cfg(feature = "hash-idx")]
#[test]
fn hash_index_fingerprint_mismatch_after_rollback_and_new_writes() {
Expand Down
Loading