From 3c8ecc9bff54fbf4893f98abad18eec5c935e543 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Wed, 25 Mar 2026 15:09:19 +0200 Subject: [PATCH 01/25] Candystore redesign --- .github/workflows/ci.yml | 97 ++ .github/workflows/ubuntu.yml | 42 - .github/workflows/windows.yml | 22 - .gitignore | 19 +- Cargo.lock | 704 +++++++-- Cargo.toml | 37 +- DESIGN.md | 325 ++++ README.md | 229 ++- candy-crasher/.gitignore | 1 - candy-crasher/Cargo.toml | 9 - candy-crasher/README.md | 31 - candy-longliving/.gitignore | 2 - candy-longliving/Cargo.toml | 7 - candy-longliving/src/main.rs | 69 - candy-perf/.gitignore | 1 - candy-perf/Cargo.toml | 7 - candy-perf/README.md | 75 - candy-perf/src/main.rs | 431 ------ diagram.png | Bin 42531 -> 0 bytes examples/atomics.rs | 5 +- examples/lists.rs | 5 +- examples/multithreaded.rs | 11 +- examples/perf.rs | 294 ++++ examples/simple.rs | 9 +- examples/typed.rs | 1 + mini-candy/Cargo.toml | 8 - mini-candy/README.md | 2 - mini-candy/src/main.rs | 294 ---- simulator/Cargo.lock | 140 -- simulator/Cargo.toml | 7 - simulator/README.md | 51 - simulator/rust-toolchain.toml | 2 - simulator/src/main.rs | 226 --- src/data_file.rs | 459 ++++++ src/hashing.rs | 100 -- src/index_file.rs | 859 +++++++++++ src/internal.rs | 360 +++++ src/lib.rs | 161 +- src/lists.rs | 863 ----------- src/pacer.rs | 253 ++++ src/queues.rs | 455 ------ src/router.rs | 548 ------- src/shard.rs | 1172 --------------- src/stats.rs | 245 --- src/store.rs | 1323 ++++++++++------- src/store/compaction.rs | 265 ++++ src/store/list.rs | 868 +++++++++++ src/store/open.rs | 240 +++ src/store/queue.rs | 643 ++++++++ src/store/recovery.rs | 196 +++ src/store/typed.rs | 834 +++++++++++ src/typed.rs | 759 ---------- src/types.rs | 316 ++++ tests/basic_ops.rs | 212 +++ tests/big_items.rs | 71 + tests/common/mod.rs | 41 +- tests/compaction.rs | 417 ++++++ tests/concurrency.rs | 332 +++++ candy-crasher/src/main.rs => tests/crasher.rs | 147 +- tests/data_loss.rs | 157 ++ tests/double_open.rs | 
20 + tests/iteration.rs | 99 ++ tests/list.rs | 383 +++++ tests/maintenance.rs | 88 ++ tests/metrics.rs | 115 ++ tests/proptest_state_machine.rs | 97 ++ tests/queue.rs | 366 +++++ tests/recovery.rs | 759 ++++++++++ tests/rotation.rs | 130 ++ tests/shrink.rs | 44 + tests/test_atomics.rs | 40 - tests/test_bigval.rs | 32 - tests/test_flush_agg.rs | 51 - tests/test_list_collisions.rs | 76 - tests/test_lists.rs | 516 ------- tests/test_loading.rs | 72 - tests/test_logic.rs | 149 -- tests/test_merge.rs | 87 -- tests/test_multithreading.rs | 75 - tests/test_pre_split.rs | 181 --- tests/test_queues.rs | 109 -- tests/test_typed.rs | 113 -- tests/typed_list.rs | 167 +++ tests/typed_queue.rs | 63 + tests/typed_store.rs | 76 + 85 files changed, 11260 insertions(+), 8107 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .github/workflows/ubuntu.yml delete mode 100644 .github/workflows/windows.yml create mode 100644 DESIGN.md delete mode 100644 candy-crasher/.gitignore delete mode 100644 candy-crasher/Cargo.toml delete mode 100644 candy-crasher/README.md delete mode 100644 candy-longliving/.gitignore delete mode 100644 candy-longliving/Cargo.toml delete mode 100644 candy-longliving/src/main.rs delete mode 100644 candy-perf/.gitignore delete mode 100644 candy-perf/Cargo.toml delete mode 100644 candy-perf/README.md delete mode 100644 candy-perf/src/main.rs delete mode 100644 diagram.png create mode 100644 examples/perf.rs delete mode 100644 mini-candy/Cargo.toml delete mode 100644 mini-candy/README.md delete mode 100644 mini-candy/src/main.rs delete mode 100644 simulator/Cargo.lock delete mode 100644 simulator/Cargo.toml delete mode 100644 simulator/README.md delete mode 100644 simulator/rust-toolchain.toml delete mode 100644 simulator/src/main.rs create mode 100644 src/data_file.rs delete mode 100644 src/hashing.rs create mode 100644 src/index_file.rs create mode 100644 src/internal.rs delete mode 100644 src/lists.rs create mode 100644 src/pacer.rs 
delete mode 100644 src/queues.rs delete mode 100644 src/router.rs delete mode 100644 src/shard.rs delete mode 100644 src/stats.rs create mode 100644 src/store/compaction.rs create mode 100644 src/store/list.rs create mode 100644 src/store/open.rs create mode 100644 src/store/queue.rs create mode 100644 src/store/recovery.rs create mode 100644 src/store/typed.rs delete mode 100644 src/typed.rs create mode 100644 src/types.rs create mode 100644 tests/basic_ops.rs create mode 100644 tests/big_items.rs create mode 100644 tests/compaction.rs create mode 100644 tests/concurrency.rs rename candy-crasher/src/main.rs => tests/crasher.rs (70%) create mode 100644 tests/data_loss.rs create mode 100644 tests/double_open.rs create mode 100644 tests/iteration.rs create mode 100644 tests/list.rs create mode 100644 tests/maintenance.rs create mode 100644 tests/metrics.rs create mode 100644 tests/proptest_state_machine.rs create mode 100644 tests/queue.rs create mode 100644 tests/recovery.rs create mode 100644 tests/rotation.rs create mode 100644 tests/shrink.rs delete mode 100644 tests/test_atomics.rs delete mode 100644 tests/test_bigval.rs delete mode 100644 tests/test_flush_agg.rs delete mode 100644 tests/test_list_collisions.rs delete mode 100644 tests/test_lists.rs delete mode 100644 tests/test_loading.rs delete mode 100644 tests/test_logic.rs delete mode 100644 tests/test_merge.rs delete mode 100644 tests/test_multithreading.rs delete mode 100644 tests/test_pre_split.rs delete mode 100644 tests/test_queues.rs delete mode 100644 tests/test_typed.rs create mode 100644 tests/typed_list.rs create mode 100644 tests/typed_queue.rs create mode 100644 tests/typed_store.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..ab206b2 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,97 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + 
+jobs: + fmt: + name: Rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: rustup component add rustfmt + - run: cargo fmt -- --check + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: clippy-${{ runner.os }}-${{ hashFiles('**/Cargo.lock') }} + - run: rustup component add clippy + - run: cargo clippy --all-targets -- -D warnings + + test-ubuntu: + name: Test (Ubuntu, ${{ matrix.profile }}) + runs-on: ubuntu-latest + strategy: + matrix: + profile: [debug, release] + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: test-ubuntu-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }} + - name: Run tests + run: | + if [ "${{ matrix.profile }}" = "release" ]; then + cargo test --release + else + cargo test + fi + - name: Run examples + run: | + cargo run --example simple + cargo run --example multithreaded + cargo run --example atomics + cargo run --example lists + cargo run --example typed + cargo run --release --example perf + + test-windows: + name: Test (Windows, ${{ matrix.profile }}) + runs-on: windows-latest + strategy: + matrix: + profile: [debug, release] + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: test-windows-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }} + - name: Run tests + run: | + if ("${{ matrix.profile }}" -eq "release") { + cargo test --release + } else { + cargo test + } + - name: Run examples + run: | + cargo run --example simple + cargo run --example multithreaded + cargo run --example atomics + cargo run --example lists + cargo run --example typed + cargo run --release --example perf diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml deleted file mode 100644 index 7e98560..0000000 --- 
a/.github/workflows/ubuntu.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: Linux - -on: - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - -env: - CARGO_TERM_COLOR: always - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Build - run: cargo build - - name: Run tests - run: cargo test --release -- --nocapture - - name: Run simple example - run: cargo run --example simple - - name: Run multithreaded example - run: cargo run --example multithreaded - - name: Run lists example - run: cargo run --example lists - - name: Run typed example - run: cargo run --example typed - - name: Run perftest - run: cd candy-perf; cargo run --release - - name: Run crasher - run: cd candy-crasher; cargo run --release - - name: Run longliving - run: cd candy-longliving; cargo run --release -- 10 40001 10000 - - name: Run mini-candy - run: cd mini-candy; cargo run - - name: Run test-list-collisions - run: cargo test -F whitebox_testing --test test_list_collisions -- --nocapture - - name: Run test-flush-agg - run: cargo test -F flush_aggregation --test test_flush_agg -- --nocapture diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml deleted file mode 100644 index 7e079e1..0000000 --- a/.github/workflows/windows.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Windows - -on: - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - -env: - CARGO_TERM_COLOR: always - -jobs: - build: - - runs-on: windows-latest - - steps: - - uses: actions/checkout@v4 - - name: Build - run: cargo build - - name: Run simple example - run: cargo run --example simple diff --git a/.gitignore b/.gitignore index 95d8dd0..ea8c4bf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,18 +1 @@ -# Generated by Cargo -# will have compiled files and executables -debug/ -target/ -dbdir/ - -# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries -# More information here 
https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html -#Cargo.lock - -# These are backup files generated by rustfmt -**/*.rs.bk - -# MSVC Windows builds of rustc generate these, which store debugging information -*.pdb - -# Jetbrains files -.idea +/target diff --git a/Cargo.lock b/Cargo.lock index b62552e..80a011c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,81 +4,63 @@ version = 4 [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] -name = "bitflags" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" - -[[package]] -name = "bumpalo" -version = "3.19.0" +name = "autocfg" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] -name = "bytemuck" -version = "1.24.0" +name = "bit-set" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ - "bytemuck_derive", + "bit-vec", ] [[package]] -name = "bytemuck_derive" -version = "1.10.2" +name = "bit-vec" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "candy-crasher" -version = "0.1.0" -dependencies = [ - "candystore", - "libc", - "rand 0.8.5", -] +checksum = 
"5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] -name = "candy-longliving" -version = "0.1.0" -dependencies = [ - "candystore", -] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] -name = "candy-perf" -version = "0.1.0" -dependencies = [ - "candystore", -] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "candystore" -version = "0.5.6" +version = "0.6.0" dependencies = [ - "anyhow", - "bytemuck", - "crossbeam-channel", + "crc16-ibm3740-fast", "databuf", "fslock", "libc", - "memmap", + "memmap2", + "num_cpus", "parking_lot", - "rand 0.9.2", + "proptest", + "rand 0.10.0", "simd-itertools", "siphasher", + "smallvec", + "tempfile", + "thiserror", "uuid", + "zerocopy", ] [[package]] @@ -88,19 +70,46 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] -name = "crossbeam-channel" -version = "0.5.15" +name = "chacha20" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" dependencies = [ - "crossbeam-utils", + "cfg-if", + "cpufeatures", + "rand_core 0.10.0", ] [[package]] -name = "crossbeam-utils" -version = "0.8.21" +name = "core_detect" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +checksum = "7f8f80099a98041a3d1622845c271458a2d73e688351bf3cb999266764b81d48" + +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crc-fast-gen" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d4e7ca1a78a554d1675e8489415c76c5fd804686a7b6902ed8ce55ab498364d" + +[[package]] +name = "crc16-ibm3740-fast" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bd5030bcadf3aa65886c31c60bb36fab8db9eae235ff081acb64ea962aa5d6" +dependencies = [ + "core_detect", + "crc-fast-gen", +] [[package]] name = "databuf" @@ -130,6 +139,40 @@ dependencies = [ "syn", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "fslock" version = "0.2.1" @@ -142,42 +185,108 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", - "wasi", + "r-efi 5.3.0", + "wasip2", ] [[package]] name = "getrandom" -version = "0.3.4" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 6.0.0", + "rand_core 0.10.0", "wasip2", + "wasip3", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", ] +[[package]] +name = "itoa" +version = "1.0.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" dependencies = [ "once_cell", "wasm-bindgen", ] +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" -version = "0.2.177" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "lock_api" @@ -189,21 +298,24 @@ dependencies = [ ] [[package]] -name = "memmap" -version = "0.7.0" +name = "log" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" -dependencies = [ - "libc", - "winapi", -] +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] -name = "mini-candy" -version = "0.1.0" +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ - "memmap", - "siphasher", + "libc", ] [[package]] @@ -228,11 +340,30 @@ dependencies = [ "target-features", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "parking_lot" @@ -266,20 +397,55 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] +[[package]] +name = "proptest" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37566cb3fdacef14c0737f9546df7cfeadbfbc9fef10991038bf5015d0c80532" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags", + "num-traits", + "rand 0.9.2", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + 
"tempfile", + "unarray", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" -version = "1.0.42" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -308,15 +474,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] -name = "rand" -version = "0.8.5" +name = "r-efi" +version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" [[package]] name = "rand" @@ -324,18 +485,19 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ - "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_chacha", + "rand_core 0.9.5", ] [[package]] -name = "rand_chacha" -version = "0.3.1" +name = "rand" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", + "chacha20", + "getrandom 0.4.2", + "rand_core 0.10.0", ] [[package]] @@ -345,25 +507,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] name = "rand_core" -version = "0.6.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.3.4", ] [[package]] name = "rand_core" -version = "0.9.3" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" + +[[package]] +name = "rand_xorshift" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" dependencies = [ - "getrandom 0.3.4", + "rand_core 0.9.5", ] [[package]] @@ -375,12 +543,43 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + [[package]] name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "rusty-fork" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -388,26 +587,67 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "simd-itertools" -version = "0.3.0" +name = "semver" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a037ed5ba0cb7102a5b720453b642c5b2cf39960edd2ceace91af8ec3743082a" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ - "multiversion", + "serde_core", ] [[package]] -name = "simulator" -version = "0.1.0" +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ - "rand 0.8.5", + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "simd-itertools" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a037ed5ba0cb7102a5b720453b642c5b2cf39960edd2ceace91af8ec3743082a" +dependencies = [ + 
"multiversion", ] [[package]] name = "siphasher" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] name = "smallvec" @@ -417,9 +657,9 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "syn" -version = "2.0.111" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -432,42 +672,99 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unicode-ident" -version = 
"1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" [[package]] name = "uuid" -version = "1.18.1" +version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" +name = "wait-timeout" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = 
"6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ "cfg-if", "once_cell", @@ -478,9 +775,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -488,9 +785,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" dependencies = [ "bumpalo", "proc-macro2", @@ -501,13 +798,47 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "winapi" version = "0.3.9" @@ -536,28 +867,125 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "zerocopy" -version = "0.8.30" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea879c944afe8a2b25fef16bb4ba234f47c694565e97383b36f3a878219065c" +checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.30" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf955aa904d6040f70dc8e9384444cb1030aed272ba3cb09bbc4ab9e7c1f34f5" +checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" dependencies = [ "proc-macro2", "quote", "syn", ] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 15241b8..af99fe1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,29 +1,24 @@ [package] name = "candystore" -version = "0.5.6" -edition = "2021" -license = "Apache-2.0" -keywords = ["key-value", "database", "persistent", "store", "rocksdb"] -description = "A lean, efficient and fast peristent in-process key-value store" -repository = "https://github.com/sweet-security/candystore" 
+version = "0.6.0" +edition = "2024" [dependencies] -bytemuck = { version = "1.24.0", features = ["derive"] } +crc16-ibm3740-fast = "0.5.0" databuf = "0.5.0" -memmap = "0.7.0" -siphasher = "1.0.1" -anyhow = "1.0.100" -parking_lot = "0.12.5" -uuid = "1.10.0" -rand = "0.9.2" fslock = "0.2.1" -libc = "0.2.158" -crossbeam-channel = "0.5.15" +memmap2 = "0.9.10" +num_cpus = "1.17.0" +parking_lot = "0.12.5" simd-itertools = "0.3.0" +siphasher = "1.0.2" +smallvec = { version = "1.15.1", features = ["write"] } +thiserror = "2.0.18" +uuid = "1.18.1" +zerocopy = { version = "0.8.47", features = ["derive"] } -[features] -whitebox_testing = [] -flush_aggregation = [] - -[workspace] -members = ["simulator", "candy-crasher", "candy-longliving", "candy-perf", "mini-candy"] +[dev-dependencies] +proptest = "1.10.0" +tempfile = "3" +rand = "0.10.0" +libc = "0.2.183" diff --git a/DESIGN.md b/DESIGN.md new file mode 100644 index 0000000..d07b595 --- /dev/null +++ b/DESIGN.md @@ -0,0 +1,325 @@ +# Design + +This document describes the storage model, concurrency strategy, recovery rules, and collection semantics used by `candystore`. + +## Goals + +- Fast point lookups for embedded workloads +- Durable append-only writes to data files +- Recoverability by replaying data files when the index is dirty +- Support for ordered collections and large values without a separate database layer +- Simple operational model for single-process ownership with multi-threaded access + +## High-Level Architecture + +The store has two durable layers: + +1. `data_XXXX` files: append-only log-structured storage +2. 
`index` + `rows`: mutable lookup structure used for fast reads + +ASCII overview: + +```text + +-------------------+ + set/get/remove -> | CandyStore API | + +---------+---------+ + | + v + +----------------------+ + | in-memory coordination| + | row locks + counters | + +-----+------------+----+ + | | + | | + v v + +-----------+ +----------------+ + | index/rows | | data_0000..N | + | mmap index | | append-only | + +-----------+ +----------------+ + ^ | + | | + +------------+ + rebuild / compaction +``` + +The important design choice is that the index is rebuildable. The data files are the durable source of truth for recovery. + +## Index Layout + +The index is stored in two files: + +- `index`: header, counters, global split level, dirty flag, hash key, waste accounting +- `rows`: fixed-size hash rows stored in page-sized records + +Each row contains: + +- a split level +- a checksum +- fixed-width signature slots +- fixed-width entry pointers + +ASCII row model: + +```text ++--------------------------------------------------------------+ +| split_level | checksum | signatures[ROW_WIDTH] | pointers[] | ++--------------------------------------------------------------+ +``` + +Pointers are compact and encode: + +- data file index +- aligned file offset +- size hint +- masked row selector bits + +The index is optimized for lookup speed, not for being the primary source of truth. + +## Data Files + +Each data file starts with a fixed header page containing: + +- file signature +- file format version +- file ordinal + +After the header page, entries are appended at 16-byte alignment. 
+ +Two entry kinds exist today: + +- `Data`: key + value +- `Tombstone`: key only + +ASCII entry layout: + +```text +Data entry +---------- +u32 header +u16 key_len +u16 value_len +value bytes +key bytes +u16 checksum +padding to 16-byte alignment + +Tombstone entry +--------------- +u32 header +u16 key_len +key bytes +u16 checksum +padding to 16-byte alignment +``` + +The `header` packs: + +- an entry-offset-derived magic value +- namespace bits +- entry type bits + +Checksums cover the logical entry bytes before alignment padding. + +## Namespaces + +Namespaces partition the key space inside the same physical store: + +- user KV entries +- queue metadata and queue data +- list metadata, list index, and list data +- large-value metadata and chunks +- typed variants of the above + +This lets all features share the same physical storage while keeping their internal keys distinct. + +## Write Path + +Normal writes are append-and-swing-pointer operations. + +ASCII write flow: + +```text +client write + | + v +hash key -> lock logical shard -> find existing row slot + | + +--> append new entry to active data file + | + +--> update row pointer in index + | + +--> account old entry as waste when replacing/removing +``` + +Important consequences: + +- updates never overwrite old data in place +- remove operations append tombstones +- old versions remain in old data files until compaction removes them + +## Recovery Path + +On open, the store marks the index dirty immediately. This makes an interrupted open conservative. 
+ +If the previous shutdown was unclean and the configured strategy rebuilds, recovery does: + +```text +reset index state +sort data files by ordinal +for each data file in order: + scan aligned entries + validate checksum and entry shape + replay data/tombstone into the index +``` + +Recovery invariants: + +- later entries win over earlier ones +- tombstones remove prior live values +- file order is determined only by file ordinal +- duplicate data-file ordinals are invalid because they make replay order ambiguous +- unknown entry types and unknown namespaces are treated as invalid data and fail rebuild +- recovered entries are validated against current key/value size limits before indexing + +This is the key reason data files can act as the long-term ground truth for the current format. + +## Compaction + +Compaction rewrites live entries from an old file into the active file, then deletes the old file. + +ASCII compaction flow: + +```text +old data file + | + v +scan entries in order + | + +--> if entry is still the current live version: + | append to active file + | replace pointer + | + +--> otherwise skip + | + v +delete compacted file +``` + +Compaction is rate-limited by a token-bucket pacer using `compaction_throughput_bytes_per_sec`. + +## Locking Model + +There are two main locking layers: + +1. per-row / per-shard locking around index mutation +2. logical key locks to serialize conflicting higher-level operations + +ASCII concurrency view: + +```text +thread A thread B + | | + +--> logical lock ---+ + | + v + row/shard lock + | + v + mutate row +``` + +This design allows unrelated keys to proceed concurrently while keeping conflicting operations consistent. + +The store also uses an on-disk `.lockfile` so only one process owns a store directory at a time. + +## Collection Semantics + +### Lists + +Lists are ordered maps keyed by `(list_key, item_key)`. 
+ +- each list has metadata with `head`, `tail`, and `count` +- list order is stored through an index from logical position to item key +- item data stores the user value plus the logical index suffix +- updates can preserve position or promote the item to the tail, depending on the API +- retain/compaction can rewrite sparse lists into compact spans + +### Queues + +Queues are ordered sequences under a queue key. + +- each queue has metadata with `head`, `tail`, and `count` +- queue entries are stored by synthetic logical index +- head/tail peeks and pops skip holes caused by removals +- `queue_range` exposes the current logical span, not a dense ordinal count + +### Large Values + +Large values use queue-backed chunk storage. + +- metadata records identify the chunk queue +- data is split into chunk entries +- reads concatenate chunks in queue order + +## Typed API Design + +Typed wrappers are thin adapters over the untyped APIs. + +- keys and values are encoded with `databuf` +- type-specific IDs are appended to typed root keys +- typed collections reuse the same lower-level storage semantics + +This means typed and untyped APIs share the same durability and recovery model. + +## Dirty Shutdown Semantics + +Clean shutdown requires: + +1. background compaction thread stopped +2. data files synced +3. index header dirty flag cleared and flushed + +If any of those steps do not happen, the next open treats the index as dirty. 
+ +## Format and Compatibility Notes + +Current compatibility assumptions: + +- the index stores the effective hash key, and reopen reuses the persisted key for existing stores +- data-file entry types and namespaces are intentionally strict during rebuild +- the current code does not define a stable cross-version migration policy yet + +For a future `1.0`, the minimum compatibility policy should define: + +- whether old data-file versions remain readable +- whether hash keys are user-managed forever or migrated differently +- what entry types and namespaces are reserved for future expansion +- how recovery should behave when a newer writer introduces unknown on-disk constructs + +## Why the Data Files Are the Source of Truth + +The index can be reset and replayed from the data files. + +That is only true if the data files remain: + +- append-only +- checksummed +- strictly parseable +- replayable in deterministic order + +The recent hardening work in this repository specifically enforces that rebuild fails closed on unknown entry metadata instead of silently discarding it. + +## Practical Limits + +- maximum user key length: `MAX_USER_KEY_SIZE` +- maximum inline value length: `MAX_USER_VALUE_SIZE` +- large values use chunked storage instead of a single inline entry +- maximum data-file size is bounded by pointer encoding limits + +## Suggested Future 1.0 Checklist + +- write a formal on-disk format spec for data files +- define the compatibility promise for `DATA_FILE_VERSION` +- decide whether hash-key compatibility remains config-managed +- add targeted corruption tests for truncated, unknown-type, and unknown-namespace entries +- document operational upgrade expectations explicitly \ No newline at end of file diff --git a/README.md b/README.md index 4d7c365..35ed843 100644 --- a/README.md +++ b/README.md @@ -1,150 +1,131 @@ -
- -
v0.5.6 fixes some potential concurrency issues and panics on missing shards after a crash
-
🪟 v0.5.5 now supports Windows (experimental)! 🎉
-
- -# CandyStore -A pure rust implementation of a fast (*blazingly* :tm:, of course), persistent, in-process key-value store, that relies -on a novel sharding algorithm. Just how blazingly? It's over 9000! - -| Operation | Time* | -|-----------|--------| -| Lookup | < 1us | -| Insert | < 2us | -| Removal | < 1us | - -The algorithm can be thought of as a "zero-overhead" extension to a hash table stored over files, -as it's designed to minimizes IO operations. See [the benchmark](candy-perf/README.md) and -[how to interpret the results*](#how-to-interpret-the-performance-results). - -## Overview -Being a hash-table, the key is hashed, producing a 64 bit number. The 16 most significant bits select -the *shard*, followed by 16 bits selecting the *row* in the shard, and the remaining 32 bits serve as an -opaque signature. The signature is matched against the signature array within the selected row. -The row also stores the file offset of the entry, which is used to retrive the entry's key and value. - -![](diagram.png) - -Each shard is mapped to a shard file, and a shard file can cover a wide range of consecutive shards. -We begin with a single shard file covering the whole shard span of `[0-65536]`. - -When a shard file gets too big, or when one of its rows gets full, it undergoes a *split*. -This operation takes all entries and splits them into a bottom half and a top half (of roughly -equal sizes). For instance, if the file covered shards `[0-65536)`, after the split we have two files, -one covering `[0-32768)` and the other covering `[32768-65536)`. This process repeats as needed, -and essentially builds a tree of shard files. Each file is split independently, and the amount of work -is constant (unlike LSM trees). 
+# candystore -``` - [0-65536) - / \ - / \ - [0-32768) [32768-65536) - / \ - / \ - [0-16384) [16384-32768) -``` +`candystore` is an embedded persistent key-value store for Rust with: -The shard file's header (the rows, signatures and file offsets) are kept in an `mmap`, and the rest -of the file's data is accessed using `pread` and `pwrite`. The file is only ever extended (until either -a split or *compaction* takes place), so the algorithm is *crash safe*, in the sense that it will always -return some valid version of a key-value pair, although it might lose unflushed data. +- append-only data files +- a mutable in-place index for fast lookups +- rebuild-from-data-files recovery +- list and queue collection APIs +- typed wrappers built on `databuf` +- background compaction -The library puts its faith in the kernel's page cache, and assumes the `mmap` and writes are flushed to -disk every so often. This allows us to forgo a journal or write-ahead log (WAL). +The index is an acceleration structure. The data files are the durable event log that recovery replays when the index is dirty. -The default parameters (chosen by simulations) are of shards with 64 rows, each with 512 entries. The chances -of collisions with these parameters are minimal, and they allow for ~90% utilization of the shard, while -requiring relatively small header tables (32K entries, taking up 384KB). With the expected 90% utilization, -you should be expect to hold 29.5K keys per shard. For a shard file of 64MB, that's 0.6% overhead. +## Highlights -Because the data structure is a hash table rather than a search tree, insertion, lookup and removal are -all O(1) operations. 
+- Fast point lookups through a memory-mapped index +- Crash recovery by rebuilding from append-only data files +- Ordered list API keyed by `(list, item)` +- Queue and large-value APIs +- Thread-safe shared access through `Arc` +- Typed APIs for encoded keys and values -The concept can be extended to a distributed database, by adding a layer of master-shards that select a -server, followed by the normal sharding mechanism described above. +## Quick Start -## Example ```rust use candystore::{CandyStore, Config, Result}; fn main() -> Result<()> { let db = CandyStore::open("/tmp/candy-dir", Config::default())?; - // simple API + db.set("user:1", "alice")?; + assert_eq!(db.get("user:1")?, Some(b"alice".to_vec())); - db.set("mykey", "myval")?; - assert_eq!(db.get("mykey")?, Some("myval".into())); + let status = db.replace("user:1", "alice-v2", Some("alice"))?; + assert!(status.was_replaced()); - assert_eq!(db.get("yourkey")?, None); + db.remove("user:1")?; + assert_eq!(db.get("user:1")?, None); + Ok(()) +} +``` - assert_eq!(db.iter().count(), 1); +## Collections - for res in db.iter() { - let (k, v) = res?; - assert_eq!(k, Vec::::from("mykey")); - assert_eq!(v, Vec::::from("myval")); - } +Lists are ordered maps scoped by a list key. 
- assert_eq!(db.iter().count(), 1); +```rust +use candystore::{CandyStore, Config, Result}; - // lists API +fn main() -> Result<()> { + let db = CandyStore::open("/tmp/candy-lists", Config::default())?; - db.set_in_list("mylist", "key1", "123")?; - db.set_in_list("mylist", "key2", "456")?; - assert_eq!(db.get_from_list("mylist", "key1")?, Some("123".into())); + db.set_in_list("langs", "rust", "systems")?; + db.set_in_list("langs", "python", "scripting")?; - assert_eq!(db.iter_list("mylist").count(), 2); + let items = db.iter_list("langs").collect::<Result<Vec<_>, _>>()?; + assert_eq!(items.len(), 2); + Ok(()) +} +``` - for res in db.iter_list("mylist") { - let (k, v) = res?; - println!("{k:?} => {v:?}"); - } +Queues store ordered values under a queue key. + +```rust +use candystore::{CandyStore, Config, Result}; + +fn main() -> Result<()> { + let db = CandyStore::open("/tmp/candy-queue", Config::default())?; + + db.push_to_queue_tail("jobs", "job-1")?; + db.push_to_queue_tail("jobs", "job-2")?; + + assert_eq!(db.pop_queue_head("jobs")?, Some(b"job-1".to_vec())); Ok(()) } ``` -## Design Goals -* Fast and efficient, with a very low memory footprint (~0.6% overhead) -* No heavy/unbounded merges -* No Write-Ahead Log (WAL) or journalling of any kind -* Process crash safe: you may lose the latest operations, but never be in an inconsistent state - if the process crashes. However, if the machine itself crashes, the data on disk may be in an - inconsistent state. 
-* Splitting/compaction happens per-shard, so there's no global locking -* Suitable for both write-heavy and read-heavy workloads -* Concurrent by design (multiple threads getting/setting/removing keys at the same time) -* The backing store is taken to be an SSD, thus it's not optimized for HDDs - -## Notes -* The file format is not yet stable -* Uses very little `unsafe` (required due to `mmap`) - -## Roadmap -* Distributed protocol based on file locks (meant to run on a shared network folder) -* Add generations as an adapter on top, so that older generations are compacted into exponentially larger - time spans. It's an alternative to TTL, and amortizes the number of times an entry will move around as the - dataset grows. -* Maybe add Arithmethic coding/Huffman coding as a cheap compression for the keys and values - -## How to Interpret the Performance Results -While the numbers above are incredible, it is obvious that any file-backed store will be limited by the -filesystem's latency and bandwidth. For example, you can expect a read latency of 20-100us from SSDs (NVMe), -so that's the lower bound on reading a random location in the file. - -What the numbers above measure is the performance of the *algorithm*, not the *storage*: given you can spare an -overhead of 0.6% mapped into memory, lookup/insert/removal require a single disk IO. Replacing (updating) an -existing element requires two IOs, since it needs to compare the key before writing it anew. -These IOs may return from the kernel's page cache, in which case it's practically immediate, or from disk, -in which case you can expect it to take 1-2 round-trip times of your device. - -Inserting to/removing from a lists require 2-3 IOs, since these operations need to update the list's -head or tail, as well as a "chain" element. Such operations should really be done with a "large enough page cache". -Updating/fetching an existing element element in a list is a single IO as above. 
- -If your memory is too constrainted for keeping the lookup tables mapped-in (i.e., they get evicted to disk), -you'll incur one more unit of "IO latency" for fetching the row from the table. Since the row spans 2KB (and -aligned to 4KB), it should behave nicely with 4K IOs. - -See also [this guide to LTO/PGO](https://github.com/sweet-security/candystore/issues/7) by Alexander Zaitsev. +## Typed API + +Typed wrappers encode keys and values with `databuf` and separate each typed key space with a per-type id. + +```rust +use std::sync::Arc; + +use candystore::{CandyStore, CandyTypedStore, Config, Result}; + +fn main() -> Result<()> { + let db = Arc::new(CandyStore::open("/tmp/candy-typed", Config::default())?); + let users = CandyTypedStore::<String, Vec<u32>>::new(db); + + users.set("scores", &vec![1, 2, 3])?; + assert_eq!(users.get("scores")?, Some(vec![1, 2, 3])); + Ok(()) +} +``` + +## Large Values + +`set_big` / `get_big` / `remove_big` store values larger than the inline value limit by chunking them across queue-backed entries. + +## Recovery Model + +On open, the store marks the index dirty before doing work. On clean drop it syncs data files, clears the dirty flag, and flushes the index header. + +If the store is reopened while dirty, behavior depends on `Config::rebuild_strategy`: + +- `FailIfDirty`: reject open +- `RebuildIfDirty`: rebuild the index from data files +- `ResetDBIfDirty`: clear the directory and recreate an empty store +- `TrustDirtyIndexIfChecksumCorrectOrFail`: accept the dirty index only if row checksums match +- `TrustDirtyIndexIfChecksumCorrectOrRebuild`: trust valid checksums, otherwise rebuild + +## Operational Notes + +- `Config::hash_key` is part of on-disk compatibility. A store must be reopened with the same hash key. +- Data files are append-only. Background compaction rewrites live entries into the active file and deletes old files. 
+- Rebuild now fails closed on unknown entry types and unknown namespaces in data files rather than silently skipping them. +- The index format and data-file format are internal implementation details until a `1.0` compatibility policy is explicitly documented. + +## Examples + +See: + +- `examples/simple.rs` +- `examples/typed.rs` +- `examples/lists.rs` +- `examples/multithreaded.rs` + +## Design + +See `DESIGN.md` for the storage layout, locking model, rebuild path, and collection semantics. \ No newline at end of file diff --git a/candy-crasher/.gitignore b/candy-crasher/.gitignore deleted file mode 100644 index 9a59ec5..0000000 --- a/candy-crasher/.gitignore +++ /dev/null @@ -1 +0,0 @@ -dbdir diff --git a/candy-crasher/Cargo.toml b/candy-crasher/Cargo.toml deleted file mode 100644 index 71cb49b..0000000 --- a/candy-crasher/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "candy-crasher" -version = "0.1.0" -edition = "2021" - -[dependencies] -libc = "0.2.155" -rand = "0.8.5" -candystore={path=".."} diff --git a/candy-crasher/README.md b/candy-crasher/README.md deleted file mode 100644 index 72d62c4..0000000 --- a/candy-crasher/README.md +++ /dev/null @@ -1,31 +0,0 @@ -## VickiStore Crasher - -Fork a child process to insert 1M keys into the DB, while the parent kills it repeatedly. The test -makes sure the child is able to make progress as well as making sure the DB remains consistent. - -Note: the store is not meant to be used by multiple processes concurrently -- it uses thread syncrhonization, -not inter-process synchronization. The test uses the store only from a single process at a time. - - -``` -$ cargo run -child starting at 0 -[0] killing child -child starting at 20445 -[1] killing child -child starting at 31656 -[2] killing child -child starting at 55500 -. -. -. 
-child starting at 978418 -[219] killing child -child starting at 982138 -[220] killing child -child starting at 991255 -child finished -child finished in 221 iterations -Parent starts validating the DB... -DB validated successfully -``` diff --git a/candy-longliving/.gitignore b/candy-longliving/.gitignore deleted file mode 100644 index ca63e2e..0000000 --- a/candy-longliving/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -dbdir - diff --git a/candy-longliving/Cargo.toml b/candy-longliving/Cargo.toml deleted file mode 100644 index 347d19d..0000000 --- a/candy-longliving/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "candy-longliving" -version = "0.1.0" -edition = "2021" - -[dependencies] -candystore={path=".."} diff --git a/candy-longliving/src/main.rs b/candy-longliving/src/main.rs deleted file mode 100644 index 4527053..0000000 --- a/candy-longliving/src/main.rs +++ /dev/null @@ -1,69 +0,0 @@ -use std::{ - sync::{atomic::AtomicU64, Arc}, - time::Instant, -}; - -use candystore::{CandyStore, CandyTypedList, Config, Result}; - -fn main() -> Result<()> { - let args = std::env::args().collect::>(); - assert!( - args.len() == 4, - "usage: {} ", - args[0] - ); - let num_threads: usize = args[1].parse().expect("num_threads not a number"); - let num_iters: usize = args[2].parse().expect("num_iters not a number"); - let tail_length: usize = args[3].parse().expect("tail_length not a number"); - - let db = Arc::new(CandyStore::open( - "dbdir", - Config { - min_compaction_threashold: 1024 * 1024, - ..Default::default() - }, - )?); - db.clear()?; - - let mut handles = vec![]; - - let ops = Arc::new(AtomicU64::new(0)); - - for thd in 0..num_threads { - let db = db.clone(); - let ops = ops.clone(); - let h = std::thread::spawn(move || { - println!("started thread {thd}"); - let typed = CandyTypedList::::new(db.clone()); - let listname = format!("mylist"); //format!("mylist{thd}"); - let mut t0 = Instant::now(); - for i in 0..num_iters { - if i % 10000 == 0 { - let t1 = 
Instant::now(); - println!( - "thread {thd} at {i} {} rate={}us", - db.stats(), - t1.duration_since(t0).as_micros() / 10_000, - ); - t0 = t1; - } - - typed.set(&listname, &(thd * num_iters + i), "xxx")?; - ops.fetch_add(1, std::sync::atomic::Ordering::Relaxed); - if i >= tail_length { - typed.remove(&listname, &(thd * num_iters + i - tail_length))?; - ops.fetch_add(1, std::sync::atomic::Ordering::Relaxed); - } - } - Result::<()>::Ok(()) - }); - handles.push(h); - } - - for h in handles { - h.join().unwrap()?; - } - println!("ops={}", ops.load(std::sync::atomic::Ordering::Relaxed)); - - Ok(()) -} diff --git a/candy-perf/.gitignore b/candy-perf/.gitignore deleted file mode 100644 index 9a59ec5..0000000 --- a/candy-perf/.gitignore +++ /dev/null @@ -1 +0,0 @@ -dbdir diff --git a/candy-perf/Cargo.toml b/candy-perf/Cargo.toml deleted file mode 100644 index 0490d50..0000000 --- a/candy-perf/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "candy-perf" -version = "0.1.0" -edition = "2021" - -[dependencies] -candystore={path=".."} diff --git a/candy-perf/README.md b/candy-perf/README.md deleted file mode 100644 index 1f8bc69..0000000 --- a/candy-perf/README.md +++ /dev/null @@ -1,75 +0,0 @@ -Performance results from my machine - -* Ubuntu 24.04 LTS -* Lenovo ThinkPad X1 Carbon Gen 10 (12th Gen Intel® Core™ i7-1260P × 16) -* RAM: 32.0 GiB -* SSD: 512 GB -* Built with `cargo build --release` -* Running on a local filesystem - -### Smallish entries (4 byte keys, 3 byte values) -``` -1000000 small entries with pre-split - Small entries insert: 1.347us - Small entries get 100% existing: 0.477us - Small entries get 50% existing: 0.474us - Small entries removal: 0.493us - Small entries mixed: 1.822us - -1000000 small entries without pre-split - Small entries insert: 4.151us - Small entries get 100% existing: 0.517us - Small entries get 50% existing: 0.515us - Small entries removal: 0.535us - Small entries mixed: 4.633us -``` - -### Largish entries (100 byte keys, 300 
byte values) -``` -500000 large entries with pre-split - Large entries insert: 1.624us - Large entries get 100% existing: 0.618us - Large entries removal: 0.128us - -500000 large entries without pre-split - Large entries insert: 5.422us - Large entries get 100% existing: 0.731us - Large entries removal: 0.139us -``` - -### Lists -``` -10 collections with 100000 items in each - Inserts: 8.104us - Updates: 2.593us - Gets: 0.612us - Iterations: 0.556us - Removal of 50% items: 7.945us - Discards: 0.972us -``` - -### Threads without contention (different keys) -``` -No-contention: 10 threads accessing 100000 different keys - with pre-split - Inserts: 3.238us - Gets: 1.004us - Removals: 0.929us - -No-contention: 10 threads accessing 100000 different keys - without pre-split - Inserts: 19.497us - Gets: 1.119us - Removals: 1.001us -``` - -### Threads with contention (same keys) -``` -Contention: 10 threads accessing 1000000 same keys - with pre-split - Inserts: 4.556us - Gets: 1.204us - Removals: 1.334us - -Contention: 10 threads accessing 1000000 same keys - without pre-split - Inserts: 12.167us - Gets: 2.195us - Removals: 2.257us -``` diff --git a/candy-perf/src/main.rs b/candy-perf/src/main.rs deleted file mode 100644 index 6712f4d..0000000 --- a/candy-perf/src/main.rs +++ /dev/null @@ -1,431 +0,0 @@ -use candystore::{CandyStore, Config, Result}; -use std::{ - hint::black_box, - sync::{atomic::AtomicU64, Arc}, - time::Instant, -}; - -fn run2(msg: &str, iters: u32, mut func: impl FnMut() -> Result<()>) -> Result<()> { - let t0 = Instant::now(); - func()?; - let t1 = Instant::now(); - println!( - "{msg}: {:.3}us", - ((t1.duration_since(t0).as_nanos() as f64) / 1000.0) / (iters as f64) - ); - Ok(()) -} - -fn run(msg: &str, iters: u32, mut func: impl FnMut(u32) -> Result<()>) -> Result<()> { - run2(msg, iters, || { - for i in 0u32..iters { - func(i)?; - } - Ok(()) - }) -} - -fn test_small_keys(num_keys: u32) -> Result<()> { - for pre_split in [true, false] { - let db = 
CandyStore::open( - "./dbdir", - Config { - expected_number_of_keys: if pre_split { num_keys as usize } else { 0 }, - ..Default::default() - }, - )?; - - db.clear()?; - - if pre_split { - println!("{num_keys} small entries with pre-split"); - } else { - println!("{num_keys} small entries without pre-split"); - } - - run(" Small entries insert", num_keys, |i| { - db.set(&(i * 2).to_le_bytes(), "xxx")?; - Ok(()) - })?; - - run(" Small entries get 100% existing", num_keys, |i| { - let val = db.get(&(i * 2).to_le_bytes())?; - black_box(val.unwrap()); - Ok(()) - })?; - - run(" Small entries get 50% existing", num_keys, |i| { - let val = db.get(&(i * 2).to_le_bytes())?; - black_box(val.unwrap()); - Ok(()) - })?; - - run(" Small entries removal", num_keys, |i| { - let val = db.remove(&(i * 2).to_le_bytes())?; - black_box(val.unwrap()); - Ok(()) - })?; - - db.clear()?; - - run(" Small entries mixed", num_keys, |i| { - db.set(&(i * 2).to_le_bytes(), "xxx")?; - let val = db.get(&(i / 2).to_le_bytes())?; - black_box(val); - if i % 8 == 7 { - db.remove(&(i / 2).to_le_bytes())?; - } - Ok(()) - })?; - - println!(); - } - - Ok(()) -} - -fn test_large_keys(num_keys: u32) -> Result<()> { - for pre_split in [true, false] { - let db = CandyStore::open( - "./dbdir", - Config { - expected_number_of_keys: if pre_split { num_keys as usize } else { 0 }, - ..Default::default() - }, - )?; - - db.clear()?; - - if pre_split { - println!("{num_keys} large entries with pre-split"); - } else { - println!("{num_keys} large entries without pre-split"); - } - - run(" Large entries insert", num_keys, |i| { - let mut key = [99u8; 100]; - key[0..4].copy_from_slice(&i.to_le_bytes()); - let val = [7u8; 300]; - db.set(&key, &val)?; - Ok(()) - })?; - - run(" Large entries get 100% existing", num_keys, |i| { - let mut key = [99u8; 100]; - key[0..4].copy_from_slice(&i.to_le_bytes()); - let val = db.get(&key)?; - black_box(val); - Ok(()) - })?; - - run(" Large entries removal", num_keys, |i| { - let mut key 
= [99u8; 100]; - key[0..4].copy_from_slice(&i.to_le_bytes()); - let val = db.remove(&(i * 2).to_le_bytes())?; - black_box(val); - Ok(()) - })?; - - println!(); - } - - Ok(()) -} - -fn test_lists(num_lists: u32, num_items_per_list: u32) -> Result<()> { - let db = CandyStore::open( - "./dbdir", - Config { - expected_number_of_keys: (num_lists * num_items_per_list) as usize, - ..Default::default() - }, - )?; - - println!("{num_lists} lists with {num_items_per_list} items in each"); - run2(" Inserts", num_lists * num_items_per_list, || { - for list in 0..num_lists { - for item in 0..num_items_per_list { - db.set_in_list(&list.to_le_bytes(), &item.to_le_bytes(), "xxx")?; - } - } - Ok(()) - })?; - - run2(" Updates", num_lists * num_items_per_list, || { - for list in 0..num_lists { - for item in 0..num_items_per_list { - db.set_in_list(&list.to_le_bytes(), &item.to_le_bytes(), "yyy")?; - } - } - Ok(()) - })?; - - run2(" Gets", num_lists * num_items_per_list, || { - for list in 0..num_lists { - for item in 0..num_items_per_list { - let val = db.get_from_list(&list.to_le_bytes(), &item.to_le_bytes())?; - black_box(val); - } - } - Ok(()) - })?; - - run2(" Iterations", num_lists * num_items_per_list, || { - for list in 0..num_lists { - let count = db.iter_list(&list.to_le_bytes()).count(); - black_box(count); - debug_assert_eq!(count, num_items_per_list as usize); - } - Ok(()) - })?; - - run2( - " Removal of 50% items", - num_lists * num_items_per_list / 2, - || { - for list in 0..num_lists { - for item in 0..num_items_per_list { - if item % 2 == 0 { - let val = db.remove_from_list(&list.to_le_bytes(), &item.to_le_bytes())?; - black_box(val.unwrap()); - } - } - } - Ok(()) - }, - )?; - - run2(" Discards", num_lists * num_items_per_list / 2, || { - for list in 0..num_lists { - db.discard_list(&list.to_le_bytes())?; - } - Ok(()) - })?; - - println!(); - - Ok(()) -} - -fn test_concurrency_without_contention(num_threads: u32, num_keys: u32) -> Result<()> { - for pre_split in 
[true, false] { - let db = Arc::new(CandyStore::open( - "./dbdir", - Config { - expected_number_of_keys: if pre_split { - (num_threads * num_keys) as usize - } else { - 0 - }, - ..Default::default() - }, - )?); - db.clear()?; - - if pre_split { - println!("No-contention: {num_threads} threads accessing {num_keys} different keys - with pre-split"); - } else { - println!( - "No-contention: {num_threads} threads accessing {num_keys} different keys - without pre-split" - ); - } - - let insert_time_ns = Arc::new(AtomicU64::new(0)); - let get_time_ns = Arc::new(AtomicU64::new(0)); - let removal_time_ns = Arc::new(AtomicU64::new(0)); - - let mut handles = vec![]; - for thd in 0..num_threads { - let db = db.clone(); - let insert_time_ns = insert_time_ns.clone(); - let get_time_ns = get_time_ns.clone(); - let removal_time_ns = removal_time_ns.clone(); - - let h = std::thread::spawn(move || { - { - let t0 = Instant::now(); - for i in thd * num_keys..(thd + 1) * num_keys { - let status = db.set(&i.to_le_bytes(), &thd.to_le_bytes())?; - debug_assert!(status.was_created()); - } - insert_time_ns.fetch_add( - Instant::now().duration_since(t0).as_nanos() as u64, - std::sync::atomic::Ordering::SeqCst, - ); - } - - { - let t0 = Instant::now(); - for i in thd * num_keys..(thd + 1) * num_keys { - let val = db.get(&i.to_le_bytes())?; - debug_assert_eq!(val, Some(thd.to_le_bytes().to_vec()), "thd={thd} i={i}"); - black_box(val.unwrap()); - } - get_time_ns.fetch_add( - Instant::now().duration_since(t0).as_nanos() as u64, - std::sync::atomic::Ordering::SeqCst, - ); - } - - { - let t0 = Instant::now(); - for i in thd * num_keys..(thd + 1) * num_keys { - let val = db.remove(&i.to_le_bytes())?; - debug_assert!(val.is_some()); - black_box(val.unwrap()); - } - removal_time_ns.fetch_add( - Instant::now().duration_since(t0).as_nanos() as u64, - std::sync::atomic::Ordering::SeqCst, - ); - } - - Result::<()>::Ok(()) - }); - handles.push(h); - } - for h in handles { - h.join().unwrap()?; - } - - 
let insert_time_ns = insert_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64; - let get_time_ns = get_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64; - let removal_time_ns = removal_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64; - let ops = (num_threads * num_keys) as f64; - - println!(" Inserts: {:.3}us", (insert_time_ns / 1000.0) / ops); - println!(" Gets: {:.3}us", (get_time_ns / 1000.0) / ops); - println!(" Removals: {:.3}us", (removal_time_ns / 1000.0) / ops); - println!(); - } - - Ok(()) -} - -fn do_inserts( - thd: u32, - num_keys: u32, - insert_time_ns: &Arc, - db: &Arc, -) -> Result<()> { - let t0 = Instant::now(); - for i in 0..num_keys { - db.set(&i.to_le_bytes(), &thd.to_le_bytes())?; - } - insert_time_ns.fetch_add( - Instant::now().duration_since(t0).as_nanos() as u64, - std::sync::atomic::Ordering::SeqCst, - ); - Ok(()) -} - -fn do_gets(num_keys: u32, get_time_ns: &Arc, db: &Arc) -> Result<()> { - let t0 = Instant::now(); - for i in 0..num_keys { - let val = db.get(&i.to_le_bytes())?; - black_box(val); - } - get_time_ns.fetch_add( - Instant::now().duration_since(t0).as_nanos() as u64, - std::sync::atomic::Ordering::SeqCst, - ); - Ok(()) -} - -fn do_removals( - num_keys: u32, - removal_time_ns: &Arc, - db: &Arc, -) -> Result<()> { - let t0 = Instant::now(); - for i in 0..num_keys { - let val = db.remove(&i.to_le_bytes())?; - black_box(val); - } - removal_time_ns.fetch_add( - Instant::now().duration_since(t0).as_nanos() as u64, - std::sync::atomic::Ordering::SeqCst, - ); - Ok(()) -} - -fn test_concurrency_with_contention(num_threads: u32, num_keys: u32) -> Result<()> { - for pre_split in [true, false] { - let db = Arc::new(CandyStore::open( - "./dbdir", - Config { - expected_number_of_keys: if pre_split { - (num_threads * num_keys) as usize - } else { - 0 - }, - ..Default::default() - }, - )?); - db.clear()?; - - if pre_split { - println!( - "Contention: {num_threads} threads accessing {num_keys} same keys - with pre-split" - 
); - } else { - println!("Contention: {num_threads} threads accessing {num_keys} same keys - without pre-split"); - } - - let insert_time_ns = Arc::new(AtomicU64::new(0)); - let get_time_ns = Arc::new(AtomicU64::new(0)); - let removal_time_ns = Arc::new(AtomicU64::new(0)); - - let mut handles = vec![]; - for thd in 0..num_threads { - let db = db.clone(); - let insert_time_ns = insert_time_ns.clone(); - let get_time_ns = get_time_ns.clone(); - let removal_time_ns = removal_time_ns.clone(); - - let h = std::thread::spawn(move || { - if thd % 3 == 0 { - do_inserts(thd, num_keys, &insert_time_ns, &db)?; - do_gets(num_keys, &get_time_ns, &db)?; - do_removals(num_keys, &removal_time_ns, &db)?; - } else if thd % 3 == 1 { - do_gets(num_keys, &get_time_ns, &db)?; - do_removals(num_keys, &removal_time_ns, &db)?; - do_inserts(thd, num_keys, &insert_time_ns, &db)?; - } else { - do_removals(num_keys, &removal_time_ns, &db)?; - do_inserts(thd, num_keys, &insert_time_ns, &db)?; - do_gets(num_keys, &get_time_ns, &db)?; - } - - Result::<()>::Ok(()) - }); - handles.push(h); - } - for h in handles { - h.join().unwrap()?; - } - - let insert_time_ns = insert_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64; - let get_time_ns = get_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64; - let removal_time_ns = removal_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64; - let ops = (num_threads * num_keys) as f64; - - println!(" Inserts: {:.3}us", (insert_time_ns / 1000.0) / ops); - println!(" Gets: {:.3}us", (get_time_ns / 1000.0) / ops); - println!(" Removals: {:.3}us", (removal_time_ns / 1000.0) / ops); - println!(); - } - - Ok(()) -} - -fn main() -> Result<()> { - test_small_keys(1_000_000)?; - test_large_keys(500_000)?; - test_lists(10, 100_000)?; - test_concurrency_without_contention(10, 100_000)?; - test_concurrency_with_contention(10, 1_000_000)?; - - Ok(()) -} diff --git a/diagram.png b/diagram.png deleted file mode 100644 index 
848558635a724f86118303be8a1337ea3a54747d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42531 zcmb@u1yo$!mNl9LNeC9)A$V~20+QhF?(Xg$AXu=Vg-h_@?h*=jcXyY<{T0dAU*G%s z-v0ajug_o%1~m?6pKWW+x#wIJA}cL|_!jr=ix)2t#Y6?=U%Y^Fe(~a!(i>RFC(&yr znUG)54xhyo-#{+UH-^EGzi}LeR2>y;j2&I{?2TTSSld_`(K#5{8yQ(UnA$iVL$&fj zZlZX;NxG(H zVuGI(T~iJg-8?0?*Sb%K6B8vVV&0UK3%&_?CvhMbt5Zm0CHK>`F!oY;U%q&(@Ta*| zp;O_chuP?qRh;z|W{z{iOvpR(%Q=anPpG|Q+gBo?U%!5}jPYD9oxYC&I1D*#ahYs+ z4xgoLW9OPB1^>C`E6^Qb1b<%}Qy*qRet%lW1;c~#`{Kycr6hU2*0#_O{@k{Z;+Oj8 zf0Gr;(0<=#=kV6~_gy5`(!ZpC-_|im^$!oD(wA4|&F79#FRl2csW%e}A0TZ-FQXr1 zn^t^W7#npg((y$_vbo>q8%V&gGsNyR?LuSAL3Wv>jf`B3sP*BzI)C!B3Q=L8P2_#T z`y3SbDc0U~WfxDh0}3h;$3yDZK{1EOQ2B{}1o%%}SlXzeSmnrfK3G#-P znQ&%{r_OApn+YDO=ROWY`Y6gQrM|9Yd=$sY^2Ev&?tF(<86wjFV^}VY zSAFX>9LV5-Vu_qP4NMLW2_dF1W(n1?@}tx0O{wZ8ADZU@e|h?}HMAs(DfGMr7LpFT zgaj94_6o)vQ(1k}^pxNGp_FbC*Y z!L=Ctb{~pg{_@ef6n(TJWn#_nxtf}whIuD?jEwc}rZG;}LV7xxr9=xan0s9+E4P}3 zUv)6%UB->1_9EWM$p!BFy?a$ThTjJqF*#1Fq$ z%pmd@3GB({NpI=MaG6E+A0XBj`^K5!WnGgNbA4*aCV$>KVbV6)q7};K!e>8KhQNoL z_ot-3XK+ARICZ#PFjBgVSA;td?7!;Y+R8yj_C3LxHnZ7VB1wJwR6aQkA^l#*d(r8w zN{BAW9_*^eB#Ptu#g^pUDinS$iIQIhHcKMraN;#cbgncl--wG?Q0e!iyQ_{TmZU<2!eVG=V$H52!S|! z&`R{7Zr3k@oYJ3T8v3WV&hlh9X-I%^7epLgUE{i|ekWUE1H#r++(+`0Ds@H#nK?-w zyTJ$BsgV}XT2I_&fy=YW5$md-C?7k6rNh)6H})e1%p88# zY@k8oCm6_bVb9g;F9~}D)6J1F=+D*Oi9(5f+e*#jZ@arFxNqJnUQz{GU*w1@P(RcL z4+fzkiE{d^Vjj^nwWL&uxNA!@gk=j_J1Z7JURG2uSC_I=VNRQZcQ%AWSw(+kK~-8? zvXhm8`Ke%l;zRk$bQzC9i>ma&Dd%NL(l2=8WSl!unOuC)e#6!J?h|l(s32-gMX*>r zSP>4~jw>7Tc2}4l*PAVW71^$PO%z6RzQw4k;o<(;(g|S2Zpn^E822<(u|d|dyB4kz z>N4wu#;a!B4#VsyoU6U!iRyaiy1feQAFLLU|4ib_b=o(b`0;rofP?%}(PgNZiWXg? 
z%DQ34X_(&;CBH1u60{2G{_5B)MUhv_pm*5^<#P$E<^|VM%pyREn44pP$5hYF_EcTu ztCZg%cS+P|-iOSPRBavMb%v_kkyCgPvOMEH@mbcEyy#1@vX8Nd4KX+OE!bjwO~q_) zwb$#o$N;p>TM&Jw@j{HJCPeCW=~pThOI8hr_-y0iw&Xi|CIp1>fc`iopXI@tZZ{-C zPB;u2w0s-h=S1Vj>h=v?X6n74Kf+dt_UD1bhOd-1n~h9a*S%Q){F%<9Yu5G$TI_@r zbwJWlmQ@q06aFHeavRI(`$E3kFvJ!*Hwu!gKsICb!I3W3bDrTOr+K?-i-g{n7Vk!k z-V8sU@oBp|GKTGlOX=S@cNEf6-7vE7JYbaJZ^9L8@qu^-Nn6*fu&CIpi9vY{u2W20rl+UPk9#;;`v(TFOoT+|pr_gu$2B&3y zt4e9+O5zT?3RTe1D5hLJ$T7KgGUf#MA(UL6Gh&sL$C*hOb+easbs=oYnuLAX5(CJyM|NMhRd1};sB(E>&)bBK@(Fb|1rYQh` zl#`Q&^Mupaz4Me~Ez+E;tE&@-%yC}3fnU_AolFiAoH;R(H!>TKas(njH9dixhqRB{ z8tX4bHE(pb2$0t@FA;m@KhQ&BlsqHCIO~~t1Qbdy43!+nY)qf}!npH9?QX30{-9o; z@{lpTrc|Ppr4*%ljlA7jzo+2$euH2k`I$r$)rndr0`I-H_UhxYvF$FKC<#gFp+l)G z`u*JoM>-oe2TaF+=L7)$3IR%ZjKj3sIrHjcZ?=4;Kd%C%ZG*#tL9l-3?eS~7Hc zox!3_@C~0KZ$;uJ_&0bm13x1k$o0Pj!2Wue3C+&iI+!zr#wp9K6CsvGBE*!JFJ~f& zHYMjSmaVDO-ljX41gmhxDL{VeHKU z8JO3~>n~pxEK2<2*aR|=AOB;P4*wD2{x!n?Encp~aD=h%0Dwx^lDRyY9Qo}N`e3`N z_dD*A72Y;bM59sdyWag0arFtED;AZ%6SvD=CWQok!v&fd3~{O=LPY|RVwKqy>Yav< zU~r3~J*(mQ)P!HjH_=hUzj=yqP~tOYKz-&uTzZ@ZizUdzP9fPvawBo|-)O$+U6`Ns zHGH;>6MK#Yam5)l@40l7bkS>AdPzjnD>YT%)X!5thWeEVQw-Ey6wMy~&`T>3><)U) z?Ae5k`{KsKPk|$udhc@Z30-etQBT`G8h24>YfQYdlMop%_l6PYY&tEyV>D3ih|)90 zWl77Y@~SH|cJn-NZh8RK=RILrHoDuL>YSO1>2MDMI!A?UMoJ?1rITSon+ZW2d5x$x zUFG_cA|$Vx?5?3@uQK)ST{#g`?Tr+i`Awo{`E8&hg_U!vfRx%f{vii|6t}ylhFXe4 z#;2D4PrHQtwoKy}1KBZ=90aXl!$INEDg8@!*8fQoHf1Hx&}o(r#k0i8PrwBq{fBHS z4}eUpr*92Tk|eGSa!&fc=%%uw%Qn_|hK=rB-jn)CLYbaspOmo*)D+SCQ(D!=yV z8;Bp^qCwN-t?xav&T~166jV?MkJf46z)oEu_Pj*N20@*s-6ESH@DlSR>NKF zE@L<-c9jT}fW(XsvSp1C6NksAu2+dOX-Pvr30f;X9%yfYsomoQA{9qJ?=}ZxmQUyE z45~bQh%LO+q+QaDnnn$v`(sLCLv$FwUwWCW&z`|6TP?O%z!h^bRdZ-9F9-(uDh%OH zyp=ZYUmyldn()2!$}Ka!P*oE8=C1J5~t>We8*dhEmXn^##HqLWuymMppX}xMF zuYyt|ouz|;sKYCjZ>8rc+(2(YS|77XD%AgYyx>q)nc_vf{lps@fwiSYe5pofwh}L% znN(a!S45LXt7JnB=SYWtW+rwoJ2xf{UOEzj(KheAySs|{S$2Q%HjN%TAmz@UoYaBo zlLrI$YvGzWvuFh{kJl%NEO2mYqRN{CD_K|jP%E#R^|F*(k7rMe82k=@o!{D{F)=Z9 
zbXXdn7D`!Ln+mf>Mt)T)9jyV)cT9Pt){pYc09-z>aUf(`O^Y3j@&-fF){U?LQz_|9 zZFtAXm&Zj7g+la?@00ntWJ@`er*p_ zvo!tHftwBvg28KV7um^Iia4DbOlM{Qb{YQoAEX-T4*1KhS5$X<_x?{UlZooeU#P>9 zu?O2RiCD}=_p5eR$XEgBTSxlo@P6a$r|C>Om8C~92wtoy8@xQ%i>SiIvIN!vay5|- z$wa@Z@4ui+$Da0(%%qyk7ICG7VRtn8A3r$wzp>ccL8h?Q-{nQSHho|TYaCpk+9QSx>+BCm5Gb3r5mVR)Xq%76tj0X`mo`eTQlm%; zF246Hz1=8^OB!w3i>v(T!R2!=y5iZMbHqYJthKayRxm7gJa!ZRH_2)N4xV(ZGL{88 zZr0Z%rbxcbJ~EmgLhln6eCJwE&u-5CGbpl4-&n?(&t%%BC{mgvqw`Qv#wGe#t`;Y} zX}Fx+oP(ZLF&`!HxVQO$*CP6Szq{%3G*6Q7hHJD$@$z^^%0AB4d`C?sKwdiH3{RZ5 zEkY~Ab-F-gXY-*mw8ukZ71)bp3|A!c09H}fhu{ny)OuT%WxRSijqYyeDG{v<&Hky0 zYMq_rv?FN43a6L5>1(Cs~$-W`8bHRy!yUKN_%il(9IgGL<9`)KCRPV6O^iso= zGqa1&`B1`e{od&M0$cE|BZqb_t|M)xS+(6f6REMCin99#F%d4M^@TK%N#H}L{7+VL zmYW(j0H9239?@9>(!ayoWr6Kr{`5W~g}Ie|+m*U=ut%&KB)QuiJfp*4Q9ik{MtXQ} z#735_(9I6QNQ2{_I8E;=63Zjc4f}lY(@L@rjmBb9T@9SbVTZyuH#!w1&q&VFsbe+` zDFX-Nd&QP>e7No06E!(qh(RGS`v+!2yM&GS^+(&5RAOQ}!Tb*F>miQnfK#jzXg-M? z+^<}bWj4dGTMX>=g<1@AHJlwkVD^I-V?;MkaIZHWa(kqDJJ%O%S2kfn;6iXNffM7D zTn;R7+Y~Jv)~Z4)Ns2r6QIiHWHjbXxaaD4=y}Opd5fl*K#iCJbWpwmq4)>S=0u*G> z4=N1p-S1G1O93a_N*;v_ziOR~cd~y~Ia-k#yC9{b#i4@c!m;2yEE$_gpiawiR_25z z;$0<6c!wK^EtJdy=Yr>(Li~X;*fBFF{qD?(l?vAJ#q3ge?x)>9b!OvA?kKq2G4>f^ zbL|Xw(kV-7WX%J!?R0OpX#p-+R(QZ4n-8_+2B$zvK2zc%WvA2?O!0cp6b=73#e2VQ z4(B;--F(FKAEI?Q9ayk5?Qfg)u(5ODfi#72IGick3ES)AS}(-cXQqnrTN78z;PcGZ zQ9BN~16@?5%%|@6F_Z}zG;m(^b6M+WLN=;B%kSQrpPDxCFNDfD{Frgr`4Hq>yq@gB zZ{TkuaTMIM`t`+cHi=aH%qG#JBW(1$+Vt)>eo9QfsTo|o3M79Y{IUO&cLHlT zOzVB$H=)YJ(_Q^bZKOo@&$v^dtC?3V36<$&g8wa_41&tEM8rN>WmG4`5pj5uSF~nJ zaEdZWxcH=#RBt?*Bg|pEwNo#HN$s580W6&`IldM#C3NG5!c^Rxw2OSFYZn#$!xX3? 
zzHrLU?>&z7)cIhBfhk#hr$hR4%64lI=kr59n(z!9A~FRZeWJ3J#wsHAql=x*Buffe z*j(n+e$l75((I9Ev8GG)t?Ury`88-dr#p;abcrwO(y1K|89C1ySa2W#!u-l)c5;J|HcSNJ8!5ho&B=E-Kk@NW2 zdSC{oaz{ampXoUfcQQ(id!c>=Gp8YYVV0`RV=rqem^LC-YmDYzHVdoUlTax$0G{63 zZ@2pRQ7=ZYI4^%rLma897V^9Fct*dDTuOpuSUE@dGBmCBtR8w47A^+{({l`NMjMu6 zM>~?q;I~eYcA`00)BS@*=_>*sQg_pdOxl8h^>^#**{&*NYfHqQESXqwPE=CQs5clA z<3vlqWZCQU1BchnpH!ZC@i&yK25(D9FS!|(Ye}p>A1YTBc;#@8c|#&cK$wSAnts~$ z$fdukAE;o5cBfCYreN_Kdvz(g$lS4zVD@*rQo_bqr?BQ$g7vIFSl+z$htwfWXVjlg zJVSV8D*n;dZ-^j9Mg*wg#>YWDUSREsLgTZ#A&Qcz3&aB?F&ds4>+7?Fwec9hPM;lA z&_R2XTX9Y$_nH=?8P3nTMij~CO8u?3lo;*i3cjHSv}V5RH?_tr*1Q(Zjiw2CG)%uf zd>dYt|MS#QSxJwB=-IPYVntPmcCz?x>aQ}q#CEK?fl>{Rp}w|;TV${iC8+?(-Q__zk$t?{q-dZex2p#j^!hO0lcn1F!I zZ)#qT@Ape}baX%t(i1yN*?UJY@LE0JT{+!;A${VA2w>s~Q9EI$Lu!%9s}WF3WX^GA$??n|dtc}B)YnaT(tnunG&2Iz z(NU}>59N+G`g5(H$F=5k9({c7CTH}M;JXeg%I>`xo>O?d$R$0pn6_+oYy#1`0H>P$xljC&5S=KDLZs`UR6V~*|Q!D^GPJ|+2Ew>V@+51#U zGQ0`vqgiXno+gRD`esyC8F;zbxH-NSlF*?HX`0cw)WkO8GqDi&k_cAtf&>udwpLvh-a!^eqasu43oJu zX3rRIo9`BzdknwFZ%&<=QFSKdzME$JWl2B$K_Un%o!D`|@!b0c-g~jZe_7ePv&Ep>fU}#b7asrR{VTfHH+o~NnD<~B8q7iCuDsA$4=My~BQyRCtjU_k7!#elG`iOj zWi}7AuovkAtFR9{F10K&e47iu*YWT1^pqQ^tlbc`9{e0{-0bM}O8+Q9DSPq{1S>1CT^ce^EKqJe(Lal{TQi^^5iFe3*2w(Jv*p zvkh8un0^!oV5+ocB8kTTTqN`Nlih zMhq)+9jb_`Wuj)O)E#4yCU4mj_r$IBLVjXSlQ`N=A6hV~uJ zHE-*7-|0Xs3~loAx^B9EjmbpNPlgm_ds%R(TW;r z!a5qOq*W&|C22X`qv1Z8R7%IFYb{;%eD?hwQG(zVJ45BheO&am3PP^bHr&C_2a@#r zx|D!)#2-tKK4*f-zO>vf7xvXU5Nv?1QsYJg%Xd%2b7m}pz-qU1QoG}-z;^Z|ModUfoxcd_HWPH2B&elcdfSnM6U+hXLP4rtVH zdx53Oe8GQE%=D#NK_}by^+))mxFyiH#ukI2K;*hB>BzADDQ}3_Doiu3AM|jfI&=0(cQ1GufDzB{2?JY6qgJ4jhI~S7I zi2r=y-TJnQ?@42t5%`wHkn72>nYfm%%-+@j-6wmoFCLx2oTC?>W2v-F|LRxiR|5GM zPMWxd1S`3MI8K+(QxZ<7Kw1v-|H+8vd+Z* zR*X4E`!4;~xX{vt!O?OEi?Yd&M8gKVDOWqD1DQI>N1hG*p_Qzlpo~5vCS;QQG!h;_J(XZ`s_H)2hO7Oi*X_8NVy>d8BB;RO`?)J%L$iXQ(wdtMX?$YTX82&g6a(H z44q!`Q4ymwrMNEY3sXryPjyADPEPb1b9p>`-p$jZ)E%KfLX9aiGC^{eMT`OSKueOd zCyM`ZOVH|T!i`WR{*y>!?`i2CdyGRBNc`&z_0ys+hgHr*6`A$fQO=`2`JuMIHN(N6 
zC$tb3^ZBg9#*){Z_(L6PV&oY2@GmIJOnswQlRsy3pZ5Uhf!YYezrCpdTzg~as_#4b z4JM}JE6szY@jZ1Q|4gDxllDgU=F<-aTjh0z@$uh_eZlmDF@1R;dv4~k+f>nfM4=W) zl5YlG`e11&^UP<-BChHQ!FbLM$tslMw$698=wmpe4?Z@uBTjqeQ&S|6kN|jfIACW z5ONW2UrlX#yIl|T}*u+(v*J@VEG7u)-yRlqwH+ZEnEA@UHDDr@^g*hF!)_b>| z2;yO5rHLy`CR}9Is7b8Lv?FXdi1a#_(V}>(q$`>#tjfKSO%D#V zDiBempnE`v0${bIMAJsQs>jCL)SozM)^4;y0@rNnY$b&axG`P9&^e&#< z*l5xVQ^oCB?!FimTac24B$ScoDJHoQ`$f-zlT&AMfS$(2x0dKbyQau@9*|3L6X@;L znzN8$hG4NZaEE~@^t@r_N+EoPjK~Zet7#nD5OY|IE>Zz_V`W$aF z9>kw%PpHSm4307&RT{w7)YJT56LVcpBL9$X>}8&zGvOv#fFL#2T13=_mM^n3FJ1kP z+}{$BCPV|HcOx$qUK;8_ti5G4>s2ueO1u=6a`$$Czsx2N$@K2e6g>8CS#TI43u41)5%p@B>qHgpUPBZ?JNSm~f8`T==63%# znm6)?VWcy_ox!E9Vxfppx?f4y26uI-=#--;gz?wGK%`ueKJF#&6O^UpX!#MgsHTid zjNYUOWJqJR8UHH|$LII{zj^cj(31bjF8TMvfCPaUGI?k+7Lbz@@YgvNG7D}#$YNgb z(@V-#8;SdH{-1C%6-EoE*XbWd7?Kns{^^_qYodijZTu9XEv#YwHgmqMMps7u8Id(r zXlaY#sP*2>O^2Ebm)YH7Pl|!`Vk8pbc;f2{voyexkW&I?BGHgzRd2hm2cD{O5?#2 zQzMNLa%0u&S%WAnHzX3U%R~J1TLKp>%HPK#z1vF-)Yp7{vm}ic4xJSxbWJF{uSvrj zE{!LeEnF)TyEH@VXny-zZeq;*qSXBQgi3e?g}T?5jS$cty|FI$G3h^rwxF3tZ9b(c zzA2P~p|7aShKbmAHnQnx+tY~pJ~D!5DWYdAd-+Qa!G_?|)2|pe^G=Dc^%~q5C?5rw zAP%RuvdkWj9AA+T{XcA;v_Z~rf?vZjv#_7e{lE;?;7*FTppk7~4fk0L!l@*QKw!y8 zJQbAM1!=MMuN%~^9}*4mKD6r2Plkp*C5cGfj+t#yMbNwZJ3HOm&C#kJTwu3wExO9E zL|_uzq?S^hz}=`BrkTT#6W zhm7>d4g>ZdjBDySt87^ptqXIN zIA;ya`7oI&wYYeE6Q!4Xj(Jq?|NR-$q7p|+B4z7(wOzOA)#K->dqCxgLtT~ISVeOU0Y+%*4*KRc4K4K{2({Z>|Mq-HXRm~b>rqLEkf1zjg!nA@ z(TY6%!jZ}C{E5?x#jPic3KVZeY&?#T_ESI4{5Vmt8*(OvCLk$^{$#ELw%29tbI~(b zL||e{zRuQY8-(d~L{)AUo=37l(>Qk85FTgaSPVEkI6MOjW+YW=O!O*cG!*kqo_ zU^gl1>mwjq+8_C>ouSZ;*N^~>f{r}qI*q5WX4dwB-51Lw)~ILAE9KUJjugbsTr+JO zZ)``JdUhEBSn>H@U&*2EQJMvbQiz=p&kxo6-c~EJK-lx+r6l#77q507R$r0$)S`M$ z+SW}dmVc_4i zq48dmRfqCp&>E_Sl^rFz^n4(tYYAP;x-=?9l-Z)f97>V#93Krkc6qHt_c2bc34B#$woy8DKt}LCEurgEeh?paJsKYg`#FrV%z+pw&qo=) zgu0NNE6J6ti($qi_DBsifh~l#^K7@E*ZJ~ZXj0T&XCCVjRY(Y+!YKX_4J3PJMC#2> za%WH*KqpoE_#!W{6((Pz)MTEy38X%CAhnssofuYK#j#NXtDI{Uwia2n1-(1g8azL$ 
zM+2cg9}g}x#wNHUfph1E*BejZ{St*bacDF-jyKDN-kHUJq){~ce<_X1t229^TwMiz zg+?8}s=!I(#D|^wUILhAkCA=-y#Z*&qwoO_<|(iRc_+h3yW!jUQbtj>oREBf**(oucd?HDjto>byqewzr-3wpP4s*Aur>* z=+~JIp(U4Cl_0@Ft=O?<^L+DD6Z7okB~s)3L>kGf-_QUZ+rm_*MT0MViTLiu+>}Fo z@=b=mAf+%JxBXz%O<*3>@VRf_3n&&4uXBk%JkHtm;lv4pZ{DPzp6i(L99#u1mlg0E z_5|%H-|HC!+d~p_2>=DhD8FH`_@FGFT{4H=iG78HK8Rue%yKLep!aVDzPAm5z(f;v z@MH&^WJ>d4L$mQL9gt<4%XNiJu_(oQr#QF{ji3PnTw&TbCe}tBed@5Xwgk(!N92$W z)w?;4y}MwS;}=k(KybW8pP*mk;Hx)(O<->w+l%IJQf__q$vaZ5Yr^|#{5_b7 zZ#LNkiphcYu*04L!87U;QSs`%6W_`~e1FtmjHnC3h-Nc`HO4ZZciVf~uYf16%G-G3 zU!hm1a%{ooi1SP&4TLt2j3FX@Teg6d|C3=9pSO1KbLUaaH8}3}fZ*xPAnCD5o8QkS z?jw{}5QR$~{9!If@NDRz8lt690-ES~jqg8!FGH$I(WG5nIh`6Mjh5MBJX}Z4d%df~ z(>l?@El>yvr8|~e8t+wMODRgvKnp7QHSQm*_5$Bc-P*j;&=7h}k2>Bo@x!p0?O{+SVqQ z%N%#VaQG^?x7Q7XUC#QwmEkZ;mU69&_3Av2vNKYXZ`Zy=oam?QJ0Ly`TWg-Y`TqfRn1>sGb_x%Txv63feiC+2Z6dnOj>o}U#_lIr7(%ITSm*Lpc zZS5RE_$_CbEt*5>^uc|?B-`LdZwT{%QIqZt>MPKUV))yeTKSLO)VFjnJiQ)Na!u3q zh%E9nE8A|nk;*@v_5|zebt>6!ATCRe;Or@z{otWu`*8KqgC2Ho$ogqFTtlKj`%C{j1N5W(NCkJH=++UOl-7Db1 zMP0B$h}U^Gsv@HNs@A~MQT)?%2XBt0u4bcjQ)~TqZ|}dT2G#A`yW^^0!_YrfVH+|3 zUpWO@%%Fj%XF8OJe^iHIO+#$@j!tJF#E%i^yRG%KWjiOOLSrKD!hFPjx9Nlzm?~^g zV|SJ={!8>94!hqrFvGO{VH6NPV=j>DLTFeEO{XYKYAIrC} z5ngNzwRK(Bxzg(@3MK`Gc^d=d6mD%* z2Eo@ehpu-)4l+*+GtL)lYpq+O(JhWiZr`#(mol^nd{WrqF+F$#ZKOGt#={Ic6Cwnm zUDr?GGL8g5*Qd5~gM-(}O<~(m#n^=5*p+mNq%p@`XJAI**T_=*UTzu#q%rmLdYXh= z37fJ5mj6=K-rDpGrn+Z-kG-9Wrpk54HYcRmG2Jg#ua3;@dCCN~?l67J-zp0uvLU+V zb3sxp(7$pBsWJ=Ha&?)cVqVn$r`oZlL$!=QBfSxyW9<{!-8kHFIpL^$c%8bqZ!+3? 
z07yIHjCN94v^7q@kVy!Z31I_AL}PSYtf-^YxX zG93&lXx_cuqF`P~+b;T4ykb~0K+vw>wS_if@-|et)M#@}41nob(Af2FOq}O~8^n>h z2pTm32~sSsKfk5^Y&Jn9c%~8%_8@m(=+)crJNjWcpg``~N4XYb{j&CcC*j{~>b;=k zHArzy$uh;sT37{wT2rKgPmz(~jBwW_?$8*ZDRU46@~_e&VzP9`J}!Fjcd)~ArST*v zbtdd-s!Zfa`zIb@fRdY;0NL^+_WjX5^P&idJIl>%)#5Lq$L>Hdf{)hZ*FRTn3-ZLr zoyMTu1J!)r2seRw(;d}1e=ZhOl^DRthlDb863Da+^)oC>d_~OX`}R`DyKhH=$q55`cIH7)G9gRJoYw56GTAo z4?n-t61fi-HY)mvNQieyi`GietwMw9R^~-MH`R&7 z^EJ#a9}h{~(&OA#0a(o0j}9pkP3-x3z}W9^w56LC3Pdh3|CDd^&@pp0d%;zz5MEch zKrr)(RVog*86%z6eDG{4C@(6nqs)0-sGSf`vb5);rzLEiGfFi%A)5V?z<;F^y@o?W z?fw7GM&N+%h{(V)Y`IYwf|kJPG4JFtSfN0FI3vt{)H-&&KUWiyV5rk>4r`2JJmhfF zCYG?JSh;UU|5G*&{;ox~{*oz1vQJ$B< zqTn0anqHvl4`qLM^h9HUMOvqf1lu}dHLj1_V;+SuFHGN9r|dt9J=vXw%+(}rD%`T@ z2ZYNDYa@|Ez}_(ugi(ZqFaGi!r%J$*DPwpx3aUCE@W=HQg~KlKNsnFHY;5lwc64=B zoQ=%323FqwcQ%Es&5r{qVm#$3a8NpiBO(@@KB>r@Je#yjcQ?cRHkG>b#7M`g1IbSb z?Imvsb}xO@5VD*9RHIohym``(b^bP*`wh}@(WSC?;QQ@?J&hz7h9wEX$@f3mEW3ku z+?#GdWQ0`7pi|o#ScGd^Y`Vy>ta=|R>RQ}^h90JY+YE+5&B`75EIby6^96Qzj7FKY zewDpSmwVEu;ic4WA*BnS7f%bbIPU~IwFC`O7C09AXn_;rQVtS>W8RVR^+xJjC3+!h zNiqdSylU37KaJJmHR>}-g5kf>kQ{yc;>Q9?Stk0HQGvi{dpzK|REp)pnO1-0F;ssw zaI=I_4B|Ez=u&s(Sc6eiR?}q%%9@)*bS(@yp`fDregA%o-Lys`DT4LGqxl`AP(Qr*v?^e9Lcq5!1WZ@CZ&BZi|Yh4T6OKe1g+HEzS*jF&KU$r$1Zt`M@EWDFYpJa5M4R+ z0V0Cg5l{3UszvYD%y!;B3R5u|Okc|a>N7bWc{%Y0E$e+p+zFrdwTVb$C`rxtliyHh z)-ryGVp-C8E`Sjf3?TWQ82vSdQu-r2Tp+k5|LD*(q@}GIQGs#&*xYsfc$GeG;$;(h z%=1zYesAkF`0gt#ED0kc^df)g@B@L}TVttPLQ;|Y7wM-u`r?I}+L}iju3Zt&clX2< z_%rI&!l7+MtfQz`CE-Pe)z`!+D;@gB!Fh?ZlfkUx@Mwsd?}Q8QXdO6~EPiNyh$SS7 zN~!aC$HjlNEhz1aEK?pJDzG-{AbS&#@pC_Ql8!H@HhpZJG#c#+nPj{BA*F}vBrKI~ zASz+89&byC|27w;!gp+5X7?1OH{|Yh)Y-bX^agsu3?!>0*6JxrA(Dytk%^7eJ|S#7 zcnp^oZyq#j*T5T{>KtYKs@`#x%G8B+6$h<20)^HfKwqA^-7sbQ(VuZcB zLy9Rld@(Yzv5t6Hg=$`XXlXyr!i|2HVyfiwO@Gftu(+q>Czs_XKK*W7-#!hVY4sowX9rCgeskg6 zmS#JsY-zk~^`JR4v}pTO%>$|r@e#wtUTc{89BeBx97lBpbOCRN?2Kl^_Tg5Icqj|_ zn{??ZX}b4L0rBHEFuo|uR0%XXAC6Wpf1p*pf`N4->cUF5`6R4N9OakK5SZna`t4+7 
zcSV~Dnm~lEJ1U5SMx)k418CxcN%nY3cw2ie!j<*1>#eUTk0Y_OjzZsA-&<_tw^~Pf zyTNI9WPL!ZfZav~yV@z^16-mv0y~W}z^u>#o^IlgL}9h0m3*uQcRNXpUk5vEmcu(0 zUT&@)DZ@@Cop%QDlxCUInF{bqCZD>qv5{FRd?jKcHnV%ZQ#r&e0Sl--5Js7@n&+g7 z#?12n)ZnAcGc3W>>2z>iMwFz?0L2kWZMxur#Qua9v1R=!p)C2T8>OC`!dWn42s=!c z9}TG_Jy<31DXP%2L)CuiE{Z6avTpPD6mHeT%lNG`nYjHjA_AxXX)*eIRc}{4^YV3 z6=wU+Hv8a%gUg}bojC)fHC*Y=+Z}^oHGVj!ODVx=?@(L$msiploP_8V3;so%PwTel zkQ4T_<@>J%bp7bJ(Xz$L@D}%|l4^Kg>pkd6F8cU9-=T)MctnB%lLKT*Eq?~pz~ox; z1&sV%L~xdWCa-c6f}OZ^h<{5TsR;1g*l179_yp&85iaQ7lxHPHp)BH7e$fXX7DQ}ykBhE zr&k;wfQ$t)ZzL%rSHq~^WWIC>LL6Ux2o^F3!#zj;-2dt$8^Xh}@Yhs1P~`Dv5{-or zkp&!@CT`^*AoPpGx@dXKL+zw*wM%5uT0rNEF^`*0x9%HsxJej|c|)k} zdLi@GvkvCzP&vm`~O_$F|RTJ(;$42vIzXPgD6fjapdFIY_Ql@cj{XN^8v7P9?O#X5WZb%a@H;X@+@VIHBYQjLi6R zMr^Wtiz|?YmP7Kwt1|qqr5yxC??)!+tprKa{o2n8S8l-}+%F{}h4WGIp@%K$>zuF_-bsR~MV&oq|u+AUlM5yL| zm-T_Cj7S%uc%^rCnnf}J(te9i2~W!MvU_cORlAh<)zLHvQ7?6_{jQR?6IprT^L>2! ziGci6yVA)yvT$b=4a^gvTU>yMIkUWYJ-!crFkweWged*i2hW8@cx6Ln->Zk&NZiL} z_g^LtxCc}#1*^AtSntinT-?WP(+1^47ld0sK0>!~*CQUQOsYCLUs5!*bHO!<#TaNCtce@j4cdtza& zPQ^+}xk>U)%aB{`BQeb=od}h}1e5i72;JEe)h+>r4x<{1+dfs%cgFHcYo4(`mYC*CB z&b?jo2I73FJ4+ zQpD10Y1(d$6Q(<(_-n=d92bb51(k}$Y~t~b*-A!|?o#+}L+#p;RX`9*q>n0-wiK|+ z22-WY0&z^|;TORsCgjM3-x2{u^4LL*-job4bax>LJ^HadvDs7S{znkB=eofi-a_Dd z3wYVaotMxoc+RjBug<6SR`K9m^9u|Q^(9PYCDXm;g0@l>1E;MY^Xjs?;~$zCo{uW; z?4BK+$|aSZjEC;rC6uo?5+0c@mrp{gNaI~)>d30)W+=v zPaa&(72ONGzS}8R-g|g5!$Sl3_#QU@Sycp_fpD|)FlN`SKr_&MY^m{IyU-r$OnG21C z3j4J0y@`zTnR*Pf?9LVru;PsG>T)sRzeyX`D&Ot{?MmW7QOZ?cC#MHm-SV9C52aZF z0b1ILZ~{T0aAqafmj|eRDd#yVl>Xo#{7CBWROyci@6PzIP`*h6AfeErtXxh_LkX*~ zc5x%R`VemkMAC0WQv`T7W=lHn9W=)Wh%%0%WwUgzxV|r47@39nyn03OhL<0r zaz=Fh%LXKAkNCzTP<6{U+B;qXO7ERPEH~?=jQ|tW2oU;OfsGxHq|;sZelDjN`g34! 
z&|9=OOsdK~M(a|AVTlapq>V&u6rIP9Gem&4TqSfjsHVG%|V2QK|c+uO~@I>)s zBj9eq3EF?bl1r`RBnAT!glxR5bt)F~a25V5*nl;U#cJaZQcC1 z+i75J%;OaNny{i#bw*Dgic>x2qJ)}p91zm&Y6)FP`6kXdckD28H?#c!obL#wn-6!O{9r_&S!AW?V(=T?vwaek&%+B1U*}KYDw~c@m*)5s zt4ej>Qth7v@w&&;cgKpe9rz5WiA?Np>gkwy%8#U@t49oEYUR#$W%ABm%<+HKEG>pr zPX#6h$J;dJl_L7?tmh9q+_(H67SC()EYDD7-RwRsy-c~OKQT0g?Z^nfobhlUfm)DJxRyN zCr9tI8uWIDxR@GBqet@BkraE>`$&HC5YCN+Z7sDAKA&E0CW-T!1owE&0x{^HRgdO4 zv>^5mL-L`5QHm==8%Gq7GKr)7ka@hdzq`(UZ7d$y)}o|J_%%o|D|9xxjg)YYzEUaJ zjXePy{RqwHr|t8`J-MJfZKhHIYR7M$n?EE@dSC}8J=5#o?DI1e*`S{xdp=Qus3z8&?1W&aOICUrK#jmDPKgEEjrKDR~eN0%*JX(bfr2QoVX{#EBQ2L z%q+>|k%$!%l0QA(e1Klf6r05!EW5c~Ek6mcL>Z@F8>gVfU_tEXqmPp-8CB&#C5*Vc z%lh=_LWFmGBd=V&^673hwT{Jz((t%~w_0&ck&)TTW7qMHi47U;TlF`=6RWR!BC+WW zc|P16C)r?h@m*Ya<(hdM9~o`OvK}AnmYMNhLX1IAsTa$wIv{wM1p=B|5q$ZP29f*o z;1@y@BG~R@kX3K=Zeie(VlG^ci@zMmcld#j5*U(E$m1+lZj)waD$3Hayww^l&?#Kn>|k6~N7~vj0MJe@u3)&Ipz%zT8srswdc1T{((21XsdMi{uU9{ZJVPJ zy|WYy9XDU#+=VYa>={T?tqrW6W*B8rQR-WXXFrURxE-QzJ!O}h zH{TGP9%~~tzLew?2@=xFUa8yTV*-AJTU=AM=gA9Zn+2LMa*i@2SvhpVU%?dm+gP~N z#a;@3IyITIy3wMAt;hdfS!tgNy;$S=-{AyIqr)4T5*;%qS(dybL|<`(DNhSn4}MG9 zg+#_l!zL|h1j)s>>*60J)R#aX9^)Bt&Xy{0Zx8hrIh3$$hAM~W_!`LJb(zl>9re$Z z1f1&{V+E_p-OtxY4k^d6J;FUE>y-uim=(_tBlJ;p@*?ub>p!Dp6oh-|KW06p%9a}& zm5@wkX_s?ig<%}Tte#zuW1iohBtp;ep*55L=-u;l&^0}l7}CCCx0zpp`%;e+M!4b$ z88KbrU$gn*)vyS5#AgeSyK0Ka3vyU5JHC?Vuaf+jCSIy6LA}Mr9N`fP>Z>AnIly$p zmTNh z&pgur?%BO1IU{-V+>r5<2=;Sj)`aM|W4EsufAn1mOPno>LrS4P>X|6f=;O$ zMlHCFMa>8r=wi)dOxWg_dzbp2wpMsb0HWUIQ?8Z$zTB1T3kX5N$YK>Q+-HCV#KX{B zgjWzQvbz&hi^sfA_vv#eu4unh6)l6orC=AqCvR2L139xjDv{ovmBrbtGZV+s$IYYp~_bp{rQhL&wn@>OrveZNaIExO{dEz#sp(G{-Vw7(o{TQgW7#kJZ% zrO9Z8g$RKcli$v;@Xer&Dscr|o89ueFwo9U&-LN)Do@QSzZLeB#q6k3422tmU5(W6 zmrUrYk!SVSXnYB_O{}4lhK_U)puxZSd1gBse1YH+6!fzA4wc#Fy-@>A`0*`4a=xGM~cSJ4>j@~ z+L)9*r%c-wTkz-l{Eiu(2a1uO=F%(X7UKcPI!XqR}f&Rs#QVVl1 zl^I&I(Vw{e+ft_0?OZ-pmWwh15w;FXo_vM^WFArb@GuAvJwex#pE-QdyPt)UGr4+m zos8|m5Whul!vqu1`26-BEvOm?}-panT&`!i3Kgk_?<*PPIT&kFilK;xUm5x 
zj?WN^T_bYR;`Uc$hg9!&3-3>Rfv=44GjbX6c`Qd`m_7kla;- zS(yu{A`Tz53A=@y$s+YoCF6yE19PzOe5O7NTc0NQ?>JPQxTr=edQ!uv@W&~bPcAG` z?%YJ%WhSXkvh9zhIMt6M-L>wvZju)Q*-|HM{)-Z$^%@Hmyhf=Vk(-7}P6eO^%d2WE zd6QM|czz0Z=R532nWUzOZcjo;n;PCGFO(vJUDsM|+&O>AgdnJi5^wcqF@s0inzW39 zGQCsrx?=|2W%kB_WRX~3_%P9#9u2beJZXHg?T_juB*Uz{P0-mEkGsB+JDRQ)y^>J% z6*J1F*=-5{95-;WwO(3nJ>%obI|~KRDuj~I0t6S`nKa4y2x0Vm3CY2(Zu*9{Ngc0u zhog;1Dyu_X(;rknY$(gUhsLBE`q6`y^!B@|Yo z_t>s&xG%R9bv}fg2m*A%q$^FP7qH6K>3Zg5C;Niy*<-Z!W$AsMj}VHSPXtc%)tf!r zlQC2xbbYrtvnKj70dONXGgylpkW{Tyr10bdrZYPz1;t4sq8A5&0o_TS za{`SU?ZJ9}WWF^<2vsWZW_3u(iLAkR)ypM{%jMK=+Mml{OT0^j30)s7POPZEif)rm z@^FVJ^`IHpu%!=xu=83y&KyT!Q(x|}$BskSf|1)6PAK2jmS0E(n9!Ech{cD+^_?6?s@aF#9T&Q_mvLnfz_ zofM0}25KK(>daAj!q`UAuyXcr8|EkJs+(1Zp|s#Y@sVG|BB(*7ZmKc$;d>gFe9WBF zf^h4@O{-%Wa=$v?Rjv1FyPykhwMPBWS;=k1QyNI5d~wO_-w!0j54V$yR!J2Bhi_ z(}MBNuGUaq1vdON%dQ|$h+BZ#-OUoprQ#tl_HclIH8<)BfxWzIN2!73M|Bp^2#s)} zo37?9$^Q{#k-G5*hpdL<Ub8q$PJ7xZn5m5e@8>=(b1vdqaf~rb6OMQ*guH{~P1KR5 z_eK_Zg`s7jljX!0EF0`HVjmnQm%}tQK&x?CgtElyUs%tmHTtBW{|V6%oFRec{bx{y zgbj6xH?%BuIpTO+90e0IU^9q4f5tSx8?e-(nRK#> z7aW+F_BarO6*H1nJaBCB_NXcG#Zj8gJ7pvCl0P4!ITEDPQT$rr64N9Vq)ys<4 zojzh^@!g?6Ms(?1?po^1@N+rsIFg@?fFAMbcw1KHu<<$ya|SKDzfrQhWXeIXR1~6L zSIM|JT-Cb{L`4c+{#y%RGsjVbhBH+F`a-_UK+^DT<;5OFEARVq!#}73b{`y!Y1Nxp zP|c+a8Ozitg@lC0*7*4`j?XJ2DEA%SWsm!X_S_yCrtkMdVl8FRHiUl);$Du|*_>#} z%WUI~@3Y=VM&1aBp(d|7Ucfj3`6+cgp0C1G@($V_{6zEa>;EEaNFkY*S0?nEUtiXl zV?i9PKJ_E;?bUBD6_?-3HNmuLzA>zINvCWryphjY#(M%hLn|$?z?%C$Aul*HWeVl$ zb~XeV+r6|SuvtKDZGBCnSP+@S(g$!uMo@^R+haKr!?uV|bC=y#;1AJ1> zyRFHWvmYZaa%)798FmHzFJcVUpVpKw&gGvsK|%5gngS)=OE)@qao?4#Uw(4SBH{at z<}G6&Wg3QB0RIGENY2(h59vNuU?Lt0$U@AKIR=SvW6S?HHN~8bgdnQW83b}H4X~~w zV1TzMjL^|K6e8HCLa^!1y>f_zTNG8BBNQ1KRlBh@%7;SN+_4s^GVww>EEV0rloh^} z0A+RvL+Xykb=h3?tsMWj1DF0$L#(ZJkPd#e7Bl*g*Y2$EKNS`g>vO23yRVoNF)_uv z|3zNGMrQ9t4CF9>wWfmgJ~aDA#KM5=?fq^Kpyqa->i7=aY~mJ+a|7xw^aWK>eIAe? 
zky=jjt9frOVu#-{`NzIWFO1@y9uj?v_dPL4%h74XjPc^;1qC9Drq$9!lS_DsFFd#FI-ROgeOut%>Ca}BYQO^HOe-Xcx?DOM40KSlGFcm~| zU-?6YJ49(>qiveal3#cGCCLD)GB`Pa?D(%g#9vYh;GsxgeLdhF!@$3-wg1vghQtv> zgiiVojrQ2C^qWVMsM&c@j#X82`bkdG-99K8g`KZzNlbF6_7Eq`5eqss{9-mDOiJS6 z2tTsgf6oL8(=!m8(dd?ORqM<2<`WvKH7hf9OJQKWX}iDs;aOJW%Omie6u1ra zqidfJRf}G6klOY$*vzsay;9W6he^Q|w^Z4|OIojLE4%Pq>--#Q-MdAKnSgpRI! z211`y@J4s9R}}4XJ$lvr1P0;(;mKdtNetEd3s40Lf!Y~=Tda-M$A4~C>HqWFim{)T zrj`7F|Gy)}p%BH%+*dgL_p2!4TLnK1mh9tPkkKPa{g7>-GMtDJg2YD!w=sPR#Oi&m z1*9zb|FZJ#?S*dGL43IMNx{ycd6I z(#ECs?G?m8oI3?*3t7nv>xV#qO zVxf(&9aC}$e&e+qPP}uSDo?KlORuq9Arc{-Z$g!xQPLyEgh^Oy5a*MPJ=CN|NN zygmXESU~Cm(q!#VqS4FJ0$3+DJ@gfmeU+rhB6E=`GwHZd_3>gzmb8e_%r&u^yY<$x}9XLNdCfGFG!ro#PmXM$JL z;gr}OH=p?AS8tGF7Acpr4Dr!eTWqH6MBi;-!}OYrI8*4=KaiqH_gQ@;R01~z8G~4x zHlfdo^AR)nb2~f$Df0U$Y*kr25j#7U%&aWyv#(5ktUxn40dVi!k$vgIC!Y7F;A&w8 zm37KnTi(`en(m==6hPAb`z{@8hlqhOeJ-HX0c^c{$2m}Zh^KQ9>mXHOhB_}T`FW$R zW_YOuGt>!yh1I)4Sx+V9wG6hu?_Yrrd=oN-d%dX|ZY(lKJSEsrCmuIebyIsAs3@pE zt}7VSVfgPh{_mIk|1p(Kwh+DgvV$}NS1-^!BnDjI@?lc} z?Rg0fko_}HtpyiUa1m2qesVZ*N-~jLA4q@s(kwq|1D(s{)o|}A%}0!V-l%T{kQq_+ zl44LPfnVN+qnos(*-h9cO+mz|gxP^&(`KJC#{0IAssPX?2mZbZ&?ZPJ{^t9WUqLIrXbdPdBhn0Lv zovo_$cSYL1ycM;!e(^+?JH@WAxp!6=l?}m0ja~jRx$O`i34z!Ds26)?RAx zx|?PefPnzGgW~I?``fEhC^2_#2Z_VH3d!0S>a+UN1~5xYI4_J_heN(or2Z&^x!dY@ znPVY+bNI$p*m_qdh9Ir^eRE5ASu4` zkKobGw0_l)zc)REX@J0m)#N^2^|*LPcT%(D9jW!=Pr zTd>+zS5UE72uYVSSxsGN59aL)NR3C|XEBj28x|WV0VyJ@%_qpc0%b_ca?0Q+lh&$Y zw6W}4)v@bj@6w%b8E#T<(PF<5rD3vwe0Bbe=bZap9t) z6pL#?CnV)g>vB^ZFVr*V9&)MHp+ zUp8ZTmg~p5vQ692134&ruJHB=P0aSG42Gj+kCVOgVaU(R=X5yRJi(<3!esmDvzW}E zqvE$2YEJ9Iaqv^cmhqm=zCN=oG^BO&zmT_4Y1?^`V_6pQ2Mxx{>RoQELsR#r1xLL* zA$LeMSbx8*I5Y#$0nUtimhguwqR8+{Puez50yv>&i&H&=EBh}4_|YWgMvwVdXEge4 z&-Yq}U@Go)V7tDLS)R=19;>9;gCD!*MbaCz?bo^jjI*P4_8!nnOba?|g9SDcMG|kh zf#L2_l#@V!Qdi~Y(noJrcS64X<+rRn*JG|qV#_00XZ9aqD^5cztNG_FMMO$MZ|CNv zz7U6or8M1sK zF>fq&`;*_DeOw8#G?ZyC#2*zndWI~W+Byer7FB_O1{7ewi774UwzLh<8+}HaK0BR; z<2#(yk+qPe8x;JVwib`sJmdlknuvPez9#QE*2yuWMMROWPbm7hoa>v)!Sk(T%N`p$ 
zd^A{7XlS?7?Ntb?>&H1Vb5KZV{ehneyh(41s>H*RG`R&DT_7FeF8~=Odm%}6DJdYq zN~Ax7H^>~++3QTa&GsGB3wSXW6;{3E_S41RTI7NQ(Y{^YKq1b(^PvgSpESewYH(E+ zMcs*Ok#}FW8Jk!bAr6{afLqqYfCJyeSVMm%RWTuU@ntxK^4M6)3&yjVjVpeXEN=i} zvtE9Z(e|??%bCLjNpJVbYLprO;Wq#I4YT`seLa8N>5m94j}ec869Q#c_&Xc+oobK0 zsL9vFI6B&dSqlkErO6=g7ZO*JN%T}b z*uNuY*qzB5Gx{1X$f;jdQ+z9_`*=dhz!!lGwJU*BlEu!v#8G3qkLP#7XfEI&Vvo*^ zgOh5aSK&M%8cG9@QZHEh@WVUHwI8!}54YMAY<~i`tXk*98tox^IC`#bYc|D)P}xF5 z#f$$7E>c!xa55HkV`w7di3W59SQdl1&~%zpId@ES=tW1!0wS+ zUuTw8ZxbQ0%}0Bcrt1+f_!9)E4#@&wdZUlt3jPe8b;Qqh8% zPsFqU%A{mt9!VI192VZpOYM@*@)>%I)^~=x9QEMNF$0)cHOuXk^_y0Z>e_aRO{kjW zyNW$Ke|aY+at8>)`$6o>bhm+5@Uxm?sRzZ1NlD}3yIClbk=jiHzB{@UO zhWzgU%F9QEKT?5TMzFO(`p)k=H?)n-zbc$2bXwd+GDl$G%m@~`{~^;h`KiZTlCIvQDKpkJVIBpv8*=ueqg6x}~pqu1FUW5cSZ zgAC>|&AqxilKvfTJJo8yf%)YB#pJIw_J2wKc;Qz5B!5~`2ep6d@tXfyk00!v-}_}s zF9%azvv(v$Ay+78_YV0a5}mDExz5=9y$^Et&hcP;>WY`1cEZj zucGPb0-QH)^|+VnW`s{}hIKsWl-|Ew2)X0idqta8HMFz-=<70*KEop1BZT}l%NV-s z-gv0|4Ui%ZzVtI~B^P&m-Cq|wBLJ2|HlK){`CBwN2;pW41jXLR0q>%K^IA-8l@w6C8n)K6LGV=d#dvo!*4VK5l>Ozt_*5NuM#3>jMJY#{sDfZck zHJEcgf|ohc43kG^TboX0MoHAyqE$W$$(9F7EO@<2A5=#+`&BFb~0d3E<0;n6oR@W>x1&F#SEZjNZJwdkOKN%pJRHQLOdAt?i7DxZ{5 zA{}_4+*uFCW2ryVv?Navn)^~`nfhuaTpOXnKHItc(U>!{6jW@lDR+%){23d#02iPo z`ppxk-@phSQgEd|t_9s)jCMPnOvEQNabRH7cV_Vbv4$_m9r2ceOWV{O)wYmgv*GPW zm|8uUF#aF=$iSDDysj4akvaKBnyeA}MMsWf34Zs9JJ7W#5MRoKBH4%!zdU`L`jJ*R zEhk9y?C|Y#q{w^ zrBo(vrROqrEBW>;C?ayxNx^{pKC*`M4WfmYD6_Wss%a%dgDu0r5?@d&Uy9GEA~@sD z{kJ7&bjh`coLh!HUWAp;INpNN1p;Xvh}`jRVeY9Bn=d%Vr?G==v5H34!=xae{-x*@ zEPO*+8+=x(hN$ke-hXne>vr9mP*;b(;#q@A_k!>JD(Tfyj!A6zdsbHvjvxfRl??fY z=GiS4A(;TUU)EcRlOgVsY| zG2WFP@%0s_>_;(U*k@$??A~2dAU7cLg&*jHVx*K&BEkS*o?&s^$W(8o5$sZG-e~4- zD~c_mqV&=RmKiu7;Ouc-8ri`+`T|zEc#h+}>!_5)rUyh5@~L_GR67Ace2;sb7wEfT z!wrZi~7WnP)o|O4u8eVqUOs{+@ZCF-L)J{Cxt~9si)U>zE#|@*I$5=(#7A) zFq5~#Mb!T3`mayfPsx1l`0;QW&tp4m%(RLjt~k^%+Hrk7%~oTuUj4USI~5-u4Q3WAyKBH?d@azmqX2DI%JiuCQt~X3S9+--VJcd|2_SHcs$U_cA-CA zoWEE#tZ?7$`>sGMg9}KD(1i97n)CH$^5iuCy zfb01~o5!*9o}TofhtBoX%%4?^5BQK6deJ-T 
zYndJUrj5eKQxK5-wGMhKJkwZ7>+AwTA{x%mdrZ8K5*_=^ zg^V6kdOs|AP)WG4un7(HUn3I!D}zyy#dN&0c6%zGK9M7DjJp*uHWH13&V_(GBP~J# z+4NrhPyfJ`UJwyiP~$(PCI=CwCaYP=n|~0&8x`hlHz8!1ni5H(0@qNtJR5OX#&YN3 z0a44pLUsx;JP}t=nA?6_dh0tk_k33uf=gPz>74LqXwh2Ie+2fGgyx|tVJcfe^)@TD z8T>~Bq*q=u${+IIw94f|L<@+Tfy=Wte4_)Qo;`Q%cUx{ppSZD?ag_XXlTr9GM)ekQ zK2Xk6-~9a~@_=97W$k}n$jZdr_(Lqqd;82SR8t-ljXkC3I>eQM*_huDDe+sYWuwLK zSqb?H#49a`FwKl=`_y@je`+{0sDdd7YEz-#jP@5^Z+7R4I~qPGWs&refXjCN(EPU+ zpzC{tc!n|=FN@YS;|#}n|^1U`HCFDc#bd9Ivx{BwE6y`r(f z$F|hIpHOtkippkwV(66tX0L=MF@CK@{Fs@{fU$k(=CY)yXC+HtnvwNadI=bac5LKmvtw%GtT z$DbgDdsgcuu`Y7qQFMPp`1$78@8ysH)^`b1{@g@pEK?1eCbSjeHk~Uk*1n+I1>Z>e9P zcQ=2EhEQzp?|F$>9J;K;vOhW#=Y~IVRWRcOGzXT-#P;~^|NYYP=dUJ41>8x%$%J$v zNkw0$Qtow!hIUVnt}xgBdX3BcJNxIwvsG3Y&4XDnxkJ!TR+-p_mY>7c0ojZ!f4^Mj z#(H*w2y8B*6I|m*%YOVf=t+JR5wh*tFp3v^G3ExMVs*G9L39xMmAF8f- ze$+9K_W!Ops0avl{^H;NCpJ?m^)C{W){zLd^H;h3--9L)d%pv*r&9Y@Z4YEG<&lAP z*V|P($M0^8*b4PhXA}MSA1s)x4+qG;)Ts(OXOaFV(yowkXFV(S52b#075M2l-fxMI zZ*kJDd0N~u5VZh&*#EDZ|Gz`*zj)mLuQYCMBsT~#aR3R^cPAb0nxu*q0_yfp^-Om0 z^hEPXFRH&$(U>SI3cB^K;g6p~9x)ed;@&r3bA52Tq023Mkot>XS3A`z6Cv40l;eIW z1=g(KIb$;lqC|6y zAAE%AnYHS+B5*woUd9ibgTfm+sL`0iy2!*$;fN&T64i~t$)LbIhF5=q{;h`9@R-wI zt^RqnDR3fu=Vh_+^p0|%D`InXUYf1{%>)rLSQK8fUk;ehoB*eQ0J5B%9t?lJj}dZUqw??xKwY~VJar#s;5-sM&+vz1sETr_M-**VtfqH!q4Dqu6>G@ty z>sUi@zkZf+_~5^Oe!~0vRsZ#Mo*#)7(x;fv(glY72r5G#X6cv2QFu{m)NRvxA+3o8<|u0BZGY(7AxyXg#q zf>4U)$N=0vg(t0FI65kqbnw)OCby1GRg0?6#e_DY%7S8h!@`zP6H&@_JH>bOFXbM? 
zVZKD8>c;;<+qq)es?B->T}q}Bj?z18)H{`WHvP#_yh zA3+522gYhlj;G5yk%zS%1&lr%xiiN;^|K(3jdoOVpgW^)ZcKAHHPTrqF6c;$ji8h`fKhidVX)~l<= zDT4+I1_AfDA7ksX6op1g_MFXMD>&7SC&E8UPUOo5{X{FtrFd6jTReG|C_fS08*c7u_r4a-LWX=3?qh99I43OHxv20n8&t+R$TG)LkrGpfU};m+{E*+@hraNj6-taB8SH#&Mn+Cf#b_O%M^V#qHHBd$%o=33pR9~_bv8B) zi~G>PxBNC2p&ZUmd+YR;{Pq;M<}U2U?7X8|YOp*nu|mv}mcQii4Z_5AeXe(?03_5L zx|2V4W05BP0bq~p!sX7~eCav5Wwk;ntH9&uOQUP>2Haxeiuv_bgJsO!wH`0t`dHc< zT6rP8je9oB*$c7rtTQFY=$ zW`+knzP8i3i_r^Kx^8g43|O!?OXrxj3usyQI<7z?#@)d_*5+3Aou!?)bmS|LR3w;f zrWID&6K73)ug`(XRT#N=@2xNB4TP zVqsTar@MDg7VIaQ9Mi++* z{ex+a^tFt4w(&Jzf%9<$f10LuDBkig+;rDX+4?*n#$C9pTT$UFt44uAHNT8pIpmx7 z0!DTRb|^A0%x+tUW_?<5eiNv56wFa^_|buO|k+HmnFvtE&r6$YoX6}2<{^RD?1 ze%sYC!qd35D%aF*MOoX@re=hNt#;|Xz(vutbCNc$1}b{rqxm(r>X2v;PD|A49xbKQm3#8#PdBZ zZSyDnA8BA=;*zzPASFIJP<7G8IVYa_m4>yW_7CGR`zQGk6jneF=OVnM3Ad$GbUl?_ z-AORGX)jzefH18;^+g(cQFhoD&f3WgPRS}g-gNoCh7Tv+RjlWGmXt{T=@LABDNl!< z3}Q6_kH~EEQxt|dUKVKK>iCEu(qT#spXifFvQKH)%#~vnD=)P&zxln$>Bx`teRo2I zy;?Jtsv5`*LCJ_@S~+h}1=9h`_&0S&O4+FcBxkXCV4(p}MY$kU7;r~0ARya8>zMhA z;`CEJ%|Ysn$15HFT#^^2MK!akGv&>#eXW8dtfqE=QbE>xXJz?exm9A7q3^neRLqrM zcASswvsg0+EnQx=jjkr5Ivchlp{B|t6wPvEPSguyaS@+9)C+k9BH6Ddqv3iEbB0o+ zp7<(74m1v=T$Y-W3XZEixGK&)7m_#Kv{gIA%sJ6B4;N!*Yi`0VM;wgs0e^r zQ2Ix#a2(T@zR$(d3)yp+`cpM7@<@%~Q}$QTtSfw?;Jx(9Pq$FliW^+Cc=Mvg3ro48 z{GYcwTB*zYHM5dxd>^dZan&!UU#b9;vFcBTMIMv~$IoGM%dDBLZt7o}TDHvsboCf)d`sHiH6``XJ{wYal7Y-Rh|J*?^b zn^T;NDOh*gweKEF6-t|8&6#N$VN-pJDP-d?&XjvjZ%dlVS^%JVRvd63AaVupu7T<+ z>d!5PvoR_KSgNF&hY~Z+b-ZZo753?ZTD^}foDMT;Ep5&ih zQ`4W`bXNo@o8}8Xicgb1gcHwBI_qoOm!2o7tnz-+^bKCHNj^aI6#pO_J{`lDa{c7} z*5FP>`MIX~dMRKu-*Q@kYn_2aaMW=>eb-hdz@+6QIEh~J!h&0I)dH*ZP5h9MOW`p% ziTx#o=VaR<=KIAMw}zICtq47(Y07)+Lbt<@?oXuyE>H9F4fow7T)WyDv$qL9BMie; z1{GW@oZ5S{7>;Sfu8{<~^%8RWfACAq-kaq-#ho-g;iLqI{*`e`>F|vF;G!k;H1g*W z-I{ukHyt;R)~oqC)0Gof*9}9Jc~P;Pji0ynn)Js|@xM%Pk&P%OKG1SXKBB6cEdWsO`LQ%yzTaBh3kZXkd;l1}vhNL+h|73Cg=?8r{JC^f$5`>R%cFuxG~)rw&^ z#)ec>Ou`;AZb9=6Xq~e2{K*8OlGP9}u{P*36N^txO0l{}i=bk8AhWy*F|sxv>te%a 
zF`(WAjnVjBUA$HjT{jH9wfSeAtKTWPFi*Z3-UQPl2USa@PHx3d=o)?d;8KOWge{%NwL$m$!?3by{ zW($Q!nen9i8(bF$9YyC9E>C0=K!I{;Q?9Ui+IW<+u-&fovE8L6N%dNHz+1qjxO#QU z<>k5s_5xdRGmZmR=KUe!-q)aJ%bY0w#2ireDfz%j)scd0`^)tEg(tHSuh@)IG4TQC zMLhFQ_c1PPNUC0GiW$-kW(r6210&~Ji)rCeByM>#sViA(=FOx3PiyBL)l|0q@jM%2 z0iOfvh#-$qy1)no1q1~Z6b(u!fl#7?lu!jjAe5j8j-WINNH9`ELVy601WYI@g9;HF zgwTr=LjpnRB$VI9ao%s-SgIx-0kGbN2q8efHU(eswQa5IFbn$gWoQ+?Ad*+^2w0m8U3*i&(>>bwk%~_P{wb;_=rSqA6^H7)MWv& zr0CGDa9$Yp)zk}pgu$Qf!U?V-L2FE#$;?uK$lUJKs`y7Ktlqg4gSwW&OwBzjQ)tZa zW_xnrZ*%O&O9M5@Rsj)~y_gnTXo4qA{Zds(2?lzXVRfNuWh+fvkQDS^vg`!iepjSU zP%NWvcm8RMg3Enk`0d-bEzbdEklUr&kpsn*x%iXJnHI`sOehZ-D;epM?H}J7*RheQ zJvASGPmGo;yl*b1hYrGtBMxon9P_Kv6}YIFO=F@S^mpKOJLN9I26lRkKMJWOi}N#H z3I?m_SLe@*qS(;86j92`q);thyRkpOl93G|I2gc}l_PSI`_p~8s}M-j`AGcHDS7!< zbE#GHvFW=nA3cy+uf?^gjIw_7INNJFBS3@s-VhS?$uZJ3sQ!CUv=Wd6op)cM^sYGFUVXs9!T!2|@WCOG+{Nc(>N zEq`xnp`VD~se&O-rQGdWh)ikM0+k<36oJmC5h!VGzK`oNDJm_OL~Nh^c5ejXn%Iy# z<{`!hUNCU+x2tE+nKa_Eo!@{Z0lZ{a#?>p05ST9CzLk}C;JznrX*L5s$t*Y96BHNQ zbakd2N)1vu<>Xb%ewLLAG9+VcumfuE`|QxP8XBcT{9w#zQBNe?1phT17nu}LUe%^AN5x7iHp zcJKKZW0VESiNlC0z#rC#kls>KAT|(0hfEspUnTFNps#ihlk^Dz_Mg~SPf-hN?TzSf zDk_3++QZ5j>b0svTS2?bff`ZDU~+@vV>6M@O4bmkOYsc1m=pdn%^{$?Z(%+O1xQ`*q4Kx2NasNgxuBxg=iw*Utpgw)X^($a5j+@@i@t0jW zuG!1rWs5>ObdgNYioV3+BC+Q+`{ULoWqsdht{y$Dy%d?!hYgA@lsasuM7d>N`vXGS z=oZ3cy~DSb%m>~+C(}fR0ij_dJ1z$Wk!J@KnZl6_n%B^Gk`Bv>A^PDCleF2UfZT`< znFv2xVg26HF~=g2H(LdR9^C+$6epY2yX5E6&kjmFNELcEz@nnwXSRB3ZFX@~!PfF$ zX@ds%3u+3VPDdfR6}56k?a1YdV^Y^&4fT(8H|V=aI|}9zJY9XH^Lp&5ZLX0`5Lx#s zwz7?^D53(k)>7xt)9|rLg@Rp@(j-aae9)nPwjz z{e1wBq_X=C7m{g&bATspP#ue6Ao4NTDSHu>U>Nv$si(5*(VPZxm@}eubRXzKACP4v zxvP%%ZI=CZ?)FO1o6uTTR2GAZDA_y@Y3q8@C)-XMokX}!w#zPD3?^#rqlRa%kW4!N zNxEAx>D~OKo`>+BMWF*c1Oo=!EF|ezcWS%x1av&%5YnlZ?Bf~DOx&0Iyqn}Tut1Do zUCD{EbSA~_XOZo)#5CTmJjiOtGpaSD+cdIzK(cg^kA<&}S!zb<+8>jd-6v;G7RtUu z)UiK~)m9qoRU<-opqYhMKUvvaoBR&4e33P$q+mbQ7rHEnt)hVklzY2+SU>JeJ8)tJ z_n^729)eIFheDyLa|c#KR+ukSg&&XihQL$|s+LbkyDnzW3HysB6U>oQusTYr#f!9s zP|$G$pjdj;j+mu4G=%jD^eA%$D#i?@ 
zZ(Kxe;Zsb-V(`3J0PHTHlBG5@m#k8In#Z|Gwf>F@{=1?|jPVhx;A1_+{`fFcIs#XQl1N5AjTh9mhi{#$g$r<%c8 zj=(hKN5bJ#v2iRy!;o(M-@KS+QUAJ1Sz&9HmF4 z!LM}}+}F$kSEyAL;R<}Yd#-dW4+qy7y@zeH{V6&h|9MD7Hua`6-x2l%lUK9Ye_1F zCAM>=%WZVoTq#g|3_Ab>Qne=SG@=qVQK=>Ak)z&?5B#?%R-R5Bm)ytco|NP8JQcVT zW%71$*lIlbVGg$CwzYRphcrn2+cg59)QpJCC?J=yiU(67Y=M_1!BO+)(69}lX89i{sJCA7Z_jrJ~XPP8tK7IMa!pYKB{;|5K`}`7?9HTtLd0gG~Nyh znW5i`HiDXM!#dBm{$^W}JSfD6?&5BO__mtbFy3ot&d>B<^6 z_#M4?kipUb*?NsSDq)w2?JTXMv2Dq^2hSzu-@>I5R`Y&UA(OgQqvPCM{pn=Os}1so zDx4PVA)vwGQqhDJYU@T{G_m*jy)L|EiD+#_Ofh3lO&9)JuylU{;R`9XD z*X1lJ{3CEfFBmZBW5CMS%xgTZjTLon1{#(AJ^Weg^dGRTwaXW;x(cZv^>3z2QtMZs z0+-zn8Qw4V>{5R5uEDbDHP#x)N7RW#Ra{V7zTAkag+cR9)`P*oml_SkJ6q4|ygv6f zHyGNxw0r%);1^a_M!6a8Gdmq4gZ_Mq#h)3=e{08<`PhuT`b+(;$5rdBcfH*E{R~e_ z5NM;tnUkirotEI8`~WuT0jVnH=jXsu{?IoA@75mDHvt9t!UDK>|Y|WKetM`NrRRG$;KoE4J)|c3_!clZLN9`G;Ce@23;4i z^n4S!)r_w3n56>(4Sf3_6Xx8ZG$0DRI^-rHV+;5sWU}~izk4-H8Ti>l)h|HnU|qJB z_Qdhb-^0eA9rD*O@6UVw``PpdEby1xH(vQtrg$L~#sel}rvW+&^Ok7-s!C9Mpp}{h z)cvL{ET^Ft1t={;T}AAz>(a!MYTni9FldE_ZL)uMNB+VEswBRnFs9JAB@hh(fsXr+ zTscPgN;q#cc3;=|oA~le0C0#8vu{FRi)N`?&r5=bw{5*Q#Jfav|5#7f{NXh2Jv7bI zpxHX{i(KMeUI!i-#tlNB$ExUfD)ZhN6#1_t{9|O-Qz=!|2AJKmTDoY_G@iAP=YkT= zSo0`X;)6%(tMl#lf)}-1H~VX65NQahF<&Bn%WTz%&Fj`~)1TWeemOG!RB_8`>Mk1zBxE%!A{sB%JoQ-=4aLE?` z$q8ZC6SMz&*Z@2=ptg+_@xW@aqt|7JgXDHq33b`O(BQU`n|q9M@|u3+z7j@)=d-q< zxj95sBPRHL#=5M)H z&C&ua039b;>%Hpu$IiPxG0Kdw95p9hfC+;@$$NpJ?D(5QSqpcd!#})HUleH5`%r^p zq__u{E(OUMm!yp^?WS+sR0M^`L)Cyt_eEXK73@8~EHmz7|6XPWbT$AcJ``c6)ZDKoqKj4o4TBQ5r@}k?M4Kr~=jg~u*6K=!x|4L5y vDy6kKdA<=GviAAA74g5!Vc_f0jmyW9sI3J`?qT(7x1Tv>b&_g!@y7oEOeH?| diff --git a/examples/atomics.rs b/examples/atomics.rs index bc2a269..e108abc 100644 --- a/examples/atomics.rs +++ b/examples/atomics.rs @@ -14,12 +14,9 @@ use candystore::{CandyStore, Config, GetOrCreateStatus, Result}; // ... 
fn main() -> Result<()> { + _ = std::fs::remove_dir_all("/tmp/candy-dir"); let db = Arc::new(CandyStore::open("/tmp/candy-dir", Config::default())?); - // clear the DB just in case we has something there before. in real-life scenarios you would probably - // not clear the DB every time - db.clear()?; - let mut handles = vec![]; for thd in 0..3 { let db = db.clone(); diff --git a/examples/lists.rs b/examples/lists.rs index 1c27853..c9e813b 100644 --- a/examples/lists.rs +++ b/examples/lists.rs @@ -1,12 +1,9 @@ use candystore::{CandyStore, Config, Result}; fn main() -> Result<()> { + _ = std::fs::remove_dir_all("/tmp/candy-dir"); let db = CandyStore::open("/tmp/candy-dir", Config::default())?; - // clear the DB just in case we has something there before. in real-life scenarios you would probably - // not clear the DB every time - db.clear()?; - db.set_in_list("asia", "iraq", "arabic")?; db.set_in_list("asia", "china", "chinese")?; db.set_in_list("asia", "russia", "russian")?; diff --git a/examples/multithreaded.rs b/examples/multithreaded.rs index 7a0503c..c9f6a59 100644 --- a/examples/multithreaded.rs +++ b/examples/multithreaded.rs @@ -4,17 +4,14 @@ use std::{sync::Arc, time::Duration}; use candystore::{CandyStore, Config, Result}; fn main() -> Result<()> { + _ = std::fs::remove_dir_all("/tmp/candy-dir"); let db = Arc::new(CandyStore::open("/tmp/candy-dir-mt", Config::default())?); - // clear the DB just in case we has something there before. 
in real-life scenarios you would probably - // not clear the DB every time - db.clear()?; - // clone db and spawn thread 1 let db1 = db.clone(); let h1 = std::thread::spawn(move || -> Result<()> { for i in 0..100 { - db1.set(&format!("key{i}"), "thread 1")?; + db1.set(format!("key{i}"), "thread 1")?; std::thread::sleep(Duration::from_millis(1)); } Ok(()) @@ -24,7 +21,7 @@ fn main() -> Result<()> { let db2 = db.clone(); let h2 = std::thread::spawn(move || -> Result<()> { for i in 0..100 { - db2.set(&format!("key{i}"), "thread 2")?; + db2.set(format!("key{i}"), "thread 2")?; std::thread::sleep(Duration::from_millis(1)); } Ok(()) @@ -33,7 +30,7 @@ fn main() -> Result<()> { h1.join().unwrap()?; h2.join().unwrap()?; - for res in db.iter() { + for res in db.iter_items() { let (k, v) = res?; println!( "{} = {}", diff --git a/examples/perf.rs b/examples/perf.rs new file mode 100644 index 0000000..825676c --- /dev/null +++ b/examples/perf.rs @@ -0,0 +1,294 @@ +use candystore::{CandyStore, Config}; +use std::{ + hint::black_box, + sync::{Arc, atomic::AtomicU64}, + thread, + time::Instant, +}; + +fn run_perf( + store: Arc, + n: u32, + n_threads: usize, + key_size: usize, + val_size: usize, +) -> Result<(), Box> { + let mut handles = Vec::new(); + + let inserts_us = Arc::new(AtomicU64::new(0)); + let gets_us = Arc::new(AtomicU64::new(0)); + let iter_us = Arc::new(AtomicU64::new(0)); + let removes_us = Arc::new(AtomicU64::new(0)); + + for t in 0..n_threads { + let store = store.clone(); + let inserts_us = inserts_us.clone(); + let gets_us = gets_us.clone(); + let iter_us = iter_us.clone(); + let removes_us = removes_us.clone(); + + let handle = thread::spawn(move || { + let mut key = vec![b'k'; key_size.max(4)]; + let value = vec![b'v'; val_size]; + let start_idx = t as u32 * n; + let end_idx = start_idx + n; + + { + let t0 = Instant::now(); + for i in start_idx..end_idx { + key[..4].copy_from_slice(&i.to_le_bytes()); + store.set(&key, &value).unwrap(); + } + let duration = 
t0.elapsed(); + inserts_us.fetch_add( + duration.as_micros() as u64, + std::sync::atomic::Ordering::Relaxed, + ); + } + + { + let t0 = Instant::now(); + for i in start_idx..end_idx { + key[..4].copy_from_slice(&i.to_le_bytes()); + store.get(&key).unwrap(); + } + let duration = t0.elapsed(); + gets_us.fetch_add( + duration.as_micros() as u64, + std::sync::atomic::Ordering::Relaxed, + ); + } + + { + let t0 = Instant::now(); + black_box(store.iter_items().count()); + let duration = t0.elapsed(); + iter_us.fetch_add( + duration.as_micros() as u64, + std::sync::atomic::Ordering::Relaxed, + ); + } + + { + let t0 = Instant::now(); + for i in start_idx..end_idx { + key[..4].copy_from_slice(&i.to_le_bytes()); + store.remove(&key).unwrap(); + } + let duration = t0.elapsed(); + removes_us.fetch_add( + duration.as_micros() as u64, + std::sync::atomic::Ordering::Relaxed, + ); + } + }); + handles.push(handle); + } + + println!( + "Testing key-value using {} threads, each with {} items (key size: {}, value size: {})", + n_threads, n, key_size, val_size + ); + + for handle in handles { + handle.join().unwrap(); + } + + println!( + " Inserts: {} us/op", + inserts_us.load(std::sync::atomic::Ordering::Relaxed) as f64 + / (n_threads * n as usize) as f64 + ); + println!( + " Gets: {} us/op", + gets_us.load(std::sync::atomic::Ordering::Relaxed) as f64 / (n_threads * n as usize) as f64 + ); + println!( + " Removes: {} us/op\n", + removes_us.load(std::sync::atomic::Ordering::Relaxed) as f64 + / (n_threads * n as usize) as f64 + ); + + Ok(()) +} + +fn run_queue_perf( + store: Arc, + n: u32, + n_threads: usize, + val_size: usize, +) -> Result<(), Box> { + let mut handles = Vec::new(); + + let pushes_us = Arc::new(AtomicU64::new(0)); + let pops_us = Arc::new(AtomicU64::new(0)); + + for _ in 0..n_threads { + let store = store.clone(); + let pushes_us = pushes_us.clone(); + let pops_us = pops_us.clone(); + + let handle = thread::spawn(move || { + let value = vec![b'v'; val_size]; + { + let t0 
= Instant::now(); + for _ in 0..n { + store.push_to_queue_tail("myqueue", &value).unwrap(); + } + let duration = t0.elapsed(); + pushes_us.fetch_add( + duration.as_micros() as u64, + std::sync::atomic::Ordering::Relaxed, + ); + } + + { + let t0 = Instant::now(); + for _ in 0..n { + store.pop_queue_head("myqueue").unwrap().unwrap(); + } + let duration = t0.elapsed(); + pops_us.fetch_add( + duration.as_micros() as u64, + std::sync::atomic::Ordering::Relaxed, + ); + } + }); + handles.push(handle); + } + + println!( + "Testing a queue using {} threads, each with {} items (value size: {})", + n_threads, n, val_size + ); + for handle in handles { + handle.join().unwrap(); + } + + println!( + " Pushes: {} us/op", + pushes_us.load(std::sync::atomic::Ordering::Relaxed) as f64 + / (n_threads * n as usize) as f64 + ); + println!( + " Pops: {} us/op\n", + pops_us.load(std::sync::atomic::Ordering::Relaxed) as f64 / (n_threads * n as usize) as f64 + ); + + Ok(()) +} + +fn run_list_perf( + store: Arc, + n: u32, + n_threads: usize, + key_size: usize, + val_size: usize, +) -> Result<(), Box> { + let mut handles = Vec::new(); + + let sets_us = Arc::new(AtomicU64::new(0)); + let gets_us = Arc::new(AtomicU64::new(0)); + let removes_us = Arc::new(AtomicU64::new(0)); + + for t in 0..n_threads { + let store = store.clone(); + let sets_us = sets_us.clone(); + let gets_us = gets_us.clone(); + let removes_us = removes_us.clone(); + + let handle = thread::spawn(move || { + let mut key = vec![b'k'; key_size.max(4)]; + let value = vec![b'v'; val_size]; + let start_idx = t as u32 * n; + let end_idx = start_idx + n; + + { + let t0 = Instant::now(); + for i in start_idx..end_idx { + key[..4].copy_from_slice(&i.to_le_bytes()); + store.set_in_list("mylist", &key, &value).unwrap(); + } + let duration = t0.elapsed(); + sets_us.fetch_add( + duration.as_micros() as u64, + std::sync::atomic::Ordering::Relaxed, + ); + } + + { + let t0 = Instant::now(); + for i in start_idx..end_idx { + 
key[..4].copy_from_slice(&i.to_le_bytes()); + store.get_from_list("mylist", &key).unwrap(); + } + let duration = t0.elapsed(); + gets_us.fetch_add( + duration.as_micros() as u64, + std::sync::atomic::Ordering::Relaxed, + ); + } + + { + let t0 = Instant::now(); + for i in start_idx..end_idx { + key[..4].copy_from_slice(&i.to_le_bytes()); + store.remove_from_list("mylist", &key).unwrap(); + } + let duration = t0.elapsed(); + removes_us.fetch_add( + duration.as_micros() as u64, + std::sync::atomic::Ordering::Relaxed, + ); + } + }); + handles.push(handle); + } + + println!( + "Testing a list using {} threads, each with {} items (value size: {})", + n_threads, n, val_size + ); + for handle in handles { + handle.join().unwrap(); + } + + println!( + " Sets: {} us/op", + sets_us.load(std::sync::atomic::Ordering::Relaxed) as f64 / (n_threads * n as usize) as f64 + ); + println!( + " Gets: {} us/op", + gets_us.load(std::sync::atomic::Ordering::Relaxed) as f64 / (n_threads * n as usize) as f64 + ); + println!( + " Removes: {} us/op\n", + removes_us.load(std::sync::atomic::Ordering::Relaxed) as f64 + / (n_threads * n as usize) as f64 + ); + + Ok(()) +} + +fn main() -> Result<(), Box> { + let dir = tempfile::tempdir()?; + let store = Arc::new(CandyStore::open(dir.path(), Config::default())?); + + // single threaded + run_perf(store.clone(), 1_000_000, 1, 16, 16)?; + run_perf(store.clone(), 100_000, 1, 1024, 4096)?; + + // multi threaded + run_perf(store.clone(), 250_000, 4, 16, 16)?; + //run_perf(store.clone(), 10_000, 20, 16, 16)?; + + // queues + run_queue_perf(store.clone(), 500_000, 1, 16)?; + run_queue_perf(store.clone(), 100_000, 4, 16)?; + + // lists + run_list_perf(store.clone(), 500_000, 1, 16, 16)?; + run_list_perf(store.clone(), 100_000, 4, 16, 16)?; + + Ok(()) +} diff --git a/examples/simple.rs b/examples/simple.rs index df1e568..3b6a4bc 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -3,12 +3,9 @@ use core::str; use candystore::{CandyStore, Config, 
Result}; fn main() -> Result<()> { + _ = std::fs::remove_dir_all("/tmp/candy-dir"); let db = CandyStore::open("/tmp/candy-dir", Config::default())?; - // clear the DB just in case we has something there before. in real-life scenarios you would probably - // not clear the DB every time - db.clear()?; - println!("{:?}", db.get("mykey")?); // None db.set("mykey", "myval")?; @@ -20,9 +17,9 @@ fn main() -> Result<()> { println!("{:?}", db.get("mykey")?); // None for i in 0..10 { - db.set(&format!("mykey{i}"), &format!("myval{i}"))?; + db.set(format!("mykey{i}"), format!("myval{i}"))?; } - for res in db.iter() { + for res in db.iter_items() { let (k, v) = res?; println!( "{} = {}", diff --git a/examples/typed.rs b/examples/typed.rs index f5085d2..71bbf7c 100644 --- a/examples/typed.rs +++ b/examples/typed.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use candystore::{CandyStore, CandyTypedStore, Config, Result}; fn main() -> Result<()> { + _ = std::fs::remove_dir_all("/tmp/candy-dir"); let db = Arc::new(CandyStore::open("/tmp/candy-dir", Config::default())?); let typed = CandyTypedStore::>::new(db); diff --git a/mini-candy/Cargo.toml b/mini-candy/Cargo.toml deleted file mode 100644 index 8fe30fd..0000000 --- a/mini-candy/Cargo.toml +++ /dev/null @@ -1,8 +0,0 @@ -[package] -name = "mini-candy" -version = "0.1.0" -edition = "2021" - -[dependencies] -memmap = "0.7.0" -siphasher = "1.0.1" diff --git a/mini-candy/README.md b/mini-candy/README.md deleted file mode 100644 index f0e32e9..0000000 --- a/mini-candy/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Mini Candy -A very minimal implementation of the algorithm in ~250 lines of code, for educational purposes diff --git a/mini-candy/src/main.rs b/mini-candy/src/main.rs deleted file mode 100644 index 263098d..0000000 --- a/mini-candy/src/main.rs +++ /dev/null @@ -1,294 +0,0 @@ -//! a very minimal implementation of CandyStore, for educational purposes. handles single-threaded get/set/remove/iter -//! 
-use std::{ - cell::RefCell, - fs::{File, OpenOptions}, - io::{Seek, Write}, - os::unix::fs::FileExt, - path::{Path, PathBuf}, -}; - -use memmap::{MmapMut, MmapOptions}; -use siphasher::sip::SipHasher24; - -type Result = std::io::Result; -const WIDTH: usize = 512; -const ROWS: usize = 64; - -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] -struct PartedHash(u64); - -impl PartedHash { - const INVALID_SIG: u32 = 0; - fn new(buf: &[u8]) -> Self { - Self(SipHasher24::new().hash(buf)) - } - fn sig(&self) -> u32 { - if self.0 as u32 == Self::INVALID_SIG { - 0x12345678 // can't return INVALID_SIG - } else { - self.0 as u32 - } - } - fn row(&self) -> usize { - (self.0 as usize >> 32) % ROWS - } - fn shard(&self) -> u32 { - (self.0 >> 48) as u32 - } -} - -#[derive(Debug, Clone, Copy)] -#[repr(C)] -struct Descriptor { - offset: u32, - klen: u16, - vlen: u16, -} - -#[repr(C)] -struct ShardRow { - sigs: [u32; WIDTH], - descs: [Descriptor; WIDTH], -} - -#[repr(C)] -struct ShardHeader { - rows: [ShardRow; ROWS], -} - -struct ShardFile { - start: u32, - end: u32, - file: RefCell, - mmap: MmapMut, -} - -type Buf = Vec; -type KV = (Buf, Buf); - -impl ShardFile { - const HEADER_SIZE: u64 = size_of::() as u64; - - fn open(dirpath: impl AsRef, start: u32, end: u32) -> Result { - let filepath = dirpath.as_ref().join(format!("{start}-{end}")); - let mut file = OpenOptions::new() - .read(true) - .write(true) - .create(true) - .truncate(true) - .open(filepath)?; - file.set_len(Self::HEADER_SIZE)?; - file.seek(std::io::SeekFrom::End(0))?; - let mmap = unsafe { - MmapOptions::new() - .len(Self::HEADER_SIZE as usize) - .map_mut(&file) - }?; - Ok(Self { - start, - end, - file: RefCell::new(file), - mmap, - }) - } - - fn header_row(&self, r: usize) -> &mut ShardRow { - &mut unsafe { &mut *(self.mmap.as_ptr() as *const ShardHeader as *mut ShardHeader) }.rows[r] - } - - fn read(&self, desc: Descriptor) -> Result { - let mut k = vec![0; desc.klen as usize]; - let mut v = vec![0; desc.vlen 
as usize]; - let f = self.file.borrow(); - f.read_exact_at(&mut k, desc.offset as u64)?; - f.read_exact_at(&mut v, desc.offset as u64 + desc.klen as u64)?; - Ok((k, v)) - } - fn write(&self, key: &[u8], val: &[u8]) -> Result { - let mut f = self.file.borrow_mut(); - let offset = f.stream_position()?; - f.write_all(key)?; - f.write_all(val)?; - Ok(Descriptor { - offset: offset as u32, - klen: key.len() as u16, - vlen: val.len() as u16, - }) - } - - fn get(&self, ph: PartedHash, key: &[u8]) -> Result> { - let row = self.header_row(ph.row()); - for (i, s) in row.sigs.iter().enumerate() { - if *s == ph.sig() { - let desc = row.descs[i]; - let (k, v) = self.read(desc)?; - if k == key { - return Ok(Some(v)); - } - } - } - Ok(None) - } - - fn set(&mut self, ph: PartedHash, key: &[u8], val: &[u8]) -> Result { - let row = self.header_row(ph.row()); - for (i, s) in row.sigs.iter().enumerate() { - if *s == ph.sig() { - let desc = row.descs[i]; - let (k, _) = self.read(desc)?; - if k == key { - row.descs[i] = self.write(key, val)?; - return Ok(true); - } - } - } - - for (i, s) in row.sigs.iter_mut().enumerate() { - if *s == PartedHash::INVALID_SIG { - // insert new - *s = ph.sig(); - row.descs[i] = self.write(key, val)?; - return Ok(true); - } - } - - Ok(false) - } - - fn remove(&mut self, ph: PartedHash, key: &[u8]) -> Result { - let row = self.header_row(ph.row()); - for (i, s) in row.sigs.iter_mut().enumerate() { - if *s == ph.sig() { - let desc = row.descs[i]; - let (k, _) = self.read(desc)?; - if k == key { - *s = PartedHash::INVALID_SIG; - return Ok(true); - } - } - } - Ok(false) - } - - fn iter<'a>(&'a self) -> impl Iterator> + 'a { - (0..ROWS).map(|r| self.header_row(r)).flat_map(|row| { - row.sigs.iter().enumerate().filter_map(|(i, sig)| { - if *sig == PartedHash::INVALID_SIG { - return None; - } - Some(self.read(row.descs[i])) - }) - }) - } -} - -struct Store { - dirpath: PathBuf, - shards: Vec, -} - -impl Store { - const MAX_SHARD: u32 = u16::MAX as u32 + 1; - - fn 
open(dirpath: impl AsRef) -> Result { - let dirpath = dirpath.as_ref().to_path_buf(); - std::fs::create_dir_all(&dirpath)?; - let first_shard = ShardFile::open(&dirpath, 0, Self::MAX_SHARD)?; - Ok(Self { - dirpath, - shards: vec![first_shard], - }) - } - - fn get(&self, key: &[u8]) -> Result> { - let ph = PartedHash::new(key); - for shard in self.shards.iter() { - if ph.shard() < shard.end { - return shard.get(ph, key); - } - } - unreachable!(); - } - - fn remove(&mut self, key: &[u8]) -> Result { - let ph = PartedHash::new(key); - for shard in self.shards.iter_mut() { - if ph.shard() < shard.end { - return shard.remove(ph, key); - } - } - unreachable!(); - } - - fn split(&mut self, shard_idx: usize) -> Result<()> { - let removed_shard = self.shards.remove(shard_idx); - - let start = removed_shard.start; - let end = removed_shard.end; - let mid = (removed_shard.start + removed_shard.end) / 2; - println!("splitting [{start}, {end}) to [{start}, {mid}) and [{mid}, {end})"); - - let mut bottom = ShardFile::open(&self.dirpath, start, mid)?; - let mut top = ShardFile::open(&self.dirpath, mid, end)?; - - for res in removed_shard.iter() { - let (key, val) = res?; - let ph = PartedHash::new(&key); - if ph.shard() < mid { - bottom.set(ph, &key, &val)?; - } else { - top.set(ph, &key, &val)?; - } - } - - std::fs::remove_file(self.dirpath.join(format!("{start}-{end}")))?; - - self.shards.push(bottom); - self.shards.push(top); - self.shards.sort_by(|x, y| x.end.cmp(&y.end)); - Ok(()) - } - - fn set(&mut self, key: &[u8], val: &[u8]) -> Result { - let ph = PartedHash::new(key); - loop { - let mut shard_to_split = None; - for (i, shard) in self.shards.iter_mut().enumerate() { - if ph.shard() < shard.end { - if shard.set(ph, key, val)? 
{ - return Ok(true); - } - shard_to_split = Some(i); - break; - } - } - self.split(shard_to_split.unwrap())?; - } - } - - fn iter<'a>(&'a self) -> impl Iterator> + 'a { - self.shards.iter().flat_map(|shard| shard.iter()) - } -} - -fn main() -> Result<()> { - let mut db = Store::open("/tmp/mini-dbdir")?; - db.set(b"hello", b"world")?; - - println!("{:?}", db.get(b"hello")?); - println!("{:?}", db.get(b"nonexistent")?); - - db.remove(b"hello")?; - println!("{:?}", db.get(b"hello")?); - - println!("{}", db.iter().count()); - - for i in 0..100_000u32 { - db.set(&i.to_le_bytes(), &(i * 2).to_le_bytes())?; - } - - println!("{}", db.iter().count()); - - Ok(()) -} diff --git a/simulator/Cargo.lock b/simulator/Cargo.lock deleted file mode 100644 index c8878b3..0000000 --- a/simulator/Cargo.lock +++ /dev/null @@ -1,140 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "getrandom" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "libc" -version = "0.2.155" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" - -[[package]] -name = "ppv-lite86" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee4364d9f3b902ef14fab8a1ddffb783a1cb6b4bba3bfc1fa3922732c7de97f" -dependencies = [ - 
"zerocopy", -] - -[[package]] -name = "proc-macro2" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "simulator" -version = "0.1.0" -dependencies = [ - "rand", -] - -[[package]] -name = "syn" -version = "2.0.72" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "zerocopy" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854e949ac82d619ee9a14c66a1b674ac730422372ccb759ce0c39cabcf2bf8e6" -dependencies = [ - "byteorder", - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml deleted file mode 100644 index bde7056..0000000 --- a/simulator/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "simulator" -version = "0.1.0" -edition = "2021" - -[dependencies] -rand = "0.8.5" diff --git a/simulator/README.md b/simulator/README.md deleted file mode 100644 index a86e22e..0000000 --- a/simulator/README.md +++ /dev/null @@ -1,51 +0,0 @@ -# Candy Simulator - -* Tests the fill level that each shard can have using different params -* Tests the number of collisions in the same row (signatures) -* Tests the performance of position_simd for various sizes (compile with `--release`) - -# Results -``` -r= 32 w= 32 avg=0.687102 elems= 1024 sz= 12KB collisions=0 collisions-probability=0.000000115483993 -r= 32 w= 64 avg=0.755089 elems= 2048 sz= 24KB collisions=0 collisions-probability=0.000000469386467 -r= 32 w= 128 avg=0.832785 elems= 4096 sz= 48KB collisions=0 collisions-probability=0.000001892445681 GOOD -r= 32 w= 256 avg=0.871744 elems= 8192 sz= 96KB collisions=0 collisions-probability=0.000007599563332 GOOD -r= 32 w= 512 avg=0.907163 elems= 16384 sz= 192KB collisions=0 collisions-probability=0.000030457509641 GOOD -r= 32 w=1024 avg=0.935280 elems= 32768 sz= 384KB collisions=0 collisions-probability=0.000121943667477 GOOD -r= 64 w= 32 avg=0.647315 elems= 2048 sz= 24KB collisions=0 
collisions-probability=0.000000115483993 -r= 64 w= 64 avg=0.728652 elems= 4096 sz= 48KB collisions=0 collisions-probability=0.000000469386467 -r= 64 w= 128 avg=0.805568 elems= 8192 sz= 96KB collisions=0 collisions-probability=0.000001892445681 GOOD -r= 64 w= 256 avg=0.853133 elems= 16384 sz= 192KB collisions=0 collisions-probability=0.000007599563332 GOOD -r= 64 w= 512 avg=0.899420 elems= 32768 sz= 384KB collisions=0 collisions-probability=0.000030457509641 GOOD -r= 64 w=1024 avg=0.927043 elems= 65536 sz= 768KB collisions=6 collisions-probability=0.000121943667477 GOOD -r= 128 w= 32 avg=0.615332 elems= 4096 sz= 48KB collisions=0 collisions-probability=0.000000115483993 -r= 128 w= 64 avg=0.708627 elems= 8192 sz= 96KB collisions=0 collisions-probability=0.000000469386467 -r= 128 w= 128 avg=0.784355 elems= 16384 sz= 192KB collisions=0 collisions-probability=0.000001892445681 -r= 128 w= 256 avg=0.843362 elems= 32768 sz= 384KB collisions=0 collisions-probability=0.000007599563332 GOOD -r= 128 w= 512 avg=0.884743 elems= 65536 sz= 768KB collisions=0 collisions-probability=0.000030457509641 GOOD -r= 128 w=1024 avg=0.920297 elems= 131072 sz=1536KB collisions=3 collisions-probability=0.000121943667477 GOOD BIG -r= 256 w= 32 avg=0.599061 elems= 8192 sz= 96KB collisions=0 collisions-probability=0.000000115483993 -r= 256 w= 64 avg=0.688738 elems= 16384 sz= 192KB collisions=0 collisions-probability=0.000000469386467 -r= 256 w= 128 avg=0.768617 elems= 32768 sz= 384KB collisions=0 collisions-probability=0.000001892445681 -r= 256 w= 256 avg=0.832496 elems= 65536 sz= 768KB collisions=0 collisions-probability=0.000007599563332 GOOD -r= 256 w= 512 avg=0.877548 elems= 131072 sz=1536KB collisions=0 collisions-probability=0.000030457509641 GOOD BIG -r= 256 w=1024 avg=0.914863 elems= 262144 sz=3072KB collisions=6 collisions-probability=0.000121943667477 GOOD BIG -``` - -``` -width= 32 time per simd= 4ns -width= 64 time per simd= 21ns -width= 128 time per simd= 26ns -width= 256 time per 
simd= 36ns -width= 512 time per simd= 59ns -width=1024 time per simd= 100ns -``` - -``` -width= 32 time per non-simd= 25ns -width= 64 time per non-simd= 53ns -width= 128 time per non-simd= 85ns -width= 256 time per non-simd= 145ns -width= 512 time per non-simd= 266ns -width=1024 time per non-simd= 507ns -``` diff --git a/simulator/rust-toolchain.toml b/simulator/rust-toolchain.toml deleted file mode 100644 index 5d56faf..0000000 --- a/simulator/rust-toolchain.toml +++ /dev/null @@ -1,2 +0,0 @@ -[toolchain] -channel = "nightly" diff --git a/simulator/src/main.rs b/simulator/src/main.rs deleted file mode 100644 index 8da6c1c..0000000 --- a/simulator/src/main.rs +++ /dev/null @@ -1,226 +0,0 @@ -#![feature(btree_cursors)] -use std::{collections::BTreeMap, sync::atomic::AtomicUsize, time::Instant, u32}; - -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] -struct PartedHash { - shard_idx: u32, - row_idx: u32, - signature: u32, -} - -impl PartedHash { - fn new_random() -> Self { - Self { - shard_idx: rand::random(), - row_idx: rand::random(), - signature: rand::random(), - } - } -} - -#[derive(Debug, Default, Clone)] -struct ShardRow { - entries: Vec, -} - -static TOTAL_COLLISIONS: AtomicUsize = AtomicUsize::new(0); - -#[derive(Debug)] -struct Shard { - row_width: usize, - total: usize, - rows: Vec, -} -impl Shard { - fn new(num_rows: usize, row_width: usize) -> Self { - Self { - row_width, - total: 0, - rows: vec![ShardRow::default(); num_rows], - } - } - fn add(&mut self, h: PartedHash) -> bool { - let len = self.rows.len(); - let row = &mut self.rows[(h.row_idx as usize) % len]; - if row.entries.len() >= self.row_width { - false - } else { - if row - .entries - .iter() - .find(|h2| h2.signature == h.signature) - .is_some() - { - TOTAL_COLLISIONS.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - } - row.entries.push(h); - self.total += 1; - true - } - } -} - -struct DB { - num_rows: usize, - row_width: usize, - total: usize, - num_splits: usize, - 
fill_level_on_split: usize, - fill_levels: Vec, - shards: BTreeMap, -} -impl DB { - fn new(num_rows: usize, row_width: usize) -> Self { - let mut bt = BTreeMap::new(); - bt.insert(1 << 32, Shard::new(num_rows, row_width)); - Self { - num_rows, - row_width, - total: 0, - num_splits: 0, - fill_level_on_split: 0, - fill_levels: vec![], - shards: bt, - } - } - fn add(&mut self, to_add: PartedHash) { - let (key_before, key_after) = { - let shard_idx = to_add.shard_idx as u64; - let mut cursor = self - .shards - .lower_bound_mut(std::ops::Bound::Excluded(&shard_idx)); - let key_before = cursor.peek_prev().map(|(k, _)| *k).unwrap_or(0); - let Some((key_after, shard)) = cursor.peek_next() else { - panic!("no key_after for 0x{:x}", to_add.shard_idx); - }; - - if shard.add(to_add) { - self.total += 1; - return; - } - (key_before, *key_after) - }; - - let prev_shard = self.shards.remove(&key_after).unwrap(); - let midpoint = (key_before / 2) + (key_after / 2); - self.shards - .insert(midpoint, Shard::new(self.num_rows, self.row_width)); - self.shards - .insert(key_after, Shard::new(self.num_rows, self.row_width)); - - self.num_splits += 1; - self.fill_level_on_split += prev_shard.total; - - /*println!( - "split ({:3}) 0x{key_before:08x}..0x{midpoint:08x}..0x{key_after:09x} [total: {:8}, shard avg fill: {:.4}, shard size: {}]", - self.num_splits, - self.total, - ((self.fill_level_on_split as f64) / (self.num_splits as f64)) - / ((self.num_rows * self.row_width) as f64), - self.num_rows * self.row_width - );*/ - self.fill_levels.push( - ((self.fill_level_on_split as f64) / (self.num_splits as f64)) - / ((self.num_rows * self.row_width) as f64), - ); - self.total -= prev_shard.total; - - for row in prev_shard.rows.iter() { - for h in row.entries.iter() { - self.add(*h); - } - } - self.add(to_add); - } -} - -fn main() { - for rows in [32, 64, 128, 256] { - for width in [32, 64, 128, 256, 512, 1024] { - let mut db = DB::new(rows, width); - let mut added = 0; - 
TOTAL_COLLISIONS.store(0, std::sync::atomic::Ordering::SeqCst); - for _ in 0..100 { - for _ in 0..db.num_rows * db.row_width { - db.add(PartedHash::new_random()); - added += 1; - } - } - - let mut summed = 0; - for (_, sh) in db.shards.iter() { - summed += sh.total; - } - - let mut summed_last_fills = 0.0; - for lf in db.fill_levels.iter() { - summed_last_fills += lf; - } - - assert_eq!(db.total, summed); - assert_eq!(db.total, added); - let avg = summed_last_fills / (db.fill_levels.len() as f64); - let sz = (db.num_rows * db.row_width * 12) / 1024; - println!( - "r={rows:4} w={width:4} avg={:.6} elems={:7} sz={:4}KB collisions={} collisions-probability={:.015} {} {}", - avg, - db.num_rows * db.row_width, - sz, - TOTAL_COLLISIONS.load(std::sync::atomic::Ordering::SeqCst), - 1.0 - (-(width as f64) * (width as f64 - 1.0) / ((1u64 << 33) as f64)).exp(), - if avg > 0.8 {"GOOD"} else {""}, - if sz > 800 {"BIG"} else {""}, - ); - } - } - - let reps = 10_000_000usize; - for width in [32, 64, 128, 256, 512, 1024] { - let mut v = vec![0u32; width]; - for i in 0..width { - v[i] = i as u32; - } - v[width - 1] = 80808080; - assert_eq!(v.iter().position(|x| *x == 80808080), Some(width - 1)); - assert_eq!(v.iter().position(|x| *x == 80808081), None); - let mut pos: usize = 0; - - let t0 = Instant::now(); - for _ in 0..reps { - pos += v.iter().position(|x| *x == 80808080).unwrap_or(0); - pos += v.iter().position(|x| *x == 80808081).unwrap_or(0); - } - - println!( - "width={width:4} time per simd={:4}ns", - Instant::now().duration_since(t0).as_nanos() as usize / reps, - ); - - assert_eq!(pos, (width - 1) * reps); - } - - let reps = 10_000_000usize; - for width in [32, 64, 128, 256, 512, 1024] { - let mut v = vec![0u32; width]; - for i in 0..width { - v[i] = i as u32; - } - v[width - 1] = 80808080; - assert_eq!(v.iter().position(|x| *x == 80808080), Some(width - 1)); - assert_eq!(v.iter().position(|x| *x == 80808081), None); - let mut pos: usize = 0; - - let t0 = Instant::now(); - 
for _ in 0..reps { - pos += v.iter().position(|x| *x == 80808080).unwrap_or(0); - pos += v.iter().position(|x| *x == 80808081).unwrap_or(0); - } - - println!( - "width={width:4} time per non-simd={:4}ns", - Instant::now().duration_since(t0).as_nanos() as usize / reps, - ); - - assert_eq!(pos, (width - 1) * reps); - } -} diff --git a/src/data_file.rs b/src/data_file.rs new file mode 100644 index 0000000..0bc0d72 --- /dev/null +++ b/src/data_file.rs @@ -0,0 +1,459 @@ +use smallvec::SmallVec; +use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; + +use std::{ + fs::File, + mem::size_of, + path::Path, + sync::{ + Arc, + atomic::{AtomicU64, Ordering}, + }, +}; + +use crate::internal::{ + DATA_ENTRY_OFFSET_MAGIC, DATA_ENTRY_OFFSET_MASK, DATA_FILE_SIGNATURE, DATA_FILE_VERSION, + EntryType, FILE_OFFSET_ALIGNMENT, KEY_NAMESPACE_BITS, KVBuf, KVRef, KeyNamespace, + MAX_KEY_NAMESPACE, PAGE_SIZE, READ_BUFFER_SIZE, SIZE_HINT_UNIT, data_file_path, + invalid_data_error, read_available_at, read_into_at, sync_dir, write_all_at, +}; +use crate::types::{Config, Error, MAX_USER_KEY_SIZE, MAX_USER_VALUE_SIZE, Result}; + +const INLINE_SCRATCH_BUFFER_SIZE: usize = 1024; + +struct ParsedDataEntry { + data_len: usize, + vlen: u16, + ns: u8, +} + +#[derive(Clone, Copy, FromBytes, IntoBytes, KnownLayout, Immutable)] +#[repr(C)] +struct DataFileHeader { + magic: [u8; 8], + version: u32, + _padding0: u32, + ordinal: u64, + _trailer: [u8; 4096 - 24], +} + +const _: () = assert!(size_of::() == PAGE_SIZE); + +pub(crate) struct DataFile { + pub(crate) file: File, + file_offset: AtomicU64, + config: Arc, + pub(crate) file_idx: u16, + pub(crate) file_ordinal: u64, +} + +impl DataFile { + fn parse_data_entry(buf: &[u8], offset: u64) -> Result { + if buf.len() < 8 { + return Err(Error::IOError(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "entry too short", + ))); + } + + let header = u32::from_le_bytes(buf[0..4].try_into().unwrap()); + let magic_offset = (((offset / 
FILE_OFFSET_ALIGNMENT) as u32) ^ DATA_ENTRY_OFFSET_MAGIC) + & DATA_ENTRY_OFFSET_MASK; + + if header & DATA_ENTRY_OFFSET_MASK != magic_offset { + return Err(Error::IOError(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "corrupt entry magic", + ))); + } + + let klen = u16::from_le_bytes(buf[4..6].try_into().unwrap()); + let vlen = u16::from_le_bytes(buf[6..8].try_into().unwrap()); + let entry_len = 4 + 4 + klen as usize + vlen as usize + 2; + if buf.len() < entry_len { + return Err(Error::IOError(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "entry too short", + ))); + } + + let checksum = u16::from_le_bytes(buf[entry_len - 2..entry_len].try_into().unwrap()); + if checksum != crc16_ibm3740_fast::hash(&buf[..entry_len - 2]) as u16 { + return Err(Error::IOError(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "checksum mismatch", + ))); + } + + let ns = ((header >> 24) & ((1 << KEY_NAMESPACE_BITS) - 1)) as u8; + let entry_type = (header >> 30) & 0b11; + if entry_type != EntryType::Data as u32 { + return Err(Error::IOError(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "invalid entry type", + ))); + } + + Ok(ParsedDataEntry { + data_len: 8 + vlen as usize + klen as usize, + vlen, + ns, + }) + } + + pub(crate) fn open(base_path: &Path, config: Arc, file_idx: u16) -> Result { + let file = File::options() + .read(true) + .write(true) + .open(data_file_path(base_path, file_idx)) + .map_err(Error::IOError)?; + let header = + read_available_at(&file, size_of::(), 0).map_err(Error::IOError)?; + if header.len() < size_of::() { + return Err(Error::IOError(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "data file header too short", + ))); + } + let header = DataFileHeader::read_from_bytes(&header).map_err(|_| { + Error::IOError(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "invalid data file header size", + )) + })?; + if &header.magic != DATA_FILE_SIGNATURE || header.version != DATA_FILE_VERSION { + return 
Err(Error::IOError(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "invalid data file header", + ))); + } + let mut file_offset = file + .metadata() + .map_err(Error::IOError)? + .len() + .saturating_sub(size_of::() as u64); + file_offset -= file_offset % FILE_OFFSET_ALIGNMENT; + file.set_len(size_of::() as u64 + file_offset) + .map_err(Error::IOError)?; + + Ok(Self { + file, + file_offset: AtomicU64::new(file_offset), + config, + file_idx, + file_ordinal: header.ordinal, + }) + } + + pub(crate) fn create( + base_path: &Path, + config: Arc, + file_idx: u16, + ordinal: u64, + ) -> Result { + let file = File::options() + .create(true) + .truncate(true) + .read(true) + .write(true) + .open(data_file_path(base_path, file_idx)) + .map_err(Error::IOError)?; + file.set_len(size_of::() as u64) + .map_err(Error::IOError)?; + let header = DataFileHeader { + magic: *DATA_FILE_SIGNATURE, + version: DATA_FILE_VERSION, + _padding0: 0, + ordinal, + _trailer: [0; 4096 - 24], + }; + write_all_at(&file, header.as_bytes(), 0).map_err(Error::IOError)?; + file.sync_all().map_err(Error::IOError)?; + sync_dir(base_path)?; + Ok(Self { + file, + file_offset: AtomicU64::new(0), + config, + file_idx, + file_ordinal: ordinal, + }) + } + + fn allocate(&self, len: u64) -> Result { + let mut file_offset = self.file_offset.load(Ordering::Relaxed); + loop { + if file_offset + len > self.config.max_data_file_size as u64 { + return Err(Error::RotateDataFile(self.file_idx)); + } + match self.file_offset.compare_exchange( + file_offset, + file_offset + len, + Ordering::Relaxed, + Ordering::Relaxed, + ) { + Ok(_) => return Ok(file_offset), + Err(current) => file_offset = current, + } + } + } + + fn append_entry( + &self, + entry_type: EntryType, + ns: KeyNamespace, + key: &[u8], + val: Option<&[u8]>, + ) -> Result<(u64, usize)> { + debug_assert!(key.len() <= MAX_USER_KEY_SIZE); + debug_assert!(ns as u8 <= MAX_KEY_NAMESPACE); + + let val_len = val.map_or(0, |v| v.len()); + if let Some(v) = val 
{ + debug_assert!(v.len() <= MAX_USER_VALUE_SIZE); + } + + let entry_len = 4 + if val.is_some() { 4 } else { 2 } + val_len + key.len() + 2; + let aligned_len = entry_len.next_multiple_of(FILE_OFFSET_ALIGNMENT as usize); + let file_offset = self.allocate(aligned_len as u64)?; + debug_assert!(file_offset % FILE_OFFSET_ALIGNMENT == 0); + + let mut buf = SmallVec::<[u8; INLINE_SCRATCH_BUFFER_SIZE]>::with_capacity(aligned_len); + // We overwrite the entry bytes below and only zero the alignment padding. + unsafe { buf.set_len(aligned_len) }; + let buf = &mut buf[..]; + + let magic_offset = (((file_offset / FILE_OFFSET_ALIGNMENT) as u32) + ^ DATA_ENTRY_OFFSET_MAGIC) + & DATA_ENTRY_OFFSET_MASK; + let header = magic_offset | ((entry_type as u32) << 30) | ((ns as u32) << 24); + + buf[0..4].copy_from_slice(&header.to_le_bytes()); + buf[4..6].copy_from_slice(&(key.len() as u16).to_le_bytes()); + + if let Some(v) = val { + buf[6..8].copy_from_slice(&(v.len() as u16).to_le_bytes()); + buf[8..8 + v.len()].copy_from_slice(v); + buf[8 + v.len()..8 + v.len() + key.len()].copy_from_slice(key); + } else { + buf[6..6 + key.len()].copy_from_slice(key); + } + + buf[entry_len..aligned_len].fill(0); + let checksum = crc16_ibm3740_fast::hash(&buf[..entry_len - 2]) as u16; + buf[entry_len - 2..entry_len].copy_from_slice(&checksum.to_le_bytes()); + + write_all_at( + &self.file, + buf, + size_of::() as u64 + file_offset, + ) + .map_err(Error::IOError)?; + + Ok((file_offset, aligned_len)) + } + + pub(crate) fn append_kv( + &self, + ns: KeyNamespace, + key: &[u8], + val: &[u8], + ) -> Result<(u64, usize)> { + self.append_entry(EntryType::Data, ns, key, Some(val)) + } + + pub(crate) fn append_tombstone(&self, ns: KeyNamespace, key: &[u8]) -> Result { + self.append_entry(EntryType::Tombstone, ns, key, None) + .map(|(_, len)| len) + } + + pub(crate) fn read_kv_into<'a>( + &self, + offset: u64, + size_hint: usize, + buf: &'a mut Vec, + ) -> Result> { + debug_assert!(size_hint >= SIZE_HINT_UNIT); + 
read_into_at( + &self.file, + buf, + size_hint, + size_of::() as u64 + offset, + ) + .map_err(Error::IOError)?; + let parsed = Self::parse_data_entry(buf, offset)?; + buf.truncate(parsed.data_len); + Ok(KVRef { + buf, + vlen: parsed.vlen, + header_len: 8, + ns: parsed.ns, + entry_type: EntryType::Data, + }) + } + + pub(crate) fn read_kv(&self, offset: u64, size_hint: usize) -> Result { + debug_assert!(size_hint >= SIZE_HINT_UNIT); + let mut buf = read_available_at( + &self.file, + size_hint, + size_of::() as u64 + offset, + ) + .map_err(Error::IOError)?; + let parsed = Self::parse_data_entry(&buf, offset)?; + buf.truncate(parsed.data_len); + Ok(KVBuf { + buf, + vlen: parsed.vlen, + header_len: 8, + ns: parsed.ns, + entry_type: EntryType::Data, + }) + } + + fn ensure_verified_entry( + &self, + read_buf: &mut Vec, + buf_file_offset: &mut u64, + rel: usize, + entry_len: usize, + offset: u64, + ) -> Result> { + let start = if read_buf.len() - rel >= entry_len { + rel + } else { + read_into_at( + &self.file, + read_buf, + entry_len, + size_of::() as u64 + offset, + ) + .map_err(Error::IOError)?; + *buf_file_offset = offset; + if read_buf.len() < entry_len { + return Ok(None); + } + 0 + }; + + let entry_bytes = &read_buf[start..start + entry_len]; + let checksum = + u16::from_le_bytes(entry_bytes[entry_len - 2..entry_len].try_into().unwrap()); + if checksum != crc16_ibm3740_fast::hash(&entry_bytes[..entry_len - 2]) as u16 { + return Ok(None); + } + + Ok(Some(start)) + } + + pub(crate) fn read_next_entry_ref<'a>( + &self, + mut offset: u64, + read_buf: &'a mut Vec, + buf_file_offset: &mut u64, + ) -> Result, u64, u64)>> { + offset = offset.next_multiple_of(FILE_OFFSET_ALIGNMENT); + + loop { + let buf_start = if offset >= *buf_file_offset { + (offset - *buf_file_offset) as usize + } else { + read_buf.clear(); + 0 + }; + + if buf_start >= read_buf.len() || read_buf.len() - buf_start < 8 { + read_into_at( + &self.file, + read_buf, + READ_BUFFER_SIZE, + size_of::() as u64 + 
offset, + ) + .map_err(Error::IOError)?; + *buf_file_offset = offset; + if read_buf.len() < 8 { + return Ok(None); + } + } + + let rel = (offset - *buf_file_offset) as usize; + let avail = &read_buf[rel..]; + + let header = u32::from_le_bytes(avail[0..4].try_into().unwrap()); + let magic_offset = (((offset / FILE_OFFSET_ALIGNMENT) as u32) + ^ DATA_ENTRY_OFFSET_MAGIC) + & DATA_ENTRY_OFFSET_MASK; + if header & DATA_ENTRY_OFFSET_MASK != magic_offset { + offset += FILE_OFFSET_ALIGNMENT; + continue; + } + + let ns = ((header >> 24) & ((1 << KEY_NAMESPACE_BITS) - 1)) as u8; + let entry_type = (header >> 30) & 0b11; + + match entry_type { + x if x == EntryType::Data as u32 => { + let klen = u16::from_le_bytes(avail[4..6].try_into().unwrap()); + let vlen = u16::from_le_bytes(avail[6..8].try_into().unwrap()); + let entry_len = 4 + 4 + klen as usize + vlen as usize + 2; + + let Some(start) = self.ensure_verified_entry( + read_buf, + buf_file_offset, + rel, + entry_len, + offset, + )? + else { + offset += FILE_OFFSET_ALIGNMENT; + continue; + }; + let buf = &read_buf[start..start + 8 + vlen as usize + klen as usize]; + + return Ok(Some(( + KVRef { + buf, + vlen, + header_len: 8, + ns, + entry_type: EntryType::Data, + }, + offset, + offset + entry_len as u64, + ))); + } + x if x == EntryType::Tombstone as u32 => { + let klen = u16::from_le_bytes(avail[4..6].try_into().unwrap()); + let entry_len = 4 + 2 + klen as usize + 2; + + let Some(start) = self.ensure_verified_entry( + read_buf, + buf_file_offset, + rel, + entry_len, + offset, + )? 
+ else { + offset += FILE_OFFSET_ALIGNMENT; + continue; + }; + let buf = &read_buf[start..start + 6 + klen as usize]; + + return Ok(Some(( + KVRef { + buf, + vlen: 0, + header_len: 6, + ns, + entry_type: EntryType::Tombstone, + }, + offset, + offset + entry_len as u64, + ))); + } + _ => { + return Err(invalid_data_error("unknown data entry type")); + } + } + } + } +} diff --git a/src/hashing.rs b/src/hashing.rs deleted file mode 100644 index 0ed1505..0000000 --- a/src/hashing.rs +++ /dev/null @@ -1,100 +0,0 @@ -use siphasher::sip128::{Hash128, SipHasher24}; - -use crate::shard::NUM_ROWS; - -use bytemuck::{Pod, Zeroable}; - -pub type HashSeed = [u8; 16]; - -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Pod, Zeroable, Hash)] -#[repr(transparent)] -pub(crate) struct PartedHash(u64); - -// impl std::fmt::Display for PartedHash { -// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -// write!( -// f, -// "{:04x}.{:04x}.{:08x}", -// self.shard_selector(), -// self.row_selector(), -// self.signature() -// ) -// } -// } - -pub(crate) const INVALID_SIG: u32 = 0; - -#[cfg(feature = "whitebox_testing")] -pub static mut HASH_BITS_TO_KEEP: u64 = u64::MAX; // which bits to keep from the hash - for testing collisions - -impl PartedHash { - pub fn new(seed: &HashSeed, buf: &[u8]) -> Self { - Self::from_hash(SipHasher24::new_with_key(&seed).hash(buf)) - } - - #[inline] - pub fn is_valid(&self) -> bool { - self.signature() != INVALID_SIG - } - - #[inline] - pub fn shard_selector(&self) -> u32 { - ((self.0 >> 48) & 0xffff) as u32 - } - - #[inline] - pub fn row_selector(&self) -> usize { - (((self.0 >> 32) as u16) as usize) % NUM_ROWS - } - - #[inline] - pub fn signature(&self) -> u32 { - self.0 as u32 - } - - #[allow(dead_code)] - pub fn as_u64(&self) -> u64 { - self.0 - } - - fn from_hash(h: Hash128) -> Self { - let mut sig = h.h1 as u32; - if sig == INVALID_SIG { - sig = h.h2 as u32; - if sig == INVALID_SIG { - sig = (h.h2 >> 32) as u32; - if sig == 
INVALID_SIG { - sig = 0x6052_c9b7; // this is so unlikely that it doesn't really matter - } - } - } - let shard = h.h1 & 0xffff_0000_0000_0000; - let row = h.h1 & 0x0000_ffff_0000_0000; - let val = shard | row | (sig as u64); - - #[cfg(feature = "whitebox_testing")] - let val = (val & unsafe { HASH_BITS_TO_KEEP }) | 1 /* make sure sig != 0 */; - - Self(val) - } -} - -#[test] -fn test_parted_hash() -> crate::Result<()> { - use bytemuck::{bytes_of, from_bytes}; - - let h1 = PartedHash::new(b"aaaabbbbccccdddd", b"hello world"); - assert_eq!(h1.0, 13445180190757400308,); - let h2 = PartedHash(13445180190757400308); - assert_eq!(PartedHash::new(b"aaaabbbbccccdddd", b"hello world"), h2); - - let h3 = PartedHash(0x1020304050607080); - assert_eq!( - bytes_of(&h3), - [0x80, 0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10] - ); - let h4: PartedHash = *from_bytes(&[0x80, 0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10]); - assert_eq!(h4, h3); - - Ok(()) -} diff --git a/src/index_file.rs b/src/index_file.rs new file mode 100644 index 0000000..7b10524 --- /dev/null +++ b/src/index_file.rs @@ -0,0 +1,859 @@ +use memmap2::MmapMut; +use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; +use simd_itertools::PositionSimd; +use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, TryFromBytes}; + +use std::{ + fs::File, + mem::{offset_of, size_of}, + ops::{Deref, DerefMut}, + path::Path, + sync::{ + Arc, + atomic::{AtomicU32, AtomicU64, Ordering}, + }, +}; + +use crate::internal::{ + FILE_OFFSET_ALIGNMENT, HashCoord, INDEX_FILE_SIGNATURE, INDEX_FILE_VERSION, MAX_DATA_FILES, + MIN_INITIAL_ROWS, MIN_SPLIT_LEVEL, PAGE_SIZE, ROW_WIDTH, SIZE_HINT_UNIT, index_file_path, + index_rows_file_path, invalid_data_error, read_available_at, unexpected_eof_error, +}; +use crate::types::{Config, Error, Result}; + +#[derive(FromBytes, IntoBytes, KnownLayout)] +#[repr(C)] +pub(crate) struct IndexFileHeader { + pub(crate) signature: [u8; 8], + pub(crate) version: u32, + _padding0: u32, + pub(crate) 
hash_key_0: u64, + pub(crate) hash_key_1: u64, + pub(crate) dirty: AtomicU64, + _padding1: [u8; 64 - 40], + pub(crate) global_split_level: AtomicU64, + _padding2: [u8; 128 - 72], + pub(crate) num_created: AtomicU64, + pub(crate) num_removed: AtomicU64, + pub(crate) num_replaced: AtomicU64, + pub(crate) written_bytes: AtomicU64, + pub(crate) waste_bytes: AtomicU64, + pub(crate) reclaimed_bytes: AtomicU64, + _padding3: [u8; 192 - 176], + /// Histogram buckets: [<64, <256, <1K, <4K, <16K, >=16K] + pub(crate) size_histogram: [AtomicU64; 6], + _trailer: [u8; PAGE_SIZE - 240], +} + +const _: () = assert!(offset_of!(IndexFileHeader, global_split_level) == 64); +const _: () = assert!(offset_of!(IndexFileHeader, num_created) == 128); +const _: () = assert!(offset_of!(IndexFileHeader, size_histogram) == 192); +const _: () = assert!(size_of::() == PAGE_SIZE); + +#[derive(Debug, Clone, Copy, PartialEq, Eq, FromBytes, IntoBytes, KnownLayout, Immutable)] +#[repr(transparent)] +pub(crate) struct EntryPointer(pub(crate) u64); + +impl EntryPointer { + pub(crate) const INVALID_POINTER: Self = Self(0); + + pub(crate) fn new( + file_idx: u16, + file_offset: u64, + size: usize, + masked_row_selector: u32, + ) -> Self { + debug_assert!(size > 0 && size <= u8::MAX as usize * SIZE_HINT_UNIT); + + let fi = (file_idx as u64) & ((1 << 12) - 1); + let fo = ((file_offset / FILE_OFFSET_ALIGNMENT) & ((1 << 26) - 1)) << 12; + let sh = (size.div_ceil(SIZE_HINT_UNIT) as u64) << (12 + 26); + let rs = (masked_row_selector as u64) << (12 + 26 + 8); + Self(fi | fo | sh | rs) + } + + pub(crate) fn file_idx(&self) -> u16 { + (self.0 & ((1 << 12) - 1)) as u16 + } + + pub(crate) fn file_offset(&self) -> u64 { + ((self.0 >> 12) & ((1 << 26) - 1)) * FILE_OFFSET_ALIGNMENT + } + + pub(crate) fn size_hint(&self) -> usize { + ((self.0 >> (12 + 26)) & ((1 << 8) - 1)) as usize * SIZE_HINT_UNIT + } + + pub(crate) fn masked_row_selector(&self) -> u32 { + (self.0 >> (12 + 26 + 8)) as u32 + } + + pub(crate) fn 
is_valid(&self) -> bool { + self.0 != Self::INVALID_POINTER.0 + } +} + +#[derive(FromBytes, IntoBytes, KnownLayout)] +#[repr(C)] +pub(crate) struct RowLayout { + pub(crate) split_level: AtomicU64, + checksum: u64, + _padding: [u8; 48], + pub(crate) signatures: [u32; ROW_WIDTH], + pub(crate) pointers: [EntryPointer; ROW_WIDTH], +} + +const _: () = assert!(size_of::() == PAGE_SIZE); +const _: () = assert!(offset_of!(RowLayout, signatures) % 8 == 0); +const _: () = assert!(offset_of!(RowLayout, pointers) % 8 == 0); + +impl RowLayout { + fn expected_checksum(&self) -> u64 { + let mut checksum = self.split_level.load(Ordering::Relaxed); + for idx in 0..ROW_WIDTH { + checksum ^= self.signatures[idx] as u64 ^ self.pointers[idx].0; + } + checksum + } + + pub(crate) fn checksum_matches(&self) -> bool { + self.checksum == self.expected_checksum() + } + + pub(crate) fn iter_matches(&self, hash_coord: HashCoord) -> RowMatchIterator<'_> { + RowMatchIterator { + row: self, + hash_coord, + offset: 0, + } + } + + pub(crate) fn find_free_slot(&self) -> Option { + self.signatures + .iter() + .position_simd(|&sig| sig == HashCoord::INVALID_SIG) + } + + pub(crate) fn insert(&mut self, idx: usize, sig: u32, ptr: EntryPointer) { + debug_assert!(self.signatures[idx] == HashCoord::INVALID_SIG); + self.signatures[idx] = sig; + self.pointers[idx] = ptr; + self.checksum ^= sig as u64 ^ ptr.0; + } + + pub(crate) fn remove(&mut self, idx: usize) { + let sig = self.signatures[idx]; + let ptr = self.pointers[idx]; + self.checksum ^= sig as u64 ^ ptr.0; + self.signatures[idx] = HashCoord::INVALID_SIG; + self.pointers[idx] = EntryPointer::INVALID_POINTER; + } + + pub(crate) fn replace_pointer(&mut self, idx: usize, new_ptr: EntryPointer) { + let old_ptr = self.pointers[idx]; + self.checksum ^= old_ptr.0 ^ new_ptr.0; + self.pointers[idx] = new_ptr; + } + + pub(crate) fn set_split_level(&mut self, new_sl: u64) { + let old_sl = self.split_level.load(Ordering::Relaxed); + self.checksum ^= old_sl ^ 
new_sl; + self.split_level.store(new_sl, Ordering::Release); + } +} + +pub(crate) struct RowMatchIterator<'a> { + row: &'a RowLayout, + hash_coord: HashCoord, + offset: usize, +} + +impl Iterator for RowMatchIterator<'_> { + type Item = (usize, EntryPointer); + + fn next(&mut self) -> Option { + while self.offset < ROW_WIDTH { + if let Some(idx) = self.row.signatures[self.offset..] + .iter() + .position_simd(|&sig| sig == self.hash_coord.sig) + { + let real_idx = self.offset + idx; + self.offset = real_idx + 1; + let ptr = self.row.pointers[real_idx]; + if ptr.is_valid() + && ptr.masked_row_selector() == self.hash_coord.masked_row_selector() + { + return Some((real_idx, ptr)); + } + } else { + self.offset = ROW_WIDTH; + } + } + None + } +} + +#[derive(FromBytes, IntoBytes, KnownLayout)] +#[repr(C)] +pub(crate) struct IndexFileLayout { + pub(crate) header: IndexFileHeader, + pub(crate) waste_levels: [AtomicU32; MAX_DATA_FILES as usize], +} + +const _: () = assert!(size_of::() == PAGE_SIZE * 5); + +fn row_count_for_len(len: usize) -> usize { + len / size_of::() +} + +fn row_offset(idx: usize) -> usize { + idx * size_of::() +} + +fn row_bytes(bytes: &[u8], idx: usize) -> &[u8] { + let start = row_offset(idx); + let end = start + size_of::(); + &bytes[start..end] +} + +fn row_bytes_mut(bytes: &mut [u8], idx: usize) -> &mut [u8] { + let start = row_offset(idx); + let end = start + size_of::(); + &mut bytes[start..end] +} + +fn row_ref_bytes(bytes: &[u8], idx: usize) -> &RowLayout { + unsafe { &*(row_bytes(bytes, idx).as_ptr() as *const RowLayout) } +} + +fn row_mut_bytes(bytes: &mut [u8], idx: usize) -> &mut RowLayout { + RowLayout::try_mut_from_bytes(row_bytes_mut(bytes, idx)) + .expect("row bytes should contain an aligned row") +} + +unsafe fn row_mut_ptr(base_ptr: *const u8, idx: usize) -> *mut RowLayout { + unsafe { base_ptr.add(row_offset(idx)) as *mut RowLayout } +} + +pub(crate) struct RowsTableReadGuard<'a> { + index_file: &'a IndexFile, + pub(crate) row_guard: 
RwLockReadGuard<'a, MmapMut>, +} + +impl<'a> RowsTableReadGuard<'a> { + pub(crate) fn row(&self, idx: usize) -> RowReadGuard<'_> { + let row_guard = self.index_file.row_locks[idx & self.index_file.row_locks_mask].read(); + let row_count = row_count_for_len(self.row_guard.len()); + assert!( + idx < row_count, + "row index out of bounds: {idx} >= {row_count}" + ); + let row = row_ref_bytes(&self.row_guard[..], idx); + RowReadGuard { + _row_guard: row_guard, + row, + } + } + + pub(crate) fn shard_id(&self, idx: usize) -> usize { + idx & self.index_file.row_locks_mask + } + + pub(crate) fn lock_shard(&self, shard_id: usize) -> RwLockWriteGuard<'_, ()> { + self.index_file.row_locks[shard_id].write() + } + + pub(crate) fn row_mut(&self, idx: usize) -> RowWriteGuard<'_> { + let row_guard = self.index_file.row_locks[idx & self.index_file.row_locks_mask].write(); + let row_count = row_count_for_len(self.row_guard.len()); + assert!( + idx < row_count, + "row index out of bounds: {idx} >= {row_count}" + ); + let row = unsafe { &mut *row_mut_ptr(self.row_guard.as_ptr(), idx) }; + RowWriteGuard { + _row_guard: row_guard, + row, + } + } + + pub(crate) unsafe fn unlocked_row_ptr(&self, idx: usize) -> *mut RowLayout { + let row_count = row_count_for_len(self.row_guard.len()); + assert!( + idx < row_count, + "row index out of bounds: {idx} >= {row_count}" + ); + unsafe { row_mut_ptr(self.row_guard.as_ptr(), idx) } + } +} + +pub(crate) struct RowsTableWriteGuard<'a> { + pub(crate) row_guard: RwLockWriteGuard<'a, MmapMut>, +} + +impl RowsTableWriteGuard<'_> { + fn row_mut(&mut self, idx: usize) -> &mut RowLayout { + let row_count = row_count_for_len(self.row_guard.len()); + assert!( + idx < row_count, + "row index out of bounds: {idx} >= {row_count}" + ); + row_mut_bytes(&mut self.row_guard[..], idx) + } +} + +pub(crate) struct RowReadGuard<'a> { + _row_guard: RwLockReadGuard<'a, ()>, + row: &'a RowLayout, +} + +impl Deref for RowReadGuard<'_> { + type Target = RowLayout; + + fn 
deref(&self) -> &Self::Target { + self.row + } +} + +pub(crate) struct RowWriteGuard<'a> { + _row_guard: RwLockWriteGuard<'a, ()>, + row: &'a mut RowLayout, +} + +impl Deref for RowWriteGuard<'_> { + type Target = RowLayout; + + fn deref(&self) -> &Self::Target { + self.row + } +} + +impl DerefMut for RowWriteGuard<'_> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.row + } +} + +pub(crate) struct IndexFile { + /// Kept on Windows so `sync_all` can call `FlushFileBuffers`. + /// On Linux the fd is closed after mmap; `msync` suffices for durability. + #[cfg(windows)] + header_file: File, + rows_file: File, + /// Fixed mapping covering the header + waste-level pages. Never remapped, + /// so `header_ref()` / `layout_prefix_ref()` are always stable without a lock. + header_mmap: MmapMut, + /// Growable mapping covering only the row pages. Remapped on grow/shrink/reset. + rows_mmap: RwLock, + row_locks: Vec>, + row_locks_mask: usize, + config: Arc, +} + +impl IndexFile { + #[cfg(target_os = "linux")] + fn maybe_lock_mmap(config: &Config, mmap: &MmapMut) { + if config.mlock_index { + let _ = mmap.lock(); + } + } + + #[cfg(not(target_os = "linux"))] + fn maybe_lock_mmap(_config: &Config, _mmap: &MmapMut) {} + + fn read_existing_header(header_file: &File, header_len: usize) -> Result<((u64, u64), u64)> { + if header_len < size_of::() { + return Err(unexpected_eof_error("index file header too short")); + } + if header_len != size_of::() { + return Err(invalid_data_error("index header file has unexpected size")); + } + + let header = read_available_at(header_file, size_of::(), 0) + .map_err(Error::IOError)?; + if header.len() < size_of::() { + return Err(unexpected_eof_error("index file header too short")); + } + let header = IndexFileHeader::read_from_bytes(&header) + .map_err(|_| invalid_data_error("invalid index file header size"))?; + if &header.signature != INDEX_FILE_SIGNATURE || header.version != INDEX_FILE_VERSION { + return Err(invalid_data_error("invalid 
index file header")); + } + + Ok(( + (header.hash_key_0, header.hash_key_1), + header.global_split_level.load(Ordering::Relaxed), + )) + } + + pub(crate) fn existing_hash_key(base_path: &Path) -> Result> { + let header_path = index_file_path(base_path); + let header_file = match File::options().read(true).open(header_path) { + Ok(file) => file, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(Error::IOError(err)), + }; + let header_len = header_file.metadata().map_err(Error::IOError)?.len() as usize; + if header_len == 0 { + return Ok(None); + } + + let (hash_key, _) = Self::read_existing_header(&header_file, header_len)?; + Ok(Some(hash_key)) + } + + fn validate_existing( + header_file: &File, + header_len: usize, + rows_len: usize, + hash_key: (u64, u64), + ) -> Result<()> { + if !rows_len.is_multiple_of(PAGE_SIZE) { + return Err(invalid_data_error( + "index rows file size is not page aligned", + )); + } + + let (stored_hash_key, gsl) = Self::read_existing_header(header_file, header_len)?; + if stored_hash_key != hash_key { + return Err(invalid_data_error("index hash key mismatch")); + } + + let row_count = row_count_for_len(rows_len); + if row_count < MIN_INITIAL_ROWS || !row_count.is_power_of_two() { + return Err(invalid_data_error("invalid index file row count")); + } + + if gsl < MIN_SPLIT_LEVEL as u64 { + return Err(invalid_data_error("invalid index global split level")); + } + + let active_rows = 1usize + .checked_shl(gsl as u32) + .ok_or_else(|| invalid_data_error("index global split level overflow"))?; + if active_rows > row_count { + return Err(invalid_data_error( + "index global split level exceeds file size", + )); + } + + Ok(()) + } + + pub(crate) fn flush_header(&self) -> Result<()> { + self.header_mmap.flush().map_err(Error::IOError) + } + + pub(crate) fn open(base_path: &Path, config: Arc) -> Result { + let hash_key = config.hash_key; + let num_rows = (config.initial_capacity / ROW_WIDTH) + 
.max(MIN_INITIAL_ROWS) + .next_power_of_two(); + let num_locks = config.max_concurrency.min(num_rows).next_power_of_two(); + let row_locks = (0..num_locks).map(|_| RwLock::new(())).collect::>(); + let row_locks_mask = num_locks - 1; + + let header_path = index_file_path(base_path); + let rows_path = index_rows_file_path(base_path); + + let header_file = File::options() + .create(true) + .truncate(false) + .read(true) + .write(true) + .open(header_path) + .map_err(Error::IOError)?; + let rows_file = File::options() + .create(true) + .truncate(false) + .read(true) + .write(true) + .open(rows_path) + .map_err(Error::IOError)?; + + let header_size = size_of::(); + let header_len = header_file.metadata().map_err(Error::IOError)?.len() as usize; + let rows_len = rows_file.metadata().map_err(Error::IOError)?.len() as usize; + let new_file = header_len == 0 && rows_len == 0; + let rows_size = num_rows * size_of::(); + + if new_file { + header_file + .set_len(header_size as u64) + .map_err(Error::IOError)?; + rows_file + .set_len(rows_size as u64) + .map_err(Error::IOError)?; + } else { + Self::validate_existing(&header_file, header_len, rows_len, config.hash_key)?; + } + + let actual_rows_size = if new_file { rows_size } else { rows_len }; + + let header_mmap = unsafe { + memmap2::MmapOptions::new() + .len(header_size) + .map_mut(&header_file) + } + .map_err(Error::IOError)?; + Self::maybe_lock_mmap(config.as_ref(), &header_mmap); + + let rows_mmap = unsafe { + memmap2::MmapOptions::new() + .len(actual_rows_size) + .map_mut(&rows_file) + } + .map_err(Error::IOError)?; + Self::maybe_lock_mmap(config.as_ref(), &rows_mmap); + + if new_file { + header_file.sync_all().map_err(Error::IOError)?; + } + + let inst = Self { + #[cfg(windows)] + header_file, + rows_file, + header_mmap, + rows_mmap: RwLock::new(rows_mmap), + row_locks, + row_locks_mask, + config, + }; + + if new_file { + let rows_table = inst.rows_table_mut(); + inst.init_header_and_rows(rows_table, hash_key, false)?; 
+ } + + Ok(inst) + } + + pub(crate) fn verify_row_checksums(&self) -> Result<()> { + let row_table = self.rows_table(); + let row_count = row_count_for_len(row_table.row_guard.len()); + for row_idx in 0..row_count { + if !row_table.row(row_idx).checksum_matches() { + return Err(invalid_data_error("index row checksum mismatch")); + } + } + Ok(()) + } + + pub(crate) fn sync_all(&self) -> Result<()> { + self.header_mmap.flush().map_err(Error::IOError)?; + self.rows_mmap.write().flush().map_err(Error::IOError)?; + #[cfg(windows)] + self.header_file.sync_all().map_err(Error::IOError)?; + self.rows_file.sync_all().map_err(Error::IOError) + } + + pub(crate) fn file_size_bytes(&self) -> u64 { + let header = size_of::() as u64; + let rows = self.rows_file.metadata().map(|m| m.len()).unwrap_or(0); + header + rows + } + + pub(crate) fn rows_table(&self) -> RowsTableReadGuard<'_> { + RowsTableReadGuard { + index_file: self, + row_guard: self.rows_mmap.read(), + } + } + + pub(crate) fn rows_table_mut(&self) -> RowsTableWriteGuard<'_> { + RowsTableWriteGuard { + row_guard: self.rows_mmap.write(), + } + } + + /// Returns a direct reference to the header without acquiring any lock. + /// + /// Safe because the header mmap is never remapped and the header fields + /// used for stats are all `AtomicU64`. 
+ fn full_header_ref(&self) -> &IndexFileLayout { + unsafe { &*(self.header_mmap.as_ptr() as *const IndexFileLayout) } + } + + pub(crate) fn header_ref(&self) -> &IndexFileHeader { + &self.full_header_ref().header + } + + pub(crate) fn add_file_waste(&self, file_idx: u16, waste: u32) -> u32 { + self.full_header_ref().waste_levels[file_idx as usize].fetch_add(waste, Ordering::Relaxed) + + waste + } + + pub(crate) fn file_waste(&self, file_idx: u16) -> u32 { + self.full_header_ref().waste_levels[file_idx as usize].load(Ordering::Relaxed) + } + + pub(crate) fn take_file_waste(&self, file_idx: u16) -> u32 { + self.full_header_ref().waste_levels[file_idx as usize].swap(0, Ordering::Relaxed) + } + + pub(crate) fn grow(&self, nsl: u64) -> Result<()> { + let mut layout_mut = self.rows_table_mut(); + let gsl = self.header_ref().global_split_level.load(Ordering::Acquire); + if nsl <= gsl { + return Ok(()); + } + + let required_rows_size = (1usize << nsl) * size_of::(); + if layout_mut.row_guard.len() < required_rows_size { + let alloc_split = nsl + self.config.remap_scaler as u64; + let new_rows_size = (1usize << alloc_split) * size_of::(); + + self.rows_file + .set_len(new_rows_size as u64) + .map_err(Error::IOError)?; + + #[cfg(target_os = "linux")] + unsafe { + layout_mut + .row_guard + .remap(new_rows_size, memmap2::RemapOptions::new().may_move(true)) + } + .map_err(Error::IOError)?; + + #[cfg(not(target_os = "linux"))] + { + *layout_mut.row_guard = unsafe { + memmap2::MmapOptions::new() + .len(new_rows_size) + .map_mut(&self.rows_file) + } + .map_err(Error::IOError)?; + } + + Self::maybe_lock_mmap(self.config.as_ref(), &layout_mut.row_guard); + } + + self.header_ref() + .global_split_level + .store(nsl, Ordering::Release); + Ok(()) + } + + pub(crate) fn num_rows(&self) -> usize { + let gsl = self.header_ref().global_split_level.load(Ordering::Acquire) as usize; + 1usize << gsl + } + + pub(crate) fn shrink(&self, min_rows: usize) -> Result { + let mut row_table = 
self.rows_table_mut(); + + loop { + let global_split_level = self.header_ref().global_split_level.load(Ordering::Acquire); + let current_rows = 1usize << global_split_level; + if current_rows <= min_rows { + break; + } + + let next_level = global_split_level - 1; + let half_count = 1usize << next_level; + + let mut can_merge = true; + for idx in 0..half_count { + let row1 = row_ref_bytes(&row_table.row_guard[..], idx); + let row1_split = row1.split_level.load(Ordering::Acquire); + if row1_split != global_split_level { + continue; + } + + let row2 = row_ref_bytes(&row_table.row_guard[..], idx + half_count); + let count1 = row1 + .signatures + .iter() + .filter(|&&sig| sig != HashCoord::INVALID_SIG) + .count(); + let count2 = row2 + .signatures + .iter() + .filter(|&&sig| sig != HashCoord::INVALID_SIG) + .count(); + if count1 + count2 > ROW_WIDTH { + can_merge = false; + break; + } + } + + if !can_merge { + break; + } + + for idx in 0..half_count { + let row1 = unsafe { &mut *row_mut_ptr(row_table.row_guard.as_ptr(), idx) }; + let row2 = + unsafe { &mut *row_mut_ptr(row_table.row_guard.as_ptr(), idx + half_count) }; + + if row1.split_level.load(Ordering::Acquire) != global_split_level { + continue; + } + + let mut dest_idx = 0usize; + for src_idx in 0..ROW_WIDTH { + if row2.signatures[src_idx] == HashCoord::INVALID_SIG { + continue; + } + + while dest_idx < ROW_WIDTH + && row1.signatures[dest_idx] != HashCoord::INVALID_SIG + { + dest_idx += 1; + } + + if dest_idx >= ROW_WIDTH { + break; + } + + row1.insert(dest_idx, row2.signatures[src_idx], row2.pointers[src_idx]); + row2.remove(src_idx); + } + + row2.set_split_level(0); + row1.set_split_level(next_level); + } + + self.header_ref() + .global_split_level + .store(next_level, Ordering::Release); + } + + let final_level = self.header_ref().global_split_level.load(Ordering::Acquire); + let new_rows_size = (1usize << final_level) * size_of::(); + + if new_rows_size < row_table.row_guard.len() { + #[cfg(target_os = 
"linux")] + { + unsafe { + row_table + .row_guard + .remap(new_rows_size, memmap2::RemapOptions::new().may_move(true)) + } + .map_err(Error::IOError)?; + self.rows_file + .set_len(new_rows_size as u64) + .map_err(Error::IOError)?; + } + + #[cfg(not(target_os = "linux"))] + { + row_table.row_guard.flush().map_err(Error::IOError)?; + + #[cfg(windows)] + { + // On Windows we must unmap before truncating. + *row_table.row_guard = memmap2::MmapOptions::new() + .len(1) + .map_anon() + .map_err(Error::IOError)?; + } + + self.rows_file + .set_len(new_rows_size as u64) + .map_err(Error::IOError)?; + *row_table.row_guard = unsafe { + memmap2::MmapOptions::new() + .len(new_rows_size) + .map_mut(&self.rows_file) + } + .map_err(Error::IOError)?; + } + + Self::maybe_lock_mmap(self.config.as_ref(), &row_table.row_guard); + } + + Ok(1usize << final_level) + } + + fn init_header_and_rows( + &self, + mut rows_table: RowsTableWriteGuard, + hash_key: (u64, u64), + dirty: bool, + ) -> Result<()> { + // Zero both mmaps first, then populate. + rows_table.row_guard.fill(0); + // Safety: header_mmap is a contiguous MmapMut that we own; no other &mut exists yet. + unsafe { + std::ptr::write_bytes( + self.header_mmap.as_ptr() as *mut u8, + 0, + self.header_mmap.len(), + ); + } + + // Now create the mutable reference after zeroing is complete. + // Safety: only called during init (open) or reset, both single-threaded + // w.r.t. this store instance. 
+ let layout = unsafe { &mut *(self.header_mmap.as_ptr() as *mut IndexFileLayout) }; + + layout.header.signature = *INDEX_FILE_SIGNATURE; + layout.header.version = INDEX_FILE_VERSION; + layout + .header + .dirty + .store(if dirty { 1 } else { 0 }, Ordering::Release); + layout.header.hash_key_0 = hash_key.0; + layout.header.hash_key_1 = hash_key.1; + layout + .header + .global_split_level + .store(MIN_SPLIT_LEVEL as u64, Ordering::Release); + + for row_idx in 0..MIN_INITIAL_ROWS { + rows_table + .row_mut(row_idx) + .set_split_level(MIN_SPLIT_LEVEL as u64); + } + + self.flush_header()?; + rows_table.row_guard.flush().map_err(Error::IOError)?; + self.rows_file.sync_all().map_err(Error::IOError)?; + Ok(()) + } + + pub(crate) fn reset(&self) -> Result<()> { + let min_rows_size = MIN_INITIAL_ROWS * size_of::(); + let mut row_table = self.rows_table_mut(); + + #[cfg(target_os = "linux")] + unsafe { + self.rows_file + .set_len(min_rows_size as u64) + .map_err(Error::IOError)?; + row_table + .row_guard + .remap(min_rows_size, memmap2::RemapOptions::new().may_move(true)) + } + .map_err(Error::IOError)?; + + #[cfg(not(target_os = "linux"))] + { + row_table.row_guard.flush().map_err(Error::IOError)?; + + #[cfg(windows)] + { + *row_table.row_guard = memmap2::MmapOptions::new() + .len(1) + .map_anon() + .map_err(Error::IOError)?; + } + + self.rows_file + .set_len(min_rows_size as u64) + .map_err(Error::IOError)?; + *row_table.row_guard = unsafe { + memmap2::MmapOptions::new() + .len(min_rows_size) + .map_mut(&self.rows_file) + } + .map_err(Error::IOError)?; + } + + Self::maybe_lock_mmap(self.config.as_ref(), &row_table.row_guard); + + self.init_header_and_rows(row_table, self.config.hash_key, true) + } +} diff --git a/src/internal.rs b/src/internal.rs new file mode 100644 index 0000000..7f0a6d5 --- /dev/null +++ b/src/internal.rs @@ -0,0 +1,360 @@ +use siphasher::sip128::{Hasher128, SipHasher13}; + +use std::{ + fs::File, + hash::Hasher, + path::{Path, PathBuf}, +}; + +use 
crate::types::{Error, Result}; + +pub(crate) const PAGE_SIZE: usize = 4096; +pub(crate) const ROW_WIDTH: usize = 16 * 21; +pub(crate) const MIN_SPLIT_LEVEL: usize = 3; +pub(crate) const MASKED_ROW_SELECTOR_BITS: u32 = 18; +pub(crate) const MIN_INITIAL_ROWS: usize = 1 << MIN_SPLIT_LEVEL; +pub(crate) const MAX_REPRESENTABLE_FILE_SIZE: u32 = + ((1u32 << 26) - 1) * FILE_OFFSET_ALIGNMENT as u32; +pub(crate) const ENTRY_TYPE_SHIFT: u32 = 14; +pub(crate) const MAX_INTERNAL_KEY_SIZE: usize = (1 << ENTRY_TYPE_SHIFT) - 1; +pub(crate) const MAX_INTERNAL_VALUE_SIZE: usize = (1 << 16) - 1; +pub(crate) const MAX_DATA_FILES: u16 = 1 << 12; +pub(crate) const MAX_DATA_FILE_IDX: u16 = MAX_DATA_FILES - 1; + +pub(crate) const INDEX_FILE_SIGNATURE: &[u8; 8] = b"CandyStr"; +pub(crate) const INDEX_FILE_VERSION: u32 = 0x0002_0002; +pub(crate) const DATA_FILE_SIGNATURE: &[u8; 8] = b"CandyDat"; +pub(crate) const DATA_FILE_VERSION: u32 = 0x0002_0001; +pub(crate) const FILE_OFFSET_ALIGNMENT: u64 = 16; +pub(crate) const SIZE_HINT_UNIT: usize = 512; +pub(crate) const DATA_ENTRY_OFFSET_MAGIC: u32 = 0x91c8_d7cd; +pub(crate) const DATA_ENTRY_OFFSET_MASK: u32 = (1 << 24) - 1; +pub(crate) const KEY_NAMESPACE_BITS: u8 = 6; +pub(crate) const MAX_KEY_NAMESPACE: u8 = (1 << KEY_NAMESPACE_BITS) - 1; +pub(crate) const READ_BUFFER_SIZE: usize = 128 * 1024; + +pub(crate) fn aligned_data_entry_waste(klen: usize, vlen: usize) -> u32 { + (10 + klen as u32 + vlen as u32).next_multiple_of(FILE_OFFSET_ALIGNMENT as u32) +} + +pub(crate) fn aligned_tombstone_entry_waste(klen: usize) -> u32 { + (8 + klen as u32).next_multiple_of(FILE_OFFSET_ALIGNMENT as u32) +} + +pub(crate) fn aligned_data_entry_size(klen: usize, vlen: usize) -> u64 { + (10 + klen as u64 + vlen as u64).next_multiple_of(FILE_OFFSET_ALIGNMENT) +} + +pub(crate) fn index_file_path(base_path: &Path) -> PathBuf { + base_path.join("index") +} + +pub(crate) fn index_rows_file_path(base_path: &Path) -> PathBuf { + base_path.join("rows") +} + +pub(crate) fn 
data_file_path(base_path: &Path, file_idx: u16) -> PathBuf { + base_path.join(format!("data_{file_idx:04}")) +} + +#[cfg(unix)] +pub(crate) fn sync_dir(path: &Path) -> Result<()> { + File::open(path) + .map_err(Error::IOError)? + .sync_all() + .map_err(Error::IOError) +} + +#[cfg(not(unix))] +pub(crate) fn sync_dir(_path: &Path) -> Result<()> { + Ok(()) +} + +pub(crate) fn parse_data_file_idx(path: &Path) -> Option { + let name = path.file_name()?.to_str()?; + let suffix = name.strip_prefix("data_")?; + if suffix.len() != 4 { + return None; + } + suffix.parse().ok() +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct RangeMetadata { + pub(crate) head: u64, + pub(crate) tail: u64, + pub(crate) count: u64, +} + +impl RangeMetadata { + pub(crate) fn new() -> Self { + Self { + head: 1u64 << 63, + tail: (1u64 << 63) - 1, + count: 0, + } + } + + pub(crate) fn to_bytes(self) -> [u8; 24] { + let mut buf = [0u8; 24]; + buf[0..8].copy_from_slice(&self.head.to_le_bytes()); + buf[8..16].copy_from_slice(&self.tail.to_le_bytes()); + buf[16..24].copy_from_slice(&self.count.to_le_bytes()); + buf + } + + pub(crate) fn from_bytes(bytes: &[u8]) -> Option { + if bytes.len() != 24 { + return None; + } + Some(Self { + head: u64::from_le_bytes(bytes[0..8].try_into().ok()?), + tail: u64::from_le_bytes(bytes[8..16].try_into().ok()?), + count: u64::from_le_bytes(bytes[16..24].try_into().ok()?), + }) + } +} + +#[repr(u16)] +pub(crate) enum EntryType { + Data = 0, + Tombstone = 1, + _Unused2 = 2, + _Unused3 = 3, +} + +pub(crate) fn invalid_data_error(message: &'static str) -> Error { + Error::IOError(std::io::Error::new( + std::io::ErrorKind::InvalidData, + message, + )) +} + +pub(crate) fn unexpected_eof_error(message: &'static str) -> Error { + Error::IOError(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + message, + )) +} + +pub(crate) fn is_resettable_open_error(err: &Error) -> bool { + matches!( + err, + Error::IOError(io_err) + if matches!( + io_err.kind(), + 
/// Namespace tag for a key. `HashCoord::new` feeds it to the hasher as a
/// single byte ahead of the key bytes, so equal keys in different namespaces
/// hash differently.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[repr(u8)]
pub(crate) enum KeyNamespace {
    User = 0,
    QueueMeta = 1,
    QueueData = 2,
    BigMeta = 3,
    BigData = 4,
    ListMeta = 5,
    ListIndex = 6,
    ListData = 7,
    Typed = 8,
    TypedQueueMeta = 9,
    TypedQueueData = 10,
    TypedBigMeta = 11,
    TypedBigData = 12,
    TypedListMeta = 13,
    TypedListIndex = 14,
    TypedListData = 15,
}

impl KeyNamespace {
    /// Maps a raw discriminant back to its variant; `None` for any byte that
    /// is not the discriminant of a declared variant.
    pub(crate) fn from_u8(ns: u8) -> Option<Self> {
        // Every declared variant, compared by discriminant below.
        const VARIANTS: [KeyNamespace; 16] = [
            KeyNamespace::User,
            KeyNamespace::QueueMeta,
            KeyNamespace::QueueData,
            KeyNamespace::BigMeta,
            KeyNamespace::BigData,
            KeyNamespace::ListMeta,
            KeyNamespace::ListIndex,
            KeyNamespace::ListData,
            KeyNamespace::Typed,
            KeyNamespace::TypedQueueMeta,
            KeyNamespace::TypedQueueData,
            KeyNamespace::TypedBigMeta,
            KeyNamespace::TypedBigData,
            KeyNamespace::TypedListMeta,
            KeyNamespace::TypedListIndex,
            KeyNamespace::TypedListData,
        ];
        VARIANTS
            .iter()
            .copied()
            .find(|variant| *variant as u8 == ns)
    }
}

/// Two 32-bit values derived from a key's 128-bit hash: a signature and a row
/// selector (see the `impl` for how each is derived and used).
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub(crate) struct HashCoord {
    pub(crate) sig: u32,
    pub(crate) row_selector: u32,
}
row_selector = h.h1 as u32; + let mut sig = (h.h1 >> 32) as u32; + if sig == Self::INVALID_SIG { + sig = h.h2 as u32; + if sig == Self::INVALID_SIG { + sig = (h.h2 >> 32) as u32; + if sig == Self::INVALID_SIG { + sig = 0x6419_9a93; + } + } + } + + Self { sig, row_selector } + } + + pub(crate) fn masked_row_selector(&self) -> u32 { + (self.row_selector >> MIN_SPLIT_LEVEL) & ((1 << MASKED_ROW_SELECTOR_BITS) - 1) + } + + pub(crate) fn row_index(&self, split_level: u64) -> usize { + ((self.row_selector as u64) & ((1 << split_level) - 1)) as usize + } +} + +pub(crate) struct KVBuf { + pub(crate) buf: Vec, + pub(crate) vlen: u16, + pub(crate) header_len: u16, + #[allow(dead_code)] + pub(crate) ns: u8, + #[allow(dead_code)] + pub(crate) entry_type: EntryType, +} + +impl KVBuf { + pub(crate) fn value(&self) -> &[u8] { + let start = self.header_len as usize; + &self.buf[start..start + self.vlen as usize] + } + + pub(crate) fn key(&self) -> &[u8] { + &self.buf[self.header_len as usize + self.vlen as usize..] + } + + pub(crate) fn into_value(mut self) -> Vec { + let start = self.header_len as usize; + let vlen = self.vlen as usize; + if start > 0 { + self.buf.copy_within(start..start + vlen, 0); + } + self.buf.truncate(vlen); + self.buf + } +} + +pub(crate) struct KVRef<'a> { + pub(crate) buf: &'a [u8], + pub(crate) vlen: u16, + pub(crate) header_len: u16, + pub(crate) ns: u8, + pub(crate) entry_type: EntryType, +} + +impl KVRef<'_> { + pub(crate) fn value(&self) -> &[u8] { + let start = self.header_len as usize; + &self.buf[start..start + self.vlen as usize] + } + + pub(crate) fn key(&self) -> &[u8] { + &self.buf[self.header_len as usize + self.vlen as usize..] 
/// One positional read; may return fewer bytes than `buf` can hold.
#[cfg(unix)]
fn read_some_at(f: &File, buf: &mut [u8], offset: u64) -> std::io::Result<usize> {
    std::os::unix::fs::FileExt::read_at(f, buf, offset)
}

/// One positional read; may return fewer bytes than `buf` can hold.
///
/// Unlike the Unix variant, `seek_read` moves the file cursor.
#[cfg(windows)]
fn read_some_at(f: &File, buf: &mut [u8], offset: u64) -> std::io::Result<usize> {
    std::os::windows::fs::FileExt::seek_read(f, buf, offset)
}

/// Reads up to `count` bytes starting at `file_offset` into `buf`.
///
/// `buf` is resized to `count`, filled by repeated positional reads, and
/// finally truncated to the number of bytes actually read. Hitting end of
/// file early is not an error; it just yields a shorter (possibly empty)
/// `buf`.
///
/// # Errors
/// Returns the first I/O error other than `ErrorKind::Interrupted`. An
/// interrupted read transfers no data, so it is retried instead of failing
/// the whole operation. On error `buf` is left at length `count`, with
/// unspecified contents past the bytes already read.
#[cfg(any(unix, windows))]
pub(crate) fn read_into_at(
    f: &File,
    buf: &mut Vec<u8>,
    count: usize,
    file_offset: u64,
) -> std::io::Result<()> {
    buf.resize(count, 0);
    let mut filled = 0;
    while filled < count {
        match read_some_at(f, &mut buf[filled..], file_offset + filled as u64) {
            // End of file: keep what we have.
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    buf.truncate(filled);
    Ok(())
}

/// Convenience wrapper around [`read_into_at`] that allocates the buffer and
/// returns whatever could be read (at most `count` bytes).
pub(crate) fn read_available_at(
    f: &File,
    count: usize,
    file_offset: u64,
) -> std::io::Result<Vec<u8>> {
    let mut buf = Vec::new();
    read_into_at(f, &mut buf, count, file_offset)?;
    Ok(buf)
}

/// Writes all of `buf` at `offset`, delegating to the standard library.
#[cfg(unix)]
pub(crate) fn write_all_at(f: &File, buf: &[u8], offset: u64) -> std::io::Result<()> {
    std::os::unix::fs::FileExt::write_all_at(f, buf, offset)
}

/// Writes all of `buf` at `offset`, looping over partial writes.
///
/// Mirrors the Unix `write_all_at` contract: interrupted writes are retried
/// and a zero-length write is reported as `ErrorKind::WriteZero`. This was
/// previously `UnexpectedEof`, which differed from the Unix path and is one
/// of the kinds `is_resettable_open_error` treats as resettable.
#[cfg(windows)]
pub(crate) fn write_all_at(f: &File, mut buf: &[u8], mut offset: u64) -> std::io::Result<()> {
    while !buf.is_empty() {
        match std::os::windows::fs::FileExt::seek_write(f, buf, offset) {
            Ok(0) => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::WriteZero,
                    "failed to write whole buffer",
                ));
            }
            Ok(written) => {
                buf = &buf[written..];
                offset += written as u64;
            }
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }
    Ok(())
}
The algorithm can be thought of as a "zero-overhead" extension to a hash table that's stored over files, -//! as it's designed to minimizes disk IO operations. Most operations add an overhead of 1-2 microseconds -//! to the disk IO latency, and operations generally require 1-4 disk IOs. -//! -//! The algorithm, for the most part, is crash-safe. That is, you can crash at any point and still be in a consistent -//! state. You might lose the ongoing operation, but we consider this acceptable. -//! -//! Candy is designed to consume very little memory: entries are written directly to the shard-file, and only a -//! table of ~380KB is kept `mmap`-ed (it is also file-backed, so can be evicted if needed). A shard-file can -//! hold around 30K entries, and more shard-files are created as needed. -//! -//! A unique feature of Candy is the support of *lists*, which allow creating cheap collections. -//! -//! Note: the file format is not yet stable! -//! -//! Example: -//! ``` -//! use candystore::{CandyStore, Config, Result}; -//! -//! fn main() -> Result<()> { -//! let db = CandyStore::open("/tmp/candy-dir", Config::default())?; -//! db.set("hello", "world")?; -//! assert_eq!(db.get("hello")?, Some("world".into())); -//! db.remove("hello")?; -//! assert_eq!(db.get("hello")?, None); -//! -//! // lists -//! db.set_in_list("italian", "bye", "arrivederci")?; -//! db.set_in_list("italian", "thanks", "grazie")?; -//! assert_eq!(db.get_from_list("italian", "bye")?, Some("arrivederci".into())); -//! -//! db.set_in_list("spanish", "bye", "adios")?; -//! db.set_in_list("spanish", "thanks", "gracias")?; -//! -//! let items = db.iter_list("spanish").map(|res| res.unwrap()).collect::>(); -//! assert_eq!(items, vec![("bye".into(), "adios".into()), ("thanks".into(), "gracias".into())]); -//! -//! Ok(()) -//! } -//! 
``` - -mod hashing; -mod lists; -mod queues; -mod router; -mod shard; -mod stats; +mod data_file; +mod index_file; +mod internal; +mod pacer; mod store; -mod typed; - -pub use hashing::HashSeed; -pub use lists::{ListCompactionParams, ListIterator}; -pub use stats::Stats; -pub use store::{CandyStore, GetOrCreateStatus, ReplaceStatus, SetStatus}; -pub use typed::{CandyTypedDeque, CandyTypedKey, CandyTypedList, CandyTypedStore}; - -use std::fmt::{Display, Formatter}; - -#[cfg(feature = "whitebox_testing")] -pub use hashing::HASH_BITS_TO_KEEP; - -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum CandyError { - KeyTooLong(usize), - ValueTooLong(usize), - EntryCannotFitInShard(usize, usize), -} - -impl Display for CandyError { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - match self { - Self::KeyTooLong(sz) => write!(f, "key too long {sz}"), - Self::ValueTooLong(sz) => write!(f, "value too long {sz}"), - Self::EntryCannotFitInShard(sz, max) => { - write!(f, "entry too big ({sz}) for a single shard file ({max})") - } - } - } -} - -impl std::error::Error for CandyError {} - -pub type Result = anyhow::Result; - -/// The configuration options for CandyStore. 
Comes with sane defaults, feel free to use them -#[derive(Debug, Clone)] -pub struct Config { - /// we don't want huge shards, because splitting would be expensive - pub max_shard_size: u32, - /// should be ~10% of max_shard_size - pub min_compaction_threashold: u32, - /// just some entropy, not so important unless you fear DoS - pub hash_seed: HashSeed, - /// hint for creating number of shards accordingly) - pub expected_number_of_keys: usize, - /// number of keyed locks for concurrent list ops - pub max_concurrent_list_ops: u32, - /// whether or not to truncate up shard files to their max size (spare files) - pub truncate_up: bool, - /// whether or not to clear the DB if the version is unsupported - pub clear_on_unsupported_version: bool, - /// whether or not to mlock the shard headers to RAM (POSIX only) - pub mlock_headers: bool, - /// number of background compaction threads - pub num_compaction_threads: usize, - /// optionally delay modifying operations before for the given duration before flushing data to disk, - /// to ensure reboot consistency - #[cfg(feature = "flush_aggregation")] - pub flush_aggregation_delay: Option, -} - -impl Default for Config { - fn default() -> Self { - Self { - max_shard_size: 64 * 1024 * 1024, - min_compaction_threashold: 8 * 1024 * 1024, - hash_seed: *b"kOYLu0xvq2WtzcKJ", - expected_number_of_keys: 0, - max_concurrent_list_ops: 64, - truncate_up: true, - clear_on_unsupported_version: false, - mlock_headers: false, - num_compaction_threads: 4, - #[cfg(feature = "flush_aggregation")] - flush_aggregation_delay: None, - } - } -} - -pub(crate) const MAX_TOTAL_KEY_SIZE: usize = 0x3fff; // 14 bits -pub(crate) const MAX_TOTAL_VALUE_SIZE: usize = 0xffff; // 16 bits -pub(crate) const NAMESPACING_RESERVED_SIZE: usize = 0xff; -pub(crate) const VALUE_RESERVED_SIZE: usize = 0xff; -pub const MAX_KEY_SIZE: usize = MAX_TOTAL_KEY_SIZE - NAMESPACING_RESERVED_SIZE; -pub const MAX_VALUE_SIZE: usize = MAX_TOTAL_VALUE_SIZE - VALUE_RESERVED_SIZE; - 
-const _: () = assert!(MAX_KEY_SIZE <= u16::MAX as usize); -const _: () = assert!(MAX_VALUE_SIZE <= u16::MAX as usize); +mod types; + +/// The main untyped store API. +pub use crate::store::{ + CandyStore, CandyTypedDeque, CandyTypedKey, CandyTypedList, CandyTypedStore, KVPair, + ListIterator, +}; +/// Public configuration, error, and stats types. +pub use crate::types::*; + +/// Backward-compatible alias for the crate error type. +pub type CandyError = Error; +/// Maximum supported user key length in bytes. +pub const MAX_KEY_LEN: usize = MAX_USER_KEY_SIZE; +/// Maximum supported inline value length in bytes. +pub const MAX_VALUE_LEN: usize = MAX_USER_VALUE_SIZE; diff --git a/src/lists.rs b/src/lists.rs deleted file mode 100644 index 2e8c83e..0000000 --- a/src/lists.rs +++ /dev/null @@ -1,863 +0,0 @@ -use std::ops::Range; - -use crate::{ - hashing::PartedHash, - shard::{InsertMode, KVPair}, - store::{CHAIN_NAMESPACE, ITEM_NAMESPACE, LIST_NAMESPACE}, - CandyStore, GetOrCreateStatus, ReplaceStatus, Result, SetStatus, -}; - -use bytemuck::{bytes_of, from_bytes, Pod, Zeroable}; -use parking_lot::MutexGuard; - -#[derive(Clone, Copy, Pod, Zeroable)] -#[repr(C)] -struct List { - head_idx: u64, // inclusive - tail_idx: u64, // exclusive - num_items: u64, -} - -impl std::fmt::Debug for List { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "List(0x{:016x}..0x{:016x} items={})", - self.head_idx, self.tail_idx, self.num_items - ) - } -} - -impl List { - fn span_len(&self) -> u64 { - self.tail_idx - self.head_idx - } - fn holes(&self) -> u64 { - self.span_len() - self.num_items - } - fn is_empty(&self) -> bool { - self.head_idx == self.tail_idx - } -} - -#[derive(Debug, Clone, Copy, Pod, Zeroable)] -#[repr(C, packed)] -struct ChainKey { - list_ph: PartedHash, - idx: u64, - namespace: u8, -} - -#[derive(Debug)] -pub struct ListCompactionParams { - pub min_length: u64, - pub min_holes_ratio: f64, -} - -impl Default for 
ListCompactionParams { - fn default() -> Self { - Self { - min_length: 100, - min_holes_ratio: 0.25, - } - } -} - -pub struct ListIterator<'a> { - store: &'a CandyStore, - list_key: Vec, - list_ph: PartedHash, - range: Option>, - fwd: bool, -} - -impl<'a> Iterator for ListIterator<'a> { - type Item = Result; - - fn next(&mut self) -> Option { - if self.range.is_none() { - let _guard = self.store.lock_list(self.list_ph); - let list_bytes = match self.store.get_raw(&self.list_key) { - Ok(Some(list_bytes)) => list_bytes, - Ok(None) => return None, - Err(e) => return Some(Err(e)), - }; - let list = *from_bytes::(&list_bytes); - self.range = Some(list.head_idx..list.tail_idx); - } - - loop { - let idx = if self.fwd { - self.range.as_mut().unwrap().next() - } else { - self.range.as_mut().unwrap().next_back() - }; - let Some(idx) = idx else { - return None; - }; - - match self.store.get_from_list_at_index(self.list_ph, idx, true) { - Err(e) => return Some(Err(e)), - Ok(Some((_, k, v))) => return Some(Ok((k, v))), - Ok(None) => { - // try next index - } - } - } - } - - fn size_hint(&self) -> (usize, Option) { - if let Some(ref range) = self.range { - range.size_hint() - } else { - (0, None) - } - } -} - -#[derive(Debug)] -enum InsertToListStatus { - Created(Vec), - DoesNotExist, - WrongValue(Vec), - ExistingValue(Vec), - Replaced(Vec), -} - -impl CandyStore { - const FIRST_LIST_IDX: u64 = 0x8000_0000_0000_0000; - - fn make_list_key(&self, mut list_key: Vec) -> (PartedHash, Vec) { - list_key.extend_from_slice(LIST_NAMESPACE); - (PartedHash::new(&self.config.hash_seed, &list_key), list_key) - } - - fn make_item_key(&self, list_ph: PartedHash, mut item_key: Vec) -> (PartedHash, Vec) { - item_key.extend_from_slice(bytes_of(&list_ph)); - item_key.extend_from_slice(ITEM_NAMESPACE); - (PartedHash::new(&self.config.hash_seed, &item_key), item_key) - } - - pub(crate) fn lock_list(&self, list_ph: PartedHash) -> MutexGuard<'_, ()> { - self.keyed_locks[(list_ph.signature() & 
self.keyed_locks_mask) as usize].lock() - } - - fn _insert_to_list( - &self, - list_key: Vec, - item_key: Vec, - mut val: Vec, - mode: InsertMode, - ) -> Result { - let (list_ph, list_key) = self.make_list_key(list_key); - let (item_ph, item_key) = self.make_item_key(list_ph, item_key); - - let _guard = self.lock_list(list_ph); - - // if the item already exists, it's already part of the list. just update it and preserve the index - if let Some(mut existing_val) = self.get_raw(&item_key)? { - match mode { - InsertMode::GetOrCreate => { - existing_val.truncate(existing_val.len() - size_of::()); - return Ok(InsertToListStatus::ExistingValue(existing_val)); - } - InsertMode::Replace(expected_val) => { - if let Some(expected_val) = expected_val { - if expected_val != &existing_val[existing_val.len() - size_of::()..] { - existing_val.truncate(existing_val.len() - size_of::()); - return Ok(InsertToListStatus::WrongValue(existing_val)); - } - } - // fall through - } - InsertMode::Set => { - // fall through - } - } - - val.extend_from_slice(&existing_val[existing_val.len() - size_of::()..]); - self.replace_raw(&item_key, &val, None)?; - existing_val.truncate(existing_val.len() - size_of::()); - return Ok(InsertToListStatus::Replaced(existing_val)); - } - - if matches!(mode, InsertMode::Replace(_)) { - // not allowed to create - return Ok(InsertToListStatus::DoesNotExist); - } - - // get of create the list - let res = self.get_or_create_raw( - &list_key, - bytes_of(&List { - head_idx: Self::FIRST_LIST_IDX, - tail_idx: Self::FIRST_LIST_IDX + 1, - num_items: 1, - }) - .to_owned(), - )?; - - match res { - crate::GetOrCreateStatus::CreatedNew(_) => { - // list was just created. 
create chain - self.set_raw( - bytes_of(&ChainKey { - list_ph, - idx: Self::FIRST_LIST_IDX, - namespace: CHAIN_NAMESPACE, - }), - bytes_of(&item_ph), - )?; - - // create item - val.extend_from_slice(bytes_of(&Self::FIRST_LIST_IDX)); - self.set_raw(&item_key, &val)?; - } - crate::GetOrCreateStatus::ExistingValue(list_bytes) => { - let mut list = *from_bytes::(&list_bytes); - - let idx = list.tail_idx; - list.tail_idx += 1; - - // update list - list.num_items += 1; - self.set_raw(&list_key, bytes_of(&list))?; - - // create chain - self.set_raw( - bytes_of(&ChainKey { - list_ph, - idx, - namespace: CHAIN_NAMESPACE, - }), - bytes_of(&item_ph), - )?; - - // create item - val.extend_from_slice(bytes_of(&idx)); - self.set_raw(&item_key, &val)?; - } - } - - val.truncate(val.len() - size_of::()); - Ok(InsertToListStatus::Created(val)) - } - - /// Inserts or updates an element `item_key` that belongs to list `list_key`. Returns [SetStatus::CreatedNew] if - /// the item did not exist, or [SetStatus::PrevValue] with the previous value of the item. - /// - /// See also [Self::set]. - pub fn set_in_list< - B1: AsRef<[u8]> + ?Sized, - B2: AsRef<[u8]> + ?Sized, - B3: AsRef<[u8]> + ?Sized, - >( - &self, - list_key: &B1, - item_key: &B2, - val: &B3, - ) -> Result { - self.owned_set_in_list( - list_key.as_ref().to_owned(), - item_key.as_ref().to_owned(), - val.as_ref().to_owned(), - false, - ) - } - - /// Like [Self::set_in_list] but "promotes" the element to the tail of the list: it's basically a - /// remove + insert operation. This can be usede to implement LRUs, where older elements are at the - /// beginning and newer ones at the end. 
- /// - /// Note: **not crash-safe** - pub fn set_in_list_promoting< - B1: AsRef<[u8]> + ?Sized, - B2: AsRef<[u8]> + ?Sized, - B3: AsRef<[u8]> + ?Sized, - >( - &self, - list_key: &B1, - item_key: &B2, - val: &B3, - ) -> Result { - self.owned_set_in_list( - list_key.as_ref().to_owned(), - item_key.as_ref().to_owned(), - val.as_ref().to_owned(), - true, - ) - } - - /// Owned version of [Self::set_in_list], which also takes promote as a parameter - pub fn owned_set_in_list( - &self, - list_key: Vec, - item_key: Vec, - val: Vec, - promote: bool, - ) -> Result { - if promote { - self.owned_remove_from_list(list_key.clone(), item_key.clone())?; - } - match self._insert_to_list(list_key, item_key, val, InsertMode::Set)? { - InsertToListStatus::Created(_v) => Ok(SetStatus::CreatedNew), - InsertToListStatus::Replaced(v) => Ok(SetStatus::PrevValue(v)), - _ => unreachable!(), - } - } - - /// Like [Self::set_in_list], but will only replace (update) an existing item, i.e., it will never create the - /// key - pub fn replace_in_list< - B1: AsRef<[u8]> + ?Sized, - B2: AsRef<[u8]> + ?Sized, - B3: AsRef<[u8]> + ?Sized, - >( - &self, - list_key: &B1, - item_key: &B2, - val: &B3, - expected_val: Option<&B3>, - ) -> Result { - self.owned_replace_in_list( - list_key.as_ref().to_owned(), - item_key.as_ref().to_owned(), - val.as_ref().to_owned(), - expected_val.map(|ev| ev.as_ref()), - ) - } - - /// Owned version of [Self::replace_in_list] - pub fn owned_replace_in_list( - &self, - list_key: Vec, - item_key: Vec, - val: Vec, - expected_val: Option<&[u8]>, - ) -> Result { - match self._insert_to_list(list_key, item_key, val, InsertMode::Replace(expected_val))? 
{ - InsertToListStatus::DoesNotExist => Ok(ReplaceStatus::DoesNotExist), - InsertToListStatus::Replaced(v) => Ok(ReplaceStatus::PrevValue(v)), - InsertToListStatus::WrongValue(v) => Ok(ReplaceStatus::WrongValue(v)), - _ => unreachable!(), - } - } - - /// Like [Self::set_in_list] but will not replace (update) the element if it already exists - it will only - /// create the element with the default value if it did not exist. - pub fn get_or_create_in_list< - B1: AsRef<[u8]> + ?Sized, - B2: AsRef<[u8]> + ?Sized, - B3: AsRef<[u8]> + ?Sized, - >( - &self, - list_key: &B1, - item_key: &B2, - default_val: &B3, - ) -> Result { - self.owned_get_or_create_in_list( - list_key.as_ref().to_owned(), - item_key.as_ref().to_owned(), - default_val.as_ref().to_owned(), - ) - } - - /// Owned version of [Self::get_or_create_in_list] - pub fn owned_get_or_create_in_list( - &self, - list_key: Vec, - item_key: Vec, - default_val: Vec, - ) -> Result { - match self._insert_to_list(list_key, item_key, default_val, InsertMode::GetOrCreate)? { - InsertToListStatus::ExistingValue(v) => Ok(GetOrCreateStatus::ExistingValue(v)), - InsertToListStatus::Created(v) => Ok(GetOrCreateStatus::CreatedNew(v)), - _ => unreachable!(), - } - } - - /// Gets a list element identified by `list_key` and `item_key`. This is an O(1) operation. - /// - /// See also: [Self::get] - pub fn get_from_list + ?Sized, B2: AsRef<[u8]> + ?Sized>( - &self, - list_key: &B1, - item_key: &B2, - ) -> Result>> { - self.owned_get_from_list(list_key.as_ref().to_owned(), item_key.as_ref().to_owned()) - } - - /// Owned version of [Self::get_from_list] - pub fn owned_get_from_list( - &self, - list_key: Vec, - item_key: Vec, - ) -> Result>> { - let (list_ph, _) = self.make_list_key(list_key); - let (_, item_key) = self.make_item_key(list_ph, item_key); - let Some(mut val) = self.get_raw(&item_key)? 
else { - return Ok(None); - }; - val.truncate(val.len() - size_of::()); - Ok(Some(val)) - } - - /// Removes a element from the list, identified by `list_key` and `item_key. The element can be - /// at any position in the list, not just the head or the tail, but in this case, it will create a "hole". - /// This means that iterations will go over the missing element's index every time, until the list is compacted. - /// - /// See also [Self::remove], [Self::compact_list_if_needed] - pub fn remove_from_list + ?Sized, B2: AsRef<[u8]> + ?Sized>( - &self, - list_key: &B1, - item_key: &B2, - ) -> Result>> { - self.owned_remove_from_list(list_key.as_ref().to_owned(), item_key.as_ref().to_owned()) - } - - /// Owned version of [Self::remove_from_list] - pub fn owned_remove_from_list( - &self, - list_key: Vec, - item_key: Vec, - ) -> Result>> { - let (list_ph, list_key) = self.make_list_key(list_key); - let (_, item_key) = self.make_item_key(list_ph, item_key); - - let _guard = self.lock_list(list_ph); - - let Some(mut existing_val) = self.get_raw(&item_key)? else { - return Ok(None); - }; - - let item_idx = u64::from_le_bytes( - (&existing_val[existing_val.len() - size_of::()..]) - .try_into() - .unwrap(), - ); - existing_val.truncate(existing_val.len() - size_of::()); - - // update list, if the item was the head/tail - if let Some(list_bytes) = self.get_raw(&list_key)? 
{ - let mut list = *from_bytes::(&list_bytes); - - list.num_items -= 1; - - if list.head_idx == item_idx || list.tail_idx == item_idx + 1 { - if list.head_idx == item_idx { - list.head_idx += 1; - } else if list.tail_idx == item_idx + 1 { - list.tail_idx -= 1; - } - } - if list.is_empty() { - self.remove_raw(&list_key)?; - } else { - self.set_raw(&list_key, bytes_of(&list))?; - } - } - - // remove chain - self.remove_raw(bytes_of(&ChainKey { - list_ph, - idx: item_idx, - namespace: CHAIN_NAMESPACE, - }))?; - - // remove item - self.remove_raw(&item_key)?; - - Ok(Some(existing_val)) - } - - const LIST_KEY_SUFFIX_LEN: usize = size_of::() + ITEM_NAMESPACE.len(); - - fn get_from_list_at_index( - &self, - list_ph: PartedHash, - idx: u64, - truncate: bool, - ) -> Result, Vec)>> { - let Some(item_ph_bytes) = self.get_raw(bytes_of(&ChainKey { - idx, - list_ph, - namespace: CHAIN_NAMESPACE, - }))? - else { - return Ok(None); - }; - let item_ph = *from_bytes::(&item_ph_bytes); - - let mut suffix = [0u8; Self::LIST_KEY_SUFFIX_LEN]; - suffix[0..size_of::()].copy_from_slice(bytes_of(&list_ph)); - suffix[size_of::()..].copy_from_slice(ITEM_NAMESPACE); - - for (mut k, mut v) in self.get_by_hash(item_ph)? { - if k.ends_with(&suffix) && v.ends_with(bytes_of(&idx)) { - if truncate { - v.truncate(v.len() - size_of::()); - k.truncate(k.len() - suffix.len()); - } - return Ok(Some((item_ph, k, v))); - } - } - - Ok(None) - } - - /// Compacts (rewrites) the list such that there will be no holes. Holes are created when removing an - /// element from the middle of the list (not the head or tail), which makes iteration less efficient. - /// You should call this function every so often if you're removing elements from lists at random locations. - /// The function takes parameters that control when to compact: the list has to be of a minimal length and - /// have a minimal holes-to-length ratio. The default values are expected to be okay for most use cases. 
- /// Returns true if the list was compacted, false otherwise. - /// - /// Note: **Not crash-safe** - pub fn compact_list_if_needed + ?Sized>( - &self, - list_key: &B, - params: ListCompactionParams, - ) -> Result { - let (list_ph, list_key) = self.make_list_key(list_key.as_ref().to_owned()); - let _guard = self.lock_list(list_ph); - - let Some(list_bytes) = self.get_raw(&list_key)? else { - return Ok(false); - }; - let list = *from_bytes::(&list_bytes); - if list.span_len() < params.min_length { - return Ok(false); - } - if (list.holes() as f64) < (list.span_len() as f64) * params.min_holes_ratio { - return Ok(false); - } - - let mut new_idx = list.tail_idx; - for idx in list.head_idx..list.tail_idx { - let Some((item_ph, full_k, mut full_v)) = - self.get_from_list_at_index(list_ph, idx, false)? - else { - continue; - }; - - // create new chain - self.set_raw( - bytes_of(&ChainKey { - idx: new_idx, - list_ph, - namespace: CHAIN_NAMESPACE, - }), - bytes_of(&item_ph), - )?; - - // update item's index suffix - let offset = full_v.len() - size_of::(); - full_v[offset..].copy_from_slice(bytes_of(&new_idx)); - self.set_raw(&full_k, &full_v)?; - - // remove old chain - self.remove_raw(bytes_of(&ChainKey { - idx, - list_ph, - namespace: CHAIN_NAMESPACE, - }))?; - - new_idx += 1; - } - - if list.tail_idx == new_idx { - // list is now empty - self.remove_raw(&list_key)?; - } else { - // update list head and tail, set holes=0 - self.set_raw( - &list_key, - bytes_of(&List { - head_idx: list.tail_idx, - tail_idx: new_idx, - num_items: new_idx - list.tail_idx, - }), - )?; - } - - Ok(true) - } - - /// Iterates over the elements of the list (identified by `list_key`) from the beginning (head) - /// to the end (tail). Note that if items are removed at random locations in the list, the iterator - /// will need to skip these holes. 
If you remove elements from the middle (not head/tail) of the list - /// frequently, and wish to use iteration, consider compacting the list every so often using - /// [Self::compact_list_if_needed] - pub fn iter_list + ?Sized>(&self, list_key: &B) -> ListIterator<'_> { - self.owned_iter_list(list_key.as_ref().to_owned()) - } - - /// Owned version of [Self::iter_list] - pub fn owned_iter_list(&self, list_key: Vec) -> ListIterator<'_> { - let (list_ph, list_key) = self.make_list_key(list_key); - ListIterator { - store: &self, - list_key, - list_ph, - range: None, - fwd: true, - } - } - - /// Same as [Self::iter_list] but iterates from the end (tail) to the beginning (head) - pub fn iter_list_backwards + ?Sized>(&self, list_key: &B) -> ListIterator<'_> { - self.owned_iter_list_backwards(list_key.as_ref().to_owned()) - } - - /// Owned version of [Self::iter_list_backwards] - pub fn owned_iter_list_backwards(&self, list_key: Vec) -> ListIterator<'_> { - let (list_ph, list_key) = self.make_list_key(list_key); - ListIterator { - store: &self, - list_key, - list_ph, - range: None, - fwd: false, - } - } - - /// Discards the given list, removing all elements it contains and dropping the list itself. - /// This is more efficient than iteration + removal of each element. - pub fn discard_list + ?Sized>(&self, list_key: &B) -> Result { - self.owned_discard_list(list_key.as_ref().to_owned()) - } - - /// Owned version of [Self::discard_list] - pub fn owned_discard_list(&self, list_key: Vec) -> Result { - let (list_ph, list_key) = self.make_list_key(list_key); - let _guard = self.lock_list(list_ph); - - let Some(list_bytes) = self.get_raw(&list_key)? else { - return Ok(false); - }; - let list = *from_bytes::(&list_bytes); - for idx in list.head_idx..list.tail_idx { - let Some((_, full_key, _)) = self.get_from_list_at_index(list_ph, idx, false)? 
else { - continue; - }; - self.remove_raw(bytes_of(&ChainKey { - list_ph, - idx, - namespace: CHAIN_NAMESPACE, - }))?; - self.remove_raw(&full_key)?; - } - self.remove_raw(&list_key)?; - - Ok(true) - } - - /// Returns the first (head) element of the list - pub fn peek_list_head + ?Sized>(&self, list_key: &B) -> Result> { - self.owned_peek_list_head(list_key.as_ref().to_owned()) - } - - /// Owned version of [Self::peek_list_head] - pub fn owned_peek_list_head(&self, list_key: Vec) -> Result> { - let Some(kv) = self.owned_iter_list(list_key).next() else { - return Ok(None); - }; - Ok(Some(kv?)) - } - - /// Returns the last (tail) element of the list - pub fn peek_list_tail + ?Sized>(&self, list_key: &B) -> Result> { - self.owned_peek_list_tail(list_key.as_ref().to_owned()) - } - - /// Owned version of [Self::peek_list_tail] - pub fn owned_peek_list_tail(&self, list_key: Vec) -> Result> { - for kv in self.owned_iter_list_backwards(list_key) { - return Ok(Some(kv?)); - } - Ok(None) - } - - /// Removes and returns the first (head) element of the list - pub fn pop_list_head + ?Sized>(&self, list_key: &B) -> Result> { - self.owned_pop_list_head(list_key.as_ref().to_owned()) - } - - fn _operate_on_list( - &self, - list_key: Vec, - default: T, - func: impl FnOnce(PartedHash, Vec, List) -> Result, - ) -> Result { - let (list_ph, list_key) = self.make_list_key(list_key); - let _guard = self.lock_list(list_ph); - let Some(list_bytes) = self.get_raw(&list_key)? else { - return Ok(default); - }; - let list = *from_bytes::(&list_bytes); - func(list_ph, list_key, list) - } - - fn _owned_pop_list(&self, list_key: Vec, fwd: bool) -> Result> { - self._operate_on_list(list_key, None, |list_ph, list_key, mut list| { - let range = list.head_idx..list.tail_idx; - - let mut pop = |idx| -> Result> { - let Some((_, mut untrunc_k, mut untrunc_v)) = - self.get_from_list_at_index(list_ph, idx, false)? 
- else { - return Ok(None); - }; - - if fwd { - list.head_idx = idx + 1; - } else { - list.tail_idx = idx - 1; - } - list.num_items -= 1; - if list.is_empty() { - self.remove_raw(&list_key)?; - } else { - self.set_raw(&list_key, bytes_of(&list))?; - } - - // remove chain - self.remove_raw(bytes_of(&ChainKey { - list_ph, - idx, - namespace: CHAIN_NAMESPACE, - }))?; - - // remove item - self.remove_raw(&untrunc_k)?; - - untrunc_v.truncate(untrunc_v.len() - size_of::()); - untrunc_k.truncate(untrunc_k.len() - Self::LIST_KEY_SUFFIX_LEN); - Ok(Some((untrunc_k, untrunc_v))) - }; - - if fwd { - for idx in range { - if let Some(kv) = pop(idx)? { - return Ok(Some(kv)); - } - } - } else { - for idx in range.rev() { - if let Some(kv) = pop(idx)? { - return Ok(Some(kv)); - } - } - } - - Ok(None) - }) - } - - /// Owned version of [Self::peek_list_tail] - pub fn owned_pop_list_head(&self, list_key: Vec) -> Result> { - self._owned_pop_list(list_key, true /* fwd */) - } - - /// Removes and returns the last (tail) element of the list - pub fn pop_list_tail + ?Sized>(&self, list_key: &B) -> Result> { - self.owned_pop_list_tail(list_key.as_ref().to_owned()) - } - - /// Owned version of [Self::peek_list_tail] - pub fn owned_pop_list_tail(&self, list_key: Vec) -> Result> { - self._owned_pop_list(list_key, false /* fwd */) - } - - /// Returns the estimated list length - pub fn list_len + ?Sized>(&self, list_key: &B) -> Result { - self.owned_list_len(list_key.as_ref().to_owned()) - } - pub fn owned_list_len(&self, list_key: Vec) -> Result { - let (_, list_key) = self.make_list_key(list_key); - - let Some(list_bytes) = self.get_raw(&list_key)? else { - return Ok(0); - }; - - Ok(from_bytes::(&list_bytes).num_items as usize) - } - - /// iterate over the given list and retain all elements for which the predicate returns `true`. In other - /// words, drop all other elements. 
This operation is not crash safe, and holds the list locked during the - /// whole iteration, so no other gets/sets/deletes can be done in by other threads on this list while - /// iterating over it. Beware of deadlocks. - /// - /// This operation will also compact the list, basically popping all elements and re-pushing the retained - /// ones at the end, so no holes will exist by the end. - pub fn retain_in_list + ?Sized>( - &self, - list_key: &B, - func: impl FnMut(&[u8], &[u8]) -> Result, - ) -> Result<()> { - self.owned_retain_in_list(list_key.as_ref().to_owned(), func) - } - - /// owned version of [Self::retain_in_list] - pub fn owned_retain_in_list( - &self, - list_key: Vec, - mut func: impl FnMut(&[u8], &[u8]) -> Result, - ) -> Result<()> { - self._operate_on_list(list_key, (), |list_ph, list_key, mut list| { - let range = list.head_idx..list.tail_idx; - - for idx in range { - list.head_idx = idx + 1; - let Some((item_ph, untrunc_k, mut untrunc_v)) = - self.get_from_list_at_index(list_ph, idx, false)? - else { - continue; - }; - - untrunc_v.truncate(untrunc_v.len() - size_of::()); - let mut v = untrunc_v; - let k = &untrunc_k[..untrunc_k.len() - Self::LIST_KEY_SUFFIX_LEN]; - - // remove chain - self.remove_raw(bytes_of(&ChainKey { - list_ph, - idx, - namespace: CHAIN_NAMESPACE, - }))?; - - if func(k, &v)? 
{ - let tail_idx = list.tail_idx; - list.tail_idx += 1; - - // create chain - self.set_raw( - bytes_of(&ChainKey { - list_ph, - idx: tail_idx, - namespace: CHAIN_NAMESPACE, - }), - bytes_of(&item_ph), - )?; - - // create new item - v.extend_from_slice(bytes_of(&tail_idx)); - self.set_raw(&untrunc_k, &v)?; - } else { - // drop from list - list.num_items -= 1; - - // remove item - self.remove_raw(&untrunc_k)?; - } - } - // defer updating the list to the very end to save on IOs - if list.is_empty() { - self.remove_raw(&list_key)?; - } else { - self.set_raw(&list_key, bytes_of(&list))?; - } - Ok(()) - }) - } -} diff --git a/src/pacer.rs b/src/pacer.rs new file mode 100644 index 0000000..2fb23b6 --- /dev/null +++ b/src/pacer.rs @@ -0,0 +1,253 @@ +use std::time::{Duration, Instant}; + +/// A token-bucket pacer. +/// +/// The pacer refills `tokens_per_unit` tokens every `time_unit`, up to `max_tokens`. +/// Calls to `consume` spend immediately available tokens and block until enough +/// tokens have accrued to satisfy the request. +pub struct Pacer { + time_unit: Duration, + tokens_per_unit: u64, + max_tokens: u64, // burst capacity + last_refill: Instant, + available_tokens: u64, +} + +impl Pacer { + /// Creates a new pacer. + /// + /// `tokens_per_unit` must be non-zero and `time_unit` must be non-zero. + /// `max_tokens` is promoted to at least `tokens_per_unit`, ensuring the + /// bucket can hold one full refill interval. 
+ pub fn new(tokens_per_unit: u64, time_unit: Duration, max_tokens: u64) -> Self { + assert!(tokens_per_unit > 0 && !time_unit.is_zero()); + let max_tokens = max_tokens.max(tokens_per_unit); + + Pacer { + time_unit, + tokens_per_unit, + max_tokens, + last_refill: Instant::now(), + available_tokens: max_tokens, + } + } + + fn added_tokens( + elapsed_ns: u128, + time_unit_ns: u128, + tokens_per_unit: u64, + capacity: u64, + ) -> u64 { + let produced_tokens = elapsed_ns.saturating_mul(tokens_per_unit as u128) / time_unit_ns; + produced_tokens.min(capacity as u128) as u64 + } + + fn duration_from_nanos_saturating(total_nanos: u128) -> Duration { + let secs = total_nanos / 1_000_000_000; + if secs > u64::MAX as u128 { + return Duration::MAX; + } + + Duration::new(secs as u64, (total_nanos % 1_000_000_000) as u32) + } + + fn refill(&mut self, now: Instant) { + if self.available_tokens == self.max_tokens { + self.last_refill = now; + return; + } + + let elapsed_ns = now.saturating_duration_since(self.last_refill).as_nanos(); + let time_unit_ns = self.time_unit.as_nanos(); + let capacity = self.max_tokens - self.available_tokens; + let added_tokens = + Self::added_tokens(elapsed_ns, time_unit_ns, self.tokens_per_unit, capacity); + if added_tokens == 0 { + return; + } + + self.available_tokens += added_tokens; + + if self.available_tokens == self.max_tokens { + self.last_refill = now; + } else { + // Advance last_refill by exact time accounted for by added_tokens + let time_advanced_ns = + (added_tokens as u128 * time_unit_ns) / self.tokens_per_unit as u128; + self.last_refill += Self::duration_from_nanos_saturating(time_advanced_ns); + } + } + + fn time_until_tokens(&self, now: Instant, tokens_needed: u64) -> Duration { + let elapsed_ns = now.saturating_duration_since(self.last_refill).as_nanos(); + let time_unit_ns = self.time_unit.as_nanos(); + let target_ns = (tokens_needed as u128) + .saturating_mul(time_unit_ns) + .div_ceil(self.tokens_per_unit as u128); + let 
remaining_ns = target_ns.saturating_sub(elapsed_ns); + + Self::duration_from_nanos_saturating(remaining_ns) + } + + /// Consumes `tokens`, sleeping through the provided callback while waiting for refills. + pub fn consume_with_sleep_fn(&mut self, mut tokens: u64, mut sleep: impl FnMut(Duration)) { + while tokens > 0 { + let now = Instant::now(); + self.refill(now); + + if self.available_tokens > 0 { + let consumed = self.available_tokens.min(tokens); + self.available_tokens -= consumed; + tokens -= consumed; + if tokens == 0 { + break; + } + } + + let tokens_to_wait = tokens.min(self.max_tokens); + sleep(self.time_until_tokens(now, tokens_to_wait)); + } + } + + /// Consumes `tokens`, blocking the current thread until enough tokens are available. + pub fn consume(&mut self, tokens: u64) { + self.consume_with_sleep_fn(tokens, std::thread::sleep); + } +} + +#[cfg(test)] +mod tests { + use super::Pacer; + use std::time::{Duration, Instant}; + + #[test] + fn test_consume_zero() { + let mut pacer = Pacer::new(10, Duration::from_millis(10), 40); + pacer.consume_with_sleep_fn(0, |_| unreachable!()); + } + + #[test] + fn test_consume_exact_burst() { + let mut pacer = Pacer::new(10, Duration::from_millis(10), 40); + pacer.consume_with_sleep_fn(40, |_| unreachable!()); + } + + #[test] + fn test_consume_burst_plus_one_sleeps() { + let mut pacer = Pacer::new(10, Duration::from_millis(10), 40); + let mut slept = false; + pacer.consume_with_sleep_fn(41, |d| { + std::thread::sleep(d); + slept = true; + }); + assert!(slept); + } + + #[test] + fn test_tokens_refill_after_idle() { + let mut pacer = Pacer::new(10, Duration::from_millis(10), 40); + pacer.consume_with_sleep_fn(40, |_| unreachable!()); + std::thread::sleep(Duration::from_millis(30)); + pacer.consume_with_sleep_fn(20, |_| unreachable!()); + } + + #[test] + fn test_rate_accuracy() { + let mut pacer = Pacer::new(10, Duration::from_millis(10), 10); + pacer.consume(10); + let t0 = Instant::now(); + pacer.consume(50); + let d 
= t0.elapsed(); + assert!(d >= Duration::from_millis(40), "Too fast: {d:?}"); + assert!(d < Duration::from_millis(150), "Too slow: {d:?}"); + } + + #[test] + fn test_many_small_consumes() { + let mut pacer = Pacer::new(10, Duration::from_millis(10), 10); + pacer.consume(10); + let t0 = Instant::now(); + for _ in 0..30 { + pacer.consume(1); + } + let d = t0.elapsed(); + assert!(d >= Duration::from_millis(20), "Too fast: {d:?}"); + assert!(d < Duration::from_millis(150), "Too slow: {d:?}"); + } + + #[test] + fn test_partial_bucket_refills_before_small_consume() { + let mut pacer = Pacer::new(10, Duration::from_millis(10), 40); + + pacer.consume_with_sleep_fn(5, |_| unreachable!()); + std::thread::sleep(Duration::from_millis(10)); + pacer.consume_with_sleep_fn(1, |_| unreachable!()); + + assert_eq!(pacer.available_tokens, 39); + } + + #[test] + fn test_waits_for_fractional_token_interval() { + let mut pacer = Pacer::new(10, Duration::from_millis(10), 10); + let mut requested_sleep = None; + + pacer.consume_with_sleep_fn(10, |_| unreachable!()); + pacer.consume_with_sleep_fn(1, |duration| { + requested_sleep = Some(duration); + std::thread::sleep(duration); + }); + + let requested_sleep = requested_sleep.expect("consume should need to sleep"); + assert!( + requested_sleep > Duration::ZERO, + "sleep duration should be positive" + ); + assert!( + requested_sleep < Duration::from_millis(5), + "expected to wait for a fractional token interval, got {requested_sleep:?}" + ); + } + + #[test] + fn test_burst_capacity_promotion() { + let mut pacer = Pacer::new(100, Duration::from_secs(1), 10); + let mut slept = false; + pacer.consume_with_sleep_fn(100, |_| slept = true); + assert!( + !slept, + "Should not sleep if burst capacity was correctly promoted to 100" + ); + } + + #[test] + fn test_large_consumes_are_batched() { + let mut pacer = Pacer::new(10, Duration::from_millis(10), 20); + let mut sleep_count = 0; + + pacer.consume_with_sleep_fn(20, |_| unreachable!()); + 
pacer.consume_with_sleep_fn(50, |duration| { + sleep_count += 1; + std::thread::sleep(duration); + }); + + assert!( + sleep_count <= 4, + "Should sleep in large batches (<= 4 sleeps), but slept {} times", + sleep_count + ); + } + + #[test] + fn test_added_tokens_caps_before_u64_cast() { + let added_tokens = Pacer::added_tokens(u128::MAX, 1, u64::MAX, 7); + assert_eq!(added_tokens, 7); + } + + #[test] + fn test_duration_from_nanos_saturates() { + assert_eq!( + Pacer::duration_from_nanos_saturating(u128::MAX), + Duration::MAX + ); + } +} diff --git a/src/queues.rs b/src/queues.rs deleted file mode 100644 index 7575a72..0000000 --- a/src/queues.rs +++ /dev/null @@ -1,455 +0,0 @@ -use std::ops::Range; - -use crate::{ - hashing::PartedHash, - store::{QUEUE_ITEM_NAMESPACE, QUEUE_NAMESPACE}, - CandyStore, -}; -use anyhow::Result; -use bytemuck::{bytes_of, checked::from_bytes_mut, from_bytes, Pod, Zeroable}; - -#[derive(Clone, Copy, Pod, Zeroable)] -#[repr(C)] -struct Queue { - head_idx: u64, // inclusive - tail_idx: u64, // exclusive - num_items: u64, -} - -impl Queue { - #[allow(dead_code)] - fn span_len(&self) -> u64 { - self.tail_idx - self.head_idx - } - #[allow(dead_code)] - fn holes(&self) -> u64 { - self.span_len() - self.num_items - } - fn is_empty(&self) -> bool { - self.head_idx == self.tail_idx - } -} - -enum QueuePos { - Head, - Tail, -} - -pub struct QueueIterator<'a> { - store: &'a CandyStore, - queue_key: Vec, - range: Option>, - fwd: bool, -} - -impl<'a> Iterator for QueueIterator<'a> { - type Item = Result<(usize, Vec)>; - fn next(&mut self) -> Option { - if self.range.is_none() { - match self.store.fetch_queue(&self.queue_key) { - Ok(queue) => match queue { - Some(queue) => { - self.range = Some(queue.head_idx..queue.tail_idx); - } - None => return None, - }, - Err(e) => return Some(Err(e)), - } - } - - loop { - let idx = if self.fwd { - self.range.as_mut().unwrap().next() - } else { - self.range.as_mut().unwrap().next_back() - }; - let Some(idx) = idx 
else { - return None; - }; - - match self - .store - .get_raw(&self.store.make_queue_item_key(&self.queue_key, idx)) - { - Ok(v) => { - match v { - Some(v) => return Some(Ok((idx as usize, v))), - None => { - // continue, we might have holes - } - } - } - Err(e) => return Some(Err(e)), - } - } - } - - fn size_hint(&self) -> (usize, Option) { - if let Some(ref range) = self.range { - range.size_hint() - } else { - (0, None) - } - } -} - -impl CandyStore { - const FIRST_QUEUE_IDX: u64 = 0x8000_0000_0000_0000; - - fn make_queue_key(&self, queue_key: &[u8]) -> (PartedHash, Vec) { - let mut full_queue_key = queue_key.to_owned(); - full_queue_key.extend_from_slice(QUEUE_NAMESPACE); - ( - PartedHash::new(&self.config.hash_seed, &queue_key), - full_queue_key, - ) - } - fn make_queue_item_key(&self, queue_key: &[u8], idx: u64) -> Vec { - let mut item_key = queue_key.to_owned(); - item_key.extend_from_slice(bytes_of(&idx)); - item_key.extend_from_slice(QUEUE_ITEM_NAMESPACE); - item_key - } - - fn _push_to_queue(&self, queue_key: &[u8], val: &[u8], pos: QueuePos) -> Result { - let (queue_ph, full_queue_key) = self.make_queue_key(queue_key); - let _guard = self.lock_list(queue_ph); - - let status = self.get_or_create_raw( - &full_queue_key, - bytes_of(&Queue { - head_idx: Self::FIRST_QUEUE_IDX, - tail_idx: Self::FIRST_QUEUE_IDX + 1, - num_items: 1, - }) - .to_owned(), - )?; - - let item_idx = match status { - crate::GetOrCreateStatus::CreatedNew(_) => Self::FIRST_QUEUE_IDX, - crate::GetOrCreateStatus::ExistingValue(mut queue_bytes) => { - let queue = from_bytes_mut::(&mut queue_bytes); - let item_idx = match pos { - QueuePos::Head => { - queue.head_idx -= 1; - queue.head_idx - } - QueuePos::Tail => { - let item_idx = queue.tail_idx; - queue.tail_idx += 1; - item_idx - } - }; - queue.num_items += 1; - self.set_raw(&full_queue_key, &queue_bytes)?; - item_idx - } - }; - - self.set_raw(&self.make_queue_item_key(queue_key, item_idx), val)?; - Ok(item_idx as usize) - } - - /// 
Pushed a new element at the front (head) of the queue, returning the element's index in the queue - pub fn push_to_queue_head + ?Sized, B2: AsRef<[u8]> + ?Sized>( - &self, - queue_key: &B1, - val: &B2, - ) -> Result { - self._push_to_queue(queue_key.as_ref(), val.as_ref(), QueuePos::Head) - } - - /// Pushed a new element at the end (tail) of the queue, returning the element's index in the queue - pub fn push_to_queue_tail + ?Sized, B2: AsRef<[u8]> + ?Sized>( - &self, - queue_key: &B1, - val: &B2, - ) -> Result { - self._push_to_queue(queue_key.as_ref(), val.as_ref(), QueuePos::Tail) - } - - fn _pop_queue(&self, queue_key: &[u8], pos: QueuePos) -> Result)>> { - let (queue_ph, full_queue_key) = self.make_queue_key(queue_key); - let _guard = self.lock_list(queue_ph); - - let Some(mut queue_bytes) = self.get_raw(&full_queue_key)? else { - return Ok(None); - }; - let queue = from_bytes_mut::(&mut queue_bytes); - let mut res = None; - - match pos { - QueuePos::Head => { - while queue.head_idx < queue.tail_idx { - let idx = queue.head_idx; - queue.head_idx += 1; - if let Some(v) = self.remove_raw(&self.make_queue_item_key(queue_key, idx))? { - res = Some((idx as usize, v)); - queue.num_items -= 1; - break; - } - } - } - QueuePos::Tail => { - while queue.tail_idx > queue.head_idx { - queue.tail_idx -= 1; - let idx = queue.tail_idx; - if let Some(v) = self.remove_raw(&self.make_queue_item_key(queue_key, idx))? 
{ - res = Some((idx as usize, v)); - queue.num_items -= 1; - break; - } - } - } - } - - if queue.is_empty() { - self.remove_raw(&full_queue_key)?; - } else { - self.set_raw(&full_queue_key, &queue_bytes)?; - } - - Ok(res) - } - - /// Removes and returns the head element and its index of the queue, or None if the queue is empty - pub fn pop_queue_head_with_idx + ?Sized>( - &self, - queue_key: &B, - ) -> Result)>> { - self._pop_queue(queue_key.as_ref(), QueuePos::Head) - } - - /// Removes and returns the head element of the queue, or None if the queue is empty - pub fn pop_queue_head + ?Sized>( - &self, - queue_key: &B, - ) -> Result>> { - Ok(self - .pop_queue_head_with_idx(queue_key.as_ref())? - .map(|iv| iv.1)) - } - - /// Removes and returns the tail element and its index of the queue, or None if the queue is empty - pub fn pop_queue_tail_with_idx + ?Sized>( - &self, - queue_key: &B, - ) -> Result)>> { - self._pop_queue(queue_key.as_ref(), QueuePos::Tail) - } - - /// Removes and returns the tail element of the queue, or None if the queue is empty - pub fn pop_queue_tail + ?Sized>( - &self, - queue_key: &B, - ) -> Result>> { - Ok(self.pop_queue_tail_with_idx(queue_key)?.map(|iv| iv.1)) - } - - /// Removes an element by index from the queue, returning the value it had or None if it did not exist (as well - /// as if the queue itself does not exist). - /// - /// This will leave a "hole" in the queue, which means we will skip over it in future iterations, but this could - /// lead to inefficienies as if you keep only the head and tail elements of a long queue, while removing elements - /// from the middle. - pub fn remove_from_queue + ?Sized>( - &self, - queue_key: &B, - idx: usize, - ) -> Result>> { - let idx = idx as u64; - let queue_key = queue_key.as_ref(); - let (queue_ph, full_queue_key) = self.make_queue_key(queue_key); - let _guard = self.lock_list(queue_ph); - - let Some(val) = self.remove_raw(&self.make_queue_item_key(queue_key, idx as u64))? 
else { - return Ok(None); - }; - - if let Some(mut queue_bytes) = self.get_raw(&full_queue_key)? { - let queue = from_bytes_mut::(&mut queue_bytes); - if queue.head_idx == idx { - queue.head_idx += 1; - } - if queue.tail_idx == idx + 1 { - queue.tail_idx -= 1; - } - queue.num_items -= 1; - if queue.is_empty() { - self.remove_raw(&full_queue_key)?; - } else { - self.set_raw(&full_queue_key, &queue_bytes)?; - } - } - - Ok(Some(val)) - } - - /// Discards the queue (dropping all elements in contains). Returns true if it had existed before, false otherwise - pub fn discard_queue + ?Sized>(&self, queue_key: &B) -> Result { - let queue_key = queue_key.as_ref(); - let (queue_ph, full_queue_key) = self.make_queue_key(queue_key); - let _guard = self.lock_list(queue_ph); - - let Some(queue_bytes) = self.get_raw(&full_queue_key)? else { - return Ok(false); - }; - let queue = from_bytes::(&queue_bytes); - - for i in queue.head_idx..queue.tail_idx { - self.remove_raw(&self.make_queue_item_key(queue_key, i as u64))?; - } - - self.remove_raw(&full_queue_key)?; - Ok(true) - } - - fn fetch_queue(&self, queue_key: &[u8]) -> Result> { - let queue_key = queue_key.as_ref(); - let (queue_ph, full_queue_key) = self.make_queue_key(queue_key); - let _guard = self.lock_list(queue_ph); - if let Some(queue_bytes) = self.get_raw(&full_queue_key)? { - Ok(Some(*from_bytes::(&queue_bytes))) - } else { - Ok(None) - } - } - - /// Extends the queue with elements from the given iterator. The queue will be created if it did not exist before, - /// and elements are pushed at the tail-end of the queue. This is more efficient than calling - /// [Self::push_to_queue_tail] in a loop - /// - /// Note: this is not an atomic (crash-safe) operation: if your program crashes while extending the queue, it - /// is possible that only some of the elements will have been appended. 
- /// - /// Returns the indices of the elements added (a range) - pub fn extend_queue<'a, B: AsRef<[u8]> + ?Sized>( - &self, - queue_key: &B, - items: impl Iterator>, - ) -> Result> { - let queue_key = queue_key.as_ref(); - let (queue_ph, full_queue_key) = self.make_queue_key(queue_key); - let _guard = self.lock_list(queue_ph); - - let mut queue_bytes = &mut self - .get_or_create_raw( - &full_queue_key, - bytes_of(&Queue { - head_idx: Self::FIRST_QUEUE_IDX, - tail_idx: Self::FIRST_QUEUE_IDX, - num_items: 0, - }) - .to_owned(), - )? - .value(); - - let queue = from_bytes_mut::(&mut queue_bytes); - - let first_idx = queue.tail_idx; - for item in items { - self.set_raw( - &self.make_queue_item_key(queue_key, queue.tail_idx), - item.as_ref(), - )?; - queue.tail_idx += 1; - queue.num_items += 1; - } - - let indices = first_idx as usize..queue.tail_idx as usize; - self.set_raw(&full_queue_key, &queue_bytes)?; - - Ok(indices) - } - - /// Returns (without removing) the head element of the queue and its index, or None if the queue is empty - pub fn peek_queue_head_with_idx + ?Sized>( - &self, - queue_key: &B, - ) -> Result)>> { - for res in self.iter_queue(queue_key) { - return Ok(Some(res?)); - } - Ok(None) - } - - /// Returns (without removing) the head element of the queue, or None if the queue is empty - pub fn peek_queue_head + ?Sized>( - &self, - queue_key: &B, - ) -> Result>> { - for res in self.iter_queue(queue_key) { - return Ok(Some(res?.1)); - } - Ok(None) - } - - /// Returns (without removing) the head element of the queue and its index, or None if the queue is empty - pub fn peek_queue_tail_with_idx + ?Sized>( - &self, - queue_key: &B, - ) -> Result)>> { - for res in self.iter_queue_backwards(queue_key) { - return Ok(Some(res?)); - } - Ok(None) - } - - /// Returns (without removing) the tail element of the queue, or None if the queue is empty - pub fn peek_queue_tail + ?Sized>( - &self, - queue_key: &B, - ) -> Result>> { - for res in 
self.iter_queue_backwards(queue_key) { - return Ok(Some(res?.1)); - } - Ok(None) - } - - /// Returns a forward iterator (head to tail) over the elements of the queue. If the queue does not exist, - /// this is an empty iterator. - /// - /// Note: the iterator will go over the indices that existed when it was created -- new elements that are - /// pushed afterwards will not be returned - pub fn iter_queue<'a, B: AsRef<[u8]> + ?Sized>(&'a self, queue_key: &B) -> QueueIterator<'a> { - QueueIterator { - store: &self, - queue_key: queue_key.as_ref().to_owned(), - range: None, - fwd: true, - } - } - - /// Returns a backward iterator (tail to head) over the elements of the queue. If the queue does not exist, - /// this is an empty iterator. - /// - /// Note: the iterator will go over the indices that existed when it was created -- new elements that are - /// pushed afterwards will not be returned - pub fn iter_queue_backwards<'a, B: AsRef<[u8]> + ?Sized>( - &'a self, - queue_key: &B, - ) -> QueueIterator<'a> { - QueueIterator { - store: &self, - queue_key: queue_key.as_ref().to_owned(), - range: None, - fwd: false, - } - } - - /// Returns a the length of the given queue (number of elements in the queue) or 0 if the queue does not exist - pub fn queue_len + ?Sized>(&self, queue_key: &B) -> Result { - let Some(queue) = self.fetch_queue(queue_key.as_ref())? else { - return Ok(0); - }; - Ok(queue.num_items as usize) - } - - /// Returns a the range (indices) of the given queue or an empty range if the queue does not exist - pub fn queue_range + ?Sized>(&self, queue_key: &B) -> Result> { - let Some(queue) = self.fetch_queue(queue_key.as_ref())? 
else { - return Ok(Self::FIRST_QUEUE_IDX as usize..Self::FIRST_QUEUE_IDX as usize); - }; - Ok(queue.head_idx as usize..queue.tail_idx as usize) - } -} diff --git a/src/router.rs b/src/router.rs deleted file mode 100644 index a53a26c..0000000 --- a/src/router.rs +++ /dev/null @@ -1,548 +0,0 @@ -use anyhow::ensure; -use parking_lot::RwLock; -use std::{ops::Range, sync::Arc}; - -use crate::shard::{CompactionThreadPool, InsertMode, InsertStatus, Shard}; -use crate::stats::InternalStats; -use crate::Result; -use crate::{hashing::PartedHash, store::InternalConfig}; - -fn consolidate_ranges(mut ranges: Vec>) -> (Vec>, Vec>) { - // we may encounter unfinished splits, where we have any combination of the bottom half, top half and - // original shard existing. in this case, we want to keep the largest of them, e.g, suppose we find - // [0..16, 0..32], we want to remove 0..16 and keep only 0..32. to do that, we sort by `start` - // followed by sorting by end, so [0..16, 16..32, 0..32] is sorted as [0..32, 0..16, 16..32], which means - // we'll encounter all over-arching ranges before smaller ones - ranges.sort_by(|a, b| { - if a.start == b.start { - b.end.cmp(&a.end) - } else { - a.start.cmp(&b.start) - } - }); - - let mut removed = vec![]; - let mut i = 1; - while i < ranges.len() { - if ranges[i].start >= ranges[i - 1].start && ranges[i].end <= ranges[i - 1].end { - removed.push(ranges.remove(i)); - } else { - i += 1; - } - } - (ranges, removed) -} - -#[test] -fn test_consolidate_ranges() { - assert_eq!(consolidate_ranges(vec![0..16]), (vec![0..16], vec![])); - assert_eq!( - consolidate_ranges(vec![16..32, 0..16]), - (vec![0..16, 16..32], vec![]) - ); - assert_eq!( - consolidate_ranges(vec![16..32, 0..16, 0..32]), - (vec![0..32], vec![0..16, 16..32]) - ); - assert_eq!( - consolidate_ranges(vec![16..32, 0..16, 0..32, 48..64, 32..48, 50..60]), - (vec![0..32, 32..48, 48..64], vec![0..16, 16..32, 50..60]) - ); -} - -#[derive(Clone)] -enum ShardNode { - Leaf(Arc), - Vertex(Arc, 
Arc), -} - -impl ShardNode { - fn span(&self) -> Range { - match self { - Self::Leaf(sh) => sh.span.clone(), - Self::Vertex(bottom, top) => bottom.span.start..top.span.end, - } - } - fn len(&self) -> u32 { - self.span().end - self.span().start - } -} - -pub(crate) struct ShardRouter { - span: Range, - config: Arc, - node: RwLock, - stats: Arc, - threadpool: Arc, -} - -impl ShardRouter { - pub(crate) const END_OF_SHARDS: u32 = 1u32 << 16; - - pub(crate) fn new( - config: Arc, - stats: Arc, - threadpool: Arc, - ) -> Result { - let mut shards = Self::load(&config, &stats, &threadpool)?; - if shards.is_empty() { - shards = Self::create_initial_shards(&config, &stats, &threadpool)?; - } - let root = Self::treeify(shards, &stats, &threadpool); - Ok(Self { - span: root.span(), - config, - node: RwLock::new(root), - stats, - threadpool, - }) - } - - fn load( - config: &Arc, - stats: &Arc, - threadpool: &Arc, - ) -> Result>> { - let mut found_shards = vec![]; - for res in std::fs::read_dir(&config.dir_path)? 
{ - let entry = res?; - let filename = entry.file_name(); - let Some(filename) = filename.to_str() else { - continue; - }; - let Ok(filetype) = entry.file_type() else { - continue; - }; - if !filetype.is_file() { - continue; - } - if filename.starts_with("bottom_") - || filename.starts_with("top_") - || filename.starts_with("merge_") - { - std::fs::remove_file(entry.path())?; - continue; - } else if !filename.starts_with("shard_") { - continue; - } - let Some((_, span)) = filename.split_once("_") else { - continue; - }; - let Some((start, end)) = span.split_once("-") else { - continue; - }; - let start = u32::from_str_radix(start, 16).expect(filename); - let end = u32::from_str_radix(end, 16).expect(filename); - - ensure!( - start < end && end <= Self::END_OF_SHARDS, - "Bad span for {filename}" - ); - - found_shards.push(start..end); - } - - let (shards_to_keep, shards_to_remove) = consolidate_ranges(found_shards); - for span in shards_to_remove { - std::fs::remove_file( - config - .dir_path - .join(format!("shard_{:04x}-{:04x}", span.start, span.end)), - )?; - } - - if shards_to_keep.is_empty() { - return Ok(vec![]); - } - - let mut shards = vec![]; - let mut current = 0; - - for span in shards_to_keep { - if span.start > current { - let mut gap_start = current; - let gap_end = span.start; - while gap_start < gap_end { - let mut size = 1; - while gap_start % (size * 2) == 0 && gap_start + (size * 2) <= gap_end { - size *= 2; - } - shards.push(Arc::new(Shard::open( - gap_start..gap_start + size, - true, - config.clone(), - stats.clone(), - threadpool.clone(), - )?)); - gap_start += size; - } - } - - shards.push(Arc::new(Shard::open( - span.clone(), - false, - config.clone(), - stats.clone(), - threadpool.clone(), - )?)); - current = span.end; - } - - if current < Self::END_OF_SHARDS { - let mut gap_start = current; - let gap_end = Self::END_OF_SHARDS; - while gap_start < gap_end { - let mut size = 1; - while gap_start % (size * 2) == 0 && gap_start + (size * 2) <= 
gap_end { - size *= 2; - } - shards.push(Arc::new(Shard::open( - gap_start..gap_start + size, - true, - config.clone(), - stats.clone(), - threadpool.clone(), - )?)); - gap_start += size; - } - } - - Ok(shards) - } - - fn calc_step(num_items: usize) -> u32 { - let step = (Self::END_OF_SHARDS as f64) - / (num_items as f64 / Shard::EXPECTED_CAPACITY as f64).max(1.0); - 1 << (step as u32).ilog2() - } - pub(crate) fn calc_num_shards(num_items: usize) -> u32 { - Self::END_OF_SHARDS / Self::calc_step(num_items) - } - - fn create_initial_shards( - config: &Arc, - stats: &Arc, - threadpool: &Arc, - ) -> Result>> { - let step = Self::calc_step(config.expected_number_of_keys); - - let mut shards = vec![]; - let mut start = 0; - while start < Self::END_OF_SHARDS { - let end = start + step; - shards.push(Arc::new(Shard::open( - start..end, - true, - config.clone(), - stats.clone(), - threadpool.clone(), - )?)); - start = end; - } - - Ok(shards) - } - - fn from_shardnode( - n: ShardNode, - stats: Arc, - threadpool: Arc, - ) -> Self { - let config = match n { - ShardNode::Leaf(ref sh) => sh.config.clone(), - ShardNode::Vertex(ref bottom, _) => bottom.config.clone(), - }; - Self { - config, - span: n.span(), - node: RwLock::new(n), - stats, - threadpool, - } - } - - fn treeify( - shards: Vec>, - stats: &Arc, - threadpool: &Arc, - ) -> ShardNode { - // algorithm: first find the smallest span, and let that be our base unit, say it's 1K. then go over - // 0..64K in 1K increments and pair up every consecutive pairs whose size is 1K. we count on the spans to be - // sorted, so we'll merge 0..1K with 1K..2K, and not 1K..3K with 2K..3K. - // then we double our base unit and repeat, until base unit = 64K. 
- - let mut nodes = vec![]; - let mut unit: u32 = Self::END_OF_SHARDS; - { - let mut spans_debug: Vec> = vec![]; - for sh in shards { - assert!( - spans_debug.is_empty() || spans_debug.last().unwrap().start != sh.span.start, - "two elements with the same start {spans_debug:?} {:?}", - sh.span - ); - spans_debug.push(sh.span.clone()); - let n = ShardNode::Leaf(sh); - if unit > n.len() { - unit = n.len(); - } - nodes.push(n); - } - assert!( - spans_debug.is_sorted_by(|a, b| a.start < b.start), - "not sorted {spans_debug:?}" - ); - - assert!(unit >= 1 && unit.is_power_of_two(), "unit={unit}"); - assert!(nodes.len() > 0, "No shards to merge"); - assert!(nodes.len() > 1 || unit == Self::END_OF_SHARDS); - } - - while unit < Self::END_OF_SHARDS { - let mut i = 0; - while i < nodes.len() - 1 { - if nodes[i].len() == unit && nodes[i + 1].len() == unit { - let n0 = nodes.remove(i); - let n1 = nodes.remove(i); - nodes.insert( - i, - ShardNode::Vertex( - Arc::new(Self::from_shardnode(n0, stats.clone(), threadpool.clone())), - Arc::new(Self::from_shardnode(n1, stats.clone(), threadpool.clone())), - ), - ); - } else { - i += 1; - } - } - - unit *= 2; - } - - assert_eq!(nodes.len(), 1); - nodes.remove(0) - } - - pub(crate) fn shared_op( - &self, - shard_selector: u32, - func: impl FnOnce(&Shard) -> Result, - ) -> Result { - match &*self.node.read() { - ShardNode::Leaf(sh) => func(sh), - ShardNode::Vertex(bottom, top) => { - if shard_selector < bottom.span.end { - bottom.shared_op(shard_selector, func) - } else { - top.shared_op(shard_selector, func) - } - } - } - } - - pub(crate) fn clear(&self) -> Result<()> { - let mut guard = self.node.write(); - - for res in std::fs::read_dir(&self.config.dir_path)? 
{ - let entry = res?; - let filename = entry.file_name(); - let Some(filename) = filename.to_str() else { - continue; - }; - let Ok(filetype) = entry.file_type() else { - continue; - }; - if !filetype.is_file() { - continue; - } - if filename.starts_with("shard_") - || filename.starts_with("compact_") - || filename.starts_with("bottom_") - || filename.starts_with("top_") - { - std::fs::remove_file(entry.path())?; - } - } - - let shards = Self::create_initial_shards(&self.config, &self.stats, &self.threadpool)?; - *guard = Self::treeify(shards, &self.stats, &self.threadpool); - - Ok(()) - } - - pub(crate) fn call_on_all_shards( - &self, - mut func: impl FnMut(&Shard) -> Result + Copy, - ) -> Result> { - match &*self.node.read() { - ShardNode::Leaf(sh) => Ok(vec![func(sh)?]), - ShardNode::Vertex(bottom, top) => { - let mut v = bottom.call_on_all_shards(func)?; - v.extend(top.call_on_all_shards(func)?); - Ok(v) - } - } - } - - pub(crate) fn insert( - &self, - ph: PartedHash, - full_key: &[u8], - val: &[u8], - mode: InsertMode, - ) -> Result { - loop { - let res = match &*self.node.read() { - ShardNode::Leaf(sh) => sh.insert(ph, full_key, val, mode)?, - ShardNode::Vertex(bottom, top) => { - if ph.shard_selector() < bottom.span.end { - bottom.insert(ph, full_key, val, mode)? - } else { - top.insert(ph, full_key, val, mode)? 
- } - } - }; - - match res { - InsertStatus::SplitNeeded => { - let mut guard = self.node.write(); - let ShardNode::Leaf(sh) = &*guard else { - // already split - continue; - }; - - let (bottom, top) = sh.split()?; - - *guard = ShardNode::Vertex( - Arc::new(ShardRouter { - span: bottom.span.clone(), - config: self.config.clone(), - node: RwLock::new(ShardNode::Leaf(Arc::new(bottom))), - stats: self.stats.clone(), - threadpool: self.threadpool.clone(), - }), - Arc::new(ShardRouter { - span: top.span.clone(), - config: self.config.clone(), - node: RwLock::new(ShardNode::Leaf(Arc::new(top))), - stats: self.stats.clone(), - threadpool: self.threadpool.clone(), - }), - ); - - // retry - } - _ => { - return Ok(res); - } - } - } - } - - fn _merge( - &self, - bottom: &ShardRouter, - top: &ShardRouter, - max_fill: usize, - shards_to_remove: &mut u32, - ) -> Result> { - if *shards_to_remove == 0 { - return Ok(None); - } - - let (bottom_node, top_node) = { - let bottom_guard = bottom.node.read(); - let top_guard = top.node.read(); - (bottom_guard.clone(), top_guard.clone()) - }; - - match (bottom_node, top_node) { - (ShardNode::Leaf(b), ShardNode::Leaf(t)) => { - if b.get_stats()?.num_items() > max_fill { - return Ok(None); - } - if t.get_stats()?.num_items() > max_fill { - return Ok(None); - } - if let Some(sh) = Shard::merge(&b, &t)? { - *shards_to_remove = *shards_to_remove - 1; - let span = sh.span.clone(); - Ok(Some(ShardRouter { - config: self.config.clone(), - node: RwLock::new(ShardNode::Leaf(Arc::new(sh))), - span, - stats: self.stats.clone(), - threadpool: self.threadpool.clone(), - })) - } else { - Ok(None) - } - } - (ShardNode::Leaf(_), ShardNode::Vertex(b, t)) => { - if let Some(merged_top) = self._merge(&b, &t, max_fill, shards_to_remove)? 
{ - self._merge(bottom, &merged_top, max_fill, shards_to_remove) - } else { - Ok(None) - } - } - (ShardNode::Vertex(b, t), ShardNode::Leaf(_)) => { - if let Some(merged_bottom) = self._merge(&b, &t, max_fill, shards_to_remove)? { - self._merge(&merged_bottom, top, max_fill, shards_to_remove) - } else { - Ok(None) - } - } - (ShardNode::Vertex(b1, t1), ShardNode::Vertex(b2, t2)) => { - let m1 = self._merge(&b1, &t1, max_fill, shards_to_remove)?; - let m2 = self._merge(&b2, &t2, max_fill, shards_to_remove)?; - match (m1, m2) { - (Some(m1), Some(m2)) => self._merge(&m1, &m2, max_fill, shards_to_remove), - (Some(m1), None) => self._merge(&m1, top, max_fill, shards_to_remove), - (None, Some(m2)) => self._merge(bottom, &m2, max_fill, shards_to_remove), - (None, None) => Ok(None), - } - } - } - } - - pub(crate) fn merge_small_shards(&self, max_fill_level: f32) -> Result { - ensure!(max_fill_level > 0.0 && max_fill_level < 0.5); - let max_fill = (Shard::EXPECTED_CAPACITY as f32 * max_fill_level) as usize; - - let mut num_items = 0usize; - let mut starting_num_shards = 0u32; - for count in self.call_on_all_shards(|sh| Ok(sh.get_stats()?.num_items()))? { - starting_num_shards += 1; - num_items += count; - } - - let needed_shards = - Self::calc_num_shards(num_items.max(self.config.expected_number_of_keys)); - - if starting_num_shards <= needed_shards { - return Ok(false); - } - let mut shards_to_remove = starting_num_shards - needed_shards; - - { - let mut guard = self.node.write(); - - match &*guard { - ShardNode::Leaf(_) => None, - ShardNode::Vertex(bottom, top) => { - self._merge(&bottom, &top, max_fill, &mut shards_to_remove)? 
- } - }; - - *guard = Self::treeify( - Self::load(&self.config, &self.stats, &self.threadpool)?, - &self.stats, - &self.threadpool, - ); - } - - let new_num_shards: u32 = self.call_on_all_shards(|_| Ok(1))?.iter().sum(); - - Ok(new_num_shards != starting_num_shards) - } -} diff --git a/src/shard.rs b/src/shard.rs deleted file mode 100644 index 95cffa8..0000000 --- a/src/shard.rs +++ /dev/null @@ -1,1172 +0,0 @@ -use anyhow::bail; -use bytemuck::{bytes_of_mut, Pod, Zeroable}; -use parking_lot::{Mutex, RwLock, RwLockWriteGuard}; -use std::{ - fs::{File, OpenOptions}, - io::Read, - ops::Range, - path::{Path, PathBuf}, - sync::{ - atomic::{AtomicU64, AtomicUsize, Ordering}, - Arc, - }, - thread::JoinHandle, - time::Instant, -}; - -use memmap::{MmapMut, MmapOptions}; - -use crate::Result; -use crate::{ - hashing::{PartedHash, INVALID_SIG}, - stats::InternalStats, - store::InternalConfig, -}; - -// -// these numbers were chosen according to the simulation, as they allow for 90% utilization of the shard with -// virtually zero chance of in-row collisions and "smallish" shard size: shards start at 384KB and -// can hold 32K entries, and since we're limited at 4GB file sizes, we can key-value pairs of up to 128KB -// (keys and values are limited to 64KB each anyway) -// -// other good combinations are 32/512, 32/1024, 64/256, 64/1024, 128/512, 256/256 -// -pub(crate) const NUM_ROWS: usize = 64; -pub(crate) const ROW_WIDTH: usize = 512; - -#[repr(C)] -struct ShardRow { - signatures: [u32; ROW_WIDTH], - offsets_and_sizes: [u64; ROW_WIDTH], // | key_size: 16 | val_size: 16 | file_offset: 32 | -} - -impl ShardRow { - #[inline] - fn lookup(&self, sig: u32, start_idx: &mut usize) -> Option { - use simd_itertools::PositionSimd; - if let Some(rel_idx) = self.signatures[*start_idx..] 
- .iter() - .position_simd(|x| *x == sig) - { - let abs_idx = rel_idx + *start_idx; - *start_idx = abs_idx + 1; - Some(abs_idx) - } else { - None - } - } -} - -#[test] -fn test_row_lookup() -> Result<()> { - let mut row = ShardRow { - signatures: [0; ROW_WIDTH], - offsets_and_sizes: [0; ROW_WIDTH], - }; - - row.signatures[7] = 123; - row.signatures[8] = 123; - row.signatures[9] = 123; - row.signatures[90] = 123; - row.signatures[ROW_WIDTH - 1] = 999; - - let mut start = 0; - assert_eq!(row.lookup(123, &mut start), Some(7)); - assert_eq!(start, 8); - assert_eq!(row.lookup(123, &mut start), Some(8)); - assert_eq!(start, 9); - assert_eq!(row.lookup(123, &mut start), Some(9)); - assert_eq!(start, 10); - assert_eq!(row.lookup(123, &mut start), Some(90)); - assert_eq!(start, 91); - assert_eq!(row.lookup(123, &mut start), None); - assert_eq!(start, 91); - - start = 0; - assert_eq!(row.lookup(0, &mut start), Some(0)); - assert_eq!(start, 1); - - start = 0; - assert_eq!(row.lookup(999, &mut start), Some(ROW_WIDTH - 1)); - assert_eq!(start, ROW_WIDTH); - - assert_eq!(row.lookup(999, &mut start), None); - assert_eq!(start, ROW_WIDTH); - - Ok(()) -} - -#[repr(C, align(4096))] -struct PageAligned(T); - -pub(crate) const SHARD_FILE_MAGIC: [u8; 8] = *b"CandyStr"; -pub(crate) const SHARD_FILE_VERSION: u64 = 11; - -#[derive(Clone, Copy, Default, Debug, Pod, Zeroable)] -#[repr(C)] -struct MetaHeader { - magic: [u8; 8], - version: u64, -} - -#[repr(C)] -struct ShardHeader { - metadata: MetaHeader, - wasted_bytes: AtomicU64, - write_offset: AtomicU64, - num_inserts: AtomicU64, - num_removals: AtomicU64, - compacted_up_to: AtomicUsize, - rows: PageAligned<[ShardRow; NUM_ROWS]>, -} - -pub(crate) const HEADER_SIZE: u64 = size_of::() as u64; -const _: () = assert!(HEADER_SIZE % 4096 == 0); - -#[derive(Debug)] -pub(crate) enum InsertStatus { - Added, - Replaced(Vec), - KeyDoesNotExist, - SplitNeeded, - AlreadyExists(Vec), -} - -#[derive(Debug, Clone, Copy)] -pub(crate) enum InsertMode<'a> 
{ - Set, - Replace(Option<&'a [u8]>), - GetOrCreate, -} - -enum TryReplaceStatus<'a> { - KeyDoesNotExist(RwLockWriteGuard<'a, ()>, bool), - KeyExistsNotReplaced(Vec), - KeyExistsReplaced(Vec), -} - -pub(crate) type KVPair = (Vec, Vec); - -struct MmapFile { - file: File, - mmap: MmapMut, -} - -#[cfg(unix)] -fn read_exact_at(f: &File, buf: &mut [u8], offset: u64) -> std::io::Result<()> { - std::os::unix::fs::FileExt::read_exact_at(f, buf, offset) -} - -#[cfg(unix)] -fn write_all_at(f: &File, buf: &[u8], offset: u64) -> std::io::Result<()> { - std::os::unix::fs::FileExt::write_all_at(f, buf, offset) -} - -#[cfg(windows)] -fn read_exact_at(f: &File, mut buf: &mut [u8], mut offset: u64) -> std::io::Result<()> { - while !buf.is_empty() { - match std::os::windows::fs::FileExt::seek_read(f, buf, offset) { - Ok(0) => break, - Ok(n) => { - let tmp = buf; - buf = &mut tmp[n..]; - offset += n as u64; - } - Err(e) => return Err(e), - } - } - if !buf.is_empty() { - Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof)) - } else { - Ok(()) - } -} - -#[cfg(windows)] -fn write_all_at(f: &File, mut buf: &[u8], mut offset: u64) -> std::io::Result<()> { - while !buf.is_empty() { - match std::os::windows::fs::FileExt::seek_write(f, buf, offset) { - Ok(0) => return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof)), - Ok(n) => { - buf = &buf[n..]; - offset += n as u64; - } - Err(e) => return Err(e), - } - } - Ok(()) -} - -impl MmapFile { - fn new(file: File, mlock_headers: bool) -> Result { - let mmap = unsafe { MmapOptions::new().len(HEADER_SIZE as usize).map_mut(&file) }?; - - #[cfg(windows)] - let _ = mlock_headers; // Prevent unused variable warning on Windows - - #[cfg(unix)] - if mlock_headers { - unsafe { libc::mlock(mmap.as_ptr() as *const _, mmap.len()) }; - } - - // optimization, we don't care about the return code - #[cfg(all(unix, not(target_os = "macos")))] - unsafe { - libc::posix_fallocate( - std::os::fd::AsRawFd::as_raw_fd(&file), - 0, - HEADER_SIZE as 
i64, - ) - }; - - let header = unsafe { &mut *(mmap.as_ptr() as *mut ShardHeader) }; - header.metadata.magic = SHARD_FILE_MAGIC; - header.metadata.version = SHARD_FILE_VERSION; - - Ok(Self { file, mmap }) - } - - fn create(filename: impl AsRef, config: &InternalConfig) -> Result { - let file = OpenOptions::new() - .read(true) - .write(true) - .create(true) - .truncate(true) - .open(filename)?; - file.set_len( - HEADER_SIZE - + if config.truncate_up { - config.max_shard_size as u64 - } else { - 0 - }, - )?; - Self::new(file, config.mlock_headers) - } - - #[inline(always)] - fn header(&self) -> &ShardHeader { - unsafe { &*(self.mmap.as_ptr() as *const ShardHeader) } - } - #[inline(always)] - fn header_mut(&self) -> &mut ShardHeader { - unsafe { &mut *(self.mmap.as_ptr() as *mut ShardHeader) } - } - #[inline(always)] - fn row(&self, row_idx: usize) -> &ShardRow { - &self.header().rows.0[row_idx] - } - #[inline(always)] - fn row_mut(&self, row_idx: usize) -> &mut ShardRow { - &mut self.header_mut().rows.0[row_idx] - } - - // reading doesn't require holding any locks - we only ever extend the file, never overwrite data - fn _read_kv( - &self, - stats: &InternalStats, - offset_and_size: u64, - include_val: bool, - ) -> Result { - let klen = (offset_and_size >> 48) as usize; - debug_assert_eq!(klen >> 14, 0, "attempting to read a special key"); - let vlen = if include_val { - ((offset_and_size >> 32) & 0xffff) as usize - } else { - 0 - }; - let offset = (offset_and_size as u32) as u64; - let mut buf = vec![0u8; klen + vlen]; - read_exact_at(&self.file, &mut buf, HEADER_SIZE + offset)?; - - stats.num_read_bytes.fetch_add(buf.len(), Ordering::Relaxed); - stats.num_read_ops.fetch_add(1, Ordering::Relaxed); - - if include_val { - let val = buf[klen..klen + vlen].to_owned(); - buf.truncate(klen); - - Ok((buf, val)) - } else { - Ok((buf, vec![])) - } - } - - fn read_kv(&self, stats: &InternalStats, offset_and_size: u64) -> Result { - self._read_kv(stats, offset_and_size, true) 
- } - - // writing doesn't require holding any locks since we write with an offset - fn write_kv(&self, stats: &InternalStats, key: &[u8], val: &[u8]) -> Result { - let entry_size = key.len() + val.len(); - let mut buf = vec![0u8; entry_size]; - buf[..key.len()].copy_from_slice(key); - buf[key.len()..].copy_from_slice(val); - - // atomically allocate some area. it may leak if the IO below fails or if we crash before updating the - // offsets_and_size array, but we're okay with leaks - let write_offset = self - .header() - .write_offset - .fetch_add(buf.len() as u64, Ordering::SeqCst) as u64; - - // now writing can be non-atomic (pwrite) - write_all_at(&self.file, &buf, HEADER_SIZE + write_offset)?; - stats.add_entry(entry_size); - - Ok(((key.len() as u64) << 48) | ((val.len() as u64) << 32) | write_offset) - } -} - -struct TPHandle { - rx: crossbeam_channel::Receiver>, -} -impl TPHandle { - fn wait(&self) -> Result<()> { - self.rx.recv()? - } - fn finished(&self) -> bool { - !self.rx.is_empty() - } -} - -struct CompactionInfo { - config: Arc, - stats: Arc, - files: Arc)>>, - row_locks: Arc<[RwLock<()>; NUM_ROWS]>, - t0: Instant, - src_filename: PathBuf, - target_filename: PathBuf, -} - -pub(crate) struct CompactionThreadPool { - tx: crossbeam_channel::Sender>)>>, - threads: Vec>>, -} - -impl CompactionThreadPool { - pub fn new(num_threads: usize) -> Self { - let (tx, rx) = crossbeam_channel::unbounded::< - Option<(CompactionInfo, crossbeam_channel::Sender>)>, - >(); - let mut threads = Vec::with_capacity(num_threads); - for _ in 0..num_threads { - let rx = rx.clone(); - let handle = std::thread::spawn(move || { - for elem in rx.iter() { - let Some((info, handle_tx)) = elem else { - break; - }; - let res = Shard::background_compact(info); - handle_tx.send(res)?; - } - Ok(()) - }); - threads.push(handle); - } - - Self { tx, threads } - } - - fn submit(&self, info: CompactionInfo) -> Result { - let (tx, rx) = crossbeam_channel::bounded(1); - self.tx.send(Some((info, 
tx)))?; - Ok(TPHandle { rx }) - } - - #[allow(dead_code)] - pub fn terminate(self) -> Result<()> { - for _ in self.threads.iter() { - self.tx.send(None)?; - } - - for th in self.threads { - match th.join() { - Err(e) => std::panic::resume_unwind(e), - Ok(res) => res?, - } - } - Ok(()) - } -} - -#[derive(Debug, Clone)] -pub(crate) struct ShardStats { - pub write_offset: usize, - pub wasted_bytes: usize, - pub num_inserts: usize, - pub num_removals: usize, -} - -impl ShardStats { - pub(crate) fn num_items(&self) -> usize { - self.num_inserts - self.num_removals - } -} - -pub(crate) struct Shard { - pub(crate) span: Range, - pub(crate) config: Arc, - stats: Arc, - files: Arc)>>, - row_locks: Arc<[RwLock<()>; NUM_ROWS]>, - threadpool: Arc, - compaction_handle: Arc>>, - #[cfg(feature = "flush_aggregation")] - sync_agg_mutex: parking_lot::Mutex<()>, - #[cfg(feature = "flush_aggregation")] - in_sync_agg_delay: std::sync::atomic::AtomicBool, -} - -impl Shard { - pub(crate) const EXPECTED_CAPACITY: usize = (NUM_ROWS * ROW_WIDTH * 9) / 10; // ~ 29,500 - - pub(crate) fn open( - span: Range, - truncate: bool, - config: Arc, - stats: Arc, - threadpool: Arc, - ) -> Result { - let filename = config - .dir_path - .join(format!("shard_{:04x}-{:04x}", span.start, span.end)); - let mut file = OpenOptions::new() - .create(true) - .read(true) - .write(true) - .truncate(truncate) - .open(&filename)?; - - let mut file_size = file.metadata()?.len(); - if file_size != 0 { - let mut meta_header = MetaHeader::default(); - let sz = file.read(bytes_of_mut(&mut meta_header))?; - if sz != size_of::() - || meta_header.magic != SHARD_FILE_MAGIC - || meta_header.version != SHARD_FILE_VERSION - { - if config.clear_on_unsupported_version { - file.set_len(0)?; - file_size = 0; - } else { - bail!( - "{filename:?} unsupported magic={:?} version=0x{:016x} size={}", - meta_header.magic, - meta_header.version, - file_size, - ); - } - } - - if file_size != 0 && file_size < HEADER_SIZE { - if 
config.clear_on_unsupported_version { - file.set_len(0)?; - file_size = 0; - } else { - bail!("corrupt shard file (size={})", file_size); - } - } - } - - if file_size == 0 { - if config.truncate_up { - // when creating, set the file's length so that we won't need to extend it every time we write - // (saves on file metadata updates) - file.set_len(HEADER_SIZE + config.max_shard_size as u64)?; - } else { - file.set_len(HEADER_SIZE)?; - } - } - - let mut row_locks = Vec::with_capacity(NUM_ROWS); - for _ in 0..NUM_ROWS { - row_locks.push(RwLock::new(())); - } - let row_locks: [RwLock<()>; NUM_ROWS] = row_locks.try_into().unwrap(); - - let mut mmap_file = MmapFile::new(file, config.mlock_headers)?; - - let compacted_filename = config - .dir_path - .join(format!("compact_{:04x}-{:04x}", span.start, span.end)); - if truncate { - _ = std::fs::remove_file(compacted_filename); - } else { - if let Ok(compacted_file) = OpenOptions::new() - .read(true) - .write(true) - .open(&compacted_filename) - { - let target = MmapFile::new(compacted_file, config.mlock_headers)?; - Self::do_compaction(&row_locks, &mmap_file, &target, &stats, &config)?; - std::fs::rename(compacted_filename, filename)?; - mmap_file = target; - } - } - - Ok(Self { - span, - config, - stats, - files: Arc::new(RwLock::new((mmap_file, None))), - row_locks: Arc::new(row_locks), - threadpool, - compaction_handle: Arc::new(Mutex::new(None)), - #[cfg(feature = "flush_aggregation")] - sync_agg_mutex: parking_lot::Mutex::new(()), - #[cfg(feature = "flush_aggregation")] - in_sync_agg_delay: std::sync::atomic::AtomicBool::new(false), - }) - } - - fn new( - span: Range, - mmap_file: MmapFile, - config: Arc, - stats: Arc, - threadpool: Arc, - ) -> Result { - let mut row_locks = Vec::with_capacity(NUM_ROWS); - for _ in 0..NUM_ROWS { - row_locks.push(RwLock::new(())); - } - let row_locks: [RwLock<()>; NUM_ROWS] = row_locks.try_into().unwrap(); - - Ok(Self { - span, - config, - stats, - files: 
Arc::new(RwLock::new((mmap_file, None))), - row_locks: Arc::new(row_locks), - threadpool, - compaction_handle: Arc::new(Mutex::new(None)), - #[cfg(feature = "flush_aggregation")] - sync_agg_mutex: parking_lot::Mutex::new(()), - #[cfg(feature = "flush_aggregation")] - in_sync_agg_delay: std::sync::atomic::AtomicBool::new(false), - }) - } - - fn do_compaction( - row_locks: &[RwLock<()>; NUM_ROWS], - src: &MmapFile, - target: &MmapFile, - stats: &InternalStats, - config: &InternalConfig, - ) -> Result<()> { - let mut first_row = true; - loop { - let row_idx = target.header().compacted_up_to.load(Ordering::Acquire); - if row_idx >= NUM_ROWS { - break; - } - - let _row_guard = row_locks[row_idx].write(); - let src_row = src.row(row_idx); - let target_row = target.row_mut(row_idx); - let mut target_col = 0; - - for (src_col, &sig) in src_row.signatures.iter().enumerate() { - if sig == INVALID_SIG { - continue; - } - let (k, v) = src.read_kv(&stats, src_row.offsets_and_sizes[src_col])?; - - assert!( - first_row || target_row.signatures[target_col] == INVALID_SIG, - "row={row_idx} col={target_col} sig={}", - target_row.signatures[target_col] - ); - let ph = PartedHash::new(&config.hash_seed, &k); - assert_eq!(ph.row_selector(), row_idx); - target_row.offsets_and_sizes[target_col] = target.write_kv(&stats, &k, &v)?; - std::sync::atomic::fence(Ordering::SeqCst); - target_row.signatures[target_col] = ph.signature(); - target.header().num_inserts.fetch_add(1, Ordering::Relaxed); - target_col += 1; - } - - target - .header() - .compacted_up_to - .fetch_add(1, Ordering::Release); - first_row = false; - } - - Ok(()) - } - - pub(crate) fn flush(&self) -> Result<()> { - //self.mmap.flush()? 
-- fdatasync should take care of that as well - self.files.read().0.file.sync_data()?; - Ok(()) - } - - pub(crate) fn split(&self) -> Result<(Shard, Shard)> { - let mut handle_guard = self.compaction_handle.lock(); - if let Some(handle) = handle_guard.take() { - handle.wait()?; - } - - let files_guard = self.files.write(); - - let mid = (self.span.start + self.span.end) / 2; - - let t0 = Instant::now(); - - let bottom_filename = self - .config - .dir_path - .join(format!("bottom_{:04x}-{:04x}", self.span.start, mid)); - let top_filename = self - .config - .dir_path - .join(format!("top_{:04x}-{:04x}", mid, self.span.end)); - - let bottom_file = MmapFile::create(&bottom_filename, &self.config)?; - let top_file = MmapFile::create(&top_filename, &self.config)?; - - for (row_idx, src_row) in files_guard.0.header().rows.0.iter().enumerate() { - let mut bottom_col = 0; - let mut top_col = 0; - for (col, &sig) in src_row.signatures.iter().enumerate() { - if sig == INVALID_SIG { - continue; - } - let (k, v) = files_guard - .0 - .read_kv(&self.stats, src_row.offsets_and_sizes[col])?; - let ph = PartedHash::new(&self.config.hash_seed, &k); - assert_eq!(row_idx, ph.row_selector()); - - let (file, col) = if ph.shard_selector() < mid { - (&bottom_file, &mut bottom_col) - } else { - (&top_file, &mut top_col) - }; - - let target_row = file.row_mut(ph.row_selector()); - assert_eq!( - target_row.signatures[*col], INVALID_SIG, - "row={} col={} sig={}", - row_idx, *col, target_row.signatures[*col] - ); - target_row.offsets_and_sizes[*col] = file.write_kv(&self.stats, &k, &v)?; - std::sync::atomic::fence(Ordering::SeqCst); - target_row.signatures[*col] = ph.signature(); - file.header().num_inserts.fetch_add(1, Ordering::Relaxed); - *col += 1; - } - } - - std::fs::rename( - bottom_filename, - self.config - .dir_path - .join(format!("shard_{:04x}-{:04x}", self.span.start, mid,)), - )?; - std::fs::rename( - top_filename, - self.config - .dir_path - .join(format!("shard_{:04x}-{:04x}", 
mid, self.span.end)), - )?; - std::fs::remove_file(self.config.dir_path.join(format!( - "shard_{:04x}-{:04x}", - self.span.start, self.span.end - )))?; - - self.stats.report_split( - t0, - bottom_file.header().write_offset.load(Ordering::Relaxed), - top_file.header().write_offset.load(Ordering::Relaxed), - ); - - let bottom = Self::new( - self.span.start..mid, - bottom_file, - self.config.clone(), - self.stats.clone(), - self.threadpool.clone(), - )?; - let top = Self::new( - mid..self.span.end, - top_file, - self.config.clone(), - self.stats.clone(), - self.threadpool.clone(), - )?; - - Ok((bottom, top)) - } - - pub(crate) fn merge(bottom: &Shard, top: &Shard) -> Result> { - let bottom_files = bottom.files.write(); - let top_files = top.files.write(); - - let tmp_filename = bottom.config.dir_path.join(format!( - "merge_{:04x}-{:04x}", - bottom.span.start, top.span.end - )); - let mmap_file = MmapFile::create(&tmp_filename, &bottom.config)?; - - let combined = Shard::new( - bottom.span.start..top.span.end, - mmap_file, - bottom.config.clone(), - bottom.stats.clone(), - bottom.threadpool.clone(), - )?; - let combined_files = combined.files.write(); - - for row_idx in 0..NUM_ROWS { - let mut target_col = 0; - for files in [&bottom_files, &top_files] { - let src_row = &files.0.header().rows.0[row_idx]; - for (src_col, &sig) in src_row.signatures.iter().enumerate() { - if sig == INVALID_SIG { - continue; - } - let (k, v) = files - .0 - .read_kv(&combined.stats, src_row.offsets_and_sizes[src_col])?; - let ph = PartedHash::new(&combined.config.hash_seed, &k); - assert_eq!(row_idx, ph.row_selector()); - - let target_row = combined_files.0.row_mut(ph.row_selector()); - if target_col >= ROW_WIDTH { - // too many items fall in this row, we can't merge - std::fs::remove_file(tmp_filename)?; - return Ok(None); - } - assert_eq!( - target_row.signatures[target_col], INVALID_SIG, - "row={} target_col={} sig={}", - row_idx, target_col, target_row.signatures[target_col] - ); - 
target_row.offsets_and_sizes[target_col] = - combined_files.0.write_kv(&combined.stats, &k, &v)?; - std::sync::atomic::fence(Ordering::SeqCst); - target_row.signatures[target_col] = ph.signature(); - combined_files - .0 - .header() - .num_inserts - .fetch_add(1, Ordering::Relaxed); - target_col += 1; - } - } - } - - let dst_filename = combined.config.dir_path.join(format!( - "shard_{:04x}-{:04x}", - combined.span.start, combined.span.end - )); - let bottom_filename = combined.config.dir_path.join(format!( - "shard_{:04x}-{:04x}", - bottom.span.start, bottom.span.end - )); - let top_filename = combined - .config - .dir_path - .join(format!("shard_{:04x}-{:04x}", top.span.start, top.span.end)); - - std::fs::rename(tmp_filename, dst_filename)?; - std::fs::remove_file(bottom_filename)?; - std::fs::remove_file(top_filename)?; - - drop(combined_files); - - Ok(Some(combined)) - } - - fn operate_on_row( - &self, - row_idx: usize, - func: impl FnOnce(&MmapFile, &ShardRow) -> Result, - ) -> Result { - let files_guard = self.files.read(); - let _row_guard = self.row_locks[row_idx].read(); - let file = if let Some(ref target) = files_guard.1 { - if row_idx < target.header().compacted_up_to.load(Ordering::Acquire) { - target - } else { - &files_guard.0 - } - } else { - &files_guard.0 - }; - - func(file, file.row(row_idx)) - } - - fn operate_on_row_mut( - &self, - row_idx: usize, - func: impl FnOnce(&MmapFile, bool, RwLockWriteGuard<()>, &mut ShardRow) -> Result, - ) -> Result { - let files_guard = self.files.read(); - let row_guard = self.row_locks[row_idx].write(); - let file = if let Some(ref target) = files_guard.1 { - if row_idx < target.header().compacted_up_to.load(Ordering::Acquire) { - target - } else { - &files_guard.0 - } - } else { - &files_guard.0 - }; - - func( - &file, - files_guard.1.is_some(), - row_guard, - file.row_mut(row_idx), - ) - } - - pub(crate) fn read_at( - &self, - row_idx: usize, - entry_idx: usize, - include_val: bool, - ) -> Result> { - 
self.operate_on_row(row_idx, |file, row| { - if row.signatures[entry_idx] != INVALID_SIG { - Ok(Some(file._read_kv( - &self.stats, - row.offsets_and_sizes[entry_idx], - include_val, - )?)) - } else { - Ok(None) - } - }) - } - - pub(crate) fn get_by_hash(&self, ph: PartedHash) -> Result> { - self.operate_on_row(ph.row_selector(), |file, row| { - let mut first_time = true; - let mut kvs = Vec::with_capacity(1); - let mut start = 0; - while let Some(idx) = row.lookup(ph.signature(), &mut start) { - kvs.push(file.read_kv(&self.stats, row.offsets_and_sizes[idx])?); - if first_time { - self.stats - .num_positive_lookups - .fetch_add(1, Ordering::Relaxed); - first_time = false; - } - } - if kvs.is_empty() { - self.stats - .num_negative_lookups - .fetch_add(1, Ordering::Relaxed); - } - Ok(kvs) - }) - } - - pub(crate) fn get(&self, ph: PartedHash, key: &[u8]) -> Result>> { - self.operate_on_row(ph.row_selector(), |file, row| { - let mut start = 0; - while let Some(idx) = row.lookup(ph.signature(), &mut start) { - let (k, v) = file.read_kv(&self.stats, row.offsets_and_sizes[idx])?; - if key == k { - self.stats - .num_positive_lookups - .fetch_add(1, Ordering::Relaxed); - return Ok(Some(v)); - } - } - self.stats - .num_negative_lookups - .fetch_add(1, Ordering::Relaxed); - Ok(None) - }) - } - - #[cfg(feature = "flush_aggregation")] - fn flush_aggregation(&self, file: &MmapFile) -> Result<()> { - let Some(delay) = self.config.flush_aggregation_delay else { - return Ok(()); - }; - - let do_sync = || -> Result<()> { - self.in_sync_agg_delay.store(true, Ordering::SeqCst); - std::thread::sleep(delay); - self.in_sync_agg_delay.store(false, Ordering::SeqCst); - file.file.sync_data()?; - Ok(()) - }; - - if let Some(_guard) = self.sync_agg_mutex.try_lock() { - // we're the first ones here. wait for the aggregation duration and sync the file - do_sync()?; - } else { - // another thread is currently sync'ing, we're waiting in line. 
if the holder of the lock is in the - // sleep (aggregation) phase, we can just wait for it to finish and return -- the other thread will - // have sync'ed us by the time we got the lock. otherwise, we'll need to sync as well - let was_in_delay = self.in_sync_agg_delay.load(Ordering::Relaxed); - let _guard = self.sync_agg_mutex.lock(); - if !was_in_delay { - do_sync()?; - } - } - Ok(()) - } - - fn try_replace<'a>( - &'a self, - file: &MmapFile, - row_guard: RwLockWriteGuard<'a, ()>, - row: &mut ShardRow, - ph: PartedHash, - key: &[u8], - val: &[u8], - mode: InsertMode, - ) -> Result> { - let mut start = 0; - let mut had_collision = false; - while let Some(idx) = row.lookup(ph.signature(), &mut start) { - let (k, existing_val) = file.read_kv(&self.stats, row.offsets_and_sizes[idx])?; - if key != k { - had_collision = true; - continue; - } - match mode { - InsertMode::GetOrCreate => { - // no-op, key already exists - self.stats - .num_positive_lookups - .fetch_add(1, Ordering::Relaxed); - return Ok(TryReplaceStatus::KeyExistsNotReplaced(existing_val)); - } - InsertMode::Set => { - // fall through - } - InsertMode::Replace(expected_val) => { - if expected_val.is_some_and(|expected_val| expected_val != existing_val) { - return Ok(TryReplaceStatus::KeyExistsNotReplaced(existing_val)); - } - } - } - - // optimization - if val != existing_val { - row.offsets_and_sizes[idx] = file.write_kv(&self.stats, key, val)?; - file.header() - .wasted_bytes - .fetch_add((k.len() + existing_val.len()) as u64, Ordering::Relaxed); - self.stats.num_updates.fetch_add(1, Ordering::Relaxed); - #[cfg(feature = "flush_aggregation")] - { - drop(row_guard); - self.flush_aggregation(file)?; - } - } - return Ok(TryReplaceStatus::KeyExistsReplaced(existing_val)); - } - - Ok(TryReplaceStatus::KeyDoesNotExist(row_guard, had_collision)) - } - - fn wait_for_compaction(&self) -> Result<()> { - let mut handle_guard = self.compaction_handle.lock(); - if let Some(handle) = handle_guard.take() { - 
handle.wait()?; - } - Ok(()) - } - - fn begin_compaction(&self, min_write_offset: u64) -> Result<()> { - let mut handle_guard = self.compaction_handle.lock(); - let mut files_guard = self.files.write(); - - if files_guard.0.header().write_offset.load(Ordering::Relaxed) < min_write_offset { - // already compacted by someone else - return Ok(()); - } - - if files_guard.1.is_some() { - // if the compaction target exists and the thread is still running -- all good - if let Some(ref handle) = *handle_guard { - if !handle.finished() { - return Ok(()); - } - } else { - return Ok(()); - } - } - - // the thread could've crashed in the middle of a compaction, and here's the place to extract the error - if let Some(handle) = handle_guard.take() { - handle.wait()?; - } - - assert!(files_guard.1.is_none()); - - let t0 = Instant::now(); - let src_filename = self.config.dir_path.join(format!( - "shard_{:04x}-{:04x}", - self.span.start, self.span.end - )); - let target_filename = self.config.dir_path.join(format!( - "compact_{:04x}-{:04x}", - self.span.start, self.span.end - )); - let target = MmapFile::create(&target_filename, &self.config)?; - target.header().compacted_up_to.store(0, Ordering::Release); - files_guard.1 = Some(target); - - let handle = self.threadpool.submit(CompactionInfo { - files: self.files.clone(), - stats: self.stats.clone(), - row_locks: self.row_locks.clone(), - config: self.config.clone(), - t0, - src_filename, - target_filename, - })?; - *handle_guard = Some(handle); - - Ok(()) - } - - fn background_compact(info: CompactionInfo) -> Result<()> { - let mut files_guard = info.files.upgradable_read(); - let src = &files_guard.0; - let target = files_guard.1.as_ref().unwrap(); - - Self::do_compaction(&info.row_locks, src, target, &info.stats, &info.config)?; - - std::fs::rename(&info.target_filename, &info.src_filename)?; - - info.stats.report_compaction( - info.t0, - src.header().write_offset.load(Ordering::Relaxed), - 
target.header().write_offset.load(Ordering::Relaxed), - ); - - files_guard.with_upgraded(|files| { - files.0 = files.1.take().unwrap(); - }); - Ok(()) - } - - pub(crate) fn insert( - &self, - ph: PartedHash, - full_key: &[u8], - val: &[u8], - mode: InsertMode, - ) -> Result { - let mut should_compact = None; - - let status = - self.operate_on_row_mut(ph.row_selector(), |file, is_compacting, row_guard, row| { - if !is_compacting { - if file.header().wasted_bytes.load(Ordering::Relaxed) - >= self.config.min_compaction_threashold as u64 - { - should_compact = Some(file.header().write_offset.load(Ordering::Relaxed)); - } else if file.header().write_offset.load(Ordering::Relaxed) - + (full_key.len() + val.len()) as u64 - > self.config.max_shard_size as u64 - { - return Ok(InsertStatus::SplitNeeded); - } - } - - let status = self.try_replace(file, row_guard, row, ph, &full_key, val, mode)?; - match status { - TryReplaceStatus::KeyDoesNotExist(_guard, had_collision) => { - if matches!(mode, InsertMode::Replace(_)) { - return Ok(InsertStatus::KeyDoesNotExist); - } - - // find an empty slot - let mut start = 0; - if let Some(idx) = row.lookup(INVALID_SIG, &mut start) { - let new_off = file.write_kv(&self.stats, &full_key, val)?; - - // we don't want a reorder to happen here - first write the offset, then the signature - row.offsets_and_sizes[idx] = new_off; - std::sync::atomic::fence(Ordering::SeqCst); - row.signatures[idx] = ph.signature(); - if had_collision { - self.stats.num_collisions.fetch_add(1, Ordering::Relaxed); - } - file.header().num_inserts.fetch_add(1, Ordering::Relaxed); - #[cfg(feature = "flush_aggregation")] - { - drop(_guard); - self.flush_aggregation(file)?; - } - Ok(InsertStatus::Added) - } else { - // no room in this row, must split - Ok(InsertStatus::SplitNeeded) - } - } - TryReplaceStatus::KeyExistsNotReplaced(existing) => { - Ok(InsertStatus::AlreadyExists(existing)) - } - TryReplaceStatus::KeyExistsReplaced(existing) => { - 
Ok(InsertStatus::Replaced(existing)) - } - } - })?; - - if let Some(min_write_offset) = should_compact { - self.begin_compaction(min_write_offset)?; - } - Ok(status) - } - - pub(crate) fn remove(&self, ph: PartedHash, key: &[u8]) -> Result>> { - self.operate_on_row_mut(ph.row_selector(), |file, _, _guard, row| { - let mut start = 0; - - while let Some(idx) = row.lookup(ph.signature(), &mut start) { - let (k, v) = file.read_kv(&self.stats, row.offsets_and_sizes[idx])?; - if key == k { - row.signatures[idx] = INVALID_SIG; - // we managed to remove this key - file.header().num_removals.fetch_add(1, Ordering::Relaxed); - file.header() - .wasted_bytes - .fetch_add((k.len() + v.len()) as u64, Ordering::Relaxed); - #[cfg(feature = "flush_aggregation")] - { - drop(_guard); - self.flush_aggregation(file)?; - } - return Ok(Some(v)); - } - } - - Ok(None) - }) - } - - pub(crate) fn get_stats(&self) -> Result { - self.wait_for_compaction()?; - let files_guard = self.files.read(); - let hdr = files_guard.0.header(); - Ok(ShardStats { - write_offset: hdr.write_offset.load(Ordering::Relaxed) as usize, - wasted_bytes: hdr.wasted_bytes.load(Ordering::Relaxed) as usize, - num_inserts: hdr.num_inserts.load(Ordering::Relaxed) as usize, - num_removals: hdr.num_removals.load(Ordering::Relaxed) as usize, - }) - } -} - -impl Drop for Shard { - fn drop(&mut self) { - _ = self.wait_for_compaction(); - } -} diff --git a/src/stats.rs b/src/stats.rs deleted file mode 100644 index 5f9c24e..0000000 --- a/src/stats.rs +++ /dev/null @@ -1,245 +0,0 @@ -use std::{ - fmt::Display, - sync::atomic::{AtomicUsize, Ordering}, - time::{Duration, Instant}, -}; - -use parking_lot::Mutex; - -use crate::{router::ShardRouter, shard::HEADER_SIZE}; - -#[derive(Default, Debug, Clone)] -pub struct Stats { - pub num_shards: usize, - pub num_splits: usize, - pub num_compactions: usize, - pub last_split_stats: Vec<(Duration, u64, u64)>, - pub last_compaction_stats: Vec<(Duration, u64, u64)>, - - pub occupied_bytes: 
usize, - pub wasted_bytes: usize, - - pub num_inserts: usize, - pub num_updates: usize, - pub num_positive_lookups: usize, - pub num_negative_lookups: usize, - pub num_removals: usize, - pub num_collisions: usize, - - pub num_read_ops: usize, - pub num_read_bytes: usize, - pub num_write_ops: usize, - pub num_write_bytes: usize, - - pub entries_under_128: usize, - pub entries_under_1k: usize, - pub entries_under_8k: usize, - pub entries_under_32k: usize, - pub entries_over_32k: usize, -} - -impl Stats { - pub const FILE_HEADER_SIZE: usize = HEADER_SIZE as usize; - - pub fn data_bytes(&self) -> usize { - self.occupied_bytes - self.wasted_bytes - } - pub fn total_occupied_bytes(&self) -> usize { - self.num_shards * Self::FILE_HEADER_SIZE + self.occupied_bytes - } - pub fn num_entries(&self) -> usize { - self.num_inserts - self.num_removals - } - pub fn average_entry_size(&self) -> usize { - self.data_bytes() - .checked_div(self.num_entries()) - .unwrap_or(0) - } - - pub fn required_num_shards(&self) -> usize { - ShardRouter::calc_num_shards(self.num_entries()) as usize - } - pub fn should_merge_small_shards(&self) -> bool { - self.num_shards > self.required_num_shards() * 2 - } -} - -impl Display for Stats { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "sh={} [sp={} com={}] [occ={} wst={}] [ins={} updt={} +lkup={} -lkup={} rem={} coll={}] R={}/{}b W={}/{}b", - self.num_shards, self.num_splits, self.num_compactions, self.occupied_bytes, self.wasted_bytes, - self.num_inserts, self.num_updates, self.num_positive_lookups, self.num_negative_lookups, - self.num_removals, self.num_collisions, self.num_read_ops, self.num_read_bytes, self.num_write_ops, - self.num_write_bytes) - } -} - -#[derive(Debug, Clone)] -pub(crate) struct CyclicArr { - idx: usize, - arr: [T; N], -} -impl Default for CyclicArr { - fn default() -> Self { - Self { - idx: 0, - arr: [T::default(); N], - } - } -} -impl CyclicArr { - pub(crate) fn push(&mut self, val: T) 
{ - self.arr[self.idx % N] = val; - self.idx += 1; - } - pub(crate) fn clear(&mut self) { - self.idx = 0; - for i in 0..N { - self.arr[i] = T::default(); - } - } - fn iter<'a>(&'a self) -> impl Iterator { - (self.idx.checked_sub(N).unwrap_or(0)..self.idx).map(|idx| &self.arr[idx % N]) - } -} - -#[test] -fn test_cyclic_arr() { - let mut arr = CyclicArr::::default(); - assert!(arr.iter().collect::>().is_empty()); - arr.push(1); - arr.push(2); - arr.push(3); - assert_eq!(arr.iter().collect::>(), vec![&1,&2,&3]); - arr.push(4); - arr.push(5); - arr.push(6); - arr.push(7); - arr.push(8); - assert_eq!(arr.iter().collect::>(), vec![&1,&2,&3,&4,&5,&6,&7,&8]); - arr.push(9); - arr.push(10); - arr.push(11); - assert_eq!(arr.iter().collect::>(), vec![&4,&5,&6,&7,&8,&9,&10,&11]); - arr.clear(); - arr.push(12); - arr.push(13); - arr.push(14); - assert_eq!(arr.iter().collect::>(), vec![&12,&13,&14]); - for i in 15u32..1000 { - arr.push(i); - } - assert_eq!(arr.iter().collect::>(), vec![&992,&993,&994,&995,&996,&997,&998,&999]); -} - -#[derive(Debug, Default)] -pub struct InternalStats { - pub(crate) num_splits: AtomicUsize, - pub(crate) num_compactions: AtomicUsize, - pub(crate) last_compaction_stats: Mutex>, - pub(crate) last_split_stats: Mutex>, - - pub(crate) num_updates: AtomicUsize, - pub(crate) num_positive_lookups: AtomicUsize, - pub(crate) num_negative_lookups: AtomicUsize, - pub(crate) num_collisions: AtomicUsize, - - pub(crate) num_read_ops: AtomicUsize, - pub(crate) num_read_bytes: AtomicUsize, - pub(crate) num_write_ops: AtomicUsize, - pub(crate) num_write_bytes: AtomicUsize, - - pub(crate) entries_under_128: AtomicUsize, - pub(crate) entries_under_1k: AtomicUsize, - pub(crate) entries_under_8k: AtomicUsize, - pub(crate) entries_under_32k: AtomicUsize, - pub(crate) entries_over_32k: AtomicUsize, -} - -impl InternalStats { - pub(crate) fn add_entry(&self, sz: usize) { - self.num_write_bytes.fetch_add(sz, Ordering::Relaxed); - self.num_write_ops.fetch_add(1, 
Ordering::Relaxed); - match sz { - 0..128 => self.entries_under_128.fetch_add(1, Ordering::Relaxed), - 128..1024 => self.entries_under_1k.fetch_add(1, Ordering::Relaxed), - 1024..8192 => self.entries_under_8k.fetch_add(1, Ordering::Relaxed), - 8192..32768 => self.entries_under_32k.fetch_add(1, Ordering::Relaxed), - _ => self.entries_over_32k.fetch_add(1, Ordering::Relaxed), - }; - } - - pub(crate) fn report_split(&self, t0: Instant, bottom_size: u64, top_size: u64) { - let dur = Instant::now().duration_since(t0); - self.num_splits.fetch_add(1, Ordering::Relaxed); - self.last_split_stats - .lock() - .push((dur, bottom_size, top_size)); - } - - pub(crate) fn report_compaction(&self, t0: Instant, prev_size: u64, new_size: u64) { - let dur = Instant::now().duration_since(t0); - self.num_compactions.fetch_add(1, Ordering::Relaxed); - self.last_compaction_stats - .lock() - .push((dur, prev_size, new_size)); - } - - pub(crate) fn clear(&self) { - // store 0 in every stats... - - self.num_splits.store(0, Ordering::SeqCst); - self.num_compactions.store(0, Ordering::SeqCst); - self.last_split_stats.lock().clear(); - self.last_compaction_stats.lock().clear(); - - self.num_updates.store(0, Ordering::SeqCst); - self.num_positive_lookups.store(0, Ordering::SeqCst); - self.num_negative_lookups.store(0, Ordering::SeqCst); - self.num_collisions.store(0, Ordering::SeqCst); - - self.num_read_ops.store(0, Ordering::SeqCst); - self.num_read_bytes.store(0, Ordering::SeqCst); - self.num_write_ops.store(0, Ordering::SeqCst); - self.num_write_bytes.store(0, Ordering::SeqCst); - - self.entries_under_128.store(0, Ordering::SeqCst); - self.entries_under_1k.store(0, Ordering::SeqCst); - self.entries_under_8k.store(0, Ordering::SeqCst); - self.entries_under_32k.store(0, Ordering::SeqCst); - self.entries_over_32k.store(0, Ordering::SeqCst); - } - - pub(crate) fn fill_stats(&self, stats: &mut Stats) { - stats.num_splits = self.num_splits.load(Ordering::Relaxed); - stats.num_compactions = 
self.num_compactions.load(Ordering::Relaxed); - - { - let mut guard = self.last_split_stats.lock(); - stats.last_split_stats = guard.iter().copied().collect::>(); - guard.clear(); - } - { - let mut guard = self.last_compaction_stats.lock(); - stats.last_compaction_stats = guard.iter().copied().collect::>(); - guard.clear(); - } - - stats.num_updates = self.num_updates.load(Ordering::Relaxed); - stats.num_positive_lookups = self.num_positive_lookups.load(Ordering::Relaxed); - stats.num_negative_lookups = self.num_negative_lookups.load(Ordering::Relaxed); - stats.num_collisions = self.num_collisions.load(Ordering::Relaxed); - - stats.num_read_ops = self.num_read_ops.load(Ordering::Relaxed); - stats.num_read_bytes = self.num_read_bytes.load(Ordering::Relaxed); - stats.num_write_ops = self.num_write_ops.load(Ordering::Relaxed); - stats.num_write_bytes = self.num_write_bytes.load(Ordering::Relaxed); - - stats.entries_under_128 = self.entries_under_128.load(Ordering::Relaxed); - stats.entries_under_1k = self.entries_under_1k.load(Ordering::Relaxed); - stats.entries_under_8k = self.entries_under_8k.load(Ordering::Relaxed); - stats.entries_under_32k = self.entries_under_32k.load(Ordering::Relaxed); - stats.entries_over_32k = self.entries_over_32k.load(Ordering::Relaxed); - } -} diff --git a/src/store.rs b/src/store.rs index 00e2d73..abc14d2 100644 --- a/src/store.rs +++ b/src/store.rs @@ -1,622 +1,951 @@ -use anyhow::{anyhow, bail, ensure}; -use bytemuck::{bytes_of, from_bytes}; -use fslock::LockFile; -use parking_lot::Mutex; +mod compaction; +mod list; +mod open; +mod queue; +mod recovery; +mod typed; + +use parking_lot::{Condvar, Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use siphasher::sip::SipHasher13; + use std::{ - path::{Path, PathBuf}, - sync::Arc, + collections::HashMap, + hash::Hasher, + path::PathBuf, + sync::{ + Arc, + atomic::{AtomicBool, AtomicU16, AtomicU64, Ordering}, + }, }; use crate::{ - hashing::{HashSeed, PartedHash}, - router::ShardRouter, - 
shard::{CompactionThreadPool, InsertMode, InsertStatus, KVPair}, - Stats, MAX_KEY_SIZE, MAX_TOTAL_VALUE_SIZE, -}; -use crate::{ - shard::{NUM_ROWS, ROW_WIDTH}, - stats::InternalStats, + data_file::DataFile, + index_file::{EntryPointer, IndexFile, RowLayout, RowReadGuard, RowWriteGuard}, + internal::{ + HashCoord, KeyNamespace, MAX_DATA_FILE_IDX, MAX_DATA_FILES, MIN_SPLIT_LEVEL, ROW_WIDTH, + aligned_data_entry_size, aligned_data_entry_waste, aligned_tombstone_entry_waste, sync_dir, + }, + types::{Config, Error, GetOrCreateStatus, ReplaceStatus, Result, Stats}, }; -use crate::{CandyError, Config, Result, MAX_TOTAL_KEY_SIZE, MAX_VALUE_SIZE}; - -pub(crate) const USER_NAMESPACE: &[u8] = &[1]; -pub(crate) const TYPED_NAMESPACE: &[u8] = &[2]; -pub(crate) const LIST_NAMESPACE: &[u8] = &[3]; -pub(crate) const ITEM_NAMESPACE: &[u8] = &[4]; -pub(crate) const CHAIN_NAMESPACE: u8 = 5; -pub(crate) const QUEUE_NAMESPACE: &[u8] = &[6]; -pub(crate) const QUEUE_ITEM_NAMESPACE: &[u8] = &[7]; - -#[derive(Debug, Clone)] -pub(crate) struct InternalConfig { - pub dir_path: PathBuf, - pub max_shard_size: u32, - pub min_compaction_threashold: u32, - pub hash_seed: HashSeed, - pub expected_number_of_keys: usize, - pub max_concurrent_list_ops: u32, - pub truncate_up: bool, - pub clear_on_unsupported_version: bool, - pub mlock_headers: bool, - pub num_compaction_threads: usize, - #[cfg(feature = "flush_aggregation")] - pub flush_aggregation_delay: Option, +#[derive(Default)] +struct CompactionState { + wake_requested: bool, } -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ReplaceStatus { - PrevValue(Vec), - WrongValue(Vec), - DoesNotExist, +struct StoreInner { + base_path: PathBuf, + config: Arc, + index_file: IndexFile, + logical_locks: Vec>, + logical_locks_mask: usize, + data_files: RwLock>>, + active_file_idx: AtomicU16, + active_file_ordinal: AtomicU64, + rotation_lock: Mutex<()>, + compaction_state: Mutex, + compaction_condvar: Condvar, + shutting_down: AtomicBool, + num_compactions: 
AtomicU64, + compaction_time_ms: AtomicU64, + compaction_errors: AtomicU64, + num_positive_lookups: AtomicU64, + num_negative_lookups: AtomicU64, + num_read_ops: AtomicU64, + num_read_bytes: AtomicU64, + num_write_ops: AtomicU64, + num_write_bytes: AtomicU64, } -impl ReplaceStatus { - pub fn was_replaced(&self) -> bool { - matches!(*self, Self::PrevValue(_)) - } - pub fn failed(&self) -> bool { - !matches!(*self, Self::PrevValue(_)) - } - pub fn is_key_missing(&self) -> bool { - matches!(*self, Self::DoesNotExist) - } - pub fn is_wrong_value(&self) -> bool { - matches!(*self, Self::WrongValue(_)) - } + +/// A persistent key-value store backed by append-only data files and a mutable index. +pub struct CandyStore { + inner: Arc, + _lockfile: fslock::LockFile, + compaction_thd: Option>, + allow_clean_shutdown: bool, + was_clean_shutdown: bool, +} + +pub use list::{KVPair, ListIterator}; +pub use typed::{CandyTypedDeque, CandyTypedKey, CandyTypedList, CandyTypedStore}; + +pub(super) struct OpenState { + index_file: IndexFile, + data_files: HashMap>, + active_file_idx: u16, + active_file_ordinal: u64, + was_clean_shutdown: bool, } -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum SetStatus { - PrevValue(Vec), - CreatedNew, +pub(super) enum DirtyOpenAction { + None, + RebuildIndex, + TrustIndex, + ResetDb, } -impl SetStatus { - pub fn was_created(&self) -> bool { - matches!(*self, Self::CreatedNew) + +impl StoreInner { + fn new( + base_path: PathBuf, + config: Arc, + state: OpenState, + num_logical_locks: usize, + ) -> Self { + Self { + base_path, + config, + index_file: state.index_file, + logical_locks: (0..num_logical_locks).map(|_| RwLock::new(())).collect(), + logical_locks_mask: num_logical_locks - 1, + data_files: RwLock::new(state.data_files), + active_file_idx: AtomicU16::new(state.active_file_idx), + active_file_ordinal: AtomicU64::new(state.active_file_ordinal), + rotation_lock: Mutex::new(()), + compaction_state: Mutex::new(CompactionState::default()), + 
compaction_condvar: Condvar::new(), + shutting_down: AtomicBool::new(false), + num_compactions: AtomicU64::new(0), + compaction_time_ms: AtomicU64::new(0), + compaction_errors: AtomicU64::new(0), + num_positive_lookups: AtomicU64::new(0), + num_negative_lookups: AtomicU64::new(0), + num_read_ops: AtomicU64::new(0), + num_read_bytes: AtomicU64::new(0), + num_write_ops: AtomicU64::new(0), + num_write_bytes: AtomicU64::new(0), + } } - pub fn was_replaced(&self) -> bool { - matches!(*self, Self::PrevValue(_)) + + fn record_lookup(&self, found: bool) { + if found { + self.num_positive_lookups.fetch_add(1, Ordering::Relaxed); + } else { + self.num_negative_lookups.fetch_add(1, Ordering::Relaxed); + } } -} -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum GetOrCreateStatus { - ExistingValue(Vec), - CreatedNew(Vec), -} -impl GetOrCreateStatus { - pub fn was_created(&self) -> bool { - matches!(*self, Self::CreatedNew(_)) + fn record_read(&self, bytes: u64) { + self.num_read_ops.fetch_add(1, Ordering::Relaxed); + self.num_read_bytes.fetch_add(bytes, Ordering::Relaxed); } - pub fn already_exists(&self) -> bool { - matches!(*self, Self::ExistingValue(_)) + + fn record_write(&self, bytes: u64) { + self.num_write_ops.fetch_add(1, Ordering::Relaxed); + self.num_write_bytes.fetch_add(bytes, Ordering::Relaxed); } - pub fn value(self) -> Vec { - match self { - Self::CreatedNew(val) => val, - Self::ExistingValue(val) => val, + + fn signal_compaction_scan(&self) { + let mut state = self.compaction_state.lock(); + if state.wake_requested { + return; } + state.wake_requested = true; + self.compaction_condvar.notify_one(); } -} -/// The CandyStore object. 
Note that it's fully sync'ed, so can be shared between threads using `Arc` -pub struct CandyStore { - pub(crate) root: ShardRouter, - pub(crate) config: Arc, - // locks for complicated operations - pub(crate) keyed_locks_mask: u32, - pub(crate) keyed_locks: Vec>, - _lockfile: LockFile, - stats: Arc, - //threadpool: Arc, -} -/// An iterator over a CandyStore. Note that it's safe to modify (insert/delete) keys while iterating, -/// but the results of the iteration may or may not include these changes. This is considered a -/// well-defined behavior of the store. -pub struct CandyStoreIterator<'a> { - store: &'a CandyStore, - shard_selector: u32, - row_idx: usize, - entry_idx: usize, - raw: bool, - include_val: bool, -} + fn maybe_signal_compaction_threshold_crossing( + &self, + file_idx: u16, + previous_waste: u32, + new_waste: u32, + ) { + if file_idx == self.active_file_idx.load(Ordering::Acquire) { + return; + } -impl<'a> CandyStoreIterator<'a> { - fn new(store: &'a CandyStore, raw: bool, include_val: bool) -> Self { - Self { - store, - shard_selector: 0, - row_idx: 0, - entry_idx: 0, - raw, - include_val, + let threshold = self.config.compaction_min_threshold; + if previous_waste <= threshold && new_waste > threshold { + self.signal_compaction_scan(); } } - /// Returns the cookie of the next item in the store. This can be used later to construct an iterator - /// that starts at the given point. 
- pub fn cookie(&self) -> u64 { - ((self.shard_selector as u64 & 0xffff) << 32) - | ((self.row_idx as u64 & 0xffff) << 16) - | (self.entry_idx as u64 & 0xffff) + fn next_compaction_candidate(&self) -> Option<(u16, u64)> { + let active_file_idx = self.active_file_idx.load(Ordering::Acquire); + let files = self.data_files.read(); + files + .iter() + .filter_map(|(&file_idx, data_file)| { + if file_idx == active_file_idx + || self.index_file.file_waste(file_idx) <= self.config.compaction_min_threshold + { + return None; + } + Some((file_idx, data_file.file_ordinal)) + }) + .min_by_key(|(_, file_ordinal)| *file_ordinal) } - // Constructs an iterator starting at the given cookie - pub fn from_cookie(store: &'a CandyStore, cookie: u64, raw: bool, include_val: bool) -> Self { - Self { - store, - shard_selector: ((cookie >> 32) & 0xffff) as u32, - row_idx: ((cookie >> 16) & 0xffff) as usize, - entry_idx: (cookie & 0xffff) as usize, - raw, - include_val, - } + fn logical_lock_index(&self, ns: KeyNamespace, key: &[u8]) -> usize { + let mut hasher = SipHasher13::new_with_keys(0x1701_0a66_2024_6b90, 0x284f_fa2e_3e02_3e2a); + hasher.write_u8(ns as u8); + hasher.write(key); + (hasher.finish() as usize) & self.logical_locks_mask } -} -impl<'a> Iterator for CandyStoreIterator<'a> { - type Item = Result; + fn data_file(&self, file_idx: u16) -> Result> { + self.data_files + .read() + .get(&file_idx) + .cloned() + .ok_or(Error::MissingDataFile(file_idx)) + } - fn next(&mut self) -> Option { - while self.shard_selector < ShardRouter::END_OF_SHARDS { - let res = self.store.root.shared_op(self.shard_selector, |sh| { - while self.row_idx < NUM_ROWS { - let row_idx = self.row_idx; - let entry_idx = self.entry_idx; + fn bump_histogram(&self, entry_size: u64) { + // Buckets: [<64, <256, <1K, <4K, <16K, >=16K] + // Boundaries at ilog2 = 6, 8, 10, 12, 14 → bucket = ((ilog2 - 4) / 2).clamp(0, 5) + let bucket = ((entry_size.max(1).ilog2() as usize).saturating_sub(4) / 2).min(5); + 
self.index_file.header_ref().size_histogram[bucket].fetch_add(1, Ordering::Relaxed); + } - self.entry_idx += 1; - if self.entry_idx >= ROW_WIDTH { - self.entry_idx = 0; - self.row_idx += 1; - } + fn _split_row(&self, hc: HashCoord, sl: u64, gsl: u64) -> Result<()> { + let nsl = sl + 1; + let low_row_idx = hc.row_index(sl); + let high_row_idx = low_row_idx | (1 << sl); - let Some((mut k, v)) = sh.read_at(row_idx, entry_idx, self.include_val)? else { - continue; - }; - if self.raw { - return Ok((sh.span.start, Some((k, v)))); - } else if k.ends_with(USER_NAMESPACE) { - k.truncate(k.len() - USER_NAMESPACE.len()); - return Ok((sh.span.start, Some((k, v)))); - } - } + if nsl > gsl { + self.index_file.grow(nsl)?; + } - self.entry_idx = 0; - self.row_idx = 0; - Ok((sh.span.end, None)) - }); + let rows_table = self.index_file.rows_table(); - match res { - Ok((shard_selector, kv)) => { - self.shard_selector = shard_selector; - if let Some(kv) = kv { - return Some(Ok(kv)); - } - // continue - } - Err(e) => return Some(Err(e)), - } - } + let low_shard = rows_table.shard_id(low_row_idx); + let high_shard = rows_table.shard_id(high_row_idx); - None - } -} + let _high_guard = if low_shard < high_shard { + None // low_row will automatically lock low_shard + } else if low_shard > high_shard { + Some(rows_table.lock_shard(high_shard)) + } else { + None + }; -impl CandyStore { - /// Opens or creates a new CandyStore. 
- /// * dir_path - the directory where shards will be kept - /// * config - the configuration options for the store - pub fn open(dir_path: impl AsRef, config: Config) -> Result { - let config = Arc::new(InternalConfig { - dir_path: dir_path.as_ref().to_path_buf(), - expected_number_of_keys: config.expected_number_of_keys, - hash_seed: config.hash_seed, - max_concurrent_list_ops: config.max_concurrent_list_ops, - max_shard_size: config.max_shard_size, - min_compaction_threashold: config.min_compaction_threashold, - truncate_up: config.truncate_up, - clear_on_unsupported_version: config.clear_on_unsupported_version, - mlock_headers: config.mlock_headers, - num_compaction_threads: config.num_compaction_threads, - #[cfg(feature = "flush_aggregation")] - flush_aggregation_delay: config.flush_aggregation_delay, - }); - - std::fs::create_dir_all(dir_path)?; - let lockfilename = config.dir_path.join(".lock"); - let mut lockfile = LockFile::open(&lockfilename)?; - if !lockfile.try_lock_with_pid()? { - let (pid, comm, stat) = if let Ok(mut pid) = std::fs::read_to_string(&lockfilename) { - // this may fail on non-linux OSs, but we default to "?" 
anyway - pid = pid.trim().to_owned(); - let exe: String = std::fs::read_link(format!("/proc/{pid}/exe")) - .unwrap_or("?".into()) - .to_string_lossy() - .to_string() - .to_owned(); - - let stat: String = std::fs::read_link(format!("/proc/{pid}/stat")) - .unwrap_or("?".into()) - .to_string_lossy() - .to_string() - .to_owned(); - - (pid, exe, stat) - } else { - ("?".into(), "?".into(), "?".into()) - }; + let mut low_row = rows_table.row_mut(low_row_idx); - bail!( - "Lock file {lockfilename:?} is held by pid {:?} exe={:?} stat {:?}", - pid, - comm, - stat - ); - } + let _high_guard_post = if low_shard < high_shard { + Some(rows_table.lock_shard(high_shard)) + } else { + None + }; - let mut num_keyed_locks = config.max_concurrent_list_ops.max(4); - if !num_keyed_locks.is_power_of_two() { - num_keyed_locks = 1 << (num_keyed_locks.ilog2() + 1); + if low_row.split_level.load(Ordering::Acquire) != sl { + return Ok(()); } - - let mut keyed_locks = vec![]; - for _ in 0..num_keyed_locks { - keyed_locks.push(Mutex::new(())); + // SAFETY: the high row (being created) has a split_level of 0, making it unusable by anyone. + // We properly hold the high_row shard lock if it differs from the low_row shard. 
+ let high_row = unsafe { &mut *rows_table.unlocked_row_ptr(high_row_idx) }; + debug_assert_eq!(high_row.split_level.load(Ordering::Acquire), 0); + let split_bit = 1 << (sl - MIN_SPLIT_LEVEL as u64); + for col in 0..ROW_WIDTH { + let entry = low_row.pointers[col]; + if low_row.signatures[col] != HashCoord::INVALID_SIG + && entry.is_valid() + && (entry.masked_row_selector() as u64) & split_bit != 0 + { + high_row.insert(col, low_row.signatures[col], entry); + low_row.remove(col); + } } - let stats = Arc::new(InternalStats::default()); - let threadpool = Arc::new(CompactionThreadPool::new(config.num_compaction_threads)); - let root = ShardRouter::new(config.clone(), stats.clone(), threadpool.clone())?; + low_row.set_split_level(nsl); + high_row.set_split_level(nsl); - Ok(Self { - config, - root, - keyed_locks_mask: num_keyed_locks - 1, - keyed_locks, - _lockfile: lockfile, - stats, - //threadpool, - }) + Ok(()) } - /// returns the directory where shards are kept - pub fn get_shards_directory(&self) -> &Path { - &self.config.dir_path - } + /// Rotate to a new data file when the active one is full. + /// + /// The `rotation_lock` serializes concurrent rotations, so the read-then-write + /// on `data_files` (find a free index, then insert) is not a TOCTOU race. + /// `compact_file` also writes to `data_files` (removing files) but only + /// touches non-active indices, so there is no conflict. + fn _rotate_data_file(&self, active_idx: u16) -> Result<()> { + let _rot_lock = self.rotation_lock.lock(); + + if self.active_file_idx.load(Ordering::Acquire) != active_idx { + return Ok(()); + } - /// Syncs all in-memory changes of all shards to disk. Concurrent changes are allowed while - /// flushing, and may result in partially-sync'ed store. Use sparingly, as this is a costly operaton. 
- pub fn flush(&self) -> Result<()> { - self.root.call_on_all_shards(|sh| sh.flush())?; - Ok(()) - } + let active_ordinal = if let Ok(active_file) = self.data_file(active_idx) { + let _ = active_file.file.sync_all(); + active_file.file_ordinal + } else { + 0 + }; + + let mut next_idx = (self.active_file_idx.load(Ordering::Relaxed) + 1) & MAX_DATA_FILE_IDX; + let mut attempts = 0; + { + let files = self.data_files.read(); + while files.contains_key(&next_idx) { + next_idx = (next_idx + 1) & MAX_DATA_FILE_IDX; + attempts += 1; + if attempts > MAX_DATA_FILES { + return Err(Error::TooManyDataFiles); + } + } + } - /// Clears the store (erasing all keys), and removing all shard files - pub fn clear(&self) -> Result<()> { - self.root.clear()?; - self.stats.clear(); + let ordinal = self.active_file_ordinal.fetch_add(1, Ordering::Relaxed) + 1; + let data_file = Arc::new(DataFile::create( + self.base_path.as_path(), + self.config.clone(), + next_idx, + ordinal, + )?); + + self.data_files.write().insert(next_idx, data_file); + self.active_file_idx.store(next_idx, Ordering::Release); + + if active_ordinal != 0 + && self.index_file.file_waste(active_idx) > self.config.compaction_min_threshold + { + self.signal_compaction_scan(); + } Ok(()) } - pub(crate) fn ensure_sizes(key: &[u8], val: &[u8]) -> Result<()> { - ensure!(key.len() <= MAX_KEY_SIZE, CandyError::KeyTooLong(key.len())); - ensure!( - val.len() <= MAX_VALUE_SIZE, - CandyError::ValueTooLong(val.len()) - ); + fn _mut_op( + &self, + ns: KeyNamespace, + key: &[u8], + val: &[u8], + mut op: impl FnMut(HashCoord, RowWriteGuard, &[u8], &[u8]) -> Result, + ) -> Result { + let entry_size = aligned_data_entry_size(key.len(), val.len()) as usize; + if key.len() > crate::types::MAX_USER_KEY_SIZE + || val.len() > crate::types::MAX_USER_VALUE_SIZE + || entry_size > self.config.max_data_file_size as usize + { + return Err(Error::PayloadTooLarge(entry_size)); + } - Ok(()) - } + let hc = HashCoord::new(ns, key, self.config.hash_key); + 
+ loop { + let res = { + let row_table = self.index_file.rows_table(); + let gsl = self + .index_file + .header_ref() + .global_split_level + .load(Ordering::Acquire); + let mut sl = gsl; + let mut res = None; + + loop { + debug_assert!(sl >= MIN_SPLIT_LEVEL as u64, "sl={sl}"); + let row = row_table.row_mut(hc.row_index(sl)); + let row_sl = row.split_level.load(Ordering::Acquire); + if row_sl == 0 { + sl -= 1; + continue; + } + if row_sl > sl { + break; + } - pub(crate) fn make_user_key(&self, mut key: Vec) -> Vec { - key.extend_from_slice(USER_NAMESPACE); - key - } + res = Some(op(hc, row, key, val)); + break; + } - pub(crate) fn get_by_hash(&self, ph: PartedHash) -> Result> { - debug_assert!(ph.is_valid()); - self.root - .shared_op(ph.shard_selector(), |sh| sh.get_by_hash(ph)) - } + res + }; - pub(crate) fn get_raw(&self, full_key: &[u8]) -> Result>> { - let ph = PartedHash::new(&self.config.hash_seed, full_key); - self.root - .shared_op(ph.shard_selector(), |sh| sh.get(ph, &full_key)) - } + let Some(res) = res else { + continue; + }; - /// Gets the value of a key from the store. If the key does not exist, `None` will be returned. - /// The data is fully-owned, no references are returned. 
- pub fn get + ?Sized>(&self, key: &B) -> Result>> { - self.owned_get(key.as_ref().to_owned()) + match res { + Ok(res) => return Ok(res), + Err(Error::SplitRow(sl)) => { + let gsl = self + .index_file + .header_ref() + .global_split_level + .load(Ordering::Acquire); + self._split_row(hc, sl, gsl)?; + } + Err(Error::RotateDataFile(active_idx)) => { + self._rotate_data_file(active_idx)?; + } + Err(err) => return Err(err), + } + } } +} - /// Same as [Self::get] but takes an owned key - pub fn owned_get(&self, key: Vec) -> Result>> { - self.get_raw(&self.make_user_key(key)) +impl CandyStore { + fn logical_read_guard(&self, ns: KeyNamespace, key: &[u8]) -> RwLockReadGuard<'_, ()> { + self.inner.logical_locks[self.inner.logical_lock_index(ns, key)].read() } - /// Checks whether the given key exists in the store - pub fn contains + ?Sized>(&self, key: &B) -> Result { - self.owned_contains(key.as_ref().to_owned()) + fn logical_write_guard(&self, ns: KeyNamespace, key: &[u8]) -> RwLockWriteGuard<'_, ()> { + self.inner.logical_locks[self.inner.logical_lock_index(ns, key)].write() } - /// Same as [Self::contains] but takes an owned key - pub fn owned_contains(&self, key: Vec) -> Result { - Ok(self.get_raw(&self.make_user_key(key))?.is_some()) + fn _immut_op( + &self, + ns: KeyNamespace, + key: &[u8], + mut op: impl FnMut(HashCoord, RowReadGuard, &[u8]) -> Result, + ) -> Result { + let hc = HashCoord::new(ns, key, self.inner.config.hash_key); + loop { + let row_table = self.inner.index_file.rows_table(); + let gsl = self + .inner + .index_file + .header_ref() + .global_split_level + .load(Ordering::Acquire); + let mut sl = gsl; + loop { + debug_assert!(sl >= MIN_SPLIT_LEVEL as u64, "sl={sl}"); + let row = row_table.row(hc.row_index(sl)); + let row_sl = row.split_level.load(Ordering::Acquire); + if row_sl == 0 { + sl -= 1; + continue; + } + if row_sl > sl { + break; + } + return op(hc, row, key); + } + } } - pub(crate) fn remove_raw(&self, full_key: &[u8]) -> Result>> { - let 
ph = PartedHash::new(&self.config.hash_seed, full_key); - self.root - .shared_op(ph.shard_selector(), |sh| sh.remove(ph, &full_key)) + fn get_ns(&self, ns: KeyNamespace, key: &[u8]) -> Result>> { + self._immut_op(ns, key, |hc, row, key| { + let files = self.inner.data_files.read(); + for (_, entry) in row.iter_matches(hc) { + let Some(file) = files.get(&entry.file_idx()) else { + continue; + }; + self.inner.record_read(entry.size_hint() as u64); + let kv = match file.read_kv(entry.file_offset(), entry.size_hint()) { + Ok(kv) => kv, + Err(Error::IOError(e)) + if e.kind() == std::io::ErrorKind::UnexpectedEof + || e.kind() == std::io::ErrorKind::InvalidData => + { + continue; + } + Err(e) => return Err(e), + }; + if kv.key() == key { + return Ok(Some(kv.value().to_vec())); + } + } + Ok(None) + }) } - /// Removes a key-value pair from the store, returning `None` if the key did not exist, - /// or `Some(old_value)` if it did - pub fn remove + ?Sized>(&self, key: &B) -> Result>> { - self.owned_remove(key.as_ref().to_owned()) + /// Returns the current value for `key`, if it exists. + pub fn get(&self, key: impl AsRef<[u8]>) -> Result>> { + let value = self.get_ns(KeyNamespace::User, key.as_ref())?; + self.inner.record_lookup(value.is_some()); + Ok(value) } - /// Same as [Self::remove] but takes an owned key - pub fn owned_remove(&self, key: Vec) -> Result>> { - self.remove_raw(&self.make_user_key(key)) + /// Returns `true` if `key` currently exists. 
+ pub fn contains(&self, key: impl AsRef<[u8]>) -> Result { + self.get(key).map(|value| value.is_some()) } - pub(crate) fn insert_internal( + fn get_or_create_ns( &self, - full_key: &[u8], - val: &[u8], - mode: InsertMode, - ) -> Result { - let ph = PartedHash::new(&self.config.hash_seed, full_key); + ns: KeyNamespace, + key: &[u8], + default_val: &[u8], + ) -> Result { + self.inner + ._mut_op(ns, key, default_val, |hc, mut row, key, val| { + let files = self.inner.data_files.read(); + for (_, entry) in row.iter_matches(hc) { + let Some(file) = files.get(&entry.file_idx()) else { + continue; + }; + self.inner.record_read(entry.size_hint() as u64); + let kv = file.read_kv(entry.file_offset(), entry.size_hint())?; + if kv.key() == key { + return Ok(GetOrCreateStatus::ExistingValue(kv.into_value())); + } + } - ensure!( - full_key.len() <= MAX_TOTAL_KEY_SIZE, - CandyError::KeyTooLong(full_key.len()) - ); - ensure!( - val.len() <= MAX_TOTAL_VALUE_SIZE, - CandyError::ValueTooLong(val.len()) - ); + if let Some(col) = row.find_free_slot() { + let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); + let active_file = files + .get(&active_idx) + .ok_or(Error::MissingDataFile(active_idx))?; + let (file_off, size) = active_file.append_kv(ns, key, val)?; + self.inner.record_write(size as u64); + row.insert( + col, + hc.sig, + EntryPointer::new(active_idx, file_off, size, hc.masked_row_selector()), + ); + self.record_write_stats(key.len(), val.len()); + Ok(GetOrCreateStatus::CreatedNew(val.to_vec())) + } else { + Err(Error::SplitRow(row.split_level.load(Ordering::Relaxed))) + } + }) + } - if full_key.len() + val.len() > self.config.max_shard_size as usize { - return Err(anyhow!(CandyError::EntryCannotFitInShard( - full_key.len() + val.len(), - self.config.max_shard_size as usize - ))); - } + /// Returns the existing value for `key`, or inserts `default_val` and returns it. 
+ pub fn get_or_create + ?Sized, B2: AsRef<[u8]> + ?Sized>( + &self, + key: &B1, + default_val: &B2, + ) -> Result { + self.get_or_create_ns(KeyNamespace::User, key.as_ref(), default_val.as_ref()) + } - self.root.insert(ph, full_key, val, mode) + fn track_update_waste(&self, file_idx: u16, _file_ordinal: u64, klen: usize, vlen: usize) { + let added_waste = aligned_data_entry_waste(klen, vlen); + let new_waste = self.inner.index_file.add_file_waste(file_idx, added_waste); + self.inner.maybe_signal_compaction_threshold_crossing( + file_idx, + new_waste.saturating_sub(added_waste), + new_waste, + ); } - pub(crate) fn set_raw(&self, full_key: &[u8], val: &[u8]) -> Result { - match self.insert_internal(full_key, val, InsertMode::Set)? { - InsertStatus::Added => Ok(SetStatus::CreatedNew), - InsertStatus::Replaced(v) => Ok(SetStatus::PrevValue(v)), - InsertStatus::AlreadyExists(v) => Ok(SetStatus::PrevValue(v)), - InsertStatus::KeyDoesNotExist => unreachable!(), - InsertStatus::SplitNeeded => unreachable!(), - } + fn record_write_stats(&self, klen: usize, vlen: usize) { + let entry_size = aligned_data_entry_size(klen, vlen); + let h = self.inner.index_file.header_ref(); + h.written_bytes.fetch_add(entry_size, Ordering::Relaxed); + h.num_created.fetch_add(1, Ordering::Relaxed); + self.inner.bump_histogram(entry_size); } - /// Inserts a key-value pair, creating it or replacing an existing pair. Note that if the program crashed - /// while or "right after" this operation, or if the operating system is unable to flush the page cache, - /// you may lose some data. However, you will still be in a consistent state, where you will get a previous - /// version of the state. - /// - /// While this method is O(1) amortized, every so often it will trigger either a shard compaction or a - /// shard split, which requires rewriting the whole shard. 
However, unlike LSM trees, this operation is - /// constant in size - pub fn set + ?Sized, B2: AsRef<[u8]> + ?Sized>( + fn record_replace_stats( &self, - key: &B1, - val: &B2, - ) -> Result { - self.owned_set(key.as_ref().to_owned(), val.as_ref()) + old_klen: usize, + old_vlen: usize, + new_klen: usize, + new_vlen: usize, + ) { + let old_entry_size = aligned_data_entry_size(old_klen, old_vlen); + let new_entry_size = aligned_data_entry_size(new_klen, new_vlen); + let h = self.inner.index_file.header_ref(); + h.written_bytes.fetch_add(new_entry_size, Ordering::Relaxed); + h.waste_bytes.fetch_add(old_entry_size, Ordering::Relaxed); + h.num_replaced.fetch_add(1, Ordering::Relaxed); + } + + fn record_remove_stats(&self, klen: usize, vlen: usize) { + let entry_size = aligned_data_entry_size(klen, vlen); + let h = self.inner.index_file.header_ref(); + h.waste_bytes.fetch_add(entry_size, Ordering::Relaxed); + h.num_removed.fetch_add(1, Ordering::Relaxed); + } + + fn set_ns(&self, ns: KeyNamespace, key: &[u8], val: &[u8]) -> Result>> { + self.inner._mut_op(ns, key, val, |hc, mut row, key, val| { + let files = self.inner.data_files.read(); + for (col, entry) in row.iter_matches(hc) { + let Some(file) = files.get(&entry.file_idx()) else { + continue; + }; + self.inner.record_read(entry.size_hint() as u64); + let kv = file.read_kv(entry.file_offset(), entry.size_hint())?; + if kv.key() == key { + let klen = kv.key().len(); + let vlen = kv.value().len(); + let old_val = kv.into_value(); + let src_file_idx = file.file_idx; + let src_file_ordinal = file.file_ordinal; + + let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); + let active_file = files + .get(&active_idx) + .ok_or(Error::MissingDataFile(active_idx))?; + let (file_off, size) = active_file.append_kv(ns, key, val)?; + self.inner.record_write(size as u64); + + row.replace_pointer( + col, + EntryPointer::new(active_idx, file_off, size, hc.masked_row_selector()), + ); + 
self.track_update_waste(src_file_idx, src_file_ordinal, klen, vlen); + self.record_replace_stats(klen, vlen, key.len(), val.len()); + return Ok(Some(old_val)); + } + } + + if let Some(col) = row.find_free_slot() { + let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); + let active_file = files + .get(&active_idx) + .ok_or(Error::MissingDataFile(active_idx))?; + let (file_off, size) = active_file.append_kv(ns, key, val)?; + self.inner.record_write(size as u64); + row.insert( + col, + hc.sig, + EntryPointer::new(active_idx, file_off, size, hc.masked_row_selector()), + ); + self.record_write_stats(key.len(), val.len()); + Ok(None) + } else { + Err(Error::SplitRow(row.split_level.load(Ordering::Relaxed))) + } + }) } - /// Same as [Self::set], but the key passed owned to this function - pub fn owned_set(&self, key: Vec, val: &[u8]) -> Result { - Self::ensure_sizes(&key, &val)?; - self.set_raw(&self.make_user_key(key), val) + /// Inserts or replaces `key` with `val`. + pub fn set(&self, key: impl AsRef<[u8]>, val: impl AsRef<[u8]>) -> Result { + Ok( + match self.set_ns(KeyNamespace::User, key.as_ref(), val.as_ref())? { + Some(previous) => crate::SetStatus::PrevValue(previous), + None => crate::SetStatus::CreatedNew, + }, + ) } - pub(crate) fn replace_raw( + fn replace_ns( &self, - full_key: &[u8], + ns: KeyNamespace, + key: &[u8], val: &[u8], expected_val: Option<&[u8]>, ) -> Result { - match self.insert_internal(full_key, val, InsertMode::Replace(expected_val))? 
{ - InsertStatus::Added => unreachable!(), - InsertStatus::Replaced(v) => Ok(ReplaceStatus::PrevValue(v)), - InsertStatus::AlreadyExists(v) => Ok(ReplaceStatus::WrongValue(v)), - InsertStatus::KeyDoesNotExist => Ok(ReplaceStatus::DoesNotExist), - InsertStatus::SplitNeeded => unreachable!(), - } + self.inner._mut_op(ns, key, val, |hc, mut row, key, val| { + let files = self.inner.data_files.read(); + for (col, entry) in row.iter_matches(hc) { + let Some(file) = files.get(&entry.file_idx()) else { + continue; + }; + self.inner.record_read(entry.size_hint() as u64); + let kv = file.read_kv(entry.file_offset(), entry.size_hint())?; + if kv.key() == key { + if let Some(expected) = expected_val + && kv.value() != expected + { + return Ok(ReplaceStatus::WrongValue(kv.into_value())); + } + + let klen = kv.key().len(); + let vlen = kv.value().len(); + let old_val = kv.into_value(); + let src_file_idx = file.file_idx; + let src_file_ordinal = file.file_ordinal; + + let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); + let active_file = files + .get(&active_idx) + .ok_or(Error::MissingDataFile(active_idx))?; + let (file_off, size) = active_file.append_kv(ns, key, val)?; + self.inner.record_write(size as u64); + row.replace_pointer( + col, + EntryPointer::new(active_idx, file_off, size, hc.masked_row_selector()), + ); + self.track_update_waste(src_file_idx, src_file_ordinal, klen, vlen); + self.record_replace_stats(klen, vlen, key.len(), val.len()); + return Ok(ReplaceStatus::PrevValue(old_val)); + } + } + Ok(ReplaceStatus::DoesNotExist) + }) } - /// Replaces the value of an existing key with a new value. If the key existed, returns - /// `PrevValue(value)` with its old value, and if it did not, returns `DoesNotExist` but - /// does not create the key. - /// - /// See [Self::set] for more details - pub fn replace + ?Sized, B2: AsRef<[u8]> + ?Sized>( + /// Replaces `key` with `val` only if the current value matches `expected_val` when provided. 
+ pub fn replace + ?Sized, B2: AsRef<[u8]> + ?Sized, B3: AsRef<[u8]> + ?Sized>( &self, key: &B1, val: &B2, - expected_val: Option<&B2>, + expected_val: Option<&B3>, ) -> Result { - self.owned_replace( - key.as_ref().to_owned(), + self.replace_ns( + KeyNamespace::User, + key.as_ref(), val.as_ref(), - expected_val.map(|ev| ev.as_ref()), + expected_val.map(|expected| expected.as_ref()), ) } - /// Same as [Self::replace], but the key passed owned to this function - pub fn owned_replace( - &self, - key: Vec, - val: &[u8], - expected_val: Option<&[u8]>, - ) -> Result { - Self::ensure_sizes(&key, &val)?; - self.replace_raw(&self.make_user_key(key), val, expected_val) - } - - pub(crate) fn get_or_create_raw( - &self, - full_key: &[u8], - default_val: Vec, - ) -> Result { - match self.insert_internal(full_key, &default_val, InsertMode::GetOrCreate)? { - InsertStatus::Added => Ok(GetOrCreateStatus::CreatedNew(default_val)), - InsertStatus::AlreadyExists(v) => Ok(GetOrCreateStatus::ExistingValue(v)), - InsertStatus::Replaced(_) => unreachable!(), - InsertStatus::KeyDoesNotExist => unreachable!(), - InsertStatus::SplitNeeded => unreachable!(), + fn track_tombstone_waste(&self, file_idx: u16, _file_ordinal: u64, klen: usize, vlen: usize) { + let active_idx = self.inner.active_file_idx.load(Ordering::Relaxed); + if file_idx == active_idx { + self.inner.index_file.add_file_waste( + file_idx, + aligned_data_entry_waste(klen, vlen) + aligned_tombstone_entry_waste(klen), + ); + } else { + let old_entry_waste = aligned_data_entry_waste(klen, vlen); + let new_waste = self + .inner + .index_file + .add_file_waste(file_idx, old_entry_waste); + self.inner.maybe_signal_compaction_threshold_crossing( + file_idx, + new_waste.saturating_sub(old_entry_waste), + new_waste, + ); + self.inner + .index_file + .add_file_waste(active_idx, aligned_tombstone_entry_waste(klen)); } } - /// Gets the value of the given key or creates it with the given default value. 
If the key did not exist, - /// returns `CreatedNew(default_val)`, and if it did, returns `ExistingValue(value)`. - /// This is done atomically, so it can be used to create a key only if it did not exist before, - /// like `open` with `O_EXCL`. - /// - /// See [Self::set] for more details - pub fn get_or_create + ?Sized, B2: AsRef<[u8]> + ?Sized>( - &self, - key: &B1, - default_val: &B2, - ) -> Result { - self.owned_get_or_create(key.as_ref().to_owned(), default_val.as_ref().to_owned()) + fn remove_ns(&self, ns: KeyNamespace, key: &[u8]) -> Result>> { + self.inner._mut_op(ns, key, &[], |hc, mut row, key, _| { + let files = self.inner.data_files.read(); + for (col, entry) in row.iter_matches(hc) { + let Some(file) = files.get(&entry.file_idx()) else { + continue; + }; + self.inner.record_read(entry.size_hint() as u64); + let kv = file.read_kv(entry.file_offset(), entry.size_hint())?; + + if kv.key() == key { + let klen = kv.key().len(); + let vlen = kv.value().len(); + let old_val = kv.into_value(); + let src_file_idx = file.file_idx; + let src_file_ordinal = file.file_ordinal; + + let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); + let active_file = files + .get(&active_idx) + .ok_or(Error::MissingDataFile(active_idx))?; + let tombstone_size = active_file.append_tombstone(ns, key)?; + self.inner.record_write(tombstone_size as u64); + + row.remove(col); + self.track_tombstone_waste(src_file_idx, src_file_ordinal, klen, vlen); + self.record_remove_stats(klen, vlen); + return Ok(Some(old_val)); + } + } + + Ok(None) + }) } - /// Same as [Self::get_or_create], but the `key` and `default_val` are passed owned to this function - pub fn owned_get_or_create( - &self, - key: Vec, - default_val: Vec, - ) -> Result { - Self::ensure_sizes(&key, &default_val)?; - self.get_or_create_raw(&self.make_user_key(key), default_val) + /// Removes `key` and returns its previous value if it existed. 
+ pub fn remove(&self, key: impl AsRef<[u8]>) -> Result>> { + self.remove_ns(KeyNamespace::User, key.as_ref()) + } + + /// Iterates over all currently live user key/value pairs. + pub fn iter_items(&self) -> impl Iterator, Vec)>> + '_ { + let mut row_idx = 0usize; + let mut row_entries: Vec = Vec::with_capacity(ROW_WIDTH); + let mut batch_files = None::>>>; + let mut scratch_buf = Vec::new(); + let mut ptr_idx = 0usize; + + std::iter::from_fn(move || { + loop { + if ptr_idx < row_entries.len() { + let ptr = row_entries[ptr_idx]; + ptr_idx += 1; + let files = batch_files + .as_ref() + .expect("row entries should only be drained with a file map guard"); + let Some(file) = files.get(&ptr.file_idx()) else { + continue; + }; + self.inner.record_read(ptr.size_hint() as u64); + let kv = match file.read_kv_into( + ptr.file_offset(), + ptr.size_hint(), + &mut scratch_buf, + ) { + Ok(kv) => kv, + Err(Error::IOError(e)) + if e.kind() == std::io::ErrorKind::UnexpectedEof + || e.kind() == std::io::ErrorKind::InvalidData => + { + continue; + } + Err(e) => return Some(Err(e)), + }; + if kv.ns != KeyNamespace::User as u8 { + continue; + } + let key = kv.key().to_vec(); + let value = kv.value().to_vec(); + return Some(Ok((key, value))); + } + + row_entries.clear(); + batch_files = None; + ptr_idx = 0; + + loop { + let row_table = self.inner.index_file.rows_table(); + let gsl = self + .inner + .index_file + .header_ref() + .global_split_level + .load(Ordering::Acquire); + let active_rows = 1usize << gsl; + + if row_idx >= active_rows { + break; + } + + let idx = row_idx; + row_idx += 1; + + let row = row_table.row(idx); + if row.split_level.load(Ordering::Acquire) == 0 { + continue; + } + for col in 0..ROW_WIDTH { + if row.signatures[col] != HashCoord::INVALID_SIG + && row.pointers[col].is_valid() + { + row_entries.push(row.pointers[col]); + } + } + batch_files = Some(self.inner.data_files.read()); + break; + } + + if row_entries.is_empty() { + return None; + } + } + }) } - /// 
Returns an iterator over the whole store (skipping lists or typed items) - pub fn iter(&self) -> CandyStoreIterator<'_> { - CandyStoreIterator::new(self, false, true) + /// Flushes index and data files to stable storage. + pub fn flush(&self) -> Result<()> { + self.inner.index_file.sync_all()?; + let files = self.inner.data_files.read(); + for data_file in files.values() { + data_file.file.sync_all().map_err(Error::IOError)?; + } + sync_dir(&self.inner.base_path) } - /// Returns an iterator of keys only over the whole store (skipping lists or typed items) - pub fn iter_keys(&self) -> impl Iterator>> + use<'_> { - CandyStoreIterator::new(self, false, true).map(|res| match res { - Ok(kv) => Ok(kv.0), - Err(e) => Err(e), - }) + /// Returns whether the store was opened from a clean shutdown state. + pub fn was_clean_shutdown(&self) -> bool { + self.was_clean_shutdown } - pub fn iter_raw(&self) -> CandyStoreIterator<'_> { - CandyStoreIterator::new(self, true, true) + /// Returns the number of background compaction errors observed since open. + pub fn compaction_errors(&self) -> u64 { + self.inner.compaction_errors.load(Ordering::Relaxed) } - /// Returns an iterator starting from the specified cookie (obtained via [CandyStoreIterator::cookie]) - pub fn iter_from_cookie(&self, cookie: u64) -> CandyStoreIterator<'_> { - CandyStoreIterator::from_cookie(self, cookie, false, true) + /// Returns the number of currently live entries. + pub fn num_items(&self) -> usize { + self.stats().num_entries() as usize } - /// Returns an iterator of keys only starting from the specified cookie (obtained via [CandyStoreIterator::cookie]) - pub fn iter_keys_from_cookie( - &self, - cookie: u64, - ) -> impl Iterator>> + use<'_> { - CandyStoreIterator::from_cookie(self, cookie, false, true).map(|res| match res { - Ok(kv) => Ok(kv.0), - Err(e) => Err(e), - }) + /// Returns the current index capacity in entries. 
+ pub fn capacity(&self) -> usize { + let row_table = self.inner.index_file.rows_table(); + let row_count = row_table.row_guard.len() / std::mem::size_of::(); + row_count * ROW_WIDTH } - /// Returns useful stats about the store - pub fn stats(&self) -> Stats { - let shard_stats = self.root.call_on_all_shards(|sh| sh.get_stats()).unwrap(); + /// Shrinks the index when the reclaimable row ratio is at least `min_wasted_ratio`. + pub fn shrink_to_fit(&self, min_wasted_ratio: f64) -> Result { + let _key_guards = self + .inner + .logical_locks + .iter() + .map(|lock| lock.write()) + .collect::>(); + + let min_wasted_ratio = min_wasted_ratio.clamp(0.0, 1.0); + let current_rows = self.inner.index_file.num_rows(); + if current_rows == 0 { + return Ok(0); + } - let mut stats = Stats::default(); - self.stats.fill_stats(&mut stats); + let required_rows = self.num_items().div_ceil(ROW_WIDTH * 8 / 10).max(1); + let min_rows_cfg = (self.inner.config.initial_capacity / ROW_WIDTH) + .max(1usize << MIN_SPLIT_LEVEL) + .max(1); + let min_rows = required_rows.max(min_rows_cfg); - for stats2 in shard_stats { - stats.num_shards += 1; - stats.occupied_bytes += stats2.write_offset; - stats.wasted_bytes += stats2.wasted_bytes; - stats.num_inserts += stats2.num_inserts; - stats.num_removals += stats2.num_removals; + let reclaimable_rows = current_rows.saturating_sub(min_rows); + let reclaimable_ratio = reclaimable_rows as f64 / current_rows as f64; + if reclaimable_ratio < min_wasted_ratio { + return Ok(current_rows); } - stats + + self.inner.index_file.shrink(min_rows_cfg) } - /// Merges small shards (shards with a used capacity of less than `max_fill_level`), `max_fill_level` should - /// be a number between 0 and 0.5, the reasonable choice is 0.25. - /// - /// Note 1: this is an expensive operation that takes a global lock on the store (no other operations can - /// take place while merging is in progress). 
Only use it if you expect the number of items to be at half or - /// less than what it was (i.e., after a peak period) - /// - /// Note 2: merging will stop once we reach the number of shards required for [Config::expected_number_of_keys], - /// if configured - /// - /// Returns true if any shards were merged, false otherwise - pub fn merge_small_shards(&self, max_fill_level: f32) -> Result { - self.root.merge_small_shards(max_fill_level) + /// Synchronous alias for [`CandyStore::shrink_to_fit`]. + pub fn shrink_index_blocking(&self, min_wasted_ratio: f64) -> Result { + self.shrink_to_fit(min_wasted_ratio) } - /// Sets a big item, whose value is unlimited in size. Behind the scenes the value is split into chunks - /// and stored as a list. This makes this API non-atomic, i.e., crashing while writing a big value may later - /// allow you to retrieve a partial result. It is up to the caller to add a length field or a checksum to make - /// sure the value is correct. - /// - /// Returns true if the value had existed before (thus it was replaced), false otherwise - pub fn set_big + ?Sized, B2: AsRef<[u8]> + ?Sized>( - &self, - key: &B1, - val: &B2, - ) -> Result { - let existed = self.discard_queue(key)?; - self.extend_queue(key, val.as_ref().chunks(MAX_VALUE_SIZE))?; - self.push_to_queue_tail(key, bytes_of(&val.as_ref().len()))?; - Ok(existed) - } - - /// Returns a big item, collecting all the underlying chunks into a single value that's returned to the - /// caller. 
- pub fn get_big(&self, key: &[u8]) -> Result>> { - let mut val = vec![]; - let range = self.queue_range(key)?; - for res in self.iter_queue(key) { - let (idx, chunk) = res?; - // last element should encode the byte length of the item - if it's missing or encodes a different length, - // consider it corrupt and ignore this element - if idx + 1 == range.end { - if chunk.len() == size_of::() && *from_bytes::(&chunk) == val.len() { - return Ok(Some(val)); - } - } else { - val.extend_from_slice(&chunk); - } + /// Returns a snapshot of store statistics and accounting counters. + pub fn stats(&self) -> Stats { + let h = self.inner.index_file.header_ref(); + let num_rows = self.inner.index_file.num_rows() as u64; + let capacity = num_rows.saturating_mul(ROW_WIDTH as u64); + let num_items = h + .num_created + .load(Ordering::Relaxed) + .saturating_sub(h.num_removed.load(Ordering::Relaxed)); + Stats { + num_rows, + capacity, + num_items, + index_size_bytes: self.inner.index_file.file_size_bytes(), + num_compactions: self.inner.num_compactions.load(Ordering::Relaxed), + compaction_time_ms: self.inner.compaction_time_ms.load(Ordering::Relaxed), + num_data_files: self.inner.data_files.read().len() as u64, + num_positive_lookups: self.inner.num_positive_lookups.load(Ordering::Relaxed), + num_negative_lookups: self.inner.num_negative_lookups.load(Ordering::Relaxed), + num_read_ops: self.inner.num_read_ops.load(Ordering::Relaxed), + num_read_bytes: self.inner.num_read_bytes.load(Ordering::Relaxed), + num_write_ops: self.inner.num_write_ops.load(Ordering::Relaxed), + num_write_bytes: self.inner.num_write_bytes.load(Ordering::Relaxed), + num_created: h.num_created.load(Ordering::Relaxed), + num_removed: h.num_removed.load(Ordering::Relaxed), + num_replaced: h.num_replaced.load(Ordering::Relaxed), + written_bytes: h.written_bytes.load(Ordering::Relaxed), + waste_bytes: h.waste_bytes.load(Ordering::Relaxed), + reclaimed_bytes: h.reclaimed_bytes.load(Ordering::Relaxed), + 
entries_under_64: h.size_histogram[0].load(Ordering::Relaxed), + entries_under_256: h.size_histogram[1].load(Ordering::Relaxed), + entries_under_1024: h.size_histogram[2].load(Ordering::Relaxed), + entries_under_4096: h.size_histogram[3].load(Ordering::Relaxed), + entries_under_16384: h.size_histogram[4].load(Ordering::Relaxed), + entries_over_16384: h.size_histogram[5].load(Ordering::Relaxed), } - Ok(None) } - /// Removes a big item by key. Returns true if the key had existed, false otherwise. - /// See also [Self::set_big] - pub fn remove_big(&self, key: &[u8]) -> Result { - self.discard_queue(key) + /// Simulates a crash by dropping the instance without performing clean shutdown operations (e.g. marking the index as clean). + pub fn _abort_for_testing(mut self) { + self.allow_clean_shutdown = false; + drop(self); } } -// impl Drop for CandyStore { -// fn drop(&mut self) { -// _ = self.threadpool.terminate(); -// } -// } +#[cfg(test)] +mod tests { + use super::*; + + use tempfile::tempdir; + + #[test] + fn test_compaction_errors_reports_counter() -> Result<()> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + assert_eq!(db.compaction_errors(), 0); + + db.inner.compaction_errors.store(7, Ordering::Relaxed); + + assert_eq!(db.compaction_errors(), 7); + + Ok(()) + } +} diff --git a/src/store/compaction.rs b/src/store/compaction.rs new file mode 100644 index 0000000..5c15279 --- /dev/null +++ b/src/store/compaction.rs @@ -0,0 +1,265 @@ +use std::{ + path::Path, + sync::{Arc, atomic::Ordering}, +}; + +use crate::{ + index_file::EntryPointer, + internal::{EntryType, KeyNamespace, data_file_path, invalid_data_error, sync_dir}, + pacer::Pacer, + types::{Error, Result}, +}; + +use super::{CandyStore, StoreInner}; + +impl StoreInner { + pub(super) fn compact_file( + &self, + file_idx: u16, + expected_ordinal: u64, + pacer: &mut Pacer, + #[cfg(windows)] pending_deletions: &mut Vec, + ) -> Result<()> { + if 
self.active_file_idx.load(Ordering::Acquire) == file_idx { + return Ok(()); + } + + let source_file = match self.data_file(file_idx) { + Ok(f) => f, + Err(Error::MissingDataFile(_)) => return Ok(()), + Err(e) => return Err(e), + }; + if source_file.file_ordinal != expected_ordinal { + return Ok(()); + } + + let mut offset = 0u64; + let mut read_buf = Vec::new(); + let mut buf_file_offset = 0u64; + let mut match_scratch = Vec::new(); + + loop { + if self.shutting_down.load(Ordering::Acquire) { + return Ok(()); + } + + let Some((kv, entry_offset, next_offset)) = + source_file.read_next_entry_ref(offset, &mut read_buf, &mut buf_file_offset)? + else { + break; + }; + offset = next_offset; + self.record_read(next_offset - entry_offset); + + pacer.consume(next_offset - entry_offset); + + let Some(ns) = KeyNamespace::from_u8(kv.ns) else { + return Err(invalid_data_error("unknown key namespace in data file")); + }; + + if let EntryType::Data = kv.entry_type { + let key = kv.key(); + let val = kv.value(); + + self._mut_op(ns, key, val, |hc, mut row, key, val| { + let files = self.data_files.read(); + for (col, entry) in row.iter_matches(hc) { + let Some(file) = files.get(&entry.file_idx()) else { + continue; + }; + self.record_read(entry.size_hint() as u64); + let existing_kv = file.read_kv_into( + entry.file_offset(), + entry.size_hint(), + &mut match_scratch, + )?; + if existing_kv.key() == key { + if entry.file_idx() != file_idx || entry.file_offset() != entry_offset { + return Ok(()); + } + + let active_idx = self.active_file_idx.load(Ordering::Acquire); + let active_file = files + .get(&active_idx) + .ok_or(Error::MissingDataFile(active_idx))?; + let (file_off, size) = active_file.append_kv(ns, key, val)?; + self.record_write(size as u64); + row.replace_pointer( + col, + EntryPointer::new( + active_idx, + file_off, + size, + hc.masked_row_selector(), + ), + ); + return Ok(()); + } + } + Ok(()) + })?; + } + } + + let removed = self.data_files.write().remove(&file_idx); 
+ drop(source_file); // MUST drop before removing file to release mmap and handle + drop(removed); // Drop any open handles to the file + + // Take file waste regardless of whether remove succeeds, to avoid infinite 100% loop if remove fails + let reclaimed = self.index_file.take_file_waste(file_idx); + self.index_file + .header_ref() + .reclaimed_bytes + .fetch_add(reclaimed as u64, Ordering::Relaxed); + + let file_path = data_file_path(self.base_path.as_path(), file_idx); + match std::fs::remove_file(&file_path) { + Ok(()) => sync_dir(self.base_path.as_path())?, + #[cfg(windows)] + Err(_) => pending_deletions.push(file_path), + #[cfg(not(windows))] + Err(err) => return Err(Error::IOError(err)), + } + Ok(()) + } +} + +impl CandyStore { + pub(super) fn stop_compaction(&mut self) { + self.inner.shutting_down.store(true, Ordering::Release); + { + let mut state = self.inner.compaction_state.lock(); + state.wake_requested = true; + self.inner.compaction_condvar.notify_all(); + } + if let Some(thd) = self.compaction_thd.take() { + let _ = thd.join(); + } + } + + pub(super) fn clear_directory_contents(base_path: &Path) -> Result<()> { + let mut removed_any = false; + for entry in std::fs::read_dir(base_path).map_err(Error::IOError)? 
{ + let entry = entry.map_err(Error::IOError)?; + let path = entry.path(); + if path.file_name().and_then(|name| name.to_str()) == Some(".lockfile") { + continue; + } + + let file_type = entry.file_type().map_err(Error::IOError)?; + if file_type.is_dir() { + std::fs::remove_dir_all(&path).map_err(Error::IOError)?; + removed_any = true; + } else if file_type.is_file() || file_type.is_symlink() { + std::fs::remove_file(&path).map_err(Error::IOError)?; + removed_any = true; + } + } + if removed_any { + sync_dir(base_path)?; + } + Ok(()) + } + + #[cfg(windows)] + fn retry_pending_deletions(ctx: &StoreInner, pending: &mut Vec) { + let before = pending.len(); + pending.retain(|path| std::fs::remove_file(path).is_err()); + if pending.len() < before { + let _ = sync_dir(ctx.base_path.as_path()); + } + } + + pub(super) fn start_compaction(&mut self) { + let ctx = Arc::clone(&self.inner); + let thd = std::thread::spawn(move || { + let throughput_bytes_per_sec = + (ctx.config.compaction_throughput_bytes_per_sec as u64).max(1); + let tokens_per_unit = (throughput_bytes_per_sec / 10).max(1); + let burst_size = tokens_per_unit.saturating_mul(2); + let mut pacer = Pacer::new( + tokens_per_unit, + std::time::Duration::from_millis(100), + burst_size, + ); + + #[cfg(windows)] + let mut pending_deletions = Vec::::new(); + loop { + { + let mut state = ctx.compaction_state.lock(); + while !state.wake_requested && !ctx.shutting_down.load(Ordering::Acquire) { + ctx.compaction_condvar.wait(&mut state); + } + + if ctx.shutting_down.load(Ordering::Acquire) { + break; + } + + state.wake_requested = false; + } + while let Some((file_idx, file_ordinal)) = ctx.next_compaction_candidate() { + if ctx.shutting_down.load(Ordering::Acquire) { + return; + } + #[cfg(windows)] + Self::retry_pending_deletions(&ctx, &mut pending_deletions); + let t0 = std::time::Instant::now(); + let res = ctx.compact_file( + file_idx, + file_ordinal, + &mut pacer, + #[cfg(windows)] + &mut pending_deletions, + ); + 
ctx.compaction_time_ms + .fetch_add(t0.elapsed().as_millis() as u64, Ordering::Relaxed); + match res { + Ok(()) => { + ctx.num_compactions.fetch_add(1, Ordering::Relaxed); + } + Err(_e) => { + ctx.compaction_errors.fetch_add(1, Ordering::Relaxed); + } + } + } + #[cfg(windows)] + Self::retry_pending_deletions(&ctx, &mut pending_deletions); + } + }); + + self.compaction_thd = Some(thd); + self.inner.signal_compaction_scan(); + } +} + +impl Drop for CandyStore { + fn drop(&mut self) { + self.stop_compaction(); + + if !self.allow_clean_shutdown { + return; + } + let data_files_synced = self + .inner + .data_files + .read() + .values() + .all(|df| df.file.sync_all().is_ok()); + if !data_files_synced { + return; + } + self.inner + .index_file + .header_ref() + .dirty + .store(0, Ordering::Release); + if self.inner.index_file.flush_header().is_err() { + self.inner + .index_file + .header_ref() + .dirty + .store(1, Ordering::Release); + } + } +} diff --git a/src/store/list.rs b/src/store/list.rs new file mode 100644 index 0000000..99cbb37 --- /dev/null +++ b/src/store/list.rs @@ -0,0 +1,868 @@ +use siphasher::sip::SipHasher13; +use smallvec::SmallVec; + +use std::{hash::Hasher, ops::Range}; + +use crate::{ + internal::{KeyNamespace, RangeMetadata, aligned_data_entry_size}, + store::CandyStore, + types::{ + Error, GetOrCreateStatus, ListCompactionParams, MAX_USER_KEY_SIZE, MAX_USER_VALUE_SIZE, + ReplaceStatus, Result, SetStatus, + }, +}; + +/// A list item as `(item_key, value)`. +pub type KVPair = (Vec, Vec); + +#[derive(Clone, Copy)] +pub(super) struct ListNamespaces { + pub(super) meta: KeyNamespace, + pub(super) index: KeyNamespace, + pub(super) data: KeyNamespace, +} + +const LIST_NS: ListNamespaces = ListNamespaces { + meta: KeyNamespace::ListMeta, + index: KeyNamespace::ListIndex, + data: KeyNamespace::ListData, +}; + +/// Double-ended iterator over live list items in logical order. 
+pub struct ListIterator<'a> { + store: &'a CandyStore, + list: Vec, + ns: ListNamespaces, + next_idx: u64, + end_idx: u64, + initial_next_idx: u64, + initial_end_idx: u64, +} + +type ListMetadata = RangeMetadata; + +impl ListIterator<'_> { + fn heal_head(&self, new_head: u64) { + let _ = self.try_heal_head(new_head); + } + + fn try_heal_head(&self, new_head: u64) -> Result<()> { + let _lock = self.store.logical_write_guard(self.ns.meta, &self.list); + let mut meta = get_list_meta(self.store, self.ns, &self.list)?; + if meta.head >= self.initial_next_idx && meta.head < new_head { + meta.head = new_head; + set_list_meta(self.store, self.ns, &self.list, meta)?; + } + Ok(()) + } + + fn heal_tail(&self, new_tail: u64) { + let _ = self.try_heal_tail(new_tail); + } + + fn try_heal_tail(&self, new_tail: u64) -> Result<()> { + let _lock = self.store.logical_write_guard(self.ns.meta, &self.list); + let mut meta = get_list_meta(self.store, self.ns, &self.list)?; + if meta.tail <= self.initial_end_idx && meta.tail > new_tail { + meta.tail = new_tail; + set_list_meta(self.store, self.ns, &self.list, meta)?; + } + Ok(()) + } +} + +impl Iterator for ListIterator<'_> { + type Item = Result; + + fn next(&mut self) -> Option { + while self.next_idx <= self.end_idx { + let idx = self.next_idx; + self.next_idx += 1; + + if idx > self.initial_next_idx + 1000 { + self.heal_head(idx); + self.initial_next_idx = idx; + } + + let idx_key = make_list_index_key(&self.list, idx); + let key = match self.store.get_ns(self.ns.index, &idx_key) { + Ok(Some(key)) => key, + Ok(None) => continue, + Err(err) => return Some(Err(err)), + }; + + let data_key = make_list_data_key(&self.list, &key); + let value = match self.store.get_ns(self.ns.data, &data_key) { + Ok(Some(value)) => value, + Ok(None) => continue, + Err(err) => return Some(Err(err)), + }; + + return Some(Ok((key, strip_idx_suffix(value)))); + } + + None + } +} + +impl DoubleEndedIterator for ListIterator<'_> { + fn next_back(&mut self) -> 
Option<::Item> { + while self.next_idx <= self.end_idx { + let idx = self.end_idx; + if self.end_idx == 0 { + self.next_idx = 1; + } else { + self.end_idx -= 1; + } + + if idx + 1000 < self.initial_end_idx { + self.heal_tail(idx); + self.initial_end_idx = idx; + } + + let idx_key = make_list_index_key(&self.list, idx); + let key = match self.store.get_ns(self.ns.index, &idx_key) { + Ok(Some(key)) => key, + Ok(None) => continue, + Err(err) => return Some(Err(err)), + }; + + let data_key = make_list_data_key(&self.list, &key); + let value = match self.store.get_ns(self.ns.data, &data_key) { + Ok(Some(value)) => value, + Ok(None) => continue, + Err(err) => return Some(Err(err)), + }; + + return Some(Ok((key, strip_idx_suffix(value)))); + } + + None + } +} + +impl CandyStore { + /// Inserts or replaces `item_key` in `list_key`, placing the item at the tail. + pub fn set_in_list< + B1: AsRef<[u8]> + ?Sized, + B2: AsRef<[u8]> + ?Sized, + B3: AsRef<[u8]> + ?Sized, + >( + &self, + list_key: &B1, + item_key: &B2, + val: &B3, + ) -> Result { + let previous = self.list_set_at_tail_with_ns( + LIST_NS, + list_key.as_ref(), + item_key.as_ref(), + val.as_ref(), + )?; + Ok(match previous { + Some(previous) => SetStatus::PrevValue(previous), + None => SetStatus::CreatedNew, + }) + } + + /// Inserts or replaces `item_key` in `list_key`, moving it to the logical tail. + pub fn set_in_list_promoting< + B1: AsRef<[u8]> + ?Sized, + B2: AsRef<[u8]> + ?Sized, + B3: AsRef<[u8]> + ?Sized, + >( + &self, + list_key: &B1, + item_key: &B2, + val: &B3, + ) -> Result { + let previous = + self.list_promote_with_ns(LIST_NS, list_key.as_ref(), item_key.as_ref(), val.as_ref())?; + Ok(match previous { + Some(previous) => SetStatus::PrevValue(previous), + None => SetStatus::CreatedNew, + }) + } + + /// Replaces a list item only if its current value matches `expected_val` when provided. 
+ pub fn replace_in_list< + B1: AsRef<[u8]> + ?Sized, + B2: AsRef<[u8]> + ?Sized, + B3: AsRef<[u8]> + ?Sized, + B4: AsRef<[u8]> + ?Sized, + >( + &self, + list_key: &B1, + item_key: &B2, + val: &B3, + expected_val: Option<&B4>, + ) -> Result { + self.list_replace_with_ns( + LIST_NS, + list_key.as_ref(), + item_key.as_ref(), + val.as_ref(), + expected_val.map(|expected| expected.as_ref()), + ) + } + + /// Returns the current list item value, or inserts `default_val` if the item is missing. + pub fn get_or_create_in_list< + B1: AsRef<[u8]> + ?Sized, + B2: AsRef<[u8]> + ?Sized, + B3: AsRef<[u8]> + ?Sized, + >( + &self, + list_key: &B1, + item_key: &B2, + default_val: &B3, + ) -> Result { + self.list_get_or_create_with_ns( + LIST_NS, + list_key.as_ref(), + item_key.as_ref(), + default_val.as_ref(), + ) + } + + /// Returns the current value for `item_key` in `list_key`, if present. + pub fn get_from_list + ?Sized, B2: AsRef<[u8]> + ?Sized>( + &self, + list_key: &B1, + item_key: &B2, + ) -> Result>> { + self.list_get_with_ns(LIST_NS, list_key.as_ref(), item_key.as_ref()) + } + + /// Removes `item_key` from `list_key` and returns its previous value if it existed. + pub fn remove_from_list + ?Sized, B2: AsRef<[u8]> + ?Sized>( + &self, + list_key: &B1, + item_key: &B2, + ) -> Result>> { + self.list_remove_with_ns(LIST_NS, list_key.as_ref(), item_key.as_ref()) + } + + /// Compacts list storage when `params` indicate enough holes exist to justify rewriting it. + pub fn compact_list_if_needed + ?Sized>( + &self, + list_key: &B, + params: ListCompactionParams, + ) -> Result { + self.list_compact_with_ns(LIST_NS, list_key.as_ref(), params) + } + + /// Iterates over live items in `list_key` from head to tail. + pub fn iter_list + ?Sized>(&self, list_key: &B) -> ListIterator<'_> { + self.list_iter_with_ns(LIST_NS, list_key.as_ref()) + } + + /// Removes all items in `list_key`. 
+ pub fn discard_list + ?Sized>(&self, list_key: &B) -> Result { + self.list_discard_with_ns(LIST_NS, list_key.as_ref()) + } + + /// Returns the head item of `list_key` without removing it. + pub fn peek_list_head + ?Sized>(&self, list_key: &B) -> Result> { + self.peek_list_head_with_ns(LIST_NS, list_key.as_ref()) + } + + /// Returns the tail item of `list_key` without removing it. + pub fn peek_list_tail + ?Sized>(&self, list_key: &B) -> Result> { + self.peek_list_tail_with_ns(LIST_NS, list_key.as_ref()) + } + + /// Removes and returns the head item of `list_key`. + pub fn pop_list_head + ?Sized>(&self, list_key: &B) -> Result> { + self.pop_list_head_with_ns(LIST_NS, list_key.as_ref()) + } + + /// Removes and returns the tail item of `list_key`. + pub fn pop_list_tail + ?Sized>(&self, list_key: &B) -> Result> { + self.pop_list_tail_with_ns(LIST_NS, list_key.as_ref()) + } + + /// Returns the number of live items in `list_key`. + pub fn list_len + ?Sized>(&self, list_key: &B) -> Result { + self.list_len_with_ns(LIST_NS, list_key.as_ref()) + } + + /// Retains only items for which `func` returns `true`, preserving list order. + pub fn retain_in_list + ?Sized>( + &self, + list_key: &B, + func: impl FnMut(&[u8], &[u8]) -> Result, + ) -> Result<()> { + self.list_retain_with_ns(LIST_NS, list_key.as_ref(), func) + } + + pub(super) fn list_retain_with_ns( + &self, + ns: ListNamespaces, + list_key: &[u8], + mut func: impl FnMut(&[u8], &[u8]) -> Result, + ) -> Result<()> { + let _lock = self.logical_write_guard(ns.meta, list_key); + let mut meta = get_list_meta(self, ns, list_key)?; + if meta.count == 0 { + return Ok(()); + } + + let original_head = meta.head; + let original_tail = meta.tail; + let mut new_tail = meta.tail; + let mut retained_count = 0u64; + + for idx in original_head..=original_tail { + let idx_key = make_list_index_key(list_key, idx); + let key = match self.get_ns(ns.index, &idx_key)? 
{ + Some(key) => key, + None => continue, + }; + + let data_key = make_list_data_key(list_key, &key); + let val_with_idx = match self.get_ns(ns.data, &data_key)? { + Some(value) => value, + None => { + self.remove_ns(ns.index, &idx_key)?; + continue; + } + }; + let value = strip_idx_suffix(val_with_idx); + + self.remove_ns(ns.index, &idx_key)?; + + if func(&key, &value)? { + new_tail += 1; + let new_value = append_idx_suffix(&value, new_tail); + self.set_ns(ns.data, &data_key, &new_value)?; + + let new_idx_key = make_list_index_key(list_key, new_tail); + self.set_ns(ns.index, &new_idx_key, &key)?; + retained_count += 1; + } else { + self.remove_ns(ns.data, &data_key)?; + } + } + + if retained_count == 0 { + meta = ListMetadata::new(); + } else { + meta.head = original_tail + 1; + meta.tail = new_tail; + meta.count = retained_count; + } + + set_list_meta(self, ns, list_key, meta)?; + Ok(()) + } + + pub(super) fn list_set_at_tail_with_ns( + &self, + ns: ListNamespaces, + list: &[u8], + key: &[u8], + value: &[u8], + ) -> Result>> { + self.validate_list_item_sizes(list, key, value)?; + let _lock = self.logical_write_guard(ns.meta, list); + + let mut meta = get_list_meta(self, ns, list)?; + let data_key = make_list_data_key(list, key); + + if let Some(existing) = self.get_ns(ns.data, &data_key)? { + let idx = extract_idx_suffix(&existing); + let new_value = append_idx_suffix(value, idx); + let old_with_idx = self.set_ns(ns.data, &data_key, &new_value)?; + + // Always write the index entry: after a crash the OS may have + // flushed the metadata update but not the corresponding index + // write, leaving the entry invisible in the list. 
+ let idx_key = make_list_index_key(list, idx); + self.set_ns(ns.index, &idx_key, key)?; + + if meta.count == 0 || idx > meta.tail { + if meta.count == 0 { + meta.head = idx; + } + if idx > meta.tail { + meta.tail = idx; + } + meta.count += 1; + set_list_meta(self, ns, list, meta)?; + } + + return Ok(old_with_idx.map(strip_idx_suffix)); + } + + let new_idx = meta.tail + 1; + let value_with_idx = append_idx_suffix(value, new_idx); + self.set_ns(ns.data, &data_key, &value_with_idx)?; + + let idx_key = make_list_index_key(list, new_idx); + self.set_ns(ns.index, &idx_key, key)?; + + if meta.count == 0 { + meta.head = new_idx; + } + meta.tail = new_idx; + meta.count += 1; + set_list_meta(self, ns, list, meta)?; + + Ok(None) + } + + pub(super) fn list_replace_with_ns( + &self, + ns: ListNamespaces, + list: &[u8], + key: &[u8], + value: &[u8], + expected: Option<&[u8]>, + ) -> Result { + self.validate_list_item_sizes(list, key, value)?; + let _lock = self.logical_write_guard(ns.meta, list); + + let data_key = make_list_data_key(list, key); + let Some(existing_value) = self.get_ns(ns.data, &data_key)? else { + return Ok(ReplaceStatus::DoesNotExist); + }; + + let previous = strip_idx_suffix(existing_value.clone()); + if let Some(expected) = expected + && previous != expected + { + return Ok(ReplaceStatus::WrongValue(previous)); + } + + let idx = extract_idx_suffix(&existing_value); + let new_value = append_idx_suffix(value, idx); + self.set_ns(ns.data, &data_key, &new_value)?; + Ok(ReplaceStatus::PrevValue(previous)) + } + + pub(super) fn list_get_or_create_with_ns( + &self, + ns: ListNamespaces, + list: &[u8], + key: &[u8], + value: &[u8], + ) -> Result { + self.validate_list_item_sizes(list, key, value)?; + let _lock = self.logical_write_guard(ns.meta, list); + + let data_key = make_list_data_key(list, key); + if let Some(existing) = self.get_ns(ns.data, &data_key)? 
{ + return Ok(GetOrCreateStatus::ExistingValue(strip_idx_suffix(existing))); + } + + let mut meta = get_list_meta(self, ns, list)?; + let new_idx = meta.tail + 1; + let value_with_idx = append_idx_suffix(value, new_idx); + self.set_ns(ns.data, &data_key, &value_with_idx)?; + + let idx_key = make_list_index_key(list, new_idx); + self.set_ns(ns.index, &idx_key, key)?; + + if meta.count == 0 { + meta.head = new_idx; + } + meta.tail = new_idx; + meta.count += 1; + set_list_meta(self, ns, list, meta)?; + + Ok(GetOrCreateStatus::CreatedNew(value.to_vec())) + } + + pub(super) fn list_promote_with_ns( + &self, + ns: ListNamespaces, + list: &[u8], + key: &[u8], + value: &[u8], + ) -> Result>> { + self.validate_list_item_sizes(list, key, value)?; + let _lock = self.logical_write_guard(ns.meta, list); + let mut meta = get_list_meta(self, ns, list)?; + let data_key = make_list_data_key(list, key); + + let mut old_value = None; + let mut old_idx_key = None; + if let Some(existing) = self.get_ns(ns.data, &data_key)? 
{ + let idx = extract_idx_suffix(&existing); + old_idx_key = Some(make_list_index_key(list, idx)); + old_value = Some(strip_idx_suffix(existing)); + } else { + meta.count += 1; + } + + let new_idx = meta.tail + 1; + let value_with_idx = append_idx_suffix(value, new_idx); + self.set_ns(ns.data, &data_key, &value_with_idx)?; + + let idx_key = make_list_index_key(list, new_idx); + self.set_ns(ns.index, &idx_key, key)?; + + if let Some(old_idx_key) = old_idx_key { + self.remove_ns(ns.index, &old_idx_key)?; + } + + if meta.count == 1 { + meta.head = new_idx; + } + meta.tail = new_idx; + set_list_meta(self, ns, list, meta)?; + + Ok(old_value) + } + + pub(super) fn list_get_with_ns( + &self, + ns: ListNamespaces, + list: &[u8], + key: &[u8], + ) -> Result>> { + let _lock = self.logical_read_guard(ns.meta, list); + let data_key = make_list_data_key(list, key); + Ok(self.get_ns(ns.data, &data_key)?.map(strip_idx_suffix)) + } + + pub(super) fn list_remove_with_ns( + &self, + ns: ListNamespaces, + list: &[u8], + key: &[u8], + ) -> Result>> { + let _lock = self.logical_write_guard(ns.meta, list); + self._list_remove_with_ns(ns, list, key) + } + + fn _list_remove_with_ns( + &self, + ns: ListNamespaces, + list: &[u8], + key: &[u8], + ) -> Result>> { + let mut meta = get_list_meta(self, ns, list)?; + let data_key = make_list_data_key(list, key); + let removed = match self.remove_ns(ns.data, &data_key)? 
{ + Some(value) => value, + None => return Ok(None), + }; + + let idx = extract_idx_suffix(&removed); + let idx_key = make_list_index_key(list, idx); + self.remove_ns(ns.index, &idx_key)?; + + let old_value = Some(strip_idx_suffix(removed)); + meta.count = meta.count.saturating_sub(1); + + if meta.count == 0 { + meta = ListMetadata::new(); + } else { + let mut check_head = idx == meta.head; + if !check_head { + let head_key = make_list_index_key(list, meta.head); + if self.get_ns(ns.index, &head_key)?.is_none() { + check_head = true; + } + } + + if check_head { + let mut new_head = meta.head; + loop { + if new_head > meta.tail { + meta = ListMetadata::new(); + break; + } + if new_head == idx { + new_head += 1; + continue; + } + let probe_idx_key = make_list_index_key(list, new_head); + if self.get_ns(ns.index, &probe_idx_key)?.is_some() { + meta.head = new_head; + break; + } + new_head += 1; + } + } + + if meta.count > 0 { + let mut check_tail = idx == meta.tail; + if !check_tail { + let tail_key = make_list_index_key(list, meta.tail); + if self.get_ns(ns.index, &tail_key)?.is_none() { + check_tail = true; + } + } + + if check_tail { + let mut new_tail = meta.tail; + loop { + if new_tail < meta.head { + meta = ListMetadata::new(); + break; + } + if new_tail == idx { + if new_tail == 0 { + break; + } + new_tail -= 1; + continue; + } + let probe_idx_key = make_list_index_key(list, new_tail); + if self.get_ns(ns.index, &probe_idx_key)?.is_some() { + meta.tail = new_tail; + break; + } + if new_tail == 0 { + meta = ListMetadata::new(); + break; + } + new_tail -= 1; + } + } + } + } + + set_list_meta(self, ns, list, meta)?; + Ok(old_value) + } + + pub(super) fn list_discard_with_ns(&self, ns: ListNamespaces, list: &[u8]) -> Result { + let _lock = self.logical_write_guard(ns.meta, list); + let meta = get_list_meta(self, ns, list)?; + if meta.count == 0 { + return Ok(false); + } + + for idx in meta.head..=meta.tail { + let idx_key = make_list_index_key(list, idx); + if let 
Some(key) = self.remove_ns(ns.index, &idx_key)? { + let data_key = make_list_data_key(list, &key); + self.remove_ns(ns.data, &data_key)?; + } + } + + self.remove_ns(ns.meta, list)?; + Ok(true) + } + + pub(super) fn list_compact_with_ns( + &self, + ns: ListNamespaces, + list: &[u8], + params: ListCompactionParams, + ) -> Result { + let _lock = self.logical_write_guard(ns.meta, list); + let mut meta = get_list_meta(self, ns, list)?; + if meta.count == 0 { + return Ok(false); + } + + let span = if meta.tail >= meta.head { + meta.tail - meta.head + 1 + } else { + 0 + }; + if span == 0 || span < params.min_length { + return Ok(false); + } + + let holes_ratio = (span - meta.count) as f64 / span as f64; + if holes_ratio < params.min_holes_ratio { + return Ok(false); + } + + let limit = meta.tail; + while meta.head <= limit { + let idx_key = make_list_index_key(list, meta.head); + if let Some(key) = self.get_ns(ns.index, &idx_key)? { + let data_key = make_list_data_key(list, &key); + if let Some(value_with_idx) = self.get_ns(ns.data, &data_key)? 
{ + let value = strip_idx_suffix(value_with_idx); + + meta.tail += 1; + let new_idx = meta.tail; + let new_value_with_idx = append_idx_suffix(&value, new_idx); + // Overwrite data in-place (not remove+set) to avoid data + // loss if a crash occurs between the two operations + self.set_ns(ns.data, &data_key, &new_value_with_idx)?; + + // Write new index before removing old so the entry is + // always reachable via at least one index position + let new_idx_key = make_list_index_key(list, new_idx); + self.set_ns(ns.index, &new_idx_key, &key)?; + } + self.remove_ns(ns.index, &idx_key)?; + } + meta.head += 1; + set_list_meta(self, ns, list, meta)?; + } + + Ok(true) + } + + fn peek_list_head_with_ns( + &self, + ns: ListNamespaces, + list_key: &[u8], + ) -> Result> { + self.list_iter_with_ns(ns, list_key).next().transpose() + } + + fn peek_list_tail_with_ns( + &self, + ns: ListNamespaces, + list_key: &[u8], + ) -> Result> { + self.list_iter_with_ns(ns, list_key).next_back().transpose() + } + + pub(super) fn pop_list_head_with_ns( + &self, + ns: ListNamespaces, + list_key: &[u8], + ) -> Result> { + let _lock = self.logical_write_guard(ns.meta, list_key); + let head = self.peek_list_head_with_ns(ns, list_key)?; + if let Some((key, _)) = head + && let Some(value) = self._list_remove_with_ns(ns, list_key, &key)? + { + return Ok(Some((key, value))); + } + Ok(None) + } + + pub(super) fn pop_list_tail_with_ns( + &self, + ns: ListNamespaces, + list_key: &[u8], + ) -> Result> { + let _lock = self.logical_write_guard(ns.meta, list_key); + let tail = self.peek_list_tail_with_ns(ns, list_key)?; + if let Some((key, _)) = tail + && let Some(value) = self._list_remove_with_ns(ns, list_key, &key)? 
+ { + return Ok(Some((key, value))); + } + Ok(None) + } + + pub(super) fn list_iter_with_ns<'a>( + &'a self, + ns: ListNamespaces, + list: &[u8], + ) -> ListIterator<'a> { + let meta = get_list_meta(self, ns, list).unwrap_or_else(|_| ListMetadata::new()); + ListIterator { + store: self, + list: list.to_vec(), + ns, + next_idx: meta.head, + end_idx: meta.tail, + initial_next_idx: meta.head, + initial_end_idx: meta.tail, + } + } + + pub(super) fn list_len_with_ns(&self, ns: ListNamespaces, list: &[u8]) -> Result { + Ok(get_list_meta(self, ns, list)?.count as usize) + } + + pub(super) fn list_range_with_ns( + &self, + ns: ListNamespaces, + list: &[u8], + ) -> Result> { + let meta = get_list_meta(self, ns, list)?; + if meta.head > meta.tail { + return Ok(0..0); + } + Ok(meta.head as usize..meta.tail.saturating_add(1) as usize) + } + + fn validate_list_item_sizes(&self, list: &[u8], key: &[u8], value: &[u8]) -> Result<()> { + let data_key_len = make_list_data_key(list, key).len(); + let data_value_len = value.len() + size_of::(); + validate_internal_entry(self, data_key_len, data_value_len)?; + + let index_key_len = make_list_index_key(list, 0).len(); + validate_internal_entry(self, index_key_len, key.len()) + } +} + +fn validate_internal_entry(store: &CandyStore, key_len: usize, value_len: usize) -> Result<()> { + let entry_size = aligned_data_entry_size(key_len, value_len) as usize; + if key_len > MAX_USER_KEY_SIZE + || value_len > MAX_USER_VALUE_SIZE + || entry_size > store.inner.config.max_data_file_size as usize + { + return Err(Error::PayloadTooLarge(entry_size)); + } + Ok(()) +} + +fn get_list_meta(store: &CandyStore, ns: ListNamespaces, list: &[u8]) -> Result { + if let Some(value) = store.get_ns(ns.meta, list)? 
+ && let Some(meta) = ListMetadata::from_bytes(&value) + { + return Ok(meta); + } + Ok(ListMetadata::new()) +} + +fn set_list_meta( + store: &CandyStore, + ns: ListNamespaces, + list: &[u8], + meta: ListMetadata, +) -> Result<()> { + store.set_ns(ns.meta, list, &meta.to_bytes())?; + Ok(()) +} + +fn hash_list_key(list: &[u8]) -> u64 { + let mut hasher = SipHasher13::new_with_keys(0x7ac1485be800c70e, 0x22ac1dcc7992c592); + hasher.write(list); + hasher.finish() +} + +fn make_list_data_key(list: &[u8], key: &[u8]) -> SmallVec<[u8; 128]> { + let hash = hash_list_key(list); + let mut out = SmallVec::<[u8; 128]>::with_capacity(8 + key.len()); + out.extend_from_slice(&hash.to_le_bytes()); + out.extend_from_slice(key); + out +} + +fn make_list_index_key(list: &[u8], idx: u64) -> SmallVec<[u8; 16]> { + let hash = hash_list_key(list); + let mut out = SmallVec::<[u8; 16]>::with_capacity(16); + out.extend_from_slice(&hash.to_le_bytes()); + out.extend_from_slice(&idx.to_be_bytes()); + out +} + +fn append_idx_suffix(value: &[u8], idx: u64) -> SmallVec<[u8; 128]> { + let mut out = SmallVec::<[u8; 128]>::with_capacity(value.len() + size_of::()); + out.extend_from_slice(value); + out.extend_from_slice(&idx.to_le_bytes()); + out +} + +fn strip_idx_suffix(mut value: Vec) -> Vec { + if value.len() >= size_of::() { + value.truncate(value.len() - size_of::()); + } + value +} + +fn extract_idx_suffix(value: &[u8]) -> u64 { + let n = value.len(); + if n < size_of::() { + return 0; + } + u64::from_le_bytes(value[n - size_of::()..n].try_into().unwrap()) +} diff --git a/src/store/open.rs b/src/store/open.rs new file mode 100644 index 0000000..6ea96f6 --- /dev/null +++ b/src/store/open.rs @@ -0,0 +1,240 @@ +use std::{ + collections::{HashMap, HashSet}, + path::Path, + sync::{Arc, atomic::Ordering}, +}; + +use crate::{ + data_file::DataFile, + index_file::IndexFile, + internal::{MAX_REPRESENTABLE_FILE_SIZE, is_resettable_open_error, parse_data_file_idx}, + types::{Config, Error, 
INITIAL_DATA_FILE_ORDINAL, RebuildStrategy, Result}, +}; + +use super::{CandyStore, DirtyOpenAction, OpenState, StoreInner}; + +impl CandyStore { + fn clear_db_files(base_path: &Path) -> Result<()> { + Self::clear_directory_contents(base_path) + } + + fn open_state(base_path: &Path, config: Arc) -> Result { + let index_file = IndexFile::open(base_path, config.clone())?; + let mut data_files = HashMap::new(); + let mut seen_ordinals = HashSet::new(); + let mut active_file_idx = 0; + let mut active_file_ordinal = INITIAL_DATA_FILE_ORDINAL; + + for entry in std::fs::read_dir(base_path).map_err(Error::IOError)? { + let entry = entry.map_err(Error::IOError)?; + let path = entry.path(); + let Some(file_idx) = parse_data_file_idx(&path) else { + continue; + }; + let data_file = Arc::new(DataFile::open(base_path, config.clone(), file_idx)?); + if !seen_ordinals.insert(data_file.file_ordinal) { + return Err(crate::internal::invalid_data_error( + "duplicate data file ordinal", + )); + } + if data_files.is_empty() || data_file.file_ordinal > active_file_ordinal { + active_file_idx = file_idx; + active_file_ordinal = data_file.file_ordinal; + } + data_files.insert(file_idx, data_file); + } + + if data_files.is_empty() { + let data_file = Arc::new(DataFile::create( + base_path, + config.clone(), + active_file_idx, + active_file_ordinal, + )?); + data_files.insert(active_file_idx, data_file); + } + + let was_clean_shutdown = { + let header = index_file.header_ref(); + let was_clean = header.dirty.load(Ordering::Acquire) == 0; + header.dirty.store(1, Ordering::Release); + index_file.flush_header()?; + was_clean + }; + + Ok(OpenState { + index_file, + data_files, + active_file_idx, + active_file_ordinal, + was_clean_shutdown, + }) + } + + fn acquire_lockfile(base_path: &Path) -> Result { + let lockfile_path = base_path.join(".lockfile"); + let mut lockfile = fslock::LockFile::open(&lockfile_path).map_err(Error::IOError)?; + if !lockfile.try_lock().unwrap_or(false) { + let content 
= + String::from_utf8_lossy(&std::fs::read(&lockfile_path).unwrap_or("".into())) + .into_owned(); + + return Err(Error::LockfileTaken(lockfile_path, content)); + } + + let content = format!( + "[{}] {}", + std::process::id(), + std::env::args().collect::>().join(" ") + ); + _ = std::fs::write(&lockfile_path, content).map_err(Error::IOError); + Ok(lockfile) + } + + fn open_or_reset_state(base_path: &Path, config: Arc) -> Result { + match Self::open_state(base_path, config.clone()) { + Ok(state) => Ok(state), + Err(err) if config.reset_on_invalid_data && is_resettable_open_error(&err) => { + Self::clear_db_files(base_path)?; + let mut state = Self::open_state(base_path, config)?; + state.was_clean_shutdown = false; + Ok(state) + } + Err(err) => Err(err), + } + } + + fn normalize_config_for_path(base_path: &Path, config: Config) -> Result> { + let max_data_file_size = config.max_data_file_size.min(MAX_REPRESENTABLE_FILE_SIZE); + let mut normalized = Config { + max_data_file_size, + compaction_min_threshold: config + .compaction_min_threshold + .min((max_data_file_size as f64 * 0.8) as u32), + remap_scaler: config.remap_scaler.clamp(1, 4), + ..config + }; + + match IndexFile::existing_hash_key(base_path) { + Ok(Some(hash_key)) => normalized.hash_key = hash_key, + Ok(None) => {} + Err(err) if is_resettable_open_error(&err) => {} + Err(err) => return Err(err), + } + + Ok(Arc::new(normalized)) + } + + fn resolve_dirty_open( + base_path: &Path, + config: Arc, + state: OpenState, + ) -> Result<(OpenState, DirtyOpenAction)> { + if state.was_clean_shutdown { + return Ok((state, DirtyOpenAction::None)); + } + + let action = match config.rebuild_strategy { + RebuildStrategy::FailIfDirty => return Err(Error::DirtyIndex), + RebuildStrategy::RebuildIfDirty => DirtyOpenAction::RebuildIndex, + RebuildStrategy::ResetDBIfDirty => DirtyOpenAction::ResetDb, + RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrFail => { + state.index_file.verify_row_checksums()?; + 
DirtyOpenAction::TrustIndex + } + RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrRebuild => { + match state.index_file.verify_row_checksums() { + Ok(()) => DirtyOpenAction::TrustIndex, + Err(Error::IOError(io_err)) + if io_err.kind() == std::io::ErrorKind::InvalidData => + { + DirtyOpenAction::RebuildIndex + } + Err(err) => return Err(err), + } + } + }; + + if matches!(action, DirtyOpenAction::ResetDb) { + drop(state); + Self::clear_db_files(base_path)?; + let mut reset_state = Self::open_state(base_path, config)?; + reset_state.was_clean_shutdown = false; + return Ok((reset_state, DirtyOpenAction::ResetDb)); + } + + Ok((state, action)) + } + + /// Opens a store at `path`, creating it if needed. + /// + /// If `config.reset_on_invalid_data` is enabled, or if + /// `config.rebuild_strategy` is `ResetDBIfDirty`, opening may reset the + /// database directory by removing all contents and recreating fresh store + /// files. While the store is open, the active `.lockfile` is preserved so + /// the directory remains locked against concurrent opens. 
+    pub fn open(path: impl AsRef<Path>, config: Config) -> Result<Self> {
+        let base_path = path.as_ref().to_path_buf();
+        std::fs::create_dir_all(&base_path).map_err(Error::IOError)?;
+
+        // The directory lock is taken before any store file is opened; a
+        // second opener gets `Error::LockfileTaken` from `acquire_lockfile`.
+        let lockfile = Self::acquire_lockfile(&base_path)?;
+        let config = Self::normalize_config_for_path(&base_path, config)?;
+
+        // First pass may wipe the directory (`reset_on_invalid_data`); second
+        // pass decides what to do about a dirty index (`rebuild_strategy`).
+        let state = Self::open_or_reset_state(&base_path, config.clone())?;
+        let (state, dirty_open_action) =
+            Self::resolve_dirty_open(&base_path, config.clone(), state)?;
+        let was_clean_shutdown = state.was_clean_shutdown;
+        // At least 8 logical locks, rounded up to a power of two.
+        let num_logical_locks = config.max_concurrency.max(8).next_power_of_two();
+
+        let mut store = Self {
+            // `config` is not used past this point, so it is moved, not cloned.
+            inner: Arc::new(StoreInner::new(base_path, config, state, num_logical_locks)),
+            _lockfile: lockfile,
+            compaction_thd: None,
+            // After a dirty open this stays `false` until the recovery step
+            // below has completed without error.
+            allow_clean_shutdown: was_clean_shutdown,
+            was_clean_shutdown,
+        };
+
+        if !was_clean_shutdown {
+            match dirty_open_action {
+                // Nothing to replay: the index is trusted as-is or the
+                // database was already recreated from scratch.
+                DirtyOpenAction::None | DirtyOpenAction::ResetDb | DirtyOpenAction::TrustIndex => {}
+                DirtyOpenAction::RebuildIndex => store.recover_index()?,
+            }
+            store.allow_clean_shutdown = true;
+        }
+
+        store.start_compaction();
+        Ok(store)
+    }
+
+    /// Clears the store and recreates a fresh empty database in the same
+    /// directory.
+    ///
+    /// This removes all directory contents, including unrelated files and
+    /// subdirectories, before recreating the store files. While the store is
+    /// open, the active `.lockfile` is preserved so the directory remains
+    /// locked against concurrent opens.
+    pub fn clear(&mut self) -> Result<()> {
+        let base_path = self.inner.base_path.clone(); // copied out first: `self.inner` is replaced further down
+        let config = self.inner.config.clone();
+        let num_logical_locks = self.inner.logical_locks.len(); // the new inner state keeps the same lock-table size
+
+        self.stop_compaction(); // restarted by `start_compaction` once the fresh state is installed
+
+        self.inner.data_files.write().clear(); // empty the in-memory data-file map before the directory is wiped
+        Self::clear_db_files(base_path.as_path())?;
+
+        let state = Self::open_state(base_path.as_path(), config.clone())?; // re-opens the now-empty directory
+        self.inner = Arc::new(StoreInner::new(base_path, config, state, num_logical_locks));
+
+        self.allow_clean_shutdown = true; // a freshly created store is reported as clean
+        self.was_clean_shutdown = true;
+        self.start_compaction();
+
+        Ok(())
+    }
+}
diff --git a/src/store/queue.rs b/src/store/queue.rs
new file mode 100644
index 0000000..15f02a8
--- /dev/null
+++ b/src/store/queue.rs
@@ -0,0 +1,643 @@
+use siphasher::sip::SipHasher13;
+
+use std::{hash::Hasher, mem::size_of, ops::Range};
+
+use crate::{
+    internal::{KeyNamespace, RangeMetadata, aligned_data_entry_size},
+    store::CandyStore,
+    types::{Error, MAX_USER_VALUE_SIZE, Result},
+};
+
+#[derive(Clone, Copy)]
+pub(super) struct QueueNamespaces { // pair of key namespaces used by one queue flavour
+    pub(super) meta: KeyNamespace, // namespace holding the per-queue metadata record
+    pub(super) data: KeyNamespace, // namespace holding the individual queue items
+}
+
+const QUEUE_NS: QueueNamespaces = QueueNamespaces { // namespaces behind the public `*_queue*` API
+    meta: KeyNamespace::QueueMeta,
+    data: KeyNamespace::QueueData,
+};
+
+const BIG_NS: QueueNamespaces = QueueNamespaces { // namespaces behind `set_big` / `get_big` / `remove_big`
+    meta: KeyNamespace::BigMeta,
+    data: KeyNamespace::BigData,
+};
+
+/// Double-ended iterator over live queue items and their logical indices.
+pub struct QueueIterator<'a> { + store: &'a CandyStore, + queue: Vec, + ns: QueueNamespaces, + next_idx: u64, + end_idx: u64, + initial_next_idx: u64, + initial_end_idx: u64, +} + +type QueueMetadata = RangeMetadata; + +impl<'a> QueueIterator<'a> { + fn heal_head(&self, new_head: u64) { + let _ = self.try_heal_head(new_head); + } + + fn try_heal_head(&self, new_head: u64) -> Result<()> { + let _lock = self.store.logical_write_guard(self.ns.meta, &self.queue); + let mut meta = get_queue_meta(self.store, self.ns, &self.queue)?; + if meta.head >= self.initial_next_idx && meta.head < new_head { + meta.head = new_head; + set_queue_meta(self.store, self.ns, &self.queue, meta)?; + } + Ok(()) + } + + fn heal_tail(&self, new_tail: u64) { + let _ = self.try_heal_tail(new_tail); + } + + fn try_heal_tail(&self, new_tail: u64) -> Result<()> { + let _lock = self.store.logical_write_guard(self.ns.meta, &self.queue); + let mut meta = get_queue_meta(self.store, self.ns, &self.queue)?; + if meta.tail <= self.initial_end_idx && meta.tail > new_tail { + meta.tail = new_tail; + set_queue_meta(self.store, self.ns, &self.queue, meta)?; + } + Ok(()) + } +} + +impl Iterator for QueueIterator<'_> { + type Item = Result<(usize, Vec)>; + + fn next(&mut self) -> Option { + while self.next_idx <= self.end_idx { + let idx = self.next_idx; + self.next_idx += 1; + + if idx > self.initial_next_idx + 1000 { + self.heal_head(idx); + self.initial_next_idx = idx; + } + + let key = make_queue_data_key(&self.queue, idx); + match self.store.get_ns(self.ns.data, &key) { + Ok(Some(v)) => return Some(Ok((idx as usize, v))), + Ok(None) => continue, + Err(e) => return Some(Err(e)), + } + } + None + } +} + +impl DoubleEndedIterator for QueueIterator<'_> { + fn next_back(&mut self) -> Option<::Item> { + while self.next_idx <= self.end_idx { + let idx = self.end_idx; + if self.end_idx == 0 { + self.next_idx = 1; + } else { + self.end_idx -= 1; + } + + if idx + 1000 < self.initial_end_idx { + self.heal_tail(idx); 
+ self.initial_end_idx = idx; + } + + let key = make_queue_data_key(&self.queue, idx); + match self.store.get_ns(self.ns.data, &key) { + Ok(Some(v)) => return Some(Ok((idx as usize, v))), + Ok(None) => continue, + Err(e) => return Some(Err(e)), + } + } + None + } +} + +impl CandyStore { + /// Pushes `val` to the head of `queue_key` and returns its logical index. + pub fn push_to_queue_head + ?Sized, B2: AsRef<[u8]> + ?Sized>( + &self, + queue_key: &B1, + val: &B2, + ) -> Result { + self.queue_push_head_with_ns(QUEUE_NS, queue_key.as_ref(), val.as_ref()) + .map(|idx| idx as usize) + } + + /// Pushes `val` to the tail of `queue_key` and returns its logical index. + pub fn push_to_queue_tail + ?Sized, B2: AsRef<[u8]> + ?Sized>( + &self, + queue_key: &B1, + val: &B2, + ) -> Result { + self.queue_push_tail_with_ns(QUEUE_NS, queue_key.as_ref(), val.as_ref()) + .map(|idx| idx as usize) + } + + /// Removes and returns the head value of `queue_key`. + pub fn pop_queue_head + ?Sized>( + &self, + queue_key: &B, + ) -> Result>> { + Ok(self + .queue_pop_head_with_ns(QUEUE_NS, queue_key.as_ref())? + .map(|(_, value)| value)) + } + + /// Removes and returns the head item of `queue_key` together with its logical index. + pub fn pop_queue_head_with_idx + ?Sized>( + &self, + queue_key: &B, + ) -> Result)>> { + Ok(self + .queue_pop_head_with_ns(QUEUE_NS, queue_key.as_ref())? + .map(|(idx, value)| (idx as usize, value))) + } + + /// Removes and returns the tail value of `queue_key`. + pub fn pop_queue_tail + ?Sized>( + &self, + queue_key: &B, + ) -> Result>> { + Ok(self + .queue_pop_tail_with_ns(QUEUE_NS, queue_key.as_ref())? + .map(|(_, value)| value)) + } + + /// Removes and returns the tail item of `queue_key` together with its logical index. + pub fn pop_queue_tail_with_idx + ?Sized>( + &self, + queue_key: &B, + ) -> Result)>> { + Ok(self + .queue_pop_tail_with_ns(QUEUE_NS, queue_key.as_ref())? 
+ .map(|(idx, value)| (idx as usize, value))) + } + + /// Returns the head value of `queue_key` without removing it. + pub fn peek_queue_head + ?Sized>( + &self, + queue_key: &B, + ) -> Result>> { + Ok(self + .queue_peek_head_with_ns(QUEUE_NS, queue_key.as_ref())? + .map(|(_, value)| value)) + } + + /// Returns the head item of `queue_key` and its logical index without removing it. + pub fn peek_queue_head_with_idx + ?Sized>( + &self, + queue_key: &B, + ) -> Result)>> { + Ok(self + .queue_peek_head_with_ns(QUEUE_NS, queue_key.as_ref())? + .map(|(idx, value)| (idx as usize, value))) + } + + /// Returns the tail value of `queue_key` without removing it. + pub fn peek_queue_tail + ?Sized>( + &self, + queue_key: &B, + ) -> Result>> { + Ok(self + .queue_peek_tail_with_ns(QUEUE_NS, queue_key.as_ref())? + .map(|(_, value)| value)) + } + + /// Returns the tail item of `queue_key` and its logical index without removing it. + pub fn peek_queue_tail_with_idx + ?Sized>( + &self, + queue_key: &B, + ) -> Result)>> { + Ok(self + .queue_peek_tail_with_ns(QUEUE_NS, queue_key.as_ref())? + .map(|(idx, value)| (idx as usize, value))) + } + + /// Removes and returns the item at logical index `idx`, if it exists. + pub fn remove_from_queue + ?Sized>( + &self, + queue_key: &B, + idx: usize, + ) -> Result>> { + self.queue_remove_with_ns(QUEUE_NS, queue_key.as_ref(), idx as u64) + } + + /// Removes all items from `queue_key`. + pub fn discard_queue + ?Sized>(&self, queue_key: &B) -> Result { + self.queue_discard_with_ns(QUEUE_NS, queue_key.as_ref()) + } + + /// Appends all provided values to the tail of `queue_key`. 
+    pub fn extend_queue<B: AsRef<[u8]> + ?Sized>(
+        &self,
+        queue_key: &B,
+        items: impl IntoIterator<Item = impl AsRef<[u8]>>,
+    ) -> Result<Range<usize>> {
+        // (index of the first pushed item, one past the index of the last).
+        let mut span: Option<(usize, usize)> = None;
+
+        // Every push takes the queue lock on its own, so pushes from other
+        // threads may land between the items appended here.
+        for item in items {
+            let idx = self.push_to_queue_tail(queue_key, &item)?;
+            let first = span.map_or(idx, |(first, _)| first);
+            span = Some((first, idx + 1));
+        }
+
+        match span {
+            Some((first, end)) => Ok(first..end),
+            // Nothing was pushed: report an empty range at the current start.
+            None => {
+                let range = self.queue_range(queue_key)?;
+                Ok(range.start..range.start)
+            }
+        }
+    }
+
+    /// Returns the number of live items in `queue_key`.
+    pub fn queue_len<B: AsRef<[u8]> + ?Sized>(&self, queue_key: &B) -> Result<usize> {
+        Ok(self.queue_len_with_ns(QUEUE_NS, queue_key.as_ref())? as usize)
+    }
+
+    /// Returns the current inclusive-exclusive logical index span for `queue_key`.
+    ///
+    /// The span may contain indices whose items were removed individually.
+    pub fn queue_range<B: AsRef<[u8]> + ?Sized>(&self, queue_key: &B) -> Result<Range<usize>> {
+        self.queue_range_with_ns(QUEUE_NS, queue_key.as_ref())
+    }
+
+    /// Iterates over live items in `queue_key` from head to tail.
+    pub fn iter_queue<'a, B: AsRef<[u8]> + ?Sized>(&'a self, queue_key: &B) -> QueueIterator<'a> {
+        self.queue_iter_with_ns(QUEUE_NS, queue_key.as_ref())
+    }
+
+    /// Stores a large value under `key`, chunking it across queue-backed data entries if needed.
+    ///
+    /// Returns `true` if a previous value under `key` was replaced.
+    pub fn set_big<B1: AsRef<[u8]> + ?Sized, B2: AsRef<[u8]> + ?Sized>(
+        &self,
+        key: &B1,
+        value: &B2,
+    ) -> Result<bool> {
+        self.queue_set_big_with_ns(BIG_NS, key.as_ref(), value.as_ref())
+    }
+
+    /// Loads a value previously stored with [`CandyStore::set_big`].
+    pub fn get_big<B: AsRef<[u8]> + ?Sized>(&self, key: &B) -> Result<Option<Vec<u8>>> {
+        self.queue_get_big_with_ns(BIG_NS, key.as_ref())
+    }
+
+    /// Removes a value previously stored with [`CandyStore::set_big`].
+ pub fn remove_big + ?Sized>(&self, key: &B) -> Result { + self.queue_discard_with_ns(BIG_NS, key.as_ref()) + } + + pub(super) fn queue_push_tail_with_ns( + &self, + ns: QueueNamespaces, + queue: &[u8], + value: &[u8], + ) -> Result { + let _lock = self.logical_write_guard(ns.meta, queue); + self._queue_push_tail_with_ns(ns, queue, value) + } + + fn _queue_push_tail_with_ns( + &self, + ns: QueueNamespaces, + queue: &[u8], + value: &[u8], + ) -> Result { + let mut meta = get_queue_meta(self, ns, queue)?; + let new_tail = meta.tail + 1; + let key = make_queue_data_key(queue, new_tail); + self.set_ns(ns.data, &key, value)?; + meta.tail = new_tail; + meta.count += 1; + if meta.head > meta.tail { + meta.head = new_tail; + } + set_queue_meta(self, ns, queue, meta)?; + Ok(new_tail) + } + + pub(super) fn queue_push_head_with_ns( + &self, + ns: QueueNamespaces, + queue: &[u8], + value: &[u8], + ) -> Result { + let _lock = self.logical_write_guard(ns.meta, queue); + let mut meta = get_queue_meta(self, ns, queue)?; + let new_head = meta.head - 1; + let key = make_queue_data_key(queue, new_head); + self.set_ns(ns.data, &key, value)?; + meta.head = new_head; + meta.count += 1; + if meta.tail < meta.head { + meta.tail = new_head; + } + set_queue_meta(self, ns, queue, meta)?; + Ok(new_head) + } + + pub(super) fn queue_pop_head_with_ns( + &self, + ns: QueueNamespaces, + queue: &[u8], + ) -> Result)>> { + let _lock = self.logical_write_guard(ns.meta, queue); + let mut meta = get_queue_meta(self, ns, queue)?; + loop { + if meta.head > meta.tail { + return Ok(None); + } + + let idx = meta.head; + let key = make_queue_data_key(queue, idx); + let value = self.remove_ns(ns.data, &key)?; + meta.head += 1; + + if let Some(value) = value { + meta.count = meta.count.saturating_sub(1); + if meta.head > meta.tail { + meta = QueueMetadata::new(); + } + set_queue_meta(self, ns, queue, meta)?; + return Ok(Some((idx, value))); + } + + set_queue_meta(self, ns, queue, meta)?; + } + } + + 
pub(super) fn queue_pop_tail_with_ns( + &self, + ns: QueueNamespaces, + queue: &[u8], + ) -> Result)>> { + let _lock = self.logical_write_guard(ns.meta, queue); + let mut meta = get_queue_meta(self, ns, queue)?; + loop { + if meta.head > meta.tail { + return Ok(None); + } + + let idx = meta.tail; + let key = make_queue_data_key(queue, idx); + let value = self.remove_ns(ns.data, &key)?; + meta.tail = meta.tail.saturating_sub(1); + + if let Some(value) = value { + meta.count = meta.count.saturating_sub(1); + if meta.head > meta.tail { + meta = QueueMetadata::new(); + } + set_queue_meta(self, ns, queue, meta)?; + return Ok(Some((idx, value))); + } + + set_queue_meta(self, ns, queue, meta)?; + } + } + + pub(super) fn queue_peek_head_with_ns( + &self, + ns: QueueNamespaces, + queue: &[u8], + ) -> Result)>> { + let _lock = self.logical_read_guard(ns.meta, queue); + let meta = get_queue_meta(self, ns, queue)?; + if meta.head > meta.tail { + return Ok(None); + } + for idx in meta.head..=meta.tail { + let key = make_queue_data_key(queue, idx); + if let Some(value) = self.get_ns(ns.data, &key)? { + return Ok(Some((idx, value))); + } + } + Ok(None) + } + + pub(super) fn queue_peek_tail_with_ns( + &self, + ns: QueueNamespaces, + queue: &[u8], + ) -> Result)>> { + let _lock = self.logical_read_guard(ns.meta, queue); + let meta = get_queue_meta(self, ns, queue)?; + if meta.head > meta.tail { + return Ok(None); + } + for idx in (meta.head..=meta.tail).rev() { + let key = make_queue_data_key(queue, idx); + if let Some(value) = self.get_ns(ns.data, &key)? 
{ + return Ok(Some((idx, value))); + } + } + Ok(None) + } + + pub(super) fn queue_len_with_ns(&self, ns: QueueNamespaces, queue: &[u8]) -> Result { + Ok(get_queue_meta(self, ns, queue)?.count) + } + + pub(super) fn queue_discard_with_ns(&self, ns: QueueNamespaces, queue: &[u8]) -> Result { + let _lock = self.logical_write_guard(ns.meta, queue); + self._queue_discard_with_ns(ns, queue) + } + + fn _queue_discard_with_ns(&self, ns: QueueNamespaces, queue: &[u8]) -> Result { + let mut meta = get_queue_meta(self, ns, queue)?; + let had_items = meta.head <= meta.tail; + while meta.head <= meta.tail { + let key = make_queue_data_key(queue, meta.head); + _ = self.remove_ns(ns.data, &key)?; + meta.head += 1; + } + + self.remove_ns(ns.meta, queue)?; + Ok(had_items) + } + + fn queue_remove_with_ns( + &self, + ns: QueueNamespaces, + queue: &[u8], + idx: u64, + ) -> Result>> { + let _lock = self.logical_write_guard(ns.meta, queue); + let mut meta = get_queue_meta(self, ns, queue)?; + let key = make_queue_data_key(queue, idx); + let removed = match self.remove_ns(ns.data, &key)? 
{ + Some(value) => value, + None => return Ok(None), + }; + + meta.count = meta.count.saturating_sub(1); + + if idx == meta.head { + meta.head += 1; + } + + if meta.tail == idx { + meta.tail = meta.tail.saturating_sub(1); + } + + if meta.head > meta.tail { + meta = QueueMetadata::new(); + } + + set_queue_meta(self, ns, queue, meta)?; + Ok(Some(removed)) + } + + pub(super) fn queue_iter_with_ns<'a>( + &'a self, + ns: QueueNamespaces, + queue: &[u8], + ) -> QueueIterator<'a> { + let meta = get_queue_meta(self, ns, queue).unwrap_or_else(|_| QueueMetadata::new()); + QueueIterator { + store: self, + queue: queue.to_vec(), + ns, + next_idx: meta.head, + end_idx: meta.tail, + initial_next_idx: meta.head, + initial_end_idx: meta.tail, + } + } + + pub(super) fn queue_range_with_ns( + &self, + ns: QueueNamespaces, + queue: &[u8], + ) -> Result> { + let meta = get_queue_meta(self, ns, queue)?; + if meta.count == 0 || meta.head > meta.tail { + return Ok(0..0); + } + Ok(meta.head as usize..meta.tail.saturating_add(1) as usize) + } + + pub(super) fn queue_set_big_with_ns( + &self, + ns: QueueNamespaces, + key: &[u8], + value: &[u8], + ) -> Result { + let _lock = self.logical_write_guard(ns.meta, key); + let existed = self._queue_discard_with_ns(ns, key)?; + + let max_chunk_len = self.max_big_chunk_len(key)?; + + for chunk in value.chunks(max_chunk_len) { + self._queue_push_tail_with_ns(ns, key, chunk)?; + } + + self._queue_push_tail_with_ns(ns, key, &value.len().to_le_bytes())?; + Ok(existed) + } + + pub(super) fn queue_get_big_with_ns( + &self, + ns: QueueNamespaces, + key: &[u8], + ) -> Result>> { + let _lock = self.logical_read_guard(ns.meta, key); + let meta = get_queue_meta(self, ns, key)?; + let expected_chunks = meta.count; + if expected_chunks == 0 { + return Ok(None); + } + + let mut collected = Vec::new(); + let mut seen = 0u64; + for idx in meta.head..=meta.tail { + let item_key = make_queue_data_key(key, idx); + let Some(chunk) = self.get_ns(ns.data, &item_key)? 
else { + continue; + }; + + seen += 1; + if seen == expected_chunks && chunk.len() == size_of::() { + let recorded_len = usize::from_le_bytes(chunk.as_slice().try_into().unwrap()); + if recorded_len == collected.len() { + return Ok(Some(collected)); + } + return Ok(None); + } + + collected.extend_from_slice(&chunk); + if seen == expected_chunks { + return Ok(None); + } + } + + Ok(None) + } + + fn max_big_chunk_len(&self, key: &[u8]) -> Result { + let data_key_len = make_queue_data_key(key, 0).len(); + if aligned_data_entry_size(data_key_len, size_of::()) as usize + > self.inner.config.max_data_file_size as usize + { + return Err(Error::PayloadTooLarge(aligned_data_entry_size( + data_key_len, + size_of::(), + ) as usize)); + } + + let mut max_chunk_len = MAX_USER_VALUE_SIZE; + while max_chunk_len > 0 + && aligned_data_entry_size(data_key_len, max_chunk_len) as usize + > self.inner.config.max_data_file_size as usize + { + max_chunk_len -= 1; + } + + if max_chunk_len == 0 { + return Err(Error::PayloadTooLarge( + aligned_data_entry_size(data_key_len, 1) as usize, + )); + } + + Ok(max_chunk_len) + } +} + +fn get_queue_meta(store: &CandyStore, ns: QueueNamespaces, queue: &[u8]) -> Result { + if let Some(value) = store.get_ns(ns.meta, queue)? 
+ && let Some(meta) = QueueMetadata::from_bytes(&value) + { + return Ok(meta); + } + Ok(QueueMetadata::new()) +} + +fn set_queue_meta( + store: &CandyStore, + ns: QueueNamespaces, + queue: &[u8], + meta: QueueMetadata, +) -> Result<()> { + store.set_ns(ns.meta, queue, &meta.to_bytes())?; + Ok(()) +} + +fn hash_queue_key(queue: &[u8]) -> u64 { + let mut hasher = SipHasher13::new_with_keys(0xb1ccc559a9924eaa, 0x1b1a682059c2d599); + hasher.write(queue); + hasher.finish() +} + +fn make_queue_data_key(queue: &[u8], seq: u64) -> [u8; 16] { + let hash = hash_queue_key(queue); + + let mut key = [0u8; 16]; + key[..8].copy_from_slice(&hash.to_le_bytes()); + key[8..].copy_from_slice(&seq.to_be_bytes()); + key +} diff --git a/src/store/recovery.rs b/src/store/recovery.rs new file mode 100644 index 0000000..379c9ed --- /dev/null +++ b/src/store/recovery.rs @@ -0,0 +1,196 @@ +use std::sync::{Arc, atomic::Ordering}; + +use crate::{ + data_file::DataFile, + index_file::EntryPointer, + internal::{ + EntryType, FILE_OFFSET_ALIGNMENT, HashCoord, KVRef, KeyNamespace, aligned_data_entry_size, + aligned_data_entry_waste, aligned_tombstone_entry_waste, invalid_data_error, + }, + types::{Error, MAX_USER_KEY_SIZE, MAX_USER_VALUE_SIZE, Result}, +}; + +use super::CandyStore; + +impl CandyStore { + pub(super) fn recover_index(&self) -> Result<()> { + self.inner.index_file.reset()?; + + let mut sorted_files: Vec> = + self.inner.data_files.read().values().cloned().collect(); + sorted_files.sort_by_key(|df| df.file_ordinal); + + for data_file in &sorted_files { + let mut offset = 0u64; + let mut read_buf = Vec::new(); + let mut buf_file_offset = 0u64; + let mut match_scratch = Vec::new(); + loop { + let Some((kv, entry_offset, next_offset)) = + data_file.read_next_entry_ref(offset, &mut read_buf, &mut buf_file_offset)? 
+ else { + break; + }; + offset = next_offset; + + let Some(ns) = KeyNamespace::from_u8(kv.ns) else { + return Err(invalid_data_error("unknown key namespace in data file")); + }; + + self.recover_entry(data_file, ns, kv, entry_offset, &mut match_scratch)?; + } + } + + Ok(()) + } + + fn recover_entry( + &self, + data_file: &Arc, + ns: KeyNamespace, + kv: KVRef<'_>, + entry_offset: u64, + match_scratch: &mut Vec, + ) -> Result<()> { + match kv.entry_type { + EntryType::Data => { + self.recover_data_entry(data_file, ns, kv, entry_offset, match_scratch) + } + EntryType::Tombstone => self.recover_tombstone_entry(data_file, ns, kv, match_scratch), + _ => Ok(()), + } + } + + fn recover_data_entry( + &self, + data_file: &Arc, + ns: KeyNamespace, + kv: KVRef<'_>, + entry_offset: u64, + match_scratch: &mut Vec, + ) -> Result<()> { + let key = kv.key(); + let val = kv.value(); + self.validate_recovered_data_entry(key, val)?; + let entry_len = 4 + 4 + key.len() + val.len() + 2; + let aligned_len = entry_len.next_multiple_of(FILE_OFFSET_ALIGNMENT as usize); + let hc = HashCoord::new(ns, key, self.inner.config.hash_key); + let ptr = EntryPointer::new( + data_file.file_idx, + entry_offset, + aligned_len, + hc.masked_row_selector(), + ); + + let entry_size = aligned_data_entry_size(key.len(), val.len()); + + self.inner._mut_op(ns, key, &[], |hc, mut row, key, _| { + let files = self.inner.data_files.read(); + for (col, entry) in row.iter_matches(hc) { + let file = files + .get(&entry.file_idx()) + .ok_or(Error::MissingDataFile(entry.file_idx()))?; + let existing_kv = + file.read_kv_into(entry.file_offset(), entry.size_hint(), match_scratch)?; + if existing_kv.key() == key { + let old_size = + aligned_data_entry_size(existing_kv.key().len(), existing_kv.value().len()); + self.record_recovered_waste( + entry, + existing_kv.key().len(), + existing_kv.value().len(), + )?; + row.replace_pointer(col, ptr); + let h = self.inner.index_file.header_ref(); + h.num_replaced.fetch_add(1, 
Ordering::Relaxed); + h.written_bytes.fetch_add(entry_size, Ordering::Relaxed); + h.waste_bytes.fetch_add(old_size, Ordering::Relaxed); + return Ok(()); + } + } + if let Some(col) = row.find_free_slot() { + row.insert(col, hc.sig, ptr); + let h = self.inner.index_file.header_ref(); + h.num_created.fetch_add(1, Ordering::Relaxed); + h.written_bytes.fetch_add(entry_size, Ordering::Relaxed); + self.inner.bump_histogram(entry_size); + Ok(()) + } else { + Err(Error::SplitRow(row.split_level.load(Ordering::Relaxed))) + } + }) + } + + fn recover_tombstone_entry( + &self, + data_file: &Arc, + ns: KeyNamespace, + kv: KVRef<'_>, + match_scratch: &mut Vec, + ) -> Result<()> { + let key = kv.key(); + self.validate_recovered_tombstone_entry(key)?; + self.inner + .index_file + .add_file_waste(data_file.file_idx, aligned_tombstone_entry_waste(key.len())); + + self.inner._mut_op(ns, key, &[], |hc, mut row, key, _| { + let files = self.inner.data_files.read(); + for (col, entry) in row.iter_matches(hc) { + let file = files + .get(&entry.file_idx()) + .ok_or(Error::MissingDataFile(entry.file_idx()))?; + let existing_kv = + file.read_kv_into(entry.file_offset(), entry.size_hint(), match_scratch)?; + if existing_kv.key() == key { + let old_size = + aligned_data_entry_size(existing_kv.key().len(), existing_kv.value().len()); + self.record_recovered_waste( + entry, + existing_kv.key().len(), + existing_kv.value().len(), + )?; + row.remove(col); + let h = self.inner.index_file.header_ref(); + h.num_removed.fetch_add(1, Ordering::Relaxed); + h.waste_bytes.fetch_add(old_size, Ordering::Relaxed); + return Ok(()); + } + } + Ok(()) + }) + } + + fn record_recovered_waste(&self, entry: EntryPointer, klen: usize, vlen: usize) -> Result<()> { + let old_aligned_len = aligned_data_entry_waste(klen, vlen); + self.inner + .index_file + .add_file_waste(entry.file_idx(), old_aligned_len); + Ok(()) + } + + fn validate_recovered_data_entry(&self, key: &[u8], val: &[u8]) -> Result<()> { + let entry_size = 
aligned_data_entry_size(key.len(), val.len()) as usize; + if key.len() > MAX_USER_KEY_SIZE + || val.len() > MAX_USER_VALUE_SIZE + || entry_size > self.inner.config.max_data_file_size as usize + { + return Err(invalid_data_error( + "recovered data entry exceeds configured limits", + )); + } + Ok(()) + } + + fn validate_recovered_tombstone_entry(&self, key: &[u8]) -> Result<()> { + let entry_size = aligned_tombstone_entry_waste(key.len()) as usize; + if key.len() > MAX_USER_KEY_SIZE + || entry_size > self.inner.config.max_data_file_size as usize + { + return Err(invalid_data_error( + "recovered tombstone entry exceeds configured limits", + )); + } + Ok(()) + } +} diff --git a/src/store/typed.rs b/src/store/typed.rs new file mode 100644 index 0000000..f8d1b94 --- /dev/null +++ b/src/store/typed.rs @@ -0,0 +1,834 @@ +use std::{borrow::Borrow, marker::PhantomData, ops::Range, sync::Arc}; + +use databuf::{DecodeOwned, Encode, config::num::LE}; +use smallvec::SmallVec; + +use crate::{ + internal::KeyNamespace, + store::CandyStore, + types::{Error, ListCompactionParams, Result}, +}; + +#[derive(Clone, Copy)] +struct TypedBigNamespaces { + meta: KeyNamespace, + data: KeyNamespace, +} + +const TYPED_BIG_NS: TypedBigNamespaces = TypedBigNamespaces { + meta: KeyNamespace::TypedBigMeta, + data: KeyNamespace::TypedBigData, +}; + +const TYPED_QUEUE_NS: super::queue::QueueNamespaces = super::queue::QueueNamespaces { + meta: KeyNamespace::TypedQueueMeta, + data: KeyNamespace::TypedQueueData, +}; + +const TYPED_LIST_NS: super::list::ListNamespaces = super::list::ListNamespaces { + meta: KeyNamespace::TypedListMeta, + index: KeyNamespace::TypedListIndex, + data: KeyNamespace::TypedListData, +}; + +const INLINE_TYPED_BUF_SIZE: usize = 128; + +type InlineBytes = SmallVec<[u8; INLINE_TYPED_BUF_SIZE]>; + +/// Marker trait for typed keys and collection identifiers used by the typed wrappers. +pub trait CandyTypedKey: Encode + DecodeOwned { + const TYPE_ID: u32; +} + +macro_rules! 
typed_builtin { + ($ty:ty, $type_id:literal) => { + impl CandyTypedKey for $ty { + const TYPE_ID: u32 = $type_id; + } + }; +} + +typed_builtin!(u8, 1); +typed_builtin!(u16, 2); +typed_builtin!(u32, 3); +typed_builtin!(u64, 4); +typed_builtin!(u128, 5); +typed_builtin!(i8, 6); +typed_builtin!(i16, 7); +typed_builtin!(i32, 8); +typed_builtin!(i64, 9); +typed_builtin!(i128, 10); +typed_builtin!(bool, 11); +typed_builtin!(usize, 12); +typed_builtin!(isize, 13); +typed_builtin!(char, 14); +typed_builtin!(String, 15); +typed_builtin!(Vec, 16); +typed_builtin!(uuid::Bytes, 17); + +/// Typed wrapper over the store key-value API. +pub struct CandyTypedStore { + store: Arc, + _phantom: PhantomData<(K, V)>, +} + +/// Typed wrapper over the queue API. +pub struct CandyTypedDeque { + store: Arc, + _phantom: PhantomData<(L, V)>, +} + +/// Typed wrapper over the ordered map/list API. +pub struct CandyTypedList { + store: Arc, + _phantom: PhantomData<(L, K, V)>, +} + +impl Clone for CandyTypedDeque { + fn clone(&self) -> Self { + Self { + store: Arc::clone(&self.store), + _phantom: PhantomData, + } + } +} + +impl Clone for CandyTypedStore { + fn clone(&self) -> Self { + Self { + store: Arc::clone(&self.store), + _phantom: PhantomData, + } + } +} + +impl Clone for CandyTypedList { + fn clone(&self) -> Self { + Self { + store: Arc::clone(&self.store), + _phantom: PhantomData, + } + } +} + +impl CandyTypedStore +where + K: CandyTypedKey + Encode, + V: Encode + DecodeOwned, +{ + /// Creates a typed key-value view over `store`. + pub fn new(store: Arc) -> Self { + Self { + store, + _phantom: PhantomData, + } + } + + fn make_key(key: &Q) -> InlineBytes + where + K: Borrow, + { + append_type_id(encode_to_smallvec(key), K::TYPE_ID) + } + + /// Returns the decoded value for `key`, if present. + pub fn get(&self, key: &Q) -> Result> + where + K: Borrow, + { + let key_bytes = Self::make_key(key); + self.store + .get_ns(KeyNamespace::Typed, &key_bytes)? 
+ .map(|bytes| decode_from_bytes::(&bytes)) + .transpose() + } + + /// Inserts or replaces `key` with `val`. + pub fn set( + &self, + key: &Q1, + val: &Q2, + ) -> Result> + where + K: Borrow, + V: Borrow, + { + let key_bytes = Self::make_key(key); + let value_bytes = encode_to_smallvec(val); + self.store + .set_ns(KeyNamespace::Typed, &key_bytes, &value_bytes)? + .map(|prev| decode_from_bytes::(&prev)) + .transpose() + } + + /// Removes `key` and returns its previous decoded value if it existed. + pub fn remove(&self, key: &Q) -> Result> + where + K: Borrow, + { + let key_bytes = Self::make_key(key); + self.store + .remove_ns(KeyNamespace::Typed, &key_bytes)? + .map(|prev| decode_from_bytes::(&prev)) + .transpose() + } + + /// Returns `true` if `key` currently exists. + pub fn contains(&self, key: &Q) -> Result + where + K: Borrow, + { + let key_bytes = Self::make_key(key); + self.store + .get_ns(KeyNamespace::Typed, &key_bytes) + .map(|value| value.is_some()) + } + + /// Returns the current value for `key`, or inserts and returns `val` if the key is missing. + pub fn get_or_create( + &self, + key: &Q1, + val: &Q2, + ) -> Result + where + K: Borrow, + V: Borrow, + { + let key_bytes = Self::make_key(key); + let value_bytes = encode_to_smallvec(val); + let status = self + .store + .get_or_create_ns(KeyNamespace::Typed, &key_bytes, &value_bytes)?; + match status { + crate::GetOrCreateStatus::ExistingValue(value) + | crate::GetOrCreateStatus::CreatedNew(value) => decode_from_bytes::(&value), + } + } + + /// Replaces `key` with `val` only if the current value matches `expected_val` when provided. 
+ pub fn replace( + &self, + key: &Q1, + val: &Q2, + expected_val: Option<&Q3>, + ) -> Result> + where + K: Borrow, + V: Borrow, + { + let key_bytes = Self::make_key(key); + let value_bytes = encode_to_smallvec(val); + let expected_bytes = expected_val.map(encode_to_smallvec); + match self.store.replace_ns( + KeyNamespace::Typed, + &key_bytes, + &value_bytes, + expected_bytes.as_deref(), + )? { + crate::ReplaceStatus::PrevValue(prev) => decode_from_bytes::(&prev).map(Some), + crate::ReplaceStatus::WrongValue(_) | crate::ReplaceStatus::DoesNotExist => Ok(None), + } + } + + /// Stores a large typed value under `key`. + pub fn set_big( + &self, + key: &Q1, + val: &Q2, + ) -> Result + where + K: Borrow, + V: Borrow, + { + let key_bytes = Self::make_key(key); + let value_bytes = encode_to_smallvec(val); + self.store.queue_set_big_with_ns( + super::queue::QueueNamespaces { + meta: TYPED_BIG_NS.meta, + data: TYPED_BIG_NS.data, + }, + &key_bytes, + &value_bytes, + ) + } + + /// Loads a large typed value previously stored with [`CandyTypedStore::set_big`]. + pub fn get_big(&self, key: &Q) -> Result> + where + K: Borrow, + { + let key_bytes = Self::make_key(key); + self.store + .queue_get_big_with_ns( + super::queue::QueueNamespaces { + meta: TYPED_BIG_NS.meta, + data: TYPED_BIG_NS.data, + }, + &key_bytes, + )? + .map(|value| decode_from_bytes::(&value)) + .transpose() + } + + /// Removes a large typed value previously stored with [`CandyTypedStore::set_big`]. + pub fn remove_big(&self, key: &Q) -> Result + where + K: Borrow, + { + let key_bytes = Self::make_key(key); + self.store.queue_discard_with_ns( + super::queue::QueueNamespaces { + meta: TYPED_BIG_NS.meta, + data: TYPED_BIG_NS.data, + }, + &key_bytes, + ) + } +} + +impl CandyTypedDeque +where + L: CandyTypedKey + Encode, + V: Encode + DecodeOwned, +{ + /// Creates a typed queue view over `store`. 
+    pub fn new(store: Arc<CandyStore>) -> Self {
+        Self {
+            store,
+            _phantom: PhantomData,
+        }
+    }
+
+    // Builds the raw queue key: the encoded `queue_key` followed by `L::TYPE_ID`
+    // (little-endian, appended by `append_type_id`).
+    fn make_queue_key<Q: ?Sized + Encode>(queue_key: &Q) -> InlineBytes
+    where
+        L: Borrow<Q>,
+    {
+        append_type_id(encode_to_smallvec(queue_key), L::TYPE_ID)
+    }
+
+    /// Pushes `val` to the tail of `queue_key`.
+    pub fn push_tail<Q: ?Sized + Encode, QV: ?Sized + Encode>(
+        &self,
+        queue_key: &Q,
+        val: &QV,
+    ) -> Result<()>
+    where
+        L: Borrow<Q>,
+        V: Borrow<QV>,
+    {
+        let qkey = Self::make_queue_key(queue_key);
+        let vbytes = encode_to_smallvec(val);
+        // The store's return value is intentionally discarded here.
+        self.store
+            .queue_push_tail_with_ns(TYPED_QUEUE_NS, &qkey, &vbytes)
+            .map(|_| ())
+    }
+
+    /// Pushes `val` to the head of `queue_key`.
+    pub fn push_head<Q: ?Sized + Encode, QV: ?Sized + Encode>(
+        &self,
+        queue_key: &Q,
+        val: &QV,
+    ) -> Result<()>
+    where
+        L: Borrow<Q>,
+        V: Borrow<QV>,
+    {
+        let qkey = Self::make_queue_key(queue_key);
+        let vbytes = encode_to_smallvec(val);
+        // The store's return value is intentionally discarded here.
+        self.store
+            .queue_push_head_with_ns(TYPED_QUEUE_NS, &qkey, &vbytes)
+            .map(|_| ())
+    }
+
+    /// Removes and returns the head item of `queue_key` together with its logical index.
+    ///
+    /// Returns `Ok(None)` when the store reports no head item.
+    pub fn pop_head_with_idx<Q: ?Sized + Encode>(&self, queue_key: &Q) -> Result<Option<(usize, V)>>
+    where
+        L: Borrow<Q>,
+    {
+        let qkey = Self::make_queue_key(queue_key);
+        match self.store.queue_pop_head_with_ns(TYPED_QUEUE_NS, &qkey)? {
+            Some((idx, value)) => {
+                decode_from_bytes::<V>(&value).map(|value| Some((idx as usize, value)))
+            }
+            None => Ok(None),
+        }
+    }
+
+    /// Removes and returns the head value of `queue_key`.
+    pub fn pop_head<Q: ?Sized + Encode>(&self, queue_key: &Q) -> Result<Option<V>>
+    where
+        L: Borrow<Q>,
+    {
+        Ok(self.pop_head_with_idx(queue_key)?.map(|(_, value)| value))
+    }
+
+    /// Removes and returns the tail item of `queue_key` together with its logical index.
+    ///
+    /// Returns `Ok(None)` when the store reports no tail item.
+    pub fn pop_tail_with_idx<Q: ?Sized + Encode>(&self, queue_key: &Q) -> Result<Option<(usize, V)>>
+    where
+        L: Borrow<Q>,
+    {
+        let qkey = Self::make_queue_key(queue_key);
+        match self.store.queue_pop_tail_with_ns(TYPED_QUEUE_NS, &qkey)? {
+            Some((idx, value)) => {
+                decode_from_bytes::<V>(&value).map(|value| Some((idx as usize, value)))
+            }
+            None => Ok(None),
+        }
+    }
+
+    /// Removes and returns the tail value of `queue_key`.
+    pub fn pop_tail<Q: ?Sized + Encode>(&self, queue_key: &Q) -> Result<Option<V>>
+    where
+        L: Borrow<Q>,
+    {
+        Ok(self.pop_tail_with_idx(queue_key)?.map(|(_, value)| value))
+    }
+
+    /// Returns the head item of `queue_key` and its logical index without removing it.
+    pub fn peek_head_with_idx<Q: ?Sized + Encode>(
+        &self,
+        queue_key: &Q,
+    ) -> Result<Option<(usize, V)>>
+    where
+        L: Borrow<Q>,
+    {
+        let qkey = Self::make_queue_key(queue_key);
+        match self.store.queue_peek_head_with_ns(TYPED_QUEUE_NS, &qkey)? {
+            Some((idx, value)) => {
+                decode_from_bytes::<V>(&value).map(|value| Some((idx as usize, value)))
+            }
+            None => Ok(None),
+        }
+    }
+
+    /// Returns the head value of `queue_key` without removing it.
+    pub fn peek_head<Q: ?Sized + Encode>(&self, queue_key: &Q) -> Result<Option<V>>
+    where
+        L: Borrow<Q>,
+    {
+        Ok(self.peek_head_with_idx(queue_key)?.map(|(_, value)| value))
+    }
+
+    /// Returns the tail item of `queue_key` and its logical index without removing it.
+    pub fn peek_tail_with_idx<Q: ?Sized + Encode>(
+        &self,
+        queue_key: &Q,
+    ) -> Result<Option<(usize, V)>>
+    where
+        L: Borrow<Q>,
+    {
+        let qkey = Self::make_queue_key(queue_key);
+        match self.store.queue_peek_tail_with_ns(TYPED_QUEUE_NS, &qkey)? {
+            Some((idx, value)) => {
+                decode_from_bytes::<V>(&value).map(|value| Some((idx as usize, value)))
+            }
+            None => Ok(None),
+        }
+    }
+
+    /// Returns the tail value of `queue_key` without removing it.
+    pub fn peek_tail<Q: ?Sized + Encode>(&self, queue_key: &Q) -> Result<Option<V>>
+    where
+        L: Borrow<Q>,
+    {
+        Ok(self.peek_tail_with_idx(queue_key)?.map(|(_, value)| value))
+    }
+
+    /// Returns the number of live items in `queue_key`.
+    pub fn len<Q: ?Sized + Encode>(&self, queue_key: &Q) -> Result<usize>
+    where
+        L: Borrow<Q>,
+    {
+        let qkey = Self::make_queue_key(queue_key);
+        self.store
+            .queue_len_with_ns(TYPED_QUEUE_NS, &qkey)
+            .map(|len| len as usize)
+    }
+
+    /// Returns the current inclusive-exclusive logical index span for `queue_key`.
+    // NOTE(review): the generic argument of the return type was lost in this copy of the
+    // patch; `Range<usize>` is assumed -- confirm against `queue_range_with_ns`.
+    pub fn range<Q: ?Sized + Encode>(&self, queue_key: &Q) -> Result<Range<usize>>
+    where
+        L: Borrow<Q>,
+    {
+        let qkey = Self::make_queue_key(queue_key);
+        self.store.queue_range_with_ns(TYPED_QUEUE_NS, &qkey)
+    }
+
+    /// Returns `true` when `queue_key` has no live items.
+    pub fn is_empty<Q: ?Sized + Encode>(&self, queue_key: &Q) -> Result<bool>
+    where
+        L: Borrow<Q>,
+    {
+        let qkey = Self::make_queue_key(queue_key);
+        self.store
+            .queue_len_with_ns(TYPED_QUEUE_NS, &qkey)
+            .map(|len| len == 0)
+    }
+
+    /// Removes all items from `queue_key`.
+    // NOTE(review): return type assumed to be `Result<bool>` (generic argument lost in
+    // this copy) -- confirm against `queue_discard_with_ns`.
+    pub fn discard<Q: ?Sized + Encode>(&self, queue_key: &Q) -> Result<bool>
+    where
+        L: Borrow<Q>,
+    {
+        let qkey = Self::make_queue_key(queue_key);
+        self.store.queue_discard_with_ns(TYPED_QUEUE_NS, &qkey)
+    }
+
+    /// Iterates over live items in `queue_key` from head to tail.
+    ///
+    /// Each item is the logical index paired with the decoded value; a store error or a
+    /// decode failure is yielded as an `Err` item rather than ending the iteration.
+    // NOTE(review): `Item` was lost in this copy; `Result<(usize, V)>` is assumed from
+    // the closure below -- confirm the index type yielded by `queue_iter_with_ns`.
+    pub fn iter<'a, Q: ?Sized + Encode>(
+        &'a self,
+        queue_key: &Q,
+    ) -> impl DoubleEndedIterator<Item = Result<(usize, V)>> + 'a
+    where
+        L: Borrow<Q>,
+    {
+        let qkey = Self::make_queue_key(queue_key);
+        self.store
+            .queue_iter_with_ns(TYPED_QUEUE_NS, &qkey)
+            .map(|res| {
+                res.and_then(|(idx, value)| {
+                    decode_from_bytes::<V>(&value).map(|value| (idx, value))
+                })
+            })
+    }
+}
+
+impl<L, K, V> CandyTypedList<L, K, V>
+where
+    L: CandyTypedKey + Encode,
+    K: Encode + DecodeOwned,
+    V: Encode + DecodeOwned,
+{
+    /// Creates a typed ordered-map/list view over `store`.
+    pub fn new(store: Arc<CandyStore>) -> Self {
+        Self {
+            store,
+            _phantom: PhantomData,
+        }
+    }
+
+    // Builds the raw list key: the encoded `list_key` followed by `L::TYPE_ID`.
+    fn make_list_key<Q: ?Sized + Encode>(list_key: &Q) -> InlineBytes
+    where
+        L: Borrow<Q>,
+    {
+        append_type_id(encode_to_smallvec(list_key), L::TYPE_ID)
+    }
+
+    // Item keys are stored as their plain encoding; no type id is appended.
+    fn make_item_key<Q: ?Sized + Encode>(item_key: &Q) -> InlineBytes
+    where
+        K: Borrow<Q>,
+    {
+        encode_to_smallvec(item_key)
+    }
+
+    /// Returns `true` if `item_key` exists in `list_key`.
+    ///
+    /// Implemented via [`Self::get`], so the stored value is fetched and decoded.
+    pub fn contains<Q1: ?Sized + Encode, Q2: ?Sized + Encode>(
+        &self,
+        list_key: &Q1,
+        item_key: &Q2,
+    ) -> Result<bool>
+    where
+        L: Borrow<Q1>,
+        K: Borrow<Q2>,
+    {
+        self.get(list_key, item_key).map(|value| value.is_some())
+    }
+
+    /// Inserts or replaces `item_key` in `list_key`, placing it at the logical tail.
+    pub fn set<Q1: ?Sized + Encode, Q2: ?Sized + Encode, Q3: ?Sized + Encode>(
+        &self,
+        list_key: &Q1,
+        item_key: &Q2,
+        val: &Q3,
+    ) -> Result<Option<V>>
+    where
+        L: Borrow<Q1>,
+        K: Borrow<Q2>,
+        V: Borrow<Q3>,
+    {
+        let lkey = Self::make_list_key(list_key);
+        let ikey = Self::make_item_key(item_key);
+        let vbytes = encode_to_smallvec(val);
+        // The store hands back the previous raw bytes (if any); decode them for the caller.
+        self.store
+            .list_set_at_tail_with_ns(TYPED_LIST_NS, &lkey, &ikey, &vbytes)?
+            .map(|prev| decode_from_bytes::<V>(&prev))
+            .transpose()
+    }
+
+    /// Returns the current value for `item_key`, or inserts `default_val` if it is missing.
+    ///
+    /// Either way the returned value is decoded from the bytes reported by the store.
+    pub fn get_or_create<Q1: ?Sized + Encode, Q2: ?Sized + Encode, Q3: ?Sized + Encode>(
+        &self,
+        list_key: &Q1,
+        item_key: &Q2,
+        default_val: &Q3,
+    ) -> Result<V>
+    where
+        L: Borrow<Q1>,
+        K: Borrow<Q2>,
+    {
+        let lkey = Self::make_list_key(list_key);
+        let ikey = Self::make_item_key(item_key);
+        let vbytes = encode_to_smallvec(default_val);
+        match self
+            .store
+            .list_get_or_create_with_ns(TYPED_LIST_NS, &lkey, &ikey, &vbytes)?
+        {
+            crate::GetOrCreateStatus::ExistingValue(value)
+            | crate::GetOrCreateStatus::CreatedNew(value) => decode_from_bytes::<V>(&value),
+        }
+    }
+
+    /// Replaces `item_key` only if its current value matches `expected_val` when provided.
+    ///
+    /// Returns `Ok(Some(prev))` with the decoded previous value when the replacement
+    /// happened, and `Ok(None)` when the item does not exist or its value did not match.
+    pub fn replace<
+        Q1: ?Sized + Encode,
+        Q2: ?Sized + Encode,
+        Q3: ?Sized + Encode,
+        Q4: ?Sized + Encode,
+    >(
+        &self,
+        list_key: &Q1,
+        item_key: &Q2,
+        val: &Q3,
+        expected_val: Option<&Q4>,
+    ) -> Result<Option<V>>
+    where
+        L: Borrow<Q1>,
+        K: Borrow<Q2>,
+        V: Borrow<Q3>,
+    {
+        let lkey = Self::make_list_key(list_key);
+        let ikey = Self::make_item_key(item_key);
+        let vbytes = encode_to_smallvec(val);
+        let expected_bytes = expected_val.map(encode_to_smallvec);
+        match self.store.list_replace_with_ns(
+            TYPED_LIST_NS,
+            &lkey,
+            &ikey,
+            &vbytes,
+            expected_bytes.as_deref(),
+        )? {
+            crate::ReplaceStatus::PrevValue(prev) => decode_from_bytes::<V>(&prev).map(Some),
+            crate::ReplaceStatus::WrongValue(_) | crate::ReplaceStatus::DoesNotExist => Ok(None),
+        }
+    }
+
+    /// Returns the decoded value for `item_key`, if present.
+    pub fn get<Q1: ?Sized + Encode, Q2: ?Sized + Encode>(
+        &self,
+        list_key: &Q1,
+        item_key: &Q2,
+    ) -> Result<Option<V>>
+    where
+        L: Borrow<Q1>,
+        K: Borrow<Q2>,
+    {
+        let lkey = Self::make_list_key(list_key);
+        let ikey = Self::make_item_key(item_key);
+        self.store
+            .list_get_with_ns(TYPED_LIST_NS, &lkey, &ikey)?
+            .map(|value| decode_from_bytes::<V>(&value))
+            .transpose()
+    }
+
+    /// Removes `item_key` and returns its previous decoded value if it existed.
+    pub fn remove<Q1: ?Sized + Encode, Q2: ?Sized + Encode>(
+        &self,
+        list_key: &Q1,
+        item_key: &Q2,
+    ) -> Result<Option<V>>
+    where
+        L: Borrow<Q1>,
+        K: Borrow<Q2>,
+    {
+        let lkey = Self::make_list_key(list_key);
+        let ikey = Self::make_item_key(item_key);
+        self.store
+            .list_remove_with_ns(TYPED_LIST_NS, &lkey, &ikey)?
+            .map(|value| decode_from_bytes::<V>(&value))
+            .transpose()
+    }
+
+    /// Returns the number of live items in `list_key`.
+    // NOTE(review): return type assumed to be `Result<usize>` (generic argument lost in
+    // this copy of the patch) -- confirm against `list_len_with_ns`.
+    pub fn len<Q: ?Sized + Encode>(&self, list_key: &Q) -> Result<usize>
+    where
+        L: Borrow<Q>,
+    {
+        let lkey = Self::make_list_key(list_key);
+        self.store.list_len_with_ns(TYPED_LIST_NS, &lkey)
+    }
+
+    /// Returns the current inclusive-exclusive logical span for `list_key`.
+    // NOTE(review): return type assumed to be `Result<Range<usize>>` -- confirm against
+    // `list_range_with_ns`.
+    pub fn range<Q: ?Sized + Encode>(&self, list_key: &Q) -> Result<Range<usize>>
+    where
+        L: Borrow<Q>,
+    {
+        let lkey = Self::make_list_key(list_key);
+        self.store.list_range_with_ns(TYPED_LIST_NS, &lkey)
+    }
+
+    /// Returns `true` when `list_key` has no live items.
+    pub fn is_empty<Q: ?Sized + Encode>(&self, list_key: &Q) -> Result<bool>
+    where
+        L: Borrow<Q>,
+    {
+        self.len(list_key).map(|len| len == 0)
+    }
+
+    /// Removes all items from `list_key`.
+    // NOTE(review): return type assumed to be `Result<bool>` -- confirm against
+    // `list_discard_with_ns`.
+    pub fn discard<Q: ?Sized + Encode>(&self, list_key: &Q) -> Result<bool>
+    where
+        L: Borrow<Q>,
+    {
+        let lkey = Self::make_list_key(list_key);
+        self.store.list_discard_with_ns(TYPED_LIST_NS, &lkey)
+    }
+
+    /// Compacts `list_key` when `params` indicate enough holes exist to justify rewriting it.
+ pub fn compact_if_needed( + &self, + list_key: &Q, + params: ListCompactionParams, + ) -> Result + where + L: Borrow, + { + let lkey = Self::make_list_key(list_key); + self.store + .list_compact_with_ns(TYPED_LIST_NS, &lkey, params) + } + + /// Inserts or replaces `item_key`, moving it to the logical tail and returning the previous value when present. + pub fn set_promoting( + &self, + list_key: &Q1, + item_key: &Q2, + value: &Q3, + ) -> Result> + where + L: Borrow, + K: Borrow, + V: Borrow, + { + let lkey = Self::make_list_key(list_key); + let ikey = Self::make_item_key(item_key); + let vbytes = encode_to_smallvec(value); + self.store + .list_promote_with_ns(TYPED_LIST_NS, &lkey, &ikey, &vbytes)? + .map(|prev| decode_from_bytes::(&prev)) + .transpose() + } + + /// Iterates over live items in `list_key` from head to tail. + pub fn iter<'a, Q: ?Sized + Encode>( + &'a self, + list_key: &Q, + ) -> impl DoubleEndedIterator> + 'a + where + L: Borrow, + { + let lkey = Self::make_list_key(list_key); + self.store + .list_iter_with_ns(TYPED_LIST_NS, &lkey) + .map(|res| { + res.and_then(|(key, value)| { + Ok(( + decode_from_bytes::(&key)?, + decode_from_bytes::(&value)?, + )) + }) + }) + } + + /// Removes and returns the tail item of `list_key`. + pub fn pop_tail(&self, list_key: &Q) -> Result> + where + L: Borrow, + { + let lkey = Self::make_list_key(list_key); + match self.store.pop_list_tail_with_ns(TYPED_LIST_NS, &lkey)? { + Some((key, value)) => Ok(Some(( + decode_from_bytes::(&key)?, + decode_from_bytes::(&value)?, + ))), + None => Ok(None), + } + } + + /// Removes and returns the head item of `list_key`. + pub fn pop_head(&self, list_key: &Q) -> Result> + where + L: Borrow, + { + let lkey = Self::make_list_key(list_key); + match self.store.pop_list_head_with_ns(TYPED_LIST_NS, &lkey)? 
{ + Some((key, value)) => Ok(Some(( + decode_from_bytes::(&key)?, + decode_from_bytes::(&value)?, + ))), + None => Ok(None), + } + } + + /// Returns the tail item of `list_key` without removing it. + pub fn peek_tail(&self, list_key: &Q) -> Result> + where + L: Borrow, + { + let mut iter = self.iter(list_key); + match iter.next_back() { + Some(Ok(pair)) => Ok(Some(pair)), + Some(Err(err)) => Err(err), + None => Ok(None), + } + } + + /// Returns the head item of `list_key` without removing it. + pub fn peek_head(&self, list_key: &Q) -> Result> + where + L: Borrow, + { + match self.iter(list_key).next() { + Some(Ok(pair)) => Ok(Some(pair)), + Some(Err(err)) => Err(err), + None => Ok(None), + } + } + + /// Retains only items for which `func` returns `true`, preserving list order. + pub fn retain( + &self, + list_key: &Q, + mut func: impl FnMut(&K, &V) -> Result, + ) -> Result<()> + where + L: Borrow, + { + let lkey = Self::make_list_key(list_key); + self.store + .list_retain_with_ns(TYPED_LIST_NS, &lkey, |k_bytes, v_bytes| { + let key = decode_from_bytes::(k_bytes)?; + let value = decode_from_bytes::(v_bytes)?; + func(&key, &value) + }) + } +} + +fn decode_from_bytes(bytes: &[u8]) -> Result { + T::from_bytes::(bytes).map_err(|err| { + Error::IOError(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("decode error: {err}"), + )) + }) +} + +fn encode_to_smallvec(value: &T) -> InlineBytes { + let mut bytes = InlineBytes::new(); + value.encode::(&mut bytes).unwrap(); + bytes +} + +fn append_type_id(mut bytes: InlineBytes, type_id: u32) -> InlineBytes { + bytes.extend_from_slice(&type_id.to_le_bytes()); + bytes +} diff --git a/src/typed.rs b/src/typed.rs deleted file mode 100644 index 8afc13d..0000000 --- a/src/typed.rs +++ /dev/null @@ -1,759 +0,0 @@ -use anyhow::anyhow; -use bytemuck::bytes_of; -use std::{borrow::Borrow, marker::PhantomData, ops::Range, sync::Arc}; - -use crate::{ - store::{ReplaceStatus, SetStatus, TYPED_NAMESPACE}, - CandyStore, 
ListCompactionParams, -}; - -use crate::Result; -use databuf::{config::num::LE, DecodeOwned, Encode}; - -pub trait CandyTypedKey: Encode + DecodeOwned { - /// a random number that remains consistent (unlike [std::any::TypeId]), so that `MyPair(u32, u32)` - /// is different from `YourPair(u32, u32)` - const TYPE_ID: u32; -} - -macro_rules! typed_builtin { - ($t:ty, $v:literal) => { - impl CandyTypedKey for $t { - const TYPE_ID: u32 = $v; - } - }; -} - -typed_builtin!(u8, 1); -typed_builtin!(u16, 2); -typed_builtin!(u32, 3); -typed_builtin!(u64, 4); -typed_builtin!(u128, 5); -typed_builtin!(i8, 6); -typed_builtin!(i16, 7); -typed_builtin!(i32, 8); -typed_builtin!(i64, 9); -typed_builtin!(i128, 10); -typed_builtin!(bool, 11); -typed_builtin!(usize, 12); -typed_builtin!(isize, 13); -typed_builtin!(char, 14); -typed_builtin!(String, 15); -typed_builtin!(Vec, 16); -typed_builtin!(uuid::Bytes, 17); - -fn from_bytes(bytes: &[u8]) -> Result { - T::from_bytes::(bytes).map_err(|e| anyhow!(e)) -} - -/// Typed stores are wrappers around an underlying [CandyStore], that serialize keys and values (using [databuf]). -/// These are but thin wrappers, and multiple such wrappers can exist over the same store. -/// -/// The keys and values must support [Encode] and [DecodeOwned], with the addition that keys also provide -/// a `TYPE_ID` const, via the [CandyTypedKey] trait. 
-/// -/// Notes: -/// * All APIs take keys and values by-ref, because they will serialize them, so taking owned values doesn't -/// make sense -/// * [CandyStore::iter] will skip typed items, since it's meaningless to interpret them without the wrapper -pub struct CandyTypedStore { - store: Arc, - _phantom: PhantomData<(K, V)>, -} - -impl Clone for CandyTypedStore { - fn clone(&self) -> Self { - Self { - store: self.store.clone(), - _phantom: Default::default(), - } - } -} - -impl CandyTypedStore -where - K: CandyTypedKey, - V: Encode + DecodeOwned, -{ - /// Constructs a typed wrapper over a CandyStore - pub fn new(store: Arc) -> Self { - Self { - store, - _phantom: Default::default(), - } - } - - fn make_key(key: &Q) -> Vec - where - K: Borrow, - { - let mut kbytes = key.to_bytes::(); - kbytes.extend_from_slice(bytes_of(&K::TYPE_ID)); - kbytes.extend_from_slice(TYPED_NAMESPACE); - kbytes - } - - /// Same as [CandyStore::contains] but serializes the key - pub fn contains(&self, key: &Q) -> Result - where - K: Borrow, - { - Ok(self.store.get_raw(&Self::make_key(key))?.is_some()) - } - - /// Same as [CandyStore::get] but serializes the key and deserializes the value - pub fn get(&self, key: &Q) -> Result> - where - K: Borrow, - { - let kbytes = Self::make_key(key); - if let Some(vbytes) = self.store.get_raw(&kbytes)? { - Ok(Some(from_bytes::(&vbytes)?)) - } else { - Ok(None) - } - } - - /// Same as [CandyStore::replace] but serializes the key and the value - pub fn replace( - &self, - key: &Q1, - val: &Q2, - expected_val: Option<&Q2>, - ) -> Result> - where - K: Borrow, - V: Borrow, - { - let kbytes = Self::make_key(key); - let vbytes = val.to_bytes::(); - let ebytes = expected_val.map(|ev| ev.to_bytes::()).unwrap_or(vec![]); - match self - .store - .replace_raw(&kbytes, &vbytes, expected_val.map(|_| &*ebytes))? 
- { - ReplaceStatus::DoesNotExist => Ok(None), - ReplaceStatus::PrevValue(v) => Ok(Some(from_bytes::(&v)?)), - ReplaceStatus::WrongValue(_) => Ok(None), - } - } - - /// Same as [CandyStore::set] but serializes the key and the value. - pub fn set( - &self, - key: &Q1, - val: &Q2, - ) -> Result> - where - K: Borrow, - V: Borrow, - { - let kbytes = Self::make_key(key); - let vbytes = val.to_bytes::(); - match self.store.set_raw(&kbytes, &vbytes)? { - SetStatus::CreatedNew => Ok(None), - SetStatus::PrevValue(v) => Ok(Some(from_bytes::(&v)?)), - } - } - - /// Same as [CandyStore::get_or_create] but serializes the key and the default value - pub fn get_or_create( - &self, - key: &Q1, - default_val: &Q2, - ) -> Result - where - K: Borrow, - V: Borrow, - { - let kbytes = Self::make_key(key); - Ok(from_bytes::( - &self - .store - .get_or_create_raw(&kbytes, default_val.to_bytes::())? - .value(), - )?) - } - - /// Same as [CandyStore::remove] but serializes the key - pub fn remove(&self, k: &Q) -> Result> - where - K: Borrow, - { - let kbytes = Self::make_key(k); - if let Some(vbytes) = self.store.remove_raw(&kbytes)? { - Ok(Some(from_bytes::(&vbytes)?)) - } else { - Ok(None) - } - } - - /// Same as [CandyStore::get_big] but serializes the key and deserializes the value - pub fn get_big(&self, key: &Q) -> Result> - where - K: Borrow, - { - let kbytes = Self::make_key(key); - if let Some(vbytes) = self.store.get_big(&kbytes)? { - Ok(Some(from_bytes::(&vbytes)?)) - } else { - Ok(None) - } - } - - /// Same as [CandyStore::set_big] but serializes the key and the value. 
- pub fn set_big( - &self, - key: &Q1, - val: &Q2, - ) -> Result - where - K: Borrow, - V: Borrow, - { - let kbytes = Self::make_key(key); - let vbytes = val.to_bytes::(); - self.store.set_big(&kbytes, &vbytes) - } - - /// Same as [CandyStore::remove_big] but serializes the key - pub fn remove_big(&self, k: &Q) -> Result - where - K: Borrow, - { - let kbytes = Self::make_key(k); - self.store.remove_big(&kbytes) - } -} - -/// A wrapper around [CandyStore] that exposes the list API in a typed manner. See [CandyTypedStore] for more -/// info -pub struct CandyTypedList { - store: Arc, - _phantom: PhantomData<(L, K, V)>, -} - -impl Clone for CandyTypedList { - fn clone(&self) -> Self { - Self { - store: self.store.clone(), - _phantom: Default::default(), - } - } -} - -impl CandyTypedList -where - L: CandyTypedKey, - K: Encode + DecodeOwned, - V: Encode + DecodeOwned, -{ - /// Constructs a [CandyTypedList] over an existing [CandyStore] - pub fn new(store: Arc) -> Self { - Self { - store, - _phantom: PhantomData, - } - } - - fn make_list_key(list_key: &Q) -> Vec - where - L: Borrow, - { - let mut kbytes = list_key.to_bytes::(); - kbytes.extend_from_slice(bytes_of(&L::TYPE_ID)); - kbytes - } - - /// Tests if the given typed `item_key` exists in this list (identified by `list_key`) - pub fn contains( - &self, - list_key: &Q1, - item_key: &Q2, - ) -> Result - where - L: Borrow, - K: Borrow, - { - let list_key = Self::make_list_key(list_key); - let item_key = item_key.to_bytes::(); - Ok(self - .store - .owned_get_from_list(list_key, item_key)? - .is_some()) - } - - /// Same as [CandyStore::get_from_list], but `list_key` and `item_key` are typed - pub fn get( - &self, - list_key: &Q1, - item_key: &Q2, - ) -> Result> - where - L: Borrow, - K: Borrow, - { - let list_key = Self::make_list_key(list_key); - let item_key = item_key.to_bytes::(); - if let Some(vbytes) = self.store.owned_get_from_list(list_key, item_key)? 
{ - Ok(Some(from_bytes::(&vbytes)?)) - } else { - Ok(None) - } - } - - fn _set( - &self, - list_key: &Q1, - item_key: &Q2, - val: &Q3, - promote: bool, - ) -> Result> - where - L: Borrow, - K: Borrow, - V: Borrow, - { - let list_key = Self::make_list_key(list_key); - let item_key = item_key.to_bytes::(); - let val = val.to_bytes::(); - match self - .store - .owned_set_in_list(list_key, item_key, val, promote)? - { - SetStatus::CreatedNew => Ok(None), - SetStatus::PrevValue(v) => Ok(Some(from_bytes::(&v)?)), - } - } - - /// Same as [CandyStore::set_in_list], but `list_key`, `item_key` and `val` are typed - pub fn set( - &self, - list_key: &Q1, - item_key: &Q2, - val: &Q3, - ) -> Result> - where - L: Borrow, - K: Borrow, - V: Borrow, - { - self._set(list_key, item_key, val, false) - } - - /// Same as [CandyStore::set_in_list_promoting], but `list_key`, `item_key` and `val` are typed - pub fn set_promoting( - &self, - list_key: &Q1, - item_key: &Q2, - val: &Q3, - ) -> Result> - where - L: Borrow, - K: Borrow, - V: Borrow, - { - self._set(list_key, item_key, val, true) - } - - /// Same as [CandyStore::get_or_create_in_list], but `list_key`, `item_key` and `default_val` are typed - pub fn get_or_create( - &self, - list_key: &Q1, - item_key: &Q2, - default_val: &Q3, - ) -> Result - where - L: Borrow, - K: Borrow, - { - let list_key = Self::make_list_key(list_key); - let item_key = item_key.to_bytes::(); - let default_val = default_val.to_bytes::(); - let vbytes = self - .store - .owned_get_or_create_in_list(list_key, item_key, default_val)? 
- .value(); - from_bytes::(&vbytes) - } - - /// Same as [CandyStore::replace_in_list], but `list_key`, `item_key` and `val` are typed - pub fn replace( - &self, - list_key: &Q1, - item_key: &Q2, - val: &Q3, - expected_val: Option<&Q3>, - ) -> Result> - where - L: Borrow, - K: Borrow, - V: Borrow, - { - let list_key = Self::make_list_key(list_key); - let item_key = item_key.to_bytes::(); - let val = val.to_bytes::(); - let ebytes = expected_val - .map(|ev| ev.to_bytes::()) - .unwrap_or_default(); - match self.store.owned_replace_in_list( - list_key, - item_key, - val, - expected_val.map(|_| &*ebytes), - )? { - ReplaceStatus::DoesNotExist => Ok(None), - ReplaceStatus::PrevValue(v) => Ok(Some(from_bytes::(&v)?)), - ReplaceStatus::WrongValue(_) => Ok(None), - } - } - - /// Same as [CandyStore::remove_from_list], but `list_key` and `item_key` are typed - pub fn remove( - &self, - list_key: &Q1, - item_key: &Q2, - ) -> Result> - where - L: Borrow, - K: Borrow, - { - let list_key = Self::make_list_key(list_key); - let item_key = item_key.to_bytes::(); - if let Some(vbytes) = self.store.owned_remove_from_list(list_key, item_key)? 
{ - Ok(Some(from_bytes::(&vbytes)?)) - } else { - Ok(None) - } - } - - /// Same as [CandyStore::iter_list], but `list_key` is typed - pub fn iter<'a, Q: ?Sized + Encode>( - &'a self, - list_key: &Q, - ) -> impl Iterator> + 'a - where - L: Borrow, - { - let list_key = Self::make_list_key(list_key); - self.store.owned_iter_list(list_key).map(|res| match res { - Err(e) => Err(e), - Ok((k, v)) => { - let key = from_bytes::(&k)?; - let val = from_bytes::(&v)?; - Ok((key, val)) - } - }) - } - - /// Same as [CandyStore::iter_list_backwards], but `list_key` is typed - pub fn iter_backwards<'a, Q: ?Sized + Encode>( - &'a self, - list_key: &Q, - ) -> impl Iterator> + 'a - where - L: Borrow, - { - let list_key = Self::make_list_key(list_key); - self.store - .owned_iter_list_backwards(list_key) - .map(|res| match res { - Err(e) => Err(e), - Ok((k, v)) => { - let key = from_bytes::(&k)?; - let val = from_bytes::(&v)?; - Ok((key, val)) - } - }) - } - - /// Same as [CandyStore::discard_list], but `list_key` is typed - pub fn discard(&self, list_key: &Q) -> Result - where - L: Borrow, - { - let list_key = Self::make_list_key(list_key); - self.store.owned_discard_list(list_key) - } - - /// Same as [CandyStore::compact_list_if_needed], but `list_key` is typed - pub fn compact_if_needed( - &self, - list_key: &Q, - params: ListCompactionParams, - ) -> Result - where - L: Borrow, - { - let list_key = Self::make_list_key(list_key); - self.store.compact_list_if_needed(&list_key, params) - } - - /// Same as [CandyStore::pop_list_tail], but `list_key` is typed - pub fn pop_tail(&self, list_key: &Q) -> Result> - where - L: Borrow, - { - let list_key = Self::make_list_key(list_key); - let Some((k, v)) = self.store.owned_pop_list_tail(list_key)? 
else { - return Ok(None); - }; - Ok(Some((from_bytes::(&k)?, from_bytes::(&v)?))) - } - - /// Same as [CandyStore::pop_list_head], but `list_key` is typed - pub fn pop_head(&self, list_key: &Q) -> Result> - where - L: Borrow, - { - let list_key = Self::make_list_key(list_key); - let Some((k, v)) = self.store.owned_pop_list_head(list_key)? else { - return Ok(None); - }; - Ok(Some((from_bytes::(&k)?, from_bytes::(&v)?))) - } - - /// Same as [CandyStore::peek_list_tail], but `list_key` is typed - pub fn peek_tail(&self, list_key: &Q) -> Result> - where - L: Borrow, - { - let list_key = Self::make_list_key(list_key); - let Some((k, v)) = self.store.owned_peek_list_tail(list_key)? else { - return Ok(None); - }; - Ok(Some((from_bytes::(&k)?, from_bytes::(&v)?))) - } - - /// Same as [CandyStore::peek_list_head], but `list_key` is typed - pub fn peek_head(&self, list_key: &Q) -> Result> - where - L: Borrow, - { - let list_key = Self::make_list_key(list_key); - let Some((k, v)) = self.store.owned_peek_list_head(list_key)? else { - return Ok(None); - }; - Ok(Some((from_bytes::(&k)?, from_bytes::(&v)?))) - } - - /// Same as [CandyStore::list_len], but `list_key` is typed - pub fn len(&self, list_key: &Q) -> Result - where - L: Borrow, - { - self.store.owned_list_len(Self::make_list_key(list_key)) - } - - /// Same as [CandyStore::retain_in_list], but `list_key` is typed - pub fn retain( - &self, - list_key: &Q, - mut func: impl FnMut(&K, &V) -> Result, - ) -> Result<()> - where - L: Borrow, - { - let list_key = Self::make_list_key(list_key); - self.store.owned_retain_in_list(list_key, |k, v| { - let tk = from_bytes::(&k)?; - let tv = from_bytes::(&v)?; - func(&tk, &tv) - }) - } -} - -/// A wrapper around [CandyStore] that exposes the queue API in a typed manner. 
See [CandyTypedStore] for more -/// info -pub struct CandyTypedDeque { - store: Arc, - _phantom: PhantomData<(L, V)>, -} - -impl Clone for CandyTypedDeque { - fn clone(&self) -> Self { - Self { - store: self.store.clone(), - _phantom: Default::default(), - } - } -} - -impl CandyTypedDeque -where - L: CandyTypedKey, - V: Encode + DecodeOwned, -{ - pub fn new(store: Arc) -> Self { - Self { - store, - _phantom: Default::default(), - } - } - - /// Pushes a value at the beginning (head) of the queue - pub fn push_head( - &self, - queue_key: &Q1, - val: &Q2, - ) -> Result<()> - where - L: Borrow, - V: Borrow, - { - let queue_key = CandyTypedList::::make_list_key(queue_key); - let val = val.to_bytes::(); - self.store.push_to_queue_head(&queue_key, &val)?; - Ok(()) - } - - /// Pushes a value at the end (tail) of the queue - pub fn push_tail( - &self, - queue_key: &Q1, - val: &Q2, - ) -> Result<()> - where - L: Borrow, - V: Borrow, - { - let queue_key = CandyTypedList::::make_list_key(queue_key); - let val = val.to_bytes::(); - self.store.push_to_queue_tail(&queue_key, &val)?; - Ok(()) - } - - /// Pops a value from the beginning (head) of the queue - pub fn pop_head_with_idx(&self, queue_key: &Q) -> Result> - where - L: Borrow, - { - let queue_key = CandyTypedList::::make_list_key(queue_key); - let Some((idx, v)) = self.store.pop_queue_head_with_idx(&queue_key)? else { - return Ok(None); - }; - Ok(Some((idx, from_bytes::(&v)?))) - } - - /// Pops a value from the beginning (head) of the queue - pub fn pop_head(&self, queue_key: &Q) -> Result> - where - L: Borrow, - { - Ok(self.pop_head_with_idx(queue_key)?.map(|iv| iv.1)) - } - - /// Pops a value from the end (tail) of the queue - pub fn pop_tail_with_idx(&self, queue_key: &Q) -> Result> - where - L: Borrow, - { - let queue_key = CandyTypedList::::make_list_key(queue_key); - let Some((idx, v)) = self.store.pop_queue_tail_with_idx(&queue_key)? 
else { - return Ok(None); - }; - Ok(Some((idx, from_bytes::(&v)?))) - } - - /// Pops a value from the end (tail) of the queue - pub fn pop_tail(&self, queue_key: &Q) -> Result> - where - L: Borrow, - { - Ok(self.pop_tail_with_idx(queue_key)?.map(|iv| iv.1)) - } - - /// Peek at the value from the beginning (head) of the queue and its index - pub fn peek_head_with_idx( - &self, - queue_key: &Q, - ) -> Result> - where - L: Borrow, - { - let queue_key = CandyTypedList::::make_list_key(queue_key); - let Some((idx, v)) = self.store.peek_queue_head_with_idx(&queue_key)? else { - return Ok(None); - }; - Ok(Some((idx, from_bytes::(&v)?))) - } - - /// Peek at the value from the beginning (head) of the queue - pub fn peek_head(&self, queue_key: &Q) -> Result> - where - L: Borrow, - { - Ok(self.peek_head_with_idx(queue_key)?.map(|iv| iv.1)) - } - - /// Peek at the value from the end (tail) of the queue - pub fn peek_tail_with_idx( - &self, - queue_key: &Q, - ) -> Result> - where - L: Borrow, - { - let queue_key = CandyTypedList::::make_list_key(queue_key); - let Some((idx, v)) = self.store.peek_queue_tail_with_idx(&queue_key)? 
else { - return Ok(None); - }; - Ok(Some((idx, from_bytes::(&v)?))) - } - - /// Peek at the value from the end (tail) of the queue - pub fn peek_tail(&self, queue_key: &Q) -> Result> - where - L: Borrow, - { - Ok(self.peek_tail_with_idx(queue_key)?.map(|iv| iv.1)) - } - - /// See [CandyTypedList::iter] - pub fn iter<'a, Q: ?Sized + Encode>( - &'a self, - queue_key: &Q, - ) -> impl Iterator> + 'a - where - L: Borrow, - { - let queue_key = CandyTypedList::::make_list_key(queue_key); - self.store.iter_queue(&queue_key).map(|res| match res { - Err(e) => Err(e), - Ok((idx, v)) => Ok((idx, from_bytes::(&v).unwrap())), - }) - } - - /// See [CandyTypedList::iter_backwards] - pub fn iter_backwards<'a, Q: ?Sized + Encode>( - &'a self, - queue_key: &Q, - ) -> impl Iterator> + 'a - where - L: Borrow, - { - let queue_key = CandyTypedList::::make_list_key(queue_key); - self.store - .iter_queue_backwards(&queue_key) - .map(|res| match res { - Err(e) => Err(e), - Ok((idx, v)) => Ok((idx, from_bytes::(&v).unwrap())), - }) - } - - pub fn len(&self, queue_key: &Q) -> Result - where - L: Borrow, - { - let queue_key = CandyTypedList::::make_list_key(queue_key); - self.store.queue_len(&queue_key) - } - - pub fn range(&self, queue_key: &Q) -> Result> - where - L: Borrow, - { - let queue_key = CandyTypedList::::make_list_key(queue_key); - self.store.queue_range(&queue_key) - } -} diff --git a/src/types.rs b/src/types.rs new file mode 100644 index 0000000..9af9143 --- /dev/null +++ b/src/types.rs @@ -0,0 +1,316 @@ +/// Maximum supported data-file size after internal encoding overhead limits. +pub const MAX_FILE_SIZE: usize = (1 << 30) - (1 << 24); +/// Maximum supported user key length in bytes. +pub const MAX_USER_KEY_SIZE: usize = crate::internal::MAX_INTERNAL_KEY_SIZE - 16; +/// Maximum supported inline value length in bytes. 
+pub const MAX_USER_VALUE_SIZE: usize = crate::internal::MAX_INTERNAL_VALUE_SIZE - 64;
+
+pub(crate) const ROW_WIDTH: usize = crate::internal::ROW_WIDTH;
+pub(crate) const INITIAL_DATA_FILE_ORDINAL: u64 = 0x00bd_38a0_2a35_1cdf;
+
+use crate::internal::MIN_INITIAL_ROWS;
+
+#[derive(Debug, Clone, Copy)]
+/// How opening a store should handle a dirty index.
+pub enum RebuildStrategy {
+    /// Reject opening a store whose index is marked dirty.
+    FailIfDirty,
+    /// Rebuild the index from data files when the store is dirty.
+    RebuildIfDirty,
+    /// Reset the database when the store is dirty.
+    ///
+    /// This removes all directory contents before recreating the store state.
+    /// While the store is open, the active `.lockfile` is preserved so the
+    /// directory remains locked against concurrent opens.
+    ResetDBIfDirty,
+    /// Trust a dirty index only if row checksums still match; otherwise fail.
+    TrustDirtyIndexIfChecksumCorrectOrFail,
+    /// Trust a dirty index if row checksums match; otherwise rebuild.
+    TrustDirtyIndexIfChecksumCorrectOrRebuild,
+}
+
+#[derive(Debug, Clone, Copy)]
+/// Runtime configuration for opening a store.
+pub struct Config {
+    /// SipHash keys used for row selection and signatures.
+    ///
+    /// When a store is created or fully reset, this key is written into the
+    /// index header. Reopening an existing store reuses the persisted hash key
+    /// from disk even if a different value is provided here.
+    pub hash_key: (u64, u64),
+    /// Whether to try to lock index mmaps into memory.
+    pub mlock_index: bool,
+    /// Growth factor used when remapping index structures.
+    pub remap_scaler: u8,
+    /// Initial target capacity in number of key/value entries.
+    pub initial_capacity: usize,
+    /// Maximum size of a single data file in bytes.
+    pub max_data_file_size: u32,
+    /// Minimum per-file waste threshold before background compaction considers it.
+    pub compaction_min_threshold: u32,
+    /// Maximum logical concurrency used to size internal lock tables.
+    pub max_concurrency: usize,
+    /// Reset the database if opening encounters invalid on-disk data.
+    ///
+    /// This removes all directory contents before recreating the store state.
+    /// While the store is open, the active `.lockfile` is preserved so the
+    /// directory remains locked against concurrent opens.
+    pub reset_on_invalid_data: bool,
+    /// Target background compaction throughput in bytes per second.
+    pub compaction_throughput_bytes_per_sec: usize,
+    /// Dirty-index handling policy used during open.
+    pub rebuild_strategy: RebuildStrategy,
+}
+
+impl Default for Config {
+    fn default() -> Self {
+        Self {
+            hash_key: (0x7c2b_23a8_12c2_005f, 0x1f6a_4035_386e_c891),
+            mlock_index: false,
+            remap_scaler: 1,
+            initial_capacity: MIN_INITIAL_ROWS * ROW_WIDTH,
+            max_data_file_size: 64 * 1024 * 1024,
+            compaction_min_threshold: 24 * 1024 * 1024,
+            // Twice the CPU count, kept within the 16..=64 range.
+            max_concurrency: (2 * num_cpus::get()).clamp(16, 64),
+            reset_on_invalid_data: false,
+            compaction_throughput_bytes_per_sec: 4 * 1024 * 1024,
+            rebuild_strategy: RebuildStrategy::FailIfDirty,
+        }
+    }
+}
+
+#[derive(thiserror::Error, Debug)]
+/// Errors returned by store operations and open/recovery flows.
+pub enum Error {
+    /// An I/O failure. Typed-value decode failures are also reported through this
+    /// variant, wrapped in an `std::io::Error` of kind `InvalidData`.
+    #[error("IO error: {0}")]
+    IOError(std::io::Error),
+
+    #[error("Index file is dirty")]
+    DirtyIndex,
+
+    #[error("Missing data file: {0}")]
+    MissingDataFile(u16),
+
+    #[error("Data file {0} reached size limit")]
+    RotateDataFile(u16),
+
+    #[error("Row needs splitting at split level {0}")]
+    SplitRow(u64),
+
+    #[error("Too many data files")]
+    TooManyDataFiles,
+
+    #[error("Lockfile {0} is taken by {1}")]
+    LockfileTaken(std::path::PathBuf, String),
+
+    #[error("Payload {0} too large")]
+    PayloadTooLarge(usize),
+}
+
+/// Convenience result type used by the crate.
+pub type Result<T> = std::result::Result<T, Error>;
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+/// Outcome of a conditional replace operation.
+pub enum ReplaceStatus {
+    /// The key existed and the previous value was replaced.
+    PrevValue(Vec<u8>),
+    /// The key existed, but its current value did not match the expected value.
+    WrongValue(Vec<u8>),
+    /// The key did not exist.
+    DoesNotExist,
+}
+
+impl ReplaceStatus {
+    /// Returns `true` when the value was replaced.
+    pub fn was_replaced(&self) -> bool {
+        matches!(self, Self::PrevValue(_))
+    }
+
+    /// Returns `true` when the replace operation did not update the value.
+    pub fn failed(&self) -> bool {
+        !self.was_replaced()
+    }
+
+    /// Returns `true` when the target key was missing.
+    pub fn is_key_missing(&self) -> bool {
+        matches!(self, Self::DoesNotExist)
+    }
+
+    /// Returns `true` when the expected value check failed.
+    pub fn is_wrong_value(&self) -> bool {
+        matches!(self, Self::WrongValue(_))
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+/// Outcome of a set operation.
+pub enum SetStatus {
+    /// The key existed and the previous value was returned.
+    PrevValue(Vec<u8>),
+    /// The key was newly inserted.
+    CreatedNew,
+}
+
+impl SetStatus {
+    /// Returns `true` when the key did not previously exist.
+    pub fn was_created(&self) -> bool {
+        matches!(self, Self::CreatedNew)
+    }
+
+    /// Returns `true` when the key previously existed and was overwritten.
+    pub fn was_replaced(&self) -> bool {
+        matches!(self, Self::PrevValue(_))
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+/// Outcome of a get-or-create operation.
+pub enum GetOrCreateStatus {
+    /// The key already existed and its current value was returned.
+    ExistingValue(Vec<u8>),
+    /// The key was created with the provided default value.
+    CreatedNew(Vec<u8>),
+}
+
+impl GetOrCreateStatus {
+    /// Returns `true` when the key was inserted by the operation.
+    pub fn was_created(&self) -> bool {
+        matches!(self, Self::CreatedNew(_))
+    }
+
+    /// Returns `true` when the key already existed.
+    pub fn already_exists(&self) -> bool {
+        matches!(self, Self::ExistingValue(_))
+    }
+
+    /// Returns the resulting value regardless of whether it was created or already existed.
+ pub fn value(self) -> Vec { + match self { + Self::ExistingValue(value) | Self::CreatedNew(value) => value, + } + } +} + +#[derive(Debug, Clone, Copy)] +/// Heuristics controlling list compaction. +pub struct ListCompactionParams { + /// Minimum list span length before compaction is considered. + pub min_length: u64, + /// Minimum hole ratio required to trigger compaction. + pub min_holes_ratio: f64, +} + +impl Default for ListCompactionParams { + fn default() -> Self { + Self { + min_length: 100, + min_holes_ratio: 0.25, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// Snapshot of store-level counters and size statistics. +pub struct Stats { + /// Number of allocated index rows. + pub num_rows: u64, + /// Theoretical maximum number of entries at the current row count. + pub capacity: u64, + /// Number of currently live entries. + pub num_items: u64, + /// Total bytes occupied by index metadata files. + pub index_size_bytes: u64, + /// Number of completed background compactions. + pub num_compactions: u64, + /// Total time spent in compaction, in milliseconds. + pub compaction_time_ms: u64, + /// Number of data files currently present. + pub num_data_files: u64, + /// Number of successful key lookups. + pub num_positive_lookups: u64, + /// Number of failed key lookups. + pub num_negative_lookups: u64, + /// Number of read operations performed against data files. + pub num_read_ops: u64, + /// Total bytes read from data files. + pub num_read_bytes: u64, + /// Number of write operations performed against data files. + pub num_write_ops: u64, + /// Total bytes written to data files. + pub num_write_bytes: u64, + /// Number of entry creations recorded. + pub num_created: u64, + /// Number of entry removals recorded. + pub num_removed: u64, + /// Number of entry replacements recorded. + pub num_replaced: u64, + /// Total logical bytes written as live entries. + pub written_bytes: u64, + /// Total bytes currently accounted as waste before reclamation. 
+ pub waste_bytes: u64, + /// Total bytes reclaimed by compaction. + pub reclaimed_bytes: u64, + /// Histogram bucket for entries under 64 bytes. + pub entries_under_64: u64, + /// Histogram bucket for entries under 256 bytes. + pub entries_under_256: u64, + /// Histogram bucket for entries under 1024 bytes. + pub entries_under_1024: u64, + /// Histogram bucket for entries under 4096 bytes. + pub entries_under_4096: u64, + /// Histogram bucket for entries under 16384 bytes. + pub entries_under_16384: u64, + /// Histogram bucket for entries of 16384 bytes or larger. + pub entries_over_16384: u64, +} + +impl Stats { + /// Returns the fraction of the current index capacity occupied by live entries. + pub fn fill_level(&self) -> f64 { + if self.capacity == 0 { + return 0.0; + } + self.num_items as f64 / self.capacity as f64 + } + + /// Returns the number of live entries. + pub fn num_entries(&self) -> u64 { + self.num_items + } + + /// Returns the current unreclaimed waste in bytes. + pub fn current_waste(&self) -> u64 { + self.waste_bytes.saturating_sub(self.reclaimed_bytes) + } + + /// Returns live data bytes after subtracting current waste. + pub fn data_bytes(&self) -> u64 { + self.written_bytes.saturating_sub(self.current_waste()) + } + + /// Returns bytes currently occupied by live data. + pub fn occupied_bytes(&self) -> u64 { + self.data_bytes() + } + + /// Returns current unreclaimed waste in bytes. + pub fn wasted_bytes(&self) -> u64 { + self.current_waste() + } + + /// Returns the number of inserted entries. + pub fn num_inserts(&self) -> u64 { + self.num_created + } + + /// Returns the number of updated entries. + pub fn num_updates(&self) -> u64 { + self.num_replaced + } + + /// Returns the number of removed entries. 
+ pub fn num_removals(&self) -> u64 { + self.num_removed + } +} diff --git a/tests/basic_ops.rs b/tests/basic_ops.rs new file mode 100644 index 0000000..e4c9cba --- /dev/null +++ b/tests/basic_ops.rs @@ -0,0 +1,212 @@ +mod common; + +use candystore::{ + CandyStore, Config, Error, GetOrCreateStatus, MAX_KEY_LEN, MAX_VALUE_LEN, ReplaceStatus, + SetStatus, +}; +use std::sync::{Arc, Barrier}; +use std::thread; +use tempfile::tempdir; + +#[test] +fn test_basic() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + assert!(db.get("hello")?.is_none()); + + assert!(matches!(db.set("hello", "world")?, SetStatus::CreatedNew)); + assert_eq!(db.get("hello")?, Some("world".into())); + + assert!( + matches!(db.set("hello", "earth")?, SetStatus::PrevValue(ref value) if value == b"world") + ); + assert_eq!(db.get("hello")?, Some("earth".into())); + + assert_eq!(db.remove("hello")?, Some("earth".into())); + assert!(db.get("hello")?.is_none()); + assert!(db.remove("hello")?.is_none()); + + Ok(()) +} + +#[test] +fn test_reopen_existing_db() -> Result<(), Error> { + let dir = tempdir().unwrap(); + + { + let db = CandyStore::open(dir.path(), Config::default())?; + assert!(matches!(db.set("hello", "world")?, SetStatus::CreatedNew)); + assert!(matches!(db.set("goodbye", "earth")?, SetStatus::CreatedNew)); + } + + let db = CandyStore::open(dir.path(), Config::default())?; + assert_eq!(db.get("hello")?, Some("world".into())); + assert_eq!(db.get("goodbye")?, Some("earth".into())); + + Ok(()) +} + +#[test] +fn test_reopen_with_different_hash_key_uses_persisted_key() -> Result<(), Error> { + let dir = tempdir().unwrap(); + + let original_config = Config { + hash_key: (1, 2), + ..Config::default() + }; + let different_config = Config { + hash_key: (3, 4), + ..original_config + }; + + { + let db = CandyStore::open(dir.path(), original_config)?; + db.set("hello", "world")?; + } + + let db = CandyStore::open(dir.path(), 
different_config)?; + assert_eq!(db.get("hello")?, Some("world".into())); + db.set("goodbye", "earth")?; + drop(db); + + let db = CandyStore::open(dir.path(), original_config)?; + assert_eq!(db.get("hello")?, Some("world".into())); + assert_eq!(db.get("goodbye")?, Some("earth".into())); + + Ok(()) +} +#[test] +fn test_oversized_value_rejected() { + let dir = tempfile::tempdir().unwrap(); + let config = candystore::Config { + max_data_file_size: 1024 * 1024, + ..Default::default() + }; + let db = candystore::CandyStore::open(dir.path(), config).unwrap(); + + let large_value = vec![0u8; 2 * 1024 * 1024]; // 2MB + + // Should gracefully reject the oversized key/value pair + let result = db.set("key", &large_value); + assert!(result.is_err()); +} + +#[test] +fn test_max_key_len() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let key = vec![b'k'; MAX_KEY_LEN]; + db.set(&key, b"value")?; + assert_eq!(db.get(&key)?, Some(b"value".to_vec())); + + Ok(()) +} + +#[test] +fn test_key_too_long() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let key = vec![b'k'; MAX_KEY_LEN + 1]; + assert!(db.set(&key, b"value").is_err()); + + Ok(()) +} + +#[test] +fn test_max_value_len() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let value = vec![b'v'; MAX_VALUE_LEN]; + db.set(b"key", &value)?; + assert_eq!(db.get(b"key")?, Some(value)); + + Ok(()) +} + +#[test] +fn test_value_too_long() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let value = vec![b'v'; MAX_VALUE_LEN + 1]; + assert!(db.set(b"key", &value).is_err()); + + Ok(()) +} + +#[test] +fn test_empty_key_and_value() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + 
db.set(b"", b"value")?; + assert_eq!(db.get(b"")?, Some(b"value".to_vec())); + + db.set(b"empty", b"")?; + assert_eq!(db.get(b"empty")?, Some(Vec::new())); + + Ok(()) +} + +#[test] +fn test_get_or_create_and_replace() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let created = db.get_or_create("hello", "world")?; + assert!(matches!(created, GetOrCreateStatus::CreatedNew(ref value) if value == b"world")); + + let existing = db.get_or_create("hello", "other")?; + assert!(matches!(existing, GetOrCreateStatus::ExistingValue(ref value) if value == b"world")); + + let wrong = db.replace("hello", "earth", Some(&"wrong"))?; + assert!(matches!(wrong, ReplaceStatus::WrongValue(ref value) if value == b"world")); + assert_eq!(db.get("hello")?, Some(b"world".to_vec())); + + let replaced = db.replace("hello", "earth", Some(&"world"))?; + assert!(matches!(replaced, ReplaceStatus::PrevValue(ref value) if value == b"world")); + assert_eq!(db.get("hello")?, Some(b"earth".to_vec())); + + let missing = db.replace("missing", "value", Option::<&str>::None)?; + assert!(matches!(missing, ReplaceStatus::DoesNotExist)); + + Ok(()) +} + +#[test] +fn test_get_or_create_is_atomic_under_contention() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = Arc::new(CandyStore::open(dir.path(), Config::default())?); + let barrier = Arc::new(Barrier::new(8)); + + let mut handles = Vec::new(); + for idx in 0..8 { + let db = Arc::clone(&db); + let barrier = Arc::clone(&barrier); + handles.push(thread::spawn(move || { + let value = format!("value-{idx}"); + barrier.wait(); + db.get_or_create("shared", &value).unwrap() + })); + } + + let mut created_values = Vec::new(); + let mut seen_values = Vec::new(); + for handle in handles { + match handle.join().unwrap() { + GetOrCreateStatus::CreatedNew(value) => created_values.push(value), + GetOrCreateStatus::ExistingValue(value) => seen_values.push(value), + } + } + + 
assert_eq!(created_values.len(), 1); + let winning_value = created_values.pop().unwrap(); + assert_eq!(db.get("shared")?, Some(winning_value.clone())); + assert!(seen_values.into_iter().all(|value| value == winning_value)); + + Ok(()) +} diff --git a/tests/big_items.rs b/tests/big_items.rs new file mode 100644 index 0000000..f6e3980 --- /dev/null +++ b/tests/big_items.rs @@ -0,0 +1,71 @@ +use candystore::{CandyStore, Config, Error, MAX_USER_VALUE_SIZE}; +use tempfile::tempdir; + +fn patterned_bytes(len: usize) -> Vec { + (0..len).map(|idx| (idx % 251) as u8).collect() +} + +#[test] +fn test_set_get_remove_big() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let value = patterned_bytes(MAX_USER_VALUE_SIZE + 4096); + + assert!(!db.set_big("blob", &value)?); + assert_eq!(db.get_big("blob")?, Some(value.clone())); + assert!(db.remove_big("blob")?); + assert_eq!(db.get_big("blob")?, None); + assert!(!db.remove_big("blob")?); + + Ok(()) +} + +#[test] +fn test_set_big_reports_replacement() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let first = patterned_bytes(MAX_USER_VALUE_SIZE + 17); + let second = patterned_bytes(MAX_USER_VALUE_SIZE * 2 + 33); + + assert!(!db.set_big("blob", &first)?); + assert!(db.set_big("blob", &second)?); + assert_eq!(db.get_big("blob")?, Some(second)); + + Ok(()) +} + +#[test] +fn test_big_persists_across_reopen() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let value = patterned_bytes(MAX_USER_VALUE_SIZE * 2 + 123); + + { + let db = CandyStore::open(dir.path(), Config::default())?; + db.set_big("blob", &value)?; + } + + let reopened = CandyStore::open(dir.path(), Config::default())?; + assert_eq!(reopened.get_big("blob")?, Some(value)); + + Ok(()) +} + +#[test] +fn test_big_can_exceed_single_value_limit() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open( 
+ dir.path(), + Config { + max_data_file_size: 16 * 1024, + ..Config::default() + }, + )?; + + let value = patterned_bytes(MAX_USER_VALUE_SIZE * 3 + 777); + db.set_big("rotating_blob", &value)?; + assert_eq!(db.get_big("rotating_blob")?, Some(value)); + + Ok(()) +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 13a58a6..bb4b4db 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1,16 +1,37 @@ -use candystore::Result; -use rand::random; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::path::Path; -pub fn run_in_tempdir(f: impl FnOnce(&str) -> Result<()>) -> Result<()> { - let rand: u64 = random(); - let dir = format!("/tmp/candy-{rand}"); - _ = std::fs::remove_dir_all(&dir); +use candystore::{Config, RebuildStrategy}; - f(&dir)?; +#[allow(dead_code)] +pub fn small_file_config() -> Config { + Config { + max_data_file_size: 16 * 1024, + ..Config::default() + } +} - _ = std::fs::remove_dir_all(&dir); - Ok(()) +#[allow(dead_code)] +pub fn rebuild_if_dirty_config() -> Config { + Config { + rebuild_strategy: RebuildStrategy::RebuildIfDirty, + ..Config::default() + } } #[allow(dead_code)] -pub const LONG_VAL: &str = "a very long valueeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; +pub fn corrupt_first_row_checksum(path: &Path) { + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(path.join("rows")) + .unwrap(); + let checksum_offset = 8; + file.seek(SeekFrom::Start(checksum_offset)).unwrap(); + let mut buf = [0u8; 8]; + file.read_exact(&mut buf).unwrap(); + let checksum = u64::from_le_bytes(buf) ^ 1; + file.seek(SeekFrom::Start(checksum_offset)).unwrap(); + file.write_all(&checksum.to_le_bytes()).unwrap(); + file.sync_all().unwrap(); +} diff --git a/tests/compaction.rs b/tests/compaction.rs new file mode 100644 index 0000000..d8a3c7d --- /dev/null +++ b/tests/compaction.rs @@ -0,0 +1,417 @@ +mod common; + +use std::collections::BTreeSet; +use std::sync::{Arc, 
Barrier}; +use std::thread; + +use candystore::{CandyStore, Config, Error}; +use tempfile::tempdir; + +#[test] +fn test_background_compaction() -> Result<(), Error> { + let dir = tempdir().unwrap(); + + let config = Config { + max_data_file_size: 1024, + compaction_min_threshold: 256, + ..Config::default() + }; + + let db = CandyStore::open(dir.path(), config)?; + + let data_files = || -> BTreeSet { + std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(|entry| { + entry + .ok() + .and_then(|entry| entry.file_name().into_string().ok()) + .filter(|name| name.starts_with("data_")) + }) + .collect() + }; + + for i in 0..100 { + db.set(format!("key{i:04}"), format!("value{i:04}"))?; + } + + let initial_files = data_files(); + assert!(initial_files.len() > 1, "should have multiple data files"); + + for i in 0..100 { + db.set(format!("key{i:04}"), format!("updated{i:04}"))?; + } + + let mut files_after = data_files(); + for _ in 0..100 { + std::thread::sleep(std::time::Duration::from_millis(10)); + files_after = data_files(); + if initial_files.iter().any(|file| !files_after.contains(file)) { + break; + } + } + + assert!( + initial_files.iter().any(|file| !files_after.contains(file)), + "compaction should have removed at least one initial data file: initial={initial_files:?}, current={files_after:?}" + ); + + for i in 0..100 { + let key = format!("key{i:04}"); + let expected = format!("updated{i:04}"); + assert_eq!( + db.get(&key)?, + Some(expected.into_bytes()), + "key {key} should have updated value after compaction" + ); + } + + Ok(()) +} + +#[test] +fn test_background_compaction_after_reopen_without_writes() -> Result<(), Error> { + let dir = tempdir().unwrap(); + + let config = Config { + max_data_file_size: 1024, + compaction_min_threshold: 256, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + + for i in 0..200 { + db.set(format!("key{i:04}"), vec![b'a'; 64])?; + } + + for i in 0..200 { + 
assert_eq!(db.remove(format!("key{i:04}"))?, Some(vec![b'a'; 64])); + } + + assert!( + std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(|e| e.ok()) + .filter(|e| e + .file_name() + .to_str() + .is_some_and(|s| s.starts_with("data_"))) + .count() + > 1, + "expected multiple data files before reopen" + ); + } + + let db = CandyStore::open(dir.path(), config)?; + + let count_data_files = || -> usize { + std::fs::read_dir(dir.path()) + .unwrap() + .filter(|e| { + e.as_ref() + .ok() + .and_then(|e| e.file_name().to_str().map(|s| s.starts_with("data_"))) + .unwrap_or(false) + }) + .count() + }; + + let files_before = count_data_files(); + assert!(files_before > 1, "expected compaction backlog after reopen"); + + for _ in 0..100 { + std::thread::sleep(std::time::Duration::from_millis(10)); + if count_data_files() < files_before { + break; + } + } + + let files_after = count_data_files(); + assert!( + files_after < files_before, + "reopened store should compact without new writes: before={files_before}, after={files_after}" + ); + + for i in 0..200 { + assert_eq!( + db.get(format!("key{i:04}"))?, + None, + "key{i:04} should remain deleted" + ); + } + + Ok(()) +} + +#[test] +fn test_background_compaction_drains_large_backlog() -> Result<(), Error> { + let dir = tempdir().unwrap(); + + let config = Config { + max_data_file_size: 256, + compaction_min_threshold: 128, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + + for i in 0..180 { + db.set(format!("key{i:04}"), vec![b'a'; 96])?; + } + + for i in 0..180 { + assert_eq!(db.remove(format!("key{i:04}"))?, Some(vec![b'a'; 96])); + } + } + + let db = CandyStore::open(dir.path(), config)?; + + let count_data_files = || -> usize { + std::fs::read_dir(dir.path()) + .unwrap() + .filter(|e| { + e.as_ref() + .ok() + .and_then(|e| e.file_name().to_str().map(|s| s.starts_with("data_"))) + .unwrap_or(false) + }) + .count() + }; + + let files_before = count_data_files(); + assert!( + files_before 
> 17, + "expected a large stale-file backlog before compaction starts: {files_before}" + ); + + for _ in 0..300 { + std::thread::sleep(std::time::Duration::from_millis(10)); + if count_data_files() <= files_before.saturating_sub(17) { + break; + } + } + + let files_after = count_data_files(); + assert!( + files_after <= files_before.saturating_sub(17), + "compaction worker should drain a large backlog after being woken: before={files_before}, after={files_after}" + ); + + for i in 0..180 { + assert_eq!( + db.get(format!("key{i:04}"))?, + None, + "key{i:04} should remain deleted" + ); + } + + Ok(()) +} + +#[test] +fn test_compaction_updates_reclaimed_bytes() -> Result<(), Error> { + let dir = tempdir().unwrap(); + + let config = Config { + max_data_file_size: 1024, + compaction_min_threshold: 256, + ..Config::default() + }; + + let db = CandyStore::open(dir.path(), config)?; + + for i in 0..100 { + db.set(format!("key{i:04}"), format!("value{i:04}"))?; + } + + // Update all keys to generate waste + for i in 0..100 { + db.set(format!("key{i:04}"), format!("updated{i:04}"))?; + } + + // Wait for compaction to run + for _ in 0..200 { + std::thread::sleep(std::time::Duration::from_millis(10)); + if db.stats().num_compactions > 0 { + break; + } + } + + let stats = db.stats(); + assert!( + stats.num_compactions > 0, + "compaction should have run at least once" + ); + assert!( + stats.reclaimed_bytes > 0, + "reclaimed_bytes should be positive after compaction" + ); + assert!( + stats.waste_bytes > 0, + "waste_bytes must be positive (total waste ever generated)" + ); + + for i in 0..100 { + let key = format!("key{i:04}"); + let expected = format!("updated{i:04}"); + assert_eq!(db.get(&key)?, Some(expected.into_bytes())); + } + + Ok(()) +} + +#[test] +fn test_concurrent_updates_with_compaction() -> Result<(), Error> { + const THREADS: usize = 8; + const KEYS: usize = 200; + const ROUNDS: usize = 20; + + let dir = tempdir().unwrap(); + + let config = Config { + 
max_data_file_size: 2048, + compaction_min_threshold: 512, + ..Config::default() + }; + + let db = Arc::new(CandyStore::open(dir.path(), config)?); + + // Seed initial keys + for i in 0..KEYS { + db.set(format!("key{i:04}"), format!("v0_{i:04}"))?; + } + + let barrier = Arc::new(Barrier::new(THREADS)); + let handles: Vec<_> = (0..THREADS) + .map(|t| { + let db = db.clone(); + let barrier = barrier.clone(); + thread::spawn(move || { + barrier.wait(); + for round in 0..ROUNDS { + for i in 0..KEYS { + let key = format!("key{i:04}"); + let val = format!("v{round}_{t}_{i:04}"); + db.set(&key, &val).unwrap(); + } + } + }) + }) + .collect(); + + for h in handles { + h.join().unwrap(); + } + + // Give compaction time to finish remaining work + for _ in 0..100 { + std::thread::sleep(std::time::Duration::from_millis(10)); + } + + // All keys should still be readable + for i in 0..KEYS { + let key = format!("key{i:04}"); + assert!(db.get(&key)?.is_some(), "key {key} should exist"); + } + + let stats = db.stats(); + assert!( + stats.num_compactions > 0, + "compaction should have run during concurrent updates" + ); + assert!( + stats.reclaimed_bytes > 0, + "reclaimed_bytes should be positive after concurrent updates + compaction" + ); + + Ok(()) +} + +#[test] +fn test_concurrent_removes_trigger_compaction() -> Result<(), Error> { + let dir = tempdir().unwrap(); + + let config = Config { + max_data_file_size: 1024, + compaction_min_threshold: 256, + ..Config::default() + }; + + let db = Arc::new(CandyStore::open(dir.path(), config)?); + + // Create keys spread across many files + for i in 0..300 { + db.set(format!("key{i:04}"), vec![b'x'; 64])?; + } + + let files_before = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(|e| e.ok()) + .filter(|e| { + e.file_name() + .to_str() + .is_some_and(|s| s.starts_with("data_")) + }) + .count(); + assert!(files_before > 2); + + // Remove all keys concurrently — tombstone waste should trigger compaction + const THREADS: usize = 8; + 
let barrier = Arc::new(Barrier::new(THREADS)); + let handles: Vec<_> = (0..THREADS) + .map(|t| { + let db = db.clone(); + let barrier = barrier.clone(); + thread::spawn(move || { + barrier.wait(); + for i in (t..300).step_by(THREADS) { + let _ = db.remove(format!("key{i:04}")).unwrap(); + } + }) + }) + .collect(); + + for h in handles { + h.join().unwrap(); + } + + // Wait for compaction + for _ in 0..200 { + std::thread::sleep(std::time::Duration::from_millis(10)); + let files_now = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(|e| e.ok()) + .filter(|e| { + e.file_name() + .to_str() + .is_some_and(|s| s.starts_with("data_")) + }) + .count(); + if files_now < files_before { + break; + } + } + + let files_after = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(|e| e.ok()) + .filter(|e| { + e.file_name() + .to_str() + .is_some_and(|s| s.starts_with("data_")) + }) + .count(); + assert!( + files_after < files_before, + "compaction should remove files after concurrent removes: before={files_before}, after={files_after}" + ); + + for i in 0..300 { + assert_eq!(db.get(format!("key{i:04}"))?, None); + } + + Ok(()) +} diff --git a/tests/concurrency.rs b/tests/concurrency.rs new file mode 100644 index 0000000..bca6a6e --- /dev/null +++ b/tests/concurrency.rs @@ -0,0 +1,332 @@ +mod common; + +use std::sync::{Arc, Barrier}; +use std::thread; + +use candystore::{CandyStore, Config, Error, SetStatus}; +use tempfile::tempdir; + +#[test] +fn test_multi_threaded_disjoint_writes() -> Result<(), Error> { + const THREADS: usize = 30; + const KEYS_PER_THREAD: usize = 10_000; + + let dir = tempdir().unwrap(); + let db = Arc::new(CandyStore::open(dir.path(), Config::default())?); + let barrier = Arc::new(Barrier::new(THREADS)); + + let handles: Vec<_> = (0..THREADS) + .map(|thread_idx| { + let db = db.clone(); + let barrier = barrier.clone(); + thread::spawn(move || { + barrier.wait(); + for key_idx in 0..KEYS_PER_THREAD { + let key = 
format!("mt_key_{thread_idx:02}_{key_idx:04}"); + let value = format!("mt_val_{thread_idx:02}_{key_idx:04}"); + assert!(matches!( + db.set(&key, &value).unwrap(), + SetStatus::CreatedNew + )); + assert_eq!(db.get(&key).unwrap(), Some(value.into())); + } + }) + }) + .collect(); + + for handle in handles { + handle.join().unwrap(); + } + + for thread_idx in 0..THREADS { + for key_idx in 0..KEYS_PER_THREAD { + let key = format!("mt_key_{thread_idx:02}_{key_idx:04}"); + let value = format!("mt_val_{thread_idx:02}_{key_idx:04}"); + assert_eq!(db.get(&key)?, Some(value.into())); + } + } + + Ok(()) +} + +#[test] +fn test_multi_threaded_reads() -> Result<(), Error> { + const THREADS: usize = 30; + const KEYS: usize = 10_000; + + let dir = tempdir().unwrap(); + let db = Arc::new(CandyStore::open(dir.path(), Config::default())?); + + for key_idx in 0..KEYS { + let key = format!("read_key_{key_idx:04}"); + let value = format!("read_val_{key_idx:04}"); + assert!(matches!(db.set(&key, &value)?, SetStatus::CreatedNew)); + } + + let barrier = Arc::new(Barrier::new(THREADS)); + let handles: Vec<_> = (0..THREADS) + .map(|_| { + let db = db.clone(); + let barrier = barrier.clone(); + thread::spawn(move || { + barrier.wait(); + for key_idx in 0..KEYS { + let key = format!("read_key_{key_idx:04}"); + let value = format!("read_val_{key_idx:04}"); + assert_eq!(db.get(&key).unwrap(), Some(value.into())); + } + }) + }) + .collect(); + + for handle in handles { + handle.join().unwrap(); + } + + Ok(()) +} + +#[test] +fn test_multi_threaded_same_key_writes() -> Result<(), Error> { + const THREADS: usize = 30; + + let dir = tempdir().unwrap(); + let db = Arc::new(CandyStore::open(dir.path(), Config::default())?); + let barrier = Arc::new(Barrier::new(THREADS)); + + let handles: Vec<_> = (0..THREADS) + .map(|thread_idx| { + let db = db.clone(); + let barrier = barrier.clone(); + thread::spawn(move || { + let value = format!("same_value_{thread_idx:02}"); + barrier.wait(); + 
db.set("shared-key", &value).unwrap(); + }) + }) + .collect(); + + for handle in handles { + handle.join().unwrap(); + } + + let final_value = db.get("shared-key")?.expect("value should exist"); + assert!( + std::str::from_utf8(&final_value) + .unwrap() + .starts_with("same_value_") + ); + + Ok(()) +} + +#[test] +fn test_multi_threaded_same_key_read_write() -> Result<(), Error> { + const THREADS: usize = 30; + const WRITES_PER_THREAD: usize = 10_000; + + let dir = tempdir().unwrap(); + let db = Arc::new(CandyStore::open(dir.path(), Config::default())?); + assert!(matches!( + db.set("shared-key", "seed")?, + SetStatus::CreatedNew + )); + + let barrier = Arc::new(Barrier::new(THREADS)); + let handles: Vec<_> = (0..THREADS) + .map(|thread_idx| { + let db = db.clone(); + let barrier = barrier.clone(); + thread::spawn(move || { + barrier.wait(); + for write_idx in 0..WRITES_PER_THREAD { + if thread_idx % 2 == 0 { + let value = format!("rw_{thread_idx:02}_{write_idx:02}"); + db.set("shared-key", &value).unwrap(); + } else { + let value = db.get("shared-key").unwrap(); + assert!(value.is_some()); + } + } + }) + }) + .collect(); + + for handle in handles { + handle.join().unwrap(); + } + + assert!(db.get("shared-key")?.is_some()); + + Ok(()) +} + +#[test] +fn test_multi_threaded_writes_with_splits_and_rotation() -> Result<(), Error> { + const THREADS: usize = 30; + const KEYS_PER_THREAD: usize = 2_000; // to avoid too many open files in small config + + let dir = tempdir().unwrap(); + let db = Arc::new(CandyStore::open(dir.path(), common::small_file_config())?); + let barrier = Arc::new(Barrier::new(THREADS)); + + let handles: Vec<_> = (0..THREADS) + .map(|thread_idx| { + let db = db.clone(); + let barrier = barrier.clone(); + thread::spawn(move || { + barrier.wait(); + for key_idx in 0..KEYS_PER_THREAD { + let key = format!("mt_split_rotate_key_{thread_idx:02}_{key_idx:04}"); + let value = format!( + "mt_split_rotate_val_{thread_idx:02}_{key_idx:04}_{}", + "x".repeat(48) 
+ ); + assert!(matches!( + db.set(&key, &value).unwrap(), + SetStatus::CreatedNew + )); + } + }) + }) + .collect(); + + for handle in handles { + handle.join().unwrap(); + } + + for thread_idx in 0..THREADS { + for key_idx in 0..KEYS_PER_THREAD { + let key = format!("mt_split_rotate_key_{thread_idx:02}_{key_idx:04}"); + let value = format!( + "mt_split_rotate_val_{thread_idx:02}_{key_idx:04}_{}", + "x".repeat(48) + ); + assert_eq!(db.get(&key)?, Some(value.into())); + } + } + + let data_file_count = std::fs::read_dir(dir.path()) + .map_err(Error::IOError)? + .filter_map(|entry| entry.ok()) + .filter(|entry| { + entry + .file_name() + .to_str() + .is_some_and(|name| name.starts_with("data_")) + }) + .count(); + assert!( + data_file_count > 1, + "expected concurrent writes to trigger rotation with small files" + ); + + Ok(()) +} + +fn expected_key(is_shared: bool, thread_idx: usize, key_idx: usize) -> Vec { + let mut key = String::new(); + if is_shared { + key.push_str(&format!("shared_key_{key_idx}")); + } else { + key.push_str(&format!("distinct_key_{thread_idx}_{key_idx}")); + } + // Mix in some large keys + if key_idx.is_multiple_of(7) { + key.push_str(&"K".repeat(150)); + } + key.into_bytes() +} + +fn expected_value(key: &[u8]) -> Vec { + let mut val = String::from_utf8_lossy(key).into_owned(); + let length_marker = key.iter().map(|&b| b as usize).sum::(); + if length_marker % 3 == 0 { + val.push_str(&"V".repeat(5000)); + } else if length_marker % 5 == 0 { + val.push_str(&"V".repeat(100)); // Medium + } + val.into_bytes() +} + +fn pseudo_rand(seed: &mut u64) -> u64 { + *seed = seed.wrapping_mul(6364136223846793005).wrapping_add(1); + *seed +} + +#[test] +fn test_concurrent_mixed_workload() -> Result<(), Error> { + const THREADS: usize = 30; + const OPERATIONS_PER_THREAD: usize = 10_000; + const SHARED_KEYS_TOTAL: usize = 2_000; + const DISTINCT_KEYS_PER_THREAD: usize = 300; // 30 * 300 = 9000 distinct total + + let dir = tempdir().unwrap(); + let db = 
Arc::new(CandyStore::open(dir.path(), Config::default())?); + let barrier = Arc::new(Barrier::new(THREADS)); + + let handles: Vec<_> = (0..THREADS) + .map(|thread_idx| { + let db = db.clone(); + let barrier = barrier.clone(); + thread::spawn(move || { + let mut seed = (thread_idx as u64 + 1) * 123456789; + + // Track our own distinct keys so we strictly assert them + let mut distinct_state = vec![false; DISTINCT_KEYS_PER_THREAD]; + + barrier.wait(); + + for _ in 0..OPERATIONS_PER_THREAD { + let r = pseudo_rand(&mut seed); + let is_shared = (r % 100) < 50; // 50% operations on shared pool, 50% on distinct + + let key_idx = if is_shared { + (pseudo_rand(&mut seed) as usize) % SHARED_KEYS_TOTAL + } else { + (pseudo_rand(&mut seed) as usize) % DISTINCT_KEYS_PER_THREAD + }; + + let key = expected_key(is_shared, thread_idx, key_idx); + let val = expected_value(&key); + + let op = pseudo_rand(&mut seed) % 100; + if op < 40 { + // 40% Set + db.set(&key, &val).unwrap(); + if !is_shared { + distinct_state[key_idx] = true; + } + } else if op < 80 { + // 40% Get + let actual = db.get(&key).unwrap(); + if is_shared { + // Validation: Either exactly the expected value or None! 
+ if let Some(v) = actual { + assert_eq!(v, val, "Shared key data corrupted!"); + } + } else { + if distinct_state[key_idx] { + assert_eq!(actual, Some(val), "Distinct key missing!"); + } else { + assert_eq!(actual, None, "Distinct key found but not set!"); + } + } + } else { + // 20% Remove + let _ = db.remove(&key).unwrap(); + if !is_shared { + distinct_state[key_idx] = false; + } + } + } + }) + }) + .collect(); + + for handle in handles { + handle.join().unwrap(); + } + + Ok(()) +} diff --git a/candy-crasher/src/main.rs b/tests/crasher.rs similarity index 70% rename from candy-crasher/src/main.rs rename to tests/crasher.rs index 02115bb..2da9658 100644 --- a/candy-crasher/src/main.rs +++ b/tests/crasher.rs @@ -1,27 +1,40 @@ +#![cfg(unix)] + use std::ptr::null_mut; use std::time::Duration; use std::{ops::Range, sync::atomic::AtomicU64, sync::atomic::Ordering::SeqCst}; -use candystore::{CandyStore, Config, Result}; -use rand::Rng; +use candystore::{CandyStore, Config, RebuildStrategy, Result}; +use rand::RngExt; + +#[cfg(debug_assertions)] +const TARGET: u32 = 100_000; +#[cfg(debug_assertions)] +const SLEEP_RANGE: Range = 300..800; +#[cfg(not(debug_assertions))] const TARGET: u32 = 1_000_000; -const CONFIG: Config = Config { - max_shard_size: 64 * 1024 * 1024, - min_compaction_threashold: 8 * 1024 * 1024, - hash_seed: *b"kOYLu0xvq2WtzcKJ", - expected_number_of_keys: 0, - max_concurrent_list_ops: 64, - truncate_up: true, - clear_on_unsupported_version: true, - mlock_headers: false, - num_compaction_threads: 4, -}; +#[cfg(not(debug_assertions))] +const SLEEP_RANGE: Range = 50..500; + +fn get_config() -> Config { + Config { + max_data_file_size: 64 * 1024 * 1024, + compaction_min_threshold: 8 * 1024 * 1024, + hash_key: (0xb047_a3ef_b334_9804, 0x807d_3135_878e_9b27), + initial_capacity: 1024, + max_concurrency: 64, + rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrFail, + ..Default::default() + } +} + +const DB_DIR: &str = "/tmp/dbdir_crash"; fn 
child_inserts() -> Result<()> { // our job is to create 1M entries while being killed by our evil parent - let store = CandyStore::open("dbdir", CONFIG)?; + let store = CandyStore::open(DB_DIR, get_config())?; let highest_bytes = store.get("highest")?.unwrap_or(vec![0, 0, 0, 0]); let highest = u32::from_le_bytes(highest_bytes.try_into().unwrap()); @@ -33,8 +46,8 @@ fn child_inserts() -> Result<()> { println!("child starting at {highest}"); for i in highest..TARGET { - store.set(&i.to_le_bytes(), "i am a key")?; - store.set("highest", &i.to_le_bytes())?; + store.set(i.to_le_bytes(), "i am a key")?; + store.set("highest", i.to_le_bytes())?; } println!("child finished"); @@ -44,7 +57,7 @@ fn child_inserts() -> Result<()> { fn child_removals() -> Result<()> { // our job is to remove 1M entries while being killed by our evil parent - let store = CandyStore::open("dbdir", CONFIG)?; + let store = CandyStore::open(DB_DIR, get_config())?; let lowest_bytes = store.get("lowest")?.unwrap_or(vec![0, 0, 0, 0]); let lowest = u32::from_le_bytes(lowest_bytes.try_into().unwrap()); @@ -55,9 +68,11 @@ fn child_removals() -> Result<()> { println!("child starting at {lowest}"); + assert!(!store.contains("highest")?, "\"highest\" got resurrected"); + for i in lowest..TARGET { - store.remove(&i.to_le_bytes())?; - store.set("lowest", &i.to_le_bytes())?; + store.remove(i.to_le_bytes())?; + store.set("lowest", i.to_le_bytes())?; } println!("child finished"); @@ -67,7 +82,7 @@ fn child_removals() -> Result<()> { fn child_list_inserts() -> Result<()> { // our job is to insert 1M entries to a list while being killed by our evil parent - let store = CandyStore::open("dbdir", CONFIG)?; + let store = CandyStore::open(DB_DIR, get_config())?; let highest_bytes = store.get("list_highest")?.unwrap_or(vec![0, 0, 0, 0]); let highest = u32::from_le_bytes(highest_bytes.try_into().unwrap()); @@ -81,7 +96,7 @@ fn child_list_inserts() -> Result<()> { for i in highest..TARGET { store.set_in_list("xxx", 
&i.to_le_bytes(), "yyy")?; - store.set("list_highest", &i.to_le_bytes())?; + store.set("list_highest", i.to_le_bytes())?; } println!("child finished"); @@ -91,7 +106,7 @@ fn child_list_inserts() -> Result<()> { fn child_list_removals() -> Result<()> { // our job is to remove 1M entries to a list while being killed by our evil parent - let store = CandyStore::open("dbdir", CONFIG)?; + let store = CandyStore::open(DB_DIR, get_config())?; let lowest_bytes = store.get("list_lowest")?.unwrap_or(vec![0, 0, 0, 0]); let lowest = u32::from_le_bytes(lowest_bytes.try_into().unwrap()); @@ -124,7 +139,7 @@ fn child_list_removals() -> Result<()> { old.is_none() || old == Some("yyy".into()), "{i} old={old:?}" ); - store.set("list_lowest", &i.to_le_bytes())?; + store.set("list_lowest", i.to_le_bytes())?; } println!("child finished"); @@ -133,7 +148,7 @@ fn child_list_removals() -> Result<()> { } fn child_list_iterator_removals() -> Result<()> { - let store = CandyStore::open("dbdir", CONFIG)?; + let store = CandyStore::open(DB_DIR, get_config())?; if rand::random() { //println!("FWD"); @@ -147,7 +162,7 @@ fn child_list_iterator_removals() -> Result<()> { } } else { //println!("BACK"); - for (i, res) in store.iter_list_backwards("xxx").enumerate() { + for (i, res) in store.iter_list("xxx").rev().enumerate() { let (k, v) = res?; let v2 = u32::from_le_bytes(v.try_into().unwrap()); if i == 0 { @@ -164,24 +179,24 @@ fn child_list_iterator_removals() -> Result<()> { fn parent_run( shared_stuff: &SharedStuff, + child_name: &str, mut child_func: impl FnMut() -> Result<()>, - sleep: Range, ) -> Result<()> { + println!("======== Parent starts {child_name} ========"); for i in 0.. 
{ let pid = unsafe { libc::fork() }; assert!(pid >= 0); if pid == 0 { let res = child_func(); - if res.is_err() { + if let Err(e) = res { + eprintln!("Child failed: {}", e); shared_stuff.failed.store(1, SeqCst); } - res.unwrap(); unsafe { libc::exit(0) }; } else { // parent - std::thread::sleep(Duration::from_millis( - rand::thread_rng().gen_range(sleep.clone()), - )); + let dur = Duration::from_millis(rand::rng().random_range(SLEEP_RANGE)); + std::thread::sleep(dur); let mut status = 0i32; let rc = unsafe { libc::waitpid(pid, &mut status, libc::WNOHANG) }; if rc == 0 { @@ -190,7 +205,7 @@ fn parent_run( panic!("child crashed at iteration {i}"); } - println!("[{i}] killing child"); + println!("[{i}] killing child after {dur:?}"); unsafe { libc::kill(pid, libc::SIGKILL); libc::wait(&mut status); @@ -218,8 +233,14 @@ struct SharedStuff { failed: AtomicU64, } -fn main() -> Result<()> { - _ = std::fs::remove_dir_all("dbdir"); +#[test] +fn test_crash_recovery() -> Result<()> { + // Only run on Linux because of fork/mmap + if cfg!(not(target_os = "linux")) { + return Ok(()); + } + + _ = std::fs::remove_dir_all(DB_DIR); let map_addr = unsafe { libc::mmap( @@ -234,29 +255,20 @@ fn main() -> Result<()> { assert_ne!(map_addr, libc::MAP_FAILED); let shared_stuff = unsafe { &*(map_addr as *const SharedStuff) }; + shared_stuff.failed.store(0, SeqCst); - // let store = CandyStore::open( - // "dbdir", - // Config { - // expected_number_of_keys: 1_000_000, - // clear_on_unsupported_version: true, - // ..Default::default() - // }, - // )?; - // drop(store); - - parent_run(shared_stuff, child_inserts, 10..300)?; + parent_run(shared_stuff, "child_inserts", child_inserts)?; { println!("Parent starts validating the DB..."); - let store = CandyStore::open("dbdir", CONFIG)?; + let store = CandyStore::open(DB_DIR, get_config())?; assert_eq!( store.remove("highest")?, Some((TARGET - 1).to_le_bytes().to_vec()) ); let mut count = 0; - for res in store.iter() { + for res in store.iter_items() 
{ let (k, v) = res?; assert_eq!(v, b"i am a key"); let k = u32::from_le_bytes(k.try_into().unwrap()); @@ -268,32 +280,28 @@ fn main() -> Result<()> { println!("DB validated successfully"); } - parent_run(shared_stuff, child_removals, 10..30)?; + parent_run(shared_stuff, "child_removals", child_removals)?; { println!("Parent starts validating the DB..."); - let store = CandyStore::open("dbdir", CONFIG)?; + let store = CandyStore::open(DB_DIR, get_config())?; assert_eq!( store.remove("lowest")?, Some((TARGET - 1).to_le_bytes().to_vec()) ); - assert_eq!( - store.iter().count(), - 0, - "{:?}", - store.iter().collect::>() - ); + let items = store.iter_items().collect::>(); + assert_eq!(items.len(), 0, "{items:?}"); println!("DB validated successfully"); } - parent_run(shared_stuff, child_list_inserts, 10..300)?; + parent_run(shared_stuff, "child_list_inserts", child_list_inserts)?; { println!("Parent starts validating the DB..."); - let store = CandyStore::open("dbdir", CONFIG)?; + let store = CandyStore::open(DB_DIR, get_config())?; assert_eq!( store.remove("list_highest")?, Some((TARGET - 1).to_le_bytes().to_vec()) @@ -308,12 +316,12 @@ fn main() -> Result<()> { println!("DB validated successfully"); } - parent_run(shared_stuff, child_list_removals, 10..80)?; + parent_run(shared_stuff, "child_list_removals", child_list_removals)?; { println!("Parent starts validating the DB..."); - let store = CandyStore::open("dbdir", CONFIG)?; + let store = CandyStore::open(DB_DIR, get_config())?; assert_eq!( store.remove("list_lowest")?, Some((TARGET - 1).to_le_bytes().to_vec()) @@ -321,19 +329,19 @@ fn main() -> Result<()> { assert_eq!(store.iter_list("xxx").count(), 0); - println!("leaked: {}", store.iter_raw().count()); + println!("leaked: {}", store.iter_items().count()); store.discard_list("xxx")?; println!("DB validated successfully"); } { - println!("Parent creates 1M members in a list..."); + println!("Parent creates {} members in a list...", TARGET); - let store = 
CandyStore::open("dbdir", CONFIG)?; + let store = CandyStore::open(DB_DIR, get_config())?; let t0 = std::time::Instant::now(); - for i in 0u32..1_000_000 { - if i % 65536 == 0 { + for i in 0u32..TARGET { + if i % 100000 == 0 { println!("{i}"); } store.set_in_list("xxx", &i.to_le_bytes(), &i.to_le_bytes())?; @@ -344,21 +352,26 @@ fn main() -> Result<()> { ); } - parent_run(shared_stuff, child_list_iterator_removals, 10..200)?; + parent_run( + shared_stuff, + "child_list_iterator_removals", + child_list_iterator_removals, + )?; { println!("Parent starts validating the DB..."); - let store = CandyStore::open("dbdir", CONFIG)?; + let store = CandyStore::open(DB_DIR, get_config())?; assert_eq!(store.iter_list("xxx").count(), 0); // we will surely leak some entries that were unlinked from the list before they were removed - println!("leaked: {}", store.iter_raw().count()); + println!("leaked: {}", store.iter_items().count()); store.discard_list("xxx")?; println!("DB validated successfully"); } + _ = std::fs::remove_dir_all(DB_DIR); Ok(()) } diff --git a/tests/data_loss.rs b/tests/data_loss.rs new file mode 100644 index 0000000..52007ee --- /dev/null +++ b/tests/data_loss.rs @@ -0,0 +1,157 @@ +use std::fs::OpenOptions; +use std::io::{Seek, SeekFrom, Write}; + +use candystore::{CandyStore, Config}; + +fn first_data_file_path(dir: &std::path::Path) -> std::path::PathBuf { + std::fs::read_dir(dir) + .unwrap() + .filter_map(|entry| entry.ok()) + .find(|entry| entry.file_name().to_string_lossy().starts_with("data_")) + .unwrap() + .path() +} + +fn zero_range(path: &std::path::Path, start: u64, len: usize) { + let mut file = OpenOptions::new().write(true).open(path).unwrap(); + file.seek(SeekFrom::Start(start)).unwrap(); + file.write_all(&vec![0u8; len]).unwrap(); + file.sync_all().unwrap(); +} + +#[test] +fn test_zeroed_tail_data_file_lookup() { + let dir = tempfile::tempdir().unwrap(); + let config = Config::default(); + + { + let store = CandyStore::open(dir.path(), 
config).unwrap(); + for idx in 0..1000 { + store + .set( + format!("key:{idx:04}").as_bytes(), + format!("val:{idx:04}").as_bytes(), + ) + .unwrap(); + } + store.flush().unwrap(); + } + + let data_path = first_data_file_path(dir.path()); + let file_len = std::fs::metadata(&data_path).unwrap().len(); + let zero_len = 2400usize; + zero_range(&data_path, file_len - zero_len as u64, zero_len); + + let store = CandyStore::open(dir.path(), config).unwrap(); + let mut missing = 0; + for idx in 0..1000 { + let key = format!("key:{idx:04}"); + match store.get(key.as_bytes()).unwrap() { + Some(value) => assert_eq!(value, format!("val:{idx:04}").into_bytes()), + None => missing += 1, + } + } + + assert!(missing > 0); + assert!(missing < 1000); +} + +#[test] +fn test_truncated_data_file_queues() { + let dir = tempfile::tempdir().unwrap(); + let config = Config::default(); + let num_queues = 50; + let items_per_queue = 10; + + { + let store = CandyStore::open(dir.path(), config).unwrap(); + for item_idx in 0..items_per_queue { + for queue_idx in 0..num_queues { + store + .push_to_queue_tail( + format!("queue:{queue_idx}").as_str(), + format!("val:{item_idx:04}").as_bytes(), + ) + .unwrap(); + } + } + store.flush().unwrap(); + } + + let data_path = first_data_file_path(dir.path()); + let file = OpenOptions::new().write(true).open(&data_path).unwrap(); + let file_len = file.metadata().unwrap().len(); + file.set_len(file_len - 2400).unwrap(); + + let store = CandyStore::open(dir.path(), config).unwrap(); + + let mut total_missing = 0; + for queue_idx in 0..num_queues { + let queue_key = format!("queue:{queue_idx}"); + let items: Vec<_> = store + .iter_queue(&queue_key) + .collect::>>() + .unwrap(); + + assert!(items.len() <= items_per_queue); + total_missing += items_per_queue - items.len(); + + for (idx, (_queue_idx, value)) in items.into_iter().enumerate() { + assert_eq!(value, format!("val:{idx:04}").into_bytes()); + } + } + + assert!(total_missing > 0); + assert!(total_missing 
< num_queues * items_per_queue); +} + +#[test] +fn test_truncated_data_file_lists() { + let dir = tempfile::tempdir().unwrap(); + let config = Config::default(); + let num_lists = 50; + let items_per_list = 10; + + { + let store = CandyStore::open(dir.path(), config).unwrap(); + for item_idx in 0..items_per_list { + for list_idx in 0..num_lists { + store + .set_in_list( + format!("list:{list_idx}").as_str(), + format!("key:{item_idx:04}").as_bytes(), + format!("val:{item_idx:04}").as_bytes(), + ) + .unwrap(); + } + } + store.flush().unwrap(); + } + + let data_path = first_data_file_path(dir.path()); + let file = OpenOptions::new().write(true).open(&data_path).unwrap(); + let file_len = file.metadata().unwrap().len(); + file.set_len(file_len - 2400).unwrap(); + + let store = CandyStore::open(dir.path(), config).unwrap(); + + let mut total_missing = 0; + for list_idx in 0..num_lists { + let list_key = format!("list:{list_idx}"); + let items: Vec<_> = store + .iter_list(&list_key) + .collect::>>() + .unwrap(); + + assert!(items.len() <= items_per_list); + total_missing += items_per_list - items.len(); + + for (idx, (key, value)) in items.into_iter().enumerate() { + assert_eq!(key, format!("key:{idx:04}").into_bytes()); + assert_eq!(value, format!("val:{idx:04}").into_bytes()); + } + } + + assert!(total_missing > 0); + assert!(total_missing < num_lists * items_per_list); +} diff --git a/tests/double_open.rs b/tests/double_open.rs new file mode 100644 index 0000000..b722a50 --- /dev/null +++ b/tests/double_open.rs @@ -0,0 +1,20 @@ +use candystore::{CandyStore, Config, Error}; +use tempfile::tempdir; + +#[test] +fn test_double_open_fails() { + let dir = tempdir().unwrap(); + let _db1 = CandyStore::open(dir.path(), Config::default()).unwrap(); + + // from the same thread + let db2_res = CandyStore::open(dir.path(), Config::default()); + assert!(matches!(db2_res, Err(Error::LockfileTaken(_, _)))); + + // from a different thread + std::thread::spawn(move || { + let db3_res 
= CandyStore::open(dir.path(), Config::default()); + assert!(matches!(db3_res, Err(Error::LockfileTaken(_, _)))); + }) + .join() + .unwrap(); +} diff --git a/tests/iteration.rs b/tests/iteration.rs new file mode 100644 index 0000000..0d47e40 --- /dev/null +++ b/tests/iteration.rs @@ -0,0 +1,99 @@ +mod common; + +use std::collections::HashMap; + +use candystore::{CandyStore, Config, Error}; +use tempfile::tempdir; + +#[test] +fn test_iter_items_empty_db() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let items: Vec<_> = db.iter_items().collect::>()?; + assert!(items.is_empty()); + + Ok(()) +} + +#[test] +fn test_iter_items_basic() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let mut expected = HashMap::new(); + for i in 0..100 { + let key = format!("iter_key_{i:04}"); + let value = format!("iter_val_{i:04}"); + db.set(&key, &value)?; + expected.insert(key.into_bytes(), value.into_bytes()); + } + + let items: HashMap, Vec> = db.iter_items().collect::>()?; + assert_eq!(items.len(), expected.len()); + assert_eq!(items, expected); + + Ok(()) +} + +#[test] +fn test_iter_items_after_updates_and_removes() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + for i in 0..50 { + db.set(format!("key_{i:04}"), format!("val_{i:04}"))?; + } + + for i in 0..20 { + db.set(format!("key_{i:04}"), format!("updated_{i:04}"))?; + } + + for i in 40..50 { + db.remove(format!("key_{i:04}"))?; + } + + let items: HashMap, Vec> = db.iter_items().collect::>()?; + assert_eq!(items.len(), 40); + + for i in 0..20 { + let key = format!("key_{i:04}"); + assert_eq!( + items.get(key.as_bytes()), + Some(&format!("updated_{i:04}").into_bytes()) + ); + } + for i in 20..40 { + let key = format!("key_{i:04}"); + assert_eq!( + items.get(key.as_bytes()), + 
Some(&format!("val_{i:04}").into_bytes()) + ); + } + for i in 40..50 { + let key = format!("key_{i:04}"); + assert!(!items.contains_key(key.as_bytes())); + } + + Ok(()) +} + +#[test] +fn test_iter_items_with_splits_and_rotation() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), common::small_file_config())?; + + let mut expected = HashMap::new(); + for i in 0..2000 { + let key = format!("split_iter_key_{i:05}"); + let value = format!("split_iter_val_{i:05}_{}", "x".repeat(48)); + db.set(&key, &value)?; + expected.insert(key.into_bytes(), value.into_bytes()); + } + + let items: HashMap, Vec> = db.iter_items().collect::>()?; + assert_eq!(items.len(), expected.len()); + assert_eq!(items, expected); + + Ok(()) +} diff --git a/tests/list.rs b/tests/list.rs new file mode 100644 index 0000000..f815d33 --- /dev/null +++ b/tests/list.rs @@ -0,0 +1,383 @@ +use candystore::{ + CandyStore, Config, GetOrCreateStatus, ListCompactionParams, ReplaceStatus, SetStatus, +}; +use std::sync::{Arc, Barrier}; +use std::thread; +use tempfile::tempdir; + +#[test] +fn test_list_set_get_len() { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default()).unwrap(); + let list = b"l1"; + + assert_eq!(db.list_len(list).unwrap(), 0); + + assert!(matches!( + db.set_in_list(list, b"k1", b"v1").unwrap(), + SetStatus::CreatedNew + )); + assert!(matches!( + db.set_in_list(list, b"k2", b"v2").unwrap(), + SetStatus::CreatedNew + )); + + assert_eq!(db.list_len(list).unwrap(), 2); + assert_eq!(db.get_from_list(list, b"k1").unwrap(), Some(b"v1".to_vec())); + assert_eq!(db.get_from_list(list, b"k2").unwrap(), Some(b"v2".to_vec())); + + assert!( + matches!(db.set_in_list(list, b"k1", b"v1b").unwrap(), SetStatus::PrevValue(ref value) if value == b"v1") + ); + assert_eq!(db.list_len(list).unwrap(), 2); + assert_eq!( + db.get_from_list(list, b"k1").unwrap(), + Some(b"v1b".to_vec()) + ); +} + +#[test] +fn 
test_list_remove_and_iteration() { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default()).unwrap(); + let list = b"l2"; + + db.set_in_list(list, b"a", b"1").unwrap(); + db.set_in_list(list, b"b", b"2").unwrap(); + db.set_in_list(list, b"c", b"3").unwrap(); + + assert_eq!( + db.remove_from_list(list, b"a").unwrap(), + Some(b"1".to_vec()) + ); + let items: Vec<_> = db.iter_list(list).map(|entry| entry.unwrap()).collect(); + assert_eq!( + items, + vec![ + (b"b".to_vec(), b"2".to_vec()), + (b"c".to_vec(), b"3".to_vec()) + ] + ); + + assert_eq!( + db.remove_from_list(list, b"c").unwrap(), + Some(b"3".to_vec()) + ); + let items: Vec<_> = db.iter_list(list).map(|entry| entry.unwrap()).collect(); + assert_eq!(items, vec![(b"b".to_vec(), b"2".to_vec())]); + + assert_eq!( + db.remove_from_list(list, b"b").unwrap(), + Some(b"2".to_vec()) + ); + assert_eq!(db.list_len(list).unwrap(), 0); + assert_eq!(db.iter_list(list).count(), 0); +} + +#[test] +fn test_list_iteration_skips_holes_and_reverse() { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default()).unwrap(); + let list = b"l3"; + + db.set_in_list(list, b"k1", b"v1").unwrap(); + db.set_in_list(list, b"k2", b"v2").unwrap(); + db.set_in_list(list, b"k3", b"v3").unwrap(); + db.set_in_list(list, b"k4", b"v4").unwrap(); + db.remove_from_list(list, b"k2").unwrap(); + + let forward: Vec<_> = db.iter_list(list).map(|entry| entry.unwrap()).collect(); + assert_eq!( + forward, + vec![ + (b"k1".to_vec(), b"v1".to_vec()), + (b"k3".to_vec(), b"v3".to_vec()), + (b"k4".to_vec(), b"v4".to_vec()), + ] + ); + + let reverse: Vec<_> = db + .iter_list(list) + .rev() + .map(|entry| entry.unwrap()) + .collect(); + assert_eq!( + reverse, + vec![ + (b"k4".to_vec(), b"v4".to_vec()), + (b"k3".to_vec(), b"v3".to_vec()), + (b"k1".to_vec(), b"v1".to_vec()), + ] + ); +} + +#[test] +fn test_list_persistence_and_discard() { + let dir = tempdir().unwrap(); + let path = 
dir.path().to_path_buf(); + + { + let db = CandyStore::open(&path, Config::default()).unwrap(); + db.set_in_list(b"persist", b"k", b"v").unwrap(); + } + + { + let db = CandyStore::open(&path, Config::default()).unwrap(); + assert_eq!(db.list_len(b"persist").unwrap(), 1); + assert_eq!( + db.get_from_list(b"persist", b"k").unwrap(), + Some(b"v".to_vec()) + ); + assert!(db.discard_list(b"persist").unwrap()); + assert_eq!(db.list_len(b"persist").unwrap(), 0); + } +} + +#[test] +fn test_list_promoting_matches_legacy_tail_semantics() { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default()).unwrap(); + let list = b"promo"; + + db.set_in_list(list, b"a", b"1").unwrap(); + db.set_in_list(list, b"b", b"2").unwrap(); + db.set_in_list(list, b"c", b"3").unwrap(); + + assert!( + matches!(db.set_in_list_promoting(list, b"b", b"2x").unwrap(), SetStatus::PrevValue(ref value) if value == b"2") + ); + + let items: Vec<_> = db.iter_list(list).map(|entry| entry.unwrap()).collect(); + assert_eq!(items.last().unwrap(), &(b"b".to_vec(), b"2x".to_vec())); + + assert!(matches!( + db.set_in_list_promoting(list, b"d", b"4").unwrap(), + SetStatus::CreatedNew + )); + + let items: Vec<_> = db.iter_list(list).map(|entry| entry.unwrap()).collect(); + assert_eq!(items.last().unwrap(), &(b"d".to_vec(), b"4".to_vec())); +} + +#[test] +fn test_list_compact_if_needed() { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default()).unwrap(); + let list = b"compact"; + + db.set_in_list(list, b"a", b"1").unwrap(); + db.set_in_list(list, b"b", b"2").unwrap(); + db.set_in_list(list, b"c", b"3").unwrap(); + db.remove_from_list(list, b"b").unwrap(); + + assert!( + db.compact_list_if_needed( + list, + ListCompactionParams { + min_length: 1, + min_holes_ratio: 0.2 + } + ) + .unwrap() + ); + let items: Vec<_> = db.iter_list(list).map(|entry| entry.unwrap()).collect(); + assert_eq!( + items, + vec![ + (b"a".to_vec(), b"1".to_vec()), + 
(b"c".to_vec(), b"3".to_vec()) + ] + ); + assert!( + !db.compact_list_if_needed( + list, + ListCompactionParams { + min_length: 1, + min_holes_ratio: 0.5 + } + ) + .unwrap() + ); +} + +#[test] +fn test_replace_and_get_or_create_in_list() { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default()).unwrap(); + let list = b"replace"; + + assert!( + matches!(db.get_or_create_in_list(list, b"k1", b"v1").unwrap(), GetOrCreateStatus::CreatedNew(ref value) if value == b"v1") + ); + assert!( + matches!(db.get_or_create_in_list(list, b"k1", b"other").unwrap(), GetOrCreateStatus::ExistingValue(ref value) if value == b"v1") + ); + assert!( + matches!(db.replace_in_list(list, b"k1", b"v2", Some(b"zz")).unwrap(), ReplaceStatus::WrongValue(ref value) if value == b"v1") + ); + assert!( + matches!(db.replace_in_list(list, b"k1", b"v2", None::<&[u8]>).unwrap(), ReplaceStatus::PrevValue(ref value) if value == b"v1") + ); + assert_eq!(db.get_from_list(list, b"k1").unwrap(), Some(b"v2".to_vec())); + assert!(matches!( + db.replace_in_list(list, b"missing", b"v", None::<&[u8]>) + .unwrap(), + ReplaceStatus::DoesNotExist + )); +} + +#[test] +fn test_list_pop_peek_and_retain() { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default()).unwrap(); + let list = b"poppeek"; + + db.set_in_list(list, b"k1", b"v1").unwrap(); + db.set_in_list(list, b"k2", b"v2").unwrap(); + db.set_in_list(list, b"k3", b"v3").unwrap(); + + assert_eq!( + db.peek_list_head(list).unwrap().unwrap(), + (b"k1".to_vec(), b"v1".to_vec()) + ); + assert_eq!( + db.peek_list_tail(list).unwrap().unwrap(), + (b"k3".to_vec(), b"v3".to_vec()) + ); + assert_eq!( + db.pop_list_head(list).unwrap().unwrap(), + (b"k1".to_vec(), b"v1".to_vec()) + ); + assert_eq!( + db.pop_list_tail(list).unwrap().unwrap(), + (b"k3".to_vec(), b"v3".to_vec()) + ); + + db.set_in_list(list, b"k4", b"v4").unwrap(); + db.set_in_list(list, b"k5", b"v5").unwrap(); + db.retain_in_list(list, |key, 
_| Ok(key != b"k4")).unwrap(); + + let items: Vec<_> = db.iter_list(list).map(|entry| entry.unwrap()).collect(); + assert_eq!( + items, + vec![ + (b"k2".to_vec(), b"v2".to_vec()), + (b"k5".to_vec(), b"v5".to_vec()) + ] + ); + assert!( + !db.compact_list_if_needed( + list, + ListCompactionParams { + min_length: 1, + min_holes_ratio: 0.1, + }, + ) + .unwrap() + ); +} + +#[test] +fn test_list_compaction_uses_span_like_legacy_candystore() { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default()).unwrap(); + let list = b"span_compact"; + + for idx in 0..10u8 { + db.set_in_list(list, &[idx], &[idx]).unwrap(); + } + for idx in 1..9u8 { + db.remove_from_list(list, &[idx]).unwrap(); + } + + assert!( + db.compact_list_if_needed( + list, + ListCompactionParams { + min_length: 5, + min_holes_ratio: 0.5, + }, + ) + .unwrap() + ); +} + +#[test] +fn test_list_concurrency_basic() { + let dir = tempdir().unwrap(); + let db = Arc::new(CandyStore::open(dir.path(), Config::default()).unwrap()); + let list_key = b"concurrent_list"; + let num_threads = 8; + let items_per_thread = 200; + let barrier = Arc::new(Barrier::new(num_threads)); + + let mut handles = Vec::new(); + for thread_idx in 0..num_threads { + let db = Arc::clone(&db); + let barrier = Arc::clone(&barrier); + handles.push(thread::spawn(move || { + barrier.wait(); + for item_idx in 0..items_per_thread { + let key = format!("t{thread_idx}-{item_idx}"); + let value = format!("val-{thread_idx}-{item_idx}"); + db.set_in_list(list_key, key.as_bytes(), value.as_bytes()) + .unwrap(); + } + + for item_idx in 0..items_per_thread { + let key = format!("t{thread_idx}-{item_idx}"); + db.remove_from_list(list_key, key.as_bytes()).unwrap(); + } + })); + } + + for handle in handles { + handle.join().unwrap(); + } + + assert_eq!(db.list_len(list_key).unwrap(), 0); + assert_eq!(db.iter_list(list_key).count(), 0); +} + +#[test] +fn test_list_concurrency_promoting() { + let dir = tempdir().unwrap(); + let db 
= Arc::new(CandyStore::open(dir.path(), Config::default()).unwrap()); + let list_key = b"concurrent_list_promo"; + let num_threads = 4; + let items_per_thread = 100; + let barrier = Arc::new(Barrier::new(num_threads)); + + for idx in 0..100 { + db.set_in_list(list_key, format!("base-{idx}").as_bytes(), b"base") + .unwrap(); + } + + let mut handles = Vec::new(); + for thread_idx in 0..num_threads { + let db = Arc::clone(&db); + let barrier = Arc::clone(&barrier); + handles.push(thread::spawn(move || { + barrier.wait(); + for item_idx in 0..items_per_thread { + let key = format!("t{thread_idx}-{item_idx}"); + db.set_in_list_promoting(list_key, key.as_bytes(), b"val") + .unwrap(); + } + + for base_idx in 0..50 { + let key = format!("base-{base_idx}"); + db.set_in_list_promoting(list_key, key.as_bytes(), b"base-promoted") + .unwrap(); + } + })); + } + + for handle in handles { + handle.join().unwrap(); + } + + let expected_len = 100 + num_threads * items_per_thread; + assert_eq!(db.list_len(list_key).unwrap(), expected_len); + assert_eq!(db.iter_list(list_key).count(), expected_len); +} diff --git a/tests/maintenance.rs b/tests/maintenance.rs new file mode 100644 index 0000000..0d3d758 --- /dev/null +++ b/tests/maintenance.rs @@ -0,0 +1,88 @@ +mod common; + +use std::fs; + +use candystore::{CandyStore, Config, Error}; +use tempfile::tempdir; + +#[test] +fn test_clear_resets_store_files_and_contents() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 1024, + ..Config::default() + }; + + let mut db = CandyStore::open(dir.path(), config)?; + + for i in 0..100 { + db.set(format!("key{i:04}"), vec![b'x'; 64])?; + } + for i in 0..50 { + db.set(format!("key{i:04}"), vec![b'y'; 64])?; + } + for i in 50..75 { + db.remove(format!("key{i:04}"))?; + } + + let data_files_before = std::fs::read_dir(dir.path()) + .map_err(Error::IOError)? 
+ .filter_map(|entry| entry.ok()) + .filter(|entry| { + entry + .file_name() + .to_str() + .is_some_and(|name| name.starts_with("data_")) + }) + .count(); + assert!(data_files_before > 1); + + fs::write(dir.path().join("extra.txt"), b"junk").map_err(Error::IOError)?; + fs::create_dir(dir.path().join("extra_dir")).map_err(Error::IOError)?; + fs::write(dir.path().join("extra_dir").join("nested.txt"), b"junk").map_err(Error::IOError)?; + + db.clear()?; + + assert!(db.get("key0000")?.is_none()); + assert_eq!(db.iter_items().count(), 0); + assert!(!dir.path().join("extra.txt").exists()); + assert!(!dir.path().join("extra_dir").exists()); + + let data_files_after = std::fs::read_dir(dir.path()) + .map_err(Error::IOError)? + .filter_map(|entry| entry.ok()) + .filter(|entry| { + entry + .file_name() + .to_str() + .is_some_and(|name| name.starts_with("data_")) + }) + .count(); + assert_eq!(data_files_after, 1); + + db.set("fresh", "value")?; + assert_eq!(db.get("fresh")?, Some(b"value".to_vec())); + drop(db); + + let reopened = CandyStore::open(dir.path(), config)?; + assert!(reopened.get("key0000")?.is_none()); + assert_eq!(reopened.get("fresh")?, Some(b"value".to_vec())); + + Ok(()) +} + +#[test] +fn test_explicit_close_releases_lock_and_persists_clean_shutdown() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config::default(); + + let db = CandyStore::open(dir.path(), config)?; + db.set("key", "value")?; + drop(db); + + let reopened = CandyStore::open(dir.path(), config)?; + assert!(reopened.was_clean_shutdown()); + assert_eq!(reopened.get("key")?, Some(b"value".to_vec())); + + Ok(()) +} diff --git a/tests/metrics.rs b/tests/metrics.rs new file mode 100644 index 0000000..141b055 --- /dev/null +++ b/tests/metrics.rs @@ -0,0 +1,115 @@ +use candystore::{CandyStore, Config}; + +const ROW_WIDTH: u64 = (16 * 21) as u64; + +#[test] +fn test_metrics_updates() -> Result<(), Box> { + let dir = tempfile::tempdir()?; + let config = Config { + initial_capacity: 
1000, + ..Default::default() + }; + + let db = CandyStore::open(dir.path(), config)?; + + let stats = db.stats(); + assert_eq!(stats.num_rows, 8); + assert_eq!(stats.capacity, 8 * ROW_WIDTH); + assert_eq!(stats.num_items, 0); + assert_eq!(stats.fill_level(), 0.0); + assert_eq!(stats.num_positive_lookups, 0); + assert_eq!(stats.num_negative_lookups, 0); + assert_eq!(stats.num_read_ops, 0); + assert_eq!(stats.num_read_bytes, 0); + assert_eq!(stats.num_write_ops, 0); + assert_eq!(stats.num_write_bytes, 0); + assert_eq!(stats.num_created, 0); + assert_eq!(stats.num_removed, 0); + assert_eq!(stats.num_replaced, 0); + assert_eq!(stats.written_bytes, 0); + + db.set("key1", "val1")?; + + let stats = db.stats(); + assert_eq!(stats.num_items, 1); + assert_eq!(stats.num_entries(), 1); + assert_eq!(stats.num_inserts(), 1); + assert_eq!(stats.num_updates(), 0); + assert_eq!(stats.num_removals(), 0); + assert_eq!(stats.num_created, 1); + assert_eq!(stats.num_removed, 0); + assert_eq!(stats.num_replaced, 0); + assert!(stats.written_bytes > 0); + assert_eq!(stats.num_write_ops, 1); + assert!(stats.num_write_bytes > 0); + assert!(stats.index_size_bytes > 0); + assert_eq!(stats.num_data_files, 1); + assert!(stats.occupied_bytes() > 0); + + db.set("key1", "val2")?; + + let stats = db.stats(); + assert_eq!(stats.num_items, 1); + assert_eq!(stats.num_updates(), 1); + assert_eq!(stats.num_created, 1); + assert_eq!(stats.num_replaced, 1); + assert_eq!(stats.num_removed, 0); + assert_eq!(stats.num_write_ops, 2); + + db.remove("key1")?; + + let stats = db.stats(); + assert_eq!(stats.num_items, 0); + assert_eq!(stats.num_removals(), 1); + assert!(stats.wasted_bytes() > 0); + assert_eq!(stats.num_write_ops, 3); + assert_eq!(stats.num_created, 1); + assert_eq!(stats.num_replaced, 1); + assert_eq!(stats.num_removed, 1); + + assert_eq!(db.get("missing")?, None); + assert_eq!(db.get("key1")?, None); + + let stats = db.stats(); + assert_eq!(stats.num_positive_lookups, 0); + 
assert_eq!(stats.num_negative_lookups, 2); + assert_eq!(stats.num_read_ops, 2); + assert!(stats.num_read_bytes > 0); + + Ok(()) +} + +#[test] +fn test_metrics_compaction() -> Result<(), Box> { + let dir = tempfile::tempdir()?; + let config = Config { + max_data_file_size: 4096, + compaction_min_threshold: 10, + ..Default::default() + }; + + let db = CandyStore::open(dir.path(), config)?; + + for i in 0..500 { + db.set( + "key", + format!("value_that_is_long_enough_to_take_up_space_{}", i), + )?; + } + + for i in 0..100 { + db.set(format!("other_key_{}", i), "val")?; + std::thread::sleep(std::time::Duration::from_millis(2)); + } + + let stats = db.stats(); + assert!(stats.written_bytes > 0); + assert!(stats.num_replaced > 0); + assert!(stats.num_items > 0); + assert!(stats.capacity >= stats.num_items); + assert!(stats.fill_level() > 0.0); + assert!(stats.num_write_ops > 0); + assert!(stats.num_write_bytes > 0); + + Ok(()) +} diff --git a/tests/proptest_state_machine.rs b/tests/proptest_state_machine.rs new file mode 100644 index 0000000..f97584e --- /dev/null +++ b/tests/proptest_state_machine.rs @@ -0,0 +1,97 @@ +use candystore::{CandyStore, Config, RebuildStrategy}; +use proptest::prelude::*; +use std::collections::BTreeMap; +use tempfile::TempDir; + +#[derive(Debug, Clone)] +enum Op { + Set(String, String), + Get(String), + Remove(String), + CleanShutdown, + SimulateCrash, +} + +fn op_strategy() -> impl Strategy { + // Narrow key space to highly encourage collisions (overwrites, deletes of existing keys) + let key_strat = "[a-d]{1,2}"; + // Variable size payload to occasionally trigger rotation in small stores + let val_strat = "[a-zA-Z0-9]{0,50}"; + + prop_oneof![ + // Weight probabilities so we mostly mutate state, check it, and occasionally restart + 40 => (key_strat, val_strat).prop_map(|(k, v)| Op::Set(k, v)), + 40 => key_strat.prop_map(Op::Get), + 20 => key_strat.prop_map(Op::Remove), + 8 => Just(Op::CleanShutdown), + 2 => Just(Op::SimulateCrash), + ] +} + 
+proptest! { + // 200 randomized sequences with up to 2000 operations each for a deeper stress test + #![proptest_config(ProptestConfig::with_cases(200))] + + #[test] + fn test_candystore_state_machine(ops in proptest::collection::vec(op_strategy(), 1..2000)) { + let dir = TempDir::new().unwrap(); + + // Small file size so we generate many data files, rotations, and splits within 200 operations + let config = Config { + max_data_file_size: 1024 * 4, // 4KB boundaries + rebuild_strategy: RebuildStrategy::RebuildIfDirty, + ..Default::default() + }; + + // The authoritative reference state + let mut oracle = BTreeMap::new(); + + let mut db_opt = Some(CandyStore::open(dir.path(), config).unwrap()); + + for op in ops { + match op { + Op::Set(k, v) => { + oracle.insert(k.clone(), v.clone()); + let db = db_opt.as_ref().unwrap(); + let _ = db.set(k.as_bytes(), v.as_bytes()).unwrap(); + } + Op::Get(k) => { + let db = db_opt.as_ref().unwrap(); + let expected = oracle.get(&k); + let actual = db.get(k.as_bytes()).unwrap(); + + match expected { + Some(v) => assert_eq!(Some(v.as_bytes()), actual.as_deref()), + None => assert_eq!(None, actual), + } + } + Op::Remove(k) => { + oracle.remove(&k); + let db = db_opt.as_ref().unwrap(); + let _ = db.remove(k.as_bytes()).unwrap(); + } + Op::CleanShutdown => { + // Close the current DB instance by dropping it, then reopen + drop(db_opt.take().unwrap()); + db_opt = Some(CandyStore::open(dir.path(), config).unwrap()); + assert!(db_opt.as_ref().unwrap().was_clean_shutdown()); + } + Op::SimulateCrash => { + // Force a rebuild + db_opt.take().unwrap()._abort_for_testing(); + db_opt = Some(CandyStore::open(dir.path(), config).unwrap()); + assert!(!db_opt.as_ref().unwrap().was_clean_shutdown()); + } + } + } + + // Final verification pass: check the oracle exact matches internal state + let db = db_opt.as_ref().unwrap(); + + // Verify every key that should exist, DOES exist + for (k, v) in oracle.iter() { + let actual = 
db.get(k.as_bytes()).unwrap().expect("Key should exist in store"); + assert_eq!(v.as_bytes(), actual.as_slice()); + } + } +} diff --git a/tests/queue.rs b/tests/queue.rs new file mode 100644 index 0000000..947250a --- /dev/null +++ b/tests/queue.rs @@ -0,0 +1,366 @@ +mod common; + +use std::sync::{ + Arc, + atomic::{AtomicBool, AtomicUsize, Ordering}, +}; + +use candystore::{CandyStore, Config, Error}; +use tempfile::tempdir; + +#[test] +fn test_queue_fifo() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + db.push_to_queue_tail(&b"my_queue"[..], &b"item1"[..])?; + db.push_to_queue_tail(&b"my_queue"[..], &b"item2"[..])?; + db.push_to_queue_tail(&b"my_queue"[..], &b"item3"[..])?; + + assert_eq!(db.queue_len(&b"my_queue"[..])?, 3); + assert_eq!( + db.pop_queue_head(&b"my_queue"[..])?, + Some(b"item1".to_vec()) + ); + assert_eq!( + db.pop_queue_head(&b"my_queue"[..])?, + Some(b"item2".to_vec()) + ); + assert_eq!( + db.pop_queue_head(&b"my_queue"[..])?, + Some(b"item3".to_vec()) + ); + assert_eq!(db.pop_queue_head(&b"my_queue"[..])?, None); + assert_eq!(db.queue_len(&b"my_queue"[..])?, 0); + + Ok(()) +} + +#[test] +fn test_queue_lifo() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + db.push_to_queue_tail(&b"stack"[..], &b"item1"[..])?; + db.push_to_queue_tail(&b"stack"[..], &b"item2"[..])?; + db.push_to_queue_tail(&b"stack"[..], &b"item3"[..])?; + + assert_eq!(db.pop_queue_tail(&b"stack"[..])?, Some(b"item3".to_vec())); + assert_eq!(db.pop_queue_tail(&b"stack"[..])?, Some(b"item2".to_vec())); + assert_eq!(db.pop_queue_tail(&b"stack"[..])?, Some(b"item1".to_vec())); + assert_eq!(db.pop_queue_tail(&b"stack"[..])?, None); + + Ok(()) +} + +#[test] +fn test_queue_deque() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + db.push_to_queue_head(&b"deque"[..], 
&b"1"[..])?; + db.push_to_queue_head(&b"deque"[..], &b"2"[..])?; + db.push_to_queue_tail(&b"deque"[..], &b"3"[..])?; + db.push_to_queue_tail(&b"deque"[..], &b"4"[..])?; + + assert_eq!(db.queue_len(&b"deque"[..])?, 4); + assert_eq!(db.peek_queue_head(&b"deque"[..])?, Some(b"2".to_vec())); + assert_eq!(db.peek_queue_tail(&b"deque"[..])?, Some(b"4".to_vec())); + assert_eq!(db.pop_queue_head(&b"deque"[..])?, Some(b"2".to_vec())); + assert_eq!(db.pop_queue_tail(&b"deque"[..])?, Some(b"4".to_vec())); + assert_eq!(db.pop_queue_head(&b"deque"[..])?, Some(b"1".to_vec())); + assert_eq!(db.pop_queue_tail(&b"deque"[..])?, Some(b"3".to_vec())); + assert_eq!(db.pop_queue_head(&b"deque"[..])?, None); + + Ok(()) +} + +#[test] +fn test_queue_with_idx_methods() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let first = db.push_to_queue_tail(&b"idxq"[..], &b"a"[..])?; + let second = db.push_to_queue_tail(&b"idxq"[..], &b"b"[..])?; + let third = db.push_to_queue_head(&b"idxq"[..], &b"z"[..])?; + + assert!(third < first && first < second); + assert_eq!( + db.peek_queue_head_with_idx(&b"idxq"[..])?, + Some((third, b"z".to_vec())) + ); + assert_eq!( + db.peek_queue_tail_with_idx(&b"idxq"[..])?, + Some((second, b"b".to_vec())) + ); + assert_eq!( + db.pop_queue_head_with_idx(&b"idxq"[..])?, + Some((third, b"z".to_vec())) + ); + assert_eq!( + db.pop_queue_tail_with_idx(&b"idxq"[..])?, + Some((second, b"b".to_vec())) + ); + assert_eq!( + db.pop_queue_head_with_idx(&b"idxq"[..])?, + Some((first, b"a".to_vec())) + ); + assert_eq!(db.pop_queue_tail_with_idx(&b"idxq"[..])?, None); + + Ok(()) +} + +#[test] +fn test_queue_empty_push_head_has_stable_value_semantics() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + assert!(db.queue_range(&b"head_first"[..])?.is_empty()); + + let idx = db.push_to_queue_head(&b"head_first"[..], &b"x"[..])?; + assert_eq!( + 
db.peek_queue_head_with_idx(&b"head_first"[..])?, + Some((idx, b"x".to_vec())) + ); + assert_eq!( + db.peek_queue_tail_with_idx(&b"head_first"[..])?, + Some((idx, b"x".to_vec())) + ); + assert_eq!(db.queue_len(&b"head_first"[..])?, 1); + + Ok(()) +} + +#[test] +fn test_queue_peek_skips_holes_like_legacy_candystore() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let first = db.push_to_queue_tail(&b"peek_holes_head"[..], &b"v1"[..])?; + let second = db.push_to_queue_tail(&b"peek_holes_head"[..], &b"v2"[..])?; + let third = db.push_to_queue_tail(&b"peek_holes_head"[..], &b"v3"[..])?; + + assert_eq!( + db.remove_from_queue(&b"peek_holes_head"[..], second)?, + Some(b"v2".to_vec()) + ); + assert_eq!( + db.remove_from_queue(&b"peek_holes_head"[..], first)?, + Some(b"v1".to_vec()) + ); + assert_eq!( + db.peek_queue_head_with_idx(&b"peek_holes_head"[..])?, + Some((third, b"v3".to_vec())) + ); + assert_eq!(db.queue_range(&b"peek_holes_head"[..])?, second..third + 1); + + let first = db.push_to_queue_tail(&b"peek_holes_tail"[..], &b"v1"[..])?; + let second = db.push_to_queue_tail(&b"peek_holes_tail"[..], &b"v2"[..])?; + let third = db.push_to_queue_tail(&b"peek_holes_tail"[..], &b"v3"[..])?; + + assert_eq!( + db.remove_from_queue(&b"peek_holes_tail"[..], second)?, + Some(b"v2".to_vec()) + ); + assert_eq!( + db.remove_from_queue(&b"peek_holes_tail"[..], third)?, + Some(b"v3".to_vec()) + ); + assert_eq!( + db.peek_queue_tail_with_idx(&b"peek_holes_tail"[..])?, + Some((first, b"v1".to_vec())) + ); + assert_eq!(db.queue_range(&b"peek_holes_tail"[..])?, first..third); + + Ok(()) +} + +#[test] +fn test_extend_queue_returns_inserted_range() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let first = db.extend_queue(&b"bulk"[..], [&b"v1"[..], &b"v2"[..], &b"v3"[..]])?; + assert_eq!(first.len(), 3); + 
assert_eq!(db.queue_range(&b"bulk"[..])?, first.clone()); + + let second = db.extend_queue(&b"bulk"[..], [&b"v4"[..], &b"v5"[..]])?; + assert_eq!(second.start, first.end); + assert_eq!(second.len(), 2); + assert_eq!(db.queue_range(&b"bulk"[..])?, first.start..second.end); + + let items: Vec<_> = db.iter_queue(&b"bulk"[..]).collect::>()?; + assert_eq!( + items, + vec![ + (first.start, b"v1".to_vec()), + (first.start + 1, b"v2".to_vec()), + (first.start + 2, b"v3".to_vec()), + (second.start, b"v4".to_vec()), + (second.start + 1, b"v5".to_vec()), + ] + ); + + Ok(()) +} + +#[test] +fn test_queue_persistence() -> Result<(), Error> { + let dir = tempdir().unwrap(); + + { + let db = CandyStore::open(dir.path(), Config::default())?; + db.push_to_queue_tail(&b"q1"[..], &b"val1"[..])?; + db.push_to_queue_tail(&b"q1"[..], &b"val2"[..])?; + } + + { + let db = CandyStore::open(dir.path(), Config::default())?; + assert_eq!(db.queue_len(&b"q1"[..])?, 2); + assert_eq!(db.pop_queue_head(&b"q1"[..])?, Some(b"val1".to_vec())); + } + + { + let db = CandyStore::open(dir.path(), Config::default())?; + assert_eq!(db.queue_len(&b"q1"[..])?, 1); + assert_eq!(db.pop_queue_head(&b"q1"[..])?, Some(b"val2".to_vec())); + assert_eq!(db.pop_queue_head(&b"q1"[..])?, None); + } + + Ok(()) +} + +#[test] +fn test_queue_reverse_iteration_skips_holes() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + db.push_to_queue_tail(&b"q_rev_iter"[..], &b"v1"[..])?; + db.push_to_queue_tail(&b"q_rev_iter"[..], &b"v2"[..])?; + db.push_to_queue_tail(&b"q_rev_iter"[..], &b"v3"[..])?; + db.push_to_queue_tail(&b"q_rev_iter"[..], &b"v4"[..])?; + + assert_eq!(db.pop_queue_head(&b"q_rev_iter"[..])?, Some(b"v1".to_vec())); + assert_eq!(db.pop_queue_head(&b"q_rev_iter"[..])?, Some(b"v2".to_vec())); + + let rev_items: Vec<_> = db + .iter_queue(&b"q_rev_iter"[..]) + .rev() + .map(|res| res.unwrap().1) + .collect(); + assert_eq!(rev_items, 
vec![b"v4".to_vec(), b"v3".to_vec()]); + + let fwd_items: Vec<_> = db + .iter_queue(&b"q_rev_iter"[..]) + .map(|res| res.unwrap().1) + .collect(); + assert_eq!(fwd_items, vec![b"v3".to_vec(), b"v4".to_vec()]); + + Ok(()) +} + +#[test] +fn test_multiple_queues() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + db.push_to_queue_tail(&b"q1"[..], &b"v1"[..])?; + db.push_to_queue_tail(&b"q2"[..], &b"v2"[..])?; + + assert_eq!(db.pop_queue_head(&b"q1"[..])?, Some(b"v1".to_vec())); + assert_eq!(db.pop_queue_head(&b"q2"[..])?, Some(b"v2".to_vec())); + + Ok(()) +} + +#[test] +fn test_queue_remove_hole_is_skipped() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + let idx1 = db.push_to_queue_tail(&b"holey"[..], &b"v1"[..])?; + let idx2 = db.push_to_queue_tail(&b"holey"[..], &b"v2"[..])?; + let idx3 = db.push_to_queue_tail(&b"holey"[..], &b"v3"[..])?; + + assert!(idx1 < idx2 && idx2 < idx3); + assert_eq!( + db.remove_from_queue(&b"holey"[..], idx2)?, + Some(b"v2".to_vec()) + ); + + let items: Vec<_> = db.iter_queue(&b"holey"[..]).collect::>()?; + assert_eq!(items.len(), 2); + assert_eq!(items[0].1, b"v1".to_vec()); + assert_eq!(items[1].1, b"v3".to_vec()); + + Ok(()) +} + +#[test] +fn test_queue_concurrency() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = Arc::new(CandyStore::open(dir.path(), common::small_file_config())?); + let queue = b"concurrent_queue"; + + let producers = 4; + let items_per_producer = 1000; + let consumers = 4; + let finished = Arc::new(AtomicBool::new(false)); + let consumed = Arc::new(AtomicUsize::new(0)); + + let mut consumer_handles = Vec::new(); + for _ in 0..consumers { + let db = db.clone(); + let finished = finished.clone(); + let consumed = consumed.clone(); + consumer_handles.push(std::thread::spawn(move || { + loop { + match db.pop_queue_head(&queue[..]).unwrap() { + Some(_) => { + 
consumed.fetch_add(1, Ordering::Relaxed); + } + None => { + if finished.load(Ordering::Relaxed) { + match db.pop_queue_head(&queue[..]).unwrap() { + Some(_) => { + consumed.fetch_add(1, Ordering::Relaxed); + } + None => break, + } + } else { + std::thread::yield_now(); + } + } + } + } + })); + } + + let mut producer_handles = Vec::new(); + for producer in 0..producers { + let db = db.clone(); + producer_handles.push(std::thread::spawn(move || { + for item in 0..items_per_producer { + let value = format!("p{producer}-{item}"); + db.push_to_queue_tail(&queue[..], value.as_bytes()).unwrap(); + } + })); + } + + for handle in producer_handles { + handle.join().unwrap(); + } + finished.store(true, Ordering::Relaxed); + + for handle in consumer_handles { + handle.join().unwrap(); + } + + assert_eq!( + consumed.load(Ordering::Relaxed), + producers * items_per_producer + ); + assert_eq!(db.queue_len(&queue[..])?, 0); + + Ok(()) +} diff --git a/tests/recovery.rs b/tests/recovery.rs new file mode 100644 index 0000000..cd035f0 --- /dev/null +++ b/tests/recovery.rs @@ -0,0 +1,759 @@ +mod common; + +use std::collections::HashSet; +use std::fs; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::sync::Arc; + +use candystore::{ + CandyStore, CandyTypedDeque, CandyTypedList, CandyTypedStore, Config, Error, RebuildStrategy, +}; +use tempfile::tempdir; + +fn patterned_bytes_with_seed(len: usize, seed: usize) -> Vec { + (0..len) + .map(|idx| (((idx * 31) + (seed * 17)) % 251) as u8) + .collect() +} + +fn rewrite_first_data_entry_header( + dir: &std::path::Path, + rewrite: impl FnOnce(u32) -> u32, +) -> Result<(), Error> { + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(dir.join("data_0000")) + .map_err(Error::IOError)?; + + file.seek(SeekFrom::Start(4096)).map_err(Error::IOError)?; + let mut entry_header = [0u8; 8]; + file.read_exact(&mut entry_header).map_err(Error::IOError)?; + + let header = 
u32::from_le_bytes(entry_header[0..4].try_into().unwrap()); + let klen = u16::from_le_bytes(entry_header[4..6].try_into().unwrap()) as usize; + let vlen = u16::from_le_bytes(entry_header[6..8].try_into().unwrap()) as usize; + let entry_len = 4 + 4 + klen + vlen + 2; + + file.seek(SeekFrom::Start(4096)).map_err(Error::IOError)?; + let mut entry = vec![0u8; entry_len]; + file.read_exact(&mut entry).map_err(Error::IOError)?; + entry[0..4].copy_from_slice(&rewrite(header).to_le_bytes()); + + let checksum = crc16_ibm3740_fast::hash(&entry[..entry_len - 2]) as u16; + entry[entry_len - 2..entry_len].copy_from_slice(&checksum.to_le_bytes()); + + file.seek(SeekFrom::Start(4096)).map_err(Error::IOError)?; + file.write_all(&entry).map_err(Error::IOError)?; + file.sync_all().map_err(Error::IOError)?; + Ok(()) +} + +fn rewrite_data_file_ordinal( + dir: &std::path::Path, + file_idx: u16, + ordinal: u64, +) -> Result<(), Error> { + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(dir.join(format!("data_{file_idx:04}"))) + .map_err(Error::IOError)?; + + file.seek(SeekFrom::Start(16)).map_err(Error::IOError)?; + file.write_all(&ordinal.to_le_bytes()) + .map_err(Error::IOError)?; + file.sync_all().map_err(Error::IOError)?; + Ok(()) +} + +#[test] +fn test_clean_shutdown_flag() -> Result<(), Error> { + let dir = tempdir().unwrap(); + + { + let db = CandyStore::open(dir.path(), Config::default())?; + assert!(db.was_clean_shutdown()); + db.set("hello", "world")?; + } + + { + let db = CandyStore::open(dir.path(), Config::default())?; + assert!(db.was_clean_shutdown()); + assert_eq!(db.get("hello")?, Some("world".into())); + } + + Ok(()) +} + +#[test] +fn test_dirty_shutdown_detected() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::rebuild_if_dirty_config(); + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("hello", "world")?; + db._abort_for_testing(); + } + + { + let db = CandyStore::open(dir.path(), config)?; 
+ assert!(!db.was_clean_shutdown()); + } + + Ok(()) +} + +#[test] +fn test_recovery_after_dirty_shutdown() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::rebuild_if_dirty_config(); + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key1", "val1")?; + db.set("key2", "val2")?; + db.set("key3", "val3")?; + db.set("key2", "val2_updated")?; + db.remove("key3")?; + db._abort_for_testing(); + } + + { + let db = CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + assert_eq!(db.get("key1")?, Some(b"val1".to_vec())); + assert_eq!(db.get("key2")?, Some(b"val2_updated".to_vec())); + assert!(db.get("key3")?.is_none()); + } + + Ok(()) +} + +#[test] +fn test_recovery_uses_persisted_hash_key_on_reopen() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let original_config = Config { + hash_key: (1, 2), + rebuild_strategy: RebuildStrategy::RebuildIfDirty, + ..Config::default() + }; + let different_config = Config { + hash_key: (3, 4), + ..original_config + }; + + { + let db = CandyStore::open(dir.path(), original_config)?; + db.set("key1", "val1")?; + db.set("key2", "val2")?; + db._abort_for_testing(); + } + + { + let db = CandyStore::open(dir.path(), different_config)?; + assert!(!db.was_clean_shutdown()); + assert_eq!(db.get("key1")?, Some(b"val1".to_vec())); + assert_eq!(db.get("key2")?, Some(b"val2".to_vec())); + db.set("key3", "val3")?; + } + + { + let db = CandyStore::open(dir.path(), original_config)?; + assert_eq!(db.get("key1")?, Some(b"val1".to_vec())); + assert_eq!(db.get("key2")?, Some(b"val2".to_vec())); + assert_eq!(db.get("key3")?, Some(b"val3".to_vec())); + } + + Ok(()) +} + +#[test] +fn test_recovery_rebuilds_waste_stats() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::rebuild_if_dirty_config(); + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key1", "val1")?; + db.set("key2", "val2")?; + db.set("key3", "val3")?; + db.set("key2", 
"val2_updated")?; + db.remove("key3")?; + db._abort_for_testing(); + } + + { + let db = CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + let stats = db.stats(); + assert_eq!(stats.waste_bytes, 64); + } + + Ok(()) +} + +#[test] +fn test_recovery_with_many_keys_and_splits() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let mut config = common::small_file_config(); + config.rebuild_strategy = RebuildStrategy::RebuildIfDirty; + + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..500 { + db.set(format!("k{i:04}"), format!("v{i:04}"))?; + } + for i in (0..500).step_by(3) { + db.set(format!("k{i:04}"), format!("updated_{i}"))?; + } + for i in (0..500).step_by(7) { + db.remove(format!("k{i:04}"))?; + } + db._abort_for_testing(); + } + + { + let db = CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + + for i in 0..500 { + let key = format!("k{i:04}"); + if i % 7 == 0 { + assert!(db.get(&key)?.is_none(), "key {key} should be removed"); + } else if i % 3 == 0 { + assert_eq!( + db.get(&key)?, + Some(format!("updated_{i}").into_bytes()), + "key {key} should be updated" + ); + } else { + assert_eq!( + db.get(&key)?, + Some(format!("v{i:04}").into_bytes()), + "key {key} should have original value" + ); + } + } + } + + Ok(()) +} + +#[test] +fn test_rebuild_if_dirty_recovers_large_dataset_across_multiple_data_files() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 64 * 1024 * 1024, + compaction_throughput_bytes_per_sec: 1024, + rebuild_strategy: RebuildStrategy::RebuildIfDirty, + ..Config::default() + }; + const TARGET_NUM_DATA_FILES: u64 = 5; + const VALUE_SIZE: usize = 60 * 1024; + const NUM_REMOVALS: usize = 128; + + let total_keys; + let removed_keys; + + { + let db = CandyStore::open(dir.path(), config)?; + let mut next_idx = 0usize; + while db.stats().num_data_files < TARGET_NUM_DATA_FILES { + let key = format!("large-rebuild-{next_idx:06}"); 
+ let value = patterned_bytes_with_seed(VALUE_SIZE, next_idx); + db.set(&key, &value)?; + next_idx += 1; + } + + total_keys = next_idx; + removed_keys = ((total_keys - NUM_REMOVALS)..total_keys).collect::>(); + for idx in &removed_keys { + let key = format!("large-rebuild-{idx:06}"); + assert!( + db.remove(&key)?.is_some(), + "expected {key} to exist before removal" + ); + } + + let stats = db.stats(); + assert!( + stats.num_data_files >= TARGET_NUM_DATA_FILES, + "expected at least {TARGET_NUM_DATA_FILES} data files, got {}", + stats.num_data_files + ); + + db._abort_for_testing(); + } + + { + let db = CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + assert!( + db.stats().num_data_files >= TARGET_NUM_DATA_FILES, + "rebuild should preserve the multi-file dataset" + ); + + for idx in 0..total_keys { + let key = format!("large-rebuild-{idx:06}"); + if removed_keys.contains(&idx) { + assert!( + db.get(&key)?.is_none(), + "removed key {key} reappeared after rebuild" + ); + } else { + let expected = patterned_bytes_with_seed(VALUE_SIZE, idx); + assert_eq!( + db.get(&key)?, + Some(expected), + "key {key} did not survive large rebuild correctly" + ); + } + } + } + + Ok(()) +} + +#[test] +fn test_rebuild_if_dirty_recovers_with_corrupted_rows_checksum() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::rebuild_if_dirty_config(); + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key1", "val1")?; + db.set("key2", "val2")?; + db.set("key2", "val2_updated")?; + db.remove("key1")?; + db._abort_for_testing(); + } + + common::corrupt_first_row_checksum(dir.path()); + + { + let db = CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + assert!(db.get("key1")?.is_none()); + assert_eq!(db.get("key2")?, Some(b"val2_updated".to_vec())); + } + + { + let db = CandyStore::open(dir.path(), config)?; + assert!(db.was_clean_shutdown()); + assert!(db.get("key1")?.is_none()); + 
assert_eq!(db.get("key2")?, Some(b"val2_updated".to_vec())); + } + + Ok(()) +} + +#[test] +fn test_rebuild_if_dirty_rejects_unknown_data_entry_type() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::rebuild_if_dirty_config(); + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key1", "val1")?; + db._abort_for_testing(); + } + + rewrite_first_data_entry_header(dir.path(), |header| (header & !(0b11 << 30)) | (0b10 << 30))?; + + match CandyStore::open(dir.path(), config) { + Err(Error::IOError(io_err)) if io_err.kind() == std::io::ErrorKind::InvalidData => Ok(()), + Err(err) => panic!("expected invalid-data error for unknown entry type, got {err}"), + Ok(_) => panic!("expected open to fail for unknown entry type"), + } +} + +#[test] +fn test_rebuild_if_dirty_rejects_unknown_data_namespace() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::rebuild_if_dirty_config(); + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key1", "val1")?; + db._abort_for_testing(); + } + + rewrite_first_data_entry_header(dir.path(), |header| { + let cleared = header & !(0x3f << 24); + cleared | (63 << 24) + })?; + + match CandyStore::open(dir.path(), config) { + Err(Error::IOError(io_err)) if io_err.kind() == std::io::ErrorKind::InvalidData => Ok(()), + Err(err) => panic!("expected invalid-data error for unknown namespace, got {err}"), + Ok(_) => panic!("expected open to fail for unknown namespace"), + } +} + +#[test] +fn test_open_rejects_duplicate_data_file_ordinals() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::small_file_config(); + + { + let db = CandyStore::open(dir.path(), config)?; + for idx in 0..64 { + db.set(format!("dup-ordinal-{idx:03}"), vec![b'x'; 512])?; + if db.stats().num_data_files >= 2 { + break; + } + } + assert!( + db.stats().num_data_files >= 2, + "expected multiple data files" + ); + } + + rewrite_data_file_ordinal(dir.path(), 1, 
0x00bd_38a0_2a35_1cdf)?; + + match CandyStore::open(dir.path(), config) { + Err(Error::IOError(io_err)) if io_err.kind() == std::io::ErrorKind::InvalidData => Ok(()), + Err(err) => panic!("expected invalid-data error for duplicate ordinal, got {err}"), + Ok(_) => panic!("expected open to fail for duplicate data file ordinals"), + } +} + +#[test] +fn test_rebuild_if_dirty_recovers_lists() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::rebuild_if_dirty_config(); + let list = b"rebuild-list"; + + { + let db = CandyStore::open(dir.path(), config)?; + db.set_in_list(list, b"a", b"1")?; + db.set_in_list(list, b"b", b"2")?; + db.set_in_list(list, b"c", b"3")?; + db.set_in_list(list, b"b", b"2b")?; + db.remove_from_list(list, b"a")?; + db._abort_for_testing(); + } + + { + let db = CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + assert_eq!(db.list_len(list)?, 2); + assert_eq!(db.get_from_list(list, b"a")?, None); + assert_eq!(db.get_from_list(list, b"b")?, Some(b"2b".to_vec())); + assert_eq!(db.get_from_list(list, b"c")?, Some(b"3".to_vec())); + + let items: Vec<_> = db.iter_list(list).collect::>()?; + assert_eq!( + items, + vec![ + (b"b".to_vec(), b"2b".to_vec()), + (b"c".to_vec(), b"3".to_vec()), + ] + ); + } + + Ok(()) +} + +#[test] +fn test_rebuild_if_dirty_recovers_queues() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::rebuild_if_dirty_config(); + let queue = b"rebuild-queue"; + + let first_idx; + let keep_idx; + let removed_idx; + + { + let db = CandyStore::open(dir.path(), config)?; + first_idx = db.push_to_queue_tail(queue, b"tail-1")?; + keep_idx = db.push_to_queue_tail(queue, b"tail-2")?; + removed_idx = db.push_to_queue_tail(queue, b"tail-3")?; + db.push_to_queue_head(queue, b"head-0")?; + + assert_eq!(db.pop_queue_head(queue)?, Some(b"head-0".to_vec())); + assert_eq!( + db.remove_from_queue(queue, removed_idx)?, + Some(b"tail-3".to_vec()) + ); + db._abort_for_testing(); 
+ } + + { + let db = CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + assert_eq!(db.queue_len(queue)?, 2); + assert_eq!(db.peek_queue_head(queue)?, Some(b"tail-1".to_vec())); + assert_eq!(db.peek_queue_tail(queue)?, Some(b"tail-2".to_vec())); + assert_eq!(db.remove_from_queue(queue, removed_idx)?, None); + + let items: Vec<_> = db.iter_queue(queue).collect::>()?; + assert_eq!( + items, + vec![ + (first_idx, b"tail-1".to_vec()), + (keep_idx, b"tail-2".to_vec()), + ] + ); + } + + Ok(()) +} + +#[test] +fn test_rebuild_if_dirty_recovers_typed_data() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::rebuild_if_dirty_config(); + let list_key = 7u32; + let queue_key = 9u32; + + { + let store = Arc::new(CandyStore::open(dir.path(), config)?); + let typed_kv = CandyTypedStore::::new(Arc::clone(&store)); + let typed_list = CandyTypedList::::new(Arc::clone(&store)); + let typed_queue = CandyTypedDeque::::new(Arc::clone(&store)); + + typed_kv.set(&1u32, &"one".to_string())?; + typed_kv.set(&1u32, &"uno".to_string())?; + typed_kv.set(&2u32, &"two".to_string())?; + assert_eq!(typed_kv.remove(&2u32)?, Some("two".to_string())); + + typed_list.set(&list_key, &1u32, &"a".to_string())?; + typed_list.set(&list_key, &2u32, &"b".to_string())?; + typed_list.set(&list_key, &3u32, &"c".to_string())?; + assert_eq!(typed_list.remove(&list_key, &2u32)?, Some("b".to_string())); + + typed_queue.push_tail(&queue_key, &10u32)?; + typed_queue.push_tail(&queue_key, &20u32)?; + typed_queue.push_head(&queue_key, &5u32)?; + assert_eq!(typed_queue.pop_tail(&queue_key)?, Some(20u32)); + + drop(typed_queue); + drop(typed_list); + drop(typed_kv); + Arc::into_inner(store).unwrap()._abort_for_testing(); + } + + { + let store = Arc::new(CandyStore::open(dir.path(), config)?); + let typed_kv = CandyTypedStore::::new(Arc::clone(&store)); + let typed_list = CandyTypedList::::new(Arc::clone(&store)); + let typed_queue = 
CandyTypedDeque::::new(Arc::clone(&store)); + + assert!(!store.was_clean_shutdown()); + + assert_eq!(typed_kv.get(&1u32)?, Some("uno".to_string())); + assert_eq!(typed_kv.get(&2u32)?, None); + + let typed_list_items: Vec<_> = typed_list.iter(&list_key).collect::>()?; + assert_eq!( + typed_list_items, + vec![(1u32, "a".to_string()), (3u32, "c".to_string())] + ); + + let typed_queue_items: Vec<_> = typed_queue.iter(&queue_key).collect::>()?; + assert_eq!(typed_queue_items.len(), 2); + assert_eq!(typed_queue_items[0].1, 5u32); + assert_eq!(typed_queue_items[1].1, 10u32); + assert_eq!(typed_queue.peek_head(&queue_key)?, Some(5u32)); + assert_eq!(typed_queue.peek_tail(&queue_key)?, Some(10u32)); + assert_eq!( + typed_queue.peek_head_with_idx(&queue_key)?, + Some(typed_queue_items[0]) + ); + assert_eq!( + typed_queue.peek_tail_with_idx(&queue_key)?, + Some(typed_queue_items[1]) + ); + } + + Ok(()) +} + +#[test] +fn test_fail_if_dirty_rejects_reopen() -> Result<(), Error> { + let dir = tempdir().unwrap(); + + { + let db = CandyStore::open(dir.path(), Config::default())?; + db.set("key", "value")?; + db._abort_for_testing(); + } + + assert!(matches!( + CandyStore::open(dir.path(), Config::default()), + Err(Error::DirtyIndex) + )); + + Ok(()) +} + +#[test] +fn test_trust_dirty_index_if_checksum_correct_or_fail() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrFail, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key1", "val1")?; + db.set("key2", "val2")?; + db.set("key2", "val2_updated")?; + db._abort_for_testing(); + } + + { + let db = CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + assert_eq!(db.get("key1")?, Some(b"val1".to_vec())); + assert_eq!(db.get("key2")?, Some(b"val2_updated".to_vec())); + let waste_after_trust = db.stats().waste_bytes; + drop(db); + + let waste_after_clean = 
CandyStore::open(dir.path(), config)?.stats().waste_bytes; + assert_eq!(waste_after_clean, waste_after_trust); + } + + Ok(()) +} + +#[test] +fn test_trust_dirty_index_fails_on_checksum_mismatch() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrFail, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key", "value")?; + db._abort_for_testing(); + } + + common::corrupt_first_row_checksum(dir.path()); + + assert!(matches!( + CandyStore::open(dir.path(), config), + Err(Error::IOError(io_err)) if io_err.kind() == std::io::ErrorKind::InvalidData + )); + + Ok(()) +} + +#[test] +fn test_trust_dirty_index_rebuilds_on_checksum_mismatch() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrRebuild, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key1", "val1")?; + db.set("key2", "val2")?; + db.set("key2", "val2_updated")?; + db._abort_for_testing(); + } + + common::corrupt_first_row_checksum(dir.path()); + + let db = CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + assert_eq!(db.get("key1")?, Some(b"val1".to_vec())); + assert_eq!(db.get("key2")?, Some(b"val2_updated".to_vec())); + + Ok(()) +} + +#[test] +fn test_reset_db_if_dirty_clears_state() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + rebuild_strategy: RebuildStrategy::ResetDBIfDirty, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key", "value")?; + db._abort_for_testing(); + } + + fs::write(dir.path().join("extra.txt"), b"junk").map_err(Error::IOError)?; + fs::create_dir(dir.path().join("extra_dir")).map_err(Error::IOError)?; + fs::write(dir.path().join("extra_dir").join("nested.txt"), b"junk").map_err(Error::IOError)?; + + let db 
= CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + assert!(db.get("key")?.is_none()); + assert!(!dir.path().join("extra.txt").exists()); + assert!(!dir.path().join("extra_dir").exists()); + + Ok(()) +} + +#[test] +fn test_reset_on_invalid_data_clears_corrupt_store() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + rebuild_strategy: RebuildStrategy::RebuildIfDirty, + reset_on_invalid_data: true, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key", "value")?; + } + + fs::write(dir.path().join("index"), b"bad").map_err(Error::IOError)?; + fs::write(dir.path().join("rows"), b"bad").map_err(Error::IOError)?; + fs::write(dir.path().join("extra.txt"), b"junk").map_err(Error::IOError)?; + fs::create_dir(dir.path().join("extra_dir")).map_err(Error::IOError)?; + fs::write(dir.path().join("extra_dir").join("nested.txt"), b"junk").map_err(Error::IOError)?; + + let db = CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + assert!(db.get("key")?.is_none()); + assert!(!dir.path().join("extra.txt").exists()); + assert!(!dir.path().join("extra_dir").exists()); + + db.set("fresh", "value")?; + assert_eq!(db.get("fresh")?, Some(b"value".to_vec())); + + Ok(()) +} + +#[test] +fn test_recover_from_truncated_data_file() -> Result<(), Box> { + let dir = tempfile::tempdir()?; + { + let db = candystore::CandyStore::open(dir.path(), candystore::Config::default())?; + db.set("key1", "value1")?; + db.set("key2", "value2")?; + } + + // Corrupt the data file by truncating the last 5 bytes + let data_file = std::fs::read_dir(dir.path())? 
+ .filter_map(|res| res.ok()) + .find(|entry| entry.file_name().to_string_lossy().starts_with("data_")) + .unwrap(); + let file = std::fs::OpenOptions::new() + .write(true) + .open(data_file.path())?; + let len = file.metadata()?.len(); + file.set_len(len - 5)?; + + // We expect clear recovery (key2 was truncated, thus doesn't exist, but key1 is readable) + let db = candystore::CandyStore::open(dir.path(), candystore::Config::default())?; + assert_eq!(db.get("key1")?.as_deref(), Some("value1".as_bytes())); + assert_eq!(db.get("key2")?, None); + Ok(()) +} diff --git a/tests/rotation.rs b/tests/rotation.rs new file mode 100644 index 0000000..f943ac0 --- /dev/null +++ b/tests/rotation.rs @@ -0,0 +1,130 @@ +mod common; + +use candystore::{CandyStore, Config, Error, SetStatus}; +use tempfile::tempdir; + +#[test] +fn test_many_inserts_trigger_splits() -> Result<(), Error> { + const KEYS: usize = 5000; + + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + for key_idx in 0..KEYS { + let key = format!("split_key_{key_idx:05}"); + let value = format!("split_val_{key_idx:05}"); + assert!(matches!(db.set(&key, &value)?, SetStatus::CreatedNew)); + } + + for key_idx in 0..KEYS { + let key = format!("split_key_{key_idx:05}"); + let value = format!("split_val_{key_idx:05}"); + assert_eq!(db.get(&key)?, Some(value.into())); + } + + Ok(()) +} + +#[test] +fn test_rotation_preserves_reads() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), common::small_file_config())?; + + for key_idx in 0..512 { + let key = format!("rotate_key_{key_idx:04}"); + let value = format!("rotate_val_{key_idx:04}_{}", "x".repeat(64)); + assert!(matches!(db.set(&key, &value)?, SetStatus::CreatedNew)); + } + + for key_idx in 0..512 { + let key = format!("rotate_key_{key_idx:04}"); + assert!(db.get(&key)?.is_some(), "missing key after rotation: {key}"); + } + + Ok(()) +} + +#[test] +fn 
test_splits_and_rotation_with_small_files() -> Result<(), Error> { + const KEYS: usize = 5000; + + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), common::small_file_config())?; + + for key_idx in 0..KEYS { + let key = format!("split_rotate_key_{key_idx:05}"); + let value = format!("split_rotate_val_{key_idx:05}_{}", "x".repeat(48)); + assert!(matches!(db.set(&key, &value)?, SetStatus::CreatedNew)); + } + + for key_idx in 0..KEYS { + let key = format!("split_rotate_key_{key_idx:05}"); + let value = format!("split_rotate_val_{key_idx:05}_{}", "x".repeat(48)); + assert_eq!(db.get(&key)?, Some(value.into())); + } + + let data_file_count = std::fs::read_dir(dir.path()) + .map_err(Error::IOError)? + .filter_map(|entry| entry.ok()) + .filter(|entry| { + entry + .file_name() + .to_str() + .is_some_and(|name| name.starts_with("data_")) + }) + .count(); + assert!( + data_file_count > 1, + "expected rotation to create multiple data files" + ); + + let index_rows_size = std::fs::metadata(dir.path().join("rows")) + .map_err(Error::IOError)? 
+ .len(); + assert!( + index_rows_size > 4 * 4096, + "expected index rows growth after row splitting, got rows size {index_rows_size}" + ); + + Ok(()) +} + +#[test] +fn test_splits_rotation_and_reopen_with_small_files() -> Result<(), Error> { + const KEYS: usize = 5000; + + let dir = tempdir().unwrap(); + + { + let db = CandyStore::open(dir.path(), common::small_file_config())?; + for key_idx in 0..KEYS { + let key = format!("reopen_split_rotate_key_{key_idx:05}"); + let value = format!("reopen_split_rotate_val_{key_idx:05}_{}", "x".repeat(48)); + assert!(matches!(db.set(&key, &value)?, SetStatus::CreatedNew)); + } + } + + let db = CandyStore::open(dir.path(), common::small_file_config())?; + for key_idx in 0..KEYS { + let key = format!("reopen_split_rotate_key_{key_idx:05}"); + let value = format!("reopen_split_rotate_val_{key_idx:05}_{}", "x".repeat(48)); + assert_eq!(db.get(&key)?, Some(value.into())); + } + + let data_file_count = std::fs::read_dir(dir.path()) + .map_err(Error::IOError)? 
+ .filter_map(|entry| entry.ok()) + .filter(|entry| { + entry + .file_name() + .to_str() + .is_some_and(|name| name.starts_with("data_")) + }) + .count(); + assert!( + data_file_count > 1, + "expected rotation to persist multiple data files after reopen" + ); + + Ok(()) +} diff --git a/tests/shrink.rs b/tests/shrink.rs new file mode 100644 index 0000000..8ee1460 --- /dev/null +++ b/tests/shrink.rs @@ -0,0 +1,44 @@ +use candystore::{CandyStore, Config, Result, SetStatus}; + +const ROW_WIDTH: usize = 16 * 21; + +#[test] +fn test_shrink_to_fit_preserves_remaining_keys() -> Result<()> { + let dir = tempfile::tempdir().unwrap(); + let config = Config { + initial_capacity: 4 * ROW_WIDTH, + ..Config::default() + }; + let store = CandyStore::open(dir.path(), config)?; + + for i in 0..10_000 { + let key = format!("key_{i}"); + assert!(matches!( + store.set(key.as_bytes(), b"value")?, + SetStatus::CreatedNew + )); + } + + let before = store.capacity(); + + for i in 0..9_000 { + let key = format!("key_{i}"); + store.remove(key.as_bytes())?; + } + + let shrunk_rows = store.shrink_to_fit(0.2)?; + assert!(shrunk_rows > 0); + assert!(store.capacity() <= before); + + for i in 9_000..10_000 { + let key = format!("key_{i}"); + assert_eq!(store.get(key.as_bytes())?, Some(b"value".to_vec())); + } + + for i in 0..9_000 { + let key = format!("key_{i}"); + assert_eq!(store.get(key.as_bytes())?, None); + } + + Ok(()) +} diff --git a/tests/test_atomics.rs b/tests/test_atomics.rs deleted file mode 100644 index 3c96404..0000000 --- a/tests/test_atomics.rs +++ /dev/null @@ -1,40 +0,0 @@ -mod common; - -use candystore::{CandyStore, Config, GetOrCreateStatus, ReplaceStatus, Result, SetStatus}; - -use crate::common::run_in_tempdir; - -#[test] -fn test_atomics() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open(dir, Config::default())?; - - assert!(db.get_or_create("aaa", "1111")?.was_created()); - - assert!(db.replace("aaa", "2222", None)?.was_replaced()); - - 
assert_eq!(db.get("aaa")?, Some("2222".into())); - - assert!(db.get_or_create("aaa", "1111")?.already_exists()); - - assert!(!db.replace("bbb", "3333", None)?.was_replaced()); - - assert!(db.set("bbb", "4444")?.was_created()); - assert_eq!(db.set("bbb", "5555")?, SetStatus::PrevValue("4444".into())); - - assert_eq!( - db.get_or_create("bbb", "6666")?, - GetOrCreateStatus::ExistingValue("5555".into()) - ); - - assert_eq!(db.get_or_create("cccc", "6666")?.value(), b"6666"); - assert_eq!(db.get_or_create("aaa", "6666")?.value(), b"2222"); - - assert_eq!( - db.replace("aaa", "6666", Some("2222"))?, - ReplaceStatus::PrevValue("2222".into()) - ); - - Ok(()) - }) -} diff --git a/tests/test_bigval.rs b/tests/test_bigval.rs deleted file mode 100644 index 850444e..0000000 --- a/tests/test_bigval.rs +++ /dev/null @@ -1,32 +0,0 @@ -mod common; - -use std::sync::Arc; - -use candystore::{CandyStore, CandyTypedStore, Config, Result}; - -use crate::common::run_in_tempdir; - -#[test] -fn test_bigval() -> Result<()> { - run_in_tempdir(|dir| { - let db = Arc::new(CandyStore::open(dir, Config::default())?); - - assert_eq!(db.set_big(b"mykey", &vec![0x99; 1_000_000])?, false); - assert_eq!(db.get_big(b"yourkey")?, None); - assert_eq!(db.get_big(b"mykey")?, Some(vec![0x99; 1_000_000])); - assert_eq!(db.remove_big(b"mykey")?, true); - assert_eq!(db.get_big(b"mykey")?, None); - assert_eq!(db.set_big(b"mykey", &vec![0x88; 100_000])?, false); - assert_eq!(db.set_big(b"mykey", &vec![0x77; 100_000])?, true); - assert_eq!(db.get_big(b"mykey")?, Some(vec![0x77; 100_000])); - - let typed = CandyTypedStore::>::new(db); - assert_eq!(typed.set_big("hello", &vec![123456789; 100_000])?, false); - assert_eq!(typed.get_big("world")?, None); - assert_eq!(typed.get_big("hello")?, Some(vec![123456789; 100_000])); - assert_eq!(typed.remove_big("hello")?, true); - assert_eq!(typed.remove_big("hello")?, false); - - Ok(()) - }) -} diff --git a/tests/test_flush_agg.rs b/tests/test_flush_agg.rs deleted file 
mode 100644 index a1d4488..0000000 --- a/tests/test_flush_agg.rs +++ /dev/null @@ -1,51 +0,0 @@ -#![cfg(feature = "flush_aggregation")] - -mod common; - -use std::{ - sync::{Arc, Barrier}, - time::{Duration, Instant}, -}; - -use candystore::{CandyStore, Config, Result}; - -use crate::common::run_in_tempdir; - -#[test] -fn test_flush_aggregation() -> Result<()> { - run_in_tempdir(|dir| { - let db = Arc::new(CandyStore::open( - dir, - Config { - flush_aggregation_delay: Some(Duration::from_millis(1)), - ..Default::default() - }, - )?); - - let num_threads = 10; - let barrier = Arc::new(Barrier::new(num_threads)); - let mut handles = vec![]; - - for i in 0..num_threads { - let db = db.clone(); - let barrier = barrier.clone(); - let h = std::thread::spawn(move || { - barrier.wait(); - let t0 = Instant::now(); - for j in 0..10 { - db.set(&format!("key{i}-{j}"), "val")?; - } - let dur = Instant::now().duration_since(t0); - Result::::Ok(dur) - }); - handles.push(h); - } - - for (i, h) in handles.into_iter().enumerate() { - let dur = h.join().unwrap()?; - println!("{i}: {dur:?}"); - } - - Ok(()) - }) -} diff --git a/tests/test_list_collisions.rs b/tests/test_list_collisions.rs deleted file mode 100644 index cbd39c3..0000000 --- a/tests/test_list_collisions.rs +++ /dev/null @@ -1,76 +0,0 @@ -#![cfg(feature = "whitebox_testing")] - -mod common; - -use candystore::{CandyStore, Config, Result, HASH_BITS_TO_KEEP}; - -use crate::common::run_in_tempdir; - -#[test] -fn test_list_collisions() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open(dir, Config::default())?; - - db.clear()?; - - // force many elements to end up with the same PartedHash - unsafe { HASH_BITS_TO_KEEP = 0xff00_000f_0000_00ff }; - - for i in 0u32..100_000 { - if i % 10_000 == 0 { - println!("push {i}"); - } - db.set_in_list("xxx", &i.to_le_bytes(), &i.to_le_bytes())?; - } - - for i in 0u32..100_000 { - if i % 10_000 == 0 { - println!("pop {i}"); - } - 
assert_eq!(db.pop_list_head("xxx")?.unwrap().1, &i.to_le_bytes()); - } - - assert!(db.pop_list_head("xxx")?.is_none()); - assert!(db.pop_list_tail("xxx")?.is_none()); - assert_eq!(db.iter_list("xxx").count(), 0); - - unsafe { HASH_BITS_TO_KEEP = 0x0000_000f_0000_00ff }; - - for i in 0u32..1000 { - db.set_in_list("xxx", &i.to_le_bytes(), &i.to_le_bytes())?; - } - for i in 400u32..600 { - assert_eq!( - db.remove_from_list("xxx", &i.to_le_bytes())?, - Some(i.to_le_bytes().to_vec()) - ); - } - - for i in 0u32..100 { - assert_eq!( - db.remove_from_list("xxx", &i.to_le_bytes())?, - Some(i.to_le_bytes().to_vec()) - ); - } - - for i in (900u32..1000).rev() { - assert_eq!( - db.remove_from_list("xxx", &i.to_le_bytes())?, - Some(i.to_le_bytes().to_vec()) - ); - } - - let remaining = db - .iter_list("xxx") - .map(|res| u32::from_le_bytes(res.unwrap().1.try_into().unwrap())) - .collect::>(); - - let expectd = (100..400).chain(600..900).collect::>(); - assert_eq!(remaining, expectd); - - db.discard_list("xxx")?; - assert!(db.pop_list_head("xxx")?.is_none()); - - Ok(()) - }) -} diff --git a/tests/test_lists.rs b/tests/test_lists.rs deleted file mode 100644 index 9ad5f4a..0000000 --- a/tests/test_lists.rs +++ /dev/null @@ -1,516 +0,0 @@ -mod common; - -use std::sync::{atomic::AtomicUsize, Arc}; - -use candystore::{ - CandyStore, CandyTypedDeque, CandyTypedList, Config, GetOrCreateStatus, ListCompactionParams, - ReplaceStatus, Result, SetStatus, -}; - -use crate::common::run_in_tempdir; - -#[test] -fn test_lists() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open( - dir, - Config { - max_shard_size: 20 * 1024, // use small files to force lots of splits and compactions - min_compaction_threashold: 10 * 1024, - ..Default::default() - }, - )?; - - db.set_in_list("texas", "dallas", "500,000")?; - db.set_in_list("texas", "austin", "300,000")?; - db.set_in_list("texas", "houston", "700,000")?; - db.set_in_list("texas", "dallas", "450,000")?; - - 
assert_eq!(db.get_from_list("texas", "dallas")?, Some("450,000".into())); - assert_eq!(db.get_from_list("texas", "austin")?, Some("300,000".into())); - assert_eq!( - db.get_from_list("texas", "houston")?, - Some("700,000".into()) - ); - - assert_eq!(db.iter_list("texas").count(), 3); - assert_eq!(db.list_len("texas")?, 3); - assert_eq!(db.iter_list("arkansas").count(), 0); - assert_eq!(db.list_len("arkansas")?, 0); - - let items = db - .iter_list("texas") - .map(|res| res.unwrap()) - .collect::>(); - assert_eq!(items[0].0, "dallas".as_bytes()); - assert_eq!(items[2].0, "houston".as_bytes()); - - db.discard_list("texas")?; - assert_eq!(db.get_from_list("texas", "houston")?, None); - assert_eq!(db.get_from_list("texas", "dallas")?, None); - assert_eq!(db.iter_list("texas").count(), 0); - - db.set_in_list("xxx", "k1", "v1")?; - db.set_in_list("xxx", "k2", "v2")?; - db.set_in_list("xxx", "k3", "v3")?; - db.set_in_list("xxx", "k4", "v4")?; - - // remove from the middle - assert_eq!(db.remove_from_list("xxx", "k3")?, Some("v3".into())); - assert_eq!(db.iter_list("xxx").count(), 3); - assert_eq!(db.list_len("xxx")?, 3); - // remove first - assert_eq!(db.remove_from_list("xxx", "k1")?, Some("v1".into())); - assert_eq!(db.iter_list("xxx").count(), 2); - assert_eq!(db.list_len("xxx")?, 2); - // remove last - assert_eq!(db.remove_from_list("xxx", "k4")?, Some("v4".into())); - assert_eq!(db.iter_list("xxx").count(), 1); - assert_eq!(db.list_len("xxx")?, 1); - // remove single - assert_eq!(db.remove_from_list("xxx", "k2")?, Some("v2".into())); - assert_eq!(db.iter_list("xxx").count(), 0); - assert_eq!(db.list_len("xxx")?, 0); - - for i in 0..10_000 { - db.set_in_list("xxx", &format!("my key {i}"), - "very long key aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")?; - assert_eq!(db.list_len("xxx")?, i + 1); - } - - // make sure we survive splits - assert!(db.stats().num_splits > 1); - - for (i, res) in db.iter_list("xxx").enumerate() { - let (k, _) = 
res?; - assert_eq!(k, format!("my key {i}").as_bytes()); - db.remove_from_list("xxx", &k)?; - assert_eq!(db.list_len("xxx")?, 10_000 - i - 1); - } - - assert_eq!(db.iter_list("xxx").count(), 0); - - Ok(()) - }) -} - -#[test] -fn test_typed_lists() -> Result<()> { - run_in_tempdir(|dir| { - let db = Arc::new(CandyStore::open(dir, Config::default())?); - - let typed = CandyTypedList::::new(db); - typed.set("texas", &108, &2005)?; - typed.set("texas", &555, &2006)?; - typed.set("texas", &827, &2007)?; - typed.set("texas", &123, &2008)?; - typed.set("texas", &555, &2009)?; - - assert_eq!(typed.get("texas", &555)?, Some(2009)); - assert_eq!(typed.get("texas", &66666666)?, None); - - assert!(typed.remove("texas", &827)?.is_some()); - assert!(typed.remove("texas", &827)?.is_none()); - assert!(typed.remove("texas", &66666666)?.is_none()); - - let items = typed - .iter("texas") - .map(|res| res.unwrap().1) - .collect::>(); - assert_eq!(items, vec![2005, 2009, 2008]); - - Ok(()) - }) -} - -#[test] -fn test_lists_multithreading() -> Result<()> { - run_in_tempdir(|dir| { - let db = Arc::new(CandyStore::open(dir, Config::default())?); - - let removed = Arc::new(AtomicUsize::new(0)); - let created = Arc::new(AtomicUsize::new(0)); - let gotten = Arc::new(AtomicUsize::new(0)); - let replaced = Arc::new(AtomicUsize::new(0)); - - let num_thds = 10; - let num_iters = 1000; - - let mut handles = vec![]; - for thd in 0..num_thds { - let db = db.clone(); - let removed = removed.clone(); - let created = created.clone(); - let replaced = replaced.clone(); - let gotten = gotten.clone(); - let h = std::thread::spawn(move || { - for _ in 0..num_iters { - let idx1: u8 = rand::random(); - if db - .set_in_list("xxx", &format!("key{idx1}"), &format!("val-{thd}"))? 
- .was_created() - { - created.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - } else { - replaced.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - } - - std::thread::yield_now(); - - let idx2: u8 = rand::random(); - if let Some(v) = db.get_from_list("xxx", &format!("key{idx2}"))? { - assert!(v.starts_with(b"val-")); - gotten.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - } - - std::thread::yield_now(); - let idx3: u8 = rand::random(); - if db.remove_from_list("xxx", &format!("key{idx3}"))?.is_some() { - removed.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - } - std::thread::yield_now(); - } - Result::<()>::Ok(()) - }); - handles.push(h); - } - - for h in handles { - h.join().unwrap()?; - } - - let reamining = db.iter_list("xxx").count(); - let created = created.load(std::sync::atomic::Ordering::SeqCst); - let replaced = replaced.load(std::sync::atomic::Ordering::SeqCst); - let removed = removed.load(std::sync::atomic::Ordering::SeqCst); - let gotten = gotten.load(std::sync::atomic::Ordering::SeqCst); - - assert_eq!(created - removed, reamining); - assert_eq!(created + replaced, num_iters * num_thds); - - println!("created={created} replaced={replaced} removed={removed} gotten={gotten} reamining={reamining}"); - - Ok(()) - }) -} - -#[test] -fn test_list_atomics() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open(dir, Config::default())?; - - assert_eq!( - db.get_or_create_in_list("xxx", "yyy", "1")?, - GetOrCreateStatus::CreatedNew("1".into()) - ); - - assert_eq!( - db.get_or_create_in_list("xxx", "yyy", "2")?, - GetOrCreateStatus::ExistingValue("1".into()) - ); - - assert_eq!( - db.replace_in_list("xxx", "yyy", "3", None)?, - ReplaceStatus::PrevValue("1".into()) - ); - - assert_eq!( - db.replace_in_list("xxx", "zzz", "3", None)?, - ReplaceStatus::DoesNotExist - ); - - assert_eq!( - db.get_or_create_in_list("xxx", "yyy", "7")?, - GetOrCreateStatus::ExistingValue("3".into()) - ); - - assert_eq!( - db.set_in_list("xxx", "yyy", 
"4")?, - SetStatus::PrevValue("3".into()) - ); - - assert_eq!(db.get_from_list("xxx", "yyy")?, Some("4".into())); - - Ok(()) - }) -} - -#[test] -fn test_typed_queue() -> Result<()> { - run_in_tempdir(|dir| { - let db = Arc::new(CandyStore::open(dir, Config::default())?); - - let queue = CandyTypedDeque::::new(db); - assert_eq!(queue.pop_head("orders")?, None); - - for i in 10..30 { - queue.push_tail("orders", &i)?; - } - for i in 10..20 { - assert_eq!(queue.pop_head("orders")?, Some(i)); - } - for i in (20..30).rev() { - assert_eq!(queue.pop_tail("orders")?, Some(i)); - } - - assert_eq!(queue.pop_head("orders")?, None); - - queue.push_tail("orders", &100)?; - queue.push_tail("orders", &101)?; - queue.push_tail("orders", &102)?; - queue.push_head("orders", &103)?; - queue.push_head("orders", &104)?; - queue.push_head("orders", &105)?; - - let items = queue - .iter("orders") - .map(|res| res.unwrap().1) - .collect::>(); - - assert_eq!(items, vec![105, 104, 103, 100, 101, 102]); - - let items = queue - .iter_backwards("orders") - .map(|res| res.unwrap().1) - .collect::>(); - - assert_eq!(items, vec![102, 101, 100, 103, 104, 105]); - - Ok(()) - }) -} - -#[test] -fn test_rev_iter() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open(dir, Config::default())?; - - db.set_in_list("mylist", "item1", "xxx")?; - db.set_in_list("mylist", "item2", "xxx")?; - db.set_in_list("mylist", "item3", "xxx")?; - db.set_in_list("mylist", "item4", "xxx")?; - - let items = db - .iter_list("mylist") - .map(|res| res.unwrap().0) - .collect::>(); - - assert_eq!(items, vec![b"item1", b"item2", b"item3", b"item4"]); - - let items = db - .iter_list_backwards("mylist") - .map(|res| res.unwrap().0) - .collect::>(); - - assert_eq!(items, vec![b"item4", b"item3", b"item2", b"item1"]); - - assert_eq!( - db.peek_list_head("mylist")?, - Some(("item1".into(), "xxx".into())) - ); - - assert_eq!( - db.peek_list_tail("mylist")?, - Some(("item4".into(), "xxx".into())) - ); - - Ok(()) - }) -} 
- -#[test] -fn test_promote() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open(dir, Config::default())?; - - let items = || { - db.iter_list("mylist") - .map(|res| res.unwrap().0) - .collect::>() - }; - - db.set_in_list("mylist", "item1", "xxx")?; - db.set_in_list("mylist", "item2", "xxx")?; - db.set_in_list("mylist", "item3", "xxx")?; - db.set_in_list("mylist", "item4", "xxx")?; - - assert_eq!(items(), vec![b"item1", b"item2", b"item3", b"item4"]); - - // no promotion happens - db.set_in_list("mylist", "item2", "yyy")?; - assert_eq!(items(), vec![b"item1", b"item2", b"item3", b"item4"]); - - // promote a middle element - db.set_in_list_promoting("mylist", "item2", "zzz")?; - assert_eq!(items(), vec![b"item1", b"item3", b"item4", b"item2"]); - - // promote head element - db.set_in_list_promoting("mylist", "item1", "zzz")?; - assert_eq!(items(), vec![b"item3", b"item4", b"item2", b"item1"]); - - // promote tail element - db.set_in_list_promoting("mylist", "item1", "zzz")?; - assert_eq!(items(), vec![b"item3", b"item4", b"item2", b"item1"]); - - Ok(()) - }) -} - -#[test] -fn test_typed_promote() -> Result<()> { - run_in_tempdir(|dir| { - let db = Arc::new(CandyStore::open(dir, Config::default())?); - let typed = CandyTypedList::::new(db); - - let items = || { - typed - .iter("mylist") - .map(|res| res.unwrap().0) - .collect::>() - }; - - typed.set("mylist", &1, "xxx")?; - typed.set("mylist", &2, "xxx")?; - typed.set("mylist", &3, "xxx")?; - typed.set("mylist", &4, "xxx")?; - assert_eq!(items(), &[1, 2, 3, 4]); - - typed.set("mylist", &2, "yyy")?; - assert_eq!(items(), &[1, 2, 3, 4]); - - typed.set_promoting("mylist", &2, "zzz")?; - assert_eq!(items(), &[1, 3, 4, 2]); - - typed.set_promoting("mylist", &1, "zzz")?; - assert_eq!(items(), &[3, 4, 2, 1]); - - typed.set_promoting("mylist", &1, "zzz")?; - assert_eq!(items(), &[3, 4, 2, 1]); - - Ok(()) - }) -} - -#[test] -fn test_list_compaction() -> Result<()> { - run_in_tempdir(|dir| { - let db = 
CandyStore::open(dir, Config::default())?; - - for i in 0u32..1000 { - db.set_in_list("xxx", &i.to_le_bytes(), "yyy")?; - } - assert!(!db.compact_list_if_needed("xxx", ListCompactionParams::default())?); - - for i in 0u32..1000 { - if i % 3 == 1 { - assert!(db.remove_from_list("xxx", &i.to_le_bytes())?.is_some()); - } - } - - let keys1 = db - .iter_list("xxx") - .map(|res| u32::from_le_bytes(res.unwrap().0.try_into().unwrap())) - .collect::>(); - for k in keys1.iter() { - assert!(k % 3 != 1, "{k}"); - } - - assert!(db.compact_list_if_needed("xxx", ListCompactionParams::default())?); - - let keys2 = db - .iter_list("xxx") - .map(|res| u32::from_le_bytes(res.unwrap().0.try_into().unwrap())) - .collect::>(); - - assert_eq!(keys1, keys2); - - Ok(()) - }) -} - -#[test] -fn test_list_retain() -> Result<()> { - run_in_tempdir(|dir| { - let db = Arc::new(CandyStore::open(dir, Config::default())?); - - { - let mut dropped = 0; - - for i in 0u32..1000 { - db.set_in_list("xxx", &i.to_le_bytes(), "yyy")?; - } - for i in 0u32..1000 { - if i % 7 == 0 { - db.remove_from_list("xxx", &i.to_le_bytes())?; - dropped += 1; - } - } - - db.retain_in_list("xxx", |k, _v| { - let k2 = u32::from_le_bytes(k.try_into().unwrap()); - if k2 % 5 == 0 { - dropped += 1; - Ok(false) - } else { - Ok(true) // keep - } - })?; - - assert_eq!(db.list_len("xxx")?, 1000 - dropped); - - let mut found = vec![]; - for item in db.iter_list("xxx") { - let (k, v) = item?; - let k2 = u32::from_le_bytes(k.try_into().unwrap()); - assert_ne!(k2 % 7, 0); - assert_ne!(k2 % 5, 0); - assert_eq!(v, b"yyy"); - found.push(k2); - } - assert_eq!(found.len(), 1000 - dropped); - - db.retain_in_list("xxx", |_k, _v| Ok(false))?; - - assert_eq!(db.list_len("xxx")?, 0); - } - - { - let typed = CandyTypedList::::new(db); - - let mut dropped = 0; - - for i in 0u32..1000 { - typed.set("xxx", &i, &(i * 2))?; - } - for i in 0u32..1000 { - if i % 7 == 0 { - typed.remove("xxx", &i)?; - dropped += 1 - } - } - - typed.retain("xxx", |k, _v| 
{ - if k % 5 == 0 { - dropped += 1; - Ok(false) - } else { - Ok(true) // keep - } - })?; - - assert_eq!(typed.len("xxx")?, 1000 - dropped); - - let mut found = vec![]; - for item in typed.iter("xxx") { - let (k, v) = item?; - assert_ne!(k % 7, 0); - assert_ne!(k % 5, 0); - assert_eq!(v, k * 2); - found.push(k); - } - assert_eq!(found.len(), 1000 - dropped); - } - - Ok(()) - }) -} diff --git a/tests/test_loading.rs b/tests/test_loading.rs deleted file mode 100644 index 5470601..0000000 --- a/tests/test_loading.rs +++ /dev/null @@ -1,72 +0,0 @@ -mod common; - -use candystore::{CandyStore, Config, Result}; - -use crate::common::{run_in_tempdir, LONG_VAL}; - -#[test] -fn test_loading() -> Result<()> { - run_in_tempdir(|dir| { - let config = Config { - max_shard_size: 20 * 1024, // use small files to force lots of splits and compactions - min_compaction_threashold: 10 * 1024, - ..Default::default() - }; - - { - let db = CandyStore::open(dir, config.clone())?; - - for i in 0..1000 { - db.set(&format!("unique key {i}"), LONG_VAL)?; - } - - assert!(db.stats().num_splits > 1); - assert_eq!(db.iter().count(), 1000); - } - - { - let db = CandyStore::open(dir, config.clone())?; - - assert_eq!(db.iter().count(), 1000); - - for res in db.iter() { - let (key, val) = res?; - assert_eq!(val, LONG_VAL.as_bytes()); - assert!(key.starts_with(b"unique key ")); - } - } - - { - let existing = std::fs::read_dir(dir)? 
- .map(|res| res.unwrap().file_name().to_str().unwrap().to_string()) - .filter(|name| name.starts_with("shard_")) - .collect::>(); - - std::fs::write(format!("{dir}/top_1234-5678"), "xxxx")?; - std::fs::write(format!("{dir}/bottom_1234-5678"), "xxxx")?; - - let (_, span) = existing[0].split_once("_").unwrap(); - let (start, end) = span.split_once("-").unwrap(); - let start = u32::from_str_radix(start, 16).unwrap(); - let end = u32::from_str_radix(end, 16).unwrap(); - let mid = (start + end) / 2; - std::fs::write(format!("{dir}/shard_{start:04x}-{mid:04x}"), "xxxx")?; - std::fs::write(format!("{dir}/shard_{mid:04x}-{end:04x}"), "xxxx")?; - - let db = CandyStore::open(dir, config)?; - - assert!(!std::fs::exists(format!("{dir}/top_1234-5678"))?); - assert!(!std::fs::exists(format!("{dir}/bottom_1234-5678"))?); - assert!(!std::fs::exists(format!( - "{dir}/shard_{start:04x}-{mid:04x}" - ))?); - assert!(!std::fs::exists(format!( - "{dir}/shard_{mid:04x}-{end:04x}" - ))?); - - assert_eq!(db.iter().count(), 1000); - } - - Ok(()) - }) -} diff --git a/tests/test_logic.rs b/tests/test_logic.rs deleted file mode 100644 index e2a7c19..0000000 --- a/tests/test_logic.rs +++ /dev/null @@ -1,149 +0,0 @@ -mod common; - -use std::collections::HashSet; - -use candystore::{CandyStore, Config, Result, MAX_VALUE_SIZE}; - -use crate::common::{run_in_tempdir, LONG_VAL}; - -#[test] -fn test_logic() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open( - dir, - Config { - max_shard_size: 20 * 1024, // use small files to force lots of splits and compactions - min_compaction_threashold: 10 * 1024, - ..Default::default() - }, - )?; - - assert!(db.get("my name")?.is_none()); - db.set("my_name", "inigo montoya")?; - db.set("your_name", "dread pirate robert")?; - - assert!(db.contains("my_name")?); - assert!(!db.contains("My NaMe")?); - - assert_eq!(db.get("my_name")?, Some("inigo montoya".into())); - assert_eq!(db.get("your_name")?, Some("dread pirate robert".into())); - 
db.set("your_name", "vizzini")?; - assert_eq!(db.get("your_name")?, Some("vizzini".into())); - assert_eq!(db.remove("my_name")?, Some("inigo montoya".into())); - assert!(db.remove("my_name")?.is_none()); - assert!(db.get("my name")?.is_none()); - - let stats = db.stats(); - assert_eq!(stats.num_entries(), 1); - assert_eq!(stats.num_compactions, 0); - assert_eq!(stats.num_splits, 0); - println!("{stats}"); - - for _ in 0..1000 { - db.set( - "a very long keyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy", - LONG_VAL, - )?; - assert!(db - .remove("a very long keyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy")? - .is_some()); - } - - let stats1 = db.stats(); - println!("{stats1}"); - assert_eq!(stats1.num_entries(), 1); - assert!(stats1.num_compactions >= 2); - assert_eq!(stats1.num_splits, 0); - - for i in 0..1000 { - db.set(&format!("unique key {i}"), LONG_VAL)?; - } - - let stats2 = db.stats(); - assert_eq!(stats2.num_entries(), 1001); - assert!(stats2.num_splits > stats1.num_splits); - - assert_eq!(db.get("your_name")?, Some("vizzini".into())); - db.clear()?; - assert_eq!(db.get("your_name")?, None); - - let stats3 = db.stats(); - assert_eq!(stats3.num_entries(), 0); - assert_eq!(stats3.num_compactions, 0); - assert_eq!(stats3.num_splits, 0); - - for i in 0..1000 { - db.set(&format!("unique key {i}"), LONG_VAL)?; - } - - let mut all_keys = HashSet::new(); - - for res in db.iter() { - let (key, val) = res?; - assert_eq!(val, LONG_VAL.as_bytes()); - assert!(key.starts_with(b"unique key ")); - all_keys.insert(key); - } - - assert_eq!(all_keys.len(), 1000); - - all_keys.clear(); - - let cookie = { - let mut iter1 = db.iter(); - for _ in 0..100 { - let res = iter1.next().unwrap(); - let (key, _) = res?; - all_keys.insert(key); - } - iter1.cookie() - }; - - for res in db.iter_from_cookie(cookie) { - let (key, _) = res?; - all_keys.insert(key); - } - - assert_eq!(all_keys.len(), 1000); - - let mut all_keys2 = HashSet::new(); - - for res in db.iter_keys() { - let key = res?; 
- all_keys2.insert(key); - } - - assert_eq!(all_keys, all_keys2); - - Ok(()) - }) -} - -#[test] -fn test_histogram() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open( - dir, - Config { - expected_number_of_keys: 100_000, // pre-split - ..Default::default() - }, - )?; - - db.set("k1", "bbb")?; - db.set("k2", &vec![b'b'; 100])?; - db.set("k3", &vec![b'b'; 500])?; - db.set("k4", &vec![b'b'; 5000])?; - db.set("k4", &vec![b'b'; 4500])?; - db.set("k5", &vec![b'b'; 50000])?; - db.set("kkkkkkkkkkkkkkk", &vec![b'b'; MAX_VALUE_SIZE])?; - - let stats = db.stats(); - assert_eq!(stats.entries_under_128, 2); - assert_eq!(stats.entries_under_1k, 1); - assert_eq!(stats.entries_under_8k, 2); - assert_eq!(stats.entries_over_32k, 2); - - Ok(()) - }) -} diff --git a/tests/test_merge.rs b/tests/test_merge.rs deleted file mode 100644 index 0b3ca11..0000000 --- a/tests/test_merge.rs +++ /dev/null @@ -1,87 +0,0 @@ -mod common; - -use candystore::{CandyStore, Config, Result}; - -use crate::common::run_in_tempdir; - -#[test] -fn test_merge() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open(dir, Config::default())?; - - for i in 0u32..100_000 { - db.set(&i.to_le_bytes(), "val")?; - } - assert_eq!(db.stats().num_entries(), 100_000); - assert_eq!(db.stats().num_shards, 4); - - for i in 0u32..100_000 { - if i % 16 != 0 { - db.remove(&i.to_le_bytes())?.unwrap(); - } - } - assert_eq!(db.stats().num_entries(), 6250); - db.merge_small_shards(0.25)?; - assert_eq!(db.stats().num_shards, 1); - - for i in 0u32..100_000 { - db.set(&i.to_le_bytes(), "val")?; - } - assert_eq!(db.stats().num_entries(), 100_000); - assert_eq!(db.stats().num_shards, 4); - for i in 0u32..100_000 { - if i % 4 != 0 { - db.remove(&i.to_le_bytes())?.unwrap(); - } - } - assert_eq!(db.stats().num_entries(), 25_000); - db.merge_small_shards(0.25)?; - assert_eq!(db.stats().num_shards, 2); - - for i in 0u32..100_000 { - if (i % 4 == 0) && (i % 16 != 0) { - db.remove(&i.to_le_bytes())?.unwrap(); 
- } - } - assert_eq!(db.stats().num_entries(), 6250); - db.merge_small_shards(0.25)?; - assert_eq!(db.stats().num_shards, 1); - - Ok(()) - }) -} - -#[test] -fn test_merge_with_expected_num_keys() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open( - dir, - Config { - expected_number_of_keys: 200_000, - ..Default::default() - }, - )?; - - assert_eq!(db.stats().num_entries(), 0); - assert_eq!(db.stats().num_shards, 8); - db.merge_small_shards(0.25)?; - assert_eq!(db.stats().num_shards, 8); - - for i in 0u32..900_000 { - db.set(&i.to_le_bytes(), "val")?; - } - assert_eq!(db.stats().num_entries(), 900_000); - assert_eq!(db.stats().num_shards, 32); - - for i in 0u32..900_000 { - if i % 16 != 0 { - db.remove(&i.to_le_bytes())?.unwrap(); - } - } - assert_eq!(db.stats().num_entries(), 56250); - db.merge_small_shards(0.25)?; - assert_eq!(db.stats().num_shards, 8); - - Ok(()) - }) -} diff --git a/tests/test_multithreading.rs b/tests/test_multithreading.rs deleted file mode 100644 index 9e8bd2d..0000000 --- a/tests/test_multithreading.rs +++ /dev/null @@ -1,75 +0,0 @@ -mod common; - -use std::sync::{atomic::AtomicUsize, Arc}; - -use candystore::{CandyStore, Config, Result}; -use rand::random; - -use crate::common::run_in_tempdir; - -#[test] -fn test_multithreaded() -> Result<()> { - run_in_tempdir(|dir| { - for attempt in 0..10 { - let db = Arc::new(CandyStore::open( - dir, - Config { - max_shard_size: 20 * 1024, - min_compaction_threashold: 10 * 1024, - ..Default::default() - }, - )?); - - const NUM_ITEMS: usize = 10_000; - let succ_gets = Arc::new(AtomicUsize::new(0)); - let succ_removals = Arc::new(AtomicUsize::new(0)); - - let mut thds = Vec::new(); - for thid in 0..50 { - let db = db.clone(); - let succ_gets = succ_gets.clone(); - let succ_removals = succ_removals.clone(); - let handle = std::thread::spawn(move || -> Result<()> { - let value = format!("data{thid}"); - for i in 0..NUM_ITEMS { - let key = format!("key{i}"); - db.set(&key, &value)?; - - if 
random::() > 0.8 { - if db.remove(&key)?.is_some() { - succ_removals.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - } - } else { - let val2 = db.get(&key)?; - if let Some(val2) = val2 { - assert!(val2.starts_with(b"data")); - succ_gets.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - } - } - } - Ok(()) - }); - //handle.join().unwrap().unwrap(); - thds.push(handle); - } - - for thd in thds { - thd.join().unwrap()?; - } - - let gets = succ_gets.load(std::sync::atomic::Ordering::SeqCst); - let removals = succ_removals.load(std::sync::atomic::Ordering::SeqCst); - - let stats = db.stats(); - println!("[{attempt}] gets={gets} removals={removals} stats={stats}",); - - assert_eq!(db.iter().count(), db.stats().num_entries()); - assert!( - stats.num_entries() >= (NUM_ITEMS * 7) / 10 - && stats.num_entries() <= (NUM_ITEMS * 9) / 10 - ); - db.clear()?; - } - Ok(()) - }) -} diff --git a/tests/test_pre_split.rs b/tests/test_pre_split.rs deleted file mode 100644 index 38afe81..0000000 --- a/tests/test_pre_split.rs +++ /dev/null @@ -1,181 +0,0 @@ -mod common; - -use candystore::{CandyError, CandyStore, Config, Result}; - -use crate::common::run_in_tempdir; - -#[test] -fn test_pre_split() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open( - dir, - Config { - max_shard_size: 20 * 1024, // use small files to force lots of splits and compactions - min_compaction_threashold: 10 * 1024, - expected_number_of_keys: 1_000_000, - ..Default::default() - }, - )?; - - db.set("aaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")?; - - let files = std::fs::read_dir(&dir)? 
- .map(|res| res.unwrap().file_name().to_string_lossy().to_string()) - .filter(|filename| filename.starts_with("shard_")) - .collect::>(); - - assert_eq!(files.len(), 64); - - let stats = db.stats(); - assert_eq!(stats.num_shards, 64); - assert_eq!(stats.num_inserts, 1); - assert_eq!(stats.wasted_bytes, 0); - - db.set("bbb", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")?; - - let stats = db.stats(); - assert_eq!(stats.num_inserts, 2); - assert_eq!(stats.wasted_bytes, 0); - - db.set("aaa", "xxx")?; - - let stats = db.stats(); - assert_eq!(stats.num_inserts, 2); - - // test accounting, it's a bit of an implementation detail, but we have to account for the - // namespace byte as well - assert_eq!( - stats.wasted_bytes, - "aaa?".len() + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".len() - ); - - db.remove("aaa")?; - let stats = db.stats(); - assert_eq!(stats.num_inserts, 2); - assert_eq!(stats.num_removals, 1); - assert_eq!( - stats.wasted_bytes, - "aaa?".len() - + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".len() - + "aaa?".len() - + "xxx".len() - ); - - Ok(()) - }) -} - -#[test] -fn test_compaction() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open( - dir, - Config { - max_shard_size: 1000, - min_compaction_threashold: 900, - ..Default::default() - }, - )?; - - // fill the shard to the brim, creating waste - for i in 0..10 { - db.set("aaa", &format!("11112222333344445555666677778888999900001111222233334444555566667777888899990000111122223333444{:x}", i))?; - - let stats = db.stats(); - assert_eq!(stats.num_inserts, 1, "i={i}"); - assert_eq!(stats.occupied_bytes, 100 * (i + 1), "i={i}"); - assert_eq!(stats.wasted_bytes, 100 * i, "i={i}"); - } - - assert_eq!(db.stats().num_compactions, 0); - - // insert a new entry, which will cause a compaction - db.set("bbb", "x")?; - assert_eq!(db.stats().num_compactions, 1); - - let stats = db.stats(); - assert_eq!(stats.occupied_bytes, 100 + "bbb?".len() + "x".len()); - assert_eq!(stats.wasted_bytes, 0); 
- - Ok(()) - }) -} - -#[test] -fn test_too_large() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open( - dir, - Config { - max_shard_size: 1000, - min_compaction_threashold: 1000, - ..Default::default() - }, - )?; - - assert!(matches!( - db.set("yyy", &vec![7u8; 1000]) - .unwrap_err() - .downcast::() - .unwrap(), - CandyError::EntryCannotFitInShard(_, _) - )); - - db.set("yyy", &vec![7u8; 700])?; - let stats = db.stats(); - assert_eq!(stats.num_splits, 0); - assert_eq!(stats.num_compactions, 0); - - db.set("zzz", &vec![7u8; 700])?; - let stats = db.stats(); - assert_eq!(stats.num_compactions, 0); - assert_eq!(stats.num_splits, 1); - - Ok(()) - }) -} - -#[test] -fn test_compaction_stats() -> Result<()> { - run_in_tempdir(|dir| { - let db = CandyStore::open( - dir, - Config { - max_shard_size: 20_000, - min_compaction_threashold: 10_000, - ..Default::default() - }, - )?; - - let stats1 = db.stats(); - assert!(stats1.last_compaction_stats.is_empty()); - assert!(stats1.last_split_stats.is_empty()); - - for i in 1..500 { - db.set(&format!("key{i}"), &format!("val{i:0200}"))?; - } - - let stats2 = db.stats(); - println!("stats2={stats2:?}"); - assert!(stats2.last_compaction_stats.is_empty()); - assert!(stats2.last_split_stats.len() > 0); - - for i in 500..10000 { - db.set("key", &format!("val{i:0200}"))?; - } - - let stats3 = db.stats(); - println!("{stats3:?}"); - assert!(stats3.last_compaction_stats.len() > 1); - - for _ in 0..1000 { - assert!(db.get("key")?.is_some()); - } - - let stats4 = db.stats(); - assert!(stats4.last_compaction_stats.is_empty()); - assert!(stats4.last_split_stats.is_empty()); - - Ok(()) - }) -} diff --git a/tests/test_queues.rs b/tests/test_queues.rs deleted file mode 100644 index 40fbac0..0000000 --- a/tests/test_queues.rs +++ /dev/null @@ -1,109 +0,0 @@ -mod common; - -use candystore::{CandyStore, Config, Result}; - -use crate::common::run_in_tempdir; - -#[test] -fn test_queues() -> Result<()> { - run_in_tempdir(|dir| { - let 
db = CandyStore::open(dir, Config::default())?; - - db.push_to_queue_tail("work", "item1")?; - db.push_to_queue_tail("work", "item2")?; - db.push_to_queue_tail("work", "item3")?; - - assert_eq!(db.peek_queue_head("work")?, Some("item1".into())); - assert_eq!(db.peek_queue_tail("work")?, Some("item3".into())); - - assert_eq!(db.pop_queue_head("work")?, Some("item1".into())); - assert_eq!(db.pop_queue_head("work")?, Some("item2".into())); - assert_eq!(db.pop_queue_head("work")?, Some("item3".into())); - assert_eq!(db.pop_queue_head("work")?, None); - - db.push_to_queue_head("rev", "item1")?; - db.push_to_queue_head("rev", "item2")?; - db.push_to_queue_head("rev", "item3")?; - assert_eq!(db.pop_queue_tail("rev")?, Some("item1".into())); - assert_eq!(db.pop_queue_tail("rev")?, Some("item2".into())); - assert_eq!(db.pop_queue_tail("rev")?, Some("item3".into())); - assert_eq!(db.pop_queue_tail("rev")?, None); - - assert_eq!(db.queue_len("work")?, 0); - - for i in 1000u32..2000 { - db.push_to_queue_tail("work", &i.to_le_bytes())?; - } - assert_eq!(db.queue_len("work")?, 1000); - assert_eq!(db.queue_len("joke")?, 0); - - for (i, res) in db.iter_queue("work").enumerate() { - let (idx, val) = res?; - let v = u32::from_le_bytes(val.try_into().unwrap()); - assert_eq!(v, 1000 + i as u32); - - // create some holes - if v % 5 == 0 { - assert!(db.remove_from_queue("work", idx)?.is_some()); - } - } - - let mut count = 0; - for res in db.iter_queue("work") { - let (_, val) = res?; - let v = u32::from_le_bytes(val.try_into().unwrap()); - assert_ne!(v % 5, 0); - count += 1; - } - assert!(count == 800); - - let mut count2 = 0; - while let Some(val) = db.pop_queue_head("work")? { - let v = u32::from_le_bytes(val.try_into().unwrap()); - assert_ne!(v % 5, 0); - count2 += 1; - if count2 > 400 { - break; - } - } - while let Some(val) = db.pop_queue_tail("work")? 
{ - let v = u32::from_le_bytes(val.try_into().unwrap()); - assert_ne!(v % 5, 0); - count2 += 1; - } - - assert_eq!(count, count2); - assert_eq!(db.queue_len("work")?, 0); - - db.push_to_queue_tail("work", "item1")?; - db.push_to_queue_tail("work", "item2")?; - db.push_to_queue_tail("work", "item3")?; - assert_eq!(db.queue_len("work")?, 3); - db.extend_queue("work", ["item4", "item5"].iter())?; - assert_eq!(db.queue_len("work")?, 5); - - let items = db - .iter_queue("work") - .map(|res| std::str::from_utf8(&res.unwrap().1).unwrap().to_owned()) - .collect::>(); - assert_eq!(items, ["item1", "item2", "item3", "item4", "item5"]); - - db.discard_queue("work")?; - assert_eq!(db.queue_len("work")?, 0); - - db.extend_queue("work", (1u32..10).map(|i| i.to_le_bytes()))?; - let items = db - .iter_queue("work") - .map(|res| u32::from_le_bytes(res.unwrap().1.try_into().unwrap())) - .collect::>(); - assert_eq!(items, (1u32..10).collect::>()); - - let items = db - .iter_queue_backwards("work") - .map(|res| u32::from_le_bytes(res.unwrap().1.try_into().unwrap())) - .collect::>(); - assert_eq!(items, (1u32..10).rev().collect::>()); - - Ok(()) - }) -} diff --git a/tests/test_typed.rs b/tests/test_typed.rs deleted file mode 100644 index b9ffa1a..0000000 --- a/tests/test_typed.rs +++ /dev/null @@ -1,113 +0,0 @@ -mod common; - -use std::sync::Arc; - -use candystore::{CandyStore, CandyTypedKey, CandyTypedStore, Config, Result}; - -use crate::common::run_in_tempdir; - -use databuf::{Decode, Encode}; - -#[derive(Debug, Encode, Decode)] -struct MyKey { - x: u32, - y: u64, - z: String, -} - -impl CandyTypedKey for MyKey { - const TYPE_ID: u32 = 0x3476a551; -} - -#[derive(Debug, PartialEq, Eq, Encode, Decode)] -struct MyVal { - a: [u8; 7], - b: i16, - c: String, -} - -#[test] -fn test_typed() -> Result<()> { - run_in_tempdir(|dir| { - let db = Arc::new(CandyStore::open(dir, Config::default())?); - - let typed = CandyTypedStore::::new(db.clone()); - typed.set( - &MyKey { - x: 12, - y: 34, - z: 
"hello".into(), - }, - &MyVal { - a: [7, 7, 7, 7, 7, 7, 7], - b: 31415, - c: "world".into(), - }, - )?; - - assert_eq!( - typed - .get(&MyKey { - x: 12, - y: 34, - z: "hello".into(), - }) - .unwrap(), - Some(MyVal { - a: [7, 7, 7, 7, 7, 7, 7], - b: 31415, - c: "world".into() - }) - ); - - assert_eq!( - typed - .get(&MyKey { - x: 12, - y: 34, - z: "ola".into(), - }) - .unwrap(), - None - ); - - assert_eq!( - typed - .remove(&MyKey { - x: 12, - y: 34, - z: "hello".into(), - }) - .unwrap(), - Some(MyVal { - a: [7, 7, 7, 7, 7, 7, 7], - b: 31415, - c: "world".into() - }) - ); - - assert_eq!( - typed - .get(&MyKey { - x: 12, - y: 34, - z: "hello".into(), - }) - .unwrap(), - None - ); - - // two typed-stores can co-exist on the same underlying store - let typed2 = CandyTypedStore::>::new(db); - typed2.set("hello", &vec![1, 2, 3])?; - typed2.set("world", &vec![4, 5, 6, 7])?; - - assert_eq!(typed2.get("hello").unwrap(), Some(vec![1, 2, 3])); - assert_eq!(typed2.get("world").unwrap(), Some(vec![4, 5, 6, 7])); - - assert_eq!(typed2.remove("hello").unwrap(), Some(vec![1, 2, 3])); - assert_eq!(typed2.remove("hello").unwrap(), None); - - Ok(()) - }) -} diff --git a/tests/typed_list.rs b/tests/typed_list.rs new file mode 100644 index 0000000..74f73dc --- /dev/null +++ b/tests/typed_list.rs @@ -0,0 +1,167 @@ +use std::sync::Arc; + +use candystore::{CandyStore, CandyTypedList, Config, ListCompactionParams, Result}; +use tempfile::TempDir; + +#[test] +fn test_typed_list_iter_rev() -> Result<()> { + let temp_dir = TempDir::new().unwrap(); + let store = Arc::new(CandyStore::open(temp_dir.path(), Config::default())?); + let list = CandyTypedList::::new(Arc::clone(&store)); + + let lkey = 42u32; + list.set(&lkey, &1u32, &"a".to_string())?; + list.set(&lkey, &2u32, &"b".to_string())?; + list.set(&lkey, &3u32, &"c".to_string())?; + + assert_eq!(list.remove(&lkey, &2u32)?, Some("b".to_string())); + + let fwd: Vec<_> = list.iter(&lkey).map(|r| r.unwrap()).collect(); + assert_eq!(fwd, 
vec![(1u32, "a".to_string()), (3u32, "c".to_string())]); + + let rev: Vec<_> = list.iter(&lkey).rev().map(|r| r.unwrap()).collect(); + assert_eq!(rev, vec![(3u32, "c".to_string()), (1u32, "a".to_string())]); + + Ok(()) +} + +#[test] +fn test_typed_list_admin_ops() -> Result<()> { + let temp_dir = TempDir::new().unwrap(); + let store = Arc::new(CandyStore::open(temp_dir.path(), Config::default())?); + let list = CandyTypedList::::new(Arc::clone(&store)); + + let lkey = 9u32; + list.set(&lkey, &1u32, &"a".to_string())?; + list.set(&lkey, &2u32, &"b".to_string())?; + list.set(&lkey, &3u32, &"c".to_string())?; + assert_eq!(list.len(&lkey)?, 3); + + assert_eq!( + list.set_promoting(&lkey, &2u32, &"b".to_string())?, + Some("b".to_string()) + ); + let order: Vec<_> = list.iter(&lkey).map(|r| r.unwrap()).collect(); + assert_eq!( + order, + vec![ + (1u32, "a".to_string()), + (3u32, "c".to_string()), + (2u32, "b".to_string()) + ] + ); + + assert_eq!( + list.set_promoting(&lkey, &3u32, &"cc".to_string())?, + Some("c".to_string()) + ); + let order: Vec<_> = list.iter(&lkey).map(|r| r.unwrap()).collect(); + assert_eq!( + order, + vec![ + (1u32, "a".to_string()), + (2u32, "b".to_string()), + (3u32, "cc".to_string()) + ] + ); + + assert_eq!(list.set_promoting(&lkey, &4u32, &"d".to_string())?, None); + let order: Vec<_> = list.iter(&lkey).map(|r| r.unwrap()).collect(); + assert_eq!( + order, + vec![ + (1u32, "a".to_string()), + (2u32, "b".to_string()), + (3u32, "cc".to_string()), + (4u32, "d".to_string()) + ] + ); + + let existing = list.remove(&lkey, &1u32)?.unwrap(); + list.set(&lkey, &1u32, &existing)?; + let order: Vec<_> = list.iter(&lkey).map(|r| r.unwrap()).collect(); + assert_eq!( + order, + vec![ + (2u32, "b".to_string()), + (3u32, "cc".to_string()), + (4u32, "d".to_string()), + (1u32, "a".to_string()) + ] + ); + + assert_eq!(list.remove(&lkey, &3u32)?, Some("cc".to_string())); + let range_before = list.range(&lkey)?; + assert!(!range_before.is_empty()); + 
assert_eq!(list.len(&lkey)?, 3); + + let _ = list.compact_if_needed( + &lkey, + ListCompactionParams { + min_length: 0, + min_holes_ratio: 0.0, + }, + )?; + + let order: Vec<_> = list.iter(&lkey).map(|r| r.unwrap()).collect(); + assert_eq!( + order, + vec![ + (2u32, "b".to_string()), + (4u32, "d".to_string()), + (1u32, "a".to_string()) + ] + ); + + assert!(list.discard(&lkey)?); + assert!(list.is_empty(&lkey)?); + assert!(list.range(&lkey)?.is_empty()); + Ok(()) +} + +#[test] +fn test_typed_list_retain() -> Result<()> { + let temp_dir = TempDir::new().unwrap(); + let store = Arc::new(CandyStore::open(temp_dir.path(), Config::default())?); + let list = CandyTypedList::::new(Arc::clone(&store)); + + let lkey = 1u32; + list.set(&lkey, &10u32, &"ten".to_string())?; + list.set(&lkey, &20u32, &"twenty".to_string())?; + list.set(&lkey, &30u32, &"thirty".to_string())?; + list.set(&lkey, &40u32, &"forty".to_string())?; + + list.retain(&lkey, |k, _| Ok(*k % 20 == 0))?; + let items: Vec<_> = list.iter(&lkey).map(|r| r.unwrap().0).collect(); + assert_eq!(items, vec![20u32, 40u32]); + + list.retain(&lkey, |_, v| Ok(v.starts_with('f')))?; + let items: Vec<_> = list.iter(&lkey).map(|r| r.unwrap().0).collect(); + assert_eq!(items, vec![40u32]); + assert_eq!(list.range(&lkey)?.len(), list.len(&lkey)?); + + Ok(()) +} + +#[test] +fn test_typed_list_pop_peek() -> Result<()> { + let temp_dir = TempDir::new().unwrap(); + let store = Arc::new(CandyStore::open(temp_dir.path(), Config::default())?); + let list = CandyTypedList::::new(Arc::clone(&store)); + + let lkey = 100u32; + list.set(&lkey, &1u32, &"one".to_string())?; + list.set(&lkey, &2u32, &"two".to_string())?; + + assert_eq!(list.peek_head(&lkey)?, Some((1u32, "one".to_string()))); + assert_eq!(list.peek_tail(&lkey)?, Some((2u32, "two".to_string()))); + + assert_eq!(list.pop_head(&lkey)?, Some((1u32, "one".to_string()))); + assert_eq!(list.len(&lkey)?, 1); + + assert_eq!(list.pop_tail(&lkey)?, Some((2u32, "two".to_string()))); + 
assert_eq!(list.len(&lkey)?, 0); + assert!(list.pop_head(&lkey)?.is_none()); + + Ok(()) +} diff --git a/tests/typed_queue.rs b/tests/typed_queue.rs new file mode 100644 index 0000000..6fa5ef1 --- /dev/null +++ b/tests/typed_queue.rs @@ -0,0 +1,63 @@ +use std::sync::Arc; + +use candystore::{CandyStore, CandyTypedDeque, Config, Result}; +use tempfile::TempDir; + +#[test] +fn test_typed_queue_iter_rev() -> Result<()> { + let temp_dir = TempDir::new().unwrap(); + let store = Arc::new(CandyStore::open(temp_dir.path(), Config::default())?); + let queue = CandyTypedDeque::::new(Arc::clone(&store)); + + let qkey = 7u32; + queue.push_tail(&qkey, &10u32)?; + queue.push_tail(&qkey, &20u32)?; + queue.push_tail(&qkey, &30u32)?; + + let fwd: Vec<_> = queue.iter(&qkey).map(|r| r.unwrap().1).collect(); + assert_eq!(fwd, vec![10, 20, 30]); + + let rev: Vec<_> = queue.iter(&qkey).rev().map(|r| r.unwrap().1).collect(); + assert_eq!(rev, vec![30, 20, 10]); + + Ok(()) +} + +#[test] +fn test_typed_queue_discard() -> Result<()> { + let temp_dir = TempDir::new().unwrap(); + let store = Arc::new(CandyStore::open(temp_dir.path(), Config::default())?); + let queue = CandyTypedDeque::::new(Arc::clone(&store)); + + let qkey = 5u32; + queue.push_tail(&qkey, &1u32)?; + queue.push_tail(&qkey, &2u32)?; + assert_eq!(queue.len(&qkey)?, 2); + assert_eq!( + queue.range(&qkey)?, + 9223372036854775808usize..9223372036854775810usize + ); + + assert!(queue.discard(&qkey)?); + assert_eq!(queue.len(&qkey)?, 0); + assert!(queue.pop_head(&qkey)?.is_none()); + assert!(queue.is_empty(&qkey)?); + Ok(()) +} + +#[test] +fn test_typed_queue_empty_push_head_has_simple_range_semantics() -> Result<()> { + let temp_dir = TempDir::new().unwrap(); + let store = Arc::new(CandyStore::open(temp_dir.path(), Config::default())?); + let queue = CandyTypedDeque::::new(Arc::clone(&store)); + + let qkey = 11u32; + assert!(queue.range(&qkey)?.is_empty()); + + queue.push_head(&qkey, &99u32)?; + assert_eq!(queue.peek_head(&qkey)?, 
Some(99u32)); + assert_eq!(queue.peek_tail(&qkey)?, Some(99u32)); + assert_eq!(queue.len(&qkey)?, 1); + + Ok(()) +} diff --git a/tests/typed_store.rs b/tests/typed_store.rs new file mode 100644 index 0000000..ac0bba5 --- /dev/null +++ b/tests/typed_store.rs @@ -0,0 +1,76 @@ +use std::sync::Arc; + +use candystore::{CandyStore, CandyTypedStore, Config, MAX_USER_VALUE_SIZE, Result}; +use tempfile::TempDir; + +#[test] +fn test_typed_kv_store() -> Result<()> { + let temp_dir = TempDir::new().unwrap(); + let store = Arc::new(CandyStore::open(temp_dir.path(), Config::default())?); + let kv = CandyTypedStore::::new(Arc::clone(&store)); + + assert!(!kv.contains(&1u32)?); + assert!(kv.set(&1u32, &"one".to_string())?.is_none()); + assert!(kv.contains(&1u32)?); + assert_eq!(kv.get(&1u32)?, Some("one".to_string())); + assert_eq!(kv.set(&1u32, &"uno".to_string())?, Some("one".to_string())); + assert_eq!(kv.remove(&1u32)?, Some("uno".to_string())); + assert!(kv.get(&1u32)?.is_none()); + assert!(!kv.contains(&1u32)?); + Ok(()) +} + +#[test] +fn test_typed_atomic_ops() -> Result<()> { + let temp_dir = TempDir::new().unwrap(); + let store = Arc::new(CandyStore::open(temp_dir.path(), Config::default())?); + let kv = CandyTypedStore::::new(Arc::clone(&store)); + + assert_eq!( + kv.get_or_create(&7u32, &"seven".to_string())?, + "seven".to_string() + ); + assert_eq!( + kv.get_or_create(&7u32, &"changed".to_string())?, + "seven".to_string() + ); + assert_eq!(kv.get(&7u32)?, Some("seven".to_string())); + assert!( + kv.replace(&8u32, &"nope".to_string(), None::<&String>)? + .is_none() + ); + assert!(kv.get(&8u32)?.is_none()); + assert!( + kv.replace(&7u32, &"nope".to_string(), Some(&"wrong".to_string()))? 
+ .is_none() + ); + assert_eq!(kv.get(&7u32)?, Some("seven".to_string())); + assert_eq!( + kv.replace(&7u32, &"new".to_string(), None::<&String>)?, + Some("seven".to_string()) + ); + assert_eq!(kv.get(&7u32)?, Some("new".to_string())); + + Ok(()) +} + +#[test] +fn test_typed_big_value_round_trip() -> Result<()> { + let temp_dir = TempDir::new().unwrap(); + let store = Arc::new(CandyStore::open(temp_dir.path(), Config::default())?); + let kv = CandyTypedStore::>::new(Arc::clone(&store)); + + let key = 3u32; + let big1 = vec![1u8; MAX_USER_VALUE_SIZE + 123]; + let big2 = vec![2u8; MAX_USER_VALUE_SIZE * 2 + 17]; + + assert!(!kv.set_big(&key, &big1)?); + assert_eq!(kv.get_big(&key)?, Some(big1.clone())); + assert!(kv.set_big(&key, &big2)?); + assert_eq!(kv.get_big(&key)?, Some(big2.clone())); + assert!(kv.remove_big(&key)?); + assert!(kv.get_big(&key)?.is_none()); + assert!(!kv.remove_big(&key)?); + + Ok(()) +} From b9a19ed008ae0ced39b79e213aef1b6acf9b583d Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Wed, 25 Mar 2026 15:13:42 +0200 Subject: [PATCH 02/25] CI fixes --- .github/workflows/ci.yml | 58 +++++++++++++++++++++++++++++----------- tests/compaction.rs | 46 +++++++++++++++++++++---------- 2 files changed, 75 insertions(+), 29 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ab206b2..dec8bf7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,12 +2,15 @@ name: CI on: push: - branches: [main] pull_request: - branches: [main] + workflow_dispatch: + +permissions: + contents: read env: CARGO_TERM_COLOR: always + RUST_BACKTRACE: "1" RUSTFLAGS: "-D warnings" jobs: @@ -15,7 +18,12 @@ jobs: name: Rustfmt runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 + - name: Print runner info + run: | + echo "cores: $(nproc)" + echo "cpu model: $(lscpu | sed -n 's/^Model name:[[:space:]]*//p' | head -n 1)" + echo "ram: $(free -h | awk '/Mem:/ {print $2}')" - run: rustup component add 
rustfmt - run: cargo fmt -- --check @@ -23,8 +31,13 @@ jobs: name: Clippy runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/cache@v4 + - uses: actions/checkout@v6 + - name: Print runner info + run: | + echo "cores: $(nproc)" + echo "cpu model: $(lscpu | sed -n 's/^Model name:[[:space:]]*//p' | head -n 1)" + echo "ram: $(free -h | awk '/Mem:/ {print $2}')" + - uses: actions/cache@v5 with: path: | ~/.cargo/registry @@ -41,8 +54,13 @@ jobs: matrix: profile: [debug, release] steps: - - uses: actions/checkout@v4 - - uses: actions/cache@v4 + - uses: actions/checkout@v6 + - name: Print runner info + run: | + echo "cores: $(nproc)" + echo "cpu model: $(lscpu | sed -n 's/^Model name:[[:space:]]*//p' | head -n 1)" + echo "ram: $(free -h | awk '/Mem:/ {print $2}')" + - uses: actions/cache@v5 with: path: | ~/.cargo/registry @@ -52,9 +70,9 @@ jobs: - name: Run tests run: | if [ "${{ matrix.profile }}" = "release" ]; then - cargo test --release + cargo test --release -- --test-threads=1 else - cargo test + cargo test -- --test-threads=1 fi - name: Run examples run: | @@ -63,7 +81,8 @@ jobs: cargo run --example atomics cargo run --example lists cargo run --example typed - cargo run --release --example perf + - name: Run perf + run: cargo run --release --example perf test-windows: name: Test (Windows, ${{ matrix.profile }}) @@ -72,8 +91,16 @@ jobs: matrix: profile: [debug, release] steps: - - uses: actions/checkout@v4 - - uses: actions/cache@v4 + - uses: actions/checkout@v6 + - name: Print runner info + shell: pwsh + run: | + $cpu = Get-CimInstance Win32_Processor | Select-Object -First 1 + $computer = Get-CimInstance Win32_ComputerSystem + Write-Host "cores: $($cpu.NumberOfCores)" + Write-Host "cpu model: $($cpu.Name.Trim())" + Write-Host "ram: $([math]::Round($computer.TotalPhysicalMemory / 1GB, 2)) GB" + - uses: actions/cache@v5 with: path: | ~/.cargo/registry @@ -83,9 +110,9 @@ jobs: - name: Run tests run: | if ("${{ matrix.profile }}" -eq 
"release") { - cargo test --release + cargo test --release -- --test-threads=1 } else { - cargo test + cargo test -- --test-threads=1 } - name: Run examples run: | @@ -94,4 +121,5 @@ jobs: cargo run --example atomics cargo run --example lists cargo run --example typed - cargo run --release --example perf + - name: Run perf + run: cargo run --release --example perf diff --git a/tests/compaction.rs b/tests/compaction.rs index d8a3c7d..0819154 100644 --- a/tests/compaction.rs +++ b/tests/compaction.rs @@ -149,26 +149,34 @@ fn test_background_compaction_after_reopen_without_writes() -> Result<(), Error> fn test_background_compaction_drains_large_backlog() -> Result<(), Error> { let dir = tempdir().unwrap(); - let config = Config { - max_data_file_size: 256, - compaction_min_threshold: 128, + let write_config = Config { + // Keep one value per file while leaving each stale file below the setup-time + // compaction threshold so backlog creation does not race the background worker. + max_data_file_size: 200, + compaction_min_threshold: 160, + ..Config::default() + }; + + let compact_config = Config { + max_data_file_size: write_config.max_data_file_size, + compaction_min_threshold: 64, ..Config::default() }; + const NUM_KEYS: usize = 64; + { - let db = CandyStore::open(dir.path(), config)?; + let db = CandyStore::open(dir.path(), write_config)?; - for i in 0..180 { + for i in 0..NUM_KEYS { db.set(format!("key{i:04}"), vec![b'a'; 96])?; } - for i in 0..180 { + for i in 0..NUM_KEYS { assert_eq!(db.remove(format!("key{i:04}"))?, Some(vec![b'a'; 96])); } } - let db = CandyStore::open(dir.path(), config)?; - let count_data_files = || -> usize { std::fs::read_dir(dir.path()) .unwrap() @@ -181,26 +189,36 @@ fn test_background_compaction_drains_large_backlog() -> Result<(), Error> { .count() }; + let setup_files = count_data_files(); + assert!( + setup_files >= NUM_KEYS, + "expected backlog setup to create many stale files: {setup_files}" + ); + + let db = 
CandyStore::open(dir.path(), compact_config)?; + let files_before = count_data_files(); assert!( - files_before > 17, - "expected a large stale-file backlog before compaction starts: {files_before}" + files_before >= setup_files.saturating_sub(2), + "expected reopen to begin with nearly the full stale-file backlog: setup={setup_files}, before={files_before}" ); + let min_expected_drained = (setup_files / 2).max(8); + for _ in 0..300 { std::thread::sleep(std::time::Duration::from_millis(10)); - if count_data_files() <= files_before.saturating_sub(17) { + if count_data_files() + min_expected_drained <= files_before { break; } } let files_after = count_data_files(); assert!( - files_after <= files_before.saturating_sub(17), - "compaction worker should drain a large backlog after being woken: before={files_before}, after={files_after}" + files_after + min_expected_drained <= files_before, + "compaction worker should drain a large backlog after being woken: before={files_before}, after={files_after}, expected_drain={min_expected_drained}" ); - for i in 0..180 { + for i in 0..NUM_KEYS { assert_eq!( db.get(format!("key{i:04}"))?, None, From 6ad6742d14ba33ddcd710da55e8da86464e2e580 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Wed, 25 Mar 2026 18:28:39 +0200 Subject: [PATCH 03/25] Add stats backwards-compatibility and make db.clear operate without &mut self --- README.md | 1 + src/index_file.rs | 13 +- src/store.rs | 290 +++++++++++++++++++++++++++++++++------- src/store/compaction.rs | 103 ++++++++------ src/store/open.rs | 77 +++++++---- src/store/recovery.rs | 3 +- src/types.rs | 13 ++ tests/maintenance.rs | 2 +- tests/metrics.rs | 6 + tests/recovery.rs | 29 ++++ tests/shrink.rs | 2 +- 11 files changed, 412 insertions(+), 127 deletions(-) diff --git a/README.md b/README.md index 35ed843..e55e87d 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,7 @@ If the store is reopened while dirty, behavior depends on `Config::rebuild_strat - `ResetDBIfDirty`: clear the 
directory and recreate an empty store - `TrustDirtyIndexIfChecksumCorrectOrFail`: accept the dirty index only if row checksums match - `TrustDirtyIndexIfChecksumCorrectOrRebuild`: trust valid checksums, otherwise rebuild +- `TrustDirtyIndexIfChecksumCorrectOrReset`: trust valid checksums, otherwise reset the database ## Operational Notes diff --git a/src/index_file.rs b/src/index_file.rs index 7b10524..7da0083 100644 --- a/src/index_file.rs +++ b/src/index_file.rs @@ -12,6 +12,7 @@ use std::{ Arc, atomic::{AtomicU32, AtomicU64, Ordering}, }, + time::{Duration, Instant}, }; use crate::internal::{ @@ -592,15 +593,17 @@ impl IndexFile { self.full_header_ref().waste_levels[file_idx as usize].swap(0, Ordering::Relaxed) } - pub(crate) fn grow(&self, nsl: u64) -> Result<()> { + pub(crate) fn grow(&self, nsl: u64) -> Result> { let mut layout_mut = self.rows_table_mut(); let gsl = self.header_ref().global_split_level.load(Ordering::Acquire); if nsl <= gsl { - return Ok(()); + return Ok(None); } + let mut remap_dur = None; let required_rows_size = (1usize << nsl) * size_of::(); if layout_mut.row_guard.len() < required_rows_size { + let remap_start = Instant::now(); let alloc_split = nsl + self.config.remap_scaler as u64; let new_rows_size = (1usize << alloc_split) * size_of::(); @@ -627,12 +630,13 @@ impl IndexFile { } Self::maybe_lock_mmap(self.config.as_ref(), &layout_mut.row_guard); + remap_dur = Some(remap_start.elapsed()); } self.header_ref() .global_split_level .store(nsl, Ordering::Release); - Ok(()) + Ok(remap_dur) } pub(crate) fn num_rows(&self) -> usize { @@ -814,9 +818,8 @@ impl IndexFile { Ok(()) } - pub(crate) fn reset(&self) -> Result<()> { + pub(crate) fn reset(&self, mut row_table: RowsTableWriteGuard<'_>) -> Result<()> { let min_rows_size = MIN_INITIAL_ROWS * size_of::(); - let mut row_table = self.rows_table_mut(); #[cfg(target_os = "linux")] unsafe { diff --git a/src/store.rs b/src/store.rs index abc14d2..a2d1a4e 100644 --- a/src/store.rs +++ 
b/src/store.rs @@ -11,11 +11,12 @@ use siphasher::sip::SipHasher13; use std::{ collections::HashMap, hash::Hasher, - path::PathBuf, + path::{Path, PathBuf}, sync::{ Arc, - atomic::{AtomicBool, AtomicU16, AtomicU64, Ordering}, + atomic::{AtomicBool, AtomicU16, AtomicU32, AtomicU64, Ordering}, }, + time::Duration, }; use crate::{ @@ -23,9 +24,12 @@ use crate::{ index_file::{EntryPointer, IndexFile, RowLayout, RowReadGuard, RowWriteGuard}, internal::{ HashCoord, KeyNamespace, MAX_DATA_FILE_IDX, MAX_DATA_FILES, MIN_SPLIT_LEVEL, ROW_WIDTH, - aligned_data_entry_size, aligned_data_entry_waste, aligned_tombstone_entry_waste, sync_dir, + aligned_data_entry_size, aligned_data_entry_waste, aligned_tombstone_entry_waste, + index_file_path, index_rows_file_path, sync_dir, + }, + types::{ + Config, Error, GetOrCreateStatus, INITIAL_DATA_FILE_ORDINAL, ReplaceStatus, Result, Stats, }, - types::{Config, Error, GetOrCreateStatus, ReplaceStatus, Result, Stats}, }; #[derive(Default)] @@ -33,6 +37,44 @@ struct CompactionState { wake_requested: bool, } +#[derive(Default)] +struct InnerStats { + num_compactions: AtomicU64, + compaction_time_ms: AtomicU64, + compaction_errors: AtomicU64, + num_positive_lookups: AtomicU64, + num_negative_lookups: AtomicU64, + num_collisions: AtomicU64, + last_remap_dur_ms: AtomicU64, + last_compaction_dur_ms: AtomicU64, + last_compaction_reclaimed_bytes: AtomicU32, + last_compaction_moved_bytes: AtomicU32, + num_read_ops: AtomicU64, + num_read_bytes: AtomicU64, + num_write_ops: AtomicU64, + num_write_bytes: AtomicU64, +} + +impl InnerStats { + fn reset(&self) { + self.num_compactions.store(0, Ordering::Relaxed); + self.compaction_time_ms.store(0, Ordering::Relaxed); + self.compaction_errors.store(0, Ordering::Relaxed); + self.num_positive_lookups.store(0, Ordering::Relaxed); + self.num_negative_lookups.store(0, Ordering::Relaxed); + self.num_collisions.store(0, Ordering::Relaxed); + self.last_remap_dur_ms.store(0, Ordering::Relaxed); + 
self.last_compaction_dur_ms.store(0, Ordering::Relaxed); + self.last_compaction_reclaimed_bytes + .store(0, Ordering::Relaxed); + self.last_compaction_moved_bytes.store(0, Ordering::Relaxed); + self.num_read_ops.store(0, Ordering::Relaxed); + self.num_read_bytes.store(0, Ordering::Relaxed); + self.num_write_ops.store(0, Ordering::Relaxed); + self.num_write_bytes.store(0, Ordering::Relaxed); + } +} + struct StoreInner { base_path: PathBuf, config: Arc, @@ -46,24 +88,16 @@ struct StoreInner { compaction_state: Mutex, compaction_condvar: Condvar, shutting_down: AtomicBool, - num_compactions: AtomicU64, - compaction_time_ms: AtomicU64, - compaction_errors: AtomicU64, - num_positive_lookups: AtomicU64, - num_negative_lookups: AtomicU64, - num_read_ops: AtomicU64, - num_read_bytes: AtomicU64, - num_write_ops: AtomicU64, - num_write_bytes: AtomicU64, + stats: InnerStats, } /// A persistent key-value store backed by append-only data files and a mutable index. pub struct CandyStore { inner: Arc, _lockfile: fslock::LockFile, - compaction_thd: Option>, - allow_clean_shutdown: bool, - was_clean_shutdown: bool, + compaction_thd: Mutex>>, + allow_clean_shutdown: AtomicBool, + was_clean_shutdown: AtomicBool, } pub use list::{KVPair, ListIterator}; @@ -104,34 +138,91 @@ impl StoreInner { compaction_state: Mutex::new(CompactionState::default()), compaction_condvar: Condvar::new(), shutting_down: AtomicBool::new(false), - num_compactions: AtomicU64::new(0), - compaction_time_ms: AtomicU64::new(0), - compaction_errors: AtomicU64::new(0), - num_positive_lookups: AtomicU64::new(0), - num_negative_lookups: AtomicU64::new(0), - num_read_ops: AtomicU64::new(0), - num_read_bytes: AtomicU64::new(0), - num_write_ops: AtomicU64::new(0), - num_write_bytes: AtomicU64::new(0), + stats: InnerStats::default(), } } + fn reset(&self) -> Result<()> { + let _rotation_lock = self.rotation_lock.lock(); + let _logical_guards = self + .logical_locks + .iter() + .map(|lock| lock.write()) + .collect::>(); + 
let row_table = self.index_file.rows_table_mut(); + let mut data_files = self.data_files.write(); + + data_files.clear(); + self.index_file.reset(row_table)?; + + let index_path = index_file_path(self.base_path.as_path()); + let rows_path = index_rows_file_path(self.base_path.as_path()); + let mut removed_any = false; + for entry in std::fs::read_dir(&self.base_path).map_err(Error::IOError)? { + let entry = entry.map_err(Error::IOError)?; + let path = entry.path(); + if path.file_name().and_then(|name| name.to_str()) == Some(".lockfile") + || path == index_path + || path == rows_path + { + continue; + } + + let file_type = entry.file_type().map_err(Error::IOError)?; + if file_type.is_dir() { + std::fs::remove_dir_all(&path).map_err(Error::IOError)?; + removed_any = true; + } else if file_type.is_file() || file_type.is_symlink() { + std::fs::remove_file(&path).map_err(Error::IOError)?; + removed_any = true; + } + } + if removed_any { + sync_dir(self.base_path.as_path())?; + } + + let active_file_idx = 0; + let active_file_ordinal = INITIAL_DATA_FILE_ORDINAL; + let data_file = Arc::new(DataFile::create( + self.base_path.as_path(), + self.config.clone(), + active_file_idx, + active_file_ordinal, + )?); + data_files.insert(active_file_idx, data_file); + self.active_file_idx + .store(active_file_idx, Ordering::Release); + self.active_file_ordinal + .store(active_file_ordinal, Ordering::Release); + self.stats.reset(); + + Ok(()) + } + fn record_lookup(&self, found: bool) { if found { - self.num_positive_lookups.fetch_add(1, Ordering::Relaxed); + self.stats + .num_positive_lookups + .fetch_add(1, Ordering::Relaxed); } else { - self.num_negative_lookups.fetch_add(1, Ordering::Relaxed); + self.stats + .num_negative_lookups + .fetch_add(1, Ordering::Relaxed); } } fn record_read(&self, bytes: u64) { - self.num_read_ops.fetch_add(1, Ordering::Relaxed); - self.num_read_bytes.fetch_add(bytes, Ordering::Relaxed); + self.stats.num_read_ops.fetch_add(1, Ordering::Relaxed); + 
self.stats + .num_read_bytes + .fetch_add(bytes, Ordering::Relaxed); } fn record_write(&self, bytes: u64) { - self.num_write_ops.fetch_add(1, Ordering::Relaxed); - self.num_write_bytes.fetch_add(bytes, Ordering::Relaxed); + self.stats.num_write_ops.fetch_add(1, Ordering::Relaxed); + self.stats + .num_write_bytes + .fetch_add(bytes, Ordering::Relaxed); } fn signal_compaction_scan(&self) { @@ -202,8 +293,13 @@ impl StoreInner { let low_row_idx = hc.row_index(sl); let high_row_idx = low_row_idx | (1 << sl); - if nsl > gsl { - self.index_file.grow(nsl)?; + if nsl > gsl + && let Some(remap_dur) = self.index_file.grow(nsl)? + { + self.stats.last_remap_dur_ms.store( + u64::try_from(remap_dur.as_millis()).unwrap_or(u64::MAX), + Ordering::Relaxed, + ); } let rows_table = self.index_file.rows_table(); @@ -376,6 +472,10 @@ impl StoreInner { } impl CandyStore { + pub fn get_db_path(&self) -> &Path { + &self.inner.base_path + } + fn logical_read_guard(&self, ns: KeyNamespace, key: &[u8]) -> RwLockReadGuard<'_, ()> { self.inner.logical_locks[self.inner.logical_lock_index(ns, key)].read() } @@ -436,6 +536,11 @@ impl CandyStore { }; if kv.key() == key { return Ok(Some(kv.value().to_vec())); + } else { + self.inner + .stats + .num_collisions + .fetch_add(1, Ordering::Relaxed); } } Ok(None) @@ -573,6 +678,11 @@ impl CandyStore { self.track_update_waste(src_file_idx, src_file_ordinal, klen, vlen); self.record_replace_stats(klen, vlen, key.len(), val.len()); return Ok(Some(old_val)); + } else { + self.inner + .stats + .num_collisions + .fetch_add(1, Ordering::Relaxed); } } @@ -829,12 +939,12 @@ impl CandyStore { /// Returns whether the store was opened from a clean shutdown state. pub fn was_clean_shutdown(&self) -> bool { - self.was_clean_shutdown + self.was_clean_shutdown.load(Ordering::Relaxed) } /// Returns the number of background compaction errors observed since open. 
pub fn compaction_errors(&self) -> u64 { - self.inner.compaction_errors.load(Ordering::Relaxed) + self.inner.stats.compaction_errors.load(Ordering::Relaxed) } /// Returns the number of currently live entries. @@ -850,7 +960,7 @@ impl CandyStore { } /// Shrinks the index when the reclaimable row ratio is at least `min_wasted_ratio`. - pub fn shrink_to_fit(&self, min_wasted_ratio: f64) -> Result { + pub fn shrink_to_fit_blocking(&self, min_wasted_ratio: f64) -> Result { let _key_guards = self .inner .logical_locks @@ -879,11 +989,6 @@ impl CandyStore { self.inner.index_file.shrink(min_rows_cfg) } - /// Synchronous alias for [`CandyStore::shrink_to_fit`]. - pub fn shrink_index_blocking(&self, min_wasted_ratio: f64) -> Result { - self.shrink_to_fit(min_wasted_ratio) - } - /// Returns a snapshot of store statistics and accounting counters. pub fn stats(&self) -> Stats { let h = self.inner.index_file.header_ref(); @@ -898,15 +1003,43 @@ impl CandyStore { capacity, num_items, index_size_bytes: self.inner.index_file.file_size_bytes(), - num_compactions: self.inner.num_compactions.load(Ordering::Relaxed), - compaction_time_ms: self.inner.compaction_time_ms.load(Ordering::Relaxed), + num_compactions: self.inner.stats.num_compactions.load(Ordering::Relaxed), + compaction_time_ms: self.inner.stats.compaction_time_ms.load(Ordering::Relaxed), num_data_files: self.inner.data_files.read().len() as u64, - num_positive_lookups: self.inner.num_positive_lookups.load(Ordering::Relaxed), - num_negative_lookups: self.inner.num_negative_lookups.load(Ordering::Relaxed), - num_read_ops: self.inner.num_read_ops.load(Ordering::Relaxed), - num_read_bytes: self.inner.num_read_bytes.load(Ordering::Relaxed), - num_write_ops: self.inner.num_write_ops.load(Ordering::Relaxed), - num_write_bytes: self.inner.num_write_bytes.load(Ordering::Relaxed), + num_positive_lookups: self + .inner + .stats + .num_positive_lookups + .load(Ordering::Relaxed), + num_negative_lookups: self + .inner + .stats + 
.num_negative_lookups + .load(Ordering::Relaxed), + num_collisions: self.inner.stats.num_collisions.load(Ordering::Relaxed), + last_remap_dur: Duration::from_millis( + self.inner.stats.last_remap_dur_ms.load(Ordering::Relaxed), + ), + last_compaction_dur: Duration::from_millis( + self.inner + .stats + .last_compaction_dur_ms + .load(Ordering::Relaxed), + ), + last_compaction_reclaimed_bytes: self + .inner + .stats + .last_compaction_reclaimed_bytes + .load(Ordering::Relaxed), + last_compaction_moved_bytes: self + .inner + .stats + .last_compaction_moved_bytes + .load(Ordering::Relaxed), + num_read_ops: self.inner.stats.num_read_ops.load(Ordering::Relaxed), + num_read_bytes: self.inner.stats.num_read_bytes.load(Ordering::Relaxed), + num_write_ops: self.inner.stats.num_write_ops.load(Ordering::Relaxed), + num_write_bytes: self.inner.stats.num_write_bytes.load(Ordering::Relaxed), num_created: h.num_created.load(Ordering::Relaxed), num_removed: h.num_removed.load(Ordering::Relaxed), num_replaced: h.num_replaced.load(Ordering::Relaxed), @@ -923,8 +1056,8 @@ impl CandyStore { } /// Simulates a crash by dropping the instance without performing clean shutdown operations (e.g. marking the index as clean). 
- pub fn _abort_for_testing(mut self) { - self.allow_clean_shutdown = false; + pub fn _abort_for_testing(self) { + self.allow_clean_shutdown.store(false, Ordering::Relaxed); drop(self); } } @@ -942,10 +1075,63 @@ mod tests { assert_eq!(db.compaction_errors(), 0); - db.inner.compaction_errors.store(7, Ordering::Relaxed); + db.inner.stats.compaction_errors.store(7, Ordering::Relaxed); assert_eq!(db.compaction_errors(), 7); Ok(()) } + + #[test] + fn test_stats_reports_transient_collision_counter() -> Result<()> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + db.inner.stats.num_collisions.store(11, Ordering::Relaxed); + + assert_eq!(db.stats().num_collisions, 11); + + Ok(()) + } + + #[test] + fn test_stats_reports_last_remap_duration() -> Result<()> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + db.inner + .stats + .last_remap_dur_ms + .store(17, Ordering::Relaxed); + + assert_eq!(db.stats().last_remap_dur, Duration::from_millis(17)); + + Ok(()) + } + + #[test] + fn test_stats_reports_last_compaction_stats() -> Result<()> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + db.inner + .stats + .last_compaction_dur_ms + .store(23, Ordering::Relaxed); + db.inner + .stats + .last_compaction_reclaimed_bytes + .store(1234, Ordering::Relaxed); + db.inner + .stats + .last_compaction_moved_bytes + .store(5678, Ordering::Relaxed); + + let stats = db.stats(); + assert_eq!(stats.last_compaction_dur, Duration::from_millis(23)); + assert_eq!(stats.last_compaction_reclaimed_bytes, 1234); + assert_eq!(stats.last_compaction_moved_bytes, 5678); + + Ok(()) + } } diff --git a/src/store/compaction.rs b/src/store/compaction.rs index 5c15279..f170095 100644 --- a/src/store/compaction.rs +++ b/src/store/compaction.rs @@ -1,7 +1,4 @@ -use std::{ - path::Path, - sync::{Arc, atomic::Ordering}, -}; +use std::sync::{Arc, atomic::Ordering}; use 
crate::{ index_file::EntryPointer, @@ -12,6 +9,11 @@ use crate::{ use super::{CandyStore, StoreInner}; +pub(super) struct CompactionOutcome { + pub(super) reclaimed_bytes: u32, + pub(super) moved_bytes: u32, +} + impl StoreInner { pub(super) fn compact_file( &self, @@ -19,28 +21,43 @@ impl StoreInner { expected_ordinal: u64, pacer: &mut Pacer, #[cfg(windows)] pending_deletions: &mut Vec, - ) -> Result<()> { + ) -> Result { if self.active_file_idx.load(Ordering::Acquire) == file_idx { - return Ok(()); + return Ok(CompactionOutcome { + reclaimed_bytes: 0, + moved_bytes: 0, + }); } let source_file = match self.data_file(file_idx) { Ok(f) => f, - Err(Error::MissingDataFile(_)) => return Ok(()), + Err(Error::MissingDataFile(_)) => { + return Ok(CompactionOutcome { + reclaimed_bytes: 0, + moved_bytes: 0, + }); + } Err(e) => return Err(e), }; if source_file.file_ordinal != expected_ordinal { - return Ok(()); + return Ok(CompactionOutcome { + reclaimed_bytes: 0, + moved_bytes: 0, + }); } let mut offset = 0u64; + let mut moved_bytes = 0u64; let mut read_buf = Vec::new(); let mut buf_file_offset = 0u64; let mut match_scratch = Vec::new(); loop { if self.shutting_down.load(Ordering::Acquire) { - return Ok(()); + return Ok(CompactionOutcome { + reclaimed_bytes: 0, + moved_bytes: 0, + }); } let Some((kv, entry_offset, next_offset)) = @@ -84,6 +101,7 @@ impl StoreInner { .ok_or(Error::MissingDataFile(active_idx))?; let (file_off, size) = active_file.append_kv(ns, key, val)?; self.record_write(size as u64); + moved_bytes = moved_bytes.saturating_add(size as u64); row.replace_pointer( col, EntryPointer::new( @@ -120,47 +138,26 @@ impl StoreInner { #[cfg(not(windows))] Err(err) => return Err(Error::IOError(err)), } - Ok(()) + Ok(CompactionOutcome { + reclaimed_bytes: reclaimed, + moved_bytes: moved_bytes.min(u64::from(u32::MAX)) as u32, + }) } } impl CandyStore { - pub(super) fn stop_compaction(&mut self) { + pub(super) fn stop_compaction(&self) { 
self.inner.shutting_down.store(true, Ordering::Release); { let mut state = self.inner.compaction_state.lock(); state.wake_requested = true; self.inner.compaction_condvar.notify_all(); } - if let Some(thd) = self.compaction_thd.take() { + if let Some(thd) = self.compaction_thd.lock().take() { let _ = thd.join(); } } - pub(super) fn clear_directory_contents(base_path: &Path) -> Result<()> { - let mut removed_any = false; - for entry in std::fs::read_dir(base_path).map_err(Error::IOError)? { - let entry = entry.map_err(Error::IOError)?; - let path = entry.path(); - if path.file_name().and_then(|name| name.to_str()) == Some(".lockfile") { - continue; - } - - let file_type = entry.file_type().map_err(Error::IOError)?; - if file_type.is_dir() { - std::fs::remove_dir_all(&path).map_err(Error::IOError)?; - removed_any = true; - } else if file_type.is_file() || file_type.is_symlink() { - std::fs::remove_file(&path).map_err(Error::IOError)?; - removed_any = true; - } - } - if removed_any { - sync_dir(base_path)?; - } - Ok(()) - } - #[cfg(windows)] fn retry_pending_deletions(ctx: &StoreInner, pending: &mut Vec) { let before = pending.len(); @@ -170,7 +167,13 @@ impl CandyStore { } } - pub(super) fn start_compaction(&mut self) { + pub(super) fn start_compaction(&self) { + let mut compaction_thd = self.compaction_thd.lock(); + if compaction_thd.is_some() { + return; + } + + self.inner.shutting_down.store(false, Ordering::Release); let ctx = Arc::clone(&self.inner); let thd = std::thread::spawn(move || { let throughput_bytes_per_sec = @@ -212,14 +215,26 @@ impl CandyStore { #[cfg(windows)] &mut pending_deletions, ); - ctx.compaction_time_ms - .fetch_add(t0.elapsed().as_millis() as u64, Ordering::Relaxed); + let compaction_millis = + u64::try_from(t0.elapsed().as_millis()).unwrap_or(u64::MAX); + ctx.stats + .compaction_time_ms + .fetch_add(compaction_millis, Ordering::Relaxed); match res { - Ok(()) => { - ctx.num_compactions.fetch_add(1, Ordering::Relaxed); + Ok(outcome) => { + 
ctx.stats.num_compactions.fetch_add(1, Ordering::Relaxed); + ctx.stats + .last_compaction_dur_ms + .store(compaction_millis, Ordering::Relaxed); + ctx.stats + .last_compaction_reclaimed_bytes + .store(outcome.reclaimed_bytes, Ordering::Relaxed); + ctx.stats + .last_compaction_moved_bytes + .store(outcome.moved_bytes, Ordering::Relaxed); } Err(_e) => { - ctx.compaction_errors.fetch_add(1, Ordering::Relaxed); + ctx.stats.compaction_errors.fetch_add(1, Ordering::Relaxed); } } } @@ -228,7 +243,7 @@ impl CandyStore { } }); - self.compaction_thd = Some(thd); + *compaction_thd = Some(thd); self.inner.signal_compaction_scan(); } } @@ -237,7 +252,7 @@ impl Drop for CandyStore { fn drop(&mut self) { self.stop_compaction(); - if !self.allow_clean_shutdown { + if !self.allow_clean_shutdown.load(Ordering::Relaxed) { return; } let data_files_synced = self diff --git a/src/store/open.rs b/src/store/open.rs index 6ea96f6..f9d8a08 100644 --- a/src/store/open.rs +++ b/src/store/open.rs @@ -7,7 +7,9 @@ use std::{ use crate::{ data_file::DataFile, index_file::IndexFile, - internal::{MAX_REPRESENTABLE_FILE_SIZE, is_resettable_open_error, parse_data_file_idx}, + internal::{ + MAX_REPRESENTABLE_FILE_SIZE, is_resettable_open_error, parse_data_file_idx, sync_dir, + }, types::{Config, Error, INITIAL_DATA_FILE_ORDINAL, RebuildStrategy, Result}, }; @@ -15,7 +17,27 @@ use super::{CandyStore, DirtyOpenAction, OpenState, StoreInner}; impl CandyStore { fn clear_db_files(base_path: &Path) -> Result<()> { - Self::clear_directory_contents(base_path) + let mut removed_any = false; + for entry in std::fs::read_dir(base_path).map_err(Error::IOError)? 
{ + let entry = entry.map_err(Error::IOError)?; + let path = entry.path(); + if path.file_name().and_then(|name| name.to_str()) == Some(".lockfile") { + continue; + } + + let file_type = entry.file_type().map_err(Error::IOError)?; + if file_type.is_dir() { + std::fs::remove_dir_all(&path).map_err(Error::IOError)?; + removed_any = true; + } else if file_type.is_file() || file_type.is_symlink() { + std::fs::remove_file(&path).map_err(Error::IOError)?; + removed_any = true; + } + } + if removed_any { + sync_dir(base_path)?; + } + Ok(()) } fn open_state(base_path: &Path, config: Arc) -> Result { @@ -153,6 +175,17 @@ impl CandyStore { Err(err) => return Err(err), } } + RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrReset => { + match state.index_file.verify_row_checksums() { + Ok(()) => DirtyOpenAction::TrustIndex, + Err(Error::IOError(io_err)) + if io_err.kind() == std::io::ErrorKind::InvalidData => + { + DirtyOpenAction::ResetDb + } + Err(err) => return Err(err), + } + } }; if matches!(action, DirtyOpenAction::ResetDb) { @@ -169,10 +202,10 @@ impl CandyStore { /// Opens a store at `path`, creating it if needed. /// /// If `config.reset_on_invalid_data` is enabled, or if - /// `config.rebuild_strategy` is `ResetDBIfDirty`, opening may reset the - /// database directory by removing all contents and recreating fresh store - /// files. While the store is open, the active `.lockfile` is preserved so - /// the directory remains locked against concurrent opens. + /// `config.rebuild_strategy` can reset the database during dirty recovery, + /// opening may remove all contents and recreate fresh store files. While + /// the store is open, the active `.lockfile` is preserved so the directory + /// remains locked against concurrent opens. 
pub fn open(path: impl AsRef, config: Config) -> Result { let base_path = path.as_ref().to_path_buf(); std::fs::create_dir_all(&base_path).map_err(Error::IOError)?; @@ -186,7 +219,7 @@ impl CandyStore { let was_clean_shutdown = state.was_clean_shutdown; let num_logical_locks = config.max_concurrency.max(8).next_power_of_two(); - let mut store = Self { + let store = Self { inner: Arc::new(StoreInner::new( base_path, config.clone(), @@ -194,9 +227,9 @@ impl CandyStore { num_logical_locks, )), _lockfile: lockfile, - compaction_thd: None, - allow_clean_shutdown: was_clean_shutdown, - was_clean_shutdown, + compaction_thd: parking_lot::Mutex::new(None), + allow_clean_shutdown: std::sync::atomic::AtomicBool::new(was_clean_shutdown), + was_clean_shutdown: std::sync::atomic::AtomicBool::new(was_clean_shutdown), }; if !was_clean_shutdown { @@ -204,7 +237,9 @@ impl CandyStore { DirtyOpenAction::None | DirtyOpenAction::ResetDb | DirtyOpenAction::TrustIndex => {} DirtyOpenAction::RebuildIndex => store.recover_index()?, } - store.allow_clean_shutdown = true; + store + .allow_clean_shutdown + .store(true, std::sync::atomic::Ordering::Relaxed); } store.start_compaction(); @@ -218,21 +253,17 @@ impl CandyStore { /// subdirectories, before recreating the store files. While the store is /// open, the active `.lockfile` is preserved so the directory remains /// locked against concurrent opens. - pub fn clear(&mut self) -> Result<()> { - let base_path = self.inner.base_path.clone(); - let config = self.inner.config.clone(); - let num_logical_locks = self.inner.logical_locks.len(); - + pub fn clear(&self) -> Result<()> { + // stop bg thread self.stop_compaction(); - self.inner.data_files.write().clear(); - Self::clear_db_files(base_path.as_path())?; - - let state = Self::open_state(base_path.as_path(), config.clone())?; - self.inner = Arc::new(StoreInner::new(base_path, config, state, num_logical_locks)); + // now we're single-threaded. 
take all locks and clear state + self.inner.reset()?; - self.allow_clean_shutdown = true; - self.was_clean_shutdown = true; + self.allow_clean_shutdown + .store(true, std::sync::atomic::Ordering::Relaxed); + self.was_clean_shutdown + .store(true, std::sync::atomic::Ordering::Relaxed); self.start_compaction(); Ok(()) diff --git a/src/store/recovery.rs b/src/store/recovery.rs index 379c9ed..5d9fbf0 100644 --- a/src/store/recovery.rs +++ b/src/store/recovery.rs @@ -14,7 +14,8 @@ use super::CandyStore; impl CandyStore { pub(super) fn recover_index(&self) -> Result<()> { - self.inner.index_file.reset()?; + let row_table = self.inner.index_file.rows_table_mut(); + self.inner.index_file.reset(row_table)?; let mut sorted_files: Vec> = self.inner.data_files.read().values().cloned().collect(); diff --git a/src/types.rs b/src/types.rs index 9af9143..daa7eb1 100644 --- a/src/types.rs +++ b/src/types.rs @@ -9,6 +9,7 @@ pub(crate) const ROW_WIDTH: usize = crate::internal::ROW_WIDTH; pub(crate) const INITIAL_DATA_FILE_ORDINAL: u64 = 0x00bd_38a0_2a35_1cdf; use crate::internal::MIN_INITIAL_ROWS; +use std::time::Duration; #[derive(Debug, Clone, Copy)] /// How opening a store should handle a dirty index. @@ -27,6 +28,8 @@ pub enum RebuildStrategy { TrustDirtyIndexIfChecksumCorrectOrFail, /// Trust a dirty index if row checksums match; otherwise rebuild. TrustDirtyIndexIfChecksumCorrectOrRebuild, + /// Trust a dirty index if row checksums match; otherwise reset the database. + TrustDirtyIndexIfChecksumCorrectOrReset, } #[derive(Debug, Clone, Copy)] @@ -231,6 +234,16 @@ pub struct Stats { pub num_positive_lookups: u64, /// Number of failed key lookups. pub num_negative_lookups: u64, + /// Number of probes that had to inspect a second matching index entry. + pub num_collisions: u64, + /// Time spent in the most recent grow remap operation. + pub last_remap_dur: Duration, + /// Time spent in the most recent successful file compaction. 
+ pub last_compaction_dur: Duration, + /// Bytes reclaimed by the most recent successful file compaction. + pub last_compaction_reclaimed_bytes: u32, + /// Bytes rewritten by the most recent successful file compaction. + pub last_compaction_moved_bytes: u32, /// Number of read operations performed against data files. pub num_read_ops: u64, /// Total bytes read from data files. diff --git a/tests/maintenance.rs b/tests/maintenance.rs index 0d3d758..a843cfd 100644 --- a/tests/maintenance.rs +++ b/tests/maintenance.rs @@ -13,7 +13,7 @@ fn test_clear_resets_store_files_and_contents() -> Result<(), Error> { ..Config::default() }; - let mut db = CandyStore::open(dir.path(), config)?; + let db = CandyStore::open(dir.path(), config)?; for i in 0..100 { db.set(format!("key{i:04}"), vec![b'x'; 64])?; diff --git a/tests/metrics.rs b/tests/metrics.rs index 141b055..e0b28e1 100644 --- a/tests/metrics.rs +++ b/tests/metrics.rs @@ -1,4 +1,5 @@ use candystore::{CandyStore, Config}; +use std::time::Duration; const ROW_WIDTH: u64 = (16 * 21) as u64; @@ -19,6 +20,11 @@ fn test_metrics_updates() -> Result<(), Box> { assert_eq!(stats.fill_level(), 0.0); assert_eq!(stats.num_positive_lookups, 0); assert_eq!(stats.num_negative_lookups, 0); + assert_eq!(stats.num_collisions, 0); + assert_eq!(stats.last_remap_dur, Duration::ZERO); + assert_eq!(stats.last_compaction_dur, Duration::ZERO); + assert_eq!(stats.last_compaction_reclaimed_bytes, 0); + assert_eq!(stats.last_compaction_moved_bytes, 0); assert_eq!(stats.num_read_ops, 0); assert_eq!(stats.num_read_bytes, 0); assert_eq!(stats.num_write_ops, 0); diff --git a/tests/recovery.rs b/tests/recovery.rs index cd035f0..9eea297 100644 --- a/tests/recovery.rs +++ b/tests/recovery.rs @@ -672,6 +672,35 @@ fn test_trust_dirty_index_rebuilds_on_checksum_mismatch() -> Result<(), Error> { Ok(()) } +#[test] +fn test_trust_dirty_index_resets_on_checksum_mismatch() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + 
rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrReset, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key", "value")?; + db._abort_for_testing(); + } + + fs::write(dir.path().join("extra.txt"), b"junk").map_err(Error::IOError)?; + fs::create_dir(dir.path().join("extra_dir")).map_err(Error::IOError)?; + fs::write(dir.path().join("extra_dir").join("nested.txt"), b"junk").map_err(Error::IOError)?; + + common::corrupt_first_row_checksum(dir.path()); + + let db = CandyStore::open(dir.path(), config)?; + assert!(!db.was_clean_shutdown()); + assert!(db.get("key")?.is_none()); + assert!(!dir.path().join("extra.txt").exists()); + assert!(!dir.path().join("extra_dir").exists()); + + Ok(()) +} + #[test] fn test_reset_db_if_dirty_clears_state() -> Result<(), Error> { let dir = tempdir().unwrap(); diff --git a/tests/shrink.rs b/tests/shrink.rs index 8ee1460..540ece8 100644 --- a/tests/shrink.rs +++ b/tests/shrink.rs @@ -26,7 +26,7 @@ fn test_shrink_to_fit_preserves_remaining_keys() -> Result<()> { store.remove(key.as_bytes())?; } - let shrunk_rows = store.shrink_to_fit(0.2)?; + let shrunk_rows = store.shrink_to_fit_blocking(0.2)?; assert!(shrunk_rows > 0); assert!(store.capacity() <= before); From 3799b1e97aa60f5f70ad1461be9700da509f6697 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Thu, 26 Mar 2026 13:43:30 +0200 Subject: [PATCH 04/25] Compaction scans index rather than files; add progressive rebuild; better organization of index header structure --- Cargo.toml | 4 + DESIGN.md | 325 --------------------------- README.md | 151 +++---------- src/index_file.rs | 43 +++- src/internal.rs | 1 + src/store.rs | 53 +++-- src/store/compaction.rs | 480 +++++++++++++++++++++++++++++++++------- src/store/open.rs | 22 +- src/store/recovery.rs | 144 +++++++++++- src/types.rs | 2 +- tests/compaction.rs | 33 +-- tests/crasher.rs | 1 + tests/recovery.rs | 292 +++++++++++++++++++++++- 13 files changed, 981 insertions(+), 570 
deletions(-) delete mode 100644 DESIGN.md diff --git a/Cargo.toml b/Cargo.toml index af99fe1..6ca4d27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,10 @@ name = "candystore" version = "0.6.0" edition = "2024" +license = "Apache-2.0" +keywords = ["key-value", "database", "persistent", "store", "rocksdb"] +description = "A lean, efficient and fast peristent in-process key-value store" +repository = "https://github.com/sweet-security/candystore" [dependencies] crc16-ibm3740-fast = "0.5.0" diff --git a/DESIGN.md b/DESIGN.md deleted file mode 100644 index d07b595..0000000 --- a/DESIGN.md +++ /dev/null @@ -1,325 +0,0 @@ -# Design - -This document describes the storage model, concurrency strategy, recovery rules, and collection semantics used by `candystore`. - -## Goals - -- Fast point lookups for embedded workloads -- Durable append-only writes to data files -- Recoverability by replaying data files when the index is dirty -- Support for ordered collections and large values without a separate database layer -- Simple operational model for single-process ownership with multi-threaded access - -## High-Level Architecture - -The store has two durable layers: - -1. `data_XXXX` files: append-only log-structured storage -2. `index` + `rows`: mutable lookup structure used for fast reads - -ASCII overview: - -```text - +-------------------+ - set/get/remove -> | CandyStore API | - +---------+---------+ - | - v - +----------------------+ - | in-memory coordination| - | row locks + counters | - +-----+------------+----+ - | | - | | - v v - +-----------+ +----------------+ - | index/rows | | data_0000..N | - | mmap index | | append-only | - +-----------+ +----------------+ - ^ | - | | - +------------+ - rebuild / compaction -``` - -The important design choice is that the index is rebuildable. The data files are the durable source of truth for recovery. 
- -## Index Layout - -The index is stored in two files: - -- `index`: header, counters, global split level, dirty flag, hash key, waste accounting -- `rows`: fixed-size hash rows stored in page-sized records - -Each row contains: - -- a split level -- a checksum -- fixed-width signature slots -- fixed-width entry pointers - -ASCII row model: - -```text -+--------------------------------------------------------------+ -| split_level | checksum | signatures[ROW_WIDTH] | pointers[] | -+--------------------------------------------------------------+ -``` - -Pointers are compact and encode: - -- data file index -- aligned file offset -- size hint -- masked row selector bits - -The index is optimized for lookup speed, not for being the primary source of truth. - -## Data Files - -Each data file starts with a fixed header page containing: - -- file signature -- file format version -- file ordinal - -After the header page, entries are appended at 16-byte alignment. - -Two entry kinds exist today: - -- `Data`: key + value -- `Tombstone`: key only - -ASCII entry layout: - -```text -Data entry ----------- -u32 header -u16 key_len -u16 value_len -value bytes -key bytes -u16 checksum -padding to 16-byte alignment - -Tombstone entry ---------------- -u32 header -u16 key_len -key bytes -u16 checksum -padding to 16-byte alignment -``` - -The `header` packs: - -- an entry-offset-derived magic value -- namespace bits -- entry type bits - -Checksums cover the logical entry bytes before alignment padding. - -## Namespaces - -Namespaces partition the key space inside the same physical store: - -- user KV entries -- queue metadata and queue data -- list metadata, list index, and list data -- large-value metadata and chunks -- typed variants of the above - -This lets all features share the same physical storage while keeping their internal keys distinct. - -## Write Path - -Normal writes are append-and-swing-pointer operations. 
- -ASCII write flow: - -```text -client write - | - v -hash key -> lock logical shard -> find existing row slot - | - +--> append new entry to active data file - | - +--> update row pointer in index - | - +--> account old entry as waste when replacing/removing -``` - -Important consequences: - -- updates never overwrite old data in place -- remove operations append tombstones -- old versions remain in old data files until compaction removes them - -## Recovery Path - -On open, the store marks the index dirty immediately. This makes an interrupted open conservative. - -If the previous shutdown was unclean and the configured strategy rebuilds, recovery does: - -```text -reset index state -sort data files by ordinal -for each data file in order: - scan aligned entries - validate checksum and entry shape - replay data/tombstone into the index -``` - -Recovery invariants: - -- later entries win over earlier ones -- tombstones remove prior live values -- file order is determined only by file ordinal -- duplicate data-file ordinals are invalid because they make replay order ambiguous -- unknown entry types and unknown namespaces are treated as invalid data and fail rebuild -- recovered entries are validated against current key/value size limits before indexing - -This is the key reason data files can act as the long-term ground truth for the current format. - -## Compaction - -Compaction rewrites live entries from an old file into the active file, then deletes the old file. - -ASCII compaction flow: - -```text -old data file - | - v -scan entries in order - | - +--> if entry is still the current live version: - | append to active file - | replace pointer - | - +--> otherwise skip - | - v -delete compacted file -``` - -Compaction is rate-limited by a token-bucket pacer using `compaction_throughput_bytes_per_sec`. - -## Locking Model - -There are two main locking layers: - -1. per-row / per-shard locking around index mutation -2. 
logical key locks to serialize conflicting higher-level operations - -ASCII concurrency view: - -```text -thread A thread B - | | - +--> logical lock ---+ - | - v - row/shard lock - | - v - mutate row -``` - -This design allows unrelated keys to proceed concurrently while keeping conflicting operations consistent. - -The store also uses an on-disk `.lockfile` so only one process owns a store directory at a time. - -## Collection Semantics - -### Lists - -Lists are ordered maps keyed by `(list_key, item_key)`. - -- each list has metadata with `head`, `tail`, and `count` -- list order is stored through an index from logical position to item key -- item data stores the user value plus the logical index suffix -- updates can preserve position or promote the item to the tail, depending on the API -- retain/compaction can rewrite sparse lists into compact spans - -### Queues - -Queues are ordered sequences under a queue key. - -- each queue has metadata with `head`, `tail`, and `count` -- queue entries are stored by synthetic logical index -- head/tail peeks and pops skip holes caused by removals -- `queue_range` exposes the current logical span, not a dense ordinal count - -### Large Values - -Large values use queue-backed chunk storage. - -- metadata records identify the chunk queue -- data is split into chunk entries -- reads concatenate chunks in queue order - -## Typed API Design - -Typed wrappers are thin adapters over the untyped APIs. - -- keys and values are encoded with `databuf` -- type-specific IDs are appended to typed root keys -- typed collections reuse the same lower-level storage semantics - -This means typed and untyped APIs share the same durability and recovery model. - -## Dirty Shutdown Semantics - -Clean shutdown requires: - -1. background compaction thread stopped -2. data files synced -3. index header dirty flag cleared and flushed - -If any of those steps do not happen, the next open treats the index as dirty. 
- -## Format and Compatibility Notes - -Current compatibility assumptions: - -- the index stores the effective hash key, and reopen reuses the persisted key for existing stores -- data-file entry types and namespaces are intentionally strict during rebuild -- the current code does not define a stable cross-version migration policy yet - -For a future `1.0`, the minimum compatibility policy should define: - -- whether old data-file versions remain readable -- whether hash keys are user-managed forever or migrated differently -- what entry types and namespaces are reserved for future expansion -- how recovery should behave when a newer writer introduces unknown on-disk constructs - -## Why the Data Files Are the Source of Truth - -The index can be reset and replayed from the data files. - -That is only true if the data files remain: - -- append-only -- checksummed -- strictly parseable -- replayable in deterministic order - -The recent hardening work in this repository specifically enforces that rebuild fails closed on unknown entry metadata instead of silently discarding it. 
- -## Practical Limits - -- maximum user key length: `MAX_USER_KEY_SIZE` -- maximum inline value length: `MAX_USER_VALUE_SIZE` -- large values use chunked storage instead of a single inline entry -- maximum data-file size is bounded by pointer encoding limits - -## Suggested Future 1.0 Checklist - -- write a formal on-disk format spec for data files -- define the compatibility promise for `DATA_FILE_VERSION` -- decide whether hash-key compatibility remains config-managed -- add targeted corruption tests for truncated, unknown-type, and unknown-namespace entries -- document operational upgrade expectations explicitly \ No newline at end of file diff --git a/README.md b/README.md index e55e87d..cf2b873 100644 --- a/README.md +++ b/README.md @@ -1,132 +1,45 @@ -# candystore +# CandyStore -`candystore` is an embedded persistent key-value store for Rust with: +A pure Rust implementation of a fast (*blazingly* :tm:, of course), persistent, in-process +key-value store that relies on a hash-based sharding algorithm. All operations — lookup, +insert, and removal — are O(1). -- append-only data files -- a mutable in-place index for fast lookups -- rebuild-from-data-files recovery -- list and queue collection APIs -- typed wrappers built on `databuf` -- background compaction +| Operation | Time* | +|-----------|--------| +| Lookup | < 1us | +| Insert | < 2us | +| Removal | < 1us | -The index is an acceleration structure. The data files are the durable event log that recovery replays when the index is dirty. +The algorithm can be thought of as a "zero-overhead" extension to a hash table stored over +files, designed to minimize IO operations. See [how to interpret the results\*](#how-to-interpret-the-performance-results). 
-## Highlights +## Overview -- Fast point lookups through a memory-mapped index -- Crash recovery by rebuilding from append-only data files -- Ordered list API keyed by `(list, item)` -- Queue and large-value APIs -- Thread-safe shared access through `Arc` -- Typed APIs for encoded keys and values -## Quick Start +## How to Interpret the Performance Results -```rust -use candystore::{CandyStore, Config, Result}; +While the numbers above are incredible, it is obvious that any file-backed store will be +limited by the filesystem's latency and bandwidth. For example, you can expect a read +latency of 20-100us from SSDs (NVMe), so that's the lower bound on reading a random +location in the file. -fn main() -> Result<()> { - let db = CandyStore::open("/tmp/candy-dir", Config::default())?; +What the numbers above measure is the performance of the *algorithm*, not the *storage*: +given you can spare an overhead of 0.6% mapped into memory, lookup/insert/removal require +a single disk IO. Replacing (updating) an existing element requires two IOs, since it needs +to compare the key before writing it anew. These IOs may return from the kernel's page +cache, in which case it's practically immediate, or from disk, in which case you can expect +it to take 1-2 round-trip times of your device. - db.set("user:1", "alice")?; - assert_eq!(db.get("user:1")?, Some(b"alice".to_vec())); +Inserting to/removing from lists require 2-3 IOs, since these operations need to update +the list's head or tail, as well as a "chain" element. Such operations should really be done +with a "large enough page cache". Updating/fetching an existing element in a list is a +single IO as above. - let status = db.replace("user:1", "alice-v2", Some("alice"))?; - assert!(status.was_replaced()); - - db.remove("user:1")?; - assert_eq!(db.get("user:1")?, None); - Ok(()) -} -``` - -## Collections - -Lists are ordered maps scoped by a list key. 
- -```rust -use candystore::{CandyStore, Config, Result}; - -fn main() -> Result<()> { - let db = CandyStore::open("/tmp/candy-lists", Config::default())?; - - db.set_in_list("langs", "rust", "systems")?; - db.set_in_list("langs", "python", "scripting")?; - - let items = db.iter_list("langs").collect::, _>>()?; - assert_eq!(items.len(), 2); - Ok(()) -} -``` - -Queues store ordered values under a queue key. - -```rust -use candystore::{CandyStore, Config, Result}; - -fn main() -> Result<()> { - let db = CandyStore::open("/tmp/candy-queue", Config::default())?; - - db.push_to_queue_tail("jobs", "job-1")?; - db.push_to_queue_tail("jobs", "job-2")?; - - assert_eq!(db.pop_queue_head("jobs")?, Some(b"job-1".to_vec())); - Ok(()) -} -``` - -## Typed API - -Typed wrappers encode keys and values with `databuf` and separate each typed key space with a per-type id. - -```rust -use std::sync::Arc; - -use candystore::{CandyStore, CandyTypedStore, Config, Result}; - -fn main() -> Result<()> { - let db = Arc::new(CandyStore::open("/tmp/candy-typed", Config::default())?); - let users = CandyTypedStore::>::new(db); - - users.set("scores", &vec![1, 2, 3])?; - assert_eq!(users.get("scores")?, Some(vec![1, 2, 3])); - Ok(()) -} -``` - -## Large Values - -`set_big` / `get_big` / `remove_big` store values larger than the inline value limit by chunking them across queue-backed entries. - -## Recovery Model - -On open, the store marks the index dirty before doing work. On clean drop it syncs data files, clears the dirty flag, and flushes the index header. 
- -If the store is reopened while dirty, behavior depends on `Config::rebuild_strategy`: - -- `FailIfDirty`: reject open -- `RebuildIfDirty`: rebuild the index from data files -- `ResetDBIfDirty`: clear the directory and recreate an empty store -- `TrustDirtyIndexIfChecksumCorrectOrFail`: accept the dirty index only if row checksums match -- `TrustDirtyIndexIfChecksumCorrectOrRebuild`: trust valid checksums, otherwise rebuild -- `TrustDirtyIndexIfChecksumCorrectOrReset`: trust valid checksums, otherwise reset the database - -## Operational Notes - -- `Config::hash_key` is part of on-disk compatibility. A store must be reopened with the same hash key. -- Data files are append-only. Background compaction rewrites live entries into the active file and deletes old files. -- Rebuild now fails closed on unknown entry types and unknown namespaces in data files rather than silently skipping them. -- The index format and data-file format are internal implementation details until a `1.0` compatibility policy is explicitly documented. - -## Examples - -See: - -- `examples/simple.rs` -- `examples/typed.rs` -- `examples/lists.rs` -- `examples/multithreaded.rs` +If your memory is too constrained for keeping the lookup tables mapped-in (i.e., they get +evicted to disk), you'll incur one more unit of "IO latency" for fetching the row from the +table. Since the row spans 4KB, it should behave nicely with 4K IOs. ## Design -See `DESIGN.md` for the storage layout, locking model, rebuild path, and collection semantics. \ No newline at end of file +See `DESIGN.md` for the full storage layout, locking model, rebuild path, collection +semantics, and compaction strategy. 
\ No newline at end of file diff --git a/src/index_file.rs b/src/index_file.rs index 7da0083..8df61b4 100644 --- a/src/index_file.rs +++ b/src/index_file.rs @@ -27,28 +27,50 @@ use crate::types::{Config, Error, Result}; pub(crate) struct IndexFileHeader { pub(crate) signature: [u8; 8], pub(crate) version: u32, - _padding0: u32, + _padding16: u32, pub(crate) hash_key_0: u64, pub(crate) hash_key_1: u64, - pub(crate) dirty: AtomicU64, - _padding1: [u8; 64 - 40], + _padding64: [u8; 64 - 4 * 8], + + /////////////////////////////////// + // runtime state + /////////////////////////////////// pub(crate) global_split_level: AtomicU64, - _padding2: [u8; 128 - 72], + _padding128: [u8; 64 - 8], + + /////////////////////////////////// + // rebuild state + /////////////////////////////////// + pub(crate) dirty: AtomicU64, + /// Ordinal of the checkpointed file during progressive rebuild, or 0 if no + /// rebuild checkpoint is active. + pub(crate) rebuild_checkpoint_ordinal: AtomicU64, + /// Packed `(file_idx, file_offset)` for the progressive rebuild checkpoint. + pub(crate) rebuild_checkpoint_ptr: AtomicU64, + /// Checksum covering `(rebuild_checkpoint_ordinal, rebuild_checkpoint_ptr)`. 
+ pub(crate) rebuild_checkpoint_checksum: AtomicU64, + _padding1024: [u8; 896 - 4 * 8], + + /////////////////////////////////// + // stats + /////////////////////////////////// pub(crate) num_created: AtomicU64, pub(crate) num_removed: AtomicU64, pub(crate) num_replaced: AtomicU64, pub(crate) written_bytes: AtomicU64, pub(crate) waste_bytes: AtomicU64, pub(crate) reclaimed_bytes: AtomicU64, - _padding3: [u8; 192 - 176], + _padding1088: [u8; 64 - 6 * 8], /// Histogram buckets: [<64, <256, <1K, <4K, <16K, >=16K] pub(crate) size_histogram: [AtomicU64; 6], - _trailer: [u8; PAGE_SIZE - 240], + _padding1152: [u8; 64 - 6 * 8], + + _trailer: [u8; PAGE_SIZE - 1152], } const _: () = assert!(offset_of!(IndexFileHeader, global_split_level) == 64); -const _: () = assert!(offset_of!(IndexFileHeader, num_created) == 128); -const _: () = assert!(offset_of!(IndexFileHeader, size_histogram) == 192); +const _: () = assert!(offset_of!(IndexFileHeader, num_created) == 1024); +const _: () = assert!(offset_of!(IndexFileHeader, size_histogram) == 1088); const _: () = assert!(size_of::() == PAGE_SIZE); #[derive(Debug, Clone, Copy, PartialEq, Eq, FromBytes, IntoBytes, KnownLayout, Immutable)] @@ -445,6 +467,11 @@ impl IndexFile { self.header_mmap.flush().map_err(Error::IOError) } + pub(crate) fn flush_rows(&self) -> Result<()> { + self.rows_mmap.write().flush().map_err(Error::IOError)?; + self.rows_file.sync_all().map_err(Error::IOError) + } + pub(crate) fn open(base_path: &Path, config: Arc) -> Result { let hash_key = config.hash_key; let num_rows = (config.initial_capacity / ROW_WIDTH) diff --git a/src/internal.rs b/src/internal.rs index 7f0a6d5..4ea444d 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -227,6 +227,7 @@ impl HashCoord { } pub(crate) fn row_index(&self, split_level: u64) -> usize { + debug_assert!(split_level >= MIN_SPLIT_LEVEL as u64, "sl={split_level}"); ((self.row_selector as u64) & ((1 << split_level) - 1)) as usize } } diff --git a/src/store.rs b/src/store.rs 
index a2d1a4e..18ba71a 100644 --- a/src/store.rs +++ b/src/store.rs @@ -250,10 +250,10 @@ impl StoreInner { } } - fn next_compaction_candidate(&self) -> Option<(u16, u64)> { + fn next_compaction_candidates(&self, max_candidates: usize) -> Vec<(u16, u64)> { let active_file_idx = self.active_file_idx.load(Ordering::Acquire); let files = self.data_files.read(); - files + let mut candidates = files .iter() .filter_map(|(&file_idx, data_file)| { if file_idx == active_file_idx @@ -261,9 +261,25 @@ impl StoreInner { { return None; } - Some((file_idx, data_file.file_ordinal)) + Some(( + file_idx, + data_file.file_ordinal, + self.index_file.file_waste(file_idx), + )) }) - .min_by_key(|(_, file_ordinal)| *file_ordinal) + .collect::>(); + candidates.sort_by(|left, right| { + right + .2 + .cmp(&left.2) + .then_with(|| left.1.cmp(&right.1)) + .then_with(|| left.0.cmp(&right.0)) + }); + candidates + .into_iter() + .take(max_candidates) + .map(|(file_idx, file_ordinal, _)| (file_idx, file_ordinal)) + .collect() } fn logical_lock_index(&self, ns: KeyNamespace, key: &[u8]) -> usize { @@ -306,18 +322,14 @@ impl StoreInner { let low_shard = rows_table.shard_id(low_row_idx); let high_shard = rows_table.shard_id(high_row_idx); - - let _high_guard = if low_shard < high_shard { - None // low_row will automatically lock low_shard - } else if low_shard > high_shard { - Some(rows_table.lock_shard(high_shard)) - } else { - None - }; + debug_assert!( + low_shard <= high_shard, + "high_row_idx sets a higher bit, so high_shard >= low_shard" + ); let mut low_row = rows_table.row_mut(low_row_idx); - let _high_guard_post = if low_shard < high_shard { + let _high_guard = if low_shard < high_shard { Some(rows_table.lock_shard(high_shard)) } else { None @@ -430,14 +442,15 @@ impl StoreInner { let mut res = None; loop { - debug_assert!(sl >= MIN_SPLIT_LEVEL as u64, "sl={sl}"); let row = row_table.row_mut(hc.row_index(sl)); let row_sl = row.split_level.load(Ordering::Acquire); if row_sl == 0 { + // 
nonexistent row sl -= 1; continue; } if row_sl > sl { + // split happened, retry break; } @@ -460,6 +473,7 @@ impl StoreInner { .header_ref() .global_split_level .load(Ordering::Acquire); + // note: it is critical we do not hold the row's lock here self._split_row(hc, sl, gsl)?; } Err(Error::RotateDataFile(active_idx)) => { @@ -501,14 +515,15 @@ impl CandyStore { .load(Ordering::Acquire); let mut sl = gsl; loop { - debug_assert!(sl >= MIN_SPLIT_LEVEL as u64, "sl={sl}"); let row = row_table.row(hc.row_index(sl)); let row_sl = row.split_level.load(Ordering::Acquire); if row_sl == 0 { + // nonexistent row sl -= 1; continue; } if row_sl > sl { + // split happened, retry break; } return op(hc, row, key); @@ -658,6 +673,10 @@ impl CandyStore { self.inner.record_read(entry.size_hint() as u64); let kv = file.read_kv(entry.file_offset(), entry.size_hint())?; if kv.key() == key { + // optimization + if kv.value() == val { + return Ok(Some(kv.into_value())); + } let klen = kv.key().len(); let vlen = kv.value().len(); let old_val = kv.into_value(); @@ -737,6 +756,10 @@ impl CandyStore { { return Ok(ReplaceStatus::WrongValue(kv.into_value())); } + // optimization + if kv.value() == val { + return Ok(ReplaceStatus::PrevValue(kv.into_value())); + } let klen = kv.key().len(); let vlen = kv.value().len(); diff --git a/src/store/compaction.rs b/src/store/compaction.rs index f170095..2cf84fa 100644 --- a/src/store/compaction.rs +++ b/src/store/compaction.rs @@ -1,8 +1,9 @@ use std::sync::{Arc, atomic::Ordering}; use crate::{ + data_file::DataFile, index_file::EntryPointer, - internal::{EntryType, KeyNamespace, data_file_path, invalid_data_error, sync_dir}, + internal::{KeyNamespace, data_file_path, invalid_data_error, sync_dir}, pacer::Pacer, types::{Error, Result}, }; @@ -10,136 +11,200 @@ use crate::{ use super::{CandyStore, StoreInner}; pub(super) struct CompactionOutcome { + pub(super) compacted_files: u64, pub(super) reclaimed_bytes: u32, pub(super) moved_bytes: u32, } impl 
StoreInner { - pub(super) fn compact_file( + pub(super) fn compact_files( &self, - file_idx: u16, - expected_ordinal: u64, + candidates: &[(u16, u64)], pacer: &mut Pacer, #[cfg(windows)] pending_deletions: &mut Vec, ) -> Result { - if self.active_file_idx.load(Ordering::Acquire) == file_idx { + if candidates.is_empty() { return Ok(CompactionOutcome { + compacted_files: 0, reclaimed_bytes: 0, moved_bytes: 0, }); } - let source_file = match self.data_file(file_idx) { - Ok(f) => f, - Err(Error::MissingDataFile(_)) => { - return Ok(CompactionOutcome { - reclaimed_bytes: 0, - moved_bytes: 0, - }); - } - Err(e) => return Err(e), - }; - if source_file.file_ordinal != expected_ordinal { + let active_file_idx = self.active_file_idx.load(Ordering::Acquire); + let files = self.data_files.read(); + let sources = candidates + .iter() + .filter_map(|&(file_idx, expected_ordinal)| { + if file_idx == active_file_idx { + return None; + } + + let data_file = files.get(&file_idx)?.clone(); + if data_file.file_ordinal != expected_ordinal { + return None; + } + + Some((file_idx, data_file)) + }) + .collect::>(); + drop(files); + + if sources.is_empty() { return Ok(CompactionOutcome { + compacted_files: 0, reclaimed_bytes: 0, moved_bytes: 0, }); } - let mut offset = 0u64; let mut moved_bytes = 0u64; let mut read_buf = Vec::new(); - let mut buf_file_offset = 0u64; - let mut match_scratch = Vec::new(); + let mut row_idx = 0; loop { if self.shutting_down.load(Ordering::Acquire) { return Ok(CompactionOutcome { + compacted_files: 0, reclaimed_bytes: 0, moved_bytes: 0, }); } - let Some((kv, entry_offset, next_offset)) = - source_file.read_next_entry_ref(offset, &mut read_buf, &mut buf_file_offset)? - else { - break; + // Snapshot one row at a time, then drop the rows table read lock before any I/O. 
+ let snapshot: Vec<(usize, EntryPointer, Arc)> = { + let rows = self.index_file.rows_table(); + let active_rows = self.index_file.num_rows(); + if row_idx >= active_rows { + break; + } + + let row = rows.row(row_idx); + row.pointers + .iter() + .enumerate() + .filter_map(|(col, &entry)| { + if !entry.is_valid() { + return None; + } + let (_, source_file) = + sources.iter().find(|(idx, _)| *idx == entry.file_idx())?; + Some((col, entry, source_file.clone())) + }) + .collect() }; - offset = next_offset; - self.record_read(next_offset - entry_offset); - pacer.consume(next_offset - entry_offset); + for (col, entry, source_file) in &snapshot { + self.record_read(entry.size_hint() as u64); + pacer.consume(entry.size_hint() as u64); - let Some(ns) = KeyNamespace::from_u8(kv.ns) else { - return Err(invalid_data_error("unknown key namespace in data file")); - }; + let kv = source_file.read_kv_into( + entry.file_offset(), + entry.size_hint(), + &mut read_buf, + )?; - if let EntryType::Data = kv.entry_type { - let key = kv.key(); - let val = kv.value(); + let Some(ns) = KeyNamespace::from_u8(kv.ns) else { + return Err(invalid_data_error("unknown key namespace in data file")); + }; - self._mut_op(ns, key, val, |hc, mut row, key, val| { + // re-acquire the write lock and verify the pointer hasn't been moved + // by a concurrent set/remove before appending + replacing + let mut rotate_idx_req = None; + loop { + if let Some(rotate_idx) = rotate_idx_req.take() { + self._rotate_data_file(rotate_idx)?; + } + + let rows = self.index_file.rows_table(); + let active_rows = self.index_file.num_rows(); + if row_idx >= active_rows { + break; + } + + let mut row = rows.row_mut(row_idx); + if row.pointers[*col] != *entry { + // a concurrent op already moved/removed this entry -- skip it + break; + } + + let active_idx = self.active_file_idx.load(Ordering::Acquire); let files = self.data_files.read(); - for (col, entry) in row.iter_matches(hc) { - let Some(file) = 
files.get(&entry.file_idx()) else { - continue; - }; - self.record_read(entry.size_hint() as u64); - let existing_kv = file.read_kv_into( - entry.file_offset(), - entry.size_hint(), - &mut match_scratch, - )?; - if existing_kv.key() == key { - if entry.file_idx() != file_idx || entry.file_offset() != entry_offset { - return Ok(()); - } - - let active_idx = self.active_file_idx.load(Ordering::Acquire); - let active_file = files - .get(&active_idx) - .ok_or(Error::MissingDataFile(active_idx))?; - let (file_off, size) = active_file.append_kv(ns, key, val)?; + let active_file = files + .get(&active_idx) + .ok_or(Error::MissingDataFile(active_idx))?; + + match active_file.append_kv(ns, kv.key(), kv.value()) { + Ok((file_off, size)) => { + drop(files); self.record_write(size as u64); moved_bytes = moved_bytes.saturating_add(size as u64); row.replace_pointer( - col, + *col, EntryPointer::new( active_idx, file_off, size, - hc.masked_row_selector(), + entry.masked_row_selector(), ), ); - return Ok(()); + break; + } + Err(Error::RotateDataFile(rotate_idx)) => { + drop(files); + drop(row); + rotate_idx_req = Some(rotate_idx); + } + Err(err) => { + drop(files); + return Err(err); } } - Ok(()) - })?; + } } + + row_idx += 1; } - let removed = self.data_files.write().remove(&file_idx); - drop(source_file); // MUST drop before removing file to release mmap and handle - drop(removed); // Drop any open handles to the file + let removed = { + let mut files = self.data_files.write(); + sources + .iter() + .filter_map(|(file_idx, _)| { + files + .remove(file_idx) + .map(|data_file| (*file_idx, data_file)) + }) + .collect::>() + }; + let compacted_files = removed.len() as u64; + drop(sources); + + let mut reclaimed_bytes = 0u64; + for (file_idx, data_file) in removed { + drop(data_file); + + reclaimed_bytes = + reclaimed_bytes.saturating_add(self.index_file.take_file_waste(file_idx) as u64); + + let file_path = data_file_path(self.base_path.as_path(), file_idx); + match 
std::fs::remove_file(&file_path) { + Ok(()) => sync_dir(self.base_path.as_path())?, + #[cfg(windows)] + Err(_) => pending_deletions.push(file_path), + #[cfg(not(windows))] + Err(err) => return Err(Error::IOError(err)), + } + } - // Take file waste regardless of whether remove succeeds, to avoid infinite 100% loop if remove fails - let reclaimed = self.index_file.take_file_waste(file_idx); self.index_file .header_ref() .reclaimed_bytes - .fetch_add(reclaimed as u64, Ordering::Relaxed); + .fetch_add(reclaimed_bytes, Ordering::Relaxed); - let file_path = data_file_path(self.base_path.as_path(), file_idx); - match std::fs::remove_file(&file_path) { - Ok(()) => sync_dir(self.base_path.as_path())?, - #[cfg(windows)] - Err(_) => pending_deletions.push(file_path), - #[cfg(not(windows))] - Err(err) => return Err(Error::IOError(err)), - } Ok(CompactionOutcome { - reclaimed_bytes: reclaimed, + compacted_files, + reclaimed_bytes: reclaimed_bytes.min(u64::from(u32::MAX)) as u32, moved_bytes: moved_bytes.min(u64::from(u32::MAX)) as u32, }) } @@ -176,8 +241,16 @@ impl CandyStore { self.inner.shutting_down.store(false, Ordering::Release); let ctx = Arc::clone(&self.inner); let thd = std::thread::spawn(move || { - let throughput_bytes_per_sec = - (ctx.config.compaction_throughput_bytes_per_sec as u64).max(1); + if ctx.config.compaction_throughput_bytes_per_sec == 0 { + // Compaction disabled — park until shutdown. 
+ let mut state = ctx.compaction_state.lock(); + while !ctx.shutting_down.load(Ordering::Acquire) { + ctx.compaction_condvar.wait(&mut state); + } + return; + } + + let throughput_bytes_per_sec = ctx.config.compaction_throughput_bytes_per_sec as u64; let tokens_per_unit = (throughput_bytes_per_sec / 10).max(1); let burst_size = tokens_per_unit.saturating_mul(2); let mut pacer = Pacer::new( @@ -201,16 +274,19 @@ impl CandyStore { state.wake_requested = false; } - while let Some((file_idx, file_ordinal)) = ctx.next_compaction_candidate() { + loop { + let candidates = ctx.next_compaction_candidates(4); + if candidates.is_empty() { + break; + } if ctx.shutting_down.load(Ordering::Acquire) { return; } #[cfg(windows)] Self::retry_pending_deletions(&ctx, &mut pending_deletions); let t0 = std::time::Instant::now(); - let res = ctx.compact_file( - file_idx, - file_ordinal, + let res = ctx.compact_files( + &candidates, &mut pacer, #[cfg(windows)] &mut pending_deletions, @@ -222,7 +298,9 @@ impl CandyStore { .fetch_add(compaction_millis, Ordering::Relaxed); match res { Ok(outcome) => { - ctx.stats.num_compactions.fetch_add(1, Ordering::Relaxed); + ctx.stats + .num_compactions + .fetch_add(outcome.compacted_files, Ordering::Relaxed); ctx.stats .last_compaction_dur_ms .store(compaction_millis, Ordering::Relaxed); @@ -278,3 +356,245 @@ impl Drop for CandyStore { } } } + +#[cfg(test)] +mod tests { + use super::*; + + use std::{sync::Arc, thread, time::Duration}; + + use crate::{CandyStore, Config}; + + fn count_live_entries_in_file(store: &CandyStore, file_idx: u16) -> u64 { + let rows = store.inner.index_file.rows_table(); + let num_rows = store.inner.index_file.num_rows(); + let mut count = 0u64; + + for row_idx in 0..num_rows { + let row = rows.row(row_idx); + for entry in row.pointers.iter() { + if entry.is_valid() && entry.file_idx() == file_idx { + count += 1; + } + } + } + + count + } + + #[test] + fn test_compaction_reads_only_live_entries_for_target_file() -> Result<()> 
{ + let dir = tempfile::tempdir().map_err(Error::IOError)?; + let db = CandyStore::open( + dir.path(), + Config { + max_data_file_size: 8192, + compaction_min_threshold: u32::MAX, + ..Default::default() + }, + )?; + db.stop_compaction(); + + for idx in 0..24 { + db.set("hot", format!("hot-value-{idx:02}-{}", "x".repeat(48)))?; + } + + let mut filler_idx = 0u64; + while db.inner.data_files.read().len() == 1 { + db.set( + format!("filler-{filler_idx}"), + format!("filler-value-{}", "y".repeat(48)), + )?; + filler_idx += 1; + } + + let active_idx = db.inner.active_file_idx.load(Ordering::Acquire); + let (target_idx, target_ordinal) = { + let files = db.inner.data_files.read(); + let (&target_idx, target_file) = files + .iter() + .find(|(idx, _)| **idx != active_idx) + .expect("expected a non-active file to compact"); + (target_idx, target_file.file_ordinal) + }; + + let live_entries = count_live_entries_in_file(&db, target_idx); + assert!( + live_entries > 0, + "expected live entries in the compacted file" + ); + + let before_read_ops = db.stats().num_read_ops; + let mut pacer = Pacer::new(u64::MAX / 4, Duration::from_secs(1), u64::MAX / 4); + db.inner.shutting_down.store(false, Ordering::Release); + let outcome = db.inner.compact_files( + &[(target_idx, target_ordinal)], + &mut pacer, + #[cfg(windows)] + &mut Vec::new(), + )?; + + assert_eq!(outcome.compacted_files, 1); + assert_eq!(db.stats().num_read_ops - before_read_ops, live_entries); + assert_eq!(count_live_entries_in_file(&db, target_idx), 0); + + Ok(()) + } + + #[test] + fn test_compaction_batch_reads_live_entries_for_all_target_files() -> Result<()> { + let dir = tempfile::tempdir().map_err(Error::IOError)?; + let db = CandyStore::open( + dir.path(), + Config { + max_data_file_size: 2048, + compaction_min_threshold: u32::MAX, + ..Default::default() + }, + )?; + db.stop_compaction(); + + for idx in 0..32 { + db.set(format!("hot-{idx}"), format!("seed-{}", "x".repeat(48)))?; + } + + while 
db.inner.data_files.read().len() < 5 { + let idx = db.stats().num_write_ops; + db.set( + format!("roll-{idx}"), + format!("roll-value-{}", "y".repeat(48)), + )?; + } + + let pre_compaction_active_idx = db.inner.active_file_idx.load(Ordering::Acquire); + db.inner._rotate_data_file(pre_compaction_active_idx)?; + + let active_idx = db.inner.active_file_idx.load(Ordering::Acquire); + let targets = { + let files = db.inner.data_files.read(); + let mut target_files = files + .iter() + .filter(|(file_idx, _)| **file_idx != active_idx) + .map(|(&file_idx, data_file)| (file_idx, data_file.file_ordinal)) + .collect::>(); + target_files.sort_by_key(|(file_idx, _)| *file_idx); + target_files.truncate(4); + target_files + }; + assert_eq!(targets.len(), 4); + + let live_entries = targets + .iter() + .map(|(file_idx, _)| count_live_entries_in_file(&db, *file_idx)) + .sum::(); + assert!(live_entries > 0); + + let before_read_ops = db.stats().num_read_ops; + let mut pacer = Pacer::new(u64::MAX / 4, Duration::from_secs(1), u64::MAX / 4); + db.inner.shutting_down.store(false, Ordering::Release); + let outcome = db.inner.compact_files( + &targets, + &mut pacer, + #[cfg(windows)] + &mut Vec::new(), + )?; + + assert_eq!(outcome.compacted_files, 4); + assert_eq!(db.stats().num_read_ops - before_read_ops, live_entries); + for (file_idx, _) in targets { + assert_eq!(count_live_entries_in_file(&db, file_idx), 0); + } + + Ok(()) + } + + #[test] + fn test_compaction_allows_concurrent_index_growth() -> Result<()> { + let dir = tempfile::tempdir().map_err(Error::IOError)?; + let db = Arc::new(CandyStore::open( + dir.path(), + Config { + initial_capacity: 16, + remap_scaler: 1, + max_data_file_size: 64 * 1024, + compaction_min_threshold: u32::MAX, + ..Default::default() + }, + )?); + db.stop_compaction(); + + let mut expected = Vec::new(); + for idx in 0..64u32 { + let key = format!("seed-{idx:04}"); + let value = format!("seed-value-{idx:04}-{}", "x".repeat(768)); + db.set(&key, &value)?; + 
expected.push((key, value.into_bytes())); + } + + let target_idx = db.inner.active_file_idx.load(Ordering::Acquire); + let target_ordinal = { + let files = db.inner.data_files.read(); + files + .get(&target_idx) + .expect("target file should exist before rotation") + .file_ordinal + }; + db.inner._rotate_data_file(target_idx)?; + + let live_entries = count_live_entries_in_file(&db, target_idx); + assert!( + live_entries >= 8, + "expected a file with enough live entries to slow compaction" + ); + + let rows_before = db.inner.index_file.num_rows(); + let db_for_compaction = Arc::clone(&db); + let compaction_handle = thread::spawn(move || { + let mut pacer = Pacer::new(256, Duration::from_millis(10), 256); + db_for_compaction + .inner + .shutting_down + .store(false, Ordering::Release); + db_for_compaction.inner.compact_files( + &[(target_idx, target_ordinal)], + &mut pacer, + #[cfg(windows)] + &mut Vec::new(), + ) + }); + + let mut grew = false; + for idx in 0..20_000u32 { + let key = format!("grow-{idx:04}"); + let value = format!("grow-value-{idx:04}-{}", "y".repeat(96)); + db.set(&key, &value)?; + expected.push((key, value.into_bytes())); + + if db.inner.index_file.num_rows() > rows_before { + grew = true; + break; + } + } + + let outcome = compaction_handle + .join() + .expect("compaction thread panicked")?; + assert_eq!(outcome.compacted_files, 1); + assert!( + grew, + "expected concurrent writes to force index growth during compaction" + ); + assert!(db.inner.index_file.num_rows() > rows_before); + assert_eq!(count_live_entries_in_file(&db, target_idx), 0); + + for (key, value) in expected { + assert_eq!( + db.get(&key)?, + Some(value), + "key {key} should remain readable" + ); + } + + Ok(()) + } +} diff --git a/src/store/open.rs b/src/store/open.rs index f9d8a08..9028156 100644 --- a/src/store/open.rs +++ b/src/store/open.rs @@ -233,8 +233,28 @@ impl CandyStore { }; if !was_clean_shutdown { + let header = store.inner.index_file.header_ref(); + let 
has_pending_rebuild = header + .rebuild_checkpoint_ordinal + .load(std::sync::atomic::Ordering::Acquire) + != 0 + || header + .rebuild_checkpoint_ptr + .load(std::sync::atomic::Ordering::Acquire) + != 0 + || header + .rebuild_checkpoint_checksum + .load(std::sync::atomic::Ordering::Acquire) + != 0; + match dirty_open_action { - DirtyOpenAction::None | DirtyOpenAction::ResetDb | DirtyOpenAction::TrustIndex => {} + DirtyOpenAction::None | DirtyOpenAction::ResetDb => {} + // A pending checkpoint means rebuild was interrupted — resume it + // regardless of whether the strategy would trust the index. + DirtyOpenAction::TrustIndex if has_pending_rebuild => { + store.recover_index()?; + } + DirtyOpenAction::TrustIndex => {} DirtyOpenAction::RebuildIndex => store.recover_index()?, } store diff --git a/src/store/recovery.rs b/src/store/recovery.rs index 5d9fbf0..1248d3c 100644 --- a/src/store/recovery.rs +++ b/src/store/recovery.rs @@ -1,4 +1,4 @@ -use std::sync::{Arc, atomic::Ordering}; +use std::sync::{Arc, atomic::AtomicU64, atomic::Ordering}; use crate::{ data_file::DataFile, @@ -12,20 +12,100 @@ use crate::{ use super::CandyStore; +static RECOVERED_ENTRIES_FOR_ABORT_TEST: AtomicU64 = AtomicU64::new(0); + impl CandyStore { - pub(super) fn recover_index(&self) -> Result<()> { - let row_table = self.inner.index_file.rows_table_mut(); - self.inner.index_file.reset(row_table)?; + /// How many entries to process before flushing a mid-file checkpoint. 
+ const REBUILD_CHECKPOINT_INTERVAL: u64 = 1000; + + fn rebuild_checkpoint_checksum(ordinal: u64, ptr: u64) -> u64 { + ordinal.rotate_left(17) ^ ptr.rotate_right(11) ^ 0x5a17_b1d2_c3e4_f607 + } + + fn encode_rebuild_checkpoint_ptr(file_idx: u16, file_offset: u64) -> u64 { + let fi = (file_idx as u64) & ((1 << 12) - 1); + let fo = (file_offset / FILE_OFFSET_ALIGNMENT) << 12; + fi | fo + } + + fn decode_rebuild_checkpoint_ptr(ptr: u64) -> (u16, u64) { + let file_idx = (ptr & ((1 << 12) - 1)) as u16; + let file_offset = (ptr >> 12) * FILE_OFFSET_ALIGNMENT; + (file_idx, file_offset) + } + + fn read_rebuild_checkpoint(&self) -> Option<(u64, u16, u64)> { + let header = self.inner.index_file.header_ref(); + let ordinal = header.rebuild_checkpoint_ordinal.load(Ordering::Acquire); + let ptr = header.rebuild_checkpoint_ptr.load(Ordering::Acquire); + let checksum = header.rebuild_checkpoint_checksum.load(Ordering::Acquire); + + if ordinal == 0 && ptr == 0 && checksum == 0 { + return None; + } + + if checksum != Self::rebuild_checkpoint_checksum(ordinal, ptr) { + return None; + } + + let (file_idx, file_offset) = Self::decode_rebuild_checkpoint_ptr(ptr); + Some((ordinal, file_idx, file_offset)) + } + + fn maybe_abort_rebuild_for_testing(&self) { + let Ok(after) = std::env::var("CANDYSTORE_ABORT_REBUILD_AFTER") else { + return; + }; + let Ok(after) = after.parse::() else { + return; + }; + if after == 0 { + return; + } + + let recovered = RECOVERED_ENTRIES_FOR_ABORT_TEST.fetch_add(1, Ordering::Relaxed) + 1; + if recovered >= after { + std::process::abort(); + } + } + pub(super) fn recover_index(&self) -> Result<()> { let mut sorted_files: Vec> = self.inner.data_files.read().values().cloned().collect(); sorted_files.sort_by_key(|df| df.file_ordinal); + let checkpoint = + self.read_rebuild_checkpoint() + .and_then(|(ordinal, file_idx, file_offset)| { + sorted_files + .iter() + .find(|df| df.file_idx == file_idx && df.file_ordinal == ordinal) + .map(|_| (ordinal, file_idx, 
file_offset)) + }); + + if checkpoint.is_none() { + // No previous progress, checkpoint corruption, or the checkpointed + // file no longer matches the persisted stable identity — full rebuild. + let row_table = self.inner.index_file.rows_table_mut(); + self.inner.index_file.reset(row_table)?; + } + for data_file in &sorted_files { - let mut offset = 0u64; + let start_offset = if checkpoint.is_some_and(|(ordinal, file_idx, _)| { + data_file.file_ordinal == ordinal && data_file.file_idx == file_idx + }) { + checkpoint.unwrap().2 + } else if checkpoint.is_some_and(|(ordinal, _, _)| data_file.file_ordinal < ordinal) { + continue; + } else { + 0 + }; + + let mut offset = start_offset; let mut read_buf = Vec::new(); let mut buf_file_offset = 0u64; let mut match_scratch = Vec::new(); + let mut entries_since_checkpoint = 0u64; loop { let Some((kv, entry_offset, next_offset)) = data_file.read_next_entry_ref(offset, &mut read_buf, &mut buf_file_offset)? @@ -39,12 +119,59 @@ impl CandyStore { }; self.recover_entry(data_file, ns, kv, entry_offset, &mut match_scratch)?; + self.maybe_abort_rebuild_for_testing(); + + entries_since_checkpoint += 1; + if entries_since_checkpoint >= Self::REBUILD_CHECKPOINT_INTERVAL { + self.flush_rebuild_checkpoint( + data_file.file_ordinal, + data_file.file_idx, + offset, + )?; + entries_since_checkpoint = 0; + } } + + // File fully processed — keep progress at this file's final offset. + self.flush_rebuild_checkpoint(data_file.file_ordinal, data_file.file_idx, offset)?; } + // Rebuild complete — clear checkpoint. 
+ self.clear_rebuild_checkpoint()?; + Ok(()) } + fn flush_rebuild_checkpoint(&self, ordinal: u64, file_idx: u16, offset: u64) -> Result<()> { + self.inner.index_file.flush_rows()?; + + let ptr = Self::encode_rebuild_checkpoint_ptr(file_idx, offset); + let checksum = Self::rebuild_checkpoint_checksum(ordinal, ptr); + let header = self.inner.index_file.header_ref(); + header + .rebuild_checkpoint_ordinal + .store(ordinal, Ordering::Release); + header.rebuild_checkpoint_ptr.store(ptr, Ordering::Release); + header + .rebuild_checkpoint_checksum + .store(checksum, Ordering::Release); + self.inner.index_file.flush_header() + } + + fn clear_rebuild_checkpoint(&self) -> Result<()> { + self.inner.index_file.flush_rows()?; + + let header = self.inner.index_file.header_ref(); + header + .rebuild_checkpoint_ordinal + .store(0, Ordering::Release); + header.rebuild_checkpoint_ptr.store(0, Ordering::Release); + header + .rebuild_checkpoint_checksum + .store(0, Ordering::Release); + self.inner.index_file.flush_header() + } + fn recover_entry( &self, data_file: &Arc, @@ -94,6 +221,13 @@ impl CandyStore { let existing_kv = file.read_kv_into(entry.file_offset(), entry.size_hint(), match_scratch)?; if existing_kv.key() == key { + if entry == ptr { + // Resuming from a crash during rebuild may subject some already-processed + // entries to multiple iterations. If the row pointer is already identical, + // this entry was fully processed and we can skip it. 
+ return Ok(()); + } + let old_size = aligned_data_entry_size(existing_kv.key().len(), existing_kv.value().len()); self.record_recovered_waste( diff --git a/src/types.rs b/src/types.rs index daa7eb1..64393a9 100644 --- a/src/types.rs +++ b/src/types.rs @@ -77,7 +77,7 @@ impl Default for Config { max_concurrency: (2 * num_cpus::get()).clamp(16, 64), reset_on_invalid_data: false, compaction_throughput_bytes_per_sec: 4 * 1024 * 1024, - rebuild_strategy: RebuildStrategy::FailIfDirty, + rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrRebuild, } } } diff --git a/tests/compaction.rs b/tests/compaction.rs index 0819154..e7a3603 100644 --- a/tests/compaction.rs +++ b/tests/compaction.rs @@ -78,9 +78,14 @@ fn test_background_compaction_after_reopen_without_writes() -> Result<(), Error> compaction_min_threshold: 256, ..Config::default() }; + let write_config = Config { + compaction_throughput_bytes_per_sec: 0, + ..config + }; + let files_before; { - let db = CandyStore::open(dir.path(), config)?; + let db = CandyStore::open(dir.path(), write_config)?; for i in 0..200 { db.set(format!("key{i:04}"), vec![b'a'; 64])?; @@ -90,22 +95,21 @@ fn test_background_compaction_after_reopen_without_writes() -> Result<(), Error> assert_eq!(db.remove(format!("key{i:04}"))?, Some(vec![b'a'; 64])); } - assert!( - std::fs::read_dir(dir.path()) - .unwrap() - .filter_map(|e| e.ok()) - .filter(|e| e - .file_name() + files_before = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(|e| e.ok()) + .filter(|e| { + e.file_name() .to_str() - .is_some_and(|s| s.starts_with("data_"))) - .count() - > 1, - "expected multiple data files before reopen" + .is_some_and(|s| s.starts_with("data_")) + }) + .count(); + assert!( + files_before > 1, + "expected multiple data files before close" ); } - let db = CandyStore::open(dir.path(), config)?; - let count_data_files = || -> usize { std::fs::read_dir(dir.path()) .unwrap() @@ -118,8 +122,7 @@ fn 
test_background_compaction_after_reopen_without_writes() -> Result<(), Error> .count() }; - let files_before = count_data_files(); - assert!(files_before > 1, "expected compaction backlog after reopen"); + let db = CandyStore::open(dir.path(), config)?; for _ in 0..100 { std::thread::sleep(std::time::Duration::from_millis(10)); diff --git a/tests/crasher.rs b/tests/crasher.rs index 2da9658..a051aeb 100644 --- a/tests/crasher.rs +++ b/tests/crasher.rs @@ -25,6 +25,7 @@ fn get_config() -> Config { initial_capacity: 1024, max_concurrency: 64, rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrFail, + //rebuild_strategy: RebuildStrategy::RebuildIfDirty, ..Default::default() } } diff --git a/tests/recovery.rs b/tests/recovery.rs index 9eea297..94ce96e 100644 --- a/tests/recovery.rs +++ b/tests/recovery.rs @@ -67,6 +67,55 @@ fn rewrite_data_file_ordinal( Ok(()) } +fn rebuild_checkpoint_checksum(ordinal: u64, ptr: u64) -> u64 { + ordinal.rotate_left(17) ^ ptr.rotate_right(11) ^ 0x5a17_b1d2_c3e4_f607 +} + +fn encode_rebuild_checkpoint_ptr(file_idx: u16, file_offset: u64) -> u64 { + let fi = (file_idx as u64) & ((1 << 12) - 1); + let fo = (file_offset / 16) << 12; + fi | fo +} + +fn write_rebuild_checkpoint( + dir: &std::path::Path, + ordinal: u64, + file_idx: u16, + file_offset: u64, +) -> Result<(), Error> { + let ptr = encode_rebuild_checkpoint_ptr(file_idx, file_offset); + let checksum = rebuild_checkpoint_checksum(ordinal, ptr); + + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(dir.join("index")) + .map_err(Error::IOError)?; + + file.seek(SeekFrom::Start(40)).map_err(Error::IOError)?; + file.write_all(&ordinal.to_le_bytes()) + .map_err(Error::IOError)?; + file.write_all(&ptr.to_le_bytes()).map_err(Error::IOError)?; + file.write_all(&checksum.to_le_bytes()) + .map_err(Error::IOError)?; + file.sync_all().map_err(Error::IOError)?; + Ok(()) +} + +fn corrupt_rebuild_checkpoint_checksum(dir: &std::path::Path) -> Result<(), 
Error> { + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(dir.join("index")) + .map_err(Error::IOError)?; + + file.seek(SeekFrom::Start(56)).map_err(Error::IOError)?; + file.write_all(&0xdead_beef_dead_beefu64.to_le_bytes()) + .map_err(Error::IOError)?; + file.sync_all().map_err(Error::IOError)?; + Ok(()) +} + #[test] fn test_clean_shutdown_flag() -> Result<(), Error> { let dir = tempdir().unwrap(); @@ -576,6 +625,10 @@ fn test_rebuild_if_dirty_recovers_typed_data() -> Result<(), Error> { #[test] fn test_fail_if_dirty_rejects_reopen() -> Result<(), Error> { let dir = tempdir().unwrap(); + let fail_config = Config { + rebuild_strategy: RebuildStrategy::FailIfDirty, + ..Config::default() + }; { let db = CandyStore::open(dir.path(), Config::default())?; @@ -584,7 +637,7 @@ fn test_fail_if_dirty_rejects_reopen() -> Result<(), Error> { } assert!(matches!( - CandyStore::open(dir.path(), Config::default()), + CandyStore::open(dir.path(), fail_config), Err(Error::DirtyIndex) )); @@ -786,3 +839,240 @@ fn test_recover_from_truncated_data_file() -> Result<(), Box Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 1024, + rebuild_strategy: RebuildStrategy::RebuildIfDirty, + ..Config::default() + }; + + // Phase 1: write data across multiple files, then crash. + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..100 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + // Phase 2: reopen triggers rebuild. Verify all data survived. + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..100 { + assert_eq!( + db.get(format!("key{i:04}"))?, + Some(format!("val{i:04}").into_bytes()), + "key{i:04} missing after full rebuild" + ); + } + // Clean shutdown — checkpoint should be 0 now. 
+ } + + Ok(()) +} + +#[test] +fn test_progressive_rebuild_survives_interrupted_rebuild() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 1024, + rebuild_strategy: RebuildStrategy::RebuildIfDirty, + ..Config::default() + }; + + // Phase 1: write data, crash. + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..100 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + // Phase 2: reopen triggers rebuild which completes. Then crash again. + { + let db = CandyStore::open(dir.path(), config)?; + // Write more data on top of the rebuilt index. + for i in 100..150 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + // Phase 3: another rebuild — should rebuild from scratch (checkpoint was + // cleared after phase 2's successful rebuild) and recover everything. + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..150 { + assert_eq!( + db.get(format!("key{i:04}"))?, + Some(format!("val{i:04}").into_bytes()), + "key{i:04} missing after second rebuild" + ); + } + } + + Ok(()) +} + +#[test] +fn test_progressive_rebuild_with_trust_strategy_resumes_pending() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 1024, + rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrRebuild, + ..Config::default() + }; + + // Phase 1: write data, crash. + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..100 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + // Phase 2: reopen triggers rebuild (checksums wrong after crash). + // Rebuild completes. Write more, then crash. + { + let db = CandyStore::open(dir.path(), config)?; + for i in 100..200 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + // Phase 3: reopen. 
Checksums may pass (TrustIndex), but if there's a + // pending checkpoint, rebuild must resume. Since phase 2's rebuild + // completed (checkpoint cleared), and we crashed after new writes, + // the trust-checksums path should handle it. + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..200 { + assert_eq!( + db.get(format!("key{i:04}"))?, + Some(format!("val{i:04}").into_bytes()), + "key{i:04} missing after trust-or-rebuild" + ); + } + } + + Ok(()) +} + +#[test] +fn test_progressive_rebuild_restarts_on_missing_checkpoint_file() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 1024, + rebuild_strategy: RebuildStrategy::RebuildIfDirty, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..100 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + write_rebuild_checkpoint(dir.path(), u64::MAX - 7, 7, 0)?; + + let db = CandyStore::open(dir.path(), config)?; + for i in 0..100 { + assert_eq!( + db.get(format!("key{i:04}"))?, + Some(format!("val{i:04}").into_bytes()), + "key{i:04} missing after restart-from-scratch rebuild" + ); + } + + Ok(()) +} + +#[test] +fn test_progressive_rebuild_restarts_on_corrupt_checkpoint_tuple() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 1024, + rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrRebuild, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..100 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + write_rebuild_checkpoint(dir.path(), 0x1234_5678_9abc_def0, 3, 128)?; + corrupt_rebuild_checkpoint_checksum(dir.path())?; + + let db = CandyStore::open(dir.path(), config)?; + for i in 0..100 { + assert_eq!( + db.get(format!("key{i:04}"))?, + Some(format!("val{i:04}").into_bytes()), + "key{i:04} missing after rebuild with 
corrupt checkpoint" + ); + } + + Ok(()) +} + +#[cfg(unix)] +#[test] +fn test_progressive_rebuild_resumes_after_real_mid_rebuild_crash() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 256 * 1024, + rebuild_strategy: RebuildStrategy::RebuildIfDirty, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..3000u32 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + let pid = unsafe { libc::fork() }; + assert!(pid >= 0); + if pid == 0 { + unsafe { + libc::setenv( + c"CANDYSTORE_ABORT_REBUILD_AFTER".as_ptr(), + c"1200".as_ptr(), + 1, + ); + } + let _ = CandyStore::open(dir.path(), config); + unsafe { libc::_exit(0) }; + } + + let mut status = 0i32; + let wait_rc = unsafe { libc::waitpid(pid, &mut status, 0) }; + assert_eq!(wait_rc, pid); + assert!(libc::WIFSIGNALED(status)); + assert_eq!(libc::WTERMSIG(status), libc::SIGABRT); + + let db = CandyStore::open(dir.path(), config)?; + for i in 0..3000u32 { + assert_eq!( + db.get(format!("key{i:04}"))?, + Some(format!("val{i:04}").into_bytes()), + "key{i:04} missing after resume-from-offset rebuild" + ); + } + + Ok(()) +} From c4611edafbc64c2ff47a743cbc898f6b86f2bfef Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Thu, 26 Mar 2026 22:54:01 +0200 Subject: [PATCH 05/25] readme --- README.md | 165 ++++++++++++++++++++++++++++++++++++++++------- algo.png | Bin 0 -> 91019 bytes examples/perf.rs | 32 +++++++-- split.png | Bin 0 -> 128427 bytes 4 files changed, 167 insertions(+), 30 deletions(-) create mode 100644 algo.png create mode 100644 split.png diff --git a/README.md b/README.md index cf2b873..9d93906 100644 --- a/README.md +++ b/README.md @@ -8,38 +8,153 @@ insert, and removal — are O(1). 
|-----------|--------| | Lookup | < 1us | | Insert | < 2us | -| Removal | < 1us | +| Removal | < 2us | -The algorithm can be thought of as a "zero-overhead" extension to a hash table stored over -files, designed to minimize IO operations. See [how to interpret the results\*](#how-to-interpret-the-performance-results). +On my laptop (32 core AMD RYZEN AI MAX+ 395 with 64GB RAM, running Ubuntu 25.10 kernel `6.17.0-19-generic`) I'm getting -## Overview +```bash +$ cargo run --release --example perf +Testing key-value using 1 threads, each with 1000000 items (key size: 16, value size: 16) + Inserts: 0.539149 us/op + Positive Lookups: 0.298013 us/op + Negative Lookups: 0.044203 us/op + Removes: 0.573369 us/op +``` -## How to Interpret the Performance Results +See [how to interpret the results\*](#how-to-interpret-the-performance-results). -While the numbers above are incredible, it is obvious that any file-backed store will be -limited by the filesystem's latency and bandwidth. For example, you can expect a read -latency of 20-100us from SSDs (NVMe), so that's the lower bound on reading a random -location in the file. +## APIs + +Candy offers +* A simple key-value API (`get`, `set`, `remove` and atomic operations like `replace`) +* A typed interface on top (`get`, `set`, etc.) +* Double-ended queues (`push_to_queue_tail`, `pop_queue_head`, etc.) as well as a typed interface on top of them +* Lists (`get_from_list`, `set_in_list`, etc.) 
as well as a typed interface on top of them +* The DB is completely thread-safe in idiomatic Rust (just `Arc<>` it) + +```rust +use candystore::{CandyStore, Config, Result}; + +fn main() -> Result<()> { + let db = CandyStore::open("/path/to/db", Config::default())?; + + db.set("hello", "world")?; + let val = db.get("hello")?; + assert_eq!(val, Some(b"world".to_vec())); + db.remove("hello")?; + + db.set_in_list("cities", "barcelona", "")?; + db.set_in_list("cities", "chicago", "")?; + db.set_in_list("cities", "caracas", "")?; + + let cities: Vec<String> = db.iter_list("cities") + .map(|res| String::from_utf8(res.unwrap().0).unwrap()) + .collect(); + + assert_eq!(cities, vec!["barcelona", "chicago", "caracas"]); + + Ok(()) +} +``` + +## Algorithm + +The algorithm can be thought of as a "zero-overhead extension" to a hash table stored over +files, designed to minimize IO operations. It does not employ a WAL or a journal, and instead +uses append-only files that serve both as a source of truth and as the final data structure. +Unlike LSM-based stores that need to maintain large SSTables in memory, sort them and later +merge them, Candy uses a small mmap'ed index that points to on-disk data directly. + +![Algorithm](algo.png) + +The core of the algorithm is the concept of *hash coordinates*: breaking up a 64-bit hash +into two 32-bit values, a *row selector* and a *signature*, which can be thought of +as coordinates into the rows table. First, the row selector is used to locate the relevant +row in the table, and the signature locates the column. To find the row, we take the row +selector's bits and mask them with a *split level mask*, essentially, the number of rows +in the table. + +To locate the signature (32 bits) within the row, we employ a parallel lookup (using SIMD) +to find the matching column(s). Then we fetch the corresponding pointers for the matched +columns, from which we extract another 18 bits of entropy.
If both match, we fetch the entry +from the relevant file (the pointer stores a file index and a file offset). + +Note: the chances of a collision (meaning we fetch a wrong entry from the file) are +virtually zero, about 10^-10 according to the birthday paradox (a collision in 336 uniformly-distributed +50-bit numbers). -What the numbers above measure is the performance of the *algorithm*, not the *storage*: -given you can spare an overhead of 0.6% mapped into memory, lookup/insert/removal require -a single disk IO. Replacing (updating) an existing element requires two IOs, since it needs -to compare the key before writing it anew. These IOs may return from the kernel's page -cache, in which case it's practically immediate, or from disk, in which case you can expect -it to take 1-2 round-trip times of your device. +Candy supports up to 4096 files, each up to 1GB in size (a span of 4TB). In terms of key-space, +Candy allows 2^21 rows, each with 336 keys, so a total of 704M keys. The maximum size of a key +is 16KB and the maximum size of a value is 64KB. Of course these are theoretical limits; +it would be wise to halve them in practice due to imbalances. -Inserting to/removing from lists require 2-3 IOs, since these operations need to update -the list's head or tail, as well as a "chain" element. Such operations should really be done -with a "large enough page cache". Updating/fetching an existing element in a list is a -single IO as above. +### Splitting -If your memory is too constrained for keeping the lookup tables mapped-in (i.e., they get -evicted to disk), you'll incur one more unit of "IO latency" for fetching the row from the -table. Since the row spans 4KB, it should behave nicely with 4K IOs. +What happens when a row reaches its limit of 336 keys? We need to split it, of course. +To do that, we increase the row's split level by one, which means we take an extra bit +into account when selecting the row.
For example, row 2 (0b010) will be split into +rows 2 (0b0010) and 10 (0b1010). Because the bits are uniformly distributed, we expect about +half of the entries to move from row 2 to row 10. -## Design +![Split](split.png) + +Note that splitting may require increasing the global split level (the size of the table), +which entails doubling the mmap's size. This may sound like a costly operation, but since +it's file-backed it's mostly only page-table work, and it's amortized. And since we only +split a single row -- we do not need to rehash the whole table -- the amount of work we +do is O(1). + +Another optimization is that the pointer contains 18 bits of the row selector, which means +we do not need to read and recompute the hash coordinates of the keys. Splitting is thus +memory-bound. + +### Compaction + +Data is always written (appended) to the *active data file*. When a file reaches a certain size, +Candy rotates the active file and creates a new one. The old file becomes an *immutable data +file*. + +As data is created, updated and removed, the store accumulates waste. To handle it we have +*background compaction*: a thread that iterates over the rows table, finds all entries that +belong to files that should be compacted and rewrites them to the active file. After such +a pass, it simply deletes the old immutable file since no entry points to it. + +You can configure the throughput (bytes per second) of compaction. + +### Rebuild + +We trust the operating system to flush the data files and mmap'ed rows table to storage, +which means that even if your process crashes, your data will be fully consistent. However, +this is not true on a power failure or a kernel panic -- in which case the state of the +index file is completely unknown. In such cases Candy has a rebuild mechanism that essentially +wipes the rows table clean and reads all the data files in order, replaying every set/remove +as it happened, and achieves a consistent state.
+ +## Design Goals + +Unlike many key-value stores, Candy serves the purpose of *reducing* the memory footprint of your +process, e.g., offloading data to the disk instead of keeping it in memory. It intentionally does not +include any caching/LRU layer like many traditional KVs/DBs. + +Example use cases for Candy are +* A hash table that needs to hold more keys than you can hold in memory +* Persistent work queues (e.g., producers append large work items to a queue and consumers then fetch + and perform them) +* A caching layer for your application logic + +It is designed to be durable across process crashes (where the kernel will flush everything properly) +but it does not attempt to optimize for durability under kernel panics (full rebuild). + +## How to Interpret the Performance Results + +While the numbers shown above are incredible, it is obvious that any persistent store will be +limited by the underlying latency and bandwidth of the storage. For example, you can expect a +read latency of 20-100us from SSDs (NVMe), so that's the lower bound on reading a random +location in the file. -See `DESIGN.md` for the full storage layout, locking model, rebuild path, collection -semantics, and compaction strategy. \ No newline at end of file +What the numbers above measure is the performance of the *algorithm*, not the *storage layer*: +given that the index can be kept mapped into memory (12 bytes per item), lookup and insert require +a single disk IO, while updating or removing requires two IOs. These IOs may be served +from the kernel's page cache, in which case you only pay for the syscall's latency, or +from disk, in which case you can expect it to take 1-2 round-trip times of your device.
diff --git a/algo.png b/algo.png new file mode 100644 index 0000000000000000000000000000000000000000..c374baac3a9aa6b90027ea2eeadfa754cc20f438 GIT binary patch literal 91019 zcmd43Ra{k3*DnmBfC>VF5)vXIASGRbfKt*ZNGsCaog#u#lG5GXodVL`-Q5jR-`LOd zzUTK{o$KR;3VXBmT62#0$C&((l@`UkM|2Mf2?e*4i{!A#fM?z5#1lAfuVi4KF+7fT%-Q!9Nl>s^$3e#Flr z-Xv_P^V!Ud|Xo+uCHxqpw)5=XHEy-Y1Nqw*Sb08^DkuxjznsmakWLeIT zA&X7!o4hK0p)XZ9bvQO<_%~z95N%yoH$SIt%8(Cie@?F){Ua+kEA9R+#qD0jS+P-v zw12}ELMQ*vUvKUvGwj^@pD*A$c;9cc|DRXoo{7r}|DV^Wg8v_WDe0S-xcKLR6md4w zsR+4bkwbu`U+p9A7oL2Rp!mgW4PJ1nfMnR3afD!Jjf&!r2@=Y`_jt{wgztv9uY zJol~kc`KbgJdhzrQBzYxqmYI{N=m9iwI?H5WV8Q?*XzT)yXhw2?%po{4Q`=LwS%GBRkS{s|%?BB%CKK=1O!$t66p+4s|Ni*Hf8vFjG6SAm2oG0JPjAju(LKre z{GcmdAYQjS3I`v*btG3ccSTQEw@APLnd8aUkHA2zy1F`yCmeozd*<28f0~*MX6s%V zjpWFsUmXp~u$fGRx?G%O4kEU3sXe4PjERpg26knC!Ix>E%tUo#G+!o`$A-&lr3;0u znhFB>shfvSMg|T4_1PC5yB)vsa!!9DcCEHwpD*D~lo$xZ59;?P z-GOLX`ggopxg*+sMDro*eVh)D%Rja+tpTYzo#AI^XM2~2z4#fP12t+j_BfDxxhkcW zCc##FbAnN927jX1jn0loH4Ub#*z;=+?jgZarOCwepZ)u%qH|Moz7lP{JrTA!n$N1; zhE2q2+O;!XEiNOYw=tAiqTl~jNz-L`<@#i2y0a~a!eBTn%+XZfY|hhI>dn`$Uw!=i zmbxNYov)5ZH}r6@u^UPZ2KzRT9pXH{V?McMezG;r_xq6$`o4BhfRGm=3zFWiOixV} zbr%;GQ#^g@o-7*pkc=!}y0@!Kd#vE|)>I{~jEoGA^;$!v)#_N8$rGoOt0T zW~J{;nsr^8ZdWZZ1szhGr_DpSbbMu>9C@AB#PMk;( z-Cy<>S`-^SON~Yc>Rg;KP*GINj1lFFbH-j66Qetp_jlL}rH91C7!7f+oP2)#NVHt; z*r?O(iD`GcxzIeWjgEd|VPTP@Rw@6gJT5K{DJ3oKRz~M<2G#J5;cVPTk6L5|uA1Qz z5v+RBd98|>@>E+}TNSQXJdU;3L2U^*usiy{Brzc+k|Jor>{AcYD=ZdK;UO=fVCb_E zsg#@M_CyP~@$=g4G*3=OlknOWd}_P}>qWqAN$FS{2e&b6=v4=g=gzH){Pg9^pu^Q( z6!V=aZX~TQUyw5tbEB#Pp=jdb;k_9z(mCJA&gX*C^jtb-E<}bOxvMcaDMeeDMvC9K z(&3Q$%U?fo1_pn4)*`!I4ahcb`@N@-AI~T#n&>Ws2(JfcXX6p~07VJ{aIC}t7di(b zJDd-@Ik|UaxV?$|satilw6r3aKi`5f4=-aA5VXPWr78BM%fxF?M&5ySN9k&GpQ4#f zpRTq$T!}Wzx-yrH460?=5ZiE1Lk(|Q14wd5 zkjDzYcoQWM2)Ob+mx-fXp6!RyDtupC(+?o#zV#Xw3#r2SoW-3d z>SuX*xypE*PLkaJ0uCECB?3sJ{FN}lY(5NzkRMY+@ zz4e8Do(Ki3EAEwQw~DNY9C8lJ-OvOPXfK({#c4d#las2LJ(uUJ@f()Pw2$_u=+&$6 zYBhTI=9`8wZ~)>+r#H2<-0>Gtpd`Fww=;#@(BJ=ZbUE~Jb$h!zMLZ-!rIhYdPtqCm 
z(Bad&cke28qBtM4KZi1w5&6Qb_V_WB+YP<^9F#psAf%PF7rS*AdH-G=9UoJ%vf}0{ z8nL42!Xrb``hbaNvojS57y-Ee`VwESr{!hGIRat5j*3TMjTVtit{2_!ta(V7YL6by z%*?Rq_aV8txt0F77r}}GA%Zl-yY>?zS1w(e!FUlBRB%WL zx6KCL&d!cCd=emrkeHYp4wJo;6UFl5t>WGy(`g>~{1&K6Uh(l{-BIkxAF>rCV|j+N zG9~2NWMrX8lekwti?xYRT)wQVuGUlSRZM&Pn^7G}OhRJeWP6f>_2~ibkStHGF|-vx z8Wfb2-)uHVaBy)wLqc$8T`w4hvz2tgsS)vXuE^d2UTf_A9^ond%8UHNcEsMDZ zWOh#)&JK>S6?#~(+hYI0n=~8}ZkQ;j7@y#-rz$KwUpbxhK_887ilhs(LqkK0w)+z% zQLB^MJ39IsDyS7;8tHg`|NQ)XmHq%9A6ZYL}qOi=GNAf3=9mdlKwGi zs`61@7{pw%xkpZpjx4rYAHQP|3Y{M;A5Mw5(g?=a&>eOZ(ya<|qo$&aUA0i z*NkwxUGq_g{3z1tlus18lYR9|}QBjFbIghWK3kCA4s3}q@H zjemct*pr%TV>(@RXDQYzI5;>dAS;+X;u&D&c51=@mi(5Mmieu%E>SX<=C60|n_F3Z zfp!av-j|i6QEh87H8sUdskV2gn*0?&gQ{N*$+!7p2m(g44iUer*c3Q*IlBHO$R}n=D zOK4o?&6_uQgp|sjc;e#XNE^%r9fdTlx-T+;|#bO`1LQs1fG*@oJS{y!l2P397$k)g(9z0dp# zy&2Nrji~5gjf0iVIn=0^PAA5QXt=(*Kt7!Q2c2cFi?tUU7grd_%g_T-->;Ux`O{aP z1M=W^`G>TU_@?R4AD^E;wT{;MA)Pcx9*!0T=Nen}#=Y8bxP|DWta?4SKdtp8a)*m& zHm{EKio#3C5wD!eq8gE~NlAM;nd-ZM-zeAETLf&bDtV$}nwy)?0S}7dcj0MWj){%+ zxLB2=RT$CbmN@Z>jU`Ey2rJ^*b;un|5yxe3J7`2B!zLk-f>MI0q5b{+hRn?e#ci|- zU!a-CymERBWx=LxJ_Z&GyDLYXN85q&o)#v-(r<1@GXTQg0o*IlTuwMv`kHBpsq6-jBN(v?W#R8P4RCw^pI8Sz? 
z5v08#1({`wGC*;A3yX_TfJ6SB5_XM$1zevFpr+*`OVE?{A?!aup|!3r&jC<>2c|Q* zH6+Q$*W`_rE*?S+_=j6H+L)YF6go~TbPK~JoVS1`J{D=;UGDs?wAl6Dzn^Gd4L{Y&^~a6y>|G?|tC7t3ZgMVtOM8JcL6uN!8=OFY;?{ zH9*17WpSb(&Q-;?v$H!s1P*}=lq(e?3F&>##~@$_AJro`O{WMnH8t7Mhti~;WGUum zxvox>8hKY&R}a%LA|oT;MnzrN-3|2gL>bQdu;|NF_X)E3Xm>UqR%dZxLCYqtLG2|F zL=3bC#B;#4*$jt#fd8=A@6D+Nx* zqoTC5Gy)KrnVHqp)>@dGKYjH|{d=%$vw`IK`i$z5A*3hJ<^xa+fC4Q8uTCTuaE%f4 zM15&I#tciQG}VH9ea=GYFEZnLVZvcHBMHy9K2}Hpw_H4Kb)CF4RcTeT_vbzaAUw%SW6}2GApqRN4R&y!7S`6y?d^9(MMbj%zK6}1nAnU=!S*32i`6H6 z$TWwYa;MWBSo(ueO}9r5uQl;V+J|UvK|xomcU32OoKMMPv2Yujk78zZ%XU2DVEL?T zC-BVm{$w=ZN=uvh^*!tfK@kycunG?QE%*ilDR*66UB~6ytPTJ@Bk}?e{!znno&El^ zasNfafi9oxh*k)g!Y?;ZVh`Bj) z{mnIRwqmY4nZ);Gh7SEtd3kvon>94bPTZIu7)UW6!um*97tkap-LNeSEq?x>cz-dT zUO`e415&pMNG*Ka(%6$aEeH@qHP5fVCIS)!bf+Uw;5s7z^U6w3Z08@Z`w;yPfciVk z73rDWs9{6bq1UL1C@En>+y|oZcZ$A!i}Uc|L7zPFBM=eNAjBb5iyRJDysMaCQE0Q2 z@_T?3u1{BUuCK3Ct?2%gC!}ecwMy<6)7GX0Y0Z!HRTR~`@Qm({-|Poyf1EHfGS(ao zyt~|QB}06cL>Qd~=qDO(S2m?)v1qA7W~8ftK{V5EeKbTNTxju~ zs&NQ`O{bQL>-t45e9t^o=qu6N*XM%mn>*LZl0VM)B?SH5iX?#up>&zxStXBdI^0~H z8jKZ?!wnmGOt7~8{mWuFL<42K6^O0=WLfTq-VU$hr?HWAwcIaA%9bB_Db_9CG~3le zD|hF{)@CM6$Qxu!IK6U`EwOr`)>}Y671rwyK_&q;Mpga!O6bd%Rx6U( zpMJ9Z8GzdY{!7lqMIa;7dyA~NNPw#1GfN~h0fs(Xlv+N{D?Yxp$#NzrF$Df12n7z( z0+@Z#@ZKu7oGIzb;~_CADILU)dU<7%7{&8B1joBwM*^rUFdWV@rr={xDe(a&pBXE| z%gd{f^`3TTshFw!r)BfS%i{NWQKM@KbMwg(0TJO%pLaq%K=^!NA$i;*DtCJ@P zsZlD5EfF@~dacg`B8tB?@f;z`u=%T1v37g#Gg^huG~J9u9L5ow zmY_Z%*e6gXR-+MmNRIEI!Xk2|bX23WQ1kwO)dq1%$))so*CYTzTs%A~rC9Xz%g^>f zy#z(lJ5RlO0YD=3KmtGxfWtb=df-ACpv$lu4kcJD{^dKJvT{C~_vYSNgtCe-Apk?7 z$kw~MWY45o-9shBKm}F3!<)>tjkT9g7zo(j{(d5Ke}|buD+pT0;|&n2EdaG%#ytSM z8yEq6Xl{O95Q>{`HH{G8W6MrgKm$=K8a)y1 z-p>ysyKWv-_pWdT)gk&B05LyuRm-yi=jP^&m5F$5w`5U>KyY6L>IM0mt&9Wc@WoL# z*K!2z!-pV8q=9^mfH1I}i}2iVw+Pk5TmPdbMdF^}x16vy#JwOLosWJxL*4Eb6`F#0zb93`2Jv{(t zydZ>WL1{){?1Y4;pnMyb&gjLv`~~$mg2#pqkOG>lw~wQ&L?}NSTPUQp;YiL85P3jF zGhA{3*%n%uhI%WMY51vd*ujCp!Ew;6VYAvS$UtzT1}zjibJnJ6*2ltU;QrBJ9zdSd 
z?EigYm;l1Ge3`K_2)+OxpMp*ZaD3I#7}R}ocqkC_pux)(;8Flt51QNE?FEO$x0*nR zk0aFYM~o@?*NmMcdL!Qp;+2if&( z9t~;6`A{l5zykfE>}u=IxarGD41{Xbd?&Kk4ww~YFN5J z{!%W~y3^Cs0|BH?@_?6$$_vpM0lykw{Pgp?-PQf+uODG@+$6-^VZSJuxS}dK^h5NI zCz(MYyZ_d7ZHK~ZZfS`i9nd1W4bLMY2&g3^+@;eIN`qn4C;&-9P?7-?rh@`v*ivXV zTh|rKJE|*7K}Yu!qn!c@6FZh%|6s6z=R0nBa+QPzU)C)Xhw=+whjpLKo{IqN)0VQyoz(&$TXH~{}YiXImj>q3{)Ud=b-P|9}g=5QUrJ5yWOnIZ%;I=gK4|jLa^>^Vj*Z0^VBi} zh?Dx(t=|$2A$ZNK%V{i84MN(>EuTRMHqJ-=qF`$%T;*}-_r1Pq3JDGUmY>fAz=?YY zv~EJUGGKRH&{@W+Y>eWT!LWHaKR*w++#OIvJm8&$g@r#*la)anfnM!^${sqCeaM^T zhtN5j7o`op1}Vr2w~IiV3FeWibT|fyLf0rSQF~!x<3iAYE>fBqm5fMr?c z0J&9BU5#JGT`13lkODxr^_oDP!ZKVcr?nZO=2s>G*#n{(c2rbe{^1i&Q*A(R2x|u9 zj%W^(v!xK3oDqZx7Do5s84F7=l#BVzO<%~2>+5R_QofIvem<8W2_iTW($c!HRa=wg zfj=L;C<0<`Y`29R&Zz!VBVTW8tkCdvlQv7@;5wL=p!s)abx$CWAi}SJ;#diw7>Mor zRfdlRNX%GrL%?u^eowP}jj$m;V^pu&17MO7=Ekh_4WCH^BZBoUJqoA9MM+No%vTYr z`zmRFQ{}7Q6(A|9pwo+I8=LX8}TF zAYK=Mc$7?p$HuA$Nd3GgbY6ITdRk5w0*Vd@E-8Q()T*q1Knokc`iUry2*DjgL z@}Vm@ojG-6P^v}K2bT%^3g`!$NKM`%)aaxX*;^>w)5HpNTLuOOhV|MKB6c9h6fxu< z6@K1SksTM+7;Rtejc?Uvkt;JEH%>;T8;!zZ`RZfYg*EV-c*(iPGFVAgIMbI#*yIHd z6d7O`riUv=UiTh>m;M>r4y>ss80uDMd!Jo_K!LX7080PaT5_O-wVVG98@IyS*O`7; zBs}j)y0u_uI9~kvWJ-;+LNg7kfw;zWl?~L#xuvCMATvh8BDv7y_`v2go~elhtNdVG zFCH1}a1+;?D?6kaht*g#9#pE-?Dv`SHOC{Wiignd?tKizs&%_@9T*tMfcjk~85)yR zd&ghmafzf?>Sf=Gp4d>e-L8C{vqScr$Dh_#G=L^MGqvv^)Br_ZUSI4&3qZ?NjNTTN zqIH7xLD)>N-Y#=r+Oxp>8p_v1NFopcp+>S4`D8tyhCICy^hcQ2(3%e50*8QXe?#{g zNP2r8qPP{2x%nnEI&?pKmH#t>V3D9kw)|dD$&{X(m-maFo+#+CRmLOy&j75XDt4Jn z8!K#ZJNREGPF)raqqP7;a=+kwcwd-X*R%0gZoAdzCC-Y(t>@`;-z_!3)89WnP6ZW& z4@$D3I@R-lfPkaT(LhjLkP%)k0z|+gDTAK~s)ph;j?a|1Xd=MaURO|mt8BM_N~bRZ z%8KGp~B2_ey^k`%Fos?Xc|3RzFUsQas7%3~Q z&*`#9O_wJ8qg&MR_R)*FnhSIQJ0bnY+MRv+KCiRbHsu@wEOWF049)SfRJGIPVS{SR z+_;E{4Ac(|G3}th#-prssV?iP2r6@^(r$6gU9;P{mM2afbl|nS2 zGFAv|hu=$#OH0yKh6@Py=VzbUlCTLKA-s(f*V2VYI@N zx66UD(z6Uy5kaH+lpWkmVstTpmqB5D3h#!`LpYlMPIr4DjtO|I_2c4az3$^96{Wts 
zhkVNT@8*}UD>RbTrAVUHiZCvbf!StalK`2%qV-kP~}cHZ3ZZ`qEOMG15q+F%f`k278C&RjzBPOXg#3}0dmt&bbdGD0t4a3 z19qH;nW4|%?khg(LG*seOPGO>&ey2TDGQ5<=>q9&eKem8&{ts;;4v=xbZzX*$xs5)klh{14ifZnm_)Ma$CMJ$wpfVU<*ssDHd^30v`~3ZbDAe7@{$` zq~MFw-RIQQf9bI0`&b?+!ZZEX+wyN~Yg4nDfyC+8bi(h>fV!st;zll!tK;W(82HAO8NO9rt#DD1w636~+FJq_AfI1_cPX z%%1{V+nTQa4Y`H=X1OBjOU2@Lf>*rv}UT!DQ&+<@~sp5F!Gm%}=0gNzKYG3%<8 zn}_EBlo;(N2C_@iEbKqsgTwRR-`n2ahLm~$#?RJFEw3tC5V)VWKOq41Uz7+@GzTCL zkx)h^?xs_LzXJ_eOi1YKgkXlwY9yA>UFxxWPif?oJ8jLunD{d@6Bo^ChUk~TG%-1h z#~MKR9!k3fe4{w4odHZ9xRD7YS!gQ2mbgmkC1SXz?YE^=oUGU`1~FiAQ~u|02zlG z9z_WF0$x=iX$6F>TxLx8pNWB1323zM!s+K*@HKBIK-a3Ym3CZczRP}?3M{oUT#7F; z^s!RrTkI)32$3Js+juXXL~^i!T9i7*hlakfwq|?z@+G1-z@6`dIrNcX5o}M;($_$0 z16T`#4dLX%-30$mMfZw;s(3=hyaU(I)2Qu$8i?p`K$|XM#m9RF0q&pA zd13*D2D*j=i+6c>X%8wA!Zt!oMkMz1he#q~ME<;tI8l)BY6(_O8-(l<)9EO;t4&RJ zhqYlaOswE85Re&0wt#smy+}dW_9HLTv>-6S_ZL&2(F28ClCRju#!OcMSy7_9_$}_!(q)2 zpamoJBv2lj5mge&=4=A+^Lxx+OK-)$_yz_t9bwg^wzY{(l$!6uUKt4`!VYF+l?O-1zkl>_-$7 zEQ{4162#OW=prbuPj{vxIZTM4XP1~vhQolL`PDxg_FscA`^H`rAA&HaR=V)luaw8M z@>%r=mxrw-(8(o;c&BRfXaC<{c9Kl!R2FE{O^Zehx ztnQh^h0RT)VGQAypBj*0g5cN9p9ySYVzoINPy%9ib=ZldJi^O|z&gv`3Y}WB<^eUQ z1)S%5rA$Gw+J<-@kOB&|jy6!EwApLH>vD!21{S2s&Qwrdt_Oty`t>mk0px`KJb z=l_-J!{_|j`bKO-!v+AyO1b?uK(jsrZ5y!aGAsr_j5q`say~N$1e(RDlq&J) z`fL#^&h?)GF$sydm{gb~A1G zQGLG`q4dMOFM|#S({q!4C-;%D9p})iPe@Z#!Xz*pGc_&nMbXKBcEw$JkJs-0x~v1U zh+jKee-MW~CPKYu3XcaDH&g&6&G1tEWdaRgVVCx#m_-3ijd{PJ=O zaB9t|w+{mgLPL4oe{6CteynrM<$1Na_cwfZw|?kjeNU0Q$W^OW9`)wt#pbN$hcDdi zH8sbSefl{g-8@9aHV#%a)!hdwH9eNt;X1`J`R%1OWq}{Q)S_e`tg4-C2}`f8aviFk zOa)SS7po-MwqaJbGYgNnf3NT1lcQedD(CyF4L2Wc%Rp z(oNuT)txhqar*q@RqaDH6*KCKn@GOv8>BVsGSL^sDFGFqzJziEksP7Sko$^=U{Gxa zvz3@Yx>B2JaoB>1+X86hbr(Clh}ldZp9XD~@tQeYtX9~>wyv%e0Hn^aF5p}iR`^nq zezquoh=jSmPWVRi2kp)RI{BNRm!BVaXlsid;*x)B^JNpDC5O#xR*-q+)vvD&6WuIb zf!GT_?3lN>CT(|G65rEaMo;e!jU4n&T>Ylb9^I&?w#nmpkv^(CQZf^_>>RQzzjFGR zs-wtsOA(<>@hj|$EVB`IFCDYBnFRmW zuN)}K939b4zZl{@O6dg!Cy{HWIv-%$tg|>_egDi%027iyymt*lu|YR(=yKW@6%;HIx}^uoTKo0!tM3$Bn)jab@u7@2+8 
zK68B?UvT7yAu)a5(nnNyY+qq(tT>QVooArJBXGE8DHKoWg!t*kfX=`m>)rvqOuB7u zhFtE?VaruUHLcS+;VHksR@kbV&7q~t2gqF9Jld19-4*r^NM1FztYxR9rM|SDyC3>u zQ7(aHR!VBw)R0cRz5R~B95E#|Rk6V&qx|5}?Gi%`?a4=hoK&nLEdAfGpP20nV>&0y<(!>%E%jlaewupXyqhp#VDg&Jnx)q3sC?Plr`6pFjVr zSd;1#xyE9?{MySHmBwtPat@D8PK%R2Z)=Q3Z_gQSv6t;kHMy@kDik{oqtR8jzd!N- z0pswBB3E&T;>YFS`1EcQB?ZMG9V-m~e`7!Q!h`X$XJE(85b?dfMHKk!TQjBNO<~Yq z%nh8rEN;2qj3GgBpG&(^Unl_?mq&roc8^yVSYPzgmn9_Wkw(p$K}HoH{ChkMiy$+t z63V3d{k~kkq3oAk59ST0QSt+4yvArdb58a!OPyD1czsb-rABxemw%n?TE67X&W$!4 ztNFjwiyX@Q;OS-ha5xlVw*c2uuo4*;JWc5BY8Hq zwR9?-U2Hl0k$EdyYp6psdo>(+#ne>Wr!q)tvG$hoSr)w=B0{7!Mp z*`ry9tgNhe1xOmFot8huaM^sHy!*pYA;i(fXs||{qJfyeVD@kPiOJ=9Um4XSI(wDe zYU(p4+g;u_u~F=66NL=o?X>ioS7^Xf&GmUSp zKN$>DA1?AVAH9z00>S#%;-{E1boOUg9sdIjfVjXUHelb5%_HN(-iTp_WKbK;)nFHm>Yf6_9A)l|jP5?n9 zQGDGnyEx2NV!Zswj#1$;+S-xrkn+`Ee9 zz~KtYN9WC%UI$0%K8Ukk?-Rk)M}ka7^fC72^y^B|3h{yK?DXJHC>J$ z0r(co9@fp?)4`;n8KnP>LltkipLv@LMrxl*j(F>&a&KiPC!{jz!BIsAr z%FrlfxxgZM3n#I%-YR6pQ&G;tt;;4K4D~JMRi!3i>jN-*}2Lc;ST!> zR;{(y(s4g1p#@7{lZfl4dC-N?^<`fuD0Vx-IHb?>w~3xaV7qg}6bE?B_&N@7`0xE1 zX-nb;*i0`YC7WAX9@)u}K9R7!_tc~P8Mya!*T?y8{lJ(P7=L18{r4l`bdLv+z|JRMof1_hA7Lkk6ag6WWw3Q zmgk1_-O+4CDyT?QdBN{Cv4I@J$YP!UMt?zE`S5v1M{dar>D3t-+1T-Wk9Nc==b~0S^{%XkcN+BR zmvY=(M_amj(0bpIqK0!DgS8T8wFVJ8y{wTt-Swv`vAwm`*^l#ce|N7s`;LxCDTfx( z4(4>4?DuRSG3;VQ(<`zm-+Vcq(JC4~fg(}mpVR7eLri-pwNmmK^Kq}c0{cl#J)z>usXpVS_xzp0jg~IcUn1ROt!Bh6a;a4rL})(=QaZN}mkn>Nv4I zX6m(!`S&D>&G2=q6NwLzzX*rftP~uHb8)mq`7~Ij4lt<42qmt16k{l6Jv=hF>D$6CNNr7$Y3U%xA#fov?J!iZg;Z6zUD#ENMwVsVqcTbkZ8OnGJ6 zEGen1lb`abi0^!=&Zo&Nb`P--Uw{nS%0%Mhq&|F-?Z2(n>vtrQEH< zY^N)$dXuAhEf4$hFp&&S|J}x3u+U~96t5oD7VaZt=p& z{U3F7kS<+LZs`Gd;irolDq%BJ%MHsP}&Uxyc7mc(I6cLKb#H{G5@pU^tSx}r{pDd%Z z4^KBH#5y6xSnP?d284?3bcJe~mv^)_frBJ4i@L>&Co)$huzzSG2BpKNc(pR@mrs2Q zHxvf7dYslQ0SA0X>9 z6F~XxvLf*p@l8&~4G55SV0R@e)j&D|vVJ2Dzu zqrU2iXx&Yf69)VAN&#gI(eum7((2OrGON7|+PFSGCNXQ!y7 zm51YWMGY-vIE|?c6!z5?*ILVI#kWi{s95%lqdK6c*r$p9=If+FobwXD$1Re);`&5c 
z96lr>G_-nS^Qa30Bk)h|`K^fKChLbG4=c^*iB-!@=yn$H#z5Zj`N`7@qX2X}Nqj{t z^aJ}a$sFAo4C?H7UM*U`Vd?FqU#alqLO$O4=7O01aNw9Un1mcHL;c3U(BAF?8FDOI zaHMh(-AW7Xa?h27mWX5f=Is)j7gp8!FvWdrBfhZVxNE9o8mquJKft=fDG-$K9h+)! zIlEA;$~i2}e`7W3f3BYm*AKbdR4a+;pJIrY2`77H>opi^Hb)XnNvxPQIZc~adFrkN zV1Na6_OVOk@gh@;O2B17*AQ49vh~?iE%@=hpU#<##xC_}`1y~0tGVvz+L`H7E-P#6 zjbFa`N+c92nTYlvx|r>tU-CbmIhM@4VFwXrny_bPjRZ@We<(k67_t!if+iF|i6nUCegH zOw*{n5XRT2bd0R)wJF}vOtjsu%K67m@H~Brmx=k0>-Ybi1(+^#Vf7Gv!R@ed8>W*G zvi^5WqQmp08`<=L@~WxykmnDJtX_CzUsGzYDk|ov30WV#g`)zU5&gS^`MdWy%HtzK z+YXj}(Z?&pGJIEG5?~DFE;jX%H?0pB2Ubnxkm)j;T?cMl$nDmcmXbVw{(R*2_1n^{ zQ|~vI3)wPbwN7ZA;i&V=9~uqM31^+#HB~u_towHruYURwoD=&&A4~ux3@J$DNHycepIA1L6ta^_x`7sC| zM-ssSz4O*nrf>G<(84*cT{%SgaD@0K5C_u(NO-@41gv60uDW=C9~}-`r-RXt7-5@l z^2WEbxPIwHIxw7o%1)WJ4>P~ETwG)LR^kcP23GkSRP%>r7>3%qx{u$bA&zIqOGUO- z9Sd%-*>T#Hs4xJ=H=e8;r^}e%SrN>Sa}k%829{&F*Qdd##??BU{X0{2v~`6a*Z`Re zOWbjz5z^k#QIc-=MDt&b<+VZzVy=o&x{N}TNoq)!$fJF(6EgG<>6+D#JDJAbZLvXotY45{btiR^x2UH>$WGJuiJf|cy-ATv>7+PcpAT=}50cME;| z&w^Jrj0+`oTU{2Us2*hog@zsl?x>>6v1zyznpxlrqRWYao(W^0^78UOVVDrF62=BW z5oQ$h=Zlmi7$Tn_y^=5$4~sLci{6}*&5C%ZMH<|5dAY_(D^^%>7;C|#(4^0nFnn#L zqj|llHIj=}_503Ck34lp*}9tN*>0MRp_w5*bFK)#g zV!Pabd>shuo*`dJcXN77AH{iy*5wi7{y=K1!hi%tlQ4-f-m>-5+v&zb){$>Ix}Ty| zuGG+mKjMf{wd}fm!lwRkC&Ep2dvO+bcHXe-(ZV*A zJ2#4$un4+3QBVhOTHJ7OEWY2kp5h*Fa&{f_(GuR5?inu*3UwDVRLSDm-kH9L-ww?r zldrV=P#4b~2tLVvV~X9~nLV_-Kgx5<4yWDfSH||4tL!%0OPK1L#>XSUF+W(@^B@+m z2sP7_{_^j}CEkeJH%jL)WNB&1&3YDkXmdypSW)oCg{>9IFvsCT4MCrW3tDG)SOx|T zHTGT}2C@s>9e4P(Pj=~TCyJ%pHs|_n2lGpJRW31?^;aTFb4FmSZt3^( z=Ep}BKMwI35sfjNfr!;~3)}h0Y`qhSD@$z$Tvghq$fyU) z{2lyG`FjJ?lm}6rug7`4c#PjlbdZdgp?~h3VSd)DvGmX1erHB1)oB#z`m|}uxN{Ta zwF?{gJp`u9Tb1uBDk}%lPZj(~UZLBa#_-~I*5Gr|>BQTm{q2q}kKn!GEHj>bw^bVb zK3@?ISn+ildH!1=@IeK?y{1~lWOZ06?(@VDjAqoEwDLcuf3N_rfO^bkEbGmw>E@y5Xwy?>{JD;dyV7aY zQcTIJ?46+s5o6_T^q&)z#W+aTJ462Xac*y~FydU&1l&Yp(u6P4E!8#S^U%87tW5y7 ze406@`W?Zcp((mbuV!y_(xlG=d3M)#YR8I>Wf(PTK9vU6 zUf5bayCi_|{jT_G$?g<{yQji_XqO`2vend0O-+q7w-tPPKC;=ZGc>MqLo1)A%rOj| 
zfUH1YwjSsH%1stWZ>DPNWwdPXsw=7_95ICK@F*^3RY;d?J=fS<>WRgpd!KQexTR49 zwY^E#K@v_m)Nos^6u>WtfLR=G#cT$1>d1EOjuVySesHRXPRrsL$jVlfO$SHVQj@oH zHua`Oqz{Yh%3{I?_m_WLUMlyd=2{tXOF5WqDm!Sak6Kpcjw+YO53+<~fffw28DcUr zt#E`I+5sF5vTL0y$4Pz>o~!za~2jHx;rifiD}pf-UJhwsiu=7O*1p3pn+r? zP|g2s2x*HMHjpLja8m6J4@68CpTaQ+aKBYvLH@z`nIcxLMm|4GtirE)cuui+GtRqP zk(S31;D&o*TQG%UZf4$V7(2wXSvDf4S304nsWfcpt(cn~kdZ$q8;>x$>8p?^d41Q2w5Ba$R+B4HP<*;fj}}hI@+x6GFb;_3Eo@ zG(X+#{HlkK-W_gF>~ohq#yK}X%WR@d64JywVm^rFOOH=Ut&NjuJik8*G;4Y1>h*X3 zsK)tZy}|-!80igITEdf#uT2`{hhG#R*pz1ZuS+K;=4W3?;d2kc#Iv}g)tFiKk&CW0 z!K)}FKG%oRA3sux2F8#25UG}vQFAE%-N(h(A4_;_VX;3|bBvUE)cx$;CDV-d(3tWY z(d_3+D=WHqblNg%YJ7Fiuh<txDUft^)86mVbQZ7nfAbb-otXIfuqBp# zHAsvSRectpX}aOkYZDBF3=LnRVWY)$%@TL_$srx1ah3q72b8hdjSd85|i-B|r)an&+A9jp0Uq?R^q^BrmK{_Y=8c z#+~e~&{xM`Q=Q0KeDpQxo#JO|rPJ}9qL1NZ#xH8{oovWWIU&Oz8)(9i6t9o$IJ$CzVK(AxYd%9ub>{k!A zPyRMjeYq7{h93UmsFQ9hH9g&EN9K>f=js#w&(ZRkLk8Jb)qBMfsRNfel1jmOlB1V7 z5~JJisn$8STB@azN&2o9M>wDr{LU&iMcW4bJnvPj z9L)d$M+#c9=m`Zib@)nd{nBtYSL{jl$Ytj`>+gMYrlvfTcLl}8f~oQ^^=1si^QZot zwOet!7{8Lw4Amk`dTsyhUn2)2V_d?VEYdB{p_@SG$9G<520Qe|{_a#DlnADU$2j=)2oA?vFP@ZHwvF8`qf6 zrO%6PF3A5K{gbrnYFEHNUtdj)`2RS2>!2#bc3&8jP^7z&P#UD9L%O?DxHWo~Wa_>VHkS<-Fi!3&{>xGAlU2UbP>98O@lRVeKlCrW z>qhUYvGB>QH$}?=SFX#>(B+P6Mb!Q%KG*k4774}xN@1(<23{$QjIGf%AN?cYL9kAE zz5e0Cjf}8(bL@IGx$an>U8zf47@M*h3OxTIejJN*$(SG;L5CC_9}}zA@CmDRJ4!SF zSVZJ=i<;+Ao!Cgez926330sisBs?XB2qoYdm~x)G zrkgwDjNepPq{+HDJ{bSdOdhNF60+IOu*Mzo4-(>_*5$MDNq)1n=js|2eL}>vt8Yt$ zMTqqvWBzvPQYvA(ZD>6w$1plGGcq9o4+t0Oa8W?Xc&<4YpmALq_qR0XXhAahd)VFO z=nKWSsu4VI#7~oxQ%x)^LZMJ1i_Kl>EGdj<$_4q>_LH@dCbUDcez5BNSZ#M^t9vzh~H*A(O zsMw9~PbLi#j6hf&S%QNTSRBN}JVW?exCVo#Gb*3lYhOjp$u+Vz?2CUyn@Wj)d~ZwI z=gqphyIVAePm%R|lliu?uI|sJk?*g0UBP~!warN`!I^1{%FlnuQ+~~jpB^tl6z7fy{9y-+I(9Zp+U_ubK8`A}vv3J1siAk}+K!IYZH zevh0c$b=yrpJ)B(3u3r3A#3SD{PSbM9oRpgws4{Z9f`kB%SoCoS}KuBiEZ>`>Cg7I z_#?!Bd&LnG+8mU9uWR(4^+HrHC8HMluV~nMI(it zL50k$ydf-_cA)_D=+8sGGkZl;I24ws7rldIDX8U9Dka3F$I2HABuuLFI(aZaop?`h 
zUFSuB(&Ge|m);5d84C17T_J5!9T5e5I8+@M3I$huwsO$~>Nu|TuCKLqUOv6FoLX#w zv>5bFgIOpuLoIhadxnb!-_8z$I^P&FgL<)xtGa0KV{I;$kYo+DLY^u;HuM`E0S^6J zRh+!H`ij#0Pw!HR=Dx zO~B>U$b>^un2C$=*i>lIiDH8@q$c&)yDU^qqbuJ1ii$mlC6()VoHwemk=Nm`ob%sZ zvS-h-lxIr#6cr^tIWdT;Ps;sS$7%Eu#zaQu*X1nfE9P1ViBh8BlxKFgx|L{Q=?UYX z$g8B?8ZpYBD>PDtns|T)Q946c!)mOQg;|et)Y|uY*B&>NlbE~IUq10yN0%j!)4o6< z%uo`lKK=uuP@qWaP}9B`OuWUa|NMt}*v(=q!LPYftvTN#&iX@vXCGIS47TSRYYTit zR!zx;P&$^E6NOq^Tt~RTrOa1jUw~sxg_-rkaHqbCou%`Ie&ZM9(Z_wbV)ZXwmwCeQ z-;`YH+DFb~aQby)h&#rk!T(%kw&M+oyWEi8XxV(g+DIACAFQ%GYzLy`?PNm> z2~30yNliaHv0ksufES|oseDlvg!*BL_7kdZ7;D`n^+Ktml183K^dWq@w18^7H+4odM zSA;19I)+hxR@!I}v+ljjkWEmOy&)<6l;+n6Mcka`UAbJg98a+MES{2ym69XRolc%} z+PtQYtXY^<@Ff9C{6uJ1UCNb5#eg!Tqe6#ko3@V#;L- zxsVrB;oowL=*>EtN-xw=V?~0@Nhl%o%Q|RkX9{XvviYv@0e(7KQ7((8i(}tvMbnPB zEZkySZlnoRbE<1qwu$n?b9Tl`Z6dbv$+vvE_t!5E+u!@~#H5$r{f0i zm>HD=)Yh*{G+7aUxb(A6mMwe*?f*!+V85S4HGqtNn1!giPeU!wNZ%4A=x#_-$unq_ z`jwS28!L&;HF}X4@L!7Lt5!NbvOu!9U%_!q48gQ8=@67q>}jxHvapnki+{%W5D89{{_ z(5|CgKIqb5B%39pwjUb4ybhQYC#(G^*^v}P;34>Z%YMvD23&hIUMDm*Ru$}bz7N}EV6n=+|b-7RsMRi&%TRJwH2dGaIS8r+0|reW9i?bIa&!YK_Www&Cn zfrP2yMqdN)WE6GV#(2#1eMwLRP!x5212$geqZcU2_x*>uVvqbd$+z+w~+lXk`$hhfG8u=|NHOF z+h|dQ$Xrw7jDreipN;QzA!Tn>sVOFu78ilLW0ADTFEJ$a9nb4!poX9-1KNvh*YR5D=g`b&ae~#mMH~94+=2y`{eiM z1PTifALP$xvmd@3-X|@2Jr0vrRKEfm*{$RCj~T&ne|jc;+u7Jucp>ON9&VHQJn&Z{ zNqRS+E0qJ5^wXNt2tf!onwNbM8OarXNZSQjaM#2#XKconh41MV6%_EmR!0*xn{Phr z!Nlsvh$q5Z6hD0PpWUx^mwAz=ViVh}mYi0i^#U8)2|b(9s2um=q7Ja)oG%T3jI{m! 
zO{cKyR|oTp-u<-Y00){w(E7l;al0-71_T8rE~#v7F*&a$c7G(J*5T2*Qdmg%P`WZN z&#ZMf%Gd7hp?BL(=2ITGcme}ezB5jes0aDfR_S~`AsNk4LPdOTBqrDSW(q zdp)z}&bF5osjWb1{oZbVFrmba^PsG3G6vu8n+=!@&v%0U^}J5coRro*&P=L|I03G~ zbt`-13+Vq+oVDPK(%<-PxRytyE7Ogi)e*_9yAw#~t4^0Mi-ITp>C2b3zk`INWEo&) z!B%ZCV!rYZ$K73hl6SK=qGYkjN9q*a;LouG>7FT#mQGomyqtCjzogHUb2aF zJ%A+0#>UmtEJlmZs_s=VJ8Oobg^!Q#?lILjJpA02(YBSX-u@t>tgNg+ov}!wHeYV~ z=sa2lj2ethO~t}C%dWf*zUV^61A&Q`wY+34RIeWX(QaNWu6I~Qi^1b+TPP7f(cNvk zd-WT&%ltI-pyv-FBfZfwnVOo~K$4r~S<&HK{Vjq)^I3Pt^0$G6Vw-t>{^r%q%@6(9 zjTQ7O#pZgN<+CCO^M>M*fzFQ|224y$){{T!8MW*_0*^x8tPSQkE)+Rju9HB2f8BI7 zI^4tVOXtJ33{D$W|HuqIEFz-g3x4;$p=90=7+)<76_vnNF9p`y)H58Foq!Kt1H)pM5jC=O~?uzUTArmhcI(5-v56rb57&3=U4^j+}hgpI-#FkK|P| z(-tNl=qj6B9fs%6o0IE5GRqMt^h{49H3}p^IqK`ct2gC z%I0HN@J)fr#ILoL8B`3k?Cha+Woa$TuZBB!YagAqf9v>(b!s1SkbuvfDdf}Jgw}kl zQ~&&{gYE-ft&L`9Pd;7LGxBQF8I>ki>*qze*wgm2!-uCna91_vV*W9BonE$9u`Hu? zuBMw0m4OHRk)suEQo~*+cOIPWL)T$~w~ETGx`-Ety67UO1+Fo&`3ML?YZX&^M4Ueu zEi5bm-{5rXWVrNfP7-ux)-wfAMPUubl2)w9>zmK(n?5#E>RR2={n+lfM}({}o76hN zruwHk1x!G44EEjv7C0$bwxJq&FHz8Y78k=W>|J4x|8;$ZIi#M&dA~k;XkmTlG(vtEyB|2|asFWCL z4Gj+mPkRcAA$^&l&#qjJ&PC6hl_6-t5At%Gp1}G^Ti5{)k~ElLE8Y1L~vS~ zf^x}JQa9)OtCjHWu0Q!C*RCV7kgRqjM#fhWg#if(Z-f=cxmoR%+ute>&#Wz8+q&$q zA-UC$gR{<0EHg))q47^W0IzCSJ~Y2q*x9~+d5fN&XI=6n-w1DLK={KxB+}>0Ee)ex z|F0HJjsq8ZypqD1DYX9TtDXl~=^@e%{V?gAsFxom;i7wbjBWR#74~KdA+3_2WO%Np z2V8U~E^j?nDr<+6r62kj_M~gWMJ2-3aupT8NfABxx5n!9Vo+~kcK&r(-2Dtod%dJ= zVvYV7rVe;j-8_zVxD2Ji8{+p(T(fSgH03OfYAIx-`?HO%SnZE%U2C~pFVMpBq_L29 zJv1ygelDw*6bl>Q=Gf|aE8U!$uPCP4`*`{Ef^wf;V?3c3ZEvI4(bT=(apdI9#F0*Y zoY$p4oa=Y#drz+Ig*^^IHN~%Bv;Q<6w!?pD{KoJ0Tv|Z`*n@^8S!x8L?|!H*NWl|T z#X|vB7=RtPhh41BU}DN~O25ADs>WHMNUYe8&0;rSm4%EUrVLeYXu!>fED?L(D)_dt z*R4d6NvDkW=$u|Ef)411dcpddVc^?{-6M@hmUyl|{8gJ3M%p{o5oFDgSh%Etpz`3d zRnN06oF@Jt_Hpb^M!jTrWRn@$o%n&2goKg!eNoR>NQTu)UCMH!(dEr=^nk=7N_U&z z5%H4uvoFwJnsiCJA}9|ko#j%9FDrFRN3^KaSQYRPlMviaOGM}h59jw0+|%jR2?SJ$ z3;Dcwak;g)TqulRGod6@HLQ!>0TaR$XCSnOSBm@%Fd=?ta-Po 
zEg$9&eN##=5i6gCKApXvmmBhI9Vil;zBrVr>2h*|F)XZtqwjcc=yRjZN|CX>gTY)` zb(xH*(m(x$TitsLsh}^zuCkGvfdvXJRyIFctTh*x29CBzvS-NS#Hox|zm}ttM&DM& zFE!Tz=%lVm=7fk6)&wfLA>$uq1jIU-|2_IZ5^fUNbXM)&jB)Gp#PDa_j(a%=? zw0$H5Z0YjZ?gc)d5DE{ciUZ}8{ert1R5jrldA_tFxSzrNTtbC9%1X+HW^=XA`EuDF zonpCfcwy$Lm9pDs#6>||c$ib_2~FTS$P5e`gANw*WyydZ%kI`vSW{DT?qbdB&kO!9 z4r?PzdM%#k*kG_!tX|%$G_ke1nMmLL$m8>@P?5s2uTBgU9_hn+4{tfo*Z@Lcd{#uU zy$EgYIPL5I159w4!QZ0Y_>2i|9=CcFZ?2DYWb?k|*7a8X`kLi8YNxlq9g%j^`vwL= z4frEfi_HqShf9qLoG$id-j}aHpKs;pm0|M>6!c#sBbn$2LYA{X**uYYpT)#r z0gN^c0I=A^H_6+{bO6Up<}v2;@x&Olt*~dwOivH5t!;}My_FXA7T|xlCh$V`Vo5(Y zJviZ7Z2U|D4SA_yt8RWZi#1tZ(#{i?nAm%vurPajNS=ux*t^)7u4`CJ^xlFep=56b z8FG6}@UYZkaM}!o(m$rLg_O586Oqmg$#C%Oc?AyN)_vOSCaSZa6`fswhk{4n9sD_W zd-lLNrk&P=q}KkEJnsya|jomUu)QEuFnKidywLGwvl=9y6rNtX14wsn~OOJa@#$R zNsj(Cb}!Yqy`}U?1XBf;E3GJ7a9zlpe7dFYU(x;}nl>ya5UhS_2aokLoQ_o$@r`>* zjwoI2Y`Od2R9p^=4Fh@f>tLW;;LYnsl_i0Rllva4#9%Nks{ls7iWRznw046l5z&%u zQ2UmRzT+cFfM`bz5zcHgtFzzw$HTAc3~!j3(N=JzT*T3kZ$(|KRzkqw9gM{EoA#wZ zo4K}T+|aD`ZdMAgMdJz~eQo!EL3r2W zACKV+OBhuO-vP4C)JdKl(bCDfQdbutX9K|+mJ&SpVn^Ohv<+W9M!aR(f7ulniS(;y z4$tip1zf-%Sd!F=&8&x*nK-wqYDQUyeGYESJE=2I&a8_vi;n45;X!+{*zPMuZ9_v# z>-_J|8V;Re4t98IfkkqQLQ89jjr}KP@UMQ`p9=b1CQ=SFD+-6jmI5}VHn&G~Uq|iX zeziXBdJ7U3mZqtW_YS&)cZlsaZC#+aW#izGg352q9*YG^pve>zQ7XmwzRQtxisfq+ z!KYnMtB?eTvgnSJd2mSRX6U22FuYcjqd4BzUYe5KWQ$S zdjKC4zIWu~+MCRCX_SxKVgEab%^HVA`h#WAU|EbibZV|gg4ORW=#pdZgFf%wFo*K- zs@AIrk}vxFD&QjFJ@87^NYh&1PxUZZZg;0Z+04l()t#|-=w z50p|Z6}PayE+MFRXFc>791;otYjKhERumn4G+pp>;N+xwMzJxX!t|}5dln|>tk@JY zi?i}<%f4?FbQcCox0(DRQ9OO;Y2)t3qRvwJ3JYnxVDIK5hJ!Fzjgm;)yJc2{1=h2E^yj`ge$TZbP_O}jd}n#TNI z5}BU2yJyzfx6C!~@yxIY=bRTFs6f^C#~2)$-a$IKze@@9EVVwim6hK78RnPU+# z|2RJNzX-eO#euxm~fGK&o{=qv1plv(hq;EA-}3@$`#I zRA=}b)+Wf~y+-&*KG)xIDpj=70FM&jS3BtA5;?bB4d2cIiNB9=iS0F#<8-i~2@AxmmCIzBeGN8{yp2r~i;D>xv&1MLL;M4sZ*QNGF#EB!|{*I5zd2djUU(nKesj@7a2mqabX;ApDRqb4*ojv;z-_$ z$h)92FVvYVMV_a(9`qTv=khf3Q6~+0kf%7i*58TC3xj2b)i3Cn4xy5?32t#DJZXNM 
zrQ!mEXTi2@xhU2f{4-_oldqg%9aUDisoYG=9MtH*9|5iIC8}~#qQlso8X$O-$)xOG z>1dG)v`9_7%`#gE5}gg)*mAOc4ASi+4Z1j zh$uxx!(P{I6&wQs5Yp0#pp}4X&_h2bAtBK$n=J~-`&m;n8E?$8sheKf-2+|0u_ame z3*U6LNTyFq-4@1wv4F7l{Pp|yYCntgp$tML4`X>-4b4z$T?WnH;a`D()AF`G4Im*F zo8c>5V>lrpFF_IayxcVAjUUasBdi~ak3<*eRuRD(<#rxzj}BF1s@ZSHhiYcyF-G+( z%~Dk5(|L7Hn*k`~FNywign(;tBE)I7?K2~s5#%1Pj>x1A)HbuTSo_O$VPN++dK|p1 zZM7lUXXNVY8dPPBl}dBhY1#BVSgd{}{J>{WT>)O{v7mSQXf&-SC>JJ$K59J6(62;HZfBF0LQi=ycm_&&1@~ce$Hzb0epqkU#TPC_F1sCwp+XD`F#$ zCNyb4-p7;ZK6Vy0W_=-Kold7I&tfG$%Xj1KSPi`DEuQH+ni7>FI-948j&LhNt|>Hz zn2O>VMOtCt@1O|v;>_sa7-Gl3x=~EpFpk**gXTz5ftDnL=K9ZnHdJiXDh2lLWr_<^SN*O@HDO%tvtK>^rC!{nm0XE{pzO&}oIUaFH$~ zg-@=`k{?&8Q|pQp5*VhSDrNY!wq9c;Qogy?Z?JcQh@h0w7OT}o+7cN~vB}ZWXJK^d z`sp6{$KjS(FgUAO^_lw4#l`fegWu~Mi4wj;QUc8beWw1k;w=9x2W0-y<${+$%_uBp zEylC&jaj@UH|}*@0*RKx4`sTdg(C9fDtB)Tvh|J1IDMY=AXR90bsNF@Tb~O4a^Q`u zGR((y4cW!uyWM2TQgTwQio^)A{j9ai?eMGT^ufhp4l7EQTpg;CKwN7xTI6Ax>V5K% zaHk1%NwRXfDaQtz^P2UvcS>n!xLX=6<}ypYrwcC))a&BtTS)2^OtNhX0}DjD;2R{%4h!XQpO|cLSdNl z#abl+xw&zQUSMP*=&!uy)3oXv`Tl^!wvO9jGQ7YL_7F-BM zCYRF&`^!zQpz5*{iNnbPwxJ!Dl|Q7FX7IQkY&F6`<0@_##Eo8!NzEm`Z2A4&QWW#O z_(qqc`wIez2yQW{$>1a+`>p>g#z|<{kN<9SklH)2A8DNQ`=`(L*{|PF4?llyF8PW; zC8;PPGICsoE!zCW`*~>AGYpDlwabK3id2>}+r+RQWM>ZN+%2r$YRk79t&&XKTUD7grlPBe}}JQg*fhgo0UXF0iuP;gzzSk!?o zgPyT~2o?kAOX}sC0ZRNe1rcs~p-GcUP;5+j4I^YgVz}=`d%8k@M2a@LFK03Dy?4I3 zq6fGrlTPz~E$p^j8R}B;oK1DTYc=Yj{dnWc3yVYxJ{40cs%r^ZZ+3=+P~h_iykj4x zM;Mdoqku;&Fo18XJ#Aw?$&5hgD3tkqO6p7nYw{ZG)YHaq{qE%xWU z&F|ToXfaB8d3k9$IckR8fVWfP(#(yX2iW&F7KxTiP4?9F4kdDyC{s~2l~q+#LPD9r z!G-axhYn|4&t_+fso@RVCIyvjY#3mfCA57_(85hgqaezGcug(m&u$gJ>X>iOFR}#P zLV<(KiAUei_y)h5BQ4(xGJgBRc%*+r%=Dfc|`{)ke27byQlof9D zUc>%zAw{pDX@8c#E?QmetD-r-d+_+Y`KH1EC2!j=>{mxp3Ihx!d|K zTI9Z9w^kSOA60n+JnRyWSVv4%mFY~xk{W~U~iIRKIk13N3*mV0L*xH_VzEmhzCZN762|B z)I`<&U~nhonCO{akg|@8fYK2sN)jUO2f!ZC?0~ji)g(#9txVv0%!Fus0 z5g=@t?wlabF3eGfC1K&7{r)vu>~tL}ESJRk@uK1t*Wt`PvfF2--S2T~n>020I^4jW z_E!jw6c~vT0Xl$Ura&)4R=#I%{tv}0_;tg#I#88Xd_bo`KoT;6Et93HR`mDx+dWv4 
zy&`x-FG4d2P;EWkd=%B!XW9hC3n?4^*9A9+sy`JH>3j!appIES;5T1z)SVCFsQev1 zIZ9T|q}Ur8FbIFu;zc{WWo2UG=av*{ckRV7h|gqx@rQSIcAhF}Fh%g5J6`q;d4j;F zl2wZurM2ch?V4l*Nt7@q%}3`%X&O*9;1H3^%*MSVQ^_sW(_vy}4he}|%WCJI@DiaA zVP;=y{G3+IRbo9*s9B z4V61&qkEH!5J+3B5l5{=i=f*nB*(yk})R_l5JZulcVT`?06GO;>#TJs4wiV8?nn( zq1&^Kcq57|r8@UYZRfXbz**`|U=;CJeU?H%W&_9`iWwG~G z0G#}3-uCLaFpHMYRXn6sgx*aJxYSfs>4asVk<4;k`_-|O&hMKF6}CvFYHGq?4>~M3 z>iX{3;B{EHyhp)hLh2uMv33-5uivsEb!_s+{X!r%IoZndV!N*f+HN&}vBw{r+lOA| zb=bZ?%|O4mzb`sh_1EK$M|;b$z@4quQc^y=Ez6HmUO^${{|Fo+KtG$(+905PML!VU z0!O`0lQ%q|G|;?xlky$me4r&i|BWy#-SWswV!`NGjA4seK<`aA?P#tKHz^7pm7rk8 z>ett1pV@TccWi@|Y**UE#pFv6bQ!WtY9;naK2Od%gN8xr&`-WQ# z-m64wReckV;$VG&E!fxcd-$J~wZ_Y#!eHdl+b?mz_T&d!~j^{d5Vf7wQlwL z8I0}H1YrIuoks-w@rEIBbD?(ql{+UDm19&EpF9=AnZyX%t6%|o?6tTI(LG2mB|A*Ij`(W?LRqJh-@hujpk1fAW>{Y zk_!G3)|)=v9`jM%ppR$LNqt|&|8Bcv%@gS}AX=nwY=Q!p@OJ4PC@X`S^$-IwNKU7X z+#LEDY`o<$N`S@Clgl`;v43)k(r;vsiHS)E2_(Y8!oUgb$)VXL@+s!<$+~HCa}$VF z0>FR+m`e@<+ZQEZ0leNv0(m#7=o?Q`^ob@xR;StXm0aS;*5uB5i#L%!(97k^CG;#b zbCAvZlv_gF*LyWOGXr-s7+>ddGKo|dG!-OL2l|ACOnwK=%JIGj&6h710Q3|;G!N6)_ixLB~b#S##D{lmoFW|!(!__F(kURc7p?WB-(W*l$VDq zlw4eRUS7yHGbeva;Ki&Li&7MCLvRY#cU8k%Z{4u*FA(_oy#|t;3nfmKcINX774t$^ zQVx59;vqR?9yCdXP6y)}A{m=wP$|n^dBPG}2GFTgTP*M;j_7sv=hD8w@R7}?3b59# zCwd-)mZQpL;5@e#18a1>Ph38y3XA)^wP~;5yF+v-|LWDN-N{p$iz!C`zdCwuRtl?K zaIy(3`QRHas-V{>;M~)?R{(QTFj}S8DWHi+%5LjL+M@tDhw~c1pnq_F89v$&nIH8p=CQ(VXNt4VI@&&m%3W}iEqN2%&?>f({&T; z#hSOkkr0faPUrjTZGbrgY|`&{_sfZ$702kYI{)^6{*OJUOgarfB(kvKG31!0(=Km5 zIE@xg%%+I*-XR$Hn%FBPzQNDImTgKTWdkc>Y={+=9CKEntRx^Y?-~ zm#xhYc$00c1f&Cnp0%lV`t1|jE1e~?S~Yv42w)%_r@R|A@7Z(@` zM*t@*Ht(yWbc{Q=WMIGz+@+qJ-GIZBv8`&e1rYxfth8x- zkisqRdj%2M(d|)N39PRPq1oPIYU4NOlZ>i$b#(3X5N-i-}G^M@VZFf zKLGYU&I0PGOo(_6TM0nTR)Gd3^#U1#9AR>d*zX6t)i^2;764l@j`=Lizb8V8{{(z` z3E=8a4p$hC`ob?%BeJn;4;kl8O0682V>8W%k4;0jaX{Dav9Pg2qG}WCp$$oLX$3^7 zh6ZlG z=|bse$$NOk^pirp3P-4JGc~!!!dQ)lFi01YvPzUj`UXfj7DG327G*cS8Uh3~B0!Rdt(FHeiYW=t!t40-PnO-gO>OX_7nPL$zPlQ|bJ-*v({aGK}FO>=b 
zzQl^aeHED1c6CuZQ~CZKzymP{Aqfe5-h!`Ri&P&19V{FlV>Ek8IFJPq+GXLbwR)|z z;P{F8&Hfxi{+gKOO&Ifx0wX1^LjWNcpKyMeks_9m`ouxmyb)=+@U{ia$$sBTE{mgu z)D^|F#}NC)c^I}WiiuGJhCKm2z0(s4eWVgRRtGccejWGb)7AEzK-9SbCKk93gcd%1 z`f2*lQP?casS5B#kRXBqfX(#L(NPWn#U2j=fn^IAYfI(`9hAm=2Nn9QV$HCisxS8} znSxOTb}86-g(XLVJ4#V)D^G7R*Qc?G?|4ti^eF&#vtkhC?8EqKZA{cR)XuIV+5A7h zIt0&1MkAWg7}N?K5IxUsz~%8}m+X{N02Stm2i46*<+4dC1iZsDiqYYZ63AaAcHNzdC?C+g6Si;Y53T^HT2jvn+p?%}`L z9Db9RJI71iv0+PeTH`ax}qBSlo9rlkD#5~okOridDZ#8Lyi zuldq}!+L-RM6VLE-RTXpp?E0Yo;v&!Up%-w@ny@ z6oXwhE;UUiLH=SE5>ddk(dqJ^0dS=(n`2EXQ4j>)h+v8`VGS(de5Namlz{Up)TmwP zW6L6q8AOUez2N}_Sfy~9lTJ@h$FpuJZdAW8F`2BW{=%#TNfjy-={?Vqv}M1_mJra^ z)yYw&6KePz*5ZSZI~r9hoA|V27ib)(lxo_vPxBJHV09$9S-kAt+xr=mr$X>l# zFF)waTlZ-W(f=)dU^SNmiL|$b-9+YXn9bJ@yTsSm)pa)eaG+BN$|--c$dd#sbgI~0 zxduwm{&?eW5epJ*JEOlX>6~pL`T2=~IP%{4ug{xA!Ys~TprZ7^9wonp5N~eYjET() zy*ng6K#qQl$dWuXG@?q71%TL4+T*Le=?as%&;XbH*@W0Plbxv<8D!Ov8w2M^2cw#; zjxH~4y#>PkTu@D%?apPD8`Z3@b=@-Etyhq0`E&mVzEcI78p)`eOiktmAg1WP@a5(J zEr4hjd3(YPYy2-J{RZjsD4>n!Spjvzbt_;cGbOjDgPMNBeIGi}CjYqO3rG#I;G7<7 zGw1^nR2pm)0v1sh7u>Nim3TT8_L+5WJ?Z@2!-S{|sD9M&MYEEeoGGwU1s;d*?B_ZEt-yFar7^isY_PY1j%`QyVPM2S?kFo3N9VFCJ58zOK|JOxP8dXtOJY{ z`BR-qOmh9Mt}G7MUaH8f1`+j1MijiN+$mbLtA~Y(PoGsj#D4m3F2LtWUW>yAobc;^ zS{XrUUb##{9qho7tV!J1B1 z*90SGy3=CQV~|YDt@ZB)LID>5)IP+{zYEulpBR?2vs!0YITam1=+FWK;JVZDAu1di z38(RhW0=dycm$<*6==dYJ;NPw;+SHHCoffNe5blO@ zB+a%v^8$t@SD$-BQg*J}E1>sKRl|D{jBf0NA;BCiEMyU*ssy~q72L0!OUugWR0Nz) zSYxC={hU#{J!*H4Q!%x~A}brg{uZt77fh+Ye_IX2 zdk98-D+O|vkqH*nZpxs{C*{ zmZ(<`mcZze-4d4Z>=)2JI7<2`)w2jXPdR>h0Y&z}LItY}3h1pGh4;N+TnFYQot5iB zF5(e?8SyJ!uAhO9kC6KdmC*&s-Mn$5AGD}(F70K6M~`?h|8kT$qT)MV@1s?C;OuJ- z+OVe>qH3d~NswAHZp|9kPsr^+ibl-&5xB~yUVz9Lz5}$FYqz(GW)hQDzL%+lMS*Xq zPAH?#+uo}CKf!%iW3)ALW0e28A7&DEKU)!2sY5DjgHAlT+nnoR`-q-&R8}y@oj!eOqY>W zP5Z0UTA*vxmR@f83y5k`-gyYEt-;Xhlt6abRO$ie?HNc76AFxoz&qWN|Do2WiL_vA zn!=W?AT8}##@41#)2jMCNKb9zE~|xD&j&!f$Um%N(knvJ2h>qwJM^lT75_^YxeHfz= zjRUYe@FP5~p4M8XLs)=V7m;9y!@LT?ur+jI0V5?xQqe@-+mCN%d4OSS;Kl!zt-%co 
z-N$|*3p#11Kb>p3$JAhKe(JO4Qe!vqjrS`^Tz`U1;`fhYZM?CJ>A{p8{4?j1Gl@1* z0Tq}&D-TV$;!^wC)`}(wLKQ#ETuv9Liu`A=uvyGH5uWzRsp{AU@sL>Tu-05ivmx4S zGK!0H4#;pwF9lJH(2yc1gKlYnR5 z`>dMk>WP+rcfFj0wJMl++RK2jIz1V0G4&9c&~FCPuD^jdVlz{&09DQBv=_QRQxmzr zqzdK-5Fis+QiDmXrqv&ZWAmjA}e8v7Dv1xadD?GLMSCD#bcioLejOO zg&K$(JdWERdU|__fk41daa|hLk<-vi%Diij3=5_uB(h_0qKi9 zhjMf5hmK<)>SGLnBPE$waw%=0neZ6bcR)1+KHV<6vnW}3)o`FDn_@fjKBfxHyl>lD zzJF~qQ#@4ZUy^cv>gL&7G8Yq1Y0-j)MO$Gdwe}*bI^Y|2;US8#2|fYAAXtN(3&6@D z<@^*Eti2~uL~9dTM5dUuQuJUoK@gC3q5K;E`%0OPz188La6?ImBpgU`G@IJZmroP= zHkfn|f8KozT9mWxdm_NQjAv3of${YQBrd9!NRq(;IY_2y%J7d>xN@WR&S+zwmt&#( zJV3N(J9CdHk~W=UNL0^GC1cnpOwBk`ZYloCEWG;lZ{if7fB47P(*jFQNqGvb2B73q z{GD-E;8S1=D=RBHg;2OUgE6u5cuoMg75)j2YPq#OZXaxuG7`D&k&ngHN_FV@;LHFtNps9(rwg+rRdg0mOv zZ`9Y^-Ki>kPa~|=86X7S1`$sK0T|QZy1Iaft8@@#4Y+Mr2T6#0or~a6dz{dSpEfJw zpEn;Q*gW$4i`dPxLfr&+dWfVj{t+?C@a%C z_3X}-!nkU2W*kUx27O!R7Hq1bU9OPTC6b`H^GTpr>s!I;K2l{qaC$n_D0#c+CSvb> zTUXq=foP4d6cHsc31>A`j0a}RmY~c6d9eW?Itxq+ltAdu(^T(#Z!a;I1!$8&a)~8q ziqk=$WDo`gAYb_x7Se(=j66%&zKSR<&W z5M^ZlHhyzqPc2YS?5R9g{fGbuZin}&hE#-U1I|YbKdV$%1Tu+_Ge&{`4?i$R^otSm zQY%fKSNhe_fqr-~91`l;59Ct-?LB391@;)wgGa|wZK?b+1>_*B8hcQjAz?hm2nn`6 zZPE-MP-hXtqD{*E9zXE(dEdh&gFWYg%?bB!`4z$sFyi2?@pdDW2FTX$)J=4S;5{#Rx7k%B-wMw^VI#={RT2x zqKp_d_v;18U~JwkH$3nLI5p$}&eP@Bvm3NsapT73+44zEx}P#2*T&op?*G#Wm7*5i z^MOvLcd$CO;NXAinsU)wAvb44d++yqEtk;Ynq1#!;$-T9!xb!qO}|Gp7Ngd|@t=7MHfnRQC@@*1!?`j3MUV^9&iT_e6 zp7l1H{r(a2Bya_Y$xpzH?~P3PSW^aILVsj*%cEawyqRtTc`5QK@jhHsrcXS9p{zo< zqp7dixFeEx?lZ~B-swBEh~F@wl*FQ;FSgZ%vA}gMq+C;BFs391u5}mWDBko~6Fh_g3AbiRKu>9fnO5Q_3E zGLC}C9M4*m)OF&?3YoZGS01+qVgt|cj-@hAz)V?z5J4x)%1@B3lu^ z?mLDpnY&;dd>q6zqxO37;#rmWudLAZEfI+hcyvh4i{MZy77<*Dz=7D-c#Vr_dXf}G zlp4e)t<;U)!zhx*njL9ks|m;ot0)Uh5atXsV6o1Gos!+(3I(5&xEWt&`i`~?zfP8y zq9C@1U>Kip^sFh$&>}E59HG~5L?}fyxxc2*A0VoZyAkrf*Sfm z_tIJeHUUP-RA6-UYx(QNm2*{C%sQp`Q#6!iPl`StgrV zmyVZ*2nr-KeFe#aSDq(U-F8bs$z{GkcT5IWY2Sp{&04@CpPsduh533Gbz!Ycb0}RX zkEeW{;wsOGIm$S%uuou?C2Pk?LfU7x3x>PtPON6V!rt59;1^O(PQ>W+)A$~1Jm_!LIvnVQ 
z?`mEMz;dnQZVUx2-HzRip(?sMEDY>R&F7tqM)>Q^J4S!@=O(0O=skYwD`84t5wK*6 zSy>Sx37ilHw)2gfUNSglN6Fz56YGXjAN!Cd66fD$Vw)q=s&t9uAg!fs9~y)fe!xUf z;UNzp_Uz%IE!?Q2T3kyhyjcgop2IZ>&yr{PvKNouJUza~xIIC>#5uc8lCG={@Qc36 zAuFi)2dv8=v{XwqC+A=9DGa?ER-LqZU%B#euJe$S09%XG5q#&#@5_IK=)uy!E??GD zcYQFp?5#M}dJb&QMwFv&kC7k`k-`sVP?e?{yE&wG*y&x{2@=zw$*%Iieoru%DP`;?AA9Oyh-z z%G={@h=8_hR}#B9S1;T8_Xj=I{T2~q<=yH_TXIt+%=9#h)mJ6bDb#1&{DbO+uOgGo zOQfaG(2)JUxg$nVgvJ+;b_}V9Kmvyc%;};ZhnD{w5>v#(>H&xJ6)<+A*%vOZuGK&+ zQu&j{NNzs-Nm%$uBB*-BuodPZ{WH4rY6wNk{ z7=aF$~;$;Co9Z!$?3>B7tB6U^A(tqA*L{gEx?eLh=x#lSzSI)qKx z4a*B1&Bk_IL`7p5@WnJ;+0@aM+!g|)S%nl^0NIUNCG3BuWD6tP+soyrY0toPpO5QPXuAde2T>bvG z_WU|iocS6P?aJNw`gp09LF1f=o();6MTzspncnx?15~l|?qAl6?VdqZqhK@OVMk^- z)BO#-$XoZrTlD3vWx}H4Uv=-vPHr`<=c`w)UL)s|@~hC#5kPbqeke^|^B+wuTq=3- zA+3r_9Ppwvt~sO?i$tI}0eO%I59Q3&Lmja)wsGhOBDWVVrWkhObxlQ}&%;TKQ<}o0|jm z?%tnT&)rXgRUTV39ubNu2?01%{aG?51LHjk$_vx|Zj@EFyQ;VKJ1^tsc6mh-eW<1J zWLG_JIo=3lC1lyk>6c#m*(?NYA)^IDr12~zd13w}My*o~aNF>~kI~iJ zd76rQ0)tL))k0~eEl&i7xlDCY*n5I4E!iH;W=eJJ&kR(v3&kN0SFZ?p8F2hEb}j$) z%~4LFqo9im>u&*pB@SZW#1V!!tY>tY7uJ)+HW2ph$B=tj&dAd&MXOhzecmp7|NirC z-ivAQ#}Cj4YIW<`Z`e6+bLaT6-;lI1IA(Q{H0$r4<{L0$*C!Z1@;vzvyJe(}-Pxz% zup2y#&Kh<-&jqsvyP06|d5_j650-P|D9dV0#eQ}#gTo+|9KNbwd7_fl#EHC3W zH*05$xzRYbi=&H;hHN2ky_OE_#9Tp!;nTc2?)sz0Bqbz^7BK^jCZLCfGAo`md`w|^ zgt!`LMz;9m(DrXv>3n%@=h|60q1J@G>Us zvPQy7eid!^GRluRswvG?0^gd!)_ohzf1W@ZmoRyz|qH^WuM(DY!*adrHxqzg?aJ+n%N zf!cPguUaPQmP%14?CHbZOwb@wq?vN<89M=8d+@t9`1>mZDnhg9l2u``QQP_Zdjy(u zxM^~IOgt%XNSWaGZuBLkZ)pK9s!qz*hQJk`S`efYaS5i`4RE^1} zD;r$PTT4fIl*Fh0+@29#=y&Wpy-BQ#W$+rFh*;u~$3yy}@-UW;9=vi^?oehw>5{vZ zvr~$zIJ$%m z!zJ&aJfp1Ib-_&%FV&Uw)BM9_vXlIsm`@K+ce!s-)2{!=X9rA3(s+H~{8;Rq&1Q%! 
zX35ZfV=E;yZ=63x7x;0kl0A%IT`3$FG7hNSrcT~Zzet{4J8${rc+96lHh1dT@|i0m zHB+uM)tbxf+d4N(+sfKKb4Qt+Hi=s>2zyU3-d?-Dt*@-I&C>b)t;Hx|=2N8w7g=3| zw`9qs&eBP%n^0WHtX()zxBb;KI?8GCUcX{lAtk%KoZVjZ@#CMs5`4NB6=T0f?^G1$ z(+n6j5}SmsZ{(3(6K|~*++#aU6EHebI_lXJAM$R6OIc{{e6@8p)7qJ5>l9bl=Js|P z{oS`$q-*JQC9%^d_0mWeEf~s*L8SV=Y2Qhy{M|`PQhCAO##~`n6o8{L@0;O;60)$w zd159ej?Nb{o%ApE*Kh ziNX`XQ6pt+>1I+n!A0%Jr1clO>io~YjL0ZsOSMPH!OERFgu#a|gtarE2Q#MtmC6r* zoLvmm%HH6HL9?5@ekJ}AW9M5al+fhvF^Yk>)$AY8M_O)3wlL_0HO|^yx$nI4y&L)X z%)*tZ(HJxc`U7qy(w-V@A7>cmN0VFO{<^M_p`siG|{|cc2d* zj+S2CMAfWahJ+=>Xa8gS#7kr69fp3UwO_bQ=6wa5jTTJ~g~*}&X}%+)l2Q$Wr}Up= zql?mB0n;GH21ylKf!f8vBIXBkBmsKg7|%pH_h4jaPnF983mI|NE4AMIaeh%lhA2~E2)I(|hNDh`ID}@libDKLd z9vd;7^I2-mOTxI*rI%Ebem|*WXtcgNd#IOT3|cxro^SFZl$DiRYwyP%uTm@|nBJYK zCw%k>aoof74<-eq^Yr9q7aJz?j$yr`_)~Y_I6I$M6o(8*uoI!m?|1@jmK^ygfJS}T zPZ}I)ywI1xQ=&)-j9lhW^m#b%U^v${LbxD412!G>M1(JMUSdhoBZzIOx08RN~OicN(v{&zKKB*-;SHOB)1hElSD`4v_9N!`M6y@%c$m zU%WaLTZ%O@TbamU&F!0jDq{yjBiCdKfGb8?O$MbhBD3DykIoNJ2;|Vp4(lb8q`r+B zr#^m)R^1*hP18lj-7D^%WKXIcq_LHtT_x!&mOsLa=+`AnPYyl?Axp93DdV5F^kzK{ zE?OqWqFQ5$qWE_M|NrN|hCpt`B4S9}2I#$Vem!Z_rl2_-vsQp%-}S>i&^~-Vp!57ct5hMYAWC^ z(7Kt10IdUpgo0#k>E1#u<`}W~RI*%~%}#*L&YjIpTNM1hCqQZEPPykUT4DSS5ds!b zuf#K)nr-dlc_Oq9%4{G<7N8lg&|y46k`86YSdl{j5F7+z*zSu?$QT#I9E~T7ki=|r zE(;5#SDXu#O`7ZX2WvTa$?mUX=KO`{BajqRh*o8uCd&@Ogy^M$w=*^(la$TE z%*a5((M(a88f3Pi6P+h9TrXPq{G5?8SozLW0YFcJJKQYvkYjq;?Xwr0~W)==kN zdS_Lvg@G3m(zi=jFV%Gx#gn$DijQSCc~_V<4*e`Un579)z0L3aPPSBZUY2DAbTCKj zQ!6H!3|qQ%3@O$q&%X<$oN2au3XAOg$)8t2U8iDu z0)KC!8&6L>(nnAdN%^O1v#HMhj|;#7cZI>sR0RCMlcVL)GbxbpKQ)Sz+ik!$yWka- zMN-5WV!;R&Lsw5hIM*YhHV0Ajnac@>uY^}{GFtwCmT(W(bD+cgX_(|IRbG3$L^ulg z2Svqxu-Hh+EaKkIN9c3t;llcnD-rTzMC7aA_pB(vvENH+CzvR6A9Xu|4>f-X$+8#Sybydp z)%)eqg>=hna%Pvzi}$cbt+wyS@6{z;zistu>E$Zu$vx!0^WNmBJoCESar3=cWn$O~ z;n~1Pk|x6Wcd9~Pn%C>sD?X27pcGEq`$>~TNK8kzodd0eZrIFE+MX|sIqnWmxYU0s zyOx8^aG!N6OP`;{i8}g8<6#gDoJ?`rv@+lmM*DD*^5;elM*6-Fz@SX`&l|FqoiQ?V zL7^M+%3GpPT5@_tp^lD*6*^@1v-~O5xn;G+H#UFPZf1J)0(v?yQ(XYHIgGN|q(cj~ 
zGGj2X6=r{iv`LPI7`F~Zt3gg@W+r)(RGgytm;E`t;xmT|Vrl6hGC^nG#OYiR7asqg zZI)(YLe{%K1y4Hi0;YW8sFsY^(6+#Yy<1YDYI*i&Rj+dC zy1%JBdg?DG!(2zlc7*E_>mR#=89Wy8FT# z5qF2Ux;o7TA4k9E)@)m<&T6tlZ~C&KVss{Z6luuAL(bMh zamd5Ypc2el7ZPrdy|znO@4WR{<3o~`Qgv5-lLz;X@55#P^NwbE1K+B0JS2PMkxp zEXtYw2Z_`eTeIwJfbB*ORB~m~Ty6&#Ujs9emL5IFE@g>!iN(}fP`@mhk|{t!SAwd* zg3+`#)HxD0i7>G^io;LTJ9O%M#~UUysN?L5J`|VRRVPbIePKl!ywOWb?t`e3pa?D6 z#_mLJiBV#$U{PUZpOh1l)j?}fj8^nMEwBuh!*IlQ z-;EkyPT@Z0U?83q2ACr_B`E)$lFN`>ewW>)1Q@(EpOzxt6SdW2U z#QL9K>@zD_y`#MYI4uN%gpB-VzFk&Lt*X@-RoyF%A7oJk{1{fd;bc^E9(W`%$fvI# z4$kk_pBi;7QTrt1$Q?t)7gGIcZ)_eu4e(#QtuZDo2bguSc;c;QLp8{4I zr(XQNsCkvsab;voOYG56%Ta%%$MJUHfZ;`6ZekoV=5K%&kpWUPelYZdLyph!t!pJ1 z%~YuK)+l<}HCo#~ZHyamhg{?j1f#)p4kM6tkYT&osI8uw;|qBI1f&+DPlSY!{Am1__JTxWm8Az&Fd0J6abOF6qeWWx@BVz=F;-4n6`=CzP&M~nThsgOXB$ioc-9UZXL zx6Sq>tELqvW@;=EfAaq1{@nwcX$*+h&dtaOtX^M|JN?Z8JI&Qs9cXgDV>PC_rmq+< zL4+V7ApvL>p7R{u9by}{c_a52dm%#bsI}0+$HkQCRCe0bgSHKYey23#i1%hwG$J65 zs`vcLw#l92X!p2KDOv)ki+#2nEsh-p|AUh;k;Uz#d z_-y9TD&u^yIrNwFDyc_Lb-$XTPq7=kVemJznMpIbhYBMaI45 z7ToGB&b6s`22~rnW1IOE8Mk*t>02h5#d!uUk{~Hyrm7jHhsQ;d$q^|chV&a!_RI)= zTOFv_?bBGw8*+C$@zA!G`1LlA{zs6t3S;+DS}Lm8mlF2`%YM9szuIrewaPS6;g&!m z6h7GFpy${OJjE9CKS|riFbZJs^%+!50C^o+*GDKPm(en zJNc2!dIZR_-zni#dF(iiGD)Rt%})qb5ZWE$qZ4X_99_M)BzVHYaYCRnXpgU|asq4A zSsEOLBWZBYQ6L{b6^Wdmj(Fs)Z&wG5t>-W4b~rFuspN%7PTMKee&)dQILBzecTdu| z15Rb-bcnF?R4lyx3XG_=P@`Vn#UOVK6Rxp2r=PqdhS&*|HH)D%PqfCJkIl5~zIT27bp7t6ySHYkq2kSg&6Y#;%4CsA|8$;zSeBqe zfm?)tDH#gp@9zV(hGeT8xAK{(rwvLiDgQ zcs&?()B5TTt@5Q22fj6RnSz@P(|RqOkeHbN&h6*}6Ss5sD|?es8ow%GEAgi$ac(00 zOC27oS5bu^S?Vo+f6A|n0frtmEB$dHA6!1M4k~+lQ$V25DpP5B(U)an@`&jVuY>eI zk9H=c{l;TIupL9Iea-Q6!F6XUkwbrHdABMMXchJTw8X^6!wZZeY?lpuocrqaYdVpy zJb_QRwyL3U2|CE^-RqfQN2r=`ygt7z&~%n$<|+#xImpPqR#(zF_XYQ`LKnZ9+GyNo zj*paZIWI8P zU@Fj(dOBYrFL!&qhzVkP^V{BYYZ7*Z`<>IOq&P?dcW*I1;`*$!O3*}Zl49~`e4f^P zIGn89sG8)-l9y4LccMjG^7f%KjL7dt z4#2k8RHgre8fvX)Um{Kap2d}1pbfAGYj18{H=qE50Nu67@87ZPz!(=icR3Ijg`z#uZ?q}W8!fJLoJI#3L4O$L){)dS ziSDDtU~!U*FLoX-p+S1Wt3$a?8cD 
zB>Me!6Ojj8NN0eFKaF&kO9N%N8JFFLt#17*25=gCssrNk<;qB&##`2Ydz6s(LtOEt zmHcPMZt?40DPO)ftqqmHMVJkTXZWU31lrj@g4`p>E&J%bf3I8ZDSV6g=xBUvojO=Y zTcChVr#q^U$@IdN#>A$Sy>!-2Kx0~tEkaX%!+zlrK3-X(i6Y-m= zn?kiO^8EJ85qbWh)ZFvA^TFvt4ZF6^S7Ojy#chPh<#Z5K-YGJhJx{!nt zG_Y+|P(fDqqlRDH`Ux_cR^_kd8~4K3hjIE|12{RyXBwOzl(%kY1>u*lmAM}|TY~^* zi+j3cZk#p->29xYIMLA1IxRxNSHRgFlO^46Jx`;MAp4l)LB}=e$c%dO{l_=?pCbm5 zv2oALeIFC7Z|=_BBWyJhGbW>7PEe**!yF$vU``G8VK9p&XBwSM zb{v704)P`Pn~GoUERz1%%5i-xI0~OwQ^y)CME90!apT>0Ku@7)@END_tuWKqQQuq?{D?& zqW@yLZrz1CuPYRS40^mA%>U*^w?P7Y&OJ$D7?#_CSFpEZZH~^0U?U7ZYB@lG&@(K$ zh@{72c|F=vY7+LjsPko4R&Dp&agM3|YRi*}v8PZQj!!)>{hEvg2gMQ#kS$<6jN7xd zoZgo^@ugZuG19DnAiq6*2|L>bWEQBzy-34hWLLHq@A3TYEXd<}$4E_FZ7e(p_CIeK4 z#jmNAIjrn||F*c#|8(VrksRaf&BoswU0ZMN(^|(f<@tR3wDRF=@7tXz;k-TM?o{hct9NhS#cbdpjgc0MesfuJp~g|BWJuKX|~Wt<0B$f(%<^U>>p zq`yM`&wlVEW@o2VN(KZ30L9416899Ks4`d)ek5g@AO?o}oDT-9YUz%{TL~)jG|5wU z_jbe#onK-LXDKg!y)++MbNhR7IM+rE7`dJwAE00C`brt#brpW)yPr#dK3lq0LN~g) zlJEEYd89%^%%v*rQ8n{rK}~2+D(k}rzfrXc1d^7zTk;eh<_nmAi%$1?{`JG`*sXXR zZUDhYY&tGK%xZJqbkgZXd zJwtp;0SgqXzE|U-Kn8Z~!teb2b;kKH7GQIQYrC&)aL0PC%h}D{k_yGiEH5FRP!G@k z>~ho2u8CyDXmXJhzG}RsK|(?)@O~leIwD?g`&ft`Ug6p0mNN?y5a9b5;|Td2`kiBn zeP(4QO8G0(_MkP4CZ2DWpNW?K_SdYwO#O~pSZa4MOc}@BU~5Svqr;mnm-Zq!`9t;V z0h-(9pL%G#nH!^;?pDXL^@S1f<|kL9&QIuIY?$cSi6Ngab{Q^HbC#7#>c$PIn+ek3 z^(py3#24BM#^7whM!z!TGiFaa2)Y9DsP{0IZ!DNMX?!tGCrr$q}g^ zq&>NwNbm|Vnkv!21$?=Izo)N{yOm!)7Wqn`uAWqnZV1Bd@m<|4*L$id>capo0-XvW z^{^#1bLrN*C8w*KzaeI1STtg?wYOYZBUgvDoU0ME9X$%`OFeaeG)zZEL| zPfbcrs@|RsBRg5pfNYFzg9R63#d;R|uH#L57P9gRt82|Mx{c->*VsF_QY5YSSN7ai zzWmgXv1T7qq=>;TH7z!L{@)5P_H15S1A*R^Z=cvQBs@@nf#249-X70CX3uVcK*k(R zAI#6)n5u=5+ZbRW0RMpg2j8|TfniV-T$qh!aD~i2^A%ZllNxxNlDDuNB(hAGOjWaShOm{lnYde?iR+nhElN zo(kyGQSE-Ta(NN7L!?YA+v7b=-W*R&mbX$Tsvu*35wI{&Eg2xMJc+@44Ea3Tbt&l! 
zPjoxYhopD4_ql%G#p9FXhxr`Zuh1V9*EuZW#E_Kt19Np+>xE^DUvd@{vgSMHA&^}M zyHZ8p7}RywIR^AM{$hlKOYlI`-iEsJzR0APR(Z?fVvp9zBt%~SuDIBGqC{hKXt9VH zECUV8dwm~AYvmsi@F0#3EIuJYtv*b;DR z-f|o6D7H5V1dog;veL>w1Z=O7l^^D2v=g8NcAOpT_()3glQq5JODV9bQKK0cA3TAU z^yNh~d2ve+^ZTiksS{p>QFD0qo}DlEzVg{NTTv=M8P?Tu=;iU!kq6g(TRfM?Z>p=C zoAr}E<;<1y{_RFSX+#LCRvJg<2~8u^`)ogogVMYzU{vC*<0MG#hD!E-aY}?p%b<{! zRf*XZJsI2K}x_QN*aIC{&#oZhgkEu1a9kxzgRq~@jTh753PF%4NEdRmgoZGXD4Ci(x}`=RF0#U2vZN?U%9RGO+N6 zaPjN;cO8kdFdgnQJAcOR5#Cyxw5kxIgdlKYQ^%!;<9O_nU7xjA z^ybz?OMEDswCRyV#X(G16hjfaXI`Qb=gBJu5L2+Q@^7<#onO`VzqAeGTf(bax{sxH ze|VbB#8{8b2wVIBh=Y*-hG+Y3QetjU@_?%K03os2pq;?_y#PmITILRqxNW?IxYO#M zmu4kQe7QaUiLv~-kIhP>UdQ8$TRHkgm1p%nrTH>jaoZCke`!3cUJLq49-`ig?K5SI zyUCNZK9d%9S)&`@KYh5U^6oI%ofP&2XWNS@UQ2) zYxh$e+}48@#naG^zL$EP^6&~%wi*g|8^K4cyVI4Z5S45pd}$?!Qn*>2C;IX6vC!LX zJg_Yh3@{FU}+Sd@Y@RGPwa1;OlGW zo%qf9vzH83aRIf|i8SEePWQh= z)AJp;`X>?+G+=J-S(|s<&@EWGSY_Ua0W7-5yn5yK?`dW+w`nINLxmBHP=Kige2|3z z#XQcb<80Y?Jaw$JK7Stj=W)J7X%Wv@z7A^g6YBfOEORLfBg2ZzdnGnXYA^x)#_0{G z8K4!IGT(si3DPP*0l|hHs@slGFky5-r1N?|7!`XY6OM%d`N-OS3jthUF7TP4umI>T zsAC(AUP7c?R{dRCs^|B0Yu&>%ND{ux3#^Cbzl1U|p|*Kn$bp&l!>cA%p5Wrzi6c6g z46Rz)PY;4XR~2l6W{@>tgtPx^5LYIq(7)bC!*+Xho*@|^p_(HS#@T-LY~Hb6->6}9 zDDfAo_hXwB`Oh(6*vH~~L?Ir4Fy`ua9@BBt3=N$KH&`zb(8lNa4h(3njxVCL)8lYq zAr_9KwJhQ+{XjejK#Ft;f3f9X(6|A|sIuw)-F3HZ@hk@L+>lvn%aaYDBw9h8>wBWe z;r2?JZF|SxZ>+bQ+5R#aAQGmx3dz6}#No|{=4O&Q>v0Uie!n;`*EHZZo3>TaI5h-} zlA=TMI-92^o|p^{>YBt6#4 zk!H!F#thGn|7R}P{1%E14o|^&OGKN`r8(FZPz)mVjOZJ64kD!nbdbL(h=gX!@aw93~G8G4fLb0M>NMErEeDi$< zDq6d3Qs_UM%;dKXFD_<={20IH<>7&EBza|NnJ*VZ0#+Y=9lo%kykoUA+cr`KdBVw> zEjPF#xd)&K@)B-d5pW6_6s^*XkK(wG)MRBd-K2kSwzk^;&Kn68bsa3JsnY*qsz+kc zA3MGSd-=tR(+`ck!#nQu!9_a+8}X zGiZGJBS&=W*!e738@z;0;d=<6(N`9H9zcPFbm>Mvfg}7kW1Ld(<89dK{P7pzk2Mnb zw>;jF#V$;2pNBsl&`SmX-~D1BKHD#QK*?O;!#g!YdH zpV+7!>2@nP6Y$5LEdshGjO^?m=5XYtoAxOKTz=prGR7qN2_R z{;#2W9mNi-9yhSz`e=hE5&itX0QRa-@ZXL8JsXf&X>xN%&jB ziYl1kEP2}}xdYYf3a!HI1w`X96hurZM_OmgZ>XB{z(){a{I9dT{>DA9W2&<2htYhM 
z-QUNJbJP;Pka(PfC_tJ2I(e{DF9ffz%8eR-esORAw%UWrwKkG3R)^wsL-diAI0`_2PijJqu1Xr{s+5Oq8LoTYU4> zD6(%beOdPV^$5ek?)RYp?vqVHcs&J(=SDmKT&(QsSR=jk0s;<=SY(MtQMXlX>V?_@ zkPm@nk%`@3t$z}uI2`|^Od3nZT~HSx?TP-9H&6FVGbAu2g}!m_%bhF3k37j3kV<}d zll;5Sq4CYHyqR_jOm-5?>r2Y*DQ>^S`hbewi+%3MCQ*WSl>8j-E#f{Nrt=LW&sD#M z0bZfYZiala%tT;%>q~*VW_;1cy};Sg*bCgZ%>FHaDgVSu^!D~{?6b_gdHrmS8Ro^o zOi02QUY+%1_b)m^nySB*FW^lJdc|(gsYofVEUz=^Qm9v)r@B$$~9R_kl3j)dY-S?QhQLOec19V?=Y?N~ygNB$lH$(j^56M14J z5kIl9me(<2gA94~+Ooxsy

&r4<4MY4g;GpeD7whNni%=5WKmJJF0EI7~CR!}NfZ zojj?B164mi`LM))Hg{c@f1FmSizVf6^4-dnB|efFy>zADE8|YqJ})zKO(`|uAbeesjp>AURRMLV!LD$(@%6$DFRM~bryh*4e8=k~*k{XtM;TjVJas{qINyciprr?L5ap2cIM^q1JmQF| z@I=B(=B*|&f6Jl_OlHfa+uQoTAOE2v1Ne>5(H`$@*9v`tBeBD&(srAY)q`G*cbBFf z7Mn}ytHhYt9vqCCzx-Y@re9}JVC^!*WF$pXIH)L1+G@edy}l8Yrp61h^J~%#|Cfa~ zQvVNg9?-~hz1!%cUiRQmP8=E0($&IZQG`AN<&9&JYX9`I3hTs|!lrX@hPm(8FRS0q zK8qjF#Ix~5^I+QE@yY*s&>WJNm)9`j$x<3lQ9N!w$zp4JyvNa1L@hxn!Z) zx;z=*tD}7??`pNm<|1dVGcuz5NkTIm$nCA$vYc@=Hk>bOO!r?FtT&-pStPM_(BG>D zSeZRU^1e{`|rK%EN;JOB}bJD1H;Q>vnJkz1Q!c0F!Yi-U86`>n2fYo#pf- zlqyr2zxGBU_#CgoXrisv0-7Q{iku5r=BWZ0yJ6I}XvQ&HEf&;Zp zyV9UBSLo>SP5gicS+2ef5(;wac2nu6aEgrjkHrT}2Pj-xi&f~RpPpJnM2^47q~nX{_AkP?ciH&|)j%-1H*ThU zeX*3=!5Mmp593Js7c+yI(Ox3n{8@aY!gyJ0)nr?;y2Tkoaq#ua6>D(Pd9P6n8GnmP zRQ#yV!S6zaHbFaq%Oydr3@0Y!DsN7oC%T^+OZ3}V1@7BsZO^}6c;W#KljHue)?ij+ zy~Vmg=UE+e-Vz}_n}ky7E9TadoHrH3k%7NgL3Ajn8mc7H3cg0+gUh{}R!@`H9s>7bfV;YA2qdf{Pc)v_gK=VU~%?#26w z>jT~2#oE;2Jk}+!C0HMm<3~dQ*2BUgX$j#t#2%gm*(gA^dW}y^Dy15q z3DYNuxS=U~-o{-%fE&Fa+vQ+^oZa|D6i(CtZ8%Ve99>>20G9ai+G%Bh$+7|h0Ja6m z6ym79O)ml7>?(LT-Y#eqGD1wdy1gFu#`k7n@4^JBU;zxZ(o}?(7RY|u{^n4x zp>wdPIEgN#F0TYAheTkW5J@_a83RuizW^$Qha4H4u{B%0-0W~K5GYpiS{DVYC(ul{ z3ahYHy8*C%he4UyTv21vvH!Kg@aeQx!)b%1ZWTLX@=$pnp0my^V2z25Jj=hgJK@W* zGM+#$6(u1Jw5VS?2oH;(NSUA{wCxtha%k`9`%^|A%r;zw2bDTWq9g{f$14n&4s*oq z(YnW#*>8^odUz*Me)wh{3Weegu_!h<-Lp41%zuoc0Md=lHa^#9=m5<+62r~iN=!}; zp%f%Y%+38g`JUegg@>2-__BSm$@vLBAiX3cCB=?#?}9-y_k%f519v)8spd%pot^Q4 z;VUX(7c}tfyE$tEq~-;vxvm%E$395+h*jl2 zkj_-LbS@{Smot0rXkv%-PY2DzRO|4N?2~9{1E}9+O-?Z0lnQAqOf!5*ZZux?CKa_4f@PQ@q&v?i0mC>hRTTyvDynpUKM%5@BUT`O^Ilb=c#1gdPkp5vjk==0I;2XK^^q1aP+73AY4Kn&& zV~f-Ud>GUE$^DSEa(2Bcfx7~7N0qw^CIH&e1h%@6;Xy02KY#wTHuWV-|IT%56bfK# z)m#xANU>HKQ0U>hW=?l zK#A6hKGFYDc>7DmZ2!E^X!%ZlT~Gl6{EgoDWv7KWs?81X zRs+?bjAI`d8f7m_VW||>E*q|4uP|xD0gA%wjQwq*Bvb={v=8|Brd&?nny9A%kKj@} zNj9>;#gX}sDO+?41xQTWZ|z&EvgZd4oovxdPRIx6!q2J&;Y$Gr?I`cyc&z9CWa8CD3n#q(@Z~!YN4i2? 
zSv9^}pkaem>wSIurke5O;J_ndjb=G(fZj0KqhQ2^9X{jy>X@MDF1MW&s|PB|hUw#R z=+PLNOtpM|MF}l95{m@WPB}dG? zyRaH$ESf~1fQodH*UxNq$w|uGgZWIx&v!Q8d#jCf8}K=X!EXZ0r9ocBylXl5N(m;p zRM*u)KK=<93kwThT|YMYAbl5p_4cR2mFeDmu|^Bc&Scr)ct%vTB7mjvC@C@1EMZ`} zhD<1==F!tulO#7S?ZAqAqpX)#_=+j zEoU2kE_a}emH)H=38~3j$1)Wrp#q*)m?I5!m`Am2G0FVS32+lZirra51&O$c;zUzE zSaMwwDf%hm;Pvt2bidZkH?4)3zwl&d{J*0Mma+5!9_Rq6iGOS_CA#(0nLmK43S2fz z(J-;uYT6V@2gitqh8iId&@K!iW_#$Yw{-?1=jj%(W_9ALn<0rI8!R0D?VPgvaIwE8 zhOy%Daxnot)61&e-)a5bPVJ(iRh~yuEknh`r~)eHk+RZd@CMgVDtpuMspcQyN=FH= z6K!btpk=a!d}yCKB$Q(~2!Gtld=Dx8+uXw-#jn8WX^{KCl}Rqq!&MXIzr+I_wMD@~ zl%pDaSke6<*IfI)t=*7>DZ3Bjhp7{5iw5B;4X%fj@2y)Edk7e4c;hTO(L`y(j_WpW zC(vUjU^{=UOAA0=t6Fkk+tz$GmNug_p5H*M=aExO06O(y#q{d$jWD&2 z`y@Uc`eRw4mcLT?kL%kfxr+}33r_S9Ah2K)DF_ydDrMP)R}v8^boK$Yz(pkx0i@r9 z#ym=*Qm3WPzW$cO&~VGmb)|wHt>@wju;AY3HLkHs;*n-4)!!Uo1v|#{ib73?5LsC7 zqQ^3JnRUo}H_3fI5G&;Z;j!{HR)8D53kL~2z3|T#)?#~2hQeXc&?QOHIfiPlK@;c?fW3W zHC+Mz8c)UV%1%$tt+5Ou^dcX-kyy@y8fM2%bb zKFG)Sc~wJ%g25Q3I#E`twr^;1@?SU)_MmZUYLNVHxQ(3W(Yl z8#jp5!X(>4_G4HVjQ3<_|6(4jfVg6Gmy^=s@Ivd>N#x^>%|8_CejHLRL)P=k2!av+mXomR|Fj1?p*AOiC4wLl9qrj;&XmrSq|6@ghnwnP5$QL>H41mXtPw9ie zZ6M&;_Aoy(S*GLk=cin@uxlw8dr(+SJ*t%PmM{D3{xu1Sr!8!?@PkPg}+HqlF zzKqM&aMPmucQQ$x#$GshqXd`NkCgci2$Ntqg%h+O3%n>I=Ykt2?QH= zNnG5KR#i+*KQ!dLe~%u}0j<1PZg_zTcokIA($d!RpF?zR+6jxbHHht_bcifrGT8bm zP5(DR0pSX`ID`|bTjv56KCv~ur6ZZun+1~27dFep$ZiOLv^(e=!aAk!zUjrnavA@@ z;Uop5%Im^=4=h3#FN|LH?oyvrk3b8az_Fa z92C?Bp36#@P(nIi&)`WO(|@lI!4tnVPTxwa-bYbS>&^e!g1w?sbQ^%049{vJ0cyCi z>&UoTd0uvWotYzsa1`S-Wpg+h; zDLhG}emBTc4?a{0q7A4x>WEJb1TA^xCp+$dC!J}w{eN72Rallmw>By$Atll!-Q5k+ z-Cfe~(jgrR0@7Vl(jeUp(hXA5UDDm~@(t1)@Kl39?D=f^- z*ALwc!6d8U3HsM^v9Ktq3|1Lk>6i?rf1nbqMn%C4|Gua5w~)Uj`Xw&?X2{c*vOZ<- z{io`iA)g$g-NehzBqr}uJ3xg40s?ErV?~{p0U$X!LwTH?qdZG7z?%KXv6hT!w(+=$ zrTqoYJbueaB&KTO>c8!-ekqxmb$j~s8W_6vyH~ZkQ(dA={NX|Acu7~$tr8Otvi-C1 z@X0tgGpH{zA);4bO*aW-#V>R((f`bzx(V%Q(dOE`*xkqY#FP3$A{Ik}?Nf9m4SGK0 z1^g~v>GV#gJhM9Tmq&BPPf4G?vj(v63U-yyJgt}w6}f!J<+pL1lS*^N;t5vyjib%{ 
zm4Lh$ekg>!v@D*^zuhXq#Px06Z=A#9sU%bEm@0Y{V@_1T0mSFa<~JsmlkvVv?^{v= zfh$x`{>2xy)-!<|T-Y;ZhJobL!0om(SFv3ru6Lw{(@^0S_7njn^FED0dn%dVjP&KJ zE$J8FwVlpWFA}07JUbx1_@L|jtnH|*#8xV|^QoW@i;hP77W9o&aEei&Q?Ri0@|s%Rl-dwOBgWQ@a?m6-Eq^>D>M| zH`ZWq5x8X67c0K6)DHF0X$4i*`1kMN=+ylR`d|9ZjEVF*JLK67#zTK2wAze8Y%Xsk zphVzANU*n*vj`Fi;Q|N5$vq!-U-oAX;2FjKv+xIt5sK;Pn1l|7KMd-(M?lU!972dYa=qzRCZ^ z8R-{SBj2R^GpyBYXqkH!A5Odn0Y&VEIIIXdnvc3eKm(e?2U4%Gsw|~CIWtw>WWrs! z>SZB2yB`a=amH_U?r(qYw(g{P#V*$Hw4d|0_f5uO4sojHIjP@QqCRnjfH_$8YW)`M zn_%jBzpGnG8x+Ga`}3rCSQ_KL-Z$TO8l1M?6>EHpOU&pURF|D?vc*tVPLh_FHyRnV zRV&ewW=ZV$v|3Jqq}A*S2b`w5W@e(61@76rZ!D9>%|3)w0dw-rKWkFi*^JKnSMsc^ ztjWy2h@u&u2IHI@40Ol?ef1YUA@}YRl6r0KDVbPM@0oJ-99Q|o;BrzHk{D0qeazb% zr@bu7H-3~NNB!#FFf}@EF}|b?#SI<}Y&We1xDU&dV(D$2y~KYVIM3eV7U3qd%BMx} zcg^*)x!-w=SEypV4#ySyWZAf$;hH7QVV9af%Y~|}&Ut~^roT?< zc3ab>$LiP7w;pF6{x-^3Fu1t5B`Xa-Qn~E8BQPuN>(_UH4neuv+!n;j%PiCp9`-$F z;DP`=u@%9n`6vO_RmUr_1F<|6cWcK$!?wiqa&In|ruF%`-5uahHIDhC&d& zy54i%-K|$!pljA=TzPVoTMuUEa4;ew9ctwUaSaVPkwkoQn_uAr3oAeGOT=DC%X!dPIlpRBP$0@0sY4H7QH>OrkH^ZxOG?rsSTLc%R_2%sFHR#vir3&I2=1i%N? 
z7RL%O&mrOAV?yL0+WzOyPk@TYg7cPuzb5m= z@^&bc4Y)n6C3%D)ARwsgd36w<->w%&9Uhk_C1D>O5hafl*&wW}l0$Oy^5==~?{0b8 zwJDP0H%${C8{Z5030)mVeu7?~b9)@XLL!2KYDNwa>g&dwX@MHsj@Qy&WIU?wRQ%s`?jy639#1ddp9Vc=F~hV3vqoZrb$Oip-I?2F1hU9lyNwKp*HCR@v98Y)A@ zPF9p^MK}ync;0>K!bhoiyk-zfZwl|RA<@t5&H8-LQ)4HzMVnZlzEXFAFHn(eq300G z+5JKZ+KA^W(iLkZvB88pQ@p6=D)Q=g8huDO(R2&ES3L=yq7}m7WF7x0Qh7A)KCg}% z7U@(|3mGLtT^&|jLiEi3>SWW$zj`hZ{$I4J#$c;Dov*{GxGs@b_?d~{$xcuXEH0LN z6%G|fSA9q00NX97zMhNB&?_vayfwi>H(jAGvh)oSJGA#wrqbl1zaCfM%(kcUbYt0q z|M-CyIKL6zcbod1XOD3k+n&G30-rZb_J`hyJZDr|HF)EJ zi9oq@&X~gDB+Nq}AD`Qk0Nkt0Hv-FHeo%pkB+XGidgmRVF>p06RI6mo^gTl!Nv(y= z?J>Vskk@K=tTjGOH!eP_7II<|Xu4jzL^lD6Xtc+2-T@42_|LGLeK=l0LvO<=u%C^{ zcRg(KW$e z^I29j9Qqv78-VS}@+b4!zQm-{FyGb`0+W90<&>M(074{Zp%GyjO)~T&6^{W&OCPaM z2-pE&_TO?bhz1q(jc01ROUBI{t*I9RJ2q1;GZY+KJw{|WH~%nzaP_(9P1HYi>4h|H zvd^f!QSRz1cFD`kT~H~!koJ^lJHf+?2>QkG%kh4j0R;W^f47J_9jaQvR0c%05EiB= zN@GpftpM6T`6jxI`VHN{9*nz!D^~m5VlrD^w&$}>owc~}M^cUAgnh>up4)2#((9u! zWnk83g?~pb5Vzu+`3gPeYGY=j?7V|dwW0m?QLR*Y#J5FfWRHCRQjgya{h7o>+lz#P z&;hBXouC03B_4-!YXm}`X=fssmeXwSUUf%Lc z(D~BiQnE7U|NQ%FT;q~*>u0=EtS2*(brWtqBGI?-CdD}_`F8}g{Dxg%Tx9tmAk2X9 zE>W&J?#n0XB>oJw`4*w}M73SKuRgT%aLCar>c569L+521ZLaNfGj4j6lkIS&_!7Uk zIny{%9=gBmO@6DEf%niH=ve+i4Ed|@2ef#ZMcceDBs)?O=R4%iWx+%TgC+qPRrj(N z`+U)06UY}tqV9Mi2guWpZz&N?S+cX*Ls1K-A?CgUk|ab8jg3akILzF+RsSwMy#j%j zF4J^=YWK<+%=OG=Z(cJRwiPx=bhj-L2sysBx3}MZsQ67Joi({zs3NW-7Zk|r*9fqb zGyB#SXH*Y3=Gky>GZRC}O_QpV7uwSiFncrB{fj()^OMeynOIVtqvBM|7jbE6!@#%i z^z6)dehT->sRc+50O#n1wU`8qn@}}hks)`MkSp;SaGa=1LZe2 zcfeia_NqGN;cUY@Up{#-w6s{cxZ0JV`SM7ietGG|Y^52Wi6LP?f!k>QuOgaQC6^DR zv1P5z&(%Whmrly{if2?QewTK&H*$Ftp_RSR)@0GYxC{QphZo+D-ue=9rWSuM?74j} zxb;0<&GsxN5Bz7W$53{1y;9<8C*(^OYixVsaNo?Gjfz7N}Pc=@Md-0~H2RT5+_ zY*x}!XY8Znqb;#Hfa}YKpVY`cnsN%1`6=k;t z7e-~g%LonO?x`7JJ3CSfPkt9sSn_G0#=&EgM6GUNur-tls^4|8tJEQ##=OvtP>(p+ z4y@`CBg*-<$;r0G$o5ukl+h`7ubXIz$KRQ!I;`)K>UT5zp4(A4EbA!uqDGJ{i)~=+ zWrUEI#HtKFHV0;(wz(zVU5~P1!o$84N$GUVN{lNUueK2SYp*vFnh-Wu=fei{S$WbM 
z=71#k%-fq3u69zRZ#W@vy&)IV2?-#m1+liV81t-zU3KdH=M;$0NnR z@1>4Bp07pzvfTQbJ1?l4N~(FZY>=cNm;BXg(U6zpal@Cv$b*Pg9Aq*iN)W6!{NNec zX>U6FM{{@b9z8I#Hzwx|xQEq%$iYoh0o~4->dy<5R|iWWb2j+^4GhQUlXhHkwdH$< zht~^a2D=wZY8SsY@T6%YgOrfY?uW3Yz~9B$jkWPXz+W^73_gX~h7`1&Z* zWwDn7uINWGR76{kQ3bPOpLgdH!0HQWa_K3`6qKGHxV#v=UzU|tFu%<7oxOsZzT)Q_#D!>pCs$O-}exrLzPAPSL zfRE9u)Ce)VZ@d=t%uHY7gGXV|riqp?(SWuT6zmA7K5ll0U;wIXAfURwZ~RWuaDS7? zbZg~D8*w3`qTNyI|7N*#aAUIW2(Gn4CtYl&n_$9dR~MOl*d@PFeLT^i*e*HGMIGD{PvuwEGgAk zR8>2opFZ&v{EPYmW97EJ&5+kO(j&X`!fZ1j+$MVO7i=~C#lJHWNPk19Z*(BBB->xp z)K3!)%Br-pZ@-zb<3+@Y6h81CF0=<9werqJxmiBJD5ByH2B30lu7FnAXlak|oBdM% zk-!3oDii|2A?q_Me!U=PxbL-K&usm2Z5ZQExiFeibwfEu*!+7uDH^IGnxe%f$GD%- zpUQd+HgrlZn(Fp*FG^2NPJTWYM))Lt?Pjs%=UD^Mv1?o8|KW;?GK7Ut^dURKY^g>f?AvN@JG8HB`gyZfEr z%TCtdOL1gUQEWpNn=>VOuX7YydD$%|qABH5|3EuXT)Z1pGtM8% z9a~UaT3Ux%GK?y<{>|6YVKeF}5nNwfGU;Yh1dUKpR^4Hsc%*C1c7I>CPOFuVy_Q`L?mF%^7#5jxcifC*SY5Ec5Qw6)9%U zyiwY1i^VvM$FSSt|E(W5ZJD(`)!T4O19vh!%RUiFzB)tpzP9ZaKwT)NMF=nG@&Wui`TZ>)K*%g`6ep2cHL?VF1E?2l&wy^|4N3^1j&JQHpb#EcTF zWwv5(a@WmSq=#oywREzcbiXone?^{0!!KuCtulU?db=gHgWmniiI)n?EN+`16jyde zM$ws<%5a87o{EZpQNW2nG9!2{_VZ(SxPQW*&($LxrmW)?A|` zx%+Qn2mS8K1NO`8ERO5JTZ?5Tp_2vwM$jO2hjmHJF7O>4K&JB4rkGRGB5!ZZ4?j>+ z>>(f`P_DW1e?ZW|^bbh8>kqqIT&9y;d99h7Cvz<-+$fAqn)aU1Wq(_~z_$vg@Z2T6 zZmU1SI`+HUCwuZoq61r@vK=A{A-l7;FA6N_KomtmPZ4gTg)<40oRJc@QvgrI(%l(=cqha5A=c(Yy)hX@dE)_nf&1z}=a@Er)xL3IR z`|}l-%^VHO_1OJoyqJbhZM^6{ne_KCO#)5pDK%@gfZ)tSiSvxoP%GC!(LCjiCI0BC ze)o5YdU>5l97q~MnyGmZoMx*`RvBt3R_;7Yxx$T*~ z`?zyVfs*R>tkCheigj9%uKk~`)wb{Sq(wH4%cSU(?`8a`7_4rwIba@|gJWx0BE#dP zuNv>J;YV^b=S08by#&`L>dR!g(J;2^x#5yIL!~?!4lnkgXCUByPqh~ES)O-{Zz$}{XV8754cj{W1K9#z=gI;+5M>z;2RP`R_2?m@StskS)AfyF}1cFXq zEHXxE$L?m<;Ru9C)KeOK?h_~7K{+6&qQbz(mj`AP&fbwQ%YqZxJc0&13g+gNGi9=T zr;+R7&GN90m;8FlrN7ih@<6LWZ%_k#Pl1>mC@u^9+Uc<$x9G4jFh8fvNQPIoqkn8tUzrU0Izg|`d?Du!P}LuENpSw=k0VS4Q@!1S%3Tb3WoDD zf&L}o_Izi-?`WZ(soG+Uu@@J(L@sw6wY#FZG4|4UGBxPH+Y;>xMq{H5na(Bk$oJy3j$=Aa%Dc$p$<2Q($FCJH>H*D1U5jH)z$1#+0dK(PW&}`<@7!8Lw 
z7uSiNAY;a4g@^fh&R35ysfEJ(6`cmdT5rTYh-{#TUJsZ)c;_wMfXPf#vaTqMx!+Sl zC)>ubP(j)tHxLXXeX(8OjLXPSh!qS0c>LKv69J$X{W3ZG_3MN`r*!6~%P1zeKv8gW ze*-$VD##`4J;<+c*&e;mn2|u)YQ65up4mU#5qw(r8@~16boL$9Sxw`<)mG9 z9|xDqT@GPK!oI-TU-3RYF*`@JkDIH4y8#I}7xEQ>(wKV`)jEqEBP39wrjJ02~zzxCOb2BXf9sCHY!-|8h= zA@&xd>hX?gCr7Dzjj`&q7fmBb>!pt6YenY(J7lr^qk7ekTBKWKGAPXhvS|Ah??%K@D4#)9qUEdl}=Cuh(C3OdH)o#0^8CEZ-L z8&%x$*cmF&h;1C!FK-;4lJR`L`!;#*=hKa2;B5wv&s9osCJc+&MeBIzNdU4~)TPRmwr;}d+m+~a8UzRfmPR%ekr8*cDf;JN&bX=i6A=Xou< zjKh}ZM%CJr)>wUd22#E9&abA@S?m>lk1-9s$#N@)J~aO00!tdxk3m~lLV}luW`b0s zVDfalW~@lPIC7mAQ8_1`;bk=`M|@TvXxfFm9|d3@iSa*0={&DYj~e1IU;TbJ!;$UD zl*!YpQ-DdH?8Yks-R@M8X0Xm@>n(SA9w%zDu3F0-YgwalD&8>7=Zud(pVp1n8$7pY zMM$(IQUOU=vS8Bj{;w_c;WLS%fDmkSwu}N1pGem|{+tkY#F4$2k`yzr$@z46#&JP7 ziy94E-tMd0eK2P6;zT&<&FN*_u+kPa`rJ7g6V|Bi@4pdidW|lD03FF0xBMOvxmCUb zHC=lUg86rE+3p>2X}CsRCABM=x9>ZOnj;bMGWVXDpFa(c1M>bn{D>jej|gL(yHw+yhLqXTt2+&`(q^ut>+9`mEYN9n==Xpp)$xY7!t)Z+ z8%a3yCri}Xw%ulq6?(VNG@2nG3xUu+r%QoZ4gb}w3D1>s&_c5Vxn^<7(>ba62-KgD zuL~?&d2n4)D^*Ds^dZV}8-pqczUYf2+=jTsPipPhD6NW(i+3O6SjZh>MeX_MgRfz3mobXzvJQGM8Vu zJN|At(b`^~0^x&fvp$$Zy*0D!Qx2R>9EOLaaU7!cfM z$?gz6Ty!0KUw6#__&OHsqlbqFtG_iAO#DxXg!4i_ErpC8-hUMB)M|C*d4|Ek%;g6M z-dy7K_ zyH?y29=)#6E1j8^blgaCjj3l$$^qQ+2U07Se3g;?`1QM#@68^cSEy`uX{gj@#v{Mb ziMDVk;>|-a6TnAEsm1R6!Q47TP`aFX1r{C!nS_xta-rUiE^^<6H>mZBbj61!b*x}y zb?nw}^>APGBEfP(MfKKY`)$)@%-~>CW8>-Q6hgR{Exl@Q>(~SF_S)|50PqY007c=c z{VW{vmp{x_RIK-(owIc_k8GB36gHP1U94`=h=_CiCiBAqg5htUoCw$fH6hlW@5Br!4%A-U)+a*?j|9qf9R(IQR|l3je^y z7N=UEz~XwK#i-xZ-Qeia+_Ix~%fwtk5J&Z>h@KFQk3Vxfs>vrg1>q=1As#92lBA1# z^3n5z0>AyxjrLL9v<4a0imr<=w^5yi_NGa-)FWhnq+H#!?F&Sj5!5kPEoal@4^1_x@`HUl(pE_2Knm@svViDM`+Mk|yOefoD|G-aJz zlI#o+oW3X`w|5-?l~sWHbovT}G3baDPfxUbMtXM*elr|V%%Sm)t`g7+E+Ml+cUxizsiZ2}_d0ai=pgsl=6 zO!;#Nh+)$<1ECZDG!begch6#JW&Zo7Hvojdf4%73-Pf0+QUs)P?w;FvDlS8b3?-lf z{`~pjIjy!9qVlb?>GQt(VeL$^fEVYhHz>IprD>x7D4diyLSxdK>s~4qn-AzRC#c8M zP*EIb^dn1}Om69FY&Vma`h5{P_GJ%o69Typ3%&t&llDp-Sd7llwA78p0t5FS;DfjC 
zeCht`XY)ukDbu{GjpRb)?E+z$LGkN_sDnXG-ITJ{rtwjvBRKV}UpH!va5YZUUPPLTVjZiu z;q4-=`|5qP^Tm3r*sjy~yJyfV#QZrKy{}KT-cG;m;Y=2$;4Cz<#IT(xI-A2(00`e*MfcK99o2Q`ld1f~CA-mXXRZo0S5$hh)HW0&DUur`K) z$5Fzqy>G?OdqJuq`hoh?`*Os^H=~%IZKmKF#@w5?I@uf+V+Fqp+TTk5;6q_#bc^(a zxrh%0(J{e9p5HSByyhnrTUuM4ZTwDFMPNKH`sYWc_b=r#1ql@u6#=5p<94JcSfSf0F(UI@a)xP zvZZaVER`aeOc6Oodi|y;Uq3})_Tf(DjHc3B4=XI)&uX|=axw7JJVmD!>Glf2>h9l$jEm9XA%J}FOT=K zYll?>hOTT1k|4MU1Tq470iDfR7*pFSc%tFNLK&cqjZXfj2W97jd_Fx;BH(jP4mpnK z07wrg1sHA&i3{}<=e6eYx+fj+yFIMsUYL@~T`CiuZfC-IDr-kak$}o)$}Tdht?Tdg z<-(lljQe&W+NN1fthZvN|=EBk|Wl=C`5k4eq=AFrfJ~6>b;dzw2K$PMv8D{W!&( z)Moi6y#OU=oEqF>O=rc75@gi(REnH7x)BDV>s0b&diXKG-8K9m8$<1(AOeGi*FsSC0OGMZ zdgYT9<#}ws()rqL;IVLX6Uu+>l|C%Dd)x}1j_7B_)%jX$78iH-on5^Y*o9*^U{TSaqrQX8nrqPzVOq z%~A6)wP9OJtDkT9rkL0_n5u%z_%_Snv9|l{hJp{&B*`{@$Lh4|DKbJaek;HPr1^h; zVD?>zuu>>CcRXlPRu!JXR;AErH*QdMu}Xy-F|ebPWarsMXobb+;Di+e_Ht{CYv9U(7I*5ECltDV`rBmKI=&CqK3hMG*fgo2Ap&7&j8bz~V816@7Ge>N1UsWKUt@2xG0*VPfI zEW9Q9wdr$gJ5_cgCdf0UiY9L#lk$c$o4=dMasuza z&a$Ni?N;-~b-c(7K&}l@%vUs9_cBo`o(3}o6#eE4wVI?Hffl{^tBZ=;08^Z*;SXE4 zw|94};4*o2z~8%vxmIG(m;i_o*(PNmyfXHo#nk_uu)?+Nr%xYrN)yAw-?p^0B(qsW zlCG2T>xC{4&|J@-3jX~rh5TZ5EhFKVDQAcjvdAtV&#FN%|FXvVy~D@1_S37(E=;3D zgAYYHWl;J+Exy@@?l*4O{(|`+Q1j5}9`r(B^4?Rm71n@9*i&f|Y_;5i34_m7P62{X zX%mWBPsXVZHlF9lJDFZ7V}eiL!K-BX`ShW{Ixm;}{OrVn+rfdDge1$G&zTWp)1eOf9gF7`SVLu66l)`dg2^TYf`Sky8>$<9-9? 
zIY(#b9P61f;D(G6Az`8@379S*LL@lg4Qe((K>;BUEMVi14Py<5+PGO#Hni%1b`#qGAw2=%&^Q((;>9TU5J}QtQpt*J;P*vgw ze^cZ@9M!_4f@#j!)=*NlN+xfy9rNX?qRFlj;CyM6aSVV(f%~L*_FHt>DVH?g|&> zkfivRFi5pQ6_|zN;^Oj6V+&(@N*nZnGK%5eqGlA!;F@FeoVK<3!jMyj2t`svOiiiT zt+dG-87zF-EKEvd@%W!ZK0yb%4(evXE z^E|{mxrNU42q1R|e9;1f%shC>u@{FcY2WfIrVDm}G42cZhe{Gg!&e=x0V-2yYo-1+yHwzksr0&nBJ2G*J*YfC zP01z{Vg26?=a1~)tWat9hNW<_jc`~eVYG1!xt~sJN}OLrW5c-HcH>-092kR%AODGi zbLU68cWw+=fO@J?y!ChuEohK%aDYD!PaUp_THctR$)KX925ix;XIT#a#V710m!qtI zafykD!T_9qOxAncyv>R_zb8JooU4jxt+75{es!}}kOFAQkqW3B_1C9h45vMbMIhj+ zW%Mn7TnbZ}l+=~wW}pqsRgGBj0B8m)krM+>Io^Z_%tbHmP`plRHJHH*jGXb>8#=Uj z+B0T&Z16I^et*8Iuoqbsfk#D2Llcuro_CtO_SZ591TI;X6+hpyI5I(mrl`9q!@$5>SY8;?6w-?jj0R8fi&L zj6yfSzZ>-y03qjj-^hqQOTn!WQ?}UgO7>?Zu`?6LiqQ@9a#SocZ zitP45#JK|zpO-YKcHpwP9*tdyRqZUT`BhjVa;l$5S`Cd9RqW)h&SjVuVf3>Dsjk4>$Pc<%EQdGjkc`a#bnib67GATIvFfqe2q2%ekDvKz4HIGAlr z$Qy#vE47nJqMkl~J3;3v;m6U8$KV;s_Wrgt+k@RgA&pVZ&&`d-c-*-1dFP_Rp9y#GBZ4gv+}unK*;LyS{{Yj^ zzT8+yA_+%9-i=+jQf1~>WwN8;A0Ubpzg_EiuH$+DFS(zUr@s&h*JN&GGLvx#{v^zR z4&j~P=B?(HQGMMxKp}Q?WVzUzEB`&CE&*}q3nbHE(jNeXE_ejwERl_%+av5sYbKBZ z*=t<+=X6cs_Nd+4lg;on-)s$QM1i3CBrX_f z=@7U*iwbqpCdYHnuXh8O0R$sHL$tV@CMJzKD!$SaR4*n_J{v_Jzlj_8)&%d`z&61K zBlTU8T1hBQIrP1>yuv_E-n#&iCIgj^9Y@j~=@@?doG?~aZqQry1+m+k8-Ij+)+3$N zLZeFfE?qFp>3>B60Wi}$$vheL=e={yh29+RpEfWYj}?I@zxEX0xZo;!Sr_Qy84s3Rip0Q45F4GkM<~@6R@uxT8XBn9sNUz>v4uEs(Dh#pGkUXJ%Sf)IJ0Q| z_o#GAQK-EW2)XM0LwKuJ6A}%sRO4mvmIVcYXP6t2GQ~b=N}XD*e|BvMo<@up=+Iz2 zNdHSg+hl&^L+98Ggy`t#e9fksLK%mF6Jb=PPfbfS+nlNX8H0z#W80(0o%5;hI-mM2 zmYH()&oEgYpWF&0Q@jL?v=Ww|dGAdenf|ZH{pXL;R-!(L^u1~e z49TGWmpx~kfF+v})OomUZf<@y%{6(uJ{0AgZtuI$)6>Jc6@Wmm-Bh7;K7c$&UHvnR z4;9racwVo6Px8zQ!bln*&ZTQL;@>voL>^Q71%Vk)S3JXuwafK2fY6y+yBMhR6^47f2v^no_^5$cXGv;d>qiCHEPw7 zh#e?jYB`aM3)thZZvy{R7{&3~T?_XGYquL=>hoSxtBAa+B1J}Igh(L`brxhweNiA3@``}tlbq2X6QK}rDds)I((kE9C(V}>PG`)^&~&er!I zw|9O>^~%qCU-Gt6_J~=9tit=}qhri>`sZ0$&~qPpLj2*Fe5J)B{neXm%yXL~TY&Z| zEvlluM;8Y*N%EDurzhsG_GVoY@raL%T6CJfIstn)tVDljbcv_p*d=Q_7CX=<@@&1X 
zVp?c(j|wXcH}p5PTH6@n4-~S3mG|uHVhTc)S~7_uraAc$MQ8+KN)Bkqcw8+Tu7pH; z{kqw66@?bMUZcuVm5PA+zGh=@#Hn8;2o+qvxYN?!^Afl+8{e)aTE*SIemw(dj8O z8}X4Lk(B0})palR&)yG`Ki=15@;_xurPIjJsuy+V`{;(2EQu}=nO~o_inG*`igMP< zGx-m)ovl1}<`*WQ4!+~CTWa~-caHwN6vLSjG2x-F=Ih-Eed(wLfiC&C}}*xtg0HJn@}4nnZ%K88PJXwfHjV%l05UbKu!_n{S}wXD{QfEo8nSJnWe~)hjDfHTf4sm! zBuKgi^vFnLJZE~z<9%jc%m2}Q7TbdDe!mJSED#YG1^rj ziimGmRS`A1SG{<=aJq16;sz4&Izc86hmH4+_Cw0+_S3&Zh3a@#F?Ra^Yge+KDpaEB zg)NFW)c!IE(WNgwC=Z-7?zQ1GsZ0iJQ1Ish6EO4Y9!eOT!#>c(WJmPaxSo8e;H@3< zT?8cU$e}30K6%-=VJs&7CaHz1kppz3vR;=5?WT4Pmc65qmcq>aWG1p6uN_T zLv>qRS^_uf-y=(esHnS@!5U%B&GDyMq+@vnTm0 zOZvDuv8gTUVamAH^nt8&Z+bF;fcW_LDyk~d^3oIlQ-3LIY`jh=WdF+5}9ZY~;88_&w444FnTW`ay$k1>g0+o!ej zvwbzCxbcGr&f^)ry<_gA($p?1rRt2y+k*^7z@}Bxe#|Zt8^37_DpUtf6tyz8VNR7H z8g+e;2)r;bFDqSNoO5=unM4PdWSP7=@xsDtG3HwN2NhZ4Q2KBqj6Q;=qG|2EL)+W| zTBv=kz{(@^E ze;L5+5bz(Q*H$%EE}8z)WLn^W+`XP2Y$HqI1#o0RUDwV%iu`1+3vHY~J58yux1C#9a)>!vsX z7Bqzi3gT{MYAQbVnEr3$N~Nrif$P39&m}q=JzD}-v|utOF)^q;6BT)HhVf{3QT4&= z>fI@VG(bbXu4a(1v&RFg&aCOZ*%~87M@31ck2rS|aRl5Is0lhF*`LG0ym(<;L8{H1 z@wEWU0t&Hbje2nsM-)hY^`7pU5Q z1~W#Dl3dBRq+6y!!mtBtn8L@1db++;S0|xR&gAQxIf3JIJCy z4LF}HYhw#SHTJ&-D};@1muFA!{_%v2@45VQ;dS9<$S}xo_wZQTJq)am;?gOt`<-9K zGY-5R-r~Mi_SFL`?o9YQp4r|W#gND)!7DZ`Vv_GCqzZX0q=3k zVfyV9cp37@wpWI%q zYV+t^#dV-w1hi?b&5adXpttM<$mz{arfQEs!qfXcn22vT-_}#55G)pY9e^@P3YfMd zZ9@$9dua*)F>GZAyN=0y%F1%XZ*f8-&KFJnZ=5Cma*yS06JZgQe~@YvW?&%>-1ZwQx-&x85$G}hg-YCFu^|-6=K*-HzO2M(svlNehp`~+|_jSNm zGo+*n-z`tZ^!D#jnM?wASF3q3<*Pf#1(sg>z48X$#?;8i_P`XxZxE1iw5n}5yj%~q zdu#F4;LCo;MhvBw#r%lv+^$Ux37;M7amH@&c%fDQOK|%Axl9H0Dz&F)6P5uM)aRq&L{s9$GrtQMVm5kS zB7!Fgpu}|}SvHtIR_TG~0JeLFfiLgk5+7ISc(o|h-P;R9PtwhWz~*i=qm}Q)Y=ad8 zo8VJ)cQ>rRzrQp{e+K4VfFS+)VoZm+prGJKYvgN3(5HQ2_oIYv}isa>!SA zGSt!oL};{StzhKqeLCF-GIZuM>zkY3m^;(zc5-cgfSBT{;;}_2?8wsYTB)c3PS#jI zwt;BQxKgbIhuum`PuAlb+|vUVU+t@A(qEuKVcFgq7dr_3>8%7e?_x9btTt}7#eQ=o z!DW0)h0b&r7=g(?qIPgjt9w)lAC?Mjoq9b4-cl_PjWskfJYSqO+(zAvCa^medZ2e_xh@3!H^nB_wQpJ)E7}genyu+ 
zwqRc;nw;f6RXQ(_kX{j3EY3>9i0QYMSRU{14+RqsukJJXecGN8QZ6#agaIQ`=a#EQ zkCkfkLquBjlHucaMw7n30pL&pL_5Xx!^4rgh+@n`RTWhju=GV*k1 z48S7-#Qr^(2oPuWypF&s)2?ygcypC*{t|m5A@HMh=$&mmD!JA?cbQ*se#6m2?f8S ziGz>30cal|7(*mAvl)3tFssBNrimF+$c-}JfjZ%|;7zsT3Ra?>&gflhs^w%B1DH?G8;LpRUjo^%iDtYOsf_F!FiTiFf}t^G&uthT~QXS^WEM z1mo_>*M7`eHNFE#h)q{Vz2MyW&P^ztl%Ws0!xHT_iE-b-0@alJ877#UyW1+$4X2*= zxpv><8mUZ(*u zy@KQahj!R3d`z31$+_nXiAJnm7u zyI8!A=FR2e)1`)c3jDku7}?ljaI{C3$kj7W;Pm=+s{!kSk3=vOiz58@ zK^N?bPxF!l5Gi0c?Kggq0tp-oLosfdHCLwsS?k0^5|0oQQ2Lz!!VV(Fk1FpRzqX2` zWR6EgWr?T6JAHc_dgels#Z&hp`7-wcYC1OZ{rmeG!zDi>v%QK?BaX;$@qO`6ztp)H zFVXVwB;1!FoLg9E|CIk|HTEUYJ6^|l=Qvb3M)mYtTD<4Q&qB?lS(b=$$&%qbz34LQ zhMve=J)0vPCY+EbKFyIopRw4g6s!4R3{7S09NBrtFh5?cBt#pn<`bbDXVTmxMEV;} zFA08$Vk-p1<6#o@_3K#oco`BKBTiH1D}V2jrHIl*P($2za(4U`KZ4!khuJWw&H{`V zUO8HRx0K^=<+@b8+FZ*wF^(1P1MK___0AX3^qL0}(hQV_%D=vawl(CL!jXJbXIWQfXf2wp{|gJS7$3{%x*WBVl3OlY`Eig?t8`Y559G+Zh|XZirzyFp}OuRpWT3Q(>bRkK^}}q zWrL}d%{LiVc~-U*goK&gUta#?L|hL@W@1RF+=32kH#0#P2)*f9{6Wj0Qt8)l%ft5x z3HNV(eH&C#!t$swHFJ}cm~&V47`O#zb3s1&NvH%h$aml4(d>`g+ylaoARt^f;!Jum zQeGtBDv<--9dPI<^*ulrg`#7V3}q=j14)CfXrDCIm|cf9i!Lp!Klb`DCbG;^A^*D} z!&MGs+(G$R4Js~*MWzmTH!0=^YeOEt9P3@E#B%vH{lCr`9vJC_?wHC3PdRQZY~JOu zTm8-kcL=V7Rx_>l2e$U;m*;#Zj^YMzhXXnHjcLPGzcDHulTA*adUnXWYTK??Y&6~z zc&IzuimSyI!%04lSL-AcVZSa3dXeVevnb^fIn&R4$d?Xna`_jJ^ymLU@DD9$k7hdMmJpzsxDwqFmN=fNZ}NW{6^*}*{r@m3s>*bZFyW+<5N8)2V>}fk z9^2a5D-)8J9>%|oXUU=tnmbUi{yi+IH*99|`;_GO0#8(2BDyeYzV85PJd<@UL-}c^ zwF54HtlT~46YU!*MoFa_uenV`RIdz&)1^_&DceLoGvamhY?6AvyS;6PR}XW?6<%kl zz|^q%tK7%uQ*O&-`NNSVL-cPUlC>I+a_SRf@o;y8 z+HC|Sb7+{A_fzKcpdE=f^~JfHR-Kw_;t>fUZ-8;R`RjNY$W5skD@%)+gE&9qa%NxP$!GF z;9R-+=EO<&DL2o&R3ObFPyb21BXP&8?x}BrLuc1*mv-|z`~QmZPnA1v_MtD|b!cI)W z_3~{;Un0V)mafBn!o^uM=^ZU8FI+HSFpzlv&IVWGdz{*?r3*n^+-xI-uuB;(JAV-G zni}pTrue>)O0QM1G&JLM6$V4IT(Kgq0;9nL(b*YAo4QDR0u|jl`{wfJJy|%ScEo3s z!^`%EV;6Lm)3us9wDCPk4X)T&M#u(mXR z;aZ-n_y8etlgd(UDY|kS^D1gZ=fo{4{aHWJY;KEJ>~UFZM!an9$bUplO+C1#01Uc` 
z?8ee+Iyzyoh?3Hu6Br3{zH7?Q!zuS|G@IrLyDJ%SzRJMp%}k3Ci_!8I6#nk)!%XS6 z$GsR8#1@D_XYoVSO;)6{E%dm3*pT#@(Jm<7IwU?_msMDIaGj`ac~~#cPU2~|T)*YC zu{!q_Gd4_4kI_Xh-Z=5K<^MG||m5%e^-*;jLfaqn;#VF ze|7&?+s~I`H7=F@iX)UXYUNbD$#bfGH&&din>zKeIAzR}1zhf&IITaL==e5uc)>2n zX;#wsq3K?40z0W;e`-ii@MnSm{S(g3mC+wHC5O8h0`v*rl-p+M#Kbg>l{3AU2B;kv zDnV6Aim0o=VfWBQHeriV&#e#p#F4pqv;>`c>F-0PBP(iTrak0={W1@k*Q;MieW`)L zsi22M`QQQQRrmV?jSNa_20-;s$>V6+Ft6j^y!)#aKs#(M-P}5LyIjcp$c%C4I3gwLu;p4aKmX33{CpQAP9y2{nm)y*~1MU)x;yn}QYvH^~=NnytnK~`h( zWAQ}ZiY!SPN<*6cEK$(&R8eQDOq2@|ZhF^P`uaf4Hy~iPdLe}M<-W;n7E>}Kn+wn5 zoNmF9=_?46Ufp*lHk+LvjiyrgNS^Z84a8=3)DZX^sU!$|Ko~e(yT9r>)?JF02m_`kkW@}O_GjjBnbQc(@XzcGOBf^WRx@}@f zs1LpBkf@fq!z|&RymdvtNNwKhnJLsln*J}&JxnkC56 za{fHs81OPZP~HEC14^B-9$dwymrODd%5?kSq)Ycw0N!{*hYg9vxN z(&WPS95Jy=kZbS^3e1Ada#bu_I(BAlE$p+?2EcZGlmm9UPwUOe5raR&N_n&Meil=) zwhv~o?i^pd>A-$z+WTh z^Qn5Cj!@#u6clOdcjKWv0Jg8xtp9E(Io};XZhmu2a#;Ccxc;lw8Kf1rdRQ7YP7MV! zXK(50O>OQMhktgmHr|XBeDExXDIu9~0R;W;C}Ys`2l4^uW_ z+s|nnpXf|{uwbschfh~qXJ=---(2=L=o(C8d$C=;ssEVpZx&6Sk?I4@Z3oyLiEE9Y zK9|*INMt?*qKr=%sUk(=rBm%6zoTE*L zlD_e>zLM*9+8PkvoSSygj?#FZo#TmNS$wl-*nf+d_{~(qvGI1B=3L#`2{HcF&-<3G z%+d7Ef%e*`J62$8bM|t@wA7JZM=J5VcgrXACSv(2Mso*`SY3Mw4?X98YLr>9C8s2B z-TR%&ZEx1Aj^9gSR&D*0n3U`6O}h-C(aw*(r_$15I^98^!E>0(r(I)V*|6`7O1jTs zxi}sc85$H4CR*YsFr$`nW!F{U?yrWMs-c*%{EYJ<1NwTL%d{+)mp}rz8yl z>LX!X}Y4%?}XNKhZ#4ZMl83N%5JuInrIlPw3^ySuI|J=^scCQ z^s9%+O%_Vuc}6OxeOzcC?8n%h_jie@sC#o&9PRDf=voEaqppUqSsyEm*8EYZx&695 zT2dkM7DdFx(Z!WU94mxKpL9M~zNxyk!>GGDx)gZKxOsSHXU}eJRkGRrGQZ8JC;Q85 zEJjml@GjRQPxA4ZrDp0Hn|bQ!>*5`dn}zKuzl*(6)F3Ot-O|Z45@(L^q}y zwieU8s5m+A08qW(&I5XFTl1a9b8SI^K|#&ylb_r5ZjZ59-TpXYF(e>Ss}s(_$b~A5 zKG3bhg6*vIF@`Vk@hgAgp+%#OV&lzyPL{r#n3!#qGxxS+o_haka3lTs*2iXVoS^>t z%|{caON5zhc841Evr1?z`VvplRWd|Rm^Uw6uEMLiU)h!E<8vAA5Q~Mynwu{>jX0x7 z{~DOw;AfSW=;d$-i#p6i{WDeQooZ@oW+vk{W)C>D2kKNwRSN!4a}slmlp5k&p1$Tv zykU1dNO!Yymw6m*P``20HGN+$xs66&n z`=ny)qMYs7V0KSaPY)?r|K7v+=r;#i)$QBQR}@S1F7vzX-{!JDyK(KBWc}nWIg@@B 
zj_BZS$9vXSm>r>aZ^Aw2sj%7D*oaw8*Aa2dk&^xLB3Tpsde~j}%O8(8N7gMZZ-hkW z3sK+)uVP`^=yaKqVvUC9R&`k-w$*d+zlW~w!}!d_%+AM$7&;+@ttV?|rLH|%!$tZ( zEze5L3~L-0NQ(8bJN62iowrB1A4?Hr;>AxMAJFd7U&O=C-&D&y-rU{qwQ*Hz-1oGx zb$H?zOfWcku)NvNdJ@l*r&K^o)k3|wbS@gR^B@FG6g|V`<<6h9qVZ21B`@PY$e-0Y zpYIaCny$HeP4e2~7gM(zi*?^dG_op${@5)FntRo*^o@Gw3+W%*s^(<<#f(>Hbx;_6 zk~#3fV(;V9NFx{L+H`P<*CFjm%F}xlf61w+*y$d<_hg{y;sAz}{xqQgS=v?*g!`T) zg&$INu(#(Yoh>rd;ZXMC?9@FN$myr*B5v|}gezz3iOw8ZnI7~V+Jf2x8}r1cSAvP0 z`AGxJm@zE=3eC-}W;07mfv{g?$iqiayL>2zI5*XOcj;Z*fTAOekojPfa?ZfsHKjXa zPrw4wiKU_3n}QeoUWuFN*RC1QG`$H74#q2C<+K>Su3i1A&*p4t$$WQXnxeV+k19Ga zg^$TsVj`dd(HwO@KkSS-wGoYwFuq>kI%A#d8s_o-M)1vX$F|ejkcw&VLZi)G51Xwh zTiP;8YN5{WCH_)mO5ad(e&Sa1y{v>ASAs=F^Oh`h8qgrn8z@j`=}qBN|7){tIsL_h zNYI@dnfX=w8hCKNnueTGyX)=0TJjlq`K5B)v(dwi22T}3ZQ9y%Bk8I1(70CsVMMD^ zt=;oveiwH2lhVE(V!u&P;-if3fnnOsF|#x&a``WIQni2aakubraasD-3pOP*w>WoR ziScj&-uudxD>JjL0YvO3*Sq65&h?@7P~&wWXxf)}6`EqZt7G_}(pvtv49Y4#ARMSt zpoV?p4in;S|BNva^ZIpR8yi*_11)twvg79CYi(|(l*GWk=dn}Yo8u7M-SrIgYV=Z< z1D-0Lv$>s(exC?f9{!Zv5{(2tJ1SN734HKU*5WoROj|dD>??u*0s#Cbig5FOID;3k zyx#p%`4J8IlE6yHq9+C~0XM1o+)wkhr)Usf2m8db5c7hwa)!?P$1m`hb!vdtTmFOS zwfR!?Cg&FW<-z+X2)px69~Ybbl45}@-5Jp08v?K3Xrou7^Ht`8?jQhFHBC=bz~Hv% zM3vRM_wUV>vc<=J&D4hKzF6E8SvQ_MRztC9{bd8d&I#T|TxZ3s-bV7EkRG{_{XiHT8elPsnmpH{;W5%P^IC16MH+ko?J-nsC zd=5veBLY>1#y6Tn67!WmgIh;9cJLgjx_LlH2{qYwus+$EB^9k$X`xz1tgE3x1On!M z0FG+y@4xx-M`72Vt*>bskDRhbou>qQ;t!6-t5>hieWvJ`IpdW3ev&C~KJZacBK+># zz(B%0^=iQ!xrCi=V}`%7j2yqEClY`64RpP~@;lb$(mk9sqyO4i2J;khm?(NC5Mpia2qU%K`6HL7!jn|caPOne*yWdMy)AtOCpJeLYn%F;m%C#h z$y5J4_ldh+SM!W4M?~z$Bfxl(3AoU>PaL|9Q*uzHmaUO}yf>FpT@WA=4r6fc6oh`dH4~#MWT2?QA zQY&Z&1{$cOxuEM?q=L2&h;J3@{) znsQIz`w!Dw37@kJ`n4oV;v$Ek=!+*QBBoNxmTyQ6B{|x%~|v{um&cm?)}c+IY=J)q)IQE}83RR4Xi9J}2>elafOI zr(gb%*FZw^dK0(r z^Wi+D!^8QAt-~$j-StWB)L9-0794Vk&`?^)C3}Bbs3L7J7{zoVVu8M?MmCL{Qwkc{75aoDP(j-$9Px zY7U`x*|7+7JT*vm>f9wJPFQcPYc>#uF~!5zmX?+|ow9CB*>A01mHM5xs7zARH zG`vAQSn?cdtwbbphYXK*m%*S5>D4kt75|J(%0K*?~YJ8?8dJW$}k)5nW 
zAC4wCRXT-E6QFGfJX)oz;>c!v*JPla-~2yTU&IJl+(tnO>@L5vfm-{$=q|+~Esou_ z2|^05(=vNBE#iPQMTy>MlGO^4Dyb5MGtzIGYk`5~9SYuJVw|f$B?2e;YB9|37b{4N zl|LWIe?|vG;z}7y+l0|1G3mp}==%636|xs!7Cz0g<0>h&JGoI&k&aZDDd^PMcUdMB z6fkO4Sq7CEceeEO$Ygxg3$qvCaK0pin9OYAmwV($VaqQ|i^XtwUZUx8*bu;c6Mw;> z&FR)bw7WK>lo1$c=>toE%y*q?TQ~Ui3tfAD)<2B8F z>if_6M*epHsQyNp;2Po#@xj0VLw%^)g82mfkXL(7(Y5b|F9ZrTc*!xv|EMoaofxoA zZ9WL$U^GpTJCZah;ItgQ=Wsnr`s9gzvo!YGu_b&pi^x89n?np-rSL8Mds6G2%f@XJ z(YY)w8!L{@?PK&4Yj=fk+Q%LE&=ARpi?9hN5yc9L5!s(NT@G&_tP4r#=v)dWbn0`P z5u>5sAO3)qus>{&jXw5(6Z1ZJ620P_OX39_;iL{WrTvbRpx>J=y}*SaQ>u*371Dl8 zKb;&L&<&=D(*iBk>rzQDd!k9a^tnU8D$P%mHs7&wS{|Gx68!rtq?*s^{IP!+QRvKy z`or;WcP1OX2wNpRf?qExXn&fF{UD4*e$ICO#fCFpl5;KJFh%U=wAnY~-N<(}4riv} z)wS+yw=xCE-}De(e#ra7=ko#Eeh(YB$7)Qrp)#7?qeJvB$CkGN-|)U*ZHU%Nvk2@M z4=u;nkDQPwc&-VxdtC_hjO@4Qtnx@%K+QC zFE{@T%HDGz-7;F~_L4aw`M9pMqC#GIerC#t`}OH#N2{)*mET|WC2jWGh(oH_UJ-iZ zqji>9iN2x`TKG|s+rPO-s82c5G|RuS(zJdCPj>r!{PjKciCj4gn^7iQr9T=4GCX+h zXE^)oLg9?+)^lA$to82sZ`G1QJ^123_$&Oyy7BVoT3gEJo3oSs^xm~;vkK#@+P zl7EVhesk47!qw^PHg{K72cC@ENw19m!787>lKzFcqsUN00R_~>BWI5Vjzc?y-u!G& z^RP(joLSJs?ZheSVrQmeaF)G5zy11QT*1vVSD&pmN(ln10Y{P|Hnf7^urf=&uyN;l zqMmTG5F_KThIe!La}USmHH3JNpTkOM37H*fLNyNl>ItLtM zSlzp(x37rkw;eHr@=!`D)KsRXq{Kj2-FLANVL!};nYlS%*yyZ=bC=93yNE^`WpX<4 zqewJWpE;`+`O1?|`Kcvgg-L{Dc3yOBMH8~?PFHl3LZaP1=@!wpCKvlq8-?{~RSvY& z=!(j=t4v8rDPxc=`ZgIP2{y90DkP+5(y@w$a==+Ren0rNqwg}K-`d0Y#MSa2oA?1S z3I2FY#pG2qZ;bY=;Vk0KtvYq#MVpJ2+xPK}i<<1k^j8yh zPYFt@?n_{=vMW~{4LggG7^Y?avdn9lMf#6wKF7ED%bjdaX8AcEF6ECMx2X32>6>c# zy%CV^TFHQTr~N5unR`w;(`X`a>@mzX#N{$8$6vs;K_aN9@yUh?5zY?wu|8wAad7Ce zKZA;exU4Lb1=mvWT)VFA?~)N7uSP3%R(P|w=aS%Zv%+p=#^!o6j>^vaEbTfPNF zTU)kwtjt2%qRSt*`j%%75i6ia<2{r?zb>m!SSye&D|6b=zHoZ*P=Ti^yX4_4ria+! 
zjgYF$`jCi6=Z&8vE-+p6^xbIFd^ED*U~-(tW-(2E!M}t!H#$6XAO0-xYf|PCE6xGY z_mW4v4$fSyYj@5cKan*@n(}7PW@ii{2w5_0=Kq|@MwpGS@zD)i%o9B9v~;y;Ya=SD zgA_>XlNS|3P2sy?#jjy>cjMIXuMhvt(jjN5x~^hz9fMlK*^j3rmV`$tTwD67>~&F0 zJCVHg(ob4#yAD>%-wC=&<+{xu=^Kv3r7HB^)yTQQW-5(Mxfhj2s^NVZni7P@=eFrBF-fj*g4>(uGpyNZ=1=emZMH zUzZkvq?ni=0cr=uJtUc1-4ZH0Xu?Pq#1E8t)68w#%`Kprz+Pg!y41XMX875DV%dIK zNaYU8Iwler{_V8;+4vY%d8XXa%)3cPk*M}AdTWaEpu8DYfukOWY>Do!x7l89z6?lZ zM@yDO_g1ioa;spra_e9)2BlPr`4d%NdKt!ZCc~~x{8?L#({Xke-NsPYpl&_Ya*d=y zdLafXyAxi{qG1Ce2qYlec`{}Pl`;i%hfX@5I?TMT48Hpa2=8c$osEUuKRVH@ezT1@*T53$~fmu-* z1%5wD0+Jhy9ILCLkNDc#7XK8SmGAX&uqa=tay z<2AXu);_(Q#@?O%7)6FK+dqznQl7AN&N7%>m|9Nc9XAwS(90EfOG=1oeX{Z#G=F@! z7XSSFiaimhqgY(Y=j6fz{=0=7Xa9u-*yT}U)(cOpoDw^TaQ$tz#ADi*<n;N!RPYMWq*95pnoq``hk&Oh5jGQJxb;i}>vv7W@IDgH-e%^L@t<{-T44VDZr^59%tb>A-I(u37snU#Vx=v=WnV z$-jMlLKY-t%c%G3&PSou^7pU!AEt}2jFLq^a6J9aR;X)g`Drpw)A+H+ z;u?QZwZ#_Z>aS0i`N}Sn*ExL}Utwl3If~?e+Px4{oD)mJ%$&d_N=HS-iJ8sx>+=Os zNW4{3&Bo&F#tn5eqRs?#GJ$~bNq?%xkI4uaVnj3jzszywON9+HK67ojJ7oIMzuCZB z!9k(yBQ>?i3rG19ON>~;F%R?2?ZWGllGnt<>e~{hodSC*wH^Xh5xN|w6fU@{!i3?{ z-QR50>hgY#n({778vsTW06W@|#Nn~^C#iC$UN7WN5%`I>-%{!pdxEmF7Yng zTwdzEj&VJKpn{}jrS;p}_rCPHL;LT$!!wQlZhZK?a{u8tr_jj9l@R6ZNu^9Rx#3-p zNBt`tZq+==x;o#}wVR)sBXI{pUCP~y>A$|s8IM0}9e5}AEhwT2 zX^bb(+*GQvq!*Kr2%-4S&);n@Qci42!S5#>((f<#M)GJdvdhGogd`l2xz?^O+}CDa z#*U%HLd@;^x&iIP7jw@@EIfpc%w$t)iIABYgMfvQ-1Ju|B+gq@nADA_er;m&BTAG< zxC8_~%mM+ISv~Ki^4ivDK2nB73ndyS+__WSJe&|x_VJ{zc}bqE!K89etNN8s$fEUc-qhaf z{rTA=Of97Z*KVX~KY`=(b)h3=Oj|KOT~0oK_-PeWe)?nL;#su}U!cjOi@?0gZ7e6En9YrBcFNZQimy_4 z;t~^MI~_#j>2zJzE^r4xJN< zD9AHgdg7N!188Yzs3^00>V13~-aicr3+Y^7}3!()Qr*p>N-CP*8rA@>{qBD)7{C(s4{s+WFE__(Ffd+#}uo^|#)VfqKm|%b#gN zX8eEpiPPqt`3qEWK8x;e$Y(VWol~SHfi{T4=LqC3}Sdx}4 zCOQAnj9=f*4dVwNsN{{7#$-S9S@cyDFievg>Ito86ZG`+D|t919j3hQ^3q!-&eoBQ zmb7T#{j()(V=Y@ldOu~fBDJI!6{_8z-QHBQdHT4wJZ?GBt{&$s9iq{o3>q^A7_SA4V+XUcLMdL8hn6vVE0yiLtRK`Ja4JQrIWUOE(wk``=xHg4mnr zX#C{liBa_9+6 z3ud)U&&hdAwqs5nYUn#-FZy%1sY48K5SyDNAzc~JVj@9Sj$?S3@x;2+f~p0eiI{p+ 
z7_9kE)b7XhVNx6h1#X8#4nyi8X}>E&Lwlc!o^9YzfF3s}sAFT!$^ma!O3HNF=j|ml zpbejHif=`KQlv&Lm2xFa3%gPR8{Sr>7X16rlTe=MIXiK&-siVB#~Y*P-}|kl3VOVy zxZt|4pBn#G#JP}-ZD46|l4bHNJUslxm*knH8E+`wBf?^0+@9}G36fJ%W>!~|qi#zz ztj*0`A|xb)rd;#Li0UcNlEnw-)2l;^#Q1Xb`dyHZWW~sv$=+MGhpLvT&<%`hLmNs; zN{IK$ljfz-`V{B1W`kSN_%Ez)(8>KQLC>_^L!aAT#WQc&xkE@AP$f52{ih`BYh>4R z#jgY%ZOI}}N9jM@dc~;hDgU}L^47EVyzkY|C~S4-U4w^B(nN6lM9{KDFkmOcfBXhd z7x}oXNe>8$TKX#ph0|3;63SC`m}%&-@-Ik2F=&UfVpx5Hh6c2!e-vr6*~|z(wiy2U z@kGFNH&2s*61Rcmlu=3zHv1r|Kwx4u9+S^LKW8*hC_bsPCd%E6b;B)u)21iK{oCK) zD(Q;C`6?p$D#6tRD6EM?V7Mm+;FV$?Y-x2%#ci3;pREnEylzq5+-18?C)#ogYY5+J z=q~~r&ZqfaYTJ#K=l12P1N}KCTxaEC%F1|AQG;B#1p6R>6{h>yQR`FxrxVT3j#kCh z{SR`ovS^@7)|+94Xt+l`i_EdB6i07B>vHW;VDj^glFw~jBdy-PBig;ODSmjPQ2R(S zb)NtaFDiG9z5zJTs2@D|`uAiH)ftNYqE3dpz=F!8la-~p{I)fzGER-c@Ed={VVG5? zL^3HBJW4&fM@x&&#>Q65TUB%`m0Bjv_+{LyBpmUlJTaxSZ(&7Ic|ci^h89y) zDXQ1=PF((oMMe6xmbFuJ?gZir!8>-wFFr$wz2(zyfj*BGtD%$C)z!A{TeDTmPU)^R z6(zrllN^&i1--Rz;AOAr-!(&)8AJ!wVy;(5M@8MDpooCAt(abeI<)@SM#+aq9ymid zC0*vVHherhJgp~k@i$hoCj~3g`4d!evUTGo9g#s*j0DgAr7yidE=OiwyneCKW3F4P zgX89QVy_Pc%R3JjBk`fSHm@&R{;^;1=vJD?cKWd4EhStwuk%y)38!(Ipye)|@#jbz ze)1=&YeUiLz3axtJlzT{U7QLrL4kR!bDv3Zah1@atDxZm2M}tg&!*_+XsP~lMIopR zTFgZScE{o2;Zt(-u}J9J++*r3Cw2-9416GfN_GOb4VvFxg&g<^;6Tu^i0(OXn_NcK zlA*Mm4pi650U}~zCOhwk?@>{8`<$H|gjD;yeM=MSYw5SiJ8NHn`i0JQvzTn29W|pa zYtp_rhvi|C^BbO#52$*?B_uMlr-flYBBTdeDWy)Gc!w$nF|6!V3|LTeZJU556jYI) zG`Q~3gu=Z9<}E=D>Uq9$`!*_Cf|o6Fw4Du_%`C<=;oa==@;HDa!Pxj1JEL0(WN51p z;Y0B9z`Lkng7BdhjS`fTVc*7BY8umr0Y{MUX}RB{)GeHm%Yz3)1*#AlTL?UHFr-AwGV%6yw$RH=qObhvOSn; zCF94j3a7Y+t7Hmte+U9?tT}cH3JL^zhov-ZxNs40K!_2(Z10MAr?9V>G)$4az4_>BH1R7W1a?ukMEU9f#GVi5kilcNA&94@l`%@^Sh9kR=NKT z)!!QX-gQW~sXtsa@3XxiZG53mhx)E$9@6&}oy}S3xC?_o)CGVR48(ac#i?xpEY?*7 zpo0~L58ze?7@M-t7c|CkE{HL(u{%e%gr4N97_@G@F7p^-8Ljt3i#t@fgq%M1iq+&0 zhkP7CyvTX6!4FrJT%)|8JUk``(BN!}>0bK+>g^^9IkxmvP=%9e_&F3T&TO8?9y$wi z0ZwN&Y2%M6@k{x;ur6w+XJ=>td?Y(N-ouaong8q>aEK$oZeJyZ2d4?j#4R!-F~%>V 
zafpxR`zI9(IEXmvg%TUi?jTJDtYFGp+S@Utq(~~zQ*?PNJ9;l3wuTTYalhVY*Qy^9 zuHWbnJ3$a#zQ${2W^}ek+p5}V$fKN4F(^(Uz(8YB^SKedN#jkr;)8N$GXG>j6RHd> zj7Q1P3!t9#faXgJbPqN+XGNib-3m#pY5*kncy)x%q{bvzm45{*rAP|aBl1{LdyI)L z-@9V51VIuT&+dG?Aos=3Wi~>As<5#GqNAxHObiGn=m}W?1C^m3akJE7&CznSUP0D^ zhK9yyC0rD&@!@H--rVul76uB41r5|Z&DI;R#D2zf%ri~R&jLeayaObOPxn`b3n>Cn z&4BI(;9R0&W5Y$hH+o&zA4E_H-GyndOGs}r4?dv$JhQiOao;_DOkzCO{DDhD00SE1 zP}nc5Q^;s}D}~)%mP~UwH9#Cej&Vo=e#dA^Kn2KXc=McSxRFsh*7e~eC&kUBU=FX< zwpr@M&C{bBF%MV5h0cG40RYGT?{~~tMz_Ef|HeSjy%ScBBH1H3xMjSZzaI4D?DGf7 zYR<857A>e<=f_$R1W=aa{WXx^JCdwd0Jw+y(KdAXn}%`~Y?r=MqD)_KATZ$>0UE)4 zIPcxafd|k)Z&h&MCP071PWCH~SuFmoL~C9io~DfkwWZ-%-B0FINH||6+B+eti_P(xYH;&X%?IiCElc z&of@+gJG+GLh(?x3?Y+d`4wo-fcz>Z^cW95wop?6BSx_*uL_bRXk- zS7e;2*v;l5&3;ca-X*R^T3WSTK`6Yvr56AD@g%L02r%#8MWxq6j~^uC~umkJdPkpPi2+)!*;x zvLuogi`zOlAWXu(Ny^DIvG>#&fvZUY zO`#l=C4^Q7UNn(KW!W*RY6^om=*S1fuWS|?Uhe#*q{xSZ01AVZa39pp?$g@yi8s3{ zCec3Ac#(>RFsLyF09q?ycB{j4j}7Op;ALQTbJBkN*1E`oxR}dxHJAJKIybBHL;)ML%|4$uy}7&v$OuX}L41y@!jPBj@QH!D&Y%j2C5g#D zJQRbn05z|c-;y~29)2`}kc6q}>~Cjd21!}il7bT{)Zf7uBpxPbW|psUUE^$NrJjXq zy*}2m-x*(gy8ZIcNBpTyBi&&5)S*CXxEqL;(4ksp(r1{d^4#`51C6aNc|#sDB7hYTE!OfhP>_eq+lqf&KZvNbH63 z{8)t!(B*W0;1C z`iL`tw*LKC%e3_V&^@Y`sNQn9iEC5``-_a-mey7@ggqhjIXFZB;v(K);ibti1`1F} z4g`iR3GRoZp2YK*DuW4DS-$>Zay_mRLpXl!ug(2^bmZ4(Cxtn9^00Uo{Ywb&B{SL5 zh^dTj;u8^&tZ=Q0H*`$e9$qhT)l5=ka0Pq>+%|*rTOS`Zq&{ous)t>=1wKOciv$*mKRAo)d5$Nkd|1Et zL;E@g#_T43#^a2PjL67H1Oej4?6!f(hrpaD5zC+o^5btkPWDl#W90y`7?zfnmbMTo z@ZV9~Boj_7o17hwi5MJ+#MGh-e_TS0jEw9WZqjn(eOF>QDzB=#DR_tgd@cYsahlh8 zkmuR46gi&Ko0*LbmZ|g2)|;?DEC=$GGFeS~iNIMK%(nOuJu(d)ro`1uQ4FGq?kY8C zz7&*Mrb`lrju?C={UA5^lnndb-a>W|YyWxP_sFhL0us!tlB$~8Nt{+QfByzUoCLlG za0(f>0ep2Cfsw4H$w}Qg^RGYbp~G2W+6Phol{*i$d>z-un>#xt|CR<8q6k*GUAAeE z53eTcRs~|N0amXaEEwDxb-pNNH(?{t$QM^XSN36Y+#2M-BIYF?q76g&Wuc4XYp4QM zhtQwdw5FB!=kv-yS+?8DU6p@fS53_KBo+`tPB=ql?kQty>kK`sPSG_^V+y)N<@}Zjp zk`%K5je@smLU3_*Kw;YZ9u2W^bnF0wLlgeBYUDrOSUKQ{%Dt^9PR-TbF<~&0MkmFs 
z?^$ipQiUR#YIoF2_ajo4_?EPF8XRvhsFFEu@R?RkLho7hq*&i5{bJ+#7td=5%yQsD zBUd6k!B1S~M^aJAhy~l9iHmgO>hwdag*!U+w?n}Jd|hCN0C`@~`-q87B->v({Z{-{ z{YD?0Nq^`vd(E7fR-QWZ28RL;DFh=EHIo5ve%F@~^S*RU6ENV=g!@~n!h>%fEv8*& z-8X+RoQEqSDtbRW_<=P_bOpfk=umt?nhfd0VoD&b3wcr5!Id{;c8SfzM&Yz}pAKn? zA#Tigxp+|eV$vNE1{Y^K5k~X~Um=xWl4GMA_$Aq1U0G}2>v7vOjHp7qFH-rrw7u5q zbVth=k^3@rhCe=)Z1&d{)$I2(%O-E$2QyE-a~2|noz0eCX4Of#yYz@eA}}AxKz9Bm|_vJB1mL%aws6fGr<~| z_L&D^mcujN<**pOg>NUFjW?7d*9uUp`{AL1n0)*2)|U5Nk)|e3-47}Z?er>1kOKVr z75t=#)6qm;{zuRR8J{g0@;&?6%NY7+-IiYO-VGoK7ngd)Ldtk`OR7g_+4G4fpc04U z@bct?-Xw|na1-GWUBvBN8-gYKj`=>_8WQNd(Z3<#C=6TIJuBR<=`x341z>b{!6KHr z9e}-c!3Wl8AZA0`1! zXjEV@Ce);iM8thFGcThOGUlJlp{2A|u5_E!8QAKSTMaYXok^h7OqGWD#nK$97Lq(Jovm=m*a=yUyHZUY;c z@-A-QP5wZj;S8o>3&x-T)pQLIvyzbWZ0_`1wn_UbsHoija<;Dxco31(@9QfoA>bFJ z$cPCf)+VYYPxjZ!z*eIO$Dy_f61w15o;-bez3SB@Lowq&-0;lI%ss<4mD_+S1z0=` zZEwBSlVDIQ3`8l@1Tx?@D9c(zwnJBwg*2T?G?Rn&{_0wo9E~3)c6#+yh}VcZQ*Vsi z&Vd(l4SD)hR@y1;vzwGVdmbMuOoSl2dczl$%b_ex=!xguk1pyWkmI8v{xPf4J$JCn zhP~L&F-{3D3#EAHKO)RD#b&V;=;nh^C7IU&6OfxvgoTl)sHoD{i}Gc`nVJ=*OH&Pe zA64?H9P-3!?O@IUHu|;2{^i%2MSa#hjQGf9%ZXAD1C8v0hVY&SUhpq>-Pgg1A}={ui8ji)L7Oc zPo=}x59aiw-zke7}u&&M*&sM!Fx(!#6z(8ku*bnyaFLtz4K5FO{ zc0~3ppaze+WBE(177byXnOabAz>*YM8kxTgE-~|Yi2vc~kenP8%; zM0+dY14J|b@O-5PXi(aL+J8h-lQ7IAwE%>9e}5kk6>vNFIyySH$jPz)p+0Hx1iU;ixTdINhI+cw#+0dgLw_=l&wmG2LX5}lv`)sLUd*KH ztwMteI^oC*gSP1msCTeG@BSbi%Ygd(;dESFh&tn5J33xM%$5Da+VYIhvT!7E->2TT zUpW<&PdUE^nR{5##LW!x9rV%ivwNsHD*$?=E4R;nn+%vc2GCarfJG1_mLX0uy2Alw zaOUY)=Dc33-}|3?up*bArN6y`ba?I17f;uOyk0Ds*_nM->T-68oBRFE*Xi}k-_tMi z+An!SIAA!KB~@cP@6XNX3qw#UDlucnv$M0xzEUyy(7rTr)epkEzxhMk8Wku#0Ni!X z3T^`fa2dx#%uAJV$xjNqTw}|q<6}q`4Ttxz$y*&$%U|?Zd@%4bUet3kT6yq5KDEx* znzol9KAv$goT9_)4X%El*k+?s`vO4GP$-xh54tNJ=%L~eph2POA+7R%Z>g4YEw|zl zS|{@|adPVE53J$8Un>1{V7q$8a+=|^Adss4|-uVp)dp`re8 zz)?NQ;*=>-h?fRc{fBqAL^k(6J6D*hRxG@F)n4k@9;?E%wTByCEx)@kw z+=_n{QTG~{cPpA$wx6c!4`@S(WGkn?n6DfCkGKv0IU;g*$q_+R-S4+`5ATdHl#`9YWGY6WTvhwO6n*w0C-KYk)Aaw6ZXGWT$6qU|?xyY-PWO 
zT_*%@qQ<;Q)Yjm&y@{12g`$at0YcH(fr5>TLi)8W1seXYQZgM+gK3 z;>A-DMd!rj-%j>-W{2uG)>aJ_Ud&&9Cv!>ZS1A26k;h-~?>1fkdg=WuEfG3h{F~2i zeBl=1doq9j9lg@yO8bxR!V#BEMkj>1*hi!%^hLvtc#UMa*t^-;`Ptjzs`luJ5;_e< zFWrB4;|cu78hfQi2=VWS8)u}q^soHyH^i6OeE)mTjVI;7=i@N%#{4)Y_EzeD@3*8B z$@uq{OJ!jc3je$6=<|m0TZ4_!B_6m_mtaAyV;UVcD>e!DTZ)7WG zJ@L7CZT?pYgWk`e*Jr0Edxt$&f|_+d86cwx$%0UIPo6v(E;Nu#-n0ADbotMpKRh0X z_IXQiCqCIqIr1f@N+0e$x>Dbnz`f;;yQ*I_LcpSfKzFC0QL@4Yo$;L2o9zsHGph88 zUmx!*4rHm9&^I(RAjT>k7*rfWN%_TclykqT-NSPIRsL&y8e%<&X}eL1$4|I_T9rJKH*ek~@;Q7+@jMKGSMR0u#v1&Fs6(ZSU^}jk zwfS8m9&^=nx&QY}oNh&$kFa+OG}d|?VK#`l?wHN@%H2ObpG3EpMfdEfOr$yu7^XJ-hD|AFa5eeRGB$a5HJw zcKm4$+}PUEYYV3`AFqzf)hH{pL(9-Be`_t!Yq@mwD$a8y9p?BR%5~aPSC6yKlhDE&EJ8PM# zjpkfMUT2;H0s_XppM5pTJkVXF$!@E>wFh$v0s~Gv0)j`&Mdrib->i(5p>kC6&5^Fo z&O9fK&oA6e{cUl8DJ`C@-oe~wD%?<%JpWTnH>&#y1j*hnVAD8=^|bMog0 za_})x#%bD5@$lih+cYvWM@9U7OFN6`_JBT`0*9r+cYc2IIm(Z;TxO$iQ(j$GP5bsvJylkVWc&F}o^J;$e=9OIalFx>n@$cTfV{uy5%(V47J)pt-n<<-OczS$L zY&H}^uaqrU=jq{xOBk-_^?Wwo#nshNBM;J(sn_W4$|&lSC{75uaI(qBTHRUlhr0~f zgoP^F9s^l*mT3z;d7SID|kgu7>vF-eC{PEL>3AmvG3)1023 z=Cz-{;g3h|_^So3FAH~burs7RQe=ep;>8PCREHruF}J1tl8YSKhp2ic28~j4ct+Pd z61YR{&|On2E7J1vx5C20w�&UR60}8CY-;>#Sk#k6P8eEH-|*IGBrsAXY0jz6rmb zJ=|GRTa|Q4?WSwhta0T`;&(C}$X1GE)=qZaS+tp~$AZwfOD&t6nA4pS5s1%xj#s#Y z0iI{`bf|Dw*@UO0zW&3ZeC-w#>UPmc2c)*7c4`_LYzX2mniTgRQ4A_yRSO=#Mt@T; z`Jh$fYLM_k64&LVGx(iKm)Z1m zb)A=T%bv@~WWe?vLw+OSwS6b(wkrqcSc*DA!BPG)8!?yp^QTYGn+@d|9Bj^h)2>VI zOP93USyZY$J3V6Di%Br_c=__BJCwxv9oKt|s`Z5iNL7dk%%Mf-wS-{mVg(=0$ji4e zP+ewKcjtT4g2)7k0-JxNiuk+_3Yyv9m}bU^$&OGp1`(17pSR~bK0fX-=CE8K&(yzVhf>@H zq=|rX?prqQDp`{Ur*|$wR%mKKMZ2?DD~9RS{7`<1Yc(LhbT~_h5gE?#7|-I1K3I=C z6%C)6yj@&cTG|xSj6$KGKYxy?%KHZgtd7g7JF&NJ-yS&X+SoEKQQtYvRW0~oqQPx7 zuK&Bz!P(h)eYvPR8(C&C`ZM4*jqzAHD>lw`MW5!G8U43!-?A+?4s@puEt#M)u0cuY z+#D#k(W6(%OZ=1<6N3*+QSy?NlF}b5w{hPYP+2@HSf*`B8i=HEl5E&py^DeIrs zYUbvQI#zRoRZdnIL<^|VWcd3h1ymVi0M4!bVZBgZ3(5SU+qZ80fU3>wamay*1324} zjB4SKZa;qfSPz(kR{qN+C}_G+7#==+__cHBiKys+1zo3b6df5PR!Kl3CVgKnr+Dnf zNuu7)_}v=Tk7)~f{~ocNmP4JFcfMa0 
zb~K1b>cu*HtG@yD`zqxq-@JR*H$R_w`e$&OW`!-UB@K;)Ty(f3OK zG(tkjPy-7+Ph8<>`Hh%o!2X(MXm3EKWPknRJm%cNWuF1~KpfHLh^2)k3%c=nuznts zg$2*S={KLKO@gy}dU|BVL39mnJWV{5%;gl9DEKGc=;9S&Vc`dql&nO-ux?){#cY;i zJWw`#8+0a@^6R=vYd5)bIJ#?9{!*$d!OLI7_W2WU3rj*pg~RFK1NO z)Lh2J?f9;>FuJf$p;F~&@!-J&9=Bb_mJrf3$CY1DtjH2Ne`YCU_ERXhATAymf8=*s z)rT^MS)<3%ZWi`PB)!rHJfVGxmN1c5pU%gP7_dbdW`?M}dgd5c(D7CZR_pHW4mq`p zEw)4YV-|;=$LRrjAfuZ~GJ0mEY+?>hK)UnBA6`pJ?#Qi(1f1>ER0>Q&hdqD){=H6j zvf%P)St#t`6dEm_BIH59Wv=RnU@j7gi9A{W45PNn%#Eh2Sf3n z<%0)rQJJzBo~K@BnY(Mm?|IAvnXIX$r6sH4TVXJy=H7g5;cv0?qob;|lBntW4;u;pkK@mb#*cnyYFMPgk91#T> zIwD;%rqKPs7DJ(+{ssgGrvoaN$;GL6`Ix1gtKOTXKm)ZXwF^IfqFbj#osiQslJBFU zVX^w7{F+_$yo`1XH{FM1#uoG~fpB(QDgI=fp^xT2jp59tU33c0!TQdQ!T$P`REkiN zw>M(0D+vkEWUb#nOFmuV{Q2`b1*-k0Q6J{#jZ0F}^-5(;l-;)HUv?$(ibAR!@E-+) zkpR_cqm?rBD_-xtKwn5oeAie$10Q{vb`glWe39L1VwFPMV|v22Z2>{S0fZYMCYdFG zIq76bSU~@5dqpQ>>oMRr#QwzDX^a6P?Y#1cbk67PSH%2|jMFWlnLrE{cW>Rg70Y2F zAEMwr47Y8J-u>g|Yn(IW`J{=!(znC-74hL(b(!ne` zzqR#w=9?NWtMLvfS6RDTj(12&uMKN(>$jM5<(yQbyy6_n{vT*-7`LGhhO}2`KEiem z8+!zpEMy5KKuJB1H^dPbe#RKu#%dQ8 zm6_~%#T)8m_6be{Sr3$ogtgkE5CAv^YQkl}r~{xX0^H{9dU*&*i`C}OQ-GsinE2iI zSs+R4!Q!FfekGIsw>vaNvNb6AX=WqspO`^u-mJ0&Frb(Y~KqE|&6{NX>@Z}k&sCR>1r zQ8jK(o`(xGnGP77HDZ)93u~!9*&TIXZ@}^PV2ND)U1bMSN_BNLtwIJaK;bW6zD&-} zHov`a)fjLAs&w4z%mb2+7!>>dg*z9olYFgB5p*m4mDyoh(g%E0CRrdrIVT;U5-|3! zSUPcR2C-<@wMF>0Fw7hy?*lK9CgHHs81#2aqwh8FUpC($&t`0 zu+_m%BH(-jWD^Xak&+7Hi?;%?Mzzx3B;&;B)ei7~xWYXxuM>xzDIs=Bu^^q*u?p{+ zI15`_w#%0p`5* zDK_=PcYx6m!6mp}k#Fy*~jegfKnWV!$1@gsfDEQuj z5qV>x3mzNI_)-a6*J`w6;moqcNkLk<7})%_tc=@kuH)sA8G2T|yj{0oeYI*WyG1Kc zlNX95rjY3t$Q2n%M>DE@4zW)noK1Ge=!Srox9mw)A9(yHE^nl$)VjM|!DmYRWOZ*C zJCfIKZXHk;U#BSkjT`kqWeFjPLeZUufWPxd{XbE`d0mI|XnGz3(E#$73|>5Q_4>Kd zGOJ7gM~jcCb-ZX_m0RBzK3>P3nwo-uk^`y%D%W#3-3t)8AqukPAY`iFp_6|oYQ6jw zVCg$wUsl_h=K!%bHa9VR1tTegK*(Y>E&wd&b9BB!=5>tj2)G!-s@B)7Eyt^0%(h3l zO!?vi)W}EeT20h;0u*Fl=4NGm+4;`Z&5hqcLr_2bg~aqgM(+tGYD1V ztB9AO-6K$CB7ul(p6pH7txt-`B=U$rv78^w)$p6P9e^7c5XuZ8Il^~!b>;Kuf|7s% zO_ha)%jQF! 
zwU%Mv7vnBy(#(Zto3@WM(@_iikWx%0Y6W4n(~#aFSAdc(0qE0ZI+(kVG?tE!PrdWK z@ZOl6i_mqaFsXkHhAiNrkTs`a4>4K;ZRV5mMX0_Q4IcDGe*mMI z84G*i|7&^h&}!ETyng)}Y7b#r@7FGyc=GmbR+qT9Pv3uoM0+!$?J4LSswJj(;c4t0 zenglsG5pv3c>3lD&^eHO98R-A3_II9w0=h`7vA=_AM)9o?u3sjf{yIs|GFYC>oJ85 zG=`)(iMxO5B;t|(uNl+!|8HZ4Z0`{9-=dp*T1-oHi$)U1N6oo1_q-3;PmZ=>?}>sh z28&^Pi=~O=3}hoov=@fXR7T9X81G$G&beDO5;S1itXsfSLlT1}gQm>fi+iMy=Tj)f z+Y&PL!zNDItXqw4jh%skOz7XeBFH`W?}NCM0RgqYzYmmjV|%*^bT3e=ppf*(goffl z4u$GExv&rbRn4_#JS${VD}QKuYU+(xT7zi`Uk!=Rd8+r;$61WBXDEF(gGw2-yz0Ju zr!iT_h7Z#lychXG@{gV!dbh9=3@q4>T+9fWy^A4K1Rg{S2pNuYJPH-CY3WJ0obZKC3MRmd>MX$+VD^gJl z^4hz&aWXlg36)Ygy{Q8WJ*J$-99?la5j3)hT`2_s6YTo-D5EcDfpM*gv}lP;6tBU3 z(xR}CB{nxYwfgN5+m>(n(-Rtp86$BuBnkFLN@8Inn>ZV5>b&T7!L_NWPqn&wdT>)t z=>Zl0PJp*oK29p&Hb#v@oIGmI&U@@)K3ISmdX1Yt>ed?DQTkoP1CUAhHI27%=ft<{)a(X&_4+@mgd% ze^Wj8dXjPi(N_Zn@ypBd$X{MV8Al7%TKRjA^UJvT2i@jT<4bm)YayrJJ4A|5Pe1K# z>l@6N#w@Bc-h(1b?Q=7&H`4WYaICx+kwQ#PU6tyfme5uqbp)YVW$F+eqEz*FbcM>f zCnlFCr7}??3EgfkpNHK3%trkR8b@B0ts$#1`e~4?r3E5hiCS%FJ|1+Q_74v-PA$(C z5-P{KyYaNiwcR~D)^|4=8Y!37^X!gf5>L?_#vA~0K25%r{?{*k?j|>A%n>WC!&Omv z0dJcRZ@boWa%s2m_TKwjJPY=DIN$h^y2uFj_sfsIkNwo`qQB&D{QJJ7nM_?*g{Ggw zTt{i#i@_mXi13l3HuXp9!CSI!j%pEyv*M;d)pA4~KsK30@vkW>qW&=pwJ_OWMn!YQ zyVKC&aC55eIr@_*+Q!M}zKzw_z0FqyQEET$+{4H{6f0O#IRg?wbck|?rRc*`OQZC6 z*4#N%J6?%m8mZC-BLAAf^}Mb2d(%VN14xs>&(~j7mfh#$Ywy%xc1~--O|vV5CS#>- zKHh9zr33$?*8aIq70Zn?GRLGBtJnD+k-0=J+|I1@IO;B5Yy0TwLmuA{dF3r;=gE}|?Yu7|Rat|;3iSO}>)oI$vt)lSQ*n2(CKgQMYo#g}qZbalJyZ=}j-D8-+ z5wI^y)yNXb$e3I+Alw(nG{E25W1p2Di0p24|Lj{D!Ov1HV0(52-)8OS+PQS$KaGRT zy*kfdPMDS8jT@aUqL|CASI;TnOEIr|C}qpX%}#k)9}QY$KhfuS_&+beQ1#_a$v+-eNzD~Zi>uaoa{7moJfvULww$Q-ZPVA?_`w2Q zrBBgEd+i35JI2tnQ%{wzu)TYa85e0I5!iW?She)nDTM4O0hImELrYm|(>H~PcOKop z|H-uCW}4rv3eH4>z@eM)$A!A?slN29fWd<9`v`w=D>GvI?WyTbQg^}?*qS?)SZ94N zu=z>U$L#9Ec4GVG9;HWnsBXPDK8w$XN2z6pd={W;_}@ix$>*87IEe*Bnb75fGNGn>vF{2aE(!BDea<67f{ zLXFxudp1;y+F+=V_oLWKLD2+!sQeLdTYP|motHO`l)voUcTZ7*36C@0GOLNt`fZPI z-CEUQAs8<6;{IKkap4g(AggzOZjtaE<5NjQeldxdBN4WVG`!y0( 
zi$3EDw3)8^Va$=8qLIJXr29f&VZV~`ZAUTlC9+~hOIvw1#D>jA_0A%NJs;^TgA&EA z819kNn%<mA~8-M{4KxZ1_mI#zi9?S8qlm?zKMnyTHe$oyy8(?w^*&wif` zq`RkIS=_u5#b8S#lnjp>OOCg=5Vd>G=Pg^uL2P1TM{3N)^}U8VI<3O0FziC{xr%xQDWkblLjUdq8*T$Vh4-reyfk&-qUkDy2+H zy50yL5$^7u-6Xz!)jKnj2J6|C+vND&yYDSPf;BvH&&j9G}M z$C!nzixYJzksjT#ym14s8E9{7M!VQW->h&U&rz{7-0Mn%o=Ab?34P|~<$%kG96OahC>^`?k zp}3*B7PGH+Kuxb)EIfR+-z3^KB!DEbDh20jWg={WW%bLDt_dI%Fm@QTi>VJ_+ufe6l=2U!}KLNQF*`eXa~ ziS6Y4p6~MONP~gFd)2-9rbgOU`3s9w30nO;V+$y zHT7LXs#V!xOUH9J>$9<%ZQ^F~Z5egw4VAu9yq?>!Da}A2yhEe>?yMmP`w;LK!Pdq+ zZdz68(*DB<84{#ZL~Z6;y(eOS-=KQ;ios+u1W!+?9)I*V?VowmZpK@R8i4R~V)7m7 zg%=ZgJ5)J{5oZW5`1R^`Jcp63A1}Gr=`PKw&_1_CvMI9tz0xs{#W`f7ihPxp#c7Rm zTuSXpZ|P4+S`NlSa(MeHBr@kTY;8OC+6XUbyrEN+wv zY}u1@&d>K%(9p<|W*P(zj#y6U6m!>X?{>wWV7pm!*$v(S4f(cbqS(%&iS%ut;4B2V z1SIj}(6-5m3&?a6YKiTg$X-e|9}1J6T7nKzB1*{dq#~&gHEqc^y_f3vc5@8?W-55?lVyBPCUT*&aFbWet6?pURXT+ zL)5|K$&{K*U|3{iYOR-CCJUX$Clc~d!o1T}aAnnroQxFNaeG-#%tl_&Wb?X-V6Ho* z2Xx{iXu?5sv6}y5RG`ZGTDlkS@zCo8Lz^jtR+g% zGlNIXm5+z+I|w9PY;I{D4gJl1WH_f|MO}ToXRyD2$o6OHrOBT7lLsQ{k~T9vH><0Y zLS^Id28=m2#~8Y|dRsp&R(|Zc0fpoeQ_P^|kzt3lW#+}CHTPpO17wWSkjG~8rBXlf zTdMRdEqXPhk#=jJhVb`m|8+`4yTKxaLlpNkPF8Thw`57BKzCO6L8jx27P$>j*g*2(1z0=gj>Y#DRQql#ZtwOHEP=S z#aq?UqovOfP@2kcrPZv)3%QT3wtf-Td-8)6;a+X$+3JK#fl|0=#Y%)a2K{vi zkGX#B6731@h5mvn1+R{}>#L0*(G|b2gX(6au&>S~Zm!ePm8;5lkBHMe+c4&CD3=9B zDglLxT)i|~wllsYYwNk-;*eFVU5%DJcG&itk+oG8cdNiSwymTX36zmZEvd&^0LU%$?V|P=Zo(*}O{!RI`H=mO{_VBOJ{@^lXIrZ(|vniXVBfQg^t9q1}3Mxb7 zCC0c2()1AV;SNK5o1od5E#=(LVBDR^I#R2Y4h<}xO3wk$ z?zP_){!F1xiZQ~!HeV*Y%^pZ$KTf}F6-YydTsH{I&$sepz7Gmr#GI7jTArZ!?>ms? 
z-P9_KFEHTeKF%5F+YD$S0OwX*a*JDA21iWIRmyFN|7@!7{k8lk*aLeFBqM3tB$;=o z)A5g9m#9M<{SF2ro9*TYk#I)R-&e}gpxI&!tz_1lYsl!+?=2uteL&ZCYQ0KOo=)r2 zr_W}i)2rKz^**3ts-J{SXgs;#Wqc(lOW9?d*h^Fo5WHZU1I>Jnxb~vd6zmRm&&!wqvE9Y>Ees|z(Pkzm) z{!KOHsB4i-C0;&rP4u!{ak)oT0kHCIA~4dtVREDm46^&zdy&A@xz z=rzh+lLsK3u$jx6{2+?rdtl*sd}X9apWBMhs>PVYb|+p?&IUI+DvXK!h$JpcZd^#+ zGKyYF?bwYVe}{Pfj*jKT%>|BgEcKgFfd@7}oMg1A&*Q{Q9SG&i9fi5>o*3D#kYML- zt92bE+2-pHYC2A&>@KY>p~Wjc?7m5$SIm3w`E zH?^B{isEY~XQNWp5mj#MJ#R$a`4`Rdo@pz^T)1;R^;HXv$xua&heiNloqolTxv8Dn zKk?&==Ao3~p#Tz^!LeK(wf^5BDJ#j_E%S>yG(pK7b3F7=Ly z_ZnaBm~zsA-8bf}rU~vI%T2gLDJN#@_g)Hi>fUqYLwc;VzRcc*eNG~WE1}R7`HJLs zTEwc)Sa zC?4oi0?r2-0*o?!+c`rJ=QW1AdK7MgqG7c-eCRL&G>K5)3pp`&?z2o(Zx&RaBU_S}%6r~BZBj^m zKBL^}=|6Fs)B1=?A(ZsVrYPo3;`j(pLg9d>^KSJ9ECl<8{X%^t2Z@5kNEy+a%}ALgdOyV`Xw(nKs^05TOJ(AYlOh@0i&2m}dj4CS z7IN8$SJ$mM7I{oRs>my;xhntK>+PPfu@@`o%O+}y>D|UM_c{{0{~QGFFQ19!pt%<( zXn%TfhS;QxqFa^|U76NsxDo_gJ4|(`om{`4s{f~0)=Nl8=t9@&n_7_{GKLup$k5Nb zkLgQZBj)NT?)v9*g9Zd1G)cjh&<9$4vddhY6jHm=J^&0HHwg*bz!?J`>@?`tfmJUO zWO^fYoJ1d~gwE*u5rnsHNf;Y*%A^QAn;1%_r2Tq_+v;aFURgtfK`hT}KLz&UQ2#n) zbD)WWo~GoAC+&-%u$NLuguHp-*lGZB(^^>LrsG24N-SkUqN%fg|}AH$yjU;zFp zV`~cp2wUg^K# zbc#@sz@Nzm-7LHI){8Nvdlf;!ox}Ld>||tQ?tI5~<1SW+Mq!T>6L#vUrD-J8!v2h( zYc)r#SXd_5C7_x_`POwC^v!HAs^_n`$KT1&81gE~&Tk}LXgVQNEX){rAh zZTKDk|Cwe@;fb24GQ(5&dG?vBttN?2Vkw!l$|W5wiP*opIekPY3)4jmmA|Sw>eTx5= zgX^vD6Xa*yhk8Op+gNErL=E5Q*f%ydrhbfUEuOII&-3r&?B|R!EB!k4l{!2zOn;S0 zGg2*-h`}cruNi0v(5b8NL?r8tDp5^HW(gS=+BZvkeMwAJnyW>T1!POsutL?LhMYh zkw4B1`8~fi#!$U)D|>w0chMZvb9~)M4UXm8+9^S4mZerh0bmTuR)~1K=uIWut^bEv z_&eZaP%(}Bztc4eod$(doy$@$sXU}u1dDnG2OY5+S2?qXgg zBbZCJ*l01*<(O?&9GZjB@_R^;%CfV>IWwR#o(lLYvE56+{!aQ^i6VM)UT~#0O$d9p zM`dHE+4gd8LO2zBish3wA#CFkyuo_mkQ1Y87b(3~1QEjt>5fYSNoD@E3HMvC(vsA7 zG#ABfpLCm#5Hv(dp2>eveW}jn8dQsY`10f?8Y>NI&Ged&mr=P91U9eh{`*Q{Gt<&; zBr@=f)3SaAe11tGUtYC1Ch|JO+s+~lRENCQmm+iIk}FVi)^Hqj^IGIepQyzP4kq4{ z<-wan9k$WL@*Lr3$n0ZkuG3Un_5ofAViqWOq;pdxi&$N^jSah#rNP|7@3<_NFlqt);~_1#;BYEQjAMpg@jH%$+2FU= 
zJ*gsKzXsEd)br=+s2Z@|+Aa^Xf>)vyWu`I8jWVASel|#L( zwA&8aK9U(S$+GtpXvS~*p9!^YF*~BOc9RIX9p$_3zSB77nc^=Aut3tNZ%!AdtZm$! z)zZ~{5IRs0T3ZEGRz}*ZxiL%Rea+cMYJ<;y-KVn{(S>7|@0+KXJ9DN0Bi;4yI0?S; zIDWmx9Hw;@V8MZfl{7f92q~U~-!Z7Z{;|;EJf%bZ zJ;-6Yy33uEeH{+MooJ{Lo)pxx@R+FWM}R{ibV zlxJ8c&{JZ<+H_s;tF_&Ke&*nz~HvW(H=D&T>!{fpIvw2ozCjXbi2}!LaL#i00vfK zOt@Z9Z@o6Mxx|LrBWJ$EwjPRUUR;Un(=4%`D(;nqS+H(!vF*<#SoQC;ABWl4*nqt% zbCX6U5#w0FI3dBogy=#&Z2I}raJU7X;uX@S{|Ag-uVKW2aFIuXTz~wk|8-KGVVbom zCrDzhZpx-slTey=f`vr zQ7{JyvKOWi4LU1q+UH*5e%uT_xdm0LKTQFcK>(OH^9A#w5p7Cs7g!Alxy%Wml|PeP zHh!+PI3#V`gKh0XuCiplk#_1(5%XBYNnjA!_D!{xQ?^wt^0(!(52^m=le=N?WeBXA zkP15UURA}i8C-!`6`a1q7Z=Z;*QfOYTcYRbfzjdaiX?c#65II=cS)AOg+c;jATZXJ zz54?BErha#0!4;fHRThClZ=`#Br-wJl4(-=hm{l^7Ir56@1)3|tHIPUn`yCem|VnI zEb~+j^}&VHmj$j|fwh`FGH`y+!oZ*sa-!U(F{@fH2?SXiSOQpz(=QgN4cN>KYYsw# zwU^>ehIN;|kaMnquN>huC+^nBe0>Zue3>c{!*s1)`0Q8|{E#ki&3*@nw8d>oFgspI zg{d!R7niW`@V8*M2G^cl{1&r-0Esu)8?%sLwZ`HB|D5p2_U%x?-PlS?7|H<`h+Kvg zp+qEY*aGV>k_Qrj%@43L5C9V^XqtRFoMYJI{u&29KOV@{P)YjG#Htz;8frA(^9iiu zn0Y5~PGm-#jFvvq@oH6x{}JD52!^^2u)Ja53Lt~U(q#8OA+Il;hIYSNz!`@b%tFBH zodX+uhMdifof}%=vPtB%wPe|;C9A1~~y#c)c>wA0XAHG)!a4&+! 
z2!U#s&F;PrbA%&}Z?JJNW_yG7$SGJ#Q}YP=BMF@UXCLmoM5^SfN5Svez*l<_m*BCn zF)a}6Ep#{b4FZBzu(pNeOxLKSAwL|2#;E*qe|vAKhVX{OB`53Sx(s_}+UI`_BTwI$ zfoWednsEvt0OKk@CQe+HjBSeZ7!GYic;jGrCi8YH##-%b-* zX+QZ6_oqV%g*^oTkk3I>KYjM>CHULGecuQ`9_A=i^Fox~N_~>c_>J{_Jnf~wuQg6f zg}jB+eFo;Z|3-et?!sgSCXG}u0lqhZ@fWw4-QFT2S=Zg=g~d<2oo{G64Satf*&nrX zB!H5H$CT3?Z+D+N{GW-($Bk1qqx{U7PoM2k_|!c-mZev#3XFfpCCOYv;-r8?{wR>m z=zS=X;cU&zwO32rY4A^xvZfHpkmK2H14V-`K0>jaFj&Yma%*EyJ~vHH%xACdKD!U5kS3VJ2G;o zHY9iS!M;bYK+K{AG(<~RG*pY7YbAHU&L%c9*a>N~FWnXIGdQ_#7Z^}(&2`&XtVXwj zR9F}=VwX{=Im8;VkVhx=fJ^MV#g?)%qx!0P&9#J)Le{qvXh!R-adOEs9k(Vf<;rs5 zFfW=A5_Aps_*8xk&SpsxCqv^fSwS!-!{Ceu<#lQOH8II+q30%i{#zn8H6AN9G}den z<1}8p2tYJ7|8~qu4v#C3ux`OD=v{xeF&p8lJhzjdoQRHlLq1PosIRsx_V<=fvNa7B zBBHlst}Nj4?$N_ViDJo}$SejX?doND!7wkgGu6h@PK}M-jXAGi;u#lyF`tv#IuB&Y z*`tu+>wAR_I;~rdwl#JCTZu;(3dR2W)=;BVdO&X5viiD?#L7}kZ0w}GVD8{4fsvz) z7v6)M&Yd~rhy8sX#5UQXJE>335GR9#SpPtSO61YQQ^!PD!Q`J^i=o=48wWAriRT_K z2M@bjh*zkrHla_hQnd^c(b3Tz@FkD^n^nXUgn>JEE77>s9|ML_5+7q@<9jhpPK-QVLJA^4vc;O{|vUsX>kl98DSguSPZ9jCo9B_~eGK-h_r z!6u&$-HJpp_=gh`;vVn|x4V>9noQm?i2OC9kMGmb&&&+|KuK%`6HfKalMhwU$DJr! 
zt;C54-z59ScU_n2|6YG+NW~++*BGph9>Z6($&a`+J4+YrcRi>oV%rqIe0hk@8uw|6 zjd!u(^mfwkLe~o_OO7Wj^T{9fRmT5j<2@gccm8sFjXU?8*qyG()9(kJZyC-M!Xq|M z%T`w+?&(WV|7Ya?!(@%iMq0PwBX)~8?x*o^(XG9-d?s+y&`^Lj_jWm~wGa@e3uq;F*L;vsB7Ob9Y zp>g$DJ$}>{tO7xY%X-m_4i@pj ziL@T0o407`hSXX5LzzP$h=wl}%UJQ%x!s>Kq<1{?{)aQCQwe z=CT}r2P_AvQb8DzQSfJP_L~lGlB=WRXiW_H>Y?tF3di74c0(GR)M@8L$v11;$BHO}5Lj_kfhFP^eE)Vmg?ioXdB>w5J+42>saJ z5=q%Ip&16s`8m4Q?VTbkN<4I%jnnECg6e7o@%yE!nw5^p*U_jb@0XXKN4+#$!w>Dx zkq)R2?|1$}&1p8&?o=_Ct3Co&%54~#-V3lQuisotwS6`jM|}FoHE)jxX~1`e6r=797@9g zGaE?JQdv`l`AC7Fb(5lz?jnE%Di3ZmTcNR3ZAYbkjq*RTZkL=;G-yvadKPDEFuabY zE{if7$m1M68(#Ws}} zhA^j9BJ&q*@8E z-k8ODNg1X9cPrhxWsdkTFMo|@jSq^IriQms7oDKBaaKB2@?X`p&$BkNBuK|f8FpkX zq0TJX627Z5s#++2QzquhxtlW(e^@mz<)l$Lq2Q&SJN%8SXBhiot{AMepsqN+crOIW zNmLwyEGDv7Cna+gbj8CH5ZHE}7wmDH-&pTS8_Hy|bI#xEv71{KMt}o6j!uCF*q%>W zOxa0h*>tx(vbuU;DSm=Z%6zzocZ`^eSk*?bDog+23HI@;?9A;fI})n?4V?}{$wh~< zx=6F=SiTkTPW_pXJj+JrDl;etZjOhyv6YsU%tmHWa-x83jsh%4GIp2-9Mm$-A zeE$HyT|v;dER-8+34BQgjrM5UwP0yO!~cLtDoo0P+1v+Y%*}zA2{3ws?><(F8uubL zydBUocvs$shEg)Ea>36JX=q^JtHbvyn%CyfdC)`^zo~}7ND1ympF=jH;J95n;N~u_ z^NNeJC8s_Qg;NF!FvDE{W?LpEQl9u;F*q0>8p4OZ8Axvn6LrG(0)f^bVRD<@d~^f8 zo}<~YXvD{LRuU`X#ffgcIhWn)GB3;u`oio$u6mg~as(M)pqjWuk9{6o&IzMsmY)Vp zFVm8EkLWAc34D(&TFr449-|4F5vsHrc%3j`RG^ z(^s`Wnz!)jkBZQjf*=T%~|T%a>cfhUes`oC9ruu*68??#iqq6mN~ZeE2eGTVwQ1E<>qoR1bLesq4eTxWSf%y8hwnS_ z%5G3I{TLM++X{w#SbqbQp1+;iu3)sXh^;9QE%bxV60`d_bv+q~brgUWV zBYN~3%vndflXy8P6XT_o*?*Sy2PJpZK!7F1&Lc}MB$9f`F$*uSq(sepQT#-v<0S%*H%g$=68Yf8wI$?(cI!7Kwc|`?5>Mgkgrl_Bp1H(ZYIlZ6T z4BTm++n^=yF*iSp@FMj8KU}>9RF~-*?v027qJmPQlprNYcdAG?NSCyfbc3jvd(CnNg#Y_Kao6>`KDFXuprO8hN{#{)njjkNrsMa> zs~Y^K@I@$AXtg2ZtX?!lIfw8d&$CS7GeM62kCWko^3%7jtDUP79sELuvp+Dq7GL(|EIvn^jo^Ej~6w5wQxt8Coxf*TNg7d}mpO1utfkH+`mYkA;fL+8| z-}FT)OUufJ=(89Y_$Z&ZCx9$m?{>s8NfFTygR?ocV0C?2F5H7t@TF;EQo8^uf{b5Dj^S~O)| zmZ6@Lh=r(sBFY~b3GZbz4U$mj9L;09XE{csyed6ZMy)jVww>2px38>(W#kOLGT~aP zhf_(>pyEaUH#-T<{C)}2+q?^WaR-xP+0UdjW>a=ey7gG2t?lf(=Hl88FoNljm-zna 
zQw@*Kfq|FA0!xzWQUbIJ$pgZq(bEbPEyGpS)ogB~iGj}SmmXoxI@y0qaQl{v$Fk<$ zb(!XM6uFh}m^iLoe^aJLjKNs?yXRGHoV-}VI|VHM{ma{B@b7JGaPwBzy!bzhsL$+oT6#K#1s&=;J|`>sH+^HoLvr}s-WG*+T`BiNQ8A%) zW9Iu(QDif($4Sb#`^GcOe^<;|i~LvVZ>-l)xU59!I(XxqEeP?zlGX2VdO8GBJ4Ceakqxi$N0%gU#XaRax%s41q$>I3UzzaPNiOm{&5}+-zJs{8cn zJR9IfkkR5eL62Dl?1gW^QBhF`6lD7I7gCK)%`8cqrzPLdiS6FThA*Ah_tg=4T@}2m zo?s{5>2=SDM`%`{pYlcezx(P+8Me;x%#-m{8#WY}RtR7C@Uh(Kq3I0fT}VnzDf;#A z%u=&CTHK!b{oV_}V1>>b$^4s_D{1s=$G{z($=;7QyedaH^3P$O3r4egbHu@E{*b3@rlla+77t3a0d zrd%DlQ{%~G#iVbbpP0%Mz%TNIwJZHBks0`8|NC3ngXxA-JNtYF~Xuc38gL($L^lk6NyH zux#62wb|m)urc|KLFXPQ=cp{lfv#tKhwZzc2*ylLuVe=WXLRfv%fh+8-p_}5Z+SOABQ=t zUZN&BZ{L0SUH;4W?+=K1T%)$?4@Z4E*T=bVaUV3O>SE75J<>5c?}}A$XIo)|Nip$H zJNT9XHc$jX{o@h;Ab9x77Kol8oSHR6OY~%k0*xT^wAW9RFa`x*z&HWB0bfy3k@nP{ zXJXGajmfFDdXneNZl0WrA|E8d%6!2tF!)YEX=S}KU?XBk4NKZ&Qx=8OO1-D2-o3{Q z{g6(l25o!W2I58Qt>0IwoZQrxX`s1qDCq2!(sJ=lt#FpzvM(+)BcS+IFY)t+GM3Ap z@Gu5b&3^ms5EtpVvy`xBBd&k)^4>&AV_IO18q%tm>X=Vmvn%Qy9xs}|PA3crSbjj&ta;kY5B9QD3I zFnP6O(qas~=HPbAZ8Bw@^6%$yT?a7pe79nssd^4RCQgMt{)1tQd6;i>KE+G+h&-q{ z+(w6AbEKr!?ezBM!G>I}Yw7X!aAJk~%-vN~uTk8x*l{i^rJNTmDwem4_(Y$(^J@-U zQMpfDVN9t05wGbaE-5|xi8J@!JE!ZA+=S>@5Gpx&xu%nD#7#E{fG-itJZNe-2=KqM;FzJ#}0j)(&r8YMrZA)plu7E z;a^IfVpn)Ska^RJtSsZ`WEpQG*)XiN#IoIcKXRmPM)I}bw^WvVVVq6mQ zOO11q6sj@g+|sdh^6L?E@APk7h2J*NmGG4@$cKi6hBXw-vtL zICfu|j2H@O<3#zgNnifu+}}da*W>Awu|u2sCnZ9oIK-m0Rktc!*Kgp`T)ytVXl%o{ z;Cg!#2q#WguNUcQ-$Tca({H1YNYbiwZF`f`MkkS_daXl_jZWddn}sw3Sv{Liig&=Y zYT3=aJVs((>!9~mx2E^L^mCrVX8c@xiCOP=9{rcEm)*2C?3WzA-eG`t&3LttcLVk) zNk*?-PhYx$riRmN;dNIBAP{}r6bPEBcwW9Xt+OW}>T&Qy=O;D8GyBrkw4y^pLXHYHPo>Xh2;WRqbrpBrPw9QFwzH)F zXJs@JmdI#{ki!|?aK6scY$aA_%n4sDUv2vb^<4>R@kgB05is|B*MYL#II#gRP@`3R zLhb?)mJiBoVe!J|g}klWX-$!um6R|dZCphafi>k^`7!vrCx>+e*U zN@l_%PieQmXjkve?nQ@|SZwR3w}?x~T=_9M`R4>Zg!|O)&nD&^cN_q8Oi@c4U6AeGJ6*VUF-B?;`lZq`!?9$2;P$%$%WpR?t?!ZQ z9E4)A+Ri4_AAagjSJJ%hzVpziU7)Kx@om>Z*TWnvjTW&6A(gSf3)7p0EQK{kox6BD zbo0NHqY_N#GPX8@JxwRS-7=u|YXbI5W8;hKwH)t!Qtn6-UDD4{U&@&>VHMC5=5Mux 
zO_~15x~A)Q?8|D@@%x)P-^N6hOy@az7^Bktpqx}n!4p69L$WQ!)1jL4XF_HJd0%8R zQk1?POw_r=!cwHwth0zP@KAdsWGMCji;;Xd;u+_NE^d=l71`J^d*-UQ1^mt($W=7>Dng)q>D4n%y7%qxjKGGz#mr<7h@l(SOr=tiEO>!zHnL0o1dn9IN_Bj90&8tVuU%pKFAYWQhpW>}ie_p+t z)8_VcoqgBsW7~Ujk!~8J2m9DlMQo4!?xsl^@(g)P6t&y^Fyve~-TwGrdI_N;TX(`( z@-ddhWJ#n<>~P`bk2e=Qwn39_zfd4OctqyO9vA6$&~1JBQh1Tk9{MF{NpR-hE{~O_ z_M=YO;bnPNbOWDmCoJBf#CnDTWunq;-1xjnYUzq*N0PL!GTJJM58CK?kH7;8rRHD< zd4k!ULX_DY)H2!i6CkHfq}R%36iXdZ*2|q@uYJ>QS!_R9>`KVkd~ohE<8r!|4#Flw zVe5Mj$GM8#{cNv2zag&EY?(dFdibe0FbXA%QmzOZ(^VP>nIa`mTfZhBZ0%|pTW za2w0l7-GH={WEWzx>qMS+{d}m+tua26aIK>dP{*ZzB=k#t8|f^~xKf-6|D zmQ<2bxTMRIidSY0a&d{(rO{A`1?=%a)duG(F5!cg1DSD*NrLtZ?NcbCuv_%`=;_`n zCyC_PHLEpQVL6NKu_%*?;t7U&SuA(A;@h;>Fzq|zhW&dkE$>M6FvECKV<1;I(wK=o zqv`9DV&bKS<{|B&kIl(#s@T>K|IfvY8?CNR`4ykOThr?L)==yf7==J2YePZ)ow1kb zDWZW%k_%Y_*{eqnwVH#T)kIh*z{F$GICW&dJXX13wq{mJ)oqc4$2Wce64W9`xv;SB zYN>mj86b4%NIIUWC?8kk<>lRO$!qQ!eA4iC-sXEQof*B9qjh1x&)4jV*~TPerK{2{ z2ikN>&4t{NkOODH%%T1A#MO7Ltq+jUyXUyR*m_$F%17PRSFVz5YCTm>?|ogOxuo5y zsUF~hBefkuRYB%~9YlT4H=OoOc_2d(>uJu-Od`#KD|c~ISHEp3i4aTsiVy8pJ8g`t z)5vvKxN$qOsoI^UvuExc{;{sA`z@h|kl z?;6{&n=UtB`6rvLA5%H%+l1ySXP;m=dO@m(ZRa$fq%JV<#{S{tE9SB0q@}CR4R8FL zN4|Q{u!s@b)>yKP3ztZZe(r!-DIib0Y!@CEa5{4+CU0v>{ zSu4w(BB%PUAVl1*GK^-vL&ovR11qI}8SZW1q$;ATaqyL7A@vVb0B-J3l%!@3I_jj2?u>#DLN`Bu(;3gm!N7K(7mTvO5o* z=0DTbmF~5Z2eAJtJTFNns}2;0uECi)m>qfidYXB`4pNbZ2OcLj5F0~QDY+w@a5ls8PU@H`mRX8pu;0;U%c=W z|2kYtHagT-oKlG&Q|?XeRa8h?LX(NZY&caWqpW0gg0wzkea_El{*gp?QtxYIF@u!r z+9P_)qd!*}66?d_AL`vxREp-b`rLoMNVu}8sS1_@8ZCNvvfd!A6Upt-QR-$R#42!*n1YSG#_;RLBF7=;3f3&LKh9_~* zU!l9r=B$k)bh#sdlE-Ko@j{)soyFgb{l35Auep`Mj`kDAcsJC7Cx*D5x!O%YG(9pH zVsAdT!V~=T$b9en#6P32`o8%Nj?ytL4lD2R$3|&Wmyb6I<)ajD+b&rWeP{GMsSmYF~ zM}C=$5-{N{UXoTF5+N397W29~Qe{cW8z0b3Qf=Za?O*oFS6XbN2>nnh@qGr63hBk8 z-*>2HJ$;z?9NE;V9lUPzE7XKN5}2u3L*FB2Sd89BDi|vtpF}?KHgm^1eH=>RWHqkA zW|W=5V9%DKB|_(-ypR>HUgJo&dGoIoCG+9NM91fP3mR&z$<)o4&$SMyc>+iU+mCuL z=Trt{jN$|H1H;fVrQ9;}Kel8yXK5JT_djN0{Y3R|HvdzYl7%H0xJ^Yso`wOR&uxau 
z0IEd>o&lV|GeN`59E48~Lgh7eVD3QSorawJovMxtRl^*NVX#L zZ948t9W5_;ZCM{ah4JW}4iCJJ?>hNQ>*aq8nmcum9S4@A<2m1l%^T{^>AS3CCPM{e zbWb%HWen$UB`Xs&CN3gSetC)no4gJPOZ^nmUH~f zR!@2SO)>d}1QcNZ2}K!{-=?KSb^LwprYa!@1HD^at(!!w7AQzwJ!+RRw>;AZ*B-v= zoStxfV++lbfIO&5Flg2VEwj-S>klHMus(s2FFsXQ^9lK#H>F?DzLVvc37XpYUFQ^*r?@}GBW43cMc$3{*)+si5=SZ(bK{6 zv1%@C9-|Id+oCy%u;(g~<0I$?p_l-TfeRnUfzsDpHBX_&P-8X@(#`3O4cWcP*AY+D zBop6Sp@a+Hz_DE zY(l&&xt%3Ny>PFm$zqE5SGbgF6sHw0toGK`;2gCIzqH*5l;3G4!xsDd1EjF|p} zI~yS0A#hv*2fLa} zV~!L3s&(syFeA>^i2iq_9X8An8xxv1?)}xv)2=rg9_BHp!+W=+IYymH#Qh3`H#eYu zC}(~#m-Yt!&NsV^jXjrIGOM~tET;StXI?-_rHINPhjIexfrP?X`8s@rjPSlkul#9Z z4`}MUq8fG0Rp4A{E%uK=8ej@FYM20RdZX%uGd*e&TV&CukuDqi^hG1k`~*bW?xy_( zY0~CFxxayX`ZG!xg>9P8oU3%*XI*GlPO@v+s2SyYho4OVFr%AhFZ0t%m>?Vrr$41H zj2r#7FSQ=Me@bC|b^)M85*iK;d@?fK`vX>uNHxr*c*%0i6Dm(>k`dI-9ym~}{3id9 z?6Gpr^K8Pg-`OTszJ%pH*H^Oc?p>2L)X1L$p|sBG5o_HiECY}UId7msmJyc!^8FQI z!IDvvA3@kW1tvnLjvHx~b3~v5>m~|K`|sZJ2ZY@(*G6W~d1*VoeQQu2uBmX(EtitdDmS3NdvH=1OAdFJT1hjw~CtN=uZUOyB1I*kb4GI8z`f?b;U~bLfA2&QEoj4c~ zZjX%o*ar#BJ@(f)u-oA3QL40!F*0iM#YYgdiX`0r02})i^icR{uV1d1cnU`LTLI?j za3; zm1U2_jimE*E3b>c$;=(AZKa|7tXLtgSADKe+Q@t!Y5MWa|=MwaX*oHdUPURt8(j)`{t7b z^U==qbr>J{dub-byZ={TsqPWmb52Wx(Hbh4a3>%nB%zXuoR2nn(3jmq4-s%rm6vyC zb?vF$-tRk4?+(tO|JvPmMvCu2Fb~NL^T#k|iNMLC1hpqDt_6~jxo=%TzfGKkF!O=u zratrv7vo|TU}l&>jp^kaEPkRX3bTtqKtp)x0C&Ig3iyie47|Ze|XKvj%{LSDZXycCBy3T__%|q9G`io zfeO3jHnEH*f24zO((?u1j~B)CnwCam_7+mH0haM&OA5&sXG$WoWY69J(KZzZBw+Z_ z$So1L=n4Rpi>(Kq*TNKe{3F1a!yQ@-Y(}7hCq5zkel`tb&t-0jfGZ0XV~vQr3j{qt zIa+rAgmyScFHCB84I1Z8)Y$=+2AFbY&kOe|$MtqE^n))vXdh@k0m2xEh)4pK==Px; zt^`a=Q_#|i-}wN8tV(4hz!7|Ru-;Gl*Z@Yrh2GXQ9Dt^-+X%EOz|SU=tQBTr%D3=W zAc!7qdv#n;>AZ~kl|Tx6iK%vE<4)jW+q3n$SY^}LmRkr-Jc8W- z^Y!`irLY>)1a1h9@8AjoVyrarJDEYdwQh&D{GV%p)yE8!TfkyB0=pFkTORGl3 zrCjKR$jB(072@OB?y7VD^Vd#RbHR*on%Q6$0<{C53!|gA*U-=kO#7*U5c>0YcZrDo zwR}RL1UW733lRzRhl6>!Ff9wgltPXg3k<7X9eL{n#tO(-?~5-bYL#%8!Tak1TKbhW zYAdsW?>~W#h>}W0)|N-5Bs;xJ`y?8y6s8@XajL60eSDL0SzRk3EuHF7-Tg9OXE)e? 
zQ=(z8bh_Cv3-<-Y1!!{A8rm^yc40Q$`7R9UE5P9M0qh+}!=TfTC^A5p(G0mtrf$8U z(8UoY5E0v}Qcyoaz63*fJfkxJ)n3fRJWuvS6rIM#VT8yUbKsq^4-JKTUB^Mq4{=>4?B%!W2yb!NVm z#z8+Ewg{vcfdCp+YYDMZQ(uS5X#WK;Y#FpFf=C5iX&Fn2j0(}hFq78)h%GS0*%Eyg z``Me~eY~=&hX}AT_3@M6h(6|^e=uOx3Oy>JqC$Z$75MXwLicY$0u9sDe%C4I&ukR8 zKpoW{bKa=NYGgi2pI`F};*!x4Q(VvvAh!vzfNL1+?n0BEIl^~|4?v&;4~)Due}dw4i=BNm-|)tzPjI8*+0?R4^?GQr%3zs_HfdP zY+KwS*rdMrRFK1*`dXZMsRmfBgqn^&A9{N1ZDZ#)A-EYgy=D?j0`v2~03JyPLIpqv z8i4!+0nov;DfPR7YHwb~M;BAKB5 z_rmMC|2?)9&(2G`G+Q|we&PWr0QPYPvTcwmmKo@^;f<)V`Zv4RQ;b`wlsu(X>*qZO7(Dd%*@U{2OE@~l1<35 zk$HD`+Zn{`n`@_g|3jZ5*ippU!BlgUHzGKkfvFAq{Xs!npm`az0U_`Ahn>6ICuN#; z9RQG0Ahx@5bU5Y7wmt{UVMs{8!Xja{jqH6AIfd?_Xv!V9(!oOKbCbT_uaB4@d}{iT zNf5qr{`Pd83wY%BxZV(hCZW`P6QfebF?h2+uuoehdvIqg+e5vMJ*xgNaenihM!kkS zNLANm?%M=9T3JPPM)c(k4Qc8h#XTJ9A)ZtL8=7k4y^!F2>2j~vW7>Y*S) zD7Kz?9_x5|xM>2*H4f|-U?=w@HctP#PywdaI$}F$$0TC3;r3Jbk>Sq&7{nABhyjpy zxnLa}i(obkAGl%yD1aFbkg55{BF1&F?m**B>xN5edO9o6K>_;x9DwPjpngC=rz|XG zP2s?zM$rAb9{b@9Z=psEV_|ktsY^f#hVu-&X6qN$(dlVF(54rA5);#&c5Lu)3xj1A z@TcjaVq7pD5fp@CxXyu45s~EtkX+D?^%NIkqFuXoe1!%UGb}dbkC*gx7A@sSjm#xzt88ucuJ*4atk_q1-|dKql#XHvR*v;* zG)fRJpBk{L z;GEqCrJGi@qyH1lGTPM$Cf#nZ6~dpn|KLFfEDIjTwWux@gtQC~=og|V1&z>X9^0E2jL z)Vl6FZ%tpJSIJ3&zXcG#d<_O|*xl-1I^kkPAj1&)A)Y#5X0Qab)V zls!Gdo{CbBptuusGstg@>=szH04a{u`;ZNaM#P{jt-uLhNEn&`EW}cYgkV*}CH|0* zkiT&8!?!FfcEz%r58Z^qBd}JNhjJev;QBF95fQ-G%D^+sYt+mfg>gtkt_xqmx!M!Y zCk7D~l2AZ`3>y(buQoW_5!DeZzWwMq!_EgFmIHzOdQfh~HexF?e?0&0sXR{5CMB?x z7r|{zA9O;#G$^S}7JyuzNcRB$3J6A0K%nlvE2AOt2^q6|)Lj5L-p{YtcZ+N^2q_;#P_wGD)$m<(eSPc+8 zf{DXoXS6U#e86@CN-UpXJJWJusfV==eFtPOqgIe_MZ3D{g5{3iV{r;9C<_6w8nJUg zjA;DLu=;MR&tDymLOS|s%vzeFfYbS&f z@NKs_-%N;uO-wA8RXXrPCu&?m5wEm{Iir?bCRWyv9>LR_Fe4Jds4e;G6*(-OSCE#W zNJlZh2N{ydszB>a^&0;D0VVY^0dOGzZ#oF^SziAp|Mlw^H-LP>N$8y+zA~jQJ)8&n zAQVKzao}f%kaWRL4B?Oi%e8yUdFMjCNd{!lVN;%$@yP$nEYrp$iKxE2fXmJZ>~1K6 zp*>nXg|`(}4^MDhE8pdrWA~ir-LrAd^BOU}N0LE&FaT%B@d@r$t(;C!O zKP@=NG2NbSGyWaO3@z%0xD|NNusIum;M)dek7!ui5T7HUVnE3^P^$+QF9pTKNQP3& 
zuy(Cz73zz?5(;bMyM_>>VgI}k6H`hNy;bGVQ6(F7#vB)p{{HEfZYn%PlsEx5#7vG2 zs&ht~zyw#!ONHp;U0D9J{9(jk0oZ4t!77X9525+83SKTVEDKZb*L`;B_TlxpMz_VHBnv}RyKqexy^f=Q7pipLlz04x_VY~L{7xhjyP zrV2h1GGLd1cV_}LS${wU3t3A{#-8e3iAr@WdfSwu z7-Bz|V|k{&xw+W`Z0B3zqudLQ%Wd8ME)X)pc=0Yo67-msz=PwBzs|53381X zxKo6cm64Ir-(nq+(@hBmD&Q9}3-b;iF>%9K;7X<)jz4jS&9!p@N0Bb42keDF&@Dos z@+&*PE-PmW;u?@jCc~4wNz5(p0<%Vv!1W7pY_UEd*3#BCovh+O9B#Jq;VVEk+%|Io zUp@;yAK)qmBXb+S!S)5Bx{xQ4wk{JzUvr;sJyOWm5lF-PqQTJI+{~)qhyla!rpT%T zTb!}hFTDWOo)t7_zmJ%QfYCtXF2SKbT4wnG)=>)hnScw^ zM}*MPbM=RjUT8Fx9UP?Z!n8SRtxC@>GW%;gxQkX=F{MGCTjR%V-23wc`PZVgnoTg%`2l=1Hj{49s$c!jpa+&NIQ1ES{?OIy z##H^ReaK2|UewTdVTkC#11x@0id{&|Ai$5|vVuxK1MEygh@aQSO0i%*5u~B`QuUpn zfcyzj4(xePNM3FNdH!lDLw06nCiqlz80){NOV9zXJ;Wq!5I?9xGy=9;Oi(KV%MTD9 zM$g%S=@RGA(3*asS`-99u+st1sFDxgX39AfR@umIP=EYQzQ z;LJc+?hxV7Z65&Qf$^;yq_JRsVt2Hqcc1GGn0Xq31L$YgV@^0>08yV)0zY>XN)0bJ zw0Ays!*o1iokl)szEE@1Y(U~e3F9wq74n%p-?o9*CHEAeFN`PM_qFq^oAFf5NBTGw z_LvOVnt^&>A5ioqmg7Tu$G6p`E32!=@m@y+8}JQc+ak85@WK#FF0clahcFc0t~XGN zV4L~2hLOE{)_xtKt&DPcyImiTrS)xgT>8cCswwoXLF4uy401{Y&i6b9iUD-PXi@eS*441Cd!Y)FR z8{pC-=Q~6#@M8=?;|iMI-V8aix|0=pzV}P54o~Y6`UuZdLPJNB*1A8puiGjyaa0>R zknWh&g1ujp8Dhl)#y^Ol07Bp7NkItd(;zd;(W(pq1uk;yL11tM6=)c~LySXbSRkBh zJ=#_j;Ks(r)^S_sK!Wfr)%=8PyH+}|Krre{l>j_5V&a2}iW(=IO0GG#%SspgR6gi6 z?02Yk>w|$AlJy}m&D>lA{M|ino3$pm1Xn}2+Q6PKn%jl~iXxIEQ;=ef4YY*2Kqz&% zIaSX0MqfX9=e&P)|F)+mv~{L2PNSq@PhZ0w_C8zNRy*@;$36k6fe9FxB1Sx&;2H*g z4c_3|7Yn<2^t;uu-uNr#SDdly44OsZN+ffmaKIxd5h%`cgBBNg1b2HJcjhHvI`V_J zx3_g71bf$8392fxn+uuffcp)S%aNRe9jBB^VF@s+-LP48}i;D{m za>#eb)$G9qD%Ve>y8_*GGnXif#QC_l*8?gAd-6=c1f(WuXSXEY05 zT@i3Q=yX&W@e4AL1k)R@CjB;co-Ww<5J%sDkD=7v1?MiZ&>+9SB_cA#C8RlAXw_Ef z9?Guk?xU=)?;a2eYkH~TFu~8y@ALI5lx`m!hbXAXs4P8fp;O6Lk)g{?riMr#j-Xx` zsEyqRMF}E`N8J1%(4duYxQGZJ8)Hl zAO_+4z}iqgfz5p%CRmWVG|UFV^6&xm5g^(T%LWKO$J8Dn4xmseY6gGDFk4zEY`|VX z@*#LtFrmNzc1_*IhtCLjiMm2eOEsuQf6upt^4QK}AZ;+%cGTS5YBsB3B4t%Q@Ipp> zvY;-q4F(RVzw>qLZ?>eCX&JqMLn0X;vV5d_%^97Z8C 
zQvjntBJ?m1u=%(#{qoGn6{;{`Z}_vLqhqmL0^TX4iP%FEB#WVe67ecKL!Aox@%wLgNmBIF-yvL!e{ z3ixCpqeg1?P&$=lFo6P8SlKO%9=cW~%;LzoKx!$v+jT>QQ$4B##Vr%IQKXxkbE zR$M28CivZ41CQBh$%VOjQAe`0&i@^~hjWk;&8?lD*^+{wi3Gm;S3%HuVV(?j!G!2>? zjziTT3hagbiL_!6u*L2!B^bd}J8kX^XCx4@eLY0oC{j>75$*kqf=Rf>=Lmh zgmb~##zqe^BE*W*9g2J4nMMMEe0QQyzFJ>BTxg|2SF0_G!Dg{HqV zu$RjgS$HKn)o2s1*;UD0wp(1BS}2P#yu>;&A8}j`P2UXBi2vN~z33 z{XuZ%pqYA^1rdaS6yVbgj|cI5fyslXfusV6(M?1eO&n6J$79su7Up^yr2hh`Ex}uW zB`Tah+z%&)XaI2=ymYYj4VFo-1@1=?xLzZ%3FNI9n3ywg`ywPFcnN*=o}RH6WcTLuxS?|`zarnOJIQ5l?v85d>-J`z`LLGC%JpA=D%40 zu0QhnT!Wh}$Zui%UQtOY{7@83E-IA3XbZq#0VDqVa4Goyb$ZEG3}p`lu!F#D;J<)) zfDT@o2!sdg%?k|h54)&G!rc=XAr-2LCQy`92R9|?7|g(l1;_F%FoEDgaK1c0TxS)` zCW71)Zk03$fue`XP9ZRY#%zW<_>V#9<$5Z2GyGIG!&aQw9SWXfhPX61l93X*5|*p+ zz9zJB@V*|w5t}e)gMs$pOjzLO>SIvzd{UwRBF%pPPADC1K}8xpP1F=LnyKoIa-Ifx zLq{3>gS(;T0hF0*U_jLtnv)ID9x_k^6=jve$2ZVio`KO9+?6!^{90PNs6P!l;e~}4 z_|yT1qm_xxv4fCbo!vsqA0)B}zxToB;Wr0w&Oldj)c5^4+LfORb^R4|ygz?7R>S3I za%O-*{|{^M(2w9i2cRu2oH*ZHb{9Jn1o(!}jcE>El3PbWD)1X1Tu_{$Q!jc1p{mD# z@LdcvqNFl;Q;ykf@u1Ht^q&S^bKF-9h2T=2*~ZSZf+ZTVL-}dMSsuy<8Ng?Q=li(O zzw+7bwVq9;xal^eU))`~f+p==F!SljPaHHpdL~}=9?L!O7 zv1@@^oYWQ#4!`W*{>l!)JEKO|P)(4=JFGThsA5BZNpSeQ%{0ELOE@SJ=}SO8DP1;R z64FxAeQPw-XVl+4FYPEqe$?=!t8yafP&t;ap#VZOKMIX&y_q|Jv|#j;wsE zeIpwH2tmbOn)&Eh1QU~Xi5AQcWl!SbpLkQ~Dq(N%Igu98ADzEGwb=RK-4H)sZa3*Y zs*wdZjycbmP;Dz-g!KFAhai#f+L-LFq(fAJ)AhnS>a1M^OJox_uS-}wqFTrqP|TXC zPxOOnjyR!Inv6$RgJL8|b~=O112j7c_D&`u^Fy_ZIb>yxTfYf>849r-6(Pugfo$1z znpT0lp{ekSsEPW+yf=!bX|W1ib|s!C=3MzwEa_ru|C`O_-~KLQ{HCa*s#0PI)nt}e zTbpV-C}=v4N@-Ez`y_2W7j-sA3((LMH985jMwFp>1)4c|PK;ON=ZYcJ~v zZe>zzHa3V}bk5G%9!#oGUNQ<)M6!Lgca<{M2}9|;3EA5}0v-PC5e-(m4MuvJVQX1K zNdn(TG}_M=&Tq=Sqf(-#NozI?j_PMM0)e1kb*vx&(nsbRhALOF5tH{~|3X*P|6k$M)X@_*P>|KwIZ6omb!CEW zo^!3z$5QFB<{KjEv4Y}M^nsGUu zSX^k3HtM4FR;otwS0C7Voh9&@mv-MY!4+-$Zoo>`>_*A(Kjlq=Gq^d0FNB4~Ri-?P zL9rIaSQxmaNJ1Z_v@&G*bbFTSyK?buXz+JAR4-}P_<(E%Mu^+{(bd&`ymn_hJ39DO zTOuZno*1(wkX8m7^mjx^r`RQyuWhX404Oaq&NuWd+3}ngk6t0}eyO=)v23nZxjCpt 
z5_ELzH;$b#q{UC)ET&1h$vz1JcQry))2Xv=2CRQiIas(svWAOi@8tqrK^xSLKf%vhSsllpdMW2QX=NuywvWr#XsBx~WHF&Q znXAZ|(BK>XoVBn>>wVmAXRzIk-5&O&A>w>|Cr3NGjH&2!s@8?2gkM?=5frpJGq2yEJD^(ivGaFELcMa5ZHc9}jW$W?J2>6Fp#`S2mqL?h&` zNmB^Jimh2&XmOE7CqYe(EfX~ILweiPG>VnOR!X|R+)VrUl~!-(x5C%X8e3IaUEf9` zE&IOGL*^Ih1%`__5FA2a`I`N)=Z^WYXX&Z0d$2k~#LdXT)p@e#@v`2a+s__^n>cKE zWua3?$nS^FKa1@G3uZ`R@FK2T$E;uMHtC=FrE4Zl$?l$>6Z-2rSFJYVzVfLjE$(?J z7InmQ{hi!<6w0;j(AL(i-f7g4@wLPAa`z&i+`O}Dt>#^vr%4q*y++o&QT_I|MdoQ}H&*g6%J85g^o#%0a?(>+bHRAWE zJOe84MDwoh?l(k5J;tHfZ6WhS`q@w{gBkDm!Ga{rICD%Bh>swy;QPGHHQEy z+SH^6P+%Axg<#6}n(CX3+xDvYN|$ZpgX?|uvVj(+-5U;(3e|h7gZYCh8+$F1R_EQX zd5RsDdmhqULMgo35LaD&x!1)?#8a<-a`nRBC-`AJi(7zevQw10#E>PBd(y4{^Mf*RI#v@M zN}x@kfLSoThl+;%&_dV%9^3i! z(MOGwgNyZw&&7g8YPhSbl@?@1jnzS1_}MSRWXCAJs4Pg9sQu87zgwV|vwOpIIQ1kZ zynJ-;Ta1rJGIXr{i)2*{4CL5xV9t&s&v<35xl@P?QcQ z6}~0N!nCL)sqFZbeZgt=H2MhnL{*iN5!U`@&ovU_6!JT!nOIo9q&B|T+i;bPV2tJr zo(&-&U0PZB@HIQzVH97roZafVDi_z#wLB_1mM@U|7eOLLoM_@mi$hTi10A5lCxf!J z*>DzRt<9UfiVE?KNKj^0R92L#|o{kri$psViw2yty$@zajs5`Oal(M*TMzj16aIR%xK>Q-6HxjDwG z!r$B*@H1s)_@p^aejdyIJz((o`1rJjzGX{(TG7#NB?$&9QI|_uV*K}i&i!ghtPy)X z!r4iHm0x$jn}4txy((zLW#RQd)Ku2Qx0iWRY9=dvmF&^E#h*jmLf2v*^{l8f$4Czm zNv+mp8Dige5q(+eC+aoyIAq>vlu9B-VDNDVLFN~;%MzisCN*0DMH-)wiNMm=VMg(j zfzeI+KhIi?l@f09a+btnOP{2vG7~SdHtQr#;vM6QDqDa-QK7fBaRp*j6cmr3XL_KDJ7ire}3qvS|PG#7cZFh;r{{ z<~qrFk<9>ccG$JVVg>J^rfY=pSd5GsbfPb5ntZETFRLq3G0ox!NmOa74YGyEvd2@+lGSbU z?MoQrryY*!9j4;WtVonUuNw9(Q58I^yxpJg=sr0r(@M+ub76GA(hRlRK~d{{FE7U{dwn2?su6k8rpHuirX% zt&w=~;-h{|ht-WnCXKgWJ7Kz%-CQ9uPe20~A3vI@rB$xk@7$MhQkQ~tr#XeRUASqu z+^5K0i1*}9zzgzLlA>03HK8-q_di%{dtI%p62ScL>kP~{6E|j#Z>W_)|VNagd=xBU-_J*M?IZC=9j(_~54}$tm~b%GBqU^2&^{3dJj7|;_=-Z;M)5{Rp1C)OnsuSAoAJbUh_>XsJP*vw-{A1BE}uk5=#;*%xynS4!{$JglgKJ z(|XtC8=!5VdMNm?=k%?jb)*TQKcZX-yb&v5IaQN5K%k>^4No}+)$8Y0D=r5n8qf2T zpOcd$I1$pHU`~y`18zC8;Nx>AT(zRVOl1~hQb0l3UATArg_!?>`9`*&Y%AkuluSC8 zpuHs_NuaIJ6|2G=f7sHwn~U2Oxd~#igyh4Zw?||{YspW^3urhwEsI!%7@~ifuhhS{ 
zj&|_digE2MO!DdURxa8slsm&EBgBd1-eM4bkf&p`!)6mbxquasr5<2lCoz%4%@dVkNR758w{AX~7`vK=^ znyvDm<-;rSypBp^GSQsx)r&oqbg)k<>ScsLF@1(#5nd!0sWjvvwDG`k^S06UFLGce zoA$r`1s(w$hLl)KxiJ58S?u&@Mn>6+_;Qh2rqBKZ$y4>Buv;H2aDpFdKD+kv?G0KC zpLZCjMUB^HU-^)?%HxcN6@4=zq%(>X80*6%DUlJ2IX|6SoS2AJh+!vrzw449D6uJP zyUDZUF|~P`U^cPCVsPn=jy+r8rpkTeDrh@z)@P6)OOx7A9QdlZF+clTFbnH;?_G%=cMYR+>?7Jp>HMiKGy zXfgE{MP#vMfR5ObS(<$N*EM3hN5$N8x5P}x)2}*uuP0^EaedUqR8ASP4$$-^N{&H& z=WotAogybzy?2CZgN@_FL~!hKytSS7=_!fGqr3N`HF62yiKn@C-w=8{{~%gKU;zG& z-J*=}NJ9JsD}dyfZ(fCz1o#zbqUnkGLMMBqJ`-A0KzisFNqlCpCIDkI$vU;yIeQ?) zX0{tz8Uc1N3OO?~225i=Gc#jl)UJE?-OdC})c8f?vw2!O=DF-P?g#j7Pdty9@rxJT z-@~BNH^thjHHW-{gNZ2;Zm`Boq6EJ;K835-iQN!?3upsNYgQ zKC`;?^-?zeXBED=&ARQ)dxiJ>V>y~>LYtEpI+*@H+RiGh%dT7Zf26ymyFpsIq>+*? z>5veRP`bNA8tD#cM3C<8?k)l8t~L4AUI%;a<9%J~(fi8d^UOKsm}A_(duSgOP6F9! zW*Nd_3laLiUU9~_959>=|9qa#Vy-rJ`buQz32l)lWj7%~hWf@~1qFqWd(P@3Bw56S zR8camt3c?7{n_4o>*Y#<(9_V9io@utxZd37H7LI$>6UYfa^3o&$JOHZ?(KIA-!gln zIvanEk1DFh+ac*6 z%pgNS`OejOVuO(ez5!h>?N=`otalE`8lK@>mx+GcuKI|rn_UwFP`3??fW%NK%Gi^&6J0|#?hR$-17?ZcdCMlS6|7l(oUbJ5-r5Ovv=)peyIx!8-&%_!XL0? 
z>zTiZ>?t>3G``$ni>eQeME{tzrDUK$%s~3^Q|Avr51tW0$)SWoJ$p!5EG1XM%7L*Y zNNxM70plAfr|>;i3W0UEQL!45fwu^FsF!~OClc1bfKdovd>cmka)`<-zI~ z4BO=v-;s2E<}*Pn)Q{{D0pFQ{phv6GmIgnleWJS*wkJ)3(pRS}D0McJK0#LTT+n4q zX(;n|%*WXpX)hV#Z`quS8X6j2b^;C( z?9S+YERo>u?k)vyJ)@6YjJAXp=-fm59J%y*2tw%>jGlTN@h6|%$~TUa5PV=63>DUX+%sO*S$W|$4GaE9Mkj&@?kqZX%_siKa)X!O@+KM(~bC0|tEDS6R zZfwR?em>)t9H;^`xj_*0^{JpE+5i2$J(({EPbLgFs> z?Ut#Tqf~)TSx@b$0l5GZ(Xv}}&~xMy1og*TKdm=XnTpLZS8S-!DrDJGWrQD^W~u`o zSm2-qPWjVWqs189Z(`-EB89QCN{Y>uxy)+u{p$xkdepeUdhw9J80nUyO2Od1OYA|+ zoHaO(euVF=Wctb!MJf>SN#SFb_GF^PbO~xpD*JP&KQ~~IIgVUq05ptbR*nUfLRAFJ zU^%`rkx3cU|D9Pu_DS(sr_v_D(CJ-TD&x;FpqNsIyvtmHE+gFT)wWjanR(0N>iU!7 z9|P+G6B3S|a+pi#TSCWBTjMs+FgD!Zn0_^S(1)3s^(B)BCZii1f541(yxc~5YlqRE z+CzjginL?D4}0w3(xu^>fP6ZKoZSlW)ykGlD5-ZoM&oz336+XW&C!l~Z82ZX71Y9+ z#;I0To>WM~`n9uqWBT)S+r=$s!?Tc(5WB+?K-4n2y$eSWa(ouKZJ{6!$D8QuHmR%{ z)L2GCc~uC@e~{lJzOQ#UMbfFWwLI7DI%oX7S=6t-tzGEm_>e)j2`5suFd7=-=jV62f5U+Pu!GTf|G8GuyHLCIDbnr$vFWtXtMba3PTDG(F)|sRivkv zzA(qc1SURlKrjRPDO=#9{-X11x05^-2CM1f$V22LOlKYt#*4lDumkbKTfAD5`FG{o z|74X|i>IZgS^YA|nAlpY+(cM&ynQeOR$NU>*{BW)4I#XCFuhU4o!)e#?b~c?67k6#%miO%rNVma;m{H`bt;J{$jLPtRl&w9NmbteN=w#zO$J6$)5Vl zA_%SGt3;uufXFHTAT-~ot9Yb5QocvuI_^_dlCzAu>S%`50K&TAl3s*-Nor0BgD@p# zZy2-O|7ZcYILg+Na9Cwokb$<)7ys!=2F2T(8YHIwR+=L_*YB%rAIXwAoeBR=+xYAZ zTcdX%UYTNaiRY+j?>^z;;6N}4Z39hZQHcl!5*S6`AwZ>!=VA~q>-5g1wDttJ8_qcv zBmJC-J%3#&8$YEs)HY+^_iYb#=N#`ff8v2mPB>6`L=tgHj%Ij{H)?tZ1k7=dAUZS< zKi;+YdU>uwAYgjH=1r*GPrX(w?=QOkK0fav2>CSXv5%o#Ri5M}yJo)Pt_ZIz-KSa8 zyz=*tNw~#_92kOht9Bt6Ut97~Lq}m;%0*PzUF1fy+_aOSLu!RuH8M(Pmg8@hbZJQ!yvuAw;N&kt1#xk$ks8|U?brl_k)TJHRLKnq zesQJwf#Y0`h649-F1U-#{c)|Bz5_d#is^tI?t-j~d6D1`F0gIG7jfda7EmjF?dKfq z>9cok9-Er#=Rwp;(wdSK??CWLvxDH=?Q?}C+K2LPwnL{pc3}3yeSS$=P}%x1e;^^# z#nxRBXX9HPS7pfsvHQ}{g*?;yu=chmq`Ic*L>a+38_*sh=joPZzN%lOvAkfS zNbW!;jS(vGvIoGFS9qVJG_Vzn&+imof|X7r+~CaPzY}{oY*u|!9}Si~r2+Vlil%Z! 
zR)6PJ6||*(Ef4qQl`xc~zJi2Ey!~#8*os-56h}bhj$Zn#iv}suZho!r)j$G%uwn*( zkU`R$ci_fZ5RNWgy$djiR|F97n^0RGoEIw}p2NOp%UnWoFcBdo;-(JJ{*_sGQ`e+&Pe z^?rdrdqSdvD0%CVNJU1ruF5o+VOp8>sBXjxN7Dk}(TCozM9b?}jTT1v7cOB_#v=H` zqi+g+=SV?$CVIm3GN{4_hDF^IX5m>ZZvr?pL%~Mym(9l_7NKWLZhRs(i&AGeT3agt zH*P6f)||Q4)gN=eMQ{qKJ{Zne*U$l7W690NdBsV0?Faiusfg)aGO!D!BE9h7(T^mj z8aRtIxM0x9@+mnvp~Y8R$s9j<_S)P(xl`HS^mzpY*?@w%)A=PF?8^lb2%B*wN=nqt znuAkPX3(3Nsj(104L`tvNk;8M+L5y@If$EQHLDt;S-rgx!`kJzIMDY4R z0uqy&7S;tH`iiQ?X?Y=LaJz4)udCJDrlS&l1-4B}uRNbx#6 z_Itb^v-fkj!od?1s)48B-NwVGPHI}|=XK0wH_fin|XL_`FTxfnoI@gn30N_~G&!pQ+jNk2~@3Je(= zQHKpEs}$BhGH_vpRcb6n0um95ZI}D%E!uFZs>7^ zgvxqkz!&kkY zVH?GMi{&%64VEn$^;}e7;$sBBq3YMp*Ec^n2t3@6+JfOS2<+@h_V*(SF!1W4tHRLi zRsmyF*D8ZET6YJG4}ZQ=U}0@2*L(sGD}-^}%7gW>&ZaAst2K8$JhI`)-_K9^x2@+0 zlQ!{78KP9fw(Xb+Z*yn(GidsP_Okfk>$1~z_}uX-alH1c$--gDgtZcc#YXJVP;?-5 z4Cf!66`$4oV>0$b941yv_~A=O`i=r6iH5!{>B_zkzSq+esCk7nyORwN)4U{T?ARih zhn$}_=y=`lMAG;$LrHD)#qEAEy2PJhSp6ad8+3Bln`Z85DTV&$!=-b%V?Xyckwm(1 zI653Ed>bLRNSP_q87y4Q3U{Owv{}?dNt3fuf?*zQp@aGq0!US;dLuiITi^74IQQ;q zJ$Uj>ist!FN}WwELWZ396hi7b?+3BgKHpE~w?*1fOppA=G}|#bi4E#QyzpHCme_pI z*pQOsoY=(OU((SnLYVe!#LBVer#c7aF|zr7o~uN+Ez4of+lU6QByw=Ug0!wGG^KpW zD~eMoTM4dP=Y$H9L0VUmibc8MM|9;dq8B+ybxtFfxCV640A+>NsL)3RhZZ=ghJNs% z1My&Q1Ocbr-G$}IAn@YA1oRFi8yf}~IQp(k=TM;%aMBS(JZOVSti4&H{z|{g*$BmL z)r`L_a&SP^`KEFn2#8LV)1&@-h3Ask9;!aR(y$U~$uggVFS)l^UWr&ZBA$-d@ zobV6s{(-*p%k%DqvV*FJLNi{c8~X3q-`5Npu=rb-V5xc{y22F#8o0ofbsdyTG!zj* zL1(tS%Wju5nUEOliKK;EF(-}0;jTMUPq(#|qsAiT$v-L!MW6cwL_voaCT-m2_T93zsjR!%V zH>BWq#`2}m{iWEbS993645##kT)o1p4=Wgl)Pub&8E8^0oL|jFJ6reDD;Sd1G6@7O(95fBy zL=~{z0Jb7xpneVZLs~2W2??l3O%DoUs52(K%tADDmy_vmu*Vu6^q;H0pR0@|Npp6e z=I57T5W4Zq62N_0Hz-vsWU2!X?zTQEARQ?`Fd4>a$qhmW5EQrX4F7UA%$apM_!WZcBQ5o5hysL zcW3>dtAU&~t}mlP8evLdL3OG{OYJmZ#J~xHfPesW-_Vr+53*e$sE!6LMnAuyj2vJJ zLXeV@@-jf!>)}+oomd@4l)8o()YEwF?9laHx49If94_WCJsyWqaBy(4G)G&wcWZOv zDQ$K15?Q}h^W1W7&-<;;t5C(A)oxg%DmXcDuXsOuc6A}-|9IfTVScfRh?k`Sj88a_ 
zFZdeagNj@PKKm;$IJOr^?<>FVtNge}+N#D?;s?c@ZT~Jxx=FpQ>i?8_66*tFQ)1h# z?jIk97M=l5vjK=YEdVh6&TxTsba2R;+=p3QZF0qav1D0s*+gYou$1dmXo)%FrUV?f z4}pO~5UAd!?;cQG8X;+VmYIcX+_?SgaA%y&hS=0-ty~_<+qX0tjGVXU#ktNA- z^E=4Z#B#^y-mH!$4Qjl_jw>k`Kp*^D{ZHj4xTJs{RITiN8>>5t=>lCA`c0d`G6O4z z0#IMU+2X+t{?u5)bc=CPdWDllf2WFp+`co7&oPWxO&y1-1{A211@7H}uO*rN>9w*} zk=S5j8T*$Jbgfn=CZKc3@W2R@TQB8gEJ<4+H7%zj?9bCuA?_y)+_1%=8;0{CxB0Zz z=BxGRD+8cy!T?e(ny)gS(En&+;{gFPdONzPF!K4?WqT9|?JowsCi-|#IV@cJ@n z1Cxco`vO-jUGZTa#@qWxU@`yQ@8R0jbU%9*N+T0s?$P$=r zNpmdcj?*~8ZtjK=~y2h8h|DYllC z4V&fzf}vC3YO+49X&|YuPYTA<*c9yGqXZ$H9x6|(b$3>eeQ<>r3PT2ECK&u-PA71V zl{U!8K^Ib{NcgE}X@vl1fK5O<-2fz5xrSC?7R|El{l3N16=-?C%qnFAZ1%;`qGn}t znFTq!JPj7&fQ1fT*bhUtR3S%|Vs}XnyvHZbI+CUL+e|o213DvO+3~QF`SUN%XGPYj zYs>o7nL}hB<72^2v-}nX_+>cI+XH4LxP>j%`57hjNUus#OI$*j3)t)nFNncD2DiU# z-MTmb5DS;ssY_fAW|)bEJ@5fy@&a`?T;_2CiV4TMLXDCAg=qXdi&pa~1v_kVauRm& zyqG>#Ubfr+xw1aoJXXvVYpO3#FjA0V_{Eswm7zh8BuZya&#S~@KEP{rfJ@CwJO`A1 zYu^;{XQ~>HrUf-F_0!6<;KlSFBJhMt(JA1-bTn0TAyO8-5gE8!4*D2PG{*K~uly zWlO<7d)JwR_gM6}FtIY!GKloZRFe!1)&PlXD3-ZF|DjUc1#G@zl9GY}keUO$Dwenb zrvu-nC@Hp81L{P;iQ?VYuW!Lhce|)$fgMU3%&|UGW3YYYT>_sXuiur36Lznpfq07} zs||u#Bu#hd$@wTdaJbt0%Kze~rh8FSG({{&6*qv)0K>rQHo}IY;Fw)cL;23oRKud- z5Ja>WlF0;^A@SO+LBQq&Qcm<=I;cTLY$iP@?ZSX&mrzp!C{Vo*U=qoxsbN4Q=<7FP zDpE*hg92_n8oL|P9Q$M89nxjM-FM4D8}p+IJ;>Pv?cWafU}5FIu`-kxsRFDtFoHn^ zCOVrDT3LHj#DkW~;9R6QNl=+;%d2OSLP)nX8}H#0njZxwN5#I33fps7=^ zet!>&gMnhjl_!(dK_~=gU2lox`n0l&e3gN683F;G5$(X3Weh+~wtA+f)S$_ZiG}3{ z(D_nNW||l{#*c-`F=t!V|ELr4m2N`F9inAuK?hFb1* z!HR|Ay1K*=P&GjSDMy{Fr$(&2m8punOg{Meu1J;19eHnxPCg({x2hht&nDt=ITHq| zjIR@=s5Oha>QGp61B2alXG%TN=<80X|};b92^~C z4*=v81tu&T0{6gr&wFkV9)U|~*h`KB+;f&Z(h{}6-^(Irw5~~2aKzcIhbc;J@j=oI zqoEWuakV`VajpXlZi2<^%`f=%spAIB7(TFfE7TMN;1g7aHmCT&-yHx<9r$X|BTZEf z*Vo%QIV{)wH|p@ixp3o|iceSw{&MBa<^Cjljg()T75h>q&X%I>Cgt$eO(aagahBA& zDSYGSg6s8G98UMEpFaLX#4pd<|K}7(EAV!yQsOyLm_<2h>zjSy%6-dzr zILv>C`T0`W-xMbCJ>W48YaVUbduDTb2(EWwU&waUYj8u{q>JbRvki%x<$qKET_T9c 
zWCIAbzN1C!`ED+oSBK2fpKqPT=ihxBN=f63`f8GI#$lEs><`_HSq!?nqG{JuE+m7m z+%brv-;;R<Y6t_gy4N{GBzBxcxLn3pV40(Av0eI2csQ#_N(?{ z)HxTL>@JXnZPHOzny<1*vraP4*O=4YyI%8N2_3D}K=*!-Fg&>OPUEl8I9QO`@m;>R zznfz?L|)z<^SXCF>fARev@^W+3ceC>+QL^P5egkE8}0E_iRC5mB986-U~&+|Od~u~ zY(n>PulzCl#o-R0Fe6xbLwP;i>*eco$P_r5M1?1fm^=i;hBP>Jt0ze^n0FZm?SL0g zDtBq1Ez`Q()Ay!WQ@PZHj(m8)OZU)IjnN5fZ)gZ5cqAb+skdckm#;Rnk&?qzyxOrk zZ9#;OZ@l=i4~MpVpnq!rE5t`PeoO~ZTuMX`jT7jy>_`y8aj%%k{{GNuE zW^FQ*%;?#dE%?e&x_mXHy%1ZZ12{pGEU zf(yQqla=Y%8ppQ(yZ1i$Dm+Xu{N+=BoVDezCClG>>Fba7$~$OLn1_QgE%AF=fsE() z2}bz*A7~A2FMjFRghq*Xbaau^qvgffPieiypMT6q=zucKF_w;elM=yPd%lEjFH|Fr?jM-AAYj1PN>_-r&>9Ef?);BBx|U=DAH!BrCC4e~KDl48}Fj0`$j zx3MJkLqQnfy|9I;`HvX;j?0tJq5!mP^e zov^#0VFz}UcFjs;D-rj0iVrmN@a|L7KKdwDI9iD04D}B%wk#mAT{lJ%{`!Y-YfJJN z72?M%u*kO^HrLIUVu=oHuLIFwgdp!yI-h2uO3ETw4v*=hL=SyE@B}k;CVAUq4GA*E z^da320|gT~9YiU>FcLB+s`Wm@W`Y+NNSS06uuu3uc29BiUs+08g~{*}p3?LX#1`|1 zt<+|giap6A=Fdy`Y3s^R#}TG%OP5z~eRs;yrAPGW4Ju4B4~>?7dGP2$$}xeIxc5DT zQH)<@S~vH+x>AmQ*36`zu)&fLgJ*`y*fmAM?gYigLNtoD`Khob-K)D$|3CFEGV||z zav7Or``7;t)RQyI4n?M<3nT_G+!wnrHpT95ZbD~mO{&KP8Em$ROXUrm3gHRuQlY9jWcPa<2EAXJk?WZQJwsC@%9JoH zvS_hv-l&QSvg?(#Ek}h3mUuH$)9-shRE*5dfkm0b-{j+}l54tOb)#%f+}4Ly?OCjf z=NRe0_jXFy6tLuhvq^j+TQ^o%`W3|pTqQmtXepkYn-FFi(o9F;lJ6*Dx1c0T(%=oLN;>}sF6hFnZJ5WNoEp<-+0}H zqGS!e>eS`t@uirdKc}!12lQ!claS$#ie{hWy&PhKz#)HkMS!TET5-VdRqvL);-Iy{ zB$*JhG9y<1*IHgef*-t}>Cf&hH0cY!HdUokwG!=rCXO_Xl_|(Aei#3p3bRo$k&$cp zS_htLzOFp~C%DhHoQyh-3cT66b72 z0j2o@W%qthVB5RI=h;?mByqF&kvRwQaDb95q%s!1F15%J zouEDTfdMPq5$8-iajU>;<|oZ7XnzbG{D_{&9|CIi@#F29eqU91W3MQcFuUFEf{19=C0n8#ZjgRyLWf4a3g zTxh~>ti<+&zjVBP+(q#_ZVFfxY4Db#6j_&6#nlJNgwg=Cfq|o9LCv`-7L985LKT$I zelKz4M691E&3lFki?>=W8=CkRZXVo5Rrc zGJH!-_ig>7E3|{=#Vj=!JfwDEBN{tj@hW}md{XAqym-cE%_SaVRS7*Xm%dZFgJ>~v zS`8$deZX;p@miRf85Js_Bx%nj$_0@|@d<&Nx{S%U>$jT>Dsgm()4UTXVs z^2W|?RJ3W~d>YBf@Kr~T#=2g$Re4t<{i$6G-|)`xtTteOvGmXb#q80%MI5OowJ&k( zx%mjW9O^`Ry+n#gWKX&`Up+dC;d@76AcwIYq~lKc7wJYff{R3;9nJAatlq$K8(u;^JE3nPCL)fD>d

rSgLEZv+(s>n|z^HxFK?-rL~gd#4*i!(MPj#&ViU@L+8d-0G_q5 z61|z6T>|5hyE*c642T(zg!7~0?U@~uZz{m3ZpHqqW$@%-#C(MPaVcTE{68khovL&- z7&!&7h}rRFTGdtHWh6H&H5C)SS_ul%q<3@$C!r~6Oq($^#S#z`4>f;~m;HLj%Q5+G zBGv*r#kga9N&ak8h)Gp7W@K*s{Z^>*?$1~YJKqN%bXPl)BMBL5xqth~Rbpv)wLe|v z#TTH-3^K9E!nc$X^Poq$5^9tB&0M&3RdHmybFFY4-J)vZ3bBLhavv!|Io=Z!f9Z3b zF3P-Ohwm4_71+C>!Y~_>tG4b(R7wj@ro9jL}f$`x^yx%RY`qC68oOoomqK}oC z(Ij4P9j+~Dr<4l`%D2|!Mnc6JGT}ri2#H3-ZS<9Zf#cKP2qLA*tJYhww9&~QY{PVV z*ia-W$2qY*=`u`N@nlr{*=0ElK8cbFWL;id@yrS{jWVWKaLinzRCgTC*qj zvv?{i_|r+<(n&4%(V-x>uz+>ue-pfBKr1YBkh8bq5EzZk;*g@eu+c?4RRp`o^y z7QY3XgTG5|?DB}=TUP&Z1geO7LPuCgOnQ1?pvXut<|h+~luBfR!SY0tJPf7`edmL| zgw+H>QWa~{r76KZqp<`==unUUNi>v+W5a&5`B%1rXM!LoDq{o5C3j3v1Vn`>;g(c_ zd3_^>Mt#C4of*+azs>R4yHNaVFQg%me3eipO;*t%Hp4DsAVX)Tf}`SYPECxvB*j8R zfpdDdc5nb6WK=2gRiw)pP9+6LB?X0JlF8lnsu5rfN_elu^t3|!Ug09C2Oc)(%!=n~ zP@O^%F?;|?^h98IgS%{*vUtU0&GnC<=Hm-~l1BYPaVrNT2>GcfR?sEcD}R0T<`qb@ zK6QC4syAl~7~ktx#izP@)7Sdxd#dj12YRX_s+GcY)?4 zHx{BhixU}~acF>0kk+eEiHNzzYp6<8Zy=eR6vkpRh66(VCX=drwgMAbYOHoX{7J9F zbDLTQO-?XZs#z5j6d#s6wsv=O$yw2FVPTcFamhKUupyuwp}t;%s!BhK1gMTzogqeS zd+FR9@8gFD+hqzg_T%hrx(MA8(H74lWC$2 zcs>qG6;Ne%EC0gr#qS^O5KX;Q5ExaEbe={lgYWx}EzTb%VvS#;Vk}_ZB z!40K7aVci<`i?Cx{uK7-6by;%H7K1CXv!?ff;ihQIa!c7WCE=iuSd&--JmuqujTIPIXu?DmC zloUeoX(&qg$t|?($c9u7CA4`nyc@75(2y-tbCb=2Z-T)zqnN{NhVp~mv~=dW#0P3q zR&RMpT>Tb(&=`mUh-4Kj+HlYz4;hd&wmfr&bDja@<)Pb2`ck-pfXReg?JRQ@ds$J}s-Ll>E(V1VT zlSwxifx36OUjuAbZGL4iKbY2Wp%koz>3{JV*)rw8NAVdCNtV-uf`EHnrPr;_q?23r zs}VyV(J!ak^0V}Q2#k}uHD=QqArZD&!&Y;Z6i7%&SxWRTc2Mt$`^kuCINJ=z&htVK zQFCSmH2)^42v=rEv(rhN5?R$LU&Y9fpPu+kbwZPpsw_1-69HMqw7AvU<7^$wL5&U+> z{Kzq@w|yf6J7B_S`)my=t|bi00&w;fqL2!xCic2-7pghNBB{w;9?O>bQQNobT67g` z@B8N|rGN57$;dzk`$%qu`>&aF`&q`V-qP4ogFOW9S}1NtP>dhX?A-_ZGutg75n=>Y zb7FAvO@Afs2Cr*Z>+PX$Z*LKhki@=T#Fg@>(8o_?0|#6{^)Q{EFQ|ONbr$KE{Acm) zsz#&hxKCx9a_@^o!E^fuBjewycqJ5xGUQ2vu=@3h z&Y9x)7k~PDMiZ|Jnri&A#No#&WFdAqe^NL9OOB`K5knGBvqm_;2_wAwEPH{myERTI&MvvDp9E@W$lNQdha*0CwtjTeH+Rm-RJzoXF~>5mQ|f5u{Xan= 
zwytHp*O8m~!Oe{yfzfWk$k-v3-gdug0V$kf!wu=6aN;6F1)Y%CM_wYgGBllLG`KQS z{$@0Oe^Q-auLBuzOPe@h$w3Af;YQUMsbQ|E-lZw4Oq|R6O_zB2>3uI&csE-|pU^Rc zfx5f$ydrEUFu$JLS0YccBjjzUjyN?I&cA@ZB{-0of!r__lP#6hDK~bB^Bov23syIn z7-HxHg(9mdcBCOyknCEA!mwePJ114h>3&8k=;GtXI>Xdknyr-9+eh?9nLhr^A7uj= zl}Ao{KcK3PYYuRZ%$-u%olPl&HdI;|pXy2ymCp}#-=U&+6a8QL@lm8eZwy?b_rCtj zmiuJNwxd^_Cv-bVV2jzh>rQdXAgv9kAWYGX^>+2dq!`Alf;fi?CqFJfO)M_& z4yGhU_4Y!4_nP~6)LlY8vSofVMgH6jq2`xc`k z`%ZK8IKciMZWkGo$ezpe{n?DRT&&0wAfU^^5br?8$rTWCRt6L97Pr!)GOv zT~9@App#oC^f|cH9+@f*rAaU^AA3fx`cN96nixQ3463qn+r;_rjW?jXx|e&)KF$u_ zs|=u|Gd%moq-5wvX{uK7xv>%Nj!KE=s~ByjqOyp10m9=&t@hBgk+58-l}jg8kO8_2 zvaYd-pE3qY-$uy)9dk19^DDJG=A0-}{jrgOL8V%z@gv+kfzsB<6l=?&wNja{F2g*V=QSn)YWwFG_Pgcifi2z9 z*;4=X^~Tc=bDxGj-uxPjW@X=&olLXbrC@;CN&wAxA)S?44C@4$S<+1Y+T|qKr=9(N zQd*HWy-daRK-17I&1ez{2Aqa8aomtD8>`7UPz!EnO zwo~0xBOXxL$1Jii@L@u)9!7O^h}mkV^Iuk8u7ZpBO*F3?f`F54&#wc`*n~@ba8Q3je%d$!Mv!XWMiO29@ZVyVc#Tn%Qc z9z4jv#JjVn)ftQk)2e*T$!zIfKY6@yUv^R}v$|S{B)As=1H*!s8#|P<7~k#vvk3vu z3!VUImpe1Ya*H&WCG6h((urhVQ-3f%)&*}XB-`F_j8$4vl^%H~ zKa$jaS?GCWjg@&}%<$h@R}LSfiu`|JaMKzYsF+D|gCgDmRq)B<25d+xS+I~mhuz%` zL!OYl_&X&Cka++N$3v4K@rpaXviS(*)j!WX5!v99cY&IJoVz<#U(Y{0Y=`QbN15{T z69|B^8>$D@q#@hM&VU{2sz)=zanqx))oeMmw}|&FX%mXeYmx<+CmfezhT?4(8eF`b zRF%@{{DM@K9r{d3vf#nBQKKJ8Kj6K|7B}rR*FglAw-xygJV)8hSKbC3v7_@odOQND z?^k{jK$>CUr+BqI?M=FqJ2)l)4J$96CtOw4H5lj=;&w?mCm7sr*!H5&9@TeRBgV%M z0P~{Bxwr^`8NfRr0DncrNx4Dp5YLxXyJ@k-D#dpEXE2bpwe04b)50$=RgJbb@~!P{u`-Pp zoB@T{kXsit|6UN>7cdt9Jhh(}F+oqZ(}3uyt7=oa((6bA=KPEZVz?=CaB^O;>gbvX zR45=F-@Cl!#qw%f=u<m}KK$@A}ku94{V0pxHs<3v1xp`vPA+Ko~>pkNKqSmP1A8vB3pIKLm1|5AB8ZtK4FF6OfmHVFTyU#e)c z#h4J%OoR*pOlQ=Zi)eeh23#C~E$Y`z>@FwN1@Z8Gk_2ko%MN~@j4nj-V%ond<#`Ov zV%)hu+xhiFMpnkJumig^vAipF+gl8%&G|^3#QbFaFDu%HKaaaDh1c26wYT4drAfRxo2LD zd2UtzUuFn+$_i%YX2?CP<$fgvvS{9iaX(67FpRxT@RZiQ8N51&uBo-pMq3Xs>&y#_ zmDBxZ|6Gc_dq-Ypvl`%fZca|Yu3y;<4AEO|@1*}dw|}|rDO=#Z+m2%o3nh z8U-uzrd^JCUK9+L7Y`m8u6#C3&hi}?9EFB})hJPs;mPiGXbQ&YVH=DjhfuAWIeAYT6EY6Coh7%&mZ(kMMWa;~ZTe9a9Ot-~T32?w!wFiLOMZ-ZU{ 
zl~vs&iJWYN&tNhO=V5R2B*PeIl_04vad>svtvu`(X_WCNKK(%gxM+J4C#~^ui<0TC zDPq+5;0Br9?SSOP-U`vG_dPP;eZh%zGQQ_*zx|iy#gO2}zFo!k9w-Y=Iy+nq@!@i% zqPnwqg0q0J0wx|=xEHj3w|@X{>1XCGJt|1i*>Z0mwLczE2f56B#1}fnR`V+70870f zA|(Py3$%V4N0M*U*$=~|@wvi%`#anI$Hu#Tu@@ecP+FOF!Y;jkc0TN%S29t`vsG9{ z3k!|se)s@sJEiJn+m7o$xu||xMwIY#0Bi8i^}a8ArYGPbrrGb$D=U$O=dw6(SOW`nU*V!SK>;DOx)jy7M4c~FR?g7gZ!;ue8?lnt~bL&%^ zjX{xd<}-X1`b!ULZ|)a%OBxR}1((=~TX2SqcYILVPwSCBwbE%&pFsxR8Mz-TW=~qj z$+C>%q`-}2Z?YI~g0x^AY>TNP_07;(oflZ*myW^fZY2M(N%|Jgv^0V)4xS$ii21_lCT5AfkHHUP8k zm#i;*xWIL6^M)A6$3M@O!E~}|{c+(kH+*#n4crd0cL=&<^S=*P5DHq*d)@9kPnCTn z16qC|_X{Wpn0SITV!zX>3M#Dx`uh4>%??zl+?HsuGBE$mi6f}|hZ9#sW5QK;ndgaV z7%l!(p-fqK#TsMg@x|7uZr;cG#@NJqwVDj9A}GjI>7Y~0|I3PFBE_-)$jl60Gn~PQ z(f%p}$SbHeH)p_#`yQZVR_MX23TK7_NY3J0S(g%(a`h`74i3csLj^0}HVi=K?%lWQ zl_5UB=3<@l22yFW`5MypEc}GAB5bW6Aw%7f{N7~Upr7o&x!|y6N6RVeV8-7jO(0cm zxW9~~_+q?XEeWC!izx@e3+@f!ChNeOka1MOW8-q28iAji$R;1<9p-HJP@4yG2zN<>O|I$;hsjl<4xLf)YQUlqpe=7 za~Zs~-}p(SB8kI2-J_`P4iz41e7TgY`Dzg##FVsCw4HvJ%4n4ouw=FdSaFH1ty8cU z7xb*n0Pf8*#OiP1tq3Kj5VnoLFMscC*%zM#DO3VqWaZtmk;y-ggL^BD@Gl=E_3alV z&{zQRz^US3E<4vH?H1p$B{}lg+%w$$_c1WSa4caMtdc6@{{<^s%zt@+@KmB!Mv2b~ z^%Hs{?pnBKboD8qR@%~~L7eD?9;k7m3TnM2ogfv)vi_i^TIGBU=z|3OiCvqAUsgg$ zXUJJYMz|k~`^8c&0*lUF&VTvdT$SGPRv)urZDLa-^k+?5@lC5q`m=tcyOy?8S*J?z zpsFd7y1kseH+gPnJ49Lz%k4$W)t-E9F;hzeJly>&J`LeXxL9YiqaGdxu2=kT6yG(H zg9jdRu%5=Jr)8ZV);}k!fSU*<;(A68$@*~YtC7*#tNrt0bT;ULij@`b-Km4dt-rQh z2##njJGw(C)B!Lr&kxr`MtO{-5(X^M7lbV)JXT3@VBM*=7}Yndz9DntB|$6{Fd0h+ z249~#960Hk%Ui6LV*|_mFyJPhMv{;-qPLD4N}tGjUbW=2uL>w(%s!4DIFr9grs!$mImqW?(TQ- zKhN`iIA6{T>c}vgU#z&-bzc#a*7}e1gPAv#X01i{Fz|YTBZo3g6y@0Tx`YAd5nscF z>;u7Go@!N(#pHJ)!KN|nOud8qu03WEXVQp%H<<1CQV9nzL!ph_U z$@cut1oiOBPpNdVMTQtD&&Q_V`Gzc!+hw_He|o`cs9PSjK=l;xxgfFS-x_H-mkzD@ zw&REu>Y|Ec*)qVscd|zBbsO>GS-YS3>i+zd56{6^M`@Ho88t#>reI4zCnB}|@-q3o z>m?qmUPKR2%fEm~NVH!&-Q%J8IEF1e@XUS}!LYR4rxg34?VJvPIuR$2Gx3VH^ELG% zwV5KW6Fzmucf(k_xBrRvd4Irh9#_B0qgljp=ZI1WDD6q)Ccc;W@;}mj0y4U=ZlmIR 
zq*IXx*Gq1X%fmjFqv>4KMNd~ay&a=WG;%z=C@_7o$?dTUl_;cu-%y>?j_~8`F+JGC z7m2yiz(NZpYWJ&0DU|Dij7a4>NYRY<{9n>=sEY;IAV33N;|`0a|GngZY;nbr4rCzC z;OTFY^?X^0ac8o0m@uEiFugLH^r;?7TM>T$Ga|H=%ZsW;VB6Y-_$IeSLxgbc!dl zg_S=|->*au0NwXt^S5uV*q>njUjlzS(T2AF0%H>NL)=yk9LJi*` z24@^%KC6fNarSetH#wAx2&r4 zs4V{NPX0Mdp#XHorv1Kv!v3eOClg2|jwPi)&MHY`XAKPxPq)=9s^|~L z3cfa)o1hvxs>R0GiL*1rr5_T1`2ci&a&-oR3qruPdUz0Ty`|0SL?swrdAJ&wn5(f~ z2rK5W9&uI4C0zoR$9lGQZ-2@%zU2`KOozH?7*IHTI3)C@X{i5IcO0Ef5 z7kw>0oXuH5bs2t(ChREyW7Ol+I-9}=9bSrU)PHL(O&ORL+Rd<|#56u>qe4L9YFnvI z;ZBV+6I(TMHRv~TS$z*zHQr|-Hl9l9(lnNdj=(W43)6kkXw<7${yjim|8M2Po$=-1khMQcb+=4yqaal03w(ynOsC)I=_sj%NGV=ln+B91x%vwx?wx zM)XQdEGtJvKm7dANZ!=!eSCkzMT#NXX8jFnf;3%8Zrp?3T_W&-GHJI;o8{9ZJ+IAW zZ^7W>K+PTIq0#=eA;0$~e~=_qgX=-JKx{5OuAl0%t55J^0+ehyANt-}vz{DsJn`hg z0b8%aAbln3v|8l+`aZ6}lMYOJ{J}d^paSxlxL5fl|Eg+>c0E0_G!@--VVO#i^D&b# z!pMZecByG&|EzQ!HC#I2y3TF?_MOkiYBb~CFMnsasI*DN?eURjbOve%wYj5 zUd>8lW7cOAcp2OI#t~@~P>%VD{9erFG&@}23<-x50!dGiY_?d!QVb;*qn%Gca}lG| zdW}^Cx~JNQwL-5)mN-8BW0TfpD?K8x>%DU~w^g_A&VPGj7HM2o5PgUTXx%@i7E4 zj8u8j8HG=@oCo?Z8{Nvp3mq z_w$rIPR`hjm#OVv$f#~l9g*~K?e6bK9kt$J?@g9HBNN!~@8mzO-X;H}827Ma~JxwqZE8y2gsXzYyR`Qa_jD|w^c;VO3L zS$<(Jr={&D3Ly{9kf(DNz(I)kJXMyX1|7RT=0G?(u_qkyW#1fnOkfmp!&00N2)LpN zOR^uDn?r1Ad3kbs@owlywTPBQH9WSK*|Zj%k@iztjaS#>QUvbItx=9ciz8p_wRj9^ zngQMVz?3Az>JwN{{48~-4_lzI&YLngOA^RwBNWlb+R6NA(}70Y8YG(geztsVqM5DwQ;Tx@PQ~RHHD}-ZLLXh*0o_^kucLQ!WBa^(Kd2CtvkD3mc}p}*yF7VmyQ9sR0KaP^tp3Dn8z?2=xzg$bFFY?osTh~WW3g06 zgz=)Jx5Pe7Mvfih3&QZ%b`q69?KEl zrfluf2-x(xKzv;} zkX(UT8S_Dw^_wve%S?kfOvo~b$9>KdN>M(JDbvnG_`Bd(*u*8p7 zO~9!o2nwVSqH*8AkM_18s(tWB?QigQrvI$51EZISyDOUG^BLo7w{PNCUz*vy7(QN( z=J?Q*=v839)?j<1_V07z>#fK`T~EW~9K-1=X$tPPwiIeb>%DUN%o3rO znHBb(zM8d$ugJxGAYeRVVk-YDn?e#pyFq68RK1dzEowt9f1N0OJR0bBcZoi~Afi$s zXwA2@g4N_mqnFC&-i^E zcQE60QQGXoky6R0l~|c#g3eNvT%C`K-A%FWb9}Y}wBsr~7lU7IUyMlaNOXDsEBO@Z z&Wfh~LF9I9_Z6@h5Ser7^qnCFL}&~fqP!ZGqF*BkRBmUx=#$nLp`fGZOf@qGonSJ2wt^216k_-Rr>191v?qtnLf0;KwoW_qu>Id| z&2{}l7oO+-o7jgJLqj+{)Ls`S&O^)T`asbZ!t1;(4L_B=;1I_Do?IYwI<{ha>^sew 
zHxRIG&v1!hC<(hwht8{ry%1MWK%-O6iUQNEZE1-@F)ihax$@-t#Q0ed2LV}EudQ`j zJAWz|8NnEjC@CoPjpZ!lfBKvNcGRBO%m7sydiEvIW#|2(-OpiBYq6NdVZmoRiIUE@ zTV~T&{s?AHn}0KX)at9Jxfb}(Q!Krcej>oP7Qae2|9vWt1t#hFOVHd4z{ucjm<)|Q zuOD?yP2p}Ac<^MIY8ZpZ=Bb#NU#6z)vm|=-5<0!+TAc$2`6oTY5 zdlaaB@V~3IkD3KzImAZenK^u2*GhC_*37I&sX_YcE=nU4-q*9tkjg){vp7djWfPi<*^7*rqT zV$QlExZLGL!Xy*K10!a5^lWAyDr~XnFcd$l+g8^P|{!+rAwnB$wWd#e&JuOsD?N`VJ zP)h+UOd*XOJ%LVvYu44`iLt7;SSyeBQLB|@e2?B@sReho_Q<5#`+7u`P&APQM^Y6p z05e$ANrzXP+cp}1kfnc}5hwh3?U>`*{+h6Ocr9#M{X-LQ%8Q}==Kb_20-lI!t|a{R z&5rFl2=cW*UX}o{G2(Oy&+(B|sUSD<*$5LP@~MKnz&;aBJjVC5{f-rH%`}3z*^W;q zQ15F+Adrzq)U}~R*`cSBW_9G%n`3H8ipkaCc+4AO{Ut^TBiXZKqlNJ*9V zZ}zUUHc6z1@?ce-rjQEbnp8;4Rh845J#vt6J*8xt&SX8b0A~+IUz^0V<2(cSbs(2h=47?|WRMRWq|BEIKvMw}>=6WP81*_BfWok2Akn6@#=@O& zP0yF3#)R#)PD#%ovcFG+MkWf`+1c5i|J4W$PHeZSp}}T2PwNQOfXmTS0tI=FGTkUi z*k@E}Z%=ev^`!23ZSAvk=eCeIM-7Bi-7Wme;c*DUXSi2{McIC$s<_MmQ8Ep<10Ztl zJp~zaA$=K;6W&y9{zWX1iz^Nu$?@*QYhaF%3t~7@liAdMV#OyUR7E|l#qoeiiid^e z2W~zWXwNn4<70{|yC4#6SqWdAZyZQ~X&Jo|xx`!zSqiPFq51G~?UMO1!hvv*cBw;M ziytV<*x?%Cx88Wxgqz?TQ4>uswfe?(M-t>|Rhbd;+ff3L7NPCFAvlO%d)3>h(eaGa zrPQ#KE``sW3WIz$@LO3HECeJiJ`<30m5&}Va4|Ji#1HvSU(gUDhh$nOaJngXKQptQsf0J}C+=M~fp;ilrb-25 zHrW_b2_S6;x~5c9va#M{@5YBd`G<4hK4A!EGu?zTgjqvOy-bg6qIjIc*d)s^ntmJ# zMTYT*5J%Gi#3 z;ZtNZ+$a7bdM*~0=s1&CbadL%oUpL4C;Ky8^Ua zpi&+Y5z!bVMQ6PDW4qL%Sz#0mJq3PtYkj=M>cu@X`XJ&yC^*>kruAfmhHpao^52W? 
zn#8!cICG9vFiL$p%ClYYj*A${b@=^slI31z=&1CT1s@G(YjgoiO)4=<%z z6q{tyY0>&Kb+(s3qQ6R?O@mQ|ahw9VFw#GG+LLE<8e$%+pmaXl=KzxHox<$pt*uZH zES{%R@9^fG5X6I;-DO`BB0f`Ty0W<$0`>qS5IL#BlynW^v;A_8GVt>-Ec@rsD~cf(P`g4R>Q1+>#D+%$M$^aDTv zaszYe$7j0Wk`Xaw{R~$r93wS_U?$7-V}be23W$zQ5prd%T9}z3S^y$)U^%jKP^upp z6ZHbL2s`HM9f660I;0I4C@dcx#exFN7~H>^I`a=7v!F6y)1X-uqUZIADd{^nq${iv zeR%eC0dr0jP=0`O10`+Ot?5fQySVgSde2?(kV^iB&X~Hdlc`zt(xx_9MIa*94{+KT z*#KuRIEb+4B^oROS{n$ZPW1*SpE~(zLUn$)Ft_eN10J&h5*?YP87 zoUj7|Q~qB=T=diBxR2%GeyHjPhym7FZNVDfP&Wyj~;y+H7uW8p)MHygwgTMnXgD3df}fVU*EVM@zJ?)s)9AK2^k^I2*W^BmkrH`Ws3z)yqS zdYYYCyBZ1XH(ax}yS{XKEUO!w@Sm&J;!bFbJ%Y}))44-JEz2jAr9#l4suu|&&XDGB zjg+FH!q%bh^YuHr>>k?#L)XN`E;mB<=gcN2!GqYJtHlJ1vJ6O;(`#~j!N=5#O5-Rv?s zbgDO(^7k$RH#TC6Wm6z=su*Jr)r@(lAD7~RWuM1ri zls7>D_Ax*Nj5Y^SGtlZmD)==pKLY>wzt5L99HD#|Lo1Lg0N6SRjz<9YN>&p^xgg8S zZ1}BQLH=7kB*Q+YKwIF{Nf{^iZ0QKoo;-rt-FkbeMxD-Zz~g7x{? zS`PS5f{%>hvt6VFT3BFmvpt$8>$21V6Puy-JoQQYzs3;*$gAy`(K* zO>ZWB5GKW&oxdG6CLippTas08*=;&2)fiG$^LNgH?8W zu~`TKgS5}&ke0HCI^w$>Fi!%d6}a>qK)_7l`HcUZ_0Fi=q#h!ttZe=x39L_1wM?&G z!PnX@@y7RBaT7~ROG9$PzgSrnjdB1}B1~Y*at{pQ+%ETtM7?i=KrxgD64rq+Vqmmu zxU3IisL3f99fAya9)I8{vN}6eb4{V9gQf2>GW)J*$7lEbg+(z5=^pJj-qOXwz{nn+ z0fVas8*27U1L8 zJ(TIdpj#>LGp%Q)SnunkG9*lZg1N!ZBsYJvRX4(SVj@^{l`Pv1JC^`0OM(VcO67i( zsyp7>I<-?N7fB8I`#r-?Do&wvk^|T1xXw;hhi_>pQP#`a8 zyRs&Rf|T?c(SS@ zz2lb7IxG)zL|2OHqQ(9`f)s8e7)j+2P4(E&hp@Vr{?GT3fEarqs&V{Cse{(&C#CWD z2uCdypQ;$bBMWIg7Kyz_09%H<>C70vFpB6jwDbCylspE75#N2xTE&T1dxA)y;wBpuF0aG?8 z?E$q+EnRv(WuUTGiF4}(z-;RXU0!(B9Mlw%W8Psmi6MO(((B3B_7IZ^8gDt8F2 z-Kf%q{%)q z=i0|u0`Js#O;k%)mPru-V$#Q@UpI$`WJuM^(5JFn(j27u38m5{vs7A#;a|eQemS#X zxaEya$UwtV`u!vzI%!P2dzTM&&q!KN41M^TKW=k$kY4LT5$Q&byH?6oNjN#>JAC!F zwF;f)O_aE_G#t~%-x>es{(r~y;>Wx1Yhn%QBX!2nOcIqVg_8CLN1EGjABz_6X0~EQ zfL5x}+$E%VSAbe61F7c*)o|K8P;wuD-Tb~(Xr5P}3hSs1`b|;Y>&)Meaq2XZ#(c#! 
z$Eq83cr^v^6?@TjQjEhI)}XzClkqyQL!5vsL@m!$;r%X<>BDZ>1y_s?TVQwE_f|*H z6T27lQ(=v9;DM3xS#gGBOZH%SYK=&IQJtm;6p-uQgc~dPV@U_2TMmRD3ea?V1ngQ+ z!rEI|MW|mSHt_U;Eh2Z+qU3V9oPO)~%+~`&>Rbx`&%RueoQkD?z7cUck(aCea=wZV zeW0{qp%d&mG!D<)sq3J{-Q&}S$(7JwUr#)0G$j{CVV++5ia*IlFTG3qu9QnK-aeGU ziy2-<_53EjR4AK9GVk4EN}`Fk{=4MQVG$lS%q|Z1rpO(=fZfhx5UEp@P0t|C4MW}Z1?W(Ddr83+H_ z*cbXk`pYYUSD1|h^23ancM?8pL3M78u^ameSE@uB827ig^YC#C^Yi;HbU+8Qj-HXR zy_HJVX^tF*y)bLvrKJ#xr}cT=S^BHz=l&<{pIC}$HtI@@nyrPOsP68zulZ+FD6up} zW!1F!U3uV$j>byW(of1Mq$`r+(TCu|S-rlJj^me45H;!GBRFUY;XIYUDnrRP4H^wk zYW}BDc`fi&fs;ReTH#%L3ttVh2MgA2df8`Qg@JHG@zTm6viQspp$ZC=0RFJ7(>6{|V-xBXBA@hM zkB7#%XNeas(yWN`>S8K$)*C3-mHa`gY`>J&wnE1dm{H;sqgSRwrmdNjf79OLNt1Iv z$R{M6YVzLTS7Evz?U%Uz(mm;aR&ztWojzZ;-UCxi7E5}EkMC%&3n=64xSHx;R*LV=zT?Y8Vf+ohK6a9RKOiEt_uBVsR*noFowW^>p z+e(Ua^RRzvQn{I4Z#d$@5D&#@s=+5>W-QE4@Td$SF%W`Q^L+RrncEhj9$9gJ6r8G8 zmcL!z#QI%-f}Q#fgOKP`|FZ)@x-ifyLq6~yw&Rjk*{xq|B%%s^QcVY zT%~^iNe`M;PCObc+Ht0HRX^rKw=u7cCt*nEO&>g}1mG{vtjnCgX-?VL3^&my$SN&E z6X-LJ0P?`^u#N=sU4g6iCu0DNyE~cQkmx8ETwJ`J&*_lQE&NWoTJaBvk`JdzlIqc-F;r9el`iEPZ&_$ z;H9N`WssnoWdg&X*TjFOm0RA7yI~SlHPXT0f@7}B5P3G!u&FBt!ptxv4x615}eCl)lTLpW!?e-<(om`KfrmMtg-)( zS8B@iWVQGu>%aWOofkC>co{87?8YFgU;@ zZ~S=wOKlZ&90}PC0CH6pE8w`fJ~pwTUk*)RnIV8)8y1_7YI7!%;mFBySS4HzkPA7Y z?my88O>Vd(V<{JG1jQ*Rmb^-hn(a~go{7t_{g<)gi;rP3VkY|7;lW7+PJM|A!$8vI z*CWRbPft&-><{~zj$;!tON^RKp|t+1ei!(Zrz%wmh+2W1`TeUp7L$>jpR|8_-znAk zOvJ$JaJ@*6`ZRtMu9e_vPC~a`GiRe1NJvp~A&0G8@CN~2Ra6(V!U+^O&KC+bNuQ@q zP<5JI1%y~xWW}T4Y67ZBJWIM*fB#m<08M|H{RVP!VMvbIV4B;-?#pKnXRr{g0ys4_ z85l0M)!57(FZn*@0??XJ)Qi96@pezAVfG&O^1a%=fL}i4BEn~3HEYe!cGcW*hGvvK zR<>*ukx{c+NuS4eQAx$@34!nT5O(Kx5d@J>?Mi)#WVGYz1XGrgh_kabum1MZCNO-5 zCa~9;-p=bQakf#{ML-Q_2sN39z3kxZ|@Y0BIZHKdlTIOaI9nC zgGs~8%skHH{YRSx^jD*RtG^Mz>n+9$(9MQ2!W}mLZmgrT{qknXce4d-?HN=l^qe{go`;w6bp(>xzZ$>zw$fY#QU07)p2~yZHiQj6! 
z$^89Q^7nas(#K|W5oEUX5uDkEM;`RRNnuR{i1bp&cCljFlA zT}<4o)#>)a3|#9bH3`F|CnsaX69$RAp!OwX;$d6kT1E>`?R5A?BALLdNV zZSRRDIazKGR9I^t9~-k8u(x_x_z6IHm3C z(UyTzHS<;)uB)oLcw;#>gpOOPg@W@tN!LEQtNe@NcL|(NmZdr=UlZ;V@BDLeUKh#{ zs=Ekh&s12!bEvFOW^gSfC=d`3fYuQRw(07fs|jO;sQp$8zOiNubTvc=yz-u+px{B?8`SEqwQiR#Z5cf@~0K4cv3Fkl=C{BEnZF=lUAuXpL$T zhN60%Q}ZYxQZA zJb=ChzL=6Cd_ETEy#EDg#K%yF$WH99q6^(VtsW!V~b27`T$|L%nu^;^2s0a~@I zOj_WOFoQd~opJ?wpkT!HJk!$fcIlF+>5*h4!-A;8GNiFk&T&<8<$^%>7hHj)I5L)# z8%39qIe&O2rHey|9Bq*1zQWU`6}lSnOKv9gf5b0Ag1|pw9+Mr;e>R$!-U#R2ndz=M z6BfA>@ps}+vEcGgPLR(G zKTHk^2xK_>jZp`t@s6NiWjCF%q^`S2JFYlo4%_~R0eVO^pH83rkLRv|HBE}&qbn3+ zN69CY%AYP<@xpn3I7B+_Ayx7F%#o6k(tuF9TDnICaS=i47ppAUMLr+--ovP4LL zdG8x=7)xf;k2yX*R`{j{_QX1u17cur8$vA;k@IZxUv41bPjww09uBdf`N?8`RueCS zV`N-f8~(H4^mCN2Qd=@9hDlS>G$FYJPOWCT%pz%6JTz_T(|1&8Ma|P@z3HMq!+hl$ zALXKK1=&NZ{I{=BS0a-)CtR_c7#h|Prn_SaUO-*fD5eI_FnIjk5n^-#=K(()zZ z`|+v_t=V9zk+ZHWtc)rEk9t=O_BcIM@q$74=|CD6&$z>SEw5Lr?-sdekC!w@nGq=r zGuAd;f?zx)mP$_0dX?WD&aPNw5ViQbvP`*}w>q>88`>y|elT;eU5|QIUu=+^f_lP! z*@wkVuJcMp0Xl=ow#7v@^j&ejF=ySJQ;>Gv=D#Ttob$PhURnf%+Z5vHH?BIkbAKAv zEUT-9Q%R+YO?TOUkZBLOO70q3ERR^^j#@eW&~44MrkVPJ$+?G!_KoeWyu#`}-^ZnS zj=oDUzH}Pwpar)k`d0ExL28fhhL*#wb&#CR7A<+h%@Qmx`@hP4WBh=?$jAuNsD~UL z7Q4En_vhgePkUtGYv*Gv|dqp2AKe)sBZ&>0nefDr33hz zVI3uJn1kZ`i;;d`IQXN`H;T9JJjKI>V#QiZG&I>TIU?nsCs`45(g{=%)?Ui^2ZY;% zm`X@c(jG{5sj_29wT!ytp18z;0ojL4O^+>2P0t^`?Fy7waS4x&Jb$T9nddm6bj~&x zCOGj-HC(Es{?l4w(2vCv_zQ@i&qUd`#;_yiPfdy@8eEDzr>wZE9mTJ7^E9MNH;0TN z>`K_3E$IWu+Y3F8a4sSR7#4^K-JJUln;{J@!-uFlinsN7_DjFYq|hRg=Og!FEUzvB z0_d_zA-@NqvAa3lIJ?C`VXl-~8w^B@-IrMD2*$SOgYg@}joeS_2knb-LT(Ezd{&*5 zsc(i{g3h}^V3+@{IclVNa)t1tpn#f%<>jwmj15<6z}Q-xDM=DABdxSifTew0R#wEh z6H$CTPOeIyB5@mf^bZqXjX8ybstc}q{7&x~_z#W|B{e&>AF3r%@-GcYWE17#L8~hx zBV&0-i;y}O35Lps6rXdt@_LqI~9&(TCJ?vF8(f1foeXy~I0qW?0-V?^US1ZU_G ze8nPxB>G8ucpy|;HQS39k1nr;Dj(~&5HsbYd@xfEep4E(ooB{4&FwLf7XSEo{3s(y?h24wjyX1A=TYi&y@F3weodgAjK@1@vv6C@GL&i* zTN+nrcg_qEW+LZ~TjT~0*_Dinqs~<^2{9=Mki#Wrby@wh>f3NYl5yfEDow=k16{q! 
zx8f3%Si8GBmlaXIlDSH6U*WA94 zM^De8SQIR|P!T*`$qE@;H&OeoLzJ7FE8y!(>TjY^tbq>HAAsYO$f%1TnHv^Yg$!S9 z65fTN3iARCN-r5Zx_#RX?5IIn8`I6j(h%a4dU54_R?>FaCri{d$L5 zU_4@Vfyoj#=OChFMdQA=g~Dl9&aw?C2&Y^%Neg|uC?0+ucb6=VP2amCDorY_mC5&n z`rn!Df-al}`v&}oCrs2;;Et=pKFyps6$b}4@gIbjuQj@W@pJT0BZ%MWsrsSDl&dn5 zz>*{npZ^}}j{Uaz{V&j7H(Q)>@V*rCJ83YXyPo!T z5my>v5WW4L`$POmhHdJf)r*J#6}cUw@#!%lyLALS83{Ss0$Px~Q;NNZcBa%@CLj=Z z{feG025Qn2@zS_f1xf9fc3fPlj;~99^4zdr_%HRHHN6R@mud}1R!Q<~s+@@&6($tK zd7Q*Uw2SjTm%M_T34LO1)a|!N>sQ8?ag_=p<4dXhstf3aVO<0UqI=PQyS}~IQE$_* z#wFCe3R9m=oS#lCiN)Dw4%l~nu}~AX-hC{CjCNVES_v;X>+o}_m)C5&bq}4Qp+WMY zCPVUySNCMa8x44_VBz4z8*f1se>&sOM@69l^Ay1G0e^2cO6L0M023hA(D%PU8yX>0 z99PojP$rB!T_wSM>Z_dNojUqfRR!}xiVAEGCkrB>;2GSPFJIiw8O$e5hR~1y{@M@b z`H>N&dP5|z^6&?7;E#%jIm({iGtHQe7tT(9D28gkL0H*oXQm zsT4yR)pw1Ow`fTaUipiQ_p%(n4nOXLwU}t&(MDC#2c_T2uKc?Z6hW}`0K}1KOhM0@ zTCtCN=F4^x-Fm9qPjrz9YnaTJsdR6+5)fEnIc<=0Yw&=nKIq>34gZw!+0M~XN?V&4 z9JE#P<<-Nwm4K0qI8t1K+&8qSbXBdKcNlay5zDmr60|r~QD!hm4BJts4!ZTk?dg~`gtzrgSF7eXqqklQwZbrw=k8i zS>>H1A8Tq%6;f!W#=BXDW<`*44)TG3{-Qp z?p{3E^bUc2+OW9^_<#1UC4Q*|d}pv;Y<~4ArdVVAB-x6mH0}Gx*D730OlgEwI2-Ii zTCLF<|6K5s$vXxU;Q(ub;bKSaG|U1JQhr8rl0KK zXvYwr8mD?JoGNUMIJf&-g@M#3zIW{`SpJ*;?;K{>_D+stzzZ$Ws(R*T2dOdrsmlUK zA|PN21OYST&7N07?XL&CzuLb7oxx$C%J@n%azM?(gZ?FWeqZ#r9yIwZ!JAz8U&Egb zE)(u=f1_{S48ap^K-A<-Cxc6*kK+O@MvE|jfY2``W!qZJN6GjxfM|QP7!dlFwl4D5 znbeqmSHEVNaOl6=3c<5Pd4oahukk0%_-riWefe4umdsZ z`2QCz)9T~>%<$@sA*%{A0O3Rbe(RE;60mjwQYByqw!_G>&y>5gdy>PGSX=JNF3k3U zCKoj)r-i!3mjOP~js&QJo?{zJv(+FD?Yo3HX2mxEu=C<`)2zv+p+5ogxh9Vb809Qs zX`swFyLC8)1M4R^S5V7Du7k^d7nF(CK6p$5@jH0&w9;!YX1aF*GFsbIKlJ3Om3cBO`0pSQ9;`4+0vOmX0zdR@?9}roQh3&zmU1xWJDozT$n- zF24cwtfCzw^9KXUiT%mX?T?H{tYCx+QBM7mbTlqi2t5WxSt{=AR)L~6B!__SRW=!N zFvEDVjhOsF&4>wOHn2l}ijebjl-Ms(l@>t;Y@g4P1*Q#a$N72E%fimk0IAYL2(8?pYV z^ibxgpUA9>Q_Nci9X0JnVH(qNG|e(0rYyD|#-v#2g(k?_T|JUX%eG3re@_j#OqYj( z5k>07TpNqczL9#~E$g&6MVb|y;M*t!1m6^fv58$!5HSvW0z$HrXdha5q6XTG2t3A!6XQals(eZn_NnA15^}Nb20!L@ zaSIP3p_o1v1yF?C2Yu0PHTqksU4=)&S8Cc4OT?GxND7b=Qb9Xn)BZ%`nuo_NYn>)H 
z{JB~(U=)Q}ct2+9yX@3wZ#~yQ0XRoSHL;`*A^^(@?hKXu%KA=rWW^f^T`2hH&ndZg z@4Dl1*z^3~0CmN1%K^W~IhI~iR8V6hVuSMy3XQCiUtwq=^ht=0j<`1lab>ggxziL~ zcdZGQVT0Rh_v_1asy7?x8?SdTq&Yu)cyDJg<>Th&vAo;;db+^}m0Ws0RGdM2P>_ei zYRuxF?R?wMEMT_MLgtCx^u)f{6T(J5t0TlJ$Zqr`p#^mg(u#6T9XU5L$>>Xgy!3iAKSW$Fn!t z(UBB+eB1q+Q~)dL-4^TWc&IYNp)Tp25;?b^thN=C5(Nlq{@C#>WH1uzkxXWlnq!+Y zJekd>dwObuOP!XdjTdggA%}lK!fRa~W{}LRh4Y+W1IFY{O(Colr@DX?XF9K?e`h#hC1r3B3ehfu%lThFKbBE>^?zK>SX;QOCFJ!2 zXY^KB2%vQU$C~Apyu0g-sgq|t$(u)Ra>(+UE)0<6v;*!daFA53Q4k2z(!&a`Itg02 z%p`H6c)&PXlp%^607o4xvIp{53NZt@XHsu||JDl#y)s#MylE>9BO|t+-Xd^y^9MP! z(djGO7fD&wpiH~J9*G<}7S{k9Pa zj{G`y%~|c2T8gj{%x|5#03*Hb(Q0+BCXW*of=C@T5}^<%#YXVlFX+QWtV(C6P;Cxa{Oehnv7W8Hf*ayDJ<$kp_2 zdv0?LG4_OYmrO@Tx7_{$0q)v11eiHN$6iq>CQGI<%M9ERf;B7tm*d{7;y}vr>(#aG zw>3a-ENiF?8vcNPK&5X+P3Ntn@rIRLm{}qm=heXB+3Bey=u)yf@2a`Ey8{SY03j@q zxloR-EXzTdkob?ga)JBTW72w^EaMe{F_WDwleIpLK8Klv9junk61mM7U4JA|2P~Ng zyzcoK-&L<^MV)#l%vOa|bV!)TUD}7mg$1(Kry97|Pm6vNulc?^{i+0MR3Fb6f5Fz2 z^v30dsx+t(-CrHO$UtrOr;*4-YQ2Yms+!-2Gv$LQPxN6>N15)MEu{r)F9x_;TVbhQ zwnpGxylPi|a{H-Ofh6>5^hl-N;pP4!QPkc?3aa~4*z=tW9!Odc4jSn(83u(gh#K%P zYCS#j#qweZaz9w`PF(ATv*5&lu+rhn(!;4I=hiy>DBRHVg*wgheSLz!ZHXeRdQo6>PfgG0ARB<#A zEgDU%CS{z`PF$vcvppUzLbsSLBErZPiJ;kv?do&u*F zY6R~o#3l3{#8l=z9hh9hLUyxxmFk=CRNxN$5Tc6>j<38=09aT!pW!|}(x40Lb$gEO zd37Lu#d}#(WTrPA4mzZ)hL(HTo@I0)3(V2XoZF;f0ky?Sqi@Llp_43Kco z{I{{D!%iChOQG7H%wtL8F7p?>cx|-x+gCB!+`P(_JHFoTlHz)2j>2(6hgR5J_V~+y z{{5#Kifqi`3QTLz=O{VZ!D_`x^-Dby6^`{n0r@7E@9xW%_|SxoX7bpwguR0%OI!NR zuN~ksho&-gV=2ilCj{cIrCc7_3RoRE$ zFZ{R;Fd(CFJ!1`Ulj>Z>V8Gm+t~L}}vrLquW%!hQNuI>4iV`BHR6sxd`-@iD4DGi) zPI=^LUzf^gk#f3MJIE#%Ry^3{mIPeLzjeYduQf`)Uttyg_+ha0mV8#2BWMc?1`4*m3+iR%{?uEI=O*t4MjD$_QPos+pz#PuCAVQ8G!YEYwGLyr==`b$g4UNSSRa2x#Xd`%u zK6$rv!!mMpT?zyG5!}I+Rn3U!?HR#mDc)oaUg1F22Mqgqer9o}>fN?=^L%zlOp6Z) zY{YEqo_Oi_m{w>@@yqg~by_TO}9;y{QO0J0KkfJ_LW z^b9)FC&Fg#==o#!DILXyqHGt5x%3IlFIJG2I~@UmjdvP(&@KKKEY<(V*jWHo*?#SQ z(~5L=wt5@+eiv2$Go@#VcO{%JEP3%Tw&lkshLJRYKO?2lLAW2yen(&!{D~rU z`P#;7P3gI}NB43&Qo)_pcf1Jc 
z+S0}d(WO8+!=tsQOzC_8lR(Tf^?1c)BwKyZ4<^C(T~oeM3|ed5vg!8yFF4eq*fZb$ zt+rdtHo2m~A_sX_aw=gY)-qAd_qIsDMGiTPk)4q#e6v<>SAQC2$%V#wTwPEa))qyB z+aO1+#H+xxnGo~n`tGoDtp>+i1zGD}7dZ%|m>K&lXcKC0XiEjg(NB0e(*8Pir-rWWkiOqNwA=^MwYhx+PnZf6&%)5C%(t9Ur z-7a0ae_5rz$(Ac5vYGrs2_=By?vccT-cNkNKO=H!Uhae04iBr&vfV$7B~LLl<03k{ zTLTndzCPA9#OL4Fdd8nuKB~RcrH1tGYBto7A8pTtp5A(v^jMwn@cip-csMO9yljU| ziv}|#Y4JezJeciHOCv@3gHbl9A{2vl z9vd`>7BK%Wb*epRmPGqOXBr#YL4=U>&1OHzefXZQ-V_j8{6o3-oydi?;Zxa5&p)3)RFs5FdJ@IBKBA9RUx7GA3{8`xkcokSZl?_IQL&M>h$aP{V)Ft9@sB_l`J+w0~{=gVHfR4G?Hq1G~Hn*m1jZ4q>EF8tn} zGF#oPRGsZ=K#fI5YcTqN1sYs=Ho3QY>b@p_IFDe0J4q8h_VZ?Wo9mhHfz-u4XOb|o z+t}b#=457H0+FY^F@4K^T}7K~&CWUbyXl#%QQL}_8@A|Im87@w2D-8a&H9t3E1s~s zZT-H9aN7A{ph_t?@p&&bmRKRnt)4&W;D8%RB8Ux8AOD%4} z8%S<;L$P>GaWJDa9bFgwI9zXWfYhIF1-H%Bu*UA4>Dqww#+TfoTzU3(zl*@-c9tN^ zxV=<3##il5qs(33LkE)HYbelbb<_4Y+Sp+_5CT5&XPzbX7}YB3{M80l+CMRc(R+UM z#@|H1X<*^o_qM&6`KJJO0RMg1Fk@GR;2!;tG$Gvgw;w;45zT89M_=Vm4BY-0mcd}~ zXo6NdsMJD`G}-8r4Ie_<$cVe^57TG|UXtO*&5dEj!+Oik9*1tvwA55p6ESLY_KJz6 zm`x44!?l5ks-(9{uW(_ICTKUJkgql~RPCysaMvLvl~{>|2#KGR_QJU?urfWId2s0OPZp$03zdtdu*cVr*)DzO7Ez{`Wg-|{2?>NV2b`rSVoUIcs)8$& zhO!OPr;MoC<59FflrVLyFLicxQX60kQhkl*7YyYM><+z~qgANUsEg|}QNk?hz8j%s z6~WGrzIB3v^rWTK*av}L6D9Lb-Pqk=i&yh%srVvTxiW7ApNtv!PN$+ymA8SE9EJp2 z`u?u-quLpXUSZoInH;^@9y1r1@ZjPPpdbYM7(3%tX}co{KJLMY&K5~dfXroqSrxbi zq0^CLBw|W~HyfRlWD|wcMi7`;!R~E;vM##8u1-g_E#?*R`*kJzZ2RZr56@_5OvWpD zfhYrwTXPF3zBI9oA1`K_qP_~M>~?XRo9ETu1Gt%p2Ru- z6dvZbJYW+dXeE+QX~9Py`J$QfEwxY^Au(~}J%MK0sc*^2Rh-Rlsz|ltK2qzIp=Fk| zTlsFZKU%qc2G9*!*aF0TPXdYY_hJ0SOkb$g_<6#W=&f~B#4U`k_dnYDlu6w3=6Cp@ z8+@~5(DZRovQ#CPPT!&(aGiJ`xw zagy+9$q8TzgYbm4cXco?>Z1SpYrtkMa=WRE$bt}`PTU10S+&LE-^f~SI)AEZK|<70 z56gcx_}81=y|{531QS;OScb>rVb1AlKe-iMe~`D(8R>+m%4Y#H?csGE76>IZ^V|&X z*$=jF?f0O6^l-(|EV0E!5(Zw($uF1M;GU1j*@k+;aT(7n4Knif$tIOq<_Y7<*qVGn!k} z2dMl%z(jP#KlrOEH1l6dczB=BpB`8X)A+oexEtR?P;MpB89@zD<2)F)@}GaWb4~p5 z{<+2QGyi8(msqdG(4XyDH@{@^i-SJnRFgp@VYQK+-=XS^o-2MzBh&MS)&G}Et$aun30 
zU0jG(`#%PKJKnMMEN#$GCnS;kH?piejNCo#r}r|H-zrOwXop`LzAG0Pb6Q;{{gG0E zw9r>Lcq6W6YC^SVd2c2#d-317Ie?;J*5>M$QI7zxfm}S{PIXm$PVFa_a;Skk##jIhaYC+$;zt^V^COaU^}>`T|oJXxdb=LqN*_pZ2HfNAOm z9$sexr#4uy2UYv%g`E)IM_b=PBZ4$-Nl8iJP~XS7KA*e(@taYoHQ{lI7OT|B>0^1z zWo?{GX9mCsF)%WwCr^LidQ4@fhvjtR&tY=>-fE;|-FZ$w)zN>Y6cKIw|4@qV@coBU z?#u2-6RFp3bdx%H^+EH=q9vS zpBFv#X|Tc%{ckZvI3MEK9|Z>o&k~K4Z0bRt81=>4jh&FRg)XLvkkId zhE!xFCU2u=HiU6Sr>6gpu%c*y6}gCB5sAL|wBY$O%i>t=h6LqXyTrDkEKM;0K&s6= zXv6&YNG?Xt?8#qGbvAsuQ?%7p(+00U2ioc)E~;$c&k9~E|K5~wnr_+I-fjj#=yk5a zHRH5kE7y(_abA=|$G~_1H9f$wptt97vd5UGn*3I;%w!?a)Q`^~+D#^JnGy=qCqlcO zRM#uQuO!g{pU+`3eQTxnEZh5dcK!FtZRUr{a_x6={-q)qw8($pBODA}OU!Z<)dDpb z+Osknk(8>P9lG_|67nWWVf29J-CIIJjq=^#3y!1`l!uw3k#WS#S(?~OMayMkwmP~& z1G(S!-zVMumw*t|ZMk7HT|oIv^%~ctqfs+gMKB&W`~ODy5sIpKaH*i*XwIx#Y!U(O zb`HIMa#-G4=O45|X7o#R^kP{!9Vh>tos}%m>4|1^{g5ep-alTqAXPk7P6WNv*D$jV z4rc!|W*3MY1bnzN>C~&9aUY)*Xk91k4;p;Wk?*HKKZ}2|`jW`^9OrdnqNl_+eeywd zO_GshZuhvk7B(W6dQ|9^qe}N_dH>suygAH987q#jj&DCfedF57&gsaFVpg$Hb|G7S z@T)U54y_2zsI4=rI=zMD;%u6?f`dW|cSpdt0b0)mggo}>R&NMDvvHF` zQyoRRo_}Qs%KHKxE~7dRvfH|MdI|sO%*|eIREI^=eKC+Ag=Vy-W*KGNM2fJPH6O8*KcgsV+Gv!T}aKRk|3dSpp>Ry!ywZ=dM4R$8kQ zkiDe3lT!OermzJhALX1aa~y<(X3bt<+zw3fJ>{-?47#Ox0%tWqT(X!psku;RVP}7^ z#nPEH6ptAEQ;2(Aj5NWqjcTjBPq$oEQ0U)6`WA!NE;1SqqF!WQES;>y-7~JTs@lSp zw`BId(M`u6n86hGg@ofFfxPsCpfGO2a#SV~l;kf$Rk;!T=#~jzvT_GMjQ+|KI6v-M zsh_s0s%LQ^_L$|V=$Sc~HolwqZSDNhuW2qgdWs^MkB{#J%-FFhDZM}SlJthcAbt8N zSe(I(+6rQLja+PR6dG9XEjl9}*w}C&$JbPX#SgEFR}cvtnnVK4=b8l7ec^3b>JuYT z(V#+G*Cgik8bSZ=!W4S*20_2<(b%MeZmm2vNiuvIyw0s8s;Rsk_A{maQTj6`Got@0 zY&*`xqca98EcC$oJZgU$@-&CfJc-u}vLlanuxp=x^D(UTqAqzdktd(nV|ilVPT_Ok zJYxOQ;~*7lhomQ=_tW%dTGvZgll6-m9CWWYqZ;fczs#a^{ZcbHG%ySUo%~!?`EHoumZrh z>^6Xz=>0GFz?}UC4GT|Sr(9z?eZp!R13;$+z@$kH5oXU7`V7*b+GWHSU)R&jfI_UX{b#Rm3jD{ zg!N!dG@^WN;NNXNP(;xkf8%{L;8`p6ku5$xJ^-Qz7~F(6rdyMbOd$$_|P0`F_wr z2$+TdP-P)n;BzUA-H^kU#SnCuyazjUU|=BfHiB{#UN6Ocdob=cguq}u`CGs0{$97g zz8u_CaCGBI#F)Mvr~IEcu3+Bq^45}Q%~UrIWWQxd7Slv~U=%;%#wkzl1G_|)+w+`6 
z8zA%mllMDRaph*;Q9tD-uMr~@?56rEZJ3fl`W=k7D|>#FCQPZayl;^N!8l(kE0H6` za90COY%=xPg!`gGVV3+jX!!W}VB!8P`^T_}ScS1IgJ!}w4am{YDa1W^&CLV z?TlBT?eFh%Xy@T5MA7uz8MCd1T$=}=EIK>d4hKI?rpNvw3@MPz3?%0g=r&?MyX|Mb z3clQ#z8Um%FgN^Msch!Wf1Wf3XoKcHOzZ@k!B5h=`+xtohLaZ?9RlwU=AUG@f3K1E zaN8Dmecl^NBDr#=Cyvis8$6_8bR_OsmQ}HRLahNEE!mtQ28Na-b|>Is6`MBGfxt=@ z_-hFLu@|lox6xx&W>g>eeC`W=>z`3urc-4iJ@*pl9JZv(%L^uv(lJ^Qp583?93{4v z5A$rJQygVN7fN|Wc=iWNa36<+hCYy%Mu4s~1Rf@sRIQ3}O3*`2E6mvuh&=U-yB_I~ z%QaG@hYvJ?R)9g(ZL8#J`5aAHBR_zhx#sAttu1j}vL~JZ{zS?uPR`D$LVwDDH}eYmdIa?J_y9UtUR`BkWJD3POVLwgjaAjV;MDr3Gh{~9Q+4lgM3Ky} zD~Hd?Sisbec;yILyOm?*Pw^hOAYldrj~zNXx)}fwbR!?Ote2o820+yE@!s>OUF(ZL zs?h*Ka)0{zdJ&uHI?iW5Q-j^@Y9Wk{=Q>g&6|X*Be<1sO`Vb}4plG##&d8Woopj~j z@?&29bGWGRAOlN_2v91rYHCp&$=nF!#|%&en_r)9p8eZo*I!#$cvD`EWN38%aAH+c zYjNIi2HhD5%}9Zch=Kf0u-Ow*+et>EKZR$&W3!}o?{Gu=0uT}FlhwBY&+ra_2#fGE zR7~!=2Nh&4yVSRF73?o{p}m&Ic%P7fjzB%N2b6~2`LaXeS$xyjZN>;SU2^E9zk~1R9Ari zW|3(Rm-!VfI>=xTxBEU^>T$h4U7bkIh7e0CDr^WC@jkNGJjc6r3k{+3Sw~qz{uiCs z@lGV*{ZJ5qe~68Vc@5*RY~a6;P0>sGP=G%N3AzD@S^WyoVNU)Ol?{GUCe|s^g{s@@ zJ?9Nl9Tn^+9szUKfa8GHH4<~dXZM@CQqo6N;!=L!KI;$Xn2ZHcPl8e!Q3fXoESB%^ zudj3|mT>bgwEiB@5f8XJI^G4aMbt`PqQ!XmQzXC(k)t@IC8uF#`AcM(Lwva*RrOU| znFfasgcP!6T;O0mH+DBw@HAFSjxY@Ka_-o#PWCIwRHIrLv};@UKy6Hh1N9bmG1%Eyaxg_s>ytS(f3&=EN7fNky z6LF((B;nt@ozSPn9B}_mv#)4UIgXaPGcX)lz+!DARa}Cr$ zW2pt!D0m&XO1WA%RQGPcHIhl@x1tfT438Oj>L8t*-oM(1fsUS}?9&JF!U*W6xt%#T zs=(Cb9owYXrWgV~Us_G8P1Mwy>QnW9$1*4RcMzE6X-lwJ{zy ziHpUa0dr~}P>Wf>H<|vrzHS3|L$=I2Yyc-!)v4px4|RXiKrnQ%L?=Ry;H!7^sm>Fv zQU(SFu*)NN+F!qZ)ya9kSLwLpefpQRmIlEEw5`kY<0V*5p+SM=IM;;RXi}%X%KE>w z0MTrZQp|u=Xw($Qd7;Gi$S`j&b}tRjOk-)^;~TqFuhoDSO)A}N4{I@pm4vN_3~W<}W8uamKD!2(D2 zSi|_+mjlI=UaF;~B|eKz9E5a+RysL3LG7W0WJ6hu z$3?n#ki{6hRtiB|lD7c?psWH65EcT7;eo2=`9F&1+gM?oRK)BdKLX$q90$_(xEa*j zKQ&KrdXz}33hE{WKh8Xc;mI;SKuQB!`_L4 z@Voq{_vzE8`I6dM87Q9r&S3E=K=q77RTXXLL0t>KC;y3DT^y)y%{6Cu_p`9DAP|1n zm#^&SnrIP&YiPMda6(+ZVNRz=ofi8}A??X9B^n&SoJ}IyT#{STIprYVRM>Nauntt??#x(Y< z{%=}W{>NOTT3T8^EOWgYO$b){y1NlAx<s^X(ay^{tqY6YlJ(13tKjz 
z86L@bVEjDmgZEL-RdFE#CTV%-RBm2yLs^;gPudq~Doa`+`(UIlG zWIf*E%-ds5#%a5;ss*vRp{*ahzFo}d=$~|5He4jU4WMuQjU zpt#U@JbU|(Y?j84m?0&$4)mt5C{HSA-n z5dREc94Fa-ny$qK_yIB%Av%y{5k`>q0lu{i@6+GN$_F}A*RGb$s<c z(iqRv*JI3a%2{>%EbQ^m)--P9S*TO>e5w;*kuTRXzn* zCHMF~EsoL%KE)KnWD>IB@4~PK>_iO3!W%=03VeCIGZz}t&3t0L_Uz+1jc_^j0%35) z^FGt;Bd|HDr3zDqZj0Ryl~Ao^`Fo-%mlo0@C1x*Gi}3bSPmO4Ax)j@<78jqxPr*2J`t$C)6%*7&{>ayccgn+cC{FlMu1;HVB+s- zA!p5i!H4+1{?YfbSp?NQp$oTs4%f2^{C2{A5jgbs^pK76ObwcBowDv7j5YW+Y)uBS zl58wSDds%IMUzjd^E-dF-PIF9EL?S*S>Q*~CGv8QLQOuseb)a7UQqE5?(e4FdwaZi z`Xg-de%c%)$4g#Yf)TV*G}p#8;xkc#bP{SUaB&DJJ*!?vZT(vX6d+)s(6a|pNhY*y zth~HLc1AE?Utcu_-em4v_~Wma_C*4lRf>Yd(JT20WpWP*IG(MRcd$7TW>~Ula(;4x{A|-PZ>p-jRFk~(j-b%-X0oN9(6Rh%;0r+f_#`EX{FD>ST^%zxz2A)t`s z^IV~J7oj^7YSCC=*q>vD9m!7L{p9ro(pSq@mjJi&6K-Y33*VupbEeSLTp0iQX!=+N zb7i?KG0`=3av`{tkk*~*`oh7Ni$!~JN1`Qj7LCwd)hK1Caq4)l!+MEUcUPnRV5>*l zN5nQ;yZ;jT<&}a^VedZ?zSL$(GHJKQSBm=nR_|WibneI=uJj$2qeyP|=kq_}8ey6#W#{Gx$AZVjab3~|*B>v)Vuy+J<*X^)WrZVEBN<0RcE{|RBz-;m?R*k}Swuvq zdMUq9#s`@YTa{}noq6?#-Zxtc(1`*X*Sd6!)>l~Sx$A%P%NHd7(wZ2^I26_#X;M-N zW|ENAkqTR~q|ho?vVG4S<5p@P`<;L}g0DsMh9JXqZ?~HzHZf5JER-4?_VE>QH{MUn zrtVAAsYU6uxwD7xl?;W1w?dAx)eDyRu`+$Wu9YwRZ5K9Fu|q6>GxrWjj_3Il_)ViX z+eu?ZD0J&JCy?NkFvnUtiDJ*H?q<@+> z7LIpAIq+ut|3zB~-yK_5At>Lcg+pWN5C$xL8qLT>r@9o5&hvzHhL{n&( zci}}WOso_eJ27dRniUZ$yusXcM5vC1wfnGYdfhc{r|0$pJRf*j0X$sV}?c25S_Rr13GoQ8uwv=T<@!%<2x_Sq_HvU z4b}2M>kpYf<%esmrzYMtRo-!j=u$pnSYkrE93gGhw_F;yaxE0n1YD?nd?QZVM-~{; zktGLgmLB`9N4*OqVS+m1vI8d3QWHV?T}89~^&&mK9AkJl3%{AGixLUlj}N2%czTZE zbotM#ahNIDQuTJtI0M7%AS+(Pbn4g|l`|<#A-e^3ebBbM(m_$5sJZq;SReV-f_ID? 
z9?r732cc#Knv6I5th-zoIV>^$rKeC`eX)8{^?as3>t&HHPnVR~{iO`rk3;##h@oHo z4GeVh6~7o>1gL7{R1b7BDPI5>)vx1XdlHl0f|1F&OIBRPyzKT(S-1Vt_lm9;N-V!i zjBX)pX7k>SF1tLLD!FMKY5e4+Ir+js}+`_4zX)pp(1SBl-P!|5pe&Vk{P zmYC@1PGyuq&W7ztIFw&1?XPFMs}~sgBf~kWwtB2`g@eX7f08K;Js1+67q7%hjJEv1 z@ian1ByObtI_~nqUO=h)iiJtGuYH_BNY>kCFq7}?Ta_Hgw6lUIwQt%0kK_v>E=}vh z-{aGdbxpX{Q=Pw|Hzg*4tA&Rv?M`5NqM5m%B#3 zpKzsIY`4-w=K)p}^p>La<6oz8^K=bcwospm{YU`7*+>{8adMqu)Hj@VY; z7ZVQ+(Ana15=p+xk?1=*s?D;}PdUI5nf8Dd70tp{|EW%*`>E11FlyBS1?`fK0)O_9 z>gqmKaunH>GfSL7g$%2lypfaPwF3hqdbL48RSdO1L%m7uUC9V5KZu0Yl8tF&eG(`Q;@cEqeu@Fb3FE zXlLv>lm8>(%(EQWO`Fc|@T+BgNYRzmpFkB?z|zKhKZBWHav|SfTt<`H&~yKDQGT~a z8)cy;QQ^U_V(c8@LRcMr>Wi6`x8=+iwznM)9=)Z=tFK?ortMt*+gdcQli*bUo(Zel zFhJ9G_t9&mIB`olwMa{^uwtT33yx`~no080H`>gVdyI1xzrMFhp)2ChU`EK!o@UPj z==9`dDH_>dMYdvLqV*kW-IAK<@bI?UIX~nChe24F0>GplF^sYV#KacW>JWs{8BHGz z$Ag7Pq1hJbbS14xLi8Q7RR-KtBm=$RaS@q}j0{b`>rY5;gLwt=#Aa!2y_BEfC$ssx zgph^?@)5xOG+Ly4K3nIEM$D!s2f#4GKhp|NFJIknkk?GmFJgcP4dQ*sC8i5^@h!%i}C@>(L|UD%U}0h zDr*Jl8=g9px!v`lmA$9j6=}$Mm739o)6TkA0WkjVrfh_M1?I ztb?zP<|bC8s0GOsA9Fm8&CG+lV$h=WxdK?K$37F1F%IXvrIt5n>oebk)F8Izwc1LT&nAZM1c!FUTWodqhnu*V()AYNAn*COG%1Jf5d5kMY$kpnApb0(0h0eSch zY&rUFct|)Bcvw4uKM#fzt9y>q6wnE3h1;XtyaT(`7;X=^w1JSZfk327`A5MeM0SQC z%#sK(4@W#`#qRu96{qw}&6ktd-H5qd4oiA5WK8(6sa>(F?+uBp zb9!u#@P7THPAZdz=f!^mC~nxsF5wcLy16;pgn4vR44H)9rdQ{|IuN5=Q&>7^yHk2U zuI!V}3Z!blfmJk(%&-IxTKC=k{Xr0TqNaxD@?M3*)in#?1Mmx&KHv;HVDl6f&?l}z zh3|l&k7&tDCeU~*xg=Nbw6rE_+B{c9tu<7o4xe({t-S;Z>&CCDd*+UWI-HRO$ ztuZ#c?XIL?r7P44i%@;pq=meUYoA>Els{fb5+{c#zN66_hzCi|pfmky!C1!aOiscgopBiPF2#WS~gAB$!|CW}^vK znhAcR8jnr>*k;z)2BgtW{0s8`Y($#LZxUa6H&Fual-AZ(jT{Bs3*adf=oG?51teG( zbc**NV1Q9nG=-W)%KwUHsViC$`sZGU>u-QCo(VlVB%T-skzwQG`T{wSoU@c9;e8+E zI;;I@;eeQC6ceLH-q|oau5g?Yg`SjpikGkLz}+l+*QLqfJA{%%$<_n)bu_- z>q}{Mod*znZtl^D_nAU*IraC$>_}Ee6<=JW-?{WFHEoRg?fzu^{DE)?-@@mM{VsaMYXE1uU_OI8;O*PDh;mGN zDQsdgvY;P7RKZA@3;TprT zmc;uM)!OXOKQC}h!t{_yNGOqdPZ!2&h`se8W<(1FhY(Uw+^9DEscb_p<%feP&@D-k 
z&3G3l^~olh*_H9^q+Gjf{}$dPDxX@%sxFbmk$VB!Lbo-yNowPo-)d_Bh@ST^^^tbbeP5yCFU9 zM)}6S__s;8@w4ZR&&BL6+iPC2y}R^1h}>;uDzjT~e@hW8n%18cx6>a*0;v~bR$>?r zbalx}jp|Wfym$ePNGvcbBEVQ3lIe8pr#z;~m(*1WRe+1MyH&xYPQDjG2Y zW>#1&(<0I=EkNV%fSrO6f@5IiXMzx}+1XkB1`j^+_r==zcnAhS@*@bSsl%aJ&H#Ul zY>pz3tyrh+&85e0K1mdes?gk=Ln+b4N>jTPseo0YfL5Z0Rib+Q|reXg*@84{zKgB?xHXuwsBXG575mIFv$;A%YUa<2LEjvX753~PUO+%d;ZsE zz1vGxs>Gzru`e@%!9PRNf|e6hko34au##D?7g>yw23@^9_Bv{Ge$pU0?M=J(+QLIp z;`^{H{_BN1Ljt^C4^AQjU%Z6>hMtj4Ivk78ZZlUhWhBc8Wi+ZCdw=B{tR6d0XQWSBmpkkERv-w=JQstg_=LUV5?jNtao zkdjWuHr^Pe9tj28y5n+x2-jeEPq{op$FG#TW!7Gs#Ix+uhUjW*ne`cQBDP5v$ND^J+5Ane@5)fFUj7CWyrzP)t7q2xzL!%R}w zcG{&ock+c@>Sv9j%gkx+iE{r_nBa1IO+Ug_mE4BxWgI#>IuNbkkze)vu*{(Y0MflD zik~y2ypL4NQ{~j9&eoaxm8nQb0&fztnRUl7!VqhD5(#mF^J7SOcz{WnqP;!0VowvxjAn z<1l0q%+2NXSEfXF;WOjUu0#FPF0&2S5XQ3Y{o3c-o+)FM_oPTk4a%Sp+P$JJTI>3C zV4Oev<|zdX6U-is_xeZ$vqEu?Z1cLFH!z&5dnX4S46WJkwDID&xVkv4tvYC)n^Rzd zC@eIZXj@eG;pe&rleOHuz84oAb3FbAe!C@iTrq#63Z5FTeR`qx63_2R)pV9?*4QRv ztWwMoKW-oSUHA0^0>u{(7NMe6z<|4i7bgH5a0}7zsj)99+%N}n4NHO)iry%cKp?%< zfvBLZ4en{0VOmzC0$OC}c+eqxx~%yR+iJt3>e+eR6`nCl9Fr1V0^cue6U0?-JiUXS zHa=*F16i(uI)0n9Ers+!r1Y<#Lsk>X`e|Z_nBGh9Zk4?ooS09(oSqKmV)(O+qrT!_ z3hc0&ZR(|!KAqQ3_Y}4b2hlLqnSL}hE*lZ;M(J7$&l%EIQ5{voE7Q%X_6qo7nWP_o zX!vcx@X0fW`R|^+E#3|%l6BLnvW%!VnfW|YSy6WxJzXNFVIQJ>_ zv%W|Dk^3hRBCSMCLK;jzZa*7)i0cq|+KJPq8~8!lDyH{SjB@Ul>7x6G3Wc;E0hjFW z%S{{?dp`8&x2d%%F}zn6=}P#~hQ=SD$q}zS;m{p|^#nUq2Nl0?$PB$r*LgUm``v<} zCgtD8$F_na)24zWwxsuf6KRViMBd{%^2^jJin-#(Z9*s9!pRrY*w{Ff`;Z*zc7`@dWl|1%tP(Vl9zvW2QqH8F zF8%->pqS+3o1hcI!lwg)JPHH=IIlwi)^;dl^Yf%)wO~<9@=olWYsch4$1m?Egs$RI zj?vAtlh!?UjkykMwP_7e;mtqpMgL40`e|zY)24g()ZuLM*U{1}k=j?B}{2ph-lv)|>R9t+Q_b-J3ru6WX%liuJa>5KzTtWiQBNYHf1A&4{ z`4v<#Kp5cwjtvT|qVVfK@}@-?hZh)syFd04ncm`j0y9iUk)B6Pgc{mNO$nHhK;OMH z&Cl=eVj{1bpprX-A=~}3l3UeHohdvtw6z+d<=c2IxmH4d35^jHeOya)9?RMvfGz}N1 zuTs$>)})!sLfcfi-n&fAOOXq&(XEZd>qmYj2odcYY2mxR{gRuOagQG;DuM(le5jJ zkWKxiZ z@edN2I=@R%&?@Y0*Uq6LX8q0`5G}~#cB7X&2#nvxQ@nuCH^d<9N#CL}n9?D+2g%rj 
zh0Nx6F(rsO7nYWQ_FxJ%Zz~XyAZKjv_&7KtgB~e7pr@zT{khb`ZZO4tdto_ezA|Wg|iC)aH6r@2$*pz6Flp9^Imh1)vP8UF)aC88n-~}<3&Cf5(R8{>ohs=|^`yog znSF;uF9qY%yYP>xrHMLYG9y%z`7uCPC*$JsCDTR?#Cbynn#|nBUv+qoEg&h77b1ta zpVZ(;T^T=GMmV668I>MQFk@Oa=BmOO`Q6DXe|VT;m;)!Mq|-jRGbzgIXkFuV|H+ShqljJf)?d4^OuRC(}0 zuu&S33DJ2zMlXJ%5dV8F3&{vIJzjY}P#1<~tPOw>fZ?LT91T9541acV^WUk@!SyVD zrC))v(x=Dqr;YPoof2E5v-UKNIYIr`SOe%RZOt}PBe7a;Zujn*HsQbxea~hP0SGlo zOnf?Sk4#Jqj0I?GCGlBc!nhk2%Ba~!Zwt_7g+xbp!HQO2sCpxwU0vZl*W>b++lI(! zK`2t-sBbe>a|hXofc4VF0i+X}@LZu|Vy2LGz}ZsjNh>qHVJv2Oo? zT|be~24}2-?j1wu?{6N(XWs*a8xGX45Yl~QB^A&9JE3&M0?||O z6$Z-vQ$=w6Q1$s6M}kr#BY$Y-`Q+jw2o#jv;q02Az9##(P{&OfhT}u}m({$?q-$`f zZEvD3R&D0$`s+VM-~g2$4y>sC8*jYZz3}Et@DHjPvp?7$?kqBAYnZ%1s`rYExvOl3 zP^qsbL5P42iF^?AddNXa5|Xpl8h+-)LGo>VGMiY-*3 z)aUmwhBAacvWJlh$Vr)0GVvM32}fG4DlacM8Hv9_5fKp?uj@P=O(tHX zTl^7fV`-H_u8_5?uJ8==cv9<~4)!~(Is(t?+S=M-)?(n6$Zgi(4t_Ib2N%i8-1m+o zL2Ihn?I}WKWen4M?f-a4qWc_tKMDyYODGRW$}nVw#vYN)hN@ z9Z%|rs$^+kllwhD-kteV#19jBq`xxJ;?qsbdbB@Xb8H~+I9z93T-;guT{506nIOY@ z=coQjUkAo(d6gh0v>eGR8i&p5){N%Mg~RRZvM2r#^pd|>xd^}AX3^8jA13t(>0EK; zKXpxb?lFp5Lg>wm8H~T%Hk#C@1ntMFszlJWj@Nif&3d#6GoCE9E3TiLSbskPU16xh z$K$rCDX{@xIXSxTQE5+{B#x*|npEF`$n8&q0e)q326$H9*1r^J%6MBqyf9v|O^~e? 
zXfgbu85s#r#HwO`FrzH(Xd`EE#@95DTQ=}C#->xv15dDZYk%1?2pM!TA|n3 z8{g;&t{Le3?awM(jvtpibKk<53#dwbUSK?Ssz#zEqN#HG@1#7}3mHq$264#W;xxyg z_$(ndk!}Nb>e1HBgZJ8woe$GvpNsDDbllKXLuu(0q)tswk0&@La6YDExGiwY@2b?e zt>rOaziElZnh}q*k)KYt5Nc0o1t4uZe;8<&znW8XGELk_# zt|gdx`I(vUnBmeqpHl;~)X?-_=o#+M{3shN&?MF=(Rx%K-4z(d^rr0Rw9I!#C| zf0*$|0jKHenK)D(A)3rxXo%i~yWzn3Ul;=X6xaI#oeKWt2L4Nv^i|?-*RM`diQ?&& z_LtBFm=F4Ao{$6Z>IeiL{X{fI_$t$#ZB`h8G7AVGsV3xi(`A8& zQMbLkygXkSOTW-NdJQS7NK4*)=g-X(yS5BrI(MAS-;S~%xWzc1J$Xez#&a5a9_8!f zGrzzXto!ku#X>MQRkG;fBezOxV`xIWw7)2dfa-#Cd4bsiwbgD0gIpRfq|A?pM+;yWTEo{xt zLx|KA@}p*u7ijz1zO`6n7^WR2gR@XJx?1zh&*FPDV>!+=q0rNB#%Glod_z4y+Ld|t zmvsN2A=jhGSsSsH`1@=dQ`p0!tc9ZYM=v@8a`Yk{jlG}RceCge>r>9Do}+fP;y)dj z_226_DZ@BF@#+<3VUtk!nNq#imMUMMSBnB3BEOS;v8~BU+Gt{WX9p>olbe$8`>c76 z7FvnzWzEGh+moxEnDj1^jkxjY2aK4uYNdSm_r<1?tcC3QbrN_?3Hv@=yn*)P(#Pkf zX4h|Klpd1z(;xSh9XPH0@n8Q{kL7o^Kn_w?*7w=g3hw?1+_0-Uz^O( zCwcAfeknD!L9hUUM#m%Nf@G|$bKd7rq)1_PpE*_oe*Qrb<;n36Psh{Y<@}_}pT{;Q zy0Y-Ey?bJk>-E?YOutUYD2PliC-?71Dv6wJKL^@97OvMo}L;|T@pi1srqr0`+eO%}#vBdltOz&hk{|94l z8C7M}wrek>yOHiL1*E$rMWh=9RJyynQ%WgG=?3YR29fUWZjjn@J@4~=-x&MX{-KVc zOND#Rd(JD)^SFqIqlSg%RkyyzYwGj&W+4m^sAsT%GZRn3MT6%?M>1Lwi5^vb=d8QU z^@axmHSXT7_=%^dl-)Ex+8$PM1e=m+(4b_I~4@ z4Lc#Ji^7}$Fl?*mPq+X%{rc1Ni(A5|xzbWaU=hRUFmwARrFF8@4D08;8}rXgh!#$lWhmY0MKzTyCKcIDmMNL~S8z3`ezS&TzTz^8&Qg&Aj9;rU1EOd4U~kF6+;@pdobk>@|LmsrPIR! 
zE+HkK2zhnpEzi>6U>!5kmQCK(JQYTG1)Xp2wE_N;ZP2X9VgKrb@)=E*Oe>jwiX$kz+SX1ai;iveuGoeN~dxnMNcXO#+U zVqTjFOwrpQF0~Q+lg;}Gb!OxJ^u0w-kWZpfaJ|^c9a}pr;tF6~y^hv}fn@oJ;Ps+8 zGI62>zQ>iU^ju&UQg(|fdDfTSD2Qe|R>(J{h%m@ZW>^|gF||{U_kmsMj>>A=nReb^ zCIVj9c&wX0rCHSao#f5$o@NZ77p2V{j4cJlaOzF#I81z6Fz41FHCSt*)}TJw925ZS zp}@F9G8{|qDl5U(1IO7Mqv3$tV}9G)EtEYk4rd%@jq?7o`|D65K5X*fzh(+MU_HY_ zTvxI$3g0tIE69XY&DQ{*gmfdTx^k<}AQRAwd-1sbGW@WGr>=Z9Fu)_jITgIhY7gRT z%9JTB-njYtFR{6yInd#h>B6ok&-!hC9}06J5MW*G{8g36vD)gdabI$thI9a(3gm~f z#iFBE0R|h76Qi#mvR<2O=Sa!b>;j*rm3{2at4!o%(XeJjF+w8RIFj^2cp87VsTQl( zoXE*Jfjw9|D~bb`C|3vE5=KUG*{y2K+=*pz7f#oUU!0D{Lv)u5oCUoe#$VUkekvb- zrQd{xaTqNDqV#kDZ&DDnbZ-s4Jevs-1+|mF=PQb!0Yl}TD5DqQrcDQnQOUHtwv)!T zm)dFCZs2MU82tJ>F^|D5p(k@j!xpvG736>F#hTLd`7B}0_Z`LYMZe965MM9h z3zE*Jzgx54?Ho+1wc8bgJ3Ec1u#Gkcd;bGe0|zxH9#I&d6Ix#^Ssw@vKwSuWM<{;d z(RS1OQd(26ri;#T*IWU2Kv1p~m3~O-&s%MESN(asfNoSaOR_zt6kCydnfNA-WjANbbbW?jI9-b~ne?r0AkymChxvFYcLsYUdIV^Hd5U z?OYpg<>8(nJ)=^ak_f>*eE-1SNTnU;kj$u!0y$mpb@bnn{$YeTnarHB)aD(yq^Vp# zCs1vuYvbmC**J9+x3T&TKk>Wellz5)IUho_Okk7qey-(=!R~k&FSA;uRXT19-@`8rZqb2;4up>D`jPzPpqi>bx z0sG-{6Dk-`V(V%Zy64zl=SCihLN>-P4f5jr&jW*;G81{KLt|>i=^l??KbXfWCK30q zjJgp0bcg^3z8`u7MirI{oh+Z$&S{9Bl;l6sB@_Ytn2-|(K+$zcv(VungJP=^81aGc zdHoc)xk>Af*NtbAowocI$qbFxLcvk;NVH?Wf^LEi9K7$R8C=D%j<kj4i^B*igmB0W3*#5zd=8v?;W~L6mTc#I3ezOT%gz@V+ZS0R$DbtWFoJd1A z%A~DY(G5?ayI5-AW+9a}zyJrJ}7jK-G9 z^a6Z<28zepAk;9W%AE;JA`EF4z5l3Rdn*h4;Nr(reW`Kb5Wmy|%_@f}RbUzcUPb8& za&P#1Y?0c$}!*;z&u^P)R+)TYCjRO6nQq~JbpZ7T%{3NXW z*wEalGi7KypB1{o3jB15$f6Z88wQSMs#|zN2j%JOJEkwDVJ%0yw!kt+PkxKn&Xsu` z5V+E@)ZD;vScCb#Kv5fXJMCci&$n1s8_X=>?(jbpzU%1@{S9|gh!8uQVi@b+kittC z(&+~i$Y%>46@$p*Cf7sTy5?}|9d|ODA(EL5J020rS5|ElP`nB_O2IHemrarnc9L$= zCOQJ^P6gJTsU^(h#C4_HKd>42gW%e}8(-!?V+cd5#_CNyYy02&Lw zRljp6-FZ#d`+#VR3_yn`1!KK06J?D5xf+^WeN(x0yI&Pw6h8Ln|FW(&+5zVW=uk!% z$yd-V21+ebHZ~YH**8FfgXu_sT)Yd@b_3Kj^&`)MY{GfW|0 z_0pq)xBj6Hx&0M#@7U0GcY_uOAcsCYJPLtfcyKcqGSNERrN@a<(|2^2ceh03q!Di% zKfUOF&8T1hCIL6`YuKVG{|Mja5hsld|mJl7!yhIqmGMK>9=;{<~2*}gr}1O0&N 
zr{b5zc1H@pyYFs(^n^7+RMnla72nO-g7~>)%sVEsCNeUgd2RO<)RAD@^O0h1kej$` zF#<|2P+Axt{2Q6YyXJ+gF5P%6)RuJx_&ye^F_*aOfU9v8TZ+?cn?FsB|H^~8P>~En zwaAkXwg`Ga-=_M=Sh^(D*!A~AA@e}1c741Q3A9I0FZkW{T%|EAlx_fJoPfcwcf?2! zZwr(QYuv^aXmNrb8{vl}Ujp+E9~hJc&8`QNigPq8e0JwrW9K@K29p%PfzfH@>xz#2 z2>Ekt{bQ2abn<1Kd>)jJl_{hYl9eS2qR~ik3XIOxclX^hGrRcb;8vR`U^_iGWAxsoQZ-_85N^1lf~+~b|Oe1-R{|L zzcHFS4-N}&3|Mfr$x+;^-kqv>C4+|Rv9WBihzHG}>4vtUBi@1R4f@)1+-?9XkvGkB z=^Yu!l!oe{VG{t5YzrmS|0(*&`%vWo>tlGwVzbXQK11HvOyWs~MoE32K7u2Kob)K z42(S|C`TUTF=|By__=L_Y`a|hk>h->|5!)U!(>vnGgJz^w1t#By7u-8NAEQLJgso; zR5=WsEC$kz#97)sUGk&tJtIS0R5zqq<}AF#&9vTq2YD+nmT1g3JI!&vRaV?){)r7D zjMrYnNDog6j07Xvej~=d-7^)0jxZ(XOA;nSoIEU zpuL9eD;nb{7L|1l|HyF#^mQ=t2Gfr4Q9OH$U;4 zID0xDO!gJyA6y9eNc6?%j|2Kp%@$2a^zSfZiH#6r!owwen+cb@TAM2#orY?qSU@X0 zoeSfD(uck`*p|IW7wkwOVoG8L5PLev;Yi&H;^S-MK`AbpG0_eR%Z4&Z@V@f-yuk@a zj(y)^irlhPN){4L95a6v8H(%hJ4^Lb~Y%sE63yPY~r4fsUqIi92pytJfEf zqBeyw_Ft~|?9A#s6$v~hhdV#j@;Rp~o=`E5m-`|=y;7-?m#(#$L9iA?sqlAga_O-> z@adx%(pYmjWSfS;UX2Y=A~rt#lm&Ho!5$+Y_`vz=O$Rt8F&7$$Zs8fdNk=+Pm=;U=pvK=T zn|6R8V-6(KB9E6Jz#SA|Ot^dXPly5goq|F)p#EUv(zF=Qai0C!qtq^R6efEJPsE1! 
z5l`vcLhg%mc81KBSQ$Pra}nbU#vTz661iim2tg)%Z!y3R_LX{o?B@7B%qOa0Yw*!| zDfKzJNvbR&6FjVF zLXj(1BdcfwK-&Bo>5fo+{pT(Hma{}@;y=K7m~Hjs1%13quxe^WUBvRBRoel&hJZT~ z0fZJn%KXvP444w2cnZM$ZisF94M;IDMH3n%aP^rrRra0^9Cut_a#l|L9!|lz{YQqS}xbt-D$^Kep zH8s4phjRt5v)-44oTf;CeNtsP!=4UU3?Sa`^Mips-H7cHZwIQc5Ok7C>jfU5PNev+ zECAF-gFsW%@rcKEIqJ_JsF0S5FO`fiij*Ixs;X+X-B%PUv<02Q%l#=-fF_X!CUTUk zns)gxXH^OzH!KLr2rUA=(bqa4sD*j}a2b?0r`hzI@IX@)YztXCyVqbcs!gH;3f6Xf z<9bV-8o&A*-~;5ep-#ykWGYTy-ym?9%g&~}-0L<_p*>93tU>`j+cxhxDT{p7w2GaC z*hanj&j?$Z<$7JUwsWDz0QndHKpS4tGX3j4?(mb3Pdw?&LBwj4U4$ia3%0Mgw&ia7`{$K5VMy-yXHD7<-=6j2;Qc{IY}s27UR{F*q$`!!AlfPp zvztj*1U11^4rkM5Q`$t#M*js)jtb!;aA{@)H~OH!0-X)!@-rbti>+|bJ0>Bx4-30X zC%@-P)g608!*CTt6y1f_R#3~-H*yK1-(Ee>37*{2??1=3AG4S2!F`$Q*GQwLWDNU> z6hVy=VTj+3@`RTb;X}q4;JG389uA%%r{4fx=d*TosFlAe_%)b^?8|R`A74F@6otgU zdI`x0%VUUiDa~U$jw#JUV4`YEU*B2>?}}GooA(xSJvss`co)ZulUxh$ce1uptAGMq zg9vVu593wK=?n9*%xKUwhdy{PNM2*{bxlnT3N@$O)5I7AHG8q9Zz z2K;JocJ)+LAcYg*w{Gw1Cjok5btlXBH$arcvg49CXw-;kpzB!MQ7|i(WSM zkFYCbet^q8tf|)dZCX6Oznov!65C%IGG`WE+&7d%<{qNxbv}}(;Eg23EiU|yUYm0$ zRJ$b8`&wJt=!FALcx~;QiZ3XjT8%gKF_wIJjt+cBpf4B#gx_lbDF$|Y4Ydjv7Z(f! 
zY+*^jfPkK7lA)Vu+1lC;E-C~yazV<{fuqBJ2TD1V3vpkyUg7|HEQfIy958E9ZhC)} zCVM%ueNkeOx-;2w=S#UUiwzMA#mM;6z9J@M_nd7(#NyGITW{(>`jUehH%_>a6a!J4 z6WKi*rn)|Xw&=$7%%mvabWRZ$$+6M)|#B`XUhlG!!t%{JIlko!K^16esZBE;~dfp_W$@Ua2A znueC>Op>DXjo^V{<<28?c)PIY_3C%qwoY|T{rt%rURRgwd&Xu_=d6%+DU%qX!eR=6 zDN5bu@^ zor6G<@&Z!cw9gDV$F!L#*9Vx%2tfN=1Dby5Wf2h(VN^307a#8r_6(puawuknK$yw% z%xB7KYw^IY>D4s!4hC}%7{gOio$pQfv1`$+QXZd*5dFefNa~ucXtg7Z>&DMpeoB1{N(|0!b7E`XYbhwZ(vdatFM;ygV=y&xAgSBLznw6s1L$^qO>n zr#xO~sToCalBT)_(1egpQwjVdnmCcrG5bHZsB1Du>?B{P6DbKJ=q7~6EY12%Lmrfj zMPK;G5L9agh{+!A4JoRHNeRPHJFM}K^%IRGYKwr%6C5Q&U=fly)@!>}F-Qln5EdhS z_&yQ>{6CU-(t4gpb)V;|^Hq0}bvMIXUV@YiKw)wK9rxFSTF z*O_ss3M)#6MqWX|nCCkOKtffx^?;Kc^6TgAj6h|XOwI~HSM86K%JcI&h598E--(2Sr4o8L?UoPWz0MvR9FzcW2~5A} z3+Sjl5FmM9Yfj%}prygB?(sqJ66K_LEEMP)hmzj`n+l9usU|*$%b^w&Dg%gtzt%!X zR4NRF!EM!HUeo#c%{ri=TrjVcYIEf)w);Fn`jbb2Y7uId3ZTpe*Yw(>iY6aEy6SkPlnb_+A!on)&1C3Ng`gZ%KeICEnHr>l1iPw{J;r#nyIM@*;1Dir) ze+Q*%`dQi8Wef~5-}NoqG_YXWXYM535;ZTyw6q9O@V*A4jJO-{74}yh{k}>^p9TuV zC{f4DNxinPMXxqZeh18dl>k5mwl72WIRG3&QxlP=+mAqu!xHXk({ct>tQ;?_=WEZ3 zxPT@~5fGm+g)gxn?ynER88SOSW6cf_{yfbp>1uPGSH*!obwHTFAo(XK66L`4nLKBz0n$%U`KH+w@o zVCnY9F=2~)dDTnh-sf=> z7-iQVQo*ZgmbT+2dSQ-$;3&t!*pK;ZG(g&yicJ{*YM;B~U}G8M*YZQKV_b4(1Ohto z5B-G$mU67MQbp|$a5;R%d%1DFaE$hAIU)Zd9(<&yXMWetTUP0w)p9+BmTB|V(BHer zDjsgK0!CZ`=Oa?P*2zsZE5#MRJOR01Z68->JVzp?X5;F~w?A{)9FMc5O40|=x}Q0e zDk;e-TA|srG%EZ#%FE9)wzd!;brG~Vn&SG&Gnt|D9xqUYAb?ULlr>Gto$>J|^i#wC9vMcQYpP>Ef+hdfT25#txcj5}%i{`kO-W~Qe>E(2dLwdd7robN zH_s^WHZwC{yDRK7TH3GE!AwMgUEs0qb$sR}_H;c#F3Je>#9W2`>HW5~_*8{Oy!H`W znoRC{Z63lv3gb7jvak^Or2tB-oSYm8gr~W_@nBXOH=m`O`MnlnByDb|mbjeltWvr9 zB+N1_273xBQlKHfAo-Q>OQak|3g062?^-T=X8zqh^zWT3Ps}SbvE>DAhH{1Bs*2lE znCKHJ+1WrUH|S8A3IF8_lU7nSDMlz6uk#VPU27845OmY5ubW&p-}w6-v~(9k1V9|51I=;c%&eBv#}+sG#X!rVzU6;FDh{8EB$yW4|nKY}91a4M+cS@V;5u zx@EauUCcS*C0ln$?NwMzxXjIh4YHeIiDsS&crKL-hrhP2$veq8v1wUSN<02>AH=E9 zz*x^Wpf_b~MJ5}A&p`iDAA81|nO7{F6}PF1&Y_}su5W9!3DT6&Gx!qa2-0-QOb_`g 
zzV0Du^=TbkA!~;4#B4|qph7w>&QsWCV}kQIhRV9xvv`|-i=$CHNfmlUOOnOAG%eT% zo$71qat?s&o1M)f{!0z~jg>j<2t2WH;jCZTM3Hr6QJxg}Pg4Ww6s|i0{!J#rirR~p z=aA2+-&JXTRg}<%%;-;>ATNF&&@s94K>ylOQ!Q)2XuvCVTG@usjM1s8$Lc#XmVsQJ zQT^!TkfPo=R6T6a*->hx!3Lp#n&n3~hxmoNdBz1Il9#-5lcGOJsXmPst87GkQ+NFV<=pIjId^F| z5@u<8im&+nY8dhKr-61ms-OSN?uU{xZVTrl6<@h(2a0^IcjY_D2qgl4r1r4tOHk$f-?-QQ{od^4x)7KnD$(*tqlPsBA_eEKcO`Mt zIz=kyp1RDAKhX&AMAHPJmGv1%BuJa{B@kfG8_h7|@oIKlbYmaFwjZ9s;btjfN!_YM z4jc8ptt3zx5bn>@7t`ZU9>yt=jnn4F*sN)jsSI>qBY&%DBE(G^JFvtsEx<=+mRxpl zN#gjWG45qLd_w23c&?o8F_uj9H|~~Y&HDb*>u_Jbm_SM-ENPCs!|Frm-~f>CmSKAJ zQWFI9E6OSTz7h3;g#eJRVAIl}sEXhphZUt~6yK3YGU$(+E0Ul5jU&CQfl=aSn9W8M z9%}??0<90ue-7Efqzq>@I^V{?UYmxE=2wLhIMPvW_G*nQ&0<3tgZ#lKg^H3TJY^iK z@%~BfyD;63!|MFNN#*2Kb?@7Zk9dyPs}*?Lnl2tXxSVIP;zh}%Z1mC zH#s-Q4PEW}kboXqn4rxGu7*Fd8>^p22@mgw>uk)a1fueBY5If!rJmfWhKy)d4c?an z+$!HQSu}zGNP*k;WlzM)&6DNiJQKsE#yt7wq z5W1`OYX?40G?9^eoQ`M5may)BDx2;U=1J18^bWC$RCMwE%#mH`W>}UbEFh!oBL|z7 zg200cLrGHZ#K9q6m+N|fDjE0qlb9Q)WjV1ws8I6f#9n|)iWw$-B8Of4!2V|pK=(tlyUsFPE6V8qN&l*nJp@3#mGb>4asZCeT z$d&bb7AEX)x$w_Z_0IiUO;?98o4o_S%5JQuB#d5=A9(f?ymj@G{3*J~x~Jw$Vn-Rq z5)BdurlC$9&a}6i*+&iGkID07o#DeO2qV?5BhzeLv9tPI&@a~ znxyQwy41iKFHJMx=Bohfpm`0A0ADn6t0$#=Mv?)l>AXh$fG#D?5l62XBHtt^a5exn1#Kg=jg!!4hXU`ES00~o5-#MvTFK1jZw39VW$9F;N zS?a*i4}ARvN|AUcOm&r$%*AsT0#2@Tr$}{1(ZDA-sAhvq z#CFNO_EQyW=JfWE1!*KqIo&cVR2Aq2@BUG;*=P<<;vvvs=`X;Fs-;M0emX8%bN|fA z`>D*Pg7qw(*5ZE6cuO+vDF6w>rA#uTB&ks-6vfayz%+o89o@op&S)pYbX~k8HQb?Z z>&k_r^DIdpv&2k+bmhav zI`jla@vl;J;-Rz=SaiqF^}7)JQ*io&MD`_&yoc^jIi0WnUpCAW;kh?Mg-&tGVOna9ToEY;19?6ar9; z@bR%rqnzs~nqErD^T6zL!0`Rt~kbst2rrHn^SVY z3*GIQzuFY1C!da;9YbhLGJ#f$wNoj*9Q7|th?)G9z%Jh56E(JT7jD^3xUga1SQm-G zOqWsV`_dIGt5lVrL{s6bMuD&piR$v|yV(r!kbgbDJu_{8TwxK2ImuDXV928KWeMBJ`@A5Lot*ZAPdBfVb$#($>+*@k1Jk*zmC*2$3+ z(T?1It#Sb@iRljv4AR1C!{*Iry}{}}{b?&6PoGP#LH8N^PWWH5&gT!ETdJ&^PYsQf za_l$R5u5cb3U+e)X4pBK35Z5gHy&~cDbm*G zdT9Lmk@0?qxhBIK;qG=zGi5Vi)8SRk7;EF^1?62P>a{C13kk%I^B&HC@H+k8=h8F9 z!+@{y`78F(VqzhrKk-XD=RVzBe%8)c7@|92l2Ysr)MK+L7f=&9FeH;rLtdBf{`{uP 
zqOMEzGjg$yo}LioXysF@2;a~P$C@&fp`Lsh>w(F6IrHf}72LMqlGwg3+JG-(pIaU( zuC{{2omYPMAf#O(W=aYFxWpee=K}S?gCvZ&T(mBenTZ(M-D~xayWW^>jf+k)alBI zpyHaY%#f~pH?`&@<|MFYC$Q$kw`wOKRB`OAJl?vr7u0Vcea+m(6oIugl(VaL%@9E? zB^R`0-o3|Fh-DTb@pY~=47pe5P&FJri4aTrJ@z-!K6vZicv|EY9}&8rO%}6Hn1{lC zPYp{Yjmzan&329@?B5Q^=6$i2bX=$G@}z!NIvU0q`dxm#jGJHM!_X+fXBuL00f8U@ z`GhVah-3XQe)G+!PUpRv8ll?buUS6Ge4&`JH<@y-;HO$ofuPh>Eu|~!_;1|8;c2zX z8y!+#PUp-5i#6EC$H%`dm=Uo0){yrZ`_d7WnPmzX%h$*pr^s0HyG7@)10vL>rM8B_E=K*54Cnz%z3+F zYqRNFLrwGVD;p}-m-2XO(@kiMhV&`bIS*UaC;Tzp3a?s&uxVtz`o4w}R};{0UJhDZ zB%Q2Sfh3A#zQ}ypagol+XTioMZEydBG^bm_Sw6zJF1f>^ZvAvka<=Uw>iYWnc%1?- zA896x(fdoZCxNAQ=Y-v$4|wm@w89+SKT#nGOcv?)aY5LU+?_G`x?7jPi(|p&Pl*)f zjH>qv?;YA09V`4&^5&@Y~nx=M}-Jx^9W3+U~FhN zJLc-b5eDDU`BgIFRYqo~Z|H;qT;`x~u;ZuEPAeXoD&?H?8%a}sk%!1v%_HSK51=KQ zlrq*UsH<38HS<>LkqC0@MeaLfHuO)|d~0Z-VEMR0+5Kyo;dCmsXP^6d5%^k5l>h8e zBDD;1y~7X%c!mBU!PpNBj@Y;)fy(-VLtWFmjlN3siMqQ@-BVgm_`*ZIIejSa=r&HFcyMW!tUnYmjV#->>sZ5E z5(p3##ml#!AGyVD7s&auJZiX@z`pRw6M5$z}ej& zc2$mG%l#^9@$3a==0+!~5MNC1-}4AMeY=VscbI?}O##(Ibu6dz*jB-!ugVOfNZQQ; zt;%u|y~w1O0>ymwsVF5dS+mzy45Nzg3*@ZW%CpM(#tKfkCc5rRrOlJzFA{gVZ}d3 zs;%K2N}pmv%i6Ub#e%53R|5K0?mq2s0}fs&20ZyX0U9l5c$f?tm1Kz+kuY)qB@#X; zK)=U<$AJ^qE7x!;v8)X=ReyXQcQ&OqjmnVL94>I6b578#^2tM;%ijfQE09FA-nI%t$Foi)lf zCzC~_vsOHy2&k8|8;)vr&w7%i|F1gyc9MYZiWQOdB*Y>5MN9J5Xo3p86l`2!KhuHz z2OsaLXZDo7tNE?=n9V+=k*vu2m8KNNGqRe?B13Hx*|A5|G4GP5iq?KH|hUm?_*o;21ldUIiDV+x0nVOlz|M$P>l?pw+Eh@vy z&(}y@oCpkh(Pw`=T$S=Hl$^psf8#ZqKpTu#APJ`&Jn$TrZ8y8s^WMr}c`zuX8u~>B zP(pebXc>p*7^>OV;<2WXd1l2NiE!hihPDDGgx^^yDxO|ABcOyU{^zqukgDPsP36&n z%7*sOXF`jMtz`d}d=UZbr)$~A>|%Ri!_M}3fn{q^?Ne%hY57?l95&M3%kv~NXyR~B z6EBAawhSDM*dO;Cz^r67wtXI|5RviEhY%6T#wC0sy}oAoet5XS=W@u1LB8l(1jZa0 zmj}~wzEb{?kJU}?NxZbM7IoYeypQR zq0x6unLNl-&L0l+d||7irmsI2+>L+ROfnCRc+6{ld^%a^UiNWPe8UQ#%wioyWA#(I zpwsB*PLfB8 z-Qd4}Rn7D#3P#R$P867jPg9#b;K8uh=+^tE&S2CX00$c1Z{l&YT$cq%i(u8`=^tQS*zi8uW15x2UU6U-Jt%J%LVG*0Uy(3sUoPq@mhv6 zWVs=DpR~PWy4ljqGMc}vNV=hsK|ix#04K%8PPtS!LiAVXlNMfbp#SNt5FL|LRkf}- 
z&+_`mu2I0+J;wDcvD9?+^{PAM`6X0b-F3&rSe0zl*~R}>zx9Q-b{Zo)d)GRA$@>Ze z?}~aW9+Icj3zlTowAfstP;3&C&^FhlP~-03lp^fQCp)7fBb|TEP%EwP%|YLJU2j?Y zYjX1XR0_lTxNPPrQ$>tnP4ZW%ul-)W56%6<{XMnXE1Lh>zP+)HaA!h|_obqksc+*s zJUUoBKD20Xp|-Q)`ChGu4=e4I^xxf&B=D}UX$ z@mnb#g#UuB)_vP-B&|~B78aZ+tnh?4|Fo>;Tg0_}_tn^czq_Rl|q%Fqbj^Mwd=5kpi2i!eYFIUKwim2*~~XS1=gWoq^gL0gR8O-XDy{(DL7u#Ag24~_-B z(d}&fF7K&G#knM+aQM=KE7M7HL*sy@=Z3k`Yu7EWtVeJ`3}auC%ske!kwCY_s$j6G-u6>VWLoJv zZ}z(r&&tm}C@xlyFI}&jNI-6i`totBCN{-@JUEe`;ZKXZ!f<9ClJ3)TtP;ufy8ja< zOq?|1hA5_JOz?%q5BrS*h~F!bdyL5KLPlTTQG<1}UN}<@49I(*YAsHG3${Au19^EN zdl%cbM`j!xi`AVp&{qSZPVj$0;tcINOESjVZXwMsCmk8B$G^|%EIGDHh_sp*(T^6K z#Wb$%75_bWSDXVwG#J6tiVWmBg&`Pye|x{oHtj)t?xhJB`)XqDNLg0dILqiX+Rch- z4z<|e38Fk4;9RU1!hx(^vSP+~p`eWL>Gn04dHts(2K4M_3;R|l8boE5=Y*X0mhq9f zEuozzxBWG$**5p@V?F{)lSHk@ZMd$BQ>dV!5)A&zHv0EL;c{qI)N9#Y@pgs84IChR z4!cO;JW}4t{Hf+cs>{zO@FT_VqJF^6s<`g{^}Wvz`!!YGule)Ax$sh43+Nypcqe?y>25=_>KG z;_Ufg8yOs1_pMf1&@mUsDNfp+cr@7b!w-k82%}{9NHI!vTQ!LKf3jDe&_YhVwiLub zS>r#KQO81X)ChxSGEFEZ48&u39u*VAOR8w%WEqCf?X#%Op3j2xjd9HoyCr>M#T90p zU;vl7GW^&@>O-%So@ZH~L*B$5kjfZ+Xk6A|C7>f3>416eT(I@YvpVqpWLPS%9Wy+P zR^e^+;7Rs0SAz<&z*Lbd&ECJCIn(}Bu1!JwfamN7D?Ge|VhVqgmw&03@^`POeLl@C z$6qr=f~|LV)Ruyktf`2!UXGG;AFV7BmlYlvd_iG-u${>zmV!I zXW%Pwe{&K{)5t!-Q#P|;dV0LcS<1?l+bD253!WJq(J5Ab_@IizpR9Q8?v~ltAxqxF zH6|SSO>-&;99xmyzfCSelv3}GFbxPA41I6mNoH)MCJ2-`97o@Ug+~VuZFS9LDgKUK z8R=Q5XbZn9ifoY@%<%Ty7;S5XfqW^MUhVzVN6p;yz{IJwo(X!-xtc429N-}Ox$+Zc z*=@}^kG|08tfiOmZk;jzp$Bs%D8D0M!DiQg_Lo*WL2tF!)p!aRB_sN4qujvlBC!F8 zW8SN(24Kfx0zQZ5>6WbFYU`M&%3zAl*8P>+Z+owsEj6cuIkJitevB)U={|klo8NO2 zpH%FqD4Y*w;3;+G48hI3Y1rR)ce!g*=qy6h z#a%yeTGfpQfI8rRLaLJ(O?Pdnlco}iWV%;f5AeRgx03XE=Gvpyash@+a5-$IuT}p? 
zaK~{>B=f6stCKcgBc|Hyk9{PqaP-|<4({Pp3dS&yIkTHO&6RUz-NO1ss?m|F%o&HH zd9difgnnahChSXv3BlT7$gKML)`cpDQ(q_<@01|U%y?(c=B6i%1BL|9Oa}}VZz43w z1U9u7CvJQy+=j^n8@kr)vMp0=K;h9yeULM++`#{e z-q&7X4CC`uG247yBRLoe*5!*JY;e5jzkg8{Qx=U!thaYxiM=PkocYF zP^j_YGm~1I?v3aJWZBI(r}`+S*@KFkzqc;|xqn&<62__Nb%KviV6uGJ7{>Yi>^CB= zFf5jI?@#>(jO)XB*xZ5pTXwxTME*k;!KaI!T(>V>Lxc6^Y~MO{R0BNFg>P2w9CwnW z1jbqg2+)bSRq9)xj!A%`K~gm%4<)vax#~ZyOXw^8ehZEoc)6SLSyF5}&Nq8+JtqVX zXB+VkrVC*p2?-V-C;MM=ocf0jErl-G+rMn$`lJj40Ykf2U7Hyr>>|inEaj@K@QdH) z7Q{Ng0ofR2^{|o4;D9TJ3})4u$h2~kQbv^8zwtTk1y&lG@3i=Ir#b^QlN0J}y>+>4 znNGOD7QFbW+->*h#LD;S+?%DPuUAVp$NqifWC6OKx8}S`^2O2lk$*va9tC!((Lue^ zY@;KI&%Z6t)Dd$bzE$j&+b&BH4f6NSZE0ZhoYigrR z>4WiO2TBcYeZs_O2zpDk9On%uT~*ygTM>%7@PUkqtiM6fkdIv9X01@ng6a0vSpnXU zI?~fGH{Bn?-CTbZ(7h(M^W9boBmzZ4n#Lo=+_#=bGGRGkG5P|=hreF7#P4OaeFI;j^4tHviUlh=7WW`ZQbS{_tp(+ zZt3bu>V2jO{~M9t?6&D>ea+V>lM)hfVbjT5-k8V76PQ;A_b+$Q zK(WR1q>>WHG=C z75eBUmfa!e5XrJUsVo6SQ~g67btfOK^}L{O+z)6G{nHnYpT_=8ehlO)V0^aOwir!M zKvb`{nvL>lxK^KDaT-#(9m9?NhwnEW+;D#+>{aSzUNhG>IATv)@NmgiZ?zb8sSfV$ zFQ^0$92BTeshF8N_CrU)r}Kgp_rkF$D4~IiX8%phNad1*da@w6(%6PdX+a_A6&j^U zLSq}>EXyT<=Df;o+z^0>&g4x#5YYdb)DYsXTVgK<>ZOahdefrx?2r%0@&vd#dWYYC zGE#}a1WcAoffkhE9%=T>bc39>F&PcH6IxfAz^Gx1tMZl z{@LF0k%5G{yu0>!YH<~GX(lUF9!h4e)0%FUE@~fJi!C7;m~;3JqHNJmug>P$Y`lrt z_4s1%|Dmf)kLaE~Zpgb}GPy~_sR zZHCAor!9Ihjd22`7;h6cY3h}?`e(`jy#@ccxkVh0pk(Nd1g}xvhWErOb0!-J@;2dN z>GI`QK?sBY=9qYunB&$R^*gbmY$3#=8|5 z$gVTF0yS3;#%a5+!g7XIi2)$j>j!9P_Ul$#|B>?$o=YeetQewX8(VtxzRs_=kWHXV41eQ$T&??@T;b0zK#Wc@l;#hO?W*jn zK;Gn4Ssjc5&NarnD1O$6!DjI(-k&JqUPrkAdz`8QH|!zPd3kf;73**sCRH#Q0+ZLUA*lEoG6*jxNC*aQ*{ ze&oNc-kbHsb56WX%&8<3mcHsuy#J|MgjqzuCG{2#e?@})Db+@;{cMkpK_L=E^?XAy zS+0+`Gq%G-w(oJZzj5$?D*NiFsJgcAK@d=pM!LI80RbuL4(V>`?o=A21wF$#5?st#({XFk_|N7SFg0&E4&di*%&%WXpS08_?9t(|=j@P~$0slcF-dZJ3{82!H zC`mu&QxNcMm&rqo#{mJhCGwHD`TF8TBkKyK^urFuw4{_i2>2$y3IG_;G%F|hwI~+c z1Awd4nIJK()ZnbWzMd?c+MhXnxQI~dIP;`%DJtUF#&vZF*w=+UHXU(4T%OSE&9(Gw zo<>?VJMt1aH_k-cbi}O)7$cN&lm_nba!r4Tjk!|jq)eW&R!uyM 
zq#W~T?Nroj8)t5Q3Yovu^XbZ`M|^VNmQHN)`1-|?C*MN?uk?Z6~{C60Xmd80Er* z(gKba$$Lx#nc1m#nIfvRUJ||5sWe-H52=c^JzL6BW3ypBARI>V<%Jyg3{|jhjFmtj z?+$9Q%Ixgz$xTFzS+lTMExJp@Wn|lq4SS9!25CWG>@BtZQO{6|;pE^F1aiqY%$#1G z|2(CQolDudKS6RvH~SD^0ARrg1^hDU(6ng^5D9y=(uJ+t3Fe5<6`l?-w=4rbqe(K6Nt=8ob(7`Hu^i$+Ko4lGeu!5>Azln1p-wMaeX$CDQWk3WXH5+5ck%%t{ z;B;C+zPUQSt<~Axy^`8|f86Vn+PeIReDu9$udi0sifr^=@k3!SwR(C3XfA)t?H7HK@mlzrCZUG?8nXD+Q+vxFIcY&k zJ)w{g0?buV-q|{hb>m#jSLiqezPn`LD0;*PoyFzf_=f`f?oPJco0y=;1ax{GuUn^* z6HN~5?(v}(1ugs21$7P1k|lp?cwS@(;QDpvD)IZxSc)20y%ik+5QU=Nv1AS`M8e6O z9IVQd-y3d4oQcvn+^e*nr!$zIHy1ruUVm(YBf{H4)~MwbwZ5QG`+VU!jLuaU0qWq5 zU#iEW!Pu!bafFKvxu z0GzJi!08fpl{dV3K=spXn>I#Qi>@4=Lsh|rV&V8?-(%8M;~^|`Rx)p-r>)iSx8 z`?pOU!mvla`%X%}%vLB<;^Z|Ib%ZHw)PkbNMe|ET9dDv%Z_qt=u-38l`g3*lfAvq$ z^fC(w{leQCu7eFk!Vk5Wb2X*V;dMbe9Au%X8`SoA$z@%?VNz0_C>E*T`(7^n@cz8djvSu%ghWGY*8;6&eDM1xV}M>N9q()7BVLrSuFpUd zqc>M=bp`nLtl+pUP7{R2&7o@vPe{G2=pgCpV>?rynv%;34Djxa&5Mc=xrg5I+Dts@ zimM`9#aKO_ZCHbT(=*K3EPFwFa56n_?k|b^7cP2ZBUhrSiRoQD4n1zuYk2;Yk<~l5 zJkxGi`d}vr2~T>{_MOlImTq9n=`scS{e4GE@=SS;Nh-wWZ)f%=y2v{b`5>;wUyDNB*6)B4*7=Kv7XxU^eCIHWm{)q+pBzV6d>C9q-WV@&G6YtP@wiB zV$`UlQPpA_CsW!tSBpF1jTA(A7bu7lmv-m>Q)4_{+UGP^#gu!=k(`Y^f|QCQXSKc4 z_+{O|-^QBVjA6x0yG|-4Cz=S_=1^Kst=-yizB2t6yf^&?;R0|!+VsX9Jv%qT5_%hZ z?+PcP<(d;6^H4thr20@r8T)UQ^V7c4q~UC5D*-kvi-D1`%c7(S9^~7H&9}1KT4id3 zCt9)vew494y?-9QV`>MQ50-JVSRihIqD)%xEYY|hpFCH2Fi0Vv47pj0r$*75XTIPRw-^wI$C290&epti+gqno&_EIf%EKAXuP4)67PEO4!D2S|3I zw01k39&iPit+7E7I5@i88i56pKX?dqG(1#y3>~Yd-w~Jr5+1;t1^6Hi1UfTki1$>w zu7^f~sDKWfffdYm#~lE}7VpwK;~fKH8t8eoQ<0`k_mPjs5v%pD1nPuldrMXKHHnYAQ$~20B9!z??e?S zT-4jKB{oJG5!d0wg{e#`)YXeWa_8C#{Da2^OPUU+3*x-~DBrpPipdC{$9vA7UN3Q2 zAV3^7tR=V4Wbr!j^DL*`bw&ifN`pRzFa;VGVKzfWTO)vE4gYR^BoJ|?$llkufZN^+ zz(R23VEOU*!TesP|3b^YQdWJhK}{2dMp;FvRO>G$gqlH$cyh0abx=<6FBG~=7;8ob z-fj4gdS=Ibq!c(bGnQzHf48<^Iw7kvGAXLIBl`p_aEl2u{pV@n?G1oW(|1uUlpI*IN6@T|KS4ouo#S^f|i$zQj_?4XP+fh zRRkIw;7O%+jG|xgKKmhDBGj2i4!#Z~-93taFUA5D71`~>%bX)0@C0wB5&0GQ*o=Gt 
zMwqO;k7qcAIvM+oXc-mfhm5Wd+pJur)YBeA9ZS0$s~HXOdXK4If;DaP#)QaXPbuFA z{r9AErKCnAjG!ieQ>!#00xu8dJHB@W3ZK?%{ zkHxz2$^$9}Kn#Q=3JcW>ExY&qtL|VE%Q%(cgx6wz_l9s+r6i!^pmpEt3BQf-7K3&v zCism=M*;q7eRYwLxkXnDRxelbClQBNnaXpMf>mt{$G}JDb4j3LM z23jWgWkOg2wI+0kz(ELs%y}W@$@QrFV(_s37#Y?&g49yGGIKKkf8diT7~PBsKs0A_m`#LC*yki=@x$sj#2Vhkk1H z_tn^fS9-J$)Fv)^C1A4uauPj%QMc3&(G-&lqU5w0nqOOj&3`8GXfo^4>~{#i7-Z{9tCB*0G6oUQqB1s@_X zRlG0)>Mhc^Tk9LuD~p3e=yz?&Qfnzizy7zZK}e-0TOuX%mJv0bjH$h*()&*#&5GfD zdX5JcMNTB27c`N>H;c-_PJVSTkOw3PQyjnwZYR&3yA$MfyQ@?Q525UUpBb`_UD^2c!vPP$$Wtk1X^Ak0DMCc z^B8oCET>&((uCcSdkzN8W>lB?Me4#Q7)KPvBj%JQ#l3@03iRe#a$ny%M)w^sH*^oR zt!}%9`;qMFICAADH0*uHDmY#n$A$RfH#oJ))XWno))D7O_uS2#vJjuz4*7a?ct1Xc zZE&fSRb4}gk5j`oYUEmW z>zA5!b2f+Jj6R1bJ8E=f98|A-FMd!;8{zrM&aQ1oA<%!Afb$?2JXhQ$K@6iUvSv-R zDz*vJkO&c%mJ}|iE_%;2Rb=Wy!7STt)nrrf3Vbj%;*RK#LGRLp`fLXo64#le{w`bv z5w7`^Bl-D}uhp+jAjT;MMe0_G9#%(B=s@u{3@zmGY_FHPKmaxG%clPu^FM5(K^C`1b8ZPvh6Y=VnOy=~;9lR8aLX zw663u3Y1BtUkZ2tCJCu@nO5ISmN6;8nk3z9cZ^7!p7_of zuU>GDCjyUAp@>y0j=osxLH*VA`TIj6tzo>HqQIBlPj_`n0tOyKg#UgTG{|5!<1_`g zuh?1Q7b=L@CcmTyq*Aa#NLi!5&c#!Ws z?}Axc4<~85*>~9{g@mRr1ypjRA?R1A zF`?TF@rUw-nbJl(e#s8XFaDDgq^DBH1}#_z(S)9)Q3zqCq+S|7P)t+{Nx?u`w=v7i zPT|F4C=%>k7Dckh4%C??$=KK;op|y^!*SrL;$pF zs2ZNLwaabJ4T2~gK2 zIHek=$ok4cXEpkzW7|?)zJ@F>!3G_*vkw7a^6iujqEUA`-Np zk>ZnVtcxe-H{LMPcRfvpiC3Se6Jm@SPuVZuo!IniYQb#pPkY&Oi}zKxde8Kzra9AH zWAlBj$MXXjo=-LJtVK zjXL-7`g04$a4Helt$?iCdm4DcYavz4{Bvz%UdXqq&0v$u_Q1=oT)=s1jq5jK4CT!E z3~l?0BTaE}=9Yg)n+_pR7J*)}Ef)bOO1|CRID8!aA(7~?DaKnrltfL>m|I)M#-;Pn z_~ej}bFZSbRzMIkWC!-2w=U8nr;WGYA^mS(!Rc7bMGcj;?(D*CBv-IW-ttoSpqxB$ zZR5F+AKvDjds@R&KKD|+xWr!BnYrSdaGKM>$==nvSM9*hYPX(u_Ng8sPQ*v~r`Zt& zg~&jqv#|(*qU|*J0`HE{mTS*;7W|N6JH1!RLx@T!JDmb(+Q`siUhQp=Cki(~xn_08 zO4OkOwZnk3fMYMyscg`k^3t~5m`cM$m@Kuh#OJXUe+mgMX9`$|d@j*{6MaEcSJ2@g zq3Zt9^+Hvs@eZRJ9cDN~>BaQEGw2o!%4u)aKzXG8t%aJM8_#;|K;J~rl(qPIz1G+> z;CS=rqeJ##*niTZeCJGiGJCy)MuH(hN9Da)DbxwHgmuLnZZy;%z%KwmAqhlRT6p1C{ z6t+^6&>^m$>VIKp4J!ugN4^U`u(EEy)RlYTaf@<+>kF(6;J=1{JKFf%AA^g5uep7j 
zW%J_SjIZol+(gLzNkOE79<6|AH`)H>5YW-~LY|Ov^ALP`ZjESCD zM(f%CIesH!VqElX?m##vECi5Uzix<=QFUA0|D;k9{qWQ|NL#&_Ny+$w>g>WWk!Thg zWVqsTU*CqGTQq|F#{en;(DezT(s}uO{io_jd_wdhhquP)YR+e~NRDMIqwV8-EiL*g zMth5&WT2CDL8hOEw(T^zrcpEhgSL^0k-==?A-$Kb%y-VN#;*arEm4u6(}{m^;l!<8 zjmRCt+_r4oSNNoWSDW(Rldj1|Z?N?6PFz;!%Yuh98TBUI$tiQ|itX~yQo=I8gO&PI zxDvx3P|k$R%<{^M0d#wHdPxZg!}VKyimD~8^KKPoE=&ASPytALG__K!s-u9C3vegilQv zkJmAV(hnCdX*Zhhb}*!q9;!~fJAabu?1Si`|1cj1IG54o2Pv1`h70v?t^iyDK*@dO zhgi9#uKusaS8jiFe`?mGmU~YhJ+59I8>(^fKc$>RXHerp$SY6(1M~)pPXHHZyVq+hP<-LC_?E?T8mF_T>-K>U$GTOPFe_r7vKJLh=Al7--ux z-kb>Y-t=PVfVwrPd{FFLEfXC>Ct&dkX?YSn=+vOGx1@Y(YPj&lh69Fr1ccm6 z%SuDrjt0MNd<;f(U)bG!qyj}}@!>d3M>lWS?ril5?PDAM;Yx|R!`Djy#zA>Wutd09 zm$i#TmsFG4#a)G66=lxn8h6(NmPV!~G&OoW2c+E5>A>ul7QhF zr}ZS9gX-_gnMm;I=G7faE!)@#cw#<%t5rLzt3i3N8D|d;i#(Za)o)Di&(D)FHmmuu#xkm$Y20eTEwpxw^MqQ5vsiXHk-R zwY4jE=lvkn6~G%u8A~noPgNxxM;2;OcTkQT1dFlgi5vFHgFE@y?s(aa2y8Uq8n}iG zUe&^#H8t=*f58X`K5WY5JVy2YIXNQtx@=Zi<*iGN`&nF^)4g@ zcs!iOhXW4qn>V-sEQK~pIjk1b3;CVsu-B;XP^NU3(85csSF0j1s4G~{@*bCsem{fw z&R7OQ9ie5o~ z1q4moRpI!^-n+{qL4e10euDMDiZ-j#DD!^L4Iy#sLifDe=II@3@$KxS0Pz~o%k=>r zXUiu|tX8c^_JjT3aC^JiU$F(L0kmMeVrIrT{j95M& zV&|HGQA>{ZC7c47VP$m2b&chMX@_UK4@&3d2%`Jr=#GxdgKaYCE`7^zFjz~93N|F&kkyLK^`0y=dCUGy&h-09A z=f0sh>enC-WyAYow?A%+m*Kv5cb?l-d{0oMGk1MnprN6SI4OR&pRw8e-5wezCEx7% z6zP7Mf4clwZ0~9XvPgIZ10h)(q!qcnx^ne=y~_RtpB)RMTjo8Os{#XgC3So{{_D#8 zIiNsnVVuMryIYZ0h*o3HZV;dYU13(Yp0PV$B$ z^;rtM&J?GPAxbTQ9fG8v)$A8Wn&nzJl zj@SO{zLr&2uWR)6wnbZ`awr4U@(`q3jRB_T73_yO<)q5O(qaB@14q$*3dLG(p6m{! 
z95&)&J;>THkhgU+q4!^KFP~jJHe!QU@0wNCWHnn=RckU}$9rS>`3rk4tNUJ0=tawe zGisVAHvw2n%$M6i6ZD+qUQ?u=Jer7qS(`8Nw=976LlzCFD&axDzAve`x^)dsj&Pm(OkAese}z98Jc1YS~WO4-gDtW^^U&aZ1q#pW9c#x|#-%lGphj~#QB9zuwAMqvgxid$>9F-j?{aPeMS`&Q%* z>D}e7VeXgA`b+waQOmA{9V?AwI!Q4sRX(S$DHP0Jr+^4pJ4cKIgE7Nli$juK`&Y9r z-iQ^ieEh5sT=W(q&c*YS;efBli$v_uM_X&iwXcMR2@r7w2a^r#J7YH-OpkN`>j*2J zF}UmtpoympR}g6U;|~B_*B4E!O|~`!ACZ zB}_(9`5%fyGwmG$k}RcYsqAa!0vyJ7pV++VGColpPbTnzxS2LJuo&KGy1n`B8_t>AIT(Z4{DP-uU)~4?`!~>KxULX0ON?oK ze2sVJ1sJl7YM~XlrpKp?X(K<>W7B)=rvuXG83r5&Hq=jl>{j(CMXq#221)EfFB<-2 zHtE{>w(ovnoyy8Q;}jDsf@Q(ZiBX8VAjWH} zS%SkB&hpL}$slb+wlMt0^#om%*6*ewKqZ6)lXFL~(H8ZF8QWLBoX6-0QDm;aPsmKy zB*$K7ph%PR38Ws_9Le{)Q)<%OOCgS?vGVJFV`uBuItg#)^<|J8MC{K+K!cqIyNomr zr}l_$mE55ZX|^Vch|gft?dS^NDVB@SuWzVFf3Jqe=kSgsuD1C!x8L3LOG9$Tc*KiQ zGs}L14%9`6L?5T%&UZNlIvVh^Bw|fm>o-poSu%N5{m!`2XVP}tu+}STvzq0}G=Y?P z0Gl~O(H{MG8c*cu=5JWYfrr4%A>fg$R)2pJbvovq%V@Wx8_40d>_0Vcwead-caa zn8TMJQdJ|49)FI!3uig~W>GyJRy>DA_C z);jd4KI|s9YLFN>^jPNOymgcV?gSS&QlwEB^Sb)g?YgtA8TMyHfYO?#{@B^JJ% zX?v7fJ}5FIwLl${Q7UcxNQs+>&%7EHFa@b{SxCvq$U>2>!xP4~eRZ(!0;>(WBwd?T ziFua_PPuO;sdHz_>kq34XMx=@dD-ESvdL^e4baFD{9s4zD}!l8=q9|zQ7xn-J%V3 z>>m?9YJ8GnabJ21s_uE^78q_PNx`Y#k?Fvqwymz$l4EV*H-e>y+CjVY1smqPdn0|7jT#gWJh+?h&} zrXQLpOX0LAkYj!Xie?FhsW6UYDoa6x;+-!qw7+rg@>ojDA?0>_3Sp-%SUfy8h`rbS z(pg225@`g2M{v6|kv!MuuQ>G!)?bp2a9=T}Ff)#sSJkdX@El8~sVcN#kM|*z(Mlo> zudDOc8R>@Xf{DF&rPdSX`o4JAziM17)a0Xxhnuww-U8cLaB{Tj*N03(vrGB+7qUL` z`?L`daM4I(MaMb4)$=iUw)=J|xN-^|pW3LuSW5wn{8Z*}9N#@A%pY(+(ThUheJKZC>CO8OVP-c1L)wmtp~hf%7tU7^y6Y9O zx)>uI^@Xk}aP$c>OSGOB@zZJ_N=T7r9W{U9fjAx1v(~nv>!VuP66L0Yw@`JE;)3ZR z&D!dKiD49~WZGL{p4n+1GJ5WQoynF&Yx1s3;8LSA+RS32DB$ftONW$~2mO=5ea&ja z^HJqcD!JIP&qPaiaUB<<O1SyT(PJ`eFdigU@S^9!=-Vcx#(H+zj| z_B`^-7p2jYASEJL2sCn!ZoIHNUVZAxYDM;N+9yxZ`GOKysS`i5w|0SnP|4sHAdVy7 zKNOG!KlPon7=Hq>oUbL~NP1Z}sRf`9#K?Cuxcv7&kgQ=c=CM)#9594~GKQR%78jUy zwt>7cdjmX{z4`lRRo%nzOMU{xf(g2!ThB8emiqw-=*KggY18%Ywy-&CyBU82ta8M+ 
zXSMAJ-2O1k!ap3Fvc=(ASQKL^^~k_LcyEkcw(G%OChoR}25DU{i$wre7S0x@3Vc6G8koLb*KJ+=>I=#ETWfrs)Je5Ct!kSC?O|HBAKT_DlSF zL&+b#w&ZJ^W1o)f%ggy02b|mEk6#|dzY(A6FL}6C$ipR~;m{^-l!I%D$qGodfGyAp zZg8mcE|^)~&}juVvj)1`4DAMI9DZ-lVMU%gzz;$KqyTW@dwxj70$JX%U|KS-#fJS; z_(Z_Y5KQ5AS>77y*t+i@6PGRiLa$o>1AiXO-d^#+FroR=jnBKzn9R4IhPU>)gn4D| zusIaR0SSzJN_Abu%_GhQ?57t)EuZ_8x>q_f4h`Bn6-srRo?E11VbR^aoi};nGea)jb;8iS3r&|2kWzn1X5Xqeo z@9~_YVb#1Cug${4B(bJdm>#(>t5fLNhSn$I&;yuDyR}{)( z_Ze=iUP#$;XP=>B-mz~DJUM9_zZwv7L>e44+OU}Qs}1UMA*hzj{-}ffWy1dO?!=+% zF00+{&Nx6pm1%dN@z3T)mgy9Alv*(Xhy}6)?~uXDs+lPK)O`P#J)L9o=%YV{!c%5A z;OuDmfErVv(jDNi3FNJS;>+R+#t>e3IPZ0jm&Ins%o8lp6gBuv2VfKX5J6D!vBQRP zBR+K*2xc2)L}qrZ_`aFS$B{;W#L!WbpGHcX9Y{ojHJ7HU+r7>jdvx79#NzU&COAB@ z1Bks8)Z~31Wv`G3mAGM0i^CuB`EU-KxlzHq7gY9K?*Nd$CUbi$)^jZ4j1wO}J05+Y zS4AW0#(ZaO$DQNY9_VBSdM?;R=99?4A+v1X!OI?j#cydcr$ki3x569gI(^PM?CLLFTva$z3O<=?WJkmey z?a`NZ!bLRB)EPdHAiCH7{vGbhTZB0#oF@3Ha<0`y}F`M~irL4i@A_E>fv}vFPu;{*rP;%f=?VGO1}n zl5OE~=3b2QhdJ2~mz`?ux4VnPer!yPLV*fl93{aCMvAQvcps*fetw{}?SED= z^O!0+w^e?!J7evlcbJ+6K{U==@VIDA@>_h4c!G|OaWwH+{oC2Ku%$)|nvYSk2$&jw z+}dO{++agI-k|wbZ9HJ`1t=5+TF(uBRjnqZp-A?eUX^=M#Xh?PV)o>{z4WnY5J=E^ zkcmakH46|5J+7;K(r>J4R+F}M*E&bmYCVtHtn2n)%`fF(Y1F79|2fsmy|`K`?EA*C z@`B*u++}4}Tv{M2Cwyc#ZqsfF_L6>f%0`|TWY9kqi>|zr`cL=Zz)Zn9_Li=d_hJw( z`P+!u+Reor*Skb&gjX`ORHBf=#%}z*5=4;lD&P=;DHl+~{$j1VC*h*7 zB~xL4ekB>MvzZLY7UtCr|+xymOh@BngYnb+E%p|r@$MGq9~8clA|$A|mli#eNw z9fDi12!R1K1fo(T-nIW`C;#eYNlO~aUPncfd%DzS5Ucz6j~LE$Hix?8kB)Ly5CK1` zm#h{eoSau*BumoMAy#(uOiooC=`~mkZELEGC~L`r z{aL{b3~6!Iao zt~oi4?5rBJR|gqt{lTKg;k!zUDP-+>$LN0J66K3oF!2N@T(r`B;yK9J{ePLfzdqS0 zar=VHZnC`E6$WKdcZcJvuCj*4n7Uh6C!JtWM;cxv0}8FHDk%~e?WD*yHS zSiRG@QQeSoVh^*N&$I&d>|puk3i?TZ>IR;8uou zEtO1Z8mOEAQ*dEG|An~{t3i4byY0C6jbsco_dri0R6&Ikq#uU~X;29j=n)A5Mxjk_ z-91*_J0jZQGqU>6(&3`y=L3!l>AQ*6ve7gtCRDLt10u^WmH)pBjH4+s9dzCOZvgSkb88A1~41#PX+WR4@}V@Rs7 zezjVsG_qDbdy0waeX>U>B};2=7JBz{1kMIq6zQKwc9K+7DmjJiPt30emH+w`)%PPQ zY;G;SSjkN#mgYO=zF(g-v~s*Ri>tnq_uMI_(nB)F7olQ*E=5jz$6LPJy0-F0a3t^J 
zk|X>-Z<~Da=xg;4e=WjcJir|Qt^5~@d5Ft-nD7Irh;;$jf{+z^Kw&Ta!AoCqH%F?p};GIRB0wh1|Q1X zha8QF5$l7|B#LH+fS?c2>y09XQpX{!Mr``Cm1Y1GvKv1A%D-Sb84B6xfXBYwR>Wk- zK^lEuBYxIr^tXdCr3{`~US9s>Mh__AwsXV^CP2$tVxR*2N;LS~y{WgaFWUR}z(A}H z*ek%4ah~PrfPetSQggP%;>4K`@QHgqV(lI9$AR-j_N{c8Ng<2rs>(A8f7V3y#G#il zVZ6dr4iGde#`i&S090ZTKe@ZNMzUDl9cOYZq7V`Z)&n5M7o)!rP%yj32XP6I_rN3p z&{{F(r(_;529pVVv`D4H-aA$damA)Od!HO{d(@?X9xYZRV zY?S+xB}!lP{h%>nVe6%Uo@g$SC^y(4KWieU8lE3-RYR;fOdO!tCZ-RGH7+r=`9(?dBEHAv{ZI3TCXca^!>WX+)Mlv(XKJ~WR^LzerFLuX%5x%F zfa=zBKcwpez?OF`|}+R{!-9w359-U)c693Lv13&Z|5(Q#=cSJbikd zUp;V2NX^M9HUZw%(&eJod6Q~(Tx{%1NGZ=ADZ#{xk}2Ykf#ki8x1xFPJpL5s?=$H( z!a;f~@*AoZRed6h@T*(5ZPS(KRPw8{%V%P$pUrVtKZR6&oG6#RrDLFhgRGlu23C4W z`};}%vqaJf)Gxc zL4CiMD4k-%r=porulIKM1Pii*K+^vU&D$q8R;g52;{t1CCGrh*WvjgGA}1H-tACb5 zG<6yR>v+gOmBz|s|Avaod*(%fd_uME70k~wpWB~+6+o^TESPhZ8pt*+F=7}%*?Sge zyn9}cu0e;j{tN{@S=LF=#M`@hwHEX3)2Ho^QerrwRIeWSaNj2%jvADSmvq<>Ciwr; z(E5t|S)Q7S6YqTzl|bXRQj#DREOON+vDf8T&o@4s+e8@}_#?^E2K53m#}-(L!Ww1w6YH!K zXP8fbVz2zIQD~Y34eTSPmu?3d-L$1p%Utu~wy}c_rR`}K{o@B=+Ze#? 
zNQ*&(7aJ;PmL`SYZ42hy%ZMme$7x>{hi}qBYYp3oG$u8vo4A-_$GM+zdUN=xFCngF z=4Rje#+%$k=KeWkxKa7V{ADW;2XPvXIx?ixz)6+C(MIwUOj-xAg?{)&NA*BH+f#!TrQoWqCHc9a~7=5SB1x6`B^e9 z##Z$n9*A!OjjgA0&i$aX^Rgm&i7t}vYSmWQpH*q8?2Y9G^16a1f&Q~sox9Rjr3{B1 zc~onR@prQ&I*;fQZw{>IAy~0=E593dv%LKp`sa(48zxITyU8}nlE_=XRum+Er@=7| zhUE`gHS!rNRIx}L%B2`ILI+{Fjeh|G>FkbYjNSf_*oSEoI&o1{dBs+oanaY9t>I+3 z7)4a=42IFQr>3+7I5JU|`G3>qxE8&u3P3gbZa8@4-ZcF2wLX3$<@mjt(bSK8k}+(W zAoAg7x#klEE4L|m`XFYl`As|Pd2q=x?iB};wqhgPMJJpWpkWg;H$@0X{gWM_ef87t z><*kvkZinG#TOT!p^Ez?ApONQH3JzQUB=-3pwV~FnMHXvf#Y&~hj0#XA5wODyFUwg za2Uzw!e+(aE!BPWcKt!|K!|#~5+C5Y;QF2cJ5uhSV25dEz)uO%{@Up)bPWXR_$IuU zJEFN*o6;XNO*AKIl`=!VX{+EfgeFS+>3ES?hrC#7Wz3we9cC*(e)eqC;*FP!Q?r7^ z`(M5aTb*e7@At(Rs4pa{gRV+Jnjj%)qobn(J)i$XI|$~h=Nn%E%$gC=L2E*>;aB-J za0c5bKgHxdLk3ch%-%0q9i{>*y%@>p_c#O+cUq9v-Dg8vh4H8r>&MSdq)oPMRwiV_ z-mj4JIX>^_9+0-hqz>PL04wkCB~P^E$|UZTf@DcF)d)82j~FuQe& zcwU734ntlbA1&e8BQUR6UpGOpptUH_${F+cu@?!Nh{?DW*$(IKo|}8;h$Sme8^JNa zky6R5JO_An2yQk|fn@^^Le}HE=Q@#=UZwXp7jD<1#@pk3c)U-B1vC!9Q+v)^0#&aj z*dIX#Y-DO|D6sU<=Hs)}u>SIb57Uf7=dk^D6{|+r)dVe$xaV@)th2)J#9_3zSz=@R zL~h=vxg2}co%yxAN|i;rf+fRT&D5n#)+f;$ zMteNG$DwAvmQjn=#|jnLaM7?eu?Sey9CO?^C+r_9@wQ&$eRSmh_y+Hz*U}qqMn?_C z*BXy5MTb(2@#L)ah20(%GT)(fh9fK(Fa!&vcl6Fpu6|UHOg=>u<}2s!E6xr2EI8>? 
zyV>|Wc1>6HTSL}0`0DEF!I5%+Qk`kgWDA@3l%z2pL0NU*IhoJvIkYp?*Vh*{`LXl# zY{}8XYjYbV`0gIR>n;8UOvG^KE=s;pTO20Dy-B3`1x&_t_u8>|Tb54~e>(b9!XM`k zmRZ#;1qP`DEH4+8UPV8=>--c(bJBUqN027SRk8-8ditP4X*???@vRSt@qQ^u4a zb*w#T8@jY%oUzIVfw9_{Y0gw{@kKI>D$(0t9}sSv4IU`LcB4|Gr{BhCapcx68+eU( zTm|pm1Opg7@P0%ZMMU5SQ9l@sB4l^ye@kU^Cj?8Tq;%o^(7>Zavqox~6nX`Aky!9v z0)`&T_p_t#@4(Mn$)g+fP1xDlpHzBHl;kx&3WYR0urY8u^r2w1|C=FTNf9l-(q?*O zQmw31Yt~xi{1W_-jPrN*1kHhI+vgb2KukvV9RHta4?3hA|7-@Jkh1*W2eSOlrOC)L zOZER&4q#>Jcl^zuLH^ku_)pwLLkoE6C;k6>z-u3KI|l_Af8JXN_$MJED_kt3_xXPS Dn*O4= literal 0 HcmV?d00001 From 7d1b347f8fbbf419a6c79c34dbe6f1e54ba5b95b Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Thu, 26 Mar 2026 22:55:48 +0200 Subject: [PATCH 06/25] perf --- examples/perf.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/perf.rs b/examples/perf.rs index a29f2dc..9c5d36c 100644 --- a/examples/perf.rs +++ b/examples/perf.rs @@ -52,7 +52,7 @@ fn run_perf( let t0 = Instant::now(); for i in start_idx..end_idx { key[..4].copy_from_slice(&i.to_le_bytes()); - store.get(&key).unwrap(); + store.get(&key).unwrap().unwrap(); } let duration = t0.elapsed(); pos_gets_us.fetch_add( @@ -66,7 +66,7 @@ fn run_perf( let t0 = Instant::now(); for i in start_idx..end_idx { key[..4].copy_from_slice(&i.to_le_bytes()); - store.get(&key).unwrap(); + assert!(store.get(&key).unwrap().is_none()); } let duration = t0.elapsed(); neg_gets_us.fetch_add( From df7626136b6ee569079bbb7fce20391fea3a8913 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Fri, 27 Mar 2026 15:07:57 +0300 Subject: [PATCH 07/25] Rotation: wait for modifying operations to finish before flushing the active file --- Cargo.toml | 3 + src/data_file.rs | 109 +++++++- src/index_file.rs | 136 +++++----- src/internal.rs | 6 +- src/lib.rs | 29 ++ src/store.rs | 159 +++++++---- src/store/compaction.rs | 57 ++-- src/store/open.rs | 169 ++++-------- src/store/recovery.rs | 394 +++++++++++++++------------ src/types.rs | 65 ++--- tests/common/mod.rs | 30 
+-- tests/compaction.rs | 12 +- tests/crasher.rs | 4 +- tests/maintenance.rs | 1 - tests/metrics.rs | 8 + tests/proptest_state_machine.rs | 5 +- tests/recovery.rs | 460 ++++++++------------------------ tests/whitebox.rs | 366 +++++++++++++++++++++++++ 18 files changed, 1111 insertions(+), 902 deletions(-) create mode 100644 tests/whitebox.rs diff --git a/Cargo.toml b/Cargo.toml index 6ca4d27..2d23457 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,3 +26,6 @@ proptest = "1.10.0" tempfile = "3" rand = "0.10.0" libc = "0.2.183" + +[features] +whitebox-testing = [] diff --git a/src/data_file.rs b/src/data_file.rs index 0bc0d72..6c2524f 100644 --- a/src/data_file.rs +++ b/src/data_file.rs @@ -1,3 +1,4 @@ +use parking_lot::{Condvar, Mutex}; use smallvec::SmallVec; use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; @@ -7,7 +8,7 @@ use std::{ path::Path, sync::{ Arc, - atomic::{AtomicU64, Ordering}, + atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}, }, }; @@ -39,15 +40,37 @@ struct DataFileHeader { const _: () = assert!(size_of::() == PAGE_SIZE); +pub(crate) struct InflightGuard<'a> { + data_file: &'a DataFile, +} + +impl Drop for InflightGuard<'_> { + fn drop(&mut self) { + self.data_file.finish_inflight(); + } +} + +struct InflightWaiter { + mutex: Mutex<()>, + condvar: Condvar, +} + pub(crate) struct DataFile { pub(crate) file: File, file_offset: AtomicU64, + inflight_writes: AtomicU32, + sealed_for_rotation: AtomicBool, + inflight_waiter: InflightWaiter, config: Arc, pub(crate) file_idx: u16, pub(crate) file_ordinal: u64, } impl DataFile { + pub(crate) fn used_bytes(&self) -> u64 { + self.file_offset.load(Ordering::Acquire) + } + fn parse_data_entry(buf: &[u8], offset: u64) -> Result { if buf.len() < 8 { return Err(Error::IOError(std::io::Error::new( @@ -87,7 +110,7 @@ impl DataFile { let ns = ((header >> 24) & ((1 << KEY_NAMESPACE_BITS) - 1)) as u8; let entry_type = (header >> 30) & 0b11; - if entry_type != EntryType::Data as u32 { + if entry_type != 
EntryType::Insert as u32 && entry_type != EntryType::Update as u32 { return Err(Error::IOError(std::io::Error::new( std::io::ErrorKind::InvalidData, "invalid entry type", @@ -139,6 +162,12 @@ impl DataFile { Ok(Self { file, file_offset: AtomicU64::new(file_offset), + inflight_writes: AtomicU32::new(0), + sealed_for_rotation: AtomicBool::new(false), + inflight_waiter: InflightWaiter { + mutex: Mutex::new(()), + condvar: Condvar::new(), + }, config, file_idx, file_ordinal: header.ordinal, @@ -173,12 +202,39 @@ impl DataFile { Ok(Self { file, file_offset: AtomicU64::new(0), + inflight_writes: AtomicU32::new(0), + sealed_for_rotation: AtomicBool::new(false), + inflight_waiter: InflightWaiter { + mutex: Mutex::new(()), + condvar: Condvar::new(), + }, config, file_idx, file_ordinal: ordinal, }) } + fn start_inflight(&self) -> Result> { + self.inflight_writes.fetch_add(1, Ordering::SeqCst); + if self.sealed_for_rotation.load(Ordering::SeqCst) { + self.finish_inflight(); + return Err(Error::RotateDataFile(self.file_idx)); + } + + Ok(InflightGuard { data_file: self }) + } + + fn finish_inflight(&self) { + if self.inflight_writes.fetch_sub(1, Ordering::SeqCst) == 1 { + let _guard = self.inflight_waiter.mutex.lock(); + self.inflight_waiter.condvar.notify_all(); + } + } + + pub(crate) fn seal_for_rotation(&self) { + self.sealed_for_rotation.store(true, Ordering::SeqCst); + } + fn allocate(&self, len: u64) -> Result { let mut file_offset = self.file_offset.load(Ordering::Relaxed); loop { @@ -203,7 +259,7 @@ impl DataFile { ns: KeyNamespace, key: &[u8], val: Option<&[u8]>, - ) -> Result<(u64, usize)> { + ) -> Result<(u64, usize, InflightGuard<'_>)> { debug_assert!(key.len() <= MAX_USER_KEY_SIZE); debug_assert!(ns as u8 <= MAX_KEY_NAMESPACE); @@ -214,6 +270,7 @@ impl DataFile { let entry_len = 4 + if val.is_some() { 4 } else { 2 } + val_len + key.len() + 2; let aligned_len = entry_len.next_multiple_of(FILE_OFFSET_ALIGNMENT as usize); + let inflight_guard = self.start_inflight()?; 
let file_offset = self.allocate(aligned_len as u64)?; debug_assert!(file_offset % FILE_OFFSET_ALIGNMENT == 0); @@ -242,28 +299,47 @@ impl DataFile { let checksum = crc16_ibm3740_fast::hash(&buf[..entry_len - 2]) as u16; buf[entry_len - 2..entry_len].copy_from_slice(&checksum.to_le_bytes()); - write_all_at( + let res = write_all_at( &self.file, buf, size_of::() as u64 + file_offset, ) - .map_err(Error::IOError)?; + .map_err(Error::IOError); + res?; + + Ok((file_offset, aligned_len, inflight_guard)) + } + + /// Wait until all in-flight writes to this file have completed. + pub(crate) fn wait_inflight(&self) { + if self.inflight_writes.load(Ordering::SeqCst) == 0 { + return; + } - Ok((file_offset, aligned_len)) + let mut guard = self.inflight_waiter.mutex.lock(); + while self.inflight_writes.load(Ordering::Acquire) > 0 { + self.inflight_waiter.condvar.wait(&mut guard); + } } pub(crate) fn append_kv( &self, + entry_type: EntryType, ns: KeyNamespace, key: &[u8], val: &[u8], - ) -> Result<(u64, usize)> { - self.append_entry(EntryType::Data, ns, key, Some(val)) + ) -> Result<(u64, usize, InflightGuard<'_>)> { + debug_assert!(matches!(entry_type, EntryType::Insert | EntryType::Update)); + self.append_entry(entry_type, ns, key, Some(val)) } - pub(crate) fn append_tombstone(&self, ns: KeyNamespace, key: &[u8]) -> Result { + pub(crate) fn append_tombstone( + &self, + ns: KeyNamespace, + key: &[u8], + ) -> Result<(usize, InflightGuard<'_>)> { self.append_entry(EntryType::Tombstone, ns, key, None) - .map(|(_, len)| len) + .map(|(_, len, guard)| (len, guard)) } pub(crate) fn read_kv_into<'a>( @@ -287,7 +363,7 @@ impl DataFile { vlen: parsed.vlen, header_len: 8, ns: parsed.ns, - entry_type: EntryType::Data, + entry_type: EntryType::Insert, }) } @@ -306,7 +382,7 @@ impl DataFile { vlen: parsed.vlen, header_len: 8, ns: parsed.ns, - entry_type: EntryType::Data, + entry_type: EntryType::Insert, }) } @@ -391,7 +467,12 @@ impl DataFile { let entry_type = (header >> 30) & 0b11; match 
entry_type { - x if x == EntryType::Data as u32 => { + x if x == EntryType::Insert as u32 || x == EntryType::Update as u32 => { + let resolved_type = if x == EntryType::Insert as u32 { + EntryType::Insert + } else { + EntryType::Update + }; let klen = u16::from_le_bytes(avail[4..6].try_into().unwrap()); let vlen = u16::from_le_bytes(avail[6..8].try_into().unwrap()); let entry_len = 4 + 4 + klen as usize + vlen as usize + 2; @@ -415,7 +496,7 @@ impl DataFile { vlen, header_len: 8, ns, - entry_type: EntryType::Data, + entry_type: resolved_type, }, offset, offset + entry_len as u64, diff --git a/src/index_file.rs b/src/index_file.rs index 8df61b4..db2361c 100644 --- a/src/index_file.rs +++ b/src/index_file.rs @@ -10,7 +10,7 @@ use std::{ path::Path, sync::{ Arc, - atomic::{AtomicU32, AtomicU64, Ordering}, + atomic::{AtomicI64, AtomicU32, AtomicU64, Ordering}, }, time::{Duration, Instant}, }; @@ -41,36 +41,30 @@ pub(crate) struct IndexFileHeader { /////////////////////////////////// // rebuild state /////////////////////////////////// - pub(crate) dirty: AtomicU64, - /// Ordinal of the checkpointed file during progressive rebuild, or 0 if no - /// rebuild checkpoint is active. - pub(crate) rebuild_checkpoint_ordinal: AtomicU64, - /// Packed `(file_idx, file_offset)` for the progressive rebuild checkpoint. - pub(crate) rebuild_checkpoint_ptr: AtomicU64, - /// Checksum covering `(rebuild_checkpoint_ordinal, rebuild_checkpoint_ptr)`. - pub(crate) rebuild_checkpoint_checksum: AtomicU64, - _padding1024: [u8; 896 - 4 * 8], + /// Persisted replay cursor: the active-file position already reflected in the index. + pub(crate) commit_file_ordinal: AtomicU64, + /// Persisted replay cursor offset within `commit_file_ordinal`. 
+ pub(crate) commit_offset: AtomicU64, + _padding1024: [u8; 896 - 2 * 8], /////////////////////////////////// // stats /////////////////////////////////// - pub(crate) num_created: AtomicU64, - pub(crate) num_removed: AtomicU64, - pub(crate) num_replaced: AtomicU64, - pub(crate) written_bytes: AtomicU64, - pub(crate) waste_bytes: AtomicU64, - pub(crate) reclaimed_bytes: AtomicU64, - _padding1088: [u8; 64 - 6 * 8], - /// Histogram buckets: [<64, <256, <1K, <4K, <16K, >=16K] - pub(crate) size_histogram: [AtomicU64; 6], - _padding1152: [u8; 64 - 6 * 8], - - _trailer: [u8; PAGE_SIZE - 1152], + pub(crate) committed_num_entries: AtomicU64, + _reserved_data_bytes: AtomicU64, + _reserved_waste_bytes: AtomicU64, + + pub(crate) uncommitted_entries_delta: AtomicI64, + _reserved_data_delta: AtomicI64, + _reserved_waste_delta: AtomicI64, + + _trailer: [u8; PAGE_SIZE - 1072], } const _: () = assert!(offset_of!(IndexFileHeader, global_split_level) == 64); -const _: () = assert!(offset_of!(IndexFileHeader, num_created) == 1024); -const _: () = assert!(offset_of!(IndexFileHeader, size_histogram) == 1088); +const _: () = assert!(offset_of!(IndexFileHeader, commit_file_ordinal) == 128); +const _: () = assert!(offset_of!(IndexFileHeader, commit_offset) == 136); +const _: () = assert!(offset_of!(IndexFileHeader, committed_num_entries) == 1024); const _: () = assert!(size_of::() == PAGE_SIZE); #[derive(Debug, Clone, Copy, PartialEq, Eq, FromBytes, IntoBytes, KnownLayout, Immutable)] @@ -120,8 +114,7 @@ impl EntryPointer { #[repr(C)] pub(crate) struct RowLayout { pub(crate) split_level: AtomicU64, - checksum: u64, - _padding: [u8; 48], + _padding: [u8; 56], pub(crate) signatures: [u32; ROW_WIDTH], pub(crate) pointers: [EntryPointer; ROW_WIDTH], } @@ -131,18 +124,6 @@ const _: () = assert!(offset_of!(RowLayout, signatures) % 8 == 0); const _: () = assert!(offset_of!(RowLayout, pointers) % 8 == 0); impl RowLayout { - fn expected_checksum(&self) -> u64 { - let mut checksum = 
self.split_level.load(Ordering::Relaxed); - for idx in 0..ROW_WIDTH { - checksum ^= self.signatures[idx] as u64 ^ self.pointers[idx].0; - } - checksum - } - - pub(crate) fn checksum_matches(&self) -> bool { - self.checksum == self.expected_checksum() - } - pub(crate) fn iter_matches(&self, hash_coord: HashCoord) -> RowMatchIterator<'_> { RowMatchIterator { row: self, @@ -160,27 +141,20 @@ impl RowLayout { pub(crate) fn insert(&mut self, idx: usize, sig: u32, ptr: EntryPointer) { debug_assert!(self.signatures[idx] == HashCoord::INVALID_SIG); self.signatures[idx] = sig; + crate::crash_point("insert_after_sig"); self.pointers[idx] = ptr; - self.checksum ^= sig as u64 ^ ptr.0; } pub(crate) fn remove(&mut self, idx: usize) { - let sig = self.signatures[idx]; - let ptr = self.pointers[idx]; - self.checksum ^= sig as u64 ^ ptr.0; self.signatures[idx] = HashCoord::INVALID_SIG; self.pointers[idx] = EntryPointer::INVALID_POINTER; } pub(crate) fn replace_pointer(&mut self, idx: usize, new_ptr: EntryPointer) { - let old_ptr = self.pointers[idx]; - self.checksum ^= old_ptr.0 ^ new_ptr.0; self.pointers[idx] = new_ptr; } pub(crate) fn set_split_level(&mut self, new_sl: u64) { - let old_sl = self.split_level.load(Ordering::Relaxed); - self.checksum ^= old_sl ^ new_sl; self.split_level.store(new_sl, Ordering::Release); } } @@ -220,6 +194,8 @@ impl Iterator for RowMatchIterator<'_> { #[repr(C)] pub(crate) struct IndexFileLayout { pub(crate) header: IndexFileHeader, + // note: we don't keep committed and uncommitted waste_levels for space efficiency and because + // they only need be approximate pub(crate) waste_levels: [AtomicU32; MAX_DATA_FILES as usize], } @@ -254,6 +230,18 @@ fn row_mut_bytes(bytes: &mut [u8], idx: usize) -> &mut RowLayout { .expect("row bytes should contain an aligned row") } +fn apply_signed_counter_delta(counter: &AtomicU64, delta: i64) { + if delta == 0 { + return; + } + + counter + .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |current| { + 
Some(current.saturating_add_signed(delta)) + }) + .unwrap(); +} + unsafe fn row_mut_ptr(base_ptr: *const u8, idx: usize) -> *mut RowLayout { unsafe { base_ptr.add(row_offset(idx)) as *mut RowLayout } } @@ -451,10 +439,10 @@ impl IndexFile { return Err(invalid_data_error("invalid index global split level")); } - let active_rows = 1usize + let uncommitted_rows = 1usize .checked_shl(gsl as u32) .ok_or_else(|| invalid_data_error("index global split level overflow"))?; - if active_rows > row_count { + if uncommitted_rows > row_count { return Err(invalid_data_error( "index global split level exceeds file size", )); @@ -467,11 +455,6 @@ impl IndexFile { self.header_mmap.flush().map_err(Error::IOError) } - pub(crate) fn flush_rows(&self) -> Result<()> { - self.rows_mmap.write().flush().map_err(Error::IOError)?; - self.rows_file.sync_all().map_err(Error::IOError) - } - pub(crate) fn open(base_path: &Path, config: Arc) -> Result { let hash_key = config.hash_key; let num_rows = (config.initial_capacity / ROW_WIDTH) @@ -551,29 +534,20 @@ impl IndexFile { if new_file { let rows_table = inst.rows_table_mut(); - inst.init_header_and_rows(rows_table, hash_key, false)?; + inst.init_header_and_rows(rows_table, hash_key)?; } Ok(inst) } - pub(crate) fn verify_row_checksums(&self) -> Result<()> { - let row_table = self.rows_table(); - let row_count = row_count_for_len(row_table.row_guard.len()); - for row_idx in 0..row_count { - if !row_table.row(row_idx).checksum_matches() { - return Err(invalid_data_error("index row checksum mismatch")); - } - } - Ok(()) - } - pub(crate) fn sync_all(&self) -> Result<()> { - self.header_mmap.flush().map_err(Error::IOError)?; + // Persist row updates before any header state that claims those rows are durable. 
self.rows_mmap.write().flush().map_err(Error::IOError)?; + self.rows_file.sync_all().map_err(Error::IOError)?; + self.header_mmap.flush().map_err(Error::IOError)?; #[cfg(windows)] self.header_file.sync_all().map_err(Error::IOError)?; - self.rows_file.sync_all().map_err(Error::IOError) + Ok(()) } pub(crate) fn file_size_bytes(&self) -> u64 { @@ -598,7 +572,7 @@ impl IndexFile { /// Returns a direct reference to the header without acquiring any lock. /// /// Safe because the header mmap is never remapped and the header fields - /// used for stats are all `AtomicU64`. + /// used for stats are all atomics. fn full_header_ref(&self) -> &IndexFileLayout { unsafe { &*(self.header_mmap.as_ptr() as *const IndexFileLayout) } } @@ -616,10 +590,29 @@ impl IndexFile { self.full_header_ref().waste_levels[file_idx as usize].load(Ordering::Relaxed) } + /// Returns the combined waste across all file slots. + pub(crate) fn total_waste(&self) -> u64 { + let ref_full = self.full_header_ref(); + let mut total = 0u64; + for waste in ref_full.waste_levels.iter() { + total += waste.load(Ordering::Relaxed) as u64; + } + total + } + + /// Takes the combined waste and resets it pub(crate) fn take_file_waste(&self, file_idx: u16) -> u32 { self.full_header_ref().waste_levels[file_idx as usize].swap(0, Ordering::Relaxed) } + pub(crate) fn rollover_uncommitted_counters(&self) { + let h = self.header_ref(); + apply_signed_counter_delta( + &h.committed_num_entries, + h.uncommitted_entries_delta.swap(0, Ordering::Relaxed), + ); + } + pub(crate) fn grow(&self, nsl: u64) -> Result> { let mut layout_mut = self.rows_table_mut(); let gsl = self.header_ref().global_split_level.load(Ordering::Acquire); @@ -802,7 +795,6 @@ impl IndexFile { &self, mut rows_table: RowsTableWriteGuard, hash_key: (u64, u64), - dirty: bool, ) -> Result<()> { // Zero both mmaps first, then populate. 
rows_table.row_guard.fill(0); @@ -822,10 +814,6 @@ impl IndexFile { layout.header.signature = *INDEX_FILE_SIGNATURE; layout.header.version = INDEX_FILE_VERSION; - layout - .header - .dirty - .store(if dirty { 1 } else { 0 }, Ordering::Release); layout.header.hash_key_0 = hash_key.0; layout.header.hash_key_1 = hash_key.1; layout @@ -884,6 +872,6 @@ impl IndexFile { Self::maybe_lock_mmap(self.config.as_ref(), &row_table.row_guard); - self.init_header_and_rows(row_table, self.config.hash_key, true) + self.init_header_and_rows(row_table, self.config.hash_key) } } diff --git a/src/internal.rs b/src/internal.rs index 4ea444d..83455f7 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -117,10 +117,10 @@ impl RangeMetadata { #[repr(u16)] pub(crate) enum EntryType { - Data = 0, - Tombstone = 1, + Insert = 0, + Update = 1, _Unused2 = 2, - _Unused3 = 3, + Tombstone = 3, } pub(crate) fn invalid_data_error(message: &'static str) -> Error { diff --git a/src/lib.rs b/src/lib.rs index 885bafb..38468ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,35 @@ mod pacer; mod store; mod types; +/// Named crash point for whitebox testing. +/// +/// When the `whitebox-testing` feature is enabled and the environment variable +/// `CANDYSTORE_CRASH_POINT` matches `name`, the process aborts after the number +/// of hits specified by `CANDYSTORE_CRASH_AFTER` (default 0 = immediate). 
+#[cfg(feature = "whitebox-testing")] +pub(crate) fn crash_point(name: &str) { + use std::sync::atomic::{AtomicU64, Ordering}; + static COUNTER: AtomicU64 = AtomicU64::new(0); + + let Ok(target) = std::env::var("CANDYSTORE_CRASH_POINT") else { + return; + }; + if target != name { + return; + } + let after: u64 = std::env::var("CANDYSTORE_CRASH_AFTER") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(0); + if COUNTER.fetch_add(1, Ordering::Relaxed) >= after { + std::process::abort(); + } +} + +#[cfg(not(feature = "whitebox-testing"))] +#[inline(always)] +pub(crate) fn crash_point(_name: &str) {} + /// The main untyped store API. pub use crate::store::{ CandyStore, CandyTypedDeque, CandyTypedKey, CandyTypedList, CandyTypedStore, KVPair, diff --git a/src/store.rs b/src/store.rs index 18ba71a..773a34c 100644 --- a/src/store.rs +++ b/src/store.rs @@ -23,9 +23,9 @@ use crate::{ data_file::DataFile, index_file::{EntryPointer, IndexFile, RowLayout, RowReadGuard, RowWriteGuard}, internal::{ - HashCoord, KeyNamespace, MAX_DATA_FILE_IDX, MAX_DATA_FILES, MIN_SPLIT_LEVEL, ROW_WIDTH, - aligned_data_entry_size, aligned_data_entry_waste, aligned_tombstone_entry_waste, - index_file_path, index_rows_file_path, sync_dir, + EntryType, HashCoord, KeyNamespace, MAX_DATA_FILE_IDX, MAX_DATA_FILES, MIN_SPLIT_LEVEL, + ROW_WIDTH, aligned_data_entry_size, aligned_data_entry_waste, + aligned_tombstone_entry_waste, index_file_path, index_rows_file_path, sync_dir, }, types::{ Config, Error, GetOrCreateStatus, INITIAL_DATA_FILE_ORDINAL, ReplaceStatus, Result, Stats, @@ -53,6 +53,11 @@ struct InnerStats { num_read_bytes: AtomicU64, num_write_ops: AtomicU64, num_write_bytes: AtomicU64, + num_created: AtomicU64, + num_removed: AtomicU64, + num_replaced: AtomicU64, + written_bytes: AtomicU64, + size_histogram: [AtomicU64; 6], } impl InnerStats { @@ -72,6 +77,13 @@ impl InnerStats { self.num_read_bytes.store(0, Ordering::Relaxed); self.num_write_ops.store(0, Ordering::Relaxed); 
self.num_write_bytes.store(0, Ordering::Relaxed); + self.num_created.store(0, Ordering::Relaxed); + self.num_removed.store(0, Ordering::Relaxed); + self.num_replaced.store(0, Ordering::Relaxed); + self.written_bytes.store(0, Ordering::Relaxed); + for bucket in &self.size_histogram { + bucket.store(0, Ordering::Relaxed); + } } } @@ -97,7 +109,6 @@ pub struct CandyStore { _lockfile: fslock::LockFile, compaction_thd: Mutex>>, allow_clean_shutdown: AtomicBool, - was_clean_shutdown: AtomicBool, } pub use list::{KVPair, ListIterator}; @@ -108,14 +119,6 @@ pub(super) struct OpenState { data_files: HashMap>, active_file_idx: u16, active_file_ordinal: u64, - was_clean_shutdown: bool, -} - -pub(super) enum DirtyOpenAction { - None, - RebuildIndex, - TrustIndex, - ResetDb, } impl StoreInner { @@ -301,7 +304,14 @@ impl StoreInner { // Buckets: [<64, <256, <1K, <4K, <16K, >=16K] // Boundaries at ilog2 = 6, 8, 10, 12, 14 → bucket = ((ilog2 - 4) / 2).clamp(0, 5) let bucket = ((entry_size.max(1).ilog2() as usize).saturating_sub(4) / 2).min(5); - self.index_file.header_ref().size_histogram[bucket].fetch_add(1, Ordering::Relaxed); + self.stats.size_histogram[bucket].fetch_add(1, Ordering::Relaxed); + } + + fn add_uncommitted_num_entries(&self, delta: i64) { + self.index_file + .header_ref() + .uncommitted_entries_delta + .fetch_add(delta, Ordering::Relaxed); } fn _split_row(&self, hc: HashCoord, sl: u64, gsl: u64) -> Result<()> { @@ -373,12 +383,8 @@ impl StoreInner { return Ok(()); } - let active_ordinal = if let Ok(active_file) = self.data_file(active_idx) { - let _ = active_file.file.sync_all(); - active_file.file_ordinal - } else { - 0 - }; + let active_file = self.data_file(active_idx)?; + let active_ordinal = active_file.file_ordinal; let mut next_idx = (self.active_file_idx.load(Ordering::Relaxed) + 1) & MAX_DATA_FILE_IDX; let mut attempts = 0; @@ -401,6 +407,22 @@ impl StoreInner { ordinal, )?); + active_file.seal_for_rotation(); + active_file.wait_inflight(); + let _ = 
active_file.file.sync_all(); + + self.index_file.rollover_uncommitted_counters(); + self.index_file + .header_ref() + .commit_file_ordinal + .store(active_file.file_ordinal, Ordering::Release); + self.index_file + .header_ref() + .commit_offset + .store(active_file.used_bytes(), Ordering::Release); + + let _ = self.index_file.sync_all(); + self.data_files.write().insert(next_idx, data_file); self.active_file_idx.store(next_idx, Ordering::Release); @@ -599,7 +621,8 @@ impl CandyStore { let active_file = files .get(&active_idx) .ok_or(Error::MissingDataFile(active_idx))?; - let (file_off, size) = active_file.append_kv(ns, key, val)?; + let (file_off, size, _inflight_guard) = + active_file.append_kv(EntryType::Insert, ns, key, val)?; self.inner.record_write(size as u64); row.insert( col, @@ -635,32 +658,37 @@ impl CandyStore { fn record_write_stats(&self, klen: usize, vlen: usize) { let entry_size = aligned_data_entry_size(klen, vlen); - let h = self.inner.index_file.header_ref(); - h.written_bytes.fetch_add(entry_size, Ordering::Relaxed); - h.num_created.fetch_add(1, Ordering::Relaxed); + self.inner.add_uncommitted_num_entries(1); + self.inner + .stats + .written_bytes + .fetch_add(entry_size, Ordering::Relaxed); + self.inner.stats.num_created.fetch_add(1, Ordering::Relaxed); self.inner.bump_histogram(entry_size); } fn record_replace_stats( &self, - old_klen: usize, - old_vlen: usize, + _old_klen: usize, + _old_vlen: usize, new_klen: usize, new_vlen: usize, ) { - let old_entry_size = aligned_data_entry_size(old_klen, old_vlen); let new_entry_size = aligned_data_entry_size(new_klen, new_vlen); - let h = self.inner.index_file.header_ref(); - h.written_bytes.fetch_add(new_entry_size, Ordering::Relaxed); - h.waste_bytes.fetch_add(old_entry_size, Ordering::Relaxed); - h.num_replaced.fetch_add(1, Ordering::Relaxed); + self.inner + .stats + .written_bytes + .fetch_add(new_entry_size, Ordering::Relaxed); + self.inner + .stats + .num_replaced + .fetch_add(1, 
Ordering::Relaxed); + self.inner.bump_histogram(new_entry_size); } - fn record_remove_stats(&self, klen: usize, vlen: usize) { - let entry_size = aligned_data_entry_size(klen, vlen); - let h = self.inner.index_file.header_ref(); - h.waste_bytes.fetch_add(entry_size, Ordering::Relaxed); - h.num_removed.fetch_add(1, Ordering::Relaxed); + fn record_remove_stats(&self, _klen: usize, _vlen: usize) { + self.inner.add_uncommitted_num_entries(-1); + self.inner.stats.num_removed.fetch_add(1, Ordering::Relaxed); } fn set_ns(&self, ns: KeyNamespace, key: &[u8], val: &[u8]) -> Result>> { @@ -687,8 +715,10 @@ impl CandyStore { let active_file = files .get(&active_idx) .ok_or(Error::MissingDataFile(active_idx))?; - let (file_off, size) = active_file.append_kv(ns, key, val)?; + let (file_off, size, _inflight_guard) = + active_file.append_kv(EntryType::Update, ns, key, val)?; self.inner.record_write(size as u64); + crate::crash_point("set_after_write_before_update"); row.replace_pointer( col, @@ -710,8 +740,10 @@ impl CandyStore { let active_file = files .get(&active_idx) .ok_or(Error::MissingDataFile(active_idx))?; - let (file_off, size) = active_file.append_kv(ns, key, val)?; + let (file_off, size, _inflight_guard) = + active_file.append_kv(EntryType::Insert, ns, key, val)?; self.inner.record_write(size as u64); + crate::crash_point("set_after_write_before_insert"); row.insert( col, hc.sig, @@ -771,7 +803,8 @@ impl CandyStore { let active_file = files .get(&active_idx) .ok_or(Error::MissingDataFile(active_idx))?; - let (file_off, size) = active_file.append_kv(ns, key, val)?; + let (file_off, size, _inflight_guard) = + active_file.append_kv(EntryType::Update, ns, key, val)?; self.inner.record_write(size as u64); row.replace_pointer( col, @@ -846,7 +879,8 @@ impl CandyStore { let active_file = files .get(&active_idx) .ok_or(Error::MissingDataFile(active_idx))?; - let tombstone_size = active_file.append_tombstone(ns, key)?; + let (tombstone_size, _inflight_guard) = + 
active_file.append_tombstone(ns, key)?; self.inner.record_write(tombstone_size as u64); row.remove(col); @@ -960,11 +994,6 @@ impl CandyStore { sync_dir(&self.inner.base_path) } - /// Returns whether the store was opened from a clean shutdown state. - pub fn was_clean_shutdown(&self) -> bool { - self.was_clean_shutdown.load(Ordering::Relaxed) - } - /// Returns the number of background compaction errors observed since open. pub fn compaction_errors(&self) -> u64 { self.inner.stats.compaction_errors.load(Ordering::Relaxed) @@ -1017,10 +1046,24 @@ impl CandyStore { let h = self.inner.index_file.header_ref(); let num_rows = self.inner.index_file.num_rows() as u64; let capacity = num_rows.saturating_mul(ROW_WIDTH as u64); + let num_items = h - .num_created + .committed_num_entries .load(Ordering::Relaxed) - .saturating_sub(h.num_removed.load(Ordering::Relaxed)); + .saturating_add_signed(h.uncommitted_entries_delta.load(Ordering::Relaxed)); + + // Derive data_bytes and waste_bytes from file sizes and per-file + // waste levels rather than maintaining them as persistent counters. 
+ let total_used: u64 = self + .inner + .data_files + .read() + .values() + .map(|df| df.used_bytes()) + .sum(); + let waste = self.inner.index_file.total_waste(); + let data_bytes = total_used.saturating_sub(waste); + Stats { num_rows, capacity, @@ -1063,18 +1106,18 @@ impl CandyStore { num_read_bytes: self.inner.stats.num_read_bytes.load(Ordering::Relaxed), num_write_ops: self.inner.stats.num_write_ops.load(Ordering::Relaxed), num_write_bytes: self.inner.stats.num_write_bytes.load(Ordering::Relaxed), - num_created: h.num_created.load(Ordering::Relaxed), - num_removed: h.num_removed.load(Ordering::Relaxed), - num_replaced: h.num_replaced.load(Ordering::Relaxed), - written_bytes: h.written_bytes.load(Ordering::Relaxed), - waste_bytes: h.waste_bytes.load(Ordering::Relaxed), - reclaimed_bytes: h.reclaimed_bytes.load(Ordering::Relaxed), - entries_under_64: h.size_histogram[0].load(Ordering::Relaxed), - entries_under_256: h.size_histogram[1].load(Ordering::Relaxed), - entries_under_1024: h.size_histogram[2].load(Ordering::Relaxed), - entries_under_4096: h.size_histogram[3].load(Ordering::Relaxed), - entries_under_16384: h.size_histogram[4].load(Ordering::Relaxed), - entries_over_16384: h.size_histogram[5].load(Ordering::Relaxed), + num_created: self.inner.stats.num_created.load(Ordering::Relaxed), + num_removed: self.inner.stats.num_removed.load(Ordering::Relaxed), + num_replaced: self.inner.stats.num_replaced.load(Ordering::Relaxed), + written_bytes: self.inner.stats.written_bytes.load(Ordering::Relaxed), + data_bytes, + waste_bytes: waste, + entries_under_64: self.inner.stats.size_histogram[0].load(Ordering::Relaxed), + entries_under_256: self.inner.stats.size_histogram[1].load(Ordering::Relaxed), + entries_under_1024: self.inner.stats.size_histogram[2].load(Ordering::Relaxed), + entries_under_4096: self.inner.stats.size_histogram[3].load(Ordering::Relaxed), + entries_under_16384: self.inner.stats.size_histogram[4].load(Ordering::Relaxed), + entries_over_16384: 
self.inner.stats.size_histogram[5].load(Ordering::Relaxed), } } diff --git a/src/store/compaction.rs b/src/store/compaction.rs index 2cf84fa..260d4d8 100644 --- a/src/store/compaction.rs +++ b/src/store/compaction.rs @@ -129,14 +129,20 @@ impl StoreInner { } let active_idx = self.active_file_idx.load(Ordering::Acquire); - let files = self.data_files.read(); - let active_file = files + let active_file = self + .data_files + .read() .get(&active_idx) + .cloned() .ok_or(Error::MissingDataFile(active_idx))?; - match active_file.append_kv(ns, kv.key(), kv.value()) { - Ok((file_off, size)) => { - drop(files); + match active_file.append_kv( + crate::internal::EntryType::Update, + ns, + kv.key(), + kv.value(), + ) { + Ok((file_off, size, _inflight_guard)) => { self.record_write(size as u64); moved_bytes = moved_bytes.saturating_add(size as u64); row.replace_pointer( @@ -151,12 +157,10 @@ impl StoreInner { break; } Err(Error::RotateDataFile(rotate_idx)) => { - drop(files); drop(row); rotate_idx_req = Some(rotate_idx); } Err(err) => { - drop(files); return Err(err); } } @@ -180,6 +184,18 @@ impl StoreInner { let compacted_files = removed.len() as u64; drop(sources); + // Durability barrier: ensure all moved entries are durable in the active + // file and the updated index pointers are persisted before we delete the + // source files. Without this, a crash after deletion could leave the + // persisted index pointing at files that no longer exist. 
+ if !removed.is_empty() { + let active_idx = self.active_file_idx.load(Ordering::Acquire); + if let Some(active_file) = self.data_files.read().get(&active_idx).cloned() { + let _ = active_file.file.sync_all(); + } + let _ = self.index_file.sync_all(); + } + let mut reclaimed_bytes = 0u64; for (file_idx, data_file) in removed { drop(data_file); @@ -197,10 +213,7 @@ impl StoreInner { } } - self.index_file - .header_ref() - .reclaimed_bytes - .fetch_add(reclaimed_bytes, Ordering::Relaxed); + let _ = self.index_file.flush_header(); Ok(CompactionOutcome { compacted_files, @@ -342,18 +355,24 @@ impl Drop for CandyStore { if !data_files_synced { return; } - self.inner - .index_file - .header_ref() - .dirty - .store(0, Ordering::Release); - if self.inner.index_file.flush_header().is_err() { + + // Advance the commit cursor so the next open can skip replay entirely. + let active_idx = self.inner.active_file_idx.load(Ordering::Relaxed); + if let Some(active_file) = self.inner.data_files.read().get(&active_idx).cloned() { + self.inner.index_file.rollover_uncommitted_counters(); self.inner .index_file .header_ref() - .dirty - .store(1, Ordering::Release); + .commit_file_ordinal + .store(active_file.file_ordinal, Ordering::Release); + self.inner + .index_file + .header_ref() + .commit_offset + .store(active_file.used_bytes(), Ordering::Release); } + + let _ = self.inner.index_file.sync_all(); } } diff --git a/src/store/open.rs b/src/store/open.rs index 9028156..e22acd5 100644 --- a/src/store/open.rs +++ b/src/store/open.rs @@ -1,7 +1,7 @@ use std::{ collections::{HashMap, HashSet}, path::Path, - sync::{Arc, atomic::Ordering}, + sync::Arc, }; use crate::{ @@ -10,12 +10,28 @@ use crate::{ internal::{ MAX_REPRESENTABLE_FILE_SIZE, is_resettable_open_error, parse_data_file_idx, sync_dir, }, - types::{Config, Error, INITIAL_DATA_FILE_ORDINAL, RebuildStrategy, Result}, + types::{Config, Error, INITIAL_DATA_FILE_ORDINAL, Result}, }; -use super::{CandyStore, DirtyOpenAction, OpenState, 
StoreInner}; +use super::{CandyStore, OpenState, StoreInner}; impl CandyStore { + fn build_store( + base_path: std::path::PathBuf, + config: Arc, + lockfile: fslock::LockFile, + ) -> Result { + let state = Self::open_or_reset_state(&base_path, config.clone())?; + let num_logical_locks = config.max_concurrency.max(8).next_power_of_two(); + + Ok(Self { + inner: Arc::new(StoreInner::new(base_path, config, state, num_logical_locks)), + _lockfile: lockfile, + compaction_thd: parking_lot::Mutex::new(None), + allow_clean_shutdown: std::sync::atomic::AtomicBool::new(true), + }) + } + fn clear_db_files(base_path: &Path) -> Result<()> { let mut removed_any = false; for entry in std::fs::read_dir(base_path).map_err(Error::IOError)? { @@ -76,20 +92,11 @@ impl CandyStore { data_files.insert(active_file_idx, data_file); } - let was_clean_shutdown = { - let header = index_file.header_ref(); - let was_clean = header.dirty.load(Ordering::Acquire) == 0; - header.dirty.store(1, Ordering::Release); - index_file.flush_header()?; - was_clean - }; - Ok(OpenState { index_file, data_files, active_file_idx, active_file_ordinal, - was_clean_shutdown, }) } @@ -118,9 +125,7 @@ impl CandyStore { Ok(state) => Ok(state), Err(err) if config.reset_on_invalid_data && is_resettable_open_error(&err) => { Self::clear_db_files(base_path)?; - let mut state = Self::open_state(base_path, config)?; - state.was_clean_shutdown = false; - Ok(state) + Self::open_state(base_path, config) } Err(err) => Err(err), } @@ -147,65 +152,12 @@ impl CandyStore { Ok(Arc::new(normalized)) } - fn resolve_dirty_open( - base_path: &Path, - config: Arc, - state: OpenState, - ) -> Result<(OpenState, DirtyOpenAction)> { - if state.was_clean_shutdown { - return Ok((state, DirtyOpenAction::None)); - } - - let action = match config.rebuild_strategy { - RebuildStrategy::FailIfDirty => return Err(Error::DirtyIndex), - RebuildStrategy::RebuildIfDirty => DirtyOpenAction::RebuildIndex, - RebuildStrategy::ResetDBIfDirty => 
DirtyOpenAction::ResetDb, - RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrFail => { - state.index_file.verify_row_checksums()?; - DirtyOpenAction::TrustIndex - } - RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrRebuild => { - match state.index_file.verify_row_checksums() { - Ok(()) => DirtyOpenAction::TrustIndex, - Err(Error::IOError(io_err)) - if io_err.kind() == std::io::ErrorKind::InvalidData => - { - DirtyOpenAction::RebuildIndex - } - Err(err) => return Err(err), - } - } - RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrReset => { - match state.index_file.verify_row_checksums() { - Ok(()) => DirtyOpenAction::TrustIndex, - Err(Error::IOError(io_err)) - if io_err.kind() == std::io::ErrorKind::InvalidData => - { - DirtyOpenAction::ResetDb - } - Err(err) => return Err(err), - } - } - }; - - if matches!(action, DirtyOpenAction::ResetDb) { - drop(state); - Self::clear_db_files(base_path)?; - let mut reset_state = Self::open_state(base_path, config)?; - reset_state.was_clean_shutdown = false; - return Ok((reset_state, DirtyOpenAction::ResetDb)); - } - - Ok((state, action)) - } - /// Opens a store at `path`, creating it if needed. /// - /// If `config.reset_on_invalid_data` is enabled, or if - /// `config.rebuild_strategy` can reset the database during dirty recovery, - /// opening may remove all contents and recreate fresh store files. While - /// the store is open, the active `.lockfile` is preserved so the directory - /// remains locked against concurrent opens. + /// If `config.reset_on_invalid_data` is enabled, opening may remove all + /// contents and recreate fresh store files when the on-disk data is + /// corrupt. While the store is open, the active `.lockfile` is preserved + /// so the directory remains locked against concurrent opens. 
pub fn open(path: impl AsRef, config: Config) -> Result { let base_path = path.as_ref().to_path_buf(); std::fs::create_dir_all(&base_path).map_err(Error::IOError)?; @@ -213,57 +165,30 @@ impl CandyStore { let lockfile = Self::acquire_lockfile(&base_path)?; let config = Self::normalize_config_for_path(&base_path, config)?; - let state = Self::open_or_reset_state(&base_path, config.clone())?; - let (state, dirty_open_action) = - Self::resolve_dirty_open(&base_path, config.clone(), state)?; - let was_clean_shutdown = state.was_clean_shutdown; - let num_logical_locks = config.max_concurrency.max(8).next_power_of_two(); - - let store = Self { - inner: Arc::new(StoreInner::new( - base_path, - config.clone(), - state, - num_logical_locks, - )), - _lockfile: lockfile, - compaction_thd: parking_lot::Mutex::new(None), - allow_clean_shutdown: std::sync::atomic::AtomicBool::new(was_clean_shutdown), - was_clean_shutdown: std::sync::atomic::AtomicBool::new(was_clean_shutdown), - }; - - if !was_clean_shutdown { - let header = store.inner.index_file.header_ref(); - let has_pending_rebuild = header - .rebuild_checkpoint_ordinal - .load(std::sync::atomic::Ordering::Acquire) - != 0 - || header - .rebuild_checkpoint_ptr - .load(std::sync::atomic::Ordering::Acquire) - != 0 - || header - .rebuild_checkpoint_checksum - .load(std::sync::atomic::Ordering::Acquire) - != 0; - - match dirty_open_action { - DirtyOpenAction::None | DirtyOpenAction::ResetDb => {} - // A pending checkpoint means rebuild was interrupted — resume it - // regardless of whether the strategy would trust the index. 
- DirtyOpenAction::TrustIndex if has_pending_rebuild => { - store.recover_index()?; - } - DirtyOpenAction::TrustIndex => {} - DirtyOpenAction::RebuildIndex => store.recover_index()?, + let store = Self::build_store(base_path.clone(), config.clone(), lockfile)?; + match store.recover_index() { + Ok(()) => { + store.start_compaction(); + Ok(store) } - store - .allow_clean_shutdown - .store(true, std::sync::atomic::Ordering::Relaxed); + Err(err) if config.reset_on_invalid_data && is_resettable_open_error(&err) => { + let store = std::mem::ManuallyDrop::new(store); + let inner = unsafe { std::ptr::read(&store.inner) }; + let lockfile = unsafe { std::ptr::read(&store._lockfile) }; + let compaction_thd = unsafe { std::ptr::read(&store.compaction_thd) }; + let _allow_clean_shutdown = unsafe { std::ptr::read(&store.allow_clean_shutdown) }; + drop(compaction_thd); + drop(inner); + + Self::clear_db_files(&base_path)?; + + let recovered = Self::build_store(base_path, config, lockfile)?; + recovered.recover_index()?; + recovered.start_compaction(); + Ok(recovered) + } + Err(err) => Err(err), } - - store.start_compaction(); - Ok(store) } /// Clears the store and recreates a fresh empty database in the same @@ -282,8 +207,6 @@ impl CandyStore { self.allow_clean_shutdown .store(true, std::sync::atomic::Ordering::Relaxed); - self.was_clean_shutdown - .store(true, std::sync::atomic::Ordering::Relaxed); self.start_compaction(); Ok(()) diff --git a/src/store/recovery.rs b/src/store/recovery.rs index 1248d3c..517fccd 100644 --- a/src/store/recovery.rs +++ b/src/store/recovery.rs @@ -1,175 +1,243 @@ -use std::sync::{Arc, atomic::AtomicU64, atomic::Ordering}; +use std::sync::{Arc, atomic::Ordering}; use crate::{ + crash_point, data_file::DataFile, index_file::EntryPointer, internal::{ - EntryType, FILE_OFFSET_ALIGNMENT, HashCoord, KVRef, KeyNamespace, aligned_data_entry_size, - aligned_data_entry_waste, aligned_tombstone_entry_waste, invalid_data_error, + EntryType, 
FILE_OFFSET_ALIGNMENT, HashCoord, KVRef, KeyNamespace, ROW_WIDTH, + aligned_data_entry_size, aligned_tombstone_entry_waste, invalid_data_error, }, types::{Error, MAX_USER_KEY_SIZE, MAX_USER_VALUE_SIZE, Result}, }; use super::CandyStore; -static RECOVERED_ENTRIES_FOR_ABORT_TEST: AtomicU64 = AtomicU64::new(0); +enum RebuildMode { + TailFrom(u64), + FullActiveFile, +} impl CandyStore { - /// How many entries to process before flushing a mid-file checkpoint. - const REBUILD_CHECKPOINT_INTERVAL: u64 = 1000; + /// How many bytes of replayed data between progressive checkpoints. + const REBUILD_CHECKPOINT_INTERVAL_BYTES: u64 = 256 * 1024; - fn rebuild_checkpoint_checksum(ordinal: u64, ptr: u64) -> u64 { - ordinal.rotate_left(17) ^ ptr.rotate_right(11) ^ 0x5a17_b1d2_c3e4_f607 - } + pub(super) fn recover_index(&self) -> Result<()> { + let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); + let active_file = self + .inner + .data_files + .read() + .get(&active_idx) + .cloned() + .ok_or(Error::MissingDataFile(active_idx))?; + + let commit_file_ordinal = self + .inner + .index_file + .header_ref() + .commit_file_ordinal + .load(Ordering::Acquire); + let commit_offset = self + .inner + .index_file + .header_ref() + .commit_offset + .load(Ordering::Acquire); + + let rebuild_mode = if active_file.file_ordinal == commit_file_ordinal { + self.validated_commit_offset(&active_file, commit_offset)? 
+ } else { + RebuildMode::TailFrom(0) + }; - fn encode_rebuild_checkpoint_ptr(file_idx: u16, file_offset: u64) -> u64 { - let fi = (file_idx as u64) & ((1 << 12) - 1); - let fo = (file_offset / FILE_OFFSET_ALIGNMENT) << 12; - fi | fo - } + let start_offset = match rebuild_mode { + RebuildMode::TailFrom(offset) => offset, + RebuildMode::FullActiveFile => 0, + }; - fn decode_rebuild_checkpoint_ptr(ptr: u64) -> (u16, u64) { - let file_idx = (ptr & ((1 << 12) - 1)) as u16; - let file_offset = (ptr >> 12) * FILE_OFFSET_ALIGNMENT; - (file_idx, file_offset) - } + // The committed cursor marks the active-file prefix already reflected + // in the index. We only need to rebuild the uncommitted entries delta, + // since data_bytes and waste_bytes are derived from file sizes and + // per-file waste levels at query time. + // + // The entries delta can be recomputed exactly from the on-disk entry + // types: InsertData → +1, UpdateData → 0, Tombstone → −1. + self.inner + .index_file + .header_ref() + .uncommitted_entries_delta + .store(0, Ordering::Relaxed); + + // Pre-purge any index entries that point past the file's durable + // extent. This handles the case where the data file was truncated + // (e.g. disk-full or corruption) and ensures the replay loop won't + // encounter stale pointers when comparing existing entries. + let pre_purge_extent = active_file + .used_bytes() + .next_multiple_of(FILE_OFFSET_ALIGNMENT); + self.purge_uncommitted_file_entries(active_idx, pre_purge_extent)?; + + if matches!(rebuild_mode, RebuildMode::FullActiveFile) { + // The saved active-file cursor is no longer trustworthy. Remove + // every active-file pointer from the index, then rebuild that + // file's contribution from offset 0 on top of the older files. 
+ self.purge_uncommitted_file_entries(active_idx, 0)?; + self.inner + .index_file + .header_ref() + .committed_num_entries + .store(self.count_live_index_entries(), Ordering::Relaxed); + } - fn read_rebuild_checkpoint(&self) -> Option<(u64, u16, u64)> { - let header = self.inner.index_file.header_ref(); - let ordinal = header.rebuild_checkpoint_ordinal.load(Ordering::Acquire); - let ptr = header.rebuild_checkpoint_ptr.load(Ordering::Acquire); - let checksum = header.rebuild_checkpoint_checksum.load(Ordering::Acquire); + let mut offset = start_offset; + let mut read_buf = Vec::new(); + let mut buf_file_offset = 0u64; + let mut match_scratch = Vec::new(); + let mut bytes_since_checkpoint = 0u64; + loop { + let Some((kv, entry_offset, next_offset)) = + active_file.read_next_entry_ref(offset, &mut read_buf, &mut buf_file_offset)? + else { + break; + }; + let entry_bytes = next_offset - offset; + offset = next_offset; - if ordinal == 0 && ptr == 0 && checksum == 0 { - return None; - } + let Some(ns) = KeyNamespace::from_u8(kv.ns) else { + return Err(invalid_data_error("unknown key namespace in data file")); + }; + + // Count the entry's contribution to the entries delta based on its + // on-disk type. This is unconditional — it doesn't matter whether + // the index pointer was already applied or not. + match kv.entry_type { + EntryType::Insert => self.inner.add_uncommitted_num_entries(1), + EntryType::Tombstone => self.inner.add_uncommitted_num_entries(-1), + _ => {} // UpdateData and any future types don't change num_entries + } + + // Fix up the index pointers (no stats accounting). 
+ self.recover_entry(&active_file, ns, kv, entry_offset, &mut match_scratch)?; + crash_point("rebuild_entry"); - if checksum != Self::rebuild_checkpoint_checksum(ordinal, ptr) { - return None; + bytes_since_checkpoint += entry_bytes; + if bytes_since_checkpoint >= Self::REBUILD_CHECKPOINT_INTERVAL_BYTES { + self.flush_rebuild_checkpoint(active_file.file_ordinal, offset)?; + bytes_since_checkpoint = 0; + } } - let (file_idx, file_offset) = Self::decode_rebuild_checkpoint_ptr(ptr); - Some((ordinal, file_idx, file_offset)) + let durable_extent = offset.next_multiple_of(FILE_OFFSET_ALIGNMENT); + + // Purge any phantom index entries that reference the active file + // beyond this point (OS flushed the index page but not the data). + self.purge_uncommitted_file_entries(active_idx, durable_extent)?; + + // Advance the persisted replay cursor to the end of what recovery + // verified and applied to the index. + self.flush_rebuild_checkpoint(active_file.file_ordinal, durable_extent)?; + + Ok(()) } - fn maybe_abort_rebuild_for_testing(&self) { - let Ok(after) = std::env::var("CANDYSTORE_ABORT_REBUILD_AFTER") else { - return; - }; - let Ok(after) = after.parse::() else { - return; - }; - if after == 0 { - return; + fn validated_commit_offset( + &self, + active_file: &Arc, + checkpoint_offset: u64, + ) -> Result { + if checkpoint_offset == 0 { + return Ok(RebuildMode::TailFrom(0)); } - let recovered = RECOVERED_ENTRIES_FOR_ABORT_TEST.fetch_add(1, Ordering::Relaxed) + 1; - if recovered >= after { - std::process::abort(); + let used_bytes = active_file.used_bytes(); + if checkpoint_offset > used_bytes { + return Ok(RebuildMode::FullActiveFile); + } + if checkpoint_offset == used_bytes { + return Ok(RebuildMode::TailFrom(checkpoint_offset)); } - } - pub(super) fn recover_index(&self) -> Result<()> { - let mut sorted_files: Vec> = - self.inner.data_files.read().values().cloned().collect(); - sorted_files.sort_by_key(|df| df.file_ordinal); - - let checkpoint = - 
self.read_rebuild_checkpoint() - .and_then(|(ordinal, file_idx, file_offset)| { - sorted_files - .iter() - .find(|df| df.file_idx == file_idx && df.file_ordinal == ordinal) - .map(|_| (ordinal, file_idx, file_offset)) - }); - - if checkpoint.is_none() { - // No previous progress, checkpoint corruption, or the checkpointed - // file no longer matches the persisted stable identity — full rebuild. - let row_table = self.inner.index_file.rows_table_mut(); - self.inner.index_file.reset(row_table)?; + let mut probe_buf = Vec::new(); + let mut probe_file_offset = 0u64; + match active_file.read_next_entry_ref( + checkpoint_offset, + &mut probe_buf, + &mut probe_file_offset, + )? { + Some((_, entry_offset, _)) if entry_offset == checkpoint_offset => { + Ok(RebuildMode::TailFrom(checkpoint_offset)) + } + _ => Ok(RebuildMode::FullActiveFile), } + } - for data_file in &sorted_files { - let start_offset = if checkpoint.is_some_and(|(ordinal, file_idx, _)| { - data_file.file_ordinal == ordinal && data_file.file_idx == file_idx - }) { - checkpoint.unwrap().2 - } else if checkpoint.is_some_and(|(ordinal, _, _)| data_file.file_ordinal < ordinal) { - continue; - } else { - 0 - }; + fn flush_rebuild_checkpoint(&self, ordinal: u64, offset: u64) -> Result<()> { + let resume_offset = offset.next_multiple_of(FILE_OFFSET_ALIGNMENT); - let mut offset = start_offset; - let mut read_buf = Vec::new(); - let mut buf_file_offset = 0u64; - let mut match_scratch = Vec::new(); - let mut entries_since_checkpoint = 0u64; - loop { - let Some((kv, entry_offset, next_offset)) = - data_file.read_next_entry_ref(offset, &mut read_buf, &mut buf_file_offset)? 
- else { - break; - }; - offset = next_offset; - - let Some(ns) = KeyNamespace::from_u8(kv.ns) else { - return Err(invalid_data_error("unknown key namespace in data file")); - }; - - self.recover_entry(data_file, ns, kv, entry_offset, &mut match_scratch)?; - self.maybe_abort_rebuild_for_testing(); - - entries_since_checkpoint += 1; - if entries_since_checkpoint >= Self::REBUILD_CHECKPOINT_INTERVAL { - self.flush_rebuild_checkpoint( - data_file.file_ordinal, - data_file.file_idx, - offset, - )?; - entries_since_checkpoint = 0; - } - } + // Persist the same prefix in both index rows and committed counters + // before advancing the replay cursor. The cursor itself must hit disk + // after the rows it covers. + self.inner.index_file.rollover_uncommitted_counters(); - // File fully processed — keep progress at this file's final offset. - self.flush_rebuild_checkpoint(data_file.file_ordinal, data_file.file_idx, offset)?; - } + self.inner + .index_file + .header_ref() + .commit_file_ordinal + .store(ordinal, Ordering::Release); + self.inner + .index_file + .header_ref() + .commit_offset + .store(resume_offset, Ordering::Release); + + self.inner.index_file.sync_all() + } - // Rebuild complete — clear checkpoint. - self.clear_rebuild_checkpoint()?; + /// Remove index entries pointing to the active file at or beyond `durable_extent`. 
+ fn purge_uncommitted_file_entries(&self, file_idx: u16, min_offset: u64) -> Result<()> { + let row_table = self.inner.index_file.rows_table(); + let num_rows = self.inner.index_file.num_rows(); + for row_idx in 0..num_rows { + let mut row = row_table.row_mut(row_idx); + if row.split_level.load(Ordering::Acquire) == 0 { + continue; + } + for col in 0..ROW_WIDTH { + if row.signatures[col] == HashCoord::INVALID_SIG { + continue; + } + let ptr = row.pointers[col]; + if !ptr.is_valid() { + continue; + } + if ptr.file_idx() == file_idx && ptr.file_offset() >= min_offset { + row.remove(col); + } + } + } Ok(()) } - fn flush_rebuild_checkpoint(&self, ordinal: u64, file_idx: u16, offset: u64) -> Result<()> { - self.inner.index_file.flush_rows()?; + fn count_live_index_entries(&self) -> u64 { + let row_table = self.inner.index_file.rows_table(); + let num_rows = self.inner.index_file.num_rows(); + let mut total = 0u64; - let ptr = Self::encode_rebuild_checkpoint_ptr(file_idx, offset); - let checksum = Self::rebuild_checkpoint_checksum(ordinal, ptr); - let header = self.inner.index_file.header_ref(); - header - .rebuild_checkpoint_ordinal - .store(ordinal, Ordering::Release); - header.rebuild_checkpoint_ptr.store(ptr, Ordering::Release); - header - .rebuild_checkpoint_checksum - .store(checksum, Ordering::Release); - self.inner.index_file.flush_header() - } + for row_idx in 0..num_rows { + let row = row_table.row(row_idx); + if row.split_level.load(Ordering::Acquire) == 0 { + continue; + } + for col in 0..ROW_WIDTH { + if row.signatures[col] != HashCoord::INVALID_SIG && row.pointers[col].is_valid() { + total += 1; + } + } + } - fn clear_rebuild_checkpoint(&self) -> Result<()> { - self.inner.index_file.flush_rows()?; - - let header = self.inner.index_file.header_ref(); - header - .rebuild_checkpoint_ordinal - .store(0, Ordering::Release); - header.rebuild_checkpoint_ptr.store(0, Ordering::Release); - header - .rebuild_checkpoint_checksum - .store(0, Ordering::Release); - 
self.inner.index_file.flush_header() + total } fn recover_entry( @@ -181,7 +249,7 @@ impl CandyStore { match_scratch: &mut Vec, ) -> Result<()> { match kv.entry_type { - EntryType::Data => { + EntryType::Insert | EntryType::Update => { self.recover_data_entry(data_file, ns, kv, entry_offset, match_scratch) } EntryType::Tombstone => self.recover_tombstone_entry(data_file, ns, kv, match_scratch), @@ -189,6 +257,8 @@ impl CandyStore { } } + /// Fix index pointers for a data/update entry. No stats accounting — + /// the entries delta is handled by the caller based on the on-disk entry type. fn recover_data_entry( &self, data_file: &Arc, @@ -210,8 +280,6 @@ impl CandyStore { hc.masked_row_selector(), ); - let entry_size = aligned_data_entry_size(key.len(), val.len()); - self.inner._mut_op(ns, key, &[], |hc, mut row, key, _| { let files = self.inner.data_files.read(); for (col, entry) in row.iter_matches(hc) { @@ -222,33 +290,23 @@ impl CandyStore { file.read_kv_into(entry.file_offset(), entry.size_hint(), match_scratch)?; if existing_kv.key() == key { if entry == ptr { - // Resuming from a crash during rebuild may subject some already-processed - // entries to multiple iterations. If the row pointer is already identical, - // this entry was fully processed and we can skip it. + // Already points at this entry — nothing to fix. return Ok(()); } - - let old_size = - aligned_data_entry_size(existing_kv.key().len(), existing_kv.value().len()); - self.record_recovered_waste( - entry, - existing_kv.key().len(), - existing_kv.value().len(), - )?; + if entry.file_idx() == data_file.file_idx + && entry.file_offset() > ptr.file_offset() + { + // A newer active-file entry already exists — skip. + return Ok(()); + } + // Older pointer — replace with this newer one. 
row.replace_pointer(col, ptr); - let h = self.inner.index_file.header_ref(); - h.num_replaced.fetch_add(1, Ordering::Relaxed); - h.written_bytes.fetch_add(entry_size, Ordering::Relaxed); - h.waste_bytes.fetch_add(old_size, Ordering::Relaxed); return Ok(()); } } + // Key not in index — insert it. if let Some(col) = row.find_free_slot() { row.insert(col, hc.sig, ptr); - let h = self.inner.index_file.header_ref(); - h.num_created.fetch_add(1, Ordering::Relaxed); - h.written_bytes.fetch_add(entry_size, Ordering::Relaxed); - self.inner.bump_histogram(entry_size); Ok(()) } else { Err(Error::SplitRow(row.split_level.load(Ordering::Relaxed))) @@ -256,18 +314,16 @@ impl CandyStore { }) } + /// Fix index pointers for a tombstone entry. No stats accounting. fn recover_tombstone_entry( &self, - data_file: &Arc, + _data_file: &Arc, ns: KeyNamespace, kv: KVRef<'_>, match_scratch: &mut Vec, ) -> Result<()> { let key = kv.key(); self.validate_recovered_tombstone_entry(key)?; - self.inner - .index_file - .add_file_waste(data_file.file_idx, aligned_tombstone_entry_waste(key.len())); self.inner._mut_op(ns, key, &[], |hc, mut row, key, _| { let files = self.inner.data_files.read(); @@ -278,17 +334,7 @@ impl CandyStore { let existing_kv = file.read_kv_into(entry.file_offset(), entry.size_hint(), match_scratch)?; if existing_kv.key() == key { - let old_size = - aligned_data_entry_size(existing_kv.key().len(), existing_kv.value().len()); - self.record_recovered_waste( - entry, - existing_kv.key().len(), - existing_kv.value().len(), - )?; row.remove(col); - let h = self.inner.index_file.header_ref(); - h.num_removed.fetch_add(1, Ordering::Relaxed); - h.waste_bytes.fetch_add(old_size, Ordering::Relaxed); return Ok(()); } } @@ -296,14 +342,6 @@ impl CandyStore { }) } - fn record_recovered_waste(&self, entry: EntryPointer, klen: usize, vlen: usize) -> Result<()> { - let old_aligned_len = aligned_data_entry_waste(klen, vlen); - self.inner - .index_file - .add_file_waste(entry.file_idx(), 
old_aligned_len); - Ok(()) - } - fn validate_recovered_data_entry(&self, key: &[u8], val: &[u8]) -> Result<()> { let entry_size = aligned_data_entry_size(key.len(), val.len()) as usize; if key.len() > MAX_USER_KEY_SIZE diff --git a/src/types.rs b/src/types.rs index 64393a9..ccff569 100644 --- a/src/types.rs +++ b/src/types.rs @@ -11,27 +11,6 @@ pub(crate) const INITIAL_DATA_FILE_ORDINAL: u64 = 0x00bd_38a0_2a35_1cdf; use crate::internal::MIN_INITIAL_ROWS; use std::time::Duration; -#[derive(Debug, Clone, Copy)] -/// How opening a store should handle a dirty index. -pub enum RebuildStrategy { - /// Reject opening a store whose index is marked dirty. - FailIfDirty, - /// Rebuild the index from data files when the store is dirty. - RebuildIfDirty, - /// Reset the database when the store is dirty. - /// - /// This removes all directory contents before recreating the store state. - /// While the store is open, the active `.lockfile` is preserved so the - /// directory remains locked against concurrent opens. - ResetDBIfDirty, - /// Trust a dirty index only if row checksums still match; otherwise fail. - TrustDirtyIndexIfChecksumCorrectOrFail, - /// Trust a dirty index if row checksums match; otherwise rebuild. - TrustDirtyIndexIfChecksumCorrectOrRebuild, - /// Trust a dirty index if row checksums match; otherwise reset the database. - TrustDirtyIndexIfChecksumCorrectOrReset, -} - #[derive(Debug, Clone, Copy)] /// Runtime configuration for opening a store. pub struct Config { @@ -51,18 +30,12 @@ pub struct Config { pub max_data_file_size: u32, /// Minimum per-file waste threshold before background compaction considers it. pub compaction_min_threshold: u32, - /// Maximum logical concurrency used to size internal lock tables. + /// Maximum logical concurrency used to size internal lock tables, defaults to num_cpus*2 pub max_concurrency: usize, /// Reset the database if opening encounters invalid on-disk data. 
- /// - /// This removes all directory contents before recreating the store state. - /// While the store is open, the active `.lockfile` is preserved so the - /// directory remains locked against concurrent opens. pub reset_on_invalid_data: bool, /// Target background compaction throughput in bytes per second. pub compaction_throughput_bytes_per_sec: usize, - /// Dirty-index handling policy used during open. - pub rebuild_strategy: RebuildStrategy, } impl Default for Config { @@ -77,7 +50,6 @@ impl Default for Config { max_concurrency: (2 * num_cpus::get()).clamp(16, 64), reset_on_invalid_data: false, compaction_throughput_bytes_per_sec: 4 * 1024 * 1024, - rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrRebuild, } } } @@ -88,9 +60,6 @@ pub enum Error { #[error("IO error: {0}")] IOError(std::io::Error), - #[error("Index file is dirty")] - DirtyIndex, - #[error("Missing data file: {0}")] MissingDataFile(u16), @@ -252,29 +221,29 @@ pub struct Stats { pub num_write_ops: u64, /// Total bytes written to data files. pub num_write_bytes: u64, - /// Number of entry creations recorded. + /// Number of entry creations recorded since open. pub num_created: u64, - /// Number of entry removals recorded. + /// Number of entry removals recorded since open. pub num_removed: u64, - /// Number of entry replacements recorded. + /// Number of entry replacements recorded since open. pub num_replaced: u64, - /// Total logical bytes written as live entries. + /// Total logical entry bytes written since open. pub written_bytes: u64, - /// Total bytes currently accounted as waste before reclamation. + /// Total bytes currently occupied by live entries. + pub data_bytes: u64, + /// Total bytes currently accounted as unreclaimed waste. pub waste_bytes: u64, - /// Total bytes reclaimed by compaction. - pub reclaimed_bytes: u64, - /// Histogram bucket for entries under 64 bytes. + /// Approximate histogram bucket for entries under 64 bytes since open. 
pub entries_under_64: u64, - /// Histogram bucket for entries under 256 bytes. + /// Approximate histogram bucket for entries under 256 bytes since open. pub entries_under_256: u64, - /// Histogram bucket for entries under 1024 bytes. + /// Approximate histogram bucket for entries under 1024 bytes since open. pub entries_under_1024: u64, - /// Histogram bucket for entries under 4096 bytes. + /// Approximate histogram bucket for entries under 4096 bytes since open. pub entries_under_4096: u64, - /// Histogram bucket for entries under 16384 bytes. + /// Approximate histogram bucket for entries under 16384 bytes since open. pub entries_under_16384: u64, - /// Histogram bucket for entries of 16384 bytes or larger. + /// Approximate histogram bucket for entries of 16384 bytes or larger since open. pub entries_over_16384: u64, } @@ -294,12 +263,12 @@ impl Stats { /// Returns the current unreclaimed waste in bytes. pub fn current_waste(&self) -> u64 { - self.waste_bytes.saturating_sub(self.reclaimed_bytes) + self.waste_bytes } - /// Returns live data bytes after subtracting current waste. + /// Returns bytes currently occupied by live data. pub fn data_bytes(&self) -> u64 { - self.written_bytes.saturating_sub(self.current_waste()) + self.data_bytes } /// Returns bytes currently occupied by live data. 
diff --git a/tests/common/mod.rs b/tests/common/mod.rs index bb4b4db..1fa1d6f 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1,7 +1,4 @@ -use std::io::{Read, Seek, SeekFrom, Write}; -use std::path::Path; - -use candystore::{Config, RebuildStrategy}; +use candystore::Config; #[allow(dead_code)] pub fn small_file_config() -> Config { @@ -10,28 +7,3 @@ pub fn small_file_config() -> Config { ..Config::default() } } - -#[allow(dead_code)] -pub fn rebuild_if_dirty_config() -> Config { - Config { - rebuild_strategy: RebuildStrategy::RebuildIfDirty, - ..Config::default() - } -} - -#[allow(dead_code)] -pub fn corrupt_first_row_checksum(path: &Path) { - let mut file = std::fs::OpenOptions::new() - .read(true) - .write(true) - .open(path.join("rows")) - .unwrap(); - let checksum_offset = 8; - file.seek(SeekFrom::Start(checksum_offset)).unwrap(); - let mut buf = [0u8; 8]; - file.read_exact(&mut buf).unwrap(); - let checksum = u64::from_le_bytes(buf) ^ 1; - file.seek(SeekFrom::Start(checksum_offset)).unwrap(); - file.write_all(&checksum.to_le_bytes()).unwrap(); - file.sync_all().unwrap(); -} diff --git a/tests/compaction.rs b/tests/compaction.rs index e7a3603..65cc792 100644 --- a/tests/compaction.rs +++ b/tests/compaction.rs @@ -267,12 +267,12 @@ fn test_compaction_updates_reclaimed_bytes() -> Result<(), Error> { "compaction should have run at least once" ); assert!( - stats.reclaimed_bytes > 0, - "reclaimed_bytes should be positive after compaction" + stats.last_compaction_reclaimed_bytes > 0, + "last_compaction_reclaimed_bytes should be positive after compaction" ); assert!( - stats.waste_bytes > 0, - "waste_bytes must be positive (total waste ever generated)" + stats.waste_bytes < 200_000, + "waste_bytes should reflect current unreclaimed waste, not a lifetime total" ); for i in 0..100 { @@ -344,8 +344,8 @@ fn test_concurrent_updates_with_compaction() -> Result<(), Error> { "compaction should have run during concurrent updates" ); assert!( - 
stats.reclaimed_bytes > 0, - "reclaimed_bytes should be positive after concurrent updates + compaction" + stats.last_compaction_reclaimed_bytes > 0, + "last_compaction_reclaimed_bytes should be positive after concurrent updates + compaction" ); Ok(()) diff --git a/tests/crasher.rs b/tests/crasher.rs index a051aeb..c65fbe6 100644 --- a/tests/crasher.rs +++ b/tests/crasher.rs @@ -4,7 +4,7 @@ use std::ptr::null_mut; use std::time::Duration; use std::{ops::Range, sync::atomic::AtomicU64, sync::atomic::Ordering::SeqCst}; -use candystore::{CandyStore, Config, RebuildStrategy, Result}; +use candystore::{CandyStore, Config, Result}; use rand::RngExt; #[cfg(debug_assertions)] @@ -24,8 +24,6 @@ fn get_config() -> Config { hash_key: (0xb047_a3ef_b334_9804, 0x807d_3135_878e_9b27), initial_capacity: 1024, max_concurrency: 64, - rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrFail, - //rebuild_strategy: RebuildStrategy::RebuildIfDirty, ..Default::default() } } diff --git a/tests/maintenance.rs b/tests/maintenance.rs index a843cfd..c30ad3c 100644 --- a/tests/maintenance.rs +++ b/tests/maintenance.rs @@ -81,7 +81,6 @@ fn test_explicit_close_releases_lock_and_persists_clean_shutdown() -> Result<(), drop(db); let reopened = CandyStore::open(dir.path(), config)?; - assert!(reopened.was_clean_shutdown()); assert_eq!(reopened.get("key")?, Some(b"value".to_vec())); Ok(()) diff --git a/tests/metrics.rs b/tests/metrics.rs index e0b28e1..f73f556 100644 --- a/tests/metrics.rs +++ b/tests/metrics.rs @@ -33,6 +33,8 @@ fn test_metrics_updates() -> Result<(), Box> { assert_eq!(stats.num_removed, 0); assert_eq!(stats.num_replaced, 0); assert_eq!(stats.written_bytes, 0); + assert_eq!(stats.data_bytes, 0); + assert_eq!(stats.waste_bytes, 0); db.set("key1", "val1")?; @@ -46,6 +48,8 @@ fn test_metrics_updates() -> Result<(), Box> { assert_eq!(stats.num_removed, 0); assert_eq!(stats.num_replaced, 0); assert!(stats.written_bytes > 0); + assert!(stats.data_bytes > 0); + 
assert_eq!(stats.waste_bytes, 0); assert_eq!(stats.num_write_ops, 1); assert!(stats.num_write_bytes > 0); assert!(stats.index_size_bytes > 0); @@ -60,6 +64,8 @@ fn test_metrics_updates() -> Result<(), Box> { assert_eq!(stats.num_created, 1); assert_eq!(stats.num_replaced, 1); assert_eq!(stats.num_removed, 0); + assert!(stats.data_bytes > 0); + assert!(stats.waste_bytes > 0); assert_eq!(stats.num_write_ops, 2); db.remove("key1")?; @@ -72,6 +78,7 @@ fn test_metrics_updates() -> Result<(), Box> { assert_eq!(stats.num_created, 1); assert_eq!(stats.num_replaced, 1); assert_eq!(stats.num_removed, 1); + assert_eq!(stats.data_bytes, 0); assert_eq!(db.get("missing")?, None); assert_eq!(db.get("key1")?, None); @@ -111,6 +118,7 @@ fn test_metrics_compaction() -> Result<(), Box> { let stats = db.stats(); assert!(stats.written_bytes > 0); assert!(stats.num_replaced > 0); + assert!(stats.data_bytes > 0); assert!(stats.num_items > 0); assert!(stats.capacity >= stats.num_items); assert!(stats.fill_level() > 0.0); diff --git a/tests/proptest_state_machine.rs b/tests/proptest_state_machine.rs index f97584e..64c8eaa 100644 --- a/tests/proptest_state_machine.rs +++ b/tests/proptest_state_machine.rs @@ -1,4 +1,4 @@ -use candystore::{CandyStore, Config, RebuildStrategy}; +use candystore::{CandyStore, Config}; use proptest::prelude::*; use std::collections::BTreeMap; use tempfile::TempDir; @@ -39,7 +39,6 @@ proptest! { // Small file size so we generate many data files, rotations, and splits within 200 operations let config = Config { max_data_file_size: 1024 * 4, // 4KB boundaries - rebuild_strategy: RebuildStrategy::RebuildIfDirty, ..Default::default() }; @@ -74,13 +73,11 @@ proptest! 
{ // Close the current DB instance by dropping it, then reopen drop(db_opt.take().unwrap()); db_opt = Some(CandyStore::open(dir.path(), config).unwrap()); - assert!(db_opt.as_ref().unwrap().was_clean_shutdown()); } Op::SimulateCrash => { // Force a rebuild db_opt.take().unwrap()._abort_for_testing(); db_opt = Some(CandyStore::open(dir.path(), config).unwrap()); - assert!(!db_opt.as_ref().unwrap().was_clean_shutdown()); } } } diff --git a/tests/recovery.rs b/tests/recovery.rs index 94ce96e..76eb585 100644 --- a/tests/recovery.rs +++ b/tests/recovery.rs @@ -5,9 +5,7 @@ use std::fs; use std::io::{Read, Seek, SeekFrom, Write}; use std::sync::Arc; -use candystore::{ - CandyStore, CandyTypedDeque, CandyTypedList, CandyTypedStore, Config, Error, RebuildStrategy, -}; +use candystore::{CandyStore, CandyTypedDeque, CandyTypedList, CandyTypedStore, Config, Error}; use tempfile::tempdir; fn patterned_bytes_with_seed(len: usize, seed: usize) -> Vec { @@ -67,100 +65,64 @@ fn rewrite_data_file_ordinal( Ok(()) } -fn rebuild_checkpoint_checksum(ordinal: u64, ptr: u64) -> u64 { - ordinal.rotate_left(17) ^ ptr.rotate_right(11) ^ 0x5a17_b1d2_c3e4_f607 -} +fn active_file_ordinal(dir: &std::path::Path) -> Result { + let mut max_ordinal: Option = None; -fn encode_rebuild_checkpoint_ptr(file_idx: u16, file_offset: u64) -> u64 { - let fi = (file_idx as u64) & ((1 << 12) - 1); - let fo = (file_offset / 16) << 12; - fi | fo -} + for entry in std::fs::read_dir(dir).map_err(Error::IOError)? 
{ + let entry = entry.map_err(Error::IOError)?; + let path = entry.path(); + let Some(name) = path.file_name().and_then(|name| name.to_str()) else { + continue; + }; + if !name.starts_with("data_") { + continue; + } -fn write_rebuild_checkpoint( - dir: &std::path::Path, - ordinal: u64, - file_idx: u16, - file_offset: u64, -) -> Result<(), Error> { - let ptr = encode_rebuild_checkpoint_ptr(file_idx, file_offset); - let checksum = rebuild_checkpoint_checksum(ordinal, ptr); + let mut file = std::fs::OpenOptions::new() + .read(true) + .open(&path) + .map_err(Error::IOError)?; + file.seek(SeekFrom::Start(16)).map_err(Error::IOError)?; + let mut buf = [0u8; 8]; + file.read_exact(&mut buf).map_err(Error::IOError)?; + let ordinal = u64::from_le_bytes(buf); + max_ordinal = Some(max_ordinal.map_or(ordinal, |current| current.max(ordinal))); + } + max_ordinal.ok_or_else(|| { + Error::IOError(std::io::Error::new( + std::io::ErrorKind::NotFound, + "no data files found", + )) + }) +} + +fn write_commit_cursor(dir: &std::path::Path, offset: u64) -> Result<(), Error> { let mut file = std::fs::OpenOptions::new() .read(true) .write(true) .open(dir.join("index")) .map_err(Error::IOError)?; - file.seek(SeekFrom::Start(40)).map_err(Error::IOError)?; - file.write_all(&ordinal.to_le_bytes()) - .map_err(Error::IOError)?; - file.write_all(&ptr.to_le_bytes()).map_err(Error::IOError)?; - file.write_all(&checksum.to_le_bytes()) - .map_err(Error::IOError)?; - file.sync_all().map_err(Error::IOError)?; - Ok(()) -} + let ordinal = active_file_ordinal(dir)?; -fn corrupt_rebuild_checkpoint_checksum(dir: &std::path::Path) -> Result<(), Error> { - let mut file = std::fs::OpenOptions::new() - .read(true) - .write(true) - .open(dir.join("index")) + file.seek(SeekFrom::Start(128)).map_err(Error::IOError)?; + file.write_all(&ordinal.to_le_bytes()) .map_err(Error::IOError)?; - file.seek(SeekFrom::Start(56)).map_err(Error::IOError)?; - file.write_all(&0xdead_beef_dead_beefu64.to_le_bytes()) + 
file.seek(SeekFrom::Start(136)).map_err(Error::IOError)?; + file.write_all(&offset.to_le_bytes()) .map_err(Error::IOError)?; file.sync_all().map_err(Error::IOError)?; Ok(()) } -#[test] -fn test_clean_shutdown_flag() -> Result<(), Error> { - let dir = tempdir().unwrap(); - - { - let db = CandyStore::open(dir.path(), Config::default())?; - assert!(db.was_clean_shutdown()); - db.set("hello", "world")?; - } - - { - let db = CandyStore::open(dir.path(), Config::default())?; - assert!(db.was_clean_shutdown()); - assert_eq!(db.get("hello")?, Some("world".into())); - } - - Ok(()) -} - -#[test] -fn test_dirty_shutdown_detected() -> Result<(), Error> { - let dir = tempdir().unwrap(); - let config = common::rebuild_if_dirty_config(); - - { - let db = CandyStore::open(dir.path(), config)?; - db.set("hello", "world")?; - db._abort_for_testing(); - } - - { - let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); - } - - Ok(()) -} - #[test] fn test_recovery_after_dirty_shutdown() -> Result<(), Error> { let dir = tempdir().unwrap(); - let config = common::rebuild_if_dirty_config(); { - let db = CandyStore::open(dir.path(), config)?; + let db = CandyStore::open(dir.path(), Config::default())?; db.set("key1", "val1")?; db.set("key2", "val2")?; db.set("key3", "val3")?; @@ -170,8 +132,7 @@ fn test_recovery_after_dirty_shutdown() -> Result<(), Error> { } { - let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); + let db = CandyStore::open(dir.path(), Config::default())?; assert_eq!(db.get("key1")?, Some(b"val1".to_vec())); assert_eq!(db.get("key2")?, Some(b"val2_updated".to_vec())); assert!(db.get("key3")?.is_none()); @@ -185,7 +146,6 @@ fn test_recovery_uses_persisted_hash_key_on_reopen() -> Result<(), Error> { let dir = tempdir().unwrap(); let original_config = Config { hash_key: (1, 2), - rebuild_strategy: RebuildStrategy::RebuildIfDirty, ..Config::default() }; let different_config = Config { @@ -202,7 +162,6 @@ fn 
test_recovery_uses_persisted_hash_key_on_reopen() -> Result<(), Error> { { let db = CandyStore::open(dir.path(), different_config)?; - assert!(!db.was_clean_shutdown()); assert_eq!(db.get("key1")?, Some(b"val1".to_vec())); assert_eq!(db.get("key2")?, Some(b"val2".to_vec())); db.set("key3", "val3")?; @@ -218,36 +177,10 @@ fn test_recovery_uses_persisted_hash_key_on_reopen() -> Result<(), Error> { Ok(()) } -#[test] -fn test_recovery_rebuilds_waste_stats() -> Result<(), Error> { - let dir = tempdir().unwrap(); - let config = common::rebuild_if_dirty_config(); - - { - let db = CandyStore::open(dir.path(), config)?; - db.set("key1", "val1")?; - db.set("key2", "val2")?; - db.set("key3", "val3")?; - db.set("key2", "val2_updated")?; - db.remove("key3")?; - db._abort_for_testing(); - } - - { - let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); - let stats = db.stats(); - assert_eq!(stats.waste_bytes, 64); - } - - Ok(()) -} - #[test] fn test_recovery_with_many_keys_and_splits() -> Result<(), Error> { let dir = tempdir().unwrap(); - let mut config = common::small_file_config(); - config.rebuild_strategy = RebuildStrategy::RebuildIfDirty; + let config = common::small_file_config(); { let db = CandyStore::open(dir.path(), config)?; @@ -265,7 +198,6 @@ fn test_recovery_with_many_keys_and_splits() -> Result<(), Error> { { let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); for i in 0..500 { let key = format!("k{i:04}"); @@ -296,7 +228,6 @@ fn test_rebuild_if_dirty_recovers_large_dataset_across_multiple_data_files() -> let config = Config { max_data_file_size: 64 * 1024 * 1024, compaction_throughput_bytes_per_sec: 1024, - rebuild_strategy: RebuildStrategy::RebuildIfDirty, ..Config::default() }; const TARGET_NUM_DATA_FILES: u64 = 5; @@ -338,7 +269,6 @@ fn test_rebuild_if_dirty_recovers_large_dataset_across_multiple_data_files() -> { let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); 
assert!( db.stats().num_data_files >= TARGET_NUM_DATA_FILES, "rebuild should preserve the multi-file dataset" @@ -368,7 +298,7 @@ fn test_rebuild_if_dirty_recovers_large_dataset_across_multiple_data_files() -> #[test] fn test_rebuild_if_dirty_recovers_with_corrupted_rows_checksum() -> Result<(), Error> { let dir = tempdir().unwrap(); - let config = common::rebuild_if_dirty_config(); + let config = Config::default(); { let db = CandyStore::open(dir.path(), config)?; @@ -379,18 +309,14 @@ fn test_rebuild_if_dirty_recovers_with_corrupted_rows_checksum() -> Result<(), E db._abort_for_testing(); } - common::corrupt_first_row_checksum(dir.path()); - { let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); assert!(db.get("key1")?.is_none()); assert_eq!(db.get("key2")?, Some(b"val2_updated".to_vec())); } { let db = CandyStore::open(dir.path(), config)?; - assert!(db.was_clean_shutdown()); assert!(db.get("key1")?.is_none()); assert_eq!(db.get("key2")?, Some(b"val2_updated".to_vec())); } @@ -401,7 +327,7 @@ fn test_rebuild_if_dirty_recovers_with_corrupted_rows_checksum() -> Result<(), E #[test] fn test_rebuild_if_dirty_rejects_unknown_data_entry_type() -> Result<(), Error> { let dir = tempdir().unwrap(); - let config = common::rebuild_if_dirty_config(); + let config = Config::default(); { let db = CandyStore::open(dir.path(), config)?; @@ -421,7 +347,7 @@ fn test_rebuild_if_dirty_rejects_unknown_data_entry_type() -> Result<(), Error> #[test] fn test_rebuild_if_dirty_rejects_unknown_data_namespace() -> Result<(), Error> { let dir = tempdir().unwrap(); - let config = common::rebuild_if_dirty_config(); + let config = Config::default(); { let db = CandyStore::open(dir.path(), config)?; @@ -472,7 +398,7 @@ fn test_open_rejects_duplicate_data_file_ordinals() -> Result<(), Error> { #[test] fn test_rebuild_if_dirty_recovers_lists() -> Result<(), Error> { let dir = tempdir().unwrap(); - let config = common::rebuild_if_dirty_config(); + let config = 
Config::default(); let list = b"rebuild-list"; { @@ -487,7 +413,6 @@ fn test_rebuild_if_dirty_recovers_lists() -> Result<(), Error> { { let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); assert_eq!(db.list_len(list)?, 2); assert_eq!(db.get_from_list(list, b"a")?, None); assert_eq!(db.get_from_list(list, b"b")?, Some(b"2b".to_vec())); @@ -509,7 +434,7 @@ fn test_rebuild_if_dirty_recovers_lists() -> Result<(), Error> { #[test] fn test_rebuild_if_dirty_recovers_queues() -> Result<(), Error> { let dir = tempdir().unwrap(); - let config = common::rebuild_if_dirty_config(); + let config = Config::default(); let queue = b"rebuild-queue"; let first_idx; @@ -533,7 +458,6 @@ fn test_rebuild_if_dirty_recovers_queues() -> Result<(), Error> { { let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); assert_eq!(db.queue_len(queue)?, 2); assert_eq!(db.peek_queue_head(queue)?, Some(b"tail-1".to_vec())); assert_eq!(db.peek_queue_tail(queue)?, Some(b"tail-2".to_vec())); @@ -555,7 +479,7 @@ fn test_rebuild_if_dirty_recovers_queues() -> Result<(), Error> { #[test] fn test_rebuild_if_dirty_recovers_typed_data() -> Result<(), Error> { let dir = tempdir().unwrap(); - let config = common::rebuild_if_dirty_config(); + let config = Config::default(); let list_key = 7u32; let queue_key = 9u32; @@ -592,8 +516,6 @@ fn test_rebuild_if_dirty_recovers_typed_data() -> Result<(), Error> { let typed_list = CandyTypedList::::new(Arc::clone(&store)); let typed_queue = CandyTypedDeque::::new(Arc::clone(&store)); - assert!(!store.was_clean_shutdown()); - assert_eq!(typed_kv.get(&1u32)?, Some("uno".to_string())); assert_eq!(typed_kv.get(&2u32)?, None); @@ -623,169 +545,39 @@ fn test_rebuild_if_dirty_recovers_typed_data() -> Result<(), Error> { } #[test] -fn test_fail_if_dirty_rejects_reopen() -> Result<(), Error> { - let dir = tempdir().unwrap(); - let fail_config = Config { - rebuild_strategy: RebuildStrategy::FailIfDirty, - 
..Config::default() - }; - - { - let db = CandyStore::open(dir.path(), Config::default())?; - db.set("key", "value")?; - db._abort_for_testing(); - } - - assert!(matches!( - CandyStore::open(dir.path(), fail_config), - Err(Error::DirtyIndex) - )); - - Ok(()) -} - -#[test] -fn test_trust_dirty_index_if_checksum_correct_or_fail() -> Result<(), Error> { - let dir = tempdir().unwrap(); - let config = Config { - rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrFail, - ..Config::default() - }; - - { - let db = CandyStore::open(dir.path(), config)?; - db.set("key1", "val1")?; - db.set("key2", "val2")?; - db.set("key2", "val2_updated")?; - db._abort_for_testing(); - } - - { - let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); - assert_eq!(db.get("key1")?, Some(b"val1".to_vec())); - assert_eq!(db.get("key2")?, Some(b"val2_updated".to_vec())); - let waste_after_trust = db.stats().waste_bytes; - drop(db); - - let waste_after_clean = CandyStore::open(dir.path(), config)?.stats().waste_bytes; - assert_eq!(waste_after_clean, waste_after_trust); - } - - Ok(()) -} - -#[test] -fn test_trust_dirty_index_fails_on_checksum_mismatch() -> Result<(), Error> { - let dir = tempdir().unwrap(); - let config = Config { - rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrFail, - ..Config::default() - }; - - { - let db = CandyStore::open(dir.path(), config)?; - db.set("key", "value")?; - db._abort_for_testing(); - } - - common::corrupt_first_row_checksum(dir.path()); - - assert!(matches!( - CandyStore::open(dir.path(), config), - Err(Error::IOError(io_err)) if io_err.kind() == std::io::ErrorKind::InvalidData - )); - - Ok(()) -} - -#[test] -fn test_trust_dirty_index_rebuilds_on_checksum_mismatch() -> Result<(), Error> { - let dir = tempdir().unwrap(); - let config = Config { - rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrRebuild, - ..Config::default() - }; - - { - let db = 
CandyStore::open(dir.path(), config)?; - db.set("key1", "val1")?; - db.set("key2", "val2")?; - db.set("key2", "val2_updated")?; - db._abort_for_testing(); - } - - common::corrupt_first_row_checksum(dir.path()); - - let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); - assert_eq!(db.get("key1")?, Some(b"val1".to_vec())); - assert_eq!(db.get("key2")?, Some(b"val2_updated".to_vec())); - - Ok(()) -} - -#[test] -fn test_trust_dirty_index_resets_on_checksum_mismatch() -> Result<(), Error> { +fn test_reset_on_invalid_data_clears_corrupt_store() -> Result<(), Error> { let dir = tempdir().unwrap(); let config = Config { - rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrReset, + reset_on_invalid_data: true, ..Config::default() }; { let db = CandyStore::open(dir.path(), config)?; db.set("key", "value")?; - db._abort_for_testing(); } + fs::write(dir.path().join("index"), b"bad").map_err(Error::IOError)?; + fs::write(dir.path().join("rows"), b"bad").map_err(Error::IOError)?; fs::write(dir.path().join("extra.txt"), b"junk").map_err(Error::IOError)?; fs::create_dir(dir.path().join("extra_dir")).map_err(Error::IOError)?; fs::write(dir.path().join("extra_dir").join("nested.txt"), b"junk").map_err(Error::IOError)?; - common::corrupt_first_row_checksum(dir.path()); - let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); assert!(db.get("key")?.is_none()); assert!(!dir.path().join("extra.txt").exists()); assert!(!dir.path().join("extra_dir").exists()); - Ok(()) -} - -#[test] -fn test_reset_db_if_dirty_clears_state() -> Result<(), Error> { - let dir = tempdir().unwrap(); - let config = Config { - rebuild_strategy: RebuildStrategy::ResetDBIfDirty, - ..Config::default() - }; - - { - let db = CandyStore::open(dir.path(), config)?; - db.set("key", "value")?; - db._abort_for_testing(); - } - - fs::write(dir.path().join("extra.txt"), b"junk").map_err(Error::IOError)?; - 
fs::create_dir(dir.path().join("extra_dir")).map_err(Error::IOError)?; - fs::write(dir.path().join("extra_dir").join("nested.txt"), b"junk").map_err(Error::IOError)?; - - let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); - assert!(db.get("key")?.is_none()); - assert!(!dir.path().join("extra.txt").exists()); - assert!(!dir.path().join("extra_dir").exists()); + db.set("fresh", "value")?; + assert_eq!(db.get("fresh")?, Some(b"value".to_vec())); Ok(()) } #[test] -fn test_reset_on_invalid_data_clears_corrupt_store() -> Result<(), Error> { +fn test_reset_on_invalid_data_clears_recovery_time_corruption() -> Result<(), Error> { let dir = tempdir().unwrap(); let config = Config { - rebuild_strategy: RebuildStrategy::RebuildIfDirty, reset_on_invalid_data: true, ..Config::default() }; @@ -793,16 +585,15 @@ fn test_reset_on_invalid_data_clears_corrupt_store() -> Result<(), Error> { { let db = CandyStore::open(dir.path(), config)?; db.set("key", "value")?; + db._abort_for_testing(); } - fs::write(dir.path().join("index"), b"bad").map_err(Error::IOError)?; - fs::write(dir.path().join("rows"), b"bad").map_err(Error::IOError)?; + rewrite_first_data_entry_header(dir.path(), |header| (header & !(0b11 << 30)) | (0b10 << 30))?; fs::write(dir.path().join("extra.txt"), b"junk").map_err(Error::IOError)?; fs::create_dir(dir.path().join("extra_dir")).map_err(Error::IOError)?; fs::write(dir.path().join("extra_dir").join("nested.txt"), b"junk").map_err(Error::IOError)?; let db = CandyStore::open(dir.path(), config)?; - assert!(!db.was_clean_shutdown()); assert!(db.get("key")?.is_none()); assert!(!dir.path().join("extra.txt").exists()); assert!(!dir.path().join("extra_dir").exists()); @@ -837,6 +628,33 @@ fn test_recover_from_truncated_data_file() -> Result<(), Box Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config::default(); + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key1", "value1")?; + db.set("key2", 
"value2")?; + db.set("key2", "value2_updated")?; + db.set("key3", "value3")?; + } + + write_commit_cursor(dir.path(), 5)?; + + let db = CandyStore::open(dir.path(), config)?; + assert_eq!(db.get("key1")?, Some(b"value1".to_vec())); + assert_eq!(db.get("key2")?, Some(b"value2_updated".to_vec())); + assert_eq!(db.get("key3")?, Some(b"value3".to_vec())); + assert_eq!(db.num_items(), 3); + assert_eq!(db.stats().num_entries(), 3); Ok(()) } @@ -845,7 +663,6 @@ fn test_progressive_rebuild_resumes_from_checkpoint() -> Result<(), Error> { let dir = tempdir().unwrap(); let config = Config { max_data_file_size: 1024, - rebuild_strategy: RebuildStrategy::RebuildIfDirty, ..Config::default() }; @@ -868,7 +685,7 @@ fn test_progressive_rebuild_resumes_from_checkpoint() -> Result<(), Error> { "key{i:04} missing after full rebuild" ); } - // Clean shutdown — checkpoint should be 0 now. + // Clean shutdown after rebuild should preserve all recovered data. } Ok(()) @@ -879,7 +696,6 @@ fn test_progressive_rebuild_survives_interrupted_rebuild() -> Result<(), Error> let dir = tempdir().unwrap(); let config = Config { max_data_file_size: 1024, - rebuild_strategy: RebuildStrategy::RebuildIfDirty, ..Config::default() }; @@ -902,8 +718,8 @@ fn test_progressive_rebuild_survives_interrupted_rebuild() -> Result<(), Error> db._abort_for_testing(); } - // Phase 3: another rebuild — should rebuild from scratch (checkpoint was - // cleared after phase 2's successful rebuild) and recover everything. + // Phase 3: another rebuild should start from the persisted replay cursor + // and recover everything written before the second crash. 
{ let db = CandyStore::open(dir.path(), config)?; for i in 0..150 { @@ -923,7 +739,6 @@ fn test_progressive_rebuild_with_trust_strategy_resumes_pending() -> Result<(), let dir = tempdir().unwrap(); let config = Config { max_data_file_size: 1024, - rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrRebuild, ..Config::default() }; @@ -936,8 +751,7 @@ fn test_progressive_rebuild_with_trust_strategy_resumes_pending() -> Result<(), db._abort_for_testing(); } - // Phase 2: reopen triggers rebuild (checksums wrong after crash). - // Rebuild completes. Write more, then crash. + // Phase 2: reopen and write more, then crash. { let db = CandyStore::open(dir.path(), config)?; for i in 100..200 { @@ -946,10 +760,8 @@ fn test_progressive_rebuild_with_trust_strategy_resumes_pending() -> Result<(), db._abort_for_testing(); } - // Phase 3: reopen. Checksums may pass (TrustIndex), but if there's a - // pending checkpoint, rebuild must resume. Since phase 2's rebuild - // completed (checkpoint cleared), and we crashed after new writes, - // the trust-checksums path should handle it. + // Phase 3: reopen — recovery replays from the commit cursor, so all + // data from phases 1+2 should be accessible. 
{ let db = CandyStore::open(dir.path(), config)?; for i in 0..200 { @@ -965,11 +777,10 @@ fn test_progressive_rebuild_with_trust_strategy_resumes_pending() -> Result<(), } #[test] -fn test_progressive_rebuild_restarts_on_missing_checkpoint_file() -> Result<(), Error> { +fn test_progressive_rebuild_ignores_bogus_checkpoint_offset() -> Result<(), Error> { let dir = tempdir().unwrap(); let config = Config { max_data_file_size: 1024, - rebuild_strategy: RebuildStrategy::RebuildIfDirty, ..Config::default() }; @@ -981,7 +792,9 @@ fn test_progressive_rebuild_restarts_on_missing_checkpoint_file() -> Result<(), db._abort_for_testing(); } - write_rebuild_checkpoint(dir.path(), u64::MAX - 7, 7, 0)?; + // Write a bogus commit cursor offset beyond any data — rebuild should + // fall back to replaying from offset 0. + write_commit_cursor(dir.path(), 0xFFFF_FFFF)?; let db = CandyStore::open(dir.path(), config)?; for i in 0..100 { @@ -996,83 +809,46 @@ fn test_progressive_rebuild_restarts_on_missing_checkpoint_file() -> Result<(), } #[test] -fn test_progressive_rebuild_restarts_on_corrupt_checkpoint_tuple() -> Result<(), Error> { +fn test_clean_reopen_rebuilds_invalid_active_checkpoint_across_multiple_data_files() +-> Result<(), Error> { let dir = tempdir().unwrap(); - let config = Config { - max_data_file_size: 1024, - rebuild_strategy: RebuildStrategy::TrustDirtyIndexIfChecksumCorrectOrRebuild, - ..Config::default() - }; + let config = common::small_file_config(); + + let total_base_keys; { let db = CandyStore::open(dir.path(), config)?; - for i in 0..100 { - db.set(format!("key{i:04}"), format!("val{i:04}"))?; + let mut next_idx = 0usize; + while db.stats().num_data_files < 3 { + let key = format!("multifile-base-{next_idx:04}"); + let value = patterned_bytes_with_seed(512, next_idx); + db.set(&key, &value)?; + next_idx += 1; } - db._abort_for_testing(); - } + total_base_keys = next_idx; - write_rebuild_checkpoint(dir.path(), 0x1234_5678_9abc_def0, 3, 128)?; - 
corrupt_rebuild_checkpoint_checksum(dir.path())?; - - let db = CandyStore::open(dir.path(), config)?; - for i in 0..100 { - assert_eq!( - db.get(format!("key{i:04}"))?, - Some(format!("val{i:04}").into_bytes()), - "key{i:04} missing after rebuild with corrupt checkpoint" + assert!( + db.stats().num_data_files >= 3, + "expected multiple data files before corrupting checkpoint" ); + assert_eq!(db.num_items(), total_base_keys); } - Ok(()) -} - -#[cfg(unix)] -#[test] -fn test_progressive_rebuild_resumes_after_real_mid_rebuild_crash() -> Result<(), Error> { - let dir = tempdir().unwrap(); - let config = Config { - max_data_file_size: 256 * 1024, - rebuild_strategy: RebuildStrategy::RebuildIfDirty, - ..Config::default() - }; + write_commit_cursor(dir.path(), 0xFFFF_FFFF)?; - { - let db = CandyStore::open(dir.path(), config)?; - for i in 0..3000u32 { - db.set(format!("key{i:04}"), format!("val{i:04}"))?; - } - db._abort_for_testing(); - } + let db = CandyStore::open(dir.path(), config)?; + assert!( + db.stats().num_data_files >= 3, + "expected the multi-file layout to survive recovery" + ); - let pid = unsafe { libc::fork() }; - assert!(pid >= 0); - if pid == 0 { - unsafe { - libc::setenv( - c"CANDYSTORE_ABORT_REBUILD_AFTER".as_ptr(), - c"1200".as_ptr(), - 1, - ); - } - let _ = CandyStore::open(dir.path(), config); - unsafe { libc::_exit(0) }; + for idx in 0..total_base_keys { + let key = format!("multifile-base-{idx:04}"); + assert_eq!(db.get(&key)?, Some(patterned_bytes_with_seed(512, idx))); } - let mut status = 0i32; - let wait_rc = unsafe { libc::waitpid(pid, &mut status, 0) }; - assert_eq!(wait_rc, pid); - assert!(libc::WIFSIGNALED(status)); - assert_eq!(libc::WTERMSIG(status), libc::SIGABRT); - - let db = CandyStore::open(dir.path(), config)?; - for i in 0..3000u32 { - assert_eq!( - db.get(format!("key{i:04}"))?, - Some(format!("val{i:04}").into_bytes()), - "key{i:04} missing after resume-from-offset rebuild" - ); - } + assert_eq!(db.num_items(), total_base_keys); 
+ assert_eq!(db.stats().num_entries(), total_base_keys as u64); Ok(()) } diff --git a/tests/whitebox.rs b/tests/whitebox.rs new file mode 100644 index 0000000..e960bd5 --- /dev/null +++ b/tests/whitebox.rs @@ -0,0 +1,366 @@ +//! Whitebox tests requiring the `whitebox-testing` feature. +//! +//! Run via: `cargo test --features whitebox-testing --test whitebox` + +#![cfg(feature = "whitebox-testing")] + +mod common; + +use std::io::{Seek, SeekFrom, Write}; +use std::path::Path; + +use candystore::{CandyStore, Config, Error}; +use tempfile::tempdir; + +/// PAGE_SIZE for one RowLayout. +const PAGE_SIZE: usize = 4096; +/// Number of slots per row. +const ROW_WIDTH: usize = 336; +/// Offset of `signatures` array within a RowLayout (after split_level + padding). +const SIGS_OFFSET: usize = 64; +/// Offset of `pointers` array within a RowLayout (after signatures). +const PTRS_OFFSET: usize = SIGS_OFFSET + ROW_WIDTH * 4; +/// FILE_OFFSET_ALIGNMENT used in EntryPointer encoding. +const FILE_OFFSET_ALIGNMENT: u64 = 16; + +/// Offset of `commit_file_ordinal` in the index header. +const COMMIT_FILE_ORDINAL_OFFSET: u64 = 128; +/// Offset of `commit_offset` in the index header. +const COMMIT_OFFSET: u64 = 136; + +// ----------------------------------------------------------------------- +// Helpers +// ----------------------------------------------------------------------- + +fn encode_entry_pointer(file_idx: u16, file_offset: u64, size: usize) -> u64 { + let fi = (file_idx as u64) & ((1 << 12) - 1); + let fo = ((file_offset / FILE_OFFSET_ALIGNMENT) & ((1 << 26) - 1)) << 12; + let sh = (size.div_ceil(512) as u64) << (12 + 26); + fi | fo | sh +} + +/// Write a phantom entry into the rows file at the given row and column. 
+fn inject_phantom_entry( + dir: &Path, + row_idx: usize, + col: usize, + file_idx: u16, + file_offset: u64, +) -> Result<(), Error> { + let sig: u32 = 0xDEAD_BEEF; + let ptr = encode_entry_pointer(file_idx, file_offset, 512); + + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(dir.join("rows")) + .map_err(Error::IOError)?; + + let row_base = row_idx * PAGE_SIZE; + + // Write signature + let sig_off = (row_base + SIGS_OFFSET + col * 4) as u64; + file.seek(SeekFrom::Start(sig_off)) + .map_err(Error::IOError)?; + file.write_all(&sig.to_le_bytes()).map_err(Error::IOError)?; + + // Write pointer + let ptr_off = (row_base + PTRS_OFFSET + col * 8) as u64; + file.seek(SeekFrom::Start(ptr_off)) + .map_err(Error::IOError)?; + file.write_all(&ptr.to_le_bytes()).map_err(Error::IOError)?; + + file.sync_all().map_err(Error::IOError)?; + Ok(()) +} + +/// Read a signature from the rows file. +fn read_signature(dir: &Path, row_idx: usize, col: usize) -> Result { + use std::io::Read; + let mut file = std::fs::File::open(dir.join("rows")).map_err(Error::IOError)?; + let off = (row_idx * PAGE_SIZE + SIGS_OFFSET + col * 4) as u64; + file.seek(SeekFrom::Start(off)).map_err(Error::IOError)?; + let mut buf = [0u8; 4]; + file.read_exact(&mut buf).map_err(Error::IOError)?; + Ok(u32::from_le_bytes(buf)) +} + +fn active_file_ordinal(dir: &Path) -> Result { + use std::io::Read; + + let mut max_ordinal: Option = None; + for entry in std::fs::read_dir(dir).map_err(Error::IOError)? 
{ + let entry = entry.map_err(Error::IOError)?; + let path = entry.path(); + let Some(name) = path.file_name().and_then(|name| name.to_str()) else { + continue; + }; + if !name.starts_with("data_") { + continue; + } + + let mut file = std::fs::File::open(path).map_err(Error::IOError)?; + file.seek(SeekFrom::Start(16)).map_err(Error::IOError)?; + let mut buf = [0u8; 8]; + file.read_exact(&mut buf).map_err(Error::IOError)?; + let ordinal = u64::from_le_bytes(buf); + max_ordinal = Some(max_ordinal.map_or(ordinal, |current| current.max(ordinal))); + } + + max_ordinal.ok_or_else(|| { + Error::IOError(std::io::Error::new( + std::io::ErrorKind::NotFound, + "no data files found", + )) + }) +} + +fn write_commit_cursor(dir: &Path, offset: u64) -> Result<(), Error> { + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(dir.join("index")) + .map_err(Error::IOError)?; + + let ordinal = active_file_ordinal(dir)?; + + file.seek(SeekFrom::Start(COMMIT_FILE_ORDINAL_OFFSET)) + .map_err(Error::IOError)?; + file.write_all(&ordinal.to_le_bytes()) + .map_err(Error::IOError)?; + + file.seek(SeekFrom::Start(COMMIT_OFFSET)) + .map_err(Error::IOError)?; + file.write_all(&offset.to_le_bytes()) + .map_err(Error::IOError)?; + file.sync_all().map_err(Error::IOError)?; + Ok(()) +} + +/// Fork, run `child_fn` in the child (which should abort), wait and assert +/// the child was killed by SIGABRT. +#[cfg(unix)] +fn fork_expect_abort(child_fn: impl FnOnce()) { + let pid = unsafe { libc::fork() }; + assert!(pid >= 0, "fork failed"); + if pid == 0 { + child_fn(); + // Should not reach here — child_fn should abort. 
+ unsafe { libc::_exit(0) }; + } + let mut status = 0i32; + let wait_rc = unsafe { libc::waitpid(pid, &mut status, 0) }; + assert_eq!(wait_rc, pid); + assert!( + libc::WIFSIGNALED(status), + "child exited normally, expected signal" + ); + assert_eq!( + libc::WTERMSIG(status), + libc::SIGABRT, + "child killed by unexpected signal" + ); +} + +// ----------------------------------------------------------------------- +// Tests +// ----------------------------------------------------------------------- + +/// Inject phantom index entries pointing past the durable extent of the +/// active data file. Rebuild should purge them. +#[test] +fn test_rebuild_purges_phantom_entries() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config::default(); + + // Phase 1: write real data, then close cleanly. + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key1", "val1")?; + db.set("key2", "val2")?; + } + + // Phase 2: inject phantom entries. + // Use a last row slot (col 335) to avoid colliding with real entries. + inject_phantom_entry(dir.path(), 0, 335, 0, 0x100_000)?; + inject_phantom_entry(dir.path(), 1, 335, 0, 0x200_000)?; + + // Verify we actually wrote non-zero signatures. + assert_ne!(read_signature(dir.path(), 0, 335)?, 0); + assert_ne!(read_signature(dir.path(), 1, 335)?, 0); + + // Phase 3: reopen — recovery replays active file and purges phantoms. + { + let db = CandyStore::open(dir.path(), config)?; + assert_eq!(db.get("key1")?, Some(b"val1".to_vec())); + assert_eq!(db.get("key2")?, Some(b"val2".to_vec())); + } + + // Verify phantom signatures are cleared. + assert_eq!(read_signature(dir.path(), 0, 335)?, 0); + assert_eq!(read_signature(dir.path(), 1, 335)?, 0); + + Ok(()) +} + +/// A bogus commit cursor offset beyond the data extent is ignored and rebuild +/// restarts from offset 0 for the active file. 
+#[test] +fn test_bogus_checkpoint_offset_causes_full_replay() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config::default(); + + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..50 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + write_commit_cursor(dir.path(), 0xFFFF_FFFF)?; + + let db = CandyStore::open(dir.path(), config)?; + for i in 0..50 { + assert_eq!( + db.get(format!("key{i:04}"))?, + Some(format!("val{i:04}").into_bytes()), + "key{i:04} missing after bogus commit-cursor rebuild" + ); + } + + Ok(()) +} + +/// A commit cursor offset that points into the middle of an entry or padding +/// must be rejected rather than treated as a valid resume point. +#[test] +fn test_mid_entry_checkpoint_offset_restarts_from_zero() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config::default(); + + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..50 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + write_commit_cursor(dir.path(), 1)?; + + let db = CandyStore::open(dir.path(), config)?; + for i in 0..50 { + assert_eq!( + db.get(format!("key{i:04}"))?, + Some(format!("val{i:04}").into_bytes()), + "key{i:04} missing after mid-entry commit-cursor fallback" + ); + } + + Ok(()) +} + +/// Crash mid-rebuild via the `rebuild_entry` crash point, then resume. 
+#[cfg(unix)] +#[test] +fn test_mid_rebuild_crash_resumes_from_checkpoint() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 256 * 1024, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..3000u32 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + fork_expect_abort(|| { + unsafe { + libc::setenv( + c"CANDYSTORE_CRASH_POINT".as_ptr(), + c"rebuild_entry".as_ptr(), + 1, + ); + libc::setenv(c"CANDYSTORE_CRASH_AFTER".as_ptr(), c"1200".as_ptr(), 1); + } + let _ = CandyStore::open(dir.path(), config); + }); + + // Reopen — should resume from the persisted replay cursor. + let db = CandyStore::open(dir.path(), config)?; + for i in 0..3000u32 { + assert_eq!( + db.get(format!("key{i:04}"))?, + Some(format!("val{i:04}").into_bytes()), + "key{i:04} missing after resume-from-cursor rebuild" + ); + } + + Ok(()) +} + +/// Crash after data write but before index insert, then rebuild. +/// The data file has the entry but the index doesn't — replay should recover it. +#[cfg(unix)] +#[test] +fn test_crash_after_write_before_insert_recovers_on_rebuild() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config::default(); + + // Write some baseline data. + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..10 { + db.set(format!("base{i}"), format!("val{i}"))?; + } + db._abort_for_testing(); + } + + // Reopen cleanly to get a stable state, then crash mid-insert. + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..10 { + assert_eq!( + db.get(format!("base{i}"))?, + Some(format!("val{i}").into_bytes()) + ); + } + } + + // Now write one more key and crash after the data file write but before + // the index is updated. 
+ fork_expect_abort(|| { + unsafe { + libc::setenv( + c"CANDYSTORE_CRASH_POINT".as_ptr(), + c"set_after_write_before_insert".as_ptr(), + 1, + ); + libc::setenv(c"CANDYSTORE_CRASH_AFTER".as_ptr(), c"0".as_ptr(), 1); + } + let db = CandyStore::open(dir.path(), config).unwrap(); + let _ = db.set("crash_key", "crash_val"); + }); + + // Rebuild should recover everything including the crash_key (data is + // durable in the active file even though the index insert never happened). + let db = CandyStore::open(dir.path(), config)?; + for i in 0..10 { + assert_eq!( + db.get(format!("base{i}"))?, + Some(format!("val{i}").into_bytes()), + "base{i} missing after crash recovery" + ); + } + // The crash_key's data was written to the file before the crash, so it + // should be recovered by replay. However, this depends on whether the + // OS flushed the data page to disk before abort — in this test the child + // is doing an in-process abort so the file write may or may not be + // durable. We verify the baseline keys survived; crash_key recovery is + // best-effort. 
+ + Ok(()) +} From 0db9b578f07f265e716ef01a61f166df8bb5ea47 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Sat, 28 Mar 2026 23:44:02 +0300 Subject: [PATCH 08/25] perf --- examples/perf.rs | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/examples/perf.rs b/examples/perf.rs index 9c5d36c..889d5db 100644 --- a/examples/perf.rs +++ b/examples/perf.rs @@ -16,6 +16,7 @@ fn run_perf( let mut handles = Vec::new(); let inserts_us = Arc::new(AtomicU64::new(0)); + let updates_us = Arc::new(AtomicU64::new(0)); let pos_gets_us = Arc::new(AtomicU64::new(0)); let neg_gets_us = Arc::new(AtomicU64::new(0)); let iter_us = Arc::new(AtomicU64::new(0)); @@ -24,6 +25,7 @@ fn run_perf( for t in 0..n_threads { let store = store.clone(); let inserts_us = inserts_us.clone(); + let updates_us = updates_us.clone(); let pos_gets_us = pos_gets_us.clone(); let neg_gets_us = neg_gets_us.clone(); let iter_us = iter_us.clone(); @@ -31,7 +33,8 @@ fn run_perf( let handle = thread::spawn(move || { let mut key = vec![b'k'; key_size.max(4)]; - let value = vec![b'v'; val_size]; + let value1 = vec![b'v'; val_size]; + let value2 = vec![b'V'; val_size]; let start_idx = t as u32 * n; let end_idx = start_idx + n; @@ -39,7 +42,7 @@ fn run_perf( let t0 = Instant::now(); for i in start_idx..end_idx { key[..4].copy_from_slice(&i.to_le_bytes()); - store.set(&key, &value).unwrap(); + store.set(&key, &value1).unwrap(); } let duration = t0.elapsed(); inserts_us.fetch_add( @@ -48,6 +51,19 @@ fn run_perf( ); } + { + let t0 = Instant::now(); + for i in start_idx..end_idx { + key[..4].copy_from_slice(&i.to_le_bytes()); + store.set(&key, &value2).unwrap(); + } + let duration = t0.elapsed(); + updates_us.fetch_add( + duration.as_micros() as u64, + std::sync::atomic::Ordering::Relaxed, + ); + } + { let t0 = Instant::now(); for i in start_idx..end_idx { @@ -115,6 +131,11 @@ fn run_perf( inserts_us.load(std::sync::atomic::Ordering::Relaxed) as f64 / (n_threads * n as usize) 
as f64 ); + println!( + " Updates: {} us/op", + updates_us.load(std::sync::atomic::Ordering::Relaxed) as f64 + / (n_threads * n as usize) as f64 + ); println!( " Positive Lookups: {} us/op", pos_gets_us.load(std::sync::atomic::Ordering::Relaxed) as f64 @@ -125,6 +146,10 @@ fn run_perf( neg_gets_us.load(std::sync::atomic::Ordering::Relaxed) as f64 / (n_threads * n as usize) as f64 ); + println!( + " Iter all: {} us/op", + iter_us.load(std::sync::atomic::Ordering::Relaxed) as f64 / (n_threads * n as usize) as f64 + ); println!( " Removes: {} us/op\n", removes_us.load(std::sync::atomic::Ordering::Relaxed) as f64 From 4d6e405b4fef1c43f781ed38225065c7084cb994 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Sun, 29 Mar 2026 08:50:32 +0300 Subject: [PATCH 09/25] Add rebuild metrics; add checkpoint() API; stats cleanup --- Cargo.lock | 2 +- Cargo.toml | 4 +- README.md | 24 ++-- src/data_file.rs | 9 ++ src/index_file.rs | 20 ++- src/internal.rs | 4 +- src/store.rs | 267 ++++++++++++++++++++++++---------------- src/store/compaction.rs | 31 +---- src/store/list.rs | 26 ++-- src/store/queue.rs | 24 ++-- src/store/recovery.rs | 23 +++- src/types.rs | 101 +++++---------- tests/crasher.rs | 41 ++++-- tests/metrics.rs | 54 ++++---- tests/recovery.rs | 97 ++++++++++++++- 15 files changed, 434 insertions(+), 293 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 80a011c..b881db1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -43,7 +43,7 @@ checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "candystore" -version = "0.6.0" +version = "0.7.0" dependencies = [ "crc16-ibm3740-fast", "databuf", diff --git a/Cargo.toml b/Cargo.toml index 2d23457..d3a0c08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "candystore" -version = "0.6.0" +version = "0.7.0" edition = "2024" license = "Apache-2.0" keywords = ["key-value", "database", "persistent", "store", "rocksdb"] -description = "A lean, efficient and fast peristent 
in-process key-value store" +description = "A lean, efficient and fast persistent in-process key-value store" repository = "https://github.com/sweet-security/candystore" [dependencies] diff --git a/README.md b/README.md index 9d93906..25f098d 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,8 @@ insert, and removal — are O(1). | Operation | Time* | |-----------|--------| | Lookup | < 1us | -| Insert | < 2us | +| Insert | < 1us | +| Update | < 2us | | Removal | < 2us | On my laptop (32 core AMD RYZEN AI MAX+ 395 with 64GB RAM, running Ubuntu 25.10 kernel `6.17.0-19-generic`) I'm getting @@ -16,10 +17,13 @@ On my laptop (32 core AMD RYZEN AI MAX+ 395 with 64GB RAM, running Ubuntu 25.10 $ cargo run --release --example perf Testing key-value using 1 threads, each with 1000000 items (key size: 16, value size: 16) - Inserts: 0.539149 us/op - Positive Lookups: 0.298013 us/op - Negative Lookups: 0.044203 us/op - Removes: 0.573369 us/op + Inserts: 0.499239 us/op + Updates: 0.611424 us/op + Positive Lookups: 0.316884 us/op + Negative Lookups: 0.045079 us/op + Iter all: 0.373904 us/op + Removes: 0.588206 us/op + ``` See [how to interpret the results\*](#how-to-interpret-the-performance-results). @@ -81,8 +85,8 @@ columns, from which we extract another 18 bits of entropy. If both match, we fet from the relevant file (the pointer stores a file index and a file offset). Note: the chances of a collision (meaning we fetch a wrong entry from the file) are -virtually zero, about 10^-10 according to the birthday paradox (a collision in 336 uniformly-distributed -50-bits numbers). +virtually zero, about 1 in 20 billion according to the birthday paradox (a collision in 336 +uniformly-distributed 50-bits numbers). Candy supports up to 4096 files, each up to 1GB in size (a span of 4TB). In terms of key-space, Candy allows 2^21 rows, each with 336 keys, so a total of 704M keys. 
The maximum size of a key @@ -127,9 +131,9 @@ You can configure the throughput (bytes per second) of compaction. We trust the operating system to flush the data files and mmap'ed rows table to storage, which means that even if your process crashes, your data will be fully consistent. However, this is not true on a power failure or a kernel panic -- in which case the state of the -index file is completely unknown. In such cases Candy has a rebuild mechanism that essentially -wipes the rows table clean and reads all the data files in order, replaying every set/remove -as it happened, and achieves a consistent state. +index file is unknown. In such cases Candy has an efficient rebuild mechanism (based on checkpointing) +that essentially replays recent mutating operations in order and rebuilds the correct state from +the data files. ## Design Goals diff --git a/src/data_file.rs b/src/data_file.rs index 6c2524f..c529639 100644 --- a/src/data_file.rs +++ b/src/data_file.rs @@ -71,6 +71,15 @@ impl DataFile { self.file_offset.load(Ordering::Acquire) } + pub(crate) fn truncate_to_offset(&self, file_offset: u64) -> Result<()> { + debug_assert_eq!(file_offset % FILE_OFFSET_ALIGNMENT, 0); + self.file + .set_len(size_of::() as u64 + file_offset) + .map_err(Error::IOError)?; + self.file_offset.store(file_offset, Ordering::Release); + self.file.sync_all().map_err(Error::IOError) + } + fn parse_data_entry(buf: &[u8], offset: u64) -> Result { if buf.len() < 8 { return Err(Error::IOError(std::io::Error::new( diff --git a/src/index_file.rs b/src/index_file.rs index db2361c..b4768f0 100644 --- a/src/index_file.rs +++ b/src/index_file.rs @@ -550,6 +550,18 @@ impl IndexFile { Ok(()) } + pub(crate) fn sync_all_with_rows_guard( + &self, + rows_table: &mut RowsTableWriteGuard<'_>, + ) -> Result<()> { + rows_table.row_guard.flush().map_err(Error::IOError)?; + self.rows_file.sync_all().map_err(Error::IOError)?; + self.header_mmap.flush().map_err(Error::IOError)?; + #[cfg(windows)] + 
self.header_file.sync_all().map_err(Error::IOError)?; + Ok(()) + } + pub(crate) fn file_size_bytes(&self) -> u64 { let header = size_of::() as u64; let rows = self.rows_file.metadata().map(|m| m.len()).unwrap_or(0); @@ -664,9 +676,11 @@ impl IndexFile { 1usize << gsl } - pub(crate) fn shrink(&self, min_rows: usize) -> Result { - let mut row_table = self.rows_table_mut(); - + pub(crate) fn shrink_with_rows_guard( + &self, + min_rows: usize, + mut row_table: RowsTableWriteGuard<'_>, + ) -> Result { loop { let global_split_level = self.header_ref().global_split_level.load(Ordering::Acquire); let current_rows = 1usize << global_split_level; diff --git a/src/internal.rs b/src/internal.rs index 83455f7..96073d8 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -22,9 +22,9 @@ pub(crate) const MAX_DATA_FILES: u16 = 1 << 12; pub(crate) const MAX_DATA_FILE_IDX: u16 = MAX_DATA_FILES - 1; pub(crate) const INDEX_FILE_SIGNATURE: &[u8; 8] = b"CandyStr"; -pub(crate) const INDEX_FILE_VERSION: u32 = 0x0002_0002; +pub(crate) const INDEX_FILE_VERSION: u32 = 0x0002_0007; pub(crate) const DATA_FILE_SIGNATURE: &[u8; 8] = b"CandyDat"; -pub(crate) const DATA_FILE_VERSION: u32 = 0x0002_0001; +pub(crate) const DATA_FILE_VERSION: u32 = 0x0002_0002; pub(crate) const FILE_OFFSET_ALIGNMENT: u64 = 16; pub(crate) const SIZE_HINT_UNIT: usize = 512; pub(crate) const DATA_ENTRY_OFFSET_MAGIC: u32 = 0x91c8_d7cd; diff --git a/src/store.rs b/src/store.rs index 773a34c..378466c 100644 --- a/src/store.rs +++ b/src/store.rs @@ -40,7 +40,6 @@ struct CompactionState { #[derive(Default)] struct InnerStats { num_compactions: AtomicU64, - compaction_time_ms: AtomicU64, compaction_errors: AtomicU64, num_positive_lookups: AtomicU64, num_negative_lookups: AtomicU64, @@ -53,17 +52,17 @@ struct InnerStats { num_read_bytes: AtomicU64, num_write_ops: AtomicU64, num_write_bytes: AtomicU64, - num_created: AtomicU64, + num_inserted: AtomicU64, + num_updated: AtomicU64, num_removed: AtomicU64, - num_replaced: 
AtomicU64, - written_bytes: AtomicU64, + num_rebuilt_entries: AtomicU64, + num_rebuild_purged_bytes: AtomicU64, size_histogram: [AtomicU64; 6], } impl InnerStats { fn reset(&self) { self.num_compactions.store(0, Ordering::Relaxed); - self.compaction_time_ms.store(0, Ordering::Relaxed); self.compaction_errors.store(0, Ordering::Relaxed); self.num_positive_lookups.store(0, Ordering::Relaxed); self.num_negative_lookups.store(0, Ordering::Relaxed); @@ -77,10 +76,11 @@ impl InnerStats { self.num_read_bytes.store(0, Ordering::Relaxed); self.num_write_ops.store(0, Ordering::Relaxed); self.num_write_bytes.store(0, Ordering::Relaxed); - self.num_created.store(0, Ordering::Relaxed); + self.num_inserted.store(0, Ordering::Relaxed); self.num_removed.store(0, Ordering::Relaxed); - self.num_replaced.store(0, Ordering::Relaxed); - self.written_bytes.store(0, Ordering::Relaxed); + self.num_updated.store(0, Ordering::Relaxed); + self.num_rebuilt_entries.store(0, Ordering::Relaxed); + self.num_rebuild_purged_bytes.store(0, Ordering::Relaxed); for bucket in &self.size_histogram { bucket.store(0, Ordering::Relaxed); } @@ -91,8 +91,8 @@ struct StoreInner { base_path: PathBuf, config: Arc, index_file: IndexFile, - logical_locks: Vec>, - logical_locks_mask: usize, + list_meta_locks: Vec>, + list_meta_locks_mask: usize, data_files: RwLock>>, active_file_idx: AtomicU16, active_file_ordinal: AtomicU64, @@ -132,8 +132,8 @@ impl StoreInner { base_path, config, index_file: state.index_file, - logical_locks: (0..num_logical_locks).map(|_| RwLock::new(())).collect(), - logical_locks_mask: num_logical_locks - 1, + list_meta_locks: (0..num_logical_locks).map(|_| RwLock::new(())).collect(), + list_meta_locks_mask: num_logical_locks - 1, data_files: RwLock::new(state.data_files), active_file_idx: AtomicU16::new(state.active_file_idx), active_file_ordinal: AtomicU64::new(state.active_file_ordinal), @@ -146,13 +146,13 @@ impl StoreInner { } fn reset(&self) -> Result<()> { - let _rotation_lock = 
self.rotation_lock.lock(); let _logical_guards = self - .logical_locks + .list_meta_locks .iter() .map(|lock| lock.write()) .collect::>(); let row_table = self.index_file.rows_table_mut(); + let _rotation_lock = self.rotation_lock.lock(); let mut data_files = self.data_files.write(); data_files.clear(); @@ -289,7 +289,7 @@ impl StoreInner { let mut hasher = SipHasher13::new_with_keys(0x1701_0a66_2024_6b90, 0x284f_fa2e_3e02_3e2a); hasher.write_u8(ns as u8); hasher.write(key); - (hasher.finish() as usize) & self.logical_locks_mask + (hasher.finish() as usize) & self.list_meta_locks_mask } fn data_file(&self, file_idx: u16) -> Result> { @@ -314,6 +314,18 @@ impl StoreInner { .fetch_add(delta, Ordering::Relaxed); } + fn persist_active_file_checkpoint(&self, active_file: &Arc) { + self.index_file.rollover_uncommitted_counters(); + self.index_file + .header_ref() + .commit_file_ordinal + .store(active_file.file_ordinal, Ordering::Release); + self.index_file + .header_ref() + .commit_offset + .store(active_file.used_bytes(), Ordering::Release); + } + fn _split_row(&self, hc: HashCoord, sl: u64, gsl: u64) -> Result<()> { let nsl = sl + 1; let low_row_idx = hc.row_index(sl); @@ -377,6 +389,7 @@ impl StoreInner { /// `compact_file` also writes to `data_files` (removing files) but only /// touches non-active indices, so there is no conflict. 
fn _rotate_data_file(&self, active_idx: u16) -> Result<()> { + let mut rows_table = self.index_file.rows_table_mut(); let _rot_lock = self.rotation_lock.lock(); if self.active_file_idx.load(Ordering::Acquire) != active_idx { @@ -411,17 +424,8 @@ impl StoreInner { active_file.wait_inflight(); let _ = active_file.file.sync_all(); - self.index_file.rollover_uncommitted_counters(); - self.index_file - .header_ref() - .commit_file_ordinal - .store(active_file.file_ordinal, Ordering::Release); - self.index_file - .header_ref() - .commit_offset - .store(active_file.used_bytes(), Ordering::Release); - - let _ = self.index_file.sync_all(); + self.persist_active_file_checkpoint(&active_file); + let _ = self.index_file.sync_all_with_rows_guard(&mut rows_table); self.data_files.write().insert(next_idx, data_file); self.active_file_idx.store(next_idx, Ordering::Release); @@ -512,12 +516,44 @@ impl CandyStore { &self.inner.base_path } - fn logical_read_guard(&self, ns: KeyNamespace, key: &[u8]) -> RwLockReadGuard<'_, ()> { - self.inner.logical_locks[self.inner.logical_lock_index(ns, key)].read() + pub(super) fn checkpoint_locked(&self) -> Result<()> { + let _logical_guards = self + .inner + .list_meta_locks + .iter() + .map(|lock| lock.write()) + .collect::>(); + let mut rows_table = self.inner.index_file.rows_table_mut(); + let _rotation_lock = self.inner.rotation_lock.lock(); + + let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); + let active_file = self + .inner + .data_files + .read() + .get(&active_idx) + .cloned() + .ok_or(Error::MissingDataFile(active_idx))?; + + let files = self.inner.data_files.read(); + for data_file in files.values() { + data_file.file.sync_all().map_err(Error::IOError)?; + } + drop(files); + + self.inner.persist_active_file_checkpoint(&active_file); + self.inner + .index_file + .sync_all_with_rows_guard(&mut rows_table)?; + sync_dir(&self.inner.base_path) } - fn logical_write_guard(&self, ns: KeyNamespace, key: &[u8]) -> 
RwLockWriteGuard<'_, ()> { - self.inner.logical_locks[self.inner.logical_lock_index(ns, key)].write() + fn list_read_guard(&self, ns: KeyNamespace, key: &[u8]) -> RwLockReadGuard<'_, ()> { + self.inner.list_meta_locks[self.inner.logical_lock_index(ns, key)].read() + } + + fn list_write_guard(&self, ns: KeyNamespace, key: &[u8]) -> RwLockWriteGuard<'_, ()> { + self.inner.list_meta_locks[self.inner.logical_lock_index(ns, key)].write() } fn _immut_op( @@ -661,9 +697,8 @@ impl CandyStore { self.inner.add_uncommitted_num_entries(1); self.inner .stats - .written_bytes - .fetch_add(entry_size, Ordering::Relaxed); - self.inner.stats.num_created.fetch_add(1, Ordering::Relaxed); + .num_inserted + .fetch_add(1, Ordering::Relaxed); self.inner.bump_histogram(entry_size); } @@ -675,14 +710,7 @@ impl CandyStore { new_vlen: usize, ) { let new_entry_size = aligned_data_entry_size(new_klen, new_vlen); - self.inner - .stats - .written_bytes - .fetch_add(new_entry_size, Ordering::Relaxed); - self.inner - .stats - .num_replaced - .fetch_add(1, Ordering::Relaxed); + self.inner.stats.num_updated.fetch_add(1, Ordering::Relaxed); self.inner.bump_histogram(new_entry_size); } @@ -994,6 +1022,18 @@ impl CandyStore { sync_dir(&self.inner.base_path) } + /// Establishes a durable recovery checkpoint at the current end of the active file. + /// + /// This stops background compaction, blocks concurrent writers, syncs the + /// data and index files, and advances the persisted replay cursor so the + /// next open can resume from this point without replaying earlier writes. + pub fn checkpoint(&self) -> Result<()> { + self.stop_compaction(); + let res = self.checkpoint_locked(); + self.start_compaction(); + res + } + /// Returns the number of background compaction errors observed since open. pub fn compaction_errors(&self) -> u64 { self.inner.stats.compaction_errors.load(Ordering::Relaxed) @@ -1001,7 +1041,11 @@ impl CandyStore { /// Returns the number of currently live entries. 
pub fn num_items(&self) -> usize { - self.stats().num_entries() as usize + let h = self.inner.index_file.header_ref(); + h.committed_num_entries + .load(Ordering::Relaxed) + .saturating_add_signed(h.uncommitted_entries_delta.load(Ordering::Relaxed)) + as usize } /// Returns the current index capacity in entries. @@ -1013,15 +1057,16 @@ impl CandyStore { /// Shrinks the index when the reclaimable row ratio is at least `min_wasted_ratio`. pub fn shrink_to_fit_blocking(&self, min_wasted_ratio: f64) -> Result { - let _key_guards = self + let _logical_guards = self .inner - .logical_locks + .list_meta_locks .iter() .map(|lock| lock.write()) .collect::>(); + let row_table = self.inner.index_file.rows_table_mut(); let min_wasted_ratio = min_wasted_ratio.clamp(0.0, 1.0); - let current_rows = self.inner.index_file.num_rows(); + let current_rows = row_table.row_guard.len() / std::mem::size_of::(); if current_rows == 0 { return Ok(0); } @@ -1038,86 +1083,69 @@ impl CandyStore { return Ok(current_rows); } - self.inner.index_file.shrink(min_rows_cfg) + self.inner + .index_file + .shrink_with_rows_guard(min_rows_cfg, row_table) } /// Returns a snapshot of store statistics and accounting counters. pub fn stats(&self) -> Stats { - let h = self.inner.index_file.header_ref(); let num_rows = self.inner.index_file.num_rows() as u64; - let capacity = num_rows.saturating_mul(ROW_WIDTH as u64); - - let num_items = h - .committed_num_entries - .load(Ordering::Relaxed) - .saturating_add_signed(h.uncommitted_entries_delta.load(Ordering::Relaxed)); // Derive data_bytes and waste_bytes from file sizes and per-file // waste levels rather than maintaining them as persistent counters. 
- let total_used: u64 = self - .inner - .data_files - .read() - .values() - .map(|df| df.used_bytes()) - .sum(); - let waste = self.inner.index_file.total_waste(); - let data_bytes = total_used.saturating_sub(waste); + let (total_bytes, num_data_files) = { + let data_files = self.inner.data_files.read(); + + ( + data_files.values().map(|df| df.used_bytes()).sum(), + data_files.len() as u64, + ) + }; + let waste_bytes = self.inner.index_file.total_waste(); + let s = &self.inner.stats; Stats { num_rows, - capacity, - num_items, + num_items: self.num_items() as u64, index_size_bytes: self.inner.index_file.file_size_bytes(), - num_compactions: self.inner.stats.num_compactions.load(Ordering::Relaxed), - compaction_time_ms: self.inner.stats.compaction_time_ms.load(Ordering::Relaxed), - num_data_files: self.inner.data_files.read().len() as u64, - num_positive_lookups: self - .inner - .stats - .num_positive_lookups - .load(Ordering::Relaxed), - num_negative_lookups: self - .inner - .stats - .num_negative_lookups - .load(Ordering::Relaxed), - num_collisions: self.inner.stats.num_collisions.load(Ordering::Relaxed), - last_remap_dur: Duration::from_millis( - self.inner.stats.last_remap_dur_ms.load(Ordering::Relaxed), - ), + num_data_files, + + total_bytes, + waste_bytes, + + num_compactions: s.num_compactions.load(Ordering::Relaxed), + + last_remap_dur: Duration::from_millis(s.last_remap_dur_ms.load(Ordering::Relaxed)), last_compaction_dur: Duration::from_millis( - self.inner - .stats - .last_compaction_dur_ms - .load(Ordering::Relaxed), + s.last_compaction_dur_ms.load(Ordering::Relaxed), ), - last_compaction_reclaimed_bytes: self - .inner - .stats + last_compaction_reclaimed_bytes: s .last_compaction_reclaimed_bytes .load(Ordering::Relaxed), - last_compaction_moved_bytes: self - .inner - .stats - .last_compaction_moved_bytes - .load(Ordering::Relaxed), - num_read_ops: self.inner.stats.num_read_ops.load(Ordering::Relaxed), - num_read_bytes: 
self.inner.stats.num_read_bytes.load(Ordering::Relaxed), - num_write_ops: self.inner.stats.num_write_ops.load(Ordering::Relaxed), - num_write_bytes: self.inner.stats.num_write_bytes.load(Ordering::Relaxed), - num_created: self.inner.stats.num_created.load(Ordering::Relaxed), - num_removed: self.inner.stats.num_removed.load(Ordering::Relaxed), - num_replaced: self.inner.stats.num_replaced.load(Ordering::Relaxed), - written_bytes: self.inner.stats.written_bytes.load(Ordering::Relaxed), - data_bytes, - waste_bytes: waste, - entries_under_64: self.inner.stats.size_histogram[0].load(Ordering::Relaxed), - entries_under_256: self.inner.stats.size_histogram[1].load(Ordering::Relaxed), - entries_under_1024: self.inner.stats.size_histogram[2].load(Ordering::Relaxed), - entries_under_4096: self.inner.stats.size_histogram[3].load(Ordering::Relaxed), - entries_under_16384: self.inner.stats.size_histogram[4].load(Ordering::Relaxed), - entries_over_16384: self.inner.stats.size_histogram[5].load(Ordering::Relaxed), + last_compaction_moved_bytes: s.last_compaction_moved_bytes.load(Ordering::Relaxed), + + num_read_ops: s.num_read_ops.load(Ordering::Relaxed), + num_read_bytes: s.num_read_bytes.load(Ordering::Relaxed), + num_write_ops: s.num_write_ops.load(Ordering::Relaxed), + num_write_bytes: s.num_write_bytes.load(Ordering::Relaxed), + + num_inserted: s.num_inserted.load(Ordering::Relaxed), + num_updated: s.num_updated.load(Ordering::Relaxed), + num_removed: s.num_removed.load(Ordering::Relaxed), + num_positive_lookups: s.num_positive_lookups.load(Ordering::Relaxed), + num_negative_lookups: s.num_negative_lookups.load(Ordering::Relaxed), + num_collisions: s.num_collisions.load(Ordering::Relaxed), + + num_rebuilt_entries: s.num_rebuilt_entries.load(Ordering::Relaxed), + num_rebuild_purged_bytes: s.num_rebuild_purged_bytes.load(Ordering::Relaxed), + + entries_under_64: s.size_histogram[0].load(Ordering::Relaxed), + entries_under_256: s.size_histogram[1].load(Ordering::Relaxed), + 
entries_under_1024: s.size_histogram[2].load(Ordering::Relaxed), + entries_under_4096: s.size_histogram[3].load(Ordering::Relaxed), + entries_under_16384: s.size_histogram[4].load(Ordering::Relaxed), + entries_over_16384: s.size_histogram[5].load(Ordering::Relaxed), } } @@ -1200,4 +1228,25 @@ mod tests { Ok(()) } + + #[test] + fn test_stats_reports_rebuild_counters() -> Result<()> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + db.inner + .stats + .num_rebuilt_entries + .store(11, Ordering::Relaxed); + db.inner + .stats + .num_rebuild_purged_bytes + .store(96, Ordering::Relaxed); + + let stats = db.stats(); + assert_eq!(stats.num_rebuilt_entries, 11); + assert_eq!(stats.num_rebuild_purged_bytes, 96); + + Ok(()) + } } diff --git a/src/store/compaction.rs b/src/store/compaction.rs index 260d4d8..46ec796 100644 --- a/src/store/compaction.rs +++ b/src/store/compaction.rs @@ -306,9 +306,6 @@ impl CandyStore { ); let compaction_millis = u64::try_from(t0.elapsed().as_millis()).unwrap_or(u64::MAX); - ctx.stats - .compaction_time_ms - .fetch_add(compaction_millis, Ordering::Relaxed); match res { Ok(outcome) => { ctx.stats @@ -346,33 +343,7 @@ impl Drop for CandyStore { if !self.allow_clean_shutdown.load(Ordering::Relaxed) { return; } - let data_files_synced = self - .inner - .data_files - .read() - .values() - .all(|df| df.file.sync_all().is_ok()); - if !data_files_synced { - return; - } - - // Advance the commit cursor so the next open can skip replay entirely. 
- let active_idx = self.inner.active_file_idx.load(Ordering::Relaxed); - if let Some(active_file) = self.inner.data_files.read().get(&active_idx).cloned() { - self.inner.index_file.rollover_uncommitted_counters(); - self.inner - .index_file - .header_ref() - .commit_file_ordinal - .store(active_file.file_ordinal, Ordering::Release); - self.inner - .index_file - .header_ref() - .commit_offset - .store(active_file.used_bytes(), Ordering::Release); - } - - let _ = self.inner.index_file.sync_all(); + let _ = self.checkpoint_locked(); } } diff --git a/src/store/list.rs b/src/store/list.rs index 99cbb37..d0cbc7f 100644 --- a/src/store/list.rs +++ b/src/store/list.rs @@ -47,7 +47,7 @@ impl ListIterator<'_> { } fn try_heal_head(&self, new_head: u64) -> Result<()> { - let _lock = self.store.logical_write_guard(self.ns.meta, &self.list); + let _lock = self.store.list_write_guard(self.ns.meta, &self.list); let mut meta = get_list_meta(self.store, self.ns, &self.list)?; if meta.head >= self.initial_next_idx && meta.head < new_head { meta.head = new_head; @@ -61,7 +61,7 @@ impl ListIterator<'_> { } fn try_heal_tail(&self, new_tail: u64) -> Result<()> { - let _lock = self.store.logical_write_guard(self.ns.meta, &self.list); + let _lock = self.store.list_write_guard(self.ns.meta, &self.list); let mut meta = get_list_meta(self.store, self.ns, &self.list)?; if meta.tail <= self.initial_end_idx && meta.tail > new_tail { meta.tail = new_tail; @@ -302,7 +302,7 @@ impl CandyStore { list_key: &[u8], mut func: impl FnMut(&[u8], &[u8]) -> Result, ) -> Result<()> { - let _lock = self.logical_write_guard(ns.meta, list_key); + let _lock = self.list_write_guard(ns.meta, list_key); let mut meta = get_list_meta(self, ns, list_key)?; if meta.count == 0 { return Ok(()); @@ -365,7 +365,7 @@ impl CandyStore { value: &[u8], ) -> Result>> { self.validate_list_item_sizes(list, key, value)?; - let _lock = self.logical_write_guard(ns.meta, list); + let _lock = self.list_write_guard(ns.meta, list); let 
mut meta = get_list_meta(self, ns, list)?; let data_key = make_list_data_key(list, key); @@ -421,7 +421,7 @@ impl CandyStore { expected: Option<&[u8]>, ) -> Result { self.validate_list_item_sizes(list, key, value)?; - let _lock = self.logical_write_guard(ns.meta, list); + let _lock = self.list_write_guard(ns.meta, list); let data_key = make_list_data_key(list, key); let Some(existing_value) = self.get_ns(ns.data, &data_key)? else { @@ -449,7 +449,7 @@ impl CandyStore { value: &[u8], ) -> Result { self.validate_list_item_sizes(list, key, value)?; - let _lock = self.logical_write_guard(ns.meta, list); + let _lock = self.list_write_guard(ns.meta, list); let data_key = make_list_data_key(list, key); if let Some(existing) = self.get_ns(ns.data, &data_key)? { @@ -482,7 +482,7 @@ impl CandyStore { value: &[u8], ) -> Result>> { self.validate_list_item_sizes(list, key, value)?; - let _lock = self.logical_write_guard(ns.meta, list); + let _lock = self.list_write_guard(ns.meta, list); let mut meta = get_list_meta(self, ns, list)?; let data_key = make_list_data_key(list, key); @@ -522,7 +522,7 @@ impl CandyStore { list: &[u8], key: &[u8], ) -> Result>> { - let _lock = self.logical_read_guard(ns.meta, list); + let _lock = self.list_read_guard(ns.meta, list); let data_key = make_list_data_key(list, key); Ok(self.get_ns(ns.data, &data_key)?.map(strip_idx_suffix)) } @@ -533,7 +533,7 @@ impl CandyStore { list: &[u8], key: &[u8], ) -> Result>> { - let _lock = self.logical_write_guard(ns.meta, list); + let _lock = self.list_write_guard(ns.meta, list); self._list_remove_with_ns(ns, list, key) } @@ -631,7 +631,7 @@ impl CandyStore { } pub(super) fn list_discard_with_ns(&self, ns: ListNamespaces, list: &[u8]) -> Result { - let _lock = self.logical_write_guard(ns.meta, list); + let _lock = self.list_write_guard(ns.meta, list); let meta = get_list_meta(self, ns, list)?; if meta.count == 0 { return Ok(false); @@ -655,7 +655,7 @@ impl CandyStore { list: &[u8], params: ListCompactionParams, 
) -> Result { - let _lock = self.logical_write_guard(ns.meta, list); + let _lock = self.list_write_guard(ns.meta, list); let mut meta = get_list_meta(self, ns, list)?; if meta.count == 0 { return Ok(false); @@ -725,7 +725,7 @@ impl CandyStore { ns: ListNamespaces, list_key: &[u8], ) -> Result> { - let _lock = self.logical_write_guard(ns.meta, list_key); + let _lock = self.list_write_guard(ns.meta, list_key); let head = self.peek_list_head_with_ns(ns, list_key)?; if let Some((key, _)) = head && let Some(value) = self._list_remove_with_ns(ns, list_key, &key)? @@ -740,7 +740,7 @@ impl CandyStore { ns: ListNamespaces, list_key: &[u8], ) -> Result> { - let _lock = self.logical_write_guard(ns.meta, list_key); + let _lock = self.list_write_guard(ns.meta, list_key); let tail = self.peek_list_tail_with_ns(ns, list_key)?; if let Some((key, _)) = tail && let Some(value) = self._list_remove_with_ns(ns, list_key, &key)? diff --git a/src/store/queue.rs b/src/store/queue.rs index 15f02a8..3d1d441 100644 --- a/src/store/queue.rs +++ b/src/store/queue.rs @@ -43,7 +43,7 @@ impl<'a> QueueIterator<'a> { } fn try_heal_head(&self, new_head: u64) -> Result<()> { - let _lock = self.store.logical_write_guard(self.ns.meta, &self.queue); + let _lock = self.store.list_write_guard(self.ns.meta, &self.queue); let mut meta = get_queue_meta(self.store, self.ns, &self.queue)?; if meta.head >= self.initial_next_idx && meta.head < new_head { meta.head = new_head; @@ -57,7 +57,7 @@ impl<'a> QueueIterator<'a> { } fn try_heal_tail(&self, new_tail: u64) -> Result<()> { - let _lock = self.store.logical_write_guard(self.ns.meta, &self.queue); + let _lock = self.store.list_write_guard(self.ns.meta, &self.queue); let mut meta = get_queue_meta(self.store, self.ns, &self.queue)?; if meta.tail <= self.initial_end_idx && meta.tail > new_tail { meta.tail = new_tail; @@ -298,7 +298,7 @@ impl CandyStore { queue: &[u8], value: &[u8], ) -> Result { - let _lock = self.logical_write_guard(ns.meta, queue); + let _lock 
= self.list_write_guard(ns.meta, queue); self._queue_push_tail_with_ns(ns, queue, value) } @@ -327,7 +327,7 @@ impl CandyStore { queue: &[u8], value: &[u8], ) -> Result { - let _lock = self.logical_write_guard(ns.meta, queue); + let _lock = self.list_write_guard(ns.meta, queue); let mut meta = get_queue_meta(self, ns, queue)?; let new_head = meta.head - 1; let key = make_queue_data_key(queue, new_head); @@ -346,7 +346,7 @@ impl CandyStore { ns: QueueNamespaces, queue: &[u8], ) -> Result)>> { - let _lock = self.logical_write_guard(ns.meta, queue); + let _lock = self.list_write_guard(ns.meta, queue); let mut meta = get_queue_meta(self, ns, queue)?; loop { if meta.head > meta.tail { @@ -376,7 +376,7 @@ impl CandyStore { ns: QueueNamespaces, queue: &[u8], ) -> Result)>> { - let _lock = self.logical_write_guard(ns.meta, queue); + let _lock = self.list_write_guard(ns.meta, queue); let mut meta = get_queue_meta(self, ns, queue)?; loop { if meta.head > meta.tail { @@ -406,7 +406,7 @@ impl CandyStore { ns: QueueNamespaces, queue: &[u8], ) -> Result)>> { - let _lock = self.logical_read_guard(ns.meta, queue); + let _lock = self.list_read_guard(ns.meta, queue); let meta = get_queue_meta(self, ns, queue)?; if meta.head > meta.tail { return Ok(None); @@ -425,7 +425,7 @@ impl CandyStore { ns: QueueNamespaces, queue: &[u8], ) -> Result)>> { - let _lock = self.logical_read_guard(ns.meta, queue); + let _lock = self.list_read_guard(ns.meta, queue); let meta = get_queue_meta(self, ns, queue)?; if meta.head > meta.tail { return Ok(None); @@ -444,7 +444,7 @@ impl CandyStore { } pub(super) fn queue_discard_with_ns(&self, ns: QueueNamespaces, queue: &[u8]) -> Result { - let _lock = self.logical_write_guard(ns.meta, queue); + let _lock = self.list_write_guard(ns.meta, queue); self._queue_discard_with_ns(ns, queue) } @@ -467,7 +467,7 @@ impl CandyStore { queue: &[u8], idx: u64, ) -> Result>> { - let _lock = self.logical_write_guard(ns.meta, queue); + let _lock = 
self.list_write_guard(ns.meta, queue); let mut meta = get_queue_meta(self, ns, queue)?; let key = make_queue_data_key(queue, idx); let removed = match self.remove_ns(ns.data, &key)? { @@ -528,7 +528,7 @@ impl CandyStore { key: &[u8], value: &[u8], ) -> Result { - let _lock = self.logical_write_guard(ns.meta, key); + let _lock = self.list_write_guard(ns.meta, key); let existed = self._queue_discard_with_ns(ns, key)?; let max_chunk_len = self.max_big_chunk_len(key)?; @@ -546,7 +546,7 @@ impl CandyStore { ns: QueueNamespaces, key: &[u8], ) -> Result>> { - let _lock = self.logical_read_guard(ns.meta, key); + let _lock = self.list_read_guard(ns.meta, key); let meta = get_queue_meta(self, ns, key)?; let expected_chunks = meta.count; if expected_chunks == 0 { diff --git a/src/store/recovery.rs b/src/store/recovery.rs index 517fccd..ba2fb4f 100644 --- a/src/store/recovery.rs +++ b/src/store/recovery.rs @@ -73,9 +73,8 @@ impl CandyStore { // extent. This handles the case where the data file was truncated // (e.g. disk-full or corruption) and ensures the replay loop won't // encounter stale pointers when comparing existing entries. - let pre_purge_extent = active_file - .used_bytes() - .next_multiple_of(FILE_OFFSET_ALIGNMENT); + let pre_rebuild_used_bytes = active_file.used_bytes(); + let pre_purge_extent = pre_rebuild_used_bytes.next_multiple_of(FILE_OFFSET_ALIGNMENT); self.purge_uncommitted_file_entries(active_idx, pre_purge_extent)?; if matches!(rebuild_mode, RebuildMode::FullActiveFile) { @@ -95,6 +94,7 @@ impl CandyStore { let mut buf_file_offset = 0u64; let mut match_scratch = Vec::new(); let mut bytes_since_checkpoint = 0u64; + let mut last_durable_offset = start_offset; loop { let Some((kv, entry_offset, next_offset)) = active_file.read_next_entry_ref(offset, &mut read_buf, &mut buf_file_offset)? 
@@ -114,11 +114,16 @@ impl CandyStore { match kv.entry_type { EntryType::Insert => self.inner.add_uncommitted_num_entries(1), EntryType::Tombstone => self.inner.add_uncommitted_num_entries(-1), - _ => {} // UpdateData and any future types don't change num_entries + _ => {} // UpdateData and any future types don't change num_items } // Fix up the index pointers (no stats accounting). self.recover_entry(&active_file, ns, kv, entry_offset, &mut match_scratch)?; + self.inner + .stats + .num_rebuilt_entries + .fetch_add(1, Ordering::Relaxed); + last_durable_offset = next_offset; crash_point("rebuild_entry"); bytes_since_checkpoint += entry_bytes; @@ -128,7 +133,15 @@ impl CandyStore { } } - let durable_extent = offset.next_multiple_of(FILE_OFFSET_ALIGNMENT); + let durable_extent = last_durable_offset.next_multiple_of(FILE_OFFSET_ALIGNMENT); + + if durable_extent < pre_rebuild_used_bytes { + self.inner + .stats + .num_rebuild_purged_bytes + .fetch_add(pre_rebuild_used_bytes - durable_extent, Ordering::Relaxed); + active_file.truncate_to_offset(durable_extent)?; + } // Purge any phantom index entries that reference the active file // beyond this point (OS flushed the index page but not the data). diff --git a/src/types.rs b/src/types.rs index ccff569..72501fc 100644 --- a/src/types.rs +++ b/src/types.rs @@ -187,32 +187,38 @@ impl Default for ListCompactionParams { pub struct Stats { /// Number of allocated index rows. pub num_rows: u64, - /// Theoretical maximum number of entries at the current row count. - pub capacity: u64, /// Number of currently live entries. pub num_items: u64, - /// Total bytes occupied by index metadata files. - pub index_size_bytes: u64, - /// Number of completed background compactions. - pub num_compactions: u64, - /// Total time spent in compaction, in milliseconds. - pub compaction_time_ms: u64, /// Number of data files currently present. pub num_data_files: u64, - /// Number of successful key lookups. 
- pub num_positive_lookups: u64, - /// Number of failed key lookups. - pub num_negative_lookups: u64, - /// Number of probes that had to inspect a second matching index entry. - pub num_collisions: u64, + + /// Total bytes occupied by index metadata files. + pub index_size_bytes: u64, + /// Time spent in the most recent grow remap operation. pub last_remap_dur: Duration, + /// Number of completed background compactions. + pub num_compactions: u64, /// Time spent in the most recent successful file compaction. pub last_compaction_dur: Duration, /// Bytes reclaimed by the most recent successful file compaction. pub last_compaction_reclaimed_bytes: u32, /// Bytes rewritten by the most recent successful file compaction. pub last_compaction_moved_bytes: u32, + + /// Number of entry creations recorded since open. + pub num_inserted: u64, + /// Number of entry removals recorded since open. + pub num_removed: u64, + /// Number of entry replacements recorded since open. + pub num_updated: u64, + /// Number of successful key lookups. + pub num_positive_lookups: u64, + /// Number of failed key lookups. + pub num_negative_lookups: u64, + /// Number of probes that had to inspect a second matching index entry. + pub num_collisions: u64, + /// Number of read operations performed against data files. pub num_read_ops: u64, /// Total bytes read from data files. @@ -221,18 +227,17 @@ pub struct Stats { pub num_write_ops: u64, /// Total bytes written to data files. pub num_write_bytes: u64, - /// Number of entry creations recorded since open. - pub num_created: u64, - /// Number of entry removals recorded since open. - pub num_removed: u64, - /// Number of entry replacements recorded since open. - pub num_replaced: u64, - /// Total logical entry bytes written since open. - pub written_bytes: u64, - /// Total bytes currently occupied by live entries. - pub data_bytes: u64, + + /// Number of entries replayed during the most recent recovery rebuild. 
+ pub num_rebuilt_entries: u64, + /// Number of trailing data-file bytes discarded during the most recent recovery rebuild. + pub num_rebuild_purged_bytes: u64, + + /// Total bytes currently occupied by the data files (including waste) + pub total_bytes: u64, /// Total bytes currently accounted as unreclaimed waste. pub waste_bytes: u64, + /// Approximate histogram bucket for entries under 64 bytes since open. pub entries_under_64: u64, /// Approximate histogram bucket for entries under 256 bytes since open. @@ -248,51 +253,13 @@ pub struct Stats { } impl Stats { - /// Returns the fraction of the current index capacity occupied by live entries. - pub fn fill_level(&self) -> f64 { - if self.capacity == 0 { - return 0.0; - } - self.num_items as f64 / self.capacity as f64 - } - - /// Returns the number of live entries. - pub fn num_entries(&self) -> u64 { - self.num_items - } - - /// Returns the current unreclaimed waste in bytes. - pub fn current_waste(&self) -> u64 { - self.waste_bytes + /// Theoretical maximum number of entries at the current row count. + pub fn index_capacity(&self) -> u64 { + self.num_rows.saturating_mul(ROW_WIDTH as u64) } - /// Returns bytes currently occupied by live data. + /// bytes used for live data entries pub fn data_bytes(&self) -> u64 { - self.data_bytes - } - - /// Returns bytes currently occupied by live data. - pub fn occupied_bytes(&self) -> u64 { - self.data_bytes() - } - - /// Returns current unreclaimed waste in bytes. - pub fn wasted_bytes(&self) -> u64 { - self.current_waste() - } - - /// Returns the number of inserted entries. - pub fn num_inserts(&self) -> u64 { - self.num_created - } - - /// Returns the number of updated entries. - pub fn num_updates(&self) -> u64 { - self.num_replaced - } - - /// Returns the number of removed entries. 
- pub fn num_removals(&self) -> u64 { - self.num_removed + self.total_bytes.saturating_sub(self.waste_bytes) } } diff --git a/tests/crasher.rs b/tests/crasher.rs index c65fbe6..6b21a80 100644 --- a/tests/crasher.rs +++ b/tests/crasher.rs @@ -30,10 +30,21 @@ fn get_config() -> Config { const DB_DIR: &str = "/tmp/dbdir_crash"; -fn child_inserts() -> Result<()> { +fn record_rebuild_stats(shared_stuff: &SharedStuff, store: &CandyStore) { + let stats = store.stats(); + shared_stuff + .total_num_rebuilt_entries + .fetch_add(stats.num_rebuilt_entries, SeqCst); + shared_stuff + .total_num_dropped_bytes_on_rebuild + .fetch_add(stats.num_rebuild_purged_bytes, SeqCst); +} + +fn child_inserts(shared_stuff: &SharedStuff) -> Result<()> { // our job is to create 1M entries while being killed by our evil parent let store = CandyStore::open(DB_DIR, get_config())?; + record_rebuild_stats(shared_stuff, &store); let highest_bytes = store.get("highest")?.unwrap_or(vec![0, 0, 0, 0]); let highest = u32::from_le_bytes(highest_bytes.try_into().unwrap()); @@ -53,10 +64,11 @@ fn child_inserts() -> Result<()> { Ok(()) } -fn child_removals() -> Result<()> { +fn child_removals(shared_stuff: &SharedStuff) -> Result<()> { // our job is to remove 1M entries while being killed by our evil parent let store = CandyStore::open(DB_DIR, get_config())?; + record_rebuild_stats(shared_stuff, &store); let lowest_bytes = store.get("lowest")?.unwrap_or(vec![0, 0, 0, 0]); let lowest = u32::from_le_bytes(lowest_bytes.try_into().unwrap()); @@ -78,10 +90,11 @@ fn child_removals() -> Result<()> { Ok(()) } -fn child_list_inserts() -> Result<()> { +fn child_list_inserts(shared_stuff: &SharedStuff) -> Result<()> { // our job is to insert 1M entries to a list while being killed by our evil parent let store = CandyStore::open(DB_DIR, get_config())?; + record_rebuild_stats(shared_stuff, &store); let highest_bytes = store.get("list_highest")?.unwrap_or(vec![0, 0, 0, 0]); let highest = 
u32::from_le_bytes(highest_bytes.try_into().unwrap()); @@ -102,10 +115,11 @@ fn child_list_inserts() -> Result<()> { Ok(()) } -fn child_list_removals() -> Result<()> { +fn child_list_removals(shared_stuff: &SharedStuff) -> Result<()> { // our job is to remove 1M entries to a list while being killed by our evil parent let store = CandyStore::open(DB_DIR, get_config())?; + record_rebuild_stats(shared_stuff, &store); let lowest_bytes = store.get("list_lowest")?.unwrap_or(vec![0, 0, 0, 0]); let lowest = u32::from_le_bytes(lowest_bytes.try_into().unwrap()); @@ -146,8 +160,9 @@ fn child_list_removals() -> Result<()> { Ok(()) } -fn child_list_iterator_removals() -> Result<()> { +fn child_list_iterator_removals(shared_stuff: &SharedStuff) -> Result<()> { let store = CandyStore::open(DB_DIR, get_config())?; + record_rebuild_stats(shared_stuff, &store); if rand::random() { //println!("FWD"); @@ -179,14 +194,14 @@ fn child_list_iterator_removals() -> Result<()> { fn parent_run( shared_stuff: &SharedStuff, child_name: &str, - mut child_func: impl FnMut() -> Result<()>, + mut child_func: impl FnMut(&SharedStuff) -> Result<()>, ) -> Result<()> { println!("======== Parent starts {child_name} ========"); for i in 0.. 
{ let pid = unsafe { libc::fork() }; assert!(pid >= 0); if pid == 0 { - let res = child_func(); + let res = child_func(shared_stuff); if let Err(e) = res { eprintln!("Child failed: {}", e); shared_stuff.failed.store(1, SeqCst); @@ -230,6 +245,8 @@ fn parent_run( struct SharedStuff { failed: AtomicU64, + total_num_rebuilt_entries: AtomicU64, + total_num_dropped_bytes_on_rebuild: AtomicU64, } #[test] @@ -255,6 +272,10 @@ fn test_crash_recovery() -> Result<()> { let shared_stuff = unsafe { &*(map_addr as *const SharedStuff) }; shared_stuff.failed.store(0, SeqCst); + shared_stuff.total_num_rebuilt_entries.store(0, SeqCst); + shared_stuff + .total_num_dropped_bytes_on_rebuild + .store(0, SeqCst); parent_run(shared_stuff, "child_inserts", child_inserts)?; @@ -371,6 +392,12 @@ fn test_crash_recovery() -> Result<()> { println!("DB validated successfully"); } + println!( + "rebuilt_entries_total={} dropped_bytes_on_rebuild_total={}", + shared_stuff.total_num_rebuilt_entries.load(SeqCst), + shared_stuff.total_num_dropped_bytes_on_rebuild.load(SeqCst) + ); + _ = std::fs::remove_dir_all(DB_DIR); Ok(()) } diff --git a/tests/metrics.rs b/tests/metrics.rs index f73f556..d72867f 100644 --- a/tests/metrics.rs +++ b/tests/metrics.rs @@ -15,9 +15,8 @@ fn test_metrics_updates() -> Result<(), Box> { let stats = db.stats(); assert_eq!(stats.num_rows, 8); - assert_eq!(stats.capacity, 8 * ROW_WIDTH); + assert_eq!(stats.index_capacity(), 8 * ROW_WIDTH); assert_eq!(stats.num_items, 0); - assert_eq!(stats.fill_level(), 0.0); assert_eq!(stats.num_positive_lookups, 0); assert_eq!(stats.num_negative_lookups, 0); assert_eq!(stats.num_collisions, 0); @@ -29,42 +28,41 @@ fn test_metrics_updates() -> Result<(), Box> { assert_eq!(stats.num_read_bytes, 0); assert_eq!(stats.num_write_ops, 0); assert_eq!(stats.num_write_bytes, 0); - assert_eq!(stats.num_created, 0); + assert_eq!(stats.num_inserted, 0); assert_eq!(stats.num_removed, 0); - assert_eq!(stats.num_replaced, 0); - 
assert_eq!(stats.written_bytes, 0); - assert_eq!(stats.data_bytes, 0); + assert_eq!(stats.num_updated, 0); + assert_eq!(stats.num_rebuilt_entries, 0); + assert_eq!(stats.num_rebuild_purged_bytes, 0); + assert_eq!(stats.data_bytes(), 0); assert_eq!(stats.waste_bytes, 0); db.set("key1", "val1")?; let stats = db.stats(); assert_eq!(stats.num_items, 1); - assert_eq!(stats.num_entries(), 1); - assert_eq!(stats.num_inserts(), 1); - assert_eq!(stats.num_updates(), 0); - assert_eq!(stats.num_removals(), 0); - assert_eq!(stats.num_created, 1); + assert_eq!(stats.num_inserted, 1); + assert_eq!(stats.num_updated, 0); assert_eq!(stats.num_removed, 0); - assert_eq!(stats.num_replaced, 0); - assert!(stats.written_bytes > 0); - assert!(stats.data_bytes > 0); + assert_eq!(stats.num_inserted, 1); + assert_eq!(stats.num_removed, 0); + assert_eq!(stats.num_updated, 0); + assert!(stats.data_bytes() > 0); assert_eq!(stats.waste_bytes, 0); assert_eq!(stats.num_write_ops, 1); assert!(stats.num_write_bytes > 0); assert!(stats.index_size_bytes > 0); assert_eq!(stats.num_data_files, 1); - assert!(stats.occupied_bytes() > 0); + assert!(stats.data_bytes() > 0); db.set("key1", "val2")?; let stats = db.stats(); assert_eq!(stats.num_items, 1); - assert_eq!(stats.num_updates(), 1); - assert_eq!(stats.num_created, 1); - assert_eq!(stats.num_replaced, 1); + assert_eq!(stats.num_updated, 1); + assert_eq!(stats.num_inserted, 1); + assert_eq!(stats.num_updated, 1); assert_eq!(stats.num_removed, 0); - assert!(stats.data_bytes > 0); + assert!(stats.data_bytes() > 0); assert!(stats.waste_bytes > 0); assert_eq!(stats.num_write_ops, 2); @@ -72,13 +70,13 @@ fn test_metrics_updates() -> Result<(), Box> { let stats = db.stats(); assert_eq!(stats.num_items, 0); - assert_eq!(stats.num_removals(), 1); - assert!(stats.wasted_bytes() > 0); + assert_eq!(stats.num_removed, 1); + assert!(stats.waste_bytes > 0); assert_eq!(stats.num_write_ops, 3); - assert_eq!(stats.num_created, 1); - assert_eq!(stats.num_replaced, 
1); + assert_eq!(stats.num_inserted, 1); + assert_eq!(stats.num_updated, 1); assert_eq!(stats.num_removed, 1); - assert_eq!(stats.data_bytes, 0); + assert_eq!(stats.data_bytes(), 0); assert_eq!(db.get("missing")?, None); assert_eq!(db.get("key1")?, None); @@ -116,12 +114,10 @@ fn test_metrics_compaction() -> Result<(), Box> { } let stats = db.stats(); - assert!(stats.written_bytes > 0); - assert!(stats.num_replaced > 0); - assert!(stats.data_bytes > 0); + assert!(stats.num_updated > 0); + assert!(stats.data_bytes() > 0); assert!(stats.num_items > 0); - assert!(stats.capacity >= stats.num_items); - assert!(stats.fill_level() > 0.0); + assert!(stats.index_capacity() >= stats.num_items); assert!(stats.num_write_ops > 0); assert!(stats.num_write_bytes > 0); diff --git a/tests/recovery.rs b/tests/recovery.rs index 76eb585..09df15e 100644 --- a/tests/recovery.rs +++ b/tests/recovery.rs @@ -117,6 +117,62 @@ fn write_commit_cursor(dir: &std::path::Path, offset: u64) -> Result<(), Error> Ok(()) } +fn active_data_file_path(dir: &std::path::Path) -> Result { + let active_ordinal = active_file_ordinal(dir)?; + + for entry in std::fs::read_dir(dir).map_err(Error::IOError)? 
{ + let entry = entry.map_err(Error::IOError)?; + let path = entry.path(); + let Some(name) = path.file_name().and_then(|name| name.to_str()) else { + continue; + }; + if !name.starts_with("data_") { + continue; + } + + let mut file = std::fs::OpenOptions::new() + .read(true) + .open(&path) + .map_err(Error::IOError)?; + file.seek(SeekFrom::Start(16)).map_err(Error::IOError)?; + let mut buf = [0u8; 8]; + file.read_exact(&mut buf).map_err(Error::IOError)?; + if u64::from_le_bytes(buf) == active_ordinal { + return Ok(path); + } + } + + Err(Error::IOError(std::io::Error::new( + std::io::ErrorKind::NotFound, + "active data file not found", + ))) +} + +fn append_aligned_tail_garbage(dir: &std::path::Path, len: usize) -> Result<(), Error> { + debug_assert_eq!(len % 16, 0); + + let path = active_data_file_path(dir)?; + let mut file = std::fs::OpenOptions::new() + .append(true) + .open(path) + .map_err(Error::IOError)?; + file.write_all(&vec![0xA5; len]).map_err(Error::IOError)?; + file.sync_all().map_err(Error::IOError)?; + Ok(()) +} + +fn assert_rebuild_stats_non_zero(db: &CandyStore) { + let stats = db.stats(); + assert!( + stats.num_rebuilt_entries > 0, + "expected rebuild to replay at least one entry" + ); + assert!( + stats.num_rebuild_purged_bytes > 0, + "expected rebuild to trim a dirty file tail" + ); +} + #[test] fn test_recovery_after_dirty_shutdown() -> Result<(), Error> { let dir = tempdir().unwrap(); @@ -629,7 +685,6 @@ fn test_recover_from_truncated_data_file() -> Result<(), Box Result<(), Error> { } db._abort_for_testing(); } + append_aligned_tail_garbage(dir.path(), 64)?; // Phase 2: reopen triggers rebuild. Verify all data survived. 
{ let db = CandyStore::open(dir.path(), config)?; + assert_rebuild_stats_non_zero(&db); for i in 0..100 { assert_eq!( db.get(format!("key{i:04}"))?, @@ -717,11 +773,13 @@ fn test_progressive_rebuild_survives_interrupted_rebuild() -> Result<(), Error> } db._abort_for_testing(); } + append_aligned_tail_garbage(dir.path(), 64)?; // Phase 3: another rebuild should start from the persisted replay cursor // and recover everything written before the second crash. { let db = CandyStore::open(dir.path(), config)?; + assert_rebuild_stats_non_zero(&db); for i in 0..150 { assert_eq!( db.get(format!("key{i:04}"))?, @@ -759,11 +817,13 @@ fn test_progressive_rebuild_with_trust_strategy_resumes_pending() -> Result<(), } db._abort_for_testing(); } + append_aligned_tail_garbage(dir.path(), 64)?; // Phase 3: reopen — recovery replays from the commit cursor, so all // data from phases 1+2 should be accessible. { let db = CandyStore::open(dir.path(), config)?; + assert_rebuild_stats_non_zero(&db); for i in 0..200 { assert_eq!( db.get(format!("key{i:04}"))?, @@ -776,6 +836,38 @@ fn test_progressive_rebuild_with_trust_strategy_resumes_pending() -> Result<(), Ok(()) } +#[test] +fn test_checkpoint_advances_recovery_cursor() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 1024, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..100 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db.checkpoint()?; + db._abort_for_testing(); + } + + let db = CandyStore::open(dir.path(), config)?; + let stats = db.stats(); + assert_eq!(stats.num_rebuilt_entries, 0); + assert_eq!(stats.num_rebuild_purged_bytes, 0); + for i in 0..100 { + assert_eq!( + db.get(format!("key{i:04}"))?, + Some(format!("val{i:04}").into_bytes()), + "key{i:04} missing after checkpointed reopen" + ); + } + + Ok(()) +} + #[test] fn test_progressive_rebuild_ignores_bogus_checkpoint_offset() -> Result<(), Error> { let dir = 
tempdir().unwrap(); @@ -848,7 +940,6 @@ fn test_clean_reopen_rebuilds_invalid_active_checkpoint_across_multiple_data_fil } assert_eq!(db.num_items(), total_base_keys); - assert_eq!(db.stats().num_entries(), total_base_keys as u64); Ok(()) } From 446e263cc9874ac3ff7b54a5a22efca6981b9e70 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Sun, 29 Mar 2026 15:03:41 +0300 Subject: [PATCH 10/25] Fix small bug in shrink_to_fit; dedup common logic from list/queue iterators and updating to existing entries; remove dead code/struct members --- src/index_file.rs | 8 +-- src/store.rs | 166 +++++++++++++++++++++++++++++++++------------ src/store/list.rs | 30 ++++---- src/store/queue.rs | 30 ++++---- 4 files changed, 154 insertions(+), 80 deletions(-) diff --git a/src/index_file.rs b/src/index_file.rs index b4768f0..b9276a8 100644 --- a/src/index_file.rs +++ b/src/index_file.rs @@ -51,19 +51,13 @@ pub(crate) struct IndexFileHeader { // stats /////////////////////////////////// pub(crate) committed_num_entries: AtomicU64, - _reserved_data_bytes: AtomicU64, - _reserved_waste_bytes: AtomicU64, - pub(crate) uncommitted_entries_delta: AtomicI64, - _reserved_data_delta: AtomicI64, - _reserved_waste_delta: AtomicI64, - _trailer: [u8; PAGE_SIZE - 1072], + _trailer: [u8; PAGE_SIZE - 1024 - 2 * 8], } const _: () = assert!(offset_of!(IndexFileHeader, global_split_level) == 64); const _: () = assert!(offset_of!(IndexFileHeader, commit_file_ordinal) == 128); -const _: () = assert!(offset_of!(IndexFileHeader, commit_offset) == 136); const _: () = assert!(offset_of!(IndexFileHeader, committed_num_entries) == 1024); const _: () = assert!(size_of::() == PAGE_SIZE); diff --git a/src/store.rs b/src/store.rs index 378466c..413f910 100644 --- a/src/store.rs +++ b/src/store.rs @@ -24,7 +24,7 @@ use crate::{ index_file::{EntryPointer, IndexFile, RowLayout, RowReadGuard, RowWriteGuard}, internal::{ EntryType, HashCoord, KeyNamespace, MAX_DATA_FILE_IDX, MAX_DATA_FILES, MIN_SPLIT_LEVEL, - ROW_WIDTH, 
aligned_data_entry_size, aligned_data_entry_waste, + ROW_WIDTH, RangeMetadata, aligned_data_entry_size, aligned_data_entry_waste, aligned_tombstone_entry_waste, index_file_path, index_rows_file_path, sync_dir, }, types::{ @@ -103,6 +103,19 @@ struct StoreInner { stats: InnerStats, } +struct ExistingEntryUpdate<'a> { + files: &'a HashMap>, + ns: KeyNamespace, + key: &'a [u8], + val: &'a [u8], + hc: HashCoord, + col: usize, + src_file_idx: u16, + old_klen: usize, + old_vlen: usize, + crash_point_name: Option<&'a str>, +} + /// A persistent key-value store backed by append-only data files and a mutable index. pub struct CandyStore { inner: Arc, @@ -556,6 +569,50 @@ impl CandyStore { self.inner.list_meta_locks[self.inner.logical_lock_index(ns, key)].write() } + fn try_heal_range_head( + &self, + meta_ns: KeyNamespace, + range_key: &[u8], + initial_next_idx: u64, + new_head: u64, + mut get_meta: GetMeta, + mut set_meta: SetMeta, + ) -> Result<()> + where + GetMeta: FnMut(&CandyStore, &[u8]) -> Result, + SetMeta: FnMut(&CandyStore, &[u8], RangeMetadata) -> Result<()>, + { + let _lock = self.list_write_guard(meta_ns, range_key); + let mut meta = get_meta(self, range_key)?; + if meta.head >= initial_next_idx && meta.head < new_head { + meta.head = new_head; + set_meta(self, range_key, meta)?; + } + Ok(()) + } + + fn try_heal_range_tail( + &self, + meta_ns: KeyNamespace, + range_key: &[u8], + initial_end_idx: u64, + new_tail: u64, + mut get_meta: GetMeta, + mut set_meta: SetMeta, + ) -> Result<()> + where + GetMeta: FnMut(&CandyStore, &[u8]) -> Result, + SetMeta: FnMut(&CandyStore, &[u8], RangeMetadata) -> Result<()>, + { + let _lock = self.list_write_guard(meta_ns, range_key); + let mut meta = get_meta(self, range_key)?; + if meta.tail <= initial_end_idx && meta.tail > new_tail { + meta.tail = new_tail; + set_meta(self, range_key, meta)?; + } + Ok(()) + } + fn _immut_op( &self, ns: KeyNamespace, @@ -682,7 +739,7 @@ impl CandyStore { 
self.get_or_create_ns(KeyNamespace::User, key.as_ref(), default_val.as_ref()) } - fn track_update_waste(&self, file_idx: u16, _file_ordinal: u64, klen: usize, vlen: usize) { + fn track_update_waste(&self, file_idx: u16, klen: usize, vlen: usize) { let added_waste = aligned_data_entry_waste(klen, vlen); let new_waste = self.inner.index_file.add_file_waste(file_idx, added_waste); self.inner.maybe_signal_compaction_threshold_crossing( @@ -702,23 +759,43 @@ impl CandyStore { self.inner.bump_histogram(entry_size); } - fn record_replace_stats( - &self, - _old_klen: usize, - _old_vlen: usize, - new_klen: usize, - new_vlen: usize, - ) { + fn record_replace_stats(&self, new_klen: usize, new_vlen: usize) { let new_entry_size = aligned_data_entry_size(new_klen, new_vlen); self.inner.stats.num_updated.fetch_add(1, Ordering::Relaxed); self.inner.bump_histogram(new_entry_size); } - fn record_remove_stats(&self, _klen: usize, _vlen: usize) { + fn record_remove_stats(&self) { self.inner.add_uncommitted_num_entries(-1); self.inner.stats.num_removed.fetch_add(1, Ordering::Relaxed); } + fn apply_update_to_existing_entry( + &self, + row: &mut RowWriteGuard<'_>, + update: ExistingEntryUpdate<'_>, + ) -> Result<()> { + let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); + let active_file = update + .files + .get(&active_idx) + .ok_or(Error::MissingDataFile(active_idx))?; + let (file_off, size, _inflight_guard) = + active_file.append_kv(EntryType::Update, update.ns, update.key, update.val)?; + self.inner.record_write(size as u64); + if let Some(name) = update.crash_point_name { + crate::crash_point(name); + } + + row.replace_pointer( + update.col, + EntryPointer::new(active_idx, file_off, size, update.hc.masked_row_selector()), + ); + self.track_update_waste(update.src_file_idx, update.old_klen, update.old_vlen); + self.record_replace_stats(update.key.len(), update.val.len()); + Ok(()) + } + fn set_ns(&self, ns: KeyNamespace, key: &[u8], val: &[u8]) -> Result>> { 
self.inner._mut_op(ns, key, val, |hc, mut row, key, val| { let files = self.inner.data_files.read(); @@ -737,23 +814,22 @@ impl CandyStore { let vlen = kv.value().len(); let old_val = kv.into_value(); let src_file_idx = file.file_idx; - let src_file_ordinal = file.file_ordinal; - let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); - let active_file = files - .get(&active_idx) - .ok_or(Error::MissingDataFile(active_idx))?; - let (file_off, size, _inflight_guard) = - active_file.append_kv(EntryType::Update, ns, key, val)?; - self.inner.record_write(size as u64); - crate::crash_point("set_after_write_before_update"); - - row.replace_pointer( - col, - EntryPointer::new(active_idx, file_off, size, hc.masked_row_selector()), - ); - self.track_update_waste(src_file_idx, src_file_ordinal, klen, vlen); - self.record_replace_stats(klen, vlen, key.len(), val.len()); + self.apply_update_to_existing_entry( + &mut row, + ExistingEntryUpdate { + files: &files, + ns, + key, + val, + hc, + col, + src_file_idx, + old_klen: klen, + old_vlen: vlen, + crash_point_name: Some("set_after_write_before_update"), + }, + )?; return Ok(Some(old_val)); } else { self.inner @@ -825,21 +901,22 @@ impl CandyStore { let vlen = kv.value().len(); let old_val = kv.into_value(); let src_file_idx = file.file_idx; - let src_file_ordinal = file.file_ordinal; - let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); - let active_file = files - .get(&active_idx) - .ok_or(Error::MissingDataFile(active_idx))?; - let (file_off, size, _inflight_guard) = - active_file.append_kv(EntryType::Update, ns, key, val)?; - self.inner.record_write(size as u64); - row.replace_pointer( - col, - EntryPointer::new(active_idx, file_off, size, hc.masked_row_selector()), - ); - self.track_update_waste(src_file_idx, src_file_ordinal, klen, vlen); - self.record_replace_stats(klen, vlen, key.len(), val.len()); + self.apply_update_to_existing_entry( + &mut row, + ExistingEntryUpdate { + files: &files, + 
ns, + key, + val, + hc, + col, + src_file_idx, + old_klen: klen, + old_vlen: vlen, + crash_point_name: None, + }, + )?; return Ok(ReplaceStatus::PrevValue(old_val)); } } @@ -862,7 +939,7 @@ impl CandyStore { ) } - fn track_tombstone_waste(&self, file_idx: u16, _file_ordinal: u64, klen: usize, vlen: usize) { + fn track_tombstone_waste(&self, file_idx: u16, klen: usize, vlen: usize) { let active_idx = self.inner.active_file_idx.load(Ordering::Relaxed); if file_idx == active_idx { self.inner.index_file.add_file_waste( @@ -901,7 +978,6 @@ impl CandyStore { let vlen = kv.value().len(); let old_val = kv.into_value(); let src_file_idx = file.file_idx; - let src_file_ordinal = file.file_ordinal; let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); let active_file = files @@ -912,8 +988,8 @@ impl CandyStore { self.inner.record_write(tombstone_size as u64); row.remove(col); - self.track_tombstone_waste(src_file_idx, src_file_ordinal, klen, vlen); - self.record_remove_stats(klen, vlen); + self.track_tombstone_waste(src_file_idx, klen, vlen); + self.record_remove_stats(); return Ok(Some(old_val)); } } @@ -1085,7 +1161,7 @@ impl CandyStore { self.inner .index_file - .shrink_with_rows_guard(min_rows_cfg, row_table) + .shrink_with_rows_guard(min_rows, row_table) } /// Returns a snapshot of store statistics and accounting counters. 
diff --git a/src/store/list.rs b/src/store/list.rs index d0cbc7f..934bfba 100644 --- a/src/store/list.rs +++ b/src/store/list.rs @@ -47,13 +47,14 @@ impl ListIterator<'_> { } fn try_heal_head(&self, new_head: u64) -> Result<()> { - let _lock = self.store.list_write_guard(self.ns.meta, &self.list); - let mut meta = get_list_meta(self.store, self.ns, &self.list)?; - if meta.head >= self.initial_next_idx && meta.head < new_head { - meta.head = new_head; - set_list_meta(self.store, self.ns, &self.list, meta)?; - } - Ok(()) + self.store.try_heal_range_head( + self.ns.meta, + &self.list, + self.initial_next_idx, + new_head, + |store, list| get_list_meta(store, self.ns, list), + |store, list, meta| set_list_meta(store, self.ns, list, meta), + ) } fn heal_tail(&self, new_tail: u64) { @@ -61,13 +62,14 @@ impl ListIterator<'_> { } fn try_heal_tail(&self, new_tail: u64) -> Result<()> { - let _lock = self.store.list_write_guard(self.ns.meta, &self.list); - let mut meta = get_list_meta(self.store, self.ns, &self.list)?; - if meta.tail <= self.initial_end_idx && meta.tail > new_tail { - meta.tail = new_tail; - set_list_meta(self.store, self.ns, &self.list, meta)?; - } - Ok(()) + self.store.try_heal_range_tail( + self.ns.meta, + &self.list, + self.initial_end_idx, + new_tail, + |store, list| get_list_meta(store, self.ns, list), + |store, list, meta| set_list_meta(store, self.ns, list, meta), + ) } } diff --git a/src/store/queue.rs b/src/store/queue.rs index 3d1d441..683c856 100644 --- a/src/store/queue.rs +++ b/src/store/queue.rs @@ -43,13 +43,14 @@ impl<'a> QueueIterator<'a> { } fn try_heal_head(&self, new_head: u64) -> Result<()> { - let _lock = self.store.list_write_guard(self.ns.meta, &self.queue); - let mut meta = get_queue_meta(self.store, self.ns, &self.queue)?; - if meta.head >= self.initial_next_idx && meta.head < new_head { - meta.head = new_head; - set_queue_meta(self.store, self.ns, &self.queue, meta)?; - } - Ok(()) + self.store.try_heal_range_head( + self.ns.meta, + 
&self.queue, + self.initial_next_idx, + new_head, + |store, queue| get_queue_meta(store, self.ns, queue), + |store, queue, meta| set_queue_meta(store, self.ns, queue, meta), + ) } fn heal_tail(&self, new_tail: u64) { @@ -57,13 +58,14 @@ impl<'a> QueueIterator<'a> { } fn try_heal_tail(&self, new_tail: u64) -> Result<()> { - let _lock = self.store.list_write_guard(self.ns.meta, &self.queue); - let mut meta = get_queue_meta(self.store, self.ns, &self.queue)?; - if meta.tail <= self.initial_end_idx && meta.tail > new_tail { - meta.tail = new_tail; - set_queue_meta(self.store, self.ns, &self.queue, meta)?; - } - Ok(()) + self.store.try_heal_range_tail( + self.ns.meta, + &self.queue, + self.initial_end_idx, + new_tail, + |store, queue| get_queue_meta(store, self.ns, queue), + |store, queue, meta| set_queue_meta(store, self.ns, queue, meta), + ) } } From b2d863d9910a3409efc7f145b56b03616892066f Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Sun, 29 Mar 2026 15:31:52 +0300 Subject: [PATCH 11/25] Compaction: set thread name --- src/store/compaction.rs | 136 +++++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 66 deletions(-) diff --git a/src/store/compaction.rs b/src/store/compaction.rs index 46ec796..649f127 100644 --- a/src/store/compaction.rs +++ b/src/store/compaction.rs @@ -253,83 +253,87 @@ impl CandyStore { self.inner.shutting_down.store(false, Ordering::Release); let ctx = Arc::clone(&self.inner); - let thd = std::thread::spawn(move || { - if ctx.config.compaction_throughput_bytes_per_sec == 0 { - // Compaction disabled — park until shutdown. 
- let mut state = ctx.compaction_state.lock(); - while !ctx.shutting_down.load(Ordering::Acquire) { - ctx.compaction_condvar.wait(&mut state); - } - return; - } - - let throughput_bytes_per_sec = ctx.config.compaction_throughput_bytes_per_sec as u64; - let tokens_per_unit = (throughput_bytes_per_sec / 10).max(1); - let burst_size = tokens_per_unit.saturating_mul(2); - let mut pacer = Pacer::new( - tokens_per_unit, - std::time::Duration::from_millis(100), - burst_size, - ); - - #[cfg(windows)] - let mut pending_deletions = Vec::::new(); - loop { - { + let thd = std::thread::Builder::new() + .name("candy_compact".into()) + .spawn(move || { + if ctx.config.compaction_throughput_bytes_per_sec == 0 { + // Compaction disabled — park until shutdown. let mut state = ctx.compaction_state.lock(); - while !state.wake_requested && !ctx.shutting_down.load(Ordering::Acquire) { + while !ctx.shutting_down.load(Ordering::Acquire) { ctx.compaction_condvar.wait(&mut state); } + return; + } - if ctx.shutting_down.load(Ordering::Acquire) { - break; - } + let throughput_bytes_per_sec = + ctx.config.compaction_throughput_bytes_per_sec as u64; + let tokens_per_unit = (throughput_bytes_per_sec / 10).max(1); + let burst_size = tokens_per_unit.saturating_mul(2); + let mut pacer = Pacer::new( + tokens_per_unit, + std::time::Duration::from_millis(100), + burst_size, + ); - state.wake_requested = false; - } + #[cfg(windows)] + let mut pending_deletions = Vec::::new(); loop { - let candidates = ctx.next_compaction_candidates(4); - if candidates.is_empty() { - break; - } - if ctx.shutting_down.load(Ordering::Acquire) { - return; + { + let mut state = ctx.compaction_state.lock(); + while !state.wake_requested && !ctx.shutting_down.load(Ordering::Acquire) { + ctx.compaction_condvar.wait(&mut state); + } + + if ctx.shutting_down.load(Ordering::Acquire) { + break; + } + + state.wake_requested = false; } - #[cfg(windows)] - Self::retry_pending_deletions(&ctx, &mut pending_deletions); - let t0 = 
std::time::Instant::now(); - let res = ctx.compact_files( - &candidates, - &mut pacer, - #[cfg(windows)] - &mut pending_deletions, - ); - let compaction_millis = - u64::try_from(t0.elapsed().as_millis()).unwrap_or(u64::MAX); - match res { - Ok(outcome) => { - ctx.stats - .num_compactions - .fetch_add(outcome.compacted_files, Ordering::Relaxed); - ctx.stats - .last_compaction_dur_ms - .store(compaction_millis, Ordering::Relaxed); - ctx.stats - .last_compaction_reclaimed_bytes - .store(outcome.reclaimed_bytes, Ordering::Relaxed); - ctx.stats - .last_compaction_moved_bytes - .store(outcome.moved_bytes, Ordering::Relaxed); + loop { + let candidates = ctx.next_compaction_candidates(4); + if candidates.is_empty() { + break; } - Err(_e) => { - ctx.stats.compaction_errors.fetch_add(1, Ordering::Relaxed); + if ctx.shutting_down.load(Ordering::Acquire) { + return; + } + #[cfg(windows)] + Self::retry_pending_deletions(&ctx, &mut pending_deletions); + let t0 = std::time::Instant::now(); + let res = ctx.compact_files( + &candidates, + &mut pacer, + #[cfg(windows)] + &mut pending_deletions, + ); + let compaction_millis = + u64::try_from(t0.elapsed().as_millis()).unwrap_or(u64::MAX); + match res { + Ok(outcome) => { + ctx.stats + .num_compactions + .fetch_add(outcome.compacted_files, Ordering::Relaxed); + ctx.stats + .last_compaction_dur_ms + .store(compaction_millis, Ordering::Relaxed); + ctx.stats + .last_compaction_reclaimed_bytes + .store(outcome.reclaimed_bytes, Ordering::Relaxed); + ctx.stats + .last_compaction_moved_bytes + .store(outcome.moved_bytes, Ordering::Relaxed); + } + Err(_e) => { + ctx.stats.compaction_errors.fetch_add(1, Ordering::Relaxed); + } } } + #[cfg(windows)] + Self::retry_pending_deletions(&ctx, &mut pending_deletions); } - #[cfg(windows)] - Self::retry_pending_deletions(&ctx, &mut pending_deletions); - } - }); + }) + .unwrap(); *compaction_thd = Some(thd); self.inner.signal_compaction_scan(); From 3591df0cf4180201aeb006c2de38397393d5fbdb Mon Sep 17 
00:00:00 2001 From: Tomer Filiba Date: Sun, 29 Mar 2026 15:46:09 +0300 Subject: [PATCH 12/25] README --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 25f098d..ffaa145 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ +
+ +
😸 v0.7 brings true crash-consistency, improved compaction and an overall simpler design. +We're also close to a stable file format!
+
+ # CandyStore A pure Rust implementation of a fast (*blazingly* :tm:, of course), persistent, in-process From ab11493711b21b64ab9da38a8a0a57344dff2e08 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Sun, 29 Mar 2026 15:56:05 +0300 Subject: [PATCH 13/25] Update README.md --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ffaa145..27e897c 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ -
- -
😸 v0.7 brings true crash-consistency, improved compaction and an overall simpler design. -We're also close to a stable file format!
-
+
+ +> [!NOTE] +> 😸 v0.7 brings true crash-consistency, improved compaction and an overall simpler design. +> We're also close to a stable file format! +> +> However, the file format is not compatible with older versions of Candy. # CandyStore From 20b2ce1e18ee6a8925f8f8a6a68799e3510c26b9 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Sun, 29 Mar 2026 16:01:46 +0300 Subject: [PATCH 14/25] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 27e897c..460e8b5 100644 --- a/README.md +++ b/README.md @@ -56,15 +56,15 @@ fn main() -> Result<()> { assert_eq!(val, Some(b"world".to_vec())); db.remove("hello")?; - db.set_in_list("cities", "barcelona", "")?; - db.set_in_list("cities", "chicago", "")?; - db.set_in_list("cities", "caracas", "")?; + db.set_in_list("cities", "Barcelona", "Spain")?; + db.set_in_list("cities", "Chicago", "USA")?; + db.set_in_list("cities", "Caracas", "Venezuela")?; let cities: Vec = db.iter_list("cities") .map(|res| String::from_utf8(res.unwrap().0).unwrap()) .collect(); - assert_eq!(cities, vec!["barcelona", "chicago", "caracas"]); + assert_eq!(cities, vec!["Barcelona", "Chicago", "Caracas"]); Ok(()) } From 3ebe3e4beadad0938dce300008e8695cd9b9a1a3 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Sun, 29 Mar 2026 16:21:39 +0300 Subject: [PATCH 15/25] Non-stop-the-world checkpointing --- src/data_file.rs | 241 +++++++++++++++++++++------- src/index_file.rs | 163 ++++++++++++++----- src/internal.rs | 2 +- src/store.rs | 273 ++++++++++++++++++++++--------- src/store/compaction.rs | 37 +++-- src/store/recovery.rs | 253 ++++++++++++++--------------- tests/common/mod.rs | 11 ++ tests/recovery.rs | 345 +++++++++++++++++++++++++++++++++++++++- tests/whitebox.rs | 39 ++++- 9 files changed, 1029 insertions(+), 335 deletions(-) diff --git a/src/data_file.rs b/src/data_file.rs index c529639..2842830 100644 --- a/src/data_file.rs +++ b/src/data_file.rs @@ -1,14 +1,15 @@ -use 
parking_lot::{Condvar, Mutex}; +use parking_lot::{Mutex, RwLock}; use smallvec::SmallVec; use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; use std::{ + collections::VecDeque, fs::File, mem::size_of, path::Path, sync::{ Arc, - atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}, + atomic::{AtomicBool, AtomicU64, Ordering}, }, }; @@ -40,27 +41,163 @@ struct DataFileHeader { const _: () = assert!(size_of::() == PAGE_SIZE); +struct InflightSlot { + seq: AtomicU64, + ordinal: AtomicU64, + offset: AtomicU64, +} + +pub(crate) struct InflightTracker { + snapshot_barrier: RwLock<()>, + next_seq: AtomicU64, + slots: Vec, + completed_deltas: Vec>>, +} + +impl InflightTracker { + pub(crate) fn new(num_shards: usize) -> Self { + Self { + snapshot_barrier: RwLock::new(()), + next_seq: AtomicU64::new(1), + slots: (0..num_shards) + .map(|_| InflightSlot { + seq: AtomicU64::new(0), + ordinal: AtomicU64::new(0), + offset: AtomicU64::new(0), + }) + .collect(), + completed_deltas: (0..num_shards) + .map(|_| Mutex::new(VecDeque::new())) + .collect(), + } + } + + fn reserve<'a>( + &'a self, + data_file: &DataFile, + shard_idx: usize, + len: u64, + delta: i8, + ) -> Result<(u64, InflightGuard<'a>)> { + let _barrier = self.snapshot_barrier.read(); + let offset = data_file.allocate(len)?; + let ordinal = data_file.file_ordinal; + let seq = self.next_seq.fetch_add(1, Ordering::Relaxed); + let slot = &self.slots[shard_idx]; + slot.ordinal.store(ordinal, Ordering::Relaxed); + slot.offset.store(offset, Ordering::Relaxed); + slot.seq.store(seq, Ordering::Release); + + Ok(( + offset, + InflightGuard { + tracker: self, + shard_idx, + seq, + delta, + armed: true, + }, + )) + } + + pub(crate) fn checkpoint_progress(&self, active_file: &DataFile) -> (u64, u64, i64) { + let _barrier = self.snapshot_barrier.write(); + + let mut min_slot: Option<(u64, u64, u64)> = None; + for slot in &self.slots { + let seq = slot.seq.load(Ordering::Acquire); + if seq == 0 { + continue; + } + let ordinal 
= slot.ordinal.load(Ordering::Relaxed); + let offset = slot.offset.load(Ordering::Relaxed); + let current = (seq, ordinal, offset); + min_slot = Some(min_slot.map_or(current, |min_current| min_current.min(current))); + } + + let checkpoint = min_slot + .map(|(_, ordinal, offset)| (ordinal, offset)) + .unwrap_or_else(|| (active_file.file_ordinal, active_file.used_bytes())); + let completed_before_seq = min_slot.map_or(u64::MAX, |(seq, _, _)| seq); + let mut committed_delta = 0i64; + for queue in &self.completed_deltas { + let mut queue = queue.lock(); + while let Some(&(seq, delta)) = queue.front() { + if seq >= completed_before_seq { + break; + } + queue.pop_front(); + committed_delta += delta; + } + } + + (checkpoint.0, checkpoint.1, committed_delta) + } + + pub(crate) fn clear_all(&self) { + let _barrier = self.snapshot_barrier.write(); + for slot in &self.slots { + slot.seq.store(0, Ordering::Release); + slot.ordinal.store(0, Ordering::Relaxed); + slot.offset.store(0, Ordering::Relaxed); + } + for queue in &self.completed_deltas { + queue.lock().clear(); + } + } + + fn clear_matching(&self, shard_idx: usize, expected_seq: u64) { + let _barrier = self.snapshot_barrier.read(); + let slot = &self.slots[shard_idx]; + if slot.seq.load(Ordering::Acquire) == expected_seq { + slot.seq.store(0, Ordering::Release); + } + } + + fn complete_matching(&self, shard_idx: usize, expected_seq: u64, delta: i8) { + let _barrier = self.snapshot_barrier.read(); + let slot = &self.slots[shard_idx]; + if slot.seq.load(Ordering::Acquire) == expected_seq { + slot.seq.store(0, Ordering::Release); + if delta != 0 { + self.completed_deltas[shard_idx] + .lock() + .push_back((expected_seq, i64::from(delta))); + } + } + } +} + pub(crate) struct InflightGuard<'a> { - data_file: &'a DataFile, + tracker: &'a InflightTracker, + shard_idx: usize, + seq: u64, + delta: i8, + armed: bool, } -impl Drop for InflightGuard<'_> { - fn drop(&mut self) { - self.data_file.finish_inflight(); +impl 
InflightGuard<'_> { + pub(crate) fn complete(mut self) { + if self.armed { + self.tracker + .complete_matching(self.shard_idx, self.seq, self.delta); + self.armed = false; + } } } -struct InflightWaiter { - mutex: Mutex<()>, - condvar: Condvar, +impl Drop for InflightGuard<'_> { + fn drop(&mut self) { + if self.armed { + self.tracker.clear_matching(self.shard_idx, self.seq); + } + } } pub(crate) struct DataFile { pub(crate) file: File, file_offset: AtomicU64, - inflight_writes: AtomicU32, sealed_for_rotation: AtomicBool, - inflight_waiter: InflightWaiter, config: Arc, pub(crate) file_idx: u16, pub(crate) file_ordinal: u64, @@ -171,12 +308,7 @@ impl DataFile { Ok(Self { file, file_offset: AtomicU64::new(file_offset), - inflight_writes: AtomicU32::new(0), sealed_for_rotation: AtomicBool::new(false), - inflight_waiter: InflightWaiter { - mutex: Mutex::new(()), - condvar: Condvar::new(), - }, config, file_idx, file_ordinal: header.ordinal, @@ -211,40 +343,21 @@ impl DataFile { Ok(Self { file, file_offset: AtomicU64::new(0), - inflight_writes: AtomicU32::new(0), sealed_for_rotation: AtomicBool::new(false), - inflight_waiter: InflightWaiter { - mutex: Mutex::new(()), - condvar: Condvar::new(), - }, config, file_idx, file_ordinal: ordinal, }) } - fn start_inflight(&self) -> Result> { - self.inflight_writes.fetch_add(1, Ordering::SeqCst); - if self.sealed_for_rotation.load(Ordering::SeqCst) { - self.finish_inflight(); - return Err(Error::RotateDataFile(self.file_idx)); - } - - Ok(InflightGuard { data_file: self }) - } - - fn finish_inflight(&self) { - if self.inflight_writes.fetch_sub(1, Ordering::SeqCst) == 1 { - let _guard = self.inflight_waiter.mutex.lock(); - self.inflight_waiter.condvar.notify_all(); - } - } - pub(crate) fn seal_for_rotation(&self) { self.sealed_for_rotation.store(true, Ordering::SeqCst); } fn allocate(&self, len: u64) -> Result { + if self.sealed_for_rotation.load(Ordering::SeqCst) { + return Err(Error::RotateDataFile(self.file_idx)); + } let mut 
file_offset = self.file_offset.load(Ordering::Relaxed); loop { if file_offset + len > self.config.max_data_file_size as u64 { @@ -262,13 +375,15 @@ impl DataFile { } } - fn append_entry( + fn append_entry<'a>( &self, entry_type: EntryType, ns: KeyNamespace, key: &[u8], val: Option<&[u8]>, - ) -> Result<(u64, usize, InflightGuard<'_>)> { + shard_idx: usize, + inflight_tracker: &'a InflightTracker, + ) -> Result<(u64, usize, InflightGuard<'a>)> { debug_assert!(key.len() <= MAX_USER_KEY_SIZE); debug_assert!(ns as u8 <= MAX_KEY_NAMESPACE); @@ -279,8 +394,13 @@ impl DataFile { let entry_len = 4 + if val.is_some() { 4 } else { 2 } + val_len + key.len() + 2; let aligned_len = entry_len.next_multiple_of(FILE_OFFSET_ALIGNMENT as usize); - let inflight_guard = self.start_inflight()?; - let file_offset = self.allocate(aligned_len as u64)?; + let delta = match entry_type { + EntryType::Insert => 1, + EntryType::Tombstone => -1, + _ => 0, + }; + let (file_offset, inflight_guard) = + inflight_tracker.reserve(self, shard_idx, aligned_len as u64, delta)?; debug_assert!(file_offset % FILE_OFFSET_ALIGNMENT == 0); let mut buf = SmallVec::<[u8; INLINE_SCRATCH_BUFFER_SIZE]>::with_capacity(aligned_len); @@ -319,36 +439,35 @@ impl DataFile { Ok((file_offset, aligned_len, inflight_guard)) } - /// Wait until all in-flight writes to this file have completed. 
- pub(crate) fn wait_inflight(&self) { - if self.inflight_writes.load(Ordering::SeqCst) == 0 { - return; - } - - let mut guard = self.inflight_waiter.mutex.lock(); - while self.inflight_writes.load(Ordering::Acquire) > 0 { - self.inflight_waiter.condvar.wait(&mut guard); - } - } - - pub(crate) fn append_kv( + pub(crate) fn append_kv<'a>( &self, entry_type: EntryType, ns: KeyNamespace, key: &[u8], val: &[u8], - ) -> Result<(u64, usize, InflightGuard<'_>)> { + shard_idx: usize, + inflight_tracker: &'a InflightTracker, + ) -> Result<(u64, usize, InflightGuard<'a>)> { debug_assert!(matches!(entry_type, EntryType::Insert | EntryType::Update)); - self.append_entry(entry_type, ns, key, Some(val)) + self.append_entry(entry_type, ns, key, Some(val), shard_idx, inflight_tracker) } - pub(crate) fn append_tombstone( + pub(crate) fn append_tombstone<'a>( &self, ns: KeyNamespace, key: &[u8], - ) -> Result<(usize, InflightGuard<'_>)> { - self.append_entry(EntryType::Tombstone, ns, key, None) - .map(|(_, len, guard)| (len, guard)) + shard_idx: usize, + inflight_tracker: &'a InflightTracker, + ) -> Result<(usize, InflightGuard<'a>)> { + self.append_entry( + EntryType::Tombstone, + ns, + key, + None, + shard_idx, + inflight_tracker, + ) + .map(|(_, len, guard)| (len, guard)) } pub(crate) fn read_kv_into<'a>( diff --git a/src/index_file.rs b/src/index_file.rs index b9276a8..2ff3eb2 100644 --- a/src/index_file.rs +++ b/src/index_file.rs @@ -5,12 +5,13 @@ use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, TryFromBytes}; use std::{ fs::File, + hash::Hasher, mem::{offset_of, size_of}, ops::{Deref, DerefMut}, path::Path, sync::{ Arc, - atomic::{AtomicI64, AtomicU32, AtomicU64, Ordering}, + atomic::{AtomicU32, AtomicU64, Ordering}, }, time::{Duration, Instant}, }; @@ -22,6 +23,34 @@ use crate::internal::{ }; use crate::types::{Config, Error, Result}; +const CHECKPOINT_SLOT_COUNT: usize = 2; + +#[derive(Clone, Copy)] +struct CheckpointCursor { + generation: u64, + file_ordinal: 
u64, + offset: u64, +} + +#[derive(FromBytes, IntoBytes, KnownLayout)] +#[repr(C)] +pub(crate) struct CheckpointSlot { + generation: AtomicU64, + file_ordinal: AtomicU64, + offset: AtomicU64, + checksum: AtomicU64, +} + +const _: () = assert!(size_of::() == 32); + +fn checkpoint_slot_checksum(generation: u64, file_ordinal: u64, offset: u64) -> u64 { + let mut hasher = siphasher::sip::SipHasher13::new(); + hasher.write_u64(generation); + hasher.write_u64(file_ordinal); + hasher.write_u64(offset); + hasher.finish() +} + #[derive(FromBytes, IntoBytes, KnownLayout)] #[repr(C)] pub(crate) struct IndexFileHeader { @@ -41,23 +70,21 @@ pub(crate) struct IndexFileHeader { /////////////////////////////////// // rebuild state /////////////////////////////////// - /// Persisted replay cursor: the active-file position already reflected in the index. - pub(crate) commit_file_ordinal: AtomicU64, - /// Persisted replay cursor offset within `commit_file_ordinal`. - pub(crate) commit_offset: AtomicU64, - _padding1024: [u8; 896 - 2 * 8], + /// Persisted replay cursor, double-buffered so recovery can pick the + /// newest valid slot after crashes or torn writes. 
+ pub(crate) checkpoint_slots: [CheckpointSlot; CHECKPOINT_SLOT_COUNT], + _padding1024: [u8; 896 - CHECKPOINT_SLOT_COUNT * 32], /////////////////////////////////// // stats /////////////////////////////////// pub(crate) committed_num_entries: AtomicU64, - pub(crate) uncommitted_entries_delta: AtomicI64, - _trailer: [u8; PAGE_SIZE - 1024 - 2 * 8], + _trailer: [u8; PAGE_SIZE - 1024 - 8], } const _: () = assert!(offset_of!(IndexFileHeader, global_split_level) == 64); -const _: () = assert!(offset_of!(IndexFileHeader, commit_file_ordinal) == 128); +const _: () = assert!(offset_of!(IndexFileHeader, checkpoint_slots) == 128); const _: () = assert!(offset_of!(IndexFileHeader, committed_num_entries) == 1024); const _: () = assert!(size_of::() == PAGE_SIZE); @@ -224,18 +251,6 @@ fn row_mut_bytes(bytes: &mut [u8], idx: usize) -> &mut RowLayout { .expect("row bytes should contain an aligned row") } -fn apply_signed_counter_delta(counter: &AtomicU64, delta: i64) { - if delta == 0 { - return; - } - - counter - .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |current| { - Some(current.saturating_add_signed(delta)) - }) - .unwrap(); -} - unsafe fn row_mut_ptr(base_ptr: *const u8, idx: usize) -> *mut RowLayout { unsafe { base_ptr.add(row_offset(idx)) as *mut RowLayout } } @@ -269,7 +284,8 @@ impl<'a> RowsTableReadGuard<'a> { } pub(crate) fn row_mut(&self, idx: usize) -> RowWriteGuard<'_> { - let row_guard = self.index_file.row_locks[idx & self.index_file.row_locks_mask].write(); + let shard_idx = idx & self.index_file.row_locks_mask; + let row_guard = self.index_file.row_locks[shard_idx].write(); let row_count = row_count_for_len(self.row_guard.len()); assert!( idx < row_count, @@ -279,6 +295,7 @@ impl<'a> RowsTableReadGuard<'a> { RowWriteGuard { _row_guard: row_guard, row, + shard_idx, } } @@ -323,6 +340,7 @@ impl Deref for RowReadGuard<'_> { pub(crate) struct RowWriteGuard<'a> { _row_guard: RwLockWriteGuard<'a, ()>, row: &'a mut RowLayout, + pub(crate) shard_idx: usize, } 
impl Deref for RowWriteGuard<'_> { @@ -353,9 +371,76 @@ pub(crate) struct IndexFile { row_locks: Vec>, row_locks_mask: usize, config: Arc, + /// Cached checkpoint state so concurrent readers (e.g. compaction candidate + /// selection) always see a consistent snapshot without going through the + /// double-buffer slot protocol. + cached_checkpoint_generation: AtomicU64, + cached_checkpoint_ordinal: AtomicU64, + cached_checkpoint_offset: AtomicU64, } impl IndexFile { + fn read_checkpoint_slot(slot: &CheckpointSlot) -> Option { + let generation = slot.generation.load(Ordering::Acquire); + if generation == 0 { + return None; + } + + let file_ordinal = slot.file_ordinal.load(Ordering::Relaxed); + let offset = slot.offset.load(Ordering::Relaxed); + let checksum = slot.checksum.load(Ordering::Acquire); + if checksum != checkpoint_slot_checksum(generation, file_ordinal, offset) { + return None; + } + + Some(CheckpointCursor { + generation, + file_ordinal, + offset, + }) + } + + fn durable_checkpoint(&self) -> Option { + self.header_ref() + .checkpoint_slots + .iter() + .filter_map(Self::read_checkpoint_slot) + .max_by_key(|cursor| cursor.generation) + } + + pub(crate) fn checkpoint_cursor(&self) -> (u64, u64) { + let generation = self.cached_checkpoint_generation.load(Ordering::Acquire); + if generation == 0 { + return (0, 0); + } + let ordinal = self.cached_checkpoint_ordinal.load(Ordering::Relaxed); + let offset = self.cached_checkpoint_offset.load(Ordering::Relaxed); + (ordinal, offset) + } + + pub(crate) fn persist_checkpoint_cursor(&self, ordinal: u64, offset: u64) { + let current_gen = self.cached_checkpoint_generation.load(Ordering::Relaxed); + let next_generation = current_gen + .checked_add(1) + .expect("checkpoint generation overflow"); + let slot = + &self.header_ref().checkpoint_slots[next_generation as usize % CHECKPOINT_SLOT_COUNT]; + + slot.checksum.store(0, Ordering::Release); + slot.generation.store(next_generation, Ordering::Relaxed); + 
slot.file_ordinal.store(ordinal, Ordering::Relaxed); + slot.offset.store(offset, Ordering::Relaxed); + slot.checksum.store( + checkpoint_slot_checksum(next_generation, ordinal, offset), + Ordering::Release, + ); + + // Update the cache so concurrent readers see the new values immediately. + self.cached_checkpoint_ordinal.store(ordinal, Ordering::Relaxed); + self.cached_checkpoint_offset.store(offset, Ordering::Relaxed); + self.cached_checkpoint_generation.store(next_generation, Ordering::Release); + } + #[cfg(target_os = "linux")] fn maybe_lock_mmap(config: &Config, mmap: &MmapMut) { if config.mlock_index { @@ -524,11 +609,21 @@ impl IndexFile { row_locks, row_locks_mask, config, + cached_checkpoint_generation: AtomicU64::new(0), + cached_checkpoint_ordinal: AtomicU64::new(0), + cached_checkpoint_offset: AtomicU64::new(0), }; if new_file { let rows_table = inst.rows_table_mut(); inst.init_header_and_rows(rows_table, hash_key)?; + } else if let Some(cursor) = inst.durable_checkpoint() { + inst.cached_checkpoint_generation + .store(cursor.generation, Ordering::Relaxed); + inst.cached_checkpoint_ordinal + .store(cursor.file_ordinal, Ordering::Relaxed); + inst.cached_checkpoint_offset + .store(cursor.offset, Ordering::Relaxed); } Ok(inst) @@ -544,18 +639,6 @@ impl IndexFile { Ok(()) } - pub(crate) fn sync_all_with_rows_guard( - &self, - rows_table: &mut RowsTableWriteGuard<'_>, - ) -> Result<()> { - rows_table.row_guard.flush().map_err(Error::IOError)?; - self.rows_file.sync_all().map_err(Error::IOError)?; - self.header_mmap.flush().map_err(Error::IOError)?; - #[cfg(windows)] - self.header_file.sync_all().map_err(Error::IOError)?; - Ok(()) - } - pub(crate) fn file_size_bytes(&self) -> u64 { let header = size_of::() as u64; let rows = self.rows_file.metadata().map(|m| m.len()).unwrap_or(0); @@ -611,14 +694,6 @@ impl IndexFile { self.full_header_ref().waste_levels[file_idx as usize].swap(0, Ordering::Relaxed) } - pub(crate) fn rollover_uncommitted_counters(&self) { - let 
h = self.header_ref(); - apply_signed_counter_delta( - &h.committed_num_entries, - h.uncommitted_entries_delta.swap(0, Ordering::Relaxed), - ); - } - pub(crate) fn grow(&self, nsl: u64) -> Result> { let mut layout_mut = self.rows_table_mut(); let gsl = self.header_ref().global_split_level.load(Ordering::Acquire); @@ -670,6 +745,10 @@ impl IndexFile { 1usize << gsl } + pub(crate) fn num_shards(&self) -> usize { + self.row_locks.len() + } + pub(crate) fn shrink_with_rows_guard( &self, min_rows: usize, diff --git a/src/internal.rs b/src/internal.rs index 96073d8..2475462 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -22,7 +22,7 @@ pub(crate) const MAX_DATA_FILES: u16 = 1 << 12; pub(crate) const MAX_DATA_FILE_IDX: u16 = MAX_DATA_FILES - 1; pub(crate) const INDEX_FILE_SIGNATURE: &[u8; 8] = b"CandyStr"; -pub(crate) const INDEX_FILE_VERSION: u32 = 0x0002_0007; +pub(crate) const INDEX_FILE_VERSION: u32 = 0x0002_0009; pub(crate) const DATA_FILE_SIGNATURE: &[u8; 8] = b"CandyDat"; pub(crate) const DATA_FILE_VERSION: u32 = 0x0002_0002; pub(crate) const FILE_OFFSET_ALIGNMENT: u64 = 16; diff --git a/src/store.rs b/src/store.rs index 413f910..6ad418e 100644 --- a/src/store.rs +++ b/src/store.rs @@ -14,13 +14,13 @@ use std::{ path::{Path, PathBuf}, sync::{ Arc, - atomic::{AtomicBool, AtomicU16, AtomicU32, AtomicU64, Ordering}, + atomic::{AtomicBool, AtomicI64, AtomicU16, AtomicU32, AtomicU64, Ordering}, }, time::Duration, }; use crate::{ - data_file::DataFile, + data_file::{DataFile, InflightTracker}, index_file::{EntryPointer, IndexFile, RowLayout, RowReadGuard, RowWriteGuard}, internal::{ EntryType, HashCoord, KeyNamespace, MAX_DATA_FILE_IDX, MAX_DATA_FILES, MIN_SPLIT_LEVEL, @@ -94,12 +94,15 @@ struct StoreInner { list_meta_locks: Vec>, list_meta_locks_mask: usize, data_files: RwLock>>, + inflight_tracker: InflightTracker, active_file_idx: AtomicU16, active_file_ordinal: AtomicU64, + uncommitted_entries_delta: AtomicI64, + checkpoint_lock: Mutex<()>, rotation_lock: 
Mutex<()>, compaction_state: Mutex, compaction_condvar: Condvar, - shutting_down: AtomicBool, + compaction_shutting_down: AtomicBool, stats: InnerStats, } @@ -110,6 +113,7 @@ struct ExistingEntryUpdate<'a> { val: &'a [u8], hc: HashCoord, col: usize, + shard_idx: usize, src_file_idx: u16, old_klen: usize, old_vlen: usize, @@ -141,6 +145,7 @@ impl StoreInner { state: OpenState, num_logical_locks: usize, ) -> Self { + let num_shards = state.index_file.num_shards(); Self { base_path, config, @@ -148,12 +153,15 @@ impl StoreInner { list_meta_locks: (0..num_logical_locks).map(|_| RwLock::new(())).collect(), list_meta_locks_mask: num_logical_locks - 1, data_files: RwLock::new(state.data_files), + inflight_tracker: InflightTracker::new(num_shards), active_file_idx: AtomicU16::new(state.active_file_idx), active_file_ordinal: AtomicU64::new(state.active_file_ordinal), + uncommitted_entries_delta: AtomicI64::new(0), + checkpoint_lock: Mutex::new(()), rotation_lock: Mutex::new(()), compaction_state: Mutex::new(CompactionState::default()), compaction_condvar: Condvar::new(), - shutting_down: AtomicBool::new(false), + compaction_shutting_down: AtomicBool::new(false), stats: InnerStats::default(), } } @@ -169,6 +177,7 @@ impl StoreInner { let mut data_files = self.data_files.write(); data_files.clear(); + self.inflight_tracker.clear_all(); self.index_file.reset(row_table)?; let index_path = index_file_path(self.base_path.as_path()); @@ -210,6 +219,7 @@ impl StoreInner { .store(active_file_idx, Ordering::Release); self.active_file_ordinal .store(active_file_ordinal, Ordering::Release); + self.uncommitted_entries_delta.store(0, Ordering::Relaxed); self.stats.reset(); Ok(()) @@ -268,11 +278,13 @@ impl StoreInner { fn next_compaction_candidates(&self, max_candidates: usize) -> Vec<(u16, u64)> { let active_file_idx = self.active_file_idx.load(Ordering::Acquire); + let commit_file_ordinal = self.index_file.checkpoint_cursor().0; let files = self.data_files.read(); let mut candidates = 
files .iter() .filter_map(|(&file_idx, data_file)| { if file_idx == active_file_idx + || data_file.file_ordinal >= commit_file_ordinal || self.index_file.file_waste(file_idx) <= self.config.compaction_min_threshold { return None; @@ -313,6 +325,12 @@ impl StoreInner { .ok_or(Error::MissingDataFile(file_idx)) } + fn ordered_data_files(&self) -> Vec> { + let mut files = self.data_files.read().values().cloned().collect::>(); + files.sort_by_key(|data_file| data_file.file_ordinal); + files + } + fn bump_histogram(&self, entry_size: u64) { // Buckets: [<64, <256, <1K, <4K, <16K, >=16K] // Boundaries at ilog2 = 6, 8, 10, 12, 14 → bucket = ((ilog2 - 4) / 2).clamp(0, 5) @@ -321,22 +339,81 @@ impl StoreInner { } fn add_uncommitted_num_entries(&self, delta: i64) { - self.index_file - .header_ref() - .uncommitted_entries_delta + self.uncommitted_entries_delta .fetch_add(delta, Ordering::Relaxed); } - fn persist_active_file_checkpoint(&self, active_file: &Arc) { - self.index_file.rollover_uncommitted_counters(); - self.index_file - .header_ref() - .commit_file_ordinal - .store(active_file.file_ordinal, Ordering::Release); - self.index_file - .header_ref() - .commit_offset - .store(active_file.used_bytes(), Ordering::Release); + /// Applies `delta` to the persisted committed entry count, clamping at + /// zero. Returns the actual change applied (which may differ from `delta` + /// when the count would underflow). 
+ fn advance_committed_num_entries(&self, delta: i64) -> i64 { + if delta == 0 { + return 0; + } + + let committed = &self.index_file.header_ref().committed_num_entries; + let mut current = committed.load(Ordering::Relaxed); + loop { + let updated = current.saturating_add_signed(delta); + match committed.compare_exchange_weak( + current, + updated, + Ordering::Relaxed, + Ordering::Relaxed, + ) { + Ok(_) => return updated as i64 - current as i64, + Err(observed) => current = observed, + } + } + } + + /// Folds a checkpointed delta into the persisted committed count and + /// adjusts the runtime uncommitted delta so that + /// `committed + uncommitted == live_count` is preserved. + /// + /// When many inserts and removes of the same keys happen within one + /// checkpoint window, the drained delta can be more negative than + /// `committed` can absorb (since it is unsigned). In that case only + /// the clamped portion is applied and the remainder stays in + /// `uncommitted_entries_delta`. 
+ fn fold_checkpointed_num_entries(&self, delta: i64) { + if delta == 0 { + return; + } + + let actual = self.advance_committed_num_entries(delta); + self.uncommitted_entries_delta + .fetch_add(-actual, Ordering::Relaxed); + } + + fn persist_checkpoint_cursor(&self, ordinal: u64, offset: u64) { + self.index_file.persist_checkpoint_cursor(ordinal, offset); + } + + fn checkpoint_cursor(&self) -> Result<()> { + let _checkpoint_lock = self.checkpoint_lock.lock(); + let files = self.data_files.read(); + let active_idx = self.active_file_idx.load(Ordering::Acquire); + let active_file = files + .get(&active_idx) + .cloned() + .ok_or(Error::MissingDataFile(active_idx))?; + let (checkpoint_ordinal, checkpoint_offset, checkpointed_delta) = + self.inflight_tracker.checkpoint_progress(&active_file); + + let last_commit_ordinal = self.index_file.checkpoint_cursor().0; + + for data_file in files.values() { + if data_file.file_ordinal >= last_commit_ordinal { + data_file.file.sync_all().map_err(Error::IOError)?; + } + } + drop(files); + + self.fold_checkpointed_num_entries(checkpointed_delta); + self.persist_checkpoint_cursor(checkpoint_ordinal, checkpoint_offset); + self.index_file.sync_all()?; + sync_dir(&self.base_path) } fn _split_row(&self, hc: HashCoord, sl: u64, gsl: u64) -> Result<()> { @@ -402,7 +479,6 @@ impl StoreInner { /// `compact_file` also writes to `data_files` (removing files) but only /// touches non-active indices, so there is no conflict. 
fn _rotate_data_file(&self, active_idx: u16) -> Result<()> { - let mut rows_table = self.index_file.rows_table_mut(); let _rot_lock = self.rotation_lock.lock(); if self.active_file_idx.load(Ordering::Acquire) != active_idx { @@ -434,11 +510,6 @@ impl StoreInner { )?); active_file.seal_for_rotation(); - active_file.wait_inflight(); - let _ = active_file.file.sync_all(); - - self.persist_active_file_checkpoint(&active_file); - let _ = self.index_file.sync_all_with_rows_guard(&mut rows_table); self.data_files.write().insert(next_idx, data_file); self.active_file_idx.store(next_idx, Ordering::Release); @@ -449,6 +520,8 @@ impl StoreInner { self.signal_compaction_scan(); } + self.checkpoint_cursor()?; + Ok(()) } @@ -529,38 +602,6 @@ impl CandyStore { &self.inner.base_path } - pub(super) fn checkpoint_locked(&self) -> Result<()> { - let _logical_guards = self - .inner - .list_meta_locks - .iter() - .map(|lock| lock.write()) - .collect::>(); - let mut rows_table = self.inner.index_file.rows_table_mut(); - let _rotation_lock = self.inner.rotation_lock.lock(); - - let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); - let active_file = self - .inner - .data_files - .read() - .get(&active_idx) - .cloned() - .ok_or(Error::MissingDataFile(active_idx))?; - - let files = self.inner.data_files.read(); - for data_file in files.values() { - data_file.file.sync_all().map_err(Error::IOError)?; - } - drop(files); - - self.inner.persist_active_file_checkpoint(&active_file); - self.inner - .index_file - .sync_all_with_rows_guard(&mut rows_table)?; - sync_dir(&self.inner.base_path) - } - fn list_read_guard(&self, ns: KeyNamespace, key: &[u8]) -> RwLockReadGuard<'_, ()> { self.inner.list_meta_locks[self.inner.logical_lock_index(ns, key)].read() } @@ -714,8 +755,14 @@ impl CandyStore { let active_file = files .get(&active_idx) .ok_or(Error::MissingDataFile(active_idx))?; - let (file_off, size, _inflight_guard) = - active_file.append_kv(EntryType::Insert, ns, key, val)?; + 
let (file_off, size, inflight_guard) = active_file.append_kv( + EntryType::Insert, + ns, + key, + val, + row.shard_idx, + &self.inner.inflight_tracker, + )?; self.inner.record_write(size as u64); row.insert( col, @@ -723,6 +770,7 @@ impl CandyStore { EntryPointer::new(active_idx, file_off, size, hc.masked_row_selector()), ); self.record_write_stats(key.len(), val.len()); + inflight_guard.complete(); Ok(GetOrCreateStatus::CreatedNew(val.to_vec())) } else { Err(Error::SplitRow(row.split_level.load(Ordering::Relaxed))) @@ -780,8 +828,14 @@ impl CandyStore { .files .get(&active_idx) .ok_or(Error::MissingDataFile(active_idx))?; - let (file_off, size, _inflight_guard) = - active_file.append_kv(EntryType::Update, update.ns, update.key, update.val)?; + let (file_off, size, inflight_guard) = active_file.append_kv( + EntryType::Update, + update.ns, + update.key, + update.val, + update.shard_idx, + &self.inner.inflight_tracker, + )?; self.inner.record_write(size as u64); if let Some(name) = update.crash_point_name { crate::crash_point(name); @@ -793,6 +847,7 @@ impl CandyStore { ); self.track_update_waste(update.src_file_idx, update.old_klen, update.old_vlen); self.record_replace_stats(update.key.len(), update.val.len()); + inflight_guard.complete(); Ok(()) } @@ -815,6 +870,7 @@ impl CandyStore { let old_val = kv.into_value(); let src_file_idx = file.file_idx; + let shard_idx = row.shard_idx; self.apply_update_to_existing_entry( &mut row, ExistingEntryUpdate { @@ -824,6 +880,7 @@ impl CandyStore { val, hc, col, + shard_idx, src_file_idx, old_klen: klen, old_vlen: vlen, @@ -844,8 +901,14 @@ impl CandyStore { let active_file = files .get(&active_idx) .ok_or(Error::MissingDataFile(active_idx))?; - let (file_off, size, _inflight_guard) = - active_file.append_kv(EntryType::Insert, ns, key, val)?; + let (file_off, size, inflight_guard) = active_file.append_kv( + EntryType::Insert, + ns, + key, + val, + row.shard_idx, + &self.inner.inflight_tracker, + )?; 
self.inner.record_write(size as u64); crate::crash_point("set_after_write_before_insert"); row.insert( @@ -854,6 +917,7 @@ impl CandyStore { EntryPointer::new(active_idx, file_off, size, hc.masked_row_selector()), ); self.record_write_stats(key.len(), val.len()); + inflight_guard.complete(); Ok(None) } else { Err(Error::SplitRow(row.split_level.load(Ordering::Relaxed))) @@ -902,6 +966,7 @@ impl CandyStore { let old_val = kv.into_value(); let src_file_idx = file.file_idx; + let shard_idx = row.shard_idx; self.apply_update_to_existing_entry( &mut row, ExistingEntryUpdate { @@ -911,6 +976,7 @@ impl CandyStore { val, hc, col, + shard_idx, src_file_idx, old_klen: klen, old_vlen: vlen, @@ -983,13 +1049,18 @@ impl CandyStore { let active_file = files .get(&active_idx) .ok_or(Error::MissingDataFile(active_idx))?; - let (tombstone_size, _inflight_guard) = - active_file.append_tombstone(ns, key)?; + let (tombstone_size, inflight_guard) = active_file.append_tombstone( + ns, + key, + row.shard_idx, + &self.inner.inflight_tracker, + )?; self.inner.record_write(tombstone_size as u64); row.remove(col); self.track_tombstone_waste(src_file_idx, klen, vlen); self.record_remove_stats(); + inflight_guard.complete(); return Ok(Some(old_val)); } } @@ -1098,16 +1169,29 @@ impl CandyStore { sync_dir(&self.inner.base_path) } - /// Establishes a durable recovery checkpoint at the current end of the active file. + /// Establishes a durable recovery checkpoint. + /// + /// Reads the earliest in-flight `(file_ordinal, offset)` tuple across all + /// shards to determine the first position that may still require replay. + /// If no writes are in flight, the checkpoint targets the active file tail. + /// Syncs the data and index files and advances the persisted replay cursor + /// so the next open can resume from this point without replaying earlier + /// writes. 
/// - /// This stops background compaction, blocks concurrent writers, syncs the - /// data and index files, and advances the persisted replay cursor so the - /// next open can resume from this point without replaying earlier writes. + /// This does **not** block concurrent writers or compaction, but does block + /// compound operations like lists/queues to checkpoint at well-defined states pub fn checkpoint(&self) -> Result<()> { - self.stop_compaction(); - let res = self.checkpoint_locked(); - self.start_compaction(); - res + // take all list_meta_locks so we don't ever create a checkpoint that has half-baked + // queue/list. note that rotation-induced checkpoints may do that, but user-induced ones + // will not. + let _logical_guards = self + .inner + .list_meta_locks + .iter() + .map(|lock| lock.write()) + .collect::>(); + + self.inner.checkpoint_cursor() } /// Returns the number of background compaction errors observed since open. @@ -1117,11 +1201,19 @@ impl CandyStore { /// Returns the number of currently live entries. pub fn num_items(&self) -> usize { - let h = self.inner.index_file.header_ref(); - h.committed_num_entries - .load(Ordering::Relaxed) - .saturating_add_signed(h.uncommitted_entries_delta.load(Ordering::Relaxed)) - as usize + let committed = self + .inner + .index_file + .header_ref() + .committed_num_entries + .load(Ordering::Relaxed); + let uncommitted = self.inner.uncommitted_entries_delta.load(Ordering::Relaxed); + let count = committed.saturating_add_signed(uncommitted); + debug_assert!( + (committed as i128 + uncommitted as i128) >= 0, + "live entry count underflow: committed={committed}, uncommitted={uncommitted}" + ); + count as usize } /// Returns the current index capacity in entries. 
@@ -1236,6 +1328,8 @@ impl CandyStore { mod tests { use super::*; + use std::{thread, time::Instant}; + use tempfile::tempdir; #[test] @@ -1325,4 +1419,31 @@ mod tests { Ok(()) } + + #[test] + fn test_checkpoint_does_not_join_compaction_thread() -> Result<()> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + db.stop_compaction(); + *db.compaction_thd.lock() = Some(thread::spawn(|| { + thread::sleep(Duration::from_millis(400)); + })); + + let t0 = Instant::now(); + db.checkpoint()?; + assert!( + t0.elapsed() < Duration::from_millis(200), + "checkpoint should not wait for the compaction thread handle" + ); + + db.compaction_thd + .lock() + .take() + .expect("test compaction thread should still be present") + .join() + .expect("test compaction thread panicked"); + + Ok(()) + } } diff --git a/src/store/compaction.rs b/src/store/compaction.rs index 649f127..00bce42 100644 --- a/src/store/compaction.rs +++ b/src/store/compaction.rs @@ -63,7 +63,7 @@ impl StoreInner { let mut row_idx = 0; loop { - if self.shutting_down.load(Ordering::Acquire) { + if self.compaction_shutting_down.load(Ordering::Acquire) { return Ok(CompactionOutcome { compacted_files: 0, reclaimed_bytes: 0, @@ -141,8 +141,10 @@ impl StoreInner { ns, kv.key(), kv.value(), + row.shard_idx, + &self.inflight_tracker, ) { - Ok((file_off, size, _inflight_guard)) => { + Ok((file_off, size, inflight_guard)) => { self.record_write(size as u64); moved_bytes = moved_bytes.saturating_add(size as u64); row.replace_pointer( @@ -154,6 +156,7 @@ impl StoreInner { entry.masked_row_selector(), ), ); + inflight_guard.complete(); break; } Err(Error::RotateDataFile(rotate_idx)) => { @@ -225,7 +228,9 @@ impl StoreInner { impl CandyStore { pub(super) fn stop_compaction(&self) { - self.inner.shutting_down.store(true, Ordering::Release); + self.inner + .compaction_shutting_down + .store(true, Ordering::Release); { let mut state = self.inner.compaction_state.lock(); 
state.wake_requested = true; @@ -251,7 +256,9 @@ impl CandyStore { return; } - self.inner.shutting_down.store(false, Ordering::Release); + self.inner + .compaction_shutting_down + .store(false, Ordering::Release); let ctx = Arc::clone(&self.inner); let thd = std::thread::Builder::new() .name("candy_compact".into()) @@ -259,7 +266,7 @@ impl CandyStore { if ctx.config.compaction_throughput_bytes_per_sec == 0 { // Compaction disabled — park until shutdown. let mut state = ctx.compaction_state.lock(); - while !ctx.shutting_down.load(Ordering::Acquire) { + while !ctx.compaction_shutting_down.load(Ordering::Acquire) { ctx.compaction_condvar.wait(&mut state); } return; @@ -280,11 +287,13 @@ impl CandyStore { loop { { let mut state = ctx.compaction_state.lock(); - while !state.wake_requested && !ctx.shutting_down.load(Ordering::Acquire) { + while !state.wake_requested + && !ctx.compaction_shutting_down.load(Ordering::Acquire) + { ctx.compaction_condvar.wait(&mut state); } - if ctx.shutting_down.load(Ordering::Acquire) { + if ctx.compaction_shutting_down.load(Ordering::Acquire) { break; } @@ -295,7 +304,7 @@ impl CandyStore { if candidates.is_empty() { break; } - if ctx.shutting_down.load(Ordering::Acquire) { + if ctx.compaction_shutting_down.load(Ordering::Acquire) { return; } #[cfg(windows)] @@ -347,7 +356,7 @@ impl Drop for CandyStore { if !self.allow_clean_shutdown.load(Ordering::Relaxed) { return; } - let _ = self.checkpoint_locked(); + let _ = self.checkpoint(); } } @@ -420,7 +429,9 @@ mod tests { let before_read_ops = db.stats().num_read_ops; let mut pacer = Pacer::new(u64::MAX / 4, Duration::from_secs(1), u64::MAX / 4); - db.inner.shutting_down.store(false, Ordering::Release); + db.inner + .compaction_shutting_down + .store(false, Ordering::Release); let outcome = db.inner.compact_files( &[(target_idx, target_ordinal)], &mut pacer, @@ -485,7 +496,9 @@ mod tests { let before_read_ops = db.stats().num_read_ops; let mut pacer = Pacer::new(u64::MAX / 4, 
Duration::from_secs(1), u64::MAX / 4); - db.inner.shutting_down.store(false, Ordering::Release); + db.inner + .compaction_shutting_down + .store(false, Ordering::Release); let outcome = db.inner.compact_files( &targets, &mut pacer, @@ -547,7 +560,7 @@ mod tests { let mut pacer = Pacer::new(256, Duration::from_millis(10), 256); db_for_compaction .inner - .shutting_down + .compaction_shutting_down .store(false, Ordering::Release); db_for_compaction.inner.compact_files( &[(target_idx, target_ordinal)], diff --git a/src/store/recovery.rs b/src/store/recovery.rs index ba2fb4f..3d2fafd 100644 --- a/src/store/recovery.rs +++ b/src/store/recovery.rs @@ -13,9 +13,10 @@ use crate::{ use super::CandyStore; +#[derive(Clone, Copy)] enum RebuildMode { TailFrom(u64), - FullActiveFile, + FullFile, } impl CandyStore { @@ -23,81 +24,98 @@ impl CandyStore { const REBUILD_CHECKPOINT_INTERVAL_BYTES: u64 = 256 * 1024; pub(super) fn recover_index(&self) -> Result<()> { - let active_idx = self.inner.active_file_idx.load(Ordering::Acquire); - let active_file = self - .inner - .data_files - .read() - .get(&active_idx) - .cloned() - .ok_or(Error::MissingDataFile(active_idx))?; - - let commit_file_ordinal = self - .inner - .index_file - .header_ref() - .commit_file_ordinal - .load(Ordering::Acquire); - let commit_offset = self - .inner - .index_file - .header_ref() - .commit_offset - .load(Ordering::Acquire); - - let rebuild_mode = if active_file.file_ordinal == commit_file_ordinal { - self.validated_commit_offset(&active_file, commit_offset)? 
- } else { - RebuildMode::TailFrom(0) + let ordered_files = self.inner.ordered_data_files(); + let Some(last_file) = ordered_files.last().cloned() else { + return Ok(()); }; - let start_offset = match rebuild_mode { - RebuildMode::TailFrom(offset) => offset, - RebuildMode::FullActiveFile => 0, + let (commit_file_ordinal, commit_offset) = self.inner.index_file.checkpoint_cursor(); + + let start_idx = ordered_files + .iter() + .position(|data_file| data_file.file_ordinal >= commit_file_ordinal) + .unwrap_or(ordered_files.len() - 1); + let start_file = &ordered_files[start_idx]; + let rebuild_mode = if start_file.file_ordinal == commit_file_ordinal { + self.validated_commit_offset(start_file, commit_offset)? + } else { + RebuildMode::TailFrom(0) }; - // The committed cursor marks the active-file prefix already reflected - // in the index. We only need to rebuild the uncommitted entries delta, - // since data_bytes and waste_bytes are derived from file sizes and - // per-file waste levels at query time. - // - // The entries delta can be recomputed exactly from the on-disk entry - // types: InsertData → +1, UpdateData → 0, Tombstone → −1. + // Recompute the runtime-only delta from the persisted replay cursor. 
self.inner - .index_file - .header_ref() .uncommitted_entries_delta .store(0, Ordering::Relaxed); + let mut match_scratch = Vec::new(); + let mut bytes_since_checkpoint = 0u64; + let mut pending_committed_delta = 0i64; + + let mut final_cursor = (last_file.file_ordinal, last_file.used_bytes()); + for (idx, data_file) in ordered_files.iter().enumerate().skip(start_idx) { + let file_mode = if idx == start_idx { + rebuild_mode + } else { + RebuildMode::TailFrom(0) + }; + let durable_extent = self.rebuild_file_from( + data_file, + file_mode, + &mut bytes_since_checkpoint, + &mut pending_committed_delta, + &mut match_scratch, + )?; + final_cursor = (data_file.file_ordinal, durable_extent); + } + + self.persist_rebuild_checkpoint(final_cursor.0, final_cursor.1, pending_committed_delta)?; + debug_assert_eq!( + self.inner.uncommitted_entries_delta.load(Ordering::Relaxed), + 0 + ); + + Ok(()) + } + + fn rebuild_file_from( + &self, + data_file: &Arc, + rebuild_mode: RebuildMode, + bytes_since_checkpoint: &mut u64, + pending_committed_delta: &mut i64, + match_scratch: &mut Vec, + ) -> Result { + let start_offset = match rebuild_mode { + RebuildMode::TailFrom(offset) => offset, + RebuildMode::FullFile => 0, + }; // Pre-purge any index entries that point past the file's durable // extent. This handles the case where the data file was truncated // (e.g. disk-full or corruption) and ensures the replay loop won't // encounter stale pointers when comparing existing entries. 
- let pre_rebuild_used_bytes = active_file.used_bytes(); + let pre_rebuild_used_bytes = data_file.used_bytes(); let pre_purge_extent = pre_rebuild_used_bytes.next_multiple_of(FILE_OFFSET_ALIGNMENT); - self.purge_uncommitted_file_entries(active_idx, pre_purge_extent)?; + self.apply_recovery_delta( + self.purge_uncommitted_file_entries(data_file.file_idx, pre_purge_extent)?, + pending_committed_delta, + ); - if matches!(rebuild_mode, RebuildMode::FullActiveFile) { - // The saved active-file cursor is no longer trustworthy. Remove - // every active-file pointer from the index, then rebuild that - // file's contribution from offset 0 on top of the older files. - self.purge_uncommitted_file_entries(active_idx, 0)?; - self.inner - .index_file - .header_ref() - .committed_num_entries - .store(self.count_live_index_entries(), Ordering::Relaxed); + if matches!(rebuild_mode, RebuildMode::FullFile) { + // The saved checkpoint within this file is no longer trustworthy. + // Remove every pointer into it and rebuild its contribution from 0. + self.apply_recovery_delta( + self.purge_uncommitted_file_entries(data_file.file_idx, 0)?, + pending_committed_delta, + ); } let mut offset = start_offset; let mut read_buf = Vec::new(); let mut buf_file_offset = 0u64; - let mut match_scratch = Vec::new(); - let mut bytes_since_checkpoint = 0u64; let mut last_durable_offset = start_offset; loop { let Some((kv, entry_offset, next_offset)) = - active_file.read_next_entry_ref(offset, &mut read_buf, &mut buf_file_offset)? + data_file.read_next_entry_ref(offset, &mut read_buf, &mut buf_file_offset)? else { break; }; @@ -108,17 +126,10 @@ impl CandyStore { return Err(invalid_data_error("unknown key namespace in data file")); }; - // Count the entry's contribution to the entries delta based on its - // on-disk type. This is unconditional — it doesn't matter whether - // the index pointer was already applied or not. 
- match kv.entry_type { - EntryType::Insert => self.inner.add_uncommitted_num_entries(1), - EntryType::Tombstone => self.inner.add_uncommitted_num_entries(-1), - _ => {} // UpdateData and any future types don't change num_items - } - - // Fix up the index pointers (no stats accounting). - self.recover_entry(&active_file, ns, kv, entry_offset, &mut match_scratch)?; + self.apply_recovery_delta( + self.recover_entry(data_file, ns, kv, entry_offset, match_scratch)?, + pending_committed_delta, + ); self.inner .stats .num_rebuilt_entries @@ -126,10 +137,15 @@ impl CandyStore { last_durable_offset = next_offset; crash_point("rebuild_entry"); - bytes_since_checkpoint += entry_bytes; - if bytes_since_checkpoint >= Self::REBUILD_CHECKPOINT_INTERVAL_BYTES { - self.flush_rebuild_checkpoint(active_file.file_ordinal, offset)?; - bytes_since_checkpoint = 0; + *bytes_since_checkpoint += entry_bytes; + if *bytes_since_checkpoint >= Self::REBUILD_CHECKPOINT_INTERVAL_BYTES { + self.persist_rebuild_checkpoint( + data_file.file_ordinal, + offset, + *pending_committed_delta, + )?; + *pending_committed_delta = 0; + *bytes_since_checkpoint = 0; } } @@ -140,18 +156,14 @@ impl CandyStore { .stats .num_rebuild_purged_bytes .fetch_add(pre_rebuild_used_bytes - durable_extent, Ordering::Relaxed); - active_file.truncate_to_offset(durable_extent)?; + data_file.truncate_to_offset(durable_extent)?; } - // Purge any phantom index entries that reference the active file - // beyond this point (OS flushed the index page but not the data). - self.purge_uncommitted_file_entries(active_idx, durable_extent)?; - - // Advance the persisted replay cursor to the end of what recovery - // verified and applied to the index. 
- self.flush_rebuild_checkpoint(active_file.file_ordinal, durable_extent)?; - - Ok(()) + self.apply_recovery_delta( + self.purge_uncommitted_file_entries(data_file.file_idx, durable_extent)?, + pending_committed_delta, + ); + Ok(durable_extent) } fn validated_commit_offset( @@ -165,7 +177,7 @@ impl CandyStore { let used_bytes = active_file.used_bytes(); if checkpoint_offset > used_bytes { - return Ok(RebuildMode::FullActiveFile); + return Ok(RebuildMode::FullFile); } if checkpoint_offset == used_bytes { return Ok(RebuildMode::TailFrom(checkpoint_offset)); @@ -181,36 +193,24 @@ impl CandyStore { Some((_, entry_offset, _)) if entry_offset == checkpoint_offset => { Ok(RebuildMode::TailFrom(checkpoint_offset)) } - _ => Ok(RebuildMode::FullActiveFile), + _ => Ok(RebuildMode::FullFile), } } - fn flush_rebuild_checkpoint(&self, ordinal: u64, offset: u64) -> Result<()> { + fn persist_rebuild_checkpoint(&self, ordinal: u64, offset: u64, delta: i64) -> Result<()> { let resume_offset = offset.next_multiple_of(FILE_OFFSET_ALIGNMENT); - // Persist the same prefix in both index rows and committed counters - // before advancing the replay cursor. The cursor itself must hit disk - // after the rows it covers. - self.inner.index_file.rollover_uncommitted_counters(); - - self.inner - .index_file - .header_ref() - .commit_file_ordinal - .store(ordinal, Ordering::Release); - self.inner - .index_file - .header_ref() - .commit_offset - .store(resume_offset, Ordering::Release); + self.inner.fold_checkpointed_num_entries(delta); + self.inner.persist_checkpoint_cursor(ordinal, resume_offset); self.inner.index_file.sync_all() } /// Remove index entries pointing to the active file at or beyond `durable_extent`. 
- fn purge_uncommitted_file_entries(&self, file_idx: u16, min_offset: u64) -> Result<()> { + fn purge_uncommitted_file_entries(&self, file_idx: u16, min_offset: u64) -> Result { let row_table = self.inner.index_file.rows_table(); let num_rows = self.inner.index_file.num_rows(); + let mut removed = 0i64; for row_idx in 0..num_rows { let mut row = row_table.row_mut(row_idx); @@ -227,30 +227,11 @@ impl CandyStore { } if ptr.file_idx() == file_idx && ptr.file_offset() >= min_offset { row.remove(col); + removed += 1; } } } - Ok(()) - } - - fn count_live_index_entries(&self) -> u64 { - let row_table = self.inner.index_file.rows_table(); - let num_rows = self.inner.index_file.num_rows(); - let mut total = 0u64; - - for row_idx in 0..num_rows { - let row = row_table.row(row_idx); - if row.split_level.load(Ordering::Acquire) == 0 { - continue; - } - for col in 0..ROW_WIDTH { - if row.signatures[col] != HashCoord::INVALID_SIG && row.pointers[col].is_valid() { - total += 1; - } - } - } - - total + Ok(-removed) } fn recover_entry( @@ -260,18 +241,26 @@ impl CandyStore { kv: KVRef<'_>, entry_offset: u64, match_scratch: &mut Vec, - ) -> Result<()> { + ) -> Result { match kv.entry_type { EntryType::Insert | EntryType::Update => { self.recover_data_entry(data_file, ns, kv, entry_offset, match_scratch) } EntryType::Tombstone => self.recover_tombstone_entry(data_file, ns, kv, match_scratch), - _ => Ok(()), + _ => Ok(0), } } - /// Fix index pointers for a data/update entry. No stats accounting — - /// the entries delta is handled by the caller based on the on-disk entry type. + fn apply_recovery_delta(&self, delta: i64, pending_committed_delta: &mut i64) { + if delta == 0 { + return; + } + + self.inner.add_uncommitted_num_entries(delta); + *pending_committed_delta += delta; + } + + /// Fix index pointers for a data/update entry and return its live-entry delta. 
fn recover_data_entry( &self, data_file: &Arc, @@ -279,7 +268,7 @@ impl CandyStore { kv: KVRef<'_>, entry_offset: u64, match_scratch: &mut Vec, - ) -> Result<()> { + ) -> Result { let key = kv.key(); let val = kv.value(); self.validate_recovered_data_entry(key, val)?; @@ -304,37 +293,37 @@ impl CandyStore { if existing_kv.key() == key { if entry == ptr { // Already points at this entry — nothing to fix. - return Ok(()); + return Ok(0); } if entry.file_idx() == data_file.file_idx && entry.file_offset() > ptr.file_offset() { // A newer active-file entry already exists — skip. - return Ok(()); + return Ok(0); } // Older pointer — replace with this newer one. row.replace_pointer(col, ptr); - return Ok(()); + return Ok(0); } } // Key not in index — insert it. if let Some(col) = row.find_free_slot() { row.insert(col, hc.sig, ptr); - Ok(()) + Ok(1) } else { Err(Error::SplitRow(row.split_level.load(Ordering::Relaxed))) } }) } - /// Fix index pointers for a tombstone entry. No stats accounting. + /// Fix index pointers for a tombstone entry and return its live-entry delta. 
fn recover_tombstone_entry( &self, _data_file: &Arc, ns: KeyNamespace, kv: KVRef<'_>, match_scratch: &mut Vec, - ) -> Result<()> { + ) -> Result { let key = kv.key(); self.validate_recovered_tombstone_entry(key)?; @@ -348,10 +337,10 @@ impl CandyStore { file.read_kv_into(entry.file_offset(), entry.size_hint(), match_scratch)?; if existing_kv.key() == key { row.remove(col); - return Ok(()); + return Ok(-1); } } - Ok(()) + Ok(0) }) } diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 1fa1d6f..181cb8d 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1,3 +1,5 @@ +use std::hash::Hasher; + use candystore::Config; #[allow(dead_code)] @@ -7,3 +9,12 @@ pub fn small_file_config() -> Config { ..Config::default() } } + +#[allow(dead_code)] +pub fn checkpoint_slot_checksum(generation: u64, ordinal: u64, offset: u64) -> u64 { + let mut hasher = siphasher::sip::SipHasher13::new(); + hasher.write_u64(generation); + hasher.write_u64(ordinal); + hasher.write_u64(offset); + hasher.finish() +} diff --git a/tests/recovery.rs b/tests/recovery.rs index 09df15e..227be63 100644 --- a/tests/recovery.rs +++ b/tests/recovery.rs @@ -8,6 +8,12 @@ use std::sync::Arc; use candystore::{CandyStore, CandyTypedDeque, CandyTypedList, CandyTypedStore, Config, Error}; use tempfile::tempdir; +use crate::common::checkpoint_slot_checksum; + +const CHECKPOINT_SLOT_0_OFFSET: u64 = 128; +const CHECKPOINT_SLOT_STRIDE: u64 = 32; +const CHECKPOINT_SLOT_CHECKSUM_OFFSET: u64 = 24; + fn patterned_bytes_with_seed(len: usize, seed: usize) -> Vec { (0..len) .map(|idx| (((idx * 31) + (seed * 17)) % 251) as u8) @@ -97,22 +103,165 @@ fn active_file_ordinal(dir: &std::path::Path) -> Result { }) } +fn data_files_by_ordinal(dir: &std::path::Path) -> Result, Error> { + let mut files = Vec::new(); + + for entry in std::fs::read_dir(dir).map_err(Error::IOError)? 
{ + let entry = entry.map_err(Error::IOError)?; + let path = entry.path(); + let Some(name) = path.file_name().and_then(|name| name.to_str()) else { + continue; + }; + if !name.starts_with("data_") { + continue; + } + + let mut file = std::fs::OpenOptions::new() + .read(true) + .open(&path) + .map_err(Error::IOError)?; + file.seek(SeekFrom::Start(16)).map_err(Error::IOError)?; + let mut buf = [0u8; 8]; + file.read_exact(&mut buf).map_err(Error::IOError)?; + let ordinal = u64::from_le_bytes(buf); + let used_bytes = file + .metadata() + .map_err(Error::IOError)? + .len() + .saturating_sub(4096); + files.push((ordinal, used_bytes)); + } + + files.sort_by_key(|(ordinal, _)| *ordinal); + Ok(files) +} + +fn data_file_records_by_ordinal( + dir: &std::path::Path, +) -> Result, Error> { + let mut files = Vec::new(); + + for entry in std::fs::read_dir(dir).map_err(Error::IOError)? { + let entry = entry.map_err(Error::IOError)?; + let path = entry.path(); + let Some(name) = path.file_name().and_then(|name| name.to_str()) else { + continue; + }; + let Some(file_idx) = name + .strip_prefix("data_") + .and_then(|suffix| suffix.parse::().ok()) + else { + continue; + }; + + let mut file = std::fs::OpenOptions::new() + .read(true) + .open(&path) + .map_err(Error::IOError)?; + file.seek(SeekFrom::Start(16)).map_err(Error::IOError)?; + let mut buf = [0u8; 8]; + file.read_exact(&mut buf).map_err(Error::IOError)?; + let ordinal = u64::from_le_bytes(buf); + let used_bytes = file + .metadata() + .map_err(Error::IOError)? 
+ .len() + .saturating_sub(4096); + files.push((file_idx, ordinal, used_bytes, path)); + } + + files.sort_by_key(|(_, ordinal, _, _)| *ordinal); + Ok(files) +} + fn write_commit_cursor(dir: &std::path::Path, offset: u64) -> Result<(), Error> { + let ordinal = active_file_ordinal(dir)?; + write_commit_cursor_for_ordinal(dir, ordinal, offset) +} + +fn write_commit_cursor_for_ordinal( + dir: &std::path::Path, + ordinal: u64, + offset: u64, +) -> Result<(), Error> { let mut file = std::fs::OpenOptions::new() .read(true) .write(true) .open(dir.join("index")) .map_err(Error::IOError)?; - let ordinal = active_file_ordinal(dir)?; + let generation = next_checkpoint_generation(&mut file)?; + let checksum = checkpoint_slot_checksum(generation, ordinal, offset); + let slot_offset = 128 + (generation as u64 % 2) * 32; + + file.seek(SeekFrom::Start(slot_offset)) + .map_err(Error::IOError)?; + file.write_all(&generation.to_le_bytes()) + .map_err(Error::IOError)?; - file.seek(SeekFrom::Start(128)).map_err(Error::IOError)?; + file.seek(SeekFrom::Start(slot_offset + 8)) + .map_err(Error::IOError)?; file.write_all(&ordinal.to_le_bytes()) .map_err(Error::IOError)?; - file.seek(SeekFrom::Start(136)).map_err(Error::IOError)?; + file.seek(SeekFrom::Start(slot_offset + 16)) + .map_err(Error::IOError)?; file.write_all(&offset.to_le_bytes()) .map_err(Error::IOError)?; + + file.seek(SeekFrom::Start(slot_offset + 24)) + .map_err(Error::IOError)?; + file.write_all(&checksum.to_le_bytes()) + .map_err(Error::IOError)?; + file.sync_all().map_err(Error::IOError)?; + Ok(()) +} + +fn next_checkpoint_generation(file: &mut std::fs::File) -> Result { + use std::io::Read; + + let mut max_generation = 0u64; + for slot_offset in [128u64, 160u64] { + file.seek(SeekFrom::Start(slot_offset)) + .map_err(Error::IOError)?; + let mut buf = [0u8; 8]; + file.read_exact(&mut buf).map_err(Error::IOError)?; + max_generation = max_generation.max(u64::from_le_bytes(buf)); + } + + Ok(max_generation + 1) +} + +fn 
corrupt_latest_checkpoint_slot_checksum(dir: &std::path::Path) -> Result<(), Error> { + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(dir.join("index")) + .map_err(Error::IOError)?; + + let mut latest_generation = 0u64; + let mut latest_slot_offset = CHECKPOINT_SLOT_0_OFFSET; + for slot_offset in [ + CHECKPOINT_SLOT_0_OFFSET, + CHECKPOINT_SLOT_0_OFFSET + CHECKPOINT_SLOT_STRIDE, + ] { + file.seek(SeekFrom::Start(slot_offset)) + .map_err(Error::IOError)?; + let mut buf = [0u8; 8]; + file.read_exact(&mut buf).map_err(Error::IOError)?; + let generation = u64::from_le_bytes(buf); + if generation >= latest_generation { + latest_generation = generation; + latest_slot_offset = slot_offset; + } + } + + file.seek(SeekFrom::Start( + latest_slot_offset + CHECKPOINT_SLOT_CHECKSUM_OFFSET, + )) + .map_err(Error::IOError)?; + file.write_all(&0u32.to_le_bytes()) + .map_err(Error::IOError)?; file.sync_all().map_err(Error::IOError)?; Ok(()) } @@ -900,6 +1049,38 @@ fn test_progressive_rebuild_ignores_bogus_checkpoint_offset() -> Result<(), Erro Ok(()) } +#[test] +fn test_progressive_rebuild_falls_back_to_older_valid_checkpoint_slot() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 1024, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + for i in 0..100 { + db.set(format!("key{i:04}"), format!("val{i:04}"))?; + } + db._abort_for_testing(); + } + + write_commit_cursor(dir.path(), 0)?; + write_commit_cursor(dir.path(), 0xFFFF_FFFF)?; + corrupt_latest_checkpoint_slot_checksum(dir.path())?; + + let db = CandyStore::open(dir.path(), config)?; + for i in 0..100 { + assert_eq!( + db.get(format!("key{i:04}"))?, + Some(format!("val{i:04}").into_bytes()), + "key{i:04} missing after fallback to older valid checkpoint slot" + ); + } + + Ok(()) +} + #[test] fn test_clean_reopen_rebuilds_invalid_active_checkpoint_across_multiple_data_files() -> Result<(), Error> { @@ -943,3 
+1124,161 @@ fn test_clean_reopen_rebuilds_invalid_active_checkpoint_across_multiple_data_fil Ok(()) } + +#[test] +fn test_recovery_replays_later_files_after_checkpointing_an_older_file() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::small_file_config(); + + let total_keys; + + { + let db = CandyStore::open(dir.path(), config)?; + let mut next_idx = 0usize; + while db.stats().num_data_files < 3 { + let key = format!("older-cursor-{next_idx:04}"); + let value = patterned_bytes_with_seed(512, next_idx); + db.set(&key, &value)?; + next_idx += 1; + } + total_keys = next_idx; + } + + let files = data_files_by_ordinal(dir.path())?; + assert!( + files.len() >= 3, + "expected multiple data files for replay test" + ); + let (older_ordinal, older_used_bytes) = files[0]; + write_commit_cursor_for_ordinal(dir.path(), older_ordinal, older_used_bytes)?; + + let db = CandyStore::open(dir.path(), config)?; + assert!( + db.stats().num_rebuilt_entries > 0, + "expected recovery to replay later files after rewinding commit cursor" + ); + for idx in 0..total_keys { + let key = format!("older-cursor-{idx:04}"); + assert_eq!(db.get(&key)?, Some(patterned_bytes_with_seed(512, idx))); + } + assert_eq!(db.num_items(), total_keys); + + Ok(()) +} + +#[test] +fn test_recovery_replays_later_files_when_checkpoint_file_is_missing() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 16 * 1024, + compaction_min_threshold: u32::MAX, + compaction_throughput_bytes_per_sec: 0, + ..Config::default() + }; + + let mut expected = Vec::new(); + { + let db = CandyStore::open(dir.path(), config)?; + let mut update_idx = 0usize; + let final_hot = loop { + let value = patterned_bytes_with_seed(6 * 1024, update_idx); + db.set("hot", &value)?; + update_idx += 1; + if db.stats().num_data_files >= 5 { + break value; + } + }; + expected.push(("hot".to_owned(), final_hot)); + + for idx in 0..4usize { + let key = 
format!("tail-live-{idx:02}"); + let value = patterned_bytes_with_seed(2048, 10_000 + idx); + db.set(&key, &value)?; + expected.push((key, value)); + } + } + + let files = data_file_records_by_ordinal(dir.path())?; + assert!( + files.len() >= 4, + "expected enough rotated files to simulate a missing checkpoint file" + ); + + let (_, missing_ordinal, missing_used_bytes, missing_path) = &files[1]; + write_commit_cursor_for_ordinal(dir.path(), *missing_ordinal, *missing_used_bytes)?; + fs::remove_file(missing_path).map_err(Error::IOError)?; + + let db = CandyStore::open(dir.path(), config)?; + assert!( + db.stats().num_rebuilt_entries > 0, + "expected recovery to replay entries after a missing checkpoint file" + ); + for (key, value) in expected { + assert_eq!( + db.get(&key)?, + Some(value), + "{key} missing after replaying past a missing checkpoint file" + ); + } + + Ok(()) +} + +#[test] +fn test_recovery_ignores_missing_compacted_files_before_checkpoint_cursor() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 16 * 1024, + compaction_min_threshold: u32::MAX, + compaction_throughput_bytes_per_sec: 0, + ..Config::default() + }; + + let mut expected = Vec::new(); + { + let db = CandyStore::open(dir.path(), config)?; + let mut update_idx = 0usize; + let final_hot = loop { + let value = patterned_bytes_with_seed(6 * 1024, 20_000 + update_idx); + db.set("hot", &value)?; + update_idx += 1; + if db.stats().num_data_files >= 5 { + break value; + } + }; + expected.push(("hot".to_owned(), final_hot)); + + for idx in 0..6usize { + let key = format!("post-cursor-live-{idx:02}"); + let value = patterned_bytes_with_seed(1536, 30_000 + idx); + db.set(&key, &value)?; + expected.push((key, value)); + } + } + + let files = data_file_records_by_ordinal(dir.path())?; + assert!( + files.len() >= 5, + "expected enough files to simulate compacted files before the checkpoint cursor" + ); + + let (_, checkpoint_ordinal, 
checkpoint_used_bytes, _) = &files[2]; + let (_, _, _, missing_path) = &files[0]; + write_commit_cursor_for_ordinal(dir.path(), *checkpoint_ordinal, *checkpoint_used_bytes)?; + fs::remove_file(missing_path).map_err(Error::IOError)?; + + let db = CandyStore::open(dir.path(), config)?; + assert!( + db.stats().num_rebuilt_entries > 0, + "expected recovery to replay entries after skipping compacted files before the cursor" + ); + for (key, value) in expected { + assert_eq!( + db.get(&key)?, + Some(value), + "{key} missing after skipping compacted files before the checkpoint cursor" + ); + } + + Ok(()) +} diff --git a/tests/whitebox.rs b/tests/whitebox.rs index e960bd5..f4181cd 100644 --- a/tests/whitebox.rs +++ b/tests/whitebox.rs @@ -5,6 +5,7 @@ #![cfg(feature = "whitebox-testing")] mod common; +use crate::common::checkpoint_slot_checksum; use std::io::{Seek, SeekFrom, Write}; use std::path::Path; @@ -23,10 +24,12 @@ const PTRS_OFFSET: usize = SIGS_OFFSET + ROW_WIDTH * 4; /// FILE_OFFSET_ALIGNMENT used in EntryPointer encoding. const FILE_OFFSET_ALIGNMENT: u64 = 16; -/// Offset of `commit_file_ordinal` in the index header. -const COMMIT_FILE_ORDINAL_OFFSET: u64 = 128; -/// Offset of `commit_offset` in the index header. -const COMMIT_OFFSET: u64 = 136; +/// Offset of checkpoint slot 0 within the index header. 
+const CHECKPOINT_SLOT_0_OFFSET: u64 = 128; +const CHECKPOINT_SLOT_GENERATION_OFFSET: u64 = 0; +const CHECKPOINT_SLOT_ORDINAL_OFFSET: u64 = 8; +const CHECKPOINT_SLOT_FILE_OFFSET: u64 = 16; +const CHECKPOINT_SLOT_CHECKSUM_OFFSET: u64 = 24; // ----------------------------------------------------------------------- // Helpers @@ -121,16 +124,36 @@ fn write_commit_cursor(dir: &Path, offset: u64) -> Result<(), Error> { .map_err(Error::IOError)?; let ordinal = active_file_ordinal(dir)?; - - file.seek(SeekFrom::Start(COMMIT_FILE_ORDINAL_OFFSET)) + let generation = 1u64; + let checksum = checkpoint_slot_checksum(generation, ordinal, offset); + + file.seek(SeekFrom::Start( + CHECKPOINT_SLOT_0_OFFSET + CHECKPOINT_SLOT_GENERATION_OFFSET, + )) + .map_err(Error::IOError)?; + file.write_all(&generation.to_le_bytes()) .map_err(Error::IOError)?; + + file.seek(SeekFrom::Start( + CHECKPOINT_SLOT_0_OFFSET + CHECKPOINT_SLOT_ORDINAL_OFFSET, + )) + .map_err(Error::IOError)?; file.write_all(&ordinal.to_le_bytes()) .map_err(Error::IOError)?; - file.seek(SeekFrom::Start(COMMIT_OFFSET)) - .map_err(Error::IOError)?; + file.seek(SeekFrom::Start( + CHECKPOINT_SLOT_0_OFFSET + CHECKPOINT_SLOT_FILE_OFFSET, + )) + .map_err(Error::IOError)?; file.write_all(&offset.to_le_bytes()) .map_err(Error::IOError)?; + + file.seek(SeekFrom::Start( + CHECKPOINT_SLOT_0_OFFSET + CHECKPOINT_SLOT_CHECKSUM_OFFSET, + )) + .map_err(Error::IOError)?; + file.write_all(&checksum.to_le_bytes()) + .map_err(Error::IOError)?; file.sync_all().map_err(Error::IOError)?; Ok(()) } From 5e0c75014042949afb03f4bbf9fbbe0bcfff5714 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Mon, 30 Mar 2026 12:11:39 +0300 Subject: [PATCH 16/25] Background checkpointing --- README.md | 12 +- examples/perf.rs | 9 +- src/data_file.rs | 3 +- src/index_file.rs | 13 +- src/store.rs | 361 +++++++++++++++++++++++++++++++++------- src/store/checkpoint.rs | 289 ++++++++++++++++++++++++++++++++ src/store/compaction.rs | 10 +- src/store/open.rs | 21 ++- 
src/types.rs | 22 +++ tests/recovery.rs | 164 ++++++++++++++++++ 10 files changed, 830 insertions(+), 74 deletions(-) create mode 100644 src/store/checkpoint.rs diff --git a/README.md b/README.md index 460e8b5..c176f3e 100644 --- a/README.md +++ b/README.md @@ -134,14 +134,16 @@ a pass, it simply deletes the old immutable file since no entry points to it. You can configure the throughput (bytes per second) of compaction. -### Rebuild +### Checkpointing & Rebuild We trust the operating system to flush the data files and mmap'ed rows table to storage, which means that even if your process crashes, your data will be fully consistent. However, -this is not true on a power failure or a kernel panic -- in which case the state of the -index file is unknown. In such cases Candy has an efficient rebuild mechanism (based on checkpointing) -that essentially replays recent mutating operations in order and rebuilds the correct state from -the data files. +this is not true on a power failure or a kernel panic — in which case the state of the +index file is unknown relative to the data files. + +To handle this gracefully, Candy employs **background checkpointing**. Instead of synchronously `fsync`ing index and data files on every write (which would block the writer), a background worker asynchronously persists a consistent snapshot of the current state at user-defined intervals or after a configured amount of bytes have been written. + +On an unexpected crash or an unclean shutdown, Candy features an efficient rebuild mechanism. It resumes from the latest successful checkpoint and rapidly replays only the recent mutating operations, restoring the full, robust state from the append-only data files. 
## Design Goals diff --git a/examples/perf.rs b/examples/perf.rs index 889d5db..202091d 100644 --- a/examples/perf.rs +++ b/examples/perf.rs @@ -319,7 +319,14 @@ fn run_list_perf( fn main() -> Result<(), Box> { let dir = tempfile::tempdir()?; - let store = Arc::new(CandyStore::open(dir.path(), Config::default())?); + let store = Arc::new(CandyStore::open( + dir.path(), + Config { + checkpoint_delta_bytes: None, + checkpoint_interval: None, + ..Default::default() + }, + )?); // single threaded run_perf(store.clone(), 1_000_000, 1, 16, 16)?; diff --git a/src/data_file.rs b/src/data_file.rs index 2842830..7bb646e 100644 --- a/src/data_file.rs +++ b/src/data_file.rs @@ -458,7 +458,7 @@ impl DataFile { key: &[u8], shard_idx: usize, inflight_tracker: &'a InflightTracker, - ) -> Result<(usize, InflightGuard<'a>)> { + ) -> Result<(u64, usize, InflightGuard<'a>)> { self.append_entry( EntryType::Tombstone, ns, @@ -467,7 +467,6 @@ impl DataFile { shard_idx, inflight_tracker, ) - .map(|(_, len, guard)| (len, guard)) } pub(crate) fn read_kv_into<'a>( diff --git a/src/index_file.rs b/src/index_file.rs index 2ff3eb2..47535bd 100644 --- a/src/index_file.rs +++ b/src/index_file.rs @@ -418,6 +418,10 @@ impl IndexFile { (ordinal, offset) } + pub(crate) fn checkpoint_generation(&self) -> u64 { + self.cached_checkpoint_generation.load(Ordering::Acquire) + } + pub(crate) fn persist_checkpoint_cursor(&self, ordinal: u64, offset: u64) { let current_gen = self.cached_checkpoint_generation.load(Ordering::Relaxed); let next_generation = current_gen @@ -436,9 +440,12 @@ impl IndexFile { ); // Update the cache so concurrent readers see the new values immediately. 
- self.cached_checkpoint_ordinal.store(ordinal, Ordering::Relaxed); - self.cached_checkpoint_offset.store(offset, Ordering::Relaxed); - self.cached_checkpoint_generation.store(next_generation, Ordering::Release); + self.cached_checkpoint_ordinal + .store(ordinal, Ordering::Relaxed); + self.cached_checkpoint_offset + .store(offset, Ordering::Relaxed); + self.cached_checkpoint_generation + .store(next_generation, Ordering::Release); } #[cfg(target_os = "linux")] diff --git a/src/store.rs b/src/store.rs index 6ad418e..7436d5a 100644 --- a/src/store.rs +++ b/src/store.rs @@ -1,3 +1,4 @@ +mod checkpoint; mod compaction; mod list; mod open; @@ -37,10 +38,55 @@ struct CompactionState { wake_requested: bool, } +// this is needed because std::io::Error is not clone() +#[derive(Debug, Clone)] +enum CheckpointFailure { + IO(std::io::ErrorKind, String), + MissingDataFile(u16), + Other(String), +} + +impl CheckpointFailure { + fn from_error(err: Error) -> Self { + match err { + Error::IOError(io_err) => Self::IO(io_err.kind(), io_err.to_string()), + Error::MissingDataFile(file_idx) => Self::MissingDataFile(file_idx), + other => Self::Other(other.to_string()), + } + } + + fn to_error(&self) -> Error { + match self { + Self::IO(kind, message) => Error::IOError(std::io::Error::new(*kind, message.clone())), + Self::MissingDataFile(file_idx) => Error::MissingDataFile(*file_idx), + Self::Other(message) => Error::IOError(std::io::Error::other(message.clone())), + } + } +} + +#[derive(Default)] +struct CheckpointState { + requested_epoch: u64, + handled_epoch: u64, + completed_epoch: u64, + last_failure_epoch: u64, + last_failure: Option, + last_checkpoint_dur_ms: u64, +} + +#[derive(Clone, Copy)] +struct CheckpointSnapshot { + checkpoint_ordinal: u64, + checkpoint_offset: u64, + checkpointed_delta: i64, + last_commit_ordinal: u64, +} + #[derive(Default)] struct InnerStats { num_compactions: AtomicU64, compaction_errors: AtomicU64, + checkpoint_errors: AtomicU64, num_positive_lookups: 
AtomicU64, num_negative_lookups: AtomicU64, num_collisions: AtomicU64, @@ -64,6 +110,7 @@ impl InnerStats { fn reset(&self) { self.num_compactions.store(0, Ordering::Relaxed); self.compaction_errors.store(0, Ordering::Relaxed); + self.checkpoint_errors.store(0, Ordering::Relaxed); self.num_positive_lookups.store(0, Ordering::Relaxed); self.num_negative_lookups.store(0, Ordering::Relaxed); self.num_collisions.store(0, Ordering::Relaxed); @@ -98,7 +145,9 @@ struct StoreInner { active_file_idx: AtomicU16, active_file_ordinal: AtomicU64, uncommitted_entries_delta: AtomicI64, - checkpoint_lock: Mutex<()>, + checkpoint_state: Mutex, + checkpoint_condvar: Condvar, + checkpoint_shutting_down: AtomicBool, rotation_lock: Mutex<()>, compaction_state: Mutex, compaction_condvar: Condvar, @@ -125,6 +174,7 @@ pub struct CandyStore { inner: Arc, _lockfile: fslock::LockFile, compaction_thd: Mutex>>, + checkpoint_thd: Mutex>>, allow_clean_shutdown: AtomicBool, } @@ -157,7 +207,9 @@ impl StoreInner { active_file_idx: AtomicU16::new(state.active_file_idx), active_file_ordinal: AtomicU64::new(state.active_file_ordinal), uncommitted_entries_delta: AtomicI64::new(0), - checkpoint_lock: Mutex::new(()), + checkpoint_state: Mutex::new(CheckpointState::default()), + checkpoint_condvar: Condvar::new(), + checkpoint_shutting_down: AtomicBool::new(false), rotation_lock: Mutex::new(()), compaction_state: Mutex::new(CompactionState::default()), compaction_condvar: Condvar::new(), @@ -220,6 +272,7 @@ impl StoreInner { self.active_file_ordinal .store(active_file_ordinal, Ordering::Release); self.uncommitted_entries_delta.store(0, Ordering::Relaxed); + *self.checkpoint_state.lock() = CheckpointState::default(); self.stats.reset(); Ok(()) @@ -244,11 +297,12 @@ impl StoreInner { .fetch_add(bytes, Ordering::Relaxed); } - fn record_write(&self, bytes: u64) { + fn record_write(&self, offset: u64, bytes: u64) { self.stats.num_write_ops.fetch_add(1, Ordering::Relaxed); self.stats .num_write_bytes 
.fetch_add(bytes, Ordering::Relaxed); + self.note_checkpoint_write(offset + bytes); } fn signal_compaction_scan(&self) { @@ -390,8 +444,19 @@ impl StoreInner { self.index_file.persist_checkpoint_cursor(ordinal, offset); } - fn checkpoint_cursor(&self) -> Result<()> { - let _checkpoint_lock = self.checkpoint_lock.lock(); + fn perform_checkpoint(&self) -> Result<()> { + let snapshot = self.snapshot_checkpoint_progress()?; + let current_cursor = self.index_file.checkpoint_cursor(); + if snapshot.checkpoint_ordinal == current_cursor.0 + && snapshot.checkpoint_offset == current_cursor.1 + && snapshot.checkpointed_delta == 0 + { + return Ok(()); + } + self.sync_checkpoint(snapshot) + } + + fn snapshot_checkpoint_progress(&self) -> Result { let files = self.data_files.read(); let active_idx = self.active_file_idx.load(Ordering::Acquire); let active_file = files @@ -400,22 +465,38 @@ impl StoreInner { .ok_or(Error::MissingDataFile(active_idx))?; let (checkpoint_ordinal, checkpoint_offset, checkpointed_delta) = self.inflight_tracker.checkpoint_progress(&active_file); - let last_commit_ordinal = self.index_file.checkpoint_cursor().0; + Ok(CheckpointSnapshot { + checkpoint_ordinal, + checkpoint_offset, + checkpointed_delta, + last_commit_ordinal, + }) + } + fn sync_checkpoint(&self, snap: CheckpointSnapshot) -> Result<()> { + let files = self.data_files.read(); for data_file in files.values() { - if data_file.file_ordinal >= last_commit_ordinal { + if data_file.file_ordinal >= snap.last_commit_ordinal { data_file.file.sync_all().map_err(Error::IOError)?; } } drop(files); - self.fold_checkpointed_num_entries(checkpointed_delta); - self.persist_checkpoint_cursor(checkpoint_ordinal, checkpoint_offset); + self.fold_checkpointed_num_entries(snap.checkpointed_delta); + self.persist_checkpoint_cursor(snap.checkpoint_ordinal, snap.checkpoint_offset); self.index_file.sync_all()?; sync_dir(&self.base_path) } + fn perform_checkpoint_with_logical_locks(&self) -> Result<()> { + let 
_logical_guards = self + .list_meta_locks + .iter() + .map(|lock| lock.write()) + .collect::>(); + self.perform_checkpoint() + } fn _split_row(&self, hc: HashCoord, sl: u64, gsl: u64) -> Result<()> { let nsl = sl + 1; let low_row_idx = hc.row_index(sl); @@ -479,49 +560,51 @@ impl StoreInner { /// `compact_file` also writes to `data_files` (removing files) but only /// touches non-active indices, so there is no conflict. fn _rotate_data_file(&self, active_idx: u16) -> Result<()> { - let _rot_lock = self.rotation_lock.lock(); + { + let _rot_lock = self.rotation_lock.lock(); - if self.active_file_idx.load(Ordering::Acquire) != active_idx { - return Ok(()); - } + if self.active_file_idx.load(Ordering::Acquire) != active_idx { + return Ok(()); + } - let active_file = self.data_file(active_idx)?; - let active_ordinal = active_file.file_ordinal; + let active_file = self.data_file(active_idx)?; + let active_ordinal = active_file.file_ordinal; - let mut next_idx = (self.active_file_idx.load(Ordering::Relaxed) + 1) & MAX_DATA_FILE_IDX; - let mut attempts = 0; - { - let files = self.data_files.read(); - while files.contains_key(&next_idx) { - next_idx = (next_idx + 1) & MAX_DATA_FILE_IDX; - attempts += 1; - if attempts > MAX_DATA_FILES { - return Err(Error::TooManyDataFiles); + let mut next_idx = + (self.active_file_idx.load(Ordering::Relaxed) + 1) & MAX_DATA_FILE_IDX; + let mut attempts = 0; + { + let files = self.data_files.read(); + while files.contains_key(&next_idx) { + next_idx = (next_idx + 1) & MAX_DATA_FILE_IDX; + attempts += 1; + if attempts > MAX_DATA_FILES { + return Err(Error::TooManyDataFiles); + } } } - } - let ordinal = self.active_file_ordinal.fetch_add(1, Ordering::Relaxed) + 1; - let data_file = Arc::new(DataFile::create( - self.base_path.as_path(), - self.config.clone(), - next_idx, - ordinal, - )?); + let ordinal = self.active_file_ordinal.fetch_add(1, Ordering::Relaxed) + 1; + let data_file = Arc::new(DataFile::create( + self.base_path.as_path(), + 
self.config.clone(), + next_idx, + ordinal, + )?); - active_file.seal_for_rotation(); + active_file.seal_for_rotation(); - self.data_files.write().insert(next_idx, data_file); - self.active_file_idx.store(next_idx, Ordering::Release); + self.data_files.write().insert(next_idx, data_file); + self.active_file_idx.store(next_idx, Ordering::Release); - if active_ordinal != 0 - && self.index_file.file_waste(active_idx) > self.config.compaction_min_threshold - { - self.signal_compaction_scan(); + if active_ordinal != 0 + && self.index_file.file_waste(active_idx) > self.config.compaction_min_threshold + { + self.signal_compaction_scan(); + } } - self.checkpoint_cursor()?; - + _ = self.request_checkpoint_epoch(); Ok(()) } @@ -763,7 +846,7 @@ impl CandyStore { row.shard_idx, &self.inner.inflight_tracker, )?; - self.inner.record_write(size as u64); + self.inner.record_write(file_off, size as u64); row.insert( col, hc.sig, @@ -836,7 +919,7 @@ impl CandyStore { update.shard_idx, &self.inner.inflight_tracker, )?; - self.inner.record_write(size as u64); + self.inner.record_write(file_off, size as u64); if let Some(name) = update.crash_point_name { crate::crash_point(name); } @@ -909,7 +992,7 @@ impl CandyStore { row.shard_idx, &self.inner.inflight_tracker, )?; - self.inner.record_write(size as u64); + self.inner.record_write(file_off, size as u64); crate::crash_point("set_after_write_before_insert"); row.insert( col, @@ -1049,13 +1132,13 @@ impl CandyStore { let active_file = files .get(&active_idx) .ok_or(Error::MissingDataFile(active_idx))?; - let (tombstone_size, inflight_guard) = active_file.append_tombstone( + let (file_off, tombstone_size, inflight_guard) = active_file.append_tombstone( ns, key, row.shard_idx, &self.inner.inflight_tracker, )?; - self.inner.record_write(tombstone_size as u64); + self.inner.record_write(file_off, tombstone_size as u64); row.remove(col); self.track_tombstone_waste(src_file_idx, klen, vlen); @@ -1178,20 +1261,12 @@ impl CandyStore { /// so the 
next open can resume from this point without replaying earlier /// writes. /// - /// This does **not** block concurrent writers or compaction, but does block - /// compound operations like lists/queues to checkpoint at well-defined states + /// This waits for the background checkpoint worker to establish a checkpoint + /// after taking all logical list/queue locks, so compound operations are + /// checkpointed only at well-defined boundaries. pub fn checkpoint(&self) -> Result<()> { - // take all list_meta_locks so we don't ever create a checkpoint that has half-baked - // queue/list. note that rotation-induced checkpoints may do that, but user-induced ones - // will not. - let _logical_guards = self - .inner - .list_meta_locks - .iter() - .map(|lock| lock.write()) - .collect::>(); - - self.inner.checkpoint_cursor() + let target_epoch = self.inner.request_checkpoint_epoch(); + self.inner.wait_for_checkpoint_epoch(target_epoch) } /// Returns the number of background compaction errors observed since open. 
@@ -1272,6 +1347,11 @@ impl CandyStore { }; let waste_bytes = self.inner.index_file.total_waste(); let s = &self.inner.stats; + let checkpoint_state = self.inner.checkpoint_state.lock(); + let checkpoint_generation = self.inner.index_file.checkpoint_generation(); + let checkpoint_epoch = checkpoint_state.completed_epoch; + let uncheckpointed_bytes = self.inner.approx_uncheckpointed_bytes(); + let last_checkpoint_dur = Duration::from_millis(checkpoint_state.last_checkpoint_dur_ms); Stats { num_rows, @@ -1283,8 +1363,13 @@ impl CandyStore { waste_bytes, num_compactions: s.num_compactions.load(Ordering::Relaxed), + checkpoint_errors: s.checkpoint_errors.load(Ordering::Relaxed), last_remap_dur: Duration::from_millis(s.last_remap_dur_ms.load(Ordering::Relaxed)), + checkpoint_generation, + checkpoint_epoch, + uncheckpointed_bytes, + last_checkpoint_dur, last_compaction_dur: Duration::from_millis( s.last_compaction_dur_ms.load(Ordering::Relaxed), ), @@ -1420,6 +1505,47 @@ mod tests { Ok(()) } + #[test] + fn test_stats_reports_checkpoint_state() -> Result<()> { + let dir = tempdir().unwrap(); + let db = CandyStore::open(dir.path(), Config::default())?; + + db.stop_compaction(); + db.set("checkpoint-stats", vec![b'z'; 512])?; + + let active_idx = db.inner.active_file_idx.load(Ordering::Acquire); + let active_ordinal = db + .inner + .data_files + .read() + .get(&active_idx) + .expect("active data file should exist") + .file_ordinal; + db.inner.persist_checkpoint_cursor(active_ordinal, 0); + + { + let mut checkpoint_state = db.inner.checkpoint_state.lock(); + checkpoint_state.completed_epoch = 13; + checkpoint_state.last_checkpoint_dur_ms = 29; + } + + let expected_dirty = db + .inner + .data_files + .read() + .get(&active_idx) + .expect("active data file should exist") + .used_bytes(); + + let stats = db.stats(); + assert!(stats.checkpoint_generation > 0); + assert_eq!(stats.checkpoint_epoch, 13); + assert_eq!(stats.uncheckpointed_bytes, expected_dirty); + 
assert_eq!(stats.last_checkpoint_dur, Duration::from_millis(29)); + + Ok(()) + } + #[test] fn test_checkpoint_does_not_join_compaction_thread() -> Result<()> { let dir = tempdir().unwrap(); @@ -1446,4 +1572,121 @@ mod tests { Ok(()) } + + #[test] + fn test_rotation_schedules_background_checkpoint() -> Result<()> { + let dir = tempdir().unwrap(); + let db = CandyStore::open( + dir.path(), + Config { + max_data_file_size: 2048, + compaction_min_threshold: u32::MAX, + compaction_throughput_bytes_per_sec: 0, + ..Config::default() + }, + )?; + + db.stop_compaction(); + while db.stats().num_data_files < 2 { + let idx = db.stats().num_write_ops; + db.set( + format!("rotate-{idx}"), + format!("payload-{}", "x".repeat(768)), + )?; + } + + let t0 = Instant::now(); + while db.inner.index_file.checkpoint_cursor() == (0, 0) { + assert!( + t0.elapsed() < Duration::from_secs(2), + "rotation should enqueue a checkpoint that advances the replay cursor" + ); + thread::sleep(Duration::from_millis(10)); + } + + Ok(()) + } + + #[test] + fn test_checkpoint_without_new_bytes_skips_io_and_advances_epoch() -> Result<()> { + let dir = tempdir().unwrap(); + let db = CandyStore::open( + dir.path(), + Config { + checkpoint_interval: None, + checkpoint_delta_bytes: None, + compaction_throughput_bytes_per_sec: 0, + ..Config::default() + }, + )?; + + db.stop_compaction(); + let cursor_before = db.inner.index_file.checkpoint_cursor(); + let requested_before = db.inner.checkpoint_state.lock().requested_epoch; + db.checkpoint()?; + let state = db.inner.checkpoint_state.lock(); + assert_eq!(state.requested_epoch, requested_before + 1); + assert_eq!(state.completed_epoch, requested_before + 1); + assert_eq!(db.inner.index_file.checkpoint_cursor(), cursor_before); + + Ok(()) + } + + #[test] + fn test_checkpoint_delta_bytes_schedules_background_checkpoint() -> Result<()> { + let dir = tempdir().unwrap(); + let db = CandyStore::open( + dir.path(), + Config { + checkpoint_interval: None, + 
checkpoint_delta_bytes: Some(512), + compaction_min_threshold: u32::MAX, + compaction_throughput_bytes_per_sec: 0, + ..Config::default() + }, + )?; + + db.stop_compaction(); + db.set("delta-threshold", vec![b'x'; 1024])?; + + let t0 = Instant::now(); + while db.inner.index_file.checkpoint_cursor() == (0, 0) { + assert!( + t0.elapsed() < Duration::from_secs(2), + "checkpoint_delta_bytes should schedule a background checkpoint" + ); + thread::sleep(Duration::from_millis(10)); + } + + Ok(()) + } + + #[test] + fn test_checkpoint_interval_schedules_background_checkpoint() -> Result<()> { + let dir = tempdir().unwrap(); + let db = CandyStore::open( + dir.path(), + Config { + checkpoint_interval: Some(Duration::from_millis(50)), + checkpoint_delta_bytes: None, + compaction_min_threshold: u32::MAX, + compaction_throughput_bytes_per_sec: 0, + ..Config::default() + }, + )?; + + db.stop_compaction(); + db.set("interval-threshold", vec![b'y'; 256])?; + + let t0 = Instant::now(); + while db.inner.index_file.checkpoint_cursor() == (0, 0) { + assert!( + t0.elapsed() < Duration::from_secs(2), + "checkpoint_interval should checkpoint dirty bytes even without explicit requests" + ); + thread::sleep(Duration::from_millis(10)); + } + + Ok(()) + } } diff --git a/src/store/checkpoint.rs b/src/store/checkpoint.rs new file mode 100644 index 0000000..c1b95f6 --- /dev/null +++ b/src/store/checkpoint.rs @@ -0,0 +1,289 @@ +use std::sync::Arc; +use std::sync::atomic::Ordering; +use std::time::Instant; + +use crate::types::{Error, Result}; + +use super::{CandyStore, CheckpointFailure, StoreInner}; + +/// RAII guard that marks the checkpoint worker as shut down and wakes all +/// waiters when the worker thread exits — whether it returns normally or +/// panics. This prevents `wait_for_checkpoint_epoch` from blocking forever +/// if the worker encounters an unexpected panic. 
+struct WorkerShutdownGuard<'a> { + inner: &'a StoreInner, +} + +impl Drop for WorkerShutdownGuard<'_> { + fn drop(&mut self) { + let _state = self.inner.checkpoint_state.lock(); + self.inner + .checkpoint_shutting_down + .store(true, Ordering::Release); + self.inner.checkpoint_condvar.notify_all(); + } +} + +impl StoreInner { + fn request_checkpoint_epoch_locked(state: &mut super::CheckpointState) -> u64 { + state.requested_epoch = state + .requested_epoch + .checked_add(1) + .expect("checkpoint epoch overflow"); + state.requested_epoch + } + + pub(super) fn approx_uncheckpointed_bytes(&self) -> u64 { + let active_idx = self.active_file_idx.load(Ordering::Acquire); + let files = self.data_files.read(); + let Some(active_file) = files.get(&active_idx) else { + return 0; + }; + + let (commit_file_ordinal, commit_offset) = self.index_file.checkpoint_cursor(); + let commit_offset = if commit_file_ordinal == active_file.file_ordinal { + commit_offset + } else { + 0 + }; + + active_file.used_bytes().saturating_sub(commit_offset) + } + + pub(super) fn request_checkpoint_epoch(&self) -> u64 { + let mut state = self.checkpoint_state.lock(); + let target_epoch = Self::request_checkpoint_epoch_locked(&mut state); + self.checkpoint_condvar.notify_all(); + target_epoch + } + + pub(super) fn note_checkpoint_write(&self, end_offset: u64) { + let Some(threshold) = self.config.checkpoint_delta_bytes else { + return; + }; + + // this is not atomic but it's fine -- rotation triggers checkpointing so if the last checkpoint + // happened before this file, we know a checkpoint is in progress. skip. 
+ let (ordinal, commit_offset) = self.index_file.checkpoint_cursor(); + if self.active_file_ordinal.load(Ordering::Relaxed) != ordinal { + return; + } + + if end_offset <= commit_offset + threshold as u64 { + return; + } + + let mut state = self.checkpoint_state.lock(); + if state.requested_epoch > state.completed_epoch { + return; + } + + Self::request_checkpoint_epoch_locked(&mut state); + self.checkpoint_condvar.notify_all(); + } + + pub(super) fn wait_for_checkpoint_epoch(&self, target_epoch: u64) -> Result<()> { + let mut state = self.checkpoint_state.lock(); + loop { + if state.completed_epoch >= target_epoch { + return Ok(()); + } + if state.handled_epoch >= target_epoch && state.last_failure_epoch >= target_epoch { + return Err(state + .last_failure + .as_ref() + .map(CheckpointFailure::to_error) + .unwrap_or_else(|| { + Error::CheckpointShutdown( + "checkpoint worker stopped before completing request".into(), + ) + })); + } + if self.checkpoint_shutting_down.load(Ordering::Acquire) { + return Err(Error::CheckpointShutdown( + "checkpoint worker is shutting down".into(), + )); + } + self.checkpoint_condvar.wait(&mut state); + } + } + + fn run_checkpoint_worker(self: &Arc) { + let _shutdown_guard = WorkerShutdownGuard { inner: self }; + let interval = self.config.checkpoint_interval; + let threshold = self.config.checkpoint_delta_bytes.map(|value| value as u64); + let mut last_checkpoint_at = Instant::now(); + + loop { + let mut interval_elapsed = false; + { + let mut state = self.checkpoint_state.lock(); + loop { + if self.checkpoint_shutting_down.load(Ordering::Acquire) { + self.checkpoint_condvar.notify_all(); + return; + } + + if state.handled_epoch < state.requested_epoch { + break; + } + + if let Some(interval) = interval { + let remaining = interval.saturating_sub(last_checkpoint_at.elapsed()); + if remaining.is_zero() { + interval_elapsed = true; + break; + } + let wait_result = self.checkpoint_condvar.wait_for(&mut state, remaining); + if 
wait_result.timed_out() { + interval_elapsed = true; + break; + } + } else { + self.checkpoint_condvar.wait(&mut state); + } + } + } + + // Acquire all logical locks so compound list/queue operations + // are quiesced, then snapshot the checkpoint progress. Release + // the logical locks *before* the expensive fsync phase so writers + // are only blocked for the snapshot, not for the I/O. + let (target_epoch, snapshot) = { + let _logical_guards = self + .list_meta_locks + .iter() + .map(|lock| lock.write()) + .collect::>(); + let mut state = self.checkpoint_state.lock(); + if self.checkpoint_shutting_down.load(Ordering::Acquire) { + self.checkpoint_condvar.notify_all(); + return; + } + + let target_epoch = + (state.handled_epoch < state.requested_epoch).then_some(state.requested_epoch); + if target_epoch.is_none() && !interval_elapsed { + continue; + } + + let current_cursor = self.index_file.checkpoint_cursor(); + let snapshot = match self.snapshot_checkpoint_progress() { + Ok(snapshot) => { + let snapshot_is_noop = snapshot.checkpoint_ordinal == current_cursor.0 + && snapshot.checkpoint_offset == current_cursor.1 + && snapshot.checkpointed_delta == 0; + if snapshot_is_noop { + if let Some(target_epoch) = target_epoch { + state.handled_epoch = target_epoch; + state.completed_epoch = target_epoch; + state.last_checkpoint_dur_ms = 0; + if state.last_failure_epoch <= state.completed_epoch { + state.last_failure_epoch = 0; + state.last_failure = None; + } + self.checkpoint_condvar.notify_all(); + } + last_checkpoint_at = Instant::now(); + continue; + } + Ok(snapshot) + } + Err(e) => Err(e), + }; + + drop(state); + // _logical_guards dropped here + (target_epoch, snapshot) + }; + + let started_at = Instant::now(); + let snapshot_for_follow_up = snapshot.as_ref().ok().copied(); + let result = snapshot.and_then(|snap| self.sync_checkpoint(snap)); + + let mut state = self.checkpoint_state.lock(); + match result { + Ok(()) => { + state.last_checkpoint_dur_ms = + 
u64::try_from(started_at.elapsed().as_millis()).unwrap_or(u64::MAX); + if let Some(target_epoch) = target_epoch { + state.handled_epoch = state.handled_epoch.max(target_epoch); + state.completed_epoch = state.completed_epoch.max(target_epoch); + } + if state.last_failure_epoch <= state.completed_epoch { + state.last_failure_epoch = 0; + state.last_failure = None; + } + + let should_request_follow_up = match (threshold, snapshot_for_follow_up) { + (Some(threshold), Some(snapshot)) => { + let active_idx = self.active_file_idx.load(Ordering::Acquire); + let files = self.data_files.read(); + match files.get(&active_idx) { + Some(active_file) => { + active_file.file_ordinal == snapshot.checkpoint_ordinal + && active_file + .used_bytes() + .saturating_sub(snapshot.checkpoint_offset) + >= threshold + } + None => false, + } + } + _ => false, + }; + if should_request_follow_up && state.handled_epoch >= state.requested_epoch { + Self::request_checkpoint_epoch_locked(&mut state); + } + } + Err(err) => { + self.stats.checkpoint_errors.fetch_add(1, Ordering::Relaxed); + // For interval-only failures (target_epoch is None), allocate + // a synthetic epoch so last_failure_epoch is set and the error + // is observable rather than silently cleared. 
+ let failure_epoch = target_epoch + .unwrap_or_else(|| Self::request_checkpoint_epoch_locked(&mut state)); + state.handled_epoch = state.handled_epoch.max(failure_epoch); + state.last_failure_epoch = failure_epoch; + state.last_failure = Some(CheckpointFailure::from_error(err)); + } + } + self.checkpoint_condvar.notify_all(); + last_checkpoint_at = Instant::now(); + } + } +} + +impl CandyStore { + pub(super) fn start_checkpoint_worker(&self) { + let mut checkpoint_thd = self.checkpoint_thd.lock(); + if checkpoint_thd.is_some() { + return; + } + + self.inner + .checkpoint_shutting_down + .store(false, Ordering::Release); + let ctx = Arc::clone(&self.inner); + let thd = std::thread::Builder::new() + .name("candy_checkpoint".into()) + .spawn(move || { + ctx.run_checkpoint_worker(); + }) + .unwrap(); + *checkpoint_thd = Some(thd); + } + + pub(super) fn stop_checkpoint_worker(&self) { + { + let _state = self.inner.checkpoint_state.lock(); + self.inner + .checkpoint_shutting_down + .store(true, Ordering::Release); + self.inner.checkpoint_condvar.notify_all(); + } + if let Some(thd) = self.checkpoint_thd.lock().take() { + let _ = thd.join(); + } + } +} diff --git a/src/store/compaction.rs b/src/store/compaction.rs index 00bce42..c92d2a2 100644 --- a/src/store/compaction.rs +++ b/src/store/compaction.rs @@ -145,7 +145,7 @@ impl StoreInner { &self.inflight_tracker, ) { Ok((file_off, size, inflight_guard)) => { - self.record_write(size as u64); + self.record_write(file_off, size as u64); moved_bytes = moved_bytes.saturating_add(size as u64); row.replace_pointer( *col, @@ -353,10 +353,14 @@ impl Drop for CandyStore { fn drop(&mut self) { self.stop_compaction(); - if !self.allow_clean_shutdown.load(Ordering::Relaxed) { + let should_checkpoint = self.allow_clean_shutdown.load(Ordering::Relaxed); + self.stop_checkpoint_worker(); + + if !should_checkpoint { return; } - let _ = self.checkpoint(); + + let _ = self.inner.perform_checkpoint_with_logical_locks(); } } diff --git 
a/src/store/open.rs b/src/store/open.rs index e22acd5..0a81db6 100644 --- a/src/store/open.rs +++ b/src/store/open.rs @@ -2,13 +2,15 @@ use std::{ collections::{HashMap, HashSet}, path::Path, sync::Arc, + time::Duration, }; use crate::{ data_file::DataFile, index_file::IndexFile, internal::{ - MAX_REPRESENTABLE_FILE_SIZE, is_resettable_open_error, parse_data_file_idx, sync_dir, + FILE_OFFSET_ALIGNMENT, MAX_REPRESENTABLE_FILE_SIZE, is_resettable_open_error, + parse_data_file_idx, sync_dir, }, types::{Config, Error, INITIAL_DATA_FILE_ORDINAL, Result}, }; @@ -28,6 +30,7 @@ impl CandyStore { inner: Arc::new(StoreInner::new(base_path, config, state, num_logical_locks)), _lockfile: lockfile, compaction_thd: parking_lot::Mutex::new(None), + checkpoint_thd: parking_lot::Mutex::new(None), allow_clean_shutdown: std::sync::atomic::AtomicBool::new(true), }) } @@ -139,6 +142,16 @@ impl CandyStore { .compaction_min_threshold .min((max_data_file_size as f64 * 0.8) as u32), remap_scaler: config.remap_scaler.clamp(1, 4), + checkpoint_interval: config.checkpoint_interval.map(|d| { + if d.is_zero() { + Duration::from_millis(100) + } else { + d + } + }), + checkpoint_delta_bytes: config + .checkpoint_delta_bytes + .map(|b| b.max(FILE_OFFSET_ALIGNMENT as usize)), ..config }; @@ -168,6 +181,7 @@ impl CandyStore { let store = Self::build_store(base_path.clone(), config.clone(), lockfile)?; match store.recover_index() { Ok(()) => { + store.start_checkpoint_worker(); store.start_compaction(); Ok(store) } @@ -176,14 +190,17 @@ impl CandyStore { let inner = unsafe { std::ptr::read(&store.inner) }; let lockfile = unsafe { std::ptr::read(&store._lockfile) }; let compaction_thd = unsafe { std::ptr::read(&store.compaction_thd) }; + let checkpoint_thd = unsafe { std::ptr::read(&store.checkpoint_thd) }; let _allow_clean_shutdown = unsafe { std::ptr::read(&store.allow_clean_shutdown) }; drop(compaction_thd); + drop(checkpoint_thd); drop(inner); Self::clear_db_files(&base_path)?; let recovered = 
Self::build_store(base_path, config, lockfile)?; recovered.recover_index()?; + recovered.start_checkpoint_worker(); recovered.start_compaction(); Ok(recovered) } @@ -201,12 +218,14 @@ impl CandyStore { pub fn clear(&self) -> Result<()> { // stop bg thread self.stop_compaction(); + self.stop_checkpoint_worker(); // now we're single-threaded. take all locks and clear state self.inner.reset()?; self.allow_clean_shutdown .store(true, std::sync::atomic::Ordering::Relaxed); + self.start_checkpoint_worker(); self.start_compaction(); Ok(()) diff --git a/src/types.rs b/src/types.rs index 72501fc..f90b7fd 100644 --- a/src/types.rs +++ b/src/types.rs @@ -36,6 +36,10 @@ pub struct Config { pub reset_on_invalid_data: bool, /// Target background compaction throughput in bytes per second. pub compaction_throughput_bytes_per_sec: usize, + /// perform a checkpoint (for crash-consistency) every this much time (`None` to disable) + pub checkpoint_interval: Option, + /// perform a checkpoint (for crash-consistency) every this many bytes written (`None` to disable) + pub checkpoint_delta_bytes: Option, } impl Default for Config { @@ -50,6 +54,8 @@ impl Default for Config { max_concurrency: (2 * num_cpus::get()).clamp(16, 64), reset_on_invalid_data: false, compaction_throughput_bytes_per_sec: 4 * 1024 * 1024, + checkpoint_interval: Some(Duration::from_secs(5)), + checkpoint_delta_bytes: Some(128 * 1024), } } } @@ -77,6 +83,9 @@ pub enum Error { #[error("Payload {0} too large")] PayloadTooLarge(usize), + + #[error("Checkpoint shutdown: {0}")] + CheckpointShutdown(String), } /// Convenience result type used by the crate. @@ -197,8 +206,21 @@ pub struct Stats { /// Time spent in the most recent grow remap operation. pub last_remap_dur: Duration, + /// Persisted checkpoint slot generation visible to recovery. + pub checkpoint_generation: u64, + /// Most recent completed runtime checkpoint epoch handled by the checkpoint worker. 
+ pub checkpoint_epoch: u64, + /// Approximate bytes written since the last completed checkpoint. + /// + /// This is a best-effort runtime metric intended for monitoring rather + /// than an exact durable boundary. + pub uncheckpointed_bytes: u64, + /// Time spent in the most recent successful checkpoint operation. + pub last_checkpoint_dur: Duration, /// Number of completed background compactions. pub num_compactions: u64, + /// Number of background checkpoint errors since open. + pub checkpoint_errors: u64, /// Time spent in the most recent successful file compaction. pub last_compaction_dur: Duration, /// Bytes reclaimed by the most recent successful file compaction. diff --git a/tests/recovery.rs b/tests/recovery.rs index 227be63..d5b48f5 100644 --- a/tests/recovery.rs +++ b/tests/recovery.rs @@ -4,6 +4,7 @@ use std::collections::HashSet; use std::fs; use std::io::{Read, Seek, SeekFrom, Write}; use std::sync::Arc; +use std::time::{Duration, Instant}; use candystore::{CandyStore, CandyTypedDeque, CandyTypedList, CandyTypedStore, Config, Error}; use tempfile::tempdir; @@ -322,6 +323,39 @@ fn assert_rebuild_stats_non_zero(db: &CandyStore) { ); } +fn wait_for_background_checkpoint(db: &CandyStore, previous_generation: u64) { + let started_at = Instant::now(); + loop { + let stats = db.stats(); + if stats.checkpoint_generation > previous_generation { + return; + } + assert!( + started_at.elapsed() < Duration::from_secs(3), + "background checkpoint did not complete in time: prev_gen={previous_generation}, current_gen={}, uncheckpointed_bytes={}", + stats.checkpoint_generation, + stats.uncheckpointed_bytes, + ); + std::thread::sleep(Duration::from_millis(10)); + } +} + +fn wait_for_checkpoint_generation_advance(db: &CandyStore, previous_generation: u64) { + let started_at = Instant::now(); + loop { + let stats = db.stats(); + if stats.checkpoint_generation > previous_generation { + return; + } + assert!( + started_at.elapsed() < Duration::from_secs(3), + "background 
checkpoint generation did not advance in time: prev_gen={previous_generation}, current_gen={}", + stats.checkpoint_generation, + ); + std::thread::sleep(Duration::from_millis(10)); + } +} + #[test] fn test_recovery_after_dirty_shutdown() -> Result<(), Error> { let dir = tempdir().unwrap(); @@ -1017,6 +1051,136 @@ fn test_checkpoint_advances_recovery_cursor() -> Result<(), Error> { Ok(()) } +#[test] +fn test_checkpoint_delta_bytes_advances_recovery_cursor_in_background() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 8 * 1024, + compaction_min_threshold: u32::MAX, + compaction_throughput_bytes_per_sec: 0, + checkpoint_interval: None, + checkpoint_delta_bytes: Some(512), + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + let initial_generation = db.stats().checkpoint_generation; + for i in 0..24 { + db.set( + format!("delta-bg-key{i:04}"), + format!("delta-bg-val{i:04}-{}", "x".repeat(96)), + )?; + } + wait_for_background_checkpoint(&db, initial_generation); + db._abort_for_testing(); + } + + let db = CandyStore::open(dir.path(), config)?; + let stats = db.stats(); + assert!( + stats.num_rebuilt_entries < 24, + "threshold-triggered background checkpoint should avoid replaying the entire store" + ); + assert_eq!(stats.num_rebuild_purged_bytes, 0); + for i in 0..24 { + assert_eq!( + db.get(format!("delta-bg-key{i:04}"))?, + Some(format!("delta-bg-val{i:04}-{}", "x".repeat(96)).into_bytes()), + "delta-bg-key{i:04} missing after threshold-triggered background checkpoint" + ); + } + + Ok(()) +} + +#[test] +fn test_checkpoint_interval_advances_recovery_cursor_in_background() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 8 * 1024, + compaction_min_threshold: u32::MAX, + compaction_throughput_bytes_per_sec: 0, + checkpoint_interval: Some(Duration::from_millis(50)), + checkpoint_delta_bytes: None, + ..Config::default() + }; + + { + let 
db = CandyStore::open(dir.path(), config)?; + let initial_generation = db.stats().checkpoint_generation; + for i in 0..24 { + db.set( + format!("interval-bg-key{i:04}"), + format!("interval-bg-val{i:04}-{}", "y".repeat(96)), + )?; + } + wait_for_background_checkpoint(&db, initial_generation); + db._abort_for_testing(); + } + + let db = CandyStore::open(dir.path(), config)?; + let stats = db.stats(); + assert_eq!(stats.num_rebuilt_entries, 0); + assert_eq!(stats.num_rebuild_purged_bytes, 0); + for i in 0..24 { + assert_eq!( + db.get(format!("interval-bg-key{i:04}"))?, + Some(format!("interval-bg-val{i:04}-{}", "y".repeat(96)).into_bytes()), + "interval-bg-key{i:04} missing after interval-triggered background checkpoint" + ); + } + + Ok(()) +} + +#[test] +fn test_rotation_advances_recovery_cursor_in_background() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 2048, + compaction_min_threshold: u32::MAX, + compaction_throughput_bytes_per_sec: 0, + checkpoint_interval: None, + checkpoint_delta_bytes: None, + ..Config::default() + }; + + let total_keys; + { + let db = CandyStore::open(dir.path(), config)?; + let initial_generation = db.stats().checkpoint_generation; + let mut next_idx = 0usize; + while db.stats().num_data_files < 2 { + db.set( + format!("rotate-bg-key{next_idx:04}"), + format!("rotate-bg-val{next_idx:04}-{}", "z".repeat(96)), + )?; + next_idx += 1; + } + total_keys = next_idx; + wait_for_checkpoint_generation_advance(&db, initial_generation); + db._abort_for_testing(); + } + + let db = CandyStore::open(dir.path(), config)?; + let stats = db.stats(); + assert!( + stats.num_rebuilt_entries < total_keys as u64, + "rotation-triggered checkpoint should avoid replaying the entire store" + ); + assert_eq!(stats.num_rebuild_purged_bytes, 0); + for i in 0..total_keys { + assert_eq!( + db.get(format!("rotate-bg-key{i:04}"))?, + Some(format!("rotate-bg-val{i:04}-{}", "z".repeat(96)).into_bytes()), + 
"rotate-bg-key{i:04} missing after rotation-triggered background checkpoint" + ); + } + + Ok(()) +} + #[test] fn test_progressive_rebuild_ignores_bogus_checkpoint_offset() -> Result<(), Error> { let dir = tempdir().unwrap(); From 4f80d7f5cd08689397d32d033347a9a33114b808 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Tue, 31 Mar 2026 10:06:05 +0300 Subject: [PATCH 17/25] Refactoring --- src/store/checkpoint.rs | 301 +++++++++++++++++++++++----------------- src/store/compaction.rs | 282 +++++++++++++++++++++---------------- src/store/list.rs | 12 +- src/store/queue.rs | 12 +- 4 files changed, 341 insertions(+), 266 deletions(-) diff --git a/src/store/checkpoint.rs b/src/store/checkpoint.rs index c1b95f6..c0e9d01 100644 --- a/src/store/checkpoint.rs +++ b/src/store/checkpoint.rs @@ -1,10 +1,10 @@ use std::sync::Arc; use std::sync::atomic::Ordering; -use std::time::Instant; +use std::time::{Duration, Instant}; use crate::types::{Error, Result}; -use super::{CandyStore, CheckpointFailure, StoreInner}; +use super::{CandyStore, CheckpointFailure, CheckpointSnapshot, StoreInner}; /// RAII guard that marks the checkpoint worker as shut down and wakes all /// waiters when the worker thread exits — whether it returns normally or @@ -24,6 +24,17 @@ impl Drop for WorkerShutdownGuard<'_> { } } +enum CheckpointRun { + Shutdown, + Idle { + reset_timer: bool, + }, + Ready { + target_epoch: Option, + snapshot: Result, + }, +} + impl StoreInner { fn request_checkpoint_epoch_locked(state: &mut super::CheckpointState) -> u64 { state.requested_epoch = state @@ -108,147 +119,183 @@ impl StoreInner { } } - fn run_checkpoint_worker(self: &Arc) { - let _shutdown_guard = WorkerShutdownGuard { inner: self }; - let interval = self.config.checkpoint_interval; - let threshold = self.config.checkpoint_delta_bytes.map(|value| value as u64); - let mut last_checkpoint_at = Instant::now(); - + fn wait_for_checkpoint_trigger( + &self, + interval: Option, + last_checkpoint_at: Instant, + ) -> Option 
{ + let mut state = self.checkpoint_state.lock(); loop { - let mut interval_elapsed = false; - { - let mut state = self.checkpoint_state.lock(); - loop { - if self.checkpoint_shutting_down.load(Ordering::Acquire) { - self.checkpoint_condvar.notify_all(); - return; - } + if self.checkpoint_shutting_down.load(Ordering::Acquire) { + self.checkpoint_condvar.notify_all(); + return None; + } - if state.handled_epoch < state.requested_epoch { - break; - } + if state.handled_epoch < state.requested_epoch { + return Some(false); + } - if let Some(interval) = interval { - let remaining = interval.saturating_sub(last_checkpoint_at.elapsed()); - if remaining.is_zero() { - interval_elapsed = true; - break; - } - let wait_result = self.checkpoint_condvar.wait_for(&mut state, remaining); - if wait_result.timed_out() { - interval_elapsed = true; - break; - } - } else { - self.checkpoint_condvar.wait(&mut state); - } + if let Some(interval) = interval { + let remaining = interval.saturating_sub(last_checkpoint_at.elapsed()); + if remaining.is_zero() { + return Some(true); } + let wait_result = self.checkpoint_condvar.wait_for(&mut state, remaining); + if wait_result.timed_out() { + return Some(true); + } + } else { + self.checkpoint_condvar.wait(&mut state); } + } + } - // Acquire all logical locks so compound list/queue operations - // are quiesced, then snapshot the checkpoint progress. Release - // the logical locks *before* the expensive fsync phase so writers - // are only blocked for the snapshot, not for the I/O. 
- let (target_epoch, snapshot) = { - let _logical_guards = self - .list_meta_locks - .iter() - .map(|lock| lock.write()) - .collect::>(); - let mut state = self.checkpoint_state.lock(); - if self.checkpoint_shutting_down.load(Ordering::Acquire) { - self.checkpoint_condvar.notify_all(); - return; - } + fn complete_checkpoint_noop( + &self, + state: &mut super::CheckpointState, + target_epoch: Option, + ) { + if let Some(target_epoch) = target_epoch { + state.handled_epoch = target_epoch; + state.completed_epoch = target_epoch; + state.last_checkpoint_dur_ms = 0; + if state.last_failure_epoch <= state.completed_epoch { + state.last_failure_epoch = 0; + state.last_failure = None; + } + self.checkpoint_condvar.notify_all(); + } + } - let target_epoch = - (state.handled_epoch < state.requested_epoch).then_some(state.requested_epoch); - if target_epoch.is_none() && !interval_elapsed { - continue; + fn prepare_checkpoint_run(&self, interval_elapsed: bool) -> CheckpointRun { + let _logical_guards = self + .list_meta_locks + .iter() + .map(|lock| lock.write()) + .collect::>(); + let mut state = self.checkpoint_state.lock(); + if self.checkpoint_shutting_down.load(Ordering::Acquire) { + self.checkpoint_condvar.notify_all(); + return CheckpointRun::Shutdown; + } + + let target_epoch = + (state.handled_epoch < state.requested_epoch).then_some(state.requested_epoch); + if target_epoch.is_none() && !interval_elapsed { + return CheckpointRun::Idle { reset_timer: false }; + } + + let current_cursor = self.index_file.checkpoint_cursor(); + match self.snapshot_checkpoint_progress() { + Ok(snapshot) + if snapshot.checkpoint_ordinal == current_cursor.0 + && snapshot.checkpoint_offset == current_cursor.1 + && snapshot.checkpointed_delta == 0 => + { + self.complete_checkpoint_noop(&mut state, target_epoch); + CheckpointRun::Idle { reset_timer: true } + } + Ok(snapshot) => CheckpointRun::Ready { + target_epoch, + snapshot: Ok(snapshot), + }, + Err(err) => CheckpointRun::Ready { + 
target_epoch, + snapshot: Err(err), + }, + } + } + + fn checkpoint_needs_follow_up(&self, threshold: u64, snapshot: CheckpointSnapshot) -> bool { + let active_idx = self.active_file_idx.load(Ordering::Acquire); + let files = self.data_files.read(); + match files.get(&active_idx) { + Some(active_file) => { + active_file.file_ordinal == snapshot.checkpoint_ordinal + && active_file + .used_bytes() + .saturating_sub(snapshot.checkpoint_offset) + >= threshold + } + None => false, + } + } + + fn finish_checkpoint_run( + &self, + target_epoch: Option, + snapshot: Result, + threshold: Option, + started_at: Instant, + ) { + let snapshot_for_follow_up = snapshot.as_ref().ok().copied(); + let result = snapshot.and_then(|snap| self.sync_checkpoint(snap)); + + let mut state = self.checkpoint_state.lock(); + match result { + Ok(()) => { + state.last_checkpoint_dur_ms = + u64::try_from(started_at.elapsed().as_millis()).unwrap_or(u64::MAX); + if let Some(target_epoch) = target_epoch { + state.handled_epoch = state.handled_epoch.max(target_epoch); + state.completed_epoch = state.completed_epoch.max(target_epoch); + } + if state.last_failure_epoch <= state.completed_epoch { + state.last_failure_epoch = 0; + state.last_failure = None; } - let current_cursor = self.index_file.checkpoint_cursor(); - let snapshot = match self.snapshot_checkpoint_progress() { - Ok(snapshot) => { - let snapshot_is_noop = snapshot.checkpoint_ordinal == current_cursor.0 - && snapshot.checkpoint_offset == current_cursor.1 - && snapshot.checkpointed_delta == 0; - if snapshot_is_noop { - if let Some(target_epoch) = target_epoch { - state.handled_epoch = target_epoch; - state.completed_epoch = target_epoch; - state.last_checkpoint_dur_ms = 0; - if state.last_failure_epoch <= state.completed_epoch { - state.last_failure_epoch = 0; - state.last_failure = None; - } - self.checkpoint_condvar.notify_all(); - } - last_checkpoint_at = Instant::now(); - continue; - } - Ok(snapshot) + let should_request_follow_up = 
match (threshold, snapshot_for_follow_up) { + (Some(threshold), Some(snapshot)) => { + self.checkpoint_needs_follow_up(threshold, snapshot) } - Err(e) => Err(e), + _ => false, }; + if should_request_follow_up && state.handled_epoch >= state.requested_epoch { + Self::request_checkpoint_epoch_locked(&mut state); + } + } + Err(err) => { + self.stats.checkpoint_errors.fetch_add(1, Ordering::Relaxed); + let failure_epoch = target_epoch + .unwrap_or_else(|| Self::request_checkpoint_epoch_locked(&mut state)); + state.handled_epoch = state.handled_epoch.max(failure_epoch); + state.last_failure_epoch = failure_epoch; + state.last_failure = Some(CheckpointFailure::from_error(err)); + } + } + self.checkpoint_condvar.notify_all(); + } - drop(state); - // _logical_guards dropped here - (target_epoch, snapshot) - }; + fn run_checkpoint_worker(self: &Arc) { + let _shutdown_guard = WorkerShutdownGuard { inner: self }; + let interval = self.config.checkpoint_interval; + let threshold = self.config.checkpoint_delta_bytes.map(|value| value as u64); + let mut last_checkpoint_at = Instant::now(); - let started_at = Instant::now(); - let snapshot_for_follow_up = snapshot.as_ref().ok().copied(); - let result = snapshot.and_then(|snap| self.sync_checkpoint(snap)); - - let mut state = self.checkpoint_state.lock(); - match result { - Ok(()) => { - state.last_checkpoint_dur_ms = - u64::try_from(started_at.elapsed().as_millis()).unwrap_or(u64::MAX); - if let Some(target_epoch) = target_epoch { - state.handled_epoch = state.handled_epoch.max(target_epoch); - state.completed_epoch = state.completed_epoch.max(target_epoch); - } - if state.last_failure_epoch <= state.completed_epoch { - state.last_failure_epoch = 0; - state.last_failure = None; - } + loop { + let Some(interval_elapsed) = + self.wait_for_checkpoint_trigger(interval, last_checkpoint_at) + else { + return; + }; - let should_request_follow_up = match (threshold, snapshot_for_follow_up) { - (Some(threshold), Some(snapshot)) => { - let 
active_idx = self.active_file_idx.load(Ordering::Acquire); - let files = self.data_files.read(); - match files.get(&active_idx) { - Some(active_file) => { - active_file.file_ordinal == snapshot.checkpoint_ordinal - && active_file - .used_bytes() - .saturating_sub(snapshot.checkpoint_offset) - >= threshold - } - None => false, - } - } - _ => false, - }; - if should_request_follow_up && state.handled_epoch >= state.requested_epoch { - Self::request_checkpoint_epoch_locked(&mut state); + let (target_epoch, snapshot) = match self.prepare_checkpoint_run(interval_elapsed) { + CheckpointRun::Shutdown => return, + CheckpointRun::Idle { reset_timer } => { + if reset_timer { + last_checkpoint_at = Instant::now(); } + continue; } - Err(err) => { - self.stats.checkpoint_errors.fetch_add(1, Ordering::Relaxed); - // For interval-only failures (target_epoch is None), allocate - // a synthetic epoch so last_failure_epoch is set and the error - // is observable rather than silently cleared. - let failure_epoch = target_epoch - .unwrap_or_else(|| Self::request_checkpoint_epoch_locked(&mut state)); - state.handled_epoch = state.handled_epoch.max(failure_epoch); - state.last_failure_epoch = failure_epoch; - state.last_failure = Some(CheckpointFailure::from_error(err)); - } - } - self.checkpoint_condvar.notify_all(); + CheckpointRun::Ready { + target_epoch, + snapshot, + } => (target_epoch, snapshot), + }; + + let started_at = Instant::now(); + self.finish_checkpoint_run(target_epoch, snapshot, threshold, started_at); last_checkpoint_at = Instant::now(); } } diff --git a/src/store/compaction.rs b/src/store/compaction.rs index c92d2a2..f24178d 100644 --- a/src/store/compaction.rs +++ b/src/store/compaction.rs @@ -16,24 +16,33 @@ pub(super) struct CompactionOutcome { pub(super) moved_bytes: u32, } +type CompactionSources = Vec<(u16, Arc)>; + +struct CompactionEntry { + row_idx: usize, + col: usize, + entry: EntryPointer, + source_file: Arc, +} + +type CompactionRowSnapshot = Vec; + impl 
StoreInner { - pub(super) fn compact_files( - &self, - candidates: &[(u16, u64)], - pacer: &mut Pacer, - #[cfg(windows)] pending_deletions: &mut Vec, - ) -> Result { - if candidates.is_empty() { - return Ok(CompactionOutcome { - compacted_files: 0, - reclaimed_bytes: 0, - moved_bytes: 0, - }); + fn empty_compaction_outcome() -> CompactionOutcome { + CompactionOutcome { + compacted_files: 0, + reclaimed_bytes: 0, + moved_bytes: 0, } + } - let active_file_idx = self.active_file_idx.load(Ordering::Acquire); + fn collect_compaction_sources( + &self, + candidates: &[(u16, u64)], + active_file_idx: u16, + ) -> CompactionSources { let files = self.data_files.read(); - let sources = candidates + candidates .iter() .filter_map(|&(file_idx, expected_ordinal)| { if file_idx == active_file_idx { @@ -47,127 +56,162 @@ impl StoreInner { Some((file_idx, data_file)) }) - .collect::>(); - drop(files); + .collect() + } - if sources.is_empty() { - return Ok(CompactionOutcome { - compacted_files: 0, - reclaimed_bytes: 0, - moved_bytes: 0, - }); + fn snapshot_compaction_row( + &self, + row_idx: usize, + sources: &CompactionSources, + ) -> Option { + let rows = self.index_file.rows_table(); + let active_rows = self.index_file.num_rows(); + if row_idx >= active_rows { + return None; } - let mut moved_bytes = 0u64; - let mut read_buf = Vec::new(); + let row = rows.row(row_idx); + Some( + row.pointers + .iter() + .enumerate() + .filter_map(|(col, &entry)| { + if !entry.is_valid() { + return None; + } + let (_, source_file) = + sources.iter().find(|(idx, _)| *idx == entry.file_idx())?; + Some(CompactionEntry { + row_idx, + col, + entry, + source_file: source_file.clone(), + }) + }) + .collect(), + ) + } - let mut row_idx = 0; + fn rewrite_compacted_entry( + &self, + task: &CompactionEntry, + ns: KeyNamespace, + key: &[u8], + value: &[u8], + moved_bytes: &mut u64, + ) -> Result<()> { + let mut rotate_idx_req = None; loop { - if self.compaction_shutting_down.load(Ordering::Acquire) { - return 
Ok(CompactionOutcome { - compacted_files: 0, - reclaimed_bytes: 0, - moved_bytes: 0, - }); + if let Some(rotate_idx) = rotate_idx_req.take() { + self._rotate_data_file(rotate_idx)?; } - // Snapshot one row at a time, then drop the rows table read lock before any I/O. - let snapshot: Vec<(usize, EntryPointer, Arc)> = { - let rows = self.index_file.rows_table(); - let active_rows = self.index_file.num_rows(); - if row_idx >= active_rows { - break; + let rows = self.index_file.rows_table(); + let active_rows = self.index_file.num_rows(); + if task.row_idx >= active_rows { + return Ok(()); + } + + let mut row = rows.row_mut(task.row_idx); + if row.pointers[task.col] != task.entry { + return Ok(()); + } + + let active_idx = self.active_file_idx.load(Ordering::Acquire); + let active_file = self + .data_files + .read() + .get(&active_idx) + .cloned() + .ok_or(Error::MissingDataFile(active_idx))?; + + match active_file.append_kv( + crate::internal::EntryType::Update, + ns, + key, + value, + row.shard_idx, + &self.inflight_tracker, + ) { + Ok((file_off, size, inflight_guard)) => { + self.record_write(file_off, size as u64); + *moved_bytes = moved_bytes.saturating_add(size as u64); + row.replace_pointer( + task.col, + EntryPointer::new( + active_idx, + file_off, + size, + task.entry.masked_row_selector(), + ), + ); + inflight_guard.complete(); + return Ok(()); + } + Err(Error::RotateDataFile(rotate_idx)) => { + drop(row); + rotate_idx_req = Some(rotate_idx); } + Err(err) => return Err(err), + } + } + } - let row = rows.row(row_idx); - row.pointers - .iter() - .enumerate() - .filter_map(|(col, &entry)| { - if !entry.is_valid() { - return None; - } - let (_, source_file) = - sources.iter().find(|(idx, _)| *idx == entry.file_idx())?; - Some((col, entry, source_file.clone())) - }) - .collect() - }; + fn compact_snapshot_entry( + &self, + task: CompactionEntry, + pacer: &mut Pacer, + read_buf: &mut Vec, + moved_bytes: &mut u64, + ) -> Result<()> { + 
self.record_read(task.entry.size_hint() as u64); + pacer.consume(task.entry.size_hint() as u64); + + let kv = task.source_file.read_kv_into( + task.entry.file_offset(), + task.entry.size_hint(), + read_buf, + )?; - for (col, entry, source_file) in &snapshot { - self.record_read(entry.size_hint() as u64); - pacer.consume(entry.size_hint() as u64); + let Some(ns) = KeyNamespace::from_u8(kv.ns) else { + return Err(invalid_data_error("unknown key namespace in data file")); + }; - let kv = source_file.read_kv_into( - entry.file_offset(), - entry.size_hint(), - &mut read_buf, - )?; + self.rewrite_compacted_entry(&task, ns, kv.key(), kv.value(), moved_bytes) + } - let Some(ns) = KeyNamespace::from_u8(kv.ns) else { - return Err(invalid_data_error("unknown key namespace in data file")); - }; + pub(super) fn compact_files( + &self, + candidates: &[(u16, u64)], + pacer: &mut Pacer, + #[cfg(windows)] pending_deletions: &mut Vec, + ) -> Result { + if candidates.is_empty() { + return Ok(Self::empty_compaction_outcome()); + } - // re-acquire the write lock and verify the pointer hasn't been moved - // by a concurrent set/remove before appending + replacing - let mut rotate_idx_req = None; - loop { - if let Some(rotate_idx) = rotate_idx_req.take() { - self._rotate_data_file(rotate_idx)?; - } + let active_file_idx = self.active_file_idx.load(Ordering::Acquire); + let sources = self.collect_compaction_sources(candidates, active_file_idx); - let rows = self.index_file.rows_table(); - let active_rows = self.index_file.num_rows(); - if row_idx >= active_rows { - break; - } + if sources.is_empty() { + return Ok(Self::empty_compaction_outcome()); + } - let mut row = rows.row_mut(row_idx); - if row.pointers[*col] != *entry { - // a concurrent op already moved/removed this entry -- skip it - break; - } + let mut moved_bytes = 0u64; + let mut read_buf = Vec::new(); - let active_idx = self.active_file_idx.load(Ordering::Acquire); - let active_file = self - .data_files - .read() - 
.get(&active_idx) - .cloned() - .ok_or(Error::MissingDataFile(active_idx))?; - - match active_file.append_kv( - crate::internal::EntryType::Update, - ns, - kv.key(), - kv.value(), - row.shard_idx, - &self.inflight_tracker, - ) { - Ok((file_off, size, inflight_guard)) => { - self.record_write(file_off, size as u64); - moved_bytes = moved_bytes.saturating_add(size as u64); - row.replace_pointer( - *col, - EntryPointer::new( - active_idx, - file_off, - size, - entry.masked_row_selector(), - ), - ); - inflight_guard.complete(); - break; - } - Err(Error::RotateDataFile(rotate_idx)) => { - drop(row); - rotate_idx_req = Some(rotate_idx); - } - Err(err) => { - return Err(err); - } - } - } + let mut row_idx = 0; + loop { + if self.compaction_shutting_down.load(Ordering::Acquire) { + return Ok(Self::empty_compaction_outcome()); + } + + let Some(snapshot) = self.snapshot_compaction_row(row_idx, &sources) else { + break; + }; + + for task in snapshot { + self.compact_snapshot_entry(task, pacer, &mut read_buf, &mut moved_bytes)?; } row_idx += 1; diff --git a/src/store/list.rs b/src/store/list.rs index 934bfba..171af32 100644 --- a/src/store/list.rs +++ b/src/store/list.rs @@ -42,10 +42,6 @@ pub struct ListIterator<'a> { type ListMetadata = RangeMetadata; impl ListIterator<'_> { - fn heal_head(&self, new_head: u64) { - let _ = self.try_heal_head(new_head); - } - fn try_heal_head(&self, new_head: u64) -> Result<()> { self.store.try_heal_range_head( self.ns.meta, @@ -57,10 +53,6 @@ impl ListIterator<'_> { ) } - fn heal_tail(&self, new_tail: u64) { - let _ = self.try_heal_tail(new_tail); - } - fn try_heal_tail(&self, new_tail: u64) -> Result<()> { self.store.try_heal_range_tail( self.ns.meta, @@ -82,7 +74,7 @@ impl Iterator for ListIterator<'_> { self.next_idx += 1; if idx > self.initial_next_idx + 1000 { - self.heal_head(idx); + let _ = self.try_heal_head(idx); self.initial_next_idx = idx; } @@ -118,7 +110,7 @@ impl DoubleEndedIterator for ListIterator<'_> { } if idx + 1000 < 
self.initial_end_idx { - self.heal_tail(idx); + let _ = self.try_heal_tail(idx); self.initial_end_idx = idx; } diff --git a/src/store/queue.rs b/src/store/queue.rs index 683c856..18760c7 100644 --- a/src/store/queue.rs +++ b/src/store/queue.rs @@ -38,10 +38,6 @@ pub struct QueueIterator<'a> { type QueueMetadata = RangeMetadata; impl<'a> QueueIterator<'a> { - fn heal_head(&self, new_head: u64) { - let _ = self.try_heal_head(new_head); - } - fn try_heal_head(&self, new_head: u64) -> Result<()> { self.store.try_heal_range_head( self.ns.meta, @@ -53,10 +49,6 @@ impl<'a> QueueIterator<'a> { ) } - fn heal_tail(&self, new_tail: u64) { - let _ = self.try_heal_tail(new_tail); - } - fn try_heal_tail(&self, new_tail: u64) -> Result<()> { self.store.try_heal_range_tail( self.ns.meta, @@ -78,7 +70,7 @@ impl Iterator for QueueIterator<'_> { self.next_idx += 1; if idx > self.initial_next_idx + 1000 { - self.heal_head(idx); + let _ = self.try_heal_head(idx); self.initial_next_idx = idx; } @@ -104,7 +96,7 @@ impl DoubleEndedIterator for QueueIterator<'_> { } if idx + 1000 < self.initial_end_idx { - self.heal_tail(idx); + let _ = self.try_heal_tail(idx); self.initial_end_idx = idx; } From 8a80ddd73dcb64aaa05376f68b8820fe368870d4 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Tue, 31 Mar 2026 15:17:49 +0300 Subject: [PATCH 18/25] Move from databuf to postcard, a more popular serialization library --- Cargo.lock | 133 +++++++++++++++++++++++++++++++------------ Cargo.toml | 3 +- src/store/typed.rs | 139 +++++++++++++++++++++++---------------------- src/types.rs | 3 + 4 files changed, 171 insertions(+), 107 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b881db1..f826555 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,15 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "atomic-polyfill" +version = "1.0.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4" +dependencies = [ + "critical-section", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -41,19 +50,26 @@ version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "candystore" version = "0.7.0" dependencies = [ "crc16-ibm3740-fast", - "databuf", "fslock", "libc", "memmap2", "num_cpus", "parking_lot", + "postcard", "proptest", "rand 0.10.0", + "serde", "simd-itertools", "siphasher", "smallvec", @@ -80,6 +96,15 @@ dependencies = [ "rand_core 0.10.0", ] +[[package]] +name = "cobs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" +dependencies = [ + "thiserror", +] + [[package]] name = "core_detect" version = "1.0.0" @@ -112,32 +137,22 @@ dependencies = [ ] [[package]] -name = "databuf" -version = "0.5.0" +name = "critical-section" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1ad1d99bee317a8dac0b7cd86896c5a5f24307009292985dabbf3e412c8b9d" -dependencies = [ - "databuf-derive", -] +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" [[package]] -name = "databuf-derive" -version = "0.5.0" +name = "embedded-io" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04040c9fc8fcb4084222a26c99faf5b3014772a6115e076b7a50fe49bf25d0ea" -dependencies = [ - "databuf_derive_impl", -] +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" [[package]] -name = 
"databuf_derive_impl" -version = "0.2.3" +name = "embedded-io" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf656eb071fe87d23716f933788a35a8ad6baa6fdbf66a67a261dbd3f9dc81a" -dependencies = [ - "quote2", - "syn", -] +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" [[package]] name = "equivalent" @@ -209,6 +224,15 @@ dependencies = [ "wasip3", ] +[[package]] +name = "hash32" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -224,6 +248,20 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "heapless" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f" +dependencies = [ + "atomic-polyfill", + "hash32", + "rustc_version", + "serde", + "spin", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.5.0" @@ -388,6 +426,19 @@ dependencies = [ "windows-link", ] +[[package]] +name = "postcard" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "heapless", + "serde", +] + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -450,23 +501,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "quote2" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "970573b86f7e5795c8c6c50c56ef602368593f0687188da27fd489a59e253630" -dependencies = [ - "proc-macro2", - "quote", - "quote2-macros", -] - -[[package]] -name 
= "quote2-macros" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4b89c37b2d870a28629ad20da669bb0e7d7214878d0d5111b304aa466e1977" - [[package]] name = "r-efi" version = "5.3.0" @@ -549,6 +583,15 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "1.1.4" @@ -599,6 +642,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", + "serde_derive", ] [[package]] @@ -655,6 +699,21 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + [[package]] name = "syn" version = "2.0.117" diff --git a/Cargo.toml b/Cargo.toml index d3a0c08..e7aa9d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,11 +9,12 @@ repository = "https://github.com/sweet-security/candystore" [dependencies] crc16-ibm3740-fast = "0.5.0" -databuf = "0.5.0" fslock = "0.2.1" memmap2 = "0.9.10" num_cpus = "1.17.0" parking_lot = "0.12.5" +postcard = { version = "1.1.3", features = ["use-std"] } +serde = { version = "1", features 
= ["derive"] } simd-itertools = "0.3.0" siphasher = "1.0.2" smallvec = { version = "1.15.1", features = ["write"] } diff --git a/src/store/typed.rs b/src/store/typed.rs index f8d1b94..7375745 100644 --- a/src/store/typed.rs +++ b/src/store/typed.rs @@ -1,6 +1,6 @@ use std::{borrow::Borrow, marker::PhantomData, ops::Range, sync::Arc}; -use databuf::{DecodeOwned, Encode, config::num::LE}; +use serde::{Serialize, de::DeserializeOwned}; use smallvec::SmallVec; use crate::{ @@ -36,7 +36,7 @@ const INLINE_TYPED_BUF_SIZE: usize = 128; type InlineBytes = SmallVec<[u8; INLINE_TYPED_BUF_SIZE]>; /// Marker trait for typed keys and collection identifiers used by the typed wrappers. -pub trait CandyTypedKey: Encode + DecodeOwned { +pub trait CandyTypedKey: Serialize + DeserializeOwned { const TYPE_ID: u32; } @@ -113,8 +113,8 @@ impl Clone for CandyTypedList { impl CandyTypedStore where - K: CandyTypedKey + Encode, - V: Encode + DecodeOwned, + K: CandyTypedKey + Serialize, + V: Serialize + DeserializeOwned, { /// Creates a typed key-value view over `store`. pub fn new(store: Arc) -> Self { @@ -124,7 +124,7 @@ where } } - fn make_key(key: &Q) -> InlineBytes + fn make_key(key: &Q) -> InlineBytes where K: Borrow, { @@ -132,7 +132,7 @@ where } /// Returns the decoded value for `key`, if present. - pub fn get(&self, key: &Q) -> Result> + pub fn get(&self, key: &Q) -> Result> where K: Borrow, { @@ -144,7 +144,7 @@ where } /// Inserts or replaces `key` with `val`. - pub fn set( + pub fn set( &self, key: &Q1, val: &Q2, @@ -162,7 +162,7 @@ where } /// Removes `key` and returns its previous decoded value if it existed. - pub fn remove(&self, key: &Q) -> Result> + pub fn remove(&self, key: &Q) -> Result> where K: Borrow, { @@ -174,7 +174,7 @@ where } /// Returns `true` if `key` currently exists. 
- pub fn contains(&self, key: &Q) -> Result + pub fn contains(&self, key: &Q) -> Result where K: Borrow, { @@ -185,7 +185,7 @@ where } /// Returns the current value for `key`, or inserts and returns `val` if the key is missing. - pub fn get_or_create( + pub fn get_or_create( &self, key: &Q1, val: &Q2, @@ -206,7 +206,7 @@ where } /// Replaces `key` with `val` only if the current value matches `expected_val` when provided. - pub fn replace( + pub fn replace( &self, key: &Q1, val: &Q2, @@ -231,7 +231,7 @@ where } /// Stores a large typed value under `key`. - pub fn set_big( + pub fn set_big( &self, key: &Q1, val: &Q2, @@ -253,7 +253,7 @@ where } /// Loads a large typed value previously stored with [`CandyTypedStore::set_big`]. - pub fn get_big(&self, key: &Q) -> Result> + pub fn get_big(&self, key: &Q) -> Result> where K: Borrow, { @@ -271,7 +271,7 @@ where } /// Removes a large typed value previously stored with [`CandyTypedStore::set_big`]. - pub fn remove_big(&self, key: &Q) -> Result + pub fn remove_big(&self, key: &Q) -> Result where K: Borrow, { @@ -288,8 +288,8 @@ where impl CandyTypedDeque where - L: CandyTypedKey + Encode, - V: Encode + DecodeOwned, + L: CandyTypedKey + Serialize, + V: Serialize + DeserializeOwned, { /// Creates a typed queue view over `store`. pub fn new(store: Arc) -> Self { @@ -299,7 +299,7 @@ where } } - fn make_queue_key(queue_key: &Q) -> InlineBytes + fn make_queue_key(queue_key: &Q) -> InlineBytes where L: Borrow, { @@ -307,7 +307,7 @@ where } /// Pushes `val` to the tail of `queue_key`. - pub fn push_tail( + pub fn push_tail( &self, queue_key: &Q, val: &QV, @@ -324,7 +324,7 @@ where } /// Pushes `val` to the head of `queue_key`. - pub fn push_head( + pub fn push_head( &self, queue_key: &Q, val: &QV, @@ -341,7 +341,10 @@ where } /// Removes and returns the head item of `queue_key` together with its logical index. 
- pub fn pop_head_with_idx(&self, queue_key: &Q) -> Result> + pub fn pop_head_with_idx( + &self, + queue_key: &Q, + ) -> Result> where L: Borrow, { @@ -355,7 +358,7 @@ where } /// Removes and returns the head value of `queue_key`. - pub fn pop_head(&self, queue_key: &Q) -> Result> + pub fn pop_head(&self, queue_key: &Q) -> Result> where L: Borrow, { @@ -363,7 +366,10 @@ where } /// Removes and returns the tail item of `queue_key` together with its logical index. - pub fn pop_tail_with_idx(&self, queue_key: &Q) -> Result> + pub fn pop_tail_with_idx( + &self, + queue_key: &Q, + ) -> Result> where L: Borrow, { @@ -377,7 +383,7 @@ where } /// Removes and returns the tail value of `queue_key`. - pub fn pop_tail(&self, queue_key: &Q) -> Result> + pub fn pop_tail(&self, queue_key: &Q) -> Result> where L: Borrow, { @@ -385,7 +391,7 @@ where } /// Returns the head item of `queue_key` and its logical index without removing it. - pub fn peek_head_with_idx( + pub fn peek_head_with_idx( &self, queue_key: &Q, ) -> Result> @@ -402,7 +408,7 @@ where } /// Returns the head value of `queue_key` without removing it. - pub fn peek_head(&self, queue_key: &Q) -> Result> + pub fn peek_head(&self, queue_key: &Q) -> Result> where L: Borrow, { @@ -410,7 +416,7 @@ where } /// Returns the tail item of `queue_key` and its logical index without removing it. - pub fn peek_tail_with_idx( + pub fn peek_tail_with_idx( &self, queue_key: &Q, ) -> Result> @@ -427,7 +433,7 @@ where } /// Returns the tail value of `queue_key` without removing it. - pub fn peek_tail(&self, queue_key: &Q) -> Result> + pub fn peek_tail(&self, queue_key: &Q) -> Result> where L: Borrow, { @@ -435,7 +441,7 @@ where } /// Returns the number of live items in `queue_key`. - pub fn len(&self, queue_key: &Q) -> Result + pub fn len(&self, queue_key: &Q) -> Result where L: Borrow, { @@ -446,7 +452,7 @@ where } /// Returns the current inclusive-exclusive logical index span for `queue_key`. 
- pub fn range(&self, queue_key: &Q) -> Result> + pub fn range(&self, queue_key: &Q) -> Result> where L: Borrow, { @@ -455,7 +461,7 @@ where } /// Returns `true` when `queue_key` has no live items. - pub fn is_empty(&self, queue_key: &Q) -> Result + pub fn is_empty(&self, queue_key: &Q) -> Result where L: Borrow, { @@ -466,7 +472,7 @@ where } /// Removes all items from `queue_key`. - pub fn discard(&self, queue_key: &Q) -> Result + pub fn discard(&self, queue_key: &Q) -> Result where L: Borrow, { @@ -475,7 +481,7 @@ where } /// Iterates over live items in `queue_key` from head to tail. - pub fn iter<'a, Q: ?Sized + Encode>( + pub fn iter<'a, Q: ?Sized + Serialize>( &'a self, queue_key: &Q, ) -> impl DoubleEndedIterator> + 'a @@ -495,9 +501,9 @@ where impl CandyTypedList where - L: CandyTypedKey + Encode, - K: Encode + DecodeOwned, - V: Encode + DecodeOwned, + L: CandyTypedKey + Serialize, + K: Serialize + DeserializeOwned, + V: Serialize + DeserializeOwned, { /// Creates a typed ordered-map/list view over `store`. pub fn new(store: Arc) -> Self { @@ -507,14 +513,14 @@ where } } - fn make_list_key(list_key: &Q) -> InlineBytes + fn make_list_key(list_key: &Q) -> InlineBytes where L: Borrow, { append_type_id(encode_to_smallvec(list_key), L::TYPE_ID) } - fn make_item_key(item_key: &Q) -> InlineBytes + fn make_item_key(item_key: &Q) -> InlineBytes where K: Borrow, { @@ -522,7 +528,7 @@ where } /// Returns `true` if `item_key` exists in `list_key`. - pub fn contains( + pub fn contains( &self, list_key: &Q1, item_key: &Q2, @@ -535,7 +541,7 @@ where } /// Inserts or replaces `item_key` in `list_key`, placing it at the logical tail. - pub fn set( + pub fn set( &self, list_key: &Q1, item_key: &Q2, @@ -556,7 +562,7 @@ where } /// Returns the current value for `item_key`, or inserts `default_val` if it is missing. 
- pub fn get_or_create( + pub fn get_or_create( &self, list_key: &Q1, item_key: &Q2, @@ -580,10 +586,10 @@ where /// Replaces `item_key` only if its current value matches `expected_val` when provided. pub fn replace< - Q1: ?Sized + Encode, - Q2: ?Sized + Encode, - Q3: ?Sized + Encode, - Q4: ?Sized + Encode, + Q1: ?Sized + Serialize, + Q2: ?Sized + Serialize, + Q3: ?Sized + Serialize, + Q4: ?Sized + Serialize, >( &self, list_key: &Q1, @@ -613,7 +619,7 @@ where } /// Returns the decoded value for `item_key`, if present. - pub fn get( + pub fn get( &self, list_key: &Q1, item_key: &Q2, @@ -631,7 +637,7 @@ where } /// Removes `item_key` and returns its previous decoded value if it existed. - pub fn remove( + pub fn remove( &self, list_key: &Q1, item_key: &Q2, @@ -649,7 +655,7 @@ where } /// Returns the number of live items in `list_key`. - pub fn len(&self, list_key: &Q) -> Result + pub fn len(&self, list_key: &Q) -> Result where L: Borrow, { @@ -658,7 +664,7 @@ where } /// Returns the current inclusive-exclusive logical span for `list_key`. - pub fn range(&self, list_key: &Q) -> Result> + pub fn range(&self, list_key: &Q) -> Result> where L: Borrow, { @@ -667,7 +673,7 @@ where } /// Returns `true` when `list_key` has no live items. - pub fn is_empty(&self, list_key: &Q) -> Result + pub fn is_empty(&self, list_key: &Q) -> Result where L: Borrow, { @@ -675,7 +681,7 @@ where } /// Removes all items from `list_key`. - pub fn discard(&self, list_key: &Q) -> Result + pub fn discard(&self, list_key: &Q) -> Result where L: Borrow, { @@ -684,7 +690,7 @@ where } /// Compacts `list_key` when `params` indicate enough holes exist to justify rewriting it. - pub fn compact_if_needed( + pub fn compact_if_needed( &self, list_key: &Q, params: ListCompactionParams, @@ -698,7 +704,7 @@ where } /// Inserts or replaces `item_key`, moving it to the logical tail and returning the previous value when present. 
- pub fn set_promoting( + pub fn set_promoting( &self, list_key: &Q1, item_key: &Q2, @@ -719,7 +725,7 @@ where } /// Iterates over live items in `list_key` from head to tail. - pub fn iter<'a, Q: ?Sized + Encode>( + pub fn iter<'a, Q: ?Sized + Serialize>( &'a self, list_key: &Q, ) -> impl DoubleEndedIterator> + 'a @@ -740,7 +746,7 @@ where } /// Removes and returns the tail item of `list_key`. - pub fn pop_tail(&self, list_key: &Q) -> Result> + pub fn pop_tail(&self, list_key: &Q) -> Result> where L: Borrow, { @@ -755,7 +761,7 @@ where } /// Removes and returns the head item of `list_key`. - pub fn pop_head(&self, list_key: &Q) -> Result> + pub fn pop_head(&self, list_key: &Q) -> Result> where L: Borrow, { @@ -770,7 +776,7 @@ where } /// Returns the tail item of `list_key` without removing it. - pub fn peek_tail(&self, list_key: &Q) -> Result> + pub fn peek_tail(&self, list_key: &Q) -> Result> where L: Borrow, { @@ -783,7 +789,7 @@ where } /// Returns the head item of `list_key` without removing it. - pub fn peek_head(&self, list_key: &Q) -> Result> + pub fn peek_head(&self, list_key: &Q) -> Result> where L: Borrow, { @@ -795,7 +801,7 @@ where } /// Retains only items for which `func` returns `true`, preserving list order. 
- pub fn retain( + pub fn retain( &self, list_key: &Q, mut func: impl FnMut(&K, &V) -> Result, @@ -813,19 +819,14 @@ where } } -fn decode_from_bytes(bytes: &[u8]) -> Result { - T::from_bytes::(bytes).map_err(|err| { - Error::IOError(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("decode error: {err}"), - )) - }) +fn decode_from_bytes(bytes: &[u8]) -> Result { + postcard::from_bytes(bytes).map_err(Error::PostcardError) } -fn encode_to_smallvec(value: &T) -> InlineBytes { - let mut bytes = InlineBytes::new(); - value.encode::(&mut bytes).unwrap(); - bytes +fn encode_to_smallvec(value: &T) -> InlineBytes { + let mut buf = InlineBytes::new(); + postcard::to_io(value, &mut buf).unwrap(); + buf } fn append_type_id(mut bytes: InlineBytes, type_id: u32) -> InlineBytes { diff --git a/src/types.rs b/src/types.rs index f90b7fd..b7448da 100644 --- a/src/types.rs +++ b/src/types.rs @@ -86,6 +86,9 @@ pub enum Error { #[error("Checkpoint shutdown: {0}")] CheckpointShutdown(String), + + #[error("Postcard error: {0}")] + PostcardError(postcard::Error), } /// Convenience result type used by the crate. 
From 5191e9a6bfb5fe9f72ebf85998ca48947dcc1ec0 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Fri, 3 Apr 2026 13:15:11 +0300 Subject: [PATCH 19/25] Optimizations: set_len to max file size to save up on metadata updates; sync_file_range instead of all file --- Cargo.toml | 2 +- src/data_file.rs | 143 ++++++++++++++++++-- src/internal.rs | 44 ++++++ src/store.rs | 12 +- src/store/checkpoint.rs | 7 + src/store/compaction.rs | 2 +- src/store/recovery.rs | 8 +- tests/common/mod.rs | 29 ++++ tests/data_loss.rs | 14 +- tests/proptest_state_machine.rs | 43 ++++-- tests/recovery.rs | 233 ++++++++++++++++++++++++++++++-- tests/rotation.rs | 22 +++ 12 files changed, 504 insertions(+), 55 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e7aa9d2..739ed2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ repository = "https://github.com/sweet-security/candystore" [dependencies] crc16-ibm3740-fast = "0.5.0" fslock = "0.2.1" +libc = "0.2.183" memmap2 = "0.9.10" num_cpus = "1.17.0" parking_lot = "0.12.5" @@ -26,7 +27,6 @@ zerocopy = { version = "0.8.47", features = ["derive"] } proptest = "1.10.0" tempfile = "3" rand = "0.10.0" -libc = "0.2.183" [features] whitebox-testing = [] diff --git a/src/data_file.rs b/src/data_file.rs index 7bb646e..2c43776 100644 --- a/src/data_file.rs +++ b/src/data_file.rs @@ -17,7 +17,7 @@ use crate::internal::{ DATA_ENTRY_OFFSET_MAGIC, DATA_ENTRY_OFFSET_MASK, DATA_FILE_SIGNATURE, DATA_FILE_VERSION, EntryType, FILE_OFFSET_ALIGNMENT, KEY_NAMESPACE_BITS, KVBuf, KVRef, KeyNamespace, MAX_KEY_NAMESPACE, PAGE_SIZE, READ_BUFFER_SIZE, SIZE_HINT_UNIT, data_file_path, - invalid_data_error, read_available_at, read_into_at, sync_dir, write_all_at, + invalid_data_error, read_available_at, read_into_at, sync_dir, sync_file_range, write_all_at, }; use crate::types::{Config, Error, MAX_USER_KEY_SIZE, MAX_USER_VALUE_SIZE, Result}; @@ -197,10 +197,13 @@ impl Drop for InflightGuard<'_> { pub(crate) struct DataFile { pub(crate) file: File, file_offset: 
AtomicU64, + last_synced_offset: AtomicU64, sealed_for_rotation: AtomicBool, config: Arc, pub(crate) file_idx: u16, pub(crate) file_ordinal: u64, + preallocated: bool, + recovery_tail_upper_bound: u64, } impl DataFile { @@ -208,13 +211,110 @@ impl DataFile { self.file_offset.load(Ordering::Acquire) } + pub(crate) fn recovery_tail_upper_bound(&self) -> u64 { + self.recovery_tail_upper_bound + } + + pub(crate) fn sync_data(&self, start_offset: u64, end_offset: u64) -> Result<()> { + let used_bytes = self.used_bytes(); + let start_offset = start_offset.min(used_bytes); + let end_offset = end_offset.min(used_bytes); + if end_offset <= start_offset { + return Ok(()); + } + + if !self.preallocated { + self.file.sync_all().map_err(Error::IOError)?; + } else { + sync_file_range( + &self.file, + size_of::() as u64 + start_offset, + end_offset - start_offset, + )?; + } + self.last_synced_offset + .fetch_max(end_offset, Ordering::Release); + Ok(()) + } + + pub(crate) fn sync_to_current(&self) -> Result<()> { + let start = self.last_synced_offset.load(Ordering::Acquire); + self.sync_data(start, self.used_bytes()) + } + pub(crate) fn truncate_to_offset(&self, file_offset: u64) -> Result<()> { debug_assert_eq!(file_offset % FILE_OFFSET_ALIGNMENT, 0); - self.file - .set_len(size_of::() as u64 + file_offset) - .map_err(Error::IOError)?; + if self.preallocated { + // A crash between the two set_len calls would leave the file + // non-preallocated. That is harmless: the next open will + // detect it as non-preallocated and fall back to sync_all + // until rotation creates a fresh preallocated file. 
+ self.file + .set_len(size_of::() as u64 + file_offset) + .map_err(Error::IOError)?; + self.file + .set_len(size_of::() as u64 + self.config.max_data_file_size as u64) + .map_err(Error::IOError)?; + } else { + self.file + .set_len(size_of::() as u64 + file_offset) + .map_err(Error::IOError)?; + } self.file_offset.store(file_offset, Ordering::Release); - self.file.sync_all().map_err(Error::IOError) + self.file.sync_all().map_err(Error::IOError)?; + self.last_synced_offset + .store(file_offset, Ordering::Release); + Ok(()) + } + + fn used_data_upper_bound(file: &File, physical_data_len: u64) -> Result { + if physical_data_len == 0 { + return Ok(0); + } + + let mut end = physical_data_len; + while end > 0 { + let start = end.saturating_sub(READ_BUFFER_SIZE as u64); + let chunk = read_available_at( + file, + (end - start) as usize, + size_of::() as u64 + start, + ) + .map_err(Error::IOError)?; + if let Some(rel) = chunk.iter().rposition(|byte| *byte != 0) { + let aligned = (start + rel as u64 + 1).next_multiple_of(FILE_OFFSET_ALIGNMENT); + return Ok(aligned.min(physical_data_len)); + } + end = start; + } + + Ok(0) + } + + /// Scans forward from offset 0, parsing each entry, and returns the + /// aligned end of the last valid entry. We temporarily set `file_offset` + /// to `tail_upper_bound` so that `read_next_entry_ref` won't short-circuit + /// before reaching it. This is safe because `open` is single-threaded; + /// the real value is overwritten by the caller immediately after. + fn detect_used_bytes(&self, tail_upper_bound: u64) -> Result { + if tail_upper_bound == 0 { + return Ok(0); + } + + self.file_offset.store(tail_upper_bound, Ordering::Release); + + let mut offset = 0u64; + let mut read_buf = Vec::new(); + let mut buf_file_offset = 0u64; + let mut last_durable_offset = 0u64; + while let Some((_, _, next_offset)) = + self.read_next_entry_ref(offset, &mut read_buf, &mut buf_file_offset)? 
+ { + offset = next_offset; + last_durable_offset = next_offset.next_multiple_of(FILE_OFFSET_ALIGNMENT); + } + + Ok(last_durable_offset) } fn parse_data_entry(buf: &[u8], offset: u64) -> Result { @@ -296,23 +396,30 @@ impl DataFile { "invalid data file header", ))); } - let mut file_offset = file + let physical_data_len = file .metadata() .map_err(Error::IOError)? .len() .saturating_sub(size_of::() as u64); - file_offset -= file_offset % FILE_OFFSET_ALIGNMENT; - file.set_len(size_of::() as u64 + file_offset) - .map_err(Error::IOError)?; + let preallocated = physical_data_len == config.max_data_file_size as u64; + let recovery_tail_upper_bound = Self::used_data_upper_bound(&file, physical_data_len)?; - Ok(Self { + let inst = Self { file, - file_offset: AtomicU64::new(file_offset), + file_offset: AtomicU64::new(physical_data_len), + last_synced_offset: AtomicU64::new(0), sealed_for_rotation: AtomicBool::new(false), config, file_idx, file_ordinal: header.ordinal, - }) + preallocated, + recovery_tail_upper_bound, + }; + let used_bytes = inst.detect_used_bytes(recovery_tail_upper_bound)?; + inst.file_offset.store(used_bytes, Ordering::Release); + inst.last_synced_offset.store(used_bytes, Ordering::Release); + + Ok(inst) } pub(crate) fn create( @@ -328,7 +435,7 @@ impl DataFile { .write(true) .open(data_file_path(base_path, file_idx)) .map_err(Error::IOError)?; - file.set_len(size_of::() as u64) + file.set_len(size_of::() as u64 + config.max_data_file_size as u64) .map_err(Error::IOError)?; let header = DataFileHeader { magic: *DATA_FILE_SIGNATURE, @@ -343,10 +450,13 @@ impl DataFile { Ok(Self { file, file_offset: AtomicU64::new(0), + last_synced_offset: AtomicU64::new(0), sealed_for_rotation: AtomicBool::new(false), config, file_idx, file_ordinal: ordinal, + preallocated: true, + recovery_tail_upper_bound: 0, }) } @@ -554,9 +664,16 @@ impl DataFile { read_buf: &'a mut Vec, buf_file_offset: &mut u64, ) -> Result, u64, u64)>> { + let used_bytes = self.used_bytes(); + if 
offset >= used_bytes { + return Ok(None); + } offset = offset.next_multiple_of(FILE_OFFSET_ALIGNMENT); loop { + if offset >= used_bytes { + return Ok(None); + } let buf_start = if offset >= *buf_file_offset { (offset - *buf_file_offset) as usize } else { diff --git a/src/internal.rs b/src/internal.rs index 2475462..12b2063 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -70,6 +70,50 @@ pub(crate) fn sync_dir(_path: &Path) -> Result<()> { Ok(()) } +#[cfg(target_os = "linux")] +pub(crate) fn sync_file_range(file: &File, offset: u64, len: u64) -> Result<()> { + use std::os::fd::AsRawFd; + + if len == 0 { + return Ok(()); + } + + let sync_offset = i64::try_from(offset) + .map_err(|_| Error::IOError(std::io::Error::other("sync offset overflow")))?; + let sync_len = i64::try_from(len) + .map_err(|_| Error::IOError(std::io::Error::other("sync length overflow")))?; + + let rc = unsafe { + libc::sync_file_range( + file.as_raw_fd(), + sync_offset, + sync_len, + libc::SYNC_FILE_RANGE_WAIT_BEFORE + | libc::SYNC_FILE_RANGE_WRITE + | libc::SYNC_FILE_RANGE_WAIT_AFTER, + ) + }; + if rc == 0 { + return Ok(()); + } + + let err = std::io::Error::last_os_error(); + match err.raw_os_error() { + Some(libc::EINVAL | libc::ENOSYS | libc::EOPNOTSUPP) => { + file.sync_all().map_err(Error::IOError) + } + _ => Err(Error::IOError(err)), + } +} + +#[cfg(not(target_os = "linux"))] +pub(crate) fn sync_file_range(file: &File, _offset: u64, len: u64) -> Result<()> { + if len == 0 { + return Ok(()); + } + file.sync_all().map_err(Error::IOError) +} + pub(crate) fn parse_data_file_idx(path: &Path) -> Option { let name = path.file_name()?.to_str()?; let suffix = name.strip_prefix("data_")?; diff --git a/src/store.rs b/src/store.rs index 7436d5a..95dbd13 100644 --- a/src/store.rs +++ b/src/store.rs @@ -80,6 +80,7 @@ struct CheckpointSnapshot { checkpoint_offset: u64, checkpointed_delta: i64, last_commit_ordinal: u64, + last_commit_offset: u64, } #[derive(Default)] @@ -465,20 +466,23 @@ impl 
StoreInner { .ok_or(Error::MissingDataFile(active_idx))?; let (checkpoint_ordinal, checkpoint_offset, checkpointed_delta) = self.inflight_tracker.checkpoint_progress(&active_file); - let last_commit_ordinal = self.index_file.checkpoint_cursor().0; + let (last_commit_ordinal, last_commit_offset) = self.index_file.checkpoint_cursor(); Ok(CheckpointSnapshot { checkpoint_ordinal, checkpoint_offset, checkpointed_delta, last_commit_ordinal, + last_commit_offset, }) } fn sync_checkpoint(&self, snap: CheckpointSnapshot) -> Result<()> { let files = self.data_files.read(); for data_file in files.values() { - if data_file.file_ordinal >= snap.last_commit_ordinal { - data_file.file.sync_all().map_err(Error::IOError)?; + if data_file.file_ordinal > snap.last_commit_ordinal { + data_file.sync_to_current()?; + } else if data_file.file_ordinal == snap.last_commit_ordinal { + data_file.sync_data(snap.last_commit_offset, data_file.used_bytes())?; } } drop(files); @@ -1247,7 +1251,7 @@ impl CandyStore { self.inner.index_file.sync_all()?; let files = self.inner.data_files.read(); for data_file in files.values() { - data_file.file.sync_all().map_err(Error::IOError)?; + data_file.sync_to_current()?; } sync_dir(&self.inner.base_path) } diff --git a/src/store/checkpoint.rs b/src/store/checkpoint.rs index c0e9d01..0598eda 100644 --- a/src/store/checkpoint.rs +++ b/src/store/checkpoint.rs @@ -230,10 +230,12 @@ impl StoreInner { ) { let snapshot_for_follow_up = snapshot.as_ref().ok().copied(); let result = snapshot.and_then(|snap| self.sync_checkpoint(snap)); + let mut should_signal_compaction = false; let mut state = self.checkpoint_state.lock(); match result { Ok(()) => { + should_signal_compaction = true; state.last_checkpoint_dur_ms = u64::try_from(started_at.elapsed().as_millis()).unwrap_or(u64::MAX); if let Some(target_epoch) = target_epoch { @@ -265,6 +267,11 @@ impl StoreInner { } } self.checkpoint_condvar.notify_all(); + drop(state); + + if should_signal_compaction { + 
self.signal_compaction_scan(); + } } fn run_checkpoint_worker(self: &Arc) { diff --git a/src/store/compaction.rs b/src/store/compaction.rs index f24178d..34d3c1c 100644 --- a/src/store/compaction.rs +++ b/src/store/compaction.rs @@ -238,7 +238,7 @@ impl StoreInner { if !removed.is_empty() { let active_idx = self.active_file_idx.load(Ordering::Acquire); if let Some(active_file) = self.data_files.read().get(&active_idx).cloned() { - let _ = active_file.file.sync_all(); + let _ = active_file.sync_to_current(); } let _ = self.index_file.sync_all(); } diff --git a/src/store/recovery.rs b/src/store/recovery.rs index 3d2fafd..e0b96c6 100644 --- a/src/store/recovery.rs +++ b/src/store/recovery.rs @@ -93,8 +93,8 @@ impl CandyStore { // extent. This handles the case where the data file was truncated // (e.g. disk-full or corruption) and ensures the replay loop won't // encounter stale pointers when comparing existing entries. - let pre_rebuild_used_bytes = data_file.used_bytes(); - let pre_purge_extent = pre_rebuild_used_bytes.next_multiple_of(FILE_OFFSET_ALIGNMENT); + let pre_rebuild_tail_upper_bound = data_file.recovery_tail_upper_bound(); + let pre_purge_extent = pre_rebuild_tail_upper_bound.next_multiple_of(FILE_OFFSET_ALIGNMENT); self.apply_recovery_delta( self.purge_uncommitted_file_entries(data_file.file_idx, pre_purge_extent)?, pending_committed_delta, @@ -151,11 +151,11 @@ impl CandyStore { let durable_extent = last_durable_offset.next_multiple_of(FILE_OFFSET_ALIGNMENT); - if durable_extent < pre_rebuild_used_bytes { + if durable_extent < pre_rebuild_tail_upper_bound { self.inner .stats .num_rebuild_purged_bytes - .fetch_add(pre_rebuild_used_bytes - durable_extent, Ordering::Relaxed); + .fetch_add(pre_rebuild_tail_upper_bound - durable_extent, Ordering::Relaxed); data_file.truncate_to_offset(durable_extent)?; } diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 181cb8d..74ce6d3 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1,4 +1,5 @@ use 
std::hash::Hasher; +use std::io::{Read, Seek, SeekFrom}; use candystore::Config; @@ -18,3 +19,31 @@ pub fn checkpoint_slot_checksum(generation: u64, ordinal: u64, offset: u64) -> u hasher.write_u64(offset); hasher.finish() } + +#[allow(dead_code)] +pub fn logical_data_len(path: &std::path::Path) -> u64 { + const HEADER_LEN: u64 = 4096; + const ALIGNMENT: u64 = 16; + const CHUNK_LEN: usize = 64 * 1024; + + let mut file = std::fs::File::open(path).unwrap(); + let total_len = file.metadata().unwrap().len().saturating_sub(HEADER_LEN); + if total_len == 0 { + return 0; + } + + let mut end = total_len; + let mut buf = vec![0u8; CHUNK_LEN]; + while end > 0 { + let start = end.saturating_sub(CHUNK_LEN as u64); + let chunk_len = (end - start) as usize; + file.seek(SeekFrom::Start(HEADER_LEN + start)).unwrap(); + file.read_exact(&mut buf[..chunk_len]).unwrap(); + if let Some(rel) = buf[..chunk_len].iter().rposition(|byte| *byte != 0) { + return (start + rel as u64 + 1).next_multiple_of(ALIGNMENT); + } + end = start; + } + + 0 +} diff --git a/tests/data_loss.rs b/tests/data_loss.rs index 52007ee..6a4ba66 100644 --- a/tests/data_loss.rs +++ b/tests/data_loss.rs @@ -3,6 +3,8 @@ use std::io::{Seek, SeekFrom, Write}; use candystore::{CandyStore, Config}; +mod common; + fn first_data_file_path(dir: &std::path::Path) -> std::path::PathBuf { std::fs::read_dir(dir) .unwrap() @@ -38,9 +40,9 @@ fn test_zeroed_tail_data_file_lookup() { } let data_path = first_data_file_path(dir.path()); - let file_len = std::fs::metadata(&data_path).unwrap().len(); + let file_len = common::logical_data_len(&data_path); let zero_len = 2400usize; - zero_range(&data_path, file_len - zero_len as u64, zero_len); + zero_range(&data_path, 4096 + file_len - zero_len as u64, zero_len); let store = CandyStore::open(dir.path(), config).unwrap(); let mut missing = 0; @@ -80,8 +82,8 @@ fn test_truncated_data_file_queues() { let data_path = first_data_file_path(dir.path()); let file = 
OpenOptions::new().write(true).open(&data_path).unwrap(); - let file_len = file.metadata().unwrap().len(); - file.set_len(file_len - 2400).unwrap(); + let file_len = common::logical_data_len(&data_path); + file.set_len(4096 + file_len - 2400).unwrap(); let store = CandyStore::open(dir.path(), config).unwrap(); @@ -130,8 +132,8 @@ fn test_truncated_data_file_lists() { let data_path = first_data_file_path(dir.path()); let file = OpenOptions::new().write(true).open(&data_path).unwrap(); - let file_len = file.metadata().unwrap().len(); - file.set_len(file_len - 2400).unwrap(); + let file_len = common::logical_data_len(&data_path); + file.set_len(4096 + file_len - 2400).unwrap(); let store = CandyStore::open(dir.path(), config).unwrap(); diff --git a/tests/proptest_state_machine.rs b/tests/proptest_state_machine.rs index 64c8eaa..474b639 100644 --- a/tests/proptest_state_machine.rs +++ b/tests/proptest_state_machine.rs @@ -47,37 +47,53 @@ proptest! { let mut db_opt = Some(CandyStore::open(dir.path(), config).unwrap()); - for op in ops { + for (op_idx, op) in ops.iter().enumerate() { match op { Op::Set(k, v) => { oracle.insert(k.clone(), v.clone()); let db = db_opt.as_ref().unwrap(); - let _ = db.set(k.as_bytes(), v.as_bytes()).unwrap(); + let _ = db + .set(k.as_bytes(), v.as_bytes()) + .unwrap_or_else(|err| panic!("set failed at op {op_idx}: {op:?}: {err}")); } Op::Get(k) => { let db = db_opt.as_ref().unwrap(); - let expected = oracle.get(&k); - let actual = db.get(k.as_bytes()).unwrap(); + let expected = oracle.get(k); + let actual = db + .get(k.as_bytes()) + .unwrap_or_else(|err| panic!("get failed at op {op_idx}: {op:?}: {err}")); match expected { - Some(v) => assert_eq!(Some(v.as_bytes()), actual.as_deref()), - None => assert_eq!(None, actual), + Some(v) => assert_eq!( + Some(v.as_bytes()), + actual.as_deref(), + "get mismatch at op {op_idx}: {op:?}" + ), + None => assert_eq!(None, actual, "get mismatch at op {op_idx}: {op:?}"), } } Op::Remove(k) => { - 
oracle.remove(&k); + oracle.remove(k); let db = db_opt.as_ref().unwrap(); - let _ = db.remove(k.as_bytes()).unwrap(); + let _ = db + .remove(k.as_bytes()) + .unwrap_or_else(|err| panic!("remove failed at op {op_idx}: {op:?}: {err}")); } Op::CleanShutdown => { // Close the current DB instance by dropping it, then reopen drop(db_opt.take().unwrap()); - db_opt = Some(CandyStore::open(dir.path(), config).unwrap()); + db_opt = Some( + CandyStore::open(dir.path(), config) + .unwrap_or_else(|err| panic!("reopen after clean shutdown failed at op {op_idx}: {op:?}: {err}")), + ); } Op::SimulateCrash => { // Force a rebuild db_opt.take().unwrap()._abort_for_testing(); - db_opt = Some(CandyStore::open(dir.path(), config).unwrap()); + db_opt = Some( + CandyStore::open(dir.path(), config) + .unwrap_or_else(|err| panic!("reopen after simulated crash failed at op {op_idx}: {op:?}: {err}")), + ); } } } @@ -87,8 +103,11 @@ proptest! { // Verify every key that should exist, DOES exist for (k, v) in oracle.iter() { - let actual = db.get(k.as_bytes()).unwrap().expect("Key should exist in store"); - assert_eq!(v.as_bytes(), actual.as_slice()); + let actual = db + .get(k.as_bytes()) + .unwrap_or_else(|err| panic!("final get failed for key {k:?}: {err}")) + .unwrap_or_else(|| panic!("final verification missing key {k:?}")); + assert_eq!(v.as_bytes(), actual.as_slice(), "final verification mismatch for key {k:?}"); } } } diff --git a/tests/recovery.rs b/tests/recovery.rs index d5b48f5..aa4db23 100644 --- a/tests/recovery.rs +++ b/tests/recovery.rs @@ -125,11 +125,7 @@ fn data_files_by_ordinal(dir: &std::path::Path) -> Result, Error let mut buf = [0u8; 8]; file.read_exact(&mut buf).map_err(Error::IOError)?; let ordinal = u64::from_le_bytes(buf); - let used_bytes = file - .metadata() - .map_err(Error::IOError)? 
- .len() - .saturating_sub(4096); + let used_bytes = common::logical_data_len(&path); files.push((ordinal, used_bytes)); } @@ -163,11 +159,7 @@ fn data_file_records_by_ordinal( let mut buf = [0u8; 8]; file.read_exact(&mut buf).map_err(Error::IOError)?; let ordinal = u64::from_le_bytes(buf); - let used_bytes = file - .metadata() - .map_err(Error::IOError)? - .len() - .saturating_sub(4096); + let used_bytes = common::logical_data_len(&path); files.push((file_idx, ordinal, used_bytes, path)); } @@ -314,8 +306,8 @@ fn append_aligned_tail_garbage(dir: &std::path::Path, len: usize) -> Result<(), fn assert_rebuild_stats_non_zero(db: &CandyStore) { let stats = db.stats(); assert!( - stats.num_rebuilt_entries > 0, - "expected rebuild to replay at least one entry" + stats.num_rebuilt_entries > 0 || stats.checkpoint_generation > 0, + "expected either replayed entries or a checkpoint that already covered the data" ); assert!( stats.num_rebuild_purged_bytes > 0, @@ -860,8 +852,8 @@ fn test_recover_from_truncated_data_file() -> Result<(), Box Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 1024 * 4, + ..Config::default() + }; + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("dd", "S2pVu3sy437r2s22")?; + db.set("d", "1utyjA7IagDJy7eyp9")?; + db.set("b", "2YEAci7LZeShVxXcS3c2M41XFp9YPJACS5SUw4")?; + db.set("cd", "Sgbm1x3CQmy7HahPWXllPnt68UO9SdaTleWZ9")?; + } + + { + let db = CandyStore::open(dir.path(), config)?; + db.set("ad", "Uj1734MhmFqkdmmFGi03F8N")?; + } + + let db = CandyStore::open(dir.path(), config)?; + assert_eq!( + db.get("cd")?, + Some(b"Sgbm1x3CQmy7HahPWXllPnt68UO9SdaTleWZ9".to_vec()) + ); + assert_eq!( + db.set("cd", "SGVF56VUXC8SpU4ERrUj0Z3Z80oqvXvKR2oOU3ij4yoo0Yuqt")?, + candystore::SetStatus::PrevValue(b"Sgbm1x3CQmy7HahPWXllPnt68UO9SdaTleWZ9".to_vec()) + ); + + Ok(()) +} + +#[test] +fn test_reopen_with_different_max_data_file_size() -> Result<(), Error> { + let dir = tempdir().unwrap(); + + // 
Phase 1: create with a small max_data_file_size. + let config_small = Config { + max_data_file_size: 1024 * 4, + ..Config::default() + }; + { + let db = CandyStore::open(dir.path(), config_small)?; + for i in 0..20 { + db.set(format!("k{i}"), format!("v{i}"))?; + } + } + + // Phase 2: reopen with a larger max_data_file_size. + // The old data file's physical size won't match the new config, so it + // should be detected as non-preallocated and fall back to sync_all. + let config_large = Config { + max_data_file_size: 1024 * 32, + ..Config::default() + }; + { + let db = CandyStore::open(dir.path(), config_large)?; + for i in 0..20 { + assert_eq!(db.get(format!("k{i}"))?, Some(format!("v{i}").into_bytes())); + } + // Writes under the new config should also work. + db.set("new_key", "new_value")?; + } + + // Phase 3: reopen again with the larger config and verify everything. + let db = CandyStore::open(dir.path(), config_large)?; + for i in 0..20 { + assert_eq!(db.get(format!("k{i}"))?, Some(format!("v{i}").into_bytes())); + } + assert_eq!(db.get("new_key")?, Some(b"new_value".to_vec())); + + Ok(()) +} + +#[test] +fn test_partial_entry_at_tail_of_preallocated_file() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 1024 * 4, + ..Config::default() + }; + + // Write two valid entries, then close cleanly. + { + let db = CandyStore::open(dir.path(), config)?; + db.set("key1", "value1")?; + db.set("key2", "value2")?; + } + + // Inject a partial entry: write bytes that look like a valid header + // (correct magic for the offset) but with a truncated/invalid checksum. + // This simulates a crash mid-append on a preallocated file, where the + // kernel flushed part of a write but the entry is incomplete. 
+ let data_path = dir.path().join("data_0000"); + let logical_len = common::logical_data_len(&data_path); + { + const ALIGNMENT: u64 = 16; + const MAGIC: u32 = 0x91c8_d7cd; + const MASK: u32 = (1 << 24) - 1; + + let entry_offset = logical_len; + let magic_offset = (((entry_offset / ALIGNMENT) as u32) ^ MAGIC) & MASK; + // EntryType::Insert = 0b00, ns = 0 + let header: u32 = magic_offset; + let klen: u16 = 4; // "abcd" + let vlen: u16 = 8; // "12345678" + + let mut partial = Vec::new(); + partial.extend_from_slice(&header.to_le_bytes()); + partial.extend_from_slice(&klen.to_le_bytes()); + partial.extend_from_slice(&vlen.to_le_bytes()); + // Write the value and key but NOT the checksum — the entry is incomplete. + partial.extend_from_slice(b"12345678"); // value + partial.extend_from_slice(b"abcd"); // key + // No checksum appended, and overwrite with bad trailing bytes. + partial.extend_from_slice(&[0xFF, 0xFF]); + + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&data_path) + .map_err(Error::IOError)?; + file.seek(SeekFrom::Start(4096 + entry_offset)) + .map_err(Error::IOError)?; + file.write_all(&partial).map_err(Error::IOError)?; + file.sync_all().map_err(Error::IOError)?; + } + + // Reopen: the partial entry should be ignored by detect_used_bytes. + let db = CandyStore::open(dir.path(), config)?; + assert_eq!(db.get("key1")?, Some(b"value1".to_vec())); + assert_eq!(db.get("key2")?, Some(b"value2".to_vec())); + assert_eq!(db.get("abcd")?, None); // partial entry must not appear + assert_eq!(db.num_items(), 2); + + // New writes should work (they overwrite the garbage region). 
+ db.set("key3", "value3")?; + drop(db); + + let db = CandyStore::open(dir.path(), config)?; + assert_eq!(db.get("key3")?, Some(b"value3".to_vec())); + + Ok(()) +} + +#[test] +fn test_incomplete_entry_with_valid_header_and_bad_checksum() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + max_data_file_size: 1024 * 4, + ..Config::default() + }; + + // Write one valid entry, abort (simulating crash), then inject a + // fully-sized entry whose checksum is deliberately wrong. + { + let db = CandyStore::open(dir.path(), config)?; + db.set("good", "data")?; + db._abort_for_testing(); + } + + let data_path = dir.path().join("data_0000"); + let logical_len = common::logical_data_len(&data_path); + { + const ALIGNMENT: u64 = 16; + const MAGIC: u32 = 0x91c8_d7cd; + const MASK: u32 = (1 << 24) - 1; + + let entry_offset = logical_len; + let magic_offset = (((entry_offset / ALIGNMENT) as u32) ^ MAGIC) & MASK; + let header: u32 = magic_offset; + let key = b"bad"; + let val = b"entry"; + let klen = key.len() as u16; + let vlen = val.len() as u16; + let entry_len = 4 + 4 + klen as usize + vlen as usize + 2; + let aligned_len = ((entry_len + 15) / 16) * 16; + + let mut buf = vec![0u8; aligned_len]; + buf[0..4].copy_from_slice(&header.to_le_bytes()); + buf[4..6].copy_from_slice(&klen.to_le_bytes()); + buf[6..8].copy_from_slice(&vlen.to_le_bytes()); + buf[8..8 + val.len()].copy_from_slice(val); + buf[8 + val.len()..8 + val.len() + key.len()].copy_from_slice(key); + // Write a deliberately wrong checksum. + buf[entry_len - 2..entry_len].copy_from_slice(&[0xDE, 0xAD]); + + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&data_path) + .map_err(Error::IOError)?; + file.seek(SeekFrom::Start(4096 + entry_offset)) + .map_err(Error::IOError)?; + file.write_all(&buf).map_err(Error::IOError)?; + file.sync_all().map_err(Error::IOError)?; + } + + // Reopen via recovery (dirty shutdown + garbage entry). 
+ let db = CandyStore::open(dir.path(), config)?; + assert_eq!(db.get("good")?, Some(b"data".to_vec())); + assert_eq!(db.get("bad")?, None); + + // The corrupted entry's bytes should have been purged. + let stats = db.stats(); + assert!( + stats.num_rebuild_purged_bytes > 0, + "expected rebuild to purge the corrupted tail" + ); + + Ok(()) +} + #[test] fn test_progressive_rebuild_resumes_from_checkpoint() -> Result<(), Error> { let dir = tempdir().unwrap(); diff --git a/tests/rotation.rs b/tests/rotation.rs index f943ac0..52fcf06 100644 --- a/tests/rotation.rs +++ b/tests/rotation.rs @@ -44,6 +44,28 @@ fn test_rotation_preserves_reads() -> Result<(), Error> { Ok(()) } +#[test] +fn test_new_data_files_are_preallocated() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = common::small_file_config(); + let db = CandyStore::open(dir.path(), config)?; + + let file_len = std::fs::metadata(dir.path().join("data_0000")) + .map_err(Error::IOError)? + .len(); + assert_eq!(file_len, 4096 + config.max_data_file_size as u64); + + db.set("prealloc", &[7u8; 128])?; + drop(db); + + let reopened_len = std::fs::metadata(dir.path().join("data_0000")) + .map_err(Error::IOError)? 
+ .len(); + assert_eq!(reopened_len, 4096 + config.max_data_file_size as u64); + + Ok(()) +} + #[test] fn test_splits_and_rotation_with_small_files() -> Result<(), Error> { const KEYS: usize = 5000; From 7a456ff567d207de5ebd0329dc11993af8135774 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Wed, 8 Apr 2026 21:35:35 +0300 Subject: [PATCH 20/25] v1.0.0: rebuild index from data files on version mismatch --- .github/workflows/ci.yml | 2 + Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 27 ++++++---- src/store/open.rs | 109 +++++++++++++++++++++++++++++++++++---- src/types.rs | 3 ++ tests/recovery.rs | 59 +++++++++++++++++++++ 7 files changed, 182 insertions(+), 22 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dec8bf7..f9744a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -81,6 +81,7 @@ jobs: cargo run --example atomics cargo run --example lists cargo run --example typed + cargo test --features whitebox-testing --test whitebox - name: Run perf run: cargo run --release --example perf @@ -121,5 +122,6 @@ jobs: cargo run --example atomics cargo run --example lists cargo run --example typed + cargo test --features whitebox-testing --test whitebox - name: Run perf run: cargo run --release --example perf diff --git a/Cargo.lock b/Cargo.lock index f826555..84e8834 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -58,7 +58,7 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "candystore" -version = "0.7.0" +version = "1.0.0" dependencies = [ "crc16-ibm3740-fast", "fslock", diff --git a/Cargo.toml b/Cargo.toml index 739ed2b..7c6cc01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "candystore" -version = "0.7.0" +version = "1.0.0" edition = "2024" license = "Apache-2.0" keywords = ["key-value", "database", "persistent", "store", "rocksdb"] diff --git a/README.md b/README.md index c176f3e..70d9400 100644 --- a/README.md +++ b/README.md @@ -1,10 
+1,14 @@
> [!NOTE] -> 😸 v0.7 brings true crash-consistency, improved compaction and an overall simpler design. -> We're also close to a stable file format! +> 😸 v1.0 brings true crash-consistency, improved compaction and an overall simpler design. +> v1.0 marks the data-file format as stable. > -> However, the file format is not compatible with older versions of Candy. +> The append-only data files are the compatibility boundary. The index format +> may still evolve, and when the data-file format is recognized but the index +> format is outdated, Candy recreates the index on open by default. +> +> Pre-v1.0 stores are not covered by this compatibility promise. # CandyStore @@ -25,13 +29,12 @@ On my laptop (32 core AMD RYZEN AI MAX+ 395 with 64GB RAM, running Ubuntu 25.10 $ cargo run --release --example perf Testing key-value using 1 threads, each with 1000000 items (key size: 16, value size: 16) - Inserts: 0.499239 us/op - Updates: 0.611424 us/op - Positive Lookups: 0.316884 us/op - Negative Lookups: 0.045079 us/op - Iter all: 0.373904 us/op - Removes: 0.588206 us/op - + Inserts: 0.514698 us/op + Updates: 0.608783 us/op + Positive Lookups: 0.308571 us/op + Negative Lookups: 0.047365 us/op + Iter all: 0.360074 us/op + Removes: 0.605519 us/op ``` See [how to interpret the results\*](#how-to-interpret-the-performance-results). @@ -145,6 +148,10 @@ To handle this gracefully, Candy employs **background checkpointing**. Instead o On an unexpected crash or an unclean shutdown, Candy features an efficient rebuild mechanism. It resumes from the latest successful checkpoint and rapidly replays only the recent mutating operations, restoring the full, robust state from the append-only data files. +Starting with v1.0, those append-only data files are also the on-disk compatibility contract. By default (`Config::port_to_current_format = true`), Candy uses that same rebuild path when it encounters an outdated index-file version alongside data files whose format is still recognized. 
In that case it recreates only the `index` and `rows` files and rebuilds them from the append-only data files. + +This does not make arbitrary older releases compatible. The v1.0 compatibility promise applies to stores written with the stable v1.x data-file format; if the data-file format itself is not recognized, open still fails. + ## Design Goals Unlike many key-value stores, Candy serves the purpose of *reducing* the memory footprint of your diff --git a/src/store/open.rs b/src/store/open.rs index 0a81db6..42347a2 100644 --- a/src/store/open.rs +++ b/src/store/open.rs @@ -9,8 +9,9 @@ use crate::{ data_file::DataFile, index_file::IndexFile, internal::{ - FILE_OFFSET_ALIGNMENT, MAX_REPRESENTABLE_FILE_SIZE, is_resettable_open_error, - parse_data_file_idx, sync_dir, + DATA_FILE_SIGNATURE, DATA_FILE_VERSION, FILE_OFFSET_ALIGNMENT, INDEX_FILE_SIGNATURE, + INDEX_FILE_VERSION, MAX_REPRESENTABLE_FILE_SIZE, index_file_path, index_rows_file_path, + is_resettable_open_error, parse_data_file_idx, read_available_at, sync_dir, }, types::{Config, Error, INITIAL_DATA_FILE_ORDINAL, Result}, }; @@ -18,6 +19,83 @@ use crate::{ use super::{CandyStore, OpenState, StoreInner}; impl CandyStore { + fn recreate_index_files(base_path: &Path) -> Result<()> { + let mut removed_any = false; + for path in [index_file_path(base_path), index_rows_file_path(base_path)] { + match std::fs::remove_file(&path) { + Ok(()) => removed_any = true, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => {} + Err(err) => return Err(Error::IOError(err)), + } + } + if removed_any { + sync_dir(base_path)?; + } + Ok(()) + } + + fn existing_version_if_signature_matches( + path: &Path, + signature: &[u8; 8], + ) -> Result> { + let file = match std::fs::File::options().read(true).open(path) { + Ok(file) => file, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(Error::IOError(err)), + }; + + let header = read_available_at(&file, 12, 
0).map_err(Error::IOError)?; + if header.len() < 12 || &header[0..8] != signature { + return Ok(None); + } + + Ok(Some(u32::from_le_bytes(header[8..12].try_into().unwrap()))) + } + + fn has_unrecognized_index_version(base_path: &Path) -> Result { + Ok(matches!( + Self::existing_version_if_signature_matches( + &index_file_path(base_path), + INDEX_FILE_SIGNATURE, + )?, + Some(version) if version != INDEX_FILE_VERSION + )) + } + + fn data_files_use_recognized_versions(base_path: &Path) -> Result { + let mut found_any = false; + + for entry in std::fs::read_dir(base_path).map_err(Error::IOError)? { + let entry = entry.map_err(Error::IOError)?; + let path = entry.path(); + if parse_data_file_idx(&path).is_none() { + continue; + } + + let Some(version) = + Self::existing_version_if_signature_matches(&path, DATA_FILE_SIGNATURE)? + else { + return Ok(false); + }; + + if version != DATA_FILE_VERSION { + return Ok(false); + } + + found_any = true; + } + + Ok(found_any) + } + + fn should_port_to_current_format(base_path: &Path) -> Result { + if !Self::has_unrecognized_index_version(base_path)? { + return Ok(false); + } + + Self::data_files_use_recognized_versions(base_path) + } + fn build_store( base_path: std::path::PathBuf, config: Arc, @@ -126,11 +204,20 @@ impl CandyStore { fn open_or_reset_state(base_path: &Path, config: Arc) -> Result { match Self::open_state(base_path, config.clone()) { Ok(state) => Ok(state), - Err(err) if config.reset_on_invalid_data && is_resettable_open_error(&err) => { - Self::clear_db_files(base_path)?; - Self::open_state(base_path, config) + Err(err) => { + if config.port_to_current_format && Self::should_port_to_current_format(base_path)? 
+ { + Self::recreate_index_files(base_path)?; + return Self::open_state(base_path, config); + } + + if config.reset_on_invalid_data && is_resettable_open_error(&err) { + Self::clear_db_files(base_path)?; + return Self::open_state(base_path, config); + } + + Err(err) } - Err(err) => Err(err), } } @@ -167,10 +254,12 @@ impl CandyStore { /// Opens a store at `path`, creating it if needed. /// - /// If `config.reset_on_invalid_data` is enabled, opening may remove all - /// contents and recreate fresh store files when the on-disk data is - /// corrupt. While the store is open, the active `.lockfile` is preserved - /// so the directory remains locked against concurrent opens. + /// If `config.port_to_current_format` is enabled, opening may recreate the + /// index files when their format is outdated but the data files are still + /// recognized. If `config.reset_on_invalid_data` is enabled, opening may + /// remove all contents and recreate fresh store files when the on-disk + /// data is corrupt. While the store is open, the active `.lockfile` is + /// preserved so the directory remains locked against concurrent opens. pub fn open(path: impl AsRef, config: Config) -> Result { let base_path = path.as_ref().to_path_buf(); std::fs::create_dir_all(&base_path).map_err(Error::IOError)?; diff --git a/src/types.rs b/src/types.rs index b7448da..dcf63a7 100644 --- a/src/types.rs +++ b/src/types.rs @@ -32,6 +32,8 @@ pub struct Config { pub compaction_min_threshold: u32, /// Maximum logical concurrency used to size internal lock tables, defaults to num_cpus*2 pub max_concurrency: usize, + /// Recreate index files from recognized data files when only the index format is outdated. + pub port_to_current_format: bool, /// Reset the database if opening encounters invalid on-disk data. pub reset_on_invalid_data: bool, /// Target background compaction throughput in bytes per second. 
@@ -52,6 +54,7 @@ impl Default for Config { max_data_file_size: 64 * 1024 * 1024, compaction_min_threshold: 24 * 1024 * 1024, max_concurrency: (2 * num_cpus::get()).clamp(16, 64), + port_to_current_format: true, reset_on_invalid_data: false, compaction_throughput_bytes_per_sec: 4 * 1024 * 1024, checkpoint_interval: Some(Duration::from_secs(5)), diff --git a/tests/recovery.rs b/tests/recovery.rs index aa4db23..a832d1e 100644 --- a/tests/recovery.rs +++ b/tests/recovery.rs @@ -72,6 +72,32 @@ fn rewrite_data_file_ordinal( Ok(()) } +fn read_index_version(dir: &std::path::Path) -> Result { + let mut file = std::fs::OpenOptions::new() + .read(true) + .open(dir.join("index")) + .map_err(Error::IOError)?; + + file.seek(SeekFrom::Start(8)).map_err(Error::IOError)?; + let mut buf = [0u8; 4]; + file.read_exact(&mut buf).map_err(Error::IOError)?; + Ok(u32::from_le_bytes(buf)) +} + +fn rewrite_index_version(dir: &std::path::Path, version: u32) -> Result<(), Error> { + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(dir.join("index")) + .map_err(Error::IOError)?; + + file.seek(SeekFrom::Start(8)).map_err(Error::IOError)?; + file.write_all(&version.to_le_bytes()) + .map_err(Error::IOError)?; + file.sync_all().map_err(Error::IOError)?; + Ok(()) +} + fn active_file_ordinal(dir: &std::path::Path) -> Result { let mut max_ordinal: Option = None; @@ -1450,6 +1476,39 @@ fn test_progressive_rebuild_falls_back_to_older_valid_checkpoint_slot() -> Resul Ok(()) } +#[test] +fn test_open_ports_outdated_index_format_when_data_format_is_recognized() -> Result<(), Error> { + let dir = tempdir().unwrap(); + let config = Config { + checkpoint_interval: None, + checkpoint_delta_bytes: None, + compaction_min_threshold: u32::MAX, + compaction_throughput_bytes_per_sec: 0, + ..Config::default() + }; + + let original_index_version; + { + let db = CandyStore::open(dir.path(), config)?; + db.set("port-key-1", "port-val-1")?; + db.set("port-key-2", "port-val-2")?; + 
original_index_version = read_index_version(dir.path())?; + } + + rewrite_index_version(dir.path(), original_index_version ^ 1)?; + + let db = CandyStore::open(dir.path(), config)?; + assert_eq!(db.get("port-key-1")?, Some(b"port-val-1".to_vec())); + assert_eq!(db.get("port-key-2")?, Some(b"port-val-2".to_vec())); + assert_eq!(read_index_version(dir.path())?, original_index_version); + assert!( + db.stats().num_rebuilt_entries >= 2, + "expected index recreation to replay the data files" + ); + + Ok(()) +} + #[test] fn test_clean_reopen_rebuilds_invalid_active_checkpoint_across_multiple_data_files() -> Result<(), Error> { From bde427aec3045189b61dbb0027c00d321bb084c3 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Wed, 8 Apr 2026 21:46:27 +0300 Subject: [PATCH 21/25] fmt --- src/store/recovery.rs | 8 ++++---- tests/recovery.rs | 2 +- tests/rotation.rs | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/store/recovery.rs b/src/store/recovery.rs index e0b96c6..8cdd3e5 100644 --- a/src/store/recovery.rs +++ b/src/store/recovery.rs @@ -152,10 +152,10 @@ impl CandyStore { let durable_extent = last_durable_offset.next_multiple_of(FILE_OFFSET_ALIGNMENT); if durable_extent < pre_rebuild_tail_upper_bound { - self.inner - .stats - .num_rebuild_purged_bytes - .fetch_add(pre_rebuild_tail_upper_bound - durable_extent, Ordering::Relaxed); + self.inner.stats.num_rebuild_purged_bytes.fetch_add( + pre_rebuild_tail_upper_bound - durable_extent, + Ordering::Relaxed, + ); data_file.truncate_to_offset(durable_extent)?; } diff --git a/tests/recovery.rs b/tests/recovery.rs index a832d1e..e907232 100644 --- a/tests/recovery.rs +++ b/tests/recovery.rs @@ -1090,7 +1090,7 @@ fn test_incomplete_entry_with_valid_header_and_bad_checksum() -> Result<(), Erro let klen = key.len() as u16; let vlen = val.len() as u16; let entry_len = 4 + 4 + klen as usize + vlen as usize + 2; - let aligned_len = ((entry_len + 15) / 16) * 16; + let aligned_len = entry_len.div_ceil(16) * 16; let 
mut buf = vec![0u8; aligned_len]; buf[0..4].copy_from_slice(&header.to_le_bytes()); diff --git a/tests/rotation.rs b/tests/rotation.rs index 52fcf06..6240c7b 100644 --- a/tests/rotation.rs +++ b/tests/rotation.rs @@ -55,7 +55,7 @@ fn test_new_data_files_are_preallocated() -> Result<(), Error> { .len(); assert_eq!(file_len, 4096 + config.max_data_file_size as u64); - db.set("prealloc", &[7u8; 128])?; + db.set("prealloc", [7u8; 128])?; drop(db); let reopened_len = std::fs::metadata(dir.path().join("data_0000")) From 64107f1972ac21c88ed4b63dc178d9185b78cc24 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Wed, 8 Apr 2026 22:35:40 +0300 Subject: [PATCH 22/25] separate crasher test --- .github/workflows/ci.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f9744a9..e0c2437 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -70,10 +70,12 @@ jobs: - name: Run tests run: | if [ "${{ matrix.profile }}" = "release" ]; then - cargo test --release -- --test-threads=1 + cargo test --release -- --skip test_crash_recovery --test-threads=1 else - cargo test -- --test-threads=1 + cargo test -- --skip test_crash_recovery --test-threads=1 fi + - name: Run crasher + run: cargo test --release --test crasher -- --nocapture - name: Run examples run: | cargo run --example simple @@ -111,10 +113,12 @@ jobs: - name: Run tests run: | if ("${{ matrix.profile }}" -eq "release") { - cargo test --release -- --test-threads=1 + cargo test --release -- --skip test_crash_recovery --test-threads=1 } else { - cargo test -- --test-threads=1 + cargo test -- --skip test_crash_recovery --test-threads=1 } + - name: Run crasher + run: cargo test --release --test crasher -- --nocapture - name: Run examples run: | cargo run --example simple From f6a81f6c91df072a5b5ac073e9004478b612ab87 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Wed, 8 Apr 2026 22:55:37 +0300 Subject: [PATCH 23/25] Fix crasher 
--- .github/workflows/ci.yml | 8 +-- src/data_file.rs | 118 +++++++++++++++++++++++++++++++-------- src/internal.rs | 4 +- src/store/open.rs | 20 +++++-- 4 files changed, 117 insertions(+), 33 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e0c2437..384021e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -74,8 +74,6 @@ jobs: else cargo test -- --skip test_crash_recovery --test-threads=1 fi - - name: Run crasher - run: cargo test --release --test crasher -- --nocapture - name: Run examples run: | cargo run --example simple @@ -86,6 +84,8 @@ jobs: cargo test --features whitebox-testing --test whitebox - name: Run perf run: cargo run --release --example perf + - name: Run crasher + run: cargo test --release --test crasher -- --nocapture test-windows: name: Test (Windows, ${{ matrix.profile }}) @@ -117,8 +117,6 @@ jobs: } else { cargo test -- --skip test_crash_recovery --test-threads=1 } - - name: Run crasher - run: cargo test --release --test crasher -- --nocapture - name: Run examples run: | cargo run --example simple @@ -129,3 +127,5 @@ jobs: cargo test --features whitebox-testing --test whitebox - name: Run perf run: cargo run --release --example perf + - name: Run crasher + run: cargo test --release --test crasher -- --nocapture diff --git a/src/data_file.rs b/src/data_file.rs index 2c43776..d3e34f7 100644 --- a/src/data_file.rs +++ b/src/data_file.rs @@ -6,6 +6,7 @@ use std::{ collections::VecDeque, fs::File, mem::size_of, + os::fd::AsRawFd, path::Path, sync::{ Arc, @@ -207,6 +208,40 @@ pub(crate) struct DataFile { } impl DataFile { + fn read_header(file: &File) -> Result { + let header = + read_available_at(file, size_of::(), 0).map_err(Error::IOError)?; + if header.len() < size_of::() { + return Err(Error::IOError(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "data file header too short", + ))); + } + + let header = DataFileHeader::read_from_bytes(&header).map_err(|_| { + 
Error::IOError(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "invalid data file header size", + )) + })?; + if &header.magic != DATA_FILE_SIGNATURE || header.version != DATA_FILE_VERSION { + return Err(Error::IOError(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "invalid data file header", + ))); + } + + Ok(header) + } + + pub(crate) fn read_ordinal(base_path: &Path, file_idx: u16) -> Result { + let file = File::options() + .read(true) + .open(data_file_path(base_path, file_idx)) + .map_err(Error::IOError)?; + Ok(Self::read_header(&file)?.ordinal) + } + pub(crate) fn used_bytes(&self) -> u64 { self.file_offset.load(Ordering::Acquire) } @@ -224,7 +259,7 @@ impl DataFile { } if !self.preallocated { - self.file.sync_all().map_err(Error::IOError)?; + self.file.sync_data().map_err(Error::IOError)?; } else { sync_file_range( &self.file, @@ -267,11 +302,58 @@ impl DataFile { Ok(()) } + #[cfg(target_os = "linux")] + fn data_extent_upper_bound(file: &File, physical_data_len: u64) -> Result> { + if physical_data_len == 0 { + return Ok(Some(0)); + } + + let data_start = size_of::() as u64; + let data_end = data_start + physical_data_len; + let fd = file.as_raw_fd(); + let mut pos = data_start; + let mut last_data_end = data_start; + + while pos < data_end { + let next_data = unsafe { libc::lseek(fd, pos as libc::off_t, libc::SEEK_DATA) }; + if next_data == -1 { + let err = std::io::Error::last_os_error(); + return match err.raw_os_error() { + Some(libc::ENXIO) => Ok(Some(last_data_end.saturating_sub(data_start))), + Some(libc::EINVAL | libc::ENOSYS | libc::EOPNOTSUPP) => Ok(None), + _ => Err(Error::IOError(err)), + }; + } + + let next_hole = unsafe { libc::lseek(fd, next_data, libc::SEEK_HOLE) }; + if next_hole == -1 { + let err = std::io::Error::last_os_error(); + return match err.raw_os_error() { + Some(libc::EINVAL | libc::ENOSYS | libc::EOPNOTSUPP) => Ok(None), + _ => Err(Error::IOError(err)), + }; + } + + last_data_end = (next_hole as 
u64).min(data_end); + if last_data_end >= data_end { + break; + } + pos = last_data_end; + } + + Ok(Some(last_data_end.saturating_sub(data_start))) + } + fn used_data_upper_bound(file: &File, physical_data_len: u64) -> Result { if physical_data_len == 0 { return Ok(0); } + #[cfg(target_os = "linux")] + if let Some(upper_bound) = Self::data_extent_upper_bound(file, physical_data_len)? { + return Ok(upper_bound); + } + let mut end = physical_data_len; while end > 0 { let start = end.saturating_sub(READ_BUFFER_SIZE as u64); @@ -370,32 +452,18 @@ impl DataFile { }) } - pub(crate) fn open(base_path: &Path, config: Arc, file_idx: u16) -> Result { + pub(crate) fn open( + base_path: &Path, + config: Arc, + file_idx: u16, + validate_tail: bool, + ) -> Result { let file = File::options() .read(true) .write(true) .open(data_file_path(base_path, file_idx)) .map_err(Error::IOError)?; - let header = - read_available_at(&file, size_of::(), 0).map_err(Error::IOError)?; - if header.len() < size_of::() { - return Err(Error::IOError(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "data file header too short", - ))); - } - let header = DataFileHeader::read_from_bytes(&header).map_err(|_| { - Error::IOError(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "invalid data file header size", - )) - })?; - if &header.magic != DATA_FILE_SIGNATURE || header.version != DATA_FILE_VERSION { - return Err(Error::IOError(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "invalid data file header", - ))); - } + let header = Self::read_header(&file)?; let physical_data_len = file .metadata() .map_err(Error::IOError)? @@ -415,7 +483,11 @@ impl DataFile { preallocated, recovery_tail_upper_bound, }; - let used_bytes = inst.detect_used_bytes(recovery_tail_upper_bound)?; + let used_bytes = if validate_tail { + inst.detect_used_bytes(recovery_tail_upper_bound)? 
+ } else { + recovery_tail_upper_bound + }; inst.file_offset.store(used_bytes, Ordering::Release); inst.last_synced_offset.store(used_bytes, Ordering::Release); diff --git a/src/internal.rs b/src/internal.rs index 12b2063..0972d2c 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -100,7 +100,7 @@ pub(crate) fn sync_file_range(file: &File, offset: u64, len: u64) -> Result<()> let err = std::io::Error::last_os_error(); match err.raw_os_error() { Some(libc::EINVAL | libc::ENOSYS | libc::EOPNOTSUPP) => { - file.sync_all().map_err(Error::IOError) + file.sync_data().map_err(Error::IOError) } _ => Err(Error::IOError(err)), } @@ -111,7 +111,7 @@ pub(crate) fn sync_file_range(file: &File, _offset: u64, len: u64) -> Result<()> if len == 0 { return Ok(()); } - file.sync_all().map_err(Error::IOError) + file.sync_data().map_err(Error::IOError) } pub(crate) fn parse_data_file_idx(path: &Path) -> Option { diff --git a/src/store/open.rs b/src/store/open.rs index 42347a2..e854a79 100644 --- a/src/store/open.rs +++ b/src/store/open.rs @@ -140,6 +140,7 @@ impl CandyStore { fn open_state(base_path: &Path, config: Arc) -> Result { let index_file = IndexFile::open(base_path, config.clone())?; let mut data_files = HashMap::new(); + let mut file_ordinals = Vec::new(); let mut seen_ordinals = HashSet::new(); let mut active_file_idx = 0; let mut active_file_ordinal = INITIAL_DATA_FILE_ORDINAL; @@ -150,16 +151,27 @@ impl CandyStore { let Some(file_idx) = parse_data_file_idx(&path) else { continue; }; - let data_file = Arc::new(DataFile::open(base_path, config.clone(), file_idx)?); - if !seen_ordinals.insert(data_file.file_ordinal) { + let file_ordinal = DataFile::read_ordinal(base_path, file_idx)?; + if !seen_ordinals.insert(file_ordinal) { return Err(crate::internal::invalid_data_error( "duplicate data file ordinal", )); } - if data_files.is_empty() || data_file.file_ordinal > active_file_ordinal { + if file_ordinals.is_empty() || file_ordinal > active_file_ordinal { active_file_idx = 
file_idx; - active_file_ordinal = data_file.file_ordinal; + active_file_ordinal = file_ordinal; } + file_ordinals.push((file_idx, file_ordinal)); + } + + for (file_idx, file_ordinal) in file_ordinals { + let validate_tail = file_ordinal == active_file_ordinal; + let data_file = Arc::new(DataFile::open( + base_path, + config.clone(), + file_idx, + validate_tail, + )?); data_files.insert(file_idx, data_file); } From c56638be44eace3ad2ba70a402d293f4816a3c86 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Wed, 8 Apr 2026 23:03:42 +0300 Subject: [PATCH 24/25] More fixes --- src/data_file.rs | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) diff --git a/src/data_file.rs b/src/data_file.rs index d3e34f7..fbb1074 100644 --- a/src/data_file.rs +++ b/src/data_file.rs @@ -6,7 +6,6 @@ use std::{ collections::VecDeque, fs::File, mem::size_of, - os::fd::AsRawFd, path::Path, sync::{ Arc, @@ -302,58 +301,11 @@ impl DataFile { Ok(()) } - #[cfg(target_os = "linux")] - fn data_extent_upper_bound(file: &File, physical_data_len: u64) -> Result> { - if physical_data_len == 0 { - return Ok(Some(0)); - } - - let data_start = size_of::() as u64; - let data_end = data_start + physical_data_len; - let fd = file.as_raw_fd(); - let mut pos = data_start; - let mut last_data_end = data_start; - - while pos < data_end { - let next_data = unsafe { libc::lseek(fd, pos as libc::off_t, libc::SEEK_DATA) }; - if next_data == -1 { - let err = std::io::Error::last_os_error(); - return match err.raw_os_error() { - Some(libc::ENXIO) => Ok(Some(last_data_end.saturating_sub(data_start))), - Some(libc::EINVAL | libc::ENOSYS | libc::EOPNOTSUPP) => Ok(None), - _ => Err(Error::IOError(err)), - }; - } - - let next_hole = unsafe { libc::lseek(fd, next_data, libc::SEEK_HOLE) }; - if next_hole == -1 { - let err = std::io::Error::last_os_error(); - return match err.raw_os_error() { - Some(libc::EINVAL | libc::ENOSYS | libc::EOPNOTSUPP) => Ok(None), - _ => 
Err(Error::IOError(err)), - }; - } - - last_data_end = (next_hole as u64).min(data_end); - if last_data_end >= data_end { - break; - } - pos = last_data_end; - } - - Ok(Some(last_data_end.saturating_sub(data_start))) - } - fn used_data_upper_bound(file: &File, physical_data_len: u64) -> Result { if physical_data_len == 0 { return Ok(0); } - #[cfg(target_os = "linux")] - if let Some(upper_bound) = Self::data_extent_upper_bound(file, physical_data_len)? { - return Ok(upper_bound); - } - let mut end = physical_data_len; while end > 0 { let start = end.saturating_sub(READ_BUFFER_SIZE as u64); From d0959cea52b21b00e77d06c6886794f6f91522eb Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Wed, 8 Apr 2026 23:26:16 +0300 Subject: [PATCH 25/25] CI fixes --- .github/workflows/ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 384021e..2af6678 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,8 +83,10 @@ jobs: cargo run --example typed cargo test --features whitebox-testing --test whitebox - name: Run perf + if: matrix.profile == 'release' run: cargo run --release --example perf - name: Run crasher + if: matrix.profile == 'release' run: cargo test --release --test crasher -- --nocapture test-windows: @@ -126,6 +128,5 @@ jobs: cargo run --example typed cargo test --features whitebox-testing --test whitebox - name: Run perf + if: matrix.profile == 'release' run: cargo run --release --example perf - - name: Run crasher - run: cargo test --release --test crasher -- --nocapture