diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..2af6678
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,132 @@
+name: CI
+
+on:
+ push:
+ pull_request:
+ workflow_dispatch:
+
+permissions:
+ contents: read
+
+env:
+ CARGO_TERM_COLOR: always
+ RUST_BACKTRACE: "1"
+ RUSTFLAGS: "-D warnings"
+
+jobs:
+ fmt:
+ name: Rustfmt
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v6
+ - name: Print runner info
+ run: |
+ echo "cores: $(nproc)"
+ echo "cpu model: $(lscpu | sed -n 's/^Model name:[[:space:]]*//p' | head -n 1)"
+ echo "ram: $(free -h | awk '/Mem:/ {print $2}')"
+ - run: rustup component add rustfmt
+ - run: cargo fmt -- --check
+
+ clippy:
+ name: Clippy
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v6
+ - name: Print runner info
+ run: |
+ echo "cores: $(nproc)"
+ echo "cpu model: $(lscpu | sed -n 's/^Model name:[[:space:]]*//p' | head -n 1)"
+ echo "ram: $(free -h | awk '/Mem:/ {print $2}')"
+ - uses: actions/cache@v5
+ with:
+ path: |
+ ~/.cargo/registry
+ ~/.cargo/git
+ target
+ key: clippy-${{ runner.os }}-${{ hashFiles('**/Cargo.lock') }}
+ - run: rustup component add clippy
+ - run: cargo clippy --all-targets -- -D warnings
+
+ test-ubuntu:
+ name: Test (Ubuntu, ${{ matrix.profile }})
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ profile: [debug, release]
+ steps:
+ - uses: actions/checkout@v6
+ - name: Print runner info
+ run: |
+ echo "cores: $(nproc)"
+ echo "cpu model: $(lscpu | sed -n 's/^Model name:[[:space:]]*//p' | head -n 1)"
+ echo "ram: $(free -h | awk '/Mem:/ {print $2}')"
+ - uses: actions/cache@v5
+ with:
+ path: |
+ ~/.cargo/registry
+ ~/.cargo/git
+ target
+ key: test-ubuntu-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+ - name: Run tests
+ run: |
+ if [ "${{ matrix.profile }}" = "release" ]; then
+ cargo test --release -- --skip test_crash_recovery --test-threads=1
+ else
+ cargo test -- --skip test_crash_recovery --test-threads=1
+ fi
+ - name: Run examples
+ run: |
+ cargo run --example simple
+ cargo run --example multithreaded
+ cargo run --example atomics
+ cargo run --example lists
+ cargo run --example typed
+ cargo test --features whitebox-testing --test whitebox
+ - name: Run perf
+ if: matrix.profile == 'release'
+ run: cargo run --release --example perf
+ - name: Run crasher
+ if: matrix.profile == 'release'
+ run: cargo test --release --test crasher -- --nocapture
+
+ test-windows:
+ name: Test (Windows, ${{ matrix.profile }})
+ runs-on: windows-latest
+ strategy:
+ matrix:
+ profile: [debug, release]
+ steps:
+ - uses: actions/checkout@v6
+ - name: Print runner info
+ shell: pwsh
+ run: |
+ $cpu = Get-CimInstance Win32_Processor | Select-Object -First 1
+ $computer = Get-CimInstance Win32_ComputerSystem
+ Write-Host "cores: $($cpu.NumberOfCores)"
+ Write-Host "cpu model: $($cpu.Name.Trim())"
+ Write-Host "ram: $([math]::Round($computer.TotalPhysicalMemory / 1GB, 2)) GB"
+ - uses: actions/cache@v5
+ with:
+ path: |
+ ~/.cargo/registry
+ ~/.cargo/git
+ target
+ key: test-windows-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+ - name: Run tests
+ run: |
+ if ("${{ matrix.profile }}" -eq "release") {
+ cargo test --release -- --skip test_crash_recovery --test-threads=1
+ } else {
+ cargo test -- --skip test_crash_recovery --test-threads=1
+ }
+ - name: Run examples
+ run: |
+ cargo run --example simple
+ cargo run --example multithreaded
+ cargo run --example atomics
+ cargo run --example lists
+ cargo run --example typed
+ cargo test --features whitebox-testing --test whitebox
+ - name: Run perf
+ if: matrix.profile == 'release'
+ run: cargo run --release --example perf
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
deleted file mode 100644
index 7e98560..0000000
--- a/.github/workflows/ubuntu.yml
+++ /dev/null
@@ -1,42 +0,0 @@
-name: Linux
-
-on:
- push:
- branches: [ "main" ]
- pull_request:
- branches: [ "main" ]
-
-env:
- CARGO_TERM_COLOR: always
-
-jobs:
- build:
-
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v4
- - name: Build
- run: cargo build
- - name: Run tests
- run: cargo test --release -- --nocapture
- - name: Run simple example
- run: cargo run --example simple
- - name: Run multithreaded example
- run: cargo run --example multithreaded
- - name: Run lists example
- run: cargo run --example lists
- - name: Run typed example
- run: cargo run --example typed
- - name: Run perftest
- run: cd candy-perf; cargo run --release
- - name: Run crasher
- run: cd candy-crasher; cargo run --release
- - name: Run longliving
- run: cd candy-longliving; cargo run --release -- 10 40001 10000
- - name: Run mini-candy
- run: cd mini-candy; cargo run
- - name: Run test-list-collisions
- run: cargo test -F whitebox_testing --test test_list_collisions -- --nocapture
- - name: Run test-flush-agg
- run: cargo test -F flush_aggregation --test test_flush_agg -- --nocapture
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
deleted file mode 100644
index 7e079e1..0000000
--- a/.github/workflows/windows.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: Windows
-
-on:
- push:
- branches: [ "main" ]
- pull_request:
- branches: [ "main" ]
-
-env:
- CARGO_TERM_COLOR: always
-
-jobs:
- build:
-
- runs-on: windows-latest
-
- steps:
- - uses: actions/checkout@v4
- - name: Build
- run: cargo build
- - name: Run simple example
- run: cargo run --example simple
diff --git a/.gitignore b/.gitignore
index 95d8dd0..ea8c4bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,18 +1 @@
-# Generated by Cargo
-# will have compiled files and executables
-debug/
-target/
-dbdir/
-
-# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
-# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
-#Cargo.lock
-
-# These are backup files generated by rustfmt
-**/*.rs.bk
-
-# MSVC Windows builds of rustc generate these, which store debugging information
-*.pdb
-
-# Jetbrains files
-.idea
+/target
diff --git a/Cargo.lock b/Cargo.lock
index b62552e..84e8834 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,81 +4,79 @@ version = 4
[[package]]
name = "anyhow"
-version = "1.0.100"
+version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
-name = "bitflags"
-version = "2.10.0"
+name = "atomic-polyfill"
+version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
+checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4"
+dependencies = [
+ "critical-section",
+]
[[package]]
-name = "bumpalo"
-version = "3.19.0"
+name = "autocfg"
+version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
-name = "bytemuck"
-version = "1.24.0"
+name = "bit-set"
+version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4"
+checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
- "bytemuck_derive",
+ "bit-vec",
]
[[package]]
-name = "bytemuck_derive"
-version = "1.10.2"
+name = "bit-vec"
+version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
+checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]]
-name = "candy-crasher"
-version = "0.1.0"
-dependencies = [
- "candystore",
- "libc",
- "rand 0.8.5",
-]
+name = "bitflags"
+version = "2.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
[[package]]
-name = "candy-longliving"
-version = "0.1.0"
-dependencies = [
- "candystore",
-]
+name = "bumpalo"
+version = "3.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
[[package]]
-name = "candy-perf"
-version = "0.1.0"
-dependencies = [
- "candystore",
-]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "candystore"
-version = "0.5.6"
+version = "1.0.0"
dependencies = [
- "anyhow",
- "bytemuck",
- "crossbeam-channel",
- "databuf",
+ "crc16-ibm3740-fast",
"fslock",
"libc",
- "memmap",
+ "memmap2",
+ "num_cpus",
"parking_lot",
- "rand 0.9.2",
+ "postcard",
+ "proptest",
+ "rand 0.10.0",
+ "serde",
"simd-itertools",
"siphasher",
+ "smallvec",
+ "tempfile",
+ "thiserror",
"uuid",
+ "zerocopy",
]
[[package]]
@@ -88,48 +86,108 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
-name = "crossbeam-channel"
-version = "0.5.15"
+name = "chacha20"
+version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
+checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601"
dependencies = [
- "crossbeam-utils",
+ "cfg-if",
+ "cpufeatures",
+ "rand_core 0.10.0",
]
[[package]]
-name = "crossbeam-utils"
-version = "0.8.21"
+name = "cobs"
+version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1"
+dependencies = [
+ "thiserror",
+]
[[package]]
-name = "databuf"
-version = "0.5.0"
+name = "core_detect"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f8f80099a98041a3d1622845c271458a2d73e688351bf3cb999266764b81d48"
+
+[[package]]
+name = "cpufeatures"
+version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e1ad1d99bee317a8dac0b7cd86896c5a5f24307009292985dabbf3e412c8b9d"
+checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201"
dependencies = [
- "databuf-derive",
+ "libc",
]
[[package]]
-name = "databuf-derive"
+name = "crc-fast-gen"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04040c9fc8fcb4084222a26c99faf5b3014772a6115e076b7a50fe49bf25d0ea"
+checksum = "8d4e7ca1a78a554d1675e8489415c76c5fd804686a7b6902ed8ce55ab498364d"
+
+[[package]]
+name = "crc16-ibm3740-fast"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78bd5030bcadf3aa65886c31c60bb36fab8db9eae235ff081acb64ea962aa5d6"
dependencies = [
- "databuf_derive_impl",
+ "core_detect",
+ "crc-fast-gen",
]
[[package]]
-name = "databuf_derive_impl"
-version = "0.2.3"
+name = "critical-section"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b"
+
+[[package]]
+name = "embedded-io"
+version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "daf656eb071fe87d23716f933788a35a8ad6baa6fdbf66a67a261dbd3f9dc81a"
+checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced"
+
+[[package]]
+name = "embedded-io"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "errno"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
- "quote2",
- "syn",
+ "libc",
+ "windows-sys",
]
+[[package]]
+name = "fastrand"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+[[package]]
+name = "foldhash"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+
[[package]]
name = "fslock"
version = "0.2.1"
@@ -142,42 +200,131 @@ dependencies = [
[[package]]
name = "getrandom"
-version = "0.2.16"
+version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [
"cfg-if",
"libc",
- "wasi",
+ "r-efi 5.3.0",
+ "wasip2",
]
[[package]]
name = "getrandom"
-version = "0.3.4"
+version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
dependencies = [
"cfg-if",
"libc",
- "r-efi",
+ "r-efi 6.0.0",
+ "rand_core 0.10.0",
"wasip2",
+ "wasip3",
+]
+
+[[package]]
+name = "hash32"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67"
+dependencies = [
+ "byteorder",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.15.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
+dependencies = [
+ "foldhash",
]
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+
+[[package]]
+name = "heapless"
+version = "0.7.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f"
+dependencies = [
+ "atomic-polyfill",
+ "hash32",
+ "rustc_version",
+ "serde",
+ "spin",
+ "stable_deref_trait",
+]
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
+[[package]]
+name = "id-arena"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
+
+[[package]]
+name = "indexmap"
+version = "2.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.16.1",
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
+
[[package]]
name = "js-sys"
-version = "0.3.82"
+version = "0.3.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65"
+checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
dependencies = [
"once_cell",
"wasm-bindgen",
]
+[[package]]
+name = "leb128fmt"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
+
[[package]]
name = "libc"
-version = "0.2.177"
+version = "0.2.183"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
[[package]]
name = "lock_api"
@@ -189,21 +336,24 @@ dependencies = [
]
[[package]]
-name = "memmap"
-version = "0.7.0"
+name = "log"
+version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
-dependencies = [
- "libc",
- "winapi",
-]
+checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
-name = "mini-candy"
-version = "0.1.0"
+name = "memchr"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+
+[[package]]
+name = "memmap2"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
dependencies = [
- "memmap",
- "siphasher",
+ "libc",
]
[[package]]
@@ -228,11 +378,30 @@ dependencies = [
"target-features",
]
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "num_cpus"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
[[package]]
name = "once_cell"
-version = "1.21.3"
+version = "1.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
[[package]]
name = "parking_lot"
@@ -257,6 +426,19 @@ dependencies = [
"windows-link",
]
+[[package]]
+name = "postcard"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24"
+dependencies = [
+ "cobs",
+ "embedded-io 0.4.0",
+ "embedded-io 0.6.1",
+ "heapless",
+ "serde",
+]
+
[[package]]
name = "ppv-lite86"
version = "0.2.21"
@@ -267,39 +449,57 @@ dependencies = [
]
[[package]]
-name = "proc-macro2"
-version = "1.0.103"
+name = "prettyplease"
+version = "0.2.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
+checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [
- "unicode-ident",
+ "proc-macro2",
+ "syn",
]
[[package]]
-name = "quote"
-version = "1.0.42"
+name = "proc-macro2"
+version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
- "proc-macro2",
+ "unicode-ident",
]
[[package]]
-name = "quote2"
-version = "0.7.0"
+name = "proptest"
+version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "970573b86f7e5795c8c6c50c56ef602368593f0687188da27fd489a59e253630"
+checksum = "37566cb3fdacef14c0737f9546df7cfeadbfbc9fef10991038bf5015d0c80532"
dependencies = [
- "proc-macro2",
- "quote",
- "quote2-macros",
+ "bit-set",
+ "bit-vec",
+ "bitflags",
+ "num-traits",
+ "rand 0.9.2",
+ "rand_chacha",
+ "rand_xorshift",
+ "regex-syntax",
+ "rusty-fork",
+ "tempfile",
+ "unarray",
]
[[package]]
-name = "quote2-macros"
-version = "0.7.0"
+name = "quick-error"
+version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f4b89c37b2d870a28629ad20da669bb0e7d7214878d0d5111b304aa466e1977"
+checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
+
+[[package]]
+name = "quote"
+version = "1.0.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
+dependencies = [
+ "proc-macro2",
+]
[[package]]
name = "r-efi"
@@ -308,15 +508,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
-name = "rand"
-version = "0.8.5"
+name = "r-efi"
+version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
-dependencies = [
- "libc",
- "rand_chacha 0.3.1",
- "rand_core 0.6.4",
-]
+checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
[[package]]
name = "rand"
@@ -324,18 +519,19 @@ version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
- "rand_chacha 0.9.0",
- "rand_core 0.9.3",
+ "rand_chacha",
+ "rand_core 0.9.5",
]
[[package]]
-name = "rand_chacha"
-version = "0.3.1"
+name = "rand"
+version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8"
dependencies = [
- "ppv-lite86",
- "rand_core 0.6.4",
+ "chacha20",
+ "getrandom 0.4.2",
+ "rand_core 0.10.0",
]
[[package]]
@@ -345,25 +541,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
- "rand_core 0.9.3",
+ "rand_core 0.9.5",
]
[[package]]
name = "rand_core"
-version = "0.6.4"
+version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
dependencies = [
- "getrandom 0.2.16",
+ "getrandom 0.3.4",
]
[[package]]
name = "rand_core"
-version = "0.9.3"
+version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
+checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba"
+
+[[package]]
+name = "rand_xorshift"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a"
dependencies = [
- "getrandom 0.3.4",
+ "rand_core 0.9.5",
]
[[package]]
@@ -375,12 +577,52 @@ dependencies = [
"bitflags",
]
+[[package]]
+name = "regex-syntax"
+version = "0.8.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
+
+[[package]]
+name = "rustc_version"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
+dependencies = [
+ "semver",
+]
+
+[[package]]
+name = "rustix"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
+dependencies = [
+ "bitflags",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys",
+]
+
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+[[package]]
+name = "rusty-fork"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2"
+dependencies = [
+ "fnv",
+ "quick-error",
+ "tempfile",
+ "wait-timeout",
+]
+
[[package]]
name = "scopeguard"
version = "1.2.0"
@@ -388,26 +630,68 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
-name = "simd-itertools"
-version = "0.3.0"
+name = "semver"
+version = "1.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a037ed5ba0cb7102a5b720453b642c5b2cf39960edd2ceace91af8ec3743082a"
+checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
- "multiversion",
+ "serde_core",
+ "serde_derive",
]
[[package]]
-name = "simulator"
-version = "0.1.0"
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
- "rand 0.8.5",
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.149"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
+dependencies = [
+ "itoa",
+ "memchr",
+ "serde",
+ "serde_core",
+ "zmij",
+]
+
+[[package]]
+name = "simd-itertools"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a037ed5ba0cb7102a5b720453b642c5b2cf39960edd2ceace91af8ec3743082a"
+dependencies = [
+ "multiversion",
]
[[package]]
name = "siphasher"
-version = "1.0.1"
+version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
+checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
[[package]]
name = "smallvec"
@@ -415,11 +699,26 @@ version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+[[package]]
+name = "spin"
+version = "0.9.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
+dependencies = [
+ "lock_api",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
+
[[package]]
name = "syn"
-version = "2.0.111"
+version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
@@ -432,42 +731,99 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5"
+[[package]]
+name = "tempfile"
+version = "3.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
+dependencies = [
+ "fastrand",
+ "getrandom 0.4.2",
+ "once_cell",
+ "rustix",
+ "windows-sys",
+]
+
+[[package]]
+name = "thiserror"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "unarray"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
+
[[package]]
name = "unicode-ident"
-version = "1.0.22"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
+checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "uuid"
-version = "1.18.1"
+version = "1.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
+checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
-name = "wasi"
-version = "0.11.1+wasi-snapshot-preview1"
+name = "wait-timeout"
+version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
+dependencies = [
+ "libc",
+]
[[package]]
name = "wasip2"
-version = "1.0.1+wasi-0.2.4"
+version = "1.0.2+wasi-0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5"
+dependencies = [
+ "wit-bindgen",
+]
+
+[[package]]
+name = "wasip3"
+version = "0.4.0+wasi-0.3.0-rc-2026-01-06"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
+checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5"
dependencies = [
"wit-bindgen",
]
[[package]]
name = "wasm-bindgen"
-version = "0.2.105"
+version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60"
+checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
dependencies = [
"cfg-if",
"once_cell",
@@ -478,9 +834,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
-version = "0.2.105"
+version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2"
+checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -488,9 +844,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
-version = "0.2.105"
+version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc"
+checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
dependencies = [
"bumpalo",
"proc-macro2",
@@ -501,13 +857,47 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
-version = "0.2.105"
+version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76"
+checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
dependencies = [
"unicode-ident",
]
+[[package]]
+name = "wasm-encoder"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319"
+dependencies = [
+ "leb128fmt",
+ "wasmparser",
+]
+
+[[package]]
+name = "wasm-metadata"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
+dependencies = [
+ "anyhow",
+ "indexmap",
+ "wasm-encoder",
+ "wasmparser",
+]
+
+[[package]]
+name = "wasmparser"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
+dependencies = [
+ "bitflags",
+ "hashbrown 0.15.5",
+ "indexmap",
+ "semver",
+]
+
[[package]]
name = "winapi"
version = "0.3.9"
@@ -536,28 +926,125 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+[[package]]
+name = "windows-sys"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
+
[[package]]
name = "wit-bindgen"
-version = "0.46.0"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
+dependencies = [
+ "wit-bindgen-rust-macro",
+]
+
+[[package]]
+name = "wit-bindgen-core"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc"
+dependencies = [
+ "anyhow",
+ "heck",
+ "wit-parser",
+]
+
+[[package]]
+name = "wit-bindgen-rust"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
+dependencies = [
+ "anyhow",
+ "heck",
+ "indexmap",
+ "prettyplease",
+ "syn",
+ "wasm-metadata",
+ "wit-bindgen-core",
+ "wit-component",
+]
+
+[[package]]
+name = "wit-bindgen-rust-macro"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a"
+dependencies = [
+ "anyhow",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wit-bindgen-core",
+ "wit-bindgen-rust",
+]
+
+[[package]]
+name = "wit-component"
+version = "0.244.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
+checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
+dependencies = [
+ "anyhow",
+ "bitflags",
+ "indexmap",
+ "log",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "wasm-encoder",
+ "wasm-metadata",
+ "wasmparser",
+ "wit-parser",
+]
+
+[[package]]
+name = "wit-parser"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
+dependencies = [
+ "anyhow",
+ "id-arena",
+ "indexmap",
+ "log",
+ "semver",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "unicode-xid",
+ "wasmparser",
+]
[[package]]
name = "zerocopy"
-version = "0.8.30"
+version = "0.8.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ea879c944afe8a2b25fef16bb4ba234f47c694565e97383b36f3a878219065c"
+checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
-version = "0.8.30"
+version = "0.8.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cf955aa904d6040f70dc8e9384444cb1030aed272ba3cb09bbc4ab9e7c1f34f5"
+checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
diff --git a/Cargo.toml b/Cargo.toml
index 15241b8..7c6cc01 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,29 +1,32 @@
[package]
name = "candystore"
-version = "0.5.6"
-edition = "2021"
+version = "1.0.0"
+edition = "2024"
license = "Apache-2.0"
keywords = ["key-value", "database", "persistent", "store", "rocksdb"]
-description = "A lean, efficient and fast peristent in-process key-value store"
+description = "A lean, efficient and fast persistent in-process key-value store"
repository = "https://github.com/sweet-security/candystore"
[dependencies]
-bytemuck = { version = "1.24.0", features = ["derive"] }
-databuf = "0.5.0"
-memmap = "0.7.0"
-siphasher = "1.0.1"
-anyhow = "1.0.100"
-parking_lot = "0.12.5"
-uuid = "1.10.0"
-rand = "0.9.2"
+crc16-ibm3740-fast = "0.5.0"
fslock = "0.2.1"
-libc = "0.2.158"
-crossbeam-channel = "0.5.15"
+libc = "0.2.183"
+memmap2 = "0.9.10"
+num_cpus = "1.17.0"
+parking_lot = "0.12.5"
+postcard = { version = "1.1.3", features = ["use-std"] }
+serde = { version = "1", features = ["derive"] }
simd-itertools = "0.3.0"
+siphasher = "1.0.2"
+smallvec = { version = "1.15.1", features = ["write"] }
+thiserror = "2.0.18"
+uuid = "1.18.1"
+zerocopy = { version = "0.8.47", features = ["derive"] }
-[features]
-whitebox_testing = []
-flush_aggregation = []
+[dev-dependencies]
+proptest = "1.10.0"
+tempfile = "3"
+rand = "0.10.0"
-[workspace]
-members = ["simulator", "candy-crasher", "candy-longliving", "candy-perf", "mini-candy"]
+[features]
+whitebox-testing = []
diff --git a/README.md b/README.md
index 4d7c365..70d9400 100644
--- a/README.md
+++ b/README.md
@@ -1,150 +1,181 @@
-
-
-
v0.5.6 fixes some potential concurrency issues and panics on missing shards after a crash
-
🪟 v0.5.5 now supports Windows (experimental)! 🎉
-
+
+
+> [!NOTE]
+> 😸 v1.0 brings true crash-consistency, improved compaction and an overall simpler design.
+> v1.0 marks the data-file format as stable.
+>
+> The append-only data files are the compatibility boundary. The index format
+> may still evolve, and when the data-file format is recognized but the index
+> format is outdated, Candy recreates the index on open by default.
+>
+> Pre-v1.0 stores are not covered by this compatibility promise.
# CandyStore
-A pure rust implementation of a fast (*blazingly* :tm:, of course), persistent, in-process key-value store, that relies
-on a novel sharding algorithm. Just how blazingly? It's over 9000!
+
+A pure Rust implementation of a fast (*blazingly* :tm:, of course), persistent, in-process
+key-value store that relies on a hash-based sharding algorithm. All operations — lookup,
+insert, and removal — are O(1).
| Operation | Time* |
|-----------|--------|
| Lookup | < 1us |
-| Insert | < 2us |
-| Removal | < 1us |
+| Insert | < 1us |
+| Update | < 2us |
+| Removal | < 2us |
+
+On my laptop (32 core AMD RYZEN AI MAX+ 395 with 64GB RAM, running Ubuntu 25.10 kernel `6.17.0-19-generic`) I'm getting
+
+```bash
+$ cargo run --release --example perf
+
+Testing key-value using 1 threads, each with 1000000 items (key size: 16, value size: 16)
+ Inserts: 0.514698 us/op
+ Updates: 0.608783 us/op
+ Positive Lookups: 0.308571 us/op
+ Negative Lookups: 0.047365 us/op
+ Iter all: 0.360074 us/op
+ Removes: 0.605519 us/op
+```
-The algorithm can be thought of as a "zero-overhead" extension to a hash table stored over files,
-as it's designed to minimizes IO operations. See [the benchmark](candy-perf/README.md) and
-[how to interpret the results*](#how-to-interpret-the-performance-results).
+See [how to interpret the results\*](#how-to-interpret-the-performance-results).
-## Overview
-Being a hash-table, the key is hashed, producing a 64 bit number. The 16 most significant bits select
-the *shard*, followed by 16 bits selecting the *row* in the shard, and the remaining 32 bits serve as an
-opaque signature. The signature is matched against the signature array within the selected row.
-The row also stores the file offset of the entry, which is used to retrive the entry's key and value.
+## APIs
-
+Candy offers
+* A simple key-value API (`get`, `set`, `remove` and atomic operations like `replace`)
+* A typed interface on top (`get`, `set`, etc.)
+* Double-ended queues (`push_to_queue_tail`, `pop_queue_head`, etc.) as well as a typed interface on top of them
+* Lists (`get_from_list`, `set_in_list`, etc.) as well as a typed interface on top of them
+* The DB is completely thread-safe in idiomatic Rust (just `Arc<>` it)
-Each shard is mapped to a shard file, and a shard file can cover a wide range of consecutive shards.
-We begin with a single shard file covering the whole shard span of `[0-65536]`.
+```rust
+use candystore::{CandyStore, Config, Result};
-When a shard file gets too big, or when one of its rows gets full, it undergoes a *split*.
-This operation takes all entries and splits them into a bottom half and a top half (of roughly
-equal sizes). For instance, if the file covered shards `[0-65536)`, after the split we have two files,
-one covering `[0-32768)` and the other covering `[32768-65536)`. This process repeats as needed,
-and essentially builds a tree of shard files. Each file is split independently, and the amount of work
-is constant (unlike LSM trees).
+fn main() -> Result<()> {
+ let db = CandyStore::open("/path/to/db", Config::default())?;
-```
- [0-65536)
- / \
- / \
- [0-32768) [32768-65536)
- / \
- / \
- [0-16384) [16384-32768)
+ db.set("hello", "world")?;
+ let val = db.get("hello")?;
+ assert_eq!(val, Some(b"world".to_vec()));
+ db.remove("hello")?;
+
+ db.set_in_list("cities", "Barcelona", "Spain")?;
+ db.set_in_list("cities", "Chicago", "USA")?;
+ db.set_in_list("cities", "Caracas", "Venezuela")?;
+
+ let cities: Vec<String> = db.iter_list("cities")
+ .map(|res| String::from_utf8(res.unwrap().0).unwrap())
+ .collect();
+
+ assert_eq!(cities, vec!["Barcelona", "Chicago", "Caracas"]);
+
+ Ok(())
+}
```
-The shard file's header (the rows, signatures and file offsets) are kept in an `mmap`, and the rest
-of the file's data is accessed using `pread` and `pwrite`. The file is only ever extended (until either
-a split or *compaction* takes place), so the algorithm is *crash safe*, in the sense that it will always
-return some valid version of a key-value pair, although it might lose unflushed data.
+## Algorithm
-The library puts its faith in the kernel's page cache, and assumes the `mmap` and writes are flushed to
-disk every so often. This allows us to forgo a journal or write-ahead log (WAL).
+The algorithm can be thought of as a "zero-overhead extension" to a hash table stored over
+files, designed to minimize IO operations. It does not employ a WAL or a journal, and instead
+uses append-only files that serve both as a source of truth and as the final data structure.
+Unlike LSM-based stores that need to maintain large SSTables in memory, sort them and later
+merge them, Candy uses a small mmap'ed index that points to on-disk data directly.
-The default parameters (chosen by simulations) are of shards with 64 rows, each with 512 entries. The chances
-of collisions with these parameters are minimal, and they allow for ~90% utilization of the shard, while
-requiring relatively small header tables (32K entries, taking up 384KB). With the expected 90% utilization,
-you should be expect to hold 29.5K keys per shard. For a shard file of 64MB, that's 0.6% overhead.
+
-Because the data structure is a hash table rather than a search tree, insertion, lookup and removal are
-all O(1) operations.
+The core of the algorithm is the concept of *hash coordinates*: breaking up a 64-bit hash
+into two 32-bit values, a *row selector* and a *signature*, which can be thought of
+as coordinates into the rows table. First, the row selector is used to locate the relevant
+row in the table, and the signature locates the column. To find the row, we take the row
+selector's bits and mask them with a *split level mask*, essentially, the number of rows
+in the table.
-The concept can be extended to a distributed database, by adding a layer of master-shards that select a
-server, followed by the normal sharding mechanism described above.
+To locate the signature (32 bits) within the row, we employ a parallel lookup (using SIMD)
+to find the matching column(s). Then we fetch the corresponding pointers for the matched
+columns, from which we extract another 18 bits of entropy. If those bits match as well, we fetch the entry
+from the relevant file (the pointer stores a file index and a file offset).
-## Example
-```rust
-use candystore::{CandyStore, Config, Result};
+Note: the chances of a collision (meaning we fetch a wrong entry from the file) are
+virtually zero, about 1 in 20 billion according to the birthday paradox (a collision in 336
+uniformly-distributed 50-bit numbers).
-fn main() -> Result<()> {
- let db = CandyStore::open("/tmp/candy-dir", Config::default())?;
+Candy supports up to 4096 files, each up to 1GB in size (a span of 4TB). In terms of key-space,
+Candy allows 2^21 rows, each with 336 keys, so a total of 704M keys. The maximum size of a key
+is 16KB and the maximum size of a value is 64KB. Of course these are theoretical limits;
+it would be wise to halve them in practice due to imbalances.
- // simple API
+### Splitting
- db.set("mykey", "myval")?;
- assert_eq!(db.get("mykey")?, Some("myval".into()));
+What happens when a row reaches its limit of 336 keys? We need to split it, of course.
+To do that, we increase the row's split level by one, which means we take an extra bit
+into account when selecting the row. For example, row 2 (0b010) will be split into
+rows 2 (0b0010) and 10 (0b1010). Because the bits are uniformly distributed, we expect about
+half of the entries to move from row 2 to row 10.
- assert_eq!(db.get("yourkey")?, None);
+
- assert_eq!(db.iter().count(), 1);
+Note that splitting may require increasing the global split level (the size of the table)
+which entails doubling the mmap's size. This may sound like a costly operation, but since
+it's file-backed it's mostly only page-table work, and it's amortized. And since we only
+split a single row -- we do not need to rehash the whole table -- the amount of work we
+do is O(1).
- for res in db.iter() {
- let (k, v) = res?;
- assert_eq!(k, Vec::<u8>::from("mykey"));
- assert_eq!(v, Vec::<u8>::from("myval"));
- }
+Another optimization is that the pointer contains 18 bits of the row selector, which means
+we do not need to read and recompute the hash coordinates of the keys. Splitting is thus
+memory-bound.
- assert_eq!(db.iter().count(), 1);
+### Compaction
- // lists API
+Data is always written (appended) to the *active data file*. When a file reaches a certain size,
+Candy rotates the active files and creates a new one. The old file becomes an *immutable data
+file*.
- db.set_in_list("mylist", "key1", "123")?;
- db.set_in_list("mylist", "key2", "456")?;
- assert_eq!(db.get_from_list("mylist", "key1")?, Some("123".into()));
+As data is created, updated and removed, the store accumulates waste. To handle it we have
+*background compaction*: a thread that iterates over the rows table, finds all entries that
+belong to files that should be compacted and rewrites them to the active file. After such
+a pass, it simply deletes the old immutable file since no entry points to it.
- assert_eq!(db.iter_list("mylist").count(), 2);
+You can configure the throughput (bytes per second) of compaction.
- for res in db.iter_list("mylist") {
- let (k, v) = res?;
- println!("{k:?} => {v:?}");
- }
- Ok(())
-}
-```
+### Checkpointing & Rebuild
+
+We trust the operating system to flush the data files and mmap'ed rows table to storage,
+which means that even if your process crashes, your data will be fully consistent. However,
+this is not true on a power failure or a kernel panic — in which case the state of the
+index file is unknown relative to the data files.
+
+To handle this gracefully, Candy employs **background checkpointing**. Instead of synchronously `fsync`ing index and data files on every write (which would block the writer), a background worker asynchronously persists a consistent snapshot of the current state at user-defined intervals or after a configured amount of bytes have been written.
+
+On an unexpected crash or an unclean shutdown, Candy features an efficient rebuild mechanism. It resumes from the latest successful checkpoint and rapidly replays only the recent mutating operations, restoring the full, robust state from the append-only data files.
+
+Starting with v1.0, those append-only data files are also the on-disk compatibility contract. By default (`Config::port_to_current_format = true`), Candy uses that same rebuild path when it encounters an outdated index-file version alongside data files whose format is still recognized. In that case it recreates only the `index` and `rows` files and rebuilds them from the append-only data files.
+
+This does not make arbitrary older releases compatible. The v1.0 compatibility promise applies to stores written with the stable v1.x data-file format; if the data-file format itself is not recognized, open still fails.
## Design Goals
-* Fast and efficient, with a very low memory footprint (~0.6% overhead)
-* No heavy/unbounded merges
-* No Write-Ahead Log (WAL) or journalling of any kind
-* Process crash safe: you may lose the latest operations, but never be in an inconsistent state
- if the process crashes. However, if the machine itself crashes, the data on disk may be in an
- inconsistent state.
-* Splitting/compaction happens per-shard, so there's no global locking
-* Suitable for both write-heavy and read-heavy workloads
-* Concurrent by design (multiple threads getting/setting/removing keys at the same time)
-* The backing store is taken to be an SSD, thus it's not optimized for HDDs
-
-## Notes
-* The file format is not yet stable
-* Uses very little `unsafe` (required due to `mmap`)
-
-## Roadmap
-* Distributed protocol based on file locks (meant to run on a shared network folder)
-* Add generations as an adapter on top, so that older generations are compacted into exponentially larger
- time spans. It's an alternative to TTL, and amortizes the number of times an entry will move around as the
- dataset grows.
-* Maybe add Arithmethic coding/Huffman coding as a cheap compression for the keys and values
-## How to Interpret the Performance Results
-While the numbers above are incredible, it is obvious that any file-backed store will be limited by the
-filesystem's latency and bandwidth. For example, you can expect a read latency of 20-100us from SSDs (NVMe),
-so that's the lower bound on reading a random location in the file.
+Unlike many key-value stores, Candy serves the purpose of *reducing* the memory footprint of your
+process, e.g., offloading data to the disk instead of keeping it in-memory. It intentionally does not
+include any caching/LRU layer like many traditional KVs/DBs.
+
+Example use cases for Candy are
+* A hash table that needs to hold more keys than you can hold in memory
+* Persistent work queues (e.g., producers append large work items to a queue and consumers then fetch
+ and perform them)
+* A caching layer for your application logic
-What the numbers above measure is the performance of the *algorithm*, not the *storage*: given you can spare an
-overhead of 0.6% mapped into memory, lookup/insert/removal require a single disk IO. Replacing (updating) an
-existing element requires two IOs, since it needs to compare the key before writing it anew.
-These IOs may return from the kernel's page cache, in which case it's practically immediate, or from disk,
-in which case you can expect it to take 1-2 round-trip times of your device.
+It is designed to be durable for process crashes (where the kernel will flush everything properly)
+but it does not attempt to optimize for durability under kernel panics (full rebuild).
-Inserting to/removing from a lists require 2-3 IOs, since these operations need to update the list's
-head or tail, as well as a "chain" element. Such operations should really be done with a "large enough page cache".
-Updating/fetching an existing element element in a list is a single IO as above.
+## How to Interpret the Performance Results
-If your memory is too constrainted for keeping the lookup tables mapped-in (i.e., they get evicted to disk),
-you'll incur one more unit of "IO latency" for fetching the row from the table. Since the row spans 2KB (and
-aligned to 4KB), it should behave nicely with 4K IOs.
+While the numbers shown above are incredible, it is obvious that any persistent store will be
+limited by the underlying latency and bandwidth of the storage. For example, you can expect a
+read latency of 20-100us from SSDs (NVMe), so that's the lower bound on reading a random
+location in the file.
-See also [this guide to LTO/PGO](https://github.com/sweet-security/candystore/issues/7) by Alexander Zaitsev.
+What the numbers above measure is the performance of the *algorithm*, not the *storage layer*:
+given the index can be kept mapped into memory (12 bytes per item), lookup and insert require
+a single disk IO, while updating or removing requires two IOs. These IOs may be served
+from the kernel's page cache, in which case you only pay for the syscall's latency, or
+from disk, in which case you can expect it to take 1-2 round-trip times of your device.
diff --git a/algo.png b/algo.png
new file mode 100644
index 0000000..c374baa
Binary files /dev/null and b/algo.png differ
diff --git a/candy-crasher/.gitignore b/candy-crasher/.gitignore
deleted file mode 100644
index 9a59ec5..0000000
--- a/candy-crasher/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-dbdir
diff --git a/candy-crasher/Cargo.toml b/candy-crasher/Cargo.toml
deleted file mode 100644
index 71cb49b..0000000
--- a/candy-crasher/Cargo.toml
+++ /dev/null
@@ -1,9 +0,0 @@
-[package]
-name = "candy-crasher"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-libc = "0.2.155"
-rand = "0.8.5"
-candystore={path=".."}
diff --git a/candy-crasher/README.md b/candy-crasher/README.md
deleted file mode 100644
index 72d62c4..0000000
--- a/candy-crasher/README.md
+++ /dev/null
@@ -1,31 +0,0 @@
-## VickiStore Crasher
-
-Fork a child process to insert 1M keys into the DB, while the parent kills it repeatedly. The test
-makes sure the child is able to make progress as well as making sure the DB remains consistent.
-
-Note: the store is not meant to be used by multiple processes concurrently -- it uses thread syncrhonization,
-not inter-process synchronization. The test uses the store only from a single process at a time.
-
-
-```
-$ cargo run
-child starting at 0
-[0] killing child
-child starting at 20445
-[1] killing child
-child starting at 31656
-[2] killing child
-child starting at 55500
-.
-.
-.
-child starting at 978418
-[219] killing child
-child starting at 982138
-[220] killing child
-child starting at 991255
-child finished
-child finished in 221 iterations
-Parent starts validating the DB...
-DB validated successfully
-```
diff --git a/candy-longliving/.gitignore b/candy-longliving/.gitignore
deleted file mode 100644
index ca63e2e..0000000
--- a/candy-longliving/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-dbdir
-
diff --git a/candy-longliving/Cargo.toml b/candy-longliving/Cargo.toml
deleted file mode 100644
index 347d19d..0000000
--- a/candy-longliving/Cargo.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[package]
-name = "candy-longliving"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-candystore={path=".."}
diff --git a/candy-longliving/src/main.rs b/candy-longliving/src/main.rs
deleted file mode 100644
index 4527053..0000000
--- a/candy-longliving/src/main.rs
+++ /dev/null
@@ -1,69 +0,0 @@
-use std::{
- sync::{atomic::AtomicU64, Arc},
- time::Instant,
-};
-
-use candystore::{CandyStore, CandyTypedList, Config, Result};
-
-fn main() -> Result<()> {
- let args = std::env::args().collect::<Vec<String>>();
- assert!(
- args.len() == 4,
- "usage: {} ",
- args[0]
- );
- let num_threads: usize = args[1].parse().expect("num_threads not a number");
- let num_iters: usize = args[2].parse().expect("num_iters not a number");
- let tail_length: usize = args[3].parse().expect("tail_length not a number");
-
- let db = Arc::new(CandyStore::open(
- "dbdir",
- Config {
- min_compaction_threashold: 1024 * 1024,
- ..Default::default()
- },
- )?);
- db.clear()?;
-
- let mut handles = vec![];
-
- let ops = Arc::new(AtomicU64::new(0));
-
- for thd in 0..num_threads {
- let db = db.clone();
- let ops = ops.clone();
- let h = std::thread::spawn(move || {
- println!("started thread {thd}");
- let typed = CandyTypedList::::new(db.clone());
- let listname = format!("mylist"); //format!("mylist{thd}");
- let mut t0 = Instant::now();
- for i in 0..num_iters {
- if i % 10000 == 0 {
- let t1 = Instant::now();
- println!(
- "thread {thd} at {i} {} rate={}us",
- db.stats(),
- t1.duration_since(t0).as_micros() / 10_000,
- );
- t0 = t1;
- }
-
- typed.set(&listname, &(thd * num_iters + i), "xxx")?;
- ops.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
- if i >= tail_length {
- typed.remove(&listname, &(thd * num_iters + i - tail_length))?;
- ops.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
- }
- }
- Result::<()>::Ok(())
- });
- handles.push(h);
- }
-
- for h in handles {
- h.join().unwrap()?;
- }
- println!("ops={}", ops.load(std::sync::atomic::Ordering::Relaxed));
-
- Ok(())
-}
diff --git a/candy-perf/.gitignore b/candy-perf/.gitignore
deleted file mode 100644
index 9a59ec5..0000000
--- a/candy-perf/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-dbdir
diff --git a/candy-perf/Cargo.toml b/candy-perf/Cargo.toml
deleted file mode 100644
index 0490d50..0000000
--- a/candy-perf/Cargo.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[package]
-name = "candy-perf"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-candystore={path=".."}
diff --git a/candy-perf/README.md b/candy-perf/README.md
deleted file mode 100644
index 1f8bc69..0000000
--- a/candy-perf/README.md
+++ /dev/null
@@ -1,75 +0,0 @@
-Performance results from my machine
-
-* Ubuntu 24.04 LTS
-* Lenovo ThinkPad X1 Carbon Gen 10 (12th Gen Intel® Core™ i7-1260P × 16)
-* RAM: 32.0 GiB
-* SSD: 512 GB
-* Built with `cargo build --release`
-* Running on a local filesystem
-
-### Smallish entries (4 byte keys, 3 byte values)
-```
-1000000 small entries with pre-split
- Small entries insert: 1.347us
- Small entries get 100% existing: 0.477us
- Small entries get 50% existing: 0.474us
- Small entries removal: 0.493us
- Small entries mixed: 1.822us
-
-1000000 small entries without pre-split
- Small entries insert: 4.151us
- Small entries get 100% existing: 0.517us
- Small entries get 50% existing: 0.515us
- Small entries removal: 0.535us
- Small entries mixed: 4.633us
-```
-
-### Largish entries (100 byte keys, 300 byte values)
-```
-500000 large entries with pre-split
- Large entries insert: 1.624us
- Large entries get 100% existing: 0.618us
- Large entries removal: 0.128us
-
-500000 large entries without pre-split
- Large entries insert: 5.422us
- Large entries get 100% existing: 0.731us
- Large entries removal: 0.139us
-```
-
-### Lists
-```
-10 collections with 100000 items in each
- Inserts: 8.104us
- Updates: 2.593us
- Gets: 0.612us
- Iterations: 0.556us
- Removal of 50% items: 7.945us
- Discards: 0.972us
-```
-
-### Threads without contention (different keys)
-```
-No-contention: 10 threads accessing 100000 different keys - with pre-split
- Inserts: 3.238us
- Gets: 1.004us
- Removals: 0.929us
-
-No-contention: 10 threads accessing 100000 different keys - without pre-split
- Inserts: 19.497us
- Gets: 1.119us
- Removals: 1.001us
-```
-
-### Threads with contention (same keys)
-```
-Contention: 10 threads accessing 1000000 same keys - with pre-split
- Inserts: 4.556us
- Gets: 1.204us
- Removals: 1.334us
-
-Contention: 10 threads accessing 1000000 same keys - without pre-split
- Inserts: 12.167us
- Gets: 2.195us
- Removals: 2.257us
-```
diff --git a/candy-perf/src/main.rs b/candy-perf/src/main.rs
deleted file mode 100644
index 6712f4d..0000000
--- a/candy-perf/src/main.rs
+++ /dev/null
@@ -1,431 +0,0 @@
-use candystore::{CandyStore, Config, Result};
-use std::{
- hint::black_box,
- sync::{atomic::AtomicU64, Arc},
- time::Instant,
-};
-
-fn run2(msg: &str, iters: u32, mut func: impl FnMut() -> Result<()>) -> Result<()> {
- let t0 = Instant::now();
- func()?;
- let t1 = Instant::now();
- println!(
- "{msg}: {:.3}us",
- ((t1.duration_since(t0).as_nanos() as f64) / 1000.0) / (iters as f64)
- );
- Ok(())
-}
-
-fn run(msg: &str, iters: u32, mut func: impl FnMut(u32) -> Result<()>) -> Result<()> {
- run2(msg, iters, || {
- for i in 0u32..iters {
- func(i)?;
- }
- Ok(())
- })
-}
-
-fn test_small_keys(num_keys: u32) -> Result<()> {
- for pre_split in [true, false] {
- let db = CandyStore::open(
- "./dbdir",
- Config {
- expected_number_of_keys: if pre_split { num_keys as usize } else { 0 },
- ..Default::default()
- },
- )?;
-
- db.clear()?;
-
- if pre_split {
- println!("{num_keys} small entries with pre-split");
- } else {
- println!("{num_keys} small entries without pre-split");
- }
-
- run(" Small entries insert", num_keys, |i| {
- db.set(&(i * 2).to_le_bytes(), "xxx")?;
- Ok(())
- })?;
-
- run(" Small entries get 100% existing", num_keys, |i| {
- let val = db.get(&(i * 2).to_le_bytes())?;
- black_box(val.unwrap());
- Ok(())
- })?;
-
- run(" Small entries get 50% existing", num_keys, |i| {
- let val = db.get(&(i * 2).to_le_bytes())?;
- black_box(val.unwrap());
- Ok(())
- })?;
-
- run(" Small entries removal", num_keys, |i| {
- let val = db.remove(&(i * 2).to_le_bytes())?;
- black_box(val.unwrap());
- Ok(())
- })?;
-
- db.clear()?;
-
- run(" Small entries mixed", num_keys, |i| {
- db.set(&(i * 2).to_le_bytes(), "xxx")?;
- let val = db.get(&(i / 2).to_le_bytes())?;
- black_box(val);
- if i % 8 == 7 {
- db.remove(&(i / 2).to_le_bytes())?;
- }
- Ok(())
- })?;
-
- println!();
- }
-
- Ok(())
-}
-
-fn test_large_keys(num_keys: u32) -> Result<()> {
- for pre_split in [true, false] {
- let db = CandyStore::open(
- "./dbdir",
- Config {
- expected_number_of_keys: if pre_split { num_keys as usize } else { 0 },
- ..Default::default()
- },
- )?;
-
- db.clear()?;
-
- if pre_split {
- println!("{num_keys} large entries with pre-split");
- } else {
- println!("{num_keys} large entries without pre-split");
- }
-
- run(" Large entries insert", num_keys, |i| {
- let mut key = [99u8; 100];
- key[0..4].copy_from_slice(&i.to_le_bytes());
- let val = [7u8; 300];
- db.set(&key, &val)?;
- Ok(())
- })?;
-
- run(" Large entries get 100% existing", num_keys, |i| {
- let mut key = [99u8; 100];
- key[0..4].copy_from_slice(&i.to_le_bytes());
- let val = db.get(&key)?;
- black_box(val);
- Ok(())
- })?;
-
- run(" Large entries removal", num_keys, |i| {
- let mut key = [99u8; 100];
- key[0..4].copy_from_slice(&i.to_le_bytes());
- let val = db.remove(&(i * 2).to_le_bytes())?;
- black_box(val);
- Ok(())
- })?;
-
- println!();
- }
-
- Ok(())
-}
-
-fn test_lists(num_lists: u32, num_items_per_list: u32) -> Result<()> {
- let db = CandyStore::open(
- "./dbdir",
- Config {
- expected_number_of_keys: (num_lists * num_items_per_list) as usize,
- ..Default::default()
- },
- )?;
-
- println!("{num_lists} lists with {num_items_per_list} items in each");
- run2(" Inserts", num_lists * num_items_per_list, || {
- for list in 0..num_lists {
- for item in 0..num_items_per_list {
- db.set_in_list(&list.to_le_bytes(), &item.to_le_bytes(), "xxx")?;
- }
- }
- Ok(())
- })?;
-
- run2(" Updates", num_lists * num_items_per_list, || {
- for list in 0..num_lists {
- for item in 0..num_items_per_list {
- db.set_in_list(&list.to_le_bytes(), &item.to_le_bytes(), "yyy")?;
- }
- }
- Ok(())
- })?;
-
- run2(" Gets", num_lists * num_items_per_list, || {
- for list in 0..num_lists {
- for item in 0..num_items_per_list {
- let val = db.get_from_list(&list.to_le_bytes(), &item.to_le_bytes())?;
- black_box(val);
- }
- }
- Ok(())
- })?;
-
- run2(" Iterations", num_lists * num_items_per_list, || {
- for list in 0..num_lists {
- let count = db.iter_list(&list.to_le_bytes()).count();
- black_box(count);
- debug_assert_eq!(count, num_items_per_list as usize);
- }
- Ok(())
- })?;
-
- run2(
- " Removal of 50% items",
- num_lists * num_items_per_list / 2,
- || {
- for list in 0..num_lists {
- for item in 0..num_items_per_list {
- if item % 2 == 0 {
- let val = db.remove_from_list(&list.to_le_bytes(), &item.to_le_bytes())?;
- black_box(val.unwrap());
- }
- }
- }
- Ok(())
- },
- )?;
-
- run2(" Discards", num_lists * num_items_per_list / 2, || {
- for list in 0..num_lists {
- db.discard_list(&list.to_le_bytes())?;
- }
- Ok(())
- })?;
-
- println!();
-
- Ok(())
-}
-
-fn test_concurrency_without_contention(num_threads: u32, num_keys: u32) -> Result<()> {
- for pre_split in [true, false] {
- let db = Arc::new(CandyStore::open(
- "./dbdir",
- Config {
- expected_number_of_keys: if pre_split {
- (num_threads * num_keys) as usize
- } else {
- 0
- },
- ..Default::default()
- },
- )?);
- db.clear()?;
-
- if pre_split {
- println!("No-contention: {num_threads} threads accessing {num_keys} different keys - with pre-split");
- } else {
- println!(
- "No-contention: {num_threads} threads accessing {num_keys} different keys - without pre-split"
- );
- }
-
- let insert_time_ns = Arc::new(AtomicU64::new(0));
- let get_time_ns = Arc::new(AtomicU64::new(0));
- let removal_time_ns = Arc::new(AtomicU64::new(0));
-
- let mut handles = vec![];
- for thd in 0..num_threads {
- let db = db.clone();
- let insert_time_ns = insert_time_ns.clone();
- let get_time_ns = get_time_ns.clone();
- let removal_time_ns = removal_time_ns.clone();
-
- let h = std::thread::spawn(move || {
- {
- let t0 = Instant::now();
- for i in thd * num_keys..(thd + 1) * num_keys {
- let status = db.set(&i.to_le_bytes(), &thd.to_le_bytes())?;
- debug_assert!(status.was_created());
- }
- insert_time_ns.fetch_add(
- Instant::now().duration_since(t0).as_nanos() as u64,
- std::sync::atomic::Ordering::SeqCst,
- );
- }
-
- {
- let t0 = Instant::now();
- for i in thd * num_keys..(thd + 1) * num_keys {
- let val = db.get(&i.to_le_bytes())?;
- debug_assert_eq!(val, Some(thd.to_le_bytes().to_vec()), "thd={thd} i={i}");
- black_box(val.unwrap());
- }
- get_time_ns.fetch_add(
- Instant::now().duration_since(t0).as_nanos() as u64,
- std::sync::atomic::Ordering::SeqCst,
- );
- }
-
- {
- let t0 = Instant::now();
- for i in thd * num_keys..(thd + 1) * num_keys {
- let val = db.remove(&i.to_le_bytes())?;
- debug_assert!(val.is_some());
- black_box(val.unwrap());
- }
- removal_time_ns.fetch_add(
- Instant::now().duration_since(t0).as_nanos() as u64,
- std::sync::atomic::Ordering::SeqCst,
- );
- }
-
- Result::<()>::Ok(())
- });
- handles.push(h);
- }
- for h in handles {
- h.join().unwrap()?;
- }
-
- let insert_time_ns = insert_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64;
- let get_time_ns = get_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64;
- let removal_time_ns = removal_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64;
- let ops = (num_threads * num_keys) as f64;
-
- println!(" Inserts: {:.3}us", (insert_time_ns / 1000.0) / ops);
- println!(" Gets: {:.3}us", (get_time_ns / 1000.0) / ops);
- println!(" Removals: {:.3}us", (removal_time_ns / 1000.0) / ops);
- println!();
- }
-
- Ok(())
-}
-
-fn do_inserts(
- thd: u32,
- num_keys: u32,
- insert_time_ns: &Arc,
- db: &Arc,
-) -> Result<()> {
- let t0 = Instant::now();
- for i in 0..num_keys {
- db.set(&i.to_le_bytes(), &thd.to_le_bytes())?;
- }
- insert_time_ns.fetch_add(
- Instant::now().duration_since(t0).as_nanos() as u64,
- std::sync::atomic::Ordering::SeqCst,
- );
- Ok(())
-}
-
-fn do_gets(num_keys: u32, get_time_ns: &Arc, db: &Arc) -> Result<()> {
- let t0 = Instant::now();
- for i in 0..num_keys {
- let val = db.get(&i.to_le_bytes())?;
- black_box(val);
- }
- get_time_ns.fetch_add(
- Instant::now().duration_since(t0).as_nanos() as u64,
- std::sync::atomic::Ordering::SeqCst,
- );
- Ok(())
-}
-
-fn do_removals(
- num_keys: u32,
- removal_time_ns: &Arc,
- db: &Arc,
-) -> Result<()> {
- let t0 = Instant::now();
- for i in 0..num_keys {
- let val = db.remove(&i.to_le_bytes())?;
- black_box(val);
- }
- removal_time_ns.fetch_add(
- Instant::now().duration_since(t0).as_nanos() as u64,
- std::sync::atomic::Ordering::SeqCst,
- );
- Ok(())
-}
-
-fn test_concurrency_with_contention(num_threads: u32, num_keys: u32) -> Result<()> {
- for pre_split in [true, false] {
- let db = Arc::new(CandyStore::open(
- "./dbdir",
- Config {
- expected_number_of_keys: if pre_split {
- (num_threads * num_keys) as usize
- } else {
- 0
- },
- ..Default::default()
- },
- )?);
- db.clear()?;
-
- if pre_split {
- println!(
- "Contention: {num_threads} threads accessing {num_keys} same keys - with pre-split"
- );
- } else {
- println!("Contention: {num_threads} threads accessing {num_keys} same keys - without pre-split");
- }
-
- let insert_time_ns = Arc::new(AtomicU64::new(0));
- let get_time_ns = Arc::new(AtomicU64::new(0));
- let removal_time_ns = Arc::new(AtomicU64::new(0));
-
- let mut handles = vec![];
- for thd in 0..num_threads {
- let db = db.clone();
- let insert_time_ns = insert_time_ns.clone();
- let get_time_ns = get_time_ns.clone();
- let removal_time_ns = removal_time_ns.clone();
-
- let h = std::thread::spawn(move || {
- if thd % 3 == 0 {
- do_inserts(thd, num_keys, &insert_time_ns, &db)?;
- do_gets(num_keys, &get_time_ns, &db)?;
- do_removals(num_keys, &removal_time_ns, &db)?;
- } else if thd % 3 == 1 {
- do_gets(num_keys, &get_time_ns, &db)?;
- do_removals(num_keys, &removal_time_ns, &db)?;
- do_inserts(thd, num_keys, &insert_time_ns, &db)?;
- } else {
- do_removals(num_keys, &removal_time_ns, &db)?;
- do_inserts(thd, num_keys, &insert_time_ns, &db)?;
- do_gets(num_keys, &get_time_ns, &db)?;
- }
-
- Result::<()>::Ok(())
- });
- handles.push(h);
- }
- for h in handles {
- h.join().unwrap()?;
- }
-
- let insert_time_ns = insert_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64;
- let get_time_ns = get_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64;
- let removal_time_ns = removal_time_ns.load(std::sync::atomic::Ordering::SeqCst) as f64;
- let ops = (num_threads * num_keys) as f64;
-
- println!(" Inserts: {:.3}us", (insert_time_ns / 1000.0) / ops);
- println!(" Gets: {:.3}us", (get_time_ns / 1000.0) / ops);
- println!(" Removals: {:.3}us", (removal_time_ns / 1000.0) / ops);
- println!();
- }
-
- Ok(())
-}
-
-fn main() -> Result<()> {
- test_small_keys(1_000_000)?;
- test_large_keys(500_000)?;
- test_lists(10, 100_000)?;
- test_concurrency_without_contention(10, 100_000)?;
- test_concurrency_with_contention(10, 1_000_000)?;
-
- Ok(())
-}
diff --git a/diagram.png b/diagram.png
deleted file mode 100644
index 8485586..0000000
Binary files a/diagram.png and /dev/null differ
diff --git a/examples/atomics.rs b/examples/atomics.rs
index bc2a269..e108abc 100644
--- a/examples/atomics.rs
+++ b/examples/atomics.rs
@@ -14,12 +14,9 @@ use candystore::{CandyStore, Config, GetOrCreateStatus, Result};
// ...
fn main() -> Result<()> {
+ _ = std::fs::remove_dir_all("/tmp/candy-dir");
let db = Arc::new(CandyStore::open("/tmp/candy-dir", Config::default())?);
- // clear the DB just in case we has something there before. in real-life scenarios you would probably
- // not clear the DB every time
- db.clear()?;
-
let mut handles = vec![];
for thd in 0..3 {
let db = db.clone();
diff --git a/examples/lists.rs b/examples/lists.rs
index 1c27853..c9e813b 100644
--- a/examples/lists.rs
+++ b/examples/lists.rs
@@ -1,12 +1,9 @@
use candystore::{CandyStore, Config, Result};
fn main() -> Result<()> {
+ _ = std::fs::remove_dir_all("/tmp/candy-dir");
let db = CandyStore::open("/tmp/candy-dir", Config::default())?;
- // clear the DB just in case we has something there before. in real-life scenarios you would probably
- // not clear the DB every time
- db.clear()?;
-
db.set_in_list("asia", "iraq", "arabic")?;
db.set_in_list("asia", "china", "chinese")?;
db.set_in_list("asia", "russia", "russian")?;
diff --git a/examples/multithreaded.rs b/examples/multithreaded.rs
index 7a0503c..c9f6a59 100644
--- a/examples/multithreaded.rs
+++ b/examples/multithreaded.rs
@@ -4,17 +4,14 @@ use std::{sync::Arc, time::Duration};
use candystore::{CandyStore, Config, Result};
fn main() -> Result<()> {
+ _ = std::fs::remove_dir_all("/tmp/candy-dir");
let db = Arc::new(CandyStore::open("/tmp/candy-dir-mt", Config::default())?);
- // clear the DB just in case we has something there before. in real-life scenarios you would probably
- // not clear the DB every time
- db.clear()?;
-
// clone db and spawn thread 1
let db1 = db.clone();
let h1 = std::thread::spawn(move || -> Result<()> {
for i in 0..100 {
- db1.set(&format!("key{i}"), "thread 1")?;
+ db1.set(format!("key{i}"), "thread 1")?;
std::thread::sleep(Duration::from_millis(1));
}
Ok(())
@@ -24,7 +21,7 @@ fn main() -> Result<()> {
let db2 = db.clone();
let h2 = std::thread::spawn(move || -> Result<()> {
for i in 0..100 {
- db2.set(&format!("key{i}"), "thread 2")?;
+ db2.set(format!("key{i}"), "thread 2")?;
std::thread::sleep(Duration::from_millis(1));
}
Ok(())
@@ -33,7 +30,7 @@ fn main() -> Result<()> {
h1.join().unwrap()?;
h2.join().unwrap()?;
- for res in db.iter() {
+ for res in db.iter_items() {
let (k, v) = res?;
println!(
"{} = {}",
diff --git a/examples/perf.rs b/examples/perf.rs
new file mode 100644
index 0000000..202091d
--- /dev/null
+++ b/examples/perf.rs
@@ -0,0 +1,348 @@
+use candystore::{CandyStore, Config};
+use std::{
+ hint::black_box,
+ sync::{Arc, atomic::AtomicU64},
+ thread,
+ time::Instant,
+};
+
+fn run_perf(
+ store: Arc,
+ n: u32,
+ n_threads: usize,
+ key_size: usize,
+ val_size: usize,
+) -> Result<(), Box> {
+ let mut handles = Vec::new();
+
+ let inserts_us = Arc::new(AtomicU64::new(0));
+ let updates_us = Arc::new(AtomicU64::new(0));
+ let pos_gets_us = Arc::new(AtomicU64::new(0));
+ let neg_gets_us = Arc::new(AtomicU64::new(0));
+ let iter_us = Arc::new(AtomicU64::new(0));
+ let removes_us = Arc::new(AtomicU64::new(0));
+
+ for t in 0..n_threads {
+ let store = store.clone();
+ let inserts_us = inserts_us.clone();
+ let updates_us = updates_us.clone();
+ let pos_gets_us = pos_gets_us.clone();
+ let neg_gets_us = neg_gets_us.clone();
+ let iter_us = iter_us.clone();
+ let removes_us = removes_us.clone();
+
+ let handle = thread::spawn(move || {
+ let mut key = vec![b'k'; key_size.max(4)];
+ let value1 = vec![b'v'; val_size];
+ let value2 = vec![b'V'; val_size];
+ let start_idx = t as u32 * n;
+ let end_idx = start_idx + n;
+
+ {
+ let t0 = Instant::now();
+ for i in start_idx..end_idx {
+ key[..4].copy_from_slice(&i.to_le_bytes());
+ store.set(&key, &value1).unwrap();
+ }
+ let duration = t0.elapsed();
+ inserts_us.fetch_add(
+ duration.as_micros() as u64,
+ std::sync::atomic::Ordering::Relaxed,
+ );
+ }
+
+ {
+ let t0 = Instant::now();
+ for i in start_idx..end_idx {
+ key[..4].copy_from_slice(&i.to_le_bytes());
+ store.set(&key, &value2).unwrap();
+ }
+ let duration = t0.elapsed();
+ updates_us.fetch_add(
+ duration.as_micros() as u64,
+ std::sync::atomic::Ordering::Relaxed,
+ );
+ }
+
+ {
+ let t0 = Instant::now();
+ for i in start_idx..end_idx {
+ key[..4].copy_from_slice(&i.to_le_bytes());
+ store.get(&key).unwrap().unwrap();
+ }
+ let duration = t0.elapsed();
+ pos_gets_us.fetch_add(
+ duration.as_micros() as u64,
+ std::sync::atomic::Ordering::Relaxed,
+ );
+ }
+
+ {
+ let mut key = vec![b'Q'; key_size.max(4)];
+ let t0 = Instant::now();
+ for i in start_idx..end_idx {
+ key[..4].copy_from_slice(&i.to_le_bytes());
+ assert!(store.get(&key).unwrap().is_none());
+ }
+ let duration = t0.elapsed();
+ neg_gets_us.fetch_add(
+ duration.as_micros() as u64,
+ std::sync::atomic::Ordering::Relaxed,
+ );
+ }
+
+ {
+ let t0 = Instant::now();
+ black_box(store.iter_items().count());
+ let duration = t0.elapsed();
+ iter_us.fetch_add(
+ duration.as_micros() as u64,
+ std::sync::atomic::Ordering::Relaxed,
+ );
+ }
+
+ {
+ let t0 = Instant::now();
+ for i in start_idx..end_idx {
+ key[..4].copy_from_slice(&i.to_le_bytes());
+ store.remove(&key).unwrap();
+ }
+ let duration = t0.elapsed();
+ removes_us.fetch_add(
+ duration.as_micros() as u64,
+ std::sync::atomic::Ordering::Relaxed,
+ );
+ }
+ });
+ handles.push(handle);
+ }
+
+ println!(
+ "Testing key-value using {} threads, each with {} items (key size: {}, value size: {})",
+ n_threads, n, key_size, val_size
+ );
+
+ for handle in handles {
+ handle.join().unwrap();
+ }
+
+ println!(
+ " Inserts: {} us/op",
+ inserts_us.load(std::sync::atomic::Ordering::Relaxed) as f64
+ / (n_threads * n as usize) as f64
+ );
+ println!(
+ " Updates: {} us/op",
+ updates_us.load(std::sync::atomic::Ordering::Relaxed) as f64
+ / (n_threads * n as usize) as f64
+ );
+ println!(
+ " Positive Lookups: {} us/op",
+ pos_gets_us.load(std::sync::atomic::Ordering::Relaxed) as f64
+ / (n_threads * n as usize) as f64
+ );
+ println!(
+ " Negative Lookups: {} us/op",
+ neg_gets_us.load(std::sync::atomic::Ordering::Relaxed) as f64
+ / (n_threads * n as usize) as f64
+ );
+ println!(
+ " Iter all: {} us/op",
+ iter_us.load(std::sync::atomic::Ordering::Relaxed) as f64 / (n_threads * n as usize) as f64
+ );
+ println!(
+ " Removes: {} us/op\n",
+ removes_us.load(std::sync::atomic::Ordering::Relaxed) as f64
+ / (n_threads * n as usize) as f64
+ );
+
+ Ok(())
+}
+
+fn run_queue_perf(
+ store: Arc,
+ n: u32,
+ n_threads: usize,
+ val_size: usize,
+) -> Result<(), Box> {
+ let mut handles = Vec::new();
+
+ let pushes_us = Arc::new(AtomicU64::new(0));
+ let pops_us = Arc::new(AtomicU64::new(0));
+
+ for _ in 0..n_threads {
+ let store = store.clone();
+ let pushes_us = pushes_us.clone();
+ let pops_us = pops_us.clone();
+
+ let handle = thread::spawn(move || {
+ let value = vec![b'v'; val_size];
+ {
+ let t0 = Instant::now();
+ for _ in 0..n {
+ store.push_to_queue_tail("myqueue", &value).unwrap();
+ }
+ let duration = t0.elapsed();
+ pushes_us.fetch_add(
+ duration.as_micros() as u64,
+ std::sync::atomic::Ordering::Relaxed,
+ );
+ }
+
+ {
+ let t0 = Instant::now();
+ for _ in 0..n {
+ store.pop_queue_head("myqueue").unwrap().unwrap();
+ }
+ let duration = t0.elapsed();
+ pops_us.fetch_add(
+ duration.as_micros() as u64,
+ std::sync::atomic::Ordering::Relaxed,
+ );
+ }
+ });
+ handles.push(handle);
+ }
+
+ println!(
+ "Testing a queue using {} threads, each with {} items (value size: {})",
+ n_threads, n, val_size
+ );
+ for handle in handles {
+ handle.join().unwrap();
+ }
+
+ println!(
+ " Pushes: {} us/op",
+ pushes_us.load(std::sync::atomic::Ordering::Relaxed) as f64
+ / (n_threads * n as usize) as f64
+ );
+ println!(
+ " Pops: {} us/op\n",
+ pops_us.load(std::sync::atomic::Ordering::Relaxed) as f64 / (n_threads * n as usize) as f64
+ );
+
+ Ok(())
+}
+
+fn run_list_perf(
+ store: Arc,
+ n: u32,
+ n_threads: usize,
+ key_size: usize,
+ val_size: usize,
+) -> Result<(), Box> {
+ let mut handles = Vec::new();
+
+ let sets_us = Arc::new(AtomicU64::new(0));
+ let gets_us = Arc::new(AtomicU64::new(0));
+ let removes_us = Arc::new(AtomicU64::new(0));
+
+ for t in 0..n_threads {
+ let store = store.clone();
+ let sets_us = sets_us.clone();
+ let gets_us = gets_us.clone();
+ let removes_us = removes_us.clone();
+
+ let handle = thread::spawn(move || {
+ let mut key = vec![b'k'; key_size.max(4)];
+ let value = vec![b'v'; val_size];
+ let start_idx = t as u32 * n;
+ let end_idx = start_idx + n;
+
+ {
+ let t0 = Instant::now();
+ for i in start_idx..end_idx {
+ key[..4].copy_from_slice(&i.to_le_bytes());
+ store.set_in_list("mylist", &key, &value).unwrap();
+ }
+ let duration = t0.elapsed();
+ sets_us.fetch_add(
+ duration.as_micros() as u64,
+ std::sync::atomic::Ordering::Relaxed,
+ );
+ }
+
+ {
+ let t0 = Instant::now();
+ for i in start_idx..end_idx {
+ key[..4].copy_from_slice(&i.to_le_bytes());
+ store.get_from_list("mylist", &key).unwrap();
+ }
+ let duration = t0.elapsed();
+ gets_us.fetch_add(
+ duration.as_micros() as u64,
+ std::sync::atomic::Ordering::Relaxed,
+ );
+ }
+
+ {
+ let t0 = Instant::now();
+ for i in start_idx..end_idx {
+ key[..4].copy_from_slice(&i.to_le_bytes());
+ store.remove_from_list("mylist", &key).unwrap();
+ }
+ let duration = t0.elapsed();
+ removes_us.fetch_add(
+ duration.as_micros() as u64,
+ std::sync::atomic::Ordering::Relaxed,
+ );
+ }
+ });
+ handles.push(handle);
+ }
+
+ println!(
+ "Testing a list using {} threads, each with {} items (value size: {})",
+ n_threads, n, val_size
+ );
+ for handle in handles {
+ handle.join().unwrap();
+ }
+
+ println!(
+ " Sets: {} us/op",
+ sets_us.load(std::sync::atomic::Ordering::Relaxed) as f64 / (n_threads * n as usize) as f64
+ );
+ println!(
+ " Gets: {} us/op",
+ gets_us.load(std::sync::atomic::Ordering::Relaxed) as f64 / (n_threads * n as usize) as f64
+ );
+ println!(
+ " Removes: {} us/op\n",
+ removes_us.load(std::sync::atomic::Ordering::Relaxed) as f64
+ / (n_threads * n as usize) as f64
+ );
+
+ Ok(())
+}
+
+fn main() -> Result<(), Box> {
+ let dir = tempfile::tempdir()?;
+ let store = Arc::new(CandyStore::open(
+ dir.path(),
+ Config {
+ checkpoint_delta_bytes: None,
+ checkpoint_interval: None,
+ ..Default::default()
+ },
+ )?);
+
+ // single threaded
+ run_perf(store.clone(), 1_000_000, 1, 16, 16)?;
+ run_perf(store.clone(), 100_000, 1, 1024, 4096)?;
+
+ // multi threaded
+ run_perf(store.clone(), 250_000, 4, 16, 16)?;
+ //run_perf(store.clone(), 10_000, 20, 16, 16)?;
+
+ // queues
+ run_queue_perf(store.clone(), 500_000, 1, 16)?;
+ run_queue_perf(store.clone(), 100_000, 4, 16)?;
+
+ // lists
+ run_list_perf(store.clone(), 500_000, 1, 16, 16)?;
+ run_list_perf(store.clone(), 100_000, 4, 16, 16)?;
+
+ Ok(())
+}
diff --git a/examples/simple.rs b/examples/simple.rs
index df1e568..3b6a4bc 100644
--- a/examples/simple.rs
+++ b/examples/simple.rs
@@ -3,12 +3,9 @@ use core::str;
use candystore::{CandyStore, Config, Result};
fn main() -> Result<()> {
+ _ = std::fs::remove_dir_all("/tmp/candy-dir");
let db = CandyStore::open("/tmp/candy-dir", Config::default())?;
- // clear the DB just in case we has something there before. in real-life scenarios you would probably
- // not clear the DB every time
- db.clear()?;
-
println!("{:?}", db.get("mykey")?); // None
db.set("mykey", "myval")?;
@@ -20,9 +17,9 @@ fn main() -> Result<()> {
println!("{:?}", db.get("mykey")?); // None
for i in 0..10 {
- db.set(&format!("mykey{i}"), &format!("myval{i}"))?;
+ db.set(format!("mykey{i}"), format!("myval{i}"))?;
}
- for res in db.iter() {
+ for res in db.iter_items() {
let (k, v) = res?;
println!(
"{} = {}",
diff --git a/examples/typed.rs b/examples/typed.rs
index f5085d2..71bbf7c 100644
--- a/examples/typed.rs
+++ b/examples/typed.rs
@@ -3,6 +3,7 @@ use std::sync::Arc;
use candystore::{CandyStore, CandyTypedStore, Config, Result};
fn main() -> Result<()> {
+ _ = std::fs::remove_dir_all("/tmp/candy-dir");
let db = Arc::new(CandyStore::open("/tmp/candy-dir", Config::default())?);
let typed = CandyTypedStore::>::new(db);
diff --git a/mini-candy/Cargo.toml b/mini-candy/Cargo.toml
deleted file mode 100644
index 8fe30fd..0000000
--- a/mini-candy/Cargo.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-[package]
-name = "mini-candy"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-memmap = "0.7.0"
-siphasher = "1.0.1"
diff --git a/mini-candy/README.md b/mini-candy/README.md
deleted file mode 100644
index f0e32e9..0000000
--- a/mini-candy/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# Mini Candy
-A very minimal implementation of the algorithm in ~250 lines of code, for educational purposes
diff --git a/mini-candy/src/main.rs b/mini-candy/src/main.rs
deleted file mode 100644
index 263098d..0000000
--- a/mini-candy/src/main.rs
+++ /dev/null
@@ -1,294 +0,0 @@
-//! a very minimal implementation of CandyStore, for educational purposes. handles single-threaded get/set/remove/iter
-//!
-use std::{
- cell::RefCell,
- fs::{File, OpenOptions},
- io::{Seek, Write},
- os::unix::fs::FileExt,
- path::{Path, PathBuf},
-};
-
-use memmap::{MmapMut, MmapOptions};
-use siphasher::sip::SipHasher24;
-
-type Result = std::io::Result;
-const WIDTH: usize = 512;
-const ROWS: usize = 64;
-
-#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
-struct PartedHash(u64);
-
-impl PartedHash {
- const INVALID_SIG: u32 = 0;
- fn new(buf: &[u8]) -> Self {
- Self(SipHasher24::new().hash(buf))
- }
- fn sig(&self) -> u32 {
- if self.0 as u32 == Self::INVALID_SIG {
- 0x12345678 // can't return INVALID_SIG
- } else {
- self.0 as u32
- }
- }
- fn row(&self) -> usize {
- (self.0 as usize >> 32) % ROWS
- }
- fn shard(&self) -> u32 {
- (self.0 >> 48) as u32
- }
-}
-
-#[derive(Debug, Clone, Copy)]
-#[repr(C)]
-struct Descriptor {
- offset: u32,
- klen: u16,
- vlen: u16,
-}
-
-#[repr(C)]
-struct ShardRow {
- sigs: [u32; WIDTH],
- descs: [Descriptor; WIDTH],
-}
-
-#[repr(C)]
-struct ShardHeader {
- rows: [ShardRow; ROWS],
-}
-
-struct ShardFile {
- start: u32,
- end: u32,
- file: RefCell,
- mmap: MmapMut,
-}
-
-type Buf = Vec;
-type KV = (Buf, Buf);
-
-impl ShardFile {
- const HEADER_SIZE: u64 = size_of::() as u64;
-
- fn open(dirpath: impl AsRef, start: u32, end: u32) -> Result {
- let filepath = dirpath.as_ref().join(format!("{start}-{end}"));
- let mut file = OpenOptions::new()
- .read(true)
- .write(true)
- .create(true)
- .truncate(true)
- .open(filepath)?;
- file.set_len(Self::HEADER_SIZE)?;
- file.seek(std::io::SeekFrom::End(0))?;
- let mmap = unsafe {
- MmapOptions::new()
- .len(Self::HEADER_SIZE as usize)
- .map_mut(&file)
- }?;
- Ok(Self {
- start,
- end,
- file: RefCell::new(file),
- mmap,
- })
- }
-
- fn header_row(&self, r: usize) -> &mut ShardRow {
- &mut unsafe { &mut *(self.mmap.as_ptr() as *const ShardHeader as *mut ShardHeader) }.rows[r]
- }
-
- fn read(&self, desc: Descriptor) -> Result {
- let mut k = vec![0; desc.klen as usize];
- let mut v = vec![0; desc.vlen as usize];
- let f = self.file.borrow();
- f.read_exact_at(&mut k, desc.offset as u64)?;
- f.read_exact_at(&mut v, desc.offset as u64 + desc.klen as u64)?;
- Ok((k, v))
- }
- fn write(&self, key: &[u8], val: &[u8]) -> Result {
- let mut f = self.file.borrow_mut();
- let offset = f.stream_position()?;
- f.write_all(key)?;
- f.write_all(val)?;
- Ok(Descriptor {
- offset: offset as u32,
- klen: key.len() as u16,
- vlen: val.len() as u16,
- })
- }
-
- fn get(&self, ph: PartedHash, key: &[u8]) -> Result> {
- let row = self.header_row(ph.row());
- for (i, s) in row.sigs.iter().enumerate() {
- if *s == ph.sig() {
- let desc = row.descs[i];
- let (k, v) = self.read(desc)?;
- if k == key {
- return Ok(Some(v));
- }
- }
- }
- Ok(None)
- }
-
- fn set(&mut self, ph: PartedHash, key: &[u8], val: &[u8]) -> Result {
- let row = self.header_row(ph.row());
- for (i, s) in row.sigs.iter().enumerate() {
- if *s == ph.sig() {
- let desc = row.descs[i];
- let (k, _) = self.read(desc)?;
- if k == key {
- row.descs[i] = self.write(key, val)?;
- return Ok(true);
- }
- }
- }
-
- for (i, s) in row.sigs.iter_mut().enumerate() {
- if *s == PartedHash::INVALID_SIG {
- // insert new
- *s = ph.sig();
- row.descs[i] = self.write(key, val)?;
- return Ok(true);
- }
- }
-
- Ok(false)
- }
-
- fn remove(&mut self, ph: PartedHash, key: &[u8]) -> Result {
- let row = self.header_row(ph.row());
- for (i, s) in row.sigs.iter_mut().enumerate() {
- if *s == ph.sig() {
- let desc = row.descs[i];
- let (k, _) = self.read(desc)?;
- if k == key {
- *s = PartedHash::INVALID_SIG;
- return Ok(true);
- }
- }
- }
- Ok(false)
- }
-
- fn iter<'a>(&'a self) -> impl Iterator- > + 'a {
- (0..ROWS).map(|r| self.header_row(r)).flat_map(|row| {
- row.sigs.iter().enumerate().filter_map(|(i, sig)| {
- if *sig == PartedHash::INVALID_SIG {
- return None;
- }
- Some(self.read(row.descs[i]))
- })
- })
- }
-}
-
-struct Store {
- dirpath: PathBuf,
- shards: Vec
,
-}
-
-impl Store {
- const MAX_SHARD: u32 = u16::MAX as u32 + 1;
-
- fn open(dirpath: impl AsRef) -> Result {
- let dirpath = dirpath.as_ref().to_path_buf();
- std::fs::create_dir_all(&dirpath)?;
- let first_shard = ShardFile::open(&dirpath, 0, Self::MAX_SHARD)?;
- Ok(Self {
- dirpath,
- shards: vec![first_shard],
- })
- }
-
- fn get(&self, key: &[u8]) -> Result> {
- let ph = PartedHash::new(key);
- for shard in self.shards.iter() {
- if ph.shard() < shard.end {
- return shard.get(ph, key);
- }
- }
- unreachable!();
- }
-
- fn remove(&mut self, key: &[u8]) -> Result {
- let ph = PartedHash::new(key);
- for shard in self.shards.iter_mut() {
- if ph.shard() < shard.end {
- return shard.remove(ph, key);
- }
- }
- unreachable!();
- }
-
- fn split(&mut self, shard_idx: usize) -> Result<()> {
- let removed_shard = self.shards.remove(shard_idx);
-
- let start = removed_shard.start;
- let end = removed_shard.end;
- let mid = (removed_shard.start + removed_shard.end) / 2;
- println!("splitting [{start}, {end}) to [{start}, {mid}) and [{mid}, {end})");
-
- let mut bottom = ShardFile::open(&self.dirpath, start, mid)?;
- let mut top = ShardFile::open(&self.dirpath, mid, end)?;
-
- for res in removed_shard.iter() {
- let (key, val) = res?;
- let ph = PartedHash::new(&key);
- if ph.shard() < mid {
- bottom.set(ph, &key, &val)?;
- } else {
- top.set(ph, &key, &val)?;
- }
- }
-
- std::fs::remove_file(self.dirpath.join(format!("{start}-{end}")))?;
-
- self.shards.push(bottom);
- self.shards.push(top);
- self.shards.sort_by(|x, y| x.end.cmp(&y.end));
- Ok(())
- }
-
- fn set(&mut self, key: &[u8], val: &[u8]) -> Result {
- let ph = PartedHash::new(key);
- loop {
- let mut shard_to_split = None;
- for (i, shard) in self.shards.iter_mut().enumerate() {
- if ph.shard() < shard.end {
- if shard.set(ph, key, val)? {
- return Ok(true);
- }
- shard_to_split = Some(i);
- break;
- }
- }
- self.split(shard_to_split.unwrap())?;
- }
- }
-
- fn iter<'a>(&'a self) -> impl Iterator- > + 'a {
- self.shards.iter().flat_map(|shard| shard.iter())
- }
-}
-
-fn main() -> Result<()> {
- let mut db = Store::open("/tmp/mini-dbdir")?;
- db.set(b"hello", b"world")?;
-
- println!("{:?}", db.get(b"hello")?);
- println!("{:?}", db.get(b"nonexistent")?);
-
- db.remove(b"hello")?;
- println!("{:?}", db.get(b"hello")?);
-
- println!("{}", db.iter().count());
-
- for i in 0..100_000u32 {
- db.set(&i.to_le_bytes(), &(i * 2).to_le_bytes())?;
- }
-
- println!("{}", db.iter().count());
-
- Ok(())
-}
diff --git a/simulator/Cargo.lock b/simulator/Cargo.lock
deleted file mode 100644
index c8878b3..0000000
--- a/simulator/Cargo.lock
+++ /dev/null
@@ -1,140 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 3
-
-[[package]]
-name = "byteorder"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
-
-[[package]]
-name = "cfg-if"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
-
-[[package]]
-name = "getrandom"
-version = "0.2.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
-dependencies = [
- "cfg-if",
- "libc",
- "wasi",
-]
-
-[[package]]
-name = "libc"
-version = "0.2.155"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
-
-[[package]]
-name = "ppv-lite86"
-version = "0.2.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dee4364d9f3b902ef14fab8a1ddffb783a1cb6b4bba3bfc1fa3922732c7de97f"
-dependencies = [
- "zerocopy",
-]
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.86"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.36"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "rand"
-version = "0.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
-dependencies = [
- "libc",
- "rand_chacha",
- "rand_core",
-]
-
-[[package]]
-name = "rand_chacha"
-version = "0.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
-dependencies = [
- "ppv-lite86",
- "rand_core",
-]
-
-[[package]]
-name = "rand_core"
-version = "0.6.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
-dependencies = [
- "getrandom",
-]
-
-[[package]]
-name = "simulator"
-version = "0.1.0"
-dependencies = [
- "rand",
-]
-
-[[package]]
-name = "syn"
-version = "2.0.72"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "unicode-ident"
-version = "1.0.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
-
-[[package]]
-name = "wasi"
-version = "0.11.0+wasi-snapshot-preview1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
-
-[[package]]
-name = "zerocopy"
-version = "0.6.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "854e949ac82d619ee9a14c66a1b674ac730422372ccb759ce0c39cabcf2bf8e6"
-dependencies = [
- "byteorder",
- "zerocopy-derive",
-]
-
-[[package]]
-name = "zerocopy-derive"
-version = "0.6.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml
deleted file mode 100644
index bde7056..0000000
--- a/simulator/Cargo.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[package]
-name = "simulator"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-rand = "0.8.5"
diff --git a/simulator/README.md b/simulator/README.md
deleted file mode 100644
index a86e22e..0000000
--- a/simulator/README.md
+++ /dev/null
@@ -1,51 +0,0 @@
-# Candy Simulator
-
-* Tests the fill level that each shard can have using different params
-* Tests the number of collisions in the same row (signatures)
-* Tests the performance of position_simd for various sizes (compile with `--release`)
-
-# Results
-```
-r= 32 w= 32 avg=0.687102 elems= 1024 sz= 12KB collisions=0 collisions-probability=0.000000115483993
-r= 32 w= 64 avg=0.755089 elems= 2048 sz= 24KB collisions=0 collisions-probability=0.000000469386467
-r= 32 w= 128 avg=0.832785 elems= 4096 sz= 48KB collisions=0 collisions-probability=0.000001892445681 GOOD
-r= 32 w= 256 avg=0.871744 elems= 8192 sz= 96KB collisions=0 collisions-probability=0.000007599563332 GOOD
-r= 32 w= 512 avg=0.907163 elems= 16384 sz= 192KB collisions=0 collisions-probability=0.000030457509641 GOOD
-r= 32 w=1024 avg=0.935280 elems= 32768 sz= 384KB collisions=0 collisions-probability=0.000121943667477 GOOD
-r= 64 w= 32 avg=0.647315 elems= 2048 sz= 24KB collisions=0 collisions-probability=0.000000115483993
-r= 64 w= 64 avg=0.728652 elems= 4096 sz= 48KB collisions=0 collisions-probability=0.000000469386467
-r= 64 w= 128 avg=0.805568 elems= 8192 sz= 96KB collisions=0 collisions-probability=0.000001892445681 GOOD
-r= 64 w= 256 avg=0.853133 elems= 16384 sz= 192KB collisions=0 collisions-probability=0.000007599563332 GOOD
-r= 64 w= 512 avg=0.899420 elems= 32768 sz= 384KB collisions=0 collisions-probability=0.000030457509641 GOOD
-r= 64 w=1024 avg=0.927043 elems= 65536 sz= 768KB collisions=6 collisions-probability=0.000121943667477 GOOD
-r= 128 w= 32 avg=0.615332 elems= 4096 sz= 48KB collisions=0 collisions-probability=0.000000115483993
-r= 128 w= 64 avg=0.708627 elems= 8192 sz= 96KB collisions=0 collisions-probability=0.000000469386467
-r= 128 w= 128 avg=0.784355 elems= 16384 sz= 192KB collisions=0 collisions-probability=0.000001892445681
-r= 128 w= 256 avg=0.843362 elems= 32768 sz= 384KB collisions=0 collisions-probability=0.000007599563332 GOOD
-r= 128 w= 512 avg=0.884743 elems= 65536 sz= 768KB collisions=0 collisions-probability=0.000030457509641 GOOD
-r= 128 w=1024 avg=0.920297 elems= 131072 sz=1536KB collisions=3 collisions-probability=0.000121943667477 GOOD BIG
-r= 256 w= 32 avg=0.599061 elems= 8192 sz= 96KB collisions=0 collisions-probability=0.000000115483993
-r= 256 w= 64 avg=0.688738 elems= 16384 sz= 192KB collisions=0 collisions-probability=0.000000469386467
-r= 256 w= 128 avg=0.768617 elems= 32768 sz= 384KB collisions=0 collisions-probability=0.000001892445681
-r= 256 w= 256 avg=0.832496 elems= 65536 sz= 768KB collisions=0 collisions-probability=0.000007599563332 GOOD
-r= 256 w= 512 avg=0.877548 elems= 131072 sz=1536KB collisions=0 collisions-probability=0.000030457509641 GOOD BIG
-r= 256 w=1024 avg=0.914863 elems= 262144 sz=3072KB collisions=6 collisions-probability=0.000121943667477 GOOD BIG
-```
-
-```
-width= 32 time per simd= 4ns
-width= 64 time per simd= 21ns
-width= 128 time per simd= 26ns
-width= 256 time per simd= 36ns
-width= 512 time per simd= 59ns
-width=1024 time per simd= 100ns
-```
-
-```
-width= 32 time per non-simd= 25ns
-width= 64 time per non-simd= 53ns
-width= 128 time per non-simd= 85ns
-width= 256 time per non-simd= 145ns
-width= 512 time per non-simd= 266ns
-width=1024 time per non-simd= 507ns
-```
diff --git a/simulator/rust-toolchain.toml b/simulator/rust-toolchain.toml
deleted file mode 100644
index 5d56faf..0000000
--- a/simulator/rust-toolchain.toml
+++ /dev/null
@@ -1,2 +0,0 @@
-[toolchain]
-channel = "nightly"
diff --git a/simulator/src/main.rs b/simulator/src/main.rs
deleted file mode 100644
index 8da6c1c..0000000
--- a/simulator/src/main.rs
+++ /dev/null
@@ -1,226 +0,0 @@
-#![feature(btree_cursors)]
-use std::{collections::BTreeMap, sync::atomic::AtomicUsize, time::Instant, u32};
-
-#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
-struct PartedHash {
- shard_idx: u32,
- row_idx: u32,
- signature: u32,
-}
-
-impl PartedHash {
- fn new_random() -> Self {
- Self {
- shard_idx: rand::random(),
- row_idx: rand::random(),
- signature: rand::random(),
- }
- }
-}
-
-#[derive(Debug, Default, Clone)]
-struct ShardRow {
- entries: Vec
,
-}
-
-static TOTAL_COLLISIONS: AtomicUsize = AtomicUsize::new(0);
-
-#[derive(Debug)]
-struct Shard {
- row_width: usize,
- total: usize,
- rows: Vec,
-}
-impl Shard {
- fn new(num_rows: usize, row_width: usize) -> Self {
- Self {
- row_width,
- total: 0,
- rows: vec![ShardRow::default(); num_rows],
- }
- }
- fn add(&mut self, h: PartedHash) -> bool {
- let len = self.rows.len();
- let row = &mut self.rows[(h.row_idx as usize) % len];
- if row.entries.len() >= self.row_width {
- false
- } else {
- if row
- .entries
- .iter()
- .find(|h2| h2.signature == h.signature)
- .is_some()
- {
- TOTAL_COLLISIONS.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
- }
- row.entries.push(h);
- self.total += 1;
- true
- }
- }
-}
-
-struct DB {
- num_rows: usize,
- row_width: usize,
- total: usize,
- num_splits: usize,
- fill_level_on_split: usize,
- fill_levels: Vec,
- shards: BTreeMap,
-}
-impl DB {
- fn new(num_rows: usize, row_width: usize) -> Self {
- let mut bt = BTreeMap::new();
- bt.insert(1 << 32, Shard::new(num_rows, row_width));
- Self {
- num_rows,
- row_width,
- total: 0,
- num_splits: 0,
- fill_level_on_split: 0,
- fill_levels: vec![],
- shards: bt,
- }
- }
- fn add(&mut self, to_add: PartedHash) {
- let (key_before, key_after) = {
- let shard_idx = to_add.shard_idx as u64;
- let mut cursor = self
- .shards
- .lower_bound_mut(std::ops::Bound::Excluded(&shard_idx));
- let key_before = cursor.peek_prev().map(|(k, _)| *k).unwrap_or(0);
- let Some((key_after, shard)) = cursor.peek_next() else {
- panic!("no key_after for 0x{:x}", to_add.shard_idx);
- };
-
- if shard.add(to_add) {
- self.total += 1;
- return;
- }
- (key_before, *key_after)
- };
-
- let prev_shard = self.shards.remove(&key_after).unwrap();
- let midpoint = (key_before / 2) + (key_after / 2);
- self.shards
- .insert(midpoint, Shard::new(self.num_rows, self.row_width));
- self.shards
- .insert(key_after, Shard::new(self.num_rows, self.row_width));
-
- self.num_splits += 1;
- self.fill_level_on_split += prev_shard.total;
-
- /*println!(
- "split ({:3}) 0x{key_before:08x}..0x{midpoint:08x}..0x{key_after:09x} [total: {:8}, shard avg fill: {:.4}, shard size: {}]",
- self.num_splits,
- self.total,
- ((self.fill_level_on_split as f64) / (self.num_splits as f64))
- / ((self.num_rows * self.row_width) as f64),
- self.num_rows * self.row_width
- );*/
- self.fill_levels.push(
- ((self.fill_level_on_split as f64) / (self.num_splits as f64))
- / ((self.num_rows * self.row_width) as f64),
- );
- self.total -= prev_shard.total;
-
- for row in prev_shard.rows.iter() {
- for h in row.entries.iter() {
- self.add(*h);
- }
- }
- self.add(to_add);
- }
-}
-
-fn main() {
- for rows in [32, 64, 128, 256] {
- for width in [32, 64, 128, 256, 512, 1024] {
- let mut db = DB::new(rows, width);
- let mut added = 0;
- TOTAL_COLLISIONS.store(0, std::sync::atomic::Ordering::SeqCst);
- for _ in 0..100 {
- for _ in 0..db.num_rows * db.row_width {
- db.add(PartedHash::new_random());
- added += 1;
- }
- }
-
- let mut summed = 0;
- for (_, sh) in db.shards.iter() {
- summed += sh.total;
- }
-
- let mut summed_last_fills = 0.0;
- for lf in db.fill_levels.iter() {
- summed_last_fills += lf;
- }
-
- assert_eq!(db.total, summed);
- assert_eq!(db.total, added);
- let avg = summed_last_fills / (db.fill_levels.len() as f64);
- let sz = (db.num_rows * db.row_width * 12) / 1024;
- println!(
- "r={rows:4} w={width:4} avg={:.6} elems={:7} sz={:4}KB collisions={} collisions-probability={:.015} {} {}",
- avg,
- db.num_rows * db.row_width,
- sz,
- TOTAL_COLLISIONS.load(std::sync::atomic::Ordering::SeqCst),
- 1.0 - (-(width as f64) * (width as f64 - 1.0) / ((1u64 << 33) as f64)).exp(),
- if avg > 0.8 {"GOOD"} else {""},
- if sz > 800 {"BIG"} else {""},
- );
- }
- }
-
- let reps = 10_000_000usize;
- for width in [32, 64, 128, 256, 512, 1024] {
- let mut v = vec![0u32; width];
- for i in 0..width {
- v[i] = i as u32;
- }
- v[width - 1] = 80808080;
- assert_eq!(v.iter().position(|x| *x == 80808080), Some(width - 1));
- assert_eq!(v.iter().position(|x| *x == 80808081), None);
- let mut pos: usize = 0;
-
- let t0 = Instant::now();
- for _ in 0..reps {
- pos += v.iter().position(|x| *x == 80808080).unwrap_or(0);
- pos += v.iter().position(|x| *x == 80808081).unwrap_or(0);
- }
-
- println!(
- "width={width:4} time per simd={:4}ns",
- Instant::now().duration_since(t0).as_nanos() as usize / reps,
- );
-
- assert_eq!(pos, (width - 1) * reps);
- }
-
- let reps = 10_000_000usize;
- for width in [32, 64, 128, 256, 512, 1024] {
- let mut v = vec![0u32; width];
- for i in 0..width {
- v[i] = i as u32;
- }
- v[width - 1] = 80808080;
- assert_eq!(v.iter().position(|x| *x == 80808080), Some(width - 1));
- assert_eq!(v.iter().position(|x| *x == 80808081), None);
- let mut pos: usize = 0;
-
- let t0 = Instant::now();
- for _ in 0..reps {
- pos += v.iter().position(|x| *x == 80808080).unwrap_or(0);
- pos += v.iter().position(|x| *x == 80808081).unwrap_or(0);
- }
-
- println!(
- "width={width:4} time per non-simd={:4}ns",
- Instant::now().duration_since(t0).as_nanos() as usize / reps,
- );
-
- assert_eq!(pos, (width - 1) * reps);
- }
-}
diff --git a/split.png b/split.png
new file mode 100644
index 0000000..cc80e5a
Binary files /dev/null and b/split.png differ
diff --git a/src/data_file.rs b/src/data_file.rs
new file mode 100644
index 0000000..fbb1074
--- /dev/null
+++ b/src/data_file.rs
@@ -0,0 +1,808 @@
+use parking_lot::{Mutex, RwLock};
+use smallvec::SmallVec;
+use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
+
+use std::{
+ collections::VecDeque,
+ fs::File,
+ mem::size_of,
+ path::Path,
+ sync::{
+ Arc,
+ atomic::{AtomicBool, AtomicU64, Ordering},
+ },
+};
+
+use crate::internal::{
+ DATA_ENTRY_OFFSET_MAGIC, DATA_ENTRY_OFFSET_MASK, DATA_FILE_SIGNATURE, DATA_FILE_VERSION,
+ EntryType, FILE_OFFSET_ALIGNMENT, KEY_NAMESPACE_BITS, KVBuf, KVRef, KeyNamespace,
+ MAX_KEY_NAMESPACE, PAGE_SIZE, READ_BUFFER_SIZE, SIZE_HINT_UNIT, data_file_path,
+ invalid_data_error, read_available_at, read_into_at, sync_dir, sync_file_range, write_all_at,
+};
+use crate::types::{Config, Error, MAX_USER_KEY_SIZE, MAX_USER_VALUE_SIZE, Result};
+
+const INLINE_SCRATCH_BUFFER_SIZE: usize = 1024;
+
+struct ParsedDataEntry {
+ data_len: usize,
+ vlen: u16,
+ ns: u8,
+}
+
+#[derive(Clone, Copy, FromBytes, IntoBytes, KnownLayout, Immutable)]
+#[repr(C)]
+struct DataFileHeader {
+ magic: [u8; 8],
+ version: u32,
+ _padding0: u32,
+ ordinal: u64,
+ _trailer: [u8; 4096 - 24],
+}
+
+const _: () = assert!(size_of::() == PAGE_SIZE);
+
+struct InflightSlot {
+ seq: AtomicU64,
+ ordinal: AtomicU64,
+ offset: AtomicU64,
+}
+
+pub(crate) struct InflightTracker {
+ snapshot_barrier: RwLock<()>,
+ next_seq: AtomicU64,
+ slots: Vec,
+ completed_deltas: Vec>>,
+}
+
+impl InflightTracker {
+ pub(crate) fn new(num_shards: usize) -> Self {
+ Self {
+ snapshot_barrier: RwLock::new(()),
+ next_seq: AtomicU64::new(1),
+ slots: (0..num_shards)
+ .map(|_| InflightSlot {
+ seq: AtomicU64::new(0),
+ ordinal: AtomicU64::new(0),
+ offset: AtomicU64::new(0),
+ })
+ .collect(),
+ completed_deltas: (0..num_shards)
+ .map(|_| Mutex::new(VecDeque::new()))
+ .collect(),
+ }
+ }
+
+ fn reserve<'a>(
+ &'a self,
+ data_file: &DataFile,
+ shard_idx: usize,
+ len: u64,
+ delta: i8,
+ ) -> Result<(u64, InflightGuard<'a>)> {
+ let _barrier = self.snapshot_barrier.read();
+ let offset = data_file.allocate(len)?;
+ let ordinal = data_file.file_ordinal;
+ let seq = self.next_seq.fetch_add(1, Ordering::Relaxed);
+ let slot = &self.slots[shard_idx];
+ slot.ordinal.store(ordinal, Ordering::Relaxed);
+ slot.offset.store(offset, Ordering::Relaxed);
+ slot.seq.store(seq, Ordering::Release);
+
+ Ok((
+ offset,
+ InflightGuard {
+ tracker: self,
+ shard_idx,
+ seq,
+ delta,
+ armed: true,
+ },
+ ))
+ }
+
+ pub(crate) fn checkpoint_progress(&self, active_file: &DataFile) -> (u64, u64, i64) {
+ let _barrier = self.snapshot_barrier.write();
+
+ let mut min_slot: Option<(u64, u64, u64)> = None;
+ for slot in &self.slots {
+ let seq = slot.seq.load(Ordering::Acquire);
+ if seq == 0 {
+ continue;
+ }
+ let ordinal = slot.ordinal.load(Ordering::Relaxed);
+ let offset = slot.offset.load(Ordering::Relaxed);
+ let current = (seq, ordinal, offset);
+ min_slot = Some(min_slot.map_or(current, |min_current| min_current.min(current)));
+ }
+
+ let checkpoint = min_slot
+ .map(|(_, ordinal, offset)| (ordinal, offset))
+ .unwrap_or_else(|| (active_file.file_ordinal, active_file.used_bytes()));
+ let completed_before_seq = min_slot.map_or(u64::MAX, |(seq, _, _)| seq);
+ let mut committed_delta = 0i64;
+ for queue in &self.completed_deltas {
+ let mut queue = queue.lock();
+ while let Some(&(seq, delta)) = queue.front() {
+ if seq >= completed_before_seq {
+ break;
+ }
+ queue.pop_front();
+ committed_delta += delta;
+ }
+ }
+
+ (checkpoint.0, checkpoint.1, committed_delta)
+ }
+
+ pub(crate) fn clear_all(&self) {
+ let _barrier = self.snapshot_barrier.write();
+ for slot in &self.slots {
+ slot.seq.store(0, Ordering::Release);
+ slot.ordinal.store(0, Ordering::Relaxed);
+ slot.offset.store(0, Ordering::Relaxed);
+ }
+ for queue in &self.completed_deltas {
+ queue.lock().clear();
+ }
+ }
+
+ fn clear_matching(&self, shard_idx: usize, expected_seq: u64) {
+ let _barrier = self.snapshot_barrier.read();
+ let slot = &self.slots[shard_idx];
+ if slot.seq.load(Ordering::Acquire) == expected_seq {
+ slot.seq.store(0, Ordering::Release);
+ }
+ }
+
+ fn complete_matching(&self, shard_idx: usize, expected_seq: u64, delta: i8) {
+ let _barrier = self.snapshot_barrier.read();
+ let slot = &self.slots[shard_idx];
+ if slot.seq.load(Ordering::Acquire) == expected_seq {
+ slot.seq.store(0, Ordering::Release);
+ if delta != 0 {
+ self.completed_deltas[shard_idx]
+ .lock()
+ .push_back((expected_seq, i64::from(delta)));
+ }
+ }
+ }
+}
+
+pub(crate) struct InflightGuard<'a> {
+ tracker: &'a InflightTracker,
+ shard_idx: usize,
+ seq: u64,
+ delta: i8,
+ armed: bool,
+}
+
+impl InflightGuard<'_> {
+ pub(crate) fn complete(mut self) {
+ if self.armed {
+ self.tracker
+ .complete_matching(self.shard_idx, self.seq, self.delta);
+ self.armed = false;
+ }
+ }
+}
+
+impl Drop for InflightGuard<'_> {
+ fn drop(&mut self) {
+ if self.armed {
+ self.tracker.clear_matching(self.shard_idx, self.seq);
+ }
+ }
+}
+
+pub(crate) struct DataFile {
+ pub(crate) file: File,
+ file_offset: AtomicU64,
+ last_synced_offset: AtomicU64,
+ sealed_for_rotation: AtomicBool,
+ config: Arc,
+ pub(crate) file_idx: u16,
+ pub(crate) file_ordinal: u64,
+ preallocated: bool,
+ recovery_tail_upper_bound: u64,
+}
+
+impl DataFile {
+ fn read_header(file: &File) -> Result {
+ let header =
+ read_available_at(file, size_of::(), 0).map_err(Error::IOError)?;
+ if header.len() < size_of::() {
+ return Err(Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::UnexpectedEof,
+ "data file header too short",
+ )));
+ }
+
+ let header = DataFileHeader::read_from_bytes(&header).map_err(|_| {
+ Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ "invalid data file header size",
+ ))
+ })?;
+ if &header.magic != DATA_FILE_SIGNATURE || header.version != DATA_FILE_VERSION {
+ return Err(Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ "invalid data file header",
+ )));
+ }
+
+ Ok(header)
+ }
+
+ pub(crate) fn read_ordinal(base_path: &Path, file_idx: u16) -> Result {
+ let file = File::options()
+ .read(true)
+ .open(data_file_path(base_path, file_idx))
+ .map_err(Error::IOError)?;
+ Ok(Self::read_header(&file)?.ordinal)
+ }
+
+ pub(crate) fn used_bytes(&self) -> u64 {
+ self.file_offset.load(Ordering::Acquire)
+ }
+
+ pub(crate) fn recovery_tail_upper_bound(&self) -> u64 {
+ self.recovery_tail_upper_bound
+ }
+
+ pub(crate) fn sync_data(&self, start_offset: u64, end_offset: u64) -> Result<()> {
+ let used_bytes = self.used_bytes();
+ let start_offset = start_offset.min(used_bytes);
+ let end_offset = end_offset.min(used_bytes);
+ if end_offset <= start_offset {
+ return Ok(());
+ }
+
+ if !self.preallocated {
+ self.file.sync_data().map_err(Error::IOError)?;
+ } else {
+ sync_file_range(
+ &self.file,
+ size_of::() as u64 + start_offset,
+ end_offset - start_offset,
+ )?;
+ }
+ self.last_synced_offset
+ .fetch_max(end_offset, Ordering::Release);
+ Ok(())
+ }
+
+ pub(crate) fn sync_to_current(&self) -> Result<()> {
+ let start = self.last_synced_offset.load(Ordering::Acquire);
+ self.sync_data(start, self.used_bytes())
+ }
+
+ pub(crate) fn truncate_to_offset(&self, file_offset: u64) -> Result<()> {
+ debug_assert_eq!(file_offset % FILE_OFFSET_ALIGNMENT, 0);
+ if self.preallocated {
+ // A crash between the two set_len calls would leave the file
+ // non-preallocated. That is harmless: the next open will
+ // detect it as non-preallocated and fall back to sync_all
+ // until rotation creates a fresh preallocated file.
+ self.file
+ .set_len(size_of::() as u64 + file_offset)
+ .map_err(Error::IOError)?;
+ self.file
+ .set_len(size_of::() as u64 + self.config.max_data_file_size as u64)
+ .map_err(Error::IOError)?;
+ } else {
+ self.file
+ .set_len(size_of::() as u64 + file_offset)
+ .map_err(Error::IOError)?;
+ }
+ self.file_offset.store(file_offset, Ordering::Release);
+ self.file.sync_all().map_err(Error::IOError)?;
+ self.last_synced_offset
+ .store(file_offset, Ordering::Release);
+ Ok(())
+ }
+
+ fn used_data_upper_bound(file: &File, physical_data_len: u64) -> Result {
+ if physical_data_len == 0 {
+ return Ok(0);
+ }
+
+ let mut end = physical_data_len;
+ while end > 0 {
+ let start = end.saturating_sub(READ_BUFFER_SIZE as u64);
+ let chunk = read_available_at(
+ file,
+ (end - start) as usize,
+ size_of::() as u64 + start,
+ )
+ .map_err(Error::IOError)?;
+ if let Some(rel) = chunk.iter().rposition(|byte| *byte != 0) {
+ let aligned = (start + rel as u64 + 1).next_multiple_of(FILE_OFFSET_ALIGNMENT);
+ return Ok(aligned.min(physical_data_len));
+ }
+ end = start;
+ }
+
+ Ok(0)
+ }
+
+ /// Scans forward from offset 0, parsing each entry, and returns the
+ /// aligned end of the last valid entry. We temporarily set `file_offset`
+ /// to `tail_upper_bound` so that `read_next_entry_ref` won't short-circuit
+ /// before reaching it. This is safe because `open` is single-threaded;
+ /// the real value is overwritten by the caller immediately after.
+ fn detect_used_bytes(&self, tail_upper_bound: u64) -> Result {
+ if tail_upper_bound == 0 {
+ return Ok(0);
+ }
+
+ self.file_offset.store(tail_upper_bound, Ordering::Release);
+
+ let mut offset = 0u64;
+ let mut read_buf = Vec::new();
+ let mut buf_file_offset = 0u64;
+ let mut last_durable_offset = 0u64;
+ while let Some((_, _, next_offset)) =
+ self.read_next_entry_ref(offset, &mut read_buf, &mut buf_file_offset)?
+ {
+ offset = next_offset;
+ last_durable_offset = next_offset.next_multiple_of(FILE_OFFSET_ALIGNMENT);
+ }
+
+ Ok(last_durable_offset)
+ }
+
+ fn parse_data_entry(buf: &[u8], offset: u64) -> Result {
+ if buf.len() < 8 {
+ return Err(Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::UnexpectedEof,
+ "entry too short",
+ )));
+ }
+
+ let header = u32::from_le_bytes(buf[0..4].try_into().unwrap());
+ let magic_offset = (((offset / FILE_OFFSET_ALIGNMENT) as u32) ^ DATA_ENTRY_OFFSET_MAGIC)
+ & DATA_ENTRY_OFFSET_MASK;
+
+ if header & DATA_ENTRY_OFFSET_MASK != magic_offset {
+ return Err(Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ "corrupt entry magic",
+ )));
+ }
+
+ let klen = u16::from_le_bytes(buf[4..6].try_into().unwrap());
+ let vlen = u16::from_le_bytes(buf[6..8].try_into().unwrap());
+ let entry_len = 4 + 4 + klen as usize + vlen as usize + 2;
+ if buf.len() < entry_len {
+ return Err(Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::UnexpectedEof,
+ "entry too short",
+ )));
+ }
+
+ let checksum = u16::from_le_bytes(buf[entry_len - 2..entry_len].try_into().unwrap());
+ if checksum != crc16_ibm3740_fast::hash(&buf[..entry_len - 2]) as u16 {
+ return Err(Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ "checksum mismatch",
+ )));
+ }
+
+ let ns = ((header >> 24) & ((1 << KEY_NAMESPACE_BITS) - 1)) as u8;
+ let entry_type = (header >> 30) & 0b11;
+ if entry_type != EntryType::Insert as u32 && entry_type != EntryType::Update as u32 {
+ return Err(Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ "invalid entry type",
+ )));
+ }
+
+ Ok(ParsedDataEntry {
+ data_len: 8 + vlen as usize + klen as usize,
+ vlen,
+ ns,
+ })
+ }
+
+ pub(crate) fn open(
+ base_path: &Path,
+ config: Arc,
+ file_idx: u16,
+ validate_tail: bool,
+ ) -> Result {
+ let file = File::options()
+ .read(true)
+ .write(true)
+ .open(data_file_path(base_path, file_idx))
+ .map_err(Error::IOError)?;
+ let header = Self::read_header(&file)?;
+ let physical_data_len = file
+ .metadata()
+ .map_err(Error::IOError)?
+ .len()
+ .saturating_sub(size_of::() as u64);
+ let preallocated = physical_data_len == config.max_data_file_size as u64;
+ let recovery_tail_upper_bound = Self::used_data_upper_bound(&file, physical_data_len)?;
+
+ let inst = Self {
+ file,
+ file_offset: AtomicU64::new(physical_data_len),
+ last_synced_offset: AtomicU64::new(0),
+ sealed_for_rotation: AtomicBool::new(false),
+ config,
+ file_idx,
+ file_ordinal: header.ordinal,
+ preallocated,
+ recovery_tail_upper_bound,
+ };
+ let used_bytes = if validate_tail {
+ inst.detect_used_bytes(recovery_tail_upper_bound)?
+ } else {
+ recovery_tail_upper_bound
+ };
+ inst.file_offset.store(used_bytes, Ordering::Release);
+ inst.last_synced_offset.store(used_bytes, Ordering::Release);
+
+ Ok(inst)
+ }
+
+ pub(crate) fn create(
+ base_path: &Path,
+ config: Arc,
+ file_idx: u16,
+ ordinal: u64,
+ ) -> Result {
+ let file = File::options()
+ .create(true)
+ .truncate(true)
+ .read(true)
+ .write(true)
+ .open(data_file_path(base_path, file_idx))
+ .map_err(Error::IOError)?;
+ file.set_len(size_of::() as u64 + config.max_data_file_size as u64)
+ .map_err(Error::IOError)?;
+ let header = DataFileHeader {
+ magic: *DATA_FILE_SIGNATURE,
+ version: DATA_FILE_VERSION,
+ _padding0: 0,
+ ordinal,
+ _trailer: [0; 4096 - 24],
+ };
+ write_all_at(&file, header.as_bytes(), 0).map_err(Error::IOError)?;
+ file.sync_all().map_err(Error::IOError)?;
+ sync_dir(base_path)?;
+ Ok(Self {
+ file,
+ file_offset: AtomicU64::new(0),
+ last_synced_offset: AtomicU64::new(0),
+ sealed_for_rotation: AtomicBool::new(false),
+ config,
+ file_idx,
+ file_ordinal: ordinal,
+ preallocated: true,
+ recovery_tail_upper_bound: 0,
+ })
+ }
+
+ pub(crate) fn seal_for_rotation(&self) {
+ self.sealed_for_rotation.store(true, Ordering::SeqCst);
+ }
+
+ fn allocate(&self, len: u64) -> Result {
+ if self.sealed_for_rotation.load(Ordering::SeqCst) {
+ return Err(Error::RotateDataFile(self.file_idx));
+ }
+ let mut file_offset = self.file_offset.load(Ordering::Relaxed);
+ loop {
+ if file_offset + len > self.config.max_data_file_size as u64 {
+ return Err(Error::RotateDataFile(self.file_idx));
+ }
+ match self.file_offset.compare_exchange(
+ file_offset,
+ file_offset + len,
+ Ordering::Relaxed,
+ Ordering::Relaxed,
+ ) {
+ Ok(_) => return Ok(file_offset),
+ Err(current) => file_offset = current,
+ }
+ }
+ }
+
+ fn append_entry<'a>(
+ &self,
+ entry_type: EntryType,
+ ns: KeyNamespace,
+ key: &[u8],
+ val: Option<&[u8]>,
+ shard_idx: usize,
+ inflight_tracker: &'a InflightTracker,
+ ) -> Result<(u64, usize, InflightGuard<'a>)> {
+ debug_assert!(key.len() <= MAX_USER_KEY_SIZE);
+ debug_assert!(ns as u8 <= MAX_KEY_NAMESPACE);
+
+ let val_len = val.map_or(0, |v| v.len());
+ if let Some(v) = val {
+ debug_assert!(v.len() <= MAX_USER_VALUE_SIZE);
+ }
+
+ let entry_len = 4 + if val.is_some() { 4 } else { 2 } + val_len + key.len() + 2;
+ let aligned_len = entry_len.next_multiple_of(FILE_OFFSET_ALIGNMENT as usize);
+ let delta = match entry_type {
+ EntryType::Insert => 1,
+ EntryType::Tombstone => -1,
+ _ => 0,
+ };
+ let (file_offset, inflight_guard) =
+ inflight_tracker.reserve(self, shard_idx, aligned_len as u64, delta)?;
+ debug_assert!(file_offset % FILE_OFFSET_ALIGNMENT == 0);
+
+ let mut buf = SmallVec::<[u8; INLINE_SCRATCH_BUFFER_SIZE]>::with_capacity(aligned_len);
+ // We overwrite the entry bytes below and only zero the alignment padding.
+ unsafe { buf.set_len(aligned_len) };
+ let buf = &mut buf[..];
+
+ let magic_offset = (((file_offset / FILE_OFFSET_ALIGNMENT) as u32)
+ ^ DATA_ENTRY_OFFSET_MAGIC)
+ & DATA_ENTRY_OFFSET_MASK;
+ let header = magic_offset | ((entry_type as u32) << 30) | ((ns as u32) << 24);
+
+ buf[0..4].copy_from_slice(&header.to_le_bytes());
+ buf[4..6].copy_from_slice(&(key.len() as u16).to_le_bytes());
+
+ if let Some(v) = val {
+ buf[6..8].copy_from_slice(&(v.len() as u16).to_le_bytes());
+ buf[8..8 + v.len()].copy_from_slice(v);
+ buf[8 + v.len()..8 + v.len() + key.len()].copy_from_slice(key);
+ } else {
+ buf[6..6 + key.len()].copy_from_slice(key);
+ }
+
+ buf[entry_len..aligned_len].fill(0);
+ let checksum = crc16_ibm3740_fast::hash(&buf[..entry_len - 2]) as u16;
+ buf[entry_len - 2..entry_len].copy_from_slice(&checksum.to_le_bytes());
+
+ let res = write_all_at(
+ &self.file,
+ buf,
+ size_of::() as u64 + file_offset,
+ )
+ .map_err(Error::IOError);
+ res?;
+
+ Ok((file_offset, aligned_len, inflight_guard))
+ }
+
+ pub(crate) fn append_kv<'a>(
+ &self,
+ entry_type: EntryType,
+ ns: KeyNamespace,
+ key: &[u8],
+ val: &[u8],
+ shard_idx: usize,
+ inflight_tracker: &'a InflightTracker,
+ ) -> Result<(u64, usize, InflightGuard<'a>)> {
+ debug_assert!(matches!(entry_type, EntryType::Insert | EntryType::Update));
+ self.append_entry(entry_type, ns, key, Some(val), shard_idx, inflight_tracker)
+ }
+
+ pub(crate) fn append_tombstone<'a>(
+ &self,
+ ns: KeyNamespace,
+ key: &[u8],
+ shard_idx: usize,
+ inflight_tracker: &'a InflightTracker,
+ ) -> Result<(u64, usize, InflightGuard<'a>)> {
+ self.append_entry(
+ EntryType::Tombstone,
+ ns,
+ key,
+ None,
+ shard_idx,
+ inflight_tracker,
+ )
+ }
+
+ pub(crate) fn read_kv_into<'a>(
+ &self,
+ offset: u64,
+ size_hint: usize,
+ buf: &'a mut Vec,
+ ) -> Result> {
+ debug_assert!(size_hint >= SIZE_HINT_UNIT);
+ read_into_at(
+ &self.file,
+ buf,
+ size_hint,
+ size_of::() as u64 + offset,
+ )
+ .map_err(Error::IOError)?;
+ let parsed = Self::parse_data_entry(buf, offset)?;
+ buf.truncate(parsed.data_len);
+ Ok(KVRef {
+ buf,
+ vlen: parsed.vlen,
+ header_len: 8,
+ ns: parsed.ns,
+ entry_type: EntryType::Insert,
+ })
+ }
+
+ pub(crate) fn read_kv(&self, offset: u64, size_hint: usize) -> Result {
+ debug_assert!(size_hint >= SIZE_HINT_UNIT);
+ let mut buf = read_available_at(
+ &self.file,
+ size_hint,
+ size_of::() as u64 + offset,
+ )
+ .map_err(Error::IOError)?;
+ let parsed = Self::parse_data_entry(&buf, offset)?;
+ buf.truncate(parsed.data_len);
+ Ok(KVBuf {
+ buf,
+ vlen: parsed.vlen,
+ header_len: 8,
+ ns: parsed.ns,
+ entry_type: EntryType::Insert,
+ })
+ }
+
+ fn ensure_verified_entry(
+ &self,
+ read_buf: &mut Vec,
+ buf_file_offset: &mut u64,
+ rel: usize,
+ entry_len: usize,
+ offset: u64,
+ ) -> Result> {
+ let start = if read_buf.len() - rel >= entry_len {
+ rel
+ } else {
+ read_into_at(
+ &self.file,
+ read_buf,
+ entry_len,
+ size_of::() as u64 + offset,
+ )
+ .map_err(Error::IOError)?;
+ *buf_file_offset = offset;
+ if read_buf.len() < entry_len {
+ return Ok(None);
+ }
+ 0
+ };
+
+ let entry_bytes = &read_buf[start..start + entry_len];
+ let checksum =
+ u16::from_le_bytes(entry_bytes[entry_len - 2..entry_len].try_into().unwrap());
+ if checksum != crc16_ibm3740_fast::hash(&entry_bytes[..entry_len - 2]) as u16 {
+ return Ok(None);
+ }
+
+ Ok(Some(start))
+ }
+
+ pub(crate) fn read_next_entry_ref<'a>(
+ &self,
+ mut offset: u64,
+ read_buf: &'a mut Vec,
+ buf_file_offset: &mut u64,
+ ) -> Result, u64, u64)>> {
+ let used_bytes = self.used_bytes();
+ if offset >= used_bytes {
+ return Ok(None);
+ }
+ offset = offset.next_multiple_of(FILE_OFFSET_ALIGNMENT);
+
+ loop {
+ if offset >= used_bytes {
+ return Ok(None);
+ }
+ let buf_start = if offset >= *buf_file_offset {
+ (offset - *buf_file_offset) as usize
+ } else {
+ read_buf.clear();
+ 0
+ };
+
+ if buf_start >= read_buf.len() || read_buf.len() - buf_start < 8 {
+ read_into_at(
+ &self.file,
+ read_buf,
+ READ_BUFFER_SIZE,
+ size_of::() as u64 + offset,
+ )
+ .map_err(Error::IOError)?;
+ *buf_file_offset = offset;
+ if read_buf.len() < 8 {
+ return Ok(None);
+ }
+ }
+
+ let rel = (offset - *buf_file_offset) as usize;
+ let avail = &read_buf[rel..];
+
+ let header = u32::from_le_bytes(avail[0..4].try_into().unwrap());
+ let magic_offset = (((offset / FILE_OFFSET_ALIGNMENT) as u32)
+ ^ DATA_ENTRY_OFFSET_MAGIC)
+ & DATA_ENTRY_OFFSET_MASK;
+ if header & DATA_ENTRY_OFFSET_MASK != magic_offset {
+ offset += FILE_OFFSET_ALIGNMENT;
+ continue;
+ }
+
+ let ns = ((header >> 24) & ((1 << KEY_NAMESPACE_BITS) - 1)) as u8;
+ let entry_type = (header >> 30) & 0b11;
+
+ match entry_type {
+ x if x == EntryType::Insert as u32 || x == EntryType::Update as u32 => {
+ let resolved_type = if x == EntryType::Insert as u32 {
+ EntryType::Insert
+ } else {
+ EntryType::Update
+ };
+ let klen = u16::from_le_bytes(avail[4..6].try_into().unwrap());
+ let vlen = u16::from_le_bytes(avail[6..8].try_into().unwrap());
+ let entry_len = 4 + 4 + klen as usize + vlen as usize + 2;
+
+ let Some(start) = self.ensure_verified_entry(
+ read_buf,
+ buf_file_offset,
+ rel,
+ entry_len,
+ offset,
+ )?
+ else {
+ offset += FILE_OFFSET_ALIGNMENT;
+ continue;
+ };
+ let buf = &read_buf[start..start + 8 + vlen as usize + klen as usize];
+
+ return Ok(Some((
+ KVRef {
+ buf,
+ vlen,
+ header_len: 8,
+ ns,
+ entry_type: resolved_type,
+ },
+ offset,
+ offset + entry_len as u64,
+ )));
+ }
+ x if x == EntryType::Tombstone as u32 => {
+ let klen = u16::from_le_bytes(avail[4..6].try_into().unwrap());
+ let entry_len = 4 + 2 + klen as usize + 2;
+
+ let Some(start) = self.ensure_verified_entry(
+ read_buf,
+ buf_file_offset,
+ rel,
+ entry_len,
+ offset,
+ )?
+ else {
+ offset += FILE_OFFSET_ALIGNMENT;
+ continue;
+ };
+ let buf = &read_buf[start..start + 6 + klen as usize];
+
+ return Ok(Some((
+ KVRef {
+ buf,
+ vlen: 0,
+ header_len: 6,
+ ns,
+ entry_type: EntryType::Tombstone,
+ },
+ offset,
+ offset + entry_len as u64,
+ )));
+ }
+ _ => {
+ return Err(invalid_data_error("unknown data entry type"));
+ }
+ }
+ }
+ }
+}
diff --git a/src/hashing.rs b/src/hashing.rs
deleted file mode 100644
index 0ed1505..0000000
--- a/src/hashing.rs
+++ /dev/null
@@ -1,100 +0,0 @@
-use siphasher::sip128::{Hash128, SipHasher24};
-
-use crate::shard::NUM_ROWS;
-
-use bytemuck::{Pod, Zeroable};
-
-pub type HashSeed = [u8; 16];
-
-#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Pod, Zeroable, Hash)]
-#[repr(transparent)]
-pub(crate) struct PartedHash(u64);
-
-// impl std::fmt::Display for PartedHash {
-// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-// write!(
-// f,
-// "{:04x}.{:04x}.{:08x}",
-// self.shard_selector(),
-// self.row_selector(),
-// self.signature()
-// )
-// }
-// }
-
-pub(crate) const INVALID_SIG: u32 = 0;
-
-#[cfg(feature = "whitebox_testing")]
-pub static mut HASH_BITS_TO_KEEP: u64 = u64::MAX; // which bits to keep from the hash - for testing collisions
-
-impl PartedHash {
- pub fn new(seed: &HashSeed, buf: &[u8]) -> Self {
- Self::from_hash(SipHasher24::new_with_key(&seed).hash(buf))
- }
-
- #[inline]
- pub fn is_valid(&self) -> bool {
- self.signature() != INVALID_SIG
- }
-
- #[inline]
- pub fn shard_selector(&self) -> u32 {
- ((self.0 >> 48) & 0xffff) as u32
- }
-
- #[inline]
- pub fn row_selector(&self) -> usize {
- (((self.0 >> 32) as u16) as usize) % NUM_ROWS
- }
-
- #[inline]
- pub fn signature(&self) -> u32 {
- self.0 as u32
- }
-
- #[allow(dead_code)]
- pub fn as_u64(&self) -> u64 {
- self.0
- }
-
- fn from_hash(h: Hash128) -> Self {
- let mut sig = h.h1 as u32;
- if sig == INVALID_SIG {
- sig = h.h2 as u32;
- if sig == INVALID_SIG {
- sig = (h.h2 >> 32) as u32;
- if sig == INVALID_SIG {
- sig = 0x6052_c9b7; // this is so unlikely that it doesn't really matter
- }
- }
- }
- let shard = h.h1 & 0xffff_0000_0000_0000;
- let row = h.h1 & 0x0000_ffff_0000_0000;
- let val = shard | row | (sig as u64);
-
- #[cfg(feature = "whitebox_testing")]
- let val = (val & unsafe { HASH_BITS_TO_KEEP }) | 1 /* make sure sig != 0 */;
-
- Self(val)
- }
-}
-
-#[test]
-fn test_parted_hash() -> crate::Result<()> {
- use bytemuck::{bytes_of, from_bytes};
-
- let h1 = PartedHash::new(b"aaaabbbbccccdddd", b"hello world");
- assert_eq!(h1.0, 13445180190757400308,);
- let h2 = PartedHash(13445180190757400308);
- assert_eq!(PartedHash::new(b"aaaabbbbccccdddd", b"hello world"), h2);
-
- let h3 = PartedHash(0x1020304050607080);
- assert_eq!(
- bytes_of(&h3),
- [0x80, 0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10]
- );
- let h4: PartedHash = *from_bytes(&[0x80, 0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10]);
- assert_eq!(h4, h3);
-
- Ok(())
-}
diff --git a/src/index_file.rs b/src/index_file.rs
new file mode 100644
index 0000000..47535bd
--- /dev/null
+++ b/src/index_file.rs
@@ -0,0 +1,971 @@
+use memmap2::MmapMut;
+use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
+use simd_itertools::PositionSimd;
+use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, TryFromBytes};
+
+use std::{
+ fs::File,
+ hash::Hasher,
+ mem::{offset_of, size_of},
+ ops::{Deref, DerefMut},
+ path::Path,
+ sync::{
+ Arc,
+ atomic::{AtomicU32, AtomicU64, Ordering},
+ },
+ time::{Duration, Instant},
+};
+
+use crate::internal::{
+ FILE_OFFSET_ALIGNMENT, HashCoord, INDEX_FILE_SIGNATURE, INDEX_FILE_VERSION, MAX_DATA_FILES,
+ MIN_INITIAL_ROWS, MIN_SPLIT_LEVEL, PAGE_SIZE, ROW_WIDTH, SIZE_HINT_UNIT, index_file_path,
+ index_rows_file_path, invalid_data_error, read_available_at, unexpected_eof_error,
+};
+use crate::types::{Config, Error, Result};
+
+// Number of double-buffered checkpoint slots in the index header.
+const CHECKPOINT_SLOT_COUNT: usize = 2;
+
+/// Decoded, in-memory view of one persisted checkpoint slot
+/// (the on-disk form is `CheckpointSlot`).
+#[derive(Clone, Copy)]
+struct CheckpointCursor {
+    // Monotonic write generation; higher wins when picking the newest slot.
+    generation: u64,
+    // Data file ordinal the replay cursor points into.
+    file_ordinal: u64,
+    // Byte offset within that data file.
+    offset: u64,
+}
+
+/// One slot of the double-buffered checkpoint cursor, persisted inside the
+/// index header mmap. All fields are atomics because the slot is mutated in
+/// place through a shared mapping.
+#[derive(FromBytes, IntoBytes, KnownLayout)]
+#[repr(C)]
+pub(crate) struct CheckpointSlot {
+    /// Monotonic generation; 0 means "slot never written".
+    generation: AtomicU64,
+    /// Data file ordinal of the replay cursor.
+    file_ordinal: AtomicU64,
+    /// Byte offset within that data file.
+    offset: AtomicU64,
+    /// SipHash over (generation, file_ordinal, offset); detects torn writes.
+    checksum: AtomicU64,
+}
+
+// 4 * 8 bytes — the header padding math below depends on this exact size.
+const _: () = assert!(size_of::<CheckpointSlot>() == 32);
+
+/// Checksum guarding a `CheckpointSlot` against torn or partial writes:
+/// SipHash-1-3 over the three payload words, hashed in field order.
+fn checkpoint_slot_checksum(generation: u64, file_ordinal: u64, offset: u64) -> u64 {
+    let mut h = siphasher::sip::SipHasher13::new();
+    for word in [generation, file_ordinal, offset] {
+        h.write_u64(word);
+    }
+    h.finish()
+}
+
+/// On-disk header page of the index file. Cache-line padded sections keep the
+/// immutable identity fields, the hot runtime state, the checkpoint slots and
+/// the stats on separate lines; the whole struct is exactly one page.
+#[derive(FromBytes, IntoBytes, KnownLayout)]
+#[repr(C)]
+pub(crate) struct IndexFileHeader {
+    pub(crate) signature: [u8; 8],
+    pub(crate) version: u32,
+    _padding16: u32,
+    pub(crate) hash_key_0: u64,
+    pub(crate) hash_key_1: u64,
+    _padding64: [u8; 64 - 4 * 8],
+
+    ///////////////////////////////////
+    // runtime state
+    ///////////////////////////////////
+    pub(crate) global_split_level: AtomicU64,
+    _padding128: [u8; 64 - 8],
+
+    ///////////////////////////////////
+    // rebuild state
+    ///////////////////////////////////
+    /// Persisted replay cursor, double-buffered so recovery can pick the
+    /// newest valid slot after crashes or torn writes.
+    pub(crate) checkpoint_slots: [CheckpointSlot; CHECKPOINT_SLOT_COUNT],
+    _padding1024: [u8; 896 - CHECKPOINT_SLOT_COUNT * 32],
+
+    ///////////////////////////////////
+    // stats
+    ///////////////////////////////////
+    pub(crate) committed_num_entries: AtomicU64,
+
+    _trailer: [u8; PAGE_SIZE - 1024 - 8],
+}
+
+const _: () = assert!(offset_of!(IndexFileHeader, global_split_level) == 64);
+const _: () = assert!(offset_of!(IndexFileHeader, checkpoint_slots) == 128);
+const _: () = assert!(offset_of!(IndexFileHeader, committed_num_entries) == 1024);
+const _: () = assert!(size_of::<IndexFileHeader>() == PAGE_SIZE);
+
+/// Packed 64-bit pointer from an index row slot into a data-file entry.
+///
+/// Bit layout (LSB first):
+///   [ 0..12)  file_idx            — data file slot (MAX_DATA_FILES = 1 << 12)
+///   [12..38)  file_offset / 16    — offset in FILE_OFFSET_ALIGNMENT units
+///   [38..46)  size hint           — entry size in SIZE_HINT_UNIT (512 B) units, rounded up
+///   [46..64)  masked row selector — disambiguates keys whose signatures collide
+#[derive(Debug, Clone, Copy, PartialEq, Eq, FromBytes, IntoBytes, KnownLayout, Immutable)]
+#[repr(transparent)]
+pub(crate) struct EntryPointer(pub(crate) u64);
+
+impl EntryPointer {
+    /// The all-zero value never collides with a real pointer because `new`
+    /// asserts `size > 0`, which forces a non-zero size-hint field.
+    pub(crate) const INVALID_POINTER: Self = Self(0);
+
+    pub(crate) fn new(
+        file_idx: u16,
+        file_offset: u64,
+        size: usize,
+        masked_row_selector: u32,
+    ) -> Self {
+        debug_assert!(size > 0 && size <= u8::MAX as usize * SIZE_HINT_UNIT);
+
+        // NOTE(review): file_idx and file_offset are silently truncated to
+        // 12 / 26 bits here — callers presumably enforce MAX_DATA_FILE_IDX and
+        // MAX_REPRESENTABLE_FILE_SIZE; confirm at call sites.
+        let fi = (file_idx as u64) & ((1 << 12) - 1);
+        let fo = ((file_offset / FILE_OFFSET_ALIGNMENT) & ((1 << 26) - 1)) << 12;
+        let sh = (size.div_ceil(SIZE_HINT_UNIT) as u64) << (12 + 26);
+        let rs = (masked_row_selector as u64) << (12 + 26 + 8);
+        Self(fi | fo | sh | rs)
+    }
+
+    pub(crate) fn file_idx(&self) -> u16 {
+        (self.0 & ((1 << 12) - 1)) as u16
+    }
+
+    /// Byte offset of the entry inside its data file (always a multiple of
+    /// FILE_OFFSET_ALIGNMENT; the division in `new` is lossless for aligned
+    /// offsets).
+    pub(crate) fn file_offset(&self) -> u64 {
+        ((self.0 >> 12) & ((1 << 26) - 1)) * FILE_OFFSET_ALIGNMENT
+    }
+
+    /// Upper bound on the entry size in bytes, rounded up to SIZE_HINT_UNIT.
+    pub(crate) fn size_hint(&self) -> usize {
+        ((self.0 >> (12 + 26)) & ((1 << 8) - 1)) as usize * SIZE_HINT_UNIT
+    }
+
+    pub(crate) fn masked_row_selector(&self) -> u32 {
+        (self.0 >> (12 + 26 + 8)) as u32
+    }
+
+    pub(crate) fn is_valid(&self) -> bool {
+        self.0 != Self::INVALID_POINTER.0
+    }
+}
+
+/// One index row: a split level, then parallel arrays of signatures and
+/// pointers. Exactly one page: 64 + (4 + 8) * ROW_WIDTH == 64 + 12 * 336.
+#[derive(FromBytes, IntoBytes, KnownLayout)]
+#[repr(C)]
+pub(crate) struct RowLayout {
+    pub(crate) split_level: AtomicU64,
+    _padding: [u8; 56],
+    pub(crate) signatures: [u32; ROW_WIDTH],
+    pub(crate) pointers: [EntryPointer; ROW_WIDTH],
+}
+
+const _: () = assert!(size_of::<RowLayout>() == PAGE_SIZE);
+const _: () = assert!(offset_of!(RowLayout, signatures) % 8 == 0);
+const _: () = assert!(offset_of!(RowLayout, pointers) % 8 == 0);
+
+impl RowLayout {
+    /// Iterates over slots whose signature and masked row selector both match
+    /// `hash_coord`.
+    pub(crate) fn iter_matches(&self, hash_coord: HashCoord) -> RowMatchIterator<'_> {
+        RowMatchIterator {
+            row: self,
+            hash_coord,
+            offset: 0,
+        }
+    }
+
+    /// Index of the first empty slot (INVALID_SIG), if any.
+    pub(crate) fn find_free_slot(&self) -> Option<usize> {
+        self.signatures
+            .iter()
+            .position_simd(|&sig| sig == HashCoord::INVALID_SIG)
+    }
+
+    /// Writes signature then pointer into a slot that must currently be free.
+    /// The signature is written first so the crash-injection point between the
+    /// two stores exercises a sig-without-pointer state.
+    pub(crate) fn insert(&mut self, idx: usize, sig: u32, ptr: EntryPointer) {
+        debug_assert!(self.signatures[idx] == HashCoord::INVALID_SIG);
+        self.signatures[idx] = sig;
+        crate::crash_point("insert_after_sig");
+        self.pointers[idx] = ptr;
+    }
+
+    /// Clears a slot (both signature and pointer).
+    pub(crate) fn remove(&mut self, idx: usize) {
+        self.signatures[idx] = HashCoord::INVALID_SIG;
+        self.pointers[idx] = EntryPointer::INVALID_POINTER;
+    }
+
+    /// Redirects an occupied slot to a new entry location, keeping its signature.
+    pub(crate) fn replace_pointer(&mut self, idx: usize, new_ptr: EntryPointer) {
+        self.pointers[idx] = new_ptr;
+    }
+
+    pub(crate) fn set_split_level(&mut self, new_sl: u64) {
+        self.split_level.store(new_sl, Ordering::Release);
+    }
+}
+
+/// Iterator over `(slot index, pointer)` pairs in a row that match a lookup's
+/// signature and masked row selector.
+pub(crate) struct RowMatchIterator<'a> {
+    row: &'a RowLayout,
+    hash_coord: HashCoord,
+    offset: usize,
+}
+
+impl Iterator for RowMatchIterator<'_> {
+    type Item = (usize, EntryPointer);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        while self.offset < ROW_WIDTH {
+            if let Some(idx) = self.row.signatures[self.offset..]
+                .iter()
+                .position_simd(|&sig| sig == self.hash_coord.sig)
+            {
+                let real_idx = self.offset + idx;
+                self.offset = real_idx + 1;
+                let ptr = self.row.pointers[real_idx];
+                // A signature match alone is not enough: the pointer must be
+                // valid (slot not mid-removal) and carry the same masked row
+                // selector, filtering out signature aliases from other rows.
+                if ptr.is_valid()
+                    && ptr.masked_row_selector() == self.hash_coord.masked_row_selector()
+                {
+                    return Some((real_idx, ptr));
+                }
+            } else {
+                self.offset = ROW_WIDTH;
+            }
+        }
+        None
+    }
+}
+
+/// Full layout of the fixed (never remapped) header mapping: one header page
+/// followed by four pages of per-file waste counters.
+#[derive(FromBytes, IntoBytes, KnownLayout)]
+#[repr(C)]
+pub(crate) struct IndexFileLayout {
+    pub(crate) header: IndexFileHeader,
+    // note: we don't keep committed and uncommitted waste_levels for space efficiency and because
+    // they only need be approximate
+    pub(crate) waste_levels: [AtomicU32; MAX_DATA_FILES as usize],
+}
+
+// 1 header page + 4096 * 4 bytes of waste counters = 5 pages.
+const _: () = assert!(size_of::<IndexFileLayout>() == PAGE_SIZE * 5);
+
+// --- Raw row accessors over the rows mmap (a flat array of RowLayout pages) ---
+
+/// Number of whole rows that fit in a mapping of `len` bytes.
+fn row_count_for_len(len: usize) -> usize {
+    len / size_of::<RowLayout>()
+}
+
+/// Byte offset of row `idx` inside the rows mapping.
+fn row_offset(idx: usize) -> usize {
+    idx * size_of::<RowLayout>()
+}
+
+fn row_bytes(bytes: &[u8], idx: usize) -> &[u8] {
+    let start = row_offset(idx);
+    let end = start + size_of::<RowLayout>();
+    &bytes[start..end]
+}
+
+fn row_bytes_mut(bytes: &mut [u8], idx: usize) -> &mut [u8] {
+    let start = row_offset(idx);
+    let end = start + size_of::<RowLayout>();
+    &mut bytes[start..end]
+}
+
+/// Reinterprets a row's bytes as a `RowLayout`.
+// Safety: the mmap is page-aligned and rows are page-sized, so the pointer is
+// suitably aligned; RowLayout derives FromBytes, so any bit pattern is valid.
+fn row_ref_bytes(bytes: &[u8], idx: usize) -> &RowLayout {
+    unsafe { &*(row_bytes(bytes, idx).as_ptr() as *const RowLayout) }
+}
+
+fn row_mut_bytes(bytes: &mut [u8], idx: usize) -> &mut RowLayout {
+    RowLayout::try_mut_from_bytes(row_bytes_mut(bytes, idx))
+        .expect("row bytes should contain an aligned row")
+}
+
+/// Raw mutable pointer to row `idx`.
+// Safety contract for callers: hold the relevant shard lock (or exclusive
+// table access) and keep `idx` in bounds; this bypasses borrow checking.
+unsafe fn row_mut_ptr(base_ptr: *const u8, idx: usize) -> *mut RowLayout {
+    unsafe { base_ptr.add(row_offset(idx)) as *mut RowLayout }
+}
+
+/// Shared lock on the rows mapping: the mapping cannot be remapped while this
+/// guard is alive, but individual rows may still be locked for read or write
+/// via the striped per-shard locks.
+pub(crate) struct RowsTableReadGuard<'a> {
+    index_file: &'a IndexFile,
+    pub(crate) row_guard: RwLockReadGuard<'a, MmapMut>,
+}
+
+impl<'a> RowsTableReadGuard<'a> {
+    /// Read-locks the shard owning row `idx` and returns a read view of the row.
+    pub(crate) fn row(&self, idx: usize) -> RowReadGuard<'_> {
+        let row_guard = self.index_file.row_locks[idx & self.index_file.row_locks_mask].read();
+        let row_count = row_count_for_len(self.row_guard.len());
+        assert!(
+            idx < row_count,
+            "row index out of bounds: {idx} >= {row_count}"
+        );
+        let row = row_ref_bytes(&self.row_guard[..], idx);
+        RowReadGuard {
+            _row_guard: row_guard,
+            row,
+        }
+    }
+
+    /// Shard (lock stripe) that serializes writers of row `idx`.
+    pub(crate) fn shard_id(&self, idx: usize) -> usize {
+        idx & self.index_file.row_locks_mask
+    }
+
+    /// Write-locks an entire shard without touching any particular row.
+    pub(crate) fn lock_shard(&self, shard_id: usize) -> RwLockWriteGuard<'_, ()> {
+        self.index_file.row_locks[shard_id].write()
+    }
+
+    /// Write-locks the shard owning row `idx` and returns a mutable view.
+    ///
+    /// The const-to-mut cast is sound only because all mutators of this row go
+    /// through the same shard write lock, and the mapping itself is pinned by
+    /// the outer table read lock.
+    pub(crate) fn row_mut(&self, idx: usize) -> RowWriteGuard<'_> {
+        let shard_idx = idx & self.index_file.row_locks_mask;
+        let row_guard = self.index_file.row_locks[shard_idx].write();
+        let row_count = row_count_for_len(self.row_guard.len());
+        assert!(
+            idx < row_count,
+            "row index out of bounds: {idx} >= {row_count}"
+        );
+        let row = unsafe { &mut *row_mut_ptr(self.row_guard.as_ptr(), idx) };
+        RowWriteGuard {
+            _row_guard: row_guard,
+            row,
+            shard_idx,
+        }
+    }
+
+    /// Bounds-checked raw pointer to row `idx` with NO shard lock taken.
+    // Safety contract: caller must already hold the shard lock for `idx`
+    // (e.g. via `lock_shard`) for the lifetime of the pointer's use.
+    pub(crate) unsafe fn unlocked_row_ptr(&self, idx: usize) -> *mut RowLayout {
+        let row_count = row_count_for_len(self.row_guard.len());
+        assert!(
+            idx < row_count,
+            "row index out of bounds: {idx} >= {row_count}"
+        );
+        unsafe { row_mut_ptr(self.row_guard.as_ptr(), idx) }
+    }
+}
+
+/// Exclusive lock on the whole rows mapping; holders may remap it and access
+/// any row without taking the per-shard locks.
+pub(crate) struct RowsTableWriteGuard<'a> {
+    pub(crate) row_guard: RwLockWriteGuard<'a, MmapMut>,
+}
+
+impl RowsTableWriteGuard<'_> {
+    /// Mutable access to row `idx`; no shard lock needed under exclusivity.
+    fn row_mut(&mut self, idx: usize) -> &mut RowLayout {
+        let row_count = row_count_for_len(self.row_guard.len());
+        assert!(
+            idx < row_count,
+            "row index out of bounds: {idx} >= {row_count}"
+        );
+        row_mut_bytes(&mut self.row_guard[..], idx)
+    }
+}
+
+/// Read access to a single row; drops the shard read lock when dropped.
+pub(crate) struct RowReadGuard<'a> {
+    _row_guard: RwLockReadGuard<'a, ()>,
+    row: &'a RowLayout,
+}
+
+impl Deref for RowReadGuard<'_> {
+    type Target = RowLayout;
+
+    fn deref(&self) -> &Self::Target {
+        self.row
+    }
+}
+
+/// Write access to a single row; drops the shard write lock when dropped.
+pub(crate) struct RowWriteGuard<'a> {
+    _row_guard: RwLockWriteGuard<'a, ()>,
+    row: &'a mut RowLayout,
+    // Exposed so callers can correlate this guard with `shard_id()`.
+    pub(crate) shard_idx: usize,
+}
+
+impl Deref for RowWriteGuard<'_> {
+    type Target = RowLayout;
+
+    fn deref(&self) -> &Self::Target {
+        self.row
+    }
+}
+
+impl DerefMut for RowWriteGuard<'_> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.row
+    }
+}
+
+/// The mmap-backed index: a fixed header mapping plus a growable rows mapping,
+/// with striped per-shard row locks layered on top.
+pub(crate) struct IndexFile {
+    /// Kept on Windows so `sync_all` can call `FlushFileBuffers`.
+    /// On Linux the fd is closed after mmap; `msync` suffices for durability.
+    #[cfg(windows)]
+    header_file: File,
+    rows_file: File,
+    /// Fixed mapping covering the header + waste-level pages. Never remapped,
+    /// so `header_ref()` / `layout_prefix_ref()` are always stable without a lock.
+    header_mmap: MmapMut,
+    /// Growable mapping covering only the row pages. Remapped on grow/shrink/reset.
+    rows_mmap: RwLock<MmapMut>,
+    /// Striped row locks; row `idx` maps to `row_locks[idx & row_locks_mask]`.
+    row_locks: Vec<RwLock<()>>,
+    row_locks_mask: usize,
+    config: Arc<Config>,
+    /// Cached checkpoint state so concurrent readers (e.g. compaction candidate
+    /// selection) always see a consistent snapshot without going through the
+    /// double-buffer slot protocol.
+    cached_checkpoint_generation: AtomicU64,
+    cached_checkpoint_ordinal: AtomicU64,
+    cached_checkpoint_offset: AtomicU64,
+}
+
+impl IndexFile {
+    /// Decodes one persisted checkpoint slot. Returns `None` for a slot that
+    /// was never written (generation 0) or whose checksum does not match the
+    /// payload (torn/partial write).
+    fn read_checkpoint_slot(slot: &CheckpointSlot) -> Option<CheckpointCursor> {
+        let generation = slot.generation.load(Ordering::Acquire);
+        if generation == 0 {
+            return None;
+        }
+
+        let file_ordinal = slot.file_ordinal.load(Ordering::Relaxed);
+        let offset = slot.offset.load(Ordering::Relaxed);
+        let checksum = slot.checksum.load(Ordering::Acquire);
+        if checksum != checkpoint_slot_checksum(generation, file_ordinal, offset) {
+            return None;
+        }
+
+        Some(CheckpointCursor {
+            generation,
+            file_ordinal,
+            offset,
+        })
+    }
+
+    /// Picks the newest valid cursor from the double-buffered checkpoint
+    /// slots; `None` when neither slot holds a valid checkpoint.
+    fn durable_checkpoint(&self) -> Option<CheckpointCursor> {
+        self.header_ref()
+            .checkpoint_slots
+            .iter()
+            .filter_map(Self::read_checkpoint_slot)
+            .max_by_key(|cursor| cursor.generation)
+    }
+
+    /// Returns the cached `(file_ordinal, offset)` replay cursor, or `(0, 0)`
+    /// when no checkpoint was ever persisted for this store.
+    pub(crate) fn checkpoint_cursor(&self) -> (u64, u64) {
+        let generation = self.cached_checkpoint_generation.load(Ordering::Acquire);
+        if generation == 0 {
+            return (0, 0);
+        }
+        // Acquire on generation above orders these Relaxed loads after the
+        // writer's Release store of the generation.
+        let ordinal = self.cached_checkpoint_ordinal.load(Ordering::Relaxed);
+        let offset = self.cached_checkpoint_offset.load(Ordering::Relaxed);
+        (ordinal, offset)
+    }
+
+    /// Generation of the cached checkpoint; 0 means "none yet".
+    pub(crate) fn checkpoint_generation(&self) -> u64 {
+        self.cached_checkpoint_generation.load(Ordering::Acquire)
+    }
+
+    /// Persists a new replay cursor into the next double-buffered slot and
+    /// refreshes the in-memory cache.
+    ///
+    /// Torn-write protocol: the checksum is zeroed first so a crash mid-update
+    /// leaves the slot *invalid* (checksum mismatch in `read_checkpoint_slot`)
+    /// rather than a mix of old and new fields; the matching checksum is
+    /// published last with Release ordering.
+    ///
+    /// NOTE(review): assumes checkpoint writers are externally serialized —
+    /// two concurrent callers could interleave slot writes; confirm call sites.
+    pub(crate) fn persist_checkpoint_cursor(&self, ordinal: u64, offset: u64) {
+        let current_gen = self.cached_checkpoint_generation.load(Ordering::Relaxed);
+        let next_generation = current_gen
+            .checked_add(1)
+            .expect("checkpoint generation overflow");
+        // Alternate between the two slots so the previous checkpoint stays
+        // intact until the new one is fully written.
+        let slot =
+            &self.header_ref().checkpoint_slots[next_generation as usize % CHECKPOINT_SLOT_COUNT];
+
+        slot.checksum.store(0, Ordering::Release);
+        slot.generation.store(next_generation, Ordering::Relaxed);
+        slot.file_ordinal.store(ordinal, Ordering::Relaxed);
+        slot.offset.store(offset, Ordering::Relaxed);
+        slot.checksum.store(
+            checkpoint_slot_checksum(next_generation, ordinal, offset),
+            Ordering::Release,
+        );
+
+        // Update the cache so concurrent readers see the new values immediately.
+        // The generation is stored last with Release so readers that observe it
+        // also observe the ordinal/offset stores above.
+        self.cached_checkpoint_ordinal
+            .store(ordinal, Ordering::Relaxed);
+        self.cached_checkpoint_offset
+            .store(offset, Ordering::Relaxed);
+        self.cached_checkpoint_generation
+            .store(next_generation, Ordering::Release);
+    }
+
+    /// Best-effort mlock of a mapping when the config asks for it; failures
+    /// (e.g. RLIMIT_MEMLOCK) are deliberately ignored.
+    #[cfg(target_os = "linux")]
+    fn maybe_lock_mmap(config: &Config, mmap: &MmapMut) {
+        if config.mlock_index {
+            let _ = mmap.lock();
+        }
+    }
+
+    /// No-op on platforms without mlock support in memmap2.
+    #[cfg(not(target_os = "linux"))]
+    fn maybe_lock_mmap(_config: &Config, _mmap: &MmapMut) {}
+
+    /// Reads and validates the header of an existing index file, returning the
+    /// stored hash key pair and the global split level.
+    fn read_existing_header(header_file: &File, header_len: usize) -> Result<((u64, u64), u64)> {
+        if header_len < size_of::<IndexFileHeader>() {
+            return Err(unexpected_eof_error("index file header too short"));
+        }
+        // The header file must hold the full layout (header + waste pages);
+        // `open` always sizes it to `IndexFileLayout`.
+        if header_len != size_of::<IndexFileLayout>() {
+            return Err(invalid_data_error("index header file has unexpected size"));
+        }
+
+        let header = read_available_at(header_file, size_of::<IndexFileHeader>(), 0)
+            .map_err(Error::IOError)?;
+        if header.len() < size_of::<IndexFileHeader>() {
+            return Err(unexpected_eof_error("index file header too short"));
+        }
+        let header = IndexFileHeader::read_from_bytes(&header)
+            .map_err(|_| invalid_data_error("invalid index file header size"))?;
+        if &header.signature != INDEX_FILE_SIGNATURE || header.version != INDEX_FILE_VERSION {
+            return Err(invalid_data_error("invalid index file header"));
+        }
+
+        Ok((
+            (header.hash_key_0, header.hash_key_1),
+            header.global_split_level.load(Ordering::Relaxed),
+        ))
+    }
+
+    /// Returns the hash key stored in an existing index at `base_path`, or
+    /// `None` if no index file exists yet (missing or zero-length file).
+    pub(crate) fn existing_hash_key(base_path: &Path) -> Result<Option<(u64, u64)>> {
+        let header_path = index_file_path(base_path);
+        let header_file = match File::options().read(true).open(header_path) {
+            Ok(file) => file,
+            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
+            Err(err) => return Err(Error::IOError(err)),
+        };
+        let header_len = header_file.metadata().map_err(Error::IOError)?.len() as usize;
+        if header_len == 0 {
+            return Ok(None);
+        }
+
+        let (hash_key, _) = Self::read_existing_header(&header_file, header_len)?;
+        Ok(Some(hash_key))
+    }
+
+    /// Validates a pre-existing index (header + rows file) against the
+    /// caller's expected hash key before it is mmapped.
+    fn validate_existing(
+        header_file: &File,
+        header_len: usize,
+        rows_len: usize,
+        hash_key: (u64, u64),
+    ) -> Result<()> {
+        if !rows_len.is_multiple_of(PAGE_SIZE) {
+            return Err(invalid_data_error(
+                "index rows file size is not page aligned",
+            ));
+        }
+
+        // A key mismatch means the stored signatures were computed with a
+        // different seed and the index would be unusable.
+        let (stored_hash_key, gsl) = Self::read_existing_header(header_file, header_len)?;
+        if stored_hash_key != hash_key {
+            return Err(invalid_data_error("index hash key mismatch"));
+        }
+
+        // The table always holds a power-of-two number of rows.
+        let row_count = row_count_for_len(rows_len);
+        if row_count < MIN_INITIAL_ROWS || !row_count.is_power_of_two() {
+            return Err(invalid_data_error("invalid index file row count"));
+        }
+
+        if gsl < MIN_SPLIT_LEVEL as u64 {
+            return Err(invalid_data_error("invalid index global split level"));
+        }
+
+        // 1 << gsl rows must fit inside the rows file.
+        let uncommitted_rows = 1usize
+            .checked_shl(gsl as u32)
+            .ok_or_else(|| invalid_data_error("index global split level overflow"))?;
+        if uncommitted_rows > row_count {
+            return Err(invalid_data_error(
+                "index global split level exceeds file size",
+            ));
+        }
+
+        Ok(())
+    }
+
+    /// Flushes (msync) the header/waste pages to disk; does not touch rows.
+    pub(crate) fn flush_header(&self) -> Result<()> {
+        self.header_mmap.flush().map_err(Error::IOError)
+    }
+
+    /// Opens (or creates) the index at `base_path`, validating any existing
+    /// files, mapping the header and rows, and loading the durable checkpoint
+    /// cursor into the in-memory cache.
+    pub(crate) fn open(base_path: &Path, config: Arc<Config>) -> Result<Self> {
+        let hash_key = config.hash_key;
+        let num_rows = (config.initial_capacity / ROW_WIDTH)
+            .max(MIN_INITIAL_ROWS)
+            .next_power_of_two();
+        // Lock striping: power-of-two count so `idx & mask` selects the shard.
+        let num_locks = config.max_concurrency.min(num_rows).next_power_of_two();
+        let row_locks = (0..num_locks).map(|_| RwLock::new(())).collect::<Vec<_>>();
+        let row_locks_mask = num_locks - 1;
+
+        let header_path = index_file_path(base_path);
+        let rows_path = index_rows_file_path(base_path);
+
+        let header_file = File::options()
+            .create(true)
+            .truncate(false)
+            .read(true)
+            .write(true)
+            .open(header_path)
+            .map_err(Error::IOError)?;
+        let rows_file = File::options()
+            .create(true)
+            .truncate(false)
+            .read(true)
+            .write(true)
+            .open(rows_path)
+            .map_err(Error::IOError)?;
+
+        let header_size = size_of::<IndexFileLayout>();
+        let header_len = header_file.metadata().map_err(Error::IOError)?.len() as usize;
+        let rows_len = rows_file.metadata().map_err(Error::IOError)?.len() as usize;
+        let new_file = header_len == 0 && rows_len == 0;
+        let rows_size = num_rows * size_of::<RowLayout>();
+
+        if new_file {
+            header_file
+                .set_len(header_size as u64)
+                .map_err(Error::IOError)?;
+            rows_file
+                .set_len(rows_size as u64)
+                .map_err(Error::IOError)?;
+        } else {
+            Self::validate_existing(&header_file, header_len, rows_len, config.hash_key)?;
+        }
+
+        // Existing stores keep their on-disk size (it may differ from the
+        // configured initial capacity).
+        let actual_rows_size = if new_file { rows_size } else { rows_len };
+
+        let header_mmap = unsafe {
+            memmap2::MmapOptions::new()
+                .len(header_size)
+                .map_mut(&header_file)
+        }
+        .map_err(Error::IOError)?;
+        Self::maybe_lock_mmap(config.as_ref(), &header_mmap);
+
+        let rows_mmap = unsafe {
+            memmap2::MmapOptions::new()
+                .len(actual_rows_size)
+                .map_mut(&rows_file)
+        }
+        .map_err(Error::IOError)?;
+        Self::maybe_lock_mmap(config.as_ref(), &rows_mmap);
+
+        if new_file {
+            header_file.sync_all().map_err(Error::IOError)?;
+        }
+
+        let inst = Self {
+            #[cfg(windows)]
+            header_file,
+            rows_file,
+            header_mmap,
+            rows_mmap: RwLock::new(rows_mmap),
+            row_locks,
+            row_locks_mask,
+            config,
+            cached_checkpoint_generation: AtomicU64::new(0),
+            cached_checkpoint_ordinal: AtomicU64::new(0),
+            cached_checkpoint_offset: AtomicU64::new(0),
+        };
+
+        if new_file {
+            let rows_table = inst.rows_table_mut();
+            inst.init_header_and_rows(rows_table, hash_key)?;
+        } else if let Some(cursor) = inst.durable_checkpoint() {
+            // Warm the cache from the newest valid on-disk checkpoint slot.
+            inst.cached_checkpoint_generation
+                .store(cursor.generation, Ordering::Relaxed);
+            inst.cached_checkpoint_ordinal
+                .store(cursor.file_ordinal, Ordering::Relaxed);
+            inst.cached_checkpoint_offset
+                .store(cursor.offset, Ordering::Relaxed);
+        }
+
+        Ok(inst)
+    }
+
+    /// Durably persists rows first, then the header, preserving the invariant
+    /// that header state never claims rows that are not yet on disk.
+    pub(crate) fn sync_all(&self) -> Result<()> {
+        // Persist row updates before any header state that claims those rows are durable.
+        self.rows_mmap.write().flush().map_err(Error::IOError)?;
+        self.rows_file.sync_all().map_err(Error::IOError)?;
+        self.header_mmap.flush().map_err(Error::IOError)?;
+        #[cfg(windows)]
+        self.header_file.sync_all().map_err(Error::IOError)?;
+        Ok(())
+    }
+
+    /// Total on-disk footprint: the fixed header/waste pages plus the current
+    /// rows file length (treated as 0 if its metadata cannot be read).
+    pub(crate) fn file_size_bytes(&self) -> u64 {
+        let header = size_of::<IndexFileLayout>() as u64;
+        let rows = self.rows_file.metadata().map(|m| m.len()).unwrap_or(0);
+        header + rows
+    }
+
+    /// Shared access to the rows mapping (blocks remapping while held).
+    pub(crate) fn rows_table(&self) -> RowsTableReadGuard<'_> {
+        RowsTableReadGuard {
+            index_file: self,
+            row_guard: self.rows_mmap.read(),
+        }
+    }
+
+    /// Exclusive access to the rows mapping (required for remap/grow/shrink).
+    pub(crate) fn rows_table_mut(&self) -> RowsTableWriteGuard<'_> {
+        RowsTableWriteGuard {
+            row_guard: self.rows_mmap.write(),
+        }
+    }
+
+    /// Returns a direct reference to the header without acquiring any lock.
+    ///
+    /// Safe because the header mmap is never remapped and the header fields
+    /// used for stats are all atomics.
+    fn full_header_ref(&self) -> &IndexFileLayout {
+        unsafe { &*(self.header_mmap.as_ptr() as *const IndexFileLayout) }
+    }
+
+    /// Lock-free view of just the header page.
+    pub(crate) fn header_ref(&self) -> &IndexFileHeader {
+        &self.full_header_ref().header
+    }
+
+    /// Adds `waste` bytes to one file's waste counter and returns the new total.
+    pub(crate) fn add_file_waste(&self, file_idx: u16, waste: u32) -> u32 {
+        self.full_header_ref().waste_levels[file_idx as usize].fetch_add(waste, Ordering::Relaxed)
+            + waste
+    }
+
+    /// Current (approximate) waste recorded for one data file.
+    pub(crate) fn file_waste(&self, file_idx: u16) -> u32 {
+        self.full_header_ref().waste_levels[file_idx as usize].load(Ordering::Relaxed)
+    }
+
+    /// Returns the combined waste across all file slots.
+    pub(crate) fn total_waste(&self) -> u64 {
+        let ref_full = self.full_header_ref();
+        let mut total = 0u64;
+        for waste in ref_full.waste_levels.iter() {
+            total += waste.load(Ordering::Relaxed) as u64;
+        }
+        total
+    }
+
+    /// Atomically takes one file's waste counter, resetting it to zero.
+    pub(crate) fn take_file_waste(&self, file_idx: u16) -> u32 {
+        self.full_header_ref().waste_levels[file_idx as usize].swap(0, Ordering::Relaxed)
+    }
+
+    /// Raises the global split level to `nsl`, enlarging the rows file and
+    /// remapping if the current mapping is too small. Returns `Ok(None)` when
+    /// another thread already reached `nsl`, otherwise the remap duration (if
+    /// a remap happened).
+    pub(crate) fn grow(&self, nsl: u64) -> Result<Option<Duration>> {
+        let mut layout_mut = self.rows_table_mut();
+        let gsl = self.header_ref().global_split_level.load(Ordering::Acquire);
+        if nsl <= gsl {
+            return Ok(None);
+        }
+
+        let mut remap_dur = None;
+        let required_rows_size = (1usize << nsl) * size_of::<RowLayout>();
+        if layout_mut.row_guard.len() < required_rows_size {
+            let remap_start = Instant::now();
+            // Over-allocate by `remap_scaler` extra levels to amortize the
+            // cost of future remaps.
+            let alloc_split = nsl + self.config.remap_scaler as u64;
+            let new_rows_size = (1usize << alloc_split) * size_of::<RowLayout>();
+
+            self.rows_file
+                .set_len(new_rows_size as u64)
+                .map_err(Error::IOError)?;
+
+            #[cfg(target_os = "linux")]
+            unsafe {
+                layout_mut
+                    .row_guard
+                    .remap(new_rows_size, memmap2::RemapOptions::new().may_move(true))
+            }
+            .map_err(Error::IOError)?;
+
+            #[cfg(not(target_os = "linux"))]
+            {
+                *layout_mut.row_guard = unsafe {
+                    memmap2::MmapOptions::new()
+                        .len(new_rows_size)
+                        .map_mut(&self.rows_file)
+                }
+                .map_err(Error::IOError)?;
+            }
+
+            Self::maybe_lock_mmap(self.config.as_ref(), &layout_mut.row_guard);
+            remap_dur = Some(remap_start.elapsed());
+        }
+
+        // Publish the new level only after the mapping can hold that many rows.
+        self.header_ref()
+            .global_split_level
+            .store(nsl, Ordering::Release);
+        Ok(remap_dur)
+    }
+
+    /// Current logical row count: `1 << global_split_level`.
+    pub(crate) fn num_rows(&self) -> usize {
+        let gsl = self.header_ref().global_split_level.load(Ordering::Acquire) as usize;
+        1usize << gsl
+    }
+
+    /// Number of lock stripes protecting the rows.
+    pub(crate) fn num_shards(&self) -> usize {
+        self.row_locks.len()
+    }
+
+    /// Repeatedly merges "buddy" rows (idx and idx + half) and halves the
+    /// table while every fully-split pair fits into a single row, stopping at
+    /// `min_rows`. Requires exclusive table access. Returns the resulting row
+    /// count.
+    pub(crate) fn shrink_with_rows_guard(
+        &self,
+        min_rows: usize,
+        mut row_table: RowsTableWriteGuard<'_>,
+    ) -> Result<usize> {
+        loop {
+            let global_split_level = self.header_ref().global_split_level.load(Ordering::Acquire);
+            let current_rows = 1usize << global_split_level;
+            if current_rows <= min_rows {
+                break;
+            }
+
+            let next_level = global_split_level - 1;
+            let half_count = 1usize << next_level;
+
+            // First pass (read-only): verify every fully-split buddy pair fits
+            // into one row before mutating anything.
+            let mut can_merge = true;
+            for idx in 0..half_count {
+                let row1 = row_ref_bytes(&row_table.row_guard[..], idx);
+                let row1_split = row1.split_level.load(Ordering::Acquire);
+                if row1_split != global_split_level {
+                    // Pair not split at the current level — nothing to merge.
+                    continue;
+                }
+
+                let row2 = row_ref_bytes(&row_table.row_guard[..], idx + half_count);
+                let count1 = row1
+                    .signatures
+                    .iter()
+                    .filter(|&&sig| sig != HashCoord::INVALID_SIG)
+                    .count();
+                let count2 = row2
+                    .signatures
+                    .iter()
+                    .filter(|&&sig| sig != HashCoord::INVALID_SIG)
+                    .count();
+                if count1 + count2 > ROW_WIDTH {
+                    can_merge = false;
+                    break;
+                }
+            }
+
+            if !can_merge {
+                break;
+            }
+
+            // Second pass: move row2's entries into row1's free slots.
+            // Safety: we hold the exclusive table guard, so the raw aliasing
+            // of two distinct rows cannot race with anything.
+            for idx in 0..half_count {
+                let row1 = unsafe { &mut *row_mut_ptr(row_table.row_guard.as_ptr(), idx) };
+                let row2 =
+                    unsafe { &mut *row_mut_ptr(row_table.row_guard.as_ptr(), idx + half_count) };
+
+                if row1.split_level.load(Ordering::Acquire) != global_split_level {
+                    continue;
+                }
+
+                let mut dest_idx = 0usize;
+                for src_idx in 0..ROW_WIDTH {
+                    if row2.signatures[src_idx] == HashCoord::INVALID_SIG {
+                        continue;
+                    }
+
+                    while dest_idx < ROW_WIDTH
+                        && row1.signatures[dest_idx] != HashCoord::INVALID_SIG
+                    {
+                        dest_idx += 1;
+                    }
+
+                    if dest_idx >= ROW_WIDTH {
+                        break;
+                    }
+
+                    row1.insert(dest_idx, row2.signatures[src_idx], row2.pointers[src_idx]);
+                    row2.remove(src_idx);
+                }
+
+                row2.set_split_level(0);
+                row1.set_split_level(next_level);
+            }
+
+            self.header_ref()
+                .global_split_level
+                .store(next_level, Ordering::Release);
+        }
+
+        let final_level = self.header_ref().global_split_level.load(Ordering::Acquire);
+        let new_rows_size = (1usize << final_level) * size_of::<RowLayout>();
+
+        if new_rows_size < row_table.row_guard.len() {
+            #[cfg(target_os = "linux")]
+            {
+                // Shrink the mapping first, then truncate the file.
+                unsafe {
+                    row_table
+                        .row_guard
+                        .remap(new_rows_size, memmap2::RemapOptions::new().may_move(true))
+                }
+                .map_err(Error::IOError)?;
+                self.rows_file
+                    .set_len(new_rows_size as u64)
+                    .map_err(Error::IOError)?;
+            }
+
+            #[cfg(not(target_os = "linux"))]
+            {
+                row_table.row_guard.flush().map_err(Error::IOError)?;
+
+                #[cfg(windows)]
+                {
+                    // On Windows we must unmap before truncating.
+                    *row_table.row_guard = memmap2::MmapOptions::new()
+                        .len(1)
+                        .map_anon()
+                        .map_err(Error::IOError)?;
+                }
+
+                self.rows_file
+                    .set_len(new_rows_size as u64)
+                    .map_err(Error::IOError)?;
+                *row_table.row_guard = unsafe {
+                    memmap2::MmapOptions::new()
+                        .len(new_rows_size)
+                        .map_mut(&self.rows_file)
+                }
+                .map_err(Error::IOError)?;
+            }
+
+            Self::maybe_lock_mmap(self.config.as_ref(), &row_table.row_guard);
+        }
+
+        Ok(1usize << final_level)
+    }
+
+    /// Zeroes and (re)initializes both mappings: writes the header identity
+    /// fields, sets the minimum split level, marks the initial rows, and
+    /// flushes everything to disk.
+    fn init_header_and_rows(
+        &self,
+        mut rows_table: RowsTableWriteGuard,
+        hash_key: (u64, u64),
+    ) -> Result<()> {
+        // Zero both mmaps first, then populate.
+        rows_table.row_guard.fill(0);
+        // Safety: header_mmap is a contiguous MmapMut that we own; no other &mut exists yet.
+        unsafe {
+            std::ptr::write_bytes(
+                self.header_mmap.as_ptr() as *mut u8,
+                0,
+                self.header_mmap.len(),
+            );
+        }
+
+        // Now create the mutable reference after zeroing is complete.
+        // Safety: only called during init (open) or reset, both single-threaded
+        // w.r.t. this store instance.
+        let layout = unsafe { &mut *(self.header_mmap.as_ptr() as *mut IndexFileLayout) };
+
+        layout.header.signature = *INDEX_FILE_SIGNATURE;
+        layout.header.version = INDEX_FILE_VERSION;
+        layout.header.hash_key_0 = hash_key.0;
+        layout.header.hash_key_1 = hash_key.1;
+        layout
+            .header
+            .global_split_level
+            .store(MIN_SPLIT_LEVEL as u64, Ordering::Release);
+
+        // Mark the initial rows as split at the minimum level; all other
+        // per-row state stays zeroed (empty slots).
+        for row_idx in 0..MIN_INITIAL_ROWS {
+            rows_table
+                .row_mut(row_idx)
+                .set_split_level(MIN_SPLIT_LEVEL as u64);
+        }
+
+        // Make the fresh state durable before the store is used.
+        self.flush_header()?;
+        rows_table.row_guard.flush().map_err(Error::IOError)?;
+        self.rows_file.sync_all().map_err(Error::IOError)?;
+        Ok(())
+    }
+
+    /// Shrinks the rows file back to the minimum size and reinitializes the
+    /// header and rows, effectively emptying the index.
+    pub(crate) fn reset(&self, mut row_table: RowsTableWriteGuard<'_>) -> Result<()> {
+        let min_rows_size = MIN_INITIAL_ROWS * size_of::<RowLayout>();
+
+        #[cfg(target_os = "linux")]
+        unsafe {
+            self.rows_file
+                .set_len(min_rows_size as u64)
+                .map_err(Error::IOError)?;
+            row_table
+                .row_guard
+                .remap(min_rows_size, memmap2::RemapOptions::new().may_move(true))
+        }
+        .map_err(Error::IOError)?;
+
+        #[cfg(not(target_os = "linux"))]
+        {
+            row_table.row_guard.flush().map_err(Error::IOError)?;
+
+            #[cfg(windows)]
+            {
+                // On Windows we must unmap before truncating the file.
+                *row_table.row_guard = memmap2::MmapOptions::new()
+                    .len(1)
+                    .map_anon()
+                    .map_err(Error::IOError)?;
+            }
+
+            self.rows_file
+                .set_len(min_rows_size as u64)
+                .map_err(Error::IOError)?;
+            *row_table.row_guard = unsafe {
+                memmap2::MmapOptions::new()
+                    .len(min_rows_size)
+                    .map_mut(&self.rows_file)
+            }
+            .map_err(Error::IOError)?;
+        }
+
+        Self::maybe_lock_mmap(self.config.as_ref(), &row_table.row_guard);
+
+        self.init_header_and_rows(row_table, self.config.hash_key)
+    }
+}
diff --git a/src/internal.rs b/src/internal.rs
new file mode 100644
index 0000000..0972d2c
--- /dev/null
+++ b/src/internal.rs
@@ -0,0 +1,405 @@
+use siphasher::sip128::{Hasher128, SipHasher13};
+
+use std::{
+ fs::File,
+ hash::Hasher,
+ path::{Path, PathBuf},
+};
+
+use crate::types::{Error, Result};
+
+pub(crate) const PAGE_SIZE: usize = 4096;
+pub(crate) const ROW_WIDTH: usize = 16 * 21;
+pub(crate) const MIN_SPLIT_LEVEL: usize = 3;
+pub(crate) const MASKED_ROW_SELECTOR_BITS: u32 = 18;
+pub(crate) const MIN_INITIAL_ROWS: usize = 1 << MIN_SPLIT_LEVEL;
+pub(crate) const MAX_REPRESENTABLE_FILE_SIZE: u32 =
+ ((1u32 << 26) - 1) * FILE_OFFSET_ALIGNMENT as u32;
+pub(crate) const ENTRY_TYPE_SHIFT: u32 = 14;
+pub(crate) const MAX_INTERNAL_KEY_SIZE: usize = (1 << ENTRY_TYPE_SHIFT) - 1;
+pub(crate) const MAX_INTERNAL_VALUE_SIZE: usize = (1 << 16) - 1;
+pub(crate) const MAX_DATA_FILES: u16 = 1 << 12;
+pub(crate) const MAX_DATA_FILE_IDX: u16 = MAX_DATA_FILES - 1;
+
+pub(crate) const INDEX_FILE_SIGNATURE: &[u8; 8] = b"CandyStr";
+pub(crate) const INDEX_FILE_VERSION: u32 = 0x0002_0009;
+pub(crate) const DATA_FILE_SIGNATURE: &[u8; 8] = b"CandyDat";
+pub(crate) const DATA_FILE_VERSION: u32 = 0x0002_0002;
+pub(crate) const FILE_OFFSET_ALIGNMENT: u64 = 16;
+pub(crate) const SIZE_HINT_UNIT: usize = 512;
+pub(crate) const DATA_ENTRY_OFFSET_MAGIC: u32 = 0x91c8_d7cd;
+pub(crate) const DATA_ENTRY_OFFSET_MASK: u32 = (1 << 24) - 1;
+pub(crate) const KEY_NAMESPACE_BITS: u8 = 6;
+pub(crate) const MAX_KEY_NAMESPACE: u8 = (1 << KEY_NAMESPACE_BITS) - 1;
+pub(crate) const READ_BUFFER_SIZE: usize = 128 * 1024;
+
+pub(crate) fn aligned_data_entry_waste(klen: usize, vlen: usize) -> u32 {
+ (10 + klen as u32 + vlen as u32).next_multiple_of(FILE_OFFSET_ALIGNMENT as u32)
+}
+
+pub(crate) fn aligned_tombstone_entry_waste(klen: usize) -> u32 {
+ (8 + klen as u32).next_multiple_of(FILE_OFFSET_ALIGNMENT as u32)
+}
+
+pub(crate) fn aligned_data_entry_size(klen: usize, vlen: usize) -> u64 {
+ (10 + klen as u64 + vlen as u64).next_multiple_of(FILE_OFFSET_ALIGNMENT)
+}
+
+pub(crate) fn index_file_path(base_path: &Path) -> PathBuf {
+ base_path.join("index")
+}
+
+pub(crate) fn index_rows_file_path(base_path: &Path) -> PathBuf {
+ base_path.join("rows")
+}
+
+pub(crate) fn data_file_path(base_path: &Path, file_idx: u16) -> PathBuf {
+ base_path.join(format!("data_{file_idx:04}"))
+}
+
+#[cfg(unix)]
+pub(crate) fn sync_dir(path: &Path) -> Result<()> {
+ File::open(path)
+ .map_err(Error::IOError)?
+ .sync_all()
+ .map_err(Error::IOError)
+}
+
+#[cfg(not(unix))]
+pub(crate) fn sync_dir(_path: &Path) -> Result<()> {
+ Ok(())
+}
+
+#[cfg(target_os = "linux")]
+pub(crate) fn sync_file_range(file: &File, offset: u64, len: u64) -> Result<()> {
+ use std::os::fd::AsRawFd;
+
+ if len == 0 {
+ return Ok(());
+ }
+
+ let sync_offset = i64::try_from(offset)
+ .map_err(|_| Error::IOError(std::io::Error::other("sync offset overflow")))?;
+ let sync_len = i64::try_from(len)
+ .map_err(|_| Error::IOError(std::io::Error::other("sync length overflow")))?;
+
+ let rc = unsafe {
+ libc::sync_file_range(
+ file.as_raw_fd(),
+ sync_offset,
+ sync_len,
+ libc::SYNC_FILE_RANGE_WAIT_BEFORE
+ | libc::SYNC_FILE_RANGE_WRITE
+ | libc::SYNC_FILE_RANGE_WAIT_AFTER,
+ )
+ };
+ if rc == 0 {
+ return Ok(());
+ }
+
+ let err = std::io::Error::last_os_error();
+ match err.raw_os_error() {
+ Some(libc::EINVAL | libc::ENOSYS | libc::EOPNOTSUPP) => {
+ file.sync_data().map_err(Error::IOError)
+ }
+ _ => Err(Error::IOError(err)),
+ }
+}
+
+#[cfg(not(target_os = "linux"))]
+pub(crate) fn sync_file_range(file: &File, _offset: u64, len: u64) -> Result<()> {
+ if len == 0 {
+ return Ok(());
+ }
+ file.sync_data().map_err(Error::IOError)
+}
+
+pub(crate) fn parse_data_file_idx(path: &Path) -> Option<u16> {
+ let name = path.file_name()?.to_str()?;
+ let suffix = name.strip_prefix("data_")?;
+ if suffix.len() != 4 {
+ return None;
+ }
+ suffix.parse().ok()
+}
+
+#[derive(Debug, Clone, Copy)]
+pub(crate) struct RangeMetadata {
+ pub(crate) head: u64,
+ pub(crate) tail: u64,
+ pub(crate) count: u64,
+}
+
+impl RangeMetadata {
+ pub(crate) fn new() -> Self {
+ Self {
+ head: 1u64 << 63,
+ tail: (1u64 << 63) - 1,
+ count: 0,
+ }
+ }
+
+ pub(crate) fn to_bytes(self) -> [u8; 24] {
+ let mut buf = [0u8; 24];
+ buf[0..8].copy_from_slice(&self.head.to_le_bytes());
+ buf[8..16].copy_from_slice(&self.tail.to_le_bytes());
+ buf[16..24].copy_from_slice(&self.count.to_le_bytes());
+ buf
+ }
+
+ pub(crate) fn from_bytes(bytes: &[u8]) -> Option<Self> {
+ if bytes.len() != 24 {
+ return None;
+ }
+ Some(Self {
+ head: u64::from_le_bytes(bytes[0..8].try_into().ok()?),
+ tail: u64::from_le_bytes(bytes[8..16].try_into().ok()?),
+ count: u64::from_le_bytes(bytes[16..24].try_into().ok()?),
+ })
+ }
+}
+
+#[repr(u16)]
+pub(crate) enum EntryType {
+ Insert = 0,
+ Update = 1,
+ _Unused2 = 2,
+ Tombstone = 3,
+}
+
+pub(crate) fn invalid_data_error(message: &'static str) -> Error {
+ Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ message,
+ ))
+}
+
+pub(crate) fn unexpected_eof_error(message: &'static str) -> Error {
+ Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::UnexpectedEof,
+ message,
+ ))
+}
+
+pub(crate) fn is_resettable_open_error(err: &Error) -> bool {
+ matches!(
+ err,
+ Error::IOError(io_err)
+ if matches!(
+ io_err.kind(),
+ std::io::ErrorKind::InvalidData | std::io::ErrorKind::UnexpectedEof
+ )
+ )
+}
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+#[repr(u8)]
+pub(crate) enum KeyNamespace {
+ User = 0,
+ QueueMeta = 1,
+ QueueData = 2,
+ BigMeta = 3,
+ BigData = 4,
+ ListMeta = 5,
+ ListIndex = 6,
+ ListData = 7,
+ Typed = 8,
+ TypedQueueMeta = 9,
+ TypedQueueData = 10,
+ TypedBigMeta = 11,
+ TypedBigData = 12,
+ TypedListMeta = 13,
+ TypedListIndex = 14,
+ TypedListData = 15,
+}
+
+impl KeyNamespace {
+ pub(crate) fn from_u8(ns: u8) -> Option<Self> {
+ match ns {
+ x if x == Self::User as u8 => Some(Self::User),
+ x if x == Self::QueueMeta as u8 => Some(Self::QueueMeta),
+ x if x == Self::QueueData as u8 => Some(Self::QueueData),
+ x if x == Self::BigMeta as u8 => Some(Self::BigMeta),
+ x if x == Self::BigData as u8 => Some(Self::BigData),
+ x if x == Self::ListMeta as u8 => Some(Self::ListMeta),
+ x if x == Self::ListIndex as u8 => Some(Self::ListIndex),
+ x if x == Self::ListData as u8 => Some(Self::ListData),
+ x if x == Self::Typed as u8 => Some(Self::Typed),
+ x if x == Self::TypedQueueMeta as u8 => Some(Self::TypedQueueMeta),
+ x if x == Self::TypedQueueData as u8 => Some(Self::TypedQueueData),
+ x if x == Self::TypedBigMeta as u8 => Some(Self::TypedBigMeta),
+ x if x == Self::TypedBigData as u8 => Some(Self::TypedBigData),
+ x if x == Self::TypedListMeta as u8 => Some(Self::TypedListMeta),
+ x if x == Self::TypedListIndex as u8 => Some(Self::TypedListIndex),
+ x if x == Self::TypedListData as u8 => Some(Self::TypedListData),
+ _ => None,
+ }
+ }
+}
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub(crate) struct HashCoord {
+ pub(crate) sig: u32,
+ pub(crate) row_selector: u32,
+}
+
+impl HashCoord {
+ pub(crate) const INVALID_SIG: u32 = 0;
+
+ pub(crate) fn new(ns: KeyNamespace, key: &[u8], hash_key: (u64, u64)) -> Self {
+ let mut hasher = SipHasher13::new_with_keys(hash_key.0, hash_key.1);
+ hasher.write_u8(ns as u8);
+ hasher.write(key);
+ let h = hasher.finish128();
+ let row_selector = h.h1 as u32;
+ let mut sig = (h.h1 >> 32) as u32;
+ if sig == Self::INVALID_SIG {
+ sig = h.h2 as u32;
+ if sig == Self::INVALID_SIG {
+ sig = (h.h2 >> 32) as u32;
+ if sig == Self::INVALID_SIG {
+ sig = 0x6419_9a93;
+ }
+ }
+ }
+
+ Self { sig, row_selector }
+ }
+
+ pub(crate) fn masked_row_selector(&self) -> u32 {
+ (self.row_selector >> MIN_SPLIT_LEVEL) & ((1 << MASKED_ROW_SELECTOR_BITS) - 1)
+ }
+
+ pub(crate) fn row_index(&self, split_level: u64) -> usize {
+ debug_assert!(split_level >= MIN_SPLIT_LEVEL as u64, "sl={split_level}");
+ ((self.row_selector as u64) & ((1 << split_level) - 1)) as usize
+ }
+}
+
+pub(crate) struct KVBuf {
+ pub(crate) buf: Vec<u8>,
+ pub(crate) vlen: u16,
+ pub(crate) header_len: u16,
+ #[allow(dead_code)]
+ pub(crate) ns: u8,
+ #[allow(dead_code)]
+ pub(crate) entry_type: EntryType,
+}
+
+impl KVBuf {
+ pub(crate) fn value(&self) -> &[u8] {
+ let start = self.header_len as usize;
+ &self.buf[start..start + self.vlen as usize]
+ }
+
+ pub(crate) fn key(&self) -> &[u8] {
+ &self.buf[self.header_len as usize + self.vlen as usize..]
+ }
+
+ pub(crate) fn into_value(mut self) -> Vec<u8> {
+ let start = self.header_len as usize;
+ let vlen = self.vlen as usize;
+ if start > 0 {
+ self.buf.copy_within(start..start + vlen, 0);
+ }
+ self.buf.truncate(vlen);
+ self.buf
+ }
+}
+
+pub(crate) struct KVRef<'a> {
+ pub(crate) buf: &'a [u8],
+ pub(crate) vlen: u16,
+ pub(crate) header_len: u16,
+ pub(crate) ns: u8,
+ pub(crate) entry_type: EntryType,
+}
+
+impl KVRef<'_> {
+ pub(crate) fn value(&self) -> &[u8] {
+ let start = self.header_len as usize;
+ &self.buf[start..start + self.vlen as usize]
+ }
+
+ pub(crate) fn key(&self) -> &[u8] {
+ &self.buf[self.header_len as usize + self.vlen as usize..]
+ }
+}
+
+#[cfg(unix)]
+pub(crate) fn read_into_at(
+ f: &File,
+ buf: &mut Vec<u8>,
+ count: usize,
+ file_offset: u64,
+) -> std::io::Result<()> {
+ buf.resize(count, 0);
+ let mut offset = 0;
+ while offset < count {
+ let n = std::os::unix::fs::FileExt::read_at(
+ f,
+ &mut buf[offset..],
+ file_offset + offset as u64,
+ )?;
+ if n == 0 {
+ break;
+ } else {
+ offset += n;
+ }
+ }
+ buf.truncate(offset);
+ Ok(())
+}
+
+#[cfg(windows)]
+pub(crate) fn read_into_at(
+ f: &File,
+ buf: &mut Vec,
+ count: usize,
+ file_offset: u64,
+) -> std::io::Result<()> {
+ buf.resize(count, 0);
+ let mut offset = 0;
+ while offset < count {
+ let n = std::os::windows::fs::FileExt::seek_read(
+ f,
+ &mut buf[offset..],
+ file_offset + offset as u64,
+ )?;
+ if n == 0 {
+ break;
+ } else {
+ offset += n;
+ }
+ }
+ buf.truncate(offset);
+ Ok(())
+}
+
+pub(crate) fn read_available_at(
+ f: &File,
+ count: usize,
+ file_offset: u64,
+) -> std::io::Result<Vec<u8>> {
+ let mut buf = Vec::new();
+ read_into_at(f, &mut buf, count, file_offset)?;
+ Ok(buf)
+}
+
+#[cfg(unix)]
+pub(crate) fn write_all_at(f: &File, buf: &[u8], offset: u64) -> std::io::Result<()> {
+ std::os::unix::fs::FileExt::write_all_at(f, buf, offset)
+}
+
+#[cfg(windows)]
+pub(crate) fn write_all_at(f: &File, mut buf: &[u8], mut offset: u64) -> std::io::Result<()> {
+ while !buf.is_empty() {
+ let written = std::os::windows::fs::FileExt::seek_write(f, buf, offset)?;
+ if written == 0 {
+ return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof));
+ }
+ buf = &buf[written..];
+ offset += written as u64;
+ }
+ Ok(())
+}
diff --git a/src/lib.rs b/src/lib.rs
index a8ee29e..38468ba 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,142 +1,50 @@
-//! A fast (*blazingly*, of course), persistent, in-process key-value store that relies on a novel sharding
-//! algorithm. Since Candy does not rely on log-structured merge (LSM) trees or B-Trees, no journal/WAL is needed
-//! and IOs go directly to file.
-//!
-//! The algorithm can be thought of as a "zero-overhead" extension to a hash table that's stored over files,
-//! as it's designed to minimizes disk IO operations. Most operations add an overhead of 1-2 microseconds
-//! to the disk IO latency, and operations generally require 1-4 disk IOs.
-//!
-//! The algorithm, for the most part, is crash-safe. That is, you can crash at any point and still be in a consistent
-//! state. You might lose the ongoing operation, but we consider this acceptable.
-//!
-//! Candy is designed to consume very little memory: entries are written directly to the shard-file, and only a
-//! table of ~380KB is kept `mmap`-ed (it is also file-backed, so can be evicted if needed). A shard-file can
-//! hold around 30K entries, and more shard-files are created as needed.
-//!
-//! A unique feature of Candy is the support of *lists*, which allow creating cheap collections.
-//!
-//! Note: the file format is not yet stable!
-//!
-//! Example:
-//! ```
-//! use candystore::{CandyStore, Config, Result};
-//!
-//! fn main() -> Result<()> {
-//! let db = CandyStore::open("/tmp/candy-dir", Config::default())?;
-//! db.set("hello", "world")?;
-//! assert_eq!(db.get("hello")?, Some("world".into()));
-//! db.remove("hello")?;
-//! assert_eq!(db.get("hello")?, None);
-//!
-//! // lists
-//! db.set_in_list("italian", "bye", "arrivederci")?;
-//! db.set_in_list("italian", "thanks", "grazie")?;
-//! assert_eq!(db.get_from_list("italian", "bye")?, Some("arrivederci".into()));
-//!
-//! db.set_in_list("spanish", "bye", "adios")?;
-//! db.set_in_list("spanish", "thanks", "gracias")?;
-//!
-//! let items = db.iter_list("spanish").map(|res| res.unwrap()).collect::<Vec<_>>();
-//! assert_eq!(items, vec![("bye".into(), "adios".into()), ("thanks".into(), "gracias".into())]);
-//!
-//! Ok(())
-//! }
-//! ```
-
-mod hashing;
-mod lists;
-mod queues;
-mod router;
-mod shard;
-mod stats;
+mod data_file;
+mod index_file;
+mod internal;
+mod pacer;
mod store;
-mod typed;
-
-pub use hashing::HashSeed;
-pub use lists::{ListCompactionParams, ListIterator};
-pub use stats::Stats;
-pub use store::{CandyStore, GetOrCreateStatus, ReplaceStatus, SetStatus};
-pub use typed::{CandyTypedDeque, CandyTypedKey, CandyTypedList, CandyTypedStore};
-
-use std::fmt::{Display, Formatter};
-
-#[cfg(feature = "whitebox_testing")]
-pub use hashing::HASH_BITS_TO_KEEP;
-
-#[derive(Debug, PartialEq, Eq, Clone)]
-pub enum CandyError {
- KeyTooLong(usize),
- ValueTooLong(usize),
- EntryCannotFitInShard(usize, usize),
-}
-
-impl Display for CandyError {
- fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
- match self {
- Self::KeyTooLong(sz) => write!(f, "key too long {sz}"),
- Self::ValueTooLong(sz) => write!(f, "value too long {sz}"),
- Self::EntryCannotFitInShard(sz, max) => {
- write!(f, "entry too big ({sz}) for a single shard file ({max})")
- }
- }
+mod types;
+
+/// Named crash point for whitebox testing.
+///
+/// When the `whitebox-testing` feature is enabled and the environment variable
+/// `CANDYSTORE_CRASH_POINT` matches `name`, the process aborts after the number
+/// of hits specified by `CANDYSTORE_CRASH_AFTER` (default 0 = immediate).
+#[cfg(feature = "whitebox-testing")]
+pub(crate) fn crash_point(name: &str) {
+ use std::sync::atomic::{AtomicU64, Ordering};
+ static COUNTER: AtomicU64 = AtomicU64::new(0);
+
+ let Ok(target) = std::env::var("CANDYSTORE_CRASH_POINT") else {
+ return;
+ };
+ if target != name {
+ return;
}
-}
-
-impl std::error::Error for CandyError {}
-
-pub type Result<T> = anyhow::Result<T>;
-
-/// The configuration options for CandyStore. Comes with sane defaults, feel free to use them
-#[derive(Debug, Clone)]
-pub struct Config {
- /// we don't want huge shards, because splitting would be expensive
- pub max_shard_size: u32,
- /// should be ~10% of max_shard_size
- pub min_compaction_threashold: u32,
- /// just some entropy, not so important unless you fear DoS
- pub hash_seed: HashSeed,
- /// hint for creating number of shards accordingly)
- pub expected_number_of_keys: usize,
- /// number of keyed locks for concurrent list ops
- pub max_concurrent_list_ops: u32,
- /// whether or not to truncate up shard files to their max size (spare files)
- pub truncate_up: bool,
- /// whether or not to clear the DB if the version is unsupported
- pub clear_on_unsupported_version: bool,
- /// whether or not to mlock the shard headers to RAM (POSIX only)
- pub mlock_headers: bool,
- /// number of background compaction threads
- pub num_compaction_threads: usize,
- /// optionally delay modifying operations before for the given duration before flushing data to disk,
- /// to ensure reboot consistency
- #[cfg(feature = "flush_aggregation")]
- pub flush_aggregation_delay: Option<std::time::Duration>,
-}
-
-impl Default for Config {
- fn default() -> Self {
- Self {
- max_shard_size: 64 * 1024 * 1024,
- min_compaction_threashold: 8 * 1024 * 1024,
- hash_seed: *b"kOYLu0xvq2WtzcKJ",
- expected_number_of_keys: 0,
- max_concurrent_list_ops: 64,
- truncate_up: true,
- clear_on_unsupported_version: false,
- mlock_headers: false,
- num_compaction_threads: 4,
- #[cfg(feature = "flush_aggregation")]
- flush_aggregation_delay: None,
- }
+ let after: u64 = std::env::var("CANDYSTORE_CRASH_AFTER")
+ .ok()
+ .and_then(|s| s.parse().ok())
+ .unwrap_or(0);
+ if COUNTER.fetch_add(1, Ordering::Relaxed) >= after {
+ std::process::abort();
}
}
-pub(crate) const MAX_TOTAL_KEY_SIZE: usize = 0x3fff; // 14 bits
-pub(crate) const MAX_TOTAL_VALUE_SIZE: usize = 0xffff; // 16 bits
-pub(crate) const NAMESPACING_RESERVED_SIZE: usize = 0xff;
-pub(crate) const VALUE_RESERVED_SIZE: usize = 0xff;
-pub const MAX_KEY_SIZE: usize = MAX_TOTAL_KEY_SIZE - NAMESPACING_RESERVED_SIZE;
-pub const MAX_VALUE_SIZE: usize = MAX_TOTAL_VALUE_SIZE - VALUE_RESERVED_SIZE;
-
-const _: () = assert!(MAX_KEY_SIZE <= u16::MAX as usize);
-const _: () = assert!(MAX_VALUE_SIZE <= u16::MAX as usize);
+#[cfg(not(feature = "whitebox-testing"))]
+#[inline(always)]
+pub(crate) fn crash_point(_name: &str) {}
+
+/// The main untyped store API.
+pub use crate::store::{
+ CandyStore, CandyTypedDeque, CandyTypedKey, CandyTypedList, CandyTypedStore, KVPair,
+ ListIterator,
+};
+/// Public configuration, error, and stats types.
+pub use crate::types::*;
+
+/// Backward-compatible alias for the crate error type.
+pub type CandyError = Error;
+/// Maximum supported user key length in bytes.
+pub const MAX_KEY_LEN: usize = MAX_USER_KEY_SIZE;
+/// Maximum supported inline value length in bytes.
+pub const MAX_VALUE_LEN: usize = MAX_USER_VALUE_SIZE;
diff --git a/src/lists.rs b/src/lists.rs
deleted file mode 100644
index 2e8c83e..0000000
--- a/src/lists.rs
+++ /dev/null
@@ -1,863 +0,0 @@
-use std::ops::Range;
-
-use crate::{
- hashing::PartedHash,
- shard::{InsertMode, KVPair},
- store::{CHAIN_NAMESPACE, ITEM_NAMESPACE, LIST_NAMESPACE},
- CandyStore, GetOrCreateStatus, ReplaceStatus, Result, SetStatus,
-};
-
-use bytemuck::{bytes_of, from_bytes, Pod, Zeroable};
-use parking_lot::MutexGuard;
-
-#[derive(Clone, Copy, Pod, Zeroable)]
-#[repr(C)]
-struct List {
- head_idx: u64, // inclusive
- tail_idx: u64, // exclusive
- num_items: u64,
-}
-
-impl std::fmt::Debug for List {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- write!(
- f,
- "List(0x{:016x}..0x{:016x} items={})",
- self.head_idx, self.tail_idx, self.num_items
- )
- }
-}
-
-impl List {
- fn span_len(&self) -> u64 {
- self.tail_idx - self.head_idx
- }
- fn holes(&self) -> u64 {
- self.span_len() - self.num_items
- }
- fn is_empty(&self) -> bool {
- self.head_idx == self.tail_idx
- }
-}
-
-#[derive(Debug, Clone, Copy, Pod, Zeroable)]
-#[repr(C, packed)]
-struct ChainKey {
- list_ph: PartedHash,
- idx: u64,
- namespace: u8,
-}
-
-#[derive(Debug)]
-pub struct ListCompactionParams {
- pub min_length: u64,
- pub min_holes_ratio: f64,
-}
-
-impl Default for ListCompactionParams {
- fn default() -> Self {
- Self {
- min_length: 100,
- min_holes_ratio: 0.25,
- }
- }
-}
-
-pub struct ListIterator<'a> {
- store: &'a CandyStore,
- list_key: Vec<u8>,
- list_ph: PartedHash,
- range: Option<Range<u64>>,
- fwd: bool,
-}
-
-impl<'a> Iterator for ListIterator<'a> {
- type Item = Result<KVPair>;
-
- fn next(&mut self) -> Option<Self::Item> {
- if self.range.is_none() {
- let _guard = self.store.lock_list(self.list_ph);
- let list_bytes = match self.store.get_raw(&self.list_key) {
- Ok(Some(list_bytes)) => list_bytes,
- Ok(None) => return None,
- Err(e) => return Some(Err(e)),
- };
- let list = *from_bytes::<List>(&list_bytes);
- self.range = Some(list.head_idx..list.tail_idx);
- }
-
- loop {
- let idx = if self.fwd {
- self.range.as_mut().unwrap().next()
- } else {
- self.range.as_mut().unwrap().next_back()
- };
- let Some(idx) = idx else {
- return None;
- };
-
- match self.store.get_from_list_at_index(self.list_ph, idx, true) {
- Err(e) => return Some(Err(e)),
- Ok(Some((_, k, v))) => return Some(Ok((k, v))),
- Ok(None) => {
- // try next index
- }
- }
- }
- }
-
- fn size_hint(&self) -> (usize, Option<usize>) {
- if let Some(ref range) = self.range {
- range.size_hint()
- } else {
- (0, None)
- }
- }
-}
-
-#[derive(Debug)]
-enum InsertToListStatus {
- Created(Vec<u8>),
- DoesNotExist,
- WrongValue(Vec<u8>),
- ExistingValue(Vec<u8>),
- Replaced(Vec<u8>),
-}
-
-impl CandyStore {
- const FIRST_LIST_IDX: u64 = 0x8000_0000_0000_0000;
-
- fn make_list_key(&self, mut list_key: Vec<u8>) -> (PartedHash, Vec<u8>) {
- list_key.extend_from_slice(LIST_NAMESPACE);
- (PartedHash::new(&self.config.hash_seed, &list_key), list_key)
- }
-
- fn make_item_key(&self, list_ph: PartedHash, mut item_key: Vec<u8>) -> (PartedHash, Vec<u8>) {
- item_key.extend_from_slice(bytes_of(&list_ph));
- item_key.extend_from_slice(ITEM_NAMESPACE);
- (PartedHash::new(&self.config.hash_seed, &item_key), item_key)
- }
-
- pub(crate) fn lock_list(&self, list_ph: PartedHash) -> MutexGuard<'_, ()> {
- self.keyed_locks[(list_ph.signature() & self.keyed_locks_mask) as usize].lock()
- }
-
- fn _insert_to_list(
- &self,
- list_key: Vec<u8>,
- item_key: Vec<u8>,
- mut val: Vec<u8>,
- mode: InsertMode,
- ) -> Result<InsertToListStatus> {
- let (list_ph, list_key) = self.make_list_key(list_key);
- let (item_ph, item_key) = self.make_item_key(list_ph, item_key);
-
- let _guard = self.lock_list(list_ph);
-
- // if the item already exists, it's already part of the list. just update it and preserve the index
- if let Some(mut existing_val) = self.get_raw(&item_key)? {
- match mode {
- InsertMode::GetOrCreate => {
- existing_val.truncate(existing_val.len() - size_of::<u64>());
- return Ok(InsertToListStatus::ExistingValue(existing_val));
- }
- InsertMode::Replace(expected_val) => {
- if let Some(expected_val) = expected_val {
- if expected_val != &existing_val[existing_val.len() - size_of::<u64>()..] {
- existing_val.truncate(existing_val.len() - size_of::<u64>());
- return Ok(InsertToListStatus::WrongValue(existing_val));
- }
- }
- // fall through
- }
- InsertMode::Set => {
- // fall through
- }
- }
-
- val.extend_from_slice(&existing_val[existing_val.len() - size_of::<u64>()..]);
- self.replace_raw(&item_key, &val, None)?;
- existing_val.truncate(existing_val.len() - size_of::<u64>());
- return Ok(InsertToListStatus::Replaced(existing_val));
- }
-
- if matches!(mode, InsertMode::Replace(_)) {
- // not allowed to create
- return Ok(InsertToListStatus::DoesNotExist);
- }
-
- // get of create the list
- let res = self.get_or_create_raw(
- &list_key,
- bytes_of(&List {
- head_idx: Self::FIRST_LIST_IDX,
- tail_idx: Self::FIRST_LIST_IDX + 1,
- num_items: 1,
- })
- .to_owned(),
- )?;
-
- match res {
- crate::GetOrCreateStatus::CreatedNew(_) => {
- // list was just created. create chain
- self.set_raw(
- bytes_of(&ChainKey {
- list_ph,
- idx: Self::FIRST_LIST_IDX,
- namespace: CHAIN_NAMESPACE,
- }),
- bytes_of(&item_ph),
- )?;
-
- // create item
- val.extend_from_slice(bytes_of(&Self::FIRST_LIST_IDX));
- self.set_raw(&item_key, &val)?;
- }
- crate::GetOrCreateStatus::ExistingValue(list_bytes) => {
- let mut list = *from_bytes::<List>(&list_bytes);
-
- let idx = list.tail_idx;
- list.tail_idx += 1;
-
- // update list
- list.num_items += 1;
- self.set_raw(&list_key, bytes_of(&list))?;
-
- // create chain
- self.set_raw(
- bytes_of(&ChainKey {
- list_ph,
- idx,
- namespace: CHAIN_NAMESPACE,
- }),
- bytes_of(&item_ph),
- )?;
-
- // create item
- val.extend_from_slice(bytes_of(&idx));
- self.set_raw(&item_key, &val)?;
- }
- }
-
- val.truncate(val.len() - size_of::<u64>());
- Ok(InsertToListStatus::Created(val))
- }
-
- /// Inserts or updates an element `item_key` that belongs to list `list_key`. Returns [SetStatus::CreatedNew] if
- /// the item did not exist, or [SetStatus::PrevValue] with the previous value of the item.
- ///
- /// See also [Self::set].
- pub fn set_in_list<
- B1: AsRef<[u8]> + ?Sized,
- B2: AsRef<[u8]> + ?Sized,
- B3: AsRef<[u8]> + ?Sized,
- >(
- &self,
- list_key: &B1,
- item_key: &B2,
- val: &B3,
- ) -> Result<SetStatus> {
- self.owned_set_in_list(
- list_key.as_ref().to_owned(),
- item_key.as_ref().to_owned(),
- val.as_ref().to_owned(),
- false,
- )
- }
-
- /// Like [Self::set_in_list] but "promotes" the element to the tail of the list: it's basically a
- /// remove + insert operation. This can be usede to implement LRUs, where older elements are at the
- /// beginning and newer ones at the end.
- ///
- /// Note: **not crash-safe**
- pub fn set_in_list_promoting<
- B1: AsRef<[u8]> + ?Sized,
- B2: AsRef<[u8]> + ?Sized,
- B3: AsRef<[u8]> + ?Sized,
- >(
- &self,
- list_key: &B1,
- item_key: &B2,
- val: &B3,
- ) -> Result<SetStatus> {
- self.owned_set_in_list(
- list_key.as_ref().to_owned(),
- item_key.as_ref().to_owned(),
- val.as_ref().to_owned(),
- true,
- )
- }
-
- /// Owned version of [Self::set_in_list], which also takes promote as a parameter
- pub fn owned_set_in_list(
- &self,
- list_key: Vec<u8>,
- item_key: Vec<u8>,
- val: Vec<u8>,
- promote: bool,
- ) -> Result<SetStatus> {
- if promote {
- self.owned_remove_from_list(list_key.clone(), item_key.clone())?;
- }
- match self._insert_to_list(list_key, item_key, val, InsertMode::Set)? {
- InsertToListStatus::Created(_v) => Ok(SetStatus::CreatedNew),
- InsertToListStatus::Replaced(v) => Ok(SetStatus::PrevValue(v)),
- _ => unreachable!(),
- }
- }
-
- /// Like [Self::set_in_list], but will only replace (update) an existing item, i.e., it will never create the
- /// key
- pub fn replace_in_list<
- B1: AsRef<[u8]> + ?Sized,
- B2: AsRef<[u8]> + ?Sized,
- B3: AsRef<[u8]> + ?Sized,
- >(
- &self,
- list_key: &B1,
- item_key: &B2,
- val: &B3,
- expected_val: Option<&B3>,
- ) -> Result<ReplaceStatus> {
- self.owned_replace_in_list(
- list_key.as_ref().to_owned(),
- item_key.as_ref().to_owned(),
- val.as_ref().to_owned(),
- expected_val.map(|ev| ev.as_ref()),
- )
- }
-
- /// Owned version of [Self::replace_in_list]
- pub fn owned_replace_in_list(
- &self,
- list_key: Vec<u8>,
- item_key: Vec<u8>,
- val: Vec<u8>,
- expected_val: Option<&[u8]>,
- ) -> Result<ReplaceStatus> {
- match self._insert_to_list(list_key, item_key, val, InsertMode::Replace(expected_val))? {
- InsertToListStatus::DoesNotExist => Ok(ReplaceStatus::DoesNotExist),
- InsertToListStatus::Replaced(v) => Ok(ReplaceStatus::PrevValue(v)),
- InsertToListStatus::WrongValue(v) => Ok(ReplaceStatus::WrongValue(v)),
- _ => unreachable!(),
- }
- }
-
- /// Like [Self::set_in_list] but will not replace (update) the element if it already exists - it will only
- /// create the element with the default value if it did not exist.
- pub fn get_or_create_in_list<
- B1: AsRef<[u8]> + ?Sized,
- B2: AsRef<[u8]> + ?Sized,
- B3: AsRef<[u8]> + ?Sized,
- >(
- &self,
- list_key: &B1,
- item_key: &B2,
- default_val: &B3,
- ) -> Result<GetOrCreateStatus> {
- self.owned_get_or_create_in_list(
- list_key.as_ref().to_owned(),
- item_key.as_ref().to_owned(),
- default_val.as_ref().to_owned(),
- )
- }
-
- /// Owned version of [Self::get_or_create_in_list]
- pub fn owned_get_or_create_in_list(
- &self,
- list_key: Vec<u8>,
- item_key: Vec<u8>,
- default_val: Vec<u8>,
- ) -> Result<GetOrCreateStatus> {
- match self._insert_to_list(list_key, item_key, default_val, InsertMode::GetOrCreate)? {
- InsertToListStatus::ExistingValue(v) => Ok(GetOrCreateStatus::ExistingValue(v)),
- InsertToListStatus::Created(v) => Ok(GetOrCreateStatus::CreatedNew(v)),
- _ => unreachable!(),
- }
- }
-
- /// Gets a list element identified by `list_key` and `item_key`. This is an O(1) operation.
- ///
- /// See also: [Self::get]
- pub fn get_from_list<B1: AsRef<[u8]> + ?Sized, B2: AsRef<[u8]> + ?Sized>(
- &self,
- list_key: &B1,
- item_key: &B2,
- ) -> Result<Option<Vec<u8>>> {
- self.owned_get_from_list(list_key.as_ref().to_owned(), item_key.as_ref().to_owned())
- }
-
- /// Owned version of [Self::get_from_list]
- pub fn owned_get_from_list(
- &self,
- list_key: Vec<u8>,
- item_key: Vec<u8>,
- ) -> Result<Option<Vec<u8>>> {
- let (list_ph, _) = self.make_list_key(list_key);
- let (_, item_key) = self.make_item_key(list_ph, item_key);
- let Some(mut val) = self.get_raw(&item_key)? else {
- return Ok(None);
- };
- val.truncate(val.len() - size_of::<u64>());
- Ok(Some(val))
- }
-
- /// Removes a element from the list, identified by `list_key` and `item_key. The element can be
- /// at any position in the list, not just the head or the tail, but in this case, it will create a "hole".
- /// This means that iterations will go over the missing element's index every time, until the list is compacted.
- ///
- /// See also [Self::remove], [Self::compact_list_if_needed]
- pub fn remove_from_list<B1: AsRef<[u8]> + ?Sized, B2: AsRef<[u8]> + ?Sized>(
- &self,
- list_key: &B1,
- item_key: &B2,
- ) -> Result<Option<Vec<u8>>> {
- self.owned_remove_from_list(list_key.as_ref().to_owned(), item_key.as_ref().to_owned())
- }
-
- /// Owned version of [Self::remove_from_list]
- pub fn owned_remove_from_list(
- &self,
- list_key: Vec<u8>,
- item_key: Vec<u8>,
- ) -> Result<Option<Vec<u8>>> {
- let (list_ph, list_key) = self.make_list_key(list_key);
- let (_, item_key) = self.make_item_key(list_ph, item_key);
-
- let _guard = self.lock_list(list_ph);
-
- let Some(mut existing_val) = self.get_raw(&item_key)? else {
- return Ok(None);
- };
-
- let item_idx = u64::from_le_bytes(
- (&existing_val[existing_val.len() - size_of::<u64>()..])
- .try_into()
- .unwrap(),
- );
- existing_val.truncate(existing_val.len() - size_of::<u64>());
-
- // update list, if the item was the head/tail
- if let Some(list_bytes) = self.get_raw(&list_key)? {
- let mut list = *from_bytes::<List>(&list_bytes);
-
- list.num_items -= 1;
-
- if list.head_idx == item_idx || list.tail_idx == item_idx + 1 {
- if list.head_idx == item_idx {
- list.head_idx += 1;
- } else if list.tail_idx == item_idx + 1 {
- list.tail_idx -= 1;
- }
- }
- if list.is_empty() {
- self.remove_raw(&list_key)?;
- } else {
- self.set_raw(&list_key, bytes_of(&list))?;
- }
- }
-
- // remove chain
- self.remove_raw(bytes_of(&ChainKey {
- list_ph,
- idx: item_idx,
- namespace: CHAIN_NAMESPACE,
- }))?;
-
- // remove item
- self.remove_raw(&item_key)?;
-
- Ok(Some(existing_val))
- }
-
- const LIST_KEY_SUFFIX_LEN: usize = size_of::<PartedHash>() + ITEM_NAMESPACE.len();
-
- fn get_from_list_at_index(
- &self,
- list_ph: PartedHash,
- idx: u64,
- truncate: bool,
- ) -> Result<Option<(PartedHash, Vec<u8>, Vec<u8>)>> {
- let Some(item_ph_bytes) = self.get_raw(bytes_of(&ChainKey {
- idx,
- list_ph,
- namespace: CHAIN_NAMESPACE,
- }))?
- else {
- return Ok(None);
- };
- let item_ph = *from_bytes::<PartedHash>(&item_ph_bytes);
-
- let mut suffix = [0u8; Self::LIST_KEY_SUFFIX_LEN];
- suffix[0..size_of::<PartedHash>()].copy_from_slice(bytes_of(&list_ph));
- suffix[size_of::<PartedHash>()..].copy_from_slice(ITEM_NAMESPACE);
-
- for (mut k, mut v) in self.get_by_hash(item_ph)? {
- if k.ends_with(&suffix) && v.ends_with(bytes_of(&idx)) {
- if truncate {
- v.truncate(v.len() - size_of::<u64>());
- k.truncate(k.len() - suffix.len());
- }
- return Ok(Some((item_ph, k, v)));
- }
- }
-
- Ok(None)
- }
-
- /// Compacts (rewrites) the list such that there will be no holes. Holes are created when removing an
- /// element from the middle of the list (not the head or tail), which makes iteration less efficient.
- /// You should call this function every so often if you're removing elements from lists at random locations.
- /// The function takes parameters that control when to compact: the list has to be of a minimal length and
- /// have a minimal holes-to-length ratio. The default values are expected to be okay for most use cases.
- /// Returns true if the list was compacted, false otherwise.
- ///
- /// Note: **Not crash-safe**
- pub fn compact_list_if_needed<B: AsRef<[u8]> + ?Sized>(
- &self,
- list_key: &B,
- params: ListCompactionParams,
- ) -> Result<bool> {
- let (list_ph, list_key) = self.make_list_key(list_key.as_ref().to_owned());
- let _guard = self.lock_list(list_ph);
-
- let Some(list_bytes) = self.get_raw(&list_key)? else {
- return Ok(false);
- };
- let list = *from_bytes::<List>(&list_bytes);
- if list.span_len() < params.min_length {
- return Ok(false);
- }
- if (list.holes() as f64) < (list.span_len() as f64) * params.min_holes_ratio {
- return Ok(false);
- }
-
- let mut new_idx = list.tail_idx;
- for idx in list.head_idx..list.tail_idx {
- let Some((item_ph, full_k, mut full_v)) =
- self.get_from_list_at_index(list_ph, idx, false)?
- else {
- continue;
- };
-
- // create new chain
- self.set_raw(
- bytes_of(&ChainKey {
- idx: new_idx,
- list_ph,
- namespace: CHAIN_NAMESPACE,
- }),
- bytes_of(&item_ph),
- )?;
-
- // update item's index suffix
- let offset = full_v.len() - size_of::();
- full_v[offset..].copy_from_slice(bytes_of(&new_idx));
- self.set_raw(&full_k, &full_v)?;
-
- // remove old chain
- self.remove_raw(bytes_of(&ChainKey {
- idx,
- list_ph,
- namespace: CHAIN_NAMESPACE,
- }))?;
-
- new_idx += 1;
- }
-
- if list.tail_idx == new_idx {
- // list is now empty
- self.remove_raw(&list_key)?;
- } else {
- // update list head and tail, set holes=0
- self.set_raw(
- &list_key,
- bytes_of(&List {
- head_idx: list.tail_idx,
- tail_idx: new_idx,
- num_items: new_idx - list.tail_idx,
- }),
- )?;
- }
-
- Ok(true)
- }
-
- /// Iterates over the elements of the list (identified by `list_key`) from the beginning (head)
- /// to the end (tail). Note that if items are removed at random locations in the list, the iterator
- /// will need to skip these holes. If you remove elements from the middle (not head/tail) of the list
- /// frequently, and wish to use iteration, consider compacting the list every so often using
- /// [Self::compact_list_if_needed]
- pub fn iter_list + ?Sized>(&self, list_key: &B) -> ListIterator<'_> {
- self.owned_iter_list(list_key.as_ref().to_owned())
- }
-
- /// Owned version of [Self::iter_list]
- pub fn owned_iter_list(&self, list_key: Vec) -> ListIterator<'_> {
- let (list_ph, list_key) = self.make_list_key(list_key);
- ListIterator {
- store: &self,
- list_key,
- list_ph,
- range: None,
- fwd: true,
- }
- }
-
- /// Same as [Self::iter_list] but iterates from the end (tail) to the beginning (head)
- pub fn iter_list_backwards + ?Sized>(&self, list_key: &B) -> ListIterator<'_> {
- self.owned_iter_list_backwards(list_key.as_ref().to_owned())
- }
-
- /// Owned version of [Self::iter_list_backwards]
- pub fn owned_iter_list_backwards(&self, list_key: Vec) -> ListIterator<'_> {
- let (list_ph, list_key) = self.make_list_key(list_key);
- ListIterator {
- store: &self,
- list_key,
- list_ph,
- range: None,
- fwd: false,
- }
- }
-
- /// Discards the given list, removing all elements it contains and dropping the list itself.
- /// This is more efficient than iteration + removal of each element.
- pub fn discard_list + ?Sized>(&self, list_key: &B) -> Result {
- self.owned_discard_list(list_key.as_ref().to_owned())
- }
-
- /// Owned version of [Self::discard_list]
- pub fn owned_discard_list(&self, list_key: Vec) -> Result {
- let (list_ph, list_key) = self.make_list_key(list_key);
- let _guard = self.lock_list(list_ph);
-
- let Some(list_bytes) = self.get_raw(&list_key)? else {
- return Ok(false);
- };
- let list = *from_bytes::(&list_bytes);
- for idx in list.head_idx..list.tail_idx {
- let Some((_, full_key, _)) = self.get_from_list_at_index(list_ph, idx, false)? else {
- continue;
- };
- self.remove_raw(bytes_of(&ChainKey {
- list_ph,
- idx,
- namespace: CHAIN_NAMESPACE,
- }))?;
- self.remove_raw(&full_key)?;
- }
- self.remove_raw(&list_key)?;
-
- Ok(true)
- }
-
- /// Returns the first (head) element of the list
- pub fn peek_list_head + ?Sized>(&self, list_key: &B) -> Result> {
- self.owned_peek_list_head(list_key.as_ref().to_owned())
- }
-
- /// Owned version of [Self::peek_list_head]
- pub fn owned_peek_list_head(&self, list_key: Vec) -> Result> {
- let Some(kv) = self.owned_iter_list(list_key).next() else {
- return Ok(None);
- };
- Ok(Some(kv?))
- }
-
- /// Returns the last (tail) element of the list
- pub fn peek_list_tail + ?Sized>(&self, list_key: &B) -> Result> {
- self.owned_peek_list_tail(list_key.as_ref().to_owned())
- }
-
- /// Owned version of [Self::peek_list_tail]
- pub fn owned_peek_list_tail(&self, list_key: Vec) -> Result> {
- for kv in self.owned_iter_list_backwards(list_key) {
- return Ok(Some(kv?));
- }
- Ok(None)
- }
-
- /// Removes and returns the first (head) element of the list
- pub fn pop_list_head + ?Sized>(&self, list_key: &B) -> Result> {
- self.owned_pop_list_head(list_key.as_ref().to_owned())
- }
-
- fn _operate_on_list(
- &self,
- list_key: Vec,
- default: T,
- func: impl FnOnce(PartedHash, Vec, List) -> Result,
- ) -> Result {
- let (list_ph, list_key) = self.make_list_key(list_key);
- let _guard = self.lock_list(list_ph);
- let Some(list_bytes) = self.get_raw(&list_key)? else {
- return Ok(default);
- };
- let list = *from_bytes::(&list_bytes);
- func(list_ph, list_key, list)
- }
-
- fn _owned_pop_list(&self, list_key: Vec, fwd: bool) -> Result> {
- self._operate_on_list(list_key, None, |list_ph, list_key, mut list| {
- let range = list.head_idx..list.tail_idx;
-
- let mut pop = |idx| -> Result > {
- let Some((_, mut untrunc_k, mut untrunc_v)) =
- self.get_from_list_at_index(list_ph, idx, false)?
- else {
- return Ok(None);
- };
-
- if fwd {
- list.head_idx = idx + 1;
- } else {
- list.tail_idx = idx - 1;
- }
- list.num_items -= 1;
- if list.is_empty() {
- self.remove_raw(&list_key)?;
- } else {
- self.set_raw(&list_key, bytes_of(&list))?;
- }
-
- // remove chain
- self.remove_raw(bytes_of(&ChainKey {
- list_ph,
- idx,
- namespace: CHAIN_NAMESPACE,
- }))?;
-
- // remove item
- self.remove_raw(&untrunc_k)?;
-
- untrunc_v.truncate(untrunc_v.len() - size_of::());
- untrunc_k.truncate(untrunc_k.len() - Self::LIST_KEY_SUFFIX_LEN);
- Ok(Some((untrunc_k, untrunc_v)))
- };
-
- if fwd {
- for idx in range {
- if let Some(kv) = pop(idx)? {
- return Ok(Some(kv));
- }
- }
- } else {
- for idx in range.rev() {
- if let Some(kv) = pop(idx)? {
- return Ok(Some(kv));
- }
- }
- }
-
- Ok(None)
- })
- }
-
- /// Owned version of [Self::peek_list_tail]
- pub fn owned_pop_list_head(&self, list_key: Vec) -> Result> {
- self._owned_pop_list(list_key, true /* fwd */)
- }
-
- /// Removes and returns the last (tail) element of the list
- pub fn pop_list_tail + ?Sized>(&self, list_key: &B) -> Result> {
- self.owned_pop_list_tail(list_key.as_ref().to_owned())
- }
-
- /// Owned version of [Self::peek_list_tail]
- pub fn owned_pop_list_tail(&self, list_key: Vec) -> Result> {
- self._owned_pop_list(list_key, false /* fwd */)
- }
-
- /// Returns the estimated list length
- pub fn list_len + ?Sized>(&self, list_key: &B) -> Result {
- self.owned_list_len(list_key.as_ref().to_owned())
- }
- pub fn owned_list_len(&self, list_key: Vec) -> Result {
- let (_, list_key) = self.make_list_key(list_key);
-
- let Some(list_bytes) = self.get_raw(&list_key)? else {
- return Ok(0);
- };
-
- Ok(from_bytes::(&list_bytes).num_items as usize)
- }
-
- /// iterate over the given list and retain all elements for which the predicate returns `true`. In other
- /// words, drop all other elements. This operation is not crash safe, and holds the list locked during the
- /// whole iteration, so no other gets/sets/deletes can be done in by other threads on this list while
- /// iterating over it. Beware of deadlocks.
- ///
- /// This operation will also compact the list, basically popping all elements and re-pushing the retained
- /// ones at the end, so no holes will exist by the end.
- pub fn retain_in_list + ?Sized>(
- &self,
- list_key: &B,
- func: impl FnMut(&[u8], &[u8]) -> Result,
- ) -> Result<()> {
- self.owned_retain_in_list(list_key.as_ref().to_owned(), func)
- }
-
- /// owned version of [Self::retain_in_list]
- pub fn owned_retain_in_list(
- &self,
- list_key: Vec,
- mut func: impl FnMut(&[u8], &[u8]) -> Result,
- ) -> Result<()> {
- self._operate_on_list(list_key, (), |list_ph, list_key, mut list| {
- let range = list.head_idx..list.tail_idx;
-
- for idx in range {
- list.head_idx = idx + 1;
- let Some((item_ph, untrunc_k, mut untrunc_v)) =
- self.get_from_list_at_index(list_ph, idx, false)?
- else {
- continue;
- };
-
- untrunc_v.truncate(untrunc_v.len() - size_of::());
- let mut v = untrunc_v;
- let k = &untrunc_k[..untrunc_k.len() - Self::LIST_KEY_SUFFIX_LEN];
-
- // remove chain
- self.remove_raw(bytes_of(&ChainKey {
- list_ph,
- idx,
- namespace: CHAIN_NAMESPACE,
- }))?;
-
- if func(k, &v)? {
- let tail_idx = list.tail_idx;
- list.tail_idx += 1;
-
- // create chain
- self.set_raw(
- bytes_of(&ChainKey {
- list_ph,
- idx: tail_idx,
- namespace: CHAIN_NAMESPACE,
- }),
- bytes_of(&item_ph),
- )?;
-
- // create new item
- v.extend_from_slice(bytes_of(&tail_idx));
- self.set_raw(&untrunc_k, &v)?;
- } else {
- // drop from list
- list.num_items -= 1;
-
- // remove item
- self.remove_raw(&untrunc_k)?;
- }
- }
- // defer updating the list to the very end to save on IOs
- if list.is_empty() {
- self.remove_raw(&list_key)?;
- } else {
- self.set_raw(&list_key, bytes_of(&list))?;
- }
- Ok(())
- })
- }
-}
diff --git a/src/pacer.rs b/src/pacer.rs
new file mode 100644
index 0000000..2fb23b6
--- /dev/null
+++ b/src/pacer.rs
@@ -0,0 +1,253 @@
+use std::time::{Duration, Instant};
+
/// A token-bucket rate limiter ("pacer").
///
/// Tokens are minted at a rate of `tokens_per_unit` per `time_unit`, and the
/// bucket never holds more than `max_tokens` (the burst capacity). `consume`
/// spends whatever is immediately available and blocks until the remainder
/// has accrued.
pub struct Pacer {
    time_unit: Duration,
    tokens_per_unit: u64,
    max_tokens: u64, // burst capacity
    last_refill: Instant,
    available_tokens: u64,
}

impl Pacer {
    /// Builds a pacer minting `tokens_per_unit` tokens every `time_unit`.
    ///
    /// Panics if `tokens_per_unit` is zero or `time_unit` is zero. The burst
    /// capacity is raised to at least `tokens_per_unit` so the bucket can hold
    /// one full refill interval; the bucket starts out full.
    pub fn new(tokens_per_unit: u64, time_unit: Duration, max_tokens: u64) -> Self {
        assert!(tokens_per_unit > 0 && !time_unit.is_zero());
        let capacity = max_tokens.max(tokens_per_unit);

        Pacer {
            time_unit,
            tokens_per_unit,
            max_tokens: capacity,
            last_refill: Instant::now(),
            available_tokens: capacity,
        }
    }

    /// Number of whole tokens produced over `elapsed_ns`, clamped to `capacity`
    /// (the free room in the bucket). Clamping happens in u128 *before* the
    /// narrowing cast, so the cast can never truncate.
    fn added_tokens(
        elapsed_ns: u128,
        time_unit_ns: u128,
        tokens_per_unit: u64,
        capacity: u64,
    ) -> u64 {
        let minted = elapsed_ns.saturating_mul(u128::from(tokens_per_unit)) / time_unit_ns;
        minted.min(u128::from(capacity)) as u64
    }

    /// Converts a nanosecond count to a `Duration`, saturating at
    /// `Duration::MAX` when the seconds part exceeds `u64::MAX`.
    fn duration_from_nanos_saturating(total_nanos: u128) -> Duration {
        const NANOS_PER_SEC: u128 = 1_000_000_000;
        match u64::try_from(total_nanos / NANOS_PER_SEC) {
            Ok(secs) => Duration::new(secs, (total_nanos % NANOS_PER_SEC) as u32),
            Err(_) => Duration::MAX,
        }
    }

    /// Credits tokens accrued since `last_refill`, up to the bucket capacity.
    fn refill(&mut self, now: Instant) {
        let room = self.max_tokens - self.available_tokens;
        if room == 0 {
            // Full bucket: nothing can accrue, so just reset the accounting origin.
            self.last_refill = now;
            return;
        }

        let unit_ns = self.time_unit.as_nanos();
        let elapsed_ns = now.saturating_duration_since(self.last_refill).as_nanos();
        let minted = Self::added_tokens(elapsed_ns, unit_ns, self.tokens_per_unit, room);
        if minted == 0 {
            return;
        }

        self.available_tokens += minted;

        if self.available_tokens == self.max_tokens {
            self.last_refill = now;
        } else {
            // Only credit the time that `minted` whole tokens account for, so the
            // fractional remainder keeps accruing toward the next token.
            let accounted_ns = (u128::from(minted) * unit_ns) / u128::from(self.tokens_per_unit);
            self.last_refill += Self::duration_from_nanos_saturating(accounted_ns);
        }
    }

    /// Remaining wait (measured from `now`) until `tokens_needed` tokens will
    /// have accrued since `last_refill`. Rounds up so we never under-sleep.
    fn time_until_tokens(&self, now: Instant, tokens_needed: u64) -> Duration {
        let unit_ns = self.time_unit.as_nanos();
        let target_ns = u128::from(tokens_needed)
            .saturating_mul(unit_ns)
            .div_ceil(u128::from(self.tokens_per_unit));
        let already_waited_ns = now.saturating_duration_since(self.last_refill).as_nanos();

        Self::duration_from_nanos_saturating(target_ns.saturating_sub(already_waited_ns))
    }

    /// Consumes `tokens`, invoking `sleep` with the suggested wait whenever the
    /// bucket runs dry, until the full request has been satisfied.
    pub fn consume_with_sleep_fn(&mut self, mut tokens: u64, mut sleep: impl FnMut(Duration)) {
        loop {
            if tokens == 0 {
                return;
            }
            let now = Instant::now();
            self.refill(now);

            // Spend whatever is on hand (possibly nothing).
            let spent = self.available_tokens.min(tokens);
            self.available_tokens -= spent;
            tokens -= spent;
            if tokens == 0 {
                return;
            }

            // Wait for at most one bucketful at a time; oversized requests are
            // satisfied across several refill rounds.
            let batch = tokens.min(self.max_tokens);
            sleep(self.time_until_tokens(now, batch));
        }
    }

    /// Consumes `tokens`, blocking the current thread until they are available.
    pub fn consume(&mut self, tokens: u64) {
        self.consume_with_sleep_fn(tokens, std::thread::sleep);
    }
}
+
#[cfg(test)]
mod tests {
    // Timing-based tests: thresholds are deliberately generous (e.g. "< 150ms")
    // to tolerate scheduler jitter on loaded CI machines.
    use super::Pacer;
    use std::time::{Duration, Instant};

    // Consuming zero tokens must return immediately, never sleeping.
    #[test]
    fn test_consume_zero() {
        let mut pacer = Pacer::new(10, Duration::from_millis(10), 40);
        pacer.consume_with_sleep_fn(0, |_| unreachable!());
    }

    // A fresh pacer starts with a full bucket, so exactly `max_tokens` can be
    // consumed without sleeping.
    #[test]
    fn test_consume_exact_burst() {
        let mut pacer = Pacer::new(10, Duration::from_millis(10), 40);
        pacer.consume_with_sleep_fn(40, |_| unreachable!());
    }

    // One token beyond the burst capacity must trigger at least one sleep.
    #[test]
    fn test_consume_burst_plus_one_sleeps() {
        let mut pacer = Pacer::new(10, Duration::from_millis(10), 40);
        let mut slept = false;
        pacer.consume_with_sleep_fn(41, |d| {
            std::thread::sleep(d);
            slept = true;
        });
        assert!(slept);
    }

    // After draining the bucket, idling for 30ms at 10 tokens/10ms should
    // accrue enough (>= 20) tokens to consume without sleeping.
    #[test]
    fn test_tokens_refill_after_idle() {
        let mut pacer = Pacer::new(10, Duration::from_millis(10), 40);
        pacer.consume_with_sleep_fn(40, |_| unreachable!());
        std::thread::sleep(Duration::from_millis(30));
        pacer.consume_with_sleep_fn(20, |_| unreachable!());
    }

    // With the bucket drained, 50 tokens at 10 tokens/10ms should take roughly
    // 50ms (lower-bounded at 40ms to allow for the initial partial refill).
    #[test]
    fn test_rate_accuracy() {
        let mut pacer = Pacer::new(10, Duration::from_millis(10), 10);
        pacer.consume(10);
        let t0 = Instant::now();
        pacer.consume(50);
        let d = t0.elapsed();
        assert!(d >= Duration::from_millis(40), "Too fast: {d:?}");
        assert!(d < Duration::from_millis(150), "Too slow: {d:?}");
    }

    // Many 1-token consumes must pace at the same aggregate rate as one large
    // consume (no per-call rounding in the caller's favor).
    #[test]
    fn test_many_small_consumes() {
        let mut pacer = Pacer::new(10, Duration::from_millis(10), 10);
        pacer.consume(10);
        let t0 = Instant::now();
        for _ in 0..30 {
            pacer.consume(1);
        }
        let d = t0.elapsed();
        assert!(d >= Duration::from_millis(20), "Too fast: {d:?}");
        assert!(d < Duration::from_millis(150), "Too slow: {d:?}");
    }

    // A partially drained bucket keeps refilling while idle: 5 spent, ~10
    // regained over 10ms, 1 spent again => 39 available (of 40).
    #[test]
    fn test_partial_bucket_refills_before_small_consume() {
        let mut pacer = Pacer::new(10, Duration::from_millis(10), 40);

        pacer.consume_with_sleep_fn(5, |_| unreachable!());
        std::thread::sleep(Duration::from_millis(10));
        pacer.consume_with_sleep_fn(1, |_| unreachable!());

        assert_eq!(pacer.available_tokens, 39);
    }

    // When only a fraction of a token interval remains, the requested sleep
    // must be positive but shorter than a full token interval (1ms here, so
    // well under the 5ms bound).
    #[test]
    fn test_waits_for_fractional_token_interval() {
        let mut pacer = Pacer::new(10, Duration::from_millis(10), 10);
        let mut requested_sleep = None;

        pacer.consume_with_sleep_fn(10, |_| unreachable!());
        pacer.consume_with_sleep_fn(1, |duration| {
            requested_sleep = Some(duration);
            std::thread::sleep(duration);
        });

        let requested_sleep = requested_sleep.expect("consume should need to sleep");
        assert!(
            requested_sleep > Duration::ZERO,
            "sleep duration should be positive"
        );
        assert!(
            requested_sleep < Duration::from_millis(5),
            "expected to wait for a fractional token interval, got {requested_sleep:?}"
        );
    }

    // `new` promotes max_tokens (10) up to tokens_per_unit (100), so a full
    // 100-token burst is available immediately.
    #[test]
    fn test_burst_capacity_promotion() {
        let mut pacer = Pacer::new(100, Duration::from_secs(1), 10);
        let mut slept = false;
        pacer.consume_with_sleep_fn(100, |_| slept = true);
        assert!(
            !slept,
            "Should not sleep if burst capacity was correctly promoted to 100"
        );
    }

    // Oversized requests wait one bucketful (20 tokens) per sleep, not one
    // token per sleep: 50 tokens after a drained 20-token bucket => <= 4 sleeps.
    #[test]
    fn test_large_consumes_are_batched() {
        let mut pacer = Pacer::new(10, Duration::from_millis(10), 20);
        let mut sleep_count = 0;

        pacer.consume_with_sleep_fn(20, |_| unreachable!());
        pacer.consume_with_sleep_fn(50, |duration| {
            sleep_count += 1;
            std::thread::sleep(duration);
        });

        assert!(
            sleep_count <= 4,
            "Should sleep in large batches (<= 4 sleeps), but slept {} times",
            sleep_count
        );
    }

    // The capacity clamp is applied in u128 before the u64 cast, so even an
    // astronomically large production count is capped, not truncated.
    #[test]
    fn test_added_tokens_caps_before_u64_cast() {
        let added_tokens = Pacer::added_tokens(u128::MAX, 1, u64::MAX, 7);
        assert_eq!(added_tokens, 7);
    }

    // Nanosecond counts whose seconds exceed u64::MAX saturate at Duration::MAX.
    #[test]
    fn test_duration_from_nanos_saturates() {
        assert_eq!(
            Pacer::duration_from_nanos_saturating(u128::MAX),
            Duration::MAX
        );
    }
}
diff --git a/src/queues.rs b/src/queues.rs
deleted file mode 100644
index 7575a72..0000000
--- a/src/queues.rs
+++ /dev/null
@@ -1,455 +0,0 @@
-use std::ops::Range;
-
-use crate::{
- hashing::PartedHash,
- store::{QUEUE_ITEM_NAMESPACE, QUEUE_NAMESPACE},
- CandyStore,
-};
-use anyhow::Result;
-use bytemuck::{bytes_of, checked::from_bytes_mut, from_bytes, Pod, Zeroable};
-
-#[derive(Clone, Copy, Pod, Zeroable)]
-#[repr(C)]
-struct Queue {
- head_idx: u64, // inclusive
- tail_idx: u64, // exclusive
- num_items: u64,
-}
-
-impl Queue {
- #[allow(dead_code)]
- fn span_len(&self) -> u64 {
- self.tail_idx - self.head_idx
- }
- #[allow(dead_code)]
- fn holes(&self) -> u64 {
- self.span_len() - self.num_items
- }
- fn is_empty(&self) -> bool {
- self.head_idx == self.tail_idx
- }
-}
-
-enum QueuePos {
- Head,
- Tail,
-}
-
-pub struct QueueIterator<'a> {
- store: &'a CandyStore,
- queue_key: Vec,
- range: Option>,
- fwd: bool,
-}
-
-impl<'a> Iterator for QueueIterator<'a> {
- type Item = Result<(usize, Vec)>;
- fn next(&mut self) -> Option {
- if self.range.is_none() {
- match self.store.fetch_queue(&self.queue_key) {
- Ok(queue) => match queue {
- Some(queue) => {
- self.range = Some(queue.head_idx..queue.tail_idx);
- }
- None => return None,
- },
- Err(e) => return Some(Err(e)),
- }
- }
-
- loop {
- let idx = if self.fwd {
- self.range.as_mut().unwrap().next()
- } else {
- self.range.as_mut().unwrap().next_back()
- };
- let Some(idx) = idx else {
- return None;
- };
-
- match self
- .store
- .get_raw(&self.store.make_queue_item_key(&self.queue_key, idx))
- {
- Ok(v) => {
- match v {
- Some(v) => return Some(Ok((idx as usize, v))),
- None => {
- // continue, we might have holes
- }
- }
- }
- Err(e) => return Some(Err(e)),
- }
- }
- }
-
- fn size_hint(&self) -> (usize, Option) {
- if let Some(ref range) = self.range {
- range.size_hint()
- } else {
- (0, None)
- }
- }
-}
-
-impl CandyStore {
- const FIRST_QUEUE_IDX: u64 = 0x8000_0000_0000_0000;
-
- fn make_queue_key(&self, queue_key: &[u8]) -> (PartedHash, Vec) {
- let mut full_queue_key = queue_key.to_owned();
- full_queue_key.extend_from_slice(QUEUE_NAMESPACE);
- (
- PartedHash::new(&self.config.hash_seed, &queue_key),
- full_queue_key,
- )
- }
- fn make_queue_item_key(&self, queue_key: &[u8], idx: u64) -> Vec {
- let mut item_key = queue_key.to_owned();
- item_key.extend_from_slice(bytes_of(&idx));
- item_key.extend_from_slice(QUEUE_ITEM_NAMESPACE);
- item_key
- }
-
- fn _push_to_queue(&self, queue_key: &[u8], val: &[u8], pos: QueuePos) -> Result {
- let (queue_ph, full_queue_key) = self.make_queue_key(queue_key);
- let _guard = self.lock_list(queue_ph);
-
- let status = self.get_or_create_raw(
- &full_queue_key,
- bytes_of(&Queue {
- head_idx: Self::FIRST_QUEUE_IDX,
- tail_idx: Self::FIRST_QUEUE_IDX + 1,
- num_items: 1,
- })
- .to_owned(),
- )?;
-
- let item_idx = match status {
- crate::GetOrCreateStatus::CreatedNew(_) => Self::FIRST_QUEUE_IDX,
- crate::GetOrCreateStatus::ExistingValue(mut queue_bytes) => {
- let queue = from_bytes_mut::(&mut queue_bytes);
- let item_idx = match pos {
- QueuePos::Head => {
- queue.head_idx -= 1;
- queue.head_idx
- }
- QueuePos::Tail => {
- let item_idx = queue.tail_idx;
- queue.tail_idx += 1;
- item_idx
- }
- };
- queue.num_items += 1;
- self.set_raw(&full_queue_key, &queue_bytes)?;
- item_idx
- }
- };
-
- self.set_raw(&self.make_queue_item_key(queue_key, item_idx), val)?;
- Ok(item_idx as usize)
- }
-
- /// Pushed a new element at the front (head) of the queue, returning the element's index in the queue
- pub fn push_to_queue_head + ?Sized, B2: AsRef<[u8]> + ?Sized>(
- &self,
- queue_key: &B1,
- val: &B2,
- ) -> Result {
- self._push_to_queue(queue_key.as_ref(), val.as_ref(), QueuePos::Head)
- }
-
- /// Pushed a new element at the end (tail) of the queue, returning the element's index in the queue
- pub fn push_to_queue_tail + ?Sized, B2: AsRef<[u8]> + ?Sized>(
- &self,
- queue_key: &B1,
- val: &B2,
- ) -> Result {
- self._push_to_queue(queue_key.as_ref(), val.as_ref(), QueuePos::Tail)
- }
-
- fn _pop_queue(&self, queue_key: &[u8], pos: QueuePos) -> Result)>> {
- let (queue_ph, full_queue_key) = self.make_queue_key(queue_key);
- let _guard = self.lock_list(queue_ph);
-
- let Some(mut queue_bytes) = self.get_raw(&full_queue_key)? else {
- return Ok(None);
- };
- let queue = from_bytes_mut::(&mut queue_bytes);
- let mut res = None;
-
- match pos {
- QueuePos::Head => {
- while queue.head_idx < queue.tail_idx {
- let idx = queue.head_idx;
- queue.head_idx += 1;
- if let Some(v) = self.remove_raw(&self.make_queue_item_key(queue_key, idx))? {
- res = Some((idx as usize, v));
- queue.num_items -= 1;
- break;
- }
- }
- }
- QueuePos::Tail => {
- while queue.tail_idx > queue.head_idx {
- queue.tail_idx -= 1;
- let idx = queue.tail_idx;
- if let Some(v) = self.remove_raw(&self.make_queue_item_key(queue_key, idx))? {
- res = Some((idx as usize, v));
- queue.num_items -= 1;
- break;
- }
- }
- }
- }
-
- if queue.is_empty() {
- self.remove_raw(&full_queue_key)?;
- } else {
- self.set_raw(&full_queue_key, &queue_bytes)?;
- }
-
- Ok(res)
- }
-
- /// Removes and returns the head element and its index of the queue, or None if the queue is empty
- pub fn pop_queue_head_with_idx + ?Sized>(
- &self,
- queue_key: &B,
- ) -> Result