diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ec0eff6e..94cf9aa7 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -2,9 +2,9 @@ name: Rust on: push: - branches: [ master ] + branches: [master] pull_request: - branches: [ master ] + branches: [master] env: CARGO_TERM_COLOR: always @@ -12,25 +12,40 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - toolchain: stable - components: clippy,rustfmt - - name: Print versions - run: | - cargo --version - rustc --version - clippy-driver --version - rustfmt --version - - name: Build - run: cargo build --verbose - - name: Run tests - run: cargo test --verbose - - name: Run clippy - run: cargo clippy --verbose --all-targets -- -D clippy::all - - name: Check code formatting - run: cargo fmt --verbose --all -- --check + - uses: actions/checkout@v4 + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + components: clippy + - name: Print versions + run: | + cargo --version + rustc --version + clippy-driver --version + - name: Build + run: cargo build --verbose + - name: Run tests + run: cargo test --verbose + - name: Run clippy + run: cargo clippy --verbose --all-targets -- -D clippy::all + + fmt: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: nightly + components: rustfmt + - uses: tombi-toml/setup-tombi@v1 + - name: Print versions + run: | + cargo fmt --version + tombi --version + - name: Check code formatting + uses: actions-rust-lang/rustfmt@v1 + - name: Check toml formatting + run: tombi format --check doc: name: Documentation @@ -38,19 +53,19 @@ jobs: env: RUSTDOCFLAGS: -D warnings steps: - - uses: actions/checkout@v4 - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - toolchain: stable - - name: Print versions - run: | - cargo --version - rustc --version - rustdoc --version - - name: Doc - run: cargo doc --verbose - - name: Doc with all features - run: cargo doc --verbose --all-features + - uses: actions/checkout@v4 + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + - name: Print versions + run: | + cargo --version + rustc --version + rustdoc --version + - name: Doc + run: cargo doc --verbose + - name: Doc with all features + run: cargo doc --verbose --all-features miri-test: name: Test with miri @@ -58,13 +73,13 @@ jobs: env: MIRIFLAGS: -Zmiri-disable-isolation steps: - - uses: actions/checkout@v4 - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - toolchain: nightly - components: miri - - run: cargo miri test --verbose --no-default-features - - run: cargo miri test --verbose --all-features + - uses: actions/checkout@v4 + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: nightly + components: miri + - run: cargo miri test --verbose --no-default-features + - run: cargo miri test --verbose --all-features sanitizer-test: name: Test with -Zsanitizer=${{ matrix.sanitizer }} @@ -74,19 +89,19 @@ jobs: matrix: sanitizer: [address, thread, leak] steps: - - uses: actions/checkout@v4 - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - toolchain: nightly - components: rust-src - - name: Test with sanitizer - env: - RUSTFLAGS: -Zsanitizer=${{ matrix.sanitizer }} - RUSTDOCFLAGS: -Zsanitizer=${{ matrix.sanitizer }} - # only needed by asan - ASAN_OPTIONS: detect_stack_use_after_return=1,detect_leaks=0 - # Asan's leak detection occasionally complains - # about some small leaks if backtraces are captured, - # so ensure they're not - RUST_BACKTRACE: 0 - run: cargo test -Zbuild-std --verbose --target=x86_64-unknown-linux-gnu --lib --bins --tests + - uses: actions/checkout@v4 + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: nightly + components: rust-src + - name: Test with sanitizer + env: + RUSTFLAGS: -Zsanitizer=${{ matrix.sanitizer }} + RUSTDOCFLAGS: -Zsanitizer=${{ matrix.sanitizer }} + # only needed by asan + ASAN_OPTIONS: detect_stack_use_after_return=1,detect_leaks=0 + # Asan's leak detection occasionally complains + # about some small leaks if backtraces are captured, + # so ensure they're not + RUST_BACKTRACE: 0 + run: cargo test -Zbuild-std --verbose --target=x86_64-unknown-linux-gnu --lib --bins --tests diff --git a/Cargo.lock b/Cargo.lock index b1162aee..f8d09338 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,24 +4,24 @@ version = 4 [[package]] name = "addr2line" -version = "0.24.2" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" dependencies = [ "gimli", ] [[package]] name = "adler2" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "afl" -version = "0.15.13" +version = "0.15.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b784d6332a6978dd29861676de9df37aa37ed8852341db6340bd75eb82bc7a69" +checksum = "927cd71710d1a232519e2393470e8f74a178ae59367efe58fa122884bba35ca4" dependencies = [ "home", "libc", @@ -31,13 +31,22 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] +[[package]] +name = "alloca" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" +dependencies = [ + "cc", +] + [[package]] name = "anes" version = "0.1.6" @@ -46,9 +55,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.18" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", "anstyle-parse", @@ -61,49 +70,56 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.10" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" -version = "0.2.6" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.2" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.59.0", + "windows-sys", ] [[package]] name = "anstyle-wincon" -version = "3.0.6" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", - "windows-sys 0.59.0", + "once_cell_polyfill", + "windows-sys", ] +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + [[package]] name = "autocfg" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "backtrace" -version = "0.3.74" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" dependencies = [ "addr2line", "cfg-if", @@ -111,7 +127,7 @@ dependencies = [ "miniz_oxide", "object", "rustc-demangle", - "windows-targets", + "windows-link", ] [[package]] @@ -122,15 +138,15 @@ checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "cast" @@ -140,9 +156,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cbindgen" -version = "0.28.0" +version = "0.29.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadd868a2ce9ca38de7eeafdcec9c7065ef89b42b32f0839278d55f35c54d1ff" +checksum = "befbfd072a8e81c02f8c507aefce431fe5e7d051f83d48a23ffc9b9fe5a11799" dependencies = [ "clap", "heck", @@ -159,18 +175,19 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.4" +version = "1.2.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9157bbaa6b165880c27a4293a474c91cdcf265cc68cc829bf10be0964a391caf" +checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" dependencies = [ + "find-msvc-tools", "shlex", ] [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "ciborium" @@ -210,18 +227,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.23" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.23" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstream", "anstyle", @@ -231,37 +248,36 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.4" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "colorchoice" -version = "1.0.3" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "criterion" -version = "0.5.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" dependencies = [ + "alloca", "anes", "cast", "ciborium", "clap", "criterion-plot", - "is-terminal", "itertools", "num-traits", - "once_cell", "oorandom", + "page_size", "plotters", "rayon", "regex", "serde", - "serde_derive", "serde_json", "tinytemplate", "walkdir", @@ -269,9 +285,9 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.5.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" dependencies = [ "cast", "itertools", @@ -304,9 +320,9 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" -version = "0.2.2" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "dyn-clone" @@ -316,31 +332,48 @@ checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "erased-serde" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2add8a07dd6a8d93ff627029c51de145e12686fbc36ecb298ac22e74cf02dec" +dependencies = [ + "serde", + "serde_core", + "typeid", +] [[package]] name = "errno" -version = "0.3.10" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] name = "fastrand" -version = "2.3.0" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fnv" @@ -348,6 +381,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "fuzz-bytes" version = "0.1.0" @@ -374,154 +413,171 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.3.2" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", - "wasi", + "r-efi 5.3.0", + "wasip2", "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + [[package]] name = "gimli" -version = "0.31.1" +version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" [[package]] name = "half" -version = "2.4.1" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "cfg-if", "crunchy", + "zerocopy", ] [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] [[package]] -name = "heck" -version = "0.4.1" +name = "hashbrown" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" [[package]] -name = "hermit-abi" -version = "0.4.0" +name = "heck" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "home" -version = "0.5.9" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" dependencies = [ - "windows-sys 0.52.0", + "windows-sys", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "indexmap" -version = "2.7.0" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.17.0", + "serde", + "serde_core", ] [[package]] name = "indoc" -version = "2.0.5" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" - -[[package]] -name = "is-terminal" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" dependencies = [ - "hermit-abi", - "libc", - "windows-sys 0.52.0", + "rustversion", ] [[package]] name = "is_terminal_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" -version = "0.10.5" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" dependencies = [ "once_cell", "wasm-bindgen", ] +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" -version = "0.2.168" +version = "0.2.185" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" +checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" [[package]] name = "linux-raw-sys" -version = "0.4.14" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "log" -version = "0.4.22" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "memmem" -version = "0.1.1" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a64a92489e2744ce060c349162be1c5f33c6969234104dbd99ddb5feb08b8c15" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", ] @@ -537,18 +593,19 @@ dependencies = [ [[package]] name = "num_enum" -version = "0.7.3" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179" +checksum = "5d0bca838442ec211fa11de3a8b0e0e8f3a4522575b5c4c06ed722e005036f26" dependencies = [ "num_enum_derive", + "rustversion", ] [[package]] name = "num_enum_derive" -version = "0.7.3" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" +checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8" dependencies = [ "proc-macro-crate", "proc-macro2", @@ -558,24 +615,40 @@ dependencies = [ [[package]] name = "object" -version = "0.36.5" +version = "0.37.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "oorandom" -version = "11.1.4" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "page_size" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] [[package]] name = "paste" @@ -620,48 +693,63 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro-crate" -version = "3.2.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" dependencies = [ "toml_edit", ] [[package]] name = "proc-macro2" -version = "1.0.92" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.37" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] [[package]] name = "r-efi" -version = "5.2.0" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" [[package]] name = "rand" -version = "0.9.0" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha", "rand_core", - "zerocopy", ] [[package]] @@ -676,18 +764,18 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ - "getrandom", + "getrandom 0.3.4", ] [[package]] name = "rayon" -version = "1.10.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" dependencies = [ "either", "rayon-core", @@ -695,9 +783,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.1" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -705,9 +793,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.1" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -717,9 +805,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -728,15 +816,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "rustc-demangle" -version = "0.1.24" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" [[package]] name = "rustc_version" @@ -749,28 +837,22 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.42" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] name = "rustversion" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" - -[[package]] -name = "ryu" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "same-file" @@ -783,41 +865,51 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.24" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" [[package]] name = "seq-macro" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "serde" -version = "1.0.216" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ + "serde_core", "serde_derive", ] [[package]] name = "serde-wasm-bindgen" -version = "0.5.0" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3b143e2833c57ab9ad3ea280d21fd34e285a42837aeb0ee301f4f41890fa00e" +checksum = "8302e169f0eddcc139c70f139d19d6467353af16f9fce27e8c30158036a1e16b" dependencies = [ "js-sys", "serde", "wasm-bindgen", ] +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + [[package]] name = "serde_derive" -version = "1.0.216" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -826,23 +918,24 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", + "serde_core", + "zmij", ] [[package]] name = "serde_spanned" -version = "0.6.8" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" +checksum = "6662b5879511e06e8999a8a235d848113e942c9124f211511b16466ee2995f26" dependencies = [ - "serde", + "serde_core", ] [[package]] @@ -851,6 +944,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "sliceslice" version = "0.4.3" @@ -871,9 +970,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.90" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -882,31 +981,31 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.14.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ - "cfg-if", "fastrand", + "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] name = "thiserror" -version = "1.0.69" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.69" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", @@ -925,43 +1024,81 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.19" +version = "0.9.12+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" +checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863" dependencies = [ - "serde", + "indexmap", + "serde_core", "serde_spanned", - "toml_datetime", - "toml_edit", + "toml_datetime 0.7.5+spec-1.1.0", + "toml_parser", + "toml_writer", + "winnow 0.7.15", ] [[package]] name = "toml_datetime" -version = "0.6.8" +version = "0.7.5+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" dependencies = [ - "serde", + "serde_core", +] + +[[package]] +name = "toml_datetime" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" +dependencies = [ + "serde_core", ] [[package]] name = "toml_edit" -version = "0.22.22" +version = "0.25.11+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" dependencies = [ "indexmap", - "serde", - "serde_spanned", - "toml_datetime", - "winnow", + "toml_datetime 1.1.1+spec-1.1.0", + "toml_parser", + "winnow 1.0.1", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow 1.0.1", ] +[[package]] +name = "toml_writer" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db" + +[[package]] +name = "typeid" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" + [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" [[package]] name = "urlencoding" @@ -986,19 +1123,28 @@ dependencies = [ ] [[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" +name = "wasip2" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ - "wit-bindgen-rt", + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89" dependencies = [ "cfg-if", "once_cell", @@ -1006,27 +1152,14 @@ dependencies = [ "serde", "serde_json", "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1034,141 +1167,136 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129" dependencies = [ "unicode-ident", ] [[package]] -name = "web-sys" -version = "0.3.77" +name = "wasm-encoder" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" dependencies = [ - "js-sys", - "wasm-bindgen", + "leb128fmt", + "wasmparser", ] [[package]] -name = "wildcard" -version = "0.2.0" +name = "wasm-metadata" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36241ad0795516b55e3b60e55c7f979d4f324e4aaea4c70d56b548b9164ee4d2" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ - "thiserror", + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", ] [[package]] -name = "winapi-util" -version = "0.1.9" +name = "wasmparser" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "windows-sys 0.59.0", + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", ] [[package]] -name = "windows-sys" -version = "0.52.0" +name = "web-sys" +version = "0.3.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d" dependencies = [ - "windows-targets", + "js-sys", + "wasm-bindgen", ] [[package]] -name = "windows-sys" -version = "0.59.0" +name = "wildcard" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "f9b0540e91e49de3817c314da0dd3bc518093ceacc6ea5327cb0e1eb073e5189" dependencies = [ - "windows-targets", + "thiserror", ] [[package]] -name = "windows-targets" -version = "0.52.6" +name = "winapi" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", ] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" +name = "winapi-util" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] [[package]] -name = "windows_i686_msvc" -version = "0.52.6" +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" +name = "windows-link" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" +name = "windows-sys" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] [[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" +name = "winnow" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" [[package]] name = "winnow" -version = "0.6.20" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +checksum = "09dac053f1cd375980747450bfc7250c264eaae0583872e845c0c7cd578872b5" dependencies = [ "memchr", ] @@ -1183,15 +1311,17 @@ dependencies = [ "cidr", "criterion", "dyn-clone", + "erased-serde", "fnv", - "getrandom", + "getrandom 0.3.4", "indoc", - "memmem", + "memchr", "rand", "regex", "regex-automata", "serde", "serde_json", + "simdutf8", "sliceslice", "thiserror", "urlencoding", @@ -1226,7 +1356,7 @@ dependencies = [ name = "wirefilter-wasm" version = "0.7.0" dependencies = [ - "getrandom", + "getrandom 0.3.4", "js-sys", "serde-wasm-bindgen", "wasm-bindgen", @@ -1234,36 +1364,121 @@ dependencies = [ ] [[package]] -name = "wit-bindgen-rt" -version = "0.39.0" +name = "wit-bindgen" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", ] [[package]] name = "xdg" -version = "2.5.2" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213b7324336b53d2414b2db8537e56544d981803139155afa84f76eeebb7a546" +checksum = "2fb433233f2df9344722454bc7e96465c9d03bff9d77c248f9e7523fe79585b5" [[package]] name = "zerocopy" -version = "0.8.24" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.24" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", "syn", ] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 83cda09b..9032944b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,47 +1,53 @@ [workspace] +resolver = "2" members = [ - "engine", - "ffi", - "fuzz/bytes", - "fuzz/raw-string", - "fuzz/map-keys", - "wasm", + "engine", + "ffi", + "fuzz/bytes", + "fuzz/map-keys", + "fuzz/raw-string", + "wasm", ] -resolver = "2" [workspace.package] -authors = ["Ingvar Stepanyan "] version = "0.7.0" -publish = true +authors = ["Cloudflare "] edition = "2024" +publish = true [workspace.dependencies] -backtrace = "0.3" -cfg-if = "1" -cidr = { version = "0.2", features = ["serde"] } -criterion = "0.5" +backtrace = "0.3.76" +base64 = "0.21" +cbindgen = "0.29.2" +cfg-if = "1.0.4" +cidr = { version = "0.2.3", features = ["serde"] } +criterion = "0.8.2" dyn-clone = "1.0.20" -fnv = "1.0.6" -getrandom = { version = "0.3" } -indoc = "2" -libc = "0.2.42" -memmem = "0.1.1" -num_enum = "0.7" -rand = "0.9" -outer-regex = { version = "1.11.1", package = "regex" } -regex-automata = { version = "0.4.9" } -serde = { version = "1.0.113", features = ["derive"] } -serde_json = "1.0.56" +erased-serde = "0.4.9" +fnv = "1.0.7" +getrandom = "0.3.4" +indoc = "2.0.7" +js-sys = "0.3.85" +libc = "0.2.182" +memchr = "2.8.0" +num_enum = "0.7.5" +outer-regex = { package = "regex", version = "1.11.1" } +rand = "0.9.3" +regex-automata = "0.4.14" +serde = { version = "1.0.228", features = ["derive"] } +serde_json = "1.0.149" +serde-wasm-bindgen = "0.6.5" +simdutf8 = "0.1.5" sliceslice = "0.4.3" -thiserror = "1.0" -wildcard = "0.2.0" +thiserror = "2.0.18" urlencoding = "2.1.3" -base64 = "0.21" -wirefilter = { path = "engine", package = "wirefilter-engine" } +wasm-bindgen = { version = "0.2.108", features = ["serde-serialize"] } +wildcard = "0.3.0" +wirefilter = { package = "wirefilter-engine", path = "engine" } -[profile.release] +[profile.dev] panic = "unwind" -lto = true -[profile.dev] +[profile.release] +lto = true panic = "unwind" diff --git a/engine/Cargo.toml b/engine/Cargo.toml index ccff519b..79bd2f61 100644 --- a/engine/Cargo.toml +++ b/engine/Cargo.toml @@ -1,52 +1,53 @@ [package] -authors.workspace = true name = "wirefilter-engine" version.workspace = true +authors.workspace = true +edition.workspace = true description = "An execution engine for Wireshark-like filters" readme = "README.md" -license = "MIT" repository = "https://github.com/cloudflare/wirefilter" -keywords = ["wireshark", "filter", "engine", "parser", "runtime"] +keywords = ["engine", "filter", "parser", "runtime", "wireshark"] categories = ["config", "parser-implementations"] publish.workspace = true -edition.workspace = true [lib] -name = "wirefilter" bench = false +name = "wirefilter" [[bench]] -name = "bench" harness = false +name = "bench" [dependencies] backtrace.workspace = true +base64.workspace = true cfg-if.workspace = true cidr.workspace = true dyn-clone.workspace = true +erased-serde.workspace = true fnv.workspace = true -memmem.workspace = true -rand.workspace = true +memchr.workspace = true outer-regex.workspace = true +rand.workspace = true regex-automata = { workspace = true, optional = true } serde.workspace = true -serde_json.workspace = true +simdutf8.workspace = true sliceslice.workspace = true thiserror.workspace = true -wildcard.workspace = true urlencoding.workspace = true -base64.workspace = true +wildcard.workspace = true [dev-dependencies] criterion.workspace = true indoc.workspace = true - -[features] -default = ["regex"] -regex = ["dep:regex-automata"] +serde_json.workspace = true [target.'cfg(target_family = "wasm")'.dependencies] # By default, getrandom doesn't have any source of randomness on wasm32-unknown. # This optional dependency allows us to build with `--features getrandom/wasm_js`. # For more information see: https://docs.rs/getrandom/#webassembly-support getrandom.workspace = true + +[features] +default = ["regex"] +regex = ["dep:regex-automata"] diff --git a/engine/benches/bench.rs b/engine/benches/bench.rs index 2763df9d..33a351ec 100644 --- a/engine/benches/bench.rs +++ b/engine/benches/bench.rs @@ -6,9 +6,11 @@ use std::alloc::System; static A: System = System; use criterion::{Bencher, Criterion, criterion_group, criterion_main}; -use std::{borrow::Cow, clone::Clone, fmt::Debug, net::IpAddr}; +use std::clone::Clone; +use std::fmt::Debug; +use std::net::IpAddr; use wirefilter::{ - ExecutionContext, FilterAst, FunctionArgs, GetType, LhsValue, SchemeBuilder, + Bytes, ExecutionContext, FilterAst, FunctionArgs, GetType, LhsValue, SchemeBuilder, SimpleFunctionArgKind, SimpleFunctionDefinition, SimpleFunctionImpl, SimpleFunctionParam, Type, }; @@ -17,7 +19,7 @@ fn lowercase<'a>(args: FunctionArgs<'_, 'a>) -> Option> { match input { LhsValue::Bytes(mut bytes) => { let make_lowercase = match bytes { - Cow::Borrowed(bytes) => bytes.iter().any(u8::is_ascii_uppercase), + Bytes::Borrowed(bytes) => bytes.iter().any(u8::is_ascii_uppercase), _ => true, }; if make_lowercase { @@ -34,7 +36,7 @@ fn uppercase<'a>(args: FunctionArgs<'_, 'a>) -> Option> { match input { LhsValue::Bytes(mut bytes) => { let make_uppercase = match bytes { - Cow::Borrowed(bytes) => bytes.iter().any(u8::is_ascii_lowercase), + Bytes::Borrowed(bytes) => bytes.iter().any(u8::is_ascii_lowercase), _ => true, }; if make_uppercase { diff --git a/engine/src/ast/field_expr.rs b/engine/src/ast/field_expr.rs index 338b7d6b..1aa27938 100644 --- a/engine/src/ast/field_expr.rs +++ b/engine/src/ast/field_expr.rs @@ -1,22 +1,18 @@ -use super::{ - Expr, - function_expr::FunctionCallExpr, - parse::FilterParser, - visitor::{Visitor, VisitorMut}, -}; -use crate::{ - ExecutionContext, Scheme, - ast::index_expr::{Compare, IndexExpr}, - compiler::Compiler, - filter::CompiledExpr, - lex::{Lex, LexErrorKind, LexResult, LexWith, expect, skip_space, span}, - range_set::RangeSet, - rhs_types::{Bytes, ExplicitIpRange, ListName, Regex, Wildcard}, - scheme::{Field, Identifier, List}, - searcher::{EmptySearcher, TwoWaySearcher}, - strict_partial_ord::StrictPartialOrd, - types::{GetType, LhsValue, RhsValue, RhsValues, Type}, -}; +use super::Expr; +use super::function_expr::FunctionCallExpr; +use super::parse::FilterParser; +use super::visitor::{Visitor, VisitorMut}; +use crate::ast::index_expr::{Compare, IndexExpr}; +use crate::compiler::Compiler; +use crate::filter::CompiledExpr; +use crate::lex::{Lex, LexErrorKind, LexResult, LexWith, expect, skip_space, span}; +use crate::range_set::RangeSet; +use crate::rhs_types::{BytesExpr, ExplicitIpRange, ListName, Regex, Wildcard}; +use crate::scheme::{Field, Identifier, List}; +use crate::searcher::{EmptySearcher, MemmemSearcher}; +use crate::strict_partial_ord::StrictPartialOrd; +use crate::types::{GetType, LhsValue, RhsValue, RhsValues, Type}; +use crate::{ExecutionContext, Scheme}; use serde::{Serialize, Serializer}; use sliceslice::MemchrSearcher; use std::cmp::Ordering; @@ -148,7 +144,7 @@ pub enum ComparisonOpExpr { /// "contains" comparison #[serde(serialize_with = "serialize_contains")] - Contains(Bytes), + Contains(BytesExpr), /// "matches / ~" comparison #[serde(serialize_with = "serialize_matches")] @@ -168,7 +164,7 @@ pub enum ComparisonOpExpr { /// "contains {...}" comparison #[serde(serialize_with = "serialize_contains_one_of")] - ContainsOneOf(Vec), + ContainsOneOf(Vec), /// "in $..." comparison #[serde(serialize_with = "serialize_list")] @@ -201,7 +197,7 @@ fn serialize_is_true(ser: S) -> Result { out.end() } -fn serialize_contains(rhs: &Bytes, ser: S) -> Result { +fn serialize_contains(rhs: &BytesExpr, ser: S) -> Result { serialize_op_rhs("Contains", rhs, ser) } @@ -224,7 +220,7 @@ fn serialize_one_of(rhs: &RhsValues, ser: S) -> Result(rhs: &[Bytes], ser: S) -> Result { +fn serialize_contains_one_of(rhs: &[BytesExpr], ser: S) -> Result { serialize_op_rhs("ContainsOneOf", rhs, ser) } @@ -372,7 +368,7 @@ impl ComparisonExpr { } (Type::Bytes, ComparisonOp::Bytes(op)) => match op { BytesOp::Contains => { - let (bytes, input) = Bytes::lex(input)?; + let (bytes, input) = BytesExpr::lex(input)?; (ComparisonOpExpr::Contains(bytes), input) } BytesOp::Matches => { @@ -472,7 +468,7 @@ impl Expr for ComparisonExpr { ($op:tt, $def:ident) => { match rhs { RhsValue::Bytes(bytes) => { - struct BytesOp(Bytes); + struct BytesOp(BytesExpr); impl Compare for BytesOp { #[inline] @@ -686,7 +682,7 @@ impl Expr for ComparisonExpr { }; } - search!(TwoWaySearcher::new(bytes)) + search!(MemmemSearcher::new(bytes)) } ComparisonOpExpr::Matches(regex) => lhs.compile_with(compiler, false, regex), ComparisonOpExpr::Wildcard(wildcard) => lhs.compile_with(compiler, false, wildcard), @@ -697,8 +693,8 @@ impl Expr for ComparisonExpr { RhsValues::Ip(ranges) => { let mut v4 = Vec::new(); let mut v6 = Vec::new(); - for range in ranges { - match range.clone().into() { + for range in ranges.into_iter() { + match range.into() { ExplicitIpRange::V4(range) => v4.push(range), ExplicitIpRange::V6(range) => v6.push(range), } @@ -798,28 +794,28 @@ impl Expr for ComparisonExpr { #[allow(clippy::bool_assert_comparison)] mod tests { use super::*; + use crate::ast::function_expr::{FunctionCallArgExpr, FunctionCallExpr}; + use crate::ast::logical_expr::LogicalExpr; + use crate::execution_context::ExecutionContext; + use crate::functions::{ + FunctionArgKind, FunctionArgs, FunctionDefinition, FunctionDefinitionContext, + FunctionParam, FunctionParamError, SimpleFunctionDefinition, SimpleFunctionImpl, + SimpleFunctionOptParam, SimpleFunctionParam, + }; + use crate::lhs_types::{Array, Map}; + use crate::list_matcher::{ListDefinition, ListMatcher}; + use crate::rhs_types::{BytesFormat, IpRange, RegexFormat}; + use crate::scheme::{FieldIndex, IndexAccessError, Scheme}; + use crate::types::ExpectedType; use crate::{ - BytesFormat, FieldRef, LhsValue, ParserSettings, SchemeBuilder, SimpleFunctionArgKind, - TypedMap, - ast::{ - function_expr::{FunctionCallArgExpr, FunctionCallExpr}, - logical_expr::LogicalExpr, - }, - execution_context::ExecutionContext, - functions::{ - FunctionArgKind, FunctionArgs, FunctionDefinition, FunctionDefinitionContext, - FunctionParam, FunctionParamError, SimpleFunctionDefinition, SimpleFunctionImpl, - SimpleFunctionOptParam, SimpleFunctionParam, - }, - lhs_types::{Array, Map}, - list_matcher::{ListDefinition, ListMatcher}, - rhs_types::{IpRange, RegexFormat}, - scheme::{FieldIndex, IndexAccessError, Scheme}, - types::ExpectedType, + FieldRef, LhsValue, ParserSettings, SchemeBuilder, SimpleFunctionArgKind, TypedMap, }; use cidr::IpCidr; + use serde::Deserialize; + use std::convert::TryFrom; + use std::iter::once; + use std::net::IpAddr; use std::sync::LazyLock; - use std::{convert::TryFrom, iter::once, net::IpAddr}; fn any_function<'a>(args: FunctionArgs<'_, 'a>) -> Option> { match args.next()? { @@ -950,12 +946,13 @@ mod tests { pub struct NumMListDefinition {} impl ListDefinition for NumMListDefinition { - fn matcher_from_json_value( + fn deserialize_matcher<'de>( &self, _: Type, - _: serde_json::Value, - ) -> Result, serde_json::Error> { - Ok(Box::new(NumMatcher {})) + deserializer: &mut dyn erased_serde::Deserializer<'de>, + ) -> Result, erased_serde::Error> { + let matcher = erased_serde::deserialize::(deserializer)?; + Ok(Box::new(matcher)) } fn new_matcher(&self) -> Box { @@ -1939,7 +1936,7 @@ mod tests { identifier: IdentifierExpr::Field(field("http.host").to_owned()), indexes: vec![], }), - FunctionCallArgExpr::Literal(RhsValue::Bytes(Bytes::from( + FunctionCallArgExpr::Literal(RhsValue::Bytes(BytesExpr::from( ".org".to_owned() ))), ], @@ -2079,7 +2076,7 @@ mod tests { identifier: IdentifierExpr::Field(field("http.cookies").to_owned()), indexes: vec![FieldIndex::MapEach], }), - FunctionCallArgExpr::Literal(RhsValue::Bytes(Bytes::from( + FunctionCallArgExpr::Literal(RhsValue::Bytes(BytesExpr::from( "-cf".to_owned() ))), ], @@ -2148,7 +2145,7 @@ mod tests { identifier: IdentifierExpr::Field(field("http.headers").to_owned()), indexes: vec![FieldIndex::MapEach], }), - FunctionCallArgExpr::Literal(RhsValue::Bytes(Bytes::from( + FunctionCallArgExpr::Literal(RhsValue::Bytes(BytesExpr::from( "-cf".to_owned() ))), ], @@ -2314,7 +2311,7 @@ mod tests { identifier: IdentifierExpr::Field(field("http.cookies").to_owned()), indexes: vec![FieldIndex::MapEach], }), - FunctionCallArgExpr::Literal(RhsValue::Bytes(Bytes::from( + FunctionCallArgExpr::Literal(RhsValue::Bytes(BytesExpr::from( "-cf".to_owned() ))), ], @@ -2467,7 +2464,7 @@ mod tests { ); } - #[derive(Debug, PartialEq, Eq, Serialize, Clone)] + #[derive(Debug, PartialEq, Eq, Serialize, Clone, Deserialize)] pub struct NumMatcher {} impl ListMatcher for NumMatcher { @@ -2485,10 +2482,6 @@ mod tests { } } - fn to_json_value(&self) -> serde_json::Value { - serde_json::Value::Null - } - fn clear(&mut self) {} } @@ -2565,7 +2558,10 @@ mod tests { assert_eq!(expr.execute_one(ctx), true); let json = serde_json::to_string(ctx).unwrap(); - assert_eq!(json, "{\"tcp.port\":1001,\"$lists\":[]}"); + assert_eq!( + json, + "{\"tcp.port\":1001,\"$lists\":[{\"type\":\"Int\",\"data\":{}}]}" + ); } #[test] @@ -2770,6 +2766,7 @@ mod tests { assert_eq!(true_count, 1); } + #[cfg(feature = "regex")] #[test] fn test_raw_string() { // Equal operator @@ -2782,7 +2779,7 @@ mod tests { }, op: ComparisonOpExpr::Ordering { op: OrderingOp::Equal, - rhs: RhsValue::Bytes(Bytes::new("ab".as_bytes(), BytesFormat::Raw(3))), + rhs: RhsValue::Bytes(BytesExpr::new("ab".as_bytes(), BytesFormat::Raw(3))), }, } ); @@ -2839,7 +2836,7 @@ mod tests { // Wildcard operator let wildcard = Wildcard::new( - Bytes::new(r"foo*\*\\".as_bytes(), BytesFormat::Raw(2)), + BytesExpr::new(r"foo*\*\\".as_bytes(), BytesFormat::Raw(2)), usize::MAX, ) .unwrap(); @@ -2883,7 +2880,7 @@ mod tests { // Strict wildcard operator let wildcard = Wildcard::new( - Bytes::new(r"foo*\*\\".as_bytes(), BytesFormat::Raw(2)), + BytesExpr::new(r"foo*\*\\".as_bytes(), BytesFormat::Raw(2)), usize::MAX, ) .unwrap(); @@ -2938,7 +2935,7 @@ mod tests { identifier: IdentifierExpr::Field(field("http.host").to_owned()), indexes: vec![], }), - FunctionCallArgExpr::Literal(RhsValue::Bytes(Bytes::new( + FunctionCallArgExpr::Literal(RhsValue::Bytes(BytesExpr::new( "cd".as_bytes(), BytesFormat::Raw(1) ))) @@ -2949,7 +2946,7 @@ mod tests { }, op: ComparisonOpExpr::Ordering { op: OrderingOp::Equal, - rhs: RhsValue::Bytes(Bytes::new("abcd".as_bytes(), BytesFormat::Raw(2))) + rhs: RhsValue::Bytes(BytesExpr::new("abcd".as_bytes(), BytesFormat::Raw(2))) } } ); diff --git a/engine/src/ast/function_expr.rs b/engine/src/ast/function_expr.rs index 037d9ae9..c9808941 100644 --- a/engine/src/ast/function_expr.rs +++ b/engine/src/ast/function_expr.rs @@ -1,26 +1,20 @@ -use super::{ - ValueExpr, - parse::FilterParser, - visitor::{Visitor, VisitorMut}, -}; -use crate::{ - FunctionRef, - ast::{ - field_expr::{ComparisonExpr, ComparisonOp, ComparisonOpExpr}, - index_expr::IndexExpr, - logical_expr::{LogicalExpr, UnaryOp}, - }, - compiler::Compiler, - filter::{CompiledExpr, CompiledValueExpr, CompiledValueResult}, - functions::{ - ExactSizeChain, FunctionArgs, FunctionDefinition, FunctionDefinitionContext, FunctionParam, - FunctionParamError, - }, - lex::{Lex, LexError, LexErrorKind, LexResult, LexWith, expect, skip_space, span}, - lhs_types::Array, - scheme::Function, - types::{GetType, LhsValue, RhsValue, Type}, +use super::ValueExpr; +use super::parse::FilterParser; +use super::visitor::{Visitor, VisitorMut}; +use crate::FunctionRef; +use crate::ast::field_expr::{ComparisonExpr, ComparisonOp, ComparisonOpExpr}; +use crate::ast::index_expr::IndexExpr; +use crate::ast::logical_expr::{LogicalExpr, UnaryOp}; +use crate::compiler::Compiler; +use crate::filter::{CompiledExpr, CompiledValueExpr, CompiledValueResult}; +use crate::functions::{ + ExactSizeChain, FunctionArgs, FunctionDefinition, FunctionDefinitionContext, FunctionParam, + FunctionParamError, }; +use crate::lex::{Lex, LexError, LexErrorKind, LexResult, LexWith, expect, skip_space, span}; +use crate::lhs_types::Array; +use crate::scheme::Function; +use crate::types::{GetType, LhsValue, RhsValue, Type}; use serde::Serialize; use std::hash::{Hash, Hasher}; use std::iter::once; @@ -295,7 +289,7 @@ impl ValueExpr for FunctionCallExpr { // Extract the values of the map if let LhsValue::Map(map) = first { first = LhsValue::Array( - Array::try_from_iter(map.value_type(), map.values_into_iter()).unwrap(), + Array::try_from_iter(map.value_type(), map.into_values()).unwrap(), ); } // Retrieve the underlying `Array` @@ -526,21 +520,17 @@ impl<'i> LexWith<'i, &FilterParser<'_>> for FunctionCallExpr { #[cfg(test)] mod tests { use super::*; - use crate::{ - SimpleFunctionArgKind, - ast::{ - field_expr::{ComparisonExpr, ComparisonOpExpr, IdentifierExpr, OrderingOp}, - logical_expr::{LogicalExpr, LogicalOp, ParenthesizedExpr}, - parse::FilterParser, - }, - functions::{ - FunctionArgKind, FunctionArgKindMismatchError, FunctionArgs, SimpleFunctionDefinition, - SimpleFunctionImpl, SimpleFunctionOptParam, SimpleFunctionParam, - }, - rhs_types::{Bytes, BytesFormat}, - scheme::{FieldIndex, IndexAccessError, Scheme}, - types::{RhsValues, Type, TypeMismatchError}, + use crate::SimpleFunctionArgKind; + use crate::ast::field_expr::{ComparisonExpr, ComparisonOpExpr, IdentifierExpr, OrderingOp}; + use crate::ast::logical_expr::{LogicalExpr, LogicalOp, ParenthesizedExpr}; + use crate::ast::parse::FilterParser; + use crate::functions::{ + FunctionArgKind, FunctionArgKindMismatchError, FunctionArgs, SimpleFunctionDefinition, + SimpleFunctionImpl, SimpleFunctionOptParam, SimpleFunctionParam, }; + use crate::rhs_types::{BytesExpr, BytesFormat}; + use crate::scheme::{FieldIndex, IndexAccessError, Scheme}; + use crate::types::{RhsValues, Type, TypeMismatchError}; use std::convert::TryFrom; use std::sync::LazyLock; @@ -562,13 +552,10 @@ mod tests { } fn lower_function<'a>(args: FunctionArgs<'_, 'a>) -> Option> { - use std::borrow::Cow; - match args.next()? { Ok(LhsValue::Bytes(mut b)) => { - let mut text: Vec = b.to_mut().to_vec(); - text.make_ascii_lowercase(); - Some(LhsValue::Bytes(Cow::Owned(text))) + b.to_mut().make_ascii_lowercase(); + Some(LhsValue::Bytes(b)) } Err(Type::Bytes) => None, _ => unreachable!(), @@ -1265,8 +1252,8 @@ mod tests { identifier: IdentifierExpr::Field(SCHEME.get_field("http.host").unwrap().to_owned()), indexes: vec![], }), - FunctionCallArgExpr::Literal(RhsValue::Bytes(Bytes::new("this is a r##raw## string".as_bytes(), BytesFormat::Raw(0)))), - FunctionCallArgExpr::Literal(RhsValue::Bytes(Bytes::new("this is a new r##raw## string".as_bytes(), BytesFormat::Raw(0)))) + FunctionCallArgExpr::Literal(RhsValue::Bytes(BytesExpr::new("this is a r##raw## string".as_bytes(), BytesFormat::Raw(0)))), + FunctionCallArgExpr::Literal(RhsValue::Bytes(BytesExpr::new("this is a new r##raw## string".as_bytes(), BytesFormat::Raw(0)))) ], context: None, }, @@ -1306,8 +1293,8 @@ mod tests { identifier: IdentifierExpr::Field(SCHEME.get_field("http.host").unwrap().to_owned()), indexes: vec![], }), - FunctionCallArgExpr::Literal(RhsValue::Bytes(Bytes::new("this is a r##\"raw\"## string".as_bytes(), BytesFormat::Raw(3)))), - FunctionCallArgExpr::Literal(RhsValue::Bytes(Bytes::new("this is a new r##\"raw\"## string".as_bytes(), BytesFormat::Raw(3)))) + FunctionCallArgExpr::Literal(RhsValue::Bytes(BytesExpr::new("this is a r##\"raw\"## string".as_bytes(), BytesFormat::Raw(3)))), + FunctionCallArgExpr::Literal(RhsValue::Bytes(BytesExpr::new("this is a new r##\"raw\"## string".as_bytes(), BytesFormat::Raw(3)))) ], context: None, }, diff --git a/engine/src/ast/index_expr.rs b/engine/src/ast/index_expr.rs index dd1116cd..b3ef7474 100644 --- a/engine/src/ast/index_expr.rs +++ b/engine/src/ast/index_expr.rs @@ -1,19 +1,16 @@ -use super::{ - ValueExpr, - field_expr::IdentifierExpr, - parse::FilterParser, - visitor::{Visitor, VisitorMut}, -}; -use crate::{ - compiler::Compiler, - execution_context::ExecutionContext, - filter::{CompiledExpr, CompiledOneExpr, CompiledValueExpr, CompiledVecExpr}, - lex::{Lex, LexErrorKind, LexResult, LexWith, expect, skip_space, span}, - lhs_types::{Array, Map, TypedArray}, - scheme::{FieldIndex, IndexAccessError}, - types::{GetType, IntoIter, LhsValue, Type}, -}; -use serde::{Serialize, Serializer, ser::SerializeSeq}; +use super::ValueExpr; +use super::field_expr::IdentifierExpr; +use super::parse::FilterParser; +use super::visitor::{Visitor, VisitorMut}; +use crate::compiler::Compiler; +use crate::execution_context::ExecutionContext; +use crate::filter::{CompiledExpr, CompiledOneExpr, CompiledValueExpr, CompiledVecExpr}; +use crate::lex::{Lex, LexErrorKind, LexResult, LexWith, expect, skip_space, span}; +use crate::lhs_types::{Array, Map, TypedArray}; +use crate::scheme::{FieldIndex, IndexAccessError}; +use crate::types::{GetType, IntoIter, LhsValue, Type}; +use serde::ser::SerializeSeq; +use serde::{Serialize, Serializer}; const BOOL_ARRAY: TypedArray<'_, bool> = TypedArray::new(); @@ -528,10 +525,11 @@ impl<'a> Iterator for MapEachIterator<'a, '_> { #[cfg(test)] mod tests { use super::*; + use crate::ast::field_expr::IdentifierExpr; use crate::{ Array, FieldIndex, FilterParser, FunctionArgs, FunctionCallArgExpr, FunctionCallExpr, Scheme, SchemeBuilder, SimpleFunctionArgKind, SimpleFunctionDefinition, SimpleFunctionImpl, - SimpleFunctionParam, ast::field_expr::IdentifierExpr, + SimpleFunctionParam, }; use std::sync::LazyLock; @@ -938,7 +936,10 @@ mod tests { LhsValue::Bytes(bytes) => bytes, _ => unreachable!(), }; - assert_eq!(std::str::from_utf8(&bytes).unwrap(), format!("[{i}][{j}]")); + assert_eq!( + simdutf8::basic::from_utf8(&bytes).unwrap(), + format!("[{i}][{j}]") + ); } let indexes = [FieldIndex::MapEach, FieldIndex::ArrayIndex(i)]; @@ -950,7 +951,10 @@ mod tests { LhsValue::Bytes(bytes) => bytes, _ => unreachable!(), }; - assert_eq!(std::str::from_utf8(&bytes).unwrap(), format!("[{j}][{i}]")); + assert_eq!( + simdutf8::basic::from_utf8(&bytes).unwrap(), + format!("[{j}][{i}]") + ); } } @@ -965,7 +969,10 @@ mod tests { LhsValue::Bytes(bytes) => bytes, _ => unreachable!(), }; - assert_eq!(std::str::from_utf8(&bytes).unwrap(), format!("[{i}][{j}]")); + assert_eq!( + simdutf8::basic::from_utf8(&bytes).unwrap(), + format!("[{i}][{j}]") + ); j = (j + 1) % 10; i += (j == 0) as u32; } diff --git a/engine/src/ast/logical_expr.rs b/engine/src/ast/logical_expr.rs index 478b4e9f..4f1790a0 100644 --- a/engine/src/ast/logical_expr.rs +++ b/engine/src/ast/logical_expr.rs @@ -1,15 +1,11 @@ -use super::{ - Expr, - field_expr::ComparisonExpr, - parse::FilterParser, - visitor::{Visitor, VisitorMut}, -}; -use crate::{ - compiler::Compiler, - filter::{CompiledExpr, CompiledOneExpr, CompiledVecExpr}, - lex::{Lex, LexErrorKind, LexResult, LexWith, expect, skip_space}, - types::{GetType, Type, TypeMismatchError}, -}; +use super::Expr; +use super::field_expr::ComparisonExpr; +use super::parse::FilterParser; +use super::visitor::{Visitor, VisitorMut}; +use crate::compiler::Compiler; +use crate::filter::{CompiledExpr, CompiledOneExpr, CompiledVecExpr}; +use crate::lex::{Lex, LexErrorKind, LexResult, LexWith, expect, skip_space}; +use crate::types::{GetType, Type, TypeMismatchError}; use serde::Serialize; lex_enum!( @@ -326,15 +322,13 @@ impl Expr for LogicalExpr { #[allow(clippy::cognitive_complexity)] fn test() { use super::field_expr::ComparisonExpr; - use crate::{ - ast::field_expr::{ComparisonOpExpr, IdentifierExpr}, - ast::index_expr::IndexExpr, - execution_context::ExecutionContext, - lex::complete, - lhs_types::Array, - scheme::FieldIndex, - types::Type, - }; + use crate::ast::field_expr::{ComparisonOpExpr, IdentifierExpr}; + use crate::ast::index_expr::IndexExpr; + use crate::execution_context::ExecutionContext; + use crate::lex::complete; + use crate::lhs_types::Array; + use crate::scheme::FieldIndex; + use crate::types::Type; let scheme = &Scheme! { t: Bool, diff --git a/engine/src/ast/mod.rs b/engine/src/ast/mod.rs index 66323bb3..b158ea9a 100644 --- a/engine/src/ast/mod.rs +++ b/engine/src/ast/mod.rs @@ -8,16 +8,14 @@ pub mod visitor; use self::index_expr::IndexExpr; use self::logical_expr::LogicalExpr; use self::parse::FilterParser; -use crate::{ - compiler::{Compiler, DefaultCompiler}, - filter::{CompiledExpr, CompiledValueExpr, Filter, FilterValue}, - lex::{LexErrorKind, LexResult, LexWith}, - scheme::{Scheme, UnknownFieldError}, - types::{GetType, Type, TypeMismatchError}, -}; +use self::visitor::{UsesListVisitor, UsesVisitor, Visitor, VisitorMut}; +use crate::compiler::{Compiler, DefaultCompiler}; +use crate::filter::{CompiledExpr, CompiledValueExpr, Filter, FilterValue}; +use crate::lex::{LexErrorKind, LexResult, LexWith}; +use crate::scheme::{Scheme, UnknownFieldError}; +use crate::types::{GetType, Type, TypeMismatchError}; use serde::Serialize; use std::fmt::{self, Debug}; -use visitor::{UsesListVisitor, UsesVisitor, Visitor, VisitorMut}; /// Trait used to represent node that evaluates to a [`bool`] (or a [`Vec`]). pub trait Expr: diff --git a/engine/src/ast/parse.rs b/engine/src/ast/parse.rs index e9984294..562703d5 100644 --- a/engine/src/ast/parse.rs +++ b/engine/src/ast/parse.rs @@ -1,8 +1,6 @@ use super::{FilterAst, FilterValueAst}; -use crate::{ - lex::{LexErrorKind, LexResult, LexWith, complete}, - scheme::Scheme, -}; +use crate::lex::{LexErrorKind, LexResult, LexWith, complete}; +use crate::scheme::Scheme; use std::cmp::{max, min}; use std::error::Error; use std::fmt::{self, Debug, Display, Formatter}; diff --git a/engine/src/ast/visitor.rs b/engine/src/ast/visitor.rs index ddcbfb81..ad5ca796 100644 --- a/engine/src/ast/visitor.rs +++ b/engine/src/ast/visitor.rs @@ -1,10 +1,8 @@ -use super::{ - Expr, ValueExpr, - field_expr::{ComparisonExpr, ComparisonOpExpr}, - function_expr::{FunctionCallArgExpr, FunctionCallExpr}, - index_expr::IndexExpr, - logical_expr::LogicalExpr, -}; +use super::field_expr::{ComparisonExpr, ComparisonOpExpr}; +use super::function_expr::{FunctionCallArgExpr, FunctionCallExpr}; +use super::index_expr::IndexExpr; +use super::logical_expr::LogicalExpr; +use super::{Expr, ValueExpr}; use crate::{Field, FieldRef, Function}; /// Trait used to immutably visit all nodes in the AST. diff --git a/engine/src/execution_context.rs b/engine/src/execution_context.rs index 5c303a6b..58b0e8ce 100644 --- a/engine/src/execution_context.rs +++ b/engine/src/execution_context.rs @@ -1,11 +1,9 @@ -use crate::{ - FieldRef, ListMatcher, ListRef, UnknownFieldError, - scheme::{Field, List, Scheme, SchemeMismatchError}, - types::{GetType, LhsValue, LhsValueSeed, Type, TypeMismatchError}, -}; -use serde::de::{self, DeserializeSeed, Deserializer, MapAccess, Visitor}; +use crate::scheme::{Field, List, Scheme, SchemeMismatchError}; +use crate::types::{GetType, LhsValue, LhsValueSeed, Type, TypeMismatchError}; +use crate::{FieldRef, ListMatcher, ListRef, UnknownFieldError}; +use serde::Serialize; +use serde::de::{self, DeserializeSeed, Deserializer, MapAccess, SeqAccess, Visitor}; use serde::ser::{SerializeMap, SerializeSeq, Serializer}; -use serde::{Deserialize, Serialize}; use std::borrow::Cow; use std::fmt; use std::fmt::Debug; @@ -292,11 +290,142 @@ impl Drop for ExecutionContextGuard<'_, '_, U, T> { } } -#[derive(Serialize, Deserialize)] -struct ListData { - #[serde(rename = "type")] - ty: Type, - data: serde_json::Value, +struct ListMatcherData<'a>(ListRef<'a>); + +impl<'de> DeserializeSeed<'de> for ListMatcherData<'_> { + type Value = Box; + + fn deserialize(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + use serde::de::Error; + + let mut erased = >::erase(deserializer); + self.0 + .definition() + .deserialize_matcher(self.0.get_type(), &mut erased) + .map_err(D::Error::custom) + } +} + +struct ListMatcherEntry<'a>(&'a Scheme, &'a mut [Box]); + +impl<'de> DeserializeSeed<'de> for ListMatcherEntry<'_> { + type Value = (); + + fn deserialize(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct ListMatcherEntryVisitor<'a>(&'a Scheme, &'a mut [Box]); + + impl<'de> Visitor<'de> for ListMatcherEntryVisitor<'_> { + type Value = (); + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(formatter, "list matcher data") + } + + fn visit_map(self, mut access: M) -> Result<(), M::Error> + where + M: MapAccess<'de>, + { + use serde::de::Error; + + let Some(key) = access.next_key::>()? else { + return Err(M::Error::missing_field("type")); + }; + + if key != "type" { + return Err(M::Error::unknown_field(&key, &["type", "data"])); + } + + let ty = access.next_value::()?; + + let Some(list) = self.0.get_list(&ty) else { + return Err(M::Error::custom(format!("no list defined for type {ty}"))); + }; + + let Some(key) = access.next_key::>()? else { + return Err(M::Error::missing_field("data")); + }; + + if key != "data" { + return Err(M::Error::unknown_field(&key, &["type", "data"])); + } + + let matcher = access.next_value_seed(ListMatcherData(list))?; + + self.1[list.index()] = matcher; + + Ok(()) + } + } + + const FIELDS: &[&str] = &["type", "data"]; + deserializer.deserialize_struct( + "ListMatcher", + FIELDS, + ListMatcherEntryVisitor(self.0, self.1), + ) + } +} + +struct ListMatcherSlice<'a>(&'a Scheme, &'a mut [Box]); + +impl<'de> DeserializeSeed<'de> for ListMatcherSlice<'_> { + type Value = (); + + fn deserialize(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct ListMatcherSliceVisitor<'a>(&'a Scheme, &'a mut [Box]); + + impl<'de> Visitor<'de> for ListMatcherSliceVisitor<'_> { + type Value = (); + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(formatter, "a list of list matcher data") + } + + fn visit_seq(self, mut access: S) -> Result<(), S::Error> + where + S: SeqAccess<'de>, + { + while let Some(()) = access.next_element_seed(ListMatcherEntry(self.0, self.1))? {} + + Ok(()) + } + } + + deserializer.deserialize_seq(ListMatcherSliceVisitor(self.0, self.1)) + } +} + +impl Serialize for ListMatcherSlice<'_> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + #[derive(Serialize)] + struct TypedListMatcher<'a> { + #[serde(rename = "type")] + ty: Type, + data: &'a dyn erased_serde::Serialize, + } + + let mut seq = serializer.serialize_seq(Some(self.1.len()))?; + for list in self.0.lists() { + let matcher = &*self.1[list.index()] as &dyn erased_serde::Serialize; + seq.serialize_element(&TypedListMatcher { + ty: list.get_type(), + data: matcher, + })?; + } + seq.end() + } } impl<'de, U> DeserializeSeed<'de> for &mut ExecutionContext<'de, U> { @@ -312,7 +441,7 @@ impl<'de, U> DeserializeSeed<'de> for &mut ExecutionContext<'de, U> { type Value = (); fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(formatter, "a map of lhs value") + write!(formatter, "a serialized execution context") } fn visit_map(self, mut access: M) -> Result<(), M::Error> @@ -322,20 +451,10 @@ impl<'de, U> DeserializeSeed<'de> for &mut ExecutionContext<'de, U> { while let Some(key) = access.next_key::>()? { if key == "$lists" { // Deserialize lists - let vec = access.next_value::>()?; - for ListData { ty, data } in vec.into_iter() { - let list = self.0.scheme.get_list(&ty).ok_or_else(|| { - de::Error::custom(format!("unknown list for type: {ty:?}")) - })?; - self.0.list_matchers[list.index()] = list - .definition() - .matcher_from_json_value(ty, data) - .map_err(|err| { - de::Error::custom(format!( - "failed to deserialize list matcher: {err:?}" - )) - })?; - } + access.next_value_seed(ListMatcherSlice( + &self.0.scheme, + &mut self.0.list_matchers, + ))?; } else { let field = self .0 @@ -381,6 +500,13 @@ impl Serialize for ExecutionContext<'_, U> { struct ListMatcherSlice<'a>(&'a Scheme, &'a [Box]); + #[derive(Serialize)] + struct TypedListMatcher<'a> { + #[serde(rename = "type")] + ty: Type, + data: &'a dyn erased_serde::Serialize, + } + impl Serialize for ListMatcherSlice<'_> { fn serialize(&self, serializer: S) -> Result where @@ -388,13 +514,11 @@ impl Serialize for ExecutionContext<'_, U> { { let mut seq = serializer.serialize_seq(Some(self.1.len()))?; for list in self.0.lists() { - let data = self.1[list.index()].to_json_value(); - if data != serde_json::Value::Null { - seq.serialize_element(&ListData { - ty: list.get_type(), - data, - })?; - } + let matcher = &*self.1[list.index()] as &dyn erased_serde::Serialize; + seq.serialize_element(&TypedListMatcher { + ty: list.get_type(), + data: matcher, + })?; } seq.end() } diff --git a/engine/src/filter.rs b/engine/src/filter.rs index db9b1803..510672cf 100644 --- a/engine/src/filter.rs +++ b/engine/src/filter.rs @@ -5,12 +5,10 @@ //! their `execute` methods and aggregating results into a single boolean value //! as recursion unwinds. -use crate::{ - execution_context::ExecutionContext, - lhs_types::TypedArray, - scheme::{Scheme, SchemeMismatchError}, - types::{LhsValue, Type}, -}; +use crate::execution_context::ExecutionContext; +use crate::lhs_types::TypedArray; +use crate::scheme::{Scheme, SchemeMismatchError}; +use crate::types::{LhsValue, Type}; use std::fmt; type BoxedClosureToOneBool = diff --git a/engine/src/functions/cidr.rs b/engine/src/functions/cidr.rs index 1431f55c..1fdf22ea 100644 --- a/engine/src/functions/cidr.rs +++ b/engine/src/functions/cidr.rs @@ -1,9 +1,6 @@ -use std::{ - iter, - net::{IpAddr, Ipv4Addr, Ipv6Addr}, -}; - use crate::{FunctionArgs, FunctionDefinition, LhsValue, Type}; +use std::iter; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; /// `cidr` Function (Cloudflare Ruleset Engine) /// diff --git a/engine/src/functions/concat.rs b/engine/src/functions/concat.rs index c9d83473..1d493d93 100644 --- a/engine/src/functions/concat.rs +++ b/engine/src/functions/concat.rs @@ -1,8 +1,8 @@ use crate::{ - Array, ExpectedType, FunctionArgs, FunctionDefinition, FunctionDefinitionContext, + Array, Bytes, ExpectedType, FunctionArgs, FunctionDefinition, FunctionDefinitionContext, FunctionParam, FunctionParamError, GetType, LhsValue, ParserSettings, Type, }; -use std::{borrow::Cow, iter::once}; +use std::iter::once; /// A function which, given one or more arrays or byte-strings, returns the /// concatenation of each of them. @@ -43,15 +43,15 @@ fn concat_array<'a>(accumulator: Array<'a>, args: FunctionArgs<'_, 'a>) -> Array Array::try_from_vec(val_type, vec).unwrap() } -fn concat_bytes<'a>(mut accumulator: Cow<'a, [u8]>, args: FunctionArgs<'_, 'a>) -> Cow<'a, [u8]> { +fn concat_bytes<'a>(mut accumulator: Vec, args: FunctionArgs<'_, 'a>) -> Bytes<'a> { for arg in args { match arg { - Ok(LhsValue::Bytes(value)) => accumulator.to_mut().extend(value.iter()), + Ok(LhsValue::Bytes(value)) => accumulator.extend_from_slice(&value), Err(Type::Bytes) => (), _ => (), } } - accumulator + accumulator.into() } pub(crate) const EXPECTED_TYPES: [ExpectedType; 2] = @@ -103,7 +103,10 @@ impl FunctionDefinition for ConcatFunction { return Some(LhsValue::Array(concat_array(array, args))); } Ok(LhsValue::Bytes(bytes)) => { - return Some(LhsValue::Bytes(concat_bytes(bytes, args))); + return Some(LhsValue::Bytes(concat_bytes( + bytes.into_owned().into(), + args, + ))); } Err(_) => (), _ => unreachable!(), @@ -124,12 +127,12 @@ mod tests { #[test] fn test_concat_bytes() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"hello"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"world"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"hello"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"world"))), ] .into_iter(); assert_eq!( - Some(LhsValue::Bytes(Cow::Borrowed(b"helloworld"))), + Some(LhsValue::Bytes(Bytes::Borrowed(b"helloworld"))), CONCAT_FN.compile(&mut std::iter::empty(), None)(&mut args) ); } @@ -137,14 +140,14 @@ mod tests { #[test] fn test_concat_many_bytes() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"hello"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"world"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"hello2"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"world2"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"hello"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"world"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"hello2"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"world2"))), ] .into_iter(); assert_eq!( - Some(LhsValue::Bytes(Cow::Borrowed(b"helloworldhello2world2"))), + Some(LhsValue::Bytes(Bytes::Borrowed(b"helloworldhello2world2"))), CONCAT_FN.compile(&mut std::iter::empty(), None)(&mut args) ); } diff --git a/engine/src/functions/decode_base64.rs b/engine/src/functions/decode_base64.rs index 9570cccf..06cd5fff 100644 --- a/engine/src/functions/decode_base64.rs +++ b/engine/src/functions/decode_base64.rs @@ -1,10 +1,8 @@ -use std::borrow::Cow; - +use crate::lhs_types::Bytes; +use crate::{FunctionArgs, FunctionDefinition, LhsValue, Type}; use base64::Engine; use base64::engine::general_purpose::STANDARD; -use crate::{FunctionArgs, FunctionDefinition, LhsValue, Type}; - /// Decodes a Base64-encoded string specified in `source`. /// /// The `source` must be a field (not a literal). The function decodes using @@ -20,13 +18,15 @@ use crate::{FunctionArgs, FunctionDefinition, LhsValue, Type}; /// /// The above evaluates to true because `MTIzYWJj` decodes to `"123abc"`. #[derive(Default, Debug)] +#[allow(dead_code)] pub struct DecodeBase64Function {} #[inline] -fn decode_base64_impl_inner<'a>(source: Cow<'_, [u8]>) -> Cow<'a, [u8]> { - match STANDARD.decode(source.as_ref()) { - Ok(decoded) => Cow::Owned(decoded), - Err(_) => Cow::Owned(Vec::new()), +#[allow(dead_code)] +fn decode_base64_impl_inner(source: &[u8]) -> Bytes<'static> { + match STANDARD.decode(source) { + Ok(decoded) => Bytes::Owned(decoded.into_boxed_slice()), + Err(_) => Bytes::Owned(Vec::new().into_boxed_slice()), } } @@ -39,7 +39,10 @@ fn decode_base64_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { } match source { - Ok(LhsValue::Bytes(b)) => Some(LhsValue::Bytes(decode_base64_impl_inner(b))), + Ok(LhsValue::Bytes(b)) => { + let decoded = decode_base64_impl_inner(b.as_ref()); + Some(LhsValue::Bytes(decoded)) + } Err(Type::Bytes) => None, _ => unreachable!(), } @@ -93,12 +96,12 @@ mod tests { use super::*; fn owned_bytes(s: &str) -> LhsValue<'_> { - LhsValue::Bytes(Cow::Owned(s.as_bytes().to_vec())) + LhsValue::Bytes(Bytes::Owned(s.as_bytes().to_vec().into_boxed_slice())) } #[test] fn test_decode_base64_basic() { - let mut args = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"MTIzYWJj")))].into_iter(); + let mut args = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"MTIzYWJj")))].into_iter(); assert_eq!(decode_base64_impl(&mut args), Some(owned_bytes("123abc"))); } @@ -113,8 +116,8 @@ mod tests { #[should_panic(expected = "expected exactly 1 arg, got 2")] fn test_panic_more_args() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"MTIzYWJj"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"MTIzYWJj"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"MTIzYWJj"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"MTIzYWJj"))), ] .into_iter(); decode_base64_impl(&mut args); diff --git a/engine/src/functions/ends_with.rs b/engine/src/functions/ends_with.rs index aa01da49..f552022c 100644 --- a/engine/src/functions/ends_with.rs +++ b/engine/src/functions/ends_with.rs @@ -1,5 +1,6 @@ use std::iter; +use crate::lhs_types::Bytes; use crate::{LhsValue, Type}; use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; @@ -77,27 +78,26 @@ impl FunctionDefinition for EndsWithFunction { #[cfg(test)] mod tests { use super::*; - use std::borrow::Cow; #[test] fn test_ends_with_fn() { let mut true_args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"example_value"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"value"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"example_value")), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"value"))), ] .into_iter(); assert_eq!(ends_with_impl(&mut true_args), Some(LhsValue::Bool(true))); let mut false_args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"example_value"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"exampl"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"example_value")), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"exampl"))), ] .into_iter(); assert_eq!(ends_with_impl(&mut false_args), Some(LhsValue::Bool(false))); let mut empty_source_args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b""))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"exampl"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b""))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"exampl"))), ] .into_iter(); assert_eq!( @@ -106,8 +106,8 @@ mod tests { ); let mut empty_substring_args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"example_value"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b""))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"example_value")), + Ok(LhsValue::Bytes(Bytes::Borrowed(b""))), ] .into_iter(); assert_eq!( @@ -133,11 +133,11 @@ mod tests { #[test] fn test_bad_args() { let mut first_arg_error = - vec![Err(Type::Bytes), Ok(LhsValue::Bytes(Cow::Borrowed(b"")))].into_iter(); + vec![Err(Type::Bytes), Ok(LhsValue::Bytes(Bytes::Borrowed(b"")))].into_iter(); assert_eq!(ends_with_impl(&mut first_arg_error), None); let mut second_arg_error = - vec![Ok(LhsValue::Bytes(Cow::Borrowed(b""))), Err(Type::Bytes)].into_iter(); + vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b""))), Err(Type::Bytes)].into_iter(); assert_eq!(ends_with_impl(&mut second_arg_error), None); let mut both_arg_error = vec![Err(Type::Bytes), Err(Type::Bytes)].into_iter(); diff --git a/engine/src/functions/json_lookup_integer.rs b/engine/src/functions/json_lookup_integer.rs index 5f2b9ba1..c81f3f91 100644 --- a/engine/src/functions/json_lookup_integer.rs +++ b/engine/src/functions/json_lookup_integer.rs @@ -1,5 +1,6 @@ use std::iter; +use crate::lhs_types::Bytes; use crate::{LhsValue, Type}; use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; @@ -133,14 +134,13 @@ impl FunctionDefinition for JsonLookupIntegerFunction { #[cfg(test)] mod tests { use super::*; - use std::borrow::Cow; #[test] fn test_lookup_json_integer_basic() { let json = r#"{ "record_id": "aed53a", "version": 2 }"#; let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(json.as_bytes()))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"version"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(json.as_bytes()))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"version"))), ] .into_iter(); assert_eq!(json_lookup_integer_impl(&mut args), Some(LhsValue::Int(2))); @@ -150,8 +150,8 @@ mod tests { fn test_lookup_json_integer_basic_negative() { let json = r#"{ "record_id": "aed53a", "version": -2 }"#; let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(json.as_bytes()))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"version"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(json.as_bytes()))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"version"))), ] .into_iter(); assert_eq!(json_lookup_integer_impl(&mut args), Some(LhsValue::Int(-2))); @@ -161,9 +161,9 @@ mod tests { fn test_lookup_json_integer_nested() { let json = r#"{ "product": { "id": 356 } }"#; let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(json.as_bytes()))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"product"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"id"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(json.as_bytes()))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"product"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"id"))), ] .into_iter(); assert_eq!( @@ -176,7 +176,7 @@ mod tests { fn test_lookup_json_integer_array_root() { let json = r#"["first_item", -234]"#; let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(json.as_bytes()))), + Ok(LhsValue::Bytes(Bytes::Borrowed(json.as_bytes()))), Ok(LhsValue::Int(1)), ] .into_iter(); @@ -190,8 +190,8 @@ mod tests { fn test_lookup_json_integer_array_in_object() { let json = r#"{ "network_ids": [123, 456] }"#; let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(json.as_bytes()))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"network_ids"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(json.as_bytes()))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"network_ids"))), Ok(LhsValue::Int(0)), ] .into_iter(); @@ -205,9 +205,9 @@ mod tests { fn test_lookup_json_integer_array_of_objects() { let json = r#"[{ "product_id": 123 }, { "product_id": 456 }]"#; let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(json.as_bytes()))), + Ok(LhsValue::Bytes(Bytes::Borrowed(json.as_bytes()))), Ok(LhsValue::Int(1)), - Ok(LhsValue::Bytes(Cow::Borrowed(b"product_id"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"product_id"))), ] .into_iter(); assert_eq!( @@ -220,8 +220,8 @@ mod tests { fn test_lookup_json_integer_non_integer_float() { let json = r#"{ "value": 42.0 }"#; let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(json.as_bytes()))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"value"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(json.as_bytes()))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"value"))), ] .into_iter(); assert_eq!(json_lookup_integer_impl(&mut args), None); @@ -231,8 +231,8 @@ mod tests { fn test_lookup_json_integer_invalid_json() { let json = b"not a json"; let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(json))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"k"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(json))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"k"))), ] .into_iter(); assert_eq!(json_lookup_integer_impl(&mut args), None); diff --git a/engine/src/functions/json_lookup_string.rs b/engine/src/functions/json_lookup_string.rs index cc8a3f61..4daeee54 100644 --- a/engine/src/functions/json_lookup_string.rs +++ b/engine/src/functions/json_lookup_string.rs @@ -1,5 +1,6 @@ use std::iter; +use crate::lhs_types::Bytes; use crate::{LhsValue, Type}; use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; @@ -81,7 +82,7 @@ fn json_lookup_string_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option(args: FunctionArgs<'_, 'a>) -> Option> { match arg { Ok(LhsValue::Bytes(bytes)) => { - let bytes_lower = bytes.into_owned().to_ascii_lowercase(); - Some(LhsValue::Bytes(Cow::Owned(bytes_lower))) + let bytes_lower: Vec = bytes.into_owned().to_vec(); + let bytes_lower = bytes_lower.to_ascii_lowercase(); + Some(LhsValue::Bytes(Bytes::Owned( + bytes_lower.into_boxed_slice(), + ))) } Err(Type::Bytes) => None, _ => unreachable!(), @@ -75,31 +76,40 @@ mod tests { #[test] fn test_lower_fn() { // Test with an all-uppercase string - let mut args_upper = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"HELLO WORLD")))].into_iter(); + let mut args_upper = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"HELLO WORLD")))].into_iter(); assert_eq!( lower_impl(&mut args_upper), - Some(LhsValue::Bytes(Cow::Owned(b"hello world".to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned( + b"hello world".to_vec().into_boxed_slice() + ))) ); // Test with a mixed-case string - let mut args_mixed = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"MiXeD CaSe")))].into_iter(); + let mut args_mixed = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"MiXeD CaSe")))].into_iter(); assert_eq!( lower_impl(&mut args_mixed), - Some(LhsValue::Bytes(Cow::Owned(b"mixed case".to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned( + b"mixed case".to_vec().into_boxed_slice() + ))) ); // Test with an already lowercase string - let mut args_lower = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"already lower")))].into_iter(); + let mut args_lower = + vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"already lower")))].into_iter(); assert_eq!( lower_impl(&mut args_lower), - Some(LhsValue::Bytes(Cow::Owned(b"already lower".to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned( + b"already lower".to_vec().into_boxed_slice() + ))) ); // Test with an empty string - let mut args_empty = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"")))].into_iter(); + let mut args_empty = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"")))].into_iter(); assert_eq!( lower_impl(&mut args_empty), - Some(LhsValue::Bytes(Cow::Owned(b"".to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned( + b"".to_vec().into_boxed_slice() + ))) ); // Test with missing field @@ -118,8 +128,8 @@ mod tests { #[should_panic(expected = "expected 1 argument, got 2")] fn test_lower_fn_too_many_args() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"a"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"b"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"a"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"b"))), ] .into_iter(); lower_impl(&mut args); diff --git a/engine/src/functions/mod.rs b/engine/src/functions/mod.rs index 7fcebf79..3a2f1730 100644 --- a/engine/src/functions/mod.rs +++ b/engine/src/functions/mod.rs @@ -3,47 +3,34 @@ pub(crate) mod any; pub(crate) mod cidr; pub(crate) mod concat; pub(crate) mod decode_base64; -pub(crate) mod ends_with; -pub(crate) mod json_lookup_integer; -pub(crate) mod json_lookup_string; pub(crate) mod len; pub(crate) mod lower; pub(crate) mod remove_bytes; -pub(crate) mod remove_query_args; pub(crate) mod starts_with; pub(crate) mod substring; -pub(crate) mod to_string; -pub(crate) mod upper; pub(crate) mod url_decode; pub(crate) mod uuid4; pub(crate) mod wildcard_replace; -use crate::{ - ParserSettings, - filter::CompiledValueResult, - types::{ExpectedType, ExpectedTypeList, GetType, LhsValue, RhsValue, Type, TypeMismatchError}, +use crate::ParserSettings; +use crate::filter::CompiledValueResult; +use crate::types::{ + ExpectedType, ExpectedTypeList, GetType, LhsValue, RhsValue, Type, TypeMismatchError, }; pub use all::AllFunction; pub use any::AnyFunction; pub use cidr::CIDRFunction; pub use concat::ConcatFunction; pub use decode_base64::DecodeBase64Function; -pub use ends_with::EndsWithFunction; -pub use json_lookup_integer::JsonLookupIntegerFunction; -pub use json_lookup_string::JsonLookupStringFunction; pub use len::LenFunction; pub use lower::LowerFunction; pub use remove_bytes::RemoveBytesFunction; -pub use remove_query_args::RemoveQueryArgsFunction; pub use starts_with::StartsWithFunction; use std::any::Any; -use std::convert::TryFrom; use std::fmt::{self, Debug}; use std::iter::once; pub use substring::SubstringFunction; use thiserror::Error; -pub use to_string::ToStringFunction; -pub use upper::UpperFunction; pub use url_decode::UrlDecodeFunction; pub use uuid4::UUID4Function; pub use wildcard_replace::WildcardReplaceFunction; @@ -439,7 +426,7 @@ pub trait FunctionDefinition: Debug + Send + Sync { ) -> Box Fn(FunctionArgs<'i, 'a>) -> Option> + Sync + Send + 'static>; } -/* Simple function APIs */ +// Simple function APIs type FunctionPtr = for<'i, 'a> fn(FunctionArgs<'i, 'a>) -> Option>; diff --git a/engine/src/functions/remove_bytes.rs b/engine/src/functions/remove_bytes.rs index 4064ae13..ce6e2234 100644 --- a/engine/src/functions/remove_bytes.rs +++ b/engine/src/functions/remove_bytes.rs @@ -1,8 +1,5 @@ -use std::borrow::Cow; - -use crate::{LhsValue, Type}; - -use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; +use crate::lhs_types::Bytes; +use crate::{FunctionArgKind, FunctionArgs, FunctionDefinition, LhsValue, Type}; /// Removes all bytes that appear in the provided byte list from the source bytes. /// @@ -10,9 +7,11 @@ use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; /// will be removed from the source. For example, `remove_bytes(field, "abc")` /// removes all `a`, `b`, and `c` bytes from `field`. #[derive(Debug, Default)] +#[allow(dead_code)] pub struct RemoveBytesFunction {} #[inline] +#[allow(dead_code)] fn remove_bytes_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { let source_arg = args.next().expect("expected 2 argument, got 0"); let pattern_arg = args.next().expect("expected 2 arguments, got 1"); @@ -27,7 +26,9 @@ fn remove_bytes_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { let pattern_bytes = pattern_list.as_ref(); if pattern_bytes.is_empty() { - return Some(LhsValue::Bytes(Cow::Owned(source_bytes.to_vec()))); + return Some(LhsValue::Bytes(Bytes::Owned( + source_bytes.to_vec().into_boxed_slice(), + ))); } let mut to_remove = [false; 256]; @@ -42,7 +43,7 @@ fn remove_bytes_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { } } - Some(LhsValue::Bytes(Cow::Owned(res))) + Some(LhsValue::Bytes(Bytes::Owned(res.into_boxed_slice()))) } (Err(Type::Bytes), _) => None, (_, Err(Type::Bytes)) => None, @@ -98,17 +99,16 @@ impl FunctionDefinition for RemoveBytesFunction { #[cfg(test)] mod tests { use super::*; - use std::borrow::Cow; fn owned_bytes(s: &str) -> LhsValue<'_> { - LhsValue::Bytes(Cow::Owned(s.as_bytes().to_vec())) + LhsValue::Bytes(Bytes::Owned(s.as_bytes().to_vec().into_boxed_slice())) } #[test] fn test_remove_bytes_basic() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"cloudflare.com"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"."))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"cloudflare.com"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"."))), ] .into_iter(); assert_eq!( @@ -120,8 +120,8 @@ mod tests { #[test] fn test_remove_bytes_multibyte_pattern() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"a--b--c"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"-"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"a--b--c"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"-"))), ] .into_iter(); assert_eq!(remove_bytes_impl(&mut args), Some(owned_bytes("abc"))); @@ -130,8 +130,8 @@ mod tests { #[test] fn test_remove_multiple_bytes() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"ab1c2d3"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"123"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"ab1c2d3"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"123"))), ] .into_iter(); assert_eq!(remove_bytes_impl(&mut args), Some(owned_bytes("abcd"))); @@ -140,8 +140,8 @@ mod tests { #[test] fn test_remove_bytes_no_match() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"hello"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"z"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"hello"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"z"))), ] .into_iter(); assert_eq!(remove_bytes_impl(&mut args), Some(owned_bytes("hello"))); @@ -150,8 +150,8 @@ mod tests { #[test] fn test_remove_bytes_empty_pattern() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"abc"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b""))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"abc"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b""))), ] .into_iter(); assert_eq!(remove_bytes_impl(&mut args), Some(owned_bytes("abc"))); @@ -167,11 +167,11 @@ mod tests { #[test] fn test_bad_args() { let mut first_arg_error = - vec![Err(Type::Bytes), Ok(LhsValue::Bytes(Cow::Borrowed(b"")))].into_iter(); + vec![Err(Type::Bytes), Ok(LhsValue::Bytes(Bytes::Borrowed(b"")))].into_iter(); assert_eq!(remove_bytes_impl(&mut first_arg_error), None); let mut second_arg_error = - vec![Ok(LhsValue::Bytes(Cow::Borrowed(b""))), Err(Type::Bytes)].into_iter(); + vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b""))), Err(Type::Bytes)].into_iter(); assert_eq!(remove_bytes_impl(&mut second_arg_error), None); } } diff --git a/engine/src/functions/remove_query_args.rs b/engine/src/functions/remove_query_args.rs index c398e5b4..856232df 100644 --- a/engine/src/functions/remove_query_args.rs +++ b/engine/src/functions/remove_query_args.rs @@ -1,6 +1,6 @@ -use std::borrow::Cow; use std::collections::HashSet; +use crate::lhs_types::Bytes; use crate::{LhsValue, Type}; use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; @@ -59,7 +59,7 @@ fn remove_query_args_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option out.extend_from_slice(seg); } - Some(LhsValue::Bytes(Cow::Owned(out))) + Some(LhsValue::Bytes(Bytes::Owned(out.into_boxed_slice()))) } (Err(Type::Bytes), _) => None, _ => unreachable!(), @@ -113,31 +113,30 @@ impl FunctionDefinition for RemoveQueryArgsFunction { #[cfg(test)] mod tests { use super::*; - use std::borrow::Cow; fn owned(s: &str) -> LhsValue<'_> { - LhsValue::Bytes(Cow::Owned(s.as_bytes().to_vec())) + LhsValue::Bytes(Bytes::Owned(s.as_bytes().to_vec().into_boxed_slice())) } #[test] fn test_remove_query_args_basic() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"order=asc&country=GB"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"country"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"order=asc&country=GB"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"country"))), ] .into_iter(); assert_eq!(remove_query_args_impl(&mut args), Some(owned("order=asc"))); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"order=asc&country=GB"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"order"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"order=asc&country=GB"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"order"))), ] .into_iter(); assert_eq!(remove_query_args_impl(&mut args), Some(owned("country=GB"))); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"order=asc&country=GB"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"search"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"order=asc&country=GB"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"search"))), ] .into_iter(); assert_eq!( @@ -149,10 +148,10 @@ mod tests { #[test] fn test_remove_query_args_repeated() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed( + Ok(LhsValue::Bytes(Bytes::Borrowed( b"category=Foo&order=desc&category=Bar", ))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"order"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"order"))), ] .into_iter(); assert_eq!( @@ -161,10 +160,10 @@ mod tests { ); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed( + Ok(LhsValue::Bytes(Bytes::Borrowed( b"category=Foo&order=desc&category=Bar", ))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"category"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"category"))), ] .into_iter(); assert_eq!(remove_query_args_impl(&mut args), Some(owned("order=desc"))); @@ -173,9 +172,9 @@ mod tests { #[test] fn test_remove_query_args_multiple_params() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"a=1&b=2&c=3&d=4"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"b"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"d"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"a=1&b=2&c=3&d=4"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"b"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"d"))), ] .into_iter(); assert_eq!(remove_query_args_impl(&mut args), Some(owned("a=1&c=3"))); @@ -184,8 +183,8 @@ mod tests { #[test] fn test_remove_query_args_no_match() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"x=1&y=2"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"z"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"x=1&y=2"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"z"))), ] .into_iter(); assert_eq!(remove_query_args_impl(&mut args), Some(owned("x=1&y=2"))); @@ -194,8 +193,8 @@ mod tests { #[test] fn test_remove_query_args_empty_result() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"only=one"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"only"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"only=one"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"only"))), ] .into_iter(); assert_eq!(remove_query_args_impl(&mut args), Some(owned(""))); @@ -211,7 +210,7 @@ mod tests { #[test] #[should_panic(expected = "expected at least 2 args, got 1")] fn test_panic_one_arg() { - let mut args = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"a=1&b=2")))].into_iter(); + let mut args = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"a=1&b=2")))].into_iter(); remove_query_args_impl(&mut args); } } diff --git a/engine/src/functions/starts_with.rs b/engine/src/functions/starts_with.rs index a61298fd..c7452709 100644 --- a/engine/src/functions/starts_with.rs +++ b/engine/src/functions/starts_with.rs @@ -1,8 +1,6 @@ -use std::iter; - -use crate::{LhsValue, Type}; - use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; +use crate::{LhsValue, Type}; +use std::iter; /// Returns `true` when the source starts with a given substring. Returns `false` otherwise. The source cannot be a literal value (like `"foo"`). /// For example, if `http.request.uri.path` is `"/blog/first-post"`, then `starts_with(http.request.uri.path, "/blog")` will return `true`. @@ -79,24 +77,20 @@ impl FunctionDefinition for StartsWithFunction { #[cfg(test)] mod tests { use super::*; - use std::borrow::Cow; - - // fn create_bytes_lhs_val(s: &str) -> LhsValue<'_> { - // LhsValue::Bytes(Cow::Owned(s.as_bytes().to_vec())) - // } + use crate::lhs_types::Bytes; #[test] fn test_starts_with_fn() { let mut true_args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"example_value"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"exampl"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"example_value"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"exampl"))), ] .into_iter(); assert_eq!(starts_with_impl(&mut true_args), Some(LhsValue::Bool(true))); let mut false_args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"example_value"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"empl"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"example_value"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"empl"))), ] .into_iter(); assert_eq!( @@ -105,8 +99,8 @@ mod tests { ); let mut empty_source_args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b""))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"exampl"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b""))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"exampl"))), ] .into_iter(); assert_eq!( @@ -115,8 +109,8 @@ mod tests { ); let mut empty_substring_args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"example_value"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b""))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"example_value"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b""))), ] .into_iter(); assert_eq!( @@ -142,11 +136,11 @@ mod tests { #[test] fn test_bad_args() { let mut first_arg_error = - vec![Err(Type::Bytes), Ok(LhsValue::Bytes(Cow::Borrowed(b"")))].into_iter(); + vec![Err(Type::Bytes), Ok(LhsValue::Bytes(Bytes::Borrowed(b"")))].into_iter(); assert_eq!(starts_with_impl(&mut first_arg_error), None); let mut second_arg_error = - vec![Ok(LhsValue::Bytes(Cow::Borrowed(b""))), Err(Type::Bytes)].into_iter(); + vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b""))), Err(Type::Bytes)].into_iter(); assert_eq!(starts_with_impl(&mut second_arg_error), None); let mut both_arg_error = vec![Err(Type::Bytes), Err(Type::Bytes)].into_iter(); diff --git a/engine/src/functions/substring.rs b/engine/src/functions/substring.rs index f1f78015..5d6b24e9 100644 --- a/engine/src/functions/substring.rs +++ b/engine/src/functions/substring.rs @@ -1,8 +1,6 @@ -use std::borrow::Cow; - -use crate::{LhsValue, Type}; - use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; +use crate::lhs_types::Bytes; +use crate::{LhsValue, Type}; /// Returns a substring (slice by byte index) of a String/Bytes field. /// @@ -33,9 +31,11 @@ use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; /// substring(http.request.body.raw, -2) -> "jk" /// substring(http.request.body.raw, 0, -2) -> "asdfgh" #[derive(Debug, Default)] +#[allow(dead_code)] pub struct SubstringFunction {} #[inline] +#[allow(dead_code)] fn substring_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { let source_arg = args.next().expect("expected at least 2 arguments, got 0"); let start_arg = args.next().expect("expected at least 2 arguments, got 1"); @@ -67,12 +67,14 @@ fn substring_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { } if end_idx < start_idx { - return Some(LhsValue::Bytes(Cow::Owned(Vec::new()))); + return Some(LhsValue::Bytes(Bytes::Owned(Vec::new().into_boxed_slice()))); } let start_us = start_idx as usize; let end_us = end_idx as usize; - Some(LhsValue::Bytes(Cow::Owned(s[start_us..end_us].to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned( + s[start_us..end_us].to_vec().into_boxed_slice(), + ))) } (Ok(LhsValue::Bytes(source)), Ok(LhsValue::Int(start)), None) => { let s = source.as_ref(); @@ -86,7 +88,9 @@ fn substring_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { } let start_us = start_idx as usize; - Some(LhsValue::Bytes(Cow::Owned(s[start_us..].to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned( + s[start_us..].to_vec().into_boxed_slice(), + ))) } (Err(Type::Bytes), _, _) => None, (_, Err(Type::Int), _) => None, @@ -146,16 +150,15 @@ impl FunctionDefinition for SubstringFunction { #[cfg(test)] mod tests { use super::*; - use std::borrow::Cow; fn owned(s: &str) -> LhsValue<'_> { - LhsValue::Bytes(Cow::Owned(s.as_bytes().to_vec())) + LhsValue::Bytes(Bytes::Owned(s.as_bytes().to_vec().into_boxed_slice())) } #[test] fn test_substring_examples() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"asdfghjk"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"asdfghjk"))), Ok(LhsValue::Int(2)), Ok(LhsValue::Int(5)), ] @@ -163,21 +166,21 @@ mod tests { assert_eq!(substring_impl(&mut args), Some(owned("dfg"))); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"asdfghjk"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"asdfghjk"))), Ok(LhsValue::Int(2)), ] .into_iter(); assert_eq!(substring_impl(&mut args), Some(owned("dfghjk"))); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"asdfghjk"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"asdfghjk"))), Ok(LhsValue::Int(-2)), ] .into_iter(); assert_eq!(substring_impl(&mut args), Some(owned("jk"))); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"asdfghjk"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"asdfghjk"))), Ok(LhsValue::Int(0)), Ok(LhsValue::Int(-2)), ] @@ -188,14 +191,14 @@ mod tests { #[test] fn test_substring_out_of_bounds() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"abc"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"abc"))), Ok(LhsValue::Int(10)), ] .into_iter(); assert_eq!(substring_impl(&mut args), Some(owned(""))); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"abc"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"abc"))), Ok(LhsValue::Int(-10)), ] .into_iter(); diff --git a/engine/src/functions/to_string.rs b/engine/src/functions/to_string.rs index 47e1105f..a6855041 100644 --- a/engine/src/functions/to_string.rs +++ b/engine/src/functions/to_string.rs @@ -1,5 +1,4 @@ -use std::borrow::Cow; - +use crate::lhs_types::Bytes; use crate::{LhsValue, Type}; use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; @@ -40,9 +39,9 @@ fn to_string_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { } match arg { - Ok(LhsValue::Int(i)) => Some(LhsValue::Bytes(Cow::Owned(i.to_string().into_bytes()))), - Ok(LhsValue::Bool(b)) => Some(LhsValue::Bytes(Cow::Owned(b.to_string().into_bytes()))), - Ok(LhsValue::Ip(ip)) => Some(LhsValue::Bytes(Cow::Owned(ip.to_string().into_bytes()))), + Ok(LhsValue::Int(i)) => Some(LhsValue::Bytes(Bytes::Owned(i.to_string().into_boxed_str().into_boxed_bytes()))), + Ok(LhsValue::Bool(b)) => Some(LhsValue::Bytes(Bytes::Owned(b.to_string().into_boxed_str().into_boxed_bytes()))), + Ok(LhsValue::Ip(ip)) => Some(LhsValue::Bytes(Bytes::Owned(ip.to_string().into_boxed_str().into_boxed_bytes()))), Err(Type::Int) | Err(Type::Bool) | Err(Type::Ip) => None, _ => unreachable!(), } @@ -97,7 +96,7 @@ mod tests { use std::borrow::Cow; fn owned(s: &str) -> LhsValue<'_> { - LhsValue::Bytes(Cow::Owned(s.as_bytes().to_vec())) + LhsValue::Bytes(Bytes::Owned(s.as_bytes().to_vec().into_boxed_slice())) } #[test] diff --git a/engine/src/functions/upper.rs b/engine/src/functions/upper.rs index ee26f1a6..02242c02 100644 --- a/engine/src/functions/upper.rs +++ b/engine/src/functions/upper.rs @@ -1,9 +1,8 @@ -use std::borrow::Cow; - -use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; -use crate::{LhsValue, Type}; use std::iter; +use crate::lhs_types::Bytes; +use crate::{FunctionArgKind, FunctionArgs, FunctionDefinition, LhsValue, Type}; + /// Converts a string field to uppercase. Only lowercase ASCII bytes are converted. All other bytes are unaffected. /// For example, if http.host is "www.cloudflare.com", then upper(http.host) will return "WWW.CLOUDFLARE.COM". #[derive(Debug, Default)] @@ -19,8 +18,9 @@ fn upper_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { match arg { Ok(LhsValue::Bytes(bytes)) => { - let bytes_upper = bytes.into_owned().to_ascii_uppercase(); - Some(LhsValue::Bytes(Cow::Owned(bytes_upper))) + let bytes_upper: Vec = bytes.into_owned().to_vec(); + let bytes_upper = bytes_upper.to_ascii_uppercase(); + Some(LhsValue::Bytes(Bytes::Owned(bytes_upper.into_boxed_slice()))) } Err(Type::Bytes) => None, _ => unreachable!(), @@ -75,39 +75,39 @@ mod tests { #[test] fn test_upper_fn() { // Test with an all-lowercase string - let mut args_lower = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"hello world")))].into_iter(); + let mut args_lower = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"hello world")))].into_iter(); assert_eq!( upper_impl(&mut args_lower), - Some(LhsValue::Bytes(Cow::Owned(b"HELLO WORLD".to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned(b"HELLO WORLD".to_vec().into_boxed_slice()))) ); // Test with a mixed-case string - let mut args_mixed = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"MiXeD CaSe")))].into_iter(); + let mut args_mixed = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"MiXeD CaSe")))].into_iter(); assert_eq!( upper_impl(&mut args_mixed), - Some(LhsValue::Bytes(Cow::Owned(b"MIXED CASE".to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned(b"MIXED CASE".to_vec().into_boxed_slice()))) ); // Test with an already uppercase string - let mut args_upper = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"ALREADY UPPER")))].into_iter(); + let mut args_upper = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"ALREADY UPPER")))].into_iter(); assert_eq!( upper_impl(&mut args_upper), - Some(LhsValue::Bytes(Cow::Owned(b"ALREADY UPPER".to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned(b"ALREADY UPPER".to_vec().into_boxed_slice()))) ); // Test with the example from the specification: "www.cloudflare.com" let mut args_example = - vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"www.cloudflare.com")))].into_iter(); + vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"www.cloudflare.com")))].into_iter(); assert_eq!( upper_impl(&mut args_example), - Some(LhsValue::Bytes(Cow::Owned(b"WWW.CLOUDFLARE.COM".to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned(b"WWW.CLOUDFLARE.COM".to_vec().into_boxed_slice()))) ); // Test with an empty string - let mut args_empty = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"")))].into_iter(); + let mut args_empty = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"")))].into_iter(); assert_eq!( upper_impl(&mut args_empty), - Some(LhsValue::Bytes(Cow::Owned(b"".to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned(b"".to_vec().into_boxed_slice()))) ); // Test with missing field @@ -116,10 +116,10 @@ mod tests { // Test that only ASCII lowercase bytes are converted, other bytes are unaffected let mut args_non_ascii = - vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"hello\xc3\xa9world")))].into_iter(); + vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"hello\xc3\xa9world")))].into_iter(); assert_eq!( upper_impl(&mut args_non_ascii), - Some(LhsValue::Bytes(Cow::Owned(b"HELLO\xc3\xa9WORLD".to_vec()))) + Some(LhsValue::Bytes(Bytes::Owned(b"HELLO\xc3\xa9WORLD".to_vec().into_boxed_slice()))) ); } @@ -134,8 +134,8 @@ mod tests { #[should_panic(expected = "expected 1 argument, got 2")] fn test_upper_fn_too_many_args() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"a"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"b"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"a"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"b"))), ] .into_iter(); upper_impl(&mut args); diff --git a/engine/src/functions/url_decode.rs b/engine/src/functions/url_decode.rs index 5038bcde..a768356d 100644 --- a/engine/src/functions/url_decode.rs +++ b/engine/src/functions/url_decode.rs @@ -1,6 +1,6 @@ -use std::{borrow::Cow, iter}; - +use crate::lhs_types::Bytes; use crate::{FunctionArgs, FunctionDefinition, LhsValue, Type}; +use std::iter; /// Decodes a URL-formatted string defined in source. /// @@ -90,11 +90,11 @@ fn decode_once(input: &[u8], unicode_u: bool) -> Vec { } #[inline] -fn url_decode<'a>(source: Cow<'_, [u8]>, options: Option>) -> Cow<'a, [u8]> { +fn url_decode(source: &[u8], options: Option<&[u8]>) -> Bytes<'static> { let mut recursive = false; let mut unicode_u = false; if let Some(opts) = options { - for &b in opts.as_ref() { + for &b in opts { match b { b'r' => recursive = true, b'u' => unicode_u = true, @@ -103,7 +103,7 @@ fn url_decode<'a>(source: Cow<'_, [u8]>, options: Option>) -> Cow< } } - let mut current = source.into_owned(); + let mut current = source.to_vec(); let mut next = decode_once(¤t, unicode_u); @@ -115,9 +115,9 @@ fn url_decode<'a>(source: Cow<'_, [u8]>, options: Option>) -> Cow< current = next; next = decode_once(¤t, unicode_u); } - Cow::Owned(current) + Bytes::Owned(current.into_boxed_slice()) } else { - Cow::Owned(next) + Bytes::Owned(next.into_boxed_slice()) } } @@ -133,12 +133,13 @@ fn url_decode_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { match (source_arg, options_arg) { (_, Some(Err(Type::Bytes))) => None, (Ok(LhsValue::Bytes(source)), opt) => { - let options_extracted = match opt { - Some(Ok(LhsValue::Bytes(o))) => Some(o), - None => None, - _ => unreachable!(), + // Extract options bytes into an owned buffer to avoid lifetime issues + let opts_bytes: Option> = match opt { + Some(Ok(LhsValue::Bytes(b))) => Some(b.as_ref().to_vec()), + _ => None, }; - Some(LhsValue::Bytes(url_decode(source, options_extracted))) + let decoded = url_decode(source.as_ref(), opts_bytes.as_deref()); + Some(LhsValue::Bytes(decoded)) } (Err(Type::Bytes), _) => None, _ => unreachable!(), @@ -197,26 +198,27 @@ impl FunctionDefinition for UrlDecodeFunction { #[cfg(test)] mod tests { use super::*; + use crate::lhs_types::Bytes; fn owned_bytes(s: &str) -> LhsValue<'_> { - LhsValue::Bytes(Cow::Owned(s.as_bytes().to_vec())) + LhsValue::Bytes(Bytes::Owned(s.as_bytes().to_vec().into_boxed_slice())) } #[test] fn test_url_decode_basic() { - let mut args = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"John%20Doe")))].into_iter(); + let mut args = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"John%20Doe")))].into_iter(); assert_eq!(url_decode_impl(&mut args), Some(owned_bytes("John Doe"))); - let mut args = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"John+Doe")))].into_iter(); + let mut args = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"John+Doe")))].into_iter(); assert_eq!(url_decode_impl(&mut args), Some(owned_bytes("John Doe"))); - let mut args = vec![Ok(LhsValue::Bytes(Cow::Borrowed(b"%2520")))].into_iter(); + let mut args = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(b"%2520")))].into_iter(); // without recursive flag -> "%20" assert_eq!(url_decode_impl(&mut args), Some(owned_bytes("%20"))); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"%2520"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"r"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"%2520"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"r"))), ] .into_iter(); assert_eq!(url_decode_impl(&mut args), Some(owned_bytes(" "))); @@ -226,13 +228,13 @@ mod tests { fn test_url_decode_unicode_u() { // %u2601 -> U+2601 (cloud) let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"%u2601"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"u"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"%u2601"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"u"))), ] .into_iter(); let res = url_decode_impl(&mut args).unwrap(); if let LhsValue::Bytes(b) = res { - assert_eq!(b.into_owned(), "☁".as_bytes()); + assert_eq!(b.into_owned(), "☁".as_bytes().to_vec().into_boxed_slice()); } else { panic!("expected bytes") } diff --git a/engine/src/functions/uuid4.rs b/engine/src/functions/uuid4.rs index ea0bb13c..83b7dfba 100644 --- a/engine/src/functions/uuid4.rs +++ b/engine/src/functions/uuid4.rs @@ -1,18 +1,19 @@ -use rand::rngs::StdRng; -use rand::{Rng, SeedableRng}; -use std::borrow::Cow; - use super::{FunctionArgKind, FunctionArgs, FunctionDefinition}; +use crate::lhs_types::Bytes; use crate::{LhsValue, Type}; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; use std::iter; /// Generates a random UUIDv4 (Universally Unique Identifier, version 4) based on the given argument (a source of randomness). /// To obtain an array of random bytes, use the cf.random_seed field. /// For example, uuidv4(cf.random_seed) will return a UUIDv4 similar to 49887398-6bcf-485f-8899-f15dbef4d1d5. #[derive(Debug, Default)] +#[allow(dead_code)] pub struct UUID4Function {} #[inline] +#[allow(dead_code)] fn uuid4_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { let arg = args.next().expect("expected 1 argument, got 0"); @@ -63,7 +64,9 @@ fn uuid4_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> { uuid_bytes[15] ); - Some(LhsValue::Bytes(Cow::Owned(uuid_string.into_bytes()))) + Some(LhsValue::Bytes(Bytes::Owned( + uuid_string.into_bytes().into_boxed_slice(), + ))) } Err(Type::Bytes) => None, _ => unreachable!(), @@ -119,7 +122,7 @@ mod tests { fn test_uuid4_fn() { // Test with some seed bytes let seed_bytes = b"\x12\x34\x56\x78\x9a\xbc\xde\xf0"; - let mut args = vec![Ok(LhsValue::Bytes(Cow::Borrowed(seed_bytes)))].into_iter(); + let mut args = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(seed_bytes)))].into_iter(); let result = uuid4_impl(&mut args); assert!(result.is_some()); @@ -149,10 +152,10 @@ mod tests { // Test that same seed produces same UUID (deterministic) let seed_bytes = b"test_seed_12345"; - let mut args1 = vec![Ok(LhsValue::Bytes(Cow::Borrowed(seed_bytes)))].into_iter(); + let mut args1 = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(seed_bytes)))].into_iter(); let result1 = uuid4_impl(&mut args1); - let mut args2 = vec![Ok(LhsValue::Bytes(Cow::Borrowed(seed_bytes)))].into_iter(); + let mut args2 = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(seed_bytes)))].into_iter(); let result2 = uuid4_impl(&mut args2); assert_eq!(result1, result2); @@ -164,10 +167,10 @@ mod tests { let seed1 = b"seed1"; let seed2 = b"seed2"; - let mut args1 = vec![Ok(LhsValue::Bytes(Cow::Borrowed(seed1)))].into_iter(); + let mut args1 = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(seed1)))].into_iter(); let result1 = uuid4_impl(&mut args1); - let mut args2 = vec![Ok(LhsValue::Bytes(Cow::Borrowed(seed2)))].into_iter(); + let mut args2 = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(seed2)))].into_iter(); let result2 = uuid4_impl(&mut args2); assert_ne!(result1, result2); @@ -177,7 +180,7 @@ mod tests { fn test_uuid4_fn_short_seed() { // Test with a single byte seed (should work) let short_seed = b"a"; - let mut args = vec![Ok(LhsValue::Bytes(Cow::Borrowed(short_seed)))].into_iter(); + let mut args = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(short_seed)))].into_iter(); let result = uuid4_impl(&mut args); assert!(result.is_some()); @@ -195,7 +198,7 @@ mod tests { fn test_uuid4_fn_empty_bytes() { // Test with empty bytes (should return None now) let empty_bytes = b""; - let mut args = vec![Ok(LhsValue::Bytes(Cow::Borrowed(empty_bytes)))].into_iter(); + let mut args = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(empty_bytes)))].into_iter(); let result = uuid4_impl(&mut args); assert_eq!(result, None); @@ -205,7 +208,7 @@ mod tests { fn test_uuid4_fn_long_seed() { // Test with a long seed (should work with any length) let long_seed = b"this_is_a_very_long_seed_with_many_bytes_to_test_entropy_mixing"; - let mut args = vec![Ok(LhsValue::Bytes(Cow::Borrowed(long_seed)))].into_iter(); + let mut args = vec![Ok(LhsValue::Bytes(Bytes::Borrowed(long_seed)))].into_iter(); let result = uuid4_impl(&mut args); assert!(result.is_some()); @@ -237,8 +240,8 @@ mod tests { #[should_panic(expected = "expected 1 argument, got 2")] fn test_uuid4_fn_too_many_args() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"a"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"b"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"a"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"b"))), ] .into_iter(); uuid4_impl(&mut args); diff --git a/engine/src/functions/wildcard_replace.rs b/engine/src/functions/wildcard_replace.rs index 51606f89..3730a495 100644 --- a/engine/src/functions/wildcard_replace.rs +++ b/engine/src/functions/wildcard_replace.rs @@ -1,7 +1,7 @@ -use std::{borrow::Cow, iter}; - +use crate::lhs_types::Bytes; use crate::{FunctionArgs, FunctionDefinition, LhsValue, Type}; use outer_regex::bytes::Regex; +use std::iter; /// Mimics Cloudflare's `wildcard_replace` function for byte slice inputs and output. /// @@ -58,24 +58,24 @@ use outer_regex::bytes::Regex; /// * If `wildcard_pattern` is not valid UTF-8. /// * If `replacement` is not valid UTF-8. /// * If the `wildcard_pattern` results in an invalid regular expression (e.g., `**`). -/// + #[derive(Debug, Default)] pub struct WildcardReplaceFunction {} #[inline] -fn wildcard_replace<'a>( - source: Cow<'_, [u8]>, - wildcard_pattern: Cow<'_, [u8]>, - replacement: Cow<'_, [u8]>, - flags: Option>, -) -> Cow<'a, [u8]> { - let widlcard_pattern_str = std::str::from_utf8(wildcard_pattern.as_ref()) +fn wildcard_replace( + source: &[u8], + wildcard_pattern: &[u8], + replacement: &[u8], + case_sensitive: bool, +) -> Bytes<'static> { + let widlcard_pattern_str = std::str::from_utf8(wildcard_pattern) .expect("Pattern argument must be valid UTF-8 for wildcard replacement."); - let replacement_str = std::str::from_utf8(replacement.as_ref()) + let replacement_str = std::str::from_utf8(replacement) .expect("Replacement argument must be valid UTF-8 for wildcard replacement."); - let mut regex_pattern_str = String::from('^'); + let mut regex_pattern_str = String::new(); for c in widlcard_pattern_str.chars() { match c { '*' => regex_pattern_str.push_str("(.*?)"), @@ -90,21 +90,39 @@ fn wildcard_replace<'a>( } } - let final_regex_pattern = match flags { - Some(flag_bytes) => { - if flag_bytes.as_ref() == [b's'] { - regex_pattern_str - } else { - format!("(?i){}", regex_pattern_str) - } + if case_sensitive { + // For case-sensitive, use anchored pattern + let re = Regex::new(&format!("^{}$", regex_pattern_str)) + .expect("Invalid regex pattern generated."); + let replaced_bytes: Vec = re + .replace_all(source, replacement_str.as_bytes()) + .into_owned(); + Bytes::Owned(replaced_bytes.into_boxed_slice()) + } else { + // For case-insensitive, check if empty pattern first (special case) + if regex_pattern_str.is_empty() { + // Empty pattern matches at start of string only (prepend replacement) + let result = [replacement_str.as_bytes(), source].concat(); + return Bytes::Owned(result.into_boxed_slice()); } - _ => regex_pattern_str, - }; - let re = Regex::new(&final_regex_pattern).expect("Invalid regex pattern generated."); - let replaced_bytes: Cow<'_, [u8]> = re.replace_all(source.as_ref(), replacement_str.as_bytes()); + // For case-insensitive matching, we need to check if the entire source string + // matches the pattern before doing replacement + let check_pattern = format!("^(?i:{})$", regex_pattern_str); + let re_check = Regex::new(&check_pattern).expect("Invalid regex pattern generated."); - Cow::Owned(replaced_bytes.into_owned()) + // Check if entire source matches + if !re_check.is_match(source) { + return Bytes::Owned(source.to_vec().into_boxed_slice()); + } + + // For replacement with captures, use the same anchored pattern + let re = Regex::new(&check_pattern).expect("Invalid regex pattern generated."); + let replaced_bytes: Vec = re + .replace_all(source, replacement_str.as_bytes()) + .into_owned(); + Bytes::Owned(replaced_bytes.into_boxed_slice()) + } } #[inline] @@ -119,23 +137,23 @@ fn wildcard_replace_impl<'a>(args: FunctionArgs<'_, 'a>) -> Option> } match (source_arg, wildcard_pattern_arg, replacement_arg, flags_arg) { - (_, _, _, Some(Err(Type::Bytes))) => None, // needs to be tested here so it does not go into unreachable + (_, _, _, Some(Err(Type::Bytes))) => None, /* needs to be tested here so it does not go into unreachable */ ( Ok(LhsValue::Bytes(source)), Ok(LhsValue::Bytes(wildcard_pattern)), Ok(LhsValue::Bytes(replacement)), flags, ) => { - let flags_extracted = match flags { - Some(Ok(LhsValue::Bytes(flags_raw))) => Some(flags_raw), - None => None, + let case_sensitive = match flags { + Some(Ok(LhsValue::Bytes(flags_raw))) => flags_raw.as_ref() == [b's'], + None => false, _ => unreachable!(), }; Some(LhsValue::Bytes(wildcard_replace( - source, - wildcard_pattern, - replacement, - flags_extracted, + source.as_ref(), + wildcard_pattern.as_ref(), + replacement.as_ref(), + case_sensitive, ))) } (Err(Type::Bytes), _, _, _) => None, @@ -209,21 +227,21 @@ impl FunctionDefinition for WildcardReplaceFunction { #[cfg(test)] mod tests { use super::*; - use crate::Type; - use std::borrow::Cow; fn owned_bytes(s: &str) -> LhsValue<'_> { - LhsValue::Bytes(Cow::Owned(s.as_bytes().to_vec())) + LhsValue::Bytes(Bytes::Owned(s.as_bytes().to_vec().into_boxed_slice())) } #[test] fn test_wildcard_replace_for_uri() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed( + Ok(LhsValue::Bytes(Bytes::Borrowed( b"https://apps.example.com/calendar/admin?expand=true", ))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"https://*.example.com/*/*"))), - Ok(LhsValue::Bytes(Cow::Borrowed( + Ok(LhsValue::Bytes(Bytes::Borrowed( + b"https://*.example.com/*/*", + ))), + Ok(LhsValue::Bytes(Bytes::Borrowed( b"https://example.com/${1}/${2}/${3}", ))), ] @@ -236,11 +254,11 @@ mod tests { ); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed( + Ok(LhsValue::Bytes(Bytes::Borrowed( b"https://example.com/applications/app1", ))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"/applications/*"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"/apps/${1}"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"/applications/*"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"/apps/${1}"))), ] .into_iter(); assert_eq!( @@ -249,9 +267,9 @@ mod tests { ); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"/calendar"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"/*"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"/apps/${1}"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"/calendar"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"/*"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"/apps/${1}"))), ] .into_iter(); assert_eq!( @@ -260,10 +278,10 @@ mod tests { ); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"/Apps/calendar"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"/apps/*"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"/${1}"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"s"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"/Apps/calendar"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"/apps/*"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"/${1}"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"s"))), ] .into_iter(); assert_eq!( @@ -272,9 +290,9 @@ mod tests { ); let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"/apps/calendar/login"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"/apps/*/login"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"/${1}/login"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"/apps/calendar/login"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"/apps/*/login"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"/${1}/login"))), ] .into_iter(); assert_eq!( @@ -286,9 +304,9 @@ mod tests { #[test] fn test_wildcard_replace_basic() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"hello world"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"w*d"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"universe"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"hello world"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"w*d"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"universe"))), ] .into_iter(); assert_eq!( @@ -300,9 +318,9 @@ mod tests { #[test] fn test_wildcard_replace_special_chars_in_pattern() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"file.txt"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"*.txt"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"document.md"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"file.txt"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"*.txt"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"document.md"))), ] .into_iter(); assert_eq!( @@ -314,9 +332,9 @@ mod tests { #[test] fn test_wildcard_replace_no_match() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"hello world"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"xyz*"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"test"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"hello world"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"xyz*"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"test"))), ] .into_iter(); assert_eq!( @@ -328,9 +346,9 @@ mod tests { #[test] fn test_wildcard_replace_empty_source() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b""))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"*"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"replaced"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b""))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"*"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"replaced"))), ] .into_iter(); assert_eq!( @@ -348,9 +366,9 @@ mod tests { // or "XaXbXcX" (if regex matches between chars). // The current code's `re.replace_all` with an empty pattern and "X" on "abc" results in "Xabc". let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"abc"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b""))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"X"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"abc"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b""))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"X"))), ] .into_iter(); assert_eq!(wildcard_replace_impl(&mut args), Some(owned_bytes("Xabc"))); @@ -359,9 +377,9 @@ mod tests { #[test] fn test_wildcard_replace_empty_replacement() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"remove this part"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b" this *"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b""))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"remove this part"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b" this *"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b""))), ] .into_iter(); assert_eq!( @@ -373,10 +391,10 @@ mod tests { #[test] fn test_wildcard_replace_with_s_flag() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"HELLO world"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"h*o"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"X"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"s"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"HELLO world"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"h*o"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"X"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"s"))), ] .into_iter(); assert_eq!( @@ -388,9 +406,9 @@ mod tests { #[test] fn test_wildcard_replace_no_flag() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"HELLO world"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"h*o"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"X"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"HELLO world"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"h*o"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"X"))), ] .into_iter(); assert_eq!( @@ -410,8 +428,8 @@ mod tests { #[should_panic(expected = "expected at least 3 args, got 2")] fn test_panic_two_args() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"a"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"b"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"a"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"b"))), ] .into_iter(); wildcard_replace_impl(&mut args); @@ -421,11 +439,11 @@ mod tests { #[should_panic(expected = "expected maximum 4 args, got 5")] fn test_panic_five_args() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"a"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"b"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"c"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"d"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"e"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"a"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"b"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"c"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"d"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"e"))), ] .into_iter(); wildcard_replace_impl(&mut args); @@ -436,25 +454,25 @@ mod tests { // Source is Err let mut args_err_source = vec![ Err(Type::Bytes), - Ok(LhsValue::Bytes(Cow::Borrowed(b"*"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"rep"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"*"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"rep"))), ] .into_iter(); assert_eq!(wildcard_replace_impl(&mut args_err_source), None); // Pattern is Err let mut args_err_pattern = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"abc"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"abc"))), Err(Type::Bytes), - Ok(LhsValue::Bytes(Cow::Borrowed(b"rep"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"rep"))), ] .into_iter(); assert_eq!(wildcard_replace_impl(&mut args_err_pattern), None); // Replacement is Err let mut args_err_replacement = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"abc"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"*"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"abc"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"*"))), Err(Type::Bytes), ] .into_iter(); @@ -462,9 +480,9 @@ mod tests { // Flags is Err let mut args_err_flags = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"abc"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"*"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"rep"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"abc"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"*"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"rep"))), Err(Type::Bytes), ] .into_iter(); @@ -475,9 +493,9 @@ mod tests { #[should_panic(expected = "Pattern argument must be valid UTF-8 for wildcard replacement.")] fn test_panic_invalid_utf8_pattern() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"source"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"\xFF\xFE"))), // Invalid UTF-8 - Ok(LhsValue::Bytes(Cow::Borrowed(b"replacement"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"source"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"\xFF\xFE"))), // Invalid UTF-8 + Ok(LhsValue::Bytes(Bytes::Borrowed(b"replacement"))), ] .into_iter(); wildcard_replace_impl(&mut args); @@ -487,9 +505,9 @@ mod tests { #[should_panic(expected = "Replacement argument must be valid UTF-8 for wildcard replacement.")] fn test_panic_invalid_utf8_replacement() { let mut args = vec![ - Ok(LhsValue::Bytes(Cow::Borrowed(b"source"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"*"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"\xFF\xFE"))), // Invalid UTF-8 + Ok(LhsValue::Bytes(Bytes::Borrowed(b"source"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"*"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"\xFF\xFE"))), // Invalid UTF-8 ] .into_iter(); wildcard_replace_impl(&mut args); @@ -500,8 +518,8 @@ mod tests { fn test_panic_incorrect_arg_type() { let mut args = vec![ Ok(LhsValue::Int(123)), // Not Bytes - Ok(LhsValue::Bytes(Cow::Borrowed(b"*"))), - Ok(LhsValue::Bytes(Cow::Borrowed(b"replacement"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"*"))), + Ok(LhsValue::Bytes(Bytes::Borrowed(b"replacement"))), ] .into_iter(); wildcard_replace_impl(&mut args); diff --git a/engine/src/lex.rs b/engine/src/lex.rs index ec5ab33a..4fcc5633 100644 --- a/engine/src/lex.rs +++ b/engine/src/lex.rs @@ -1,9 +1,7 @@ -use crate::{ - functions::{FunctionArgInvalidConstantError, FunctionArgKindMismatchError}, - rhs_types::{RegexError, WildcardError}, - scheme::{IndexAccessError, UnknownFieldError, UnknownFunctionError}, - types::{Type, TypeMismatchError}, -}; +use crate::functions::{FunctionArgInvalidConstantError, FunctionArgKindMismatchError}; +use crate::rhs_types::{RegexError, WildcardError}; +use crate::scheme::{IndexAccessError, UnknownFieldError, UnknownFunctionError}; +use crate::types::{Type, TypeMismatchError}; use cidr::errors::NetworkParseError; use std::num::ParseIntError; use thiserror::Error; diff --git a/engine/src/lhs_types/array.rs b/engine/src/lhs_types/array.rs index 384960b8..b3620f00 100644 --- a/engine/src/lhs_types/array.rs +++ b/engine/src/lhs_types/array.rs @@ -1,20 +1,15 @@ -use crate::{ - lhs_types::AsRefIterator, - types::{CompoundType, GetType, IntoValue, LhsValue, LhsValueSeed, Type, TypeMismatchError}, +use super::TypedMap; +use super::map::InnerMap; +use crate::lhs_types::AsRefIterator; +use crate::types::{ + CompoundType, GetType, IntoValue, LhsValue, LhsValueSeed, Type, TypeMismatchError, }; -use serde::{ - Serialize, Serializer, - de::{self, DeserializeSeed, Deserializer, SeqAccess, Visitor}, - ser::SerializeSeq, -}; -use std::{ - fmt, - hash::{Hash, Hasher}, - hint::unreachable_unchecked, - ops::Deref, -}; - -use super::{TypedMap, map::InnerMap}; +use serde::de::{self, DeserializeSeed, Deserializer, SeqAccess, Visitor}; +use serde::ser::SerializeSeq; +use serde::{Serialize, Serializer}; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::hint::unreachable_unchecked; // Ideally, we would want to use Cow<'a, LhsValue<'a>> here // but it doesnt work for unknown reasons @@ -32,48 +27,34 @@ impl<'a> InnerArray<'a> { } #[inline] - fn as_vec(&mut self) -> &mut Vec> { + fn len(&self) -> usize { match self { - InnerArray::Owned(vec) => vec, - InnerArray::Borrowed(slice) => { - *self = InnerArray::Owned(slice.to_vec()); - match self { - InnerArray::Owned(vec) => vec, - _ => unsafe { unreachable_unchecked() }, - } - } + InnerArray::Owned(vec) => vec.len(), + InnerArray::Borrowed(slice) => slice.len(), } } #[inline] - fn get_mut(&mut self, idx: usize) -> Option<&mut LhsValue<'a>> { - self.as_vec().get_mut(idx) - } - - #[inline] - fn push(&mut self, value: LhsValue<'a>) { - self.as_vec().push(value) + fn is_empty(&self) -> bool { + match self { + InnerArray::Owned(vec) => vec.is_empty(), + InnerArray::Borrowed(slice) => slice.is_empty(), + } } #[inline] - fn truncate(&mut self, len: usize) { + fn get(&self, idx: usize) -> Option<&LhsValue<'a>> { match self { - InnerArray::Owned(vec) => vec.truncate(len), - InnerArray::Borrowed(slice) => { - *slice = &slice[..len]; - } + Self::Owned(vec) => vec.get(idx), + Self::Borrowed(slice) => slice.get(idx), } } -} - -impl<'a> Deref for InnerArray<'a> { - type Target = [LhsValue<'a>]; #[inline] - fn deref(&self) -> &Self::Target { + fn iter(&self) -> std::slice::Iter<'_, LhsValue<'a>> { match self { - InnerArray::Owned(vec) => &vec[..], - InnerArray::Borrowed(slice) => slice, + Self::Owned(vec) => vec.iter(), + Self::Borrowed(slice) => slice.iter(), } } } @@ -84,6 +65,15 @@ impl Default for InnerArray<'_> { } } +impl Hash for InnerArray<'_> { + fn hash(&self, state: &mut H) { + match self { + InnerArray::Owned(vec) => vec.as_slice().hash(state), + InnerArray::Borrowed(slice) => slice.hash(state), + } + } +} + /// An array of [`Type`]. #[derive(Debug, Clone)] pub struct Array<'a> { @@ -148,6 +138,12 @@ impl<'a> Array<'a> { self.data.is_empty() } + /// Returns an iterator over the elements in array. + #[inline] + pub fn iter(&self) -> ArrayIter<'a, '_> { + ArrayIter(self.data.iter()) + } + pub(crate) fn extract(self, idx: usize) -> Option> { let Self { data, .. } = self; if idx >= data.len() { @@ -160,16 +156,15 @@ impl<'a> Array<'a> { } } - pub(crate) fn as_slice(&self) -> &[LhsValue<'a>] { - &self.data - } - pub(crate) fn filter_map_to(self, value_type: impl Into, func: F) -> Self where F: Fn(LhsValue<'a>) -> Option>, { - let Self { mut data, .. } = self; - let mut vec = std::mem::take(data.as_vec()); + let Self { data, .. } = self; + let mut vec = match data { + InnerArray::Owned(vec) => vec, + InnerArray::Borrowed(slice) => slice.to_vec(), + }; let val_type = value_type.into(); let mut write = 0; for read in 0..vec.len() { @@ -247,7 +242,21 @@ impl<'a> Array<'a> { impl<'a> PartialEq for Array<'a> { #[inline] fn eq(&self, other: &Array<'a>) -> bool { - self.val_type == other.val_type && self.data.deref() == other.data.deref() + if self.val_type != other.val_type { + return false; + } + + if self.data.len() != other.data.len() { + return false; + } + + for (v1, v2) in self.data.iter().zip(other.data.iter()) { + if v1 != v2 { + return false; + } + } + + true } } @@ -262,7 +271,7 @@ impl GetType for Array<'_> { impl Hash for Array<'_> { fn hash(&self, state: &mut H) { self.get_type().hash(state); - self.data.deref().hash(state); + self.data.hash(state); } } @@ -279,18 +288,18 @@ impl<'a, V: IntoValue<'a>> FromIterator for Array<'a> { } } -pub enum ArrayIterator<'a> { +pub enum ArrayIntoIter<'a> { Owned(std::vec::IntoIter>), Borrowed(AsRefIterator<'a, std::slice::Iter<'a, LhsValue<'a>>>), } -impl<'a> Iterator for ArrayIterator<'a> { +impl<'a> Iterator for ArrayIntoIter<'a> { type Item = LhsValue<'a>; fn next(&mut self) -> Option { match self { - ArrayIterator::Owned(vec_iter) => vec_iter.next(), - ArrayIterator::Borrowed(slice_iter) => slice_iter.next(), + ArrayIntoIter::Owned(vec_iter) => vec_iter.next(), + ArrayIntoIter::Borrowed(slice_iter) => slice_iter.next(), } } @@ -299,31 +308,57 @@ impl<'a> Iterator for ArrayIterator<'a> { } } -impl ExactSizeIterator for ArrayIterator<'_> { +impl ExactSizeIterator for ArrayIntoIter<'_> { fn len(&self) -> usize { match self { - ArrayIterator::Owned(vec_iter) => vec_iter.len(), - ArrayIterator::Borrowed(slice_iter) => slice_iter.len(), + ArrayIntoIter::Owned(vec_iter) => vec_iter.len(), + ArrayIntoIter::Borrowed(slice_iter) => slice_iter.len(), } } } impl<'a> IntoIterator for Array<'a> { + type IntoIter = ArrayIntoIter<'a>; type Item = LhsValue<'a>; - type IntoIter = ArrayIterator<'a>; + fn into_iter(self) -> Self::IntoIter { match self.data { - InnerArray::Owned(vec) => ArrayIterator::Owned(vec.into_iter()), - InnerArray::Borrowed(slice) => ArrayIterator::Borrowed(AsRefIterator(slice.iter())), + InnerArray::Owned(vec) => ArrayIntoIter::Owned(vec.into_iter()), + InnerArray::Borrowed(slice) => ArrayIntoIter::Borrowed(AsRefIterator(slice.iter())), } } } +pub struct ArrayIter<'a, 'b>(std::slice::Iter<'b, LhsValue<'a>>); + +impl<'a, 'b> Iterator for ArrayIter<'a, 'b> { + type Item = &'b LhsValue<'a>; + + #[inline] + fn next(&mut self) -> Option { + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.len(), Some(self.len())) + } +} + +impl ExactSizeIterator for ArrayIter<'_, '_> { + #[inline] + fn len(&self) -> usize { + self.0.len() + } +} + impl<'a, 'b> IntoIterator for &'b Array<'a> { + type IntoIter = ArrayIter<'a, 'b>; type Item = &'b LhsValue<'a>; - type IntoIter = std::slice::Iter<'b, LhsValue<'a>>; + + #[inline] fn into_iter(self) -> Self::IntoIter { - self.data.iter() + self.iter() } } @@ -361,7 +396,10 @@ impl<'de> DeserializeSeed<'de> for &mut Array<'de> { A: SeqAccess<'de>, { let value_type = self.0.value_type(); - let vec = self.0.data.as_vec(); + let mut vec = match &mut self.0.data { + InnerArray::Owned(vec) => std::mem::take(vec), + InnerArray::Borrowed(slice) => slice.to_vec(), + }; while let Some(elem) = seq.next_element_seed(LhsValueSeed(&value_type))? { let elem_type = elem.get_type(); if value_type != elem_type { @@ -371,6 +409,7 @@ impl<'de> DeserializeSeed<'de> for &mut Array<'de> { } vec.push(elem); } + self.0.data = InnerArray::Owned(vec); Ok(()) } } @@ -396,16 +435,32 @@ impl<'a, V: IntoValue<'a>> TypedArray<'a, V> { pub const fn new() -> Self { const { Self { - array: InnerArray::new(), + array: InnerArray::Owned(Vec::new()), _marker: std::marker::PhantomData, } } } + #[inline] + fn as_vec_ref(&self) -> &Vec> { + match &self.array { + InnerArray::Owned(vec) => vec, + InnerArray::Borrowed(_) => unreachable!(), + } + } + + #[inline] + fn as_vec_mut(&mut self) -> &mut Vec> { + match &mut self.array { + InnerArray::Owned(vec) => vec, + InnerArray::Borrowed(_) => unreachable!(), + } + } + /// Push an element to the back of the array #[inline] pub fn push(&mut self, value: V) { - self.array.push(value.into_value()) + self.as_vec_mut().push(value.into_value()) } /// Returns the number of elements in the array @@ -423,14 +478,14 @@ impl<'a, V: IntoValue<'a>> TypedArray<'a, V> { /// Shortens the array, keeping the first `len` elements and dropping the rest. #[inline] pub fn truncate(&mut self, len: usize) { - self.array.truncate(len); + self.as_vec_mut().truncate(len); } /// Converts the strongly typed array into a borrowed loosely typed array. pub fn as_array(&'a self) -> Array<'a> { Array { val_type: V::TYPE.into(), - data: InnerArray::Borrowed(self.array.deref()), + data: InnerArray::Borrowed(self.as_vec_ref()), } } } @@ -438,7 +493,7 @@ impl<'a, V: IntoValue<'a>> TypedArray<'a, V> { impl TypedArray<'static, bool> { #[inline] pub(crate) fn iter(&self) -> impl ExactSizeIterator + '_ { - self.array.iter().map(|value| match value { + self.as_vec_ref().iter().map(|value| match value { LhsValue::Bool(b) => b, _ => unsafe { unreachable_unchecked() }, }) @@ -446,7 +501,7 @@ impl TypedArray<'static, bool> { #[inline] pub(crate) fn iter_mut(&mut self) -> impl ExactSizeIterator + '_ { - self.array.as_vec().iter_mut().map(|value| match value { + self.as_vec_mut().iter_mut().map(|value| match value { LhsValue::Bool(b) => b, _ => unsafe { unreachable_unchecked() }, }) @@ -461,7 +516,7 @@ impl<'a, V: IntoValue<'a>> fmt::Debug for TypedArray<'a, V> { impl<'a, V: IntoValue<'a>> PartialEq for TypedArray<'a, V> { fn eq(&self, other: &Self) -> bool { - self.array.deref() == other.array.deref() + self.as_vec_ref() == other.as_vec_ref() } } @@ -496,8 +551,7 @@ impl<'a, V: IntoValue<'a>> Default for TypedArray<'a, V> { impl<'a, V: IntoValue<'a>> Extend for TypedArray<'a, V> { #[inline] fn extend>(&mut self, iter: T) { - self.array - .as_vec() + self.as_vec_mut() .extend(iter.into_iter().map(IntoValue::into_value)) } } @@ -526,7 +580,7 @@ impl<'a, V: IntoValue<'a>> IntoValue<'a> for TypedArray<'a, V> { impl<'a, V: IntoValue<'a>> TypedArray<'a, TypedArray<'a, V>> { /// Returns a reference to an element or None if the index is out of bounds. pub fn get(&self, index: usize) -> Option<&TypedArray<'a, V>> { - self.array.get(index).map(|val| match val { + self.as_vec_ref().get(index).map(|val| match val { LhsValue::Array(array) => { // Safety: this is safe because `TypedArray` is a repr(transparent) // newtype over `InnerArray`. @@ -538,7 +592,7 @@ impl<'a, V: IntoValue<'a>> TypedArray<'a, TypedArray<'a, V>> { /// Returns a mutable reference to an element or None if the index is out of bounds. pub fn get_mut(&mut self, index: usize) -> Option<&mut TypedArray<'a, V>> { - self.array.get_mut(index).map(|val| match val { + self.as_vec_mut().get_mut(index).map(|val| match val { LhsValue::Array(array) => { // Safety: this is safe because `TypedArray` is a repr(transparent) // newtype over `InnerArray`. @@ -556,7 +610,7 @@ impl<'a, V: IntoValue<'a>> TypedArray<'a, TypedArray<'a, V>> { impl<'a, V: IntoValue<'a>> TypedArray<'a, TypedMap<'a, V>> { /// Returns a reference to an element or None if the index is out of bounds. pub fn get(&self, index: usize) -> Option<&TypedMap<'a, V>> { - self.array.get(index).map(|val| match val { + self.as_vec_ref().get(index).map(|val| match val { LhsValue::Map(map) => { // Safety: this is safe because `TypedMap` is a repr(transparent) // newtype over `InnerMap`. @@ -568,7 +622,7 @@ impl<'a, V: IntoValue<'a>> TypedArray<'a, TypedMap<'a, V>> { /// Returns a mutable reference to an element or None if the index is out of bounds. pub fn get_mut(&mut self, index: usize) -> Option<&mut TypedMap<'a, V>> { - self.array.get_mut(index).map(|val| match val { + self.as_vec_mut().get_mut(index).map(|val| match val { LhsValue::Map(map) => { // Safety: this is safe because `TypedMap` is a repr(transparent) // newtype over `InnerMap`. diff --git a/engine/src/lhs_types/bytes.rs b/engine/src/lhs_types/bytes.rs new file mode 100644 index 00000000..59ca8fc8 --- /dev/null +++ b/engine/src/lhs_types/bytes.rs @@ -0,0 +1,363 @@ +use serde::de::Visitor; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::borrow::{Borrow, Cow}; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; + +/// A byte string. +#[derive(Debug, Clone)] +pub enum Bytes<'a> { + /// Borrowed byte string. + Borrowed(&'a [u8]), + /// Owned byte string. + Owned(Box<[u8]>), +} + +impl<'a> Bytes<'a> { + /// Clones self into a fully owned byte string. + #[inline] + pub fn to_owned(&self) -> Bytes<'static> { + match self { + Self::Borrowed(b) => Bytes::Owned(Box::from(*b)), + Self::Owned(b) => Bytes::Owned(b.clone()), + } + } + + /// Converts self into a fully owned byte string. + #[inline] + pub fn into_owned(self) -> Box<[u8]> { + match self { + Self::Borrowed(b) => Box::from(b), + Self::Owned(b) => b, + } + } + + /// Converts self into an owned byte string if necessary + /// and returns a mutable reference to the bytes. + #[inline] + pub fn to_mut(&mut self) -> &mut [u8] { + if let Self::Borrowed(b) = self { + *self = Self::Owned(Box::from(*b)); + } + match self { + Self::Owned(b) => b, + Self::Borrowed(_) => unreachable!(), + } + } + + /// Shortens the byte string, keeping only the first `len` elements. + #[inline] + pub fn truncate(&mut self, len: usize) { + match self { + Self::Borrowed(slice) => { + *slice = &slice[..len]; + } + Self::Owned(data) => { + let mut vec = Vec::from(std::mem::take(data)); + vec.truncate(len); + *data = Box::from(vec); + } + } + } +} + +impl Deref for Bytes<'_> { + type Target = [u8]; + + #[inline] + fn deref(&self) -> &Self::Target { + match self { + Self::Borrowed(b) => b, + Self::Owned(b) => b, + } + } +} + +impl AsRef<[u8]> for Bytes<'_> { + #[inline] + fn as_ref(&self) -> &[u8] { + match self { + Self::Borrowed(b) => b, + Self::Owned(b) => b, + } + } +} + +impl Borrow<[u8]> for Bytes<'_> { + #[inline] + fn borrow(&self) -> &[u8] { + match self { + Self::Borrowed(b) => b, + Self::Owned(b) => b, + } + } +} + +impl<'a> From<&'a [u8]> for Bytes<'a> { + #[inline] + fn from(value: &'a [u8]) -> Self { + Bytes::Borrowed(value) + } +} + +impl<'a, const N: usize> From<&'a [u8; N]> for Bytes<'a> { + #[inline] + fn from(value: &'a [u8; N]) -> Self { + Bytes::Borrowed(value) + } +} + +impl From> for Bytes<'static> { + #[inline] + fn from(value: Box<[u8]>) -> Self { + Bytes::Owned(value) + } +} + +impl From> for Bytes<'static> { + #[inline] + fn from(value: Vec) -> Self { + Bytes::Owned(value.into_boxed_slice()) + } +} + +impl<'a> From> for Bytes<'a> { + #[inline] + fn from(value: Cow<'a, [u8]>) -> Self { + match value { + Cow::Borrowed(b) => Self::Borrowed(b), + Cow::Owned(b) => Self::Owned(b.into_boxed_slice()), + } + } +} + +impl<'a> From<&'a str> for Bytes<'a> { + #[inline] + fn from(value: &'a str) -> Self { + Bytes::Borrowed(value.as_bytes()) + } +} + +impl From> for Bytes<'static> { + #[inline] + fn from(value: Box) -> Self { + Bytes::Owned(value.into_boxed_bytes()) + } +} + +impl<'a> From<&'a Box> for Bytes<'a> { + #[inline] + fn from(value: &'a Box) -> Self { + Bytes::Borrowed(value.as_bytes()) + } +} + +impl From for Bytes<'static> { + #[inline] + fn from(value: String) -> Self { + // Call into_boxed_str in order to reduce memory usage + Bytes::Owned(value.into_boxed_str().into_boxed_bytes()) + } +} + +impl<'a> From<&'a String> for Bytes<'a> { + #[inline] + fn from(value: &'a String) -> Self { + Bytes::Borrowed(value.as_bytes()) + } +} + +impl<'a> From> for Bytes<'a> { + #[inline] + fn from(value: Cow<'a, str>) -> Self { + match value { + Cow::Borrowed(b) => Self::Borrowed(b.as_bytes()), + Cow::Owned(b) => Self::Owned(b.into_boxed_str().into_boxed_bytes()), + } + } +} + +impl PartialEq for Bytes<'_> { + #[inline] + fn eq(&self, other: &Self) -> bool { + **self == **other + } +} + +impl PartialEq<[u8]> for Bytes<'_> { + #[inline] + fn eq(&self, other: &[u8]) -> bool { + &**self == other + } +} + +impl PartialEq<&[u8]> for Bytes<'_> { + #[inline] + fn eq(&self, other: &&[u8]) -> bool { + &**self == *other + } +} + +impl PartialEq<[u8; N]> for Bytes<'_> { + #[inline] + fn eq(&self, other: &[u8; N]) -> bool { + **self == *other + } +} + +impl PartialEq<&[u8; N]> for Bytes<'_> { + #[inline] + fn eq(&self, other: &&[u8; N]) -> bool { + &**self == *other + } +} + +impl PartialEq> for Bytes<'_> { + #[inline] + fn eq(&self, other: &Vec) -> bool { + &**self == other + } +} + +impl PartialEq for Bytes<'_> { + #[inline] + fn eq(&self, other: &str) -> bool { + &**self == other.as_bytes() + } +} + +impl PartialEq<&str> for Bytes<'_> { + #[inline] + fn eq(&self, other: &&str) -> bool { + &**self == other.as_bytes() + } +} + +impl PartialEq for Bytes<'_> { + #[inline] + fn eq(&self, other: &String) -> bool { + &**self == other.as_bytes() + } +} + +impl Eq for Bytes<'_> {} + +impl Hash for Bytes<'_> { + #[inline] + fn hash(&self, h: &mut H) { + self.deref().hash(h); + } +} + +impl Serialize for Bytes<'_> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + if let Ok(s) = simdutf8::basic::from_utf8(self) { + serializer.serialize_str(s) + } else { + serializer.serialize_bytes(self) + } + } +} + +struct BytesVisitor; + +impl<'de> Visitor<'de> for BytesVisitor { + type Value = Bytes<'de>; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("a byte string") + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: serde::de::Error, + { + Ok(Bytes::from(v.to_vec())) + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: serde::de::Error, + { + Ok(Bytes::from(v)) + } + + fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result + where + E: serde::de::Error, + { + Ok(Bytes::Borrowed(v)) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + Ok(Bytes::from(v.as_bytes().to_vec())) + } + + fn visit_string(self, v: String) -> Result + where + E: serde::de::Error, + { + Ok(Bytes::from(v)) + } + + fn visit_borrowed_str(self, v: &'de str) -> Result + where + E: serde::de::Error, + { + Ok(Bytes::Borrowed(v.as_bytes())) + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let mut vec = Vec::::with_capacity(seq.size_hint().unwrap_or_default()); + while let Some(val) = seq.next_element()? { + vec.push(val); + } + + Ok(Bytes::from(vec)) + } +} + +impl<'de> Deserialize<'de> for Bytes<'de> { + fn deserialize(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_bytes(BytesVisitor) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bytes_deserialize() { + let bytes = serde_json::from_str::>("\"a JSON string with unicode ❤\"").unwrap(); + assert_eq!( + bytes, + Bytes::from(&b"a JSON string with unicode \xE2\x9D\xA4"[..]) + ); + + let bytes = + serde_json::from_str::>("\"a JSON string with escaped-unicode \\u2764\"") + .unwrap(); + assert_eq!( + bytes, + Bytes::from(&b"a JSON string with escaped-unicode \xE2\x9D\xA4"[..]) + ); + + let bytes = + serde_json::from_str::>("[97, 32, 74, 83, 79, 78, 32, 115, 116, 114, 105, 110, 103, 32, 102, 114, 111, 109, 32, 105, 110, 116, 101, 103, 101, 114, 32, 97, 114, 114, 97, 121]") + .unwrap(); + assert_eq!(bytes, Bytes::from(&b"a JSON string from integer array"[..])); + } +} diff --git a/engine/src/lhs_types/map.rs b/engine/src/lhs_types/map.rs index 8850b229..bbe48e7b 100644 --- a/engine/src/lhs_types/map.rs +++ b/engine/src/lhs_types/map.rs @@ -1,23 +1,15 @@ -use crate::{ - TypeMismatchError, - lhs_types::AsRefIterator, - types::{BytesOrString, CompoundType, GetType, IntoValue, LhsValue, LhsValueSeed, Type}, -}; -use serde::{ - Serialize, Serializer, - de::{self, DeserializeSeed, Deserializer, MapAccess, SeqAccess, Visitor}, - ser::{SerializeMap, SerializeSeq}, -}; -use std::{ - borrow::Cow, - collections::BTreeMap, - fmt, - hash::{Hash, Hasher}, - hint::unreachable_unchecked, - ops::Deref, -}; - -use super::{TypedArray, array::InnerArray}; +use super::TypedArray; +use super::array::InnerArray; +use crate::TypeMismatchError; +use crate::lhs_types::{AsRefIterator, Bytes}; +use crate::types::{CompoundType, GetType, IntoValue, LhsValue, LhsValueSeed, Type}; +use serde::de::{self, DeserializeSeed, Deserializer, MapAccess, SeqAccess, Visitor}; +use serde::ser::{SerializeMap, SerializeSeq}; +use serde::{Serialize, Serializer}; +use std::borrow::Cow; +use std::collections::BTreeMap; +use std::fmt; +use std::hash::{Hash, Hasher}; #[derive(Debug, Clone)] pub(crate) enum InnerMap<'a> { @@ -32,43 +24,34 @@ impl<'a> InnerMap<'a> { } #[inline] - fn as_map(&mut self) -> &mut BTreeMap, LhsValue<'a>> { + fn len(&self) -> usize { match self { - InnerMap::Owned(map) => map, - InnerMap::Borrowed(map) => { - *self = InnerMap::Owned(map.clone()); - match self { - InnerMap::Owned(map) => map, - _ => unsafe { unreachable_unchecked() }, - } - } + Self::Owned(map) => map.len(), + Self::Borrowed(map) => map.len(), } } #[inline] - fn get_mut(&mut self, key: &[u8]) -> Option<&mut LhsValue<'a>> { - self.as_map().get_mut(key) - } - - #[inline] - fn insert(&mut self, key: Box<[u8]>, value: LhsValue<'a>) { - self.as_map().insert(key, value); + fn is_empty(&self) -> bool { + match self { + Self::Owned(map) => map.is_empty(), + Self::Borrowed(map) => map.is_empty(), + } } #[inline] - fn get_or_insert(&mut self, key: Box<[u8]>, value: LhsValue<'a>) -> &mut LhsValue<'a> { - self.as_map().entry(key).or_insert(value) + fn get>(&self, key: K) -> Option<&LhsValue<'a>> { + match self { + Self::Owned(map) => map.get(key.as_ref()), + Self::Borrowed(map) => map.get(key.as_ref()), + } } -} - -impl<'a> Deref for InnerMap<'a> { - type Target = BTreeMap, LhsValue<'a>>; #[inline] - fn deref(&self) -> &Self::Target { + fn iter(&self) -> MapIter<'a, '_> { match self { - InnerMap::Owned(map) => map, - InnerMap::Borrowed(ref_map) => ref_map, + Self::Owned(map) => MapIter(map.iter()), + Self::Borrowed(map) => MapIter(map.iter()), } } } @@ -79,6 +62,16 @@ impl Default for InnerMap<'_> { } } +impl Hash for InnerMap<'_> { + #[inline] + fn hash(&self, state: &mut H) { + match self { + Self::Owned(map) => map.hash(state), + Self::Borrowed(map) => map.hash(state), + } + } +} + /// A map of string to [`Type`]. #[derive(Debug, Clone)] pub struct Map<'a> { @@ -130,22 +123,25 @@ impl<'a> Map<'a> { } /// Returns the type of the contained values. + #[inline] pub fn value_type(&self) -> Type { self.val_type.into() } /// Returns the number of elements in the map + #[inline] pub fn len(&self) -> usize { self.data.len() } /// Returns true if the map contains no elements. + #[inline] pub fn is_empty(&self) -> bool { self.data.is_empty() } /// Convert current map into an iterator over contained values - pub fn values_into_iter(self) -> MapValuesIntoIter<'a> { + pub fn into_values(self) -> MapValuesIntoIter<'a> { let Map { data, .. } = self; match data { InnerMap::Owned(map) => MapValuesIntoIter::Owned(map.into_iter()), @@ -166,7 +162,7 @@ impl<'a> Map<'a> { /// Creates an iterator visiting all key-value pairs in arbitrary order. #[inline] pub fn iter(&self) -> MapIter<'a, '_> { - MapIter(self.data.iter()) + self.data.iter() } /// Creates a new map from the specified iterator. @@ -201,7 +197,21 @@ impl<'a> Map<'a> { impl<'a> PartialEq for Map<'a> { #[inline] fn eq(&self, other: &Map<'a>) -> bool { - self.val_type == other.val_type && self.data.deref() == other.data.deref() + if self.val_type != other.val_type { + return false; + } + + if self.data.len() != other.data.len() { + return false; + } + + for (k, v) in self.data.iter() { + if other.data.get(k) != Some(v) { + return false; + } + } + + true } } @@ -217,7 +227,7 @@ impl GetType for Map<'_> { impl Hash for Map<'_> { fn hash(&self, state: &mut H) { self.get_type().hash(state); - self.data.deref().hash(state); + self.data.hash(state); } } @@ -268,6 +278,7 @@ impl<'a> Iterator for MapValuesIntoIter<'a> { } impl ExactSizeIterator for MapValuesIntoIter<'_> { + #[inline] fn len(&self) -> usize { match self { MapValuesIntoIter::Owned(iter) => iter.len(), @@ -276,24 +287,47 @@ impl ExactSizeIterator for MapValuesIntoIter<'_> { } } +enum MapIntoIterImpl<'a> { + Owned(std::collections::btree_map::IntoIter, LhsValue<'a>>), + Borrowed(std::collections::btree_map::Iter<'a, Box<[u8]>, LhsValue<'a>>), +} + +pub struct MapIntoIter<'a>(MapIntoIterImpl<'a>); + +impl<'a> Iterator for MapIntoIter<'a> { + type Item = (Cow<'a, [u8]>, LhsValue<'a>); + + fn next(&mut self) -> Option { + match self { + MapIntoIter(MapIntoIterImpl::Owned(iter)) => { + iter.next().map(|(k, v)| (Vec::from(k).into(), v)) + } + MapIntoIter(MapIntoIterImpl::Borrowed(iter)) => { + iter.next().map(|(k, v)| ((&**k).into(), v.as_ref())) + } + } + } +} + impl<'a> IntoIterator for Map<'a> { - type Item = (Box<[u8]>, LhsValue<'a>); - type IntoIter = std::collections::btree_map::IntoIter, LhsValue<'a>>; + type IntoIter = MapIntoIter<'a>; + type Item = (Cow<'a, [u8]>, LhsValue<'a>); + fn into_iter(self) -> Self::IntoIter { match self.data { - InnerMap::Owned(map) => map.into_iter(), - InnerMap::Borrowed(ref_map) => ref_map.clone().into_iter(), + InnerMap::Owned(map) => MapIntoIter(MapIntoIterImpl::Owned(map.into_iter())), + InnerMap::Borrowed(map) => MapIntoIter(MapIntoIterImpl::Borrowed(map.iter())), } } } impl<'a, 'b> IntoIterator for &'b Map<'a> { - type Item = (&'b [u8], &'b LhsValue<'a>); type IntoIter = MapIter<'a, 'b>; + type Item = (&'b [u8], &'b LhsValue<'a>); #[inline] fn into_iter(self) -> Self::IntoIter { - MapIter(self.data.deref().iter()) + self.data.iter() } } @@ -302,25 +336,28 @@ impl Serialize for Map<'_> { where S: Serializer, { - let to_map = self.data.keys().all(|key| std::str::from_utf8(key).is_ok()); + let to_map = self + .data + .iter() + .all(|(key, _)| simdutf8::basic::from_utf8(key).is_ok()); if to_map { let mut map = serializer.serialize_map(Some(self.len()))?; for (k, v) in self.data.iter() { - map.serialize_entry(std::str::from_utf8(k).unwrap(), v)?; + map.serialize_entry(simdutf8::basic::from_utf8(k).unwrap(), v)?; } map.end() } else { // Keys have to be sorted in order to have reproducible output let mut keys = Vec::new(); - for key in self.data.keys() { + for (key, _) in self.data.iter() { keys.push(key) } keys.sort(); let mut seq = serializer.serialize_seq(Some(self.len()))?; for key in keys { seq.serialize_element(&[ - &LhsValue::Bytes((&**key).into()), + &LhsValue::Bytes(key.into()), self.data.get(key).unwrap(), ])?; } @@ -332,7 +369,7 @@ impl Serialize for Map<'_> { struct MapEntrySeed<'a>(&'a Type); impl<'de> DeserializeSeed<'de> for MapEntrySeed<'_> { - type Value = (Cow<'de, [u8]>, LhsValue<'de>); + type Value = (Bytes<'de>, LhsValue<'de>); fn deserialize(self, deserializer: D) -> Result where @@ -341,7 +378,7 @@ impl<'de> DeserializeSeed<'de> for MapEntrySeed<'_> { struct MapEntryVisitor<'a>(&'a Type); impl<'de> Visitor<'de> for MapEntryVisitor<'_> { - type Value = (Cow<'de, [u8]>, LhsValue<'de>); + type Value = (Bytes<'de>, LhsValue<'de>); fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { write!(formatter, "a [key, lhs value] pair") @@ -352,12 +389,12 @@ impl<'de> DeserializeSeed<'de> for MapEntrySeed<'_> { V: SeqAccess<'de>, { let key = seq - .next_element::>()? + .next_element::>()? .ok_or_else(|| de::Error::invalid_length(0, &self))?; let value = seq .next_element_seed(LhsValueSeed(self.0))? .ok_or_else(|| de::Error::invalid_length(1, &self))?; - Ok((key.into_bytes(), value)) + Ok((key, value)) } } @@ -389,6 +426,17 @@ impl<'de> DeserializeSeed<'de> for &mut Map<'de> { M: MapAccess<'de>, { let value_type = self.0.value_type(); + let map = match &mut self.0.data { + InnerMap::Owned(map) => map, + InnerMap::Borrowed(map) => { + let map = map.clone(); + self.0.data = InnerMap::Owned(map); + match &mut self.0.data { + InnerMap::Owned(map) => map, + InnerMap::Borrowed(_) => unreachable!(), + } + } + }; while let Some(key) = access.next_key::>()? { let value = access.next_value_seed(LhsValueSeed(&value_type))?; if value.get_type() != value_type { @@ -398,9 +446,7 @@ impl<'de> DeserializeSeed<'de> for &mut Map<'de> { value_type ))); } - self.0 - .data - .insert(key.into_owned().into_bytes().into(), value); + map.insert(key.into_owned().into_bytes().into(), value); } Ok(()) @@ -411,6 +457,17 @@ impl<'de> DeserializeSeed<'de> for &mut Map<'de> { V: SeqAccess<'de>, { let value_type = self.0.value_type(); + let map = match &mut self.0.data { + InnerMap::Owned(map) => map, + InnerMap::Borrowed(map) => { + let map = map.clone(); + self.0.data = InnerMap::Owned(map); + match &mut self.0.data { + InnerMap::Owned(map) => map, + InnerMap::Borrowed(_) => unreachable!(), + } + } + }; while let Some((key, value)) = seq.next_element_seed(MapEntrySeed(&value_type))? { if value.get_type() != value_type { return Err(de::Error::custom(format!( @@ -419,7 +476,7 @@ impl<'de> DeserializeSeed<'de> for &mut Map<'de> { value_type ))); } - self.0.data.insert(key.into_owned().into(), value); + map.insert(key.into_owned(), value); } Ok(()) } @@ -446,48 +503,64 @@ impl<'a, V: IntoValue<'a>> TypedMap<'a, V> { pub const fn new() -> Self { const { Self { - map: InnerMap::new(), + map: InnerMap::Owned(BTreeMap::new()), _marker: std::marker::PhantomData, } } } + #[inline] + fn as_map_ref(&self) -> &BTreeMap, LhsValue<'a>> { + match &self.map { + InnerMap::Owned(map) => map, + InnerMap::Borrowed(_) => unreachable!(), + } + } + + #[inline] + fn as_map_mut(&mut self) -> &mut BTreeMap, LhsValue<'a>> { + match &mut self.map { + InnerMap::Owned(map) => map, + InnerMap::Borrowed(_) => unreachable!(), + } + } + /// Push an element to the back of the map #[inline] pub fn insert(&mut self, key: Box<[u8]>, value: V) { - self.map.insert(key, value.into_value()) + self.as_map_mut().insert(key, value.into_value()); } /// Returns the number of elements in the array #[inline] pub fn len(&self) -> usize { - self.map.len() + self.as_map_ref().len() } /// Returns true if the array contains no elements. #[inline] pub fn is_empty(&self) -> bool { - self.map.is_empty() + self.as_map_ref().is_empty() } /// Converts the strongly typed map into a borrowed loosely typed map. pub fn as_map(&'a self) -> Map<'a> { Map { val_type: V::TYPE.into(), - data: InnerMap::Borrowed(self.map.deref()), + data: InnerMap::Borrowed(self.as_map_ref()), } } } impl<'a, V: IntoValue<'a>> fmt::Debug for TypedMap<'a, V> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt.debug_map().entries(self.map.iter()).finish() + fmt.debug_map().entries(self.as_map_ref().iter()).finish() } } impl<'a, V: IntoValue<'a>> PartialEq for TypedMap<'a, V> { fn eq(&self, other: &Self) -> bool { - self.map.deref() == other.map.deref() + self.as_map_ref() == other.as_map_ref() } } @@ -498,7 +571,7 @@ impl<'a, 'k, V: Copy + IntoValue<'a>, S: AsRef<[(&'k [u8], V)]>> PartialEq fo .iter() .copied() .map(|(k, v)| (k, v.into_value())) - .eq(self.map.iter().map(|(k, v)| (&**k, v.as_ref()))) + .eq(self.as_map_ref().iter().map(|(k, v)| (&**k, v.as_ref()))) } } @@ -522,8 +595,7 @@ impl<'a, V: IntoValue<'a>> Default for TypedMap<'a, V> { impl<'a, V: IntoValue<'a>> Extend<(Box<[u8]>, V)> for TypedMap<'a, V> { #[inline] fn extend, V)>>(&mut self, iter: T) { - self.map - .as_map() + self.as_map_mut() .extend(iter.into_iter().map(|(k, v)| (k, v.into_value()))) } } @@ -552,7 +624,7 @@ impl<'a, V: IntoValue<'a>> IntoValue<'a> for TypedMap<'a, V> { impl<'a, V: IntoValue<'a>> TypedMap<'a, TypedMap<'a, V>> { /// Returns a reference to the value corresponding to the key. pub fn get>(&self, key: K) -> Option<&TypedMap<'a, V>> { - self.map.get(key.as_ref()).map(|val| match val { + self.as_map_ref().get(key.as_ref()).map(|val| match val { LhsValue::Map(map) => { // Safety: this is safe because `TypedMap` is a repr(transparent) // newtype over `InnerMap`. @@ -564,16 +636,20 @@ impl<'a, V: IntoValue<'a>> TypedMap<'a, TypedMap<'a, V>> { /// Returns a mutable reference to the value corresponding to the key. pub fn get_mut>(&mut self, key: K) -> Option<&mut TypedMap<'a, V>> { - self.map.get_mut(key.as_ref()).map(|val| match val { - LhsValue::Map(map) => { - // Safety: this is safe because `TypedMap` is a repr(transparent) - // newtype over `InnerMap`. - unsafe { - std::mem::transmute::<&mut InnerMap<'a>, &mut TypedMap<'a, V>>(&mut map.data) + self.as_map_mut() + .get_mut(key.as_ref()) + .map(|val| match val { + LhsValue::Map(map) => { + // Safety: this is safe because `TypedMap` is a repr(transparent) + // newtype over `InnerMap`. + unsafe { + std::mem::transmute::<&mut InnerMap<'a>, &mut TypedMap<'a, V>>( + &mut map.data, + ) + } } - } - _ => unreachable!(), - }) + _ => unreachable!(), + }) } /// Returns a mutable reference to the value coressponding to the key or insert a new one. @@ -582,7 +658,7 @@ impl<'a, V: IntoValue<'a>> TypedMap<'a, TypedMap<'a, V>> { key: Box<[u8]>, value: TypedMap<'a, V>, ) -> &mut TypedMap<'a, V> { - match self.map.get_or_insert(key, value.into_value()) { + match self.as_map_mut().entry(key).or_insert(value.into_value()) { LhsValue::Map(map) => { // Safety: this is safe because `TypedMap` is a repr(transparent) // newtype over `InnerMap`. @@ -598,7 +674,7 @@ impl<'a, V: IntoValue<'a>> TypedMap<'a, TypedMap<'a, V>> { impl<'a, V: IntoValue<'a>> TypedMap<'a, TypedArray<'a, V>> { /// Returns a reference to the value corresponding to the key. pub fn get>(&self, key: K) -> Option<&TypedArray<'a, V>> { - self.map.get(key.as_ref()).map(|val| match val { + self.as_map_ref().get(key.as_ref()).map(|val| match val { LhsValue::Array(array) => { // Safety: this is safe because `TypedArray` is a repr(transparent) // newtype over `InnerArray`. @@ -610,18 +686,20 @@ impl<'a, V: IntoValue<'a>> TypedMap<'a, TypedArray<'a, V>> { /// Returns a mutable reference to the value corresponding to the key. pub fn get_mut>(&mut self, key: K) -> Option<&mut TypedArray<'a, V>> { - self.map.get_mut(key.as_ref()).map(|val| match val { - LhsValue::Array(array) => { - // Safety: this is safe because `TypedArray` is a repr(transparent) - // newtype over `InnerArray`. - unsafe { - std::mem::transmute::<&mut InnerArray<'a>, &mut TypedArray<'a, V>>( - &mut array.data, - ) + self.as_map_mut() + .get_mut(key.as_ref()) + .map(|val| match val { + LhsValue::Array(array) => { + // Safety: this is safe because `TypedArray` is a repr(transparent) + // newtype over `InnerArray`. + unsafe { + std::mem::transmute::<&mut InnerArray<'a>, &mut TypedArray<'a, V>>( + &mut array.data, + ) + } } - } - _ => unreachable!(), - }) + _ => unreachable!(), + }) } /// Returns a mutable reference to the value coressponding to the key or insert a new one. @@ -630,7 +708,7 @@ impl<'a, V: IntoValue<'a>> TypedMap<'a, TypedArray<'a, V>> { key: Box<[u8]>, value: TypedArray<'a, V>, ) -> &mut TypedArray<'a, V> { - match self.map.get_or_insert(key, value.into_value()) { + match self.as_map_mut().entry(key).or_insert(value.into_value()) { LhsValue::Array(array) => { // Safety: this is safe because `TypedArray` is a repr(transparent) // newtype over `InnerArray`. diff --git a/engine/src/lhs_types/mod.rs b/engine/src/lhs_types/mod.rs index 93172b41..2f4c6c3b 100644 --- a/engine/src/lhs_types/mod.rs +++ b/engine/src/lhs_types/mod.rs @@ -1,13 +1,12 @@ mod array; +mod bytes; mod map; +pub use self::array::{Array, ArrayIntoIter, ArrayIter, TypedArray}; +pub use self::bytes::Bytes; +pub use self::map::{Map, MapIter, MapValuesIntoIter, TypedMap}; use crate::types::LhsValue; -pub use self::{ - array::{Array, ArrayIterator, TypedArray}, - map::{Map, MapIter, MapValuesIntoIter, TypedMap}, -}; - pub struct AsRefIterator<'a, T: Iterator>>(T); impl<'a, T: Iterator>> AsRefIterator<'a, T> { diff --git a/engine/src/lib.rs b/engine/src/lib.rs index 5708c001..5a39d66b 100644 --- a/engine/src/lib.rs +++ b/engine/src/lib.rs @@ -79,53 +79,48 @@ mod searcher; mod strict_partial_ord; mod types; -pub use self::{ - ast::{ - Expr, FilterAst, FilterValueAst, ValueExpr, - field_expr::{ComparisonExpr, ComparisonOpExpr, IdentifierExpr, IntOp, OrderingOp}, - function_expr::{FunctionCallArgExpr, FunctionCallExpr}, - index_expr::{Compare, IndexExpr}, - logical_expr::{LogicalExpr, LogicalOp, ParenthesizedExpr, UnaryOp}, - parse::{FilterParser, ParseError, ParserSettings}, - visitor::{Visitor, VisitorMut}, - }, - compiler::{Compiler, DefaultCompiler}, - execution_context::{ - ExecutionContext, ExecutionContextGuard, InvalidListMatcherError, SetFieldValueError, - }, - filter::{ - CompiledExpr, CompiledOneExpr, CompiledValueExpr, CompiledVecExpr, Filter, FilterValue, - }, - functions::{ - AllFunction, AnyFunction, CIDRFunction, ConcatFunction, DecodeBase64Function, - EndsWithFunction, FunctionArgInvalidConstantError, FunctionArgKind, - FunctionArgKindMismatchError, FunctionArgs, FunctionDefinition, FunctionDefinitionContext, - FunctionParam, FunctionParamError, JsonLookupIntegerFunction, JsonLookupStringFunction, - LenFunction, LowerFunction, RemoveBytesFunction, RemoveQueryArgsFunction, - SimpleFunctionArgKind, SimpleFunctionDefinition, SimpleFunctionImpl, - SimpleFunctionOptParam, SimpleFunctionParam, StartsWithFunction, SubstringFunction, - ToStringFunction, UUID4Function, UpperFunction, UrlDecodeFunction, WildcardReplaceFunction, - }, - lex::LexErrorKind, - lhs_types::{Array, Map, MapIter, TypedArray, TypedMap}, - list_matcher::{ - AlwaysList, AlwaysListMatcher, ListDefinition, ListMatcher, NeverList, NeverListMatcher, - }, - panic::{ - PanicCatcherFallbackMode, catch_panic, panic_catcher_disable, panic_catcher_enable, - panic_catcher_get_backtrace, panic_catcher_set_fallback_mode, panic_catcher_set_hook, - }, - rhs_types::{ - Bytes, BytesFormat, ExplicitIpRange, IntRange, IpCidr, IpRange, Regex, RegexError, - RegexFormat, - }, - scheme::{ - Field, FieldIndex, FieldRedefinitionError, FieldRef, Function, FunctionRedefinitionError, - FunctionRef, IdentifierRedefinitionError, IndexAccessError, List, ListRef, Scheme, - SchemeBuilder, SchemeMismatchError, UnknownFieldError, - }, - types::{ - CompoundType, ExpectedType, ExpectedTypeList, GetType, LhsValue, RhsValue, RhsValues, Type, - TypeMismatchError, - }, +pub use self::ast::field_expr::{ + ComparisonExpr, ComparisonOpExpr, IdentifierExpr, IntOp, OrderingOp, +}; +pub use self::ast::function_expr::{FunctionCallArgExpr, FunctionCallExpr}; +pub use self::ast::index_expr::{Compare, IndexExpr}; +pub use self::ast::logical_expr::{LogicalExpr, LogicalOp, ParenthesizedExpr, UnaryOp}; +pub use self::ast::parse::{FilterParser, ParseError, ParserSettings}; +pub use self::ast::visitor::{Visitor, VisitorMut}; +pub use self::ast::{Expr, FilterAst, FilterValueAst, ValueExpr}; +pub use self::compiler::{Compiler, DefaultCompiler}; +pub use self::execution_context::{ + ExecutionContext, ExecutionContextGuard, InvalidListMatcherError, SetFieldValueError, +}; +pub use self::filter::{ + CompiledExpr, CompiledOneExpr, CompiledValueExpr, CompiledVecExpr, Filter, FilterValue, +}; +pub use self::functions::{ + AllFunction, AnyFunction, CIDRFunction, ConcatFunction, DecodeBase64Function, + FunctionArgInvalidConstantError, FunctionArgKind, FunctionArgKindMismatchError, FunctionArgs, + FunctionDefinition, FunctionDefinitionContext, FunctionParam, FunctionParamError, LenFunction, + LowerFunction, RemoveBytesFunction, SimpleFunctionArgKind, SimpleFunctionDefinition, + SimpleFunctionImpl, SimpleFunctionOptParam, SimpleFunctionParam, StartsWithFunction, + SubstringFunction, UUID4Function, UrlDecodeFunction, WildcardReplaceFunction, +}; +pub use self::lex::LexErrorKind; +pub use self::lhs_types::{Array, Bytes, Map, MapIter, TypedArray, TypedMap}; +pub use self::list_matcher::{ + AlwaysList, AlwaysListMatcher, ListDefinition, ListMatcher, NeverList, NeverListMatcher, +}; +pub use self::panic::{ + PanicCatcherFallbackMode, catch_panic, panic_catcher_disable, panic_catcher_enable, + panic_catcher_get_backtrace, panic_catcher_set_fallback_mode, panic_catcher_set_hook, +}; +pub use self::rhs_types::{ + BytesFormat, ExplicitIpRange, IntRange, IpCidr, IpRange, Regex, RegexError, RegexFormat, +}; +pub use self::scheme::{ + Field, FieldIndex, FieldRedefinitionError, FieldRef, Function, FunctionRedefinitionError, + FunctionRef, IdentifierRedefinitionError, IndexAccessError, List, ListRef, Scheme, + SchemeBuilder, SchemeMismatchError, UnknownFieldError, +}; +pub use self::types::{ + CompoundType, ExpectedType, ExpectedTypeList, GetType, LhsValue, RhsValue, RhsValues, Type, + TypeMismatchError, }; diff --git a/engine/src/list_matcher.rs b/engine/src/list_matcher.rs index 4832c332..2de813ed 100644 --- a/engine/src/list_matcher.rs +++ b/engine/src/list_matcher.rs @@ -1,7 +1,6 @@ -use crate::LhsValue; -use crate::Type; +use crate::{LhsValue, Type}; use dyn_clone::DynClone; -use serde_json::Value; +use serde::{Deserialize, Serialize}; use std::any::Any; use std::fmt::Debug; @@ -10,15 +9,15 @@ use std::fmt::Debug; /// `ListDefinition` needs to be registered in the `Scheme` for a given `Type`. /// See `Scheme::add_list`. pub trait ListDefinition: Debug + Sync + Send { - /// Converts a deserialized `serde_json::Value` into a `ListMatcher`. + /// Deserializes a list matcher. /// /// This method is necessary to support deserialization of lists during the /// the deserialization of an `ExecutionContext`. - fn matcher_from_json_value( + fn deserialize_matcher<'de>( &self, ty: Type, - value: Value, - ) -> Result, serde_json::Error>; + deserializer: &mut dyn erased_serde::Deserializer<'de>, + ) -> Result, erased_serde::Error>; /// Creates a new matcher object for this list. fn new_matcher(&self) -> Box; @@ -58,14 +57,13 @@ impl DynPartialEq for T { } } -/// Implement this Trait to match a given `LhsValue` against a list. -pub trait ListMatcher: AsAny + Debug + DynClone + DynPartialEq + Send + Sync + 'static { +/// Implement this trait to match a given `LhsValue` against a list. +pub trait ListMatcher: + AsAny + Debug + DynClone + DynPartialEq + Send + Sync + erased_serde::Serialize + 'static +{ /// Returns true if `val` is in the given list. fn match_value(&self, list_name: &str, val: &LhsValue<'_>) -> bool; - /// Convert the list matcher to a serde_json::Value in order to serialize it. - fn to_json_value(&self) -> Value; - /// Clears the list matcher, removing all its content. fn clear(&mut self); } @@ -84,16 +82,17 @@ impl PartialEq for dyn ListMatcher { pub struct AlwaysList {} /// Matcher for `AlwaysList` -#[derive(Clone, Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct AlwaysListMatcher {} impl ListDefinition for AlwaysList { - fn matcher_from_json_value( + fn deserialize_matcher<'de>( &self, _: Type, - _: serde_json::Value, - ) -> Result, serde_json::Error> { - Ok(Box::new(AlwaysListMatcher {})) + deserializer: &mut dyn erased_serde::Deserializer<'de>, + ) -> Result, erased_serde::Error> { + let matcher = erased_serde::deserialize::(deserializer)?; + Ok(Box::new(matcher)) } fn new_matcher(&self) -> Box { @@ -106,10 +105,6 @@ impl ListMatcher for AlwaysListMatcher { false } - fn to_json_value(&self) -> serde_json::Value { - serde_json::Value::Null - } - fn clear(&mut self) {} } @@ -118,16 +113,17 @@ impl ListMatcher for AlwaysListMatcher { pub struct NeverList {} /// Matcher for `NeverList` -#[derive(Clone, Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct NeverListMatcher {} impl ListDefinition for NeverList { - fn matcher_from_json_value( + fn deserialize_matcher<'de>( &self, _: Type, - _: serde_json::Value, - ) -> Result, serde_json::Error> { - Ok(Box::new(NeverListMatcher {})) + deserializer: &mut dyn erased_serde::Deserializer<'de>, + ) -> Result, erased_serde::Error> { + let matcher = erased_serde::deserialize::(deserializer)?; + Ok(Box::new(matcher)) } fn new_matcher(&self) -> Box { @@ -140,10 +136,6 @@ impl ListMatcher for NeverListMatcher { false } - fn to_json_value(&self) -> serde_json::Value { - serde_json::Value::Null - } - fn clear(&mut self) {} } diff --git a/engine/src/range_set.rs b/engine/src/range_set.rs index 69f0c967..e024f787 100644 --- a/engine/src/range_set.rs +++ b/engine/src/range_set.rs @@ -1,4 +1,7 @@ -use std::{borrow::Borrow, cmp::Ordering, iter::FromIterator, ops::RangeInclusive}; +use std::borrow::Borrow; +use std::cmp::Ordering; +use std::iter::FromIterator; +use std::ops::RangeInclusive; /// RangeSet provides a set-like interface that allows to search for items while /// being constructed from and storing inclusive ranges in a compact fashion. diff --git a/engine/src/rhs_types/array.rs b/engine/src/rhs_types/array.rs index dee02abc..f3ef2771 100644 --- a/engine/src/rhs_types/array.rs +++ b/engine/src/rhs_types/array.rs @@ -1,11 +1,10 @@ -use crate::{ - lex::{Lex, LexResult}, - lhs_types::Array, - strict_partial_ord::StrictPartialOrd, - types::{GetType, Type}, -}; +use crate::lex::{Lex, LexResult}; +use crate::lhs_types::Array; +use crate::strict_partial_ord::StrictPartialOrd; +use crate::types::{GetType, Type}; use serde::Serialize; -use std::{borrow::Borrow, cmp::Ordering}; +use std::borrow::Borrow; +use std::cmp::Ordering; /// [Uninhabited / empty type](https://doc.rust-lang.org/nomicon/exotic-sizes.html#empty-types) /// for `array` with traits we need for RHS values. diff --git a/engine/src/rhs_types/bool.rs b/engine/src/rhs_types/bool.rs index 0c549014..e44b4211 100644 --- a/engine/src/rhs_types/bool.rs +++ b/engine/src/rhs_types/bool.rs @@ -1,9 +1,8 @@ -use crate::{ - lex::{Lex, LexResult}, - strict_partial_ord::StrictPartialOrd, -}; +use crate::lex::{Lex, LexResult}; +use crate::strict_partial_ord::StrictPartialOrd; use serde::Serialize; -use std::{borrow::Borrow, cmp::Ordering}; +use std::borrow::Borrow; +use std::cmp::Ordering; /// [Uninhabited / empty type](https://doc.rust-lang.org/nomicon/exotic-sizes.html#empty-types) /// for `bool` with traits we need for RHS values. diff --git a/engine/src/rhs_types/bytes.rs b/engine/src/rhs_types/bytes.rs index ec71d590..15c37a8f 100644 --- a/engine/src/rhs_types/bytes.rs +++ b/engine/src/rhs_types/bytes.rs @@ -1,14 +1,10 @@ -use crate::{ - lex::{Lex, LexErrorKind, LexResult, take}, - strict_partial_ord::StrictPartialOrd, -}; +use crate::lex::{Lex, LexErrorKind, LexResult, take}; +use crate::strict_partial_ord::StrictPartialOrd; use serde::{Serialize, Serializer}; -use std::{ - fmt::{self, Debug, Formatter}, - hash::{Hash, Hasher}, - ops::Deref, - str, -}; +use std::fmt::{self, Debug, Formatter}; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; +use std::str; /// BytesFormat describes the format in which the string was expressed #[derive(PartialEq, Eq, Copy, Clone)] @@ -23,12 +19,12 @@ pub enum BytesFormat { /// Bytes literal represented either by a string, raw string or raw bytes. #[derive(PartialEq, Eq, Clone)] -pub struct Bytes { +pub struct BytesExpr { format: BytesFormat, data: Box<[u8]>, } -impl Bytes { +impl BytesExpr { /// Creates a new bytes literal. #[inline] pub fn new(data: impl Into>, format: BytesFormat) -> Self { @@ -45,17 +41,19 @@ impl Bytes { } } -impl Serialize for Bytes { +impl Serialize for BytesExpr { #[inline] fn serialize(&self, serializer: S) -> Result where S: Serializer, { match self.format() { - BytesFormat::Quoted | BytesFormat::Raw(_) => match std::str::from_utf8(&self.data) { - Ok(s) => s.serialize(serializer), - Err(_) => self.data.serialize(serializer), - }, + BytesFormat::Quoted | BytesFormat::Raw(_) => { + match simdutf8::basic::from_utf8(&self.data) { + Ok(s) => s.serialize(serializer), + Err(_) => self.data.serialize(serializer), + } + } BytesFormat::Byte => self.data.serialize(serializer), } } @@ -66,48 +64,48 @@ impl Serialize for Bytes { // `Bytes == Bytes` to check enum tags but `Bytes == &[u8]` to ignore them, and // consistency of the latter is all that matters for `Borrow` consumers. #[allow(clippy::derived_hash_with_manual_eq)] -impl Hash for Bytes { +impl Hash for BytesExpr { #[inline] fn hash(&self, h: &mut H) { (self as &[u8]).hash(h); } } -impl From> for Bytes { +impl From> for BytesExpr { #[inline] fn from(src: Vec) -> Self { - Bytes { + Self { format: BytesFormat::Byte, data: src.into_boxed_slice(), } } } -impl From for Bytes { +impl From for BytesExpr { #[inline] fn from(src: String) -> Self { - Bytes { + Self { format: BytesFormat::Quoted, data: src.into_boxed_str().into_boxed_bytes(), } } } -impl From for Box<[u8]> { +impl From for Box<[u8]> { #[inline] - fn from(bytes: Bytes) -> Self { + fn from(bytes: BytesExpr) -> Self { bytes.data } } -impl From for Vec { +impl From for Vec { #[inline] - fn from(bytes: Bytes) -> Self { + fn from(bytes: BytesExpr) -> Self { bytes.data.into_vec() } } -impl Debug for Bytes { +impl Debug for BytesExpr { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { fn fmt_raw(data: &[u8], f: &mut Formatter<'_>) -> fmt::Result { let mut iter = data.iter(); @@ -121,16 +119,18 @@ impl Debug for Bytes { } match self.format { - BytesFormat::Quoted | BytesFormat::Raw(_) => match std::str::from_utf8(&self.data) { - Ok(s) => s.fmt(f), - Err(_) => fmt_raw(&self.data, f), - }, + BytesFormat::Quoted | BytesFormat::Raw(_) => { + match simdutf8::basic::from_utf8(&self.data) { + Ok(s) => s.fmt(f), + Err(_) => fmt_raw(&self.data, f), + } + } BytesFormat::Byte => fmt_raw(&self.data, f), } } } -impl Deref for Bytes { +impl Deref for BytesExpr { type Target = [u8]; #[inline] @@ -139,16 +139,16 @@ impl Deref for Bytes { } } -impl AsRef<[u8]> for Bytes { +impl AsRef<[u8]> for BytesExpr { #[inline] fn as_ref(&self) -> &[u8] { self } } -impl<'a> IntoIterator for &'a Bytes { - type Item = &'a u8; +impl<'a> IntoIterator for &'a BytesExpr { type IntoIter = std::slice::Iter<'a, u8>; + type Item = &'a u8; #[inline] fn into_iter(self) -> std::slice::Iter<'a, u8> { @@ -236,9 +236,9 @@ pub(crate) fn lex_quoted_string_as_vec(input: &str) -> LexResult<'_, Vec> { } } -fn lex_quoted_string(input: &str) -> LexResult<'_, Bytes> { +fn lex_quoted_string(input: &str) -> LexResult<'_, BytesExpr> { lex_quoted_string_as_vec(input).map(|(vec, rest)| { - let bytes = Bytes { + let bytes = BytesExpr { format: BytesFormat::Quoted, data: vec.into_boxed_slice(), }; @@ -247,7 +247,7 @@ fn lex_quoted_string(input: &str) -> LexResult<'_, Bytes> { }) } -fn lex_byte_string(mut input: &str) -> LexResult<'_, Bytes> { +fn lex_byte_string(mut input: &str) -> LexResult<'_, BytesExpr> { let mut res = Vec::new(); let (b, rest) = hex_byte(input)?; res.push(b); @@ -309,10 +309,10 @@ pub(crate) fn lex_raw_string_as_str(input: &str) -> LexResult<'_, (&str, u8)> { } #[inline] -fn lex_raw_string(input: &str) -> LexResult<'_, Bytes> { +fn lex_raw_string(input: &str) -> LexResult<'_, BytesExpr> { let ((lexed, hash_count), rest) = lex_raw_string_as_str(input)?; Ok(( - Bytes { + BytesExpr { format: BytesFormat::Raw(hash_count), data: Box::from(lexed.as_bytes()), }, @@ -320,7 +320,7 @@ fn lex_raw_string(input: &str) -> LexResult<'_, Bytes> { )) } -pub(crate) fn lex_quoted_or_raw_string(input: &str) -> LexResult<'_, Bytes> { +pub(crate) fn lex_quoted_or_raw_string(input: &str) -> LexResult<'_, BytesExpr> { match input.as_bytes().first() { Some(b'"') => lex_quoted_string(&input[1..]), Some(b'r') => lex_raw_string(&input[1..]), @@ -329,7 +329,7 @@ pub(crate) fn lex_quoted_or_raw_string(input: &str) -> LexResult<'_, Bytes> { } } -impl Lex<'_> for Bytes { +impl Lex<'_> for BytesExpr { #[inline] fn lex(input: &str) -> LexResult<'_, Self> { match input.as_bytes().first() { @@ -349,18 +349,18 @@ mod test { #[test] fn test() { assert_ok!( - Bytes::lex("01:2e:f3-77.12;"), - Bytes::from(vec![0x01, 0x2E, 0xF3, 0x77, 0x12]), + BytesExpr::lex("01:2e:f3-77.12;"), + BytesExpr::from(vec![0x01, 0x2E, 0xF3, 0x77, 0x12]), ";" ); assert_ok!( - Bytes::lex(r#""s\\t\"r\x0A\000t""#), - Bytes::from("s\\t\"r\n\0t".to_owned()) + BytesExpr::lex(r#""s\\t\"r\x0A\000t""#), + BytesExpr::from("s\\t\"r\n\0t".to_owned()) ); assert_err!( - Bytes::lex("01:4x;"), + BytesExpr::lex("01:4x;"), LexErrorKind::ParseInt { err: u8::from_str_radix("4x", 16).unwrap_err(), radix: 16, @@ -369,13 +369,13 @@ mod test { ); assert_err!( - Bytes::lex("01;"), + BytesExpr::lex("01;"), LexErrorKind::ExpectedName("byte separator"), ";" ); assert_err!( - Bytes::lex("01:;"), + BytesExpr::lex("01:;"), LexErrorKind::CountMismatch { name: "character", actual: 1, @@ -384,24 +384,27 @@ mod test { ";" ); - assert_ok!(Bytes::lex("01:2f-34"), Bytes::from(vec![0x01, 0x2F, 0x34])); + assert_ok!( + BytesExpr::lex("01:2f-34"), + BytesExpr::from(vec![0x01, 0x2F, 0x34]) + ); - assert_err!(Bytes::lex("\"1"), LexErrorKind::MissingEndingQuote, "1"); + assert_err!(BytesExpr::lex("\"1"), LexErrorKind::MissingEndingQuote, "1"); assert_err!( - Bytes::lex(r#""\n""#), + BytesExpr::lex(r#""\n""#), LexErrorKind::InvalidCharacterEscape, "n" ); assert_err!( - Bytes::lex(r#""abcd\"#), + BytesExpr::lex(r#""abcd\"#), LexErrorKind::MissingEndingQuote, "abcd\\" ); assert_err!( - Bytes::lex(r#""\01😢""#), + BytesExpr::lex(r#""\01😢""#), LexErrorKind::ParseInt { err: u8::from_str_radix("01😢", 8).unwrap_err(), radix: 8, @@ -410,7 +413,7 @@ mod test { ); assert_err!( - Bytes::lex(r#""\x3😢""#), + BytesExpr::lex(r#""\x3😢""#), LexErrorKind::ParseInt { err: u8::from_str_radix("3😢", 16).unwrap_err(), radix: 16, @@ -419,7 +422,7 @@ mod test { ); assert_err!( - Bytes::lex("12:3😢"), + BytesExpr::lex("12:3😢"), LexErrorKind::ParseInt { err: u8::from_str_radix("3😢", 16).unwrap_err(), radix: 16, @@ -427,33 +430,39 @@ mod test { "3😢" ); - assert_ok!(Bytes::lex(r#""\x7F""#), Bytes::from("\x7F".to_owned())); + assert_ok!( + BytesExpr::lex(r#""\x7F""#), + BytesExpr::from("\x7F".to_owned()) + ); assert_ok!( - Bytes::lex(r#""\x80""#), - Bytes::new(vec![0x80], BytesFormat::Quoted) + BytesExpr::lex(r#""\x80""#), + BytesExpr::new(vec![0x80], BytesFormat::Quoted) ); assert_ok!( - Bytes::lex(r#""\xFF""#), - Bytes::new(vec![0xFF], BytesFormat::Quoted) + BytesExpr::lex(r#""\xFF""#), + BytesExpr::new(vec![0xFF], BytesFormat::Quoted) ); - assert_ok!(Bytes::lex(r#""\177""#), Bytes::from("\x7F".to_owned())); + assert_ok!( + BytesExpr::lex(r#""\177""#), + BytesExpr::from("\x7F".to_owned()) + ); assert_ok!( - Bytes::lex(r#""\200""#), - Bytes::new(vec![0x80], BytesFormat::Quoted) + BytesExpr::lex(r#""\200""#), + BytesExpr::new(vec![0x80], BytesFormat::Quoted) ); assert_ok!( - Bytes::lex(r#""\377""#), - Bytes::new(vec![0xFF], BytesFormat::Quoted) + BytesExpr::lex(r#""\377""#), + BytesExpr::new(vec![0xFF], BytesFormat::Quoted) ); assert_ok!( - Bytes::lex("c2:b4710c6888a5d47befe865c8e6fb19"), - Bytes::from(vec![0xC2, 0xb4]), + BytesExpr::lex("c2:b4710c6888a5d47befe865c8e6fb19"), + BytesExpr::from(vec![0xC2, 0xb4]), "710c6888a5d47befe865c8e6fb19" ); } @@ -462,87 +471,87 @@ mod test { fn test_raw_string() { // Valid empty strings assert_ok!( - Bytes::lex("r\"\""), - Bytes::new("".as_bytes(), BytesFormat::Raw(0)) + BytesExpr::lex("r\"\""), + BytesExpr::new("".as_bytes(), BytesFormat::Raw(0)) ); assert_ok!( - Bytes::lex("r#\"\"#"), - Bytes::new("".as_bytes(), BytesFormat::Raw(1)) + BytesExpr::lex("r#\"\"#"), + BytesExpr::new("".as_bytes(), BytesFormat::Raw(1)) ); assert_ok!( - Bytes::lex("r##\"\"##"), - Bytes::new("".as_bytes(), BytesFormat::Raw(2)) + BytesExpr::lex("r##\"\"##"), + BytesExpr::new("".as_bytes(), BytesFormat::Raw(2)) ); assert_ok!( - Bytes::lex("r###\"\"###"), - Bytes::new("".as_bytes(), BytesFormat::Raw(3)) + BytesExpr::lex("r###\"\"###"), + BytesExpr::new("".as_bytes(), BytesFormat::Raw(3)) ); // Valid raw strings assert_ok!( - Bytes::lex("r\"a\""), - Bytes::new("a".as_bytes(), BytesFormat::Raw(0)) + BytesExpr::lex("r\"a\""), + BytesExpr::new("a".as_bytes(), BytesFormat::Raw(0)) ); assert_ok!( - Bytes::lex("r#\"a\"#"), - Bytes::new("a".as_bytes(), BytesFormat::Raw(1)) + BytesExpr::lex("r#\"a\"#"), + BytesExpr::new("a".as_bytes(), BytesFormat::Raw(1)) ); assert_ok!( - Bytes::lex("r##\"a\"##"), - Bytes::new("a".as_bytes(), BytesFormat::Raw(2)) + BytesExpr::lex("r##\"a\"##"), + BytesExpr::new("a".as_bytes(), BytesFormat::Raw(2)) ); assert_ok!( - Bytes::lex("r###\"a\"###"), - Bytes::new("a".as_bytes(), BytesFormat::Raw(3)) + BytesExpr::lex("r###\"a\"###"), + BytesExpr::new("a".as_bytes(), BytesFormat::Raw(3)) ); // Quotes and hashes can be used inside the raw string assert_ok!( - Bytes::lex("r\"#\""), - Bytes::new("#".as_bytes(), BytesFormat::Raw(0)) + BytesExpr::lex("r\"#\""), + BytesExpr::new("#".as_bytes(), BytesFormat::Raw(0)) ); assert_ok!( - Bytes::lex("r\"a#\""), - Bytes::new("a#".as_bytes(), BytesFormat::Raw(0)) + BytesExpr::lex("r\"a#\""), + BytesExpr::new("a#".as_bytes(), BytesFormat::Raw(0)) ); assert_ok!( - Bytes::lex("r#\"\"a\"\"\"#"), - Bytes::new("\"a\"\"".as_bytes(), BytesFormat::Raw(1)) + BytesExpr::lex("r#\"\"a\"\"\"#"), + BytesExpr::new("\"a\"\"".as_bytes(), BytesFormat::Raw(1)) ); assert_ok!( - Bytes::lex("r##\"\"a\"#b\"##"), - Bytes::new("\"a\"#b".as_bytes(), BytesFormat::Raw(2)) + BytesExpr::lex("r##\"\"a\"#b\"##"), + BytesExpr::new("\"a\"#b".as_bytes(), BytesFormat::Raw(2)) ); assert_ok!( - Bytes::lex("r###\"a###\"##\"\"###"), - Bytes::new("a###\"##\"".as_bytes(), BytesFormat::Raw(3)) + BytesExpr::lex("r###\"a###\"##\"\"###"), + BytesExpr::new("a###\"##\"".as_bytes(), BytesFormat::Raw(3)) ); assert_ok!( - Bytes::lex("r#\"a\"\"\"#"), - Bytes::new("a\"\"".as_bytes(), BytesFormat::Raw(1)) + BytesExpr::lex("r#\"a\"\"\"#"), + BytesExpr::new("a\"\"".as_bytes(), BytesFormat::Raw(1)) ); assert_ok!( - Bytes::lex("r##\"a\"#\"##"), - Bytes::new("a\"#".as_bytes(), BytesFormat::Raw(2)) + BytesExpr::lex("r##\"a\"#\"##"), + BytesExpr::new("a\"#".as_bytes(), BytesFormat::Raw(2)) ); assert_ok!( - Bytes::lex("r###\"a###\"##\"###"), - Bytes::new("a###\"##".as_bytes(), BytesFormat::Raw(3)) + BytesExpr::lex("r###\"a###\"##\"###"), + BytesExpr::new("a###\"##".as_bytes(), BytesFormat::Raw(3)) ); // Expect an error if the number of '#' doesn't match assert_err!( - Bytes::lex("r#\"a\""), + BytesExpr::lex("r#\"a\""), LexErrorKind::MissingEndingQuote, "#\"a\"" ); assert_err!( - Bytes::lex("r##\"a\"#"), + BytesExpr::lex("r##\"a\"#"), LexErrorKind::MissingEndingQuote, "##\"a\"#" ); assert_err!( - Bytes::lex("r###\"a\"##"), + BytesExpr::lex("r###\"a\"##"), LexErrorKind::MissingEndingQuote, "###\"a\"##" ); @@ -550,54 +559,60 @@ mod test { // Expect an error when there are too many hashes being used let hashes = format!("r{}\"abc\"{}", "#".repeat(255), "#".repeat(255)); assert_ok!( - Bytes::lex(hashes.as_str()), - Bytes::new("abc".as_bytes(), BytesFormat::Raw(255)) + BytesExpr::lex(hashes.as_str()), + BytesExpr::new("abc".as_bytes(), BytesFormat::Raw(255)) ); let hashes = format!("r{}\"abc\"{}", "#".repeat(256), "#".repeat(256)); assert_err!( - Bytes::lex(hashes.as_str()), + BytesExpr::lex(hashes.as_str()), LexErrorKind::InvalidRawStringHashCount, &hashes.as_str()[1..] ); // Test regex escapes remain the same assert_ok!( - Bytes::lex(r#"r".\d\D\pA\p{Greek}\PA\P{Greek}[xyz][^xyz][a-z][[:alpha:]][[:^alpha:]][x[^xyz]][a-y&&xyz][0-9&&[^4]][0-9--4][a-g~~b-h][\[\]]""#), - Bytes::new(r#".\d\D\pA\p{Greek}\PA\P{Greek}[xyz][^xyz][a-z][[:alpha:]][[:^alpha:]][x[^xyz]][a-y&&xyz][0-9&&[^4]][0-9--4][a-g~~b-h][\[\]]"#.as_bytes(), BytesFormat::Raw(0)) + BytesExpr::lex(r#"r".\d\D\pA\p{Greek}\PA\P{Greek}[xyz][^xyz][a-z][[:alpha:]][[:^alpha:]][x[^xyz]][a-y&&xyz][0-9&&[^4]][0-9--4][a-g~~b-h][\[\]]""#), + BytesExpr::new(r#".\d\D\pA\p{Greek}\PA\P{Greek}[xyz][^xyz][a-z][[:alpha:]][[:^alpha:]][x[^xyz]][a-y&&xyz][0-9&&[^4]][0-9--4][a-g~~b-h][\[\]]"#.as_bytes(), BytesFormat::Raw(0)) ); assert_ok!( - Bytes::lex(r##"r#"\*\a\f\t\n\r\v\123\x7F\x{10FFFF}\u007F\u{7F}\U0000007F\U{7F}"#"##), - Bytes::new( + BytesExpr::lex( + r##"r#"\*\a\f\t\n\r\v\123\x7F\x{10FFFF}\u007F\u{7F}\U0000007F\U{7F}"#"## + ), + BytesExpr::new( r#"\*\a\f\t\n\r\v\123\x7F\x{10FFFF}\u007F\u{7F}\U0000007F\U{7F}"#.as_bytes(), BytesFormat::Raw(1) ) ); // Invalid character after 'r' or '#' - assert_err!(Bytes::lex("r"), LexErrorKind::ExpectedName("\" or #"), ""); assert_err!( - Bytes::lex("r#ab"), + BytesExpr::lex("r"), + LexErrorKind::ExpectedName("\" or #"), + "" + ); + assert_err!( + BytesExpr::lex("r#ab"), LexErrorKind::ExpectedName("\" or #"), "ab" ); assert_err!( - Bytes::lex("r##ab"), + BytesExpr::lex("r##ab"), LexErrorKind::ExpectedName("\" or #"), "ab" ); // Any characters after a raw string should get returned assert_eq!( - Bytes::lex("r#\"ab\"##"), - Ok((Bytes::new("ab".as_bytes(), BytesFormat::Raw(1)), "#")) + BytesExpr::lex("r#\"ab\"##"), + Ok((BytesExpr::new("ab".as_bytes(), BytesFormat::Raw(1)), "#")) ); assert_eq!( - Bytes::lex("r#\"ab\"#\""), - Ok((Bytes::new("ab".as_bytes(), BytesFormat::Raw(1)), "\"")) + BytesExpr::lex("r#\"ab\"#\""), + Ok((BytesExpr::new("ab".as_bytes(), BytesFormat::Raw(1)), "\"")) ); assert_eq!( - Bytes::lex("r#\"ab\"#a"), - Ok((Bytes::new("ab".as_bytes(), BytesFormat::Raw(1)), "a")) + BytesExpr::lex("r#\"ab\"#a"), + Ok((BytesExpr::new("ab".as_bytes(), BytesFormat::Raw(1)), "a")) ); } } diff --git a/engine/src/rhs_types/int.rs b/engine/src/rhs_types/int.rs index 5021f61a..4643090c 100644 --- a/engine/src/rhs_types/int.rs +++ b/engine/src/rhs_types/int.rs @@ -1,7 +1,5 @@ -use crate::{ - lex::{Lex, LexErrorKind, LexResult, expect, span, take_while}, - strict_partial_ord::StrictPartialOrd, -}; +use crate::lex::{Lex, LexErrorKind, LexResult, expect, span, take_while}; +use crate::strict_partial_ord::StrictPartialOrd; use serde::Serialize; use std::ops::RangeInclusive; diff --git a/engine/src/rhs_types/ip.rs b/engine/src/rhs_types/ip.rs index a5526d76..b1597958 100644 --- a/engine/src/rhs_types/ip.rs +++ b/engine/src/rhs_types/ip.rs @@ -1,17 +1,13 @@ +use crate::lex::{Lex, LexError, LexErrorKind, LexResult, take_while}; +use crate::strict_partial_ord::StrictPartialOrd; pub use cidr::IpCidr; - -use crate::{ - lex::{Lex, LexError, LexErrorKind, LexResult, take_while}, - strict_partial_ord::StrictPartialOrd, -}; -use cidr::{Ipv4Cidr, Ipv6Cidr, errors::NetworkParseError}; +use cidr::errors::NetworkParseError; +use cidr::{Ipv4Cidr, Ipv6Cidr}; use serde::Serialize; -use std::{ - cmp::Ordering, - net::{IpAddr, Ipv4Addr, Ipv6Addr}, - ops::RangeInclusive, - str::FromStr, -}; +use std::cmp::Ordering; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +use std::ops::RangeInclusive; +use std::str::FromStr; fn match_addr_or_cidr(input: &str) -> LexResult<'_, &str> { take_while( diff --git a/engine/src/rhs_types/map.rs b/engine/src/rhs_types/map.rs index 004fb0d5..72baa80b 100644 --- a/engine/src/rhs_types/map.rs +++ b/engine/src/rhs_types/map.rs @@ -1,11 +1,10 @@ -use crate::{ - lex::{Lex, LexResult}, - lhs_types::Map, - strict_partial_ord::StrictPartialOrd, - types::{GetType, Type}, -}; +use crate::lex::{Lex, LexResult}; +use crate::lhs_types::Map; +use crate::strict_partial_ord::StrictPartialOrd; +use crate::types::{GetType, Type}; use serde::Serialize; -use std::{borrow::Borrow, cmp::Ordering}; +use std::borrow::Borrow; +use std::cmp::Ordering; /// [Uninhabited / empty type](https://doc.rust-lang.org/nomicon/exotic-sizes.html#empty-types) /// for `map` with traits we need for RHS values. diff --git a/engine/src/rhs_types/mod.rs b/engine/src/rhs_types/mod.rs index 18d805af..78e8756d 100644 --- a/engine/src/rhs_types/mod.rs +++ b/engine/src/rhs_types/mod.rs @@ -8,14 +8,12 @@ mod map; mod regex; mod wildcard; -pub use self::{ - array::UninhabitedArray, - bool::UninhabitedBool, - bytes::{Bytes, BytesFormat}, - int::IntRange, - ip::{ExplicitIpRange, IpCidr, IpRange}, - list::ListName, - map::UninhabitedMap, - regex::{Error as RegexError, Regex, RegexFormat}, - wildcard::{Wildcard, WildcardError}, -}; +pub use self::array::UninhabitedArray; +pub use self::bool::UninhabitedBool; +pub use self::bytes::{BytesExpr, BytesFormat}; +pub use self::int::IntRange; +pub use self::ip::{ExplicitIpRange, IpCidr, IpRange}; +pub use self::list::ListName; +pub use self::map::UninhabitedMap; +pub use self::regex::{Error as RegexError, Regex, RegexFormat}; +pub use self::wildcard::{Wildcard, WildcardError}; diff --git a/engine/src/rhs_types/regex/imp_real.rs b/engine/src/rhs_types/regex/imp_real.rs index 7171828a..4425ccbd 100644 --- a/engine/src/rhs_types/regex/imp_real.rs +++ b/engine/src/rhs_types/regex/imp_real.rs @@ -1,7 +1,7 @@ -use regex_automata::MatchKind; - use super::Error; use crate::{ParserSettings, RegexFormat}; +use regex_automata::MatchKind; +use regex_automata::nfa::thompson::WhichCaptures; use std::ops::Deref; use std::sync::Arc; @@ -33,6 +33,7 @@ impl Regex { .onepass(false) .dfa_size_limit(Some(settings.regex_compiled_size_limit)) .hybrid_cache_capacity(settings.regex_dfa_size_limit) + .which_captures(WhichCaptures::Implicit) } /// Compiles a regular expression. diff --git a/engine/src/rhs_types/regex/imp_stub.rs b/engine/src/rhs_types/regex/imp_stub.rs index 0f5a1081..17b55857 100644 --- a/engine/src/rhs_types/regex/imp_stub.rs +++ b/engine/src/rhs_types/regex/imp_stub.rs @@ -1,10 +1,4 @@ -use thiserror::Error; - -use crate::{FilterParser, RegexFormat}; - -/// Dummy regex error. -#[derive(Debug, PartialEq, Error)] -pub enum Error {} +use crate::{ParserSettings, RegexFormat}; /// Dummy regex wrapper that can only store a pattern /// but not actually be used for matching. @@ -16,7 +10,11 @@ pub struct Regex { impl Regex { /// Creates a new dummy regex. - pub fn new(pattern: &str, format: RegexFormat, _: &FilterParser<'_>) -> Result { + pub fn new( + pattern: &str, + format: RegexFormat, + _: &ParserSettings, + ) -> Result { Ok(Self { pattern: pattern.to_string(), format, diff --git a/engine/src/rhs_types/wildcard.rs b/engine/src/rhs_types/wildcard.rs index 54d56ff6..d0db6827 100644 --- a/engine/src/rhs_types/wildcard.rs +++ b/engine/src/rhs_types/wildcard.rs @@ -1,11 +1,9 @@ use crate::lex::{LexResult, LexWith}; -use crate::rhs_types::bytes::lex_quoted_or_raw_string; -use crate::{Bytes, FilterParser, LexErrorKind}; +use crate::rhs_types::bytes::{BytesExpr, lex_quoted_or_raw_string}; +use crate::{FilterParser, LexErrorKind}; use serde::{Serialize, Serializer}; -use std::{ - fmt::{self, Debug, Formatter}, - hash::{Hash, Hasher}, -}; +use std::fmt::{self, Debug, Formatter}; +use std::hash::{Hash, Hasher}; use thiserror::Error; use wildcard::WildcardToken; @@ -65,12 +63,12 @@ pub struct Wildcard { compiled_wildcard: wildcard::Wildcard<'static>, /// The original pattern. We keep this to allow correct serialization of the wildcard pattern, /// since bytes are encoded differently depending on whether they are a valid UTF-8 sequence. - pattern: Bytes, + pattern: BytesExpr, } impl Wildcard { pub fn new( - pattern: Bytes, + pattern: BytesExpr, wildcard_star_limit: usize, ) -> Result, WildcardError> { let wildcard = wildcard::WildcardBuilder::from_owned(pattern.to_vec()) @@ -92,7 +90,7 @@ impl Wildcard { } /// Returns the pattern. - pub fn pattern(&self) -> &Bytes { + pub fn pattern(&self) -> &BytesExpr { &self.pattern } } @@ -144,12 +142,12 @@ mod test { fn t() { assert_eq!( Wildcard::::new( - Bytes::new("a quoted string".as_bytes(), BytesFormat::Quoted), + BytesExpr::new("a quoted string".as_bytes(), BytesFormat::Quoted), usize::MAX ) .unwrap(), Wildcard::::new( - Bytes::new("a quoted string".as_bytes(), BytesFormat::Quoted), + BytesExpr::new("a quoted string".as_bytes(), BytesFormat::Quoted), usize::MAX ) .unwrap(), @@ -159,12 +157,12 @@ mod test { // visual representation: assert_ne!( Wildcard::::new( - Bytes::new("a quoted string".as_bytes(), BytesFormat::Quoted), + BytesExpr::new("a quoted string".as_bytes(), BytesFormat::Quoted), usize::MAX ) .unwrap(), Wildcard::::new( - Bytes::new("a quoted string".as_bytes(), BytesFormat::Raw(0)), + BytesExpr::new("a quoted string".as_bytes(), BytesFormat::Raw(0)), usize::MAX ) .unwrap(), @@ -183,7 +181,7 @@ mod test { let expr = assert_ok!( Wildcard::::lex_with(r#""a quoted string";"#, &FilterParser::new(&scheme)), Wildcard::::new( - Bytes::new("a quoted string".as_bytes(), BytesFormat::Quoted), + BytesExpr::new("a quoted string".as_bytes(), BytesFormat::Quoted), usize::MAX ) .unwrap(), @@ -217,7 +215,7 @@ mod test { &FilterParser::new(&scheme) ), Wildcard::::new( - Bytes::new( + BytesExpr::new( r#####"a raw\\xaa r#""# string"#####.as_bytes(), BytesFormat::Raw(2), ), @@ -258,7 +256,7 @@ mod test { &FilterParser::new(&scheme) ), Wildcard::::new( - Bytes::new(bytes.into_boxed_slice(), BytesFormat::Quoted), + BytesExpr::new(bytes.into_boxed_slice(), BytesFormat::Quoted), usize::MAX ) .unwrap(), diff --git a/engine/src/scheme.rs b/engine/src/scheme.rs index 48e55a15..b6c9f7bf 100644 --- a/engine/src/scheme.rs +++ b/engine/src/scheme.rs @@ -1,26 +1,20 @@ -use crate::{ - ast::{ - FilterAst, FilterValueAst, - parse::{FilterParser, ParseError, ParserSettings}, - }, - functions::FunctionDefinition, - lex::{Lex, LexErrorKind, LexResult, LexWith, expect, span, take_while}, - list_matcher::ListDefinition, - types::{GetType, RhsValue, Type}, -}; +use crate::ast::parse::{FilterParser, ParseError, ParserSettings}; +use crate::ast::{FilterAst, FilterValueAst}; +use crate::functions::FunctionDefinition; +use crate::lex::{Lex, LexErrorKind, LexResult, LexWith, expect, span, take_while}; +use crate::list_matcher::ListDefinition; +use crate::types::{GetType, RhsValue, Type}; use fnv::FnvBuildHasher; use serde::de::Visitor; use serde::ser::SerializeMap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::collections::HashMap; use std::collections::hash_map::Entry; +use std::convert::TryFrom; +use std::fmt::{self, Debug, Formatter}; +use std::hash::{Hash, Hasher}; +use std::iter::Iterator; use std::sync::Arc; -use std::{ - collections::HashMap, - convert::TryFrom, - fmt::{self, Debug, Formatter}, - hash::{Hash, Hasher}, - iter::Iterator, -}; use thiserror::Error; /// An error that occurs if two underlying [schemes](struct@Scheme) @@ -29,11 +23,9 @@ use thiserror::Error; #[error("underlying schemes do not match")] pub struct SchemeMismatchError; -#[derive(Debug, PartialEq, Eq, Clone, Hash, Serialize)] -#[serde(tag = "kind", content = "value")] -/// FieldIndex is an enum with variants [`ArrayIndex(usize)`], -/// representing an index into an Array, or `[MapKey(String)`], -/// representing a key into a Map. +/// Enum representing either: +/// * An array index with [`FieldIndex::ArrayIndex`] +/// * A map key with [`FieldIndex::MapKey`] /// /// ``` /// #[allow(dead_code)] @@ -42,6 +34,8 @@ pub struct SchemeMismatchError; /// MapKey(String), /// } /// ``` +#[derive(Debug, PartialEq, Eq, Clone, Hash, Serialize)] +#[serde(tag = "kind", content = "value")] pub enum FieldIndex { /// Index into an Array ArrayIndex(u32), @@ -82,10 +76,16 @@ impl<'i> Lex<'i> for FieldIndex { input, )), }, - RhsValue::Bytes(b) => match String::from_utf8(b.to_vec()) { - Ok(s) => Ok((FieldIndex::MapKey(s), rest)), - Err(_) => Err((LexErrorKind::ExpectedLiteral("expected utf8 string"), input)), - }, + RhsValue::Bytes(b) => { + match simdutf8::basic::from_utf8(&b) { + Ok(_) => { + // SAFETY: simdutf8 just validated the bytes as valid UTF-8. + let s = unsafe { String::from_utf8_unchecked(b.into()) }; + Ok((FieldIndex::MapKey(s), rest)) + } + Err(_) => Err((LexErrorKind::ExpectedLiteral("expected utf8 string"), input)), + } + } _ => unreachable!(), } } @@ -1270,7 +1270,8 @@ fn test_parse_error() { fn test_parse_error_in_op() { use cidr::errors::NetworkParseError; use indoc::indoc; - use std::{net::IpAddr, str::FromStr}; + use std::net::IpAddr; + use std::str::FromStr; let scheme = &Scheme! { num: Int, diff --git a/engine/src/searcher.rs b/engine/src/searcher.rs index 136e268d..23846c71 100644 --- a/engine/src/searcher.rs +++ b/engine/src/searcher.rs @@ -1,8 +1,6 @@ -use memmem::Searcher; -use sliceslice::MemchrSearcher; -use std::mem::ManuallyDrop; - use crate::{Compare, ExecutionContext, LhsValue}; +use memchr::memmem::{Finder, FinderBuilder}; +use sliceslice::MemchrSearcher; pub struct EmptySearcher; @@ -13,44 +11,20 @@ impl Compare for EmptySearcher { } } -pub struct TwoWaySearcher { - // This is an `Box` whose lifetime must exceed `searcher`. - needle: *mut [u8], - - // We need this because `memmem::TwoWaySearcher` wants a lifetime for the data it refers to, but - // we don't want to tie it to the lifetime of `TwoWaySearcher`, since our data is heap-allocated - // and is guaranteed to deref to the same address across moves of the container. Hence, we use - // `static` as a substitute lifetime and it points to the same the data as `needle`. - searcher: ManuallyDrop>, -} - -// This is safe because we are only ever accessing `needle` mutably during `Drop::drop` -// which is statically enforced by the compiler to be called once when the searcher is -// not in used anymore. -unsafe impl Send for TwoWaySearcher {} -// This is safe because we are only ever accessing `needle` mutably during `Drop::drop` -// which is statically enforced by the compiler to be called once when the searcher is -// not in used anymore. -unsafe impl Sync for TwoWaySearcher {} +pub struct MemmemSearcher(Finder<'static>); -impl TwoWaySearcher { +impl MemmemSearcher { + #[inline] pub fn new(needle: Box<[u8]>) -> Self { - let needle = Box::into_raw(needle); - // Convert needle's contents to the static lifetime. - let needle_static = unsafe { &*needle }; - - TwoWaySearcher { - needle, - searcher: ManuallyDrop::new(memmem::TwoWaySearcher::new(needle_static)), - } + Self(FinderBuilder::new().build_forward_owned(needle)) } } -impl Compare for TwoWaySearcher { +impl Compare for MemmemSearcher { #[inline] fn compare<'e>(&self, value: &LhsValue<'e>, _: &'e ExecutionContext<'e, U>) -> bool { - self.searcher - .search_in(match value { + self.0 + .find(match value { LhsValue::Bytes(bytes) => bytes, _ => unreachable!(), }) @@ -58,16 +32,6 @@ impl Compare for TwoWaySearcher { } } -impl Drop for TwoWaySearcher { - fn drop(&mut self) { - unsafe { - // Explicitly drop `searcher` first in case it needs `needle` to be alive. - ManuallyDrop::drop(&mut self.searcher); - drop(Box::from_raw(self.needle)); - } - } -} - impl Compare for MemchrSearcher { #[inline] fn compare<'e>(&self, value: &LhsValue<'e>, _: &'e ExecutionContext<'e, U>) -> bool { diff --git a/engine/src/types.rs b/engine/src/types.rs index 84853485..e24b2e77 100644 --- a/engine/src/types.rs +++ b/engine/src/types.rs @@ -1,21 +1,19 @@ -use crate::{ - lex::{Lex, LexResult, LexWith, expect, skip_space}, - lhs_types::{Array, ArrayIterator, Map, MapIter, MapValuesIntoIter}, - rhs_types::{Bytes, IntRange, IpRange, UninhabitedArray, UninhabitedBool, UninhabitedMap}, - scheme::{FieldIndex, IndexAccessError}, - strict_partial_ord::StrictPartialOrd, +use crate::lex::{Lex, LexResult, LexWith, expect, skip_space}; +use crate::lhs_types::{Array, ArrayIntoIter, ArrayIter, Bytes, Map, MapIter, MapValuesIntoIter}; +use crate::rhs_types::{ + BytesExpr, IntRange, IpRange, UninhabitedArray, UninhabitedBool, UninhabitedMap, }; +use crate::scheme::{FieldIndex, IndexAccessError}; +use crate::strict_partial_ord::StrictPartialOrd; use serde::de::{DeserializeSeed, Deserializer}; use serde::{Deserialize, Serialize, Serializer}; -use std::{ - borrow::Cow, - cmp::Ordering, - collections::BTreeSet, - convert::TryFrom, - fmt::{self, Debug, Formatter}, - iter::once, - net::{IpAddr, Ipv4Addr, Ipv6Addr}, -}; +use std::borrow::Cow; +use std::cmp::Ordering; +use std::collections::BTreeSet; +use std::convert::TryFrom; +use std::fmt::{self, Debug, Formatter}; +use std::iter::once; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use thiserror::Error; fn lex_rhs_values<'i, T: Lex<'i>>(input: &'i str) -> LexResult<'i, Vec> { @@ -139,7 +137,7 @@ pub struct TypeMismatchError { } macro_rules! replace_underscore { - ($name:ident ($val_ty:ty)) => { + ($name:ident($val_ty:ty)) => { Type::$name(_) }; ($name:ident) => { @@ -331,7 +329,10 @@ macro_rules! declare_types { impl From for RhsValues { fn from(rhs: RhsValue) -> Self { match rhs { - $(RhsValue::$name(rhs) => RhsValues::$name(vec![rhs.into()]),)* + $(RhsValue::$name(rhs) => { + #[allow(unreachable_code)] + RhsValues::$name(vec![rhs.into()]) + })* } } } @@ -341,7 +342,10 @@ macro_rules! declare_types { pub fn push(&mut self, rhs: RhsValue) -> Result<(), TypeMismatchError> { match self { $(RhsValues::$name(vec) => match rhs { - RhsValue::$name(rhs) => Ok(vec.push(rhs.into())), + RhsValue::$name(rhs) => { + #[allow(unreachable_code)] + Ok(vec.push(rhs.into())) + } _ => Err(TypeMismatchError { expected: self.get_type().into(), actual: rhs.get_type(), @@ -464,30 +468,9 @@ impl PartialEq for LhsValue<'_> { } } -#[derive(Deserialize)] -#[serde(untagged)] -pub enum BytesOrString<'a> { - BorrowedBytes(#[serde(borrow)] &'a [u8]), - OwnedBytes(Vec), - BorrowedString(#[serde(borrow)] &'a str), - OwnedString(String), -} - -impl<'a> BytesOrString<'a> { - pub fn into_bytes(self) -> Cow<'a, [u8]> { - match self { - BytesOrString::BorrowedBytes(slice) => (*slice).into(), - BytesOrString::OwnedBytes(vec) => vec.into(), - BytesOrString::BorrowedString(str) => str.as_bytes().into(), - BytesOrString::OwnedString(str) => str.into_bytes().into(), - } - } -} - mod private { use super::IntoValue; - use crate::{TypedArray, TypedMap}; - use std::borrow::Cow; + use crate::{Bytes, TypedArray, TypedMap}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; pub trait SealedIntoValue {} @@ -502,19 +485,12 @@ mod private { impl SealedIntoValue for u32 {} impl SealedIntoValue for i64 {} - impl SealedIntoValue for &[u8] {} - impl SealedIntoValue for Box<[u8]> {} - impl SealedIntoValue for Vec {} - impl SealedIntoValue for Cow<'_, [u8]> {} - impl SealedIntoValue for &str {} - impl SealedIntoValue for Box {} - impl SealedIntoValue for String {} - impl SealedIntoValue for Cow<'_, str> {} - impl SealedIntoValue for IpAddr {} impl SealedIntoValue for Ipv4Addr {} impl SealedIntoValue for Ipv6Addr {} + impl<'a, T> SealedIntoValue for T where Bytes<'a>: From {} + impl<'a, V: IntoValue<'a>> SealedIntoValue for TypedArray<'a, V> {} impl<'a, V: IntoValue<'a>> SealedIntoValue for TypedMap<'a, V> {} } @@ -597,7 +573,7 @@ impl<'a> IntoValue<'a> for &'a [u8] { #[inline] fn into_value(self) -> LhsValue<'a> { - LhsValue::Bytes(Cow::Borrowed(self)) + LhsValue::Bytes(Bytes::from(self)) } } @@ -606,7 +582,7 @@ impl<'a> IntoValue<'a> for Box<[u8]> { #[inline] fn into_value(self) -> LhsValue<'a> { - LhsValue::Bytes(Cow::Owned(Vec::from(self))) + LhsValue::Bytes(Bytes::from(self)) } } @@ -615,7 +591,7 @@ impl<'a> IntoValue<'a> for Vec { #[inline] fn into_value(self) -> LhsValue<'a> { - LhsValue::Bytes(Cow::Owned(self)) + LhsValue::Bytes(Bytes::from(self)) } } @@ -624,7 +600,7 @@ impl<'a> IntoValue<'a> for Cow<'a, [u8]> { #[inline] fn into_value(self) -> LhsValue<'a> { - LhsValue::Bytes(self) + LhsValue::Bytes(Bytes::from(self)) } } @@ -633,7 +609,7 @@ impl<'a> IntoValue<'a> for &'a str { #[inline] fn into_value(self) -> LhsValue<'a> { - LhsValue::Bytes(Cow::Borrowed(self.as_bytes())) + LhsValue::Bytes(Bytes::from(self)) } } @@ -642,7 +618,7 @@ impl<'a> IntoValue<'a> for Box { #[inline] fn into_value(self) -> LhsValue<'a> { - LhsValue::Bytes(Cow::Owned(Vec::from(Box::<[u8]>::from(self)))) + LhsValue::Bytes(Bytes::from(self)) } } @@ -651,7 +627,7 @@ impl<'a> IntoValue<'a> for String { #[inline] fn into_value(self) -> LhsValue<'a> { - LhsValue::Bytes(Cow::Owned(self.into_bytes())) + LhsValue::Bytes(Bytes::from(self)) } } @@ -660,10 +636,16 @@ impl<'a> IntoValue<'a> for Cow<'a, str> { #[inline] fn into_value(self) -> LhsValue<'a> { - LhsValue::Bytes(match self { - Cow::Borrowed(slice) => Cow::Borrowed(slice.as_bytes()), - Cow::Owned(vec) => Cow::Owned(vec.into()), - }) + LhsValue::Bytes(Bytes::from(self)) + } +} + +impl<'a> IntoValue<'a> for Bytes<'a> { + const TYPE: Type = Type::Bytes; + + #[inline] + fn into_value(self) -> LhsValue<'a> { + LhsValue::Bytes(self) } } @@ -739,7 +721,7 @@ impl<'a> From<&'a RhsValue> for LhsValue<'a> { fn from(rhs_value: &'a RhsValue) -> Self { match rhs_value { RhsValue::Ip(ip) => LhsValue::Ip(*ip), - RhsValue::Bytes(bytes) => LhsValue::Bytes(Cow::Borrowed(bytes)), + RhsValue::Bytes(bytes) => LhsValue::Bytes(Bytes::Borrowed(bytes)), RhsValue::Int(integer) => LhsValue::Int(*integer), RhsValue::Bool(b) => match *b {}, RhsValue::Array(a) => match *a {}, @@ -752,7 +734,7 @@ impl From for LhsValue<'_> { fn from(rhs_value: RhsValue) -> Self { match rhs_value { RhsValue::Ip(ip) => LhsValue::Ip(ip), - RhsValue::Bytes(bytes) => LhsValue::Bytes(Cow::Owned(bytes.into())), + RhsValue::Bytes(bytes) => LhsValue::Bytes(Bytes::Owned(bytes.into())), RhsValue::Int(integer) => LhsValue::Int(integer), RhsValue::Bool(b) => match b {}, RhsValue::Array(a) => match a {}, @@ -767,7 +749,7 @@ impl<'a> LhsValue<'a> { pub fn as_ref(&'a self) -> Self { match self { LhsValue::Ip(ip) => LhsValue::Ip(*ip), - LhsValue::Bytes(bytes) => LhsValue::Bytes(Cow::Borrowed(bytes)), + LhsValue::Bytes(bytes) => LhsValue::Bytes(Bytes::Borrowed(bytes)), LhsValue::Int(integer) => LhsValue::Int(*integer), LhsValue::Bool(b) => LhsValue::Bool(*b), LhsValue::Array(a) => LhsValue::Array(a.as_ref()), @@ -779,7 +761,7 @@ impl<'a> LhsValue<'a> { pub fn into_owned(self) -> LhsValue<'static> { match self { LhsValue::Ip(ip) => LhsValue::Ip(ip), - LhsValue::Bytes(bytes) => LhsValue::Bytes(Cow::Owned(bytes.into_owned())), + LhsValue::Bytes(bytes) => LhsValue::Bytes(Bytes::Owned(bytes.into_owned())), LhsValue::Int(i) => LhsValue::Int(i), LhsValue::Bool(b) => LhsValue::Bool(b), LhsValue::Array(arr) => LhsValue::Array(arr.into_owned()), @@ -858,7 +840,7 @@ impl<'a> LhsValue<'a> { /// Returns an iterator over the Map or Array pub(crate) fn iter(&'a self) -> Option> { match self { - LhsValue::Array(array) => Some(Iter::IterArray(array.as_slice().iter())), + LhsValue::Array(array) => Some(Iter::IterArray(array.iter())), LhsValue::Map(map) => Some(Iter::IterMap(map.iter())), _ => None, } @@ -873,10 +855,10 @@ impl Serialize for LhsValue<'_> { match self { LhsValue::Ip(ip) => ip.serialize(serializer), LhsValue::Bytes(bytes) => { - if let Ok(s) = std::str::from_utf8(bytes) { - s.serialize(serializer) + if let Ok(s) = simdutf8::basic::from_utf8(bytes) { + serializer.serialize_str(s) } else { - bytes.serialize(serializer) + serializer.serialize_bytes(bytes) } } LhsValue::Int(num) => num.serialize(serializer), @@ -900,9 +882,7 @@ impl<'de> DeserializeSeed<'de> for LhsValueSeed<'_> { Type::Ip => Ok(LhsValue::Ip(std::net::IpAddr::deserialize(deserializer)?)), Type::Int => Ok(LhsValue::Int(i64::deserialize(deserializer)?)), Type::Bool => Ok(LhsValue::Bool(bool::deserialize(deserializer)?)), - Type::Bytes => Ok(LhsValue::Bytes( - BytesOrString::deserialize(deserializer)?.into_bytes(), - )), + Type::Bytes => Ok(LhsValue::Bytes(Bytes::deserialize(deserializer)?)), Type::Array(ty) => Ok(LhsValue::Array({ let mut arr = Array::new(*ty); arr.deserialize(deserializer)?; @@ -918,7 +898,7 @@ impl<'de> DeserializeSeed<'de> for LhsValueSeed<'_> { } pub enum IntoIter<'a> { - IntoArray(ArrayIterator<'a>), + IntoArray(ArrayIntoIter<'a>), IntoMap(MapValuesIntoIter<'a>), } @@ -947,19 +927,20 @@ impl ExactSizeIterator for IntoIter<'_> { } impl<'a> IntoIterator for LhsValue<'a> { - type Item = LhsValue<'a>; type IntoIter = IntoIter<'a>; + type Item = LhsValue<'a>; + fn into_iter(self) -> Self::IntoIter { match self { LhsValue::Array(array) => IntoIter::IntoArray(array.into_iter()), - LhsValue::Map(map) => IntoIter::IntoMap(map.values_into_iter()), + LhsValue::Map(map) => IntoIter::IntoMap(map.into_values()), _ => unreachable!(), } } } pub(crate) enum Iter<'a> { - IterArray(std::slice::Iter<'a, LhsValue<'a>>), + IterArray(ArrayIter<'a, 'a>), IterMap(MapIter<'a, 'a>), } @@ -1142,7 +1123,7 @@ declare_types!( /// /// These are completely interchangeable in runtime and differ only in /// syntax representation, so we represent them as a single type. - Bytes(#[serde(borrow)] Cow<'a, [u8]> | Bytes | Bytes), + Bytes(#[serde(borrow)] Bytes<'a> | BytesExpr | BytesExpr), /// An Array of [`Type`]. Array[CompoundType](#[serde(skip_deserializing)] Array<'a> | UninhabitedArray | UninhabitedArray), diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index 53d50e75..fd588f1c 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -1,19 +1,25 @@ [package] -authors.workspace = true name = "wirefilter-ffi" version.workspace = true +authors.workspace = true +edition.workspace = true description = "FFI bindings for the Wirefilter engine" publish = false -edition.workspace = true [package.metadata.deb] -assets = [ [ "target/release/libwirefilter_ffi.so", "usr/local/lib/libwirefilter.so", "644" ] ] +assets = [ + [ + "target/release/libwirefilter_ffi.so", + "usr/local/lib/libwirefilter.so", + "644", + ], +] [lib] -crate-type = [ "cdylib", "rlib" ] +bench = false +crate-type = ["cdylib", "rlib"] # Avoid duplicate compilation error messages as we don't have doctests anyway doctest = false -bench = false [dependencies] fnv.workspace = true @@ -28,7 +34,7 @@ indoc.workspace = true regex-automata.workspace = true [build-dependencies] -cbindgen = "0.28" +cbindgen.workspace = true [target.'cfg(unix)'.dev-dependencies] wirefilter-ffi-ctests = { path = "tests/ctests" } diff --git a/ffi/include/wirefilter.h b/ffi/include/wirefilter.h index 22e12629..1d5730ca 100644 --- a/ffi/include/wirefilter.h +++ b/ffi/include/wirefilter.h @@ -1,7 +1,7 @@ #ifndef _WIREFILTER_H_ #define _WIREFILTER_H_ -/* Generated with cbindgen:0.28.0 */ +/* Generated with cbindgen:0.29.2 */ /* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */ diff --git a/ffi/src/cstring.rs b/ffi/src/cstring.rs index 8fa16d0c..ddb18ab3 100644 --- a/ffi/src/cstring.rs +++ b/ffi/src/cstring.rs @@ -1,8 +1,6 @@ -use std::{ - fmt::{self, Debug}, - io, - os::raw::c_char, -}; +use std::fmt::{self, Debug}; +use std::io; +use std::os::raw::c_char; /// Used for replacing null bytes in C strings that cannot contain null bytes. const SUBSTITUTE_BYTE: u8 = 0x1a; diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 0c71c4a3..feee9707 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -10,19 +10,16 @@ use libc::c_char; use num_enum::{IntoPrimitive, TryFromPrimitive}; use serde::de::DeserializeSeed; use std::cell::RefCell; +use std::convert::TryFrom; +use std::hash::Hasher; +use std::io::{self, Write}; +use std::net::IpAddr; use std::ops::{Deref, DerefMut}; -use std::{ - convert::TryFrom, - hash::Hasher, - io::{self, Write}, - net::IpAddr, -}; use wirefilter::{ AllFunction, AlwaysList, AnyFunction, CIDRFunction, ConcatFunction, DecodeBase64Function, - EndsWithFunction, GetType, JsonLookupIntegerFunction, JsonLookupStringFunction, LenFunction, - LowerFunction, NeverList, RemoveBytesFunction, RemoveQueryArgsFunction, StartsWithFunction, - SubstringFunction, ToStringFunction, Type, UUID4Function, UpperFunction, UrlDecodeFunction, - WildcardReplaceFunction, catch_panic, + GetType, LenFunction, LowerFunction, NeverList, RemoveBytesFunction, StartsWithFunction, + SubstringFunction, Type, UUID4Function, UrlDecodeFunction, WildcardReplaceFunction, + catch_panic, }; const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -178,7 +175,7 @@ macro_rules! wrap_type { }; } -/* Wrapper types needed by cbindgen to forward declare opaque structs */ +// Wrapper types needed by cbindgen to forward declare opaque structs #[derive(Debug, Default)] #[repr(Rust)] @@ -308,36 +305,102 @@ pub extern "C" fn wirefilter_add_function_to_scheme( ) -> bool { let name = to_str!(name_ptr, name_len); - let result = match name { - "concat" => builder.add_function(name, ConcatFunction::default()), - "any" => builder.add_function(name, AnyFunction::default()), - "all" => builder.add_function(name, AllFunction::default()), - "lower" => builder.add_function(name, LowerFunction::default()), - "starts_with" => builder.add_function(name, StartsWithFunction::default()), - "cidr" => builder.add_function(name, CIDRFunction::default()), - "len" => builder.add_function(name, LenFunction::default()), - "wildcard_replace" => builder.add_function(name, WildcardReplaceFunction::default()), - "url_decode" => builder.add_function(name, UrlDecodeFunction::default()), - "decode_base64" => builder.add_function(name, DecodeBase64Function::default()), - "ends_with" => builder.add_function(name, EndsWithFunction::default()), - "json_lookup_integer" => builder.add_function(name, JsonLookupIntegerFunction::default()), - "json_lookup_string" => builder.add_function(name, JsonLookupStringFunction::default()), - "remove_bytes" => builder.add_function(name, RemoveBytesFunction::default()), - "remove_query_args" => builder.add_function(name, RemoveQueryArgsFunction::default()), - "substring" => builder.add_function(name, SubstringFunction::default()), - "to_string" => builder.add_function(name, ToStringFunction::default()), - "upper" => builder.add_function(name, UpperFunction::default()), - "uuid4" => builder.add_function(name, UUID4Function::default()), + match name { + "concat" => match builder.add_function(name, ConcatFunction::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, + "any" => match builder.add_function(name, AnyFunction::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, + "all" => match builder.add_function(name, AllFunction::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, + "lower" => match builder.add_function(name, LowerFunction::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, + "starts_with" => match builder.add_function(name, StartsWithFunction::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, + "cidr" => match builder.add_function(name, CIDRFunction::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, + "len" => match builder.add_function(name, LenFunction::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, + "wildcard_replace" => { + match builder.add_function(name, WildcardReplaceFunction::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + } + } + "url_decode" => match builder.add_function(name, UrlDecodeFunction::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, + "decode_base64" => match builder.add_function(name, DecodeBase64Function::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, + "remove_bytes" => match builder.add_function(name, RemoveBytesFunction::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, + "substring" => match builder.add_function(name, SubstringFunction::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, + "uuid4" => match builder.add_function(name, UUID4Function::default()) { + Ok(_) => true, + Err(err) => { + write_last_error!("{}", err); + false + } + }, _ => { write_last_error!("Unknown function name provided: {}", name); - return false; - } - }; - - match result { - Ok(_) => true, - Err(err) => { - write_last_error!("{}", err); false } } @@ -724,9 +787,8 @@ pub struct MatchingResult { } impl MatchingResult { - #[cfg(test)] - const MISSED: Self = Self { - status: Status::Success, + const ERROR: Self = Self { + status: Status::Error, matched: false, }; #[cfg(test)] @@ -734,8 +796,9 @@ impl MatchingResult { status: Status::Success, matched: true, }; - const ERROR: Self = Self { - status: Status::Error, + #[cfg(test)] + const MISSED: Self = Self { + status: Status::Success, matched: false, }; const PANIC: Self = Self { @@ -780,6 +843,14 @@ pub struct UsingResult { } impl UsingResult { + const ERROR: Self = Self { + status: Status::Error, + used: false, + }; + const PANIC: Self = Self { + status: Status::Error, + used: false, + }; #[cfg(test)] const UNUSED: Self = Self { status: Status::Success, @@ -790,14 +861,6 @@ impl UsingResult { status: Status::Success, used: true, }; - const ERROR: Self = Self { - status: Status::Error, - used: false, - }; - const PANIC: Self = Self { - status: Status::Error, - used: false, - }; } #[unsafe(no_mangle)] diff --git a/ffi/tests/ctests/Cargo.toml b/ffi/tests/ctests/Cargo.toml index d4fbdb41..40704349 100644 --- a/ffi/tests/ctests/Cargo.toml +++ b/ffi/tests/ctests/Cargo.toml @@ -7,7 +7,7 @@ publish = false edition = "2024" [dependencies] -wirefilter-ffi = {path = "../.."} +wirefilter-ffi = { path = "../.." } [build-dependencies] cc = "1.0" diff --git a/ffi/tests/ctests/src/tests.c b/ffi/tests/ctests/src/tests.c index 363c2873..1995c1de 100644 --- a/ffi/tests/ctests/src/tests.c +++ b/ffi/tests/ctests/src/tests.c @@ -515,7 +515,7 @@ void wirefilter_ffi_ctest_execution_context_serialize() struct wirefilter_rust_allocated_str json = serializing_result.json; rust_assert(json.ptr != NULL && json.len > 0, "could not serialize execution context to JSON"); - const char *expected = "{\"http.host\":\"www.cloudflare.com\",\"ip.src\":\"192.168.0.1\",\"ip.dst\":\"2606:4700:4700::1111\",\"ssl\":false,\"tcp.port\":80,\"$lists\":[]}"; + const char *expected = "{\"http.host\":\"www.cloudflare.com\",\"ip.src\":\"192.168.0.1\",\"ip.dst\":\"2606:4700:4700::1111\",\"ssl\":false,\"tcp.port\":80,\"$lists\":[{\"type\":\"Ip\",\"data\":{}}]}"; rust_assert(json.len == strlen(expected), "invalid JSON serialization"); @@ -551,8 +551,9 @@ void wirefilter_ffi_ctest_execution_context_deserialize() rust_assert(json.ptr != NULL && json.len > 0, "could not serialize execution context to JSON"); rust_assert( - strncmp(json.ptr, "{\"http.host\":\"www.cloudflare.com\",\"$lists\":[]}", json.len) == 0, - "invalid JSON serialization"); + strncmp(json.ptr, "{\"http.host\":\"www.cloudflare.com\",\"$lists\":[{\"type\":\"Ip\",\"data\":{}}]}", json.len) == 0, + "invalid JSON serialization" + ); struct wirefilter_execution_context *conv_exec_ctx = wirefilter_create_execution_context(scheme); rust_assert(conv_exec_ctx != NULL, "could not create execution context"); @@ -568,8 +569,9 @@ void wirefilter_ffi_ctest_execution_context_deserialize() rust_assert(conv_json.ptr != NULL && conv_json.len > 0, "could not serialize execution context to JSON"); rust_assert( - strncmp(conv_json.ptr, "{\"http.host\":\"www.cloudflare.com\",\"$lists\":[]}", conv_json.len) == 0, - "invalid JSON serialization"); + strncmp(conv_json.ptr, "{\"http.host\":\"www.cloudflare.com\",\"$lists\":[{\"type\":\"Ip\",\"data\":{}}]}", conv_json.len) == 0, + "invalid JSON serialization" + ); wirefilter_free_string(conv_json); diff --git a/fuzz/bytes/Cargo.toml b/fuzz/bytes/Cargo.toml index a9bb9290..520aa6e4 100644 --- a/fuzz/bytes/Cargo.toml +++ b/fuzz/bytes/Cargo.toml @@ -8,4 +8,4 @@ afl = "0.15" wirefilter.workspace = true [lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] } +unexpected_cfgs = { level = "warn", check-cfg = ["cfg(fuzzing)"] } diff --git a/fuzz/map-keys/Cargo.toml b/fuzz/map-keys/Cargo.toml index 7a31cfe8..cd1d29cf 100644 --- a/fuzz/map-keys/Cargo.toml +++ b/fuzz/map-keys/Cargo.toml @@ -8,4 +8,4 @@ afl = "0.15" wirefilter.workspace = true [lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] } +unexpected_cfgs = { level = "warn", check-cfg = ["cfg(fuzzing)"] } diff --git a/fuzz/map-keys/src/main.rs b/fuzz/map-keys/src/main.rs index a30d399c..80b2c36b 100644 --- a/fuzz/map-keys/src/main.rs +++ b/fuzz/map-keys/src/main.rs @@ -1,5 +1,4 @@ use std::sync::LazyLock; - use wirefilter::{ FunctionArgs, LhsValue, SimpleFunctionArgKind, SimpleFunctionDefinition, SimpleFunctionImpl, SimpleFunctionParam, Type, diff --git a/fuzz/raw-string/Cargo.toml b/fuzz/raw-string/Cargo.toml index f649a81e..b1f74575 100644 --- a/fuzz/raw-string/Cargo.toml +++ b/fuzz/raw-string/Cargo.toml @@ -8,4 +8,4 @@ afl = "0.15" wirefilter.workspace = true [lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] } +unexpected_cfgs = { level = "warn", check-cfg = ["cfg(fuzzing)"] } diff --git a/rustfmt.toml b/rustfmt.toml index c9460f60..9ded391f 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,5 +1,9 @@ -format_doc_comments = true -merge_imports = true +format_code_in_doc_comments = true +format_macro_matchers = true +group_imports = "One" +imports_granularity = "Module" normalize_comments = true normalize_doc_attributes = true -wrap_comments = true +reorder_impl_items = true +use_field_init_shorthand = true +use_try_shorthand = true diff --git a/wasm/Cargo.toml b/wasm/Cargo.toml index 050a64ad..1f50f0fc 100644 --- a/wasm/Cargo.toml +++ b/wasm/Cargo.toml @@ -12,8 +12,8 @@ crate-type = ["cdylib"] doctest = false [dependencies] -getrandom = { version = "0.3", features = ["wasm_js"] } -js-sys = "0.3.77" -serde-wasm-bindgen = "0.5.0" -wasm-bindgen = { version = "0.2", features = ["serde-serialize"] } +getrandom = { workspace = true, features = ["wasm_js"] } +js-sys.workspace = true +serde-wasm-bindgen.workspace = true +wasm-bindgen.workspace = true wirefilter.workspace = true