diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 1993062..5584529 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -55,9 +55,6 @@ jobs: python-version: 3.x - name: Build wheels uses: PyO3/maturin-action@db323e2cf5679b7feb8bcb561a36b27a0bc19e79 # v1 - env: - # Ensure ring's ARM assembly sees an explicit architecture on aarch64 (glibc) - CFLAGS_aarch64_unknown_linux_gnu: -D__ARM_ARCH=8 with: target: ${{ matrix.platform.target }} args: --release --out dist --find-interpreter @@ -91,9 +88,6 @@ jobs: python-version: 3.x - name: Build wheels uses: PyO3/maturin-action@db323e2cf5679b7feb8bcb561a36b27a0bc19e79 # v1 - env: - # Ensure ring's ARM assembly sees an explicit architecture on aarch64 (musl) - CFLAGS_aarch64_unknown_linux_musl: -D__ARM_ARCH=8 with: target: ${{ matrix.platform.target }} args: --release --out dist --find-interpreter diff --git a/Cargo.lock b/Cargo.lock index d245b49..746f6d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,12 +26,6 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" -[[package]] -name = "atomic-waker" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" - [[package]] name = "autocfg" version = "1.5.0" @@ -59,12 +53,6 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" -[[package]] -name = "bitflags" -version = "2.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" - [[package]] name = "block-buffer" version = "0.10.4" @@ -100,12 +88,6 @@ version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" -[[package]] -name = "bytes" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" - [[package]] name = "cc" version = "1.2.62" @@ -113,6 +95,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -122,12 +106,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" -[[package]] -name = "cfg_aliases" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" - [[package]] name = "chrono" version = "0.4.44" @@ -225,17 +203,6 @@ dependencies = [ "crypto-common", ] -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "dyn-clone" version = "1.0.20" @@ -265,54 +232,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" -[[package]] -name = "form_urlencoded" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "futures-channel" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" -dependencies = [ - "futures-core", - "futures-sink", -] - [[package]] name = "futures-core" version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" -[[package]] -name = "futures-io" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" - -[[package]] -name = "futures-macro" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" - [[package]] name = "futures-task" version = "0.3.32" @@ -326,11 +251,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-core", - "futures-io", - "futures-macro", - "futures-sink", "futures-task", - "memchr", "pin-project-lite", "slab", ] @@ -345,19 +266,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "getrandom" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" -dependencies = [ - "cfg-if", - "js-sys", - "libc", - "wasi", - "wasm-bindgen", -] - [[package]] name = "getrandom" version = "0.3.4" @@ -365,11 +273,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", - "js-sys", "libc", "r-efi", "wasip2", - "wasm-bindgen", ] [[package]] @@ -396,104 +302,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "http" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" -dependencies = [ - "bytes", - "itoa", -] - -[[package]] -name = "http-body" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" -dependencies = [ - "bytes", - "http", -] - -[[package]] -name = "http-body-util" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" -dependencies = [ - "bytes", - "futures-core", - "http", - "http-body", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" - -[[package]] -name = "hyper" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" -dependencies = [ - "atomic-waker", - "bytes", - "futures-channel", - "futures-core", - "http", - "http-body", - "httparse", - "itoa", - "pin-project-lite", - "smallvec", - "tokio", - "want", -] - -[[package]] -name = "hyper-rustls" -version = "0.27.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" -dependencies = [ - "http", - "hyper", - "hyper-util", - "rustls", - "tokio", - "tokio-rustls", - "tower-service", - "webpki-roots", -] - -[[package]] -name = "hyper-util" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" -dependencies = [ - "base64", - "bytes", - "futures-channel", - "futures-util", - "http", - "http-body", - "hyper", - "ipnet", - "libc", - "percent-encoding", - "pin-project-lite", - "socket2", - "tokio", - "tower-service", - "tracing", -] - [[package]] name = "iana-time-zone" version = "0.1.65" @@ -518,115 +326,12 @@ dependencies = [ "cc", ] -[[package]] -name = "icu_collections" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" -dependencies = [ - "displaydoc", - "potential_utf", - "utf8_iter", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locale_core" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" -dependencies = [ - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" - -[[package]] -name = "icu_properties" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" -dependencies = [ - "icu_collections", - "icu_locale_core", - "icu_properties_data", - "icu_provider", - "zerotrie", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" - -[[package]] -name = "icu_provider" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" -dependencies = [ - "displaydoc", - "icu_locale_core", - "writeable", - "yoke", - "zerofrom", - "zerotrie", - "zerovec", -] - [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" -[[package]] -name = "idna" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - [[package]] name = "indexmap" version = "1.9.3" @@ -659,18 +364,22 @@ dependencies = [ "rustversion", ] -[[package]] -name = "ipnet" -version = "2.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" - [[package]] name = "itoa" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom", + "libc", +] + [[package]] name = "js-sys" version = "0.3.98" @@ -689,24 +398,12 @@ version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" -[[package]] -name = "litemap" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" - [[package]] name = "log" version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" -[[package]] -name = "lru-slab" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" - [[package]] name = "memchr" version = "2.8.0" @@ -722,33 +419,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - -[[package]] -name = "mime_guess" -version = "2.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" -dependencies = [ - "mime", - "unicase", -] - -[[package]] -name = "mio" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" -dependencies = [ - "libc", - "wasi", - "windows-sys 0.61.2", -] - [[package]] name = "num-conv" version = "0.2.1" @@ -780,22 +450,15 @@ dependencies = [ "fancy-regex", "pretty_assertions", "pyo3", - "reqwest", - "rustc-hash 1.1.0", + "rustc-hash", "serde", "serde_json", "serde_with", - "sha1", "sha2", "thiserror", + "zstd", ] -[[package]] -name = "percent-encoding" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" - [[package]] name = "pin-project-lite" version = "0.2.17" @@ -803,19 +466,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] -name = "portable-atomic" -version = "1.13.1" +name = "pkg-config" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" [[package]] -name = "potential_utf" -version = "0.1.5" +name = "portable-atomic" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" -dependencies = [ - "zerovec", -] +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "powerfmt" @@ -823,15 +483,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - [[package]] name = "pretty_assertions" version = "1.4.1" @@ -913,61 +564,6 @@ dependencies = [ "syn", ] -[[package]] -name = "quinn" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" -dependencies = [ - "bytes", - "cfg_aliases", - "pin-project-lite", - "quinn-proto", - "quinn-udp", - "rustc-hash 2.1.2", - "rustls", - "socket2", - "thiserror", - "tokio", - "tracing", - "web-time", -] - -[[package]] -name = "quinn-proto" -version = "0.11.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" -dependencies = [ - "bytes", - "getrandom 0.3.4", - "lru-slab", - "rand", - "ring", - "rustc-hash 2.1.2", - "rustls", - "rustls-pki-types", - "slab", - "thiserror", - "tinyvec", - "tracing", - "web-time", -] - -[[package]] -name = "quinn-udp" -version = "0.5.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" -dependencies = [ - "cfg_aliases", - "libc", - "once_cell", - "socket2", - "tracing", - "windows-sys 0.60.2", -] - [[package]] name = "quote" version = "1.0.45" @@ -983,35 +579,6 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" -[[package]] -name = "rand" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" -dependencies = [ - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" -dependencies = [ - "getrandom 0.3.4", -] - [[package]] name = "ref-cast" version = "1.0.25" @@ -1050,121 +617,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] -name = "reqwest" -version = "0.12.28" +name = "rustc-hash" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" -dependencies = [ - "base64", - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "http", - "http-body", - "http-body-util", - "hyper", - "hyper-rustls", - "hyper-util", - "js-sys", - "log", - "mime_guess", - "percent-encoding", - "pin-project-lite", - "quinn", - "rustls", - "rustls-pki-types", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper", - "tokio", - "tokio-rustls", - "tokio-util", - "tower", - "tower-http", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-streams", - "web-sys", - "webpki-roots", -] +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] -name = "ring" -version = "0.17.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" -dependencies = [ - "cc", - "cfg-if", - "getrandom 0.2.17", - "libc", - "untrusted", - "windows-sys 0.52.0", -] - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc-hash" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" - -[[package]] -name = "rustls" -version = "0.23.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" -dependencies = [ - "once_cell", - "ring", - "rustls-pki-types", - "rustls-webpki", - "subtle", - "zeroize", -] - -[[package]] -name = "rustls-pki-types" -version = "1.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" -dependencies = [ - "web-time", - "zeroize", -] - -[[package]] -name = "rustls-webpki" -version = "0.103.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" -dependencies = [ - "ring", - "rustls-pki-types", - "untrusted", -] - -[[package]] -name = "rustversion" -version = "1.0.22" +name = "rustversion" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" -[[package]] -name = "ryu" -version = "1.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" - [[package]] name = "schemars" version = "0.9.0" @@ -1233,18 +696,6 @@ dependencies = [ "zmij", ] -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", -] - [[package]] name = "serde_with" version = "3.20.0" @@ -1277,17 +728,6 @@ dependencies = [ "syn", ] -[[package]] -name = "sha1" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - [[package]] name = "sha2" version = "0.10.9" @@ -1311,40 +751,12 @@ version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = "socket2" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" -dependencies = [ - "libc", - "windows-sys 0.61.2", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" - [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "subtle" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" - [[package]] name = "syn" version = "2.0.117" @@ -1356,26 +768,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "sync_wrapper" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" -dependencies = [ - "futures-core", -] - -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "target-lexicon" version = "0.13.5" @@ -1433,16 +825,6 @@ dependencies = [ "time-core", ] -[[package]] -name = "tinystr" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" -dependencies = [ - "displaydoc", - "zerovec", -] - [[package]] name = "tinyvec" version = "1.11.0" @@ -1458,125 +840,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" -[[package]] -name = "tokio" -version = "1.52.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" -dependencies = [ - "bytes", - "libc", - "mio", - "pin-project-lite", - "socket2", - "windows-sys 0.61.2", -] - -[[package]] -name = "tokio-rustls" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" -dependencies = [ - "rustls", - "tokio", -] - -[[package]] -name = "tokio-util" -version = "0.7.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tower" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" -dependencies = [ - "futures-core", - "futures-util", - "pin-project-lite", - "sync_wrapper", - "tokio", - "tower-layer", - "tower-service", -] - -[[package]] -name = "tower-http" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68d6fdd9f81c2819c9a8b0e0cd91660e7746a8e6ea2ba7c6b2b057985f6bcb51" -dependencies = [ - "bitflags", - "bytes", - "futures-util", - "http", - "http-body", - "pin-project-lite", - "tower", - "tower-layer", - "tower-service", - "url", -] - -[[package]] -name = "tower-layer" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" - -[[package]] -name = "tower-service" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" - -[[package]] -name = "tracing" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" -dependencies = [ - "pin-project-lite", - "tracing-core", -] - -[[package]] -name = "tracing-core" -version = "0.1.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" -dependencies = [ - "once_cell", -] - -[[package]] -name = "try-lock" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - [[package]] name = "typenum" version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" -[[package]] -name = "unicase" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" - [[package]] name = "unicode-ident" version = "1.0.24" @@ -1589,51 +858,12 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" -[[package]] -name = "untrusted" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" - -[[package]] -name = "url" -version = "2.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", - "serde", -] - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - [[package]] name = "wasip2" version = "1.0.3+wasi-0.2.9" @@ -1656,16 +886,6 @@ dependencies = [ "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.71" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "wasm-bindgen-macro" version = "0.2.121" @@ -1698,48 +918,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "wasm-streams" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - -[[package]] -name = "web-sys" -version = "0.3.98" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "web-time" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "webpki-roots" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" -dependencies = [ - "rustls-pki-types", -] - [[package]] name = "windows-core" version = "0.62.2" @@ -1799,174 +977,12 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.5", -] - -[[package]] -name = "windows-sys" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", -] - -[[package]] -name = "windows-targets" -version = "0.53.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" -dependencies = [ - "windows-link", - "windows_aarch64_gnullvm 0.53.1", - "windows_aarch64_msvc 0.53.1", - "windows_i686_gnu 0.53.1", - "windows_i686_gnullvm 0.53.1", - "windows_i686_msvc 0.53.1", - "windows_x86_64_gnu 0.53.1", - "windows_x86_64_gnullvm 0.53.1", - "windows_x86_64_msvc 0.53.1", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_i686_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" - [[package]] name = "wit-bindgen" version = "0.57.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" -[[package]] -name = "writeable" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" - [[package]] name = "yansi" version = "1.0.1" @@ -1974,110 +990,35 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] -name = "yoke" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" -dependencies = [ - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zerocopy" -version = "0.8.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zerofrom" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zeroize" -version = "1.8.2" +name = "zmij" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" [[package]] -name = "zerotrie" -version = "0.2.4" +name = "zstd" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" dependencies = [ - "displaydoc", - "yoke", - "zerofrom", + "zstd-safe", ] [[package]] -name = "zerovec" -version = "0.11.6" +name = "zstd-safe" +version = "7.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", + "zstd-sys", ] [[package]] -name = "zerovec-derive" -version = "0.11.3" +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ - "proc-macro2", - "quote", - "syn", + "cc", + "pkg-config", ] - -[[package]] -name = "zmij" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 4433c9b..a871d95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,19 +24,8 @@ thiserror = "2.0.12" fancy-regex = "0.13.0" rustc-hash = "1.1.0" bstr = "1.5.0" -sha1 = "0.10.6" sha2 = "0.10.9" -# Use the pure-Rust `rustls` TLS backend so we don't depend on an OpenSSL -# installation on the CI runners. We disable the default features (which -# include `platform-native-tls`) and explicitly enable only the capabilities -# we need. -reqwest = { version = "0.12.5", default-features = false, features = [ - "blocking", - "json", - "multipart", - "stream", - "rustls-tls", -] } +zstd = "0.13" pyo3 = { version = "0.25.0", optional = true, features = [ "extension-module", "abi3-py38", diff --git a/src/tiktoken_ext/data/cl100k_base.tiktoken.zst b/src/tiktoken_ext/data/cl100k_base.tiktoken.zst new file mode 100644 index 0000000..de86b62 Binary files /dev/null and b/src/tiktoken_ext/data/cl100k_base.tiktoken.zst differ diff --git a/src/tiktoken_ext/data/o200k_base.tiktoken.zst b/src/tiktoken_ext/data/o200k_base.tiktoken.zst new file mode 100644 index 0000000..cbf5df5 Binary files /dev/null and b/src/tiktoken_ext/data/o200k_base.tiktoken.zst differ diff --git a/src/tiktoken_ext/mod.rs b/src/tiktoken_ext/mod.rs index 5ad31ae..627185b 100644 --- a/src/tiktoken_ext/mod.rs +++ b/src/tiktoken_ext/mod.rs @@ -1,2 +1,2 @@ mod public_encodings; -pub use public_encodings::{set_tiktoken_base_url, Encoding}; +pub use public_encodings::Encoding; diff --git a/src/tiktoken_ext/public_encodings.rs b/src/tiktoken_ext/public_encodings.rs index 9a3cd3c..8c87978 100644 --- a/src/tiktoken_ext/public_encodings.rs +++ b/src/tiktoken_ext/public_encodings.rs @@ -1,15 +1,8 @@ -use std::{ - collections::HashMap, - fs::File, - io::{BufReader, BufWriter, Read as _, Write as _}, - path::{Path, PathBuf}, - sync::OnceLock, -}; +use std::{collections::HashMap, io::Cursor, path::PathBuf}; use base64::{prelude::BASE64_STANDARD, Engine as _}; use crate::tiktoken::{CoreBPE, Rank}; -use sha1::Sha1; use sha2::{Digest as _, Sha256}; #[derive(Debug, thiserror::Error)] @@ -25,53 +18,15 @@ pub enum LoadError { #[error("failed to create CoreBPE: {0}")] CoreBPECreationFailed(#[source] Box), - - #[error("error downloading or loading vocab file: {0}")] - DownloadOrLoadVocabFile( - #[source] - #[from] - RemoteVocabFileError, - ), - - #[error("failed to extend encoding")] - FailedToExtendEncoding(#[source] Box), -} - -#[derive(Debug, thiserror::Error)] -pub enum RemoteVocabFileError { - #[error("failed to download or load vocab file")] - FailedToDownloadOrLoadVocabFile(#[source] Box), - - #[error("an underlying IO error occurred while {0}: {1}")] - IOError(String, #[source] std::io::Error), - - #[error("hash mismatch for remote file {file_url}")] - HashMismatch { - file_url: String, - expected_hash: String, - computed_hash: String, - }, } const TIKTOKEN_ENCODINGS_BASE_VAR: &str = "TIKTOKEN_ENCODINGS_BASE"; -const DEFAULT_TIKTOKEN_BASE_URL: &str = "https://openaipublic.blob.core.windows.net/encodings/"; - -static TIKTOKEN_BASE_URL_OVERRIDE: OnceLock = OnceLock::new(); -pub fn set_tiktoken_base_url(base_url: impl Into) { - let mut base = base_url.into(); - if !base.ends_with('/') { - base.push('/'); - } - // ignore error if already set - let _ = TIKTOKEN_BASE_URL_OVERRIDE.set(base); -} +const O200K_BASE_DATA: &[u8] = include_bytes!("data/o200k_base.tiktoken.zst"); +const CL100K_BASE_DATA: &[u8] = include_bytes!("data/cl100k_base.tiktoken.zst"); -fn tiktoken_base_url() -> &'static str { - TIKTOKEN_BASE_URL_OVERRIDE - .get() - .map(|s| s.as_str()) - .unwrap_or(DEFAULT_TIKTOKEN_BASE_URL) +fn decompress_vocab(compressed: &[u8]) -> Vec { + zstd::decode_all(Cursor::new(compressed)).expect("embedded vocab data is corrupt") } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -112,80 +67,62 @@ impl Encoding { } pub fn load(&self) -> Result { - let (vocab_file_path, check_hash) = - if let Ok(base_dir) = std::env::var(TIKTOKEN_ENCODINGS_BASE_VAR) { - (PathBuf::from(base_dir).join(self.vocab_file_name()), true) - } else { - let url = self.public_vocab_file_url(); - ( - download_or_find_cached_file(&url, Some(self.expected_hash())) - .map_err(LoadError::DownloadOrLoadVocabFile)?, - false, - ) - }; + let encoder = if let Ok(base_dir) = std::env::var(TIKTOKEN_ENCODINGS_BASE_VAR) { + let path = PathBuf::from(base_dir).join(self.vocab_file_name()); + load_tiktoken_vocab_file(&path, Some(self.expected_hash())) + .map_err(LoadError::InvalidTiktokenVocabFile)? + } else { + let raw = decompress_vocab(self.embedded_data()); + load_tiktoken_vocab_bytes(&raw, None).map_err(LoadError::InvalidTiktokenVocabFile)? + }; + + let specials = self.build_special_tokens(); + + CoreBPE::new(encoder, specials, &self.pattern()).map_err(LoadError::CoreBPECreationFailed) + } + fn vocab_file_name(&self) -> &'static str { match self { - Self::O200kHarmony => { - let mut specials: Vec<(String, Rank)> = self - .special_tokens() - .iter() - .map(|(s, r)| ((*s).to_string(), *r)) - .collect(); - specials.extend((200014..=201088).map(|id| (format!("<|reserved_{id}|>"), id))); - load_encoding_from_file( - vocab_file_path, - check_hash.then(|| self.expected_hash()), - specials, - &self.pattern(), - ) - } - Self::O200kBase => { - let mut specials: Vec<(String, Rank)> = self - .special_tokens() - .iter() - .map(|(s, r)| ((*s).to_string(), *r)) - .collect(); - specials.extend((199998..=201088).map(|id| (format!("<|reserved_{id}|>"), id))); - load_encoding_from_file( - vocab_file_path, - check_hash.then(|| self.expected_hash()), - specials, - &self.pattern(), - ) - } - _ => load_encoding_from_file( - vocab_file_path, - check_hash.then(|| self.expected_hash()), - self.special_tokens().iter().cloned(), - &self.pattern(), - ), + Self::O200kBase | Self::O200kHarmony => "o200k_base.tiktoken", + Self::Cl100kBase => "cl100k_base.tiktoken", } } - fn public_vocab_file_url(&self) -> String { - let base = tiktoken_base_url(); + fn embedded_data(&self) -> &'static [u8] { match self { - Self::O200kBase => format!("{base}o200k_base.tiktoken"), - Self::O200kHarmony => format!("{base}o200k_base.tiktoken"), - Self::Cl100kBase => format!("{base}cl100k_base.tiktoken"), + Self::O200kBase | Self::O200kHarmony => O200K_BASE_DATA, + Self::Cl100kBase => CL100K_BASE_DATA, } } - fn vocab_file_name(&self) -> &'static str { + fn expected_hash(&self) -> &'static str { match self { - Self::O200kBase => "o200k_base.tiktoken", - Self::O200kHarmony => "o200k_base.tiktoken", - Self::Cl100kBase => "cl100k_base.tiktoken", + Self::O200kBase | Self::O200kHarmony => { + "446a9538cb6c348e3516120d7c08b09f57c36495e2acfffe59a5bf8b0cfb1a2d" + } + Self::Cl100kBase => "223921b76ee99bde995b7ff738513eef100fb51d18c93597a113bcffe865b2a7", } } - fn expected_hash(&self) -> &'static str { + fn build_special_tokens(&self) -> Vec<(String, Rank)> { + let base: Vec<(String, Rank)> = self + .special_tokens() + .iter() + .map(|(s, r)| ((*s).to_string(), *r)) + .collect(); + match self { - Self::O200kBase => "446a9538cb6c348e3516120d7c08b09f57c36495e2acfffe59a5bf8b0cfb1a2d", Self::O200kHarmony => { - "446a9538cb6c348e3516120d7c08b09f57c36495e2acfffe59a5bf8b0cfb1a2d" + let mut specials = base; + specials.extend((200014..=201088).map(|id| (format!("<|reserved_{id}|>"), id))); + specials } - Self::Cl100kBase => "223921b76ee99bde995b7ff738513eef100fb51d18c93597a113bcffe865b2a7", + Self::O200kBase => { + let mut specials = base; + specials.extend((199998..=201088).map(|id| (format!("<|reserved_{id}|>"), id))); + specials + } + Self::Cl100kBase => base, } } @@ -222,18 +159,7 @@ impl Encoding { fn pattern(&self) -> String { match self { - Self::O200kBase => { - [ - "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?", - "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?", - "\\p{N}{1,3}", - " ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*", - "\\s*[\\r\\n]+", - "\\s+(?!\\S)", - "\\s+", - ].join("|") - } - Self::O200kHarmony => { + Self::O200kBase | Self::O200kHarmony => { [ "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?", "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?", @@ -260,7 +186,6 @@ where { let mut hasher = expected_hash.map(|_| Sha256::new()); let mut bpe_ranks = HashMap::new(); - // using readline here so that the line returned includes the newline bytes for the hasher let mut lin_no = 0; let mut line_buffer = String::new(); while reader.read_line(&mut line_buffer)? > 0 { @@ -308,134 +233,19 @@ pub fn load_tiktoken_vocab_file

( expected_hash: Option<&str>, ) -> std::result::Result, Rank>, std::io::Error> where - P: AsRef, + P: AsRef, { let file = std::fs::File::open(path)?; let reader = std::io::BufReader::new(file); load_tiktoken_vocab(reader, expected_hash) } -pub fn load_encoding_from_file( - file_path: P, +fn load_tiktoken_vocab_bytes( + data: &[u8], expected_hash: Option<&str>, - special_tokens: S, - pattern: &str, -) -> Result -where - P: AsRef, - S: IntoIterator, - TS: Into, -{ - let encoder = load_tiktoken_vocab_file(file_path, expected_hash) - .map_err(LoadError::InvalidTiktokenVocabFile)?; - CoreBPE::new( - encoder, - special_tokens.into_iter().map(|(k, v)| (k.into(), v)), - pattern, - ) - .map_err(LoadError::CoreBPECreationFailed) -} - -/// This returns the path to a file containing the data at `url`. If the file is -/// cached, it is used. Otherwise, the file is downloaded and cached. -fn download_or_find_cached_file( - url: &str, - expected_hash: Option<&str>, -) -> Result { - let cache_dir = resolve_cache_dir()?; - let cache_path = resolve_cache_path(&cache_dir, url); - if cache_path.exists() { - if verify_file_hash(&cache_path, expected_hash)? { - return Ok(cache_path); - } - let _ = std::fs::remove_file(&cache_path); - } - let hash = load_remote_file(url, &cache_path)?; - if let Some(expected_hash) = expected_hash { - if hash != expected_hash { - let _ = std::fs::remove_file(&cache_path); - return Err(RemoteVocabFileError::HashMismatch { - file_url: url.to_string(), - expected_hash: expected_hash.to_string(), - computed_hash: hash, - }); - } - } - Ok(cache_path) -} - -fn resolve_cache_dir() -> Result { - // we use a different env var and a different default dir name to avoid - // conflicts with the python tiktoken package, while sharing a cache dir - // with the python tiktoken package is a desirable future goal, it is not - // a priority and we should optimize for avoiding breaking tiktoken installs - // on the same system until we can validate the correctness wrt the python - // implementation and write tests to avoid regressions - let cache_dir_override = std::env::var("TIKTOKEN_RS_CACHE_DIR").ok(); - if let Some(cache_dir_override) = cache_dir_override { - Ok(PathBuf::from(cache_dir_override)) - } else { - let cache_dir = std::env::temp_dir().join("tiktoken-rs-cache"); - std::fs::create_dir_all(&cache_dir).map_err(|e| { - RemoteVocabFileError::IOError(format!("creating cache dir {cache_dir:?}"), e) - })?; - Ok(cache_dir) - } -} - -fn resolve_cache_path(cache_dir: &Path, url: &str) -> PathBuf { - let mut hasher = Sha1::new(); - hasher.update(url.as_bytes()); - let cache_key = format!("{:x}", hasher.finalize()); - cache_dir.join(cache_key) -} - -fn verify_file_hash( - file_path: &Path, - expected_hash: Option<&str>, -) -> Result { - let Some(expected_hash) = expected_hash else { - return Ok(true); - }; - let file = File::open(file_path) - .map_err(|e| RemoteVocabFileError::IOError(format!("opening file {file_path:?}"), e))?; - let mut reader = BufReader::new(file); - let mut hasher = Sha256::new(); - std::io::copy(&mut reader, &mut hasher).map_err(|e| { - RemoteVocabFileError::IOError(format!("copying file {file_path:?} contents to hasher"), e) - })?; - let computed_hash = format!("{:x}", hasher.finalize()); - Ok(computed_hash == expected_hash) -} - -/// Loads a remote file to `destination` and returns the computed hash of the -/// file contents. -fn load_remote_file(url: &str, destination: &Path) -> Result { - let client = reqwest::blocking::Client::new(); - let mut response = client - .get(url) - .send() - .and_then(|r| r.error_for_status()) - .map_err(|e| RemoteVocabFileError::FailedToDownloadOrLoadVocabFile(Box::new(e)))?; - - let file = File::create(destination) - .map_err(|e| RemoteVocabFileError::IOError(format!("creating file {destination:?}"), e))?; - let mut dest = BufWriter::new(file); - let mut hasher = Sha256::new(); - let mut buffer = [0u8; 8192]; - loop { - let bytes_read = response.read(&mut buffer).map_err(|e| { - RemoteVocabFileError::IOError(format!("reading from response {url}"), e) - })?; - if bytes_read == 0 { - break; - } - dest.write_all(&buffer[..bytes_read]).map_err(|e| { - RemoteVocabFileError::IOError(format!("writing to file {destination:?}"), e) - })?; - hasher.update(&buffer[..bytes_read]); - } - Ok(format!("{:x}", hasher.finalize())) +) -> std::result::Result, Rank>, std::io::Error> { + let reader = std::io::BufReader::new(Cursor::new(data)); + load_tiktoken_vocab(reader, expected_hash) } #[cfg(test)] @@ -448,4 +258,13 @@ mod tests { let _ = encoding.load().unwrap(); } } + + #[test] + fn test_embedded_data_integrity() { + for encoding in &[Encoding::O200kBase, Encoding::Cl100kBase] { + let raw = decompress_vocab(encoding.embedded_data()); + let hash = format!("{:x}", Sha256::digest(&raw)); + assert_eq!(hash, encoding.expected_hash()); + } + } }