From ad623d297f029f941e83939e5c0b42cfee36d03a Mon Sep 17 00:00:00 2001 From: Thomas Aubry Date: Tue, 21 Apr 2026 11:51:03 +0200 Subject: [PATCH 1/8] Refactor into workspace and clean up wasm boundaries --- Cargo.lock | 301 ++++++++++++++++-- Cargo.toml | 84 ++--- README.md | 190 ++++++++--- crates/md-docrs-cli/Cargo.toml | 28 ++ crates/md-docrs-cli/src/fetch.rs | 116 +++++++ {src => crates/md-docrs-cli/src}/main.rs | 23 +- {src => crates/md-docrs-cli/src}/server.rs | 17 +- crates/md-docrs-core/Cargo.toml | 24 ++ .../md-docrs-core/src}/cache/hybrid.rs | 0 .../md-docrs-core/src}/cache/memory.rs | 0 .../md-docrs-core/src}/cache/mod.rs | 0 {src => crates/md-docrs-core/src}/error.rs | 4 - crates/md-docrs-core/src/fetch.rs | 94 ++++++ crates/md-docrs-core/src/lib.rs | 79 +++++ .../md-docrs-core/src}/render/mod.rs | 0 .../md-docrs-core/src}/render/ty.rs | 0 {src => crates/md-docrs-core/src}/resolve.rs | 0 {src => crates/md-docrs-core/src}/spec.rs | 0 .../md-docrs-rust-wasm}/Cargo.toml | 16 +- crates/md-docrs-rust-wasm/README.md | 198 ++++++++++++ .../md-docrs-rust-wasm}/src/lib.rs | 20 +- .../md-docrs-wasm-compare}/Cargo.toml | 8 +- .../md-docrs-wasm-compare}/src/main.rs | 9 +- crates/md-docrs-worker/Cargo.toml | 27 ++ crates/md-docrs-worker/package.json | 12 + crates/md-docrs-worker/src/lib.rs | 278 ++++++++++++++++ crates/md-docrs-worker/wrangler.toml | 6 + rust-wasm/README.md | 117 ------- src/fetch.rs | 147 --------- src/lib.rs | 57 ---- wasm/README.md | 283 +++++++++++----- wasm/build.sh | 172 +++++++--- zig/README.md | 263 ++++++++++----- 33 files changed, 1859 insertions(+), 714 deletions(-) create mode 100644 crates/md-docrs-cli/Cargo.toml create mode 100644 crates/md-docrs-cli/src/fetch.rs rename {src => crates/md-docrs-cli/src}/main.rs (92%) rename {src => crates/md-docrs-cli/src}/server.rs (90%) create mode 100644 crates/md-docrs-core/Cargo.toml rename {src => crates/md-docrs-core/src}/cache/hybrid.rs (100%) rename {src => 
crates/md-docrs-core/src}/cache/memory.rs (100%) rename {src => crates/md-docrs-core/src}/cache/mod.rs (100%) rename {src => crates/md-docrs-core/src}/error.rs (85%) create mode 100644 crates/md-docrs-core/src/fetch.rs create mode 100644 crates/md-docrs-core/src/lib.rs rename {src => crates/md-docrs-core/src}/render/mod.rs (100%) rename {src => crates/md-docrs-core/src}/render/ty.rs (100%) rename {src => crates/md-docrs-core/src}/resolve.rs (100%) rename {src => crates/md-docrs-core/src}/spec.rs (100%) rename {rust-wasm => crates/md-docrs-rust-wasm}/Cargo.toml (70%) create mode 100644 crates/md-docrs-rust-wasm/README.md rename {rust-wasm => crates/md-docrs-rust-wasm}/src/lib.rs (97%) rename {wasm => crates/md-docrs-wasm-compare}/Cargo.toml (59%) rename {wasm => crates/md-docrs-wasm-compare}/src/main.rs (99%) create mode 100644 crates/md-docrs-worker/Cargo.toml create mode 100644 crates/md-docrs-worker/package.json create mode 100644 crates/md-docrs-worker/src/lib.rs create mode 100644 crates/md-docrs-worker/wrangler.toml delete mode 100644 rust-wasm/README.md delete mode 100644 src/fetch.rs delete mode 100644 src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 6fceef2..a19d263 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,15 +150,43 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core 0.4.5", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "itoa", + "matchit 0.7.3", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "sync_wrapper", + "tower", + "tower-layer", + "tower-service", +] + [[package]] name = 
"axum" version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" dependencies = [ - "axum-core", + "axum-core 0.5.6", "bytes", - "form_urlencoded", "futures-util", "http", "http-body", @@ -166,21 +194,37 @@ dependencies = [ "hyper", "hyper-util", "itoa", - "matchit", + "matchit 0.8.4", "memchr", "mime", "percent-encoding", "pin-project-lite", "serde_core", - "serde_json", - "serde_path_to_error", - "serde_urlencoded", "sync_wrapper", "tokio", "tower", "tower-layer", "tower-service", - "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", ] [[package]] @@ -199,7 +243,17 @@ dependencies = [ "sync_wrapper", "tower-layer", "tower-service", - "tracing", +] + +[[package]] +name = "axum-macros" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d123550fa8d071b7255cb0cc04dc302baa6c8c4a79f55701552684d8399bce" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -373,6 +427,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "js-sys", + "num-traits", + "wasm-bindgen", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -1333,6 +1398,17 @@ version = "0.3.32" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.32" @@ -1354,6 +1430,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -1596,7 +1673,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", - "webpki-roots", + "webpki-roots 1.0.7", ] [[package]] @@ -2002,6 +2079,12 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "matchit" version = "0.8.4" @@ -2009,10 +2092,44 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" [[package]] -name = "md-docrs-wasm" +name = "md-docrs-cli" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "axum 0.8.9", + "clap", + "md-docrs-core", + "rustdoc-types", + "serde_json", + "tokio", + "tower-http", + "tracing", + "tracing-subscriber", + "ureq", + "zstd", +] + +[[package]] +name = "md-docrs-core" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "foyer", + "lru", + "rustdoc-types", + "serde", + "serde_json", + "thiserror 2.0.18", + "tracing", +] + +[[package]] +name = "md-docrs-rust-wasm" version = "0.1.0" dependencies = [ - "md_docrs_proxy", + "md-docrs-core", "rustdoc-types", "ruzstd", "serde_json", @@ -2030,25 +2147,21 @@ dependencies = [ ] [[package]] -name = "md_docrs_proxy" +name = "md-docrs-worker" version = "0.1.0" 
dependencies = [ - "anyhow", "async-trait", - "axum", - "bytes", - "clap", - "foyer", - "lru", - "reqwest", + "axum 0.7.9", + "axum-macros", + "md-docrs-core", "rustdoc-types", "serde", "serde_json", - "thiserror 2.0.18", - "tokio", - "tower-http", - "tracing", - "tracing-subscriber", + "tower-service", + "wasm-bindgen", + "wasm-bindgen-futures", + "worker", + "worker-macros", "zstd", ] @@ -2191,6 +2304,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.17.0" @@ -2729,7 +2851,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots", + "webpki-roots 1.0.7", ] [[package]] @@ -2826,6 +2948,7 @@ version = "0.23.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" dependencies = [ + "log", "once_cell", "ring", "rustls-pki-types", @@ -2908,6 +3031,17 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-wasm-bindgen" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3b143e2833c57ab9ad3ea280d21fd34e285a42837aeb0ee301f4f41890fa00e" +dependencies = [ + "js-sys", + "serde", + "wasm-bindgen", +] + [[package]] name = "serde-wasm-bindgen" version = "0.6.5" @@ -3337,7 +3471,6 @@ dependencies = [ "tokio", "tower-layer", "tower-service", - "tracing", ] [[package]] @@ -3382,7 +3515,6 @@ version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ - "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -3489,6 +3621,22 @@ version = "0.9.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "url", + "webpki-roots 0.26.11", +] + [[package]] name = "url" version = "2.5.8" @@ -3682,6 +3830,19 @@ dependencies = [ "wasmparser 0.244.0", ] +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmer" version = "7.1.0" @@ -3699,7 +3860,7 @@ dependencies = [ "paste", "rustc-demangle", "serde", - "serde-wasm-bindgen", + "serde-wasm-bindgen 0.6.5", "shared-buffer", "tar", "target-lexicon", @@ -4201,6 +4362,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.7", +] + [[package]] name = "webpki-roots" version = "1.0.7" @@ -4565,6 +4735,81 @@ dependencies = [ "wasmparser 0.246.2", ] +[[package]] +name = "worker" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "727789ca7eff9733efbea9d0e97779edc1cf1926e98aee7d7d8afe32805458aa" +dependencies = [ + "async-trait", + "axum 0.7.9", + "bytes", + "chrono", + "futures-channel", + "futures-util", + "http", + "http-body", + "js-sys", + "matchit 0.7.3", + "pin-project", + "serde", + "serde-wasm-bindgen 0.6.5", + "serde_json", + "serde_urlencoded", + "tokio", + 
"url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "worker-kv", + "worker-macros", + "worker-sys", +] + +[[package]] +name = "worker-kv" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f06d4d1416a9f8346ee9123b0d9a11b3cfa38e6cfb5a139698017d1597c4d41" +dependencies = [ + "js-sys", + "serde", + "serde-wasm-bindgen 0.5.0", + "serde_json", + "thiserror 1.0.69", + "wasm-bindgen", + "wasm-bindgen-futures", +] + +[[package]] +name = "worker-macros" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d625c24570ba9207a2617476013335f28a95cbe513e59bb814ffba092a18058" +dependencies = [ + "async-trait", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-macro-support", + "worker-sys", +] + +[[package]] +name = "worker-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34563340d41016b4381257c5a16b0d2bc590dbe00500ecfbebcaa16f5f85ce90" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "writeable" version = "0.6.3" diff --git a/Cargo.toml b/Cargo.toml index 794e592..167c47d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,70 +1,42 @@ [workspace] -members = [".", "rust-wasm", "wasm"] -default-members = ["."] +members = [ + "crates/md-docrs-core", + "crates/md-docrs-cli", + "crates/md-docrs-worker", + "crates/md-docrs-rust-wasm", + "crates/md-docrs-wasm-compare", +] resolver = "3" -[package] -name = "md_docrs_proxy" +[workspace.package] version = "0.1.0" edition = "2024" -[[bin]] -name = "md-docrs" -path = "src/main.rs" -required-features = ["cli"] - -[lib] -name = "md_docrs_proxy" -path = "src/lib.rs" - -[features] -default = ["cli", "http", "server"] -# Pure pipeline (spec parse + resolve + render + in-memory cache) always compiles. -# `http` adds the docs.rs fetcher (reqwest + zstd + tokio + tracing). 
-http = ["dep:reqwest", "dep:zstd", "dep:tokio", "dep:tracing", "dep:bytes"] -# `server` layers the axum HTTP mirror on top of `http`. -server = ["http", "dep:axum", "dep:tower-http"] -# `cli` is only relevant for the `md-docrs` binary. -cli = [ - "http", - "server", - "dep:anyhow", - "dep:clap", - "dep:tokio", - "dep:tracing", - "dep:tracing-subscriber", -] -# Opt-in disk-backed cache via foyer. Not wasm-compatible. -hybrid-cache = ["dep:foyer", "dep:serde"] - -[dependencies] -# Always on — used by the pure pipeline (spec, resolve, render, cache). +[workspace.dependencies] +anyhow = "1" async-trait = "0.1" +axum = { version = "0.8", default-features = false } +bytes = "1" +clap = { version = "4", features = ["derive"] } +foyer = { version = "0.22", features = ["serde"] } lru = "0.17" +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } rustdoc-types = "0.57" +serde = { version = "1", features = ["derive", "rc"] } serde_json = "1" thiserror = "2" - -# Optional — gated by features above. -anyhow = { version = "1", optional = true } -axum = { version = "0.8", optional = true } -bytes = { version = "1", optional = true } -clap = { version = "4", features = ["derive"], optional = true } -foyer = { version = "0.22", optional = true, features = ["serde"] } -reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"], optional = true } -serde = { version = "1", optional = true, features = ["derive", "rc"] } -tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"], optional = true } -tower-http = { version = "0.6", features = ["trace"], optional = true } -tracing = { version = "0.1", optional = true } -tracing-subscriber = { version = "0.3", features = ["env-filter"], optional = true } -zstd = { version = "0.13", optional = true } - -[profile.release] -lto = "thin" - -# Squeeze the wasm artifact as tight as possible — this is the module we -# compare against Zig's ReleaseSmall build. 
-[profile.release.package.md-docrs-wasm] +tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"] } +tower-http = { version = "0.6", features = ["trace"] } +tower-service = "0.3.3" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +wasm-bindgen = "0.2" +wasm-bindgen-futures = "0.4" +worker = "0.5" +worker-macros = "0.5" +zstd = "0.13" + +[profile.release.package.md-docrs-rust-wasm] opt-level = "z" codegen-units = 1 strip = true diff --git a/README.md b/README.md index 5296c38..62da812 100644 --- a/README.md +++ b/README.md @@ -1,95 +1,185 @@ # md-docrs-proxy -Proxy that downloads rustdoc JSON from docs.rs and renders it as Markdown - built for LLM agents that waste tokens scraping docs.rs HTML. +`md-docrs-proxy` resolves docs.rs rustdoc JSON URLs and renders rustdoc JSON as Markdown. + +This repository is organized with clear boundaries between: + +- **Rust workspace crates** for the real application and shared logic +- **Zig** for a minimal `resolve_url` implementation and Worker wrapper +- **Top-level `wasm/`** for cross-language artifact staging and comparison + +## Repository boundaries + +### Rust workspace + +The Rust implementation lives under `crates/`: + +- `crates/md-docrs-core` — pure shared logic + - spec parsing + - docs.rs URL resolution + - rustdoc JSON rendering + - cache abstractions and shared types +- `crates/md-docrs-cli` — native CLI and local HTTP server +- `crates/md-docrs-worker` — Cloudflare Worker crate for the Rust side +- `crates/md-docrs-rust-wasm` — Rust `wasm32-unknown-unknown` build exposing the WASM ABI +- `crates/md-docrs-wasm-compare` — host-side comparison harness for staged `.wasm` artifacts + +### Zig + +The Zig implementation lives under `zig/`: + +- `zig/lib` — Zig source for: + - spec parsing + - docs.rs URL building + - minimal WASM ABI + - native Zig CLI +- `zig/src` — TypeScript Cloudflare Worker wrapper for the Zig wasm module + +Zig is intentionally narrow in scope 
today: it is the minimal `resolve_url` implementation, not the full Markdown rendering pipeline. + +### Top-level wasm harness + +The top-level `wasm/` directory is **not** a Cargo crate anymore. + +It exists only for repo-level WASM workflow: + +- `wasm/build.sh` — builds/stages Zig and Rust wasm artifacts into `wasm/artifacts/` +- `wasm/artifacts/` — generated staged artifacts used by the comparison harness +- `wasm/README.md` — docs for the comparison flow + +The actual comparison binary lives in: + +- `crates/md-docrs-wasm-compare` ## Build +Build the Rust workspace: + ```sh -cargo build --release -# binary at ./target/release/md-docrs +cargo build --workspace +``` + +## Native CLI + +The main native binary is provided by `md-docrs-cli`. + +Spec grammar: + +```text +crate[@version][::path::to::item] ``` -## CLI +Version defaults to `latest`. -Spec grammar: `crate[@version][::path::to::item]`. Version defaults to `latest`. +Examples: ```sh -md-docrs anyhow # crate index, latest -md-docrs anyhow::Error # item page -md-docrs tokio::sync::Mutex # follows pub use re-exports -md-docrs tokio@1.52.1::sync::Mutex # pinned version -md-docrs --target x86_64-unknown-linux-gnu tokio::sync::Mutex +cargo run -p md-docrs-cli -- anyhow +cargo run -p md-docrs-cli -- anyhow::Error +cargo run -p md-docrs-cli -- tokio::sync::Mutex +cargo run -p md-docrs-cli -- tokio@1.52.1::sync::Mutex +cargo run -p md-docrs-cli -- --target x86_64-unknown-linux-gnu tokio::sync::Mutex ``` -Not every `@version` pin works: docs.rs has to have rebuilt rustdoc JSON at the supported format version (currently 57) for that exact release. Older releases predate the rebuild and return `502`; pin to a recent version or drop the pin to use `latest`. +Not every `@version` pin works: docs.rs must have rebuilt rustdoc JSON for the supported format version for that exact release. Older releases may return `502`; in that case use a newer version or `latest`. -Markdown goes to stdout. 
Pipe it into whatever consumes it. +Markdown goes to stdout. -## Server +## Local server -Mirrors docs.rs URLs, always replies with `text/markdown`: +The native server also comes from `md-docrs-cli`. ```sh -md-docrs serve --port 8080 --bind 127.0.0.1 +cargo run -p md-docrs-cli -- serve --port 8080 --bind 127.0.0.1 ``` +Examples: + ```sh -curl -s localhost:8080/anyhow # crate root -curl -s localhost:8080/anyhow/latest/anyhow/struct.Error.html # item page -curl -s localhost:8080/tokio/latest/tokio/sync/struct.Mutex.html # re-exported item +curl -s localhost:8080/anyhow +curl -s localhost:8080/anyhow/latest/anyhow/struct.Error.html +curl -s localhost:8080/tokio/latest/tokio/sync/struct.Mutex.html ``` -Response headers: `Content-Type: text/markdown; charset=utf-8`, `X-Markdown-Tokens` (byte-count/4 heuristic), `Vary: Accept`. +Response shape: -Status codes: 404 item not found, 400 bad spec, 502 upstream/decode error. +- `Content-Type: text/markdown; charset=utf-8` +- `X-Markdown-Tokens` +- `Vary: Accept` -## Notes +Status codes: + +- `400` bad spec +- `404` item not found +- `502` upstream/decode error -- In-memory LRU cache (32 crates) per process. No disk cache. -- v0 does not render trait impls, blanket impls, or source links. -- Glob re-exports into external crates (e.g. `clap::Parser` from `clap_builder`) are not followed. +## Rust WASM -## WebAssembly builds +The Rust WASM module lives in: -Two same-ABI WASM modules live alongside the Rust library: +- `crates/md-docrs-rust-wasm` -- [`rust-wasm/`](rust-wasm/README.md) — `wasm32-unknown-unknown` build of - the pure pipeline (spec parse + resolve + render). Exports `alloc`, - `free`, `resolve_url`, and optionally `render_markdown`. -- [`zig/`](zig/README.md) — Zig 0.16 port of the same surface (`resolve_url` - parity today; `render_markdown` is a follow-up). Ships a Cloudflare Worker - wrapper that can load either artifact unchanged. 
+It exposes the shared ABI used for side-by-side comparison with Zig: -Build the Rust wasm: +- `alloc` +- `free` +- `resolve_url` +- optionally `render_markdown` + +### Minimal Rust WASM build + +This is the closest match to the current Zig WASM surface. ```sh -# Minimal (resolve_url only — matches current Zig surface). cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm --no-default-features -# Full (adds render_markdown, brings in serde_json + rustdoc-types). + -p md-docrs-rust-wasm --no-default-features +``` + +### Full Rust WASM build + +This adds `render_markdown`. + +```sh cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm -# Optional shipped-size pass for Rust artifacts. -wasm-opt -Oz --strip-debug --strip-dwarf \ - -o wasm/artifacts/rust-minimal-opt.wasm \ - target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm + -p md-docrs-rust-wasm --no-default-features --features full +``` + +## Zig + +See: + +- [`zig/README.md`](zig/README.md) + +Typical Zig commands: + +```sh +zig build --build-file zig/lib/build.zig +zig build cli --build-file zig/lib/build.zig +zig build test --build-file zig/lib/build.zig ``` -The root crate's HTTP / server / CLI bits are gated behind `http`, `server`, -and `cli` features (all on by default), so the pure pipeline compiles for -`wasm32` without reqwest/tokio/axum/zstd. +## WASM comparison harness -To compare the two modules side by side (size, output parity, per-call -latency) under an embedded wasmtime or wasmer, see -[`wasm/`](wasm/README.md): +Use the top-level `wasm/` directory to stage artifacts, then run the Rust comparison harness. 
```sh -./wasm/build.sh # builds zig + rust wasm, runs wasm-opt, stages them -cargo run -p md-docrs-wasm-compare # runs the table +./wasm/build.sh +cargo run -p md-docrs-wasm-compare -- --offline ``` +For full docs, see: + +- [`wasm/README.md`](wasm/README.md) + +## Notes + +- In-memory LRU cache only for the native process path +- No disk cache by default +- v0 does not render trait impls, blanket impls, or source links +- Glob re-exports into external crates are not fully followed + ## Logging ```sh -RUST_LOG=md_docrs_proxy=debug md-docrs serve +RUST_LOG=debug cargo run -p md-docrs-cli -- serve ``` diff --git a/crates/md-docrs-cli/Cargo.toml b/crates/md-docrs-cli/Cargo.toml new file mode 100644 index 0000000..e3355e0 --- /dev/null +++ b/crates/md-docrs-cli/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "md-docrs-cli" +version.workspace = true +edition.workspace = true + +[[bin]] +name = "md-docrs" +path = "src/main.rs" + +[features] +default = ["hybrid-cache"] +hybrid-cache = ["md-docrs-core/hybrid-cache"] + +[dependencies] +anyhow.workspace = true +async-trait.workspace = true +axum = { workspace = true, features = ["tokio", "http1"] } +clap.workspace = true +rustdoc-types.workspace = true +serde_json.workspace = true +tokio.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true +tower-http.workspace = true +ureq = { version = "2", features = ["tls", "gzip"] } +zstd.workspace = true + +md-docrs-core = { path = "../md-docrs-core" } diff --git a/crates/md-docrs-cli/src/fetch.rs b/crates/md-docrs-cli/src/fetch.rs new file mode 100644 index 0000000..68b10dd --- /dev/null +++ b/crates/md-docrs-cli/src/fetch.rs @@ -0,0 +1,116 @@ +use md_docrs_core::{ + Error, Result, RustdocFetcher, + fetch::{DOCS_RS_BASE, build_url, validate_format_version}, +}; +use rustdoc_types::{Crate, FORMAT_VERSION}; +use std::{future::Future, io::Cursor, pin::Pin, time::Duration}; + +/// Native docs.rs fetcher used by the CLI/server binary. 
+/// +/// This implementation is intentionally outside `md-docrs-core` so the core +/// stays transport/runtime agnostic. +pub struct CliFetcher { + agent: ureq::Agent, + base: String, +} + +impl CliFetcher { + /// Create a fetcher configured for docs.rs. + #[must_use] + pub fn new() -> Self { + let agent = ureq::AgentBuilder::new() + .timeout(Duration::from_secs(30)) + .redirects(10) + .user_agent(concat!("md-docrs-cli/", env!("CARGO_PKG_VERSION"))) + .build(); + + Self { + agent, + base: DOCS_RS_BASE.to_string(), + } + } + + /// Override the docs.rs base URL, mainly for tests. + #[must_use] + pub fn with_base(mut self, base: impl Into) -> Self { + self.base = base.into(); + self + } + + fn read_body_bytes(response: ureq::Response, url: &str) -> Result> { + let mut reader = response.into_reader(); + let mut bytes = Vec::new(); + std::io::Read::read_to_end(&mut reader, &mut bytes).map_err(|err| { + Error::Fetch(format!("failed to read response body for {url}: {err}")) + })?; + Ok(bytes) + } + + fn get_bytes(&self, url: &str) -> Result<(u16, Vec)> { + match self.agent.get(url).call() { + Ok(response) => { + let status = response.status(); + let bytes = Self::read_body_bytes(response, url)?; + Ok((status, bytes)) + } + Err(ureq::Error::Status(status, response)) => { + let bytes = Self::read_body_bytes(response, url)?; + Ok((status, bytes)) + } + Err(err) => Err(Error::Fetch(format!("request failed for {url}: {err}"))), + } + } + + fn head_status(&self, url: &str) -> Result { + match self.agent.head(url).call() { + Ok(response) => Ok(response.status()), + Err(ureq::Error::Status(status, _response)) => Ok(status), + Err(err) => Err(Error::Fetch(format!("request failed for {url}: {err}"))), + } + } +} + +impl RustdocFetcher for CliFetcher { + fn fetch<'a>( + &'a self, + crate_name: &'a str, + version: &'a str, + target: Option<&'a str>, + ) -> Pin> + 'a>> { + Box::pin(async move { + let url = build_url( + &self.base, + crate_name, + version, + target, + 
Some(FORMAT_VERSION), + ); + + let (status, bytes) = self.get_bytes(&url)?; + + if status == 404 { + let probe_url = build_url(&self.base, crate_name, version, target, None); + let probe_status = self.head_status(&probe_url)?; + if (200..300).contains(&probe_status) { + return Err(Error::Fetch(format!( + "{crate_name}@{version} has no rustdoc JSON for format version {FORMAT_VERSION}; waiting on docs.rs rebuild" + ))); + } + return Err(Error::Fetch(format!( + "{crate_name}@{version} not found on docs.rs" + ))); + } + + if !(200..300).contains(&status) { + return Err(Error::Fetch(format!( + "{status} response for {crate_name}@{version}" + ))); + } + + let decoded = zstd::decode_all(Cursor::new(bytes))?; + let krate: Crate = serde_json::from_slice(&decoded)?; + validate_format_version(&krate)?; + Ok(krate) + }) + } +} diff --git a/src/main.rs b/crates/md-docrs-cli/src/main.rs similarity index 92% rename from src/main.rs rename to crates/md-docrs-cli/src/main.rs index b9569bb..71cf60b 100644 --- a/src/main.rs +++ b/crates/md-docrs-cli/src/main.rs @@ -2,10 +2,9 @@ use anyhow::{Context, Result}; use clap::{Parser, Subcommand}; -use md_docrs_proxy::{ +use md_docrs_core::{ ItemSpec, cache::{CrateCache, InMemoryCache}, - fetch::Fetcher, render_spec, }; use std::net::SocketAddr; @@ -13,10 +12,13 @@ use std::path::PathBuf; use std::sync::Arc; #[cfg(feature = "hybrid-cache")] -use md_docrs_proxy::cache::{FoyerHybridCache, FoyerHybridCacheConfig}; +use md_docrs_core::cache::{FoyerHybridCache, FoyerHybridCacheConfig}; +mod fetch; mod server; +use crate::fetch::CliFetcher; + #[derive(Parser, Debug)] #[command( name = "md-docrs", @@ -85,16 +87,7 @@ async fn main() -> Result<()> { cache_memory_bytes, }), _, - ) => { - serve_cmd( - &bind, - port, - cache_dir, - cache_disk_bytes, - cache_memory_bytes, - ) - .await - } + ) => serve_cmd(&bind, port, cache_dir, cache_disk_bytes, cache_memory_bytes).await, (None, Some(spec)) => render_cmd(&spec, cli.target).await, (None, None) => { 
eprintln!("usage: md-docrs | md-docrs serve | md-docrs render "); @@ -116,7 +109,7 @@ async fn render_cmd(raw: &str, target: Option) -> Result<()> { let spec = ItemSpec::parse(raw) .with_context(|| format!("invalid spec: {raw}"))? .with_target(target); - let fetcher = Fetcher::new()?; + let fetcher = CliFetcher::new(); let cache = InMemoryCache::default(); let md = render_spec(&spec, &fetcher, &cache).await?; print!("{md}"); @@ -133,7 +126,7 @@ async fn serve_cmd( let addr: SocketAddr = format!("{bind}:{port}").parse()?; let cache = build_cache(cache_dir, cache_disk_bytes, cache_memory_bytes).await?; let state = Arc::new(server::AppState { - fetcher: Fetcher::new()?, + fetcher: Arc::new(CliFetcher::new()), cache, }); let app = server::router(state); diff --git a/src/server.rs b/crates/md-docrs-cli/src/server.rs similarity index 90% rename from src/server.rs rename to crates/md-docrs-cli/src/server.rs index ccfedd3..e4da186 100644 --- a/src/server.rs +++ b/crates/md-docrs-cli/src/server.rs @@ -5,11 +5,11 @@ use axum::{ response::{IntoResponse, Response}, routing::get, }; -use md_docrs_proxy::{Error, ItemSpec, cache::CrateCache, fetch::Fetcher, render_spec}; +use md_docrs_core::{Error, ItemSpec, RustdocFetcher, cache::CrateCache, render_spec}; use std::sync::Arc; pub struct AppState { - pub fetcher: Fetcher, + pub fetcher: Arc, pub cache: Arc, } @@ -29,6 +29,7 @@ async fn root() -> &'static str { "md-docrs-proxy - GET /[/][/] for Markdown docs\n" } +#[axum::debug_handler] async fn crate_root( State(state): State>, Path(crate_name): Path, @@ -36,6 +37,7 @@ async fn crate_root( serve(&state, &crate_name, "latest", &[]).await } +#[axum::debug_handler] async fn version_root( State(state): State>, Path((crate_name, version)): Path<(String, String)>, @@ -43,6 +45,7 @@ async fn version_root( serve(&state, &crate_name, &version, &[]).await } +#[axum::debug_handler] async fn deep( State(state): State>, Path((crate_name, version, rest)): Path<(String, String, String)>, @@ 
-127,7 +130,7 @@ async fn serve( path, }; - match render_spec(&spec, &state.fetcher, state.cache.as_ref()).await { + match render_spec(&spec, state.fetcher.as_ref(), state.cache.as_ref()).await { Ok(body) => { let mut headers = HeaderMap::new(); headers.insert( @@ -147,11 +150,9 @@ fn error_to_response(e: &Error) -> Response { let status = match e { Error::NotFound(_) => StatusCode::NOT_FOUND, Error::InvalidSpec(_) => StatusCode::BAD_REQUEST, - Error::FormatVersionMismatch { .. } - | Error::Fetch(_) - | Error::Http(_) - | Error::Json(_) - | Error::Io(_) => StatusCode::BAD_GATEWAY, + Error::FormatVersionMismatch { .. } | Error::Fetch(_) | Error::Json(_) | Error::Io(_) => { + StatusCode::BAD_GATEWAY + } }; (status, e.to_string()).into_response() } diff --git a/crates/md-docrs-core/Cargo.toml b/crates/md-docrs-core/Cargo.toml new file mode 100644 index 0000000..b670f8c --- /dev/null +++ b/crates/md-docrs-core/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "md-docrs-core" +version.workspace = true +edition.workspace = true + +[lib] +name = "md_docrs_core" +path = "src/lib.rs" + +[features] +default = [] +hybrid-cache = ["dep:foyer", "dep:serde"] + +[dependencies] +anyhow.workspace = true +async-trait.workspace = true +lru.workspace = true +rustdoc-types.workspace = true +serde_json.workspace = true +thiserror.workspace = true +tracing.workspace = true + +foyer = { workspace = true, optional = true } +serde = { workspace = true, optional = true } diff --git a/src/cache/hybrid.rs b/crates/md-docrs-core/src/cache/hybrid.rs similarity index 100% rename from src/cache/hybrid.rs rename to crates/md-docrs-core/src/cache/hybrid.rs diff --git a/src/cache/memory.rs b/crates/md-docrs-core/src/cache/memory.rs similarity index 100% rename from src/cache/memory.rs rename to crates/md-docrs-core/src/cache/memory.rs diff --git a/src/cache/mod.rs b/crates/md-docrs-core/src/cache/mod.rs similarity index 100% rename from src/cache/mod.rs rename to crates/md-docrs-core/src/cache/mod.rs 
diff --git a/src/error.rs b/crates/md-docrs-core/src/error.rs similarity index 85% rename from src/error.rs rename to crates/md-docrs-core/src/error.rs index 0d8733b..b335fb5 100644 --- a/src/error.rs +++ b/crates/md-docrs-core/src/error.rs @@ -21,8 +21,4 @@ pub enum Error { #[error("io error: {0}")] Io(#[from] std::io::Error), - - #[cfg(feature = "http")] - #[error("http error: {0}")] - Http(#[from] reqwest::Error), } diff --git a/crates/md-docrs-core/src/fetch.rs b/crates/md-docrs-core/src/fetch.rs new file mode 100644 index 0000000..d451133 --- /dev/null +++ b/crates/md-docrs-core/src/fetch.rs @@ -0,0 +1,94 @@ +use crate::{Error, Result}; +use async_trait::async_trait; +use rustdoc_types::{Crate, FORMAT_VERSION}; + +pub const DOCS_RS_BASE: &str = "https://docs.rs"; + +/// Build the docs.rs rustdoc JSON URL for a crate/version/target tuple. +/// +/// When `format_version` is `Some`, the URL is pinned to a specific +/// rustdoc JSON schema version, e.g. `/json/57.zst`. +/// +/// When `format_version` is `None`, the legacy unpinned endpoint is used, +/// e.g. `/json.zst`. +#[must_use] +pub fn build_url( + base: &str, + crate_name: &str, + version: &str, + target: Option<&str>, + format_version: Option, +) -> String { + let target_seg = target.map(|t| format!("/{t}")).unwrap_or_default(); + match format_version { + Some(v) => format!("{base}/crate/{crate_name}/{version}{target_seg}/json/{v}.zst"), + None => format!("{base}/crate/{crate_name}/{version}{target_seg}/json.zst"), + } +} + +/// Minimal transport abstraction for loading parsed rustdoc JSON. +/// +/// Platform-specific callers provide their own implementation: +/// - CLI can use a small native HTTP client +/// - Cloudflare Worker can use the Worker runtime fetch API +#[async_trait(?Send)] +pub trait RustdocFetcher: Send + Sync { + /// Fetch, decode, and parse rustdoc JSON for the requested crate. 
+ /// + /// # Errors + /// Returns transport-specific fetch failures as `Error::Fetch`, + /// unsupported schema versions as `Error::FormatVersionMismatch`, + /// JSON parse failures as `Error::Json`, and decode failures as `Error::Io` + /// or `Error::Fetch` depending on the implementation. + async fn fetch(&self, crate_name: &str, version: &str, target: Option<&str>) -> Result; +} + +/// Shared validation helper for fetcher implementations. +/// +/// # Errors +/// Returns `Error::FormatVersionMismatch` when the crate's +/// `format_version` differs from the one supported by this build. +pub fn validate_format_version(krate: &Crate) -> Result<()> { + if krate.format_version != FORMAT_VERSION { + return Err(Error::FormatVersionMismatch { + got: krate.format_version, + expected: FORMAT_VERSION, + }); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn url_basic() { + assert_eq!( + build_url("https://docs.rs", "serde", "latest", None, None), + "https://docs.rs/crate/serde/latest/json.zst" + ); + } + + #[test] + fn url_with_target() { + assert_eq!( + build_url( + "https://docs.rs", + "serde", + "latest", + Some("x86_64-pc-windows-msvc"), + None + ), + "https://docs.rs/crate/serde/latest/x86_64-pc-windows-msvc/json.zst" + ); + } + + #[test] + fn url_format_pinned() { + assert_eq!( + build_url("https://docs.rs", "serde", "1.0.200", None, Some(57)), + "https://docs.rs/crate/serde/1.0.200/json/57.zst" + ); + } +} diff --git a/crates/md-docrs-core/src/lib.rs b/crates/md-docrs-core/src/lib.rs new file mode 100644 index 0000000..b037540 --- /dev/null +++ b/crates/md-docrs-core/src/lib.rs @@ -0,0 +1,79 @@ +#![warn(clippy::pedantic)] + +use rustdoc_types::Crate; +use std::{future::Future, pin::Pin, sync::Arc}; + +pub mod cache; +pub mod error; +pub mod fetch; +pub mod render; +pub mod resolve; +pub mod spec; + +pub use error::{Error, Result}; +pub use fetch::{DOCS_RS_BASE, build_url, validate_format_version}; +pub use spec::ItemSpec; + +pub trait 
RustdocFetcher: Send + Sync { + fn fetch<'a>( + &'a self, + crate_name: &'a str, + version: &'a str, + target: Option<&'a str>, + ) -> Pin> + 'a>>; +} + +/// High-level entry point: take a parsed [`ItemSpec`], fetch the rustdoc crate, +/// resolve the requested item, and render Markdown. +/// +/// # Errors +/// Forwards: +/// - fetch errors from [`RustdocFetcher::fetch`] +/// - cache-independent resolution errors from [`resolve::resolve`] +pub async fn render_spec( + spec: &ItemSpec, + fetcher: &dyn RustdocFetcher, + cache: &dyn cache::CrateCache, +) -> Result { + let krate = load_crate(spec, fetcher, cache).await?; + render_loaded_crate(&krate, spec) +} + +/// Load a rustdoc crate through the cache + fetcher abstraction. +/// +/// # Errors +/// Returns any error produced by the fetcher. +pub async fn load_crate( + spec: &ItemSpec, + fetcher: &dyn RustdocFetcher, + cache: &dyn cache::CrateCache, +) -> Result> { + let key = cache::CacheKey { + crate_name: spec.crate_name.clone(), + version: spec.version.clone(), + target: spec.target.clone(), + }; + + if let Some(hit) = cache.get(&key).await { + return Ok(hit); + } + + let krate = fetcher + .fetch(&spec.crate_name, &spec.version, spec.target.as_deref()) + .await?; + let arc = Arc::new(krate); + cache.put(key, Arc::clone(&arc)).await; + Ok(arc) +} + +/// Resolve and render Markdown from an already-loaded rustdoc crate. +/// +/// Useful for environments that obtain the crate data elsewhere but still want +/// to reuse the shared resolution + rendering pipeline. +/// +/// # Errors +/// Returns resolution errors when the requested item path cannot be found. 
+pub fn render_loaded_crate(krate: &Crate, spec: &ItemSpec) -> Result { + let resolved = resolve::resolve(krate, spec)?; + Ok(render::render(krate, &resolved, spec)) +} diff --git a/src/render/mod.rs b/crates/md-docrs-core/src/render/mod.rs similarity index 100% rename from src/render/mod.rs rename to crates/md-docrs-core/src/render/mod.rs diff --git a/src/render/ty.rs b/crates/md-docrs-core/src/render/ty.rs similarity index 100% rename from src/render/ty.rs rename to crates/md-docrs-core/src/render/ty.rs diff --git a/src/resolve.rs b/crates/md-docrs-core/src/resolve.rs similarity index 100% rename from src/resolve.rs rename to crates/md-docrs-core/src/resolve.rs diff --git a/src/spec.rs b/crates/md-docrs-core/src/spec.rs similarity index 100% rename from src/spec.rs rename to crates/md-docrs-core/src/spec.rs diff --git a/rust-wasm/Cargo.toml b/crates/md-docrs-rust-wasm/Cargo.toml similarity index 70% rename from rust-wasm/Cargo.toml rename to crates/md-docrs-rust-wasm/Cargo.toml index f32348a..6dc0492 100644 --- a/rust-wasm/Cargo.toml +++ b/crates/md-docrs-rust-wasm/Cargo.toml @@ -1,17 +1,17 @@ [package] -name = "md-docrs-wasm" -version = "0.1.0" -edition = "2024" +name = "md-docrs-rust-wasm" +version.workspace = true +edition.workspace = true publish = false [lib] -# cdylib for the actual wasm artifact, rlib so `cargo test -p md-docrs-wasm` +# cdylib for the actual wasm artifact, rlib so `cargo test -p md-docrs-rust-wasm` # can exercise the exported functions on the host. crate-type = ["cdylib", "rlib"] [features] # Default: render only (host-driven fetch + decompression). Kept on by -# default so `cargo test -p md-docrs-wasm` exercises the renderer. +# default so `cargo test -p md-docrs-rust-wasm` exercises the renderer. # - `render` pulls in serde_json + rustdoc-types for JSON -> Markdown. # - `fetch` adds the host `env.fetch_bytes` import and in-WASM zstd # decoding (via ruzstd) so the module owns the full pipeline. 
@@ -23,7 +23,7 @@ full = ["render", "fetch"] [dependencies] # Pulls in only the pure pipeline (spec / resolve / render / cache). -md_docrs_proxy = { path = "..", default-features = false } -rustdoc-types = { version = "0.57", optional = true } -serde_json = { version = "1", optional = true } +md-docrs-core = { path = "../md-docrs-core", default-features = false } +rustdoc-types = { workspace = true, optional = true } +serde_json = { workspace = true, optional = true } ruzstd = { version = "0.8", optional = true, default-features = false, features = ["std"] } diff --git a/crates/md-docrs-rust-wasm/README.md b/crates/md-docrs-rust-wasm/README.md new file mode 100644 index 0000000..c2e65d8 --- /dev/null +++ b/crates/md-docrs-rust-wasm/README.md @@ -0,0 +1,198 @@ +# md-docrs-rust-wasm + +Rust `wasm32-unknown-unknown` crate for the workspace's WASM-facing ABI. + +This crate is intentionally narrow: + +- it exposes a small C-style ABI for hosts +- it reuses shared Rust logic from `md-docrs-core` +- it does not own the comparison harness +- it does not own the Zig implementation +- it does not own the Cloudflare Worker wrapper + +That separation keeps boundaries clear: + +- `crates/md-docrs-core` — shared Rust parsing / resolution / rendering logic +- `crates/md-docrs-rust-wasm` — Rust WASM export layer +- `crates/md-docrs-wasm-compare` — host-side comparison harness +- `zig/` — independent Zig implementation and Worker wrapper +- `wasm/` — staged artifacts and helper build script + +## Purpose + +`md-docrs-rust-wasm` builds a WebAssembly module that can be loaded by any host that understands its exported ABI. + +Today it supports two scopes: + +- **minimal**: `resolve_url` only +- **full**: `resolve_url` + `render_markdown` + +The minimal build is the direct Rust counterpart to the Zig WASM module. +The full build keeps the same base ABI and adds Markdown rendering. 
+ +## Exports + +The module exports: + +| Symbol | Signature | Notes | +| --- | --- | --- | +| `alloc` | `(len: u32) -> *mut u8` | Allocates a buffer in WASM linear memory. Returns null on failure or `len == 0`. | +| `free` | `(ptr: *mut u8, len: u32)` | Frees a buffer previously returned by `alloc`. Length must match. | +| `resolve_url` | `(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` | Resolves a docs.rs rustdoc JSON URL into the caller-provided output buffer. Returns bytes written, or `0` on error. | +| `render_markdown` | `(json_ptr, json_len, spec_ptr, spec_len, target_ptr, target_len, len_out: *mut u32) -> *mut u8` | Present in builds with the `render` feature. Returns a newly allocated Markdown buffer; caller must free it. Returns null on error. | + +## Build modes + +### Minimal build + +This is the smallest Rust build and the one intended for direct parity with Zig. + +It exposes: + +- `alloc` +- `free` +- `resolve_url` + +Build it with: + +```sh +cargo build --profile wasm-release --target wasm32-unknown-unknown \ + -p md-docrs-rust-wasm --no-default-features +``` + +Output: + +```sh +target/wasm32-unknown-unknown/wasm-release/md_docrs_rust_wasm.wasm +``` + +### Default build + +The default feature set includes `render`. + +It exposes: + +- `alloc` +- `free` +- `resolve_url` +- `render_markdown` + +Build it with: + +```sh +cargo build --profile wasm-release --target wasm32-unknown-unknown \ + -p md-docrs-rust-wasm +``` + +### Full build + +The crate also defines a convenience `full` feature: + +- `render` +- `fetch` + +Build it with: + +```sh +cargo build --profile wasm-release --target wasm32-unknown-unknown \ + -p md-docrs-rust-wasm --no-default-features --features full +``` + +Use this when you want the full WASM-oriented surface used by the repo-level comparison flow. 
+ +## Features + +| Feature | Default | Meaning | +| --- | --- | --- | +| `render` | yes | Enables JSON-to-Markdown rendering via `serde_json` and `rustdoc-types`, and exports `render_markdown`. | +| `fetch` | no | Enables fetch/decompression-related functionality needed by the full WASM pipeline. | +| `full` | no | Convenience alias for `render` + `fetch`. | + +## Workspace boundaries + +This crate should stay lean and focused on the ABI layer. + +### It should contain + +- exported WASM ABI functions +- memory handling for host/WASM interaction +- thin adapters into `md-docrs-core` +- feature-gated WASM-specific integration logic + +### It should not contain + +- CLI code +- server code +- Cloudflare Worker code +- comparison harness code +- Zig-specific code +- repo-level artifact staging logic + +Those live elsewhere on purpose. + +## Relationship to Zig + +The Zig implementation lives under `zig/`. + +The goal is to keep the **minimal ABI compatible** across both implementations so the same host-side logic can load either artifact with minimal or no changes. + +That means the Rust minimal build should stay disciplined: + +- small export surface +- stable memory protocol +- no unnecessary host assumptions + +## Comparison workflow + +This crate does not run comparisons itself. 
+ +For side-by-side Rust vs Zig comparison, use the repo-level flow: + +- `wasm/build.sh` — builds and stages artifacts into `wasm/artifacts/` +- `crates/md-docrs-wasm-compare` — loads those artifacts and benchmarks / checks parity + +Typical flow from the repo root: + +```sh +./wasm/build.sh +cargo run -p md-docrs-wasm-compare -- --offline +``` + +## Optimization + +If `wasm-opt` is installed, you can post-process the built artifact manually: + +```sh +wasm-opt -Oz --strip-debug --strip-dwarf \ + -o target/wasm32-unknown-unknown/wasm-release/md_docrs_rust_wasm.opt.wasm \ + target/wasm32-unknown-unknown/wasm-release/md_docrs_rust_wasm.wasm +``` + +In normal repo usage, the top-level `wasm/build.sh` script handles staging optimized artifacts. + +## Tests + +Host tests can still exercise the crate logic: + +```sh +cargo test -p md-docrs-rust-wasm +``` + +## Design guidance + +To keep this crate lean over time: + +- prefer pushing reusable logic down into `md-docrs-core` +- keep exported functions thin +- keep features explicit +- avoid mixing host/runtime concerns into the ABI layer +- treat code size as a product constraint for the minimal build + +If a future change is only needed for: + +- CLI behavior +- HTTP serving +- Worker deployment +- harness benchmarking + +then it probably belongs outside this crate. \ No newline at end of file diff --git a/rust-wasm/src/lib.rs b/crates/md-docrs-rust-wasm/src/lib.rs similarity index 97% rename from rust-wasm/src/lib.rs rename to crates/md-docrs-rust-wasm/src/lib.rs index c6ddb68..861630d 100644 --- a/rust-wasm/src/lib.rs +++ b/crates/md-docrs-rust-wasm/src/lib.rs @@ -19,9 +19,9 @@ //! | -5 | spec parse / resolve miss / URL too long | //! 
| -6 | output pointer write failure | -use md_docrs_proxy::ItemSpec; +use md_docrs_core::ItemSpec; #[cfg(feature = "render")] -use md_docrs_proxy::{render, resolve}; +use md_docrs_core::{render, resolve}; #[cfg(feature = "render")] use rustdoc_types::Crate; use std::alloc::{Layout, alloc as rust_alloc, dealloc}; @@ -34,7 +34,11 @@ const FORMAT_VERSION: u32 = 57; const DOCS_RS_BASE: &str = "https://docs.rs"; fn layout_for(len: usize) -> Option { - if len == 0 { None } else { Layout::array::(len).ok() } + if len == 0 { + None + } else { + Layout::array::(len).ok() + } } /// Allocate `len` bytes inside the WASM linear memory. Returns null on failure @@ -235,14 +239,7 @@ pub unsafe extern "C" fn render_spec( let mut resp_ptr: u32 = 0; let mut resp_len: u32 = 0; - let rc = unsafe { - fetch_bytes( - url.as_ptr(), - url.len() as u32, - &mut resp_ptr, - &mut resp_len, - ) - }; + let rc = unsafe { fetch_bytes(url.as_ptr(), url.len() as u32, &mut resp_ptr, &mut resp_len) }; if rc != 0 { return -2; } @@ -373,5 +370,4 @@ mod tests { free(ptr, 64); } } - } diff --git a/wasm/Cargo.toml b/crates/md-docrs-wasm-compare/Cargo.toml similarity index 59% rename from wasm/Cargo.toml rename to crates/md-docrs-wasm-compare/Cargo.toml index 512555d..f6fa475 100644 --- a/wasm/Cargo.toml +++ b/crates/md-docrs-wasm-compare/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "md-docrs-wasm-compare" -version = "0.1.0" -edition = "2024" +version.workspace = true +edition.workspace = true publish = false [[bin]] @@ -9,9 +9,9 @@ name = "wasm-compare" path = "src/main.rs" [dependencies] -anyhow = "1" +anyhow.workspace = true blake3 = "1" -reqwest = { version = "0.12", default-features = false, features = ["blocking", "rustls-tls", "gzip"] } +reqwest = { workspace = true, features = ["blocking", "gzip"] } wasmtime = "44" wasmer = { version = "7", optional = true } diff --git a/wasm/src/main.rs b/crates/md-docrs-wasm-compare/src/main.rs similarity index 99% rename from wasm/src/main.rs rename to 
crates/md-docrs-wasm-compare/src/main.rs index b6fc1d7..7ad2312 100644 --- a/wasm/src/main.rs +++ b/crates/md-docrs-wasm-compare/src/main.rs @@ -164,7 +164,11 @@ fn print_help() { fn default_artifacts_dir() -> PathBuf { if let Some(dir) = option_env!("CARGO_MANIFEST_DIR") { - return Path::new(dir).join("artifacts"); + return Path::new(dir) + .join("..") + .join("..") + .join("wasm") + .join("artifacts"); } PathBuf::from("wasm/artifacts") } @@ -209,9 +213,8 @@ fn main() -> Result<()> { if present.is_empty() { bail!( "no .wasm artifacts found under {}\n\ - run `{}/build.sh` first, or pass --artifacts-dir", + run `./wasm/build.sh` first, or pass --artifacts-dir", args.artifacts_dir.display(), - env!("CARGO_MANIFEST_DIR"), ); } diff --git a/crates/md-docrs-worker/Cargo.toml b/crates/md-docrs-worker/Cargo.toml new file mode 100644 index 0000000..819849c --- /dev/null +++ b/crates/md-docrs-worker/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "md-docrs-worker" +version.workspace = true +edition.workspace = true +publish = false + +[package.metadata.release] +release = false + +[lib] +crate-type = ["cdylib"] + +[dependencies] +async-trait.workspace = true +md-docrs-core = { path = "../md-docrs-core", default-features = false } +rustdoc-types.workspace = true +serde_json.workspace = true +zstd.workspace = true + +worker = { workspace = true, features = ["http", "axum"] } +worker-macros = { workspace = true, features = ["http"] } +axum = { version = "0.7", default-features = false, features = ["json"] } +axum-macros = "0.4.2" +tower-service.workspace = true +wasm-bindgen.workspace = true +wasm-bindgen-futures.workspace = true +serde.workspace = true diff --git a/crates/md-docrs-worker/package.json b/crates/md-docrs-worker/package.json new file mode 100644 index 0000000..5d7e21a --- /dev/null +++ b/crates/md-docrs-worker/package.json @@ -0,0 +1,12 @@ +{ + "name": "md-docrs-worker", + "version": "0.0.0", + "private": true, + "scripts": { + "deploy": "cargo install 
worker-build ; wrangler deploy", + "dev": "cargo install worker-build ; wrangler dev --local" + }, + "devDependencies": { + "wrangler": "^4" + } +} diff --git a/crates/md-docrs-worker/src/lib.rs b/crates/md-docrs-worker/src/lib.rs new file mode 100644 index 0000000..b0a5df0 --- /dev/null +++ b/crates/md-docrs-worker/src/lib.rs @@ -0,0 +1,278 @@ +#![warn(clippy::pedantic)] + +use axum::{ + Router, + extract::{Path, State}, + http::{HeaderMap, StatusCode, header}, + response::{IntoResponse, Response}, + routing::get, +}; +use md_docrs_core::{ + Error, ItemSpec, Result, RustdocFetcher, + cache::{CrateCache, InMemoryCache}, + fetch::{DOCS_RS_BASE, build_url, validate_format_version}, + render_spec, +}; +use rustdoc_types::{Crate, FORMAT_VERSION}; +use std::{future::Future, io::Cursor, pin::Pin, sync::Arc}; +use tower_service::Service; +use worker::*; + +#[derive(Clone)] +struct WorkerFetcher { + base: String, +} + +impl WorkerFetcher { + fn new() -> Self { + Self { + base: DOCS_RS_BASE.to_string(), + } + } + + async fn fetch_bytes(&self, url: &str, method: Method) -> Result<(u16, Vec)> { + let mut init = RequestInit::new(); + init.with_method(method); + + let request = Request::new_with_init(url, &init) + .map_err(|err| Error::Fetch(format!("failed to build request for {url}: {err}")))?; + + let mut response = Fetch::Request(request) + .send() + .await + .map_err(|err| Error::Fetch(format!("request failed for {url}: {err}")))?; + + let status = response.status_code(); + let bytes = response.bytes().await.map_err(|err| { + Error::Fetch(format!("failed to read response body for {url}: {err}")) + })?; + + Ok((status, bytes)) + } + + async fn head_status(&self, url: &str) -> Result { + let mut init = RequestInit::new(); + init.with_method(Method::Head); + + let request = Request::new_with_init(url, &init) + .map_err(|err| Error::Fetch(format!("failed to build request for {url}: {err}")))?; + + let response = Fetch::Request(request) + .send() + .await + .map_err(|err| 
Error::Fetch(format!("request failed for {url}: {err}")))?; + + Ok(response.status_code()) + } +} + +impl RustdocFetcher for WorkerFetcher { + fn fetch<'a>( + &'a self, + crate_name: &'a str, + version: &'a str, + target: Option<&'a str>, + ) -> Pin> + 'a>> { + Box::pin(async move { + let url = build_url( + &self.base, + crate_name, + version, + target, + Some(FORMAT_VERSION), + ); + + let (status, bytes) = self.fetch_bytes(&url, Method::Get).await?; + + if status == 404 { + let probe_url = build_url(&self.base, crate_name, version, target, None); + let probe_status = self.head_status(&probe_url).await?; + if (200..300).contains(&probe_status) { + return Err(Error::Fetch(format!( + "{crate_name}@{version} has no rustdoc JSON for format version {FORMAT_VERSION}; waiting on docs.rs rebuild" + ))); + } + return Err(Error::Fetch(format!( + "{crate_name}@{version} not found on docs.rs" + ))); + } + + if !(200..300).contains(&status) { + return Err(Error::Fetch(format!( + "{status} response for {crate_name}@{version}" + ))); + } + + let decoded = zstd::decode_all(Cursor::new(bytes))?; + let krate: Crate = serde_json::from_slice(&decoded)?; + validate_format_version(&krate)?; + Ok(krate) + }) + } +} + +#[derive(Clone)] +struct AppState { + fetcher: Arc, + cache: Arc, +} + +fn router(state: Arc) -> Router { + Router::new() + .route("/", get(root)) + .route("/healthz", get(healthz)) + .route("/{crate_name}", get(crate_root)) + .route("/{crate_name}/", get(crate_root)) + .route("/{crate_name}/{version}", get(version_root)) + .route("/{crate_name}/{version}/", get(version_root)) + .route("/{crate_name}/{version}/{*rest}", get(deep)) + .with_state(state) +} + +async fn root() -> &'static str { + "md-docrs-worker - GET /[/][/] for Markdown docs\n" +} + +async fn healthz() -> &'static str { + "ok" +} + +#[axum_macros::debug_handler] +async fn crate_root( + State(state): State>, + Path(crate_name): Path, +) -> Response { + serve(&state, &crate_name, "latest", &[]).await +} + 
+#[axum_macros::debug_handler] +async fn version_root( + State(state): State>, + Path((crate_name, version)): Path<(String, String)>, +) -> Response { + serve(&state, &crate_name, &version, &[]).await +} + +#[axum_macros::debug_handler] +async fn deep( + State(state): State>, + Path((crate_name, version, rest)): Path<(String, String, String)>, +) -> Response { + let path_segs = parse_rest(&rest); + serve(&state, &crate_name, &version, &path_segs).await +} + +fn parse_rest(rest: &str) -> Vec { + let rest = rest.trim_end_matches('/'); + if rest.is_empty() { + return vec![]; + } + + let parts: Vec<&str> = rest.split('/').filter(|s| !s.is_empty()).collect(); + let mut out = Vec::with_capacity(parts.len()); + if parts.is_empty() { + return out; + } + + let last_idx = parts.len() - 1; + for (i, seg) in parts.iter().enumerate() { + if i == last_idx { + if let Some(name) = strip_kind_prefix(seg) { + out.push(name); + } else { + out.push((*seg).to_string()); + } + } else { + out.push((*seg).to_string()); + } + } + + out +} + +fn strip_kind_prefix(seg: &str) -> Option { + let seg = seg.strip_suffix(".html").unwrap_or(seg); + for prefix in [ + "struct.", + "enum.", + "trait.", + "fn.", + "type.", + "constant.", + "static.", + "macro.", + "union.", + "primitive.", + "derive.", + "attr.", + ] { + if let Some(rest) = seg.strip_prefix(prefix) { + return Some(rest.to_string()); + } + } + None +} + +async fn serve( + state: &AppState, + crate_name: &str, + version: &str, + path_segs: &[String], +) -> Response { + let path = match path_segs.split_first() { + Some((head, tail)) if head == crate_name => tail.to_vec(), + _ => path_segs.to_vec(), + }; + + let spec = ItemSpec { + crate_name: crate_name.to_string(), + version: version.to_string(), + target: None, + path, + }; + + match render_spec(&spec, state.fetcher.as_ref(), state.cache.as_ref()).await { + Ok(body) => { + let mut headers = HeaderMap::new(); + headers.insert( + header::CONTENT_TYPE, + "text/markdown; 
charset=utf-8".parse().unwrap(), + ); + headers.insert(header::VARY, "Accept".parse().unwrap()); + headers.insert( + "x-markdown-tokens", + (body.len() / 4).to_string().parse().unwrap(), + ); + (StatusCode::OK, headers, body).into_response() + } + Err(err) => error_to_response(&err), + } +} + +fn error_to_response(err: &Error) -> Response { + let status = match err { + Error::NotFound(_) => StatusCode::NOT_FOUND, + Error::InvalidSpec(_) => StatusCode::BAD_REQUEST, + Error::FormatVersionMismatch { .. } | Error::Fetch(_) | Error::Json(_) | Error::Io(_) => { + StatusCode::BAD_GATEWAY + } + }; + (status, err.to_string()).into_response() +} + +#[event(fetch)] +async fn fetch( + req: HttpRequest, + _env: Env, + _ctx: Context, +) -> worker::Result> { + let state = Arc::new(AppState { + fetcher: Arc::new(WorkerFetcher::new()), + cache: Arc::new(InMemoryCache::default()), + }); + + router(state) + .call(req) + .await + .map_err(|err| worker::Error::RustError(err.to_string())) +} diff --git a/crates/md-docrs-worker/wrangler.toml b/crates/md-docrs-worker/wrangler.toml new file mode 100644 index 0000000..95b3a3e --- /dev/null +++ b/crates/md-docrs-worker/wrangler.toml @@ -0,0 +1,6 @@ +name = "md-docrs-worker" +main = "build/index.js" +compatibility_date = "2025-04-21" + +[build] +command = "cargo install \"worker-build@^0.8\" && worker-build --release" diff --git a/rust-wasm/README.md b/rust-wasm/README.md deleted file mode 100644 index 7c38fe4..0000000 --- a/rust-wasm/README.md +++ /dev/null @@ -1,117 +0,0 @@ -# md-docrs-wasm - -`wasm32-unknown-unknown` build of the `md_docrs_proxy` pure pipeline, exposing -the **exact same C ABI** as the Zig build (`zig/lib/wasm.zig`). Lets us drop -either `.wasm` into the same host and compare size and per-request latency -without any host-side code changes. - -## Exports - -| Symbol | Signature | Notes | -| --- | --- | --- | -| `alloc` | `(len: u32) -> *u8` | Backed by Rust's global allocator. Returns null on OOM or `len == 0`. 
| -| `free` | `(ptr: *u8, len: u32)` | Length must match the allocation. | -| `resolve_url` | `(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` | Same semantics as the Zig export. 0 on error. | -| `render_markdown` | `(json_ptr, json_len, spec_ptr, spec_len, target_ptr, target_len, len_out: *u32) -> *u8` | Takes already-decoded rustdoc JSON, returns a fresh allocation containing Markdown. Caller frees. Null on error. Only present in the `render` feature build. | - -## Building - -```sh -# Minimal parity build — matches the Zig wasm surface (resolve_url only). -cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm --no-default-features -wasm-opt -Oz --strip-debug --strip-dwarf \ - -o target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.opt.wasm \ - target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm - -# Full pipeline — adds render_markdown (serde_json + rustdoc-types). -cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm -wasm-opt -Oz --strip-debug --strip-dwarf \ - -o target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.opt.wasm \ - target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm -``` - -Raw artifact lives at `target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm`. - -If you run `wasm-opt`, the optimized artifact can live alongside it, e.g. -`target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.opt.wasm`. - -## Size snapshot - -Measured on Rust 1.94 / Zig 0.16. 
- -| Build | Bytes | -| --- | ---: | -| Zig 0.16 — `ReleaseSmall` + `strip`, exports `resolve_url` | **6,775** | -| Rust `wasm-release` — `resolve_url` only (`--no-default-features`) | **36,336** | -| Rust `wasm-release` + `wasm-opt -Oz` — `resolve_url` only | **28,523** | -| Rust `wasm-release` — `resolve_url` + `render_markdown` | **486,387** | - -For the `resolve_url`-only Rust build, `wasm-opt -Oz` trims about **7,813 bytes** -from the raw `wasm-release` artifact, roughly a **21.5%** reduction. - -The large jump for `render_markdown` is serde_json + `rustdoc-types` -deserialise impls. Expected; that's the cost of JSON→AST→Markdown. - -## Feature gates - -- `render` (default) — pulls `serde_json` + `rustdoc-types` and exposes - `render_markdown`. Turn off for the minimal size-parity build. - -## Tests - -Host tests run through the same internal functions as the WASM exports -(the `no_mangle` attribute is gated to `target_arch = "wasm32"` so the test -binary doesn't shadow libc's `free`): - -```sh -cargo test -p md-docrs-wasm -``` - -## Comparing with Zig - -Both modules share this memory protocol: - -1. Host calls `alloc(n)` to reserve input / output buffers in linear memory. -2. Host writes input bytes into those buffers via a fresh `Uint8Array(memory.buffer, ptr, len)`. -3. Host calls `resolve_url(...)` (or `render_markdown(...)`). -4. Host reads the output, then calls `free(ptr, len)` on each buffer. - -Because the ABI matches byte-for-byte, the Worker at `zig/src/index.ts` -works as-is against either module — just point the `.wasm` import at the -Rust artifact. - -## What's next - -- Port `render_markdown` to Zig. That's where the real interesting size / - speed comparison happens — today the Zig wasm doesn't carry serde_json - or the rustdoc types. -- Benchmark instantiation + per-call latency side-by-side in a Worker - (e.g. hyperfine-style loop from a test harness, or wrangler dev + `wrk`). 
-- Keep comparing raw vs `wasm-opt -Oz` output as the Rust WASM surface grows, - especially once Zig gains the full render pipeline too. - -Option A: keep `std`, but drastically reduce code size -This is the lowest-risk path. - -For the minimal build: -- stop using `ItemSpec::parse` -- stop using `String` -- stop using `format!` -- implement a tiny local parser over `&[u8]` -- write URL bytes directly to `out_ptr` - -This alone could cut a lot. - -### Option B: create a dedicated `no_std` tiny crate -Example direction: -- `rust-wasm-tiny/` -- exports only `resolve_url` -- parser implemented over raw bytes -- no `std` -- no `serde` -- no `rustdoc-types` -- no dependency on main crate - -This is the path most likely to get you materially closer to Zig. diff --git a/src/fetch.rs b/src/fetch.rs deleted file mode 100644 index b00dc71..0000000 --- a/src/fetch.rs +++ /dev/null @@ -1,147 +0,0 @@ -use crate::{Error, Result}; -use rustdoc_types::{Crate, FORMAT_VERSION}; -use std::time::Duration; - -/// Downloads rustdoc JSON from docs.rs and parses it with `rustdoc-types`. -pub struct Fetcher { - client: reqwest::Client, - base: String, -} - -impl Fetcher { - /// # Errors - /// Returns `Error::Http` if the underlying HTTP client fails to build. - pub fn new() -> Result { - let client = reqwest::Client::builder() - .user_agent(concat!("md-docrs-proxy/", env!("CARGO_PKG_VERSION"))) - .timeout(Duration::from_secs(30)) - .redirect(reqwest::redirect::Policy::limited(10)) - .build()?; - Ok(Self { - client, - base: "https://docs.rs".into(), - }) - } - - /// Override the docs.rs base URL (used in tests). - #[must_use] - pub fn with_base(mut self, base: impl Into) -> Self { - self.base = base.into(); - self - } - - /// # Errors - /// Returns `Error::Fetch` on HTTP errors or unsupported format versions, - /// `Error::Json` on JSON parse failure, and `Error::FormatVersionMismatch` - /// when the downloaded JSON's `format_version` disagrees with ours. 
- pub async fn fetch( - &self, - crate_name: &str, - version: &str, - target: Option<&str>, - ) -> Result { - // Always request the format version we can parse. docs.rs keeps - // multiple format versions during rebuilds, so this is the reliable - // way to avoid schema-mismatch parse errors. A 404 here means the - // crate hasn't been rebuilt for our supported format yet. - let url = build_url( - &self.base, - crate_name, - version, - target, - Some(FORMAT_VERSION), - ); - tracing::debug!(url = %url, "fetch rustdoc JSON"); - let resp = self.client.get(&url).send().await?; - - if resp.status() == reqwest::StatusCode::NOT_FOUND { - // Distinguish "crate not found" from "format version unavailable" - // by probing the unpinned endpoint. - let probe_url = build_url(&self.base, crate_name, version, target, None); - let probe = self.client.head(&probe_url).send().await?; - if probe.status().is_success() { - return Err(Error::Fetch(format!( - "{crate_name}@{version} has no rustdoc JSON for format version \ - {FORMAT_VERSION}; waiting on docs.rs rebuild" - ))); - } - return Err(Error::Fetch(format!( - "{crate_name}@{version} not found on docs.rs" - ))); - } - - if !resp.status().is_success() { - return Err(Error::Fetch(format!( - "{} {} for {crate_name}@{version}", - resp.status().as_u16(), - resp.status().canonical_reason().unwrap_or("") - ))); - } - - let bytes = resp.bytes().await?; - - // Decompress zstd off the tokio runtime - it's CPU-bound. 
- let decoded = - tokio::task::spawn_blocking(move || zstd::decode_all(std::io::Cursor::new(bytes))) - .await - .map_err(|e| Error::Fetch(format!("zstd decode panicked: {e}")))??; - - let krate: Crate = serde_json::from_slice(&decoded)?; - if krate.format_version != FORMAT_VERSION { - return Err(Error::FormatVersionMismatch { - got: krate.format_version, - expected: FORMAT_VERSION, - }); - } - Ok(krate) - } -} - -fn build_url( - base: &str, - crate_name: &str, - version: &str, - target: Option<&str>, - format_version: Option<u32>, -) -> String { - let target_seg = target.map(|t| format!("/{t}")).unwrap_or_default(); - match format_version { - Some(v) => format!("{base}/crate/{crate_name}/{version}{target_seg}/json/{v}.zst"), - None => format!("{base}/crate/{crate_name}/{version}{target_seg}/json.zst"), - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn url_basic() { - assert_eq!( - build_url("https://docs.rs", "serde", "latest", None, None), - "https://docs.rs/crate/serde/latest/json.zst" - ); - } - - #[test] - fn url_with_target() { - assert_eq!( - build_url( - "https://docs.rs", - "serde", - "latest", - Some("x86_64-pc-windows-msvc"), - None - ), - "https://docs.rs/crate/serde/latest/x86_64-pc-windows-msvc/json.zst" - ); - } - - #[test] - fn url_format_pinned() { - assert_eq!( - build_url("https://docs.rs", "serde", "1.0.200", None, Some(57)), - "https://docs.rs/crate/serde/1.0.200/json/57.zst" - ); - } -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index c385c4b..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,57 +0,0 @@ -#![warn(clippy::pedantic)] - -pub mod cache; -pub mod error; -#[cfg(feature = "http")] -pub mod fetch; -pub mod render; -pub mod resolve; -pub mod spec; - -pub use error::{Error, Result}; -pub use spec::ItemSpec; - -#[cfg(feature = "http")] -use std::sync::Arc; - -/// High-level entry point: take a parsed `ItemSpec`, return rendered Markdown.
-/// -/// Fetches and caches the crate's rustdoc JSON via the supplied `fetch::Fetcher` -/// and any `cache::CrateCache` implementation, resolves the requested item, -/// and renders it to Markdown. -/// -/// # Errors -/// Forwards errors from `Fetcher::fetch` (network / docs.rs / decode failures) -/// and `resolve::resolve` (`Error::NotFound` when the path does not match). -#[cfg(feature = "http")] -pub async fn render_spec( - spec: &ItemSpec, - fetcher: &fetch::Fetcher, - cache: &dyn cache::CrateCache, -) -> Result<String> { - let krate = load_crate(spec, fetcher, cache).await?; - let resolved = resolve::resolve(&krate, spec)?; - Ok(render::render(&krate, &resolved, spec)) -} - -#[cfg(feature = "http")] -async fn load_crate( - spec: &ItemSpec, - fetcher: &fetch::Fetcher, - cache: &dyn cache::CrateCache, -) -> Result<Arc<rustdoc_types::Crate>> { - let key = cache::CacheKey { - crate_name: spec.crate_name.clone(), - version: spec.version.clone(), - target: spec.target.clone(), - }; - if let Some(hit) = cache.get(&key).await { - return Ok(hit); - } - let krate = fetcher - .fetch(&spec.crate_name, &spec.version, spec.target.as_deref()) - .await?; - let arc = Arc::new(krate); - cache.put(key, Arc::clone(&arc)).await; - Ok(arc) -} diff --git a/wasm/README.md b/wasm/README.md index c910518..87dec5f 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -1,114 +1,243 @@ -# wasm/ — side-by-side comparison harness +# wasm/ — workspace-level WASM harness -Runs the Zig and Rust wasm builds of `resolve_url` through the exact same -sequence of specs and reports: +This directory is **not** a Rust crate.
-- artifact size -- resolved URL (parity check — every artifact must produce byte-identical output) -- median and p95 per-call latency -- raw Rust vs `wasm-opt -Oz` size comparison for the same build flavor +It exists to keep the cross-language WASM comparison workflow in one simple place: + +- `build.sh` builds and stages WASM artifacts from the Rust and Zig implementations +- `artifacts/` holds the staged `.wasm` files +- this `README.md` explains how to run the comparison harness + +The actual Rust comparison binary lives in: + +- `crates/md-docrs-wasm-compare` + +## Boundaries + +Keep the repo split like this: + +- `crates/md-docrs-core` — shared Rust library logic +- `crates/md-docrs-rust-wasm` — Rust WASM module +- `crates/md-docrs-wasm-compare` — Rust host-side comparison harness +- `zig/` — Zig implementation and its Worker wrapper +- `wasm/` — staging area and glue docs/scripts only + +That separation keeps responsibilities lean: -Default runtime is embedded **wasmtime** (crate). The `wasmer` cargo feature -swaps in the **wasmer** crate as an alternate host. Both are in-process -embeddings, not the `wasmtime` / `wasmer` CLI binaries. +- Zig owns the Zig implementation +- Rust owns the Rust implementation and host harness +- `wasm/` owns only the artifact workflow ## Layout +```/dev/null/layout.txt#L1-11 +wasm/ +├── README.md # this file +├── build.sh # stages Rust + Zig wasm outputs into artifacts/ +└── artifacts/ # .gitignored staged outputs + ├── zig-minimal.wasm + ├── zig-full.wasm # optional, only if Zig full build exists + ├── rust-minimal.wasm + ├── rust-minimal-opt.wasm + ├── rust-full.wasm + └── rust-full-opt.wasm ``` + +Related workspace locations: + +```/dev/null/workspace-layout.txt#L1-8 +crates/ +├── md-docrs-rust-wasm/ +├── md-docrs-wasm-compare/ +└── ... +zig/ +└── ... 
wasm/ -├── Cargo.toml # md-docrs-wasm-compare (workspace member) -├── src/main.rs # harness: loads wasm, drives resolve_url, reports -├── build.sh # builds zig + rust wasms and stages them in artifacts/ -├── artifacts/ # .gitignored — populated by build.sh -│ ├── zig-minimal.wasm -│ ├── rust-minimal.wasm -│ ├── rust-minimal-opt.wasm -│ ├── rust-full.wasm -│ └── rust-full-opt.wasm -└── README.md +└── ... ``` +## What gets compared + +The harness compares compatible WASM artifacts that share the same low-level ABI. + +Today that means: + +- **Zig minimal** + - exports `alloc`, `free`, `resolve_url` + - implements spec parsing + docs.rs URL resolution +- **Rust minimal** + - exports the same minimal ABI + - meant to match the Zig surface +- **Rust full** + - extends the surface with rendering functionality +- **Zig full** + - optional future/experimental target if implemented + +The comparison harness reports: + +- artifact size +- output parity for `resolve_url` +- median and p95 latency +- raw Rust size vs `wasm-opt -Oz` size + ## Quick start -```sh -# From repo root. -./wasm/build.sh # produces artifacts/*.wasm -cargo run -p md-docrs-wasm-compare # default: wasmtime, 200 iterations +From the repo root: + +```/dev/null/quickstart.sh#L1-4 +./wasm/build.sh +cargo run -p md-docrs-wasm-compare +``` + +That does two things: + +1. builds/stages available `.wasm` artifacts into `wasm/artifacts/` +2. runs the host-side comparison binary from `crates/md-docrs-wasm-compare` + +## What `build.sh` does + +`wasm/build.sh` is the single entry point for artifact staging. + +It is responsible for: + +- building Zig minimal +- attempting Zig full, but skipping it cleanly if unsupported +- building Rust minimal from `crates/md-docrs-rust-wasm` +- building Rust full from `crates/md-docrs-rust-wasm` +- producing optimized Rust copies with `wasm-opt` +- copying all generated outputs into `wasm/artifacts/` + +It should not contain harness logic. 
+It should not become a second build system. +Its job is only to stage comparable artifacts in one place. + +## Required tools + +You need these available on your machine: + +- Rust toolchain with `wasm32-unknown-unknown` +- Zig +- `wasm-opt` from Binaryen + +If `wasm-opt` is missing, `build.sh` should fail early because optimized Rust artifacts are part of the comparison output. + +## Artifact names + +The harness looks for these filenames in `wasm/artifacts/`: + +- `zig-minimal.wasm` +- `zig-full.wasm` +- `rust-minimal.wasm` +- `rust-minimal-opt.wasm` +- `rust-full.wasm` +- `rust-full-opt.wasm` + +Any subset may be present. +Missing files are skipped. + +That makes the flow flexible: + +- minimal-only comparison works +- Rust-only comparison works +- future Zig full comparison can slot in without redesign + +## Rust commands + +The Rust WASM module comes from `crates/md-docrs-rust-wasm`. + +Minimal build: + +```/dev/null/rust-minimal.sh#L1-3 +cargo build --profile wasm-release --target wasm32-unknown-unknown \ + -p md-docrs-rust-wasm --no-default-features +``` + +Full build: + +```/dev/null/rust-full.sh#L1-3 +cargo build --profile wasm-release --target wasm32-unknown-unknown \ + -p md-docrs-rust-wasm --no-default-features --features full +``` + +Comparison harness: + +```/dev/null/harness.sh#L1-2 +cargo run -p md-docrs-wasm-compare ``` -Sample output: +Optional Wasmer runtime: +```/dev/null/harness-wasmer.sh#L1-2 +cargo run -p md-docrs-wasm-compare --features wasmer -- --runtime wasmer ``` -artifact bytes flavor --------------- ---------- -------- -zig-minimal 6775 minimal -rust-minimal 36336 minimal -rust-minimal-opt 25541 minimal -rust-full 486387 full -rust-full-opt 361606 full - -spec: tokio@1.52.1::sync::Mutex -artifact output median µs p95 µs --------------- ------------------------------------------------ --------- ---------- -zig https://docs.rs/crate/tokio/1.52.1/json/57.zst 7 8 -rust-minimal https://docs.rs/crate/tokio/1.52.1/json/57.zst 9 9 -rust-full 
https://docs.rs/crate/tokio/1.52.1/json/57.zst 9 9 + +## Zig commands + +The Zig implementation lives under `zig/`. + +Minimal WASM build: + +```/dev/null/zig-build.sh#L1-3 +cd zig/lib +zig build ``` -All three artifacts must return byte-identical URLs for every spec — that is -the ABI parity check. Per-call latency includes three `alloc`s, one -`resolve_url`, three `free`s, plus one `Memory::write` per input and one -`Memory::read` for the output. +Native Zig tests: + +```/dev/null/zig-test.sh#L1-3 +zig build test --build-file zig/lib/build.zig +``` + +If Zig full is not implemented yet, `build.sh` should print a skip message and continue. ## Flags +The harness supports these main flags: + | Flag | Default | Meaning | | --- | --- | --- | -| `--runtime wasmtime\|wasmer` | `wasmtime` | Embedded host. `wasmer` requires `--features wasmer`. | -| `--iterations N` | 200 | Hot-loop samples per (artifact, spec) cell. | -| `--artifacts-dir PATH` | `wasm/artifacts` | Where to look for `zig-minimal.wasm`, `zig-full.wasm`, `rust-minimal.wasm`, `rust-minimal-opt.wasm`, `rust-full.wasm`, and `rust-full-opt.wasm`. | +| `--runtime wasmtime\|wasmer` | `wasmtime` | Embedded runtime used by the Rust host harness | +| `--iterations N` | `200` | Hot-loop samples per artifact/spec pair | +| `--artifacts-dir PATH` | `wasm/artifacts` | Directory containing staged `.wasm` files | -Any subset of the expected `.wasm` files may be missing — the harness just skips those rows. +If supported by the harness version you are running, other flags such as offline or render-specific controls follow the same rule: they belong to the host harness crate, not to `wasm/build.sh`. -## Wasmer (optional) +## Running raw modules manually -```sh -cargo run -p md-docrs-wasm-compare --features wasmer -- --runtime wasmer -``` +The `.wasm` files can be inspected directly, but real calls require host code that: -Wasmer pulls in its own Cranelift fork; first build is ~20s. 
Both runtimes -agree on output, but wasmer's singlepass / cranelift defaults typically -give different per-call timings than wasmtime's cranelift — useful for -separating ABI cost from JIT cost. +- allocates memory in the module +- writes input bytes into WASM memory +- calls exported functions +- reads the output bytes +- frees buffers correctly -## Running the raw `.wasm` without the harness +That host logic lives in the Rust comparison harness, not in this directory. -The CLI form of wasmtime / wasmer can't easily marshal strings across the -ABI boundary, but you can still inspect the modules: +## Design rule for this directory -```sh -wasmtime compile wasm/artifacts/zig.wasm -o /tmp/zig.cwasm -wasmer inspect wasm/artifacts/rust-minimal.wasm | head -``` +Keep `wasm/` boring. + +Good uses: -For an end-to-end call you need host code that writes the spec into WASM -memory and reads the result back — that's exactly what `src/main.rs` does. +- stage artifacts +- document the comparison workflow +- hold generated outputs -## Adding a new spec +Bad uses: -Edit `DEFAULT_SPECS` in `src/main.rs`. A spec is `(spec_string, optional_target)` -and runs against every `.wasm` in the artifacts directory. 
+- adding a second Rust crate here +- duplicating logic from `crates/md-docrs-wasm-compare` +- mixing Zig source code into this directory +- mixing Rust library code into this directory -## wasm-opt outputs +## Summary -`build.sh` now requires `wasm-opt` on `PATH` and stages optimized Rust artifacts -next to the raw cargo outputs: +If you are looking for: -- `rust-minimal.wasm` — `cargo build --profile wasm-release --no-default-features` -- `rust-minimal-opt.wasm` — same module after `wasm-opt -Oz --strip-debug --strip-dwarf` -- `rust-full.wasm` — `cargo build --profile wasm-release --no-default-features --features full` -- `rust-full-opt.wasm` — same module after `wasm-opt -Oz --strip-debug --strip-dwarf` +- the Rust WASM implementation: see `crates/md-docrs-rust-wasm` +- the Rust host comparison program: see `crates/md-docrs-wasm-compare` +- the Zig implementation: see `zig/` +- the staged outputs and helper script: stay in `wasm/` -That lets the harness report the size delta between the unoptimized Rust wasm -and the post-processed `wasm-opt` version while still checking `resolve_url` -output parity across all staged artifacts. +The goal is simple: one place to stage artifacts, one Rust crate to compare them, and clear boundaries between Rust, Zig, and the shared WASM workflow. \ No newline at end of file diff --git a/wasm/build.sh b/wasm/build.sh index 6f78d20..a43a84d 100755 --- a/wasm/build.sh +++ b/wasm/build.sh @@ -1,70 +1,136 @@ #!/usr/bin/env bash -# Build the Zig and Rust wasm artifacts and stage them under artifacts/ so -# the comparison harness (cargo run -p md-docrs-wasm-compare) can load them -# without knowing where each toolchain drops its output. 
-# -# Produces up to six artifacts: -# zig-minimal.wasm Zig ReleaseSmall, resolve_url only -# zig-full.wasm Zig ReleaseSmall, full pipeline (if -Dfull supported) -# rust-minimal.wasm Rust wasm-release, --no-default-features -# rust-minimal-opt.wasm Rust wasm-release + wasm-opt -Oz, --no-default-features -# rust-full.wasm Rust wasm-release, --features full (fetch + render) -# rust-full-opt.wasm Rust wasm-release + wasm-opt -Oz, --features full set -euo pipefail +# Build and stage the WASM artifacts used by the comparison harness. +# +# Responsibilities: +# - build Zig minimal wasm +# - optionally build Zig full wasm if supported +# - build Rust minimal/full wasm from the workspace +# - run wasm-opt on Rust artifacts +# - copy everything into wasm/artifacts/ +# +# This directory is only a staging area. The actual Rust crates live under: +# - crates/md-docrs-rust-wasm +# - crates/md-docrs-wasm-compare + HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ROOT="$(cd "${HERE}/.." && pwd)" -ARTIFACTS="${HERE}/artifacts" +ARTIFACTS_DIR="${HERE}/artifacts" +RUST_WASM_PKG="md-docrs-rust-wasm" +RUST_WASM_OUT="${ROOT}/target/wasm32-unknown-unknown/wasm-release/md_docrs_rust_wasm.wasm" +ZIG_DIR="${ROOT}/zig/lib" +STAGED_ARTIFACTS=( + "zig-minimal.wasm" + "zig-full.wasm" + "rust-minimal.wasm" + "rust-minimal-opt.wasm" + "rust-full.wasm" + "rust-full-opt.wasm" +) + +mkdir -p "${ARTIFACTS_DIR}" + +require_cmd() { + command -v "$1" >/dev/null 2>&1 || { + echo "missing required command: $1" >&2 + exit 1 + } +} -mkdir -p "${ARTIFACTS}" +copy_if_exists() { + local src="$1" + local dest="$2" -if command -v wasm-opt >/dev/null 2>&1; then - WASM_OPT="$(command -v wasm-opt)" - echo ">> wasm-opt: ${WASM_OPT}" -else - echo "wasm-opt not found in PATH; install Binaryen to produce optimized Rust artifacts" >&2 - exit 1 -fi + if [[ -f "${src}" ]]; then + cp "${src}" "${dest}" + return 0 + fi + + return 1 +} optimize_wasm() { local src="$1" local dest="$2" - "${WASM_OPT}" -Oz 
--enable-bulk-memory --strip-debug --strip-dwarf -o "${dest}" "${src}" + wasm-opt -Oz \ + --enable-bulk-memory \ + --strip-debug \ + --strip-dwarf \ + -o "${dest}" \ + "${src}" } -echo ">> zig-minimal: ReleaseSmall, wasm32-freestanding" -(cd "${ROOT}/zig/lib" && zig build) -cp "${ROOT}/zig/lib/zig-out/bin/md-docrs.wasm" "${ARTIFACTS}/zig-minimal.wasm" +build_zig_minimal() { + echo ">> zig-minimal" + ( + cd "${ZIG_DIR}" + zig build + ) + copy_if_exists \ + "${ZIG_DIR}/zig-out/bin/md-docrs.wasm" \ + "${ARTIFACTS_DIR}/zig-minimal.wasm" +} -echo ">> zig-full: ReleaseSmall + full pipeline (-Dfull)" -if (cd "${ROOT}/zig/lib" && zig build -Dfull 2>/dev/null); then - if [[ -f "${ROOT}/zig/lib/zig-out/bin/md-docrs-full.wasm" ]]; then - cp "${ROOT}/zig/lib/zig-out/bin/md-docrs-full.wasm" \ - "${ARTIFACTS}/zig-full.wasm" +build_zig_full() { + echo ">> zig-full" + if ( + cd "${ZIG_DIR}" + zig build -Dfull >/dev/null 2>&1 + ); then + if copy_if_exists \ + "${ZIG_DIR}/zig-out/bin/md-docrs-full.wasm" \ + "${ARTIFACTS_DIR}/zig-full.wasm"; then + : + else + echo " skipped: build accepted -Dfull but produced no md-docrs-full.wasm" + fi else - echo " (skipping: -Dfull accepted but produced no md-docrs-full.wasm)" + echo " skipped: Zig full wasm is not implemented yet" fi -else - echo " (skipping: zig -Dfull not supported yet; implement render_spec in zig/lib/)" -fi - -echo ">> rust-minimal: wasm-release, --no-default-features (resolve_url only)" -cargo build --manifest-path "${ROOT}/Cargo.toml" \ - --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm --no-default-features -cp "${ROOT}/target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm" \ - "${ARTIFACTS}/rust-minimal.wasm" -optimize_wasm "${ARTIFACTS}/rust-minimal.wasm" "${ARTIFACTS}/rust-minimal-opt.wasm" - -echo ">> rust-full: wasm-release, --features full (fetch + render)" -cargo build --manifest-path "${ROOT}/Cargo.toml" \ - --profile wasm-release --target wasm32-unknown-unknown \ - -p 
md-docrs-wasm --no-default-features --features full -cp "${ROOT}/target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm" \ - "${ARTIFACTS}/rust-full.wasm" -optimize_wasm "${ARTIFACTS}/rust-full.wasm" "${ARTIFACTS}/rust-full-opt.wasm" - -echo -echo "staged artifacts:" -ls -la "${ARTIFACTS}" +} + +build_rust() { + local label="$1" + shift + + echo ">> ${label}" + cargo build \ + --manifest-path "${ROOT}/Cargo.toml" \ + --profile wasm-release \ + --target wasm32-unknown-unknown \ + -p "${RUST_WASM_PKG}" \ + "$@" +} + +stage_rust_artifact() { + local raw_name="$1" + local opt_name="$2" + + copy_if_exists "${RUST_WASM_OUT}" "${ARTIFACTS_DIR}/${raw_name}" + optimize_wasm "${ARTIFACTS_DIR}/${raw_name}" "${ARTIFACTS_DIR}/${opt_name}" +} + +main() { + require_cmd cargo + require_cmd zig + require_cmd wasm-opt + + rm -f "${STAGED_ARTIFACTS[@]/#/${ARTIFACTS_DIR}/}" + + build_zig_minimal + build_zig_full + + build_rust "rust-minimal" --no-default-features + stage_rust_artifact "rust-minimal.wasm" "rust-minimal-opt.wasm" + + build_rust "rust-full" --no-default-features --features full + stage_rust_artifact "rust-full.wasm" "rust-full-opt.wasm" + + echo + echo "staged artifacts:" + ls -la "${ARTIFACTS_DIR}" +} + +main "$@" diff --git a/zig/README.md b/zig/README.md index a49d651..dc239d8 100644 --- a/zig/README.md +++ b/zig/README.md @@ -1,151 +1,264 @@ # md-docrs-zig -Zig 0.16 port of the spec-parsing / URL-building portion of `md-docrs-proxy`, compiled two ways: +Zig implementation of the **minimal URL-resolution surface** of this repository. -- **WASM** (`wasm32-freestanding`, `ReleaseSmall`) — runs on Cloudflare Workers via `src/index.ts`. Layout and memory protocol mirror [zigflare](https://github.com/mattzcarey/zigflare). -- **Native CLI** — same core `resolve.resolveUrl`, wrapped with argv handling in `lib/cli.zig`. Useful for local iteration and for A/B testing against the Rust binary. +This subtree is intentionally small and separate from the Rust workspace. 
Its job is to answer: -Scope is intentionally narrow so the WASM artifact is directly comparable to a same-scope Rust WASM build: no HTTP, no zstd, no rustdoc-JSON parsing, no Markdown renderer — those stay in the root Rust crate. +- can Zig produce a smaller `.wasm` for the same ABI? +- can Zig match Rust's `resolve_url` behavior exactly? +- can the same host code load either module unchanged? + +It is **not** the full docs.rs proxy. It does not fetch rustdoc JSON, decompress zstd, or render Markdown. + +## Boundaries + +### What lives here + +`zig/` owns the minimal, self-contained path: + +- parse `crate[@version][::path]` +- build the corresponding docs.rs rustdoc JSON URL +- expose that logic through: + - a Zig native CLI + - a tiny WASM module + - a Cloudflare Worker wrapper in TypeScript + +### What does not live here + +The following stay on the Rust side: + +- HTTP fetching +- caching +- zstd decoding +- rustdoc JSON parsing +- Markdown rendering +- the main CLI/server application + +That split is deliberate. It keeps the Zig implementation lean and makes size/perf comparisons fair. + +## Relationship to the Rust workspace + +The repository has three distinct layers: + +1. `crates/md-docrs-core` + - shared Rust logic for the full pipeline + +2. `crates/md-docrs-rust-wasm` + - Rust WASM module with the same low-level ABI as the Zig WASM module + - can be built in: + - minimal mode: `resolve_url` only + - fuller mode: adds render support + +3. 
`zig/` + - independent Zig implementation of the minimal ABI surface + +At the top level, `wasm/` is just a harness area: + +- `wasm/build.sh` stages Zig and Rust artifacts into `wasm/artifacts/` +- `crates/md-docrs-wasm-compare` loads those artifacts and compares size, parity, and latency + +So the conceptual split is: + +- **Rust workspace** = production pipeline and Rust WASM +- **Zig subtree** = minimal alternative implementation +- **wasm/** = comparison/staging glue ## Layout -``` +```/dev/null/zig-layout.txt#L1-17 zig/ -├── lib/ # Zig sources (build runs here) +├── lib/ │ ├── build.zig │ ├── build.zig.zon -│ ├── spec.zig # pure: crate[@version][::path] grammar -│ ├── url.zig # pure: docs.rs URL builder -│ ├── resolve.zig # pure: spec + url glue, native tests -│ ├── wasm.zig # WASM entry: alloc / free / resolve_url -│ └── cli.zig # native CLI entry -├── src/ # Cloudflare Worker (TypeScript) +│ ├── spec.zig +│ ├── url.zig +│ ├── resolve.zig +│ ├── wasm.zig +│ └── cli.zig +├── src/ │ ├── index.ts │ ├── md_docrs.wasm.d.ts -│ └── md_docrs.wasm # produced by `npm run build:wasm` +│ └── md_docrs.wasm ├── package.json ├── tsconfig.json └── wrangler.jsonc ``` +## Components + +### `lib/spec.zig` +Parses the spec grammar: + +- `crate` +- `crate@version` +- `crate::path::to::item` +- `crate@version::path::to::item` + +### `lib/url.zig` +Builds the docs.rs JSON URL from parsed pieces. + +### `lib/resolve.zig` +Pure glue between parsing and URL building. This is the logic shared by the CLI and WASM entrypoints. + +### `lib/wasm.zig` +Exports the minimal ABI used for host-neutral comparisons. + +### `lib/cli.zig` +Wraps the same core resolver as a native command-line tool. + +### `src/index.ts` +Cloudflare Worker host for the WASM module. This is host glue only; the actual URL resolution lives in Zig WASM. + ## Build -Everything runs from `zig/lib/`. 
`zig build` produces only the WASM artifact -by default — the CLI and tests are explicit steps so `npm run build:wasm` -stays focused. +Most Zig work happens from `zig/lib/`. -```sh +```/dev/null/zig-build.sh#L1-11 cd zig/lib -# WASM (default step). +# Build the WASM artifact. zig build -# -> zig-out/bin/md-docrs.wasm -# Native CLI. +# Build the native CLI. zig build cli -# -> zig-out/bin/md-docrs-zig -# Unit tests (spec / url / resolve). +# Run unit tests. zig build test ``` +If you want to run the test step from the repository root, point Zig at the build file explicitly: + +```/dev/null/zig-build-root.sh#L1-1 +zig build test --build-file zig/lib/build.zig +``` + ## Native CLI -`md-docrs-zig` wraps the same `resolve.resolveUrl` that the WASM build -exports, so it's the fastest way to sanity-check a spec without spinning -up the Worker. +The CLI is the fastest way to sanity-check the minimal resolver behavior. -```sh +```/dev/null/zig-cli.sh#L1-13 cd zig/lib zig build cli -# Run directly. ./zig-out/bin/md-docrs-zig serde -# https://docs.rs/crate/serde/latest/json/57.zst - ./zig-out/bin/md-docrs-zig 'tokio@1.52.1::sync::Mutex' -# https://docs.rs/crate/tokio/1.52.1/json/57.zst - ./zig-out/bin/md-docrs-zig 'anyhow::Error' --target x86_64-unknown-linux-gnu -# https://docs.rs/crate/anyhow/latest/x86_64-unknown-linux-gnu/json/57.zst - -./zig-out/bin/md-docrs-zig --help -# Or run through the build system (rebuilds if needed, forwards args after --). +# Or via the build runner: zig build run -- 'tokio@1.52.1::sync::Mutex' --target x86_64-unknown-linux-gnu ``` +Expected output is always a fully resolved docs.rs rustdoc JSON URL, for example: + +```/dev/null/zig-cli-output.txt#L1-3 +https://docs.rs/crate/serde/latest/json/57.zst +https://docs.rs/crate/tokio/1.52.1/json/57.zst +https://docs.rs/crate/anyhow/latest/x86_64-unknown-linux-gnu/json/57.zst +``` + Exit codes: | Code | Meaning | | --- | --- | -| 0 | URL printed to stdout. 
| -| 2 | Bad spec, missing `--target` value, or unknown argument (usage on stderr). | +| 0 | URL printed to stdout | +| 2 | Invalid spec, missing `--target` value, or unknown argument | ## Worker -```sh +The Worker is a thin host around the Zig WASM module. + +```/dev/null/zig-worker.sh#L1-6 cd zig npm install -npm run build:wasm # builds lib/ and copies the wasm into src/ -npm run dev # wrangler dev on localhost -npm run deploy # wrangler deploy +npm run build:wasm +npm run dev +npm run deploy ``` -Endpoints: +Example requests: -```sh -curl localhost:8787/serde # latest +```/dev/null/zig-worker-curl.sh#L1-4 +curl localhost:8787/serde curl localhost:8787/tokio@1.52.1::sync::Mutex curl 'localhost:8787/tokio::sync::Mutex?target=x86_64-unknown-linux-gnu' curl 'localhost:8787/?spec=anyhow::Error' ``` -All three print the fully resolved `https://docs.rs/crate/<crate>/<version>[/<target>]/json/57.zst` URL. +Each returns a resolved docs.rs URL string. ## WASM ABI -Exported from `lib/wasm.zig`: +The Zig module exports a deliberately tiny ABI: | Export | Signature | Notes | | --- | --- | --- | -| `alloc` | `(len: u32) -> *u8` | Backed by `std.heap.wasm_allocator`. Returns 0 on OOM. | -| `free` | `(ptr: *u8, len: u32)` | Caller must pass the exact length passed to `alloc`. | -| `resolve_url` | `(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` | Returns bytes written, or 0 on bad spec / out-of-space. `target_len == 0` means "no target override". | +| `alloc` | `(len: u32) -> *u8` | Allocates in linear memory. Returns `0` on failure. | +| `free` | `(ptr: *u8, len: u32)` | Caller must free with the same length used for allocation. | +| `resolve_url` | `(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` | Writes the resolved URL into caller-provided output memory. Returns bytes written, or `0` on error.
| -Memory protocol notes in the zigflare [`doc/memory.md`](https://github.com/mattzcarey/zigflare/blob/main/doc/memory.md) apply verbatim: always recreate `Uint8Array` views *after* each `alloc`, since WASM memory growth detaches existing views. +This ABI is intentionally matched by the Rust WASM crate so the same host can swap implementations without changing its calling convention. -## Comparing with Rust WASM +## Integration with Rust WASM -The Rust equivalent lives at [`../rust-wasm/`](../rust-wasm/README.md). It exports -the same `alloc` / `free` / `resolve_url` symbols with byte-for-byte identical -signatures, so the Worker at `src/index.ts` can swap between the two by changing -a single import path. +The Rust equivalent is `crates/md-docrs-rust-wasm`. -```sh -# Minimal parity build — matches this Zig wasm surface 1:1. -cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm --no-default-features -cp ../target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm \ - src/md_docrs.wasm # drop-in replacement for the Zig artifact +Both modules are meant to be interchangeable for the minimal path: + +- same exported function names +- same memory ownership model +- same `resolve_url` contract +- same expected output bytes for the same input + +Build the Rust minimal module like this: -# Full pipeline build — also exports `render_markdown` (JSON → Markdown). +```/dev/null/rust-wasm-build.sh#L1-3 cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm + -p md-docrs-rust-wasm --no-default-features ``` -What we're comparing: +You can then compare the Zig and Rust artifacts through the top-level harness: + +```/dev/null/wasm-compare.sh#L1-2 +./wasm/build.sh +cargo run -p md-docrs-wasm-compare -- --offline +``` + +## Why this split exists + +This subtree is intentionally narrow for two reasons: + +1. 
**clear ownership** + - Zig owns only the minimal resolver path + - Rust owns the full product pipeline + +2. **fair comparison** + - if both Zig and Rust expose only `resolve_url`, size and latency comparisons mean something + - if one side includes fetch/decompress/render and the other does not, the comparison becomes noisy + +## Current status + +Today, Zig covers: + +- spec parsing +- URL resolution +- native CLI +- minimal WASM export +- Worker hosting + +It does **not** yet cover: + +- JSON-to-Markdown rendering +- in-WASM fetching +- zstd decompression + +That is intentional. The minimal boundary is the stable comparison target. + +## Summary -- `.wasm` size (Zig `ReleaseSmall` + `strip` vs. Rust `opt-level=z` + fat LTO + `strip`). -- Instantiation + per-call latency in a Worker. -- Cold-start cost (wrangler measures this). +If you're deciding where code should go: -For a host-neutral comparison that doesn't involve wrangler, use the -[`wasm/`](../wasm/README.md) harness at the repo root. It builds both -modules, runs the exact same specs through each inside embedded wasmtime -(optionally wasmer), and reports byte size, output parity, and median / p95 -per-call latency in a single table. +- put **full docs.rs proxy behavior** in Rust workspace crates +- put **minimal ABI-compatible URL resolution** in `zig/` +- put **artifact staging and cross-runtime comparison** in top-level `wasm/` -Porting `render_markdown` to Zig is the interesting follow-up — that's -where serde_json / rustdoc-types vs. `std.json` + hand-written types -becomes a real apples-to-apples test. +That keeps the repository lean and the boundaries clear. 
\ No newline at end of file From 8aee3caedfb8e2b32ac3d34510d31bb7e6298427 Mon Sep 17 00:00:00 2001 From: Thomas Aubry Date: Tue, 21 Apr 2026 15:18:13 +0200 Subject: [PATCH 2/8] fix(worker): switch wasm zstd decoding to ruzstd --- Cargo.lock | 177 +++++------- Cargo.toml | 6 +- crates/md-docrs-worker/Cargo.toml | 9 +- crates/md-docrs-worker/src/lib.rs | 400 +++++++++++++++++---------- crates/md-docrs-worker/wrangler.toml | 4 + 5 files changed, 334 insertions(+), 262 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a19d263..6366920 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,42 +150,14 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" -[[package]] -name = "axum" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" -dependencies = [ - "async-trait", - "axum-core 0.4.5", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "itoa", - "matchit 0.7.3", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "serde_json", - "serde_path_to_error", - "sync_wrapper", - "tower", - "tower-layer", - "tower-service", -] - [[package]] name = "axum" version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" dependencies = [ - "axum-core 0.5.6", + "axum-core", + "axum-macros", "bytes", "futures-util", "http", @@ -207,26 +179,6 @@ dependencies = [ "tower-service", ] -[[package]] -name = "axum-core" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "mime", - 
"pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", -] - [[package]] name = "axum-core" version = "0.5.6" @@ -247,9 +199,9 @@ dependencies = [ [[package]] name = "axum-macros" -version = "0.4.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d123550fa8d071b7255cb0cc04dc302baa6c8c4a79f55701552684d8399bce" +checksum = "7aa268c23bfbbd2c4363b9cd302a4f504fb2a9dfe7e3451d66f35dd392e20aca" dependencies = [ "proc-macro2", "quote", @@ -2096,18 +2048,12 @@ name = "md-docrs-cli" version = "0.1.0" dependencies = [ "anyhow", - "async-trait", - "axum 0.8.9", "clap", "md-docrs-core", - "rustdoc-types", - "serde_json", + "md-docrs-fetch-http", "tokio", - "tower-http", "tracing", "tracing-subscriber", - "ureq", - "zstd", ] [[package]] @@ -2125,6 +2071,18 @@ dependencies = [ "tracing", ] +[[package]] +name = "md-docrs-fetch-http" +version = "0.1.0" +dependencies = [ + "async-trait", + "md-docrs-core", + "rustdoc-types", + "serde_json", + "ureq", + "zstd", +] + [[package]] name = "md-docrs-rust-wasm" version = "0.1.0" @@ -2135,6 +2093,21 @@ dependencies = [ "serde_json", ] +[[package]] +name = "md-docrs-server" +version = "0.1.0" +dependencies = [ + "anyhow", + "axum", + "clap", + "md-docrs-core", + "md-docrs-fetch-http", + "tokio", + "tower-http", + "tracing", + "tracing-subscriber", +] + [[package]] name = "md-docrs-wasm-compare" version = "0.1.0" @@ -2151,18 +2124,15 @@ name = "md-docrs-worker" version = "0.1.0" dependencies = [ "async-trait", - "axum 0.7.9", - "axum-macros", "md-docrs-core", "rustdoc-types", + "ruzstd", "serde", "serde_json", - "tower-service", "wasm-bindgen", "wasm-bindgen-futures", "worker", "worker-macros", - "zstd", ] [[package]] @@ -3031,17 +3001,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde-wasm-bindgen" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f3b143e2833c57ab9ad3ea280d21fd34e285a42837aeb0ee301f4f41890fa00e" -dependencies = [ - "js-sys", - "serde", - "wasm-bindgen", -] - [[package]] name = "serde-wasm-bindgen" version = "0.6.5" @@ -3086,17 +3045,6 @@ dependencies = [ "zmij", ] -[[package]] -name = "serde_path_to_error" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" -dependencies = [ - "itoa", - "serde", - "serde_core", -] - [[package]] name = "serde_spanned" version = "1.1.1" @@ -3220,6 +3168,27 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "subtle" version = "2.6.1" @@ -3832,9 +3801,9 @@ dependencies = [ [[package]] name = "wasm-streams" -version = "0.4.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" dependencies = [ "futures-util", "js-sys", @@ -3860,7 +3829,7 @@ dependencies = [ "paste", "rustc-demangle", "serde", - "serde-wasm-bindgen 0.6.5", + "serde-wasm-bindgen", "shared-buffer", "tar", "target-lexicon", @@ -4737,12 +4706,11 @@ dependencies = [ [[package]] name = "worker" -version = "0.5.0" +version = "0.8.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "727789ca7eff9733efbea9d0e97779edc1cf1926e98aee7d7d8afe32805458aa" +checksum = "4afd7ae4f7fcc11e0e5e64b964890b3dda90f1290b0612f7cd821b381cc18826" dependencies = [ "async-trait", - "axum 0.7.9", "bytes", "chrono", "futures-channel", @@ -4753,7 +4721,7 @@ dependencies = [ "matchit 0.7.3", "pin-project", "serde", - "serde-wasm-bindgen 0.6.5", + "serde-wasm-bindgen", "serde_json", "serde_urlencoded", "tokio", @@ -4762,35 +4730,20 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "worker-kv", "worker-macros", "worker-sys", ] -[[package]] -name = "worker-kv" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f06d4d1416a9f8346ee9123b0d9a11b3cfa38e6cfb5a139698017d1597c4d41" -dependencies = [ - "js-sys", - "serde", - "serde-wasm-bindgen 0.5.0", - "serde_json", - "thiserror 1.0.69", - "wasm-bindgen", - "wasm-bindgen-futures", -] - [[package]] name = "worker-macros" -version = "0.5.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d625c24570ba9207a2617476013335f28a95cbe513e59bb814ffba092a18058" +checksum = "6371f41ac538c9f6dbe4d40cf7db58ed451eb0529a66f3e29ab8726217fc8a05" dependencies = [ "async-trait", "proc-macro2", "quote", + "strum", "syn", "wasm-bindgen", "wasm-bindgen-futures", @@ -4800,9 +4753,9 @@ dependencies = [ [[package]] name = "worker-sys" -version = "0.5.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34563340d41016b4381257c5a16b0d2bc590dbe00500ecfbebcaa16f5f85ce90" +checksum = "4c8de95c532944cee89d63fa8d7945f3db6260ca75ee3da42f7acfeebf538e4c" dependencies = [ "cfg-if", "js-sys", diff --git a/Cargo.toml b/Cargo.toml index 167c47d..49197cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,9 @@ [workspace] members = [ "crates/md-docrs-core", + "crates/md-docrs-fetch-http", "crates/md-docrs-cli", + "crates/md-docrs-server", 
"crates/md-docrs-worker", "crates/md-docrs-rust-wasm", "crates/md-docrs-wasm-compare", @@ -32,8 +34,8 @@ tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } wasm-bindgen = "0.2" wasm-bindgen-futures = "0.4" -worker = "0.5" -worker-macros = "0.5" +worker = "0.8.1" +worker-macros = "0.8.1" zstd = "0.13" [profile.release.package.md-docrs-rust-wasm] diff --git a/crates/md-docrs-worker/Cargo.toml b/crates/md-docrs-worker/Cargo.toml index 819849c..1e53571 100644 --- a/crates/md-docrs-worker/Cargo.toml +++ b/crates/md-docrs-worker/Cargo.toml @@ -14,14 +14,11 @@ crate-type = ["cdylib"] async-trait.workspace = true md-docrs-core = { path = "../md-docrs-core", default-features = false } rustdoc-types.workspace = true +serde = { workspace = true, features = ["derive"] } serde_json.workspace = true -zstd.workspace = true +ruzstd = { version = "0.8", default-features = false, features = ["std"] } -worker = { workspace = true, features = ["http", "axum"] } +worker = { workspace = true, features = ["http"] } worker-macros = { workspace = true, features = ["http"] } -axum = { version = "0.7", default-features = false, features = ["json"] } -axum-macros = "0.4.2" -tower-service.workspace = true wasm-bindgen.workspace = true wasm-bindgen-futures.workspace = true -serde.workspace = true diff --git a/crates/md-docrs-worker/src/lib.rs b/crates/md-docrs-worker/src/lib.rs index b0a5df0..47fc203 100644 --- a/crates/md-docrs-worker/src/lib.rs +++ b/crates/md-docrs-worker/src/lib.rs @@ -1,22 +1,25 @@ #![warn(clippy::pedantic)] -use axum::{ - Router, - extract::{Path, State}, - http::{HeaderMap, StatusCode, header}, - response::{IntoResponse, Response}, - routing::get, -}; use md_docrs_core::{ - Error, ItemSpec, Result, RustdocFetcher, - cache::{CrateCache, InMemoryCache}, + Error, ItemSpec, + cache::CacheKey, fetch::{DOCS_RS_BASE, build_url, validate_format_version}, - render_spec, + render_loaded_crate, }; use rustdoc_types::{Crate, FORMAT_VERSION}; -use 
std::{future::Future, io::Cursor, pin::Pin, sync::Arc}; -use tower_service::Service; -use worker::*; +use serde::{Deserialize, Serialize}; +use std::{ + io::{Cursor, Read}, + sync::Arc, +}; +use worker::kv::{KvError, KvStore}; +use worker::{Context, Env, Fetch, Headers, Method, Request, RequestInit, Response, Result, event}; + +#[derive(Clone)] +struct AppState { + fetcher: Arc, + cache: Arc, +} #[derive(Clone)] struct WorkerFetcher { @@ -30,7 +33,11 @@ impl WorkerFetcher { } } - async fn fetch_bytes(&self, url: &str, method: Method) -> Result<(u16, Vec)> { + async fn fetch_bytes( + &self, + url: &str, + method: Method, + ) -> md_docrs_core::Result<(u16, Vec)> { let mut init = RequestInit::new(); init.with_method(method); @@ -50,7 +57,7 @@ impl WorkerFetcher { Ok((status, bytes)) } - async fn head_status(&self, url: &str) -> Result { + async fn head_status(&self, url: &str) -> md_docrs_core::Result { let mut init = RequestInit::new(); init.with_method(Method::Head); @@ -66,132 +73,207 @@ impl WorkerFetcher { } } -impl RustdocFetcher for WorkerFetcher { - fn fetch<'a>( - &'a self, - crate_name: &'a str, - version: &'a str, - target: Option<&'a str>, - ) -> Pin> + 'a>> { - Box::pin(async move { - let url = build_url( - &self.base, - crate_name, - version, - target, - Some(FORMAT_VERSION), - ); - - let (status, bytes) = self.fetch_bytes(&url, Method::Get).await?; - - if status == 404 { - let probe_url = build_url(&self.base, crate_name, version, target, None); - let probe_status = self.head_status(&probe_url).await?; - if (200..300).contains(&probe_status) { - return Err(Error::Fetch(format!( - "{crate_name}@{version} has no rustdoc JSON for format version {FORMAT_VERSION}; waiting on docs.rs rebuild" - ))); - } +impl WorkerFetcher { + async fn fetch( + &self, + crate_name: &str, + version: &str, + target: Option<&str>, + ) -> md_docrs_core::Result { + let url = build_url( + &self.base, + crate_name, + version, + target, + Some(FORMAT_VERSION), + ); + + let (status, 
bytes) = self.fetch_bytes(&url, Method::Get).await?; + + if status == 404 { + let probe_url = build_url(&self.base, crate_name, version, target, None); + let probe_status = self.head_status(&probe_url).await?; + if (200..300).contains(&probe_status) { return Err(Error::Fetch(format!( - "{crate_name}@{version} not found on docs.rs" + "{crate_name}@{version} has no rustdoc JSON for format version \ + {FORMAT_VERSION}; waiting on docs.rs rebuild" ))); } + return Err(Error::Fetch(format!( + "{crate_name}@{version} not found on docs.rs" + ))); + } - if !(200..300).contains(&status) { - return Err(Error::Fetch(format!( - "{status} response for {crate_name}@{version}" - ))); - } + if !(200..300).contains(&status) { + return Err(Error::Fetch(format!( + "{status} response for {crate_name}@{version}" + ))); + } - let decoded = zstd::decode_all(Cursor::new(bytes))?; - let krate: Crate = serde_json::from_slice(&decoded)?; - validate_format_version(&krate)?; - Ok(krate) - }) + let decoded = ruzstd::decoding::StreamingDecoder::new(Cursor::new(bytes)) + .map_err(|err| { + Error::Io(std::io::Error::other(format!( + "zstd decode init failed: {err}" + ))) + })? 
+ .bytes() + .collect::>>()?; + let krate: Crate = serde_json::from_slice(&decoded)?; + validate_format_version(&krate)?; + Ok(krate) } } -#[derive(Clone)] -struct AppState { - fetcher: Arc, - cache: Arc, +#[derive(Debug, Serialize, Deserialize)] +struct CachedCrate { + krate: Crate, } -fn router(state: Arc) -> Router { - Router::new() - .route("/", get(root)) - .route("/healthz", get(healthz)) - .route("/{crate_name}", get(crate_root)) - .route("/{crate_name}/", get(crate_root)) - .route("/{crate_name}/{version}", get(version_root)) - .route("/{crate_name}/{version}/", get(version_root)) - .route("/{crate_name}/{version}/{*rest}", get(deep)) - .with_state(state) +#[derive(Clone)] +struct KvCrateCache { + kv: KvStore, + ttl_seconds: u64, } -async fn root() -> &'static str { - "md-docrs-worker - GET /[/][/] for Markdown docs\n" -} +impl KvCrateCache { + fn new(kv: KvStore) -> Self { + Self { + kv, + ttl_seconds: 60 * 60, + } + } -async fn healthz() -> &'static str { - "ok" + fn key_string(key: &CacheKey) -> String { + match &key.target { + Some(target) => format!("crate:{}:{}:{}", key.crate_name, key.version, target), + None => format!("crate:{}:{}", key.crate_name, key.version), + } + } } -#[axum_macros::debug_handler] -async fn crate_root( - State(state): State>, - Path(crate_name): Path, -) -> Response { - serve(&state, &crate_name, "latest", &[]).await +impl KvCrateCache { + async fn get(&self, key: &CacheKey) -> Option> { + let cache_key = Self::key_string(key); + + match self.kv.get(&cache_key).json::().await { + Ok(Some(cached)) => Some(Arc::new(cached.krate)), + Ok(None) | Err(_) => None, + } + } + + async fn put(&self, key: CacheKey, value: Arc) { + let cache_key = Self::key_string(&key); + let cached = CachedCrate { + krate: (*value).clone(), + }; + + let Ok(payload) = serde_json::to_string(&cached) else { + return; + }; + + let builder = match self.kv.put(&cache_key, payload) { + Ok(builder) => builder.expiration_ttl(self.ttl_seconds), + Err(err) => { + 
if matches!(err, KvError::InvalidKvStore(_)) { + panic!("invalid kv store"); + } + return; + } + }; + + if let Err(err) = builder.execute().await + && matches!(err, KvError::InvalidKvStore(_)) + { + panic!("invalid kv store"); + } + } } -#[axum_macros::debug_handler] -async fn version_root( - State(state): State>, - Path((crate_name, version)): Path<(String, String)>, -) -> Response { - serve(&state, &crate_name, &version, &[]).await +#[event(fetch)] +async fn fetch(req: Request, env: Env, _ctx: Context) -> Result { + let kv = env.kv("EXAMPLE")?; + let state = AppState { + fetcher: Arc::new(WorkerFetcher::new()), + cache: Arc::new(KvCrateCache::new(kv)), + }; + + route(req, state).await } -#[axum_macros::debug_handler] -async fn deep( - State(state): State>, - Path((crate_name, version, rest)): Path<(String, String, String)>, -) -> Response { - let path_segs = parse_rest(&rest); +async fn route(req: Request, state: AppState) -> Result { + let path = req.path(); + + if path == "/" { + return text_response( + 200, + "md-docrs-worker - GET /[/][/] for Markdown docs\n", + "text/plain; charset=utf-8", + ); + } + + if path == "/healthz" { + return text_response(200, "ok", "text/plain; charset=utf-8"); + } + + if path == "/kv" { + return kv_list(&state).await; + } + + let segments: Vec<&str> = path + .split('/') + .filter(|segment| !segment.is_empty()) + .collect(); + + if segments.is_empty() { + return text_response( + 200, + "md-docrs-worker - GET /[/][/] for Markdown docs\n", + "text/plain; charset=utf-8", + ); + } + + let crate_name = segments[0].to_string(); + let version = if segments.len() >= 2 { + segments[1].to_string() + } else { + "latest".to_string() + }; + + let path_segs = if segments.len() > 2 { + parse_rest_segments(&segments[2..]) + } else { + Vec::new() + }; + serve(&state, &crate_name, &version, &path_segs).await } -fn parse_rest(rest: &str) -> Vec { - let rest = rest.trim_end_matches('/'); - if rest.is_empty() { +fn parse_rest_segments(segments: 
&[&str]) -> Vec { + if segments.is_empty() { return vec![]; } - let parts: Vec<&str> = rest.split('/').filter(|s| !s.is_empty()).collect(); - let mut out = Vec::with_capacity(parts.len()); - if parts.is_empty() { - return out; - } + let last_idx = segments.len() - 1; + let mut out = Vec::with_capacity(segments.len()); - let last_idx = parts.len() - 1; - for (i, seg) in parts.iter().enumerate() { - if i == last_idx { - if let Some(name) = strip_kind_prefix(seg) { + for (idx, segment) in segments.iter().enumerate() { + if idx == last_idx { + if let Some(name) = strip_kind_prefix(segment) { out.push(name); } else { - out.push((*seg).to_string()); + out.push((*segment).to_string()); } } else { - out.push((*seg).to_string()); + out.push((*segment).to_string()); } } out } -fn strip_kind_prefix(seg: &str) -> Option { - let seg = seg.strip_suffix(".html").unwrap_or(seg); +fn strip_kind_prefix(segment: &str) -> Option { + let segment = segment.strip_suffix(".html").unwrap_or(segment); + for prefix in [ "struct.", "enum.", @@ -206,20 +288,42 @@ fn strip_kind_prefix(seg: &str) -> Option { "derive.", "attr.", ] { - if let Some(rest) = seg.strip_prefix(prefix) { + if let Some(rest) = segment.strip_prefix(prefix) { return Some(rest.to_string()); } } + None } +async fn kv_list(state: &AppState) -> Result { + let list_response = state + .cache + .kv + .list() + .limit(100) + .execute() + .await + .map_err(|e| { + if matches!(e, KvError::InvalidKvStore(_)) { + panic!("invalid kv store"); + } + e + })?; + + let body = serde_json::to_string_pretty(&list_response) + .map_err(|err| worker::Error::RustError(err.to_string()))?; + + text_response(200, &body, "application/json; charset=utf-8") +} + async fn serve( state: &AppState, crate_name: &str, version: &str, path_segs: &[String], -) -> Response { - let path = match path_segs.split_first() { +) -> Result { + let path: Vec = match path_segs.split_first() { Some((head, tail)) if head == crate_name => tail.to_vec(), _ => 
path_segs.to_vec(), }; @@ -231,48 +335,60 @@ async fn serve( path, }; - match render_spec(&spec, state.fetcher.as_ref(), state.cache.as_ref()).await { - Ok(body) => { - let mut headers = HeaderMap::new(); - headers.insert( - header::CONTENT_TYPE, - "text/markdown; charset=utf-8".parse().unwrap(), - ); - headers.insert(header::VARY, "Accept".parse().unwrap()); - headers.insert( - "x-markdown-tokens", - (body.len() / 4).to_string().parse().unwrap(), - ); - (StatusCode::OK, headers, body).into_response() - } - Err(err) => error_to_response(&err), + let key = CacheKey { + crate_name: spec.crate_name.clone(), + version: spec.version.clone(), + target: spec.target.clone(), + }; + + let krate = if let Some(hit) = state.cache.get(&key).await { + hit + } else { + let fetched = match state + .fetcher + .fetch(&spec.crate_name, &spec.version, spec.target.as_deref()) + .await + { + Ok(fetched) => fetched, + Err(err) => return error_response(&err), + }; + let krate = Arc::new(fetched); + state.cache.put(key, Arc::clone(&krate)).await; + krate + }; + + match render_loaded_crate(&krate, &spec) { + Ok(body) => markdown_response(&body), + Err(err) => error_response(&err), } } -fn error_to_response(err: &Error) -> Response { +fn markdown_response(body: &str) -> Result { + let headers = Headers::new(); + headers.set("content-type", "text/markdown; charset=utf-8")?; + headers.set("vary", "Accept")?; + headers.set("x-markdown-tokens", &(body.len() / 4).to_string())?; + + Ok(Response::ok(body.to_string())?.with_headers(headers)) +} + +fn error_response(err: &Error) -> Result { let status = match err { - Error::NotFound(_) => StatusCode::NOT_FOUND, - Error::InvalidSpec(_) => StatusCode::BAD_REQUEST, + Error::NotFound(_) => 404, + Error::InvalidSpec(_) => 400, Error::FormatVersionMismatch { .. 
} | Error::Fetch(_) | Error::Json(_) | Error::Io(_) => { - StatusCode::BAD_GATEWAY + 502 } }; - (status, err.to_string()).into_response() + + text_response(status, &err.to_string(), "text/plain; charset=utf-8") } -#[event(fetch)] -async fn fetch( - req: HttpRequest, - _env: Env, - _ctx: Context, -) -> worker::Result> { - let state = Arc::new(AppState { - fetcher: Arc::new(WorkerFetcher::new()), - cache: Arc::new(InMemoryCache::default()), - }); +fn text_response(status: u16, body: &str, content_type: &str) -> Result { + let headers = Headers::new(); + headers.set("content-type", content_type)?; - router(state) - .call(req) - .await - .map_err(|err| worker::Error::RustError(err.to_string())) + Ok(Response::ok(body.to_string())? + .with_headers(headers) + .with_status(status)) } diff --git a/crates/md-docrs-worker/wrangler.toml b/crates/md-docrs-worker/wrangler.toml index 95b3a3e..85d659f 100644 --- a/crates/md-docrs-worker/wrangler.toml +++ b/crates/md-docrs-worker/wrangler.toml @@ -2,5 +2,9 @@ name = "md-docrs-worker" main = "build/index.js" compatibility_date = "2025-04-21" +[[kv_namespaces]] +binding = "EXAMPLE" +id = "EXAMPLE" + [build] command = "cargo install \"worker-build@^0.8\" && worker-build --release" From 3901cd085fed35d9d77fdfe40650cd753c2e2bf7 Mon Sep 17 00:00:00 2001 From: Thomas Aubry Date: Tue, 21 Apr 2026 15:18:16 +0200 Subject: [PATCH 3/8] refactor(workspace): split http fetch and server crates --- crates/md-docrs-cli/Cargo.toml | 12 +- crates/md-docrs-cli/src/fetch.rs | 116 ------- crates/md-docrs-cli/src/main.rs | 162 +-------- crates/md-docrs-core/src/cache/mod.rs | 4 +- crates/md-docrs-core/src/fetch.rs | 18 - crates/md-docrs-core/src/lib.rs | 24 +- crates/md-docrs-fetch-http/Cargo.toml | 13 + crates/md-docrs-fetch-http/src/lib.rs | 133 +++++++ crates/md-docrs-rust-wasm/src/lib.rs | 95 +++-- crates/md-docrs-server/Cargo.toml | 24 ++ crates/md-docrs-server/src/main.rs | 120 +++++++ .../src/server.rs | 29 +- 
crates/md-docrs-wasm-compare/src/main.rs | 324 +++++++++++------- 13 files changed, 605 insertions(+), 469 deletions(-) delete mode 100644 crates/md-docrs-cli/src/fetch.rs create mode 100644 crates/md-docrs-fetch-http/Cargo.toml create mode 100644 crates/md-docrs-fetch-http/src/lib.rs create mode 100644 crates/md-docrs-server/Cargo.toml create mode 100644 crates/md-docrs-server/src/main.rs rename crates/{md-docrs-cli => md-docrs-server}/src/server.rs (86%) diff --git a/crates/md-docrs-cli/Cargo.toml b/crates/md-docrs-cli/Cargo.toml index e3355e0..2fe8bfd 100644 --- a/crates/md-docrs-cli/Cargo.toml +++ b/crates/md-docrs-cli/Cargo.toml @@ -7,22 +7,12 @@ edition.workspace = true name = "md-docrs" path = "src/main.rs" -[features] -default = ["hybrid-cache"] -hybrid-cache = ["md-docrs-core/hybrid-cache"] - [dependencies] anyhow.workspace = true -async-trait.workspace = true -axum = { workspace = true, features = ["tokio", "http1"] } clap.workspace = true -rustdoc-types.workspace = true -serde_json.workspace = true tokio.workspace = true tracing.workspace = true tracing-subscriber.workspace = true -tower-http.workspace = true -ureq = { version = "2", features = ["tls", "gzip"] } -zstd.workspace = true md-docrs-core = { path = "../md-docrs-core" } +md-docrs-fetch-http = { path = "../md-docrs-fetch-http" } diff --git a/crates/md-docrs-cli/src/fetch.rs b/crates/md-docrs-cli/src/fetch.rs deleted file mode 100644 index 68b10dd..0000000 --- a/crates/md-docrs-cli/src/fetch.rs +++ /dev/null @@ -1,116 +0,0 @@ -use md_docrs_core::{ - Error, Result, RustdocFetcher, - fetch::{DOCS_RS_BASE, build_url, validate_format_version}, -}; -use rustdoc_types::{Crate, FORMAT_VERSION}; -use std::{future::Future, io::Cursor, pin::Pin, time::Duration}; - -/// Native docs.rs fetcher used by the CLI/server binary. -/// -/// This implementation is intentionally outside `md-docrs-core` so the core -/// stays transport/runtime agnostic. 
-pub struct CliFetcher { - agent: ureq::Agent, - base: String, -} - -impl CliFetcher { - /// Create a fetcher configured for docs.rs. - #[must_use] - pub fn new() -> Self { - let agent = ureq::AgentBuilder::new() - .timeout(Duration::from_secs(30)) - .redirects(10) - .user_agent(concat!("md-docrs-cli/", env!("CARGO_PKG_VERSION"))) - .build(); - - Self { - agent, - base: DOCS_RS_BASE.to_string(), - } - } - - /// Override the docs.rs base URL, mainly for tests. - #[must_use] - pub fn with_base(mut self, base: impl Into) -> Self { - self.base = base.into(); - self - } - - fn read_body_bytes(response: ureq::Response, url: &str) -> Result> { - let mut reader = response.into_reader(); - let mut bytes = Vec::new(); - std::io::Read::read_to_end(&mut reader, &mut bytes).map_err(|err| { - Error::Fetch(format!("failed to read response body for {url}: {err}")) - })?; - Ok(bytes) - } - - fn get_bytes(&self, url: &str) -> Result<(u16, Vec)> { - match self.agent.get(url).call() { - Ok(response) => { - let status = response.status(); - let bytes = Self::read_body_bytes(response, url)?; - Ok((status, bytes)) - } - Err(ureq::Error::Status(status, response)) => { - let bytes = Self::read_body_bytes(response, url)?; - Ok((status, bytes)) - } - Err(err) => Err(Error::Fetch(format!("request failed for {url}: {err}"))), - } - } - - fn head_status(&self, url: &str) -> Result { - match self.agent.head(url).call() { - Ok(response) => Ok(response.status()), - Err(ureq::Error::Status(status, _response)) => Ok(status), - Err(err) => Err(Error::Fetch(format!("request failed for {url}: {err}"))), - } - } -} - -impl RustdocFetcher for CliFetcher { - fn fetch<'a>( - &'a self, - crate_name: &'a str, - version: &'a str, - target: Option<&'a str>, - ) -> Pin> + 'a>> { - Box::pin(async move { - let url = build_url( - &self.base, - crate_name, - version, - target, - Some(FORMAT_VERSION), - ); - - let (status, bytes) = self.get_bytes(&url)?; - - if status == 404 { - let probe_url = build_url(&self.base, 
crate_name, version, target, None); - let probe_status = self.head_status(&probe_url)?; - if (200..300).contains(&probe_status) { - return Err(Error::Fetch(format!( - "{crate_name}@{version} has no rustdoc JSON for format version {FORMAT_VERSION}; waiting on docs.rs rebuild" - ))); - } - return Err(Error::Fetch(format!( - "{crate_name}@{version} not found on docs.rs" - ))); - } - - if !(200..300).contains(&status) { - return Err(Error::Fetch(format!( - "{status} response for {crate_name}@{version}" - ))); - } - - let decoded = zstd::decode_all(Cursor::new(bytes))?; - let krate: Crate = serde_json::from_slice(&decoded)?; - validate_format_version(&krate)?; - Ok(krate) - }) - } -} diff --git a/crates/md-docrs-cli/src/main.rs b/crates/md-docrs-cli/src/main.rs index 71cf60b..2b8066e 100644 --- a/crates/md-docrs-cli/src/main.rs +++ b/crates/md-docrs-cli/src/main.rs @@ -1,175 +1,51 @@ #![warn(clippy::pedantic)] use anyhow::{Context, Result}; -use clap::{Parser, Subcommand}; -use md_docrs_core::{ - ItemSpec, - cache::{CrateCache, InMemoryCache}, - render_spec, -}; -use std::net::SocketAddr; -use std::path::PathBuf; -use std::sync::Arc; - -#[cfg(feature = "hybrid-cache")] -use md_docrs_core::cache::{FoyerHybridCache, FoyerHybridCacheConfig}; - -mod fetch; -mod server; - -use crate::fetch::CliFetcher; +use clap::Parser; +use md_docrs_core::{ItemSpec, cache::InMemoryCache, render_spec}; +use md_docrs_fetch_http::UreqRustdocFetcher; #[derive(Parser, Debug)] #[command( name = "md-docrs", version, - about = "Serve Rust crate docs as Markdown via rustdoc JSON" + about = "Render Rust crate docs as Markdown via rustdoc JSON" )] struct Cli { - #[command(subcommand)] - command: Option, - - /// Spec: crate[@version][::path::to::item]. Equivalent to `render` subcommand. + /// Spec: crate[@version][::path::to::item]. #[arg(value_name = "SPEC")] - spec: Option, + spec: String, /// Override the target triple (e.g. x86_64-pc-windows-msvc). 
- #[arg(long, global = true)] + #[arg(long)] target: Option, } -#[derive(Subcommand, Debug)] -enum Command { - /// Render a single spec to stdout. - Render { - spec: String, - #[arg(long)] - target: Option, - }, - /// Run the HTTP server mirroring docs.rs URLs. - Serve { - #[arg(long, default_value_t = 8080)] - port: u16, - #[arg(long, default_value = "127.0.0.1")] - bind: String, - /// Enable the memory+disk hybrid cache backed by foyer; requires the - /// `hybrid-cache` feature. When set, the directory is created if - /// missing and used as the disk tier. - #[arg(long, value_name = "DIR")] - cache_dir: Option, - /// Disk tier capacity in bytes. Only applied when `--cache-dir` is - /// set. - #[arg(long, default_value_t = 4 * 1024 * 1024 * 1024)] - cache_disk_bytes: usize, - /// Memory tier weight budget in bytes. Only applied when - /// `--cache-dir` is set. - #[arg(long, default_value_t = 256 * 1024 * 1024)] - cache_memory_bytes: usize, - }, -} - #[tokio::main] async fn main() -> Result<()> { init_tracing(); let cli = Cli::parse(); + let spec = ItemSpec::parse(&cli.spec) + .with_context(|| format!("invalid spec: {}", cli.spec))? 
+ .with_target(cli.target); - match (cli.command, cli.spec) { - (Some(Command::Render { spec, target }), _) => { - render_cmd(&spec, target.or(cli.target)).await - } - ( - Some(Command::Serve { - port, - bind, - cache_dir, - cache_disk_bytes, - cache_memory_bytes, - }), - _, - ) => serve_cmd(&bind, port, cache_dir, cache_disk_bytes, cache_memory_bytes).await, - (None, Some(spec)) => render_cmd(&spec, cli.target).await, - (None, None) => { - eprintln!("usage: md-docrs | md-docrs serve | md-docrs render "); - std::process::exit(2); - } - } + let fetcher = + UreqRustdocFetcher::with_user_agent(concat!("md-docrs-cli/", env!("CARGO_PKG_VERSION"))); + let cache = InMemoryCache::default(); + + let md = render_spec(&spec, &fetcher, &cache).await?; + print!("{md}"); + + Ok(()) } fn init_tracing() { let filter = tracing_subscriber::EnvFilter::try_from_default_env() .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")); + tracing_subscriber::fmt() .with_env_filter(filter) .with_target(false) .init(); } - -async fn render_cmd(raw: &str, target: Option) -> Result<()> { - let spec = ItemSpec::parse(raw) - .with_context(|| format!("invalid spec: {raw}"))? 
- .with_target(target); - let fetcher = CliFetcher::new(); - let cache = InMemoryCache::default(); - let md = render_spec(&spec, &fetcher, &cache).await?; - print!("{md}"); - Ok(()) -} - -async fn serve_cmd( - bind: &str, - port: u16, - cache_dir: Option, - cache_disk_bytes: usize, - cache_memory_bytes: usize, -) -> Result<()> { - let addr: SocketAddr = format!("{bind}:{port}").parse()?; - let cache = build_cache(cache_dir, cache_disk_bytes, cache_memory_bytes).await?; - let state = Arc::new(server::AppState { - fetcher: Arc::new(CliFetcher::new()), - cache, - }); - let app = server::router(state); - tracing::info!(%addr, "md-docrs serve listening"); - let listener = tokio::net::TcpListener::bind(addr).await?; - axum::serve(listener, app).await?; - Ok(()) -} - -#[cfg(feature = "hybrid-cache")] -async fn build_cache( - cache_dir: Option, - disk_bytes: usize, - memory_bytes: usize, -) -> Result> { - if let Some(dir) = cache_dir { - std::fs::create_dir_all(&dir) - .with_context(|| format!("create cache dir {}", dir.display()))?; - tracing::info!(dir = %dir.display(), disk_bytes, memory_bytes, "using foyer hybrid cache"); - let hybrid = FoyerHybridCache::new(FoyerHybridCacheConfig { - dir, - memory_capacity_bytes: memory_bytes, - disk_capacity_bytes: disk_bytes, - }) - .await?; - Ok(Arc::new(hybrid)) - } else { - Ok(Arc::new(InMemoryCache::default())) - } -} - -#[cfg(not(feature = "hybrid-cache"))] -#[allow(clippy::unused_async)] -async fn build_cache( - cache_dir: Option, - _disk_bytes: usize, - _memory_bytes: usize, -) -> Result> { - if cache_dir.is_some() { - anyhow::bail!( - "--cache-dir was supplied but this binary was built without the \ - `hybrid-cache` feature; rebuild with `cargo build --features hybrid-cache`" - ); - } - Ok(Arc::new(InMemoryCache::default())) -} diff --git a/crates/md-docrs-core/src/cache/mod.rs b/crates/md-docrs-core/src/cache/mod.rs index 995fa7d..18976b4 100644 --- a/crates/md-docrs-core/src/cache/mod.rs +++ 
b/crates/md-docrs-core/src/cache/mod.rs @@ -5,13 +5,13 @@ use std::sync::Arc; #[cfg(feature = "hybrid-cache")] use serde::{Deserialize, Serialize}; -mod memory; #[cfg(feature = "hybrid-cache")] mod hybrid; +mod memory; -pub use memory::InMemoryCache; #[cfg(feature = "hybrid-cache")] pub use hybrid::{FoyerHybridCache, FoyerHybridCacheConfig}; +pub use memory::InMemoryCache; #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "hybrid-cache", derive(Serialize, Deserialize))] diff --git a/crates/md-docrs-core/src/fetch.rs b/crates/md-docrs-core/src/fetch.rs index d451133..0da9fc5 100644 --- a/crates/md-docrs-core/src/fetch.rs +++ b/crates/md-docrs-core/src/fetch.rs @@ -1,5 +1,4 @@ use crate::{Error, Result}; -use async_trait::async_trait; use rustdoc_types::{Crate, FORMAT_VERSION}; pub const DOCS_RS_BASE: &str = "https://docs.rs"; @@ -26,23 +25,6 @@ pub fn build_url( } } -/// Minimal transport abstraction for loading parsed rustdoc JSON. -/// -/// Platform-specific callers provide their own implementation: -/// - CLI can use a small native HTTP client -/// - Cloudflare Worker can use the Worker runtime fetch API -#[async_trait(?Send)] -pub trait RustdocFetcher: Send + Sync { - /// Fetch, decode, and parse rustdoc JSON for the requested crate. - /// - /// # Errors - /// Returns transport-specific fetch failures as `Error::Fetch`, - /// unsupported schema versions as `Error::FormatVersionMismatch`, - /// JSON parse failures as `Error::Json`, and decode failures as `Error::Io` - /// or `Error::Fetch` depending on the implementation. - async fn fetch(&self, crate_name: &str, version: &str, target: Option<&str>) -> Result; -} - /// Shared validation helper for fetcher implementations. 
/// /// # Errors diff --git a/crates/md-docrs-core/src/lib.rs b/crates/md-docrs-core/src/lib.rs index b037540..2d655df 100644 --- a/crates/md-docrs-core/src/lib.rs +++ b/crates/md-docrs-core/src/lib.rs @@ -1,7 +1,8 @@ #![warn(clippy::pedantic)] +use async_trait::async_trait; use rustdoc_types::Crate; -use std::{future::Future, pin::Pin, sync::Arc}; +use std::sync::Arc; pub mod cache; pub mod error; @@ -14,13 +15,14 @@ pub use error::{Error, Result}; pub use fetch::{DOCS_RS_BASE, build_url, validate_format_version}; pub use spec::ItemSpec; +#[async_trait] pub trait RustdocFetcher: Send + Sync { - fn fetch<'a>( - &'a self, - crate_name: &'a str, - version: &'a str, - target: Option<&'a str>, - ) -> Pin> + 'a>>; + /// Fetch and decode the rustdoc JSON crate for the requested package. + /// + /// # Errors + /// Returns any transport, decode, or parse error surfaced by the + /// implementation. + async fn fetch(&self, crate_name: &str, version: &str, target: Option<&str>) -> Result; } /// High-level entry point: take a parsed [`ItemSpec`], fetch the rustdoc crate, @@ -32,8 +34,8 @@ pub trait RustdocFetcher: Send + Sync { /// - cache-independent resolution errors from [`resolve::resolve`] pub async fn render_spec( spec: &ItemSpec, - fetcher: &dyn RustdocFetcher, - cache: &dyn cache::CrateCache, + fetcher: &(dyn RustdocFetcher + Send + Sync), + cache: &(dyn cache::CrateCache + Send + Sync), ) -> Result { let krate = load_crate(spec, fetcher, cache).await?; render_loaded_crate(&krate, spec) @@ -45,8 +47,8 @@ pub async fn render_spec( /// Returns any error produced by the fetcher. 
pub async fn load_crate( spec: &ItemSpec, - fetcher: &dyn RustdocFetcher, - cache: &dyn cache::CrateCache, + fetcher: &(dyn RustdocFetcher + Send + Sync), + cache: &(dyn cache::CrateCache + Send + Sync), ) -> Result> { let key = cache::CacheKey { crate_name: spec.crate_name.clone(), diff --git a/crates/md-docrs-fetch-http/Cargo.toml b/crates/md-docrs-fetch-http/Cargo.toml new file mode 100644 index 0000000..78bfeb8 --- /dev/null +++ b/crates/md-docrs-fetch-http/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "md-docrs-fetch-http" +version.workspace = true +edition.workspace = true + +[dependencies] +async-trait.workspace = true +md-docrs-core = { path = "../md-docrs-core" } + +rustdoc-types.workspace = true +serde_json.workspace = true +ureq = { version = "2", features = ["tls", "gzip"] } +zstd.workspace = true diff --git a/crates/md-docrs-fetch-http/src/lib.rs b/crates/md-docrs-fetch-http/src/lib.rs new file mode 100644 index 0000000..84dfd71 --- /dev/null +++ b/crates/md-docrs-fetch-http/src/lib.rs @@ -0,0 +1,133 @@ +#![warn(clippy::pedantic)] + +use async_trait::async_trait; +use md_docrs_core::{ + Error, Result, RustdocFetcher, + fetch::{DOCS_RS_BASE, build_url, validate_format_version}, +}; +use rustdoc_types::{Crate, FORMAT_VERSION}; +use std::{io::Cursor, time::Duration}; + +/// Native docs.rs fetcher shared by the CLI and native server crates. +/// +/// This lives outside `md-docrs-core` so the core remains transport-agnostic. +pub struct UreqRustdocFetcher { + agent: ureq::Agent, + base: String, + user_agent: String, +} + +impl UreqRustdocFetcher { + /// Create a fetcher configured for docs.rs with a default user agent. + #[must_use] + pub fn new() -> Self { + Self::with_user_agent(concat!("md-docrs/", env!("CARGO_PKG_VERSION"))) + } + + /// Create a fetcher with a custom user agent string. 
+ #[must_use] + pub fn with_user_agent(user_agent: impl Into) -> Self { + let user_agent = user_agent.into(); + let agent = ureq::AgentBuilder::new() + .timeout(Duration::from_secs(30)) + .redirects(10) + .user_agent(&user_agent) + .build(); + + Self { + agent, + base: DOCS_RS_BASE.to_string(), + user_agent, + } + } + + /// Override the docs.rs base URL, mainly for tests. + #[must_use] + pub fn with_base(mut self, base: impl Into) -> Self { + self.base = base.into(); + self + } + + /// Return the configured user agent. + #[must_use] + pub fn user_agent(&self) -> &str { + &self.user_agent + } + + fn read_body_bytes(response: ureq::Response, url: &str) -> Result> { + let mut reader = response.into_reader(); + let mut bytes = Vec::new(); + std::io::Read::read_to_end(&mut reader, &mut bytes).map_err(|err| { + Error::Fetch(format!("failed to read response body for {url}: {err}")) + })?; + Ok(bytes) + } + + fn get_bytes(&self, url: &str) -> Result<(u16, Vec)> { + match self.agent.get(url).call() { + Ok(response) => { + let status = response.status(); + let bytes = Self::read_body_bytes(response, url)?; + Ok((status, bytes)) + } + Err(ureq::Error::Status(status, response)) => { + let bytes = Self::read_body_bytes(response, url)?; + Ok((status, bytes)) + } + Err(err) => Err(Error::Fetch(format!("request failed for {url}: {err}"))), + } + } + + fn head_status(&self, url: &str) -> Result { + match self.agent.head(url).call() { + Ok(response) => Ok(response.status()), + Err(ureq::Error::Status(status, _response)) => Ok(status), + Err(err) => Err(Error::Fetch(format!("request failed for {url}: {err}"))), + } + } +} + +impl Default for UreqRustdocFetcher { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl RustdocFetcher for UreqRustdocFetcher { + async fn fetch(&self, crate_name: &str, version: &str, target: Option<&str>) -> Result { + let url = build_url( + &self.base, + crate_name, + version, + target, + Some(FORMAT_VERSION), + ); + + let (status, bytes) = 
self.get_bytes(&url)?; + + if status == 404 { + let probe_url = build_url(&self.base, crate_name, version, target, None); + let probe_status = self.head_status(&probe_url)?; + if (200..300).contains(&probe_status) { + return Err(Error::Fetch(format!( + "{crate_name}@{version} has no rustdoc JSON for format version {FORMAT_VERSION}; waiting on docs.rs rebuild" + ))); + } + return Err(Error::Fetch(format!( + "{crate_name}@{version} not found on docs.rs" + ))); + } + + if !(200..300).contains(&status) { + return Err(Error::Fetch(format!( + "{status} response for {crate_name}@{version}" + ))); + } + + let decoded = zstd::decode_all(Cursor::new(bytes))?; + let krate: Crate = serde_json::from_slice(&decoded)?; + validate_format_version(&krate)?; + Ok(krate) + } +} diff --git a/crates/md-docrs-rust-wasm/src/lib.rs b/crates/md-docrs-rust-wasm/src/lib.rs index 861630d..3a14212 100644 --- a/crates/md-docrs-rust-wasm/src/lib.rs +++ b/crates/md-docrs-rust-wasm/src/lib.rs @@ -43,9 +43,13 @@ fn layout_for(len: usize) -> Option { /// Allocate `len` bytes inside the WASM linear memory. Returns null on failure /// or when `len == 0`. Caller must free with `free(ptr, len)`. 
+#[must_use] #[cfg_attr(target_arch = "wasm32", unsafe(no_mangle))] pub extern "C" fn alloc(len: u32) -> *mut u8 { - let Some(layout) = layout_for(len as usize) else { + let Ok(len) = usize::try_from(len) else { + return ptr::null_mut(); + }; + let Some(layout) = layout_for(len) else { return ptr::null_mut(); }; unsafe { rust_alloc(layout) } @@ -60,7 +64,10 @@ pub unsafe extern "C" fn free(ptr: *mut u8, len: u32) { if ptr.is_null() { return; } - let Some(layout) = layout_for(len as usize) else { + let Ok(len) = usize::try_from(len) else { + return; + }; + let Some(layout) = layout_for(len) else { return; }; unsafe { dealloc(ptr, layout) }; @@ -85,15 +92,17 @@ fn parse_spec_with_target( target_ptr: *const u8, target_len: u32, ) -> Option { - let spec_bytes = unsafe { slice::from_raw_parts(spec_ptr, spec_len as usize) }; - let spec_str = std::str::from_utf8(spec_bytes).ok()?; - let mut spec = ItemSpec::parse(spec_str).ok()?; + let spec_len = usize::try_from(spec_len).ok()?; + let spec_bytes = unsafe { slice::from_raw_parts(spec_ptr, spec_len) }; + let spec = std::str::from_utf8(spec_bytes).ok()?; + let mut item_spec = ItemSpec::parse(spec).ok()?; if target_len > 0 { - let t = unsafe { slice::from_raw_parts(target_ptr, target_len as usize) }; - let t_str = std::str::from_utf8(t).ok()?; - spec = spec.with_target(Some(t_str.to_string())); + let target_len = usize::try_from(target_len).ok()?; + let target_bytes = unsafe { slice::from_raw_parts(target_ptr, target_len) }; + let target = std::str::from_utf8(target_bytes).ok()?; + item_spec = item_spec.with_target(Some(target.to_string())); } - Some(spec) + Some(item_spec) } /// Parse `spec` and write the docs.rs rustdoc JSON URL into `out_ptr`. 
@@ -115,15 +124,18 @@ pub unsafe extern "C" fn resolve_url( let Some(spec) = parse_spec_with_target(spec_ptr, spec_len, target_ptr, target_len) else { return 0; }; + let Ok(out_cap) = usize::try_from(out_cap) else { + return 0; + }; let url = build_docs_rs_url(&spec); let bytes = url.as_bytes(); - if bytes.len() > out_cap as usize { + if bytes.len() > out_cap { return 0; } unsafe { ptr::copy_nonoverlapping(bytes.as_ptr(), out_ptr, bytes.len()); } - bytes.len() as u32 + u32::try_from(bytes.len()).unwrap_or(0) } /// Render rustdoc JSON to Markdown. @@ -151,7 +163,10 @@ pub unsafe extern "C" fn render_markdown( target_len: u32, len_out: *mut u32, ) -> *mut u8 { - let json = unsafe { slice::from_raw_parts(json_ptr, json_len as usize) }; + let Ok(json_len) = usize::try_from(json_len) else { + return ptr::null_mut(); + }; + let json = unsafe { slice::from_raw_parts(json_ptr, json_len) }; let Some(spec) = parse_spec_with_target(spec_ptr, spec_len, target_ptr, target_len) else { return ptr::null_mut(); }; @@ -172,9 +187,13 @@ pub unsafe extern "C" fn render_markdown( if out.is_null() { return ptr::null_mut(); } + let Ok(len_out_value) = u32::try_from(bytes.len()) else { + unsafe { dealloc(out, layout) }; + return ptr::null_mut(); + }; unsafe { ptr::copy_nonoverlapping(bytes.as_ptr(), out, bytes.len()); - *len_out = bytes.len() as u32; + *len_out = len_out_value; } out } @@ -236,10 +255,13 @@ pub unsafe extern "C" fn render_spec( return -5; }; let url = build_docs_rs_url(&spec); + let Ok(url_len) = u32::try_from(url.len()) else { + return -5; + }; let mut resp_ptr: u32 = 0; let mut resp_len: u32 = 0; - let rc = unsafe { fetch_bytes(url.as_ptr(), url.len() as u32, &mut resp_ptr, &mut resp_len) }; + let rc = unsafe { fetch_bytes(url.as_ptr(), url_len, &mut resp_ptr, &mut resp_len) }; if rc != 0 { return -2; } @@ -247,8 +269,17 @@ pub unsafe extern "C" fn render_spec( return -2; } + let Ok(resp_ptr_usize) = usize::try_from(resp_ptr) else { + unsafe { free(resp_ptr as *mut u8, 
resp_len) }; + return -3; + }; + let Ok(resp_len_usize) = usize::try_from(resp_len) else { + unsafe { free(resp_ptr as *mut u8, resp_len) }; + return -3; + }; + // Take ownership of the host-written buffer; free it once decoded. - let compressed = unsafe { slice::from_raw_parts(resp_ptr as *const u8, resp_len as usize) }; + let compressed = unsafe { slice::from_raw_parts(resp_ptr_usize as *const u8, resp_len_usize) }; let decoded = zstd_decode(compressed); unsafe { free(resp_ptr as *mut u8, resp_len) }; let Some(json) = decoded else { @@ -273,10 +304,18 @@ pub unsafe extern "C" fn render_spec( if out.is_null() { return -1; } + let Ok(out_ptr_value) = u32::try_from(out as usize) else { + unsafe { dealloc(out, layout) }; + return -6; + }; + let Ok(out_len_value) = u32::try_from(bytes.len()) else { + unsafe { dealloc(out, layout) }; + return -6; + }; unsafe { ptr::copy_nonoverlapping(bytes.as_ptr(), out, bytes.len()); - *buf_ptr_out = out as u32; - *buf_len_out = bytes.len() as u32; + *buf_ptr_out = out_ptr_value; + *buf_len_out = out_len_value; } 0 } @@ -292,15 +331,15 @@ mod tests { let n = unsafe { resolve_url( spec.as_ptr(), - spec.len() as u32, + u32::try_from(spec.len()).unwrap(), ptr::null(), 0, out.as_mut_ptr(), - out.len() as u32, + u32::try_from(out.len()).unwrap(), ) }; assert_eq!( - std::str::from_utf8(&out[..n as usize]).unwrap(), + std::str::from_utf8(&out[..usize::try_from(n).unwrap()]).unwrap(), "https://docs.rs/crate/serde/latest/json/57.zst", ); } @@ -313,15 +352,15 @@ mod tests { let n = unsafe { resolve_url( spec.as_ptr(), - spec.len() as u32, + u32::try_from(spec.len()).unwrap(), target.as_ptr(), - target.len() as u32, + u32::try_from(target.len()).unwrap(), out.as_mut_ptr(), - out.len() as u32, + u32::try_from(out.len()).unwrap(), ) }; assert_eq!( - std::str::from_utf8(&out[..n as usize]).unwrap(), + std::str::from_utf8(&out[..usize::try_from(n).unwrap()]).unwrap(), "https://docs.rs/crate/tokio/1.52.1/x86_64-unknown-linux-gnu/json/57.zst", ); } 
@@ -333,11 +372,11 @@ mod tests { let n = unsafe { resolve_url( spec.as_ptr(), - spec.len() as u32, + u32::try_from(spec.len()).unwrap(), ptr::null(), 0, out.as_mut_ptr(), - out.len() as u32, + u32::try_from(out.len()).unwrap(), ) }; assert_eq!(n, 0); @@ -350,11 +389,11 @@ mod tests { let n = unsafe { resolve_url( spec.as_ptr(), - spec.len() as u32, + u32::try_from(spec.len()).unwrap(), ptr::null(), 0, out.as_mut_ptr(), - out.len() as u32, + u32::try_from(out.len()).unwrap(), ) }; assert_eq!(n, 0); diff --git a/crates/md-docrs-server/Cargo.toml b/crates/md-docrs-server/Cargo.toml new file mode 100644 index 0000000..c6021c7 --- /dev/null +++ b/crates/md-docrs-server/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "md-docrs-server" +version.workspace = true +edition.workspace = true + +[[bin]] +name = "md-docrs-server" +path = "src/main.rs" + +[features] +default = ["hybrid-cache"] +hybrid-cache = ["md-docrs-core/hybrid-cache"] + +[dependencies] +anyhow.workspace = true +axum = { workspace = true, features = ["tokio", "http1", "macros"] } +clap.workspace = true +tokio.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true +tower-http.workspace = true + +md-docrs-core = { path = "../md-docrs-core" } +md-docrs-fetch-http = { path = "../md-docrs-fetch-http" } diff --git a/crates/md-docrs-server/src/main.rs b/crates/md-docrs-server/src/main.rs new file mode 100644 index 0000000..c70e6c4 --- /dev/null +++ b/crates/md-docrs-server/src/main.rs @@ -0,0 +1,120 @@ +#![warn(clippy::pedantic)] + +use anyhow::{Context, Result}; +use md_docrs_core::cache::{CrateCache, InMemoryCache}; +#[cfg(feature = "hybrid-cache")] +use md_docrs_core::cache::{FoyerHybridCache, FoyerHybridCacheConfig}; +use md_docrs_fetch_http::UreqRustdocFetcher; +use std::net::SocketAddr; +use std::path::PathBuf; +use std::sync::Arc; + +mod server; + +#[derive(clap::Parser, Debug)] +#[command( + name = "md-docrs-server", + version, + about = "Serve Rust crate docs as Markdown via rustdoc 
JSON" +)] +struct Cli { + #[arg(long, default_value_t = 8080)] + port: u16, + + #[arg(long, default_value = "127.0.0.1")] + bind: String, + + /// Enable the memory+disk hybrid cache backed by foyer. When set, the + /// directory is created if missing and used as the disk tier. + #[arg(long, value_name = "DIR")] + cache_dir: Option, + + /// Disk tier capacity in bytes. Only applied when `--cache-dir` is set. + #[arg(long, default_value_t = 4 * 1024 * 1024 * 1024)] + cache_disk_bytes: usize, + + /// Memory tier weight budget in bytes. Only applied when `--cache-dir` is set. + #[arg(long, default_value_t = 256 * 1024 * 1024)] + cache_memory_bytes: usize, +} + +#[tokio::main] +async fn main() -> Result<()> { + init_tracing(); + + let cli = ::parse(); + let addr: SocketAddr = format!("{}:{}", cli.bind, cli.port).parse()?; + + let cache = build_cache(cli.cache_dir, cli.cache_disk_bytes, cli.cache_memory_bytes).await?; + + let state = Arc::new(server::AppState { + fetcher: Arc::new(UreqRustdocFetcher::new()), + cache, + }); + + let app = server::router(state); + + tracing::info!(%addr, "md-docrs-server listening"); + + let listener = tokio::net::TcpListener::bind(addr).await?; + axum::serve(listener, app).await?; + + Ok(()) +} + +fn init_tracing() { + let filter = tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")); + + tracing_subscriber::fmt() + .with_env_filter(filter) + .with_target(false) + .init(); +} + +#[cfg(feature = "hybrid-cache")] +async fn build_cache( + cache_dir: Option, + disk_bytes: usize, + memory_bytes: usize, +) -> Result> { + if let Some(dir) = cache_dir { + std::fs::create_dir_all(&dir) + .with_context(|| format!("create cache dir {}", dir.display()))?; + + tracing::info!( + dir = %dir.display(), + disk_bytes, + memory_bytes, + "using foyer hybrid cache" + ); + + let hybrid = FoyerHybridCache::new(FoyerHybridCacheConfig { + dir, + memory_capacity_bytes: memory_bytes, + 
disk_capacity_bytes: disk_bytes, + }) + .await?; + + Ok(Arc::new(hybrid)) + } else { + Ok(Arc::new(InMemoryCache::default())) + } +} + +#[cfg(not(feature = "hybrid-cache"))] +#[allow(clippy::unused_async)] +async fn build_cache( + cache_dir: Option, + _disk_bytes: usize, + _memory_bytes: usize, +) -> Result> { + if cache_dir.is_some() { + anyhow::bail!( + "--cache-dir was supplied but this binary was built without the \ + `hybrid-cache` feature; rebuild with `cargo build -p md-docrs-server --features hybrid-cache`" + ); + } + + Ok(Arc::new(InMemoryCache::default())) +} diff --git a/crates/md-docrs-cli/src/server.rs b/crates/md-docrs-server/src/server.rs similarity index 86% rename from crates/md-docrs-cli/src/server.rs rename to crates/md-docrs-server/src/server.rs index e4da186..5812323 100644 --- a/crates/md-docrs-cli/src/server.rs +++ b/crates/md-docrs-server/src/server.rs @@ -26,7 +26,7 @@ pub fn router(state: Arc) -> Router { } async fn root() -> &'static str { - "md-docrs-proxy - GET /[/][/] for Markdown docs\n" + "md-docrs-server - GET /[/][/] for Markdown docs\n" } #[axum::debug_handler] @@ -66,12 +66,15 @@ fn parse_rest(rest: &str) -> Vec { if rest.is_empty() { return vec![]; } + let parts: Vec<&str> = rest.split('/').filter(|s| !s.is_empty()).collect(); - let mut out: Vec = Vec::with_capacity(parts.len()); if parts.is_empty() { - return out; + return vec![]; } + let last_idx = parts.len() - 1; + let mut out = Vec::with_capacity(parts.len()); + for (i, seg) in parts.iter().enumerate() { if i == last_idx { if let Some(name) = strip_kind_prefix(seg) { @@ -83,11 +86,13 @@ fn parse_rest(rest: &str) -> Vec { out.push((*seg).to_string()); } } + out } fn strip_kind_prefix(seg: &str) -> Option { let seg = seg.strip_suffix(".html").unwrap_or(seg); + for prefix in [ "struct.", "enum.", @@ -106,6 +111,7 @@ fn strip_kind_prefix(seg: &str) -> Option { return Some(rest.to_string()); } } + None } @@ -115,9 +121,6 @@ async fn serve( version: &str, path_segs: &[String], ) 
-> Response { - // docs.rs URLs embed the crate name as the first module segment (e.g. - // /serde/latest/serde/de/trait.Foo.html). Strip it so the spec path is - // relative to the crate root. let path: Vec = match path_segs.split_first() { Some((head, tail)) if head == crate_name => tail.to_vec(), _ => path_segs.to_vec(), @@ -137,22 +140,24 @@ async fn serve( header::CONTENT_TYPE, "text/markdown; charset=utf-8".parse().unwrap(), ); - let tokens = body.len() / 4; - headers.insert("x-markdown-tokens", tokens.to_string().parse().unwrap()); headers.insert(header::VARY, "Accept".parse().unwrap()); + headers.insert( + "x-markdown-tokens", + (body.len() / 4).to_string().parse().unwrap(), + ); (StatusCode::OK, headers, body).into_response() } - Err(e) => error_to_response(&e), + Err(err) => error_to_response(&err), } } -fn error_to_response(e: &Error) -> Response { - let status = match e { +fn error_to_response(err: &Error) -> Response { + let status = match err { Error::NotFound(_) => StatusCode::NOT_FOUND, Error::InvalidSpec(_) => StatusCode::BAD_REQUEST, Error::FormatVersionMismatch { .. 
} | Error::Fetch(_) | Error::Json(_) | Error::Io(_) => { StatusCode::BAD_GATEWAY } }; - (status, e.to_string()).into_response() + (status, err.to_string()).into_response() } diff --git a/crates/md-docrs-wasm-compare/src/main.rs b/crates/md-docrs-wasm-compare/src/main.rs index 7ad2312..b45d157 100644 --- a/crates/md-docrs-wasm-compare/src/main.rs +++ b/crates/md-docrs-wasm-compare/src/main.rs @@ -175,8 +175,43 @@ fn default_artifacts_dir() -> PathBuf { fn main() -> Result<()> { let args = parse_args()?; + let all = artifacts(&args); + let present: Vec<_> = all + .iter() + .filter(|artifact| artifact.path.exists()) + .collect(); - let all = [ + if present.is_empty() { + bail!( + "no .wasm artifacts found under {}\n\ + run `./wasm/build.sh` first, or pass --artifacts-dir", + args.artifacts_dir.display(), + ); + } + + print_run_header(&args); + print_artifact_sizes(&present)?; + + for spec in DEFAULT_SPECS { + print_spec_header(spec); + print_resolve_results(&present, &args, spec)?; + print_render_results(&present, &args, spec)?; + println!(); + } + + Ok(()) +} + +fn truncate(s: &str, max: usize) -> String { + if s.len() > max { + format!("{}...", &s[..max.saturating_sub(3)]) + } else { + s.to_string() + } +} + +fn artifacts(args: &Args) -> [Artifact; 6] { + [ Artifact { label: "zig-minimal", path: args.artifacts_dir.join("zig-minimal.wasm"), @@ -207,17 +242,10 @@ fn main() -> Result<()> { path: args.artifacts_dir.join("rust-full-opt.wasm"), flavor: Flavor::Full, }, - ]; - - let present: Vec<_> = all.iter().filter(|a| a.path.exists()).collect(); - if present.is_empty() { - bail!( - "no .wasm artifacts found under {}\n\ - run `./wasm/build.sh` first, or pass --artifacts-dir", - args.artifacts_dir.display(), - ); - } + ] +} +fn print_run_header(args: &Args) { println!("runtime: {:?}", args.runtime); println!( "iterations: resolve_url={}, render_spec={}", @@ -229,114 +257,131 @@ fn main() -> Result<()> { if args.offline { "offline" } else { "online" } ); println!(); +} 
+fn print_artifact_sizes(present: &[&Artifact]) -> Result<()> { println!("{:<14} {:>10} {:>8}", "artifact", "bytes", "flavor"); println!("{:-<14} {:->10} {:->8}", "", "", ""); - for a in &present { - let meta = fs::metadata(&a.path)?; - let flavor = match a.flavor { + + for artifact in present { + let meta = fs::metadata(&artifact.path)?; + let flavor = match artifact.flavor { Flavor::Minimal => "minimal", Flavor::Full => "full", }; - println!("{:<14} {:>10} {:>8}", a.label, meta.len(), flavor); + println!("{:<14} {:>10} {:>8}", artifact.label, meta.len(), flavor); } + println!(); + Ok(()) +} - for spec in DEFAULT_SPECS { - println!( - "spec: {}{}", - spec.spec, - spec.target - .map(|t| format!(" (target={t})")) - .unwrap_or_default(), - ); - println!( - "{:<14} {:<60} {:>10} {:>10}", - "artifact", "resolve_url output", "median us", "p95 us" - ); - println!("{:-<14} {:-<60} {:->10} {:->10}", "", "", "", ""); - for a in &present { - let bytes = fs::read(&a.path)?; - match run_resolve(args.runtime, &bytes, spec, args.iterations) { - Ok(result) => { - let output = result - .output - .as_deref() - .unwrap_or(""); - let shown = truncate(output, 60); - println!( - "{:<14} {:<60} {:>10} {:>10}", - a.label, - shown, - result.median.as_micros(), - result.p95.as_micros(), - ); - } - Err(e) => println!("{:<14} resolve_url error: {}", a.label, e), +fn print_spec_header(spec: &Spec) { + println!( + "spec: {}{}", + spec.spec, + spec.target + .map(|target| format!(" (target={target})")) + .unwrap_or_default(), + ); + println!( + "{:<14} {:<60} {:>10} {:>10}", + "artifact", "resolve_url output", "median us", "p95 us" + ); + println!("{:-<14} {:-<60} {:->10} {:->10}", "", "", "", ""); +} + +fn print_resolve_results(present: &[&Artifact], args: &Args, spec: &Spec) -> Result<()> { + for artifact in present { + let bytes = fs::read(&artifact.path)?; + match run_resolve(args.runtime, &bytes, spec, args.iterations) { + Ok(result) => { + let output = result + .output + .as_deref() + 
.unwrap_or(""); + let shown = truncate(output, 60); + println!( + "{:<14} {:<60} {:>10} {:>10}", + artifact.label, + shown, + result.median.as_micros(), + result.p95.as_micros(), + ); } + Err(error) => println!("{:<14} resolve_url error: {}", artifact.label, error), } + } - if !args.offline && present.iter().any(|a| a.flavor == Flavor::Full) { - println!(); - println!( - "{:<14} {:>8} {:>8} {:>10} {:>10} {:<16}", - "artifact", "md bytes", "fetch ms", "render ms", "total ms", "parity" - ); - println!( - "{:-<14} {:->8} {:->8} {:->10} {:->10} {:-<16}", - "", "", "", "", "", "" - ); - let mut parity: HashMap> = HashMap::new(); - for a in &present { - if a.flavor != Flavor::Full { - continue; - } - let bytes = fs::read(&a.path)?; - match run_render(args.runtime, &bytes, spec, args.render_iterations) { - Ok(r) => { - let hash = blake3::hash(r.output.as_bytes()); - let short = short_hash(hash.to_hex().as_str()); - parity.entry(short.clone()).or_default().push(a.label); - println!( - "{:<14} {:>8} {:>8} {:>10} {:>10} {:<16}", - a.label, - r.output.len(), - r.fetch_median.as_millis(), - r.render_median.as_millis(), - r.total_median.as_millis(), - short, - ); - } - Err(e) => println!("{:<14} render_spec error: {}", a.label, e), - } - } - if parity.len() > 1 { - println!("parity: outputs differ across artifacts"); - for (hash, labels) in &parity { - println!(" {}: {}", hash, labels.join(", ")); - } - } else if let Some((hash, labels)) = parity.iter().next() { - if labels.len() > 1 { - println!( - "parity: all {} full artifacts agree ({})", - labels.len(), - hash - ); - } - } + Ok(()) +} + +fn print_render_results(present: &[&Artifact], args: &Args, spec: &Spec) -> Result<()> { + if args.offline + || !present + .iter() + .any(|artifact| artifact.flavor == Flavor::Full) + { + return Ok(()); + } + + println!(); + println!( + "{:<14} {:>8} {:>8} {:>10} {:>10} {:<16}", + "artifact", "md bytes", "fetch ms", "render ms", "total ms", "parity" + ); + println!( + "{:-<14} {:->8} 
{:->8} {:->10} {:->10} {:-<16}", + "", "", "", "", "", "" + ); + + let mut parity: HashMap> = HashMap::new(); + for artifact in present { + if artifact.flavor != Flavor::Full { + continue; } - println!(); + let bytes = fs::read(&artifact.path)?; + match run_render(args.runtime, &bytes, spec, args.render_iterations) { + Ok(result) => { + let hash = blake3::hash(result.output.as_bytes()); + let short = short_hash(hash.to_hex().as_str()); + parity + .entry(short.clone()) + .or_default() + .push(artifact.label); + println!( + "{:<14} {:>8} {:>8} {:>10} {:>10} {:<16}", + artifact.label, + result.output.len(), + result.fetch_median.as_millis(), + result.render_median.as_millis(), + result.total_median.as_millis(), + short, + ); + } + Err(error) => println!("{:<14} render_spec error: {}", artifact.label, error), + } } + print_parity_summary(&parity); Ok(()) } -fn truncate(s: &str, max: usize) -> String { - if s.len() > max { - format!("{}...", &s[..max.saturating_sub(3)]) - } else { - s.to_string() +fn print_parity_summary(parity: &HashMap>) { + if parity.len() > 1 { + println!("parity: outputs differ across artifacts"); + for (hash, labels) in parity { + println!(" {}: {}", hash, labels.join(", ")); + } + } else if let Some((hash, labels)) = parity.iter().next() + && labels.len() > 1 + { + println!( + "parity: all {} full artifacts agree ({})", + labels.len(), + hash + ); } } @@ -386,7 +431,7 @@ fn run_render( fn stats(mut samples: Vec) -> (Duration, Duration) { samples.sort(); let median = samples[samples.len() / 2]; - let p95_idx = ((samples.len() as f64) * 0.95) as usize; + let p95_idx = samples.len().saturating_mul(95) / 100; let p95 = samples[p95_idx.min(samples.len() - 1)]; (median, p95) } @@ -405,7 +450,10 @@ fn blocking_http_client() -> Result { } mod wasmtime_runner { - use super::*; + use super::{ + Context, Duration, HashMap, OUT_CAP, RenderResult, ResolveResult, Result, Spec, bail, + blocking_http_client, median_duration, stats, + }; use std::sync::{Arc, 
Mutex}; use std::time::Instant; use wasmtime::{Caller, Engine, Linker, Memory, Module, Store, TypedFunc}; @@ -585,7 +633,7 @@ mod wasmtime_runner { let resp = client.get(&url).send().context("fetch_bytes: GET failed")?; let status = resp.status(); if !status.is_success() { - return Ok(status.as_u16() as i32); + return Ok(i32::from(status.as_u16())); } let bytes = resp.bytes().context("fetch_bytes: read body failed")?; let vec = bytes.to_vec(); @@ -597,16 +645,22 @@ mod wasmtime_runner { state.last_fetch = Some(start.elapsed()); } - let buf_ptr = alloc_fn.call(&mut *caller, body.len() as u32)?; + let body_len = + u32::try_from(body.len()).context("fetch_bytes: response body too large for wasm")?; + let buf_ptr = alloc_fn.call(&mut *caller, body_len)?; if buf_ptr == 0 { return Ok(-1); } - memory.write(&mut *caller, buf_ptr as usize, &body)?; - memory.write(&mut *caller, buf_ptr_out as usize, &buf_ptr.to_le_bytes())?; + memory.write(&mut *caller, usize::try_from(buf_ptr)?, &body)?; memory.write( &mut *caller, - buf_len_out as usize, - &(body.len() as u32).to_le_bytes(), + usize::try_from(buf_ptr_out)?, + &buf_ptr.to_le_bytes(), + )?; + memory.write( + &mut *caller, + usize::try_from(buf_len_out)?, + &body_len.to_le_bytes(), )?; Ok(0) } @@ -619,20 +673,27 @@ mod wasmtime_runner { resolve_url: &TypedFunc<(u32, u32, u32, u32, u32, u32), u32>, spec: &Spec, ) -> Result> { - let spec_len = spec.spec.len() as u32; + let spec_len = + u32::try_from(spec.spec.len()).context("spec too large for wasm linear memory")?; let spec_ptr = alloc.call(&mut *store, spec_len)?; if spec_ptr == 0 { bail!("alloc(spec) returned null"); } - memory.write(&mut *store, spec_ptr as usize, spec.spec.as_bytes())?; + memory.write( + &mut *store, + usize::try_from(spec_ptr)?, + spec.spec.as_bytes(), + )?; - let (target_ptr, target_len) = if let Some(t) = spec.target { - let p = alloc.call(&mut *store, t.len() as u32)?; - if p == 0 { + let (target_ptr, target_len) = if let Some(target) = spec.target { + 
let target_len = + u32::try_from(target.len()).context("target too large for wasm linear memory")?; + let ptr = alloc.call(&mut *store, target_len)?; + if ptr == 0 { bail!("alloc(target) returned null"); } - memory.write(&mut *store, p as usize, t.as_bytes())?; - (p, t.len() as u32) + memory.write(&mut *store, usize::try_from(ptr)?, target.as_bytes())?; + (ptr, target_len) } else { (0, 0) }; @@ -649,8 +710,8 @@ mod wasmtime_runner { let output = if n == 0 { None } else { - let mut buf = vec![0u8; n as usize]; - memory.read(&*store, out_ptr as usize, &mut buf)?; + let mut buf = vec![0u8; usize::try_from(n)?]; + memory.read(&*store, usize::try_from(out_ptr)?, &mut buf)?; Some(String::from_utf8(buf).context("resolve_url returned non-UTF8 bytes")?) }; @@ -670,20 +731,27 @@ mod wasmtime_runner { render_spec: &TypedFunc<(u32, u32, u32, u32, u32, u32), i32>, spec: &Spec, ) -> Result { - let spec_len = spec.spec.len() as u32; + let spec_len = + u32::try_from(spec.spec.len()).context("spec too large for wasm linear memory")?; let spec_ptr = alloc.call(&mut *store, spec_len)?; if spec_ptr == 0 { bail!("alloc(spec) returned null"); } - memory.write(&mut *store, spec_ptr as usize, spec.spec.as_bytes())?; + memory.write( + &mut *store, + usize::try_from(spec_ptr)?, + spec.spec.as_bytes(), + )?; - let (target_ptr, target_len) = if let Some(t) = spec.target { - let p = alloc.call(&mut *store, t.len() as u32)?; - if p == 0 { + let (target_ptr, target_len) = if let Some(target) = spec.target { + let target_len = + u32::try_from(target.len()).context("target too large for wasm linear memory")?; + let ptr = alloc.call(&mut *store, target_len)?; + if ptr == 0 { bail!("alloc(target) returned null"); } - memory.write(&mut *store, p as usize, t.as_bytes())?; - (p, t.len() as u32) + memory.write(&mut *store, usize::try_from(ptr)?, target.as_bytes())?; + (ptr, target_len) } else { (0, 0) }; @@ -693,7 +761,7 @@ mod wasmtime_runner { if slot_ptr == 0 { bail!("alloc(slots) returned null"); } 
- memory.write(&mut *store, slot_ptr as usize, &[0u8; 8])?; + memory.write(&mut *store, usize::try_from(slot_ptr)?, &[0u8; 8])?; let rc = render_spec.call( &mut *store, @@ -711,11 +779,11 @@ mod wasmtime_runner { } let mut slots = [0u8; 8]; - memory.read(&*store, slot_ptr as usize, &mut slots)?; + memory.read(&*store, usize::try_from(slot_ptr)?, &mut slots)?; let out_ptr = u32::from_le_bytes(slots[0..4].try_into().unwrap()); let out_len = u32::from_le_bytes(slots[4..8].try_into().unwrap()); - let mut buf = vec![0u8; out_len as usize]; - memory.read(&*store, out_ptr as usize, &mut buf)?; + let mut buf = vec![0u8; usize::try_from(out_len)?]; + memory.read(&*store, usize::try_from(out_ptr)?, &mut buf)?; let md = String::from_utf8(buf).context("render_spec returned non-UTF8 bytes")?; free.call(&mut *store, (spec_ptr, spec_len))?; From b1c5d5d1317001f7413d21b3b700daefc6563955 Mon Sep 17 00:00:00 2001 From: Thomas Aubry Date: Tue, 21 Apr 2026 15:27:50 +0200 Subject: [PATCH 4/8] docs: refresh workspace README files --- README.md | 178 ++++++++---------- crates/md-docrs-rust-wasm/README.md | 233 +++++++++++------------ justfile | 67 +++++++ wasm/README.md | 248 ++++--------------------- zig/README.md | 274 +++++++++++----------------- 5 files changed, 404 insertions(+), 596 deletions(-) create mode 100644 justfile diff --git a/README.md b/README.md index 62da812..3fc3383 100644 --- a/README.md +++ b/README.md @@ -2,78 +2,64 @@ `md-docrs-proxy` resolves docs.rs rustdoc JSON URLs and renders rustdoc JSON as Markdown. 
-This repository is organized with clear boundaries between: +## Workspace -- **Rust workspace crates** for the real application and shared logic -- **Zig** for a minimal `resolve_url` implementation and Worker wrapper -- **Top-level `wasm/`** for cross-language artifact staging and comparison +Rust crates under `crates/`: -## Repository boundaries +- `md-docrs-core` — shared spec parsing, docs.rs resolution, rustdoc JSON rendering, cache traits +- `md-docrs-fetch-http` — native HTTP fetcher for docs.rs +- `md-docrs-cli` — native CLI that prints Markdown to stdout +- `md-docrs-server` — native HTTP server +- `md-docrs-worker` — Cloudflare Worker crate +- `md-docrs-rust-wasm` — Rust `wasm32-unknown-unknown` export layer +- `md-docrs-wasm-compare` — host-side WASM comparison harness -### Rust workspace +Other top-level directories: -The Rust implementation lives under `crates/`: +- `zig/` — Zig implementation of the minimal `resolve_url` ABI, plus its Worker wrapper +- `wasm/` — staged WASM artifacts and the repo-level build script -- `crates/md-docrs-core` — pure shared logic - - spec parsing - - docs.rs URL resolution - - rustdoc JSON rendering - - cache abstractions and shared types -- `crates/md-docrs-cli` — native CLI and local HTTP server -- `crates/md-docrs-worker` — Cloudflare Worker crate for the Rust side -- `crates/md-docrs-rust-wasm` — Rust `wasm32-unknown-unknown` build exposing the WASM ABI -- `crates/md-docrs-wasm-compare` — host-side comparison harness for staged `.wasm` artifacts +## What each path owns -### Zig +- `crates/` owns the Rust implementation +- `zig/` owns the minimal Zig implementation +- `wasm/` owns artifact staging for Rust/Zig WASM comparison -The Zig implementation lives under `zig/`: +The top-level `wasm/` directory is not a Cargo crate. 
-- `zig/lib` — Zig source for: - - spec parsing - - docs.rs URL building - - minimal WASM ABI - - native Zig CLI -- `zig/src` — TypeScript Cloudflare Worker wrapper for the Zig wasm module +## Build and test -Zig is intentionally narrow in scope today: it is the minimal `resolve_url` implementation, not the full Markdown rendering pipeline. - -### Top-level wasm harness - -The top-level `wasm/` directory is **not** a Cargo crate anymore. - -It exists only for repo-level WASM workflow: - -- `wasm/build.sh` — builds/stages Zig and Rust wasm artifacts into `wasm/artifacts/` -- `wasm/artifacts/` — generated staged artifacts used by the comparison harness -- `wasm/README.md` — docs for the comparison flow +Build the Rust workspace: -The actual comparison binary lives in: +```/dev/null/build.sh#L1-1 +cargo build --workspace +``` -- `crates/md-docrs-wasm-compare` +Run the Rust tests: -## Build +```/dev/null/test.sh#L1-1 +cargo test --workspace +``` -Build the Rust workspace: +Run the Zig tests from the repo root: -```sh -cargo build --workspace +```/dev/null/zig-test.sh#L1-1 +zig build test --build-file zig/lib/build.zig ``` ## Native CLI -The main native binary is provided by `md-docrs-cli`. +The CLI binary comes from `md-docrs-cli`. Spec grammar: -```text +```/dev/null/spec.txt#L1-1 crate[@version][::path::to::item] ``` -Version defaults to `latest`. - Examples: -```sh +```/dev/null/cli-examples.sh#L1-5 cargo run -p md-docrs-cli -- anyhow cargo run -p md-docrs-cli -- anyhow::Error cargo run -p md-docrs-cli -- tokio::sync::Mutex @@ -81,105 +67,91 @@ cargo run -p md-docrs-cli -- tokio@1.52.1::sync::Mutex cargo run -p md-docrs-cli -- --target x86_64-unknown-linux-gnu tokio::sync::Mutex ``` -Not every `@version` pin works: docs.rs must have rebuilt rustdoc JSON for the supported format version for that exact release. Older releases may return `502`; in that case use a newer version or `latest`. +Output is Markdown on stdout. -Markdown goes to stdout. 
+## Native server -## Local server +The HTTP server binary comes from `md-docrs-server`. -The native server also comes from `md-docrs-cli`. +Start it locally: -```sh -cargo run -p md-docrs-cli -- serve --port 8080 --bind 127.0.0.1 +```/dev/null/server.sh#L1-1 +cargo run -p md-docrs-server -- --port 8080 --bind 127.0.0.1 ``` -Examples: +Example requests: -```sh -curl -s localhost:8080/anyhow -curl -s localhost:8080/anyhow/latest/anyhow/struct.Error.html -curl -s localhost:8080/tokio/latest/tokio/sync/struct.Mutex.html +```/dev/null/server-curl.sh#L1-4 +curl -sS http://127.0.0.1:8080/anyhow +curl -sS http://127.0.0.1:8080/anyhow/latest/anyhow/struct.Error.html +curl -sS http://127.0.0.1:8080/tokio/latest/tokio/sync/struct.Mutex.html +curl -sS http://127.0.0.1:8080/healthz ``` -Response shape: +Response behavior: -- `Content-Type: text/markdown; charset=utf-8` -- `X-Markdown-Tokens` -- `Vary: Accept` +- `200` with `Content-Type: text/markdown; charset=utf-8` +- `400` for invalid specs +- `404` for missing items +- `502` for upstream, decode, or JSON errors -Status codes: - -- `400` bad spec -- `404` item not found -- `502` upstream/decode error +Optional disk-backed cache support is available behind the `hybrid-cache` feature on `md-docrs-server`. ## Rust WASM -The Rust WASM module lives in: - -- `crates/md-docrs-rust-wasm` - -It exposes the shared ABI used for side-by-side comparison with Zig: - -- `alloc` -- `free` -- `resolve_url` -- optionally `render_markdown` +The Rust WASM crate lives at `crates/md-docrs-rust-wasm`. -### Minimal Rust WASM build +Minimal build, ABI-compatible with the Zig module: -This is the closest match to the current Zig WASM surface. - -```sh +```/dev/null/rust-wasm-min.sh#L1-2 cargo build --profile wasm-release --target wasm32-unknown-unknown \ -p md-docrs-rust-wasm --no-default-features ``` -### Full Rust WASM build +Default build adds `render_markdown`: -This adds `render_markdown`. 
+```/dev/null/rust-wasm-default.sh#L1-2 +cargo build --profile wasm-release --target wasm32-unknown-unknown \ + -p md-docrs-rust-wasm +``` -```sh +Full build adds `render_markdown` and `render_spec`: + +```/dev/null/rust-wasm-full.sh#L1-2 cargo build --profile wasm-release --target wasm32-unknown-unknown \ -p md-docrs-rust-wasm --no-default-features --features full ``` ## Zig -See: - -- [`zig/README.md`](zig/README.md) +The Zig subtree implements the minimal `resolve_url` path. -Typical Zig commands: +Common commands: -```sh +```/dev/null/zig-commands.sh#L1-3 zig build --build-file zig/lib/build.zig zig build cli --build-file zig/lib/build.zig zig build test --build-file zig/lib/build.zig ``` -## WASM comparison harness +See `zig/README.md` for details. + +## WASM comparison -Use the top-level `wasm/` directory to stage artifacts, then run the Rust comparison harness. +Stage artifacts, then run the comparison harness: -```sh +```/dev/null/wasm-compare.sh#L1-2 ./wasm/build.sh cargo run -p md-docrs-wasm-compare -- --offline ``` -For full docs, see: - -- [`wasm/README.md`](wasm/README.md) +See `wasm/README.md` for the workflow and supported flags. 
## Notes -- In-memory LRU cache only for the native process path -- No disk cache by default -- v0 does not render trait impls, blanket impls, or source links -- Glob re-exports into external crates are not fully followed - -## Logging +Current limits: -```sh -RUST_LOG=debug cargo run -p md-docrs-cli -- serve -``` +- in-memory cache by default for native paths +- no disk cache unless `md-docrs-server` is built with `hybrid-cache` +- partial rendering coverage; not all rustdoc surfaces are rendered yet +- Zig currently covers URL resolution only, not fetch/decompress/render \ No newline at end of file diff --git a/crates/md-docrs-rust-wasm/README.md b/crates/md-docrs-rust-wasm/README.md index c2e65d8..61ce638 100644 --- a/crates/md-docrs-rust-wasm/README.md +++ b/crates/md-docrs-rust-wasm/README.md @@ -1,198 +1,201 @@ # md-docrs-rust-wasm -Rust `wasm32-unknown-unknown` crate for the workspace's WASM-facing ABI. +Rust `wasm32-unknown-unknown` export layer for this workspace. -This crate is intentionally narrow: +This crate wraps the shared Rust logic from `md-docrs-core` behind a small C-style ABI for host environments. -- it exposes a small C-style ABI for hosts -- it reuses shared Rust logic from `md-docrs-core` -- it does not own the comparison harness -- it does not own the Zig implementation -- it does not own the Cloudflare Worker wrapper +## What it exports -That separation keeps boundaries clear: +### Always available -- `crates/md-docrs-core` — shared Rust parsing / resolution / rendering logic -- `crates/md-docrs-rust-wasm` — Rust WASM export layer -- `crates/md-docrs-wasm-compare` — host-side comparison harness -- `zig/` — independent Zig implementation and Worker wrapper -- `wasm/` — staged artifacts and helper build script +- `alloc(len: u32) -> *mut u8` +- `free(ptr: *mut u8, len: u32)` +- `resolve_url(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` -## Purpose +These three exports match the Zig minimal WASM surface. 
-`md-docrs-rust-wasm` builds a WebAssembly module that can be loaded by any host that understands its exported ABI. +### With `render` -Today it supports two scopes: +- `render_markdown(json_ptr, json_len, spec_ptr, spec_len, target_ptr, target_len, len_out) -> *mut u8` -- **minimal**: `resolve_url` only -- **full**: `resolve_url` + `render_markdown` +This lets a host pass rustdoc JSON into the module and receive rendered Markdown back. -The minimal build is the direct Rust counterpart to the Zig WASM module. -The full build keeps the same base ABI and adds Markdown rendering. +### With `render` + `fetch` -## Exports +- `render_spec(spec_ptr, spec_len, target_ptr, target_len, buf_ptr_out, buf_len_out) -> i32` -The module exports: +This is the full in-module pipeline: -| Symbol | Signature | Notes | +1. parse the spec +2. build the docs.rs rustdoc JSON URL +3. call the host-provided `fetch_bytes` +4. zstd-decode the response +5. parse rustdoc JSON +6. resolve the requested item +7. render Markdown + +## Features + +| Feature | Default | Purpose | | --- | --- | --- | -| `alloc` | `(len: u32) -> *mut u8` | Allocates a buffer in WASM linear memory. Returns null on failure or `len == 0`. | -| `free` | `(ptr: *mut u8, len: u32)` | Frees a buffer previously returned by `alloc`. Length must match. | -| `resolve_url` | `(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` | Resolves a docs.rs rustdoc JSON URL into the caller-provided output buffer. Returns bytes written, or `0` on error. | -| `render_markdown` | `(json_ptr, json_len, spec_ptr, spec_len, target_ptr, target_len, len_out: *mut u32) -> *mut u8` | Present in builds with the `render` feature. Returns a newly allocated Markdown buffer; caller must free it. Returns null on error. 
| +| `render` | yes | Enables JSON-to-Markdown rendering and exports `render_markdown` | +| `fetch` | no | Enables host-imported fetch + in-WASM zstd decode used by `render_spec` | +| `full` | no | Convenience alias for `render` + `fetch` | ## Build modes -### Minimal build - -This is the smallest Rust build and the one intended for direct parity with Zig. +### Minimal -It exposes: +Exports only: - `alloc` - `free` - `resolve_url` -Build it with: +Build: -```sh +```/dev/null/minimal.sh#L1-2 cargo build --profile wasm-release --target wasm32-unknown-unknown \ -p md-docrs-rust-wasm --no-default-features ``` -Output: - -```sh -target/wasm32-unknown-unknown/wasm-release/md_docrs_rust_wasm.wasm -``` - -### Default build +### Default -The default feature set includes `render`. - -It exposes: +Exports: - `alloc` - `free` - `resolve_url` - `render_markdown` -Build it with: +Build: -```sh +```/dev/null/default.sh#L1-2 cargo build --profile wasm-release --target wasm32-unknown-unknown \ -p md-docrs-rust-wasm ``` -### Full build +### Full -The crate also defines a convenience `full` feature: +Exports: -- `render` -- `fetch` +- `alloc` +- `free` +- `resolve_url` +- `render_markdown` +- `render_spec` -Build it with: +Build: -```sh +```/dev/null/full.sh#L1-2 cargo build --profile wasm-release --target wasm32-unknown-unknown \ -p md-docrs-rust-wasm --no-default-features --features full ``` -Use this when you want the full WASM-oriented surface used by the repo-level comparison flow. +Output path: -## Features +```/dev/null/output.txt#L1-1 +target/wasm32-unknown-unknown/wasm-release/md_docrs_rust_wasm.wasm +``` -| Feature | Default | Meaning | -| --- | --- | --- | -| `render` | yes | Enables JSON-to-Markdown rendering via `serde_json` and `rustdoc-types`, and exports `render_markdown`. | -| `fetch` | no | Enables fetch/decompression-related functionality needed by the full WASM pipeline. | -| `full` | no | Convenience alias for `render` + `fetch`. 
| +## ABI notes -## Workspace boundaries +### Memory -This crate should stay lean and focused on the ABI layer. +- `alloc` returns a pointer in WASM linear memory +- `free` must be called with the exact pointer and length originally allocated +- `alloc(0)` returns null +- most failures are reported as `0`, null, or a negative status code depending on the export -### It should contain +### `resolve_url` -- exported WASM ABI functions -- memory handling for host/WASM interaction -- thin adapters into `md-docrs-core` -- feature-gated WASM-specific integration logic +`resolve_url` parses: -### It should not contain +```/dev/null/spec.txt#L1-1 +crate[@version][::path::to::item] +``` -- CLI code -- server code -- Cloudflare Worker code -- comparison harness code -- Zig-specific code -- repo-level artifact staging logic +If `target_len == 0`, no explicit target triple is used. -Those live elsewhere on purpose. +On success it writes the docs.rs rustdoc JSON URL into the caller-provided output buffer and returns the number of bytes written. -## Relationship to Zig +It returns `0` on failure, including: -The Zig implementation lives under `zig/`. +- invalid UTF-8 +- invalid spec +- output buffer too small -The goal is to keep the **minimal ABI compatible** across both implementations so the same host-side logic can load either artifact with minimal or no changes. +### `render_markdown` -That means the Rust minimal build should stay disciplined: +`render_markdown` expects the host to provide: -- small export surface -- stable memory protocol -- no unnecessary host assumptions +- rustdoc JSON bytes +- a spec +- an optional target triple +- a writable `len_out` -## Comparison workflow +On success it returns a newly allocated Markdown buffer and writes its size to `*len_out`. -This crate does not run comparisons itself. +The caller owns the returned buffer and must release it with `free(ptr, len)`. 
-For side-by-side Rust vs Zig comparison, use the repo-level flow: +It returns null on failure. -- `wasm/build.sh` — builds and stages artifacts into `wasm/artifacts/` -- `crates/md-docrs-wasm-compare` — loads those artifacts and benchmarks / checks parity +### `render_spec` -Typical flow from the repo root: +`render_spec` requires a host import: -```sh -./wasm/build.sh -cargo run -p md-docrs-wasm-compare -- --offline +```/dev/null/fetch-bytes.txt#L1-5 +fetch_bytes( + url_ptr: *const u8, + url_len: u32, + buf_ptr_out: *mut u32, + buf_len_out: *mut u32, +) -> i32 ``` -## Optimization +The host is expected to: -If `wasm-opt` is installed, you can post-process the built artifact manually: +1. fetch the URL +2. allocate a buffer inside WASM memory using exported `alloc` +3. write the response body into that buffer +4. store the pointer and length into the provided out-slots -```sh -wasm-opt -Oz --strip-debug --strip-dwarf \ - -o target/wasm32-unknown-unknown/wasm-release/md_docrs_rust_wasm.opt.wasm \ - target/wasm32-unknown-unknown/wasm-release/md_docrs_rust_wasm.wasm -``` +`fetch_bytes` must return `0` on success and a non-zero value on failure. -In normal repo usage, the top-level `wasm/build.sh` script handles staging optimized artifacts. +On success, `render_spec` writes the pointer and length of the allocated Markdown buffer to `*buf_ptr_out` and `*buf_len_out`, then returns `0`.
-## Tests +### `render_spec` status codes -Host tests can still exercise the crate logic: +| Code | Meaning | +| --- | --- | +| `0` | Success | +| `-1` | Allocation failure | +| `-2` | Host fetch failed | +| `-3` | zstd decode failed | +| `-4` | JSON parse failed | +| `-5` | Spec parse failure, resolve miss, or URL too long | +| `-6` | Output pointer or length could not be written | -```sh -cargo test -p md-docrs-rust-wasm -``` +## Relationship to the rest of the repo -## Design guidance +- `crates/md-docrs-core` contains the shared Rust parsing, resolution, and rendering logic +- `crates/md-docrs-wasm-compare` contains the host-side comparison harness +- `zig/` contains the independent Zig implementation of the minimal ABI +- `wasm/` contains the repo-level staging script and staged artifacts -To keep this crate lean over time: +This crate should stay focused on the Rust WASM ABI layer. -- prefer pushing reusable logic down into `md-docrs-core` -- keep exported functions thin -- keep features explicit -- avoid mixing host/runtime concerns into the ABI layer -- treat code size as a product constraint for the minimal build +## Typical workflow -If a future change is only needed for: +Build and stage artifacts from the repo root: -- CLI behavior -- HTTP serving -- Worker deployment -- harness benchmarking +```/dev/null/workflow.sh#L1-2 +./wasm/build.sh +cargo run -p md-docrs-wasm-compare -- --offline +``` -then it probably belongs outside this crate. \ No newline at end of file +## Tests + +```/dev/null/tests.sh#L1-1 +cargo test -p md-docrs-rust-wasm +``` diff --git a/justfile b/justfile new file mode 100644 index 0000000..6f70cdf --- /dev/null +++ b/justfile @@ -0,0 +1,67 @@ +default: + @just --list + +# Run all Rust workspace tests. +test: + cargo test --workspace + +# Build the Rust workspace. +build: + cargo build --workspace + +# Type-check the Cloudflare Worker crate for the wasm32 target (runs `cargo check`; no artifact is built).
+build-worker: + cargo check -p md-docrs-worker --target wasm32-unknown-unknown + +# Run the Cloudflare Worker locally with Wrangler. +worker-dev: + npx wrangler@latest dev --config wrangler.toml --cwd crates/md-docrs-worker --local --port 8787 --persist-to .wrangler/state + +# Probe the worker root with a crate spec in the path. +curl-worker spec="anyhow": + curl -sS "http://127.0.0.1:8787/{{ spec }}" + +# Probe the worker with a target triple query parameter. +curl-worker-target spec="tokio::sync::Mutex" target="x86_64-unknown-linux-gnu": + curl -sS "http://127.0.0.1:8787/{{ spec }}?target={{ target }}" + +# Probe the worker using the spec query parameter form. +curl-worker-query spec="anyhow::Error": + curl -sS "http://127.0.0.1:8787/?spec={{ spec }}" + +# Run a few common worker smoke tests. +test-worker: + just curl-worker anyhow + echo + just curl-worker-query "anyhow::Error" + echo + just curl-worker-target "tokio::sync::Mutex" "x86_64-unknown-linux-gnu" + +# Run the native Markdown server locally. +server-dev: + cargo run -p md-docrs-server -- --port 8080 --bind 127.0.0.1 + +# Probe the native server. +curl-server path="anyhow": + curl -sS "http://127.0.0.1:8080/{{ path }}" + +# Run the WASM comparison flow described in the repo docs. +wasm-compare: + ./wasm/build.sh + cargo run -p md-docrs-wasm-compare -- --offline + +# Run Zig tests from the repo root. +zig-test: + zig build test --build-file zig/lib/build.zig + +# Show the main commands collected from workspace READMEs. 
+help-commands: + @echo "Common commands from README files:" + @echo " cargo build --workspace" + @echo " cargo test --workspace" + @echo " cargo run -p md-docrs-cli -- anyhow" + @echo " cargo run -p md-docrs-server -- --port 8080 --bind 127.0.0.1" + @echo " cargo build --profile wasm-release --target wasm32-unknown-unknown -p md-docrs-rust-wasm --no-default-features" + @echo " ./wasm/build.sh" + @echo " cargo run -p md-docrs-wasm-compare -- --offline" + @echo " zig build test --build-file zig/lib/build.zig" diff --git a/wasm/README.md b/wasm/README.md index 87dec5f..1f222f9 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -1,129 +1,42 @@ -# wasm/ — workspace-level WASM harness +# wasm/ -This directory is **not** a Rust crate. +Workspace-level WASM staging for artifact comparison. -It exists to keep the cross-language WASM comparison workflow in one simple place: +This directory is not a Rust crate. It only exists to: -- `build.sh` builds and stages WASM artifacts from the Rust and Zig implementations -- `artifacts/` holds the staged `.wasm` files -- this `README.md` explains how to run the comparison harness +- build and stage Zig and Rust `.wasm` artifacts +- keep staged outputs under `wasm/artifacts/` +- document the comparison flow -The actual Rust comparison binary lives in: +The comparison binary lives in `crates/md-docrs-wasm-compare`. 
-- `crates/md-docrs-wasm-compare` +## What it contains -## Boundaries +- `build.sh` — builds and stages available artifacts +- `artifacts/` — staged `.wasm` files used by the comparison harness +- `README.md` — this file -Keep the repo split like this: - -- `crates/md-docrs-core` — shared Rust library logic -- `crates/md-docrs-rust-wasm` — Rust WASM module -- `crates/md-docrs-wasm-compare` — Rust host-side comparison harness -- `zig/` — Zig implementation and its Worker wrapper -- `wasm/` — staging area and glue docs/scripts only - -That separation keeps responsibilities lean: - -- Zig owns the Zig implementation -- Rust owns the Rust implementation and host harness -- `wasm/` owns only the artifact workflow - -## Layout - -```/dev/null/layout.txt#L1-11 -wasm/ -├── README.md # this file -├── build.sh # stages Rust + Zig wasm outputs into artifacts/ -└── artifacts/ # .gitignored staged outputs - ├── zig-minimal.wasm - ├── zig-full.wasm # optional, only if Zig full build exists - ├── rust-minimal.wasm - ├── rust-minimal-opt.wasm - ├── rust-full.wasm - └── rust-full-opt.wasm -``` - -Related workspace locations: - -```/dev/null/workspace-layout.txt#L1-8 -crates/ -├── md-docrs-rust-wasm/ -├── md-docrs-wasm-compare/ -└── ... -zig/ -└── ... -wasm/ -└── ... -``` - -## What gets compared - -The harness compares compatible WASM artifacts that share the same low-level ABI. 
- -Today that means: - -- **Zig minimal** - - exports `alloc`, `free`, `resolve_url` - - implements spec parsing + docs.rs URL resolution -- **Rust minimal** - - exports the same minimal ABI - - meant to match the Zig surface -- **Rust full** - - extends the surface with rendering functionality -- **Zig full** - - optional future/experimental target if implemented - -The comparison harness reports: - -- artifact size -- output parity for `resolve_url` -- median and p95 latency -- raw Rust size vs `wasm-opt -Oz` size - -## Quick start +## Artifact workflow From the repo root: -```/dev/null/quickstart.sh#L1-4 +```/dev/null/wasm-compare.sh#L1-2 ./wasm/build.sh -cargo run -p md-docrs-wasm-compare +cargo run -p md-docrs-wasm-compare -- --offline ``` -That does two things: - -1. builds/stages available `.wasm` artifacts into `wasm/artifacts/` -2. runs the host-side comparison binary from `crates/md-docrs-wasm-compare` - -## What `build.sh` does - -`wasm/build.sh` is the single entry point for artifact staging. +`build.sh` does this: -It is responsible for: +- builds Zig minimal WASM +- attempts Zig full WASM and skips it cleanly if unsupported +- builds Rust minimal WASM from `crates/md-docrs-rust-wasm` +- builds Rust full WASM from `crates/md-docrs-rust-wasm` +- runs `wasm-opt -Oz` on Rust artifacts +- copies staged outputs into `wasm/artifacts/` -- building Zig minimal -- attempting Zig full, but skipping it cleanly if unsupported -- building Rust minimal from `crates/md-docrs-rust-wasm` -- building Rust full from `crates/md-docrs-rust-wasm` -- producing optimized Rust copies with `wasm-opt` -- copying all generated outputs into `wasm/artifacts/` +## Expected staged files -It should not contain harness logic. -It should not become a second build system. -Its job is only to stage comparable artifacts in one place. 
- -## Required tools - -You need these available on your machine: - -- Rust toolchain with `wasm32-unknown-unknown` -- Zig -- `wasm-opt` from Binaryen - -If `wasm-opt` is missing, `build.sh` should fail early because optimized Rust artifacts are part of the comparison output. - -## Artifact names - -The harness looks for these filenames in `wasm/artifacts/`: +The harness looks for these filenames: - `zig-minimal.wasm` - `zig-full.wasm` @@ -132,112 +45,29 @@ The harness looks for these filenames in `wasm/artifacts/`: - `rust-full.wasm` - `rust-full-opt.wasm` -Any subset may be present. Missing files are skipped. -That makes the flow flexible: - -- minimal-only comparison works -- Rust-only comparison works -- future Zig full comparison can slot in without redesign - -## Rust commands - -The Rust WASM module comes from `crates/md-docrs-rust-wasm`. - -Minimal build: - -```/dev/null/rust-minimal.sh#L1-3 -cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-rust-wasm --no-default-features -``` - -Full build: - -```/dev/null/rust-full.sh#L1-3 -cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-rust-wasm --no-default-features --features full -``` - -Comparison harness: - -```/dev/null/harness.sh#L1-2 -cargo run -p md-docrs-wasm-compare -``` - -Optional Wasmer runtime: - -```/dev/null/harness-wasmer.sh#L1-2 -cargo run -p md-docrs-wasm-compare --features wasmer -- --runtime wasmer -``` - -## Zig commands - -The Zig implementation lives under `zig/`. - -Minimal WASM build: - -```/dev/null/zig-build.sh#L1-3 -cd zig/lib -zig build -``` - -Native Zig tests: - -```/dev/null/zig-test.sh#L1-3 -zig build test --build-file zig/lib/build.zig -``` - -If Zig full is not implemented yet, `build.sh` should print a skip message and continue. 
- -## Flags - -The harness supports these main flags: - -| Flag | Default | Meaning | -| --- | --- | --- | -| `--runtime wasmtime\|wasmer` | `wasmtime` | Embedded runtime used by the Rust host harness | -| `--iterations N` | `200` | Hot-loop samples per artifact/spec pair | -| `--artifacts-dir PATH` | `wasm/artifacts` | Directory containing staged `.wasm` files | - -If supported by the harness version you are running, other flags such as offline or render-specific controls follow the same rule: they belong to the host harness crate, not to `wasm/build.sh`. - -## Running raw modules manually - -The `.wasm` files can be inspected directly, but real calls require host code that: - -- allocates memory in the module -- writes input bytes into WASM memory -- calls exported functions -- reads the output bytes -- frees buffers correctly - -That host logic lives in the Rust comparison harness, not in this directory. - -## Design rule for this directory - -Keep `wasm/` boring. +## Required tools -Good uses: +You need: -- stage artifacts -- document the comparison workflow -- hold generated outputs +- Rust with `wasm32-unknown-unknown` +- Zig +- `wasm-opt` -Bad uses: +## Related paths -- adding a second Rust crate here -- duplicating logic from `crates/md-docrs-wasm-compare` -- mixing Zig source code into this directory -- mixing Rust library code into this directory +- `crates/md-docrs-rust-wasm` — Rust WASM module +- `crates/md-docrs-wasm-compare` — host comparison harness +- `zig/` — Zig implementation +- `wasm/artifacts/` — staged outputs -## Summary +## Notes -If you are looking for: +Keep `wasm/` boring: -- the Rust WASM implementation: see `crates/md-docrs-rust-wasm` -- the Rust host comparison program: see `crates/md-docrs-wasm-compare` -- the Zig implementation: see `zig/` -- the staged outputs and helper script: stay in `wasm/` +- no Rust crate here +- no shared library logic here +- no comparison logic here -The goal is simple: one place to stage artifacts, one 
Rust crate to compare them, and clear boundaries between Rust, Zig, and the shared WASM workflow. \ No newline at end of file +It is only the staging area for cross-language WASM artifacts. \ No newline at end of file diff --git a/zig/README.md b/zig/README.md index dc239d8..7a1149e 100644 --- a/zig/README.md +++ b/zig/README.md @@ -1,132 +1,68 @@ # md-docrs-zig -Zig implementation of the **minimal URL-resolution surface** of this repository. +Minimal Zig implementation of docs.rs rustdoc JSON URL resolution. -This subtree is intentionally small and separate from the Rust workspace. Its job is to answer: +This subtree does three things: -- can Zig produce a smaller `.wasm` for the same ABI? -- can Zig match Rust's `resolve_url` behavior exactly? -- can the same host code load either module unchanged? +- parses `crate[@version][::path::to::item]` +- builds the matching docs.rs rustdoc JSON URL +- exposes that logic as: + - a native CLI + - a small WASM module + - a Cloudflare Worker wrapper -It is **not** the full docs.rs proxy. It does not fetch rustdoc JSON, decompress zstd, or render Markdown. +It does not fetch rustdoc JSON, decode zstd, or render Markdown. Those live on the Rust side. -## Boundaries +## Scope -### What lives here +`zig/` is the minimal comparison target for the Rust WASM build. -`zig/` owns the minimal, self-contained path: +It owns: -- parse `crate[@version][::path]` -- build the corresponding docs.rs rustdoc JSON URL -- expose that logic through: - - a Zig native CLI - - a tiny WASM module - - a Cloudflare Worker wrapper in TypeScript - -### What does not live here +- spec parsing +- docs.rs URL construction +- `resolve_url` WASM export +- native Zig CLI +- Worker host wrapper -The following stay on the Rust side: +It does not own: - HTTP fetching - caching - zstd decoding - rustdoc JSON parsing - Markdown rendering -- the main CLI/server application - -That split is deliberate. 
It keeps the Zig implementation lean and makes size/perf comparisons fair. - -## Relationship to the Rust workspace - -The repository has three distinct layers: - -1. `crates/md-docrs-core` - - shared Rust logic for the full pipeline - -2. `crates/md-docrs-rust-wasm` - - Rust WASM module with the same low-level ABI as the Zig WASM module - - can be built in: - - minimal mode: `resolve_url` only - - fuller mode: adds render support - -3. `zig/` - - independent Zig implementation of the minimal ABI surface - -At the top level, `wasm/` is just a harness area: - -- `wasm/build.sh` stages Zig and Rust artifacts into `wasm/artifacts/` -- `crates/md-docrs-wasm-compare` loads those artifacts and compares size, parity, and latency - -So the conceptual split is: - -- **Rust workspace** = production pipeline and Rust WASM -- **Zig subtree** = minimal alternative implementation -- **wasm/** = comparison/staging glue +- the main native server ## Layout -```/dev/null/zig-layout.txt#L1-17 -zig/ -├── lib/ -│ ├── build.zig -│ ├── build.zig.zon -│ ├── spec.zig -│ ├── url.zig -│ ├── resolve.zig -│ ├── wasm.zig -│ └── cli.zig -├── src/ -│ ├── index.ts -│ ├── md_docrs.wasm.d.ts -│ └── md_docrs.wasm -├── package.json -├── tsconfig.json -└── wrangler.jsonc -``` - -## Components - -### `lib/spec.zig` -Parses the spec grammar: - -- `crate` -- `crate@version` -- `crate::path::to::item` -- `crate@version::path::to::item` - -### `lib/url.zig` -Builds the docs.rs JSON URL from parsed pieces. - -### `lib/resolve.zig` -Pure glue between parsing and URL building. This is the logic shared by the CLI and WASM entrypoints. - -### `lib/wasm.zig` -Exports the minimal ABI used for host-neutral comparisons. - -### `lib/cli.zig` -Wraps the same core resolver as a native command-line tool. - -### `src/index.ts` -Cloudflare Worker host for the WASM module. This is host glue only; the actual URL resolution lives in Zig WASM. 
+- `lib/build.zig` — Zig build definitions +- `lib/cli.zig` — native CLI +- `lib/resolve.zig` — shared resolver logic +- `lib/spec.zig` — spec parser +- `lib/url.zig` — docs.rs URL builder +- `lib/wasm.zig` — minimal WASM ABI +- `src/index.ts` — Cloudflare Worker wrapper +- `src/md_docrs.wasm` — staged WASM artifact used by the Worker ## Build -Most Zig work happens from `zig/lib/`. +From `zig/`: -```/dev/null/zig-build.sh#L1-11 -cd zig/lib +```/dev/null/zig-build-npm.sh#L1-2 +npm install +npm run build:wasm +``` -# Build the WASM artifact. -zig build +From `zig/lib/`: -# Build the native CLI. +```/dev/null/zig-build-lib.sh#L1-3 +zig build zig build cli - -# Run unit tests. zig build test ``` -If you want to run the test step from the repository root, point Zig at the build file explicitly: +From the repo root: ```/dev/null/zig-build-root.sh#L1-1 zig build test --build-file zig/lib/build.zig @@ -134,21 +70,41 @@ zig build test --build-file zig/lib/build.zig ## Native CLI -The CLI is the fastest way to sanity-check the minimal resolver behavior. 
+Build: -```/dev/null/zig-cli.sh#L1-13 +```/dev/null/zig-cli-build.sh#L1-2 cd zig/lib zig build cli +``` +Run: + +```/dev/null/zig-cli-run.sh#L1-4 ./zig-out/bin/md-docrs-zig serde ./zig-out/bin/md-docrs-zig 'tokio@1.52.1::sync::Mutex' ./zig-out/bin/md-docrs-zig 'anyhow::Error' --target x86_64-unknown-linux-gnu - -# Or via the build runner: zig build run -- 'tokio@1.52.1::sync::Mutex' --target x86_64-unknown-linux-gnu ``` -Expected output is always a fully resolved docs.rs rustdoc JSON URL, for example: +Usage: + +```/dev/null/zig-cli-usage.txt#L1-1 +md-docrs-zig <spec> [--target TRIPLE] +``` + +Spec grammar: + +```/dev/null/spec.txt#L1-1 +crate[@version][::path::to::item] +``` + +Behavior: + +- prints the resolved docs.rs rustdoc JSON URL to stdout +- exits `0` on success +- exits `2` for invalid input, missing `--target` value, or unexpected arguments + +Examples of output: ```/dev/null/zig-cli-output.txt#L1-3 https://docs.rs/crate/serde/latest/json/57.zst @@ -156,26 +112,35 @@ https://docs.rs/crate/tokio/1.52.1/json/57.zst https://docs.rs/crate/anyhow/latest/x86_64-unknown-linux-gnu/json/57.zst ``` -Exit codes: - -| Code | Meaning | -| --- | --- | -| 0 | URL printed to stdout | -| 2 | Invalid spec, missing `--target` value, or unknown argument | - ## Worker The Worker is a thin host around the Zig WASM module. -```/dev/null/zig-worker.sh#L1-6 +Setup and run: + +```/dev/null/zig-worker-dev.sh#L1-4 cd zig npm install npm run build:wasm npm run dev +``` + +Deploy: + +```/dev/null/zig-worker-deploy.sh#L1-1 npm run deploy ``` -Example requests: +Accepted request forms: + +```/dev/null/zig-worker-routes.txt#L1-4 +GET /<spec> +GET /<spec>?target=<triple> +GET /?spec=<spec> +GET /?spec=<spec>&target=<triple> +``` + +Examples: ```/dev/null/zig-worker-curl.sh#L1-4 curl localhost:8787/serde @@ -184,81 +149,52 @@ curl 'localhost:8787/tokio::sync::Mutex?target=x86_64-unknown-linux-gnu' curl 'localhost:8787/?spec=anyhow::Error' ``` -Each returns a resolved docs.rs URL string. 
+Responses: + +- success: plain text docs.rs URL plus trailing newline +- failure: `400` with plain text error +- empty spec: `400` with a short usage message ## WASM ABI -The Zig module exports a deliberately tiny ABI: +The module exports a small C-style ABI: | Export | Signature | Notes | | --- | --- | --- | -| `alloc` | `(len: u32) -> *u8` | Allocates in linear memory. Returns `0` on failure. | -| `free` | `(ptr: *u8, len: u32)` | Caller must free with the same length used for allocation. | -| `resolve_url` | `(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` | Writes the resolved URL into caller-provided output memory. Returns bytes written, or `0` on error. | +| `alloc` | `(len: u32) -> *u8` | Allocates linear memory. Returns `0` on failure. | +| `free` | `(ptr: *u8, len: u32)` | Frees memory allocated by `alloc`. | +| `resolve_url` | `(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` | Writes the resolved URL into caller-provided memory. Returns bytes written, or `0` on error. | -This ABI is intentionally matched by the Rust WASM crate so the same host can swap implementations without changing its calling convention. +Contract: -## Integration with Rust WASM +- `target_len == 0` means no explicit target +- caller owns input and output buffers +- output buffer must be large enough for the full URL +- return value `0` means invalid spec or insufficient output capacity -The Rust equivalent is `crates/md-docrs-rust-wasm`. +The Worker currently uses a fixed output buffer of `512` bytes. 
-Both modules are meant to be interchangeable for the minimal path: +## Relationship to Rust + +This Zig module matches the minimal ABI surface of `crates/md-docrs-rust-wasm`: - same exported function names - same memory ownership model - same `resolve_url` contract -- same expected output bytes for the same input - -Build the Rust minimal module like this: -```/dev/null/rust-wasm-build.sh#L1-3 -cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-rust-wasm --no-default-features -``` +That lets the comparison harness swap Rust and Zig artifacts with the same host-side calling convention. -You can then compare the Zig and Rust artifacts through the top-level harness: +Use the repo-level comparison flow from the repository root: ```/dev/null/wasm-compare.sh#L1-2 ./wasm/build.sh cargo run -p md-docrs-wasm-compare -- --offline ``` -## Why this split exists - -This subtree is intentionally narrow for two reasons: - -1. **clear ownership** - - Zig owns only the minimal resolver path - - Rust owns the full product pipeline - -2. **fair comparison** - - if both Zig and Rust expose only `resolve_url`, size and latency comparisons mean something - - if one side includes fetch/decompress/render and the other does not, the comparison becomes noisy - -## Current status - -Today, Zig covers: - -- spec parsing -- URL resolution -- native CLI -- minimal WASM export -- Worker hosting - -It does **not** yet cover: - -- JSON-to-Markdown rendering -- in-WASM fetching -- zstd decompression - -That is intentional. The minimal boundary is the stable comparison target. - -## Summary - -If you're deciding where code should go: - -- put **full docs.rs proxy behavior** in Rust workspace crates -- put **minimal ABI-compatible URL resolution** in `zig/` -- put **artifact staging and cross-runtime comparison** in top-level `wasm/` +## Notes -That keeps the repository lean and the boundaries clear. 
\ No newline at end of file +- current format version is `57` +- default docs.rs base is `https://docs.rs` +- default `zig build` produces the WASM artifact +- `zig build cli` builds the native CLI separately +- this subtree is intentionally narrow so size and latency comparisons stay meaningful \ No newline at end of file From 7842d96c789cb564366358da18964b12d5362ef1 Mon Sep 17 00:00:00 2001 From: Thomas Aubry Date: Tue, 21 Apr 2026 15:28:06 +0200 Subject: [PATCH 5/8] chore: ignore worker wrangler state --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 4cd393d..c817a41 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ /target +.wrangler +crates/md-docrs-worker/.wrangler/ zig/lib/zig-out/ zig/lib/.zig-cache/ zig/node_modules/ From 4d3f8a41b74b27dc18ec866e844a9213288045ec Mon Sep 17 00:00:00 2001 From: Thomas Aubry Date: Tue, 21 Apr 2026 15:29:11 +0200 Subject: [PATCH 6/8] chore: rename kv to KRATE_KV --- crates/md-docrs-worker/src/lib.rs | 2 +- crates/md-docrs-worker/wrangler.toml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/md-docrs-worker/src/lib.rs b/crates/md-docrs-worker/src/lib.rs index 47fc203..ca2d295 100644 --- a/crates/md-docrs-worker/src/lib.rs +++ b/crates/md-docrs-worker/src/lib.rs @@ -191,7 +191,7 @@ impl KvCrateCache { #[event(fetch)] async fn fetch(req: Request, env: Env, _ctx: Context) -> Result { - let kv = env.kv("EXAMPLE")?; + let kv = env.kv("KRATE_KV")?; let state = AppState { fetcher: Arc::new(WorkerFetcher::new()), cache: Arc::new(KvCrateCache::new(kv)), diff --git a/crates/md-docrs-worker/wrangler.toml b/crates/md-docrs-worker/wrangler.toml index 85d659f..959d18a 100644 --- a/crates/md-docrs-worker/wrangler.toml +++ b/crates/md-docrs-worker/wrangler.toml @@ -3,8 +3,8 @@ main = "build/index.js" compatibility_date = "2025-04-21" [[kv_namespaces]] -binding = "EXAMPLE" -id = "EXAMPLE" +binding = "KRATE_KV" +id = "KRATE_KV" [build] command = "cargo install 
\"worker-build@^0.8\" && worker-build --release" From 075dda42517817f7403c0b7524c1983a08da518b Mon Sep 17 00:00:00 2001 From: Thomas Aubry Date: Tue, 21 Apr 2026 15:41:15 +0200 Subject: [PATCH 7/8] add distributable rust docs CLI skill --- skills/README.md | 34 ++++++++++++ skills/rust-docrs-cli/SKILL.md | 65 +++++++++++++++++++++++ skills/rust-docrs-cli/references/usage.md | 55 +++++++++++++++++++ 3 files changed, 154 insertions(+) create mode 100644 skills/README.md create mode 100644 skills/rust-docrs-cli/SKILL.md create mode 100644 skills/rust-docrs-cli/references/usage.md diff --git a/skills/README.md b/skills/README.md new file mode 100644 index 0000000..fe7cbb3 --- /dev/null +++ b/skills/README.md @@ -0,0 +1,34 @@ +# Skills + +This directory contains reusable agent skills for working with `md-docrs` and related workflows. + +## Available skills + +### `rust-docrs-cli` + +Use this skill when you need to retrieve, inspect, or summarize Rust crate and item documentation from docs.rs with the `md-docrs` CLI. 
+ +Typical uses: + +- get docs for a crate root like `anyhow` +- get docs for an item like `anyhow::Error` +- get docs for a versioned item like `tokio@1.52.1::sync::Mutex` +- get docs for a target-specific item with `--target` +- form the correct `md-docrs` command from a user request +- summarize the Markdown returned by the CLI + +Path: + +- `skills/rust-docrs-cli/SKILL.md` + +Reference material: + +- `skills/rust-docrs-cli/references/usage.md` + +## Notes + +Keep each skill focused: + +- put trigger logic and core instructions in `SKILL.md` +- put longer examples and lookup details in `references/` +- avoid mixing unrelated workflows into one skill \ No newline at end of file diff --git a/skills/rust-docrs-cli/SKILL.md b/skills/rust-docrs-cli/SKILL.md new file mode 100644 index 0000000..3549e40 --- /dev/null +++ b/skills/rust-docrs-cli/SKILL.md @@ -0,0 +1,65 @@ +--- +name: Rust docs.rs CLI +description: Use this skill when you need to retrieve or summarize Rust crate or item documentation from docs.rs with the `md-docrs` CLI. Use it for crate-root lookups, item lookups, versioned lookups, target-specific lookups, and for forming the correct command. Do NOT use it for general Rust programming help, source-code editing, deployment, server workflows, Zig, WASM, or unrelated Cargo tasks. +--- + +# Rust docs.rs CLI + +Use this skill to help people get Rust documentation from docs.rs through the `md-docrs` CLI. 
+ +## Use this skill when + +- the user wants docs for a crate +- the user wants docs for a Rust item +- the user wants docs for a specific crate version +- the user wants docs for a specific target +- the user wants the correct `md-docrs` command +- the user wants the returned docs summarized + +## Do not use this skill when + +- the task is general Rust advice without docs lookup +- the task is editing or reviewing code +- the task is about servers, deployment, Zig, or WASM +- the task is general Cargo troubleshooting unrelated to `md-docrs` + +## Core rules + +- Use this spec grammar: `crate[@version][::path::to::item]` +- Prefer `md-docrs <spec>` +- Use `md-docrs --target <target> <spec>` for target-specific docs +- Give the exact command first +- State that successful output is Markdown on stdout +- Correct invalid specs directly +- Do not invent unsupported flags or URL formats + +## Common patterns + +- crate root: `anyhow` +- item: `anyhow::Error` +- versioned item: `tokio@1.52.1::sync::Mutex` + +## Examples + +- `md-docrs anyhow` +- `md-docrs anyhow::Error` +- `md-docrs tokio@1.52.1::sync::Mutex` +- `md-docrs --target x86_64-unknown-linux-gnu tokio::sync::Mutex` + +## Fallback + +If `md-docrs` is not installed, use: + +- `cargo run -p md-docrs-cli -- <spec>` + +## Response style + +- command first +- shortest valid spec +- include version only when needed +- include target only when needed +- summarize the returned Markdown only if the user wants interpretation + +## Additional reference + +For more examples and lookup guidance, read `references/usage.md`. \ No newline at end of file diff --git a/skills/rust-docrs-cli/references/usage.md b/skills/rust-docrs-cli/references/usage.md new file mode 100644 index 0000000..ddbedf4 --- /dev/null +++ b/skills/rust-docrs-cli/references/usage.md @@ -0,0 +1,55 @@ +# `md-docrs` usage reference + +Use this reference for quick, correct `md-docrs` lookups against docs.rs. 
+ +## Spec format + +`crate[@version][::path::to::item]` + +Examples: + +- crate root: `anyhow` +- item: `anyhow::Error` +- nested item: `tokio::sync::Mutex` +- versioned item: `tokio@1.52.1::sync::Mutex` + +## Command forms + +Installed binary: + +`md-docrs <spec>` + +Target-specific lookup: + +`md-docrs --target <target> <spec>` + +Cargo fallback in this repository: + +`cargo run -p md-docrs-cli -- <spec>` + +## Copy-paste examples + +`md-docrs anyhow` + +`md-docrs anyhow::Error` + +`md-docrs tokio@1.52.1::sync::Mutex` + +`md-docrs --target x86_64-unknown-linux-gnu tokio::sync::Mutex` + +`cargo run -p md-docrs-cli -- tokio::sync::Mutex` + +## Guidance + +- give the exact command first +- use the shortest valid spec +- include version only when needed +- include target only when needed +- use `::` for Rust item paths +- successful output is Markdown on stdout + +## Avoid + +- inventing unsupported flags +- using docs.rs HTML URLs when a spec is enough +- assuming a version the user did not request \ No newline at end of file From dfc745d0d82d2dcc9923c80616c5f4e61f677838 Mon Sep 17 00:00:00 2001 From: Thomas Aubry Date: Tue, 21 Apr 2026 15:58:42 +0200 Subject: [PATCH 8/8] Fix async fetcher blocking and worker query routing --- Cargo.lock | 1 + crates/md-docrs-fetch-http/Cargo.toml | 1 + crates/md-docrs-fetch-http/src/lib.rs | 54 ++++++++++++++++---------- crates/md-docrs-worker/src/lib.rs | 56 +++++++++++++++++++++------ 4 files changed, 80 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6366920..23887d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2079,6 +2079,7 @@ dependencies = [ "md-docrs-core", "rustdoc-types", "serde_json", + "tokio", "ureq", "zstd", ] diff --git a/crates/md-docrs-fetch-http/Cargo.toml b/crates/md-docrs-fetch-http/Cargo.toml index 78bfeb8..638877f 100644 --- a/crates/md-docrs-fetch-http/Cargo.toml +++ b/crates/md-docrs-fetch-http/Cargo.toml @@ -9,5 +9,6 @@ md-docrs-core = { path = "../md-docrs-core" } rustdoc-types.workspace = true 
serde_json.workspace = true +tokio.workspace = true ureq = { version = "2", features = ["tls", "gzip"] } zstd.workspace = true diff --git a/crates/md-docrs-fetch-http/src/lib.rs b/crates/md-docrs-fetch-http/src/lib.rs index 84dfd71..11fedf5 100644 --- a/crates/md-docrs-fetch-http/src/lib.rs +++ b/crates/md-docrs-fetch-http/src/lib.rs @@ -7,6 +7,7 @@ use md_docrs_core::{ }; use rustdoc_types::{Crate, FORMAT_VERSION}; use std::{io::Cursor, time::Duration}; +use tokio::task; /// Native docs.rs fetcher shared by the CLI and native server crates. /// @@ -103,31 +104,42 @@ impl RustdocFetcher for UreqRustdocFetcher { target, Some(FORMAT_VERSION), ); - - let (status, bytes) = self.get_bytes(&url)?; - - if status == 404 { - let probe_url = build_url(&self.base, crate_name, version, target, None); - let probe_status = self.head_status(&probe_url)?; - if (200..300).contains(&probe_status) { + let probe_url = build_url(&self.base, crate_name, version, target, None); + let fetcher = Self { + agent: self.agent.clone(), + base: self.base.clone(), + user_agent: self.user_agent.clone(), + }; + let crate_name = crate_name.to_string(); + let version = version.to_string(); + + task::spawn_blocking(move || { + let (status, bytes) = fetcher.get_bytes(&url)?; + + if status == 404 { + let probe_status = fetcher.head_status(&probe_url)?; + if (200..300).contains(&probe_status) { + return Err(Error::Fetch(format!( + "{crate_name}@{version} has no rustdoc JSON for format version {FORMAT_VERSION}; waiting on docs.rs rebuild" + ))); + } return Err(Error::Fetch(format!( - "{crate_name}@{version} has no rustdoc JSON for format version {FORMAT_VERSION}; waiting on docs.rs rebuild" + "{crate_name}@{version} not found on docs.rs" ))); } - return Err(Error::Fetch(format!( - "{crate_name}@{version} not found on docs.rs" - ))); - } - if !(200..300).contains(&status) { - return Err(Error::Fetch(format!( - "{status} response for {crate_name}@{version}" - ))); - } + if !(200..300).contains(&status) { + 
return Err(Error::Fetch(format!( + "{status} response for {crate_name}@{version}" + ))); + } - let decoded = zstd::decode_all(Cursor::new(bytes))?; - let krate: Crate = serde_json::from_slice(&decoded)?; - validate_format_version(&krate)?; - Ok(krate) + let decoded = zstd::decode_all(Cursor::new(bytes))?; + let krate: Crate = serde_json::from_slice(&decoded)?; + validate_format_version(&krate)?; + Ok(krate) + }) + .await + .map_err(|err| Error::Fetch(format!("blocking fetch task failed: {err}")))? } } diff --git a/crates/md-docrs-worker/src/lib.rs b/crates/md-docrs-worker/src/lib.rs index ca2d295..40eed62 100644 --- a/crates/md-docrs-worker/src/lib.rs +++ b/crates/md-docrs-worker/src/lib.rs @@ -9,7 +9,7 @@ use md_docrs_core::{ use rustdoc_types::{Crate, FORMAT_VERSION}; use serde::{Deserialize, Serialize}; use std::{ - io::{Cursor, Read}, + io::{BufReader, Cursor, Read}, sync::Arc, }; use worker::kv::{KvError, KvStore}; @@ -110,14 +110,16 @@ impl WorkerFetcher { ))); } - let decoded = ruzstd::decoding::StreamingDecoder::new(Cursor::new(bytes)) - .map_err(|err| { - Error::Io(std::io::Error::other(format!( - "zstd decode init failed: {err}" - ))) - })? 
- .bytes() - .collect::>>()?; + let mut decoder = ruzstd::decoding::StreamingDecoder::new(BufReader::new(Cursor::new( + bytes, + ))) + .map_err(|err| { + Error::Io(std::io::Error::other(format!( + "zstd decode init failed: {err}" + ))) + })?; + let mut decoded = Vec::new(); + decoder.read_to_end(&mut decoded)?; let krate: Crate = serde_json::from_slice(&decoded)?; validate_format_version(&krate)?; Ok(krate) @@ -202,6 +204,19 @@ async fn fetch(req: Request, env: Env, _ctx: Context) -> Result { async fn route(req: Request, state: AppState) -> Result { let path = req.path(); + let url = req.url()?; + + if let Some(spec) = url + .query_pairs() + .find(|(key, _)| key == "spec") + .map(|(_, value)| value.into_owned()) + { + let target = url + .query_pairs() + .find(|(key, _)| key == "target") + .map(|(_, value)| value.into_owned()); + return serve_spec(&state, &spec, target).await; + } if path == "/" { return text_response( @@ -219,6 +234,11 @@ async fn route(req: Request, state: AppState) -> Result { return kv_list(&state).await; } + let target = url + .query_pairs() + .find(|(key, _)| key == "target") + .map(|(_, value)| value.into_owned()); + let segments: Vec<&str> = path .split('/') .filter(|segment| !segment.is_empty()) @@ -245,7 +265,7 @@ async fn route(req: Request, state: AppState) -> Result { Vec::new() }; - serve(&state, &crate_name, &version, &path_segs).await + serve(&state, &crate_name, &version, target, &path_segs).await } fn parse_rest_segments(segments: &[&str]) -> Vec { @@ -317,10 +337,20 @@ async fn kv_list(state: &AppState) -> Result { text_response(200, &body, "application/json; charset=utf-8") } +async fn serve_spec(state: &AppState, raw_spec: &str, target: Option) -> Result { + let spec = match ItemSpec::parse(raw_spec) { + Ok(spec) => spec.with_target(target), + Err(err) => return error_response(&err), + }; + + render_spec_response(state, spec).await +} + async fn serve( state: &AppState, crate_name: &str, version: &str, + target: Option, 
path_segs: &[String], ) -> Result { let path: Vec = match path_segs.split_first() { @@ -331,10 +361,14 @@ async fn serve( let spec = ItemSpec { crate_name: crate_name.to_string(), version: version.to_string(), - target: None, + target, path, }; + render_spec_response(state, spec).await +} + +async fn render_spec_response(state: &AppState, spec: ItemSpec) -> Result { let key = CacheKey { crate_name: spec.crate_name.clone(), version: spec.version.clone(),