diff --git a/.gitignore b/.gitignore index 4cd393d..c817a41 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ /target +.wrangler +crates/md-docrs-worker/.wrangler/ zig/lib/zig-out/ zig/lib/.zig-cache/ zig/node_modules/ diff --git a/Cargo.lock b/Cargo.lock index 6fceef2..23887d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -157,8 +157,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" dependencies = [ "axum-core", + "axum-macros", "bytes", - "form_urlencoded", "futures-util", "http", "http-body", @@ -166,21 +166,17 @@ dependencies = [ "hyper", "hyper-util", "itoa", - "matchit", + "matchit 0.8.4", "memchr", "mime", "percent-encoding", "pin-project-lite", "serde_core", - "serde_json", - "serde_path_to_error", - "serde_urlencoded", "sync_wrapper", "tokio", "tower", "tower-layer", "tower-service", - "tracing", ] [[package]] @@ -199,7 +195,17 @@ dependencies = [ "sync_wrapper", "tower-layer", "tower-service", - "tracing", +] + +[[package]] +name = "axum-macros" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aa268c23bfbbd2c4363b9cd302a4f504fb2a9dfe7e3451d66f35dd392e20aca" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -373,6 +379,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "js-sys", + "num-traits", + "wasm-bindgen", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -1333,6 +1350,17 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" +[[package]] 
+name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.32" @@ -1354,6 +1382,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -1596,7 +1625,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", - "webpki-roots", + "webpki-roots 1.0.7", ] [[package]] @@ -2002,6 +2031,12 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "matchit" version = "0.8.4" @@ -2009,15 +2044,71 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" [[package]] -name = "md-docrs-wasm" +name = "md-docrs-cli" version = "0.1.0" dependencies = [ - "md_docrs_proxy", + "anyhow", + "clap", + "md-docrs-core", + "md-docrs-fetch-http", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "md-docrs-core" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "foyer", + "lru", + "rustdoc-types", + "serde", + "serde_json", + "thiserror 2.0.18", + "tracing", +] + +[[package]] +name = "md-docrs-fetch-http" +version = "0.1.0" +dependencies = [ + "async-trait", + "md-docrs-core", + "rustdoc-types", + "serde_json", + "tokio", + "ureq", + "zstd", +] + +[[package]] +name = "md-docrs-rust-wasm" +version = "0.1.0" +dependencies = [ + "md-docrs-core", "rustdoc-types", "ruzstd", "serde_json", ] +[[package]] +name = "md-docrs-server" +version = "0.1.0" +dependencies = [ + "anyhow", + "axum", + "clap", + "md-docrs-core", + "md-docrs-fetch-http", + "tokio", + 
"tower-http", + "tracing", + "tracing-subscriber", +] + [[package]] name = "md-docrs-wasm-compare" version = "0.1.0" @@ -2030,26 +2121,19 @@ dependencies = [ ] [[package]] -name = "md_docrs_proxy" +name = "md-docrs-worker" version = "0.1.0" dependencies = [ - "anyhow", "async-trait", - "axum", - "bytes", - "clap", - "foyer", - "lru", - "reqwest", + "md-docrs-core", "rustdoc-types", + "ruzstd", "serde", "serde_json", - "thiserror 2.0.18", - "tokio", - "tower-http", - "tracing", - "tracing-subscriber", - "zstd", + "wasm-bindgen", + "wasm-bindgen-futures", + "worker", + "worker-macros", ] [[package]] @@ -2191,6 +2275,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.17.0" @@ -2729,7 +2822,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots", + "webpki-roots 1.0.7", ] [[package]] @@ -2826,6 +2919,7 @@ version = "0.23.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" dependencies = [ + "log", "once_cell", "ring", "rustls-pki-types", @@ -2952,17 +3046,6 @@ dependencies = [ "zmij", ] -[[package]] -name = "serde_path_to_error" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" -dependencies = [ - "itoa", - "serde", - "serde_core", -] - [[package]] name = "serde_spanned" version = "1.1.1" @@ -3086,6 +3169,27 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.27.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "subtle" version = "2.6.1" @@ -3337,7 +3441,6 @@ dependencies = [ "tokio", "tower-layer", "tower-service", - "tracing", ] [[package]] @@ -3382,7 +3485,6 @@ version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ - "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -3489,6 +3591,22 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "url", + "webpki-roots 0.26.11", +] + [[package]] name = "url" version = "2.5.8" @@ -3682,6 +3800,19 @@ dependencies = [ "wasmparser 0.244.0", ] +[[package]] +name = "wasm-streams" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmer" version = "7.1.0" @@ -4201,6 +4332,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.7", +] + [[package]] name = "webpki-roots" version = "1.0.7" @@ -4565,6 +4705,65 @@ dependencies = [ "wasmparser 0.246.2", ] +[[package]] +name = "worker" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4afd7ae4f7fcc11e0e5e64b964890b3dda90f1290b0612f7cd821b381cc18826" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures-channel", + "futures-util", + "http", + "http-body", + "js-sys", + "matchit 0.7.3", + "pin-project", + "serde", + "serde-wasm-bindgen", + "serde_json", + "serde_urlencoded", + "tokio", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "worker-macros", + "worker-sys", +] + +[[package]] +name = "worker-macros" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6371f41ac538c9f6dbe4d40cf7db58ed451eb0529a66f3e29ab8726217fc8a05" +dependencies = [ + "async-trait", + "proc-macro2", + "quote", + "strum", + "syn", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-macro-support", + "worker-sys", +] + +[[package]] +name = "worker-sys" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8de95c532944cee89d63fa8d7945f3db6260ca75ee3da42f7acfeebf538e4c" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "writeable" version = "0.6.3" diff --git a/Cargo.toml b/Cargo.toml index 794e592..49197cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,70 +1,44 @@ [workspace] -members = [".", "rust-wasm", "wasm"] -default-members = ["."] +members = [ + "crates/md-docrs-core", + "crates/md-docrs-fetch-http", + "crates/md-docrs-cli", + "crates/md-docrs-server", + "crates/md-docrs-worker", + "crates/md-docrs-rust-wasm", + "crates/md-docrs-wasm-compare", +] resolver 
= "3" -[package] -name = "md_docrs_proxy" +[workspace.package] version = "0.1.0" edition = "2024" -[[bin]] -name = "md-docrs" -path = "src/main.rs" -required-features = ["cli"] - -[lib] -name = "md_docrs_proxy" -path = "src/lib.rs" - -[features] -default = ["cli", "http", "server"] -# Pure pipeline (spec parse + resolve + render + in-memory cache) always compiles. -# `http` adds the docs.rs fetcher (reqwest + zstd + tokio + tracing). -http = ["dep:reqwest", "dep:zstd", "dep:tokio", "dep:tracing", "dep:bytes"] -# `server` layers the axum HTTP mirror on top of `http`. -server = ["http", "dep:axum", "dep:tower-http"] -# `cli` is only relevant for the `md-docrs` binary. -cli = [ - "http", - "server", - "dep:anyhow", - "dep:clap", - "dep:tokio", - "dep:tracing", - "dep:tracing-subscriber", -] -# Opt-in disk-backed cache via foyer. Not wasm-compatible. -hybrid-cache = ["dep:foyer", "dep:serde"] - -[dependencies] -# Always on — used by the pure pipeline (spec, resolve, render, cache). +[workspace.dependencies] +anyhow = "1" async-trait = "0.1" +axum = { version = "0.8", default-features = false } +bytes = "1" +clap = { version = "4", features = ["derive"] } +foyer = { version = "0.22", features = ["serde"] } lru = "0.17" +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } rustdoc-types = "0.57" +serde = { version = "1", features = ["derive", "rc"] } serde_json = "1" thiserror = "2" - -# Optional — gated by features above. 
-anyhow = { version = "1", optional = true } -axum = { version = "0.8", optional = true } -bytes = { version = "1", optional = true } -clap = { version = "4", features = ["derive"], optional = true } -foyer = { version = "0.22", optional = true, features = ["serde"] } -reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"], optional = true } -serde = { version = "1", optional = true, features = ["derive", "rc"] } -tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"], optional = true } -tower-http = { version = "0.6", features = ["trace"], optional = true } -tracing = { version = "0.1", optional = true } -tracing-subscriber = { version = "0.3", features = ["env-filter"], optional = true } -zstd = { version = "0.13", optional = true } - -[profile.release] -lto = "thin" - -# Squeeze the wasm artifact as tight as possible — this is the module we -# compare against Zig's ReleaseSmall build. -[profile.release.package.md-docrs-wasm] +tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"] } +tower-http = { version = "0.6", features = ["trace"] } +tower-service = "0.3.3" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +wasm-bindgen = "0.2" +wasm-bindgen-futures = "0.4" +worker = "0.8.1" +worker-macros = "0.8.1" +zstd = "0.13" + +[profile.release.package.md-docrs-rust-wasm] opt-level = "z" codegen-units = 1 strip = true diff --git a/README.md b/README.md index 5296c38..3fc3383 100644 --- a/README.md +++ b/README.md @@ -1,95 +1,157 @@ # md-docrs-proxy -Proxy that downloads rustdoc JSON from docs.rs and renders it as Markdown - built for LLM agents that waste tokens scraping docs.rs HTML. +`md-docrs-proxy` resolves docs.rs rustdoc JSON URLs and renders rustdoc JSON as Markdown. 
-## Build +## Workspace -```sh -cargo build --release -# binary at ./target/release/md-docrs +Rust crates under `crates/`: + +- `md-docrs-core` — shared spec parsing, docs.rs resolution, rustdoc JSON rendering, cache traits +- `md-docrs-fetch-http` — native HTTP fetcher for docs.rs +- `md-docrs-cli` — native CLI that prints Markdown to stdout +- `md-docrs-server` — native HTTP server +- `md-docrs-worker` — Cloudflare Worker crate +- `md-docrs-rust-wasm` — Rust `wasm32-unknown-unknown` export layer +- `md-docrs-wasm-compare` — host-side WASM comparison harness + +Other top-level directories: + +- `zig/` — Zig implementation of the minimal `resolve_url` ABI, plus its Worker wrapper +- `wasm/` — staged WASM artifacts and the repo-level build script + +## What each path owns + +- `crates/` owns the Rust implementation +- `zig/` owns the minimal Zig implementation +- `wasm/` owns artifact staging for Rust/Zig WASM comparison + +The top-level `wasm/` directory is not a Cargo crate. + +## Build and test + +Build the Rust workspace: + +```/dev/null/build.sh#L1-1 +cargo build --workspace ``` -## CLI +Run the Rust tests: + +```/dev/null/test.sh#L1-1 +cargo test --workspace +``` + +Run the Zig tests from the repo root: + +```/dev/null/zig-test.sh#L1-1 +zig build test --build-file zig/lib/build.zig +``` + +## Native CLI + +The CLI binary comes from `md-docrs-cli`. + +Spec grammar: -Spec grammar: `crate[@version][::path::to::item]`. Version defaults to `latest`. 
+```/dev/null/spec.txt#L1-1 +crate[@version][::path::to::item] +``` + +Examples: -```sh -md-docrs anyhow # crate index, latest -md-docrs anyhow::Error # item page -md-docrs tokio::sync::Mutex # follows pub use re-exports -md-docrs tokio@1.52.1::sync::Mutex # pinned version -md-docrs --target x86_64-unknown-linux-gnu tokio::sync::Mutex +```/dev/null/cli-examples.sh#L1-5 +cargo run -p md-docrs-cli -- anyhow +cargo run -p md-docrs-cli -- anyhow::Error +cargo run -p md-docrs-cli -- tokio::sync::Mutex +cargo run -p md-docrs-cli -- tokio@1.52.1::sync::Mutex +cargo run -p md-docrs-cli -- --target x86_64-unknown-linux-gnu tokio::sync::Mutex ``` -Not every `@version` pin works: docs.rs has to have rebuilt rustdoc JSON at the supported format version (currently 57) for that exact release. Older releases predate the rebuild and return `502`; pin to a recent version or drop the pin to use `latest`. +Output is Markdown on stdout. -Markdown goes to stdout. Pipe it into whatever consumes it. +## Native server -## Server +The HTTP server binary comes from `md-docrs-server`. -Mirrors docs.rs URLs, always replies with `text/markdown`: +Start it locally: -```sh -md-docrs serve --port 8080 --bind 127.0.0.1 +```/dev/null/server.sh#L1-1 +cargo run -p md-docrs-server -- --port 8080 --bind 127.0.0.1 ``` -```sh -curl -s localhost:8080/anyhow # crate root -curl -s localhost:8080/anyhow/latest/anyhow/struct.Error.html # item page -curl -s localhost:8080/tokio/latest/tokio/sync/struct.Mutex.html # re-exported item +Example requests: + +```/dev/null/server-curl.sh#L1-4 +curl -sS http://127.0.0.1:8080/anyhow +curl -sS http://127.0.0.1:8080/anyhow/latest/anyhow/struct.Error.html +curl -sS http://127.0.0.1:8080/tokio/latest/tokio/sync/struct.Mutex.html +curl -sS http://127.0.0.1:8080/healthz ``` -Response headers: `Content-Type: text/markdown; charset=utf-8`, `X-Markdown-Tokens` (byte-count/4 heuristic), `Vary: Accept`. 
+Response behavior: -Status codes: 404 item not found, 400 bad spec, 502 upstream/decode error. +- `200` with `Content-Type: text/markdown; charset=utf-8` +- `400` for invalid specs +- `404` for missing items +- `502` for upstream, decode, or JSON errors -## Notes +Optional disk-backed cache support is available behind the `hybrid-cache` feature on `md-docrs-server`. -- In-memory LRU cache (32 crates) per process. No disk cache. -- v0 does not render trait impls, blanket impls, or source links. -- Glob re-exports into external crates (e.g. `clap::Parser` from `clap_builder`) are not followed. +## Rust WASM -## WebAssembly builds +The Rust WASM crate lives at `crates/md-docrs-rust-wasm`. -Two same-ABI WASM modules live alongside the Rust library: +Minimal build, ABI-compatible with the Zig module: -- [`rust-wasm/`](rust-wasm/README.md) — `wasm32-unknown-unknown` build of - the pure pipeline (spec parse + resolve + render). Exports `alloc`, - `free`, `resolve_url`, and optionally `render_markdown`. -- [`zig/`](zig/README.md) — Zig 0.16 port of the same surface (`resolve_url` - parity today; `render_markdown` is a follow-up). Ships a Cloudflare Worker - wrapper that can load either artifact unchanged. +```/dev/null/rust-wasm-min.sh#L1-2 +cargo build --profile wasm-release --target wasm32-unknown-unknown \ + -p md-docrs-rust-wasm --no-default-features +``` -Build the Rust wasm: +Default build adds `render_markdown`: -```sh -# Minimal (resolve_url only — matches current Zig surface). +```/dev/null/rust-wasm-default.sh#L1-2 cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm --no-default-features -# Full (adds render_markdown, brings in serde_json + rustdoc-types). + -p md-docrs-rust-wasm +``` + +Full build adds `render_markdown` and `render_spec`: + +```/dev/null/rust-wasm-full.sh#L1-2 cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm -# Optional shipped-size pass for Rust artifacts. 
-wasm-opt -Oz --strip-debug --strip-dwarf \ - -o wasm/artifacts/rust-minimal-opt.wasm \ - target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm + -p md-docrs-rust-wasm --no-default-features --features full ``` -The root crate's HTTP / server / CLI bits are gated behind `http`, `server`, -and `cli` features (all on by default), so the pure pipeline compiles for -`wasm32` without reqwest/tokio/axum/zstd. +## Zig -To compare the two modules side by side (size, output parity, per-call -latency) under an embedded wasmtime or wasmer, see -[`wasm/`](wasm/README.md): +The Zig subtree implements the minimal `resolve_url` path. -```sh -./wasm/build.sh # builds zig + rust wasm, runs wasm-opt, stages them -cargo run -p md-docrs-wasm-compare # runs the table +Common commands: + +```/dev/null/zig-commands.sh#L1-3 +zig build --build-file zig/lib/build.zig +zig build cli --build-file zig/lib/build.zig +zig build test --build-file zig/lib/build.zig ``` -## Logging +See `zig/README.md` for details. + +## WASM comparison -```sh -RUST_LOG=md_docrs_proxy=debug md-docrs serve +Stage artifacts, then run the comparison harness: + +```/dev/null/wasm-compare.sh#L1-2 +./wasm/build.sh +cargo run -p md-docrs-wasm-compare -- --offline ``` + +See `wasm/README.md` for the workflow and supported flags. 
+ +## Notes + +Current limits: + +- in-memory cache by default for native paths +- no disk cache unless `md-docrs-server` is built with `hybrid-cache` +- partial rendering coverage; not all rustdoc surfaces are rendered yet +- Zig currently covers URL resolution only, not fetch/decompress/render \ No newline at end of file diff --git a/crates/md-docrs-cli/Cargo.toml b/crates/md-docrs-cli/Cargo.toml new file mode 100644 index 0000000..2fe8bfd --- /dev/null +++ b/crates/md-docrs-cli/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "md-docrs-cli" +version.workspace = true +edition.workspace = true + +[[bin]] +name = "md-docrs" +path = "src/main.rs" + +[dependencies] +anyhow.workspace = true +clap.workspace = true +tokio.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true + +md-docrs-core = { path = "../md-docrs-core" } +md-docrs-fetch-http = { path = "../md-docrs-fetch-http" } diff --git a/crates/md-docrs-cli/src/main.rs b/crates/md-docrs-cli/src/main.rs new file mode 100644 index 0000000..2b8066e --- /dev/null +++ b/crates/md-docrs-cli/src/main.rs @@ -0,0 +1,51 @@ +#![warn(clippy::pedantic)] + +use anyhow::{Context, Result}; +use clap::Parser; +use md_docrs_core::{ItemSpec, cache::InMemoryCache, render_spec}; +use md_docrs_fetch_http::UreqRustdocFetcher; + +#[derive(Parser, Debug)] +#[command( + name = "md-docrs", + version, + about = "Render Rust crate docs as Markdown via rustdoc JSON" +)] +struct Cli { + /// Spec: crate[@version][::path::to::item]. + #[arg(value_name = "SPEC")] + spec: String, + + /// Override the target triple (e.g. x86_64-pc-windows-msvc). + #[arg(long)] + target: Option, +} + +#[tokio::main] +async fn main() -> Result<()> { + init_tracing(); + + let cli = Cli::parse(); + let spec = ItemSpec::parse(&cli.spec) + .with_context(|| format!("invalid spec: {}", cli.spec))? 
+ .with_target(cli.target); + + let fetcher = + UreqRustdocFetcher::with_user_agent(concat!("md-docrs-cli/", env!("CARGO_PKG_VERSION"))); + let cache = InMemoryCache::default(); + + let md = render_spec(&spec, &fetcher, &cache).await?; + print!("{md}"); + + Ok(()) +} + +fn init_tracing() { + let filter = tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")); + + tracing_subscriber::fmt() + .with_env_filter(filter) + .with_target(false) + .init(); +} diff --git a/crates/md-docrs-core/Cargo.toml b/crates/md-docrs-core/Cargo.toml new file mode 100644 index 0000000..b670f8c --- /dev/null +++ b/crates/md-docrs-core/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "md-docrs-core" +version.workspace = true +edition.workspace = true + +[lib] +name = "md_docrs_core" +path = "src/lib.rs" + +[features] +default = [] +hybrid-cache = ["dep:foyer", "dep:serde"] + +[dependencies] +anyhow.workspace = true +async-trait.workspace = true +lru.workspace = true +rustdoc-types.workspace = true +serde_json.workspace = true +thiserror.workspace = true +tracing.workspace = true + +foyer = { workspace = true, optional = true } +serde = { workspace = true, optional = true } diff --git a/src/cache/hybrid.rs b/crates/md-docrs-core/src/cache/hybrid.rs similarity index 100% rename from src/cache/hybrid.rs rename to crates/md-docrs-core/src/cache/hybrid.rs diff --git a/src/cache/memory.rs b/crates/md-docrs-core/src/cache/memory.rs similarity index 100% rename from src/cache/memory.rs rename to crates/md-docrs-core/src/cache/memory.rs diff --git a/src/cache/mod.rs b/crates/md-docrs-core/src/cache/mod.rs similarity index 100% rename from src/cache/mod.rs rename to crates/md-docrs-core/src/cache/mod.rs index 995fa7d..18976b4 100644 --- a/src/cache/mod.rs +++ b/crates/md-docrs-core/src/cache/mod.rs @@ -5,13 +5,13 @@ use std::sync::Arc; #[cfg(feature = "hybrid-cache")] use serde::{Deserialize, Serialize}; -mod memory; #[cfg(feature = 
"hybrid-cache")] mod hybrid; +mod memory; -pub use memory::InMemoryCache; #[cfg(feature = "hybrid-cache")] pub use hybrid::{FoyerHybridCache, FoyerHybridCacheConfig}; +pub use memory::InMemoryCache; #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "hybrid-cache", derive(Serialize, Deserialize))] diff --git a/src/error.rs b/crates/md-docrs-core/src/error.rs similarity index 85% rename from src/error.rs rename to crates/md-docrs-core/src/error.rs index 0d8733b..b335fb5 100644 --- a/src/error.rs +++ b/crates/md-docrs-core/src/error.rs @@ -21,8 +21,4 @@ pub enum Error { #[error("io error: {0}")] Io(#[from] std::io::Error), - - #[cfg(feature = "http")] - #[error("http error: {0}")] - Http(#[from] reqwest::Error), } diff --git a/crates/md-docrs-core/src/fetch.rs b/crates/md-docrs-core/src/fetch.rs new file mode 100644 index 0000000..0da9fc5 --- /dev/null +++ b/crates/md-docrs-core/src/fetch.rs @@ -0,0 +1,76 @@ +use crate::{Error, Result}; +use rustdoc_types::{Crate, FORMAT_VERSION}; + +pub const DOCS_RS_BASE: &str = "https://docs.rs"; + +/// Build the docs.rs rustdoc JSON URL for a crate/version/target tuple. +/// +/// When `format_version` is `Some`, the URL is pinned to a specific +/// rustdoc JSON schema version, e.g. `/json/57.zst`. +/// +/// When `format_version` is `None`, the legacy unpinned endpoint is used, +/// e.g. `/json.zst`. +#[must_use] +pub fn build_url( + base: &str, + crate_name: &str, + version: &str, + target: Option<&str>, + format_version: Option, +) -> String { + let target_seg = target.map(|t| format!("/{t}")).unwrap_or_default(); + match format_version { + Some(v) => format!("{base}/crate/{crate_name}/{version}{target_seg}/json/{v}.zst"), + None => format!("{base}/crate/{crate_name}/{version}{target_seg}/json.zst"), + } +} + +/// Shared validation helper for fetcher implementations. +/// +/// # Errors +/// Returns `Error::FormatVersionMismatch` when the crate's +/// `format_version` differs from the one supported by this build. 
+pub fn validate_format_version(krate: &Crate) -> Result<()> { + if krate.format_version != FORMAT_VERSION { + return Err(Error::FormatVersionMismatch { + got: krate.format_version, + expected: FORMAT_VERSION, + }); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn url_basic() { + assert_eq!( + build_url("https://docs.rs", "serde", "latest", None, None), + "https://docs.rs/crate/serde/latest/json.zst" + ); + } + + #[test] + fn url_with_target() { + assert_eq!( + build_url( + "https://docs.rs", + "serde", + "latest", + Some("x86_64-pc-windows-msvc"), + None + ), + "https://docs.rs/crate/serde/latest/x86_64-pc-windows-msvc/json.zst" + ); + } + + #[test] + fn url_format_pinned() { + assert_eq!( + build_url("https://docs.rs", "serde", "1.0.200", None, Some(57)), + "https://docs.rs/crate/serde/1.0.200/json/57.zst" + ); + } +} diff --git a/crates/md-docrs-core/src/lib.rs b/crates/md-docrs-core/src/lib.rs new file mode 100644 index 0000000..2d655df --- /dev/null +++ b/crates/md-docrs-core/src/lib.rs @@ -0,0 +1,81 @@ +#![warn(clippy::pedantic)] + +use async_trait::async_trait; +use rustdoc_types::Crate; +use std::sync::Arc; + +pub mod cache; +pub mod error; +pub mod fetch; +pub mod render; +pub mod resolve; +pub mod spec; + +pub use error::{Error, Result}; +pub use fetch::{DOCS_RS_BASE, build_url, validate_format_version}; +pub use spec::ItemSpec; + +#[async_trait] +pub trait RustdocFetcher: Send + Sync { + /// Fetch and decode the rustdoc JSON crate for the requested package. + /// + /// # Errors + /// Returns any transport, decode, or parse error surfaced by the + /// implementation. + async fn fetch(&self, crate_name: &str, version: &str, target: Option<&str>) -> Result; +} + +/// High-level entry point: take a parsed [`ItemSpec`], fetch the rustdoc crate, +/// resolve the requested item, and render Markdown. 
+/// +/// # Errors +/// Forwards: +/// - fetch errors from [`RustdocFetcher::fetch`] +/// - cache-independent resolution errors from [`resolve::resolve`] +pub async fn render_spec( + spec: &ItemSpec, + fetcher: &(dyn RustdocFetcher + Send + Sync), + cache: &(dyn cache::CrateCache + Send + Sync), +) -> Result { + let krate = load_crate(spec, fetcher, cache).await?; + render_loaded_crate(&krate, spec) +} + +/// Load a rustdoc crate through the cache + fetcher abstraction. +/// +/// # Errors +/// Returns any error produced by the fetcher. +pub async fn load_crate( + spec: &ItemSpec, + fetcher: &(dyn RustdocFetcher + Send + Sync), + cache: &(dyn cache::CrateCache + Send + Sync), +) -> Result> { + let key = cache::CacheKey { + crate_name: spec.crate_name.clone(), + version: spec.version.clone(), + target: spec.target.clone(), + }; + + if let Some(hit) = cache.get(&key).await { + return Ok(hit); + } + + let krate = fetcher + .fetch(&spec.crate_name, &spec.version, spec.target.as_deref()) + .await?; + let arc = Arc::new(krate); + cache.put(key, Arc::clone(&arc)).await; + Ok(arc) +} + +/// Resolve and render Markdown from an already-loaded rustdoc crate. +/// +/// Useful for environments that obtain the crate data elsewhere but still want +/// to reuse the shared resolution + rendering pipeline. +/// +/// # Errors +/// Returns resolution errors when the requested item path cannot be found. 
+pub fn render_loaded_crate(krate: &Crate, spec: &ItemSpec) -> Result { + let resolved = resolve::resolve(krate, spec)?; + Ok(render::render(krate, &resolved, spec)) +} diff --git a/src/render/mod.rs b/crates/md-docrs-core/src/render/mod.rs similarity index 100% rename from src/render/mod.rs rename to crates/md-docrs-core/src/render/mod.rs diff --git a/src/render/ty.rs b/crates/md-docrs-core/src/render/ty.rs similarity index 100% rename from src/render/ty.rs rename to crates/md-docrs-core/src/render/ty.rs diff --git a/src/resolve.rs b/crates/md-docrs-core/src/resolve.rs similarity index 100% rename from src/resolve.rs rename to crates/md-docrs-core/src/resolve.rs diff --git a/src/spec.rs b/crates/md-docrs-core/src/spec.rs similarity index 100% rename from src/spec.rs rename to crates/md-docrs-core/src/spec.rs diff --git a/crates/md-docrs-fetch-http/Cargo.toml b/crates/md-docrs-fetch-http/Cargo.toml new file mode 100644 index 0000000..638877f --- /dev/null +++ b/crates/md-docrs-fetch-http/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "md-docrs-fetch-http" +version.workspace = true +edition.workspace = true + +[dependencies] +async-trait.workspace = true +md-docrs-core = { path = "../md-docrs-core" } + +rustdoc-types.workspace = true +serde_json.workspace = true +tokio.workspace = true +ureq = { version = "2", features = ["tls", "gzip"] } +zstd.workspace = true diff --git a/crates/md-docrs-fetch-http/src/lib.rs b/crates/md-docrs-fetch-http/src/lib.rs new file mode 100644 index 0000000..11fedf5 --- /dev/null +++ b/crates/md-docrs-fetch-http/src/lib.rs @@ -0,0 +1,145 @@ +#![warn(clippy::pedantic)] + +use async_trait::async_trait; +use md_docrs_core::{ + Error, Result, RustdocFetcher, + fetch::{DOCS_RS_BASE, build_url, validate_format_version}, +}; +use rustdoc_types::{Crate, FORMAT_VERSION}; +use std::{io::Cursor, time::Duration}; +use tokio::task; + +/// Native docs.rs fetcher shared by the CLI and native server crates. 
+/// +/// This lives outside `md-docrs-core` so the core remains transport-agnostic. +pub struct UreqRustdocFetcher { + agent: ureq::Agent, + base: String, + user_agent: String, +} + +impl UreqRustdocFetcher { + /// Create a fetcher configured for docs.rs with a default user agent. + #[must_use] + pub fn new() -> Self { + Self::with_user_agent(concat!("md-docrs/", env!("CARGO_PKG_VERSION"))) + } + + /// Create a fetcher with a custom user agent string. + #[must_use] + pub fn with_user_agent(user_agent: impl Into) -> Self { + let user_agent = user_agent.into(); + let agent = ureq::AgentBuilder::new() + .timeout(Duration::from_secs(30)) + .redirects(10) + .user_agent(&user_agent) + .build(); + + Self { + agent, + base: DOCS_RS_BASE.to_string(), + user_agent, + } + } + + /// Override the docs.rs base URL, mainly for tests. + #[must_use] + pub fn with_base(mut self, base: impl Into) -> Self { + self.base = base.into(); + self + } + + /// Return the configured user agent. + #[must_use] + pub fn user_agent(&self) -> &str { + &self.user_agent + } + + fn read_body_bytes(response: ureq::Response, url: &str) -> Result> { + let mut reader = response.into_reader(); + let mut bytes = Vec::new(); + std::io::Read::read_to_end(&mut reader, &mut bytes).map_err(|err| { + Error::Fetch(format!("failed to read response body for {url}: {err}")) + })?; + Ok(bytes) + } + + fn get_bytes(&self, url: &str) -> Result<(u16, Vec)> { + match self.agent.get(url).call() { + Ok(response) => { + let status = response.status(); + let bytes = Self::read_body_bytes(response, url)?; + Ok((status, bytes)) + } + Err(ureq::Error::Status(status, response)) => { + let bytes = Self::read_body_bytes(response, url)?; + Ok((status, bytes)) + } + Err(err) => Err(Error::Fetch(format!("request failed for {url}: {err}"))), + } + } + + fn head_status(&self, url: &str) -> Result { + match self.agent.head(url).call() { + Ok(response) => Ok(response.status()), + Err(ureq::Error::Status(status, _response)) => Ok(status), 
+ Err(err) => Err(Error::Fetch(format!("request failed for {url}: {err}"))), + } + } +} + +impl Default for UreqRustdocFetcher { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl RustdocFetcher for UreqRustdocFetcher { + async fn fetch(&self, crate_name: &str, version: &str, target: Option<&str>) -> Result { + let url = build_url( + &self.base, + crate_name, + version, + target, + Some(FORMAT_VERSION), + ); + let probe_url = build_url(&self.base, crate_name, version, target, None); + let fetcher = Self { + agent: self.agent.clone(), + base: self.base.clone(), + user_agent: self.user_agent.clone(), + }; + let crate_name = crate_name.to_string(); + let version = version.to_string(); + + task::spawn_blocking(move || { + let (status, bytes) = fetcher.get_bytes(&url)?; + + if status == 404 { + let probe_status = fetcher.head_status(&probe_url)?; + if (200..300).contains(&probe_status) { + return Err(Error::Fetch(format!( + "{crate_name}@{version} has no rustdoc JSON for format version {FORMAT_VERSION}; waiting on docs.rs rebuild" + ))); + } + return Err(Error::Fetch(format!( + "{crate_name}@{version} not found on docs.rs" + ))); + } + + if !(200..300).contains(&status) { + return Err(Error::Fetch(format!( + "{status} response for {crate_name}@{version}" + ))); + } + + let decoded = zstd::decode_all(Cursor::new(bytes))?; + let krate: Crate = serde_json::from_slice(&decoded)?; + validate_format_version(&krate)?; + Ok(krate) + }) + .await + .map_err(|err| Error::Fetch(format!("blocking fetch task failed: {err}")))? 
+ } +} diff --git a/rust-wasm/Cargo.toml b/crates/md-docrs-rust-wasm/Cargo.toml similarity index 70% rename from rust-wasm/Cargo.toml rename to crates/md-docrs-rust-wasm/Cargo.toml index f32348a..6dc0492 100644 --- a/rust-wasm/Cargo.toml +++ b/crates/md-docrs-rust-wasm/Cargo.toml @@ -1,17 +1,17 @@ [package] -name = "md-docrs-wasm" -version = "0.1.0" -edition = "2024" +name = "md-docrs-rust-wasm" +version.workspace = true +edition.workspace = true publish = false [lib] -# cdylib for the actual wasm artifact, rlib so `cargo test -p md-docrs-wasm` +# cdylib for the actual wasm artifact, rlib so `cargo test -p md-docrs-rust-wasm` # can exercise the exported functions on the host. crate-type = ["cdylib", "rlib"] [features] # Default: render only (host-driven fetch + decompression). Kept on by -# default so `cargo test -p md-docrs-wasm` exercises the renderer. +# default so `cargo test -p md-docrs-rust-wasm` exercises the renderer. # - `render` pulls in serde_json + rustdoc-types for JSON -> Markdown. # - `fetch` adds the host `env.fetch_bytes` import and in-WASM zstd # decoding (via ruzstd) so the module owns the full pipeline. @@ -23,7 +23,7 @@ full = ["render", "fetch"] [dependencies] # Pulls in only the pure pipeline (spec / resolve / render / cache). -md_docrs_proxy = { path = "..", default-features = false } -rustdoc-types = { version = "0.57", optional = true } -serde_json = { version = "1", optional = true } +md-docrs-core = { path = "../md-docrs-core", default-features = false } +rustdoc-types = { workspace = true, optional = true } +serde_json = { workspace = true, optional = true } ruzstd = { version = "0.8", optional = true, default-features = false, features = ["std"] } diff --git a/crates/md-docrs-rust-wasm/README.md b/crates/md-docrs-rust-wasm/README.md new file mode 100644 index 0000000..61ce638 --- /dev/null +++ b/crates/md-docrs-rust-wasm/README.md @@ -0,0 +1,201 @@ +# md-docrs-rust-wasm + +Rust `wasm32-unknown-unknown` export layer for this workspace. 
+ +This crate wraps the shared Rust logic from `md-docrs-core` behind a small C-style ABI for host environments. + +## What it exports + +### Always available + +- `alloc(len: u32) -> *mut u8` +- `free(ptr: *mut u8, len: u32)` +- `resolve_url(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` + +These three exports match the Zig minimal WASM surface. + +### With `render` + +- `render_markdown(json_ptr, json_len, spec_ptr, spec_len, target_ptr, target_len, len_out) -> *mut u8` + +This lets a host pass rustdoc JSON into the module and receive rendered Markdown back. + +### With `render` + `fetch` + +- `render_spec(spec_ptr, spec_len, target_ptr, target_len, buf_ptr_out, buf_len_out) -> i32` + +This is the full in-module pipeline: + +1. parse the spec +2. build the docs.rs rustdoc JSON URL +3. call the host-provided `fetch_bytes` +4. zstd-decode the response +5. parse rustdoc JSON +6. resolve the requested item +7. render Markdown + +## Features + +| Feature | Default | Purpose | +| --- | --- | --- | +| `render` | yes | Enables JSON-to-Markdown rendering and exports `render_markdown` | +| `fetch` | no | Enables host-imported fetch + in-WASM zstd decode used by `render_spec` | +| `full` | no | Convenience alias for `render` + `fetch` | + +## Build modes + +### Minimal + +Exports only: + +- `alloc` +- `free` +- `resolve_url` + +Build: + +```/dev/null/minimal.sh#L1-2 +cargo build --profile wasm-release --target wasm32-unknown-unknown \ + -p md-docrs-rust-wasm --no-default-features +``` + +### Default + +Exports: + +- `alloc` +- `free` +- `resolve_url` +- `render_markdown` + +Build: + +```/dev/null/default.sh#L1-2 +cargo build --profile wasm-release --target wasm32-unknown-unknown \ + -p md-docrs-rust-wasm +``` + +### Full + +Exports: + +- `alloc` +- `free` +- `resolve_url` +- `render_markdown` +- `render_spec` + +Build: + +```/dev/null/full.sh#L1-2 +cargo build --profile wasm-release --target wasm32-unknown-unknown \ + -p md-docrs-rust-wasm 
--no-default-features --features full +``` + +Output path: + +```/dev/null/output.txt#L1-1 +target/wasm32-unknown-unknown/wasm-release/md_docrs_rust_wasm.wasm +``` + +## ABI notes + +### Memory + +- `alloc` returns a pointer in WASM linear memory +- `free` must be called with the exact pointer and length originally allocated +- `alloc(0)` returns null +- most failures are reported as `0`, null, or a negative status code depending on the export + +### `resolve_url` + +`resolve_url` parses: + +```/dev/null/spec.txt#L1-1 +crate[@version][::path::to::item] +``` + +If `target_len == 0`, no explicit target triple is used. + +On success it writes the docs.rs rustdoc JSON URL into the caller-provided output buffer and returns the number of bytes written. + +It returns `0` on failure, including: + +- invalid UTF-8 +- invalid spec +- output buffer too small + +### `render_markdown` + +`render_markdown` expects the host to provide: + +- rustdoc JSON bytes +- a spec +- an optional target triple +- a writable `len_out` + +On success it returns a newly allocated Markdown buffer and writes its size to `*len_out`. + +The caller owns the returned buffer and must release it with `free(ptr, len)`. + +It returns null on failure. + +### `render_spec` + +`render_spec` requires a host import: + +```/dev/null/fetch-bytes.txt#L1-5 +fetch_bytes( + url_ptr: *const u8, + url_len: u32, + buf_ptr_out: *mut u32, + buf_len_out: *mut u32, +) -> i32 +``` + +The host is expected to: + +1. fetch the URL +2. allocate a buffer inside WASM memory using exported `alloc` +3. write the response body into that buffer +4. store the pointer and length into the provided out-slots + +Return `0` for success and non-zero for failure. + +On success, `render_spec` writes an allocated Markdown buffer to `*buf_ptr_out` and `*buf_len_out`, then returns `0`. 
+ +### `render_spec` status codes + +| Code | Meaning | +| --- | --- | +| `0` | Success | +| `-1` | Allocation failure | +| `-2` | Host fetch failed | +| `-3` | zstd decode failed | +| `-4` | JSON parse failed | +| `-5` | Spec parse failure, resolve miss, or URL too long | +| `-6` | Output pointer or length could not be written | + +## Relationship to the rest of the repo + +- `crates/md-docrs-core` contains the shared Rust parsing, resolution, and rendering logic +- `crates/md-docrs-wasm-compare` contains the host-side comparison harness +- `zig/` contains the independent Zig implementation of the minimal ABI +- `wasm/` contains the repo-level staging script and staged artifacts + +This crate should stay focused on the Rust WASM ABI layer. + +## Typical workflow + +Build and stage artifacts from the repo root: + +```/dev/null/workflow.sh#L1-2 +./wasm/build.sh +cargo run -p md-docrs-wasm-compare -- --offline +``` + +## Tests + +```/dev/null/tests.sh#L1-1 +cargo test -p md-docrs-rust-wasm +``` diff --git a/rust-wasm/src/lib.rs b/crates/md-docrs-rust-wasm/src/lib.rs similarity index 78% rename from rust-wasm/src/lib.rs rename to crates/md-docrs-rust-wasm/src/lib.rs index c6ddb68..3a14212 100644 --- a/rust-wasm/src/lib.rs +++ b/crates/md-docrs-rust-wasm/src/lib.rs @@ -19,9 +19,9 @@ //! | -5 | spec parse / resolve miss / URL too long | //! | -6 | output pointer write failure | -use md_docrs_proxy::ItemSpec; +use md_docrs_core::ItemSpec; #[cfg(feature = "render")] -use md_docrs_proxy::{render, resolve}; +use md_docrs_core::{render, resolve}; #[cfg(feature = "render")] use rustdoc_types::Crate; use std::alloc::{Layout, alloc as rust_alloc, dealloc}; @@ -34,14 +34,22 @@ const FORMAT_VERSION: u32 = 57; const DOCS_RS_BASE: &str = "https://docs.rs"; fn layout_for(len: usize) -> Option { - if len == 0 { None } else { Layout::array::(len).ok() } + if len == 0 { + None + } else { + Layout::array::(len).ok() + } } /// Allocate `len` bytes inside the WASM linear memory. 
Returns null on failure /// or when `len == 0`. Caller must free with `free(ptr, len)`. +#[must_use] #[cfg_attr(target_arch = "wasm32", unsafe(no_mangle))] pub extern "C" fn alloc(len: u32) -> *mut u8 { - let Some(layout) = layout_for(len as usize) else { + let Ok(len) = usize::try_from(len) else { + return ptr::null_mut(); + }; + let Some(layout) = layout_for(len) else { return ptr::null_mut(); }; unsafe { rust_alloc(layout) } @@ -56,7 +64,10 @@ pub unsafe extern "C" fn free(ptr: *mut u8, len: u32) { if ptr.is_null() { return; } - let Some(layout) = layout_for(len as usize) else { + let Ok(len) = usize::try_from(len) else { + return; + }; + let Some(layout) = layout_for(len) else { return; }; unsafe { dealloc(ptr, layout) }; @@ -81,15 +92,17 @@ fn parse_spec_with_target( target_ptr: *const u8, target_len: u32, ) -> Option { - let spec_bytes = unsafe { slice::from_raw_parts(spec_ptr, spec_len as usize) }; - let spec_str = std::str::from_utf8(spec_bytes).ok()?; - let mut spec = ItemSpec::parse(spec_str).ok()?; + let spec_len = usize::try_from(spec_len).ok()?; + let spec_bytes = unsafe { slice::from_raw_parts(spec_ptr, spec_len) }; + let spec = std::str::from_utf8(spec_bytes).ok()?; + let mut item_spec = ItemSpec::parse(spec).ok()?; if target_len > 0 { - let t = unsafe { slice::from_raw_parts(target_ptr, target_len as usize) }; - let t_str = std::str::from_utf8(t).ok()?; - spec = spec.with_target(Some(t_str.to_string())); + let target_len = usize::try_from(target_len).ok()?; + let target_bytes = unsafe { slice::from_raw_parts(target_ptr, target_len) }; + let target = std::str::from_utf8(target_bytes).ok()?; + item_spec = item_spec.with_target(Some(target.to_string())); } - Some(spec) + Some(item_spec) } /// Parse `spec` and write the docs.rs rustdoc JSON URL into `out_ptr`. 
@@ -111,15 +124,18 @@ pub unsafe extern "C" fn resolve_url( let Some(spec) = parse_spec_with_target(spec_ptr, spec_len, target_ptr, target_len) else { return 0; }; + let Ok(out_cap) = usize::try_from(out_cap) else { + return 0; + }; let url = build_docs_rs_url(&spec); let bytes = url.as_bytes(); - if bytes.len() > out_cap as usize { + if bytes.len() > out_cap { return 0; } unsafe { ptr::copy_nonoverlapping(bytes.as_ptr(), out_ptr, bytes.len()); } - bytes.len() as u32 + u32::try_from(bytes.len()).unwrap_or(0) } /// Render rustdoc JSON to Markdown. @@ -147,7 +163,10 @@ pub unsafe extern "C" fn render_markdown( target_len: u32, len_out: *mut u32, ) -> *mut u8 { - let json = unsafe { slice::from_raw_parts(json_ptr, json_len as usize) }; + let Ok(json_len) = usize::try_from(json_len) else { + return ptr::null_mut(); + }; + let json = unsafe { slice::from_raw_parts(json_ptr, json_len) }; let Some(spec) = parse_spec_with_target(spec_ptr, spec_len, target_ptr, target_len) else { return ptr::null_mut(); }; @@ -168,9 +187,13 @@ pub unsafe extern "C" fn render_markdown( if out.is_null() { return ptr::null_mut(); } + let Ok(len_out_value) = u32::try_from(bytes.len()) else { + unsafe { dealloc(out, layout) }; + return ptr::null_mut(); + }; unsafe { ptr::copy_nonoverlapping(bytes.as_ptr(), out, bytes.len()); - *len_out = bytes.len() as u32; + *len_out = len_out_value; } out } @@ -232,17 +255,13 @@ pub unsafe extern "C" fn render_spec( return -5; }; let url = build_docs_rs_url(&spec); + let Ok(url_len) = u32::try_from(url.len()) else { + return -5; + }; let mut resp_ptr: u32 = 0; let mut resp_len: u32 = 0; - let rc = unsafe { - fetch_bytes( - url.as_ptr(), - url.len() as u32, - &mut resp_ptr, - &mut resp_len, - ) - }; + let rc = unsafe { fetch_bytes(url.as_ptr(), url_len, &mut resp_ptr, &mut resp_len) }; if rc != 0 { return -2; } @@ -250,8 +269,17 @@ pub unsafe extern "C" fn render_spec( return -2; } + let Ok(resp_ptr_usize) = usize::try_from(resp_ptr) else { + unsafe { 
free(resp_ptr as *mut u8, resp_len) }; + return -3; + }; + let Ok(resp_len_usize) = usize::try_from(resp_len) else { + unsafe { free(resp_ptr as *mut u8, resp_len) }; + return -3; + }; + // Take ownership of the host-written buffer; free it once decoded. - let compressed = unsafe { slice::from_raw_parts(resp_ptr as *const u8, resp_len as usize) }; + let compressed = unsafe { slice::from_raw_parts(resp_ptr_usize as *const u8, resp_len_usize) }; let decoded = zstd_decode(compressed); unsafe { free(resp_ptr as *mut u8, resp_len) }; let Some(json) = decoded else { @@ -276,10 +304,18 @@ pub unsafe extern "C" fn render_spec( if out.is_null() { return -1; } + let Ok(out_ptr_value) = u32::try_from(out as usize) else { + unsafe { dealloc(out, layout) }; + return -6; + }; + let Ok(out_len_value) = u32::try_from(bytes.len()) else { + unsafe { dealloc(out, layout) }; + return -6; + }; unsafe { ptr::copy_nonoverlapping(bytes.as_ptr(), out, bytes.len()); - *buf_ptr_out = out as u32; - *buf_len_out = bytes.len() as u32; + *buf_ptr_out = out_ptr_value; + *buf_len_out = out_len_value; } 0 } @@ -295,15 +331,15 @@ mod tests { let n = unsafe { resolve_url( spec.as_ptr(), - spec.len() as u32, + u32::try_from(spec.len()).unwrap(), ptr::null(), 0, out.as_mut_ptr(), - out.len() as u32, + u32::try_from(out.len()).unwrap(), ) }; assert_eq!( - std::str::from_utf8(&out[..n as usize]).unwrap(), + std::str::from_utf8(&out[..usize::try_from(n).unwrap()]).unwrap(), "https://docs.rs/crate/serde/latest/json/57.zst", ); } @@ -316,15 +352,15 @@ mod tests { let n = unsafe { resolve_url( spec.as_ptr(), - spec.len() as u32, + u32::try_from(spec.len()).unwrap(), target.as_ptr(), - target.len() as u32, + u32::try_from(target.len()).unwrap(), out.as_mut_ptr(), - out.len() as u32, + u32::try_from(out.len()).unwrap(), ) }; assert_eq!( - std::str::from_utf8(&out[..n as usize]).unwrap(), + std::str::from_utf8(&out[..usize::try_from(n).unwrap()]).unwrap(), 
"https://docs.rs/crate/tokio/1.52.1/x86_64-unknown-linux-gnu/json/57.zst", ); } @@ -336,11 +372,11 @@ mod tests { let n = unsafe { resolve_url( spec.as_ptr(), - spec.len() as u32, + u32::try_from(spec.len()).unwrap(), ptr::null(), 0, out.as_mut_ptr(), - out.len() as u32, + u32::try_from(out.len()).unwrap(), ) }; assert_eq!(n, 0); @@ -353,11 +389,11 @@ mod tests { let n = unsafe { resolve_url( spec.as_ptr(), - spec.len() as u32, + u32::try_from(spec.len()).unwrap(), ptr::null(), 0, out.as_mut_ptr(), - out.len() as u32, + u32::try_from(out.len()).unwrap(), ) }; assert_eq!(n, 0); @@ -373,5 +409,4 @@ mod tests { free(ptr, 64); } } - } diff --git a/crates/md-docrs-server/Cargo.toml b/crates/md-docrs-server/Cargo.toml new file mode 100644 index 0000000..c6021c7 --- /dev/null +++ b/crates/md-docrs-server/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "md-docrs-server" +version.workspace = true +edition.workspace = true + +[[bin]] +name = "md-docrs-server" +path = "src/main.rs" + +[features] +default = ["hybrid-cache"] +hybrid-cache = ["md-docrs-core/hybrid-cache"] + +[dependencies] +anyhow.workspace = true +axum = { workspace = true, features = ["tokio", "http1", "macros"] } +clap.workspace = true +tokio.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true +tower-http.workspace = true + +md-docrs-core = { path = "../md-docrs-core" } +md-docrs-fetch-http = { path = "../md-docrs-fetch-http" } diff --git a/crates/md-docrs-server/src/main.rs b/crates/md-docrs-server/src/main.rs new file mode 100644 index 0000000..c70e6c4 --- /dev/null +++ b/crates/md-docrs-server/src/main.rs @@ -0,0 +1,120 @@ +#![warn(clippy::pedantic)] + +use anyhow::{Context, Result}; +use md_docrs_core::cache::{CrateCache, InMemoryCache}; +#[cfg(feature = "hybrid-cache")] +use md_docrs_core::cache::{FoyerHybridCache, FoyerHybridCacheConfig}; +use md_docrs_fetch_http::UreqRustdocFetcher; +use std::net::SocketAddr; +use std::path::PathBuf; +use std::sync::Arc; + +mod server; + 
+#[derive(clap::Parser, Debug)] +#[command( + name = "md-docrs-server", + version, + about = "Serve Rust crate docs as Markdown via rustdoc JSON" +)] +struct Cli { + #[arg(long, default_value_t = 8080)] + port: u16, + + #[arg(long, default_value = "127.0.0.1")] + bind: String, + + /// Enable the memory+disk hybrid cache backed by foyer. When set, the + /// directory is created if missing and used as the disk tier. + #[arg(long, value_name = "DIR")] + cache_dir: Option, + + /// Disk tier capacity in bytes. Only applied when `--cache-dir` is set. + #[arg(long, default_value_t = 4 * 1024 * 1024 * 1024)] + cache_disk_bytes: usize, + + /// Memory tier weight budget in bytes. Only applied when `--cache-dir` is set. + #[arg(long, default_value_t = 256 * 1024 * 1024)] + cache_memory_bytes: usize, +} + +#[tokio::main] +async fn main() -> Result<()> { + init_tracing(); + + let cli = ::parse(); + let addr: SocketAddr = format!("{}:{}", cli.bind, cli.port).parse()?; + + let cache = build_cache(cli.cache_dir, cli.cache_disk_bytes, cli.cache_memory_bytes).await?; + + let state = Arc::new(server::AppState { + fetcher: Arc::new(UreqRustdocFetcher::new()), + cache, + }); + + let app = server::router(state); + + tracing::info!(%addr, "md-docrs-server listening"); + + let listener = tokio::net::TcpListener::bind(addr).await?; + axum::serve(listener, app).await?; + + Ok(()) +} + +fn init_tracing() { + let filter = tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")); + + tracing_subscriber::fmt() + .with_env_filter(filter) + .with_target(false) + .init(); +} + +#[cfg(feature = "hybrid-cache")] +async fn build_cache( + cache_dir: Option, + disk_bytes: usize, + memory_bytes: usize, +) -> Result> { + if let Some(dir) = cache_dir { + std::fs::create_dir_all(&dir) + .with_context(|| format!("create cache dir {}", dir.display()))?; + + tracing::info!( + dir = %dir.display(), + disk_bytes, + memory_bytes, + "using foyer 
hybrid cache" + ); + + let hybrid = FoyerHybridCache::new(FoyerHybridCacheConfig { + dir, + memory_capacity_bytes: memory_bytes, + disk_capacity_bytes: disk_bytes, + }) + .await?; + + Ok(Arc::new(hybrid)) + } else { + Ok(Arc::new(InMemoryCache::default())) + } +} + +#[cfg(not(feature = "hybrid-cache"))] +#[allow(clippy::unused_async)] +async fn build_cache( + cache_dir: Option, + _disk_bytes: usize, + _memory_bytes: usize, +) -> Result> { + if cache_dir.is_some() { + anyhow::bail!( + "--cache-dir was supplied but this binary was built without the \ + `hybrid-cache` feature; rebuild with `cargo build -p md-docrs-server --features hybrid-cache`" + ); + } + + Ok(Arc::new(InMemoryCache::default())) +} diff --git a/src/server.rs b/crates/md-docrs-server/src/server.rs similarity index 78% rename from src/server.rs rename to crates/md-docrs-server/src/server.rs index ccfedd3..5812323 100644 --- a/src/server.rs +++ b/crates/md-docrs-server/src/server.rs @@ -5,11 +5,11 @@ use axum::{ response::{IntoResponse, Response}, routing::get, }; -use md_docrs_proxy::{Error, ItemSpec, cache::CrateCache, fetch::Fetcher, render_spec}; +use md_docrs_core::{Error, ItemSpec, RustdocFetcher, cache::CrateCache, render_spec}; use std::sync::Arc; pub struct AppState { - pub fetcher: Fetcher, + pub fetcher: Arc, pub cache: Arc, } @@ -26,9 +26,10 @@ pub fn router(state: Arc) -> Router { } async fn root() -> &'static str { - "md-docrs-proxy - GET /[/][/] for Markdown docs\n" + "md-docrs-server - GET /[/][/] for Markdown docs\n" } +#[axum::debug_handler] async fn crate_root( State(state): State>, Path(crate_name): Path, @@ -36,6 +37,7 @@ async fn crate_root( serve(&state, &crate_name, "latest", &[]).await } +#[axum::debug_handler] async fn version_root( State(state): State>, Path((crate_name, version)): Path<(String, String)>, @@ -43,6 +45,7 @@ async fn version_root( serve(&state, &crate_name, &version, &[]).await } +#[axum::debug_handler] async fn deep( State(state): State>, Path((crate_name, 
version, rest)): Path<(String, String, String)>, @@ -63,12 +66,15 @@ fn parse_rest(rest: &str) -> Vec { if rest.is_empty() { return vec![]; } + let parts: Vec<&str> = rest.split('/').filter(|s| !s.is_empty()).collect(); - let mut out: Vec = Vec::with_capacity(parts.len()); if parts.is_empty() { - return out; + return vec![]; } + let last_idx = parts.len() - 1; + let mut out = Vec::with_capacity(parts.len()); + for (i, seg) in parts.iter().enumerate() { if i == last_idx { if let Some(name) = strip_kind_prefix(seg) { @@ -80,11 +86,13 @@ fn parse_rest(rest: &str) -> Vec { out.push((*seg).to_string()); } } + out } fn strip_kind_prefix(seg: &str) -> Option { let seg = seg.strip_suffix(".html").unwrap_or(seg); + for prefix in [ "struct.", "enum.", @@ -103,6 +111,7 @@ fn strip_kind_prefix(seg: &str) -> Option { return Some(rest.to_string()); } } + None } @@ -112,9 +121,6 @@ async fn serve( version: &str, path_segs: &[String], ) -> Response { - // docs.rs URLs embed the crate name as the first module segment (e.g. - // /serde/latest/serde/de/trait.Foo.html). Strip it so the spec path is - // relative to the crate root. 
let path: Vec = match path_segs.split_first() { Some((head, tail)) if head == crate_name => tail.to_vec(), _ => path_segs.to_vec(), @@ -127,31 +133,31 @@ async fn serve( path, }; - match render_spec(&spec, &state.fetcher, state.cache.as_ref()).await { + match render_spec(&spec, state.fetcher.as_ref(), state.cache.as_ref()).await { Ok(body) => { let mut headers = HeaderMap::new(); headers.insert( header::CONTENT_TYPE, "text/markdown; charset=utf-8".parse().unwrap(), ); - let tokens = body.len() / 4; - headers.insert("x-markdown-tokens", tokens.to_string().parse().unwrap()); headers.insert(header::VARY, "Accept".parse().unwrap()); + headers.insert( + "x-markdown-tokens", + (body.len() / 4).to_string().parse().unwrap(), + ); (StatusCode::OK, headers, body).into_response() } - Err(e) => error_to_response(&e), + Err(err) => error_to_response(&err), } } -fn error_to_response(e: &Error) -> Response { - let status = match e { +fn error_to_response(err: &Error) -> Response { + let status = match err { Error::NotFound(_) => StatusCode::NOT_FOUND, Error::InvalidSpec(_) => StatusCode::BAD_REQUEST, - Error::FormatVersionMismatch { .. } - | Error::Fetch(_) - | Error::Http(_) - | Error::Json(_) - | Error::Io(_) => StatusCode::BAD_GATEWAY, + Error::FormatVersionMismatch { .. 
} | Error::Fetch(_) | Error::Json(_) | Error::Io(_) => { + StatusCode::BAD_GATEWAY + } }; - (status, e.to_string()).into_response() + (status, err.to_string()).into_response() } diff --git a/wasm/Cargo.toml b/crates/md-docrs-wasm-compare/Cargo.toml similarity index 59% rename from wasm/Cargo.toml rename to crates/md-docrs-wasm-compare/Cargo.toml index 512555d..f6fa475 100644 --- a/wasm/Cargo.toml +++ b/crates/md-docrs-wasm-compare/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "md-docrs-wasm-compare" -version = "0.1.0" -edition = "2024" +version.workspace = true +edition.workspace = true publish = false [[bin]] @@ -9,9 +9,9 @@ name = "wasm-compare" path = "src/main.rs" [dependencies] -anyhow = "1" +anyhow.workspace = true blake3 = "1" -reqwest = { version = "0.12", default-features = false, features = ["blocking", "rustls-tls", "gzip"] } +reqwest = { workspace = true, features = ["blocking", "gzip"] } wasmtime = "44" wasmer = { version = "7", optional = true } diff --git a/wasm/src/main.rs b/crates/md-docrs-wasm-compare/src/main.rs similarity index 81% rename from wasm/src/main.rs rename to crates/md-docrs-wasm-compare/src/main.rs index b6fc1d7..b45d157 100644 --- a/wasm/src/main.rs +++ b/crates/md-docrs-wasm-compare/src/main.rs @@ -164,15 +164,54 @@ fn print_help() { fn default_artifacts_dir() -> PathBuf { if let Some(dir) = option_env!("CARGO_MANIFEST_DIR") { - return Path::new(dir).join("artifacts"); + return Path::new(dir) + .join("..") + .join("..") + .join("wasm") + .join("artifacts"); } PathBuf::from("wasm/artifacts") } fn main() -> Result<()> { let args = parse_args()?; + let all = artifacts(&args); + let present: Vec<_> = all + .iter() + .filter(|artifact| artifact.path.exists()) + .collect(); - let all = [ + if present.is_empty() { + bail!( + "no .wasm artifacts found under {}\n\ + run `./wasm/build.sh` first, or pass --artifacts-dir", + args.artifacts_dir.display(), + ); + } + + print_run_header(&args); + print_artifact_sizes(&present)?; + + for spec in 
DEFAULT_SPECS { + print_spec_header(spec); + print_resolve_results(&present, &args, spec)?; + print_render_results(&present, &args, spec)?; + println!(); + } + + Ok(()) +} + +fn truncate(s: &str, max: usize) -> String { + if s.len() > max { + format!("{}...", &s[..max.saturating_sub(3)]) + } else { + s.to_string() + } +} + +fn artifacts(args: &Args) -> [Artifact; 6] { + [ Artifact { label: "zig-minimal", path: args.artifacts_dir.join("zig-minimal.wasm"), @@ -203,18 +242,10 @@ fn main() -> Result<()> { path: args.artifacts_dir.join("rust-full-opt.wasm"), flavor: Flavor::Full, }, - ]; - - let present: Vec<_> = all.iter().filter(|a| a.path.exists()).collect(); - if present.is_empty() { - bail!( - "no .wasm artifacts found under {}\n\ - run `{}/build.sh` first, or pass --artifacts-dir", - args.artifacts_dir.display(), - env!("CARGO_MANIFEST_DIR"), - ); - } + ] +} +fn print_run_header(args: &Args) { println!("runtime: {:?}", args.runtime); println!( "iterations: resolve_url={}, render_spec={}", @@ -226,114 +257,131 @@ fn main() -> Result<()> { if args.offline { "offline" } else { "online" } ); println!(); +} +fn print_artifact_sizes(present: &[&Artifact]) -> Result<()> { println!("{:<14} {:>10} {:>8}", "artifact", "bytes", "flavor"); println!("{:-<14} {:->10} {:->8}", "", "", ""); - for a in &present { - let meta = fs::metadata(&a.path)?; - let flavor = match a.flavor { + + for artifact in present { + let meta = fs::metadata(&artifact.path)?; + let flavor = match artifact.flavor { Flavor::Minimal => "minimal", Flavor::Full => "full", }; - println!("{:<14} {:>10} {:>8}", a.label, meta.len(), flavor); + println!("{:<14} {:>10} {:>8}", artifact.label, meta.len(), flavor); } + println!(); + Ok(()) +} - for spec in DEFAULT_SPECS { - println!( - "spec: {}{}", - spec.spec, - spec.target - .map(|t| format!(" (target={t})")) - .unwrap_or_default(), - ); - println!( - "{:<14} {:<60} {:>10} {:>10}", - "artifact", "resolve_url output", "median us", "p95 us" - ); - println!("{:-<14} 
{:-<60} {:->10} {:->10}", "", "", "", ""); - for a in &present { - let bytes = fs::read(&a.path)?; - match run_resolve(args.runtime, &bytes, spec, args.iterations) { - Ok(result) => { - let output = result - .output - .as_deref() - .unwrap_or(""); - let shown = truncate(output, 60); - println!( - "{:<14} {:<60} {:>10} {:>10}", - a.label, - shown, - result.median.as_micros(), - result.p95.as_micros(), - ); - } - Err(e) => println!("{:<14} resolve_url error: {}", a.label, e), +fn print_spec_header(spec: &Spec) { + println!( + "spec: {}{}", + spec.spec, + spec.target + .map(|target| format!(" (target={target})")) + .unwrap_or_default(), + ); + println!( + "{:<14} {:<60} {:>10} {:>10}", + "artifact", "resolve_url output", "median us", "p95 us" + ); + println!("{:-<14} {:-<60} {:->10} {:->10}", "", "", "", ""); +} + +fn print_resolve_results(present: &[&Artifact], args: &Args, spec: &Spec) -> Result<()> { + for artifact in present { + let bytes = fs::read(&artifact.path)?; + match run_resolve(args.runtime, &bytes, spec, args.iterations) { + Ok(result) => { + let output = result + .output + .as_deref() + .unwrap_or(""); + let shown = truncate(output, 60); + println!( + "{:<14} {:<60} {:>10} {:>10}", + artifact.label, + shown, + result.median.as_micros(), + result.p95.as_micros(), + ); } + Err(error) => println!("{:<14} resolve_url error: {}", artifact.label, error), } + } - if !args.offline && present.iter().any(|a| a.flavor == Flavor::Full) { - println!(); - println!( - "{:<14} {:>8} {:>8} {:>10} {:>10} {:<16}", - "artifact", "md bytes", "fetch ms", "render ms", "total ms", "parity" - ); - println!( - "{:-<14} {:->8} {:->8} {:->10} {:->10} {:-<16}", - "", "", "", "", "", "" - ); - let mut parity: HashMap> = HashMap::new(); - for a in &present { - if a.flavor != Flavor::Full { - continue; - } - let bytes = fs::read(&a.path)?; - match run_render(args.runtime, &bytes, spec, args.render_iterations) { - Ok(r) => { - let hash = blake3::hash(r.output.as_bytes()); - let short = 
short_hash(hash.to_hex().as_str()); - parity.entry(short.clone()).or_default().push(a.label); - println!( - "{:<14} {:>8} {:>8} {:>10} {:>10} {:<16}", - a.label, - r.output.len(), - r.fetch_median.as_millis(), - r.render_median.as_millis(), - r.total_median.as_millis(), - short, - ); - } - Err(e) => println!("{:<14} render_spec error: {}", a.label, e), - } - } - if parity.len() > 1 { - println!("parity: outputs differ across artifacts"); - for (hash, labels) in &parity { - println!(" {}: {}", hash, labels.join(", ")); - } - } else if let Some((hash, labels)) = parity.iter().next() { - if labels.len() > 1 { - println!( - "parity: all {} full artifacts agree ({})", - labels.len(), - hash - ); - } - } + Ok(()) +} + +fn print_render_results(present: &[&Artifact], args: &Args, spec: &Spec) -> Result<()> { + if args.offline + || !present + .iter() + .any(|artifact| artifact.flavor == Flavor::Full) + { + return Ok(()); + } + + println!(); + println!( + "{:<14} {:>8} {:>8} {:>10} {:>10} {:<16}", + "artifact", "md bytes", "fetch ms", "render ms", "total ms", "parity" + ); + println!( + "{:-<14} {:->8} {:->8} {:->10} {:->10} {:-<16}", + "", "", "", "", "", "" + ); + + let mut parity: HashMap> = HashMap::new(); + for artifact in present { + if artifact.flavor != Flavor::Full { + continue; } - println!(); + let bytes = fs::read(&artifact.path)?; + match run_render(args.runtime, &bytes, spec, args.render_iterations) { + Ok(result) => { + let hash = blake3::hash(result.output.as_bytes()); + let short = short_hash(hash.to_hex().as_str()); + parity + .entry(short.clone()) + .or_default() + .push(artifact.label); + println!( + "{:<14} {:>8} {:>8} {:>10} {:>10} {:<16}", + artifact.label, + result.output.len(), + result.fetch_median.as_millis(), + result.render_median.as_millis(), + result.total_median.as_millis(), + short, + ); + } + Err(error) => println!("{:<14} render_spec error: {}", artifact.label, error), + } } + print_parity_summary(&parity); Ok(()) } -fn truncate(s: &str, 
max: usize) -> String { - if s.len() > max { - format!("{}...", &s[..max.saturating_sub(3)]) - } else { - s.to_string() +fn print_parity_summary(parity: &HashMap>) { + if parity.len() > 1 { + println!("parity: outputs differ across artifacts"); + for (hash, labels) in parity { + println!(" {}: {}", hash, labels.join(", ")); + } + } else if let Some((hash, labels)) = parity.iter().next() + && labels.len() > 1 + { + println!( + "parity: all {} full artifacts agree ({})", + labels.len(), + hash + ); } } @@ -383,7 +431,7 @@ fn run_render( fn stats(mut samples: Vec) -> (Duration, Duration) { samples.sort(); let median = samples[samples.len() / 2]; - let p95_idx = ((samples.len() as f64) * 0.95) as usize; + let p95_idx = samples.len().saturating_mul(95) / 100; let p95 = samples[p95_idx.min(samples.len() - 1)]; (median, p95) } @@ -402,7 +450,10 @@ fn blocking_http_client() -> Result { } mod wasmtime_runner { - use super::*; + use super::{ + Context, Duration, HashMap, OUT_CAP, RenderResult, ResolveResult, Result, Spec, bail, + blocking_http_client, median_duration, stats, + }; use std::sync::{Arc, Mutex}; use std::time::Instant; use wasmtime::{Caller, Engine, Linker, Memory, Module, Store, TypedFunc}; @@ -582,7 +633,7 @@ mod wasmtime_runner { let resp = client.get(&url).send().context("fetch_bytes: GET failed")?; let status = resp.status(); if !status.is_success() { - return Ok(status.as_u16() as i32); + return Ok(i32::from(status.as_u16())); } let bytes = resp.bytes().context("fetch_bytes: read body failed")?; let vec = bytes.to_vec(); @@ -594,16 +645,22 @@ mod wasmtime_runner { state.last_fetch = Some(start.elapsed()); } - let buf_ptr = alloc_fn.call(&mut *caller, body.len() as u32)?; + let body_len = + u32::try_from(body.len()).context("fetch_bytes: response body too large for wasm")?; + let buf_ptr = alloc_fn.call(&mut *caller, body_len)?; if buf_ptr == 0 { return Ok(-1); } - memory.write(&mut *caller, buf_ptr as usize, &body)?; - memory.write(&mut *caller, 
buf_ptr_out as usize, &buf_ptr.to_le_bytes())?; + memory.write(&mut *caller, usize::try_from(buf_ptr)?, &body)?; + memory.write( + &mut *caller, + usize::try_from(buf_ptr_out)?, + &buf_ptr.to_le_bytes(), + )?; memory.write( &mut *caller, - buf_len_out as usize, - &(body.len() as u32).to_le_bytes(), + usize::try_from(buf_len_out)?, + &body_len.to_le_bytes(), )?; Ok(0) } @@ -616,20 +673,27 @@ mod wasmtime_runner { resolve_url: &TypedFunc<(u32, u32, u32, u32, u32, u32), u32>, spec: &Spec, ) -> Result> { - let spec_len = spec.spec.len() as u32; + let spec_len = + u32::try_from(spec.spec.len()).context("spec too large for wasm linear memory")?; let spec_ptr = alloc.call(&mut *store, spec_len)?; if spec_ptr == 0 { bail!("alloc(spec) returned null"); } - memory.write(&mut *store, spec_ptr as usize, spec.spec.as_bytes())?; + memory.write( + &mut *store, + usize::try_from(spec_ptr)?, + spec.spec.as_bytes(), + )?; - let (target_ptr, target_len) = if let Some(t) = spec.target { - let p = alloc.call(&mut *store, t.len() as u32)?; - if p == 0 { + let (target_ptr, target_len) = if let Some(target) = spec.target { + let target_len = + u32::try_from(target.len()).context("target too large for wasm linear memory")?; + let ptr = alloc.call(&mut *store, target_len)?; + if ptr == 0 { bail!("alloc(target) returned null"); } - memory.write(&mut *store, p as usize, t.as_bytes())?; - (p, t.len() as u32) + memory.write(&mut *store, usize::try_from(ptr)?, target.as_bytes())?; + (ptr, target_len) } else { (0, 0) }; @@ -646,8 +710,8 @@ mod wasmtime_runner { let output = if n == 0 { None } else { - let mut buf = vec![0u8; n as usize]; - memory.read(&*store, out_ptr as usize, &mut buf)?; + let mut buf = vec![0u8; usize::try_from(n)?]; + memory.read(&*store, usize::try_from(out_ptr)?, &mut buf)?; Some(String::from_utf8(buf).context("resolve_url returned non-UTF8 bytes")?) 
}; @@ -667,20 +731,27 @@ mod wasmtime_runner { render_spec: &TypedFunc<(u32, u32, u32, u32, u32, u32), i32>, spec: &Spec, ) -> Result { - let spec_len = spec.spec.len() as u32; + let spec_len = + u32::try_from(spec.spec.len()).context("spec too large for wasm linear memory")?; let spec_ptr = alloc.call(&mut *store, spec_len)?; if spec_ptr == 0 { bail!("alloc(spec) returned null"); } - memory.write(&mut *store, spec_ptr as usize, spec.spec.as_bytes())?; + memory.write( + &mut *store, + usize::try_from(spec_ptr)?, + spec.spec.as_bytes(), + )?; - let (target_ptr, target_len) = if let Some(t) = spec.target { - let p = alloc.call(&mut *store, t.len() as u32)?; - if p == 0 { + let (target_ptr, target_len) = if let Some(target) = spec.target { + let target_len = + u32::try_from(target.len()).context("target too large for wasm linear memory")?; + let ptr = alloc.call(&mut *store, target_len)?; + if ptr == 0 { bail!("alloc(target) returned null"); } - memory.write(&mut *store, p as usize, t.as_bytes())?; - (p, t.len() as u32) + memory.write(&mut *store, usize::try_from(ptr)?, target.as_bytes())?; + (ptr, target_len) } else { (0, 0) }; @@ -690,7 +761,7 @@ mod wasmtime_runner { if slot_ptr == 0 { bail!("alloc(slots) returned null"); } - memory.write(&mut *store, slot_ptr as usize, &[0u8; 8])?; + memory.write(&mut *store, usize::try_from(slot_ptr)?, &[0u8; 8])?; let rc = render_spec.call( &mut *store, @@ -708,11 +779,11 @@ mod wasmtime_runner { } let mut slots = [0u8; 8]; - memory.read(&*store, slot_ptr as usize, &mut slots)?; + memory.read(&*store, usize::try_from(slot_ptr)?, &mut slots)?; let out_ptr = u32::from_le_bytes(slots[0..4].try_into().unwrap()); let out_len = u32::from_le_bytes(slots[4..8].try_into().unwrap()); - let mut buf = vec![0u8; out_len as usize]; - memory.read(&*store, out_ptr as usize, &mut buf)?; + let mut buf = vec![0u8; usize::try_from(out_len)?]; + memory.read(&*store, usize::try_from(out_ptr)?, &mut buf)?; let md = 
String::from_utf8(buf).context("render_spec returned non-UTF8 bytes")?; free.call(&mut *store, (spec_ptr, spec_len))?; diff --git a/crates/md-docrs-worker/Cargo.toml b/crates/md-docrs-worker/Cargo.toml new file mode 100644 index 0000000..1e53571 --- /dev/null +++ b/crates/md-docrs-worker/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "md-docrs-worker" +version.workspace = true +edition.workspace = true +publish = false + +[package.metadata.release] +release = false + +[lib] +crate-type = ["cdylib"] + +[dependencies] +async-trait.workspace = true +md-docrs-core = { path = "../md-docrs-core", default-features = false } +rustdoc-types.workspace = true +serde = { workspace = true, features = ["derive"] } +serde_json.workspace = true +ruzstd = { version = "0.8", default-features = false, features = ["std"] } + +worker = { workspace = true, features = ["http"] } +worker-macros = { workspace = true, features = ["http"] } +wasm-bindgen.workspace = true +wasm-bindgen-futures.workspace = true diff --git a/crates/md-docrs-worker/package.json b/crates/md-docrs-worker/package.json new file mode 100644 index 0000000..5d7e21a --- /dev/null +++ b/crates/md-docrs-worker/package.json @@ -0,0 +1,12 @@ +{ + "name": "md-docrs-worker", + "version": "0.0.0", + "private": true, + "scripts": { + "deploy": "cargo install worker-build ; wrangler deploy", + "dev": "cargo install worker-build ; wrangler dev --local" + }, + "devDependencies": { + "wrangler": "^4" + } +} diff --git a/crates/md-docrs-worker/src/lib.rs b/crates/md-docrs-worker/src/lib.rs new file mode 100644 index 0000000..40eed62 --- /dev/null +++ b/crates/md-docrs-worker/src/lib.rs @@ -0,0 +1,428 @@ +#![warn(clippy::pedantic)] + +use md_docrs_core::{ + Error, ItemSpec, + cache::CacheKey, + fetch::{DOCS_RS_BASE, build_url, validate_format_version}, + render_loaded_crate, +}; +use rustdoc_types::{Crate, FORMAT_VERSION}; +use serde::{Deserialize, Serialize}; +use std::{ + io::{BufReader, Cursor, Read}, + sync::Arc, +}; +use 
worker::kv::{KvError, KvStore}; +use worker::{Context, Env, Fetch, Headers, Method, Request, RequestInit, Response, Result, event}; + +#[derive(Clone)] +struct AppState { + fetcher: Arc, + cache: Arc, +} + +#[derive(Clone)] +struct WorkerFetcher { + base: String, +} + +impl WorkerFetcher { + fn new() -> Self { + Self { + base: DOCS_RS_BASE.to_string(), + } + } + + async fn fetch_bytes( + &self, + url: &str, + method: Method, + ) -> md_docrs_core::Result<(u16, Vec)> { + let mut init = RequestInit::new(); + init.with_method(method); + + let request = Request::new_with_init(url, &init) + .map_err(|err| Error::Fetch(format!("failed to build request for {url}: {err}")))?; + + let mut response = Fetch::Request(request) + .send() + .await + .map_err(|err| Error::Fetch(format!("request failed for {url}: {err}")))?; + + let status = response.status_code(); + let bytes = response.bytes().await.map_err(|err| { + Error::Fetch(format!("failed to read response body for {url}: {err}")) + })?; + + Ok((status, bytes)) + } + + async fn head_status(&self, url: &str) -> md_docrs_core::Result { + let mut init = RequestInit::new(); + init.with_method(Method::Head); + + let request = Request::new_with_init(url, &init) + .map_err(|err| Error::Fetch(format!("failed to build request for {url}: {err}")))?; + + let response = Fetch::Request(request) + .send() + .await + .map_err(|err| Error::Fetch(format!("request failed for {url}: {err}")))?; + + Ok(response.status_code()) + } +} + +impl WorkerFetcher { + async fn fetch( + &self, + crate_name: &str, + version: &str, + target: Option<&str>, + ) -> md_docrs_core::Result { + let url = build_url( + &self.base, + crate_name, + version, + target, + Some(FORMAT_VERSION), + ); + + let (status, bytes) = self.fetch_bytes(&url, Method::Get).await?; + + if status == 404 { + let probe_url = build_url(&self.base, crate_name, version, target, None); + let probe_status = self.head_status(&probe_url).await?; + if (200..300).contains(&probe_status) { + return 
Err(Error::Fetch(format!( + "{crate_name}@{version} has no rustdoc JSON for format version \ + {FORMAT_VERSION}; waiting on docs.rs rebuild" + ))); + } + return Err(Error::Fetch(format!( + "{crate_name}@{version} not found on docs.rs" + ))); + } + + if !(200..300).contains(&status) { + return Err(Error::Fetch(format!( + "{status} response for {crate_name}@{version}" + ))); + } + + let mut decoder = ruzstd::decoding::StreamingDecoder::new(BufReader::new(Cursor::new( + bytes, + ))) + .map_err(|err| { + Error::Io(std::io::Error::other(format!( + "zstd decode init failed: {err}" + ))) + })?; + let mut decoded = Vec::new(); + decoder.read_to_end(&mut decoded)?; + let krate: Crate = serde_json::from_slice(&decoded)?; + validate_format_version(&krate)?; + Ok(krate) + } +} + +#[derive(Debug, Serialize, Deserialize)] +struct CachedCrate { + krate: Crate, +} + +#[derive(Clone)] +struct KvCrateCache { + kv: KvStore, + ttl_seconds: u64, +} + +impl KvCrateCache { + fn new(kv: KvStore) -> Self { + Self { + kv, + ttl_seconds: 60 * 60, + } + } + + fn key_string(key: &CacheKey) -> String { + match &key.target { + Some(target) => format!("crate:{}:{}:{}", key.crate_name, key.version, target), + None => format!("crate:{}:{}", key.crate_name, key.version), + } + } +} + +impl KvCrateCache { + async fn get(&self, key: &CacheKey) -> Option> { + let cache_key = Self::key_string(key); + + match self.kv.get(&cache_key).json::().await { + Ok(Some(cached)) => Some(Arc::new(cached.krate)), + Ok(None) | Err(_) => None, + } + } + + async fn put(&self, key: CacheKey, value: Arc) { + let cache_key = Self::key_string(&key); + let cached = CachedCrate { + krate: (*value).clone(), + }; + + let Ok(payload) = serde_json::to_string(&cached) else { + return; + }; + + let builder = match self.kv.put(&cache_key, payload) { + Ok(builder) => builder.expiration_ttl(self.ttl_seconds), + Err(err) => { + if matches!(err, KvError::InvalidKvStore(_)) { + panic!("invalid kv store"); + } + return; + } + }; + + if 
let Err(err) = builder.execute().await + && matches!(err, KvError::InvalidKvStore(_)) + { + panic!("invalid kv store"); + } + } +} + +#[event(fetch)] +async fn fetch(req: Request, env: Env, _ctx: Context) -> Result { + let kv = env.kv("KRATE_KV")?; + let state = AppState { + fetcher: Arc::new(WorkerFetcher::new()), + cache: Arc::new(KvCrateCache::new(kv)), + }; + + route(req, state).await +} + +async fn route(req: Request, state: AppState) -> Result { + let path = req.path(); + let url = req.url()?; + + if let Some(spec) = url + .query_pairs() + .find(|(key, _)| key == "spec") + .map(|(_, value)| value.into_owned()) + { + let target = url + .query_pairs() + .find(|(key, _)| key == "target") + .map(|(_, value)| value.into_owned()); + return serve_spec(&state, &spec, target).await; + } + + if path == "/" { + return text_response( + 200, + "md-docrs-worker - GET /[/][/] for Markdown docs\n", + "text/plain; charset=utf-8", + ); + } + + if path == "/healthz" { + return text_response(200, "ok", "text/plain; charset=utf-8"); + } + + if path == "/kv" { + return kv_list(&state).await; + } + + let target = url + .query_pairs() + .find(|(key, _)| key == "target") + .map(|(_, value)| value.into_owned()); + + let segments: Vec<&str> = path + .split('/') + .filter(|segment| !segment.is_empty()) + .collect(); + + if segments.is_empty() { + return text_response( + 200, + "md-docrs-worker - GET /[/][/] for Markdown docs\n", + "text/plain; charset=utf-8", + ); + } + + let crate_name = segments[0].to_string(); + let version = if segments.len() >= 2 { + segments[1].to_string() + } else { + "latest".to_string() + }; + + let path_segs = if segments.len() > 2 { + parse_rest_segments(&segments[2..]) + } else { + Vec::new() + }; + + serve(&state, &crate_name, &version, target, &path_segs).await +} + +fn parse_rest_segments(segments: &[&str]) -> Vec { + if segments.is_empty() { + return vec![]; + } + + let last_idx = segments.len() - 1; + let mut out = Vec::with_capacity(segments.len()); + 
+ for (idx, segment) in segments.iter().enumerate() { + if idx == last_idx { + if let Some(name) = strip_kind_prefix(segment) { + out.push(name); + } else { + out.push((*segment).to_string()); + } + } else { + out.push((*segment).to_string()); + } + } + + out +} + +fn strip_kind_prefix(segment: &str) -> Option { + let segment = segment.strip_suffix(".html").unwrap_or(segment); + + for prefix in [ + "struct.", + "enum.", + "trait.", + "fn.", + "type.", + "constant.", + "static.", + "macro.", + "union.", + "primitive.", + "derive.", + "attr.", + ] { + if let Some(rest) = segment.strip_prefix(prefix) { + return Some(rest.to_string()); + } + } + + None +} + +async fn kv_list(state: &AppState) -> Result { + let list_response = state + .cache + .kv + .list() + .limit(100) + .execute() + .await + .map_err(|e| { + if matches!(e, KvError::InvalidKvStore(_)) { + panic!("invalid kv store"); + } + e + })?; + + let body = serde_json::to_string_pretty(&list_response) + .map_err(|err| worker::Error::RustError(err.to_string()))?; + + text_response(200, &body, "application/json; charset=utf-8") +} + +async fn serve_spec(state: &AppState, raw_spec: &str, target: Option) -> Result { + let spec = match ItemSpec::parse(raw_spec) { + Ok(spec) => spec.with_target(target), + Err(err) => return error_response(&err), + }; + + render_spec_response(state, spec).await +} + +async fn serve( + state: &AppState, + crate_name: &str, + version: &str, + target: Option, + path_segs: &[String], +) -> Result { + let path: Vec = match path_segs.split_first() { + Some((head, tail)) if head == crate_name => tail.to_vec(), + _ => path_segs.to_vec(), + }; + + let spec = ItemSpec { + crate_name: crate_name.to_string(), + version: version.to_string(), + target, + path, + }; + + render_spec_response(state, spec).await +} + +async fn render_spec_response(state: &AppState, spec: ItemSpec) -> Result { + let key = CacheKey { + crate_name: spec.crate_name.clone(), + version: spec.version.clone(), + target: 
spec.target.clone(), + }; + + let krate = if let Some(hit) = state.cache.get(&key).await { + hit + } else { + let fetched = match state + .fetcher + .fetch(&spec.crate_name, &spec.version, spec.target.as_deref()) + .await + { + Ok(fetched) => fetched, + Err(err) => return error_response(&err), + }; + let krate = Arc::new(fetched); + state.cache.put(key, Arc::clone(&krate)).await; + krate + }; + + match render_loaded_crate(&krate, &spec) { + Ok(body) => markdown_response(&body), + Err(err) => error_response(&err), + } +} + +fn markdown_response(body: &str) -> Result { + let headers = Headers::new(); + headers.set("content-type", "text/markdown; charset=utf-8")?; + headers.set("vary", "Accept")?; + headers.set("x-markdown-tokens", &(body.len() / 4).to_string())?; + + Ok(Response::ok(body.to_string())?.with_headers(headers)) +} + +fn error_response(err: &Error) -> Result { + let status = match err { + Error::NotFound(_) => 404, + Error::InvalidSpec(_) => 400, + Error::FormatVersionMismatch { .. } | Error::Fetch(_) | Error::Json(_) | Error::Io(_) => { + 502 + } + }; + + text_response(status, &err.to_string(), "text/plain; charset=utf-8") +} + +fn text_response(status: u16, body: &str, content_type: &str) -> Result { + let headers = Headers::new(); + headers.set("content-type", content_type)?; + + Ok(Response::ok(body.to_string())? 
+ .with_headers(headers) + .with_status(status)) +} diff --git a/crates/md-docrs-worker/wrangler.toml b/crates/md-docrs-worker/wrangler.toml new file mode 100644 index 0000000..959d18a --- /dev/null +++ b/crates/md-docrs-worker/wrangler.toml @@ -0,0 +1,10 @@ +name = "md-docrs-worker" +main = "build/index.js" +compatibility_date = "2025-04-21" + +[[kv_namespaces]] +binding = "KRATE_KV" +id = "KRATE_KV" + +[build] +command = "cargo install \"worker-build@^0.8\" && worker-build --release" diff --git a/justfile b/justfile new file mode 100644 index 0000000..6f70cdf --- /dev/null +++ b/justfile @@ -0,0 +1,67 @@ +default: + @just --list + +# Run all Rust workspace tests. +test: + cargo test --workspace + +# Build the Rust workspace. +build: + cargo build --workspace + +# Build the Cloudflare Worker crate for wasm. +build-worker: + cargo check -p md-docrs-worker --target wasm32-unknown-unknown + +# Run the Cloudflare Worker locally with Wrangler. +worker-dev: + npx wrangler@latest dev --config wrangler.toml --cwd crates/md-docrs-worker --local --port 8787 --persist-to .wrangler/state + +# Probe the worker root with a crate spec in the path. +curl-worker spec="anyhow": + curl -sS "http://127.0.0.1:8787/{{ spec }}" + +# Probe the worker with a target triple query parameter. +curl-worker-target spec="tokio::sync::Mutex" target="x86_64-unknown-linux-gnu": + curl -sS "http://127.0.0.1:8787/{{ spec }}?target={{ target }}" + +# Probe the worker using the spec query parameter form. +curl-worker-query spec="anyhow::Error": + curl -sS "http://127.0.0.1:8787/?spec={{ spec }}" + +# Run a few common worker smoke tests. +test-worker: + just curl-worker anyhow + echo + just curl-worker-query "anyhow::Error" + echo + just curl-worker-target "tokio::sync::Mutex" "x86_64-unknown-linux-gnu" + +# Run the native Markdown server locally. +server-dev: + cargo run -p md-docrs-server -- --port 8080 --bind 127.0.0.1 + +# Probe the native server. 
+curl-server path="anyhow": + curl -sS "http://127.0.0.1:8080/{{ path }}" + +# Run the WASM comparison flow described in the repo docs. +wasm-compare: + ./wasm/build.sh + cargo run -p md-docrs-wasm-compare -- --offline + +# Run Zig tests from the repo root. +zig-test: + zig build test --build-file zig/lib/build.zig + +# Show the main commands collected from workspace READMEs. +help-commands: + @echo "Common commands from README files:" + @echo " cargo build --workspace" + @echo " cargo test --workspace" + @echo " cargo run -p md-docrs-cli -- anyhow" + @echo " cargo run -p md-docrs-server -- --port 8080 --bind 127.0.0.1" + @echo " cargo build --profile wasm-release --target wasm32-unknown-unknown -p md-docrs-rust-wasm --no-default-features" + @echo " ./wasm/build.sh" + @echo " cargo run -p md-docrs-wasm-compare -- --offline" + @echo " zig build test --build-file zig/lib/build.zig" diff --git a/rust-wasm/README.md b/rust-wasm/README.md deleted file mode 100644 index 7c38fe4..0000000 --- a/rust-wasm/README.md +++ /dev/null @@ -1,117 +0,0 @@ -# md-docrs-wasm - -`wasm32-unknown-unknown` build of the `md_docrs_proxy` pure pipeline, exposing -the **exact same C ABI** as the Zig build (`zig/lib/wasm.zig`). Lets us drop -either `.wasm` into the same host and compare size and per-request latency -without any host-side code changes. - -## Exports - -| Symbol | Signature | Notes | -| --- | --- | --- | -| `alloc` | `(len: u32) -> *u8` | Backed by Rust's global allocator. Returns null on OOM or `len == 0`. | -| `free` | `(ptr: *u8, len: u32)` | Length must match the allocation. | -| `resolve_url` | `(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` | Same semantics as the Zig export. 0 on error. | -| `render_markdown` | `(json_ptr, json_len, spec_ptr, spec_len, target_ptr, target_len, len_out: *u32) -> *u8` | Takes already-decoded rustdoc JSON, returns a fresh allocation containing Markdown. Caller frees. Null on error. 
Only present in the `render` feature build. | - -## Building - -```sh -# Minimal parity build — matches the Zig wasm surface (resolve_url only). -cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm --no-default-features -wasm-opt -Oz --strip-debug --strip-dwarf \ - -o target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.opt.wasm \ - target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm - -# Full pipeline — adds render_markdown (serde_json + rustdoc-types). -cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm -wasm-opt -Oz --strip-debug --strip-dwarf \ - -o target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.opt.wasm \ - target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm -``` - -Raw artifact lives at `target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm`. - -If you run `wasm-opt`, the optimized artifact can live alongside it, e.g. -`target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.opt.wasm`. - -## Size snapshot - -Measured on Rust 1.94 / Zig 0.16. - -| Build | Bytes | -| --- | ---: | -| Zig 0.16 — `ReleaseSmall` + `strip`, exports `resolve_url` | **6,775** | -| Rust `wasm-release` — `resolve_url` only (`--no-default-features`) | **36,336** | -| Rust `wasm-release` + `wasm-opt -Oz` — `resolve_url` only | **28,523** | -| Rust `wasm-release` — `resolve_url` + `render_markdown` | **486,387** | - -For the `resolve_url`-only Rust build, `wasm-opt -Oz` trims about **7,813 bytes** -from the raw `wasm-release` artifact, roughly a **21.5%** reduction. - -The large jump for `render_markdown` is serde_json + `rustdoc-types` -deserialise impls. Expected; that's the cost of JSON→AST→Markdown. - -## Feature gates - -- `render` (default) — pulls `serde_json` + `rustdoc-types` and exposes - `render_markdown`. Turn off for the minimal size-parity build. 
- -## Tests - -Host tests run through the same internal functions as the WASM exports -(the `no_mangle` attribute is gated to `target_arch = "wasm32"` so the test -binary doesn't shadow libc's `free`): - -```sh -cargo test -p md-docrs-wasm -``` - -## Comparing with Zig - -Both modules share this memory protocol: - -1. Host calls `alloc(n)` to reserve input / output buffers in linear memory. -2. Host writes input bytes into those buffers via a fresh `Uint8Array(memory.buffer, ptr, len)`. -3. Host calls `resolve_url(...)` (or `render_markdown(...)`). -4. Host reads the output, then calls `free(ptr, len)` on each buffer. - -Because the ABI matches byte-for-byte, the Worker at `zig/src/index.ts` -works as-is against either module — just point the `.wasm` import at the -Rust artifact. - -## What's next - -- Port `render_markdown` to Zig. That's where the real interesting size / - speed comparison happens — today the Zig wasm doesn't carry serde_json - or the rustdoc types. -- Benchmark instantiation + per-call latency side-by-side in a Worker - (e.g. hyperfine-style loop from a test harness, or wrangler dev + `wrk`). -- Keep comparing raw vs `wasm-opt -Oz` output as the Rust WASM surface grows, - especially once Zig gains the full render pipeline too. - -Option A: keep `std`, but drastically reduce code size -This is the lowest-risk path. - -For the minimal build: -- stop using `ItemSpec::parse` -- stop using `String` -- stop using `format!` -- implement a tiny local parser over `&[u8]` -- write URL bytes directly to `out_ptr` - -This alone could cut a lot. - -### Option B: create a dedicated `no_std` tiny crate -Example direction: -- `rust-wasm-tiny/` -- exports only `resolve_url` -- parser implemented over raw bytes -- no `std` -- no `serde` -- no `rustdoc-types` -- no dependency on main crate - -This is the path most likely to get you materially closer to Zig. 
diff --git a/skills/README.md b/skills/README.md new file mode 100644 index 0000000..fe7cbb3 --- /dev/null +++ b/skills/README.md @@ -0,0 +1,34 @@ +# Skills + +This directory contains reusable agent skills for working with `md-docrs` and related workflows. + +## Available skills + +### `rust-docrs-cli` + +Use this skill when you need to retrieve, inspect, or summarize Rust crate and item documentation from docs.rs with the `md-docrs` CLI. + +Typical uses: + +- get docs for a crate root like `anyhow` +- get docs for an item like `anyhow::Error` +- get docs for a versioned item like `tokio@1.52.1::sync::Mutex` +- get docs for a target-specific item with `--target` +- form the correct `md-docrs` command from a user request +- summarize the Markdown returned by the CLI + +Path: + +- `skills/rust-docrs-cli/SKILL.md` + +Reference material: + +- `skills/rust-docrs-cli/references/usage.md` + +## Notes + +Keep each skill focused: + +- put trigger logic and core instructions in `SKILL.md` +- put longer examples and lookup details in `references/` +- avoid mixing unrelated workflows into one skill \ No newline at end of file diff --git a/skills/rust-docrs-cli/SKILL.md b/skills/rust-docrs-cli/SKILL.md new file mode 100644 index 0000000..3549e40 --- /dev/null +++ b/skills/rust-docrs-cli/SKILL.md @@ -0,0 +1,65 @@ +--- +name: Rust docs.rs CLI +description: Use this skill when you need to retrieve or summarize Rust crate or item documentation from docs.rs with the `md-docrs` CLI. Use it for crate-root lookups, item lookups, versioned lookups, target-specific lookups, and for forming the correct command. Do NOT use it for general Rust programming help, source-code editing, deployment, server workflows, Zig, WASM, or unrelated Cargo tasks. +--- + +# Rust docs.rs CLI + +Use this skill to help people get Rust documentation from docs.rs through the `md-docrs` CLI. 
+ +## Use this skill when + +- the user wants docs for a crate +- the user wants docs for a Rust item +- the user wants docs for a specific crate version +- the user wants docs for a specific target +- the user wants the correct `md-docrs` command +- the user wants the returned docs summarized + +## Do not use this skill when + +- the task is general Rust advice without docs lookup +- the task is editing or reviewing code +- the task is about servers, deployment, Zig, or WASM +- the task is general Cargo troubleshooting unrelated to `md-docrs` + +## Core rules + +- Use this spec grammar: `crate[@version][::path::to::item]` +- Prefer `md-docrs <spec>` +- Use `md-docrs --target <target> <spec>` for target-specific docs +- Give the exact command first +- State that successful output is Markdown on stdout +- Correct invalid specs directly +- Do not invent unsupported flags or URL formats + +## Common patterns + +- crate root: `anyhow` +- item: `anyhow::Error` +- versioned item: `tokio@1.52.1::sync::Mutex` + +## Examples + +- `md-docrs anyhow` +- `md-docrs anyhow::Error` +- `md-docrs tokio@1.52.1::sync::Mutex` +- `md-docrs --target x86_64-unknown-linux-gnu tokio::sync::Mutex` + +## Fallback + +If `md-docrs` is not installed, use: + +- `cargo run -p md-docrs-cli -- <spec>` + +## Response style + +- command first +- shortest valid spec +- include version only when needed +- include target only when needed +- summarize the returned Markdown only if the user wants interpretation + +## Additional reference + +For more examples and lookup guidance, read `references/usage.md`. \ No newline at end of file diff --git a/skills/rust-docrs-cli/references/usage.md b/skills/rust-docrs-cli/references/usage.md new file mode 100644 index 0000000..ddbedf4 --- /dev/null +++ b/skills/rust-docrs-cli/references/usage.md @@ -0,0 +1,55 @@ +# `md-docrs` usage reference + +Use this reference for quick, correct `md-docrs` lookups against docs.rs. 
+ +## Spec format + +`crate[@version][::path::to::item]` + +Examples: + +- crate root: `anyhow` +- item: `anyhow::Error` +- nested item: `tokio::sync::Mutex` +- versioned item: `tokio@1.52.1::sync::Mutex` + +## Command forms + +Installed binary: + +`md-docrs <spec>` + +Target-specific lookup: + +`md-docrs --target <target> <spec>` + +Cargo fallback in this repository: + +`cargo run -p md-docrs-cli -- <spec>` + +## Copy-paste examples + +`md-docrs anyhow` + +`md-docrs anyhow::Error` + +`md-docrs tokio@1.52.1::sync::Mutex` + +`md-docrs --target x86_64-unknown-linux-gnu tokio::sync::Mutex` + +`cargo run -p md-docrs-cli -- tokio::sync::Mutex` + +## Guidance + +- give the exact command first +- use the shortest valid spec +- include version only when needed +- include target only when needed +- use `::` for Rust item paths +- successful output is Markdown on stdout + +## Avoid + +- inventing unsupported flags +- using docs.rs HTML URLs when a spec is enough +- assuming a version the user did not request \ No newline at end of file diff --git a/src/fetch.rs b/src/fetch.rs deleted file mode 100644 index b00dc71..0000000 --- a/src/fetch.rs +++ /dev/null @@ -1,147 +0,0 @@ -use crate::{Error, Result}; -use rustdoc_types::{Crate, FORMAT_VERSION}; -use std::time::Duration; - -/// Downloads rustdoc JSON from docs.rs and parses it with `rustdoc-types`. -pub struct Fetcher { - client: reqwest::Client, - base: String, -} - -impl Fetcher { - /// # Errors - /// Returns `Error::Http` if the underlying HTTP client fails to build. - pub fn new() -> Result { - let client = reqwest::Client::builder() - .user_agent(concat!("md-docrs-proxy/", env!("CARGO_PKG_VERSION"))) - .timeout(Duration::from_secs(30)) - .redirect(reqwest::redirect::Policy::limited(10)) - .build()?; - Ok(Self { - client, - base: "https://docs.rs".into(), - }) - } - - /// Override the docs.rs base URL (used in tests). 
- #[must_use] - pub fn with_base(mut self, base: impl Into) -> Self { - self.base = base.into(); - self - } - - /// # Errors - /// Returns `Error::Fetch` on HTTP errors or unsupported format versions, - /// `Error::Json` on JSON parse failure, and `Error::FormatVersionMismatch` - /// when the downloaded JSON's `format_version` disagrees with ours. - pub async fn fetch( - &self, - crate_name: &str, - version: &str, - target: Option<&str>, - ) -> Result { - // Always request the format version we can parse. docs.rs keeps - // multiple format versions during rebuilds, so this is the reliable - // way to avoid schema-mismatch parse errors. A 404 here means the - // crate hasn't been rebuilt for our supported format yet. - let url = build_url( - &self.base, - crate_name, - version, - target, - Some(FORMAT_VERSION), - ); - tracing::debug!(url = %url, "fetch rustdoc JSON"); - let resp = self.client.get(&url).send().await?; - - if resp.status() == reqwest::StatusCode::NOT_FOUND { - // Distinguish "crate not found" from "format version unavailable" - // by probing the unpinned endpoint. - let probe_url = build_url(&self.base, crate_name, version, target, None); - let probe = self.client.head(&probe_url).send().await?; - if probe.status().is_success() { - return Err(Error::Fetch(format!( - "{crate_name}@{version} has no rustdoc JSON for format version \ - {FORMAT_VERSION}; waiting on docs.rs rebuild" - ))); - } - return Err(Error::Fetch(format!( - "{crate_name}@{version} not found on docs.rs" - ))); - } - - if !resp.status().is_success() { - return Err(Error::Fetch(format!( - "{} {} for {crate_name}@{version}", - resp.status().as_u16(), - resp.status().canonical_reason().unwrap_or("") - ))); - } - - let bytes = resp.bytes().await?; - - // Decompress zstd off the tokio runtime - it's CPU-bound. 
- let decoded = - tokio::task::spawn_blocking(move || zstd::decode_all(std::io::Cursor::new(bytes))) - .await - .map_err(|e| Error::Fetch(format!("zstd decode panicked: {e}")))??; - - let krate: Crate = serde_json::from_slice(&decoded)?; - if krate.format_version != FORMAT_VERSION { - return Err(Error::FormatVersionMismatch { - got: krate.format_version, - expected: FORMAT_VERSION, - }); - } - Ok(krate) - } -} - -fn build_url( - base: &str, - crate_name: &str, - version: &str, - target: Option<&str>, - format_version: Option, -) -> String { - let target_seg = target.map(|t| format!("/{t}")).unwrap_or_default(); - match format_version { - Some(v) => format!("{base}/crate/{crate_name}/{version}{target_seg}/json/{v}.zst"), - None => format!("{base}/crate/{crate_name}/{version}{target_seg}/json.zst"), - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn url_basic() { - assert_eq!( - build_url("https://docs.rs", "serde", "latest", None, None), - "https://docs.rs/crate/serde/latest/json.zst" - ); - } - - #[test] - fn url_with_target() { - assert_eq!( - build_url( - "https://docs.rs", - "serde", - "latest", - Some("x86_64-pc-windows-msvc"), - None - ), - "https://docs.rs/crate/serde/latest/x86_64-pc-windows-msvc/json.zst" - ); - } - - #[test] - fn url_format_pinned() { - assert_eq!( - build_url("https://docs.rs", "serde", "1.0.200", None, Some(57)), - "https://docs.rs/crate/serde/1.0.200/json/57.zst" - ); - } -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index c385c4b..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,57 +0,0 @@ -#![warn(clippy::pedantic)] - -pub mod cache; -pub mod error; -#[cfg(feature = "http")] -pub mod fetch; -pub mod render; -pub mod resolve; -pub mod spec; - -pub use error::{Error, Result}; -pub use spec::ItemSpec; - -#[cfg(feature = "http")] -use std::sync::Arc; - -/// High-level entry point: take a parsed `ItemSpec`, return rendered Markdown. 
-/// -/// Fetches and caches the crate's rustdoc JSON via the supplied `fetch::Fetcher` -/// and any `cache::CrateCache` implementation, resolves the requested item, -/// and renders it to Markdown. -/// -/// # Errors -/// Forwards errors from `Fetcher::fetch` (network / docs.rs / decode failures) -/// and `resolve::resolve` (`Error::NotFound` when the path does not match). -#[cfg(feature = "http")] -pub async fn render_spec( - spec: &ItemSpec, - fetcher: &fetch::Fetcher, - cache: &dyn cache::CrateCache, -) -> Result { - let krate = load_crate(spec, fetcher, cache).await?; - let resolved = resolve::resolve(&krate, spec)?; - Ok(render::render(&krate, &resolved, spec)) -} - -#[cfg(feature = "http")] -async fn load_crate( - spec: &ItemSpec, - fetcher: &fetch::Fetcher, - cache: &dyn cache::CrateCache, -) -> Result> { - let key = cache::CacheKey { - crate_name: spec.crate_name.clone(), - version: spec.version.clone(), - target: spec.target.clone(), - }; - if let Some(hit) = cache.get(&key).await { - return Ok(hit); - } - let krate = fetcher - .fetch(&spec.crate_name, &spec.version, spec.target.as_deref()) - .await?; - let arc = Arc::new(krate); - cache.put(key, Arc::clone(&arc)).await; - Ok(arc) -} diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index b9569bb..0000000 --- a/src/main.rs +++ /dev/null @@ -1,182 +0,0 @@ -#![warn(clippy::pedantic)] - -use anyhow::{Context, Result}; -use clap::{Parser, Subcommand}; -use md_docrs_proxy::{ - ItemSpec, - cache::{CrateCache, InMemoryCache}, - fetch::Fetcher, - render_spec, -}; -use std::net::SocketAddr; -use std::path::PathBuf; -use std::sync::Arc; - -#[cfg(feature = "hybrid-cache")] -use md_docrs_proxy::cache::{FoyerHybridCache, FoyerHybridCacheConfig}; - -mod server; - -#[derive(Parser, Debug)] -#[command( - name = "md-docrs", - version, - about = "Serve Rust crate docs as Markdown via rustdoc JSON" -)] -struct Cli { - #[command(subcommand)] - command: Option, - - /// Spec: crate[@version][::path::to::item]. 
Equivalent to `render` subcommand. - #[arg(value_name = "SPEC")] - spec: Option, - - /// Override the target triple (e.g. x86_64-pc-windows-msvc). - #[arg(long, global = true)] - target: Option, -} - -#[derive(Subcommand, Debug)] -enum Command { - /// Render a single spec to stdout. - Render { - spec: String, - #[arg(long)] - target: Option, - }, - /// Run the HTTP server mirroring docs.rs URLs. - Serve { - #[arg(long, default_value_t = 8080)] - port: u16, - #[arg(long, default_value = "127.0.0.1")] - bind: String, - /// Enable the memory+disk hybrid cache backed by foyer; requires the - /// `hybrid-cache` feature. When set, the directory is created if - /// missing and used as the disk tier. - #[arg(long, value_name = "DIR")] - cache_dir: Option, - /// Disk tier capacity in bytes. Only applied when `--cache-dir` is - /// set. - #[arg(long, default_value_t = 4 * 1024 * 1024 * 1024)] - cache_disk_bytes: usize, - /// Memory tier weight budget in bytes. Only applied when - /// `--cache-dir` is set. 
- #[arg(long, default_value_t = 256 * 1024 * 1024)] - cache_memory_bytes: usize, - }, -} - -#[tokio::main] -async fn main() -> Result<()> { - init_tracing(); - - let cli = Cli::parse(); - - match (cli.command, cli.spec) { - (Some(Command::Render { spec, target }), _) => { - render_cmd(&spec, target.or(cli.target)).await - } - ( - Some(Command::Serve { - port, - bind, - cache_dir, - cache_disk_bytes, - cache_memory_bytes, - }), - _, - ) => { - serve_cmd( - &bind, - port, - cache_dir, - cache_disk_bytes, - cache_memory_bytes, - ) - .await - } - (None, Some(spec)) => render_cmd(&spec, cli.target).await, - (None, None) => { - eprintln!("usage: md-docrs | md-docrs serve | md-docrs render "); - std::process::exit(2); - } - } -} - -fn init_tracing() { - let filter = tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")); - tracing_subscriber::fmt() - .with_env_filter(filter) - .with_target(false) - .init(); -} - -async fn render_cmd(raw: &str, target: Option) -> Result<()> { - let spec = ItemSpec::parse(raw) - .with_context(|| format!("invalid spec: {raw}"))? 
- .with_target(target); - let fetcher = Fetcher::new()?; - let cache = InMemoryCache::default(); - let md = render_spec(&spec, &fetcher, &cache).await?; - print!("{md}"); - Ok(()) -} - -async fn serve_cmd( - bind: &str, - port: u16, - cache_dir: Option, - cache_disk_bytes: usize, - cache_memory_bytes: usize, -) -> Result<()> { - let addr: SocketAddr = format!("{bind}:{port}").parse()?; - let cache = build_cache(cache_dir, cache_disk_bytes, cache_memory_bytes).await?; - let state = Arc::new(server::AppState { - fetcher: Fetcher::new()?, - cache, - }); - let app = server::router(state); - tracing::info!(%addr, "md-docrs serve listening"); - let listener = tokio::net::TcpListener::bind(addr).await?; - axum::serve(listener, app).await?; - Ok(()) -} - -#[cfg(feature = "hybrid-cache")] -async fn build_cache( - cache_dir: Option, - disk_bytes: usize, - memory_bytes: usize, -) -> Result> { - if let Some(dir) = cache_dir { - std::fs::create_dir_all(&dir) - .with_context(|| format!("create cache dir {}", dir.display()))?; - tracing::info!(dir = %dir.display(), disk_bytes, memory_bytes, "using foyer hybrid cache"); - let hybrid = FoyerHybridCache::new(FoyerHybridCacheConfig { - dir, - memory_capacity_bytes: memory_bytes, - disk_capacity_bytes: disk_bytes, - }) - .await?; - Ok(Arc::new(hybrid)) - } else { - Ok(Arc::new(InMemoryCache::default())) - } -} - -#[cfg(not(feature = "hybrid-cache"))] -#[allow(clippy::unused_async)] -async fn build_cache( - cache_dir: Option, - _disk_bytes: usize, - _memory_bytes: usize, -) -> Result> { - if cache_dir.is_some() { - anyhow::bail!( - "--cache-dir was supplied but this binary was built without the \ - `hybrid-cache` feature; rebuild with `cargo build --features hybrid-cache`" - ); - } - Ok(Arc::new(InMemoryCache::default())) -} diff --git a/wasm/README.md b/wasm/README.md index c910518..1f222f9 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -1,114 +1,73 @@ -# wasm/ — side-by-side comparison harness +# wasm/ -Runs the Zig and Rust 
wasm builds of `resolve_url` through the exact same -sequence of specs and reports: +Workspace-level WASM staging for artifact comparison. -- artifact size -- resolved URL (parity check — every artifact must produce byte-identical output) -- median and p95 per-call latency -- raw Rust vs `wasm-opt -Oz` size comparison for the same build flavor +This directory is not a Rust crate. It only exists to: -Default runtime is embedded **wasmtime** (crate). The `wasmer` cargo feature -swaps in the **wasmer** crate as an alternate host. Both are in-process -embeddings, not the `wasmtime` / `wasmer` CLI binaries. +- build and stage Zig and Rust `.wasm` artifacts +- keep staged outputs under `wasm/artifacts/` +- document the comparison flow -## Layout +The comparison binary lives in `crates/md-docrs-wasm-compare`. -``` -wasm/ -├── Cargo.toml # md-docrs-wasm-compare (workspace member) -├── src/main.rs # harness: loads wasm, drives resolve_url, reports -├── build.sh # builds zig + rust wasms and stages them in artifacts/ -├── artifacts/ # .gitignored — populated by build.sh -│ ├── zig-minimal.wasm -│ ├── rust-minimal.wasm -│ ├── rust-minimal-opt.wasm -│ ├── rust-full.wasm -│ └── rust-full-opt.wasm -└── README.md -``` +## What it contains -## Quick start +- `build.sh` — builds and stages available artifacts +- `artifacts/` — staged `.wasm` files used by the comparison harness +- `README.md` — this file -```sh -# From repo root. 
-./wasm/build.sh # produces artifacts/*.wasm -cargo run -p md-docrs-wasm-compare # default: wasmtime, 200 iterations -``` +## Artifact workflow -Sample output: +From the repo root: -``` -artifact bytes flavor --------------- ---------- -------- -zig-minimal 6775 minimal -rust-minimal 36336 minimal -rust-minimal-opt 25541 minimal -rust-full 486387 full -rust-full-opt 361606 full - -spec: tokio@1.52.1::sync::Mutex -artifact output median µs p95 µs --------------- ------------------------------------------------ --------- ---------- -zig https://docs.rs/crate/tokio/1.52.1/json/57.zst 7 8 -rust-minimal https://docs.rs/crate/tokio/1.52.1/json/57.zst 9 9 -rust-full https://docs.rs/crate/tokio/1.52.1/json/57.zst 9 9 +```/dev/null/wasm-compare.sh#L1-2 +./wasm/build.sh +cargo run -p md-docrs-wasm-compare -- --offline ``` -All three artifacts must return byte-identical URLs for every spec — that is -the ABI parity check. Per-call latency includes three `alloc`s, one -`resolve_url`, three `free`s, plus one `Memory::write` per input and one -`Memory::read` for the output. +`build.sh` does this: -## Flags +- builds Zig minimal WASM +- attempts Zig full WASM and skips it cleanly if unsupported +- builds Rust minimal WASM from `crates/md-docrs-rust-wasm` +- builds Rust full WASM from `crates/md-docrs-rust-wasm` +- runs `wasm-opt -Oz` on Rust artifacts +- copies staged outputs into `wasm/artifacts/` -| Flag | Default | Meaning | -| --- | --- | --- | -| `--runtime wasmtime\|wasmer` | `wasmtime` | Embedded host. `wasmer` requires `--features wasmer`. | -| `--iterations N` | 200 | Hot-loop samples per (artifact, spec) cell. | -| `--artifacts-dir PATH` | `wasm/artifacts` | Where to look for `zig-minimal.wasm`, `zig-full.wasm`, `rust-minimal.wasm`, `rust-minimal-opt.wasm`, `rust-full.wasm`, and `rust-full-opt.wasm`. | +## Expected staged files -Any subset of the expected `.wasm` files may be missing — the harness just skips those rows. 
+The harness looks for these filenames: -## Wasmer (optional) +- `zig-minimal.wasm` +- `zig-full.wasm` +- `rust-minimal.wasm` +- `rust-minimal-opt.wasm` +- `rust-full.wasm` +- `rust-full-opt.wasm` -```sh -cargo run -p md-docrs-wasm-compare --features wasmer -- --runtime wasmer -``` - -Wasmer pulls in its own Cranelift fork; first build is ~20s. Both runtimes -agree on output, but wasmer's singlepass / cranelift defaults typically -give different per-call timings than wasmtime's cranelift — useful for -separating ABI cost from JIT cost. +Missing files are skipped. -## Running the raw `.wasm` without the harness +## Required tools -The CLI form of wasmtime / wasmer can't easily marshal strings across the -ABI boundary, but you can still inspect the modules: - -```sh -wasmtime compile wasm/artifacts/zig.wasm -o /tmp/zig.cwasm -wasmer inspect wasm/artifacts/rust-minimal.wasm | head -``` +You need: -For an end-to-end call you need host code that writes the spec into WASM -memory and reads the result back — that's exactly what `src/main.rs` does. +- Rust with `wasm32-unknown-unknown` +- Zig +- `wasm-opt` -## Adding a new spec +## Related paths -Edit `DEFAULT_SPECS` in `src/main.rs`. A spec is `(spec_string, optional_target)` -and runs against every `.wasm` in the artifacts directory. 
+- `crates/md-docrs-rust-wasm` — Rust WASM module +- `crates/md-docrs-wasm-compare` — host comparison harness +- `zig/` — Zig implementation +- `wasm/artifacts/` — staged outputs -## wasm-opt outputs +## Notes -`build.sh` now requires `wasm-opt` on `PATH` and stages optimized Rust artifacts -next to the raw cargo outputs: +Keep `wasm/` boring: -- `rust-minimal.wasm` — `cargo build --profile wasm-release --no-default-features` -- `rust-minimal-opt.wasm` — same module after `wasm-opt -Oz --strip-debug --strip-dwarf` -- `rust-full.wasm` — `cargo build --profile wasm-release --no-default-features --features full` -- `rust-full-opt.wasm` — same module after `wasm-opt -Oz --strip-debug --strip-dwarf` +- no Rust crate here +- no shared library logic here +- no comparison logic here -That lets the harness report the size delta between the unoptimized Rust wasm -and the post-processed `wasm-opt` version while still checking `resolve_url` -output parity across all staged artifacts. +It is only the staging area for cross-language WASM artifacts. \ No newline at end of file diff --git a/wasm/build.sh b/wasm/build.sh index 6f78d20..a43a84d 100755 --- a/wasm/build.sh +++ b/wasm/build.sh @@ -1,70 +1,136 @@ #!/usr/bin/env bash -# Build the Zig and Rust wasm artifacts and stage them under artifacts/ so -# the comparison harness (cargo run -p md-docrs-wasm-compare) can load them -# without knowing where each toolchain drops its output. -# -# Produces up to six artifacts: -# zig-minimal.wasm Zig ReleaseSmall, resolve_url only -# zig-full.wasm Zig ReleaseSmall, full pipeline (if -Dfull supported) -# rust-minimal.wasm Rust wasm-release, --no-default-features -# rust-minimal-opt.wasm Rust wasm-release + wasm-opt -Oz, --no-default-features -# rust-full.wasm Rust wasm-release, --features full (fetch + render) -# rust-full-opt.wasm Rust wasm-release + wasm-opt -Oz, --features full set -euo pipefail +# Build and stage the WASM artifacts used by the comparison harness. 
+# +# Responsibilities: +# - build Zig minimal wasm +# - optionally build Zig full wasm if supported +# - build Rust minimal/full wasm from the workspace +# - run wasm-opt on Rust artifacts +# - copy everything into wasm/artifacts/ +# +# This directory is only a staging area. The actual Rust crates live under: +# - crates/md-docrs-rust-wasm +# - crates/md-docrs-wasm-compare + HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ROOT="$(cd "${HERE}/.." && pwd)" -ARTIFACTS="${HERE}/artifacts" +ARTIFACTS_DIR="${HERE}/artifacts" +RUST_WASM_PKG="md-docrs-rust-wasm" +RUST_WASM_OUT="${ROOT}/target/wasm32-unknown-unknown/wasm-release/md_docrs_rust_wasm.wasm" +ZIG_DIR="${ROOT}/zig/lib" +STAGED_ARTIFACTS=( + "zig-minimal.wasm" + "zig-full.wasm" + "rust-minimal.wasm" + "rust-minimal-opt.wasm" + "rust-full.wasm" + "rust-full-opt.wasm" +) + +mkdir -p "${ARTIFACTS_DIR}" + +require_cmd() { + command -v "$1" >/dev/null 2>&1 || { + echo "missing required command: $1" >&2 + exit 1 + } +} -mkdir -p "${ARTIFACTS}" +copy_if_exists() { + local src="$1" + local dest="$2" -if command -v wasm-opt >/dev/null 2>&1; then - WASM_OPT="$(command -v wasm-opt)" - echo ">> wasm-opt: ${WASM_OPT}" -else - echo "wasm-opt not found in PATH; install Binaryen to produce optimized Rust artifacts" >&2 - exit 1 -fi + if [[ -f "${src}" ]]; then + cp "${src}" "${dest}" + return 0 + fi + + return 1 +} optimize_wasm() { local src="$1" local dest="$2" - "${WASM_OPT}" -Oz --enable-bulk-memory --strip-debug --strip-dwarf -o "${dest}" "${src}" + wasm-opt -Oz \ + --enable-bulk-memory \ + --strip-debug \ + --strip-dwarf \ + -o "${dest}" \ + "${src}" } -echo ">> zig-minimal: ReleaseSmall, wasm32-freestanding" -(cd "${ROOT}/zig/lib" && zig build) -cp "${ROOT}/zig/lib/zig-out/bin/md-docrs.wasm" "${ARTIFACTS}/zig-minimal.wasm" +build_zig_minimal() { + echo ">> zig-minimal" + ( + cd "${ZIG_DIR}" + zig build + ) + copy_if_exists \ + "${ZIG_DIR}/zig-out/bin/md-docrs.wasm" \ + "${ARTIFACTS_DIR}/zig-minimal.wasm" +} -echo ">> 
zig-full: ReleaseSmall + full pipeline (-Dfull)" -if (cd "${ROOT}/zig/lib" && zig build -Dfull 2>/dev/null); then - if [[ -f "${ROOT}/zig/lib/zig-out/bin/md-docrs-full.wasm" ]]; then - cp "${ROOT}/zig/lib/zig-out/bin/md-docrs-full.wasm" \ - "${ARTIFACTS}/zig-full.wasm" +build_zig_full() { + echo ">> zig-full" + if ( + cd "${ZIG_DIR}" + zig build -Dfull >/dev/null 2>&1 + ); then + if copy_if_exists \ + "${ZIG_DIR}/zig-out/bin/md-docrs-full.wasm" \ + "${ARTIFACTS_DIR}/zig-full.wasm"; then + : + else + echo " skipped: build accepted -Dfull but produced no md-docrs-full.wasm" + fi else - echo " (skipping: -Dfull accepted but produced no md-docrs-full.wasm)" + echo " skipped: Zig full wasm is not implemented yet" fi -else - echo " (skipping: zig -Dfull not supported yet; implement render_spec in zig/lib/)" -fi - -echo ">> rust-minimal: wasm-release, --no-default-features (resolve_url only)" -cargo build --manifest-path "${ROOT}/Cargo.toml" \ - --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm --no-default-features -cp "${ROOT}/target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm" \ - "${ARTIFACTS}/rust-minimal.wasm" -optimize_wasm "${ARTIFACTS}/rust-minimal.wasm" "${ARTIFACTS}/rust-minimal-opt.wasm" - -echo ">> rust-full: wasm-release, --features full (fetch + render)" -cargo build --manifest-path "${ROOT}/Cargo.toml" \ - --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm --no-default-features --features full -cp "${ROOT}/target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm" \ - "${ARTIFACTS}/rust-full.wasm" -optimize_wasm "${ARTIFACTS}/rust-full.wasm" "${ARTIFACTS}/rust-full-opt.wasm" - -echo -echo "staged artifacts:" -ls -la "${ARTIFACTS}" +} + +build_rust() { + local label="$1" + shift + + echo ">> ${label}" + cargo build \ + --manifest-path "${ROOT}/Cargo.toml" \ + --profile wasm-release \ + --target wasm32-unknown-unknown \ + -p "${RUST_WASM_PKG}" \ + "$@" +} + +stage_rust_artifact() { + local 
raw_name="$1" + local opt_name="$2" + + copy_if_exists "${RUST_WASM_OUT}" "${ARTIFACTS_DIR}/${raw_name}" + optimize_wasm "${ARTIFACTS_DIR}/${raw_name}" "${ARTIFACTS_DIR}/${opt_name}" +} + +main() { + require_cmd cargo + require_cmd zig + require_cmd wasm-opt + + rm -f "${STAGED_ARTIFACTS[@]/#/${ARTIFACTS_DIR}/}" + + build_zig_minimal + build_zig_full + + build_rust "rust-minimal" --no-default-features + stage_rust_artifact "rust-minimal.wasm" "rust-minimal-opt.wasm" + + build_rust "rust-full" --no-default-features --features full + stage_rust_artifact "rust-full.wasm" "rust-full-opt.wasm" + + echo + echo "staged artifacts:" + ls -la "${ARTIFACTS_DIR}" +} + +main "$@" diff --git a/zig/README.md b/zig/README.md index a49d651..7a1149e 100644 --- a/zig/README.md +++ b/zig/README.md @@ -1,151 +1,200 @@ # md-docrs-zig -Zig 0.16 port of the spec-parsing / URL-building portion of `md-docrs-proxy`, compiled two ways: +Minimal Zig implementation of docs.rs rustdoc JSON URL resolution. -- **WASM** (`wasm32-freestanding`, `ReleaseSmall`) — runs on Cloudflare Workers via `src/index.ts`. Layout and memory protocol mirror [zigflare](https://github.com/mattzcarey/zigflare). -- **Native CLI** — same core `resolve.resolveUrl`, wrapped with argv handling in `lib/cli.zig`. Useful for local iteration and for A/B testing against the Rust binary. +This subtree does three things: -Scope is intentionally narrow so the WASM artifact is directly comparable to a same-scope Rust WASM build: no HTTP, no zstd, no rustdoc-JSON parsing, no Markdown renderer — those stay in the root Rust crate. +- parses `crate[@version][::path::to::item]` +- builds the matching docs.rs rustdoc JSON URL +- exposes that logic as: + - a native CLI + - a small WASM module + - a Cloudflare Worker wrapper + +It does not fetch rustdoc JSON, decode zstd, or render Markdown. Those live on the Rust side. + +## Scope + +`zig/` is the minimal comparison target for the Rust WASM build. 
+ +It owns: + +- spec parsing +- docs.rs URL construction +- `resolve_url` WASM export +- native Zig CLI +- Worker host wrapper + +It does not own: + +- HTTP fetching +- caching +- zstd decoding +- rustdoc JSON parsing +- Markdown rendering +- the main native server ## Layout -``` -zig/ -├── lib/ # Zig sources (build runs here) -│ ├── build.zig -│ ├── build.zig.zon -│ ├── spec.zig # pure: crate[@version][::path] grammar -│ ├── url.zig # pure: docs.rs URL builder -│ ├── resolve.zig # pure: spec + url glue, native tests -│ ├── wasm.zig # WASM entry: alloc / free / resolve_url -│ └── cli.zig # native CLI entry -├── src/ # Cloudflare Worker (TypeScript) -│ ├── index.ts -│ ├── md_docrs.wasm.d.ts -│ └── md_docrs.wasm # produced by `npm run build:wasm` -├── package.json -├── tsconfig.json -└── wrangler.jsonc -``` +- `lib/build.zig` — Zig build definitions +- `lib/cli.zig` — native CLI +- `lib/resolve.zig` — shared resolver logic +- `lib/spec.zig` — spec parser +- `lib/url.zig` — docs.rs URL builder +- `lib/wasm.zig` — minimal WASM ABI +- `src/index.ts` — Cloudflare Worker wrapper +- `src/md_docrs.wasm` — staged WASM artifact used by the Worker ## Build -Everything runs from `zig/lib/`. `zig build` produces only the WASM artifact -by default — the CLI and tests are explicit steps so `npm run build:wasm` -stays focused. +From `zig/`: -```sh -cd zig/lib +```/dev/null/zig-build-npm.sh#L1-2 +npm install +npm run build:wasm +``` -# WASM (default step). -zig build -# -> zig-out/bin/md-docrs.wasm +From `zig/lib/`: -# Native CLI. +```/dev/null/zig-build-lib.sh#L1-3 +zig build zig build cli -# -> zig-out/bin/md-docrs-zig - -# Unit tests (spec / url / resolve). zig build test ``` +From the repo root: + +```/dev/null/zig-build-root.sh#L1-1 +zig build test --build-file zig/lib/build.zig +``` + ## Native CLI -`md-docrs-zig` wraps the same `resolve.resolveUrl` that the WASM build -exports, so it's the fastest way to sanity-check a spec without spinning -up the Worker. 
+Build: -```sh +```/dev/null/zig-cli-build.sh#L1-2 cd zig/lib zig build cli +``` -# Run directly. -./zig-out/bin/md-docrs-zig serde -# https://docs.rs/crate/serde/latest/json/57.zst +Run: +```/dev/null/zig-cli-run.sh#L1-4 +./zig-out/bin/md-docrs-zig serde ./zig-out/bin/md-docrs-zig 'tokio@1.52.1::sync::Mutex' -# https://docs.rs/crate/tokio/1.52.1/json/57.zst - ./zig-out/bin/md-docrs-zig 'anyhow::Error' --target x86_64-unknown-linux-gnu -# https://docs.rs/crate/anyhow/latest/x86_64-unknown-linux-gnu/json/57.zst +zig build run -- 'tokio@1.52.1::sync::Mutex' --target x86_64-unknown-linux-gnu +``` -./zig-out/bin/md-docrs-zig --help +Usage: -# Or run through the build system (rebuilds if needed, forwards args after --). -zig build run -- 'tokio@1.52.1::sync::Mutex' --target x86_64-unknown-linux-gnu +```/dev/null/zig-cli-usage.txt#L1-1 +md-docrs-zig [--target TRIPLE] +``` + +Spec grammar: + +```/dev/null/spec.txt#L1-1 +crate[@version][::path::to::item] ``` -Exit codes: +Behavior: -| Code | Meaning | -| --- | --- | -| 0 | URL printed to stdout. | -| 2 | Bad spec, missing `--target` value, or unknown argument (usage on stderr). | +- prints the resolved docs.rs rustdoc JSON URL to stdout +- exits `0` on success +- exits `2` for invalid input, missing `--target` value, or unexpected arguments + +Examples of output: + +```/dev/null/zig-cli-output.txt#L1-3 +https://docs.rs/crate/serde/latest/json/57.zst +https://docs.rs/crate/tokio/1.52.1/json/57.zst +https://docs.rs/crate/anyhow/latest/x86_64-unknown-linux-gnu/json/57.zst +``` ## Worker -```sh +The Worker is a thin host around the Zig WASM module. 
+ +Setup and run: + +```/dev/null/zig-worker-dev.sh#L1-4 cd zig npm install -npm run build:wasm # builds lib/ and copies the wasm into src/ -npm run dev # wrangler dev on localhost -npm run deploy # wrangler deploy +npm run build:wasm +npm run dev ``` -Endpoints: +Deploy: -```sh -curl localhost:8787/serde # latest +```/dev/null/zig-worker-deploy.sh#L1-1 +npm run deploy +``` + +Accepted request forms: + +```/dev/null/zig-worker-routes.txt#L1-4 +GET / +GET /?target= +GET /?spec= +GET /?spec=&target= +``` + +Examples: + +```/dev/null/zig-worker-curl.sh#L1-4 +curl localhost:8787/serde curl localhost:8787/tokio@1.52.1::sync::Mutex curl 'localhost:8787/tokio::sync::Mutex?target=x86_64-unknown-linux-gnu' curl 'localhost:8787/?spec=anyhow::Error' ``` -All three print the fully resolved `https://docs.rs/crate//[/]/json/57.zst` URL. +Responses: + +- success: plain text docs.rs URL plus trailing newline +- failure: `400` with plain text error +- empty spec: `400` with a short usage message ## WASM ABI -Exported from `lib/wasm.zig`: +The module exports a small C-style ABI: | Export | Signature | Notes | | --- | --- | --- | -| `alloc` | `(len: u32) -> *u8` | Backed by `std.heap.wasm_allocator`. Returns 0 on OOM. | -| `free` | `(ptr: *u8, len: u32)` | Caller must pass the exact length passed to `alloc`. | -| `resolve_url` | `(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` | Returns bytes written, or 0 on bad spec / out-of-space. `target_len == 0` means "no target override". | +| `alloc` | `(len: u32) -> *u8` | Allocates linear memory. Returns `0` on failure. | +| `free` | `(ptr: *u8, len: u32)` | Frees memory allocated by `alloc`. | +| `resolve_url` | `(spec_ptr, spec_len, target_ptr, target_len, out_ptr, out_cap) -> u32` | Writes the resolved URL into caller-provided memory. Returns bytes written, or `0` on error. 
| -Memory protocol notes in the zigflare [`doc/memory.md`](https://github.com/mattzcarey/zigflare/blob/main/doc/memory.md) apply verbatim: always recreate `Uint8Array` views *after* each `alloc`, since WASM memory growth detaches existing views. +Contract: -## Comparing with Rust WASM +- `target_len == 0` means no explicit target +- caller owns input and output buffers +- output buffer must be large enough for the full URL +- return value `0` means invalid spec or insufficient output capacity -The Rust equivalent lives at [`../rust-wasm/`](../rust-wasm/README.md). It exports -the same `alloc` / `free` / `resolve_url` symbols with byte-for-byte identical -signatures, so the Worker at `src/index.ts` can swap between the two by changing -a single import path. +The Worker currently uses a fixed output buffer of `512` bytes. -```sh -# Minimal parity build — matches this Zig wasm surface 1:1. -cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm --no-default-features -cp ../target/wasm32-unknown-unknown/wasm-release/md_docrs_wasm.wasm \ - src/md_docrs.wasm # drop-in replacement for the Zig artifact +## Relationship to Rust -# Full pipeline build — also exports `render_markdown` (JSON → Markdown). -cargo build --profile wasm-release --target wasm32-unknown-unknown \ - -p md-docrs-wasm -``` +This Zig module matches the minimal ABI surface of `crates/md-docrs-rust-wasm`: + +- same exported function names +- same memory ownership model +- same `resolve_url` contract -What we're comparing: +That lets the comparison harness swap Rust and Zig artifacts with the same host-side calling convention. -- `.wasm` size (Zig `ReleaseSmall` + `strip` vs. Rust `opt-level=z` + fat LTO + `strip`). -- Instantiation + per-call latency in a Worker. -- Cold-start cost (wrangler measures this). 
+Use the repo-level comparison flow from the repository root: + +```/dev/null/wasm-compare.sh#L1-2 +./wasm/build.sh +cargo run -p md-docrs-wasm-compare -- --offline +``` -For a host-neutral comparison that doesn't involve wrangler, use the -[`wasm/`](../wasm/README.md) harness at the repo root. It builds both -modules, runs the exact same specs through each inside embedded wasmtime -(optionally wasmer), and reports byte size, output parity, and median / p95 -per-call latency in a single table. +## Notes -Porting `render_markdown` to Zig is the interesting follow-up — that's -where serde_json / rustdoc-types vs. `std.json` + hand-written types -becomes a real apples-to-apples test. +- current format version is `57` +- default docs.rs base is `https://docs.rs` +- default `zig build` produces the WASM artifact +- `zig build cli` builds the native CLI separately +- this subtree is intentionally narrow so size and latency comparisons stay meaningful \ No newline at end of file