diff --git a/Cargo.lock b/Cargo.lock index b37acbc..b5dc3c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1788,7 +1788,7 @@ dependencies = [ [[package]] name = "spackle" -version = "0.5.0-rc3" +version = "0.5.0-rc4" dependencies = [ "async-process", "async-stream", @@ -1812,7 +1812,7 @@ dependencies = [ [[package]] name = "spackle-cli" -version = "0.5.0-rc3" +version = "0.5.0-rc4" dependencies = [ "anyhow", "atty", @@ -1829,7 +1829,7 @@ dependencies = [ [[package]] name = "spackle-wasm" -version = "0.5.0-rc3" +version = "0.5.0-rc4" dependencies = [ "console_error_panic_hook", "getrandom 0.2.17", diff --git a/Cargo.toml b/Cargo.toml index 3aae88f..e4571ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spackle" -version = "0.5.0-rc3" +version = "0.5.0-rc4" edition = "2021" repository = "https://github.com/a2-ai/spackle" description = "A frictionless project templating tool." diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 8d95c8b..d96fb44 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spackle-cli" -version = "0.5.0-rc3" +version = "0.5.0-rc4" edition = "2021" repository = "https://github.com/a2-ai/spackle" diff --git a/crates/spackle-wasm/Cargo.toml b/crates/spackle-wasm/Cargo.toml index 1e03520..dd2cc57 100644 --- a/crates/spackle-wasm/Cargo.toml +++ b/crates/spackle-wasm/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spackle-wasm" -version = "0.5.0-rc3" +version = "0.5.0-rc4" edition = "2021" repository = "https://github.com/a2-ai/spackle" description = "wasm-bindgen surface for spackle. Bundle-in / bundle-out pure-function exports." diff --git a/crates/spackle-wasm/src/callback_fs.rs b/crates/spackle-wasm/src/callback_fs.rs new file mode 100644 index 0000000..7b504af --- /dev/null +++ b/crates/spackle-wasm/src/callback_fs.rs @@ -0,0 +1,527 @@ +//! `CallbackFs` — wasm-only `FileSystem` impl that emits each output +//! entry through a host-supplied callback as it's produced, instead of +//! 
buffering output in an in-memory map. +//! +//! Used by the streaming `generate` export to remove the peak ≈ 1× +//! output memory hump that the eager `MemoryFs::drain_subtree` path +//! incurs. The host hands us a JS function and a project bundle; we run +//! `Project::generate` against this fs; every `write_file` / +//! `create_dir_all` under `out_root` becomes an event delivered to the +//! callback synchronously, with the bytes dropped immediately. +//! +//! Source bundle reads still go through an inner `MemoryFs` (input-side +//! eager read is the documented remaining ceiling — out of scope here). +//! +//! Internal split: +//! - source paths (anywhere outside `out_root`): delegated to the +//! inner `MemoryFs` (`read_file`, `list_dir`, `stat`, `exists`). +//! - output paths (under `out_root`): `write_file` → file event, +//! `create_dir_all` → dir event(s) (root-to-leaf, deduped). Reads +//! of output paths return `NotFound` — output is write-only. +//! +//! `exists(out_root)` returns `false` so `Project::generate`'s +//! `AlreadyExists` guard at `src/lib.rs:160` lets generation proceed — +//! the host has already cleared/created the real output dir before +//! calling us. +//! +//! Errors from the JS callback are latched in `callback_error`; once +//! latched, subsequent writes short-circuit with `io::Error` so the +//! template phase (which collects per-file errors at +//! `src/template.rs:235-241` rather than aborting) can surface them +//! without re-entering JS. The wasm export checks the latch after +//! `Project::generate` returns and prefers the latched JS error over +//! the synthesized `GenerateError`. +//! +//! The fs is parameterized over an `EntrySink` trait so cargo tests can +//! drive the streaming logic with a `Vec`-backed sink (no wasm runtime). 
+ +use std::cell::RefCell; +use std::collections::HashSet; +use std::io; +use std::path::{Path, PathBuf}; + +use serde::Serialize; +use spackle::fs::{FileEntry, FileStat, FileSystem}; +use wasm_bindgen::{JsCast, JsValue}; + +use crate::memory_fs::MemoryFs; + +/// One streamed entry passed to the sink. Borrowed — the sink is +/// expected to serialize/forward and not retain. +pub enum StreamEntry<'a> { + File { path: &'a str, bytes: &'a [u8] }, + Dir { path: &'a str }, +} + +/// Sink for streamed entries. Production wasm impl wraps a +/// `js_sys::Function`; tests substitute a `Vec`-backed sink. +pub trait EntrySink { + /// Called once per output entry. On error, return `Err(message)` — + /// `CallbackFs` latches the message and aborts further writes. + fn emit(&self, entry: StreamEntry<'_>) -> Result<(), String>; +} + +pub struct CallbackFs<S: EntrySink> { + source: MemoryFs, + out_root: PathBuf, + sink: S, + emitted_dirs: RefCell<HashSet<PathBuf>>, + callback_error: RefCell<Option<String>>, +} + +impl<S: EntrySink> CallbackFs<S> { + pub fn new(source: MemoryFs, out_root: PathBuf, sink: S) -> Self { + Self { + source, + out_root, + sink, + emitted_dirs: RefCell::new(HashSet::new()), + callback_error: RefCell::new(None), + } + } + + /// If the JS callback threw at any point, this returns the latched + /// message. The wasm export consults this after `Project::generate` + /// returns and surfaces it as the response error. 
+ pub fn take_callback_error(&self) -> Option<String> { + self.callback_error.borrow_mut().take() + } + + fn is_output(&self, path: &Path) -> bool { + path == self.out_root || path.starts_with(&self.out_root) + } + + fn relative_to_out(&self, path: &Path) -> String { + let stripped = path.strip_prefix(&self.out_root).unwrap_or(path); + normalize_to_forward(stripped) + } + + fn emit(&self, entry: StreamEntry<'_>) -> io::Result<()> { + if self.callback_error.borrow().is_some() { + return Err(io::Error::new(io::ErrorKind::Other, "callback aborted")); + } + match self.sink.emit(entry) { + Ok(()) => Ok(()), + Err(msg) => { + *self.callback_error.borrow_mut() = Some(msg.clone()); + Err(io::Error::new(io::ErrorKind::Other, msg)) + } + } + } +} + +/// Join a path's components with `/`, dropping root components. We +/// never touch a real OS fs in wasm, so paths are usually already `/`- +/// separated, but templated destination paths (`copy::copy:104`) could +/// in theory contain backslashes. NOTE(review): only a component equal to +/// `\` is dropped; one embedded in a name looks like it passes through — confirm. 
+fn normalize_to_forward(path: &Path) -> String { + let mut out = String::new(); + let mut first = true; + for c in path.components() { + let part = c.as_os_str().to_string_lossy(); + if part == "/" || part == "\\" { + continue; + } + if !first { + out.push('/'); + } + out.push_str(&part); + first = false; + } + out +} + +impl<S: EntrySink> FileSystem for CallbackFs<S> { + fn read_file(&self, path: &Path) -> io::Result<Vec<u8>> { + if self.is_output(path) { + return Err(io::Error::new( + io::ErrorKind::NotFound, + format!("output is write-only: {}", path.display()), + )); + } + self.source.read_file(path) + } + + fn write_file(&self, path: &Path, content: &[u8]) -> io::Result<()> { + if !self.is_output(path) { + return Err(io::Error::new( + io::ErrorKind::PermissionDenied, + format!("write outside out_root: {}", path.display()), + )); + } + let rel = self.relative_to_out(path); + self.emit(StreamEntry::File { + path: &rel, + bytes: content, + }) + } + + fn create_dir_all(&self, path: &Path) -> io::Result<()> { + if !self.is_output(path) { + return Ok(()); + } + if self.callback_error.borrow().is_some() { + return Err(io::Error::new(io::ErrorKind::Other, "callback aborted")); + } + + // Walk the ancestor chain leaf-to-root, then emit reverse for + // root-first parent-before-child ordering. `out_root` itself is + // skipped — host creates the real outDir up front. 
+ let mut chain: Vec<PathBuf> = Vec::new(); + let mut current = path; + loop { + if current == self.out_root.as_path() { + break; + } + if !self.is_output(current) { + break; + } + chain.push(current.to_path_buf()); + match current.parent() { + Some(parent) if parent != current => current = parent, + _ => break, + } + } + + for ancestor in chain.iter().rev() { + if self.emitted_dirs.borrow().contains(ancestor) { + continue; + } + let rel = self.relative_to_out(ancestor); + if rel.is_empty() { + continue; + } + self.emit(StreamEntry::Dir { path: &rel })?; + self.emitted_dirs.borrow_mut().insert(ancestor.clone()); + } + Ok(()) + } + + fn list_dir(&self, path: &Path) -> io::Result<Vec<FileEntry>> { + if self.is_output(path) { + return Ok(Vec::new()); + } + self.source.list_dir(path) + } + + fn copy_file(&self, src: &Path, dst: &Path) -> io::Result<()> { + let bytes = self.source.read_file(src)?; + self.write_file(dst, &bytes) + } + + fn exists(&self, path: &Path) -> bool { + if self.is_output(path) { + if path == self.out_root.as_path() { + // Required: Project::generate checks + // `fs.exists(out_dir)` at `src/lib.rs:160` and aborts + // with AlreadyExists if true. Our streaming model + // expects the host to have created the real outDir + // separately, so the in-fs view of out_root always + // reports "not there", even after entries have streamed. 
+ return false; + } + return self.emitted_dirs.borrow().contains(path); + } + self.source.exists(path) + } + + fn stat(&self, path: &Path) -> io::Result<FileStat> { + if self.is_output(path) { + return Err(io::Error::new( + io::ErrorKind::NotFound, + format!("output is write-only: {}", path.display()), + )); + } + self.source.stat(path) + } +} + +// --- JS-backed sink for the wasm export --- + +#[derive(Serialize)] +#[serde(tag = "kind", rename_all = "lowercase")] +enum EmittedEntry<'a> { + File { + path: &'a str, + #[serde(with = "serde_bytes")] + bytes: &'a [u8], + }, + Dir { + path: &'a str, + }, +} + +/// `EntrySink` impl that serializes the entry and dispatches to a +/// host-provided `js_sys::Function`. Errors thrown by the callback +/// (anything that makes `call1` return `Err`) are stringified into the +/// returned message and surfaced through `CallbackFs::take_callback_error`. +pub struct JsCallbackSink { + function: js_sys::Function, +} + +impl JsCallbackSink { + pub fn new(function: js_sys::Function) -> Self { + Self { function } + } +} + +impl EntrySink for JsCallbackSink { + fn emit(&self, entry: StreamEntry<'_>) -> Result<(), String> { + let event = match entry { + StreamEntry::File { path, bytes } => EmittedEntry::File { path, bytes }, + StreamEntry::Dir { path } => EmittedEntry::Dir { path }, + }; + let value = event + .serialize(&serde_wasm_bindgen::Serializer::new()) + .map_err(|e| format!("serialize entry: {}", e))?; + self.function + .call1(&JsValue::NULL, &value) + .map(|_| ()) + .map_err(|e| stringify_jsvalue(&e)) + } +} + +fn stringify_jsvalue(v: &JsValue) -> String { + if let Some(s) = v.as_string() { + return s; + } + if let Some(err) = v.dyn_ref::<js_sys::Error>() { + return String::from(err.to_string()); + } + format!("{:?}", v) +} + +#[cfg(all(test, not(target_arch = "wasm32")))] +mod tests { + use super::*; + use crate::memory_fs::BundleEntry; + use std::cell::RefCell; + use std::collections::HashMap; + + /// Vec-backed sink for cargo tests — captures every 
emitted entry + /// and (optionally) errors on the Nth call. + struct VecSink { + events: RefCell<Vec<OwnedEvent>>, + fail_after: Option<usize>, + } + + #[derive(Debug, Clone, PartialEq, Eq)] + enum OwnedEvent { + File { path: String, bytes: Vec<u8> }, + Dir { path: String }, + } + + impl VecSink { + fn new() -> Self { + Self { + events: RefCell::new(Vec::new()), + fail_after: None, + } + } + + fn failing_after(n: usize) -> Self { + Self { + events: RefCell::new(Vec::new()), + fail_after: Some(n), + } + } + + fn into_events(self) -> Vec<OwnedEvent> { + self.events.into_inner() + } + } + + impl EntrySink for VecSink { + fn emit(&self, entry: StreamEntry<'_>) -> Result<(), String> { + if let Some(n) = self.fail_after { + if self.events.borrow().len() >= n { + return Err(format!("simulated failure after {} events", n)); + } + } + self.events.borrow_mut().push(match entry { + StreamEntry::File { path, bytes } => OwnedEvent::File { + path: path.to_string(), + bytes: bytes.to_vec(), + }, + StreamEntry::Dir { path } => OwnedEvent::Dir { + path: path.to_string(), + }, + }); + Ok(()) + } + } + + fn make_fs(bundle: Vec<BundleEntry>, sink: VecSink) -> CallbackFs<VecSink> { + let source = MemoryFs::from_bundle(bundle); + CallbackFs::new(source, PathBuf::from("/output"), sink) + } + + #[test] + fn out_root_exists_returns_false_for_alreadyexists_guard() { + // Project::generate at src/lib.rs:160 calls fs.exists(out_dir) + // and aborts if true. Streaming model expects host to manage + // the real outDir; the in-fs view of out_root must report false + // (CallbackFs::exists returns false for out_root unconditionally). + let fs = make_fs(vec![], VecSink::new()); + assert!(!fs.exists(Path::new("/output"))); + } + + #[test] + fn create_dir_all_on_out_root_emits_nothing() { + // copy::copy:51 calls fs.create_dir_all(dest) eagerly. The + // streaming sink should NOT receive an event for out_root + // itself — the host already created the real outDir. 
+ let fs = make_fs(vec![], VecSink::new()); + fs.create_dir_all(Path::new("/output")).unwrap(); + assert!(fs.emitted_dirs.borrow().is_empty()); + } + + #[test] + fn nested_create_dir_all_emits_root_to_leaf_deduped() { + let sink = VecSink::new(); + let source = MemoryFs::from_bundle(vec![]); + let fs = CallbackFs::new(source, PathBuf::from("/output"), sink); + + fs.create_dir_all(Path::new("/output/a/b/c")).unwrap(); + fs.create_dir_all(Path::new("/output/a/b/d")).unwrap(); + + let events = fs.sink.into_events(); + let dir_paths: Vec<&str> = events + .iter() + .filter_map(|e| match e { + OwnedEvent::Dir { path } => Some(path.as_str()), + _ => None, + }) + .collect(); + assert_eq!(dir_paths, vec!["a", "a/b", "a/b/c", "a/b/d"]); + } + + #[test] + fn write_file_emits_file_event_with_relative_path() { + let sink = VecSink::new(); + let fs = make_fs(vec![], sink); + + fs.create_dir_all(Path::new("/output/sub")).unwrap(); + fs.write_file(Path::new("/output/sub/a.txt"), b"hello") + .unwrap(); + + let events = fs.sink.into_events(); + assert_eq!( + events, + vec![ + OwnedEvent::Dir { + path: "sub".to_string(), + }, + OwnedEvent::File { + path: "sub/a.txt".to_string(), + bytes: b"hello".to_vec(), + }, + ] + ); + } + + #[test] + fn callback_error_latches_and_short_circuits_subsequent_writes() { + // Sink throws on the 2nd emit. First call goes through, second + // latches; subsequent write_file/create_dir_all must error + // without re-entering the sink. + let source = MemoryFs::from_bundle(vec![]); + let fs = CallbackFs::new(source, PathBuf::from("/output"), VecSink::failing_after(1)); + + fs.create_dir_all(Path::new("/output/a")).unwrap(); + // Second emission attempt → sink fails → latched. + let err = fs + .write_file(Path::new("/output/a/x.txt"), b"x") + .unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::Other); + + // Subsequent writes short-circuit without reaching the sink. 
+ let err2 = fs + .write_file(Path::new("/output/a/y.txt"), b"y") + .unwrap_err(); + assert_eq!(err2.kind(), io::ErrorKind::Other); + assert!(err2.to_string().contains("callback aborted")); + + // Latched message is recoverable. + assert!(fs + .take_callback_error() + .unwrap() + .contains("simulated failure")); + } + + #[test] + fn copy_file_reads_source_and_emits_file_event() { + let bundle = vec![BundleEntry { + path: "/project/a.txt".to_string(), + bytes: b"src-bytes".to_vec(), + }]; + let fs = make_fs(bundle, VecSink::new()); + + fs.create_dir_all(Path::new("/output")).unwrap(); + fs.copy_file(Path::new("/project/a.txt"), Path::new("/output/a.txt")) + .unwrap(); + + let events = fs.sink.into_events(); + assert_eq!( + events, + vec![OwnedEvent::File { + path: "a.txt".to_string(), + bytes: b"src-bytes".to_vec(), + }] + ); + } + + #[test] + fn end_to_end_streaming_generate_against_callback_fs() { + // Exercise the full Project::generate pipeline through + // CallbackFs. Mirrors `memory_fs::tests::end_to_end_generate_against_memory_fs` + // but asserts the streaming-event sequence rather than the + // drained bundle. + let project_toml = br#"name = "demo" +[[slots]] +key = "name" +type = "String" +"#; + let template = b"hello from {{ name }}\n"; + + let bundle = vec![ + BundleEntry { + path: "/project/spackle.toml".into(), + bytes: project_toml.to_vec(), + }, + BundleEntry { + path: "/project/{{name}}.txt.j2".into(), + bytes: template.to_vec(), + }, + ]; + let source = MemoryFs::from_bundle(bundle); + let fs = CallbackFs::new(source, PathBuf::from("/output"), VecSink::new()); + + let project_dir = PathBuf::from("/project"); + let out_dir = PathBuf::from("/output"); + // CallbackFs delegates source-path reads to the inner MemoryFs, + // so load_project can run through it directly. 
+ let project = spackle::load_project(&fs, &project_dir).expect("load_project"); + + let data = HashMap::from([("name".to_string(), "world".to_string())]); + project + .generate(&fs, &project_dir, &out_dir, &data) + .expect("streaming generate succeeds"); + + assert!(fs.take_callback_error().is_none()); + let events = fs.sink.into_events(); + + // The rendered file lands under out_root with the templated + // name. We don't assert the exact event order here (that's + // copy/template implementation detail) — just that the + // expected file event is present with the rendered bytes. + let found = events.iter().any(|e| match e { + OwnedEvent::File { path, bytes } => { + path == "world.txt" && bytes == b"hello from world\n" + } + _ => false, + }); + assert!(found, "rendered file event missing: {:?}", events); + } +} diff --git a/crates/spackle-wasm/src/lib.rs b/crates/spackle-wasm/src/lib.rs index 198f655..60b0939 100644 --- a/crates/spackle-wasm/src/lib.rs +++ b/crates/spackle-wasm/src/lib.rs @@ -17,10 +17,13 @@ //! success: `{ "valid": true }` //! failure: `{ "valid": false, "errors": ["..."] }` //! -//! generate(project_bundle, project_dir, out_dir, slot_data_json) -> JsValue: -//! success: `{ ok: true, files: Array<{path, bytes: Uint8Array}>, dirs: string[] }` -//! where `path` is relative to `out_dir` -//! failure: `{ ok: false, error: "..." }` +//! generate(project_bundle, project_dir, out_dir, slot_data_json, on_entry) -> JsValue: +//! streams each output entry through `on_entry(event)` as it's +//! produced — `{ kind: "file", path, bytes }` or `{ kind: "dir", path }` +//! where `path` is relative to `out_dir`. Returns a terminal envelope: +//! success: `{ ok: true }` +//! failure: `{ ok: false, error: "..." }` (host callback throws are +//! latched and surfaced here; they win over downstream Rust errors). //! //! plan_hooks(project_bundle, project_dir, out_dir, data_json, hook_ran_json?) -> String (JSON): //! 
`Vec` — templated commands + should_run + skip_reason + template_errors. @@ -34,8 +37,10 @@ use std::path::{Path, PathBuf}; use serde::Serialize; use wasm_bindgen::prelude::*; +mod callback_fs; pub mod memory_fs; +use callback_fs::{CallbackFs, JsCallbackSink}; use memory_fs::{BundleEntry, MemoryFs}; #[wasm_bindgen(start)] @@ -61,12 +66,6 @@ struct ValidationErr { #[derive(Serialize)] struct GenerateOk { ok: bool, - files: Vec, - /// Directories under `out_dir` relative to it. Included separately - /// from `files` so empty dirs (created by the copy pass for - /// Directory entries that had no files pass the ignore filter) - /// survive the bundle round-trip — host must `mkdir -p` each. - dirs: Vec, } #[derive(Serialize)] @@ -168,9 +167,28 @@ pub fn validate_slot_data( } } -/// Generate a filled project. Runs the full generate pipeline (copy + -/// template fill) against an in-memory fs, returns the rendered subtree -/// as a flat bundle with paths relative to `out_dir`. +/// Generate a filled project, streaming each output entry through a +/// host-supplied callback as it's produced. +/// +/// `on_entry(event)` is called synchronously for every output file and +/// directory, where `event` is one of: +/// - `{ kind: "file", path: string, bytes: Uint8Array }` — `path` is +/// relative to `out_dir`. +/// - `{ kind: "dir", path: string }` — same path semantics. +/// +/// Order: directories arrive root-to-leaf and before any file under +/// them (parent-before-child), interleaved with file events as the +/// copy/template walks produce them. +/// +/// Return envelope: +/// success: `{ ok: true }` +/// failure: `{ ok: false, error: "..." }` +/// +/// If `on_entry` throws, the error is latched and short-circuits any +/// remaining Rust-side writes; the latched message is returned as the +/// envelope's `error`. The host callback wins over downstream +/// `GenerateError`s — those are typically just downstream effects of +/// the latched abort. 
/// /// Hooks are a separate step — mirror the native CLI's two-call shape /// (`project.generate(...)` then `project.run_hooks_stream(...)`). Call @@ -182,15 +200,18 @@ pub fn generate( project_dir: &str, out_dir: &str, slot_data_json: &str, + on_entry: js_sys::Function, ) -> JsValue { let entries = match decode_bundle(project_bundle) { Ok(e) => e, Err(msg) => return generate_err_value(msg), }; - let fs = MemoryFs::from_bundle(entries); let project_path = PathBuf::from(project_dir); let out_path = PathBuf::from(out_dir); + let source = MemoryFs::from_bundle(entries); + let fs = CallbackFs::new(source, out_path.clone(), JsCallbackSink::new(on_entry)); + let project = match spackle::load_project(&fs, &project_path) { Ok(p) => p, Err(e) => return generate_err_value(e.to_string()), @@ -205,18 +226,23 @@ pub fn generate( return generate_err_value(format!("slot data invalid: {}", e)); } - if let Err(e) = project.generate(&fs, &project_path, &out_path, &slot_data) { - return generate_err_value(e.to_string()); + let generate_result = project.generate(&fs, &project_path, &out_path, &slot_data); + + // Host callback errors latch in CallbackFs and turn subsequent + // writes into io::Error — those propagate up as + // `GenerateError::FileError`. The latched message is the original + // cause; surface it preferentially. + if let Some(msg) = fs.take_callback_error() { + return generate_err_value(msg); } - let (files, dirs) = fs.drain_subtree(&out_path); - GenerateOk { - ok: true, - files, - dirs, + if let Err(e) = generate_result { + return generate_err_value(e.to_string()); } - .serialize(&serializer()) - .unwrap_or(JsValue::NULL) + + GenerateOk { ok: true } + .serialize(&serializer()) + .unwrap_or(JsValue::NULL) } /// Evaluate a hook plan for the project. 
Pure — no subprocess spawning, @@ -258,7 +284,13 @@ pub fn plan_hooks( Ok(e) => e, Err(msg) => return plan_hooks_err(msg), }; - plan_hooks_from_entries(entries, project_dir, out_dir, data_json, hook_ran_json.as_deref()) + plan_hooks_from_entries( + entries, + project_dir, + out_dir, + data_json, + hook_ran_json.as_deref(), + ) } /// Pure-Rust implementation of `plan_hooks`. Split out so native tests @@ -302,8 +334,7 @@ fn plan_hooks_from_entries( // the items set (so dependent hooks' `needs` resolution still // finds them). Skipping iteration prevents the planner from // overwriting our hook_ran_ seed on its success branch. - let mut executed_keys: std::collections::HashSet = - std::collections::HashSet::new(); + let mut executed_keys: std::collections::HashSet = std::collections::HashSet::new(); if let Some(raw) = hook_ran_json { let hook_ran: HashMap = match serde_json::from_str(raw) { Ok(d) => d, @@ -332,7 +363,10 @@ fn plan_hooks_from_entries( .filter(|e| !executed_keys.contains(&e.key)) .collect(); - json_or_panic(&PlanHooksOk { ok: true, plan: &plan }) + json_or_panic(&PlanHooksOk { + ok: true, + plan: &plan, + }) } /// Planner with native `run_hooks_stream` ordering: is_enabled → @@ -499,9 +533,8 @@ mod plan_hooks_tests { use super::*; use serde_json::Value; - const FIXTURE_TOML: &[u8] = include_bytes!( - "../../../tests/fixtures/hooks_fixture/spackle.toml" - ); + const FIXTURE_TOML: &[u8] = + include_bytes!("../../../tests/fixtures/hooks_fixture/spackle.toml"); fn fixture_bundle() -> Vec { vec![BundleEntry { @@ -578,13 +611,7 @@ mod plan_hooks_tests { #[test] fn invalid_data_json_returns_err_shape() { - let raw = plan_hooks_from_entries( - fixture_bundle(), - "/project", - "/tmp/o", - "not json", - None, - ); + let raw = plan_hooks_from_entries(fixture_bundle(), "/project", "/tmp/o", "not json", None); let v: Value = serde_json::from_str(&raw).unwrap(); assert_eq!(v["ok"], false); assert!( @@ -643,11 +670,7 @@ default = true // remaining plan must still 
mark hook_c as satisfied — dropping // hook_a from the items set would wrongly demote hook_c to // unsatisfied_needs. - let plan = call_with_bundle( - needs_fixture_bundle(), - "{}", - Some(r#"{"hook_a": true}"#), - ); + let plan = call_with_bundle(needs_fixture_bundle(), "{}", Some(r#"{"hook_a": true}"#)); let hook_c = plan .as_array() .unwrap() @@ -661,7 +684,10 @@ default = true ); // hook_a was executed — should be stripped from the returned plan. assert!( - plan.as_array().unwrap().iter().all(|e| e["key"] != "hook_a"), + plan.as_array() + .unwrap() + .iter() + .all(|e| e["key"] != "hook_a"), "hook_a should not appear in the remaining plan: {}", plan ); diff --git a/docs/design/wasm.md b/docs/design/wasm.md index cc15337..7c96c4e 100644 --- a/docs/design/wasm.md +++ b/docs/design/wasm.md @@ -8,19 +8,26 @@ For the running implementation log, see [`SUMMARY.md`](SUMMARY.md). ## One-paragraph architecture -`crates/spackle-wasm/` is a `cdylib` crate that depends on `spackle` via path. It exposes four `#[wasm_bindgen]` functions — `check`, `validate_slot_data`, `generate`, `plan_hooks` — that take a **project bundle** (`Array<{path, bytes: Uint8Array}>`), hydrate an in-process `MemoryFs` from it, run the requested operation against that fs through the generic `spackle::fs::FileSystem` trait, and return a serialized result. `generate` additionally returns an output bundle; `plan_hooks` returns a resolved hook plan (templated commands + should-run + skip reasons) that the host executes. Rust never touches the host filesystem; the TS host (`ts/`) reads projects into bundles, writes output bundles back to disk, and spawns hook subprocesses on its side. Fundamentally: Rust is a pure compute step. +`crates/spackle-wasm/` is a `cdylib` crate that depends on `spackle` via path. 
It exposes four `#[wasm_bindgen]` functions — `check`, `validate_slot_data`, `generate`, `plan_hooks` — that take a **project bundle** (`Array<{path, bytes: Uint8Array}>`), hydrate an in-process `MemoryFs` from it, and run the requested operation against that fs through the generic `spackle::fs::FileSystem` trait. `check` / `validate_slot_data` / `plan_hooks` return their result as a serialized envelope. `generate` is **streaming-only**: it takes a host callback (`on_entry`) and emits each output file/dir through it as Rust produces them, returning just an `{ok}` envelope; the rendered tree never accumulates Rust-side. Rust never touches the host filesystem; the TS host (`ts/`) reads projects into bundles, picks a sink for the streamed output (memory buffer, async iterator, or sync disk write), and spawns hook subprocesses on its side. Fundamentally: Rust is a pure compute step. ``` ┌───────────────────────────────────────────────────────┐ │ TS host (ts/src/) │ │ │ │ DiskFs — readProject(dir) → Bundle │ +│ — prepareOutDir(dir) → string │ +│ — writeEntry(dir, event) ← streaming sink │ │ — writeOutput(dir, Bundle) │ │ MemoryFs — toBundle() / fromBundle(bundle) │ │ (plain TS classes; not passed to wasm) │ +│ │ +│ generate(projectDir, outDir, …, fs) — disk streaming │ +│ generateBundle(bundle, …) — buffer to {files, dirs} │ +│ generateStream(bundle, …) — async-iter ergonomics │ └─────────────────────┬─────────────────────────────────┘ - │ bundle in, bundle out - │ (Uint8Array across the boundary) + │ bundle in (eager input read); + │ output entries fed back through + │ on_entry callback while wasm runs ┌─────────────────────▼─────────────────────────────────┐ │ wasm-bindgen layer (crates/spackle-wasm/src/lib.rs) │ │ │ @@ -28,15 +35,18 @@ For the running implementation log, see [`SUMMARY.md`](SUMMARY.md). 
│ pub fn validate_slot_data(bundle, project_dir, │ │ slot_data_json) -> String │ │ pub fn generate(bundle, project_dir, out_dir, │ -│ slot_data_json) │ -│ -> JsValue {ok, files, dirs|error} │ +│ slot_data_json, │ +│ on_entry: js_sys::Function) │ +│ -> JsValue {ok | ok+error} │ │ pub fn plan_hooks(bundle, project_dir, out_dir, │ │ data_json, hook_ran_json?) │ │ -> String {ok, plan | error} │ └─────────────────────┬─────────────────────────────────┘ - │ MemoryFs impls FileSystem + │ CallbackFs + │ impls FileSystem (write_file / + │ create_dir_all → JS callback) ┌─────────────────────▼─────────────────────────────────┐ -│ spackle core (src/) │ +│ spackle core (src/) — UNCHANGED │ │ │ │ Project::{check, generate} │ │ template::fill │ @@ -54,12 +64,13 @@ Native CLI (`cli/`) threads `spackle::fs::StdFs` through the same core. The only ``` spackle/ -├── src/ # spackle core (rlib only — no wasm deps) -├── cli/ # spackle-cli (uses StdFs) +├── src/ # spackle core (rlib only — no wasm deps) +├── cli/ # spackle-cli (uses StdFs) ├── crates/ -│ └── spackle-wasm/ # cdylib, wasm-bindgen exports + MemoryFs -│ ├── src/lib.rs # three #[wasm_bindgen] exports + init -│ └── src/memory_fs.rs # MemoryFs impls spackle::fs::FileSystem +│ └── spackle-wasm/ # cdylib, wasm-bindgen exports + MemoryFs +│ ├── src/lib.rs # four #[wasm_bindgen] exports + init +│ ├── src/memory_fs.rs # MemoryFs impls spackle::fs::FileSystem +│ └── src/callback_fs.rs # CallbackFs — streaming sink for `generate` ├── scripts/ │ └── build-wasm.sh # cargo build (wasm32) → wasm-bindgen --target web → wasm-opt ├── ts/ # @a2-ai/spackle npm-shaped TS package @@ -76,12 +87,38 @@ spackle/ ## The bundle contract -A **bundle** is `Array<{path: string, bytes: Uint8Array}>`. Paths in an **input** bundle are absolute from the caller's virtual root (typical: `/project/spackle.toml`). Paths in the **output** bundle returned by `generate` are relative to `outDir`. +A **bundle** is `Array<{path: string, bytes: Uint8Array}>`. 
Paths in an **input** bundle (passed to all four exports) are absolute from the caller's virtual root (typical: `/project/spackle.toml`). -Rust deserializes bundles via `serde-wasm-bindgen` into `Vec<BundleEntry>` where `BundleEntry { path: String, bytes: Vec<u8> }` is annotated with `#[serde(with = "serde_bytes")]` so the default `Serializer::new()` emits `Uint8Array` on the return trip (and accepts it on the way in). +Rust deserializes input bundles via `serde-wasm-bindgen` into `Vec<BundleEntry>` where `BundleEntry { path: String, bytes: Vec<u8> }` is annotated with `#[serde(with = "serde_bytes")]` so the default `Serializer::new()` accepts `Uint8Array` on the way in (and emits it on the way out for the streamed entries — see below). The `MemoryFs` (in `crates/spackle-wasm/src/memory_fs.rs`) auto-creates ancestor dirs when hydrating from the bundle, so callers only need to send file entries — they don't have to enumerate directories explicitly. +## The generate streaming protocol + +`generate` does not return an output bundle. Instead, the host passes a `js_sys::Function` callback as the fifth argument; Rust invokes it synchronously per output entry while the wasm call runs: + +``` +{ kind: "file", path: string, bytes: Uint8Array } +{ kind: "dir", path: string } +``` + +Paths are relative to `out_dir`. Order is parent-before-child: `create_dir_all` events for ancestor directories arrive before any file underneath them, root-to-leaf, deduplicated across repeated `create_dir_all` calls that share ancestors. Files within a directory arrive in whatever order `copy::copy` and `template::fill` produce them. + +The `CallbackFs` impl (`crates/spackle-wasm/src/callback_fs.rs`) is the bridge: + +- Source-bundle reads (`read_file`, `list_dir`, `stat`, `exists` on `/project/...` paths) delegate to an inner `MemoryFs` hydrated from the input bundle. +- `write_file(/output/<path>, bytes)` and `create_dir_all(/output/<path>)` are translated into `{kind, path, bytes?}` events fed to the JS callback. 
+- `exists(/output)` returns `false` so `Project::generate`'s `AlreadyExists` guard at `src/lib.rs:160` doesn't abort — the host is responsible for AlreadyExists semantics on the real disk before calling. +- Errors thrown by the JS callback are latched in a `RefCell>`; subsequent writes short-circuit so the template phase (which collects per-file errors at `src/template.rs:235-241` rather than aborting) can surface them without re-entering JS. The wasm export checks the latch after `Project::generate` returns and prefers the latched JS error over the synthesized `GenerateError`. + +`Project::generate` itself is unchanged — it still writes through the `FileSystem` trait. The streaming behavior is entirely in the wasm-side `CallbackFs`; native callers (CLI) keep using `StdFs` and see no difference. + +The TS package gives consumers three sinks atop this primitive: + +- `generateBundle(bundle, …)` — buffers events into `{ files: Bundle; dirs: string[] }`. Same shape as the legacy buffered API; preserved for in-memory consumers (preview, in-process inspection). **Does not reduce peak memory** — the buffer holds everything. +- `generateStream(bundle, …)` — async generator that yields each entry plus terminal `done` / `error` events. Useful for progress UIs. **Also does not reduce peak memory** because the wasm call is synchronous and the queue accumulates while Rust runs. +- `generate(projectDir, outDir, …, fs)` — synchronously routes each entry to `DiskFs.writeEntry` inside the callback. **This is the only path that bounds peak memory at one entry** — bytes never accumulate host-side. + --- ## Build + test locally @@ -133,3 +170,6 @@ Consumer-facing walkthrough: [`docs/ts/hooks.md`](docs/ts/hooks.md). - **`canonicalize` is gone from the lib.** `Project::get_name` and `get_output_name` use `.file_stem()` / `.file_name()` directly. `DiskFs` canonicalizes host-side for its containment check. 
- **`slugify` appears in `pkg/*/spackle_wasm.d.ts`.** Incidental export from tera's `slug` dep. Not part of our public contract; ignore. - **Tera builtins are fully on.** No `default-features = false` dance — the `slug` cfg collision that motivated it was resolved upstream. +- **`CallbackFs::exists(out_root)` returns `false`.** Required so `Project::generate`'s `AlreadyExists` guard at `src/lib.rs:160` lets generation proceed under streaming. The host owns AlreadyExists semantics for the real disk; the in-memory view of out_root is "not there yet" until something has been written under it. +- **Streaming aborts leave partial output on disk.** When the host callback throws (or returns an error), the wasm export latches it and surfaces a terminal `{ ok: false, error }` envelope, but any entries that already streamed to disk stay there. Matches native CLI behavior — there's no temp-dir + atomic-rename phase. Callers that need atomicity should pick a fresh `outDir` and move it themselves on success. +- **Input bundle is still eager.** `DiskFs.readProject` materializes the project before calling `wasm.generate`. Output is the streaming win; a lazy-input path is a separate, larger change deferred until profiles call for it. diff --git a/docs/ts/api.md b/docs/ts/api.md index 9008c8f..e4ba9dd 100644 --- a/docs/ts/api.md +++ b/docs/ts/api.md @@ -63,7 +63,7 @@ Rules enforced: every declared slot is present, types coerce (`"42"` → `Number ## `generate(projectDir, outDir, slotData, fs, opts?)` -Run the full pipeline: copy non-template files, render `.j2` files, render path placeholders, write everything under `outDir`. Hooks are a separate call — see `runHooksStream()` below. +Run the full pipeline: copy non-template files, render `.j2` files, render path placeholders, write everything under `outDir`. Each rendered entry is **streamed straight to disk** as Rust produces it — peak memory is bounded by one file, not by the whole rendered output. 
Hooks are a separate call — see `runHooksStream()` below. ```ts function generate( @@ -75,16 +75,22 @@ function generate( virtualProjectDir?: string; virtualOutDir?: string; }, -): Promise<GenerateResponse>; +): Promise<GenerateDiskResponse>; -type GenerateResponse = - | { ok: true; files: Bundle; dirs: string[] } +type GenerateDiskResponse = + | { ok: true; files: number; dirs: number } | { ok: false; error: string }; ``` -`result.files` carries the rendered bundle with paths **relative to `outDir`**. `result.dirs` carries directory paths (also relative) — present so **empty directories survive the round-trip**. Native `spackle generate` calls `create_dir_all` for every directory walked during the copy pass; without emitting them, a project whose `drafts/` directory is fully ignored (every file filtered out) would still have `drafts/` created on native but silently dropped under wasm. Hosts writing output manually MUST mkdir each entry in `dirs` to match native behavior. +The success shape is **counts**, not a materialized bundle. The rendered tree lands directly under `outDir`; if you also need the bytes in memory (preview, in-process consumers), call `generateBundle` instead. + +Output-dir contract: `generate` (via `DiskFs.assertOutDirAvailable`) throws if `outDir` already exists, matching native's `GenerateError::AlreadyExists`. The check runs **before** the wasm call and does not create `outDir`, so a pre-existing target fails fast with no Rust-side work. + +Errors mid-stream are not rolled back: any files already written before the failure remain on disk (matches native CLI behavior). Pick a fresh `outDir` per run. -`DiskFs.writeOutput` (built-in) handles both `files` and `dirs` for you. Output-dir contract: `writeOutput` throws if `outDir` already exists, matching native's `GenerateError::AlreadyExists`. +The input read (`fs.readProject(projectDir)`) is still eager — the project bundle is materialized in memory before the stream starts.
Templates are typically small on input and large on output, so the streaming win is on the output side; lazy input reads are deferred to a later change. + +> **Breaking change (vs. 0.5.0-rc3):** `generate` previously returned `{ ok: true; files: Bundle; dirs: string[] }` and host code consumed `result.files` / `result.dirs` to render UI. Migration: read the rendered tree from disk, or switch to `generateBundle` if you specifically need the bundle shape in memory. ## `planHooks(projectDir, outDir, data, fs, opts?)` @@ -220,6 +226,11 @@ function generateBundle( slotData: SlotData, opts?: { virtualProjectDir?: string; virtualOutDir?: string }, ): Promise; +function generateStream( + bundle: Bundle, + slotData: SlotData, + opts?: { virtualProjectDir?: string; virtualOutDir?: string }, +): AsyncGenerator; function planHooksBundle( projectBundle: Bundle, virtualProjectDir: string, @@ -227,9 +238,27 @@ function planHooksBundle( data: Record, hookRan?: Record, ): Promise; + +type GenerateResponse = + | { ok: true; files: Bundle; dirs: string[] } + | { ok: false; error: string }; + +type GenerateStreamEvent = + | { kind: "file"; path: string; bytes: Uint8Array } + | { kind: "dir"; path: string } + | { kind: "error"; error: string } + | { kind: "done" }; ``` -`generateBundle` has no streaming-hooks counterpart — bundle-only mode doesn't have a real `cwd` for subprocess execution. Use the disk-backed `runHooksStream()` above when you need hooks. +Three modes for in-memory consumers: + +- **`generateBundle`** — buffers every streamed entry into a `{ files, dirs }` bundle. Same shape and semantics as the pre-streaming API; suited to in-process preview where you want the whole rendered tree at once. **Does not reduce peak memory** — the buffer holds everything. 
Output is **deduped by path** (later writes replace earlier ones, matching disk-streaming's `writeFileSync` overwrite semantics — relevant when both `copy::copy` and `template::fill` produce the same output path) and **sorted by path** for deterministic ordering across runs. +- **`generateStream`** — yields each entry as an `AsyncGenerator` event with a terminal `done` / `error`. Use for progress UIs or anything driven off an async iterator. **Also does not reduce peak memory**: the wasm call is synchronous, so events accumulate in a queue while Rust runs and flush out after. The win here is API ergonomics, not memory. +- **`generate(projectDir, outDir, …)`** — the only path that genuinely bounds peak memory at one entry, because writes happen synchronously inside the host callback (see above). + +Output bundle paths are **relative to `virtualOutDir`** (default `/output`). `dirs` exists so empty directories survive the round-trip — native `spackle generate` calls `create_dir_all` for every directory walked during the copy pass, including ones whose contents are fully ignored. Without emitting dir entries, a project whose `drafts/` directory is fully ignored would still have `drafts/` created on native but silently dropped under wasm. + +`generateBundle` / `generateStream` have no streaming-hooks counterpart — bundle-only mode doesn't have a real `cwd` for subprocess execution. Use the disk-backed `runHooksStream()` above when you need hooks. Pair with `MemoryFs.toBundle()` / `MemoryFs.fromBundle()` to inspect results in-memory. 
@@ -241,18 +270,32 @@ Pair with `MemoryFs.toBundle()` / `MemoryFs.fromBundle()` to inspect results in- class DiskFs { constructor(opts: { workspaceRoot: string }); readProject(projectDir: string, opts?: { virtualRoot?: string }): Bundle; + assertOutDirAvailable(outDir: string): string; + prepareOutDir(outDir: string): string; + ensureOutDir(outDir: string): string; + writeEntry(outDir: string, entry: GenerateStreamEntry): void; writeOutput( outDir: string, input: Bundle | { files: Bundle; dirs?: string[] }, ): void; } + +type GenerateStreamEntry = + | { kind: "file"; path: string; bytes: Uint8Array } + | { kind: "dir"; path: string }; ``` -`writeOutput` accepts either a flat `Bundle` (files only — convenient for hand-rolled calls) or the `{ files, dirs }` shape returned by `generate`. Pass `{ files, dirs }` to preserve empty directories. Ancestor dirs for file writes are created automatically either way. +Five output APIs at three layers (out-dir checks, per-entry writes, buffered writes): + +- **`assertOutDirAvailable(outDir)`** — AlreadyExists + workspaceRoot containment check; returns the canonical path **without creating the directory**. Streaming `generate(...)` uses this so wasm validation failures (bad config, type-mismatched slot data, malformed bundle) leave no empty `outDir` on disk — `writeEntry`'s recursive parent-mkdir creates `outDir` lazily on the first event, matching native `Project::generate` which only creates the destination as part of `copy::copy`. +- **`prepareOutDir(outDir)`** — `assertOutDirAvailable` + eagerly creates the directory. Use this when you've already buffered the full output (`generateBundle` → `writeOutput`). +- **`ensureOutDir(outDir)`** — idempotent `mkdir -p` with workspaceRoot containment. Used by streaming `generate(...)` after a successful wasm call to handle the empty-project case (no events fire, but native still creates an empty `outDir`).
+- **`writeEntry(outDir, entry)`** — per-entry, synchronous sibling of `writeOutput` for the streaming-generate path: each `wasm.generate` callback synchronously routes a file or dir entry to disk. Re-validates `outDir` containment under `workspaceRoot` per call so external streaming consumers can't accidentally write outside the DiskFs root. Parent dirs for files are mkdir'd recursively (idempotent), which is also what creates `outDir` itself when `assertOutDirAvailable` was used in lieu of `prepareOutDir`. +- **`writeOutput(outDir, input)`** — convenience for callers that already buffered. Accepts either a flat `Bundle` (files only) or the `{ files, dirs }` shape returned by `generateBundle`. Internally calls `prepareOutDir` then loops `writeEntry`. -**`outDir` must not already exist.** `writeOutput` refuses a pre-existing `outDir` with an `already exists` error (parity with native `spackle generate`). Callers should pick a fresh path per run, or `rm -rf` the target before calling. +**`outDir` must not already exist.** `assertOutDirAvailable`, `prepareOutDir`, and `writeOutput` all refuse a pre-existing `outDir` with an `already exists` error (parity with native `spackle generate`); `ensureOutDir` and `writeEntry` do not perform this check. Callers should pick a fresh path per run, or `rm -rf` the target before calling. -**Containment:** `workspaceRoot` is canonicalized once. Every path fed into `readProject` / `writeOutput` must resolve under it; anything else throws. Per-entry traversal is blocked using `path.resolve` + prefix comparison — rejects `../escape`, absolute overrides, and any OS-normalized escape. +**Containment:** `workspaceRoot` is canonicalized once. Every path fed into `readProject` / `assertOutDirAvailable` / `prepareOutDir` / `ensureOutDir` / `writeEntry` / `writeOutput` must resolve under it; anything else throws. Per-entry traversal is blocked using `path.resolve` + prefix comparison — rejects `../escape`, absolute overrides, and any OS-normalized escape. ### `MemoryFs` @@ -273,6 +316,7 @@ Pure TS. No filesystem interaction. Useful for tests and preview flows.
## Known limitations - **UTF-8 paths only.** The bundle boundary doesn't round-trip non-UTF-8 filenames. -- **Whole-project marshalling.** Input and output bundles materialize in memory. Fine for typical templates (KB–MB); very large fixtures should wait on a streaming path. +- **Input bundle still materialized in memory.** `DiskFs.readProject` reads the whole project before generation begins. The streaming path bounds **output** memory at one entry, but a very large project tree still occupies its full size on the input side. A lazy-input change is deferred to a later PR; templates are typically small on input and large on output, so the output streaming wins back the dominant peak today. +- **`generateStream` does not reduce peak memory.** The wasm call is synchronous, so streamed entries accumulate in a queue and flush after the call returns. Use `generate(projectDir, outDir, ...)` for true streaming-to-disk; `generateStream` is for ergonomics only. - **Browser-side hooks require a custom `SpackleHooks`.** `defaultHooks()` throws in environments without Bun or Node. See [hooks.md](./hooks.md). - **`run_as_user` / polyjuice not exposed.** Native CLI can spawn hooks as another user; wasm path can't. Wrap it in a custom `SpackleHooks.execute` if needed. diff --git a/docs/ts/getting-started.md b/docs/ts/getting-started.md index 3a84daf..8b04aac 100644 --- a/docs/ts/getting-started.md +++ b/docs/ts/getting-started.md @@ -29,13 +29,13 @@ const result = await generate( ); if (result.ok) { - console.log(`Wrote ${result.files.length} files.`); + console.log(`Wrote ${result.files} file(s) and ${result.dirs} dir(s).`); } else { console.error(result.error); } ``` -`DiskFs` handles reading the project into a bundle, calling wasm, and writing the output bundle back to disk. The `workspaceRoot` option is a containment boundary — both `projectDir` and `outDir` must resolve under it, or `DiskFs` refuses the call. 
+`DiskFs` handles reading the project into a bundle, calling wasm, and **streaming each rendered entry to disk** as Rust produces it — peak memory is bounded by one entry, not by the whole rendered output. The success result returns counts, not a materialized bundle; if you need the rendered bytes in memory (preview, in-process consumers), call `generateBundle` instead. The `workspaceRoot` option is a containment boundary — both `projectDir` and `outDir` must resolve under it, or `DiskFs` refuses the call. ## What's a "project"? diff --git a/ts/package.json b/ts/package.json index a29b216..9d36d86 100644 --- a/ts/package.json +++ b/ts/package.json @@ -1,6 +1,6 @@ { "name": "@a2-ai/spackle", - "version": "0.5.0-rc3", + "version": "0.5.0-rc4", "description": "spackle project templating, as a WebAssembly module for JS hosts.", "repository": "https://github.com/a2-ai/spackle", "files": [ diff --git a/ts/scripts/demo.ts b/ts/scripts/demo.ts index f155481..9697563 100644 --- a/ts/scripts/demo.ts +++ b/ts/scripts/demo.ts @@ -107,7 +107,7 @@ for (const fixture of ["basic_project", "bad_template"]) { fs, ); if (result.ok) { - for (const f of result.files) console.log(` ${f.path} (${f.bytes.length} bytes)`); + console.log(` streamed ${result.files} file(s), ${result.dirs} dir(s) to disk`); } else { console.log(` FAILED: ${result.error}`); } diff --git a/ts/src/host/disk-fs.ts b/ts/src/host/disk-fs.ts index ddf801c..f2d739e 100644 --- a/ts/src/host/disk-fs.ts +++ b/ts/src/host/disk-fs.ts @@ -25,7 +25,7 @@ import { sep as pathSep, } from "node:path"; -import type { Bundle } from "../wasm/types.ts"; +import type { Bundle, GenerateStreamEntry } from "../wasm/types.ts"; /** Shape of a successful `generate` response — kept here (rather than * in wasm/types.ts) so DiskFs's signature doesn't import the full @@ -103,46 +103,115 @@ export class DiskFs { } /** - * Write a rendered output bundle to `outDir`. 
+ * Verify `outDir` is contained under `workspaceRoot` and does not + * already exist; return its canonical absolute path. Does NOT + * create the directory. * - * Accepts either a flat `Bundle` (just files) or an object with - * `files` + `dirs` — when `dirs` is present, each listed directory - * is mkdir'd so empty dirs created by the Rust copy pass survive - * the round-trip (native spackle calls `create_dir_all` for every - * Directory entry during copy). + * Use this in streaming-generate flows where you want to fail fast + * on AlreadyExists / containment without leaving an empty `outDir` + * on disk if a downstream step (e.g., wasm validation) errors out + * before any entry has streamed. `writeEntry` recursively mkdirs the + * parent of each entry it writes, so `outDir` gets created lazily on + * the first write — matching native `Project::generate`, which only + * creates the destination as part of `copy::copy`. * * Contract (matches native `GenerateError::AlreadyExists`): * `outDir` must NOT already exist on disk. If it does, throws — * same as `spackle generate` on native. + */ + assertOutDirAvailable(outDir: string): string { + const absOut = this.containDiskForCreate(outDir); + if (existsSync(absOut)) { + throw new Error(`assertOutDirAvailable: output directory already exists: ${absOut}`); + } + return absOut; + } + + /** + * `assertOutDirAvailable` + create the directory. Use this when + * you've already buffered the full output (e.g., `generateBundle` → + * `writeOutput`) — eager creation matches the buffered model. + * Streaming callers should use `assertOutDirAvailable` and let + * `writeEntry` create the directory lazily. + */ + prepareOutDir(outDir: string): string { + const absOut = this.assertOutDirAvailable(outDir); + mkdirSync(absOut, { recursive: true }); + return absOut; + } + + /** + * Idempotent `mkdir -p` for `outDir`, with workspaceRoot + * containment. 
Unlike `prepareOutDir`, this does NOT throw if the + * directory already exists — used by streaming generate to preserve + * native parity for empty projects, where no streamed events fire + * and `writeEntry`'s parent-mkdir never runs. + */ + ensureOutDir(outDir: string): string { + const absOut = this.containDiskForCreate(outDir); + mkdirSync(absOut, { recursive: true }); + return absOut; + } + + /** + * Write a single streamed entry to disk under `outDir`. + * + * Sync sibling of `writeOutput` for the streaming-generate path: + * `wasm.generate(...)` invokes a host callback per file/dir entry, + * and that callback ends up here, dropping bytes to disk before the + * next event arrives. Peak memory is bounded by the size of one + * entry, not by the rendered output. + * + * Re-validates that `outDir` is under `workspaceRoot` on every call + * (via `containDiskForCreate`) so external streaming consumers can't + * accidentally write outside the DiskFs root by passing an + * arbitrary path. After the first write, `outDir` exists and the + * canonicalization hits the existing-path branch — single + * `realpathSync`, microseconds per call. + * + * Containment for the entry's relative path uses `containedJoin` so + * traversal escapes (`../`, absolute paths, platform-specific + * separators) are rejected before any write. Parent dirs are + * mkdir'd recursively (idempotent) — that's also what creates + * `outDir` itself on the first write when `assertOutDirAvailable` + * was used in lieu of `prepareOutDir`. + */ + writeEntry(outDir: string, entry: GenerateStreamEntry): void { + const absOut = this.containDiskForCreate(outDir); + const absPath = this.containedJoin(absOut, entry.path); + if (entry.kind === "dir") { + mkdirSync(absPath, { recursive: true }); + return; + } + mkdirSync(pathDirname(absPath), { recursive: true }); + writeFileSync(absPath, entry.bytes); + } + + /** + * Write a rendered output bundle to `outDir` (buffered shape). 
* - * Containment: `outDir` must resolve under `workspaceRoot` (we walk - * up to the nearest existing ancestor and canonicalize that). - * Per-entry traversal guard uses `path.resolve` to normalize - * platform-specific separators; `../x.txt`, `..\x.txt`, and any - * normalized escape are all rejected. + * Accepts either a flat `Bundle` (just files) or an object with + * `files` + `dirs` — when `dirs` is present, each listed directory + * is mkdir'd so empty dirs created by the Rust copy pass survive + * the round-trip. + * + * Same contract as `prepareOutDir` + a loop of `writeEntry` calls; + * use this when you already have the full bundle in memory (e.g., + * from `generateBundle`). Streaming callers should drive + * `writeEntry` directly off the wasm callback to avoid the + * intermediate buffer. */ writeOutput(outDir: string, input: Bundle | WriteOutputInput): void { const { files, dirs } = Array.isArray(input) ? { files: input, dirs: undefined as string[] | undefined } : input; - const absOut = this.containDiskForCreate(outDir); - if (existsSync(absOut)) { - throw new Error(`writeOutput: output directory already exists: ${absOut}`); - } - mkdirSync(absOut, { recursive: true }); - - // Create empty dirs first so the final tree matches native - // generation even when a directory has no files under it. + const absOut = this.prepareOutDir(outDir); for (const rel of dirs ?? 
[]) { - const absDir = this.containedJoin(absOut, rel); - mkdirSync(absDir, { recursive: true }); + this.writeEntry(absOut, { kind: "dir", path: rel }); } - for (const entry of files) { - const absFile = this.containedJoin(absOut, entry.path); - mkdirSync(pathDirname(absFile), { recursive: true }); - writeFileSync(absFile, entry.bytes); + this.writeEntry(absOut, { kind: "file", path: entry.path, bytes: entry.bytes }); } } @@ -156,7 +225,7 @@ export class DiskFs { private containedJoin(absBase: string, rel: string): string { const resolved = pathResolve(absBase, rel); if (resolved !== absBase && !resolved.startsWith(absBase + pathSep)) { - throw new Error(`writeOutput: entry path escapes outDir: ${rel}`); + throw new Error(`entry path escapes outDir: ${rel}`); } return resolved; } diff --git a/ts/src/spackle.ts b/ts/src/spackle.ts index 6232701..8b5d4ce 100644 --- a/ts/src/spackle.ts +++ b/ts/src/spackle.ts @@ -20,7 +20,10 @@ import { loadSpackleWasm } from "./wasm/index.ts"; import type { Bundle, CheckResponse, + GenerateDiskResponse, GenerateResponse, + GenerateStreamEntry, + GenerateStreamEvent, PlanHooksResponse, SlotData, ValidationResponse, @@ -99,9 +102,25 @@ export async function validateSlotDataBundle( } /** - * Generate a filled project from disk → bundle → (wasm) → bundle → disk. - * DiskFs handles both the read (projectDir → bundle) and write (output - * bundle → outDir) legs. + * Generate a filled project, streaming each rendered file/dir to disk + * as Rust produces it. + * + * Reads the project bundle from `projectDir` (eagerly — the input read + * is the documented remaining ceiling), then drives the wasm streaming + * generate with a callback that synchronously writes each entry under + * `outDir`. Peak memory is bounded by one entry, not by the rendered + * output. + * + * Returns counts (`files`, `dirs`) on success — not a materialized + * bundle. Callers that want the rendered output in memory should call + * `generateBundle` instead. 
+ * + * Failure semantics match native `Project::generate`: validation + * failures (bad config, slot data type mismatch, malformed bundle) + * fail BEFORE `outDir` is created on disk. Mid-stream failures (a + * template tera error after the first write, or a host-callback + * throw) leave whatever was already written — there's no rollback, + * matching native CLI behavior. Pick a fresh `outDir` per run. * * Hooks are a separate step — iterate `runHooksStream()` after * `generate()` if the project defines any. Mirrors the native CLI's @@ -114,22 +133,60 @@ export async function generate( slotData: SlotData, fs: DiskFs, opts: GenerateOptions = {}, -): Promise { +): Promise { const virtualProject = opts.virtualProjectDir ?? DEFAULT_VIRTUAL_PROJECT; const virtualOut = opts.virtualOutDir ?? DEFAULT_VIRTUAL_OUTPUT; + const wasm = await loadSpackleWasm(); const bundle = fs.readProject(projectDir, { virtualRoot: virtualProject }); - const result = await generateBundle(bundle, slotData, { - virtualProjectDir: virtualProject, - virtualOutDir: virtualOut, + + // Containment + AlreadyExists check WITHOUT creating outDir — defer + // creation until the first streamed entry. That way wasm-side + // validation failures (bad slot data, bad config, bad bundle) leave + // no empty outDir on disk, matching native `Project::generate` which + // only creates the destination as part of `copy::copy`. 
+ const absOut = fs.assertOutDirAvailable(outDir); + + let files = 0; + let dirs = 0; + const result = wasm.generate(bundle, virtualProject, virtualOut, slotData, (event) => { + fs.writeEntry(absOut, event); + if (event.kind === "file") files++; + else dirs++; }); - if (result.ok) { - fs.writeOutput(outDir, { files: result.files, dirs: result.dirs }); + if (!result.ok) { + return { ok: false, error: result.error }; } - return result; + + // Empty-project parity: native `copy::copy` unconditionally calls + // `create_dir_all(dest)` so a project with zero files still produces + // an empty outDir. Streaming generate skips the out_root event, so + // ensure outDir on success — mkdir recursive is idempotent so this + // is a no-op when writeEntry already created it. + fs.ensureOutDir(absOut); + + return { ok: true, files, dirs }; } -/** Bundle-to-bundle variant of `generate` — for MemoryFs / preview - * flows that never touch disk. */ +/** + * Bundle-to-bundle variant of `generate` — buffers the streamed entries + * into a `Bundle` and returns the legacy `{ ok, files, dirs }` shape. + * For preview / in-process consumers that want the rendered tree in + * memory; use `generateStream` for async-iter ergonomics or `generate` + * for true-streaming disk writes. + * + * Dedupes and sorts to preserve the **final-tree** semantics the old + * buffered path had (which drained an in-memory map): when both + * `copy::copy` and `template::fill` write to the same output path + * (e.g., a project with `foo` and `foo.j2` both rendering to `foo`), + * the second write wins — matching what lands on disk under streaming + * `generate(...)` because the second `writeFileSync` overwrites the + * first. Output is sorted by path so consumers can rely on stable + * order regardless of HashMap iteration in the underlying walk. + * + * NOTE: this internally buffers — peak memory is the same as the + * pre-streaming API. 
Streaming benefits the disk path; in-memory + * consumers always pay full output size. + */ export async function generateBundle( projectBundle: Bundle, slotData: SlotData, @@ -138,7 +195,66 @@ export async function generateBundle( const virtualProject = opts.virtualProjectDir ?? DEFAULT_VIRTUAL_PROJECT; const virtualOut = opts.virtualOutDir ?? DEFAULT_VIRTUAL_OUTPUT; const wasm = await loadSpackleWasm(); - return wasm.generate(projectBundle, virtualProject, virtualOut, slotData); + + // Dedupe via Map: later writes replace earlier + // ones, mirroring disk-write last-wins semantics. Set for + // dirs is just dedup; create_dir_all events fire repeatedly for the + // same ancestor as different files traverse it. + const fileMap = new Map(); + const dirSet = new Set(); + const result = wasm.generate(projectBundle, virtualProject, virtualOut, slotData, (event) => { + if (event.kind === "file") { + fileMap.set(event.path, event); + } else { + dirSet.add(event.path); + } + }); + if (!result.ok) { + return { ok: false, error: result.error }; + } + + const files: Bundle = Array.from(fileMap.values(), (e) => ({ + path: e.path, + bytes: e.bytes, + })).toSorted((a, b) => (a.path < b.path ? -1 : a.path > b.path ? 1 : 0)); + const dirs = Array.from(dirSet).toSorted(); + return { ok: true, files, dirs }; +} + +/** + * Streaming variant — yields each rendered entry as an async generator + * event, with terminal `done` / `error` events. Mirrors the + * `runHooksStream` shape. + * + * MEMORY NOTE: this does NOT reduce peak memory. The wasm call is + * synchronous, so the host callback runs to completion before this + * generator can `yield` — entries pile up in an internal queue, then + * stream out. The value of this API is ergonomics (preview, progress + * UI), not memory. For true-streaming disk writes that bound peak + * memory at one entry, call `generate(projectDir, outDir, ...)`. 
+ */ +export async function* generateStream( + projectBundle: Bundle, + slotData: SlotData, + opts: GenerateOptions = {}, +): AsyncGenerator { + const virtualProject = opts.virtualProjectDir ?? DEFAULT_VIRTUAL_PROJECT; + const virtualOut = opts.virtualOutDir ?? DEFAULT_VIRTUAL_OUTPUT; + const wasm = await loadSpackleWasm(); + + const queue: GenerateStreamEntry[] = []; + const result = wasm.generate(projectBundle, virtualProject, virtualOut, slotData, (event) => + queue.push(event), + ); + + for (const event of queue) { + yield event; + } + if (result.ok) { + yield { kind: "done" }; + } else { + yield { kind: "error", error: result.error }; + } } /** @@ -261,7 +377,12 @@ export type { Bundle, BundleEntry, CheckResponse, + GenerateDiskResponse, GenerateResponse, + GenerateStreamDirEvent, + GenerateStreamEntry, + GenerateStreamEvent, + GenerateStreamFileEvent, Hook, HookPlanEntry, PlanHooksResponse, diff --git a/ts/src/wasm/browser.ts b/ts/src/wasm/browser.ts index 58eb917..41ced51 100644 --- a/ts/src/wasm/browser.ts +++ b/ts/src/wasm/browser.ts @@ -32,7 +32,12 @@ export type { Bundle, BundleEntry, CheckResponse, + GenerateDiskResponse, GenerateResponse, + GenerateStreamDirEvent, + GenerateStreamEntry, + GenerateStreamEvent, + GenerateStreamFileEvent, Hook, HookPlanEntry, PlanHooksResponse, diff --git a/ts/src/wasm/index.ts b/ts/src/wasm/index.ts index 6a014a7..4d82127 100644 --- a/ts/src/wasm/index.ts +++ b/ts/src/wasm/index.ts @@ -38,7 +38,12 @@ export type { Bundle, BundleEntry, CheckResponse, + GenerateDiskResponse, GenerateResponse, + GenerateStreamDirEvent, + GenerateStreamEntry, + GenerateStreamEvent, + GenerateStreamFileEvent, Hook, HookPlanEntry, PlanHooksResponse, diff --git a/ts/src/wasm/runtime.ts b/ts/src/wasm/runtime.ts index df61e91..8b2ce31 100644 --- a/ts/src/wasm/runtime.ts +++ b/ts/src/wasm/runtime.ts @@ -5,7 +5,8 @@ import type { Bundle, CheckResponse, - GenerateResponse, + GenerateResult, + GenerateStreamEntry, PlanHooksResponse, SlotData, 
ValidationResponse, @@ -29,11 +30,16 @@ export interface RawWasmExports { initWasm: InitWasm; check(projectBundle: unknown, projectDir: string): string; validateSlotData(projectBundle: unknown, projectDir: string, slotDataJson: string): string; + /** Streams output entries through `onEntry` synchronously while the + * wasm call runs; returns a terminal envelope. The callback receives + * raw `{kind, path, bytes?}` objects from serde-wasm-bindgen — the + * typed wrapper below narrows this for callers. */ generate( projectBundle: unknown, projectDir: string, outDir: string, slotDataJson: string, + onEntry: (event: unknown) => void, ): unknown; planHooks( projectBundle: unknown, @@ -53,12 +59,18 @@ export interface SpackleWasm { projectDir: string, slotData: SlotData, ): ValidationResponse; + /** Run generate, streaming each output file/dir through `onEntry` as + * it's produced. Returns once Rust has finished walking the project. + * Bytes are dropped after each callback returns — peak memory is + * bounded by what the host buffers in `onEntry`, not by the size of + * the rendered output. */ generate( projectBundle: Bundle, projectDir: string, outDir: string, slotData: SlotData, - ): GenerateResponse; + onEntry: (event: GenerateStreamEntry) => void, + ): GenerateResult; planHooks( projectBundle: Bundle, projectDir: string, @@ -123,15 +135,20 @@ async function initialize( raw.validateSlotData(projectBundle, projectDir, JSON.stringify(slotData)), ) as ValidationResponse; }, - generate(projectBundle, projectDir, outDir, slotData) { - // generate returns a JsValue (object), not a JSON string. + generate(projectBundle, projectDir, outDir, slotData, onEntry) { + // generate returns a JsValue (object), not a JSON string. The + // callback receives serde-wasm-bindgen-shaped {kind, path, bytes?} + // objects — assert the narrowed entry shape here so consumers + // see the typed union. 
// oxlint-disable-next-line typescript-eslint/no-unsafe-type-assertion return raw.generate( projectBundle, projectDir, outDir, JSON.stringify(slotData), - ) as GenerateResponse; + // oxlint-disable-next-line typescript-eslint/no-unsafe-type-assertion + (event: unknown) => onEntry(event as GenerateStreamEntry), + ) as GenerateResult; }, planHooks(projectBundle, projectDir, outDir, data, hookRan) { // oxlint-disable-next-line typescript-eslint/no-unsafe-type-assertion diff --git a/ts/src/wasm/types.ts b/ts/src/wasm/types.ts index e659ee8..b7d9a07 100644 --- a/ts/src/wasm/types.ts +++ b/ts/src/wasm/types.ts @@ -59,21 +59,50 @@ export type CheckResponse = /** Response from `validateSlotData()`. */ export type ValidationResponse = { valid: true } | { valid: false; errors: string[] }; -/** Response from `generate()`. - * - * `files` carries the rendered output subtree with paths **relative to - * outDir**. `dirs` carries the directory subtree (also relative) so - * empty dirs survive the bundle round-trip — the native `copy` pass - * `create_dir_all`s every directory entry it walks, and we match that - * behavior host-side by mkdir'ing each dir even if no files live under - * it. - * - * Hooks are a separate step — call `planHooks` / `runHooksStream` - * after `generate` (mirrors the native CLI's two-call shape). */ +/** Single output entry streamed from wasm during generate. Paths are + * **relative to outDir**. Files carry their bytes; dirs are markers so + * empty dirs (created by the Rust copy pass for directory entries that + * had no files pass the ignore filter) survive the round-trip. */ +export type GenerateStreamFileEvent = { + kind: "file"; + path: string; + bytes: Uint8Array; +}; +export type GenerateStreamDirEvent = { kind: "dir"; path: string }; +export type GenerateStreamEntry = GenerateStreamFileEvent | GenerateStreamDirEvent; + +/** Public event union surfaced by the `generateStream` async generator. 
+ * Adds terminal `done` / `error` events to the streamed entries. */ +export type GenerateStreamEvent = + | GenerateStreamFileEvent + | GenerateStreamDirEvent + | { kind: "error"; error: string } + | { kind: "done" }; + +/** Terminal envelope returned from a single `wasm.generate(...)` call. + * Streamed file/dir entries arrive separately through the host callback — + * this is just the success/error signal. Host-callback throws are + * latched wasm-side and surfaced here with the original message. */ +export type GenerateResult = { ok: true } | { ok: false; error: string }; + +/** Response from the buffered `generateBundle()` wrapper. Same shape + * as the legacy bundle-output API: a flat list of files plus the + * directory subtree (so empty dirs survive). Hosts that want to keep + * the rendered output in memory (preview, in-process consumers) call + * `generateBundle`. Hosts that want to write to disk call the + * `generate(projectDir, outDir, ...)` wrapper, which uses + * `DiskFs.writeEntry` per event and never materializes a `Bundle`. */ export type GenerateResponse = | { ok: true; files: Bundle; dirs: string[] } | { ok: false; error: string }; +/** Response from the disk-streaming `generate()` wrapper. Returns + * counts instead of a materialized bundle — that's the whole point of + * streaming: bytes never accumulate host-side. */ +export type GenerateDiskResponse = + | { ok: true; files: number; dirs: number } + | { ok: false; error: string }; + /** One entry in a hook plan. Snake_case fields mirror Rust's * `HookPlanEntry` (`#[derive(Serialize)]` default casing). 
*/ export interface HookPlanEntry { diff --git a/ts/tests/disk-fs.test.ts b/ts/tests/disk-fs.test.ts index 09a8f20..d6d5dc0 100644 --- a/ts/tests/disk-fs.test.ts +++ b/ts/tests/disk-fs.test.ts @@ -160,4 +160,93 @@ describe("DiskFs", () => { const fs = new DiskFs({ workspaceRoot: root }); expect(() => fs.writeOutput("/etc/spackle-out", [])).toThrow(); }); + + test("prepareOutDir creates the outDir and refuses pre-existing", async () => { + const fs = new DiskFs({ workspaceRoot: root }); + const outDir = join(root, "out"); + const abs = fs.prepareOutDir(outDir); + expect(existsSync(outDir)).toBe(true); + expect(abs).toBe(outDir); + + expect(() => fs.prepareOutDir(outDir)).toThrow(/already exists/); + }); + + test("writeEntry writes file and dir entries under outDir", async () => { + const fs = new DiskFs({ workspaceRoot: root }); + const outDir = join(root, "out"); + fs.prepareOutDir(outDir); + + fs.writeEntry(outDir, { kind: "dir", path: "sub" }); + fs.writeEntry(outDir, { + kind: "file", + path: "sub/a.txt", + bytes: new TextEncoder().encode("A"), + }); + expect(existsSync(join(outDir, "sub"))).toBe(true); + expect(await readFile(join(outDir, "sub", "a.txt"), "utf8")).toBe("A"); + }); + + test("writeEntry creates missing parent dirs on file events defensively", async () => { + // Streaming events arrive parent-before-child so the parent dir + // typically already exists, but a templated path can introduce a + // dir that wasn't its own dir event. writeEntry mkdir's the parent + // recursively to be safe. 
+ const fs = new DiskFs({ workspaceRoot: root }); + const outDir = join(root, "out"); + fs.prepareOutDir(outDir); + + fs.writeEntry(outDir, { + kind: "file", + path: "deep/nested/x.txt", + bytes: new TextEncoder().encode("x"), + }); + expect(await readFile(join(outDir, "deep", "nested", "x.txt"), "utf8")).toBe("x"); + }); + + test("writeEntry rejects entry paths that escape outDir", () => { + const fs = new DiskFs({ workspaceRoot: root }); + const outDir = join(root, "out"); + fs.prepareOutDir(outDir); + expect(() => + fs.writeEntry(outDir, { + kind: "file", + path: "../escape.txt", + bytes: new Uint8Array(), + }), + ).toThrow(/escapes outDir/); + expect(() => fs.writeEntry(outDir, { kind: "dir", path: "../escape" })).toThrow( + /escapes outDir/, + ); + }); + + test("writeEntry rejects an outDir outside workspaceRoot", () => { + // Public API safety: a custom streaming consumer that calls + // writeEntry directly (bypassing prepareOutDir / generate()) must + // not be able to write outside the DiskFs's workspaceRoot. Older + // implementations only enforced entry-path containment relative to + // outDir; this regression test pins the per-call workspaceRoot + // check too. + const fs = new DiskFs({ workspaceRoot: root }); + expect(() => + fs.writeEntry("/etc/spackle-pwn", { + kind: "file", + path: "a.txt", + bytes: new TextEncoder().encode("x"), + }), + ).toThrow(/escapes workspaceRoot/); + expect(() => fs.writeEntry("/etc/spackle-pwn", { kind: "dir", path: "sub" })).toThrow( + /escapes workspaceRoot/, + ); + }); + + test("assertOutDirAvailable returns canonical path without creating outDir", async () => { + const fs = new DiskFs({ workspaceRoot: root }); + const outDir = join(root, "deferred"); + const abs = fs.assertOutDirAvailable(outDir); + expect(abs).toBe(outDir); + expect(existsSync(outDir)).toBe(false); + // AlreadyExists check still fires even though it doesn't create. 
+ await mkdir(outDir, { recursive: true }); + expect(() => fs.assertOutDirAvailable(outDir)).toThrow(/already exists/); + }); }); diff --git a/ts/tests/spackle.test.ts b/ts/tests/spackle.test.ts index 75238fc..8e81146 100644 --- a/ts/tests/spackle.test.ts +++ b/ts/tests/spackle.test.ts @@ -8,6 +8,7 @@ import { tmpdir } from "node:os"; import { join, resolve } from "node:path"; import { + type Bundle, DiskFs, MemoryFs, check, @@ -15,6 +16,8 @@ import { configureSpackleWasm, generate, generateBundle, + generateStream, + loadSpackleWasm, validateSlotData, } from "../src/spackle.ts"; @@ -111,8 +114,9 @@ describe("spackle (DiskFs)", () => { ); expect(res.ok).toBe(true); if (res.ok) { - const paths = res.files.map((f) => f.path).toSorted(); - expect(paths).toContain("README.md"); + // Streaming generate returns counts, not a materialized bundle — + // verify the rendered tree is actually on disk. + expect(res.files).toBeGreaterThan(0); const readme = await readFile(join(ws.outDir, "README.md"), "utf8"); expect(readme).toContain("HI, world!"); @@ -186,3 +190,231 @@ describe("spackle (bundle-only / MemoryFs)", () => { } }); }); + +describe("spackle (streaming generate)", () => { + async function basicBundle() { + return bundleFromDisk( + ["spackle.toml", "README.md.j2", "docs/static.md", "src/{{ filename }}.txt.j2"], + join(FIXTURES, "basic_project"), + "/project", + ); + } + + test("generateStream yields entries before a terminal done event", async () => { + const bundle = await basicBundle(); + + const events: Array<{ kind: string; path?: string }> = []; + let bytesSeen = 0; + for await (const e of generateStream(bundle, { + greeting: "hi", + target: "stream", + filename: "notes", + })) { + if (e.kind === "file") { + events.push({ kind: e.kind, path: e.path }); + bytesSeen += e.bytes.length; + } else if (e.kind === "dir") { + events.push({ kind: e.kind, path: e.path }); + } else { + events.push({ kind: e.kind }); + } + } + // Must terminate with `done`, not `error`. 
+ expect(events[events.length - 1]).toEqual({ kind: "done" }); + + // README rendered through the stream — bytes flowed. + const readme = events.find((e) => e.kind === "file" && e.path === "README.md"); + expect(readme).toBeDefined(); + expect(bytesSeen).toBeGreaterThan(0); + }); + + test("generateStream emits parent dirs before any child file", async () => { + const bundle = await basicBundle(); + + let firstDocFile = -1; + let docDirIdx = -1; + let i = 0; + for await (const e of generateStream(bundle, { + greeting: "hi", + target: "stream", + filename: "notes", + })) { + if (e.kind === "dir" && e.path === "docs") docDirIdx = i; + if (e.kind === "file" && e.path.startsWith("docs/") && firstDocFile === -1) firstDocFile = i; + i++; + } + expect(docDirIdx).toBeGreaterThanOrEqual(0); + expect(firstDocFile).toBeGreaterThanOrEqual(0); + expect(docDirIdx).toBeLessThan(firstDocFile); + }); + + test("disk-streaming generate aborts when a host write throws (no rollback)", async () => { + // The disk-streaming `generate(...fs)` writes synchronously inside + // the wasm callback. If a write throws (e.g., disk full), the + // CallbackFs latches the error and Rust short-circuits the rest of + // the pipeline — the wasm export returns ok:false and any files + // already written stay on disk (no rollback). This test simulates + // a failure by wrapping DiskFs.writeEntry to throw on the 2nd call. 
+ const ws = await workspace("basic_project"); + try { + const fs = new DiskFs({ workspaceRoot: ws.root }); + const realWriteEntry = fs.writeEntry.bind(fs); + let callCount = 0; + // oxlint-disable-next-line typescript-eslint/no-unsafe-type-assertion + (fs as unknown as { writeEntry: typeof fs.writeEntry }).writeEntry = (outDir, entry) => { + callCount++; + if (callCount === 2) { + throw new Error("simulated disk failure"); + } + realWriteEntry(outDir, entry); + }; + + const res = await generate( + ws.projectDir, + ws.outDir, + { greeting: "hi", target: "x", filename: "notes" }, + fs, + ); + expect(res.ok).toBe(false); + if (!res.ok) { + expect(res.error).toMatch(/simulated disk failure/); + } + } finally { + await rm(ws.root, { recursive: true, force: true }); + } + }); + + test("generate does not create outDir on slot validation failure", async () => { + // Native parity: Project::generate validates config + slot data + // BEFORE copy::copy creates the destination. Our streaming wrapper + // must defer outDir creation until the first event so wasm-side + // validation failures don't leave an empty directory on disk. + const ws = await workspace("typed_slots"); + try { + const fs = new DiskFs({ workspaceRoot: ws.root }); + const res = await generate( + ws.projectDir, + ws.outDir, + // count is declared as Number — passing a non-numeric string + // is a slot validation failure, surfaced before any walk + // happens in Rust. + { name: "demo", count: "not-a-number", enabled: "true" }, + fs, + ); + expect(res.ok).toBe(false); + // Load existsSync from node:fs via a dynamic import for the on-disk check. + const { existsSync } = await import("node:fs"); + expect(existsSync(ws.outDir)).toBe(false); + } finally { + await rm(ws.root, { recursive: true, force: true }); + } + }); + + test("generate creates outDir on success even for empty projects", async () => { + // Native `copy::copy` unconditionally calls create_dir_all(dest), + // so even an empty project produces an empty outDir. 
Wasm streams + // skip the out_root event, so the disk wrapper must mkdir on + // success to preserve parity. + const root = await realpath(await mkdtemp(join(tmpdir(), "spackle-empty-"))); + try { + const projectDir = join(root, "project"); + await import("node:fs/promises").then((m) => m.mkdir(projectDir, { recursive: true })); + // Minimal valid spackle.toml — no slots, no files to render. + await import("node:fs/promises").then((m) => + m.writeFile(join(projectDir, "spackle.toml"), 'name = "empty"\n'), + ); + const outDir = join(root, "output"); + const fs = new DiskFs({ workspaceRoot: root }); + const res = await generate(projectDir, outDir, {}, fs); + expect(res.ok).toBe(true); + const { existsSync } = await import("node:fs"); + expect(existsSync(outDir)).toBe(true); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + test("generateBundle dedupes overlapping copy + template paths (template wins)", async () => { + // Project::generate runs copy::copy first then template::fill. + // When both write to the same output path (e.g., a project with + // `foo` and `foo.j2` rendering to `foo`), the streaming events + // include two file entries for `foo`. The buffered generateBundle + // wrapper must collapse them — last-write wins, matching + // disk-streaming's writeFileSync overwrite semantics. + const bundle: Bundle = [ + { + path: "/project/spackle.toml", + bytes: new TextEncoder().encode( + 'name = "overlap"\n[[slots]]\nkey = "x"\ntype = "String"\n', + ), + }, + { + path: "/project/foo", + bytes: new TextEncoder().encode("static-foo"), + }, + { + path: "/project/foo.j2", + bytes: new TextEncoder().encode("rendered-{{ x }}"), + }, + ]; + const res = await generateBundle(bundle, { x: "value" }); + expect(res.ok).toBe(true); + if (res.ok) { + const fooEntries = res.files.filter((f) => f.path === "foo"); + expect(fooEntries.length).toBe(1); + // template's render runs second in core, so it wins. 
+ expect(new TextDecoder().decode(fooEntries[0].bytes)).toBe("rendered-value"); + } + }); + + test("generateBundle returns files and dirs sorted by path", async () => { + // Streaming order depends on HashMap iteration in MemoryFs's + // list_dir, which Rust does not guarantee to be stable. The old + // drain_subtree path explicitly sorted; the buffered wrapper must + // do the same so snapshots / downstream consumers see deterministic + // output. + const bundle = await bundleFromDisk( + ["spackle.toml", "README.md.j2", "docs/static.md", "src/{{ filename }}.txt.j2"], + join(FIXTURES, "basic_project"), + "/project", + ); + const res = await generateBundle(bundle, { + greeting: "hi", + target: "sort", + filename: "notes", + }); + expect(res.ok).toBe(true); + if (res.ok) { + const filePaths = res.files.map((f) => f.path); + const sortedFiles = filePaths.toSorted(); + expect(filePaths).toEqual(sortedFiles); + + const sortedDirs = res.dirs.toSorted(); + expect(res.dirs).toEqual(sortedDirs); + } + }); + + test("wasm.generate surfaces a thrown host callback as the response error", async () => { + // Direct wasm-level test: a callback that throws should latch + // wasm-side and come back as { ok: false, error }. Subsequent + // entries do not trigger the callback. + const bundle = await basicBundle(); + const wasm = await loadSpackleWasm(); + let calls = 0; + const result = wasm.generate( + bundle, + "/project", + "/output", + { greeting: "hi", target: "x", filename: "notes" }, + () => { + calls++; + throw new Error("host boom"); + }, + ); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toMatch(/host boom/); + } + expect(calls).toBe(1); + }); +});