From c100786d4f49fc01745eb9e8e0e0bb969888612f Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 8 May 2026 09:31:44 +0000 Subject: [PATCH 1/6] feat(coreutils-port): implement replace_with rewriter (towards #1534) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the syn-based import rewriter for the manifest's `replace_with` action so modules referencing uucore-internal types can be vendored with substitutions, unblocking #1534's printf migration. Behaviour: - For each top-level `use` item, flatten the tree into leaf paths (preserving glob/rename semantics). - For each leaf, look up a `replace_with` substitution whose prefix matches; replace the matched prefix segments with `target`. - When the rewritten path's final segment differs from the original imported name, an `as <original-name>` rename is inserted so call-site references still resolve. - Rebuild use items individually — use groups (`use a::{b, c};`) become separate `use` lines (the file is re-pretty-printed via `prettyplease::unparse` whenever any `replace_with` is in scope). - Existing `error` semantics unchanged; `inline` still bails with a follow-up message (separate from this change). - Validation: `replace_with` requires a non-empty `target`. Tests cover: prefix-match rewrite with rename insertion, no-rename when leaf matches, use-group flattening, alias preservation, and missing-target validation. Spec and module-doc updated. This is a focused incremental step toward #1534; the format/ vendor + printf migration land in follow-up PRs. 
--- crates/bashkit-coreutils-port/src/manifest.rs | 12 +- crates/bashkit-coreutils-port/src/module.rs | 380 +++++++++++++++++- specs/coreutils-args-port.md | 15 +- 3 files changed, 377 insertions(+), 30 deletions(-) diff --git a/crates/bashkit-coreutils-port/src/manifest.rs b/crates/bashkit-coreutils-port/src/manifest.rs index 6c195546c..096e98855 100644 --- a/crates/bashkit-coreutils-port/src/manifest.rs +++ b/crates/bashkit-coreutils-port/src/manifest.rs @@ -37,11 +37,13 @@ //! Action support, current implementation: //! //! - `error` — fully implemented (port aborts when matched). -//! - `inline`, `replace_with` — accepted in the schema, but require -//! the future `syn`-based import rewriter; the tool errors at -//! runtime if a module's substitution declares them. Manifest-side -//! declarations stay forward-compatible: when the rewriter lands, -//! the same manifest works without further changes. +//! - `replace_with` — fully implemented. The matched prefix in every +//! `use` path is rewritten to `target`. When the rewritten path's +//! final segment differs from the original, an `as ` rename is +//! inserted so call sites compile unchanged. Use groups are +//! flattened into individual `use` items as a side effect. +//! - `inline` — accepted in the schema, awaits a follow-up. The +//! manifest declarations stay forward-compatible. use serde::Deserialize; diff --git a/crates/bashkit-coreutils-port/src/module.rs b/crates/bashkit-coreutils-port/src/module.rs index 5ea81e07e..d0b193a00 100644 --- a/crates/bashkit-coreutils-port/src/module.rs +++ b/crates/bashkit-coreutils-port/src/module.rs @@ -1,12 +1,10 @@ -//! Module mode — vendor a uucore module verbatim into bashkit. +//! Module mode — vendor a uucore module into bashkit. //! //! Algorithm: //! 1. Load the manifest and look up the requested module entry. //! 2. Walk every `.rs` file under the entry's `source` (single file //! or directory, depth-recursive). -//! 3. 
For each file, parse with syn purely to inspect top-level -//! `use` items — flatten use trees into individual paths, then -//! enforce the policy: +//! 3. For each file, parse with syn and walk top-level `use` items: //! - `use fluent::*;` (or any `fluent::...`) → hard error: the //! module is not safely vendorable without code changes. //! - `use uucore::translate;` / `translate::*` → same hard error @@ -15,18 +13,20 @@ //! manifest substitution prefix. Unmatched paths abort the port //! so unexpected internal references surface explicitly. //! - matched `error` actions abort with a policy-rejection message. -//! - matched `inline`/`replace_with` actions abort with a -//! "rewriter-not-implemented" message — the manifest schema -//! already accepts them, but actual rewriting awaits the future -//! `syn`-based rewriter (#1534's first consumer will drive that). -//! 4. Write the file body verbatim with a banner header prepended. -//! No body rewriting; this is the verbatim-copy path documented -//! in `specs/coreutils-args-port.md` § Module mode. +//! - matched `replace_with` actions are rewritten in place (see +//! [`apply_replace_with`]). +//! - matched `inline` actions still abort — inline vendoring awaits +//! a follow-up; manifest stanzas stay forward-compatible. +//! 4. If any `replace_with` substitutions are in scope, the rewritten +//! AST is emitted via `prettyplease::unparse` (use groups become +//! individual `use` items as a side effect). Otherwise the source +//! is written verbatim. A banner is prepended either way. 
use std::path::{Path, PathBuf}; use anyhow::{Context, Result, anyhow, bail}; -use syn::{Item, UseTree}; +use proc_macro2::Span; +use syn::{Ident, Item, ItemUse, UseTree}; use crate::manifest::{Action, Manifest, Module, Substitution}; @@ -113,12 +113,19 @@ fn port_dir( fn port_file(src: &Path, out: &Path, module: &Module, rev: &str, rel_path: &str) -> Result<()> { let text = std::fs::read_to_string(src).with_context(|| format!("read source {}", src.display()))?; - let parsed = + let mut parsed = syn::parse_file(&text).with_context(|| format!("parse {} as rust", src.display()))?; enforce_use_policy(&parsed, module, rel_path)?; + let body_text = if has_replace_with(module) { + apply_replace_with(&mut parsed, module)?; + prettyplease::unparse(&parsed) + } else { + text + }; + let banner = banner(rev, &module.name, rel_path); - let body = format!("{banner}{text}"); + let body = format!("{banner}{body_text}"); if let Some(parent) = out.parent() { std::fs::create_dir_all(parent) .with_context(|| format!("create parent dir {}", parent.display()))?; @@ -127,6 +134,13 @@ fn port_file(src: &Path, out: &Path, module: &Module, rev: &str, rel_path: &str) Ok(()) } +fn has_replace_with(module: &Module) -> bool { + module + .substitutions + .iter() + .any(|s| s.action == Action::ReplaceWith) +} + fn banner(rev: &str, module_name: &str, rel_path: &str) -> String { format!( "// GENERATED by bashkit-coreutils-port. 
DO NOT EDIT.\n\ @@ -191,8 +205,16 @@ fn enforce_use_policy(file: &syn::File, module: &Module, rel_path: &str) -> Resu rel_path, s.prefix ), - Action::Inline | Action::ReplaceWith => bail!( - "import '{}' in {} requires action '{}' (manifest prefix '{}'), but the syn-based import rewriter is not yet implemented in bashkit-coreutils-port — file a follow-up to land it (verbatim-copy mode is the only path supported today, see specs/coreutils-args-port.md § Module mode)", + Action::ReplaceWith => { + if s.target.is_none() { + bail!( + "manifest substitution prefix '{}' has action 'replace_with' but no 'target' field", + s.prefix + ); + } + } + Action::Inline => bail!( + "import '{}' in {} requires action '{}' (manifest prefix '{}'), but inline vendoring is not yet implemented (replace_with is supported; inline awaits a follow-up)", path.join("::"), rel_path, s.action.as_str(), @@ -204,6 +226,215 @@ fn enforce_use_policy(file: &syn::File, module: &Module, rel_path: &str) -> Resu Ok(()) } +/// Apply `replace_with` substitutions across all top-level `use` items. +/// +/// Strategy: flatten each use tree into its leaf paths (with optional +/// renames), apply matching substitutions, then re-emit one `use` item +/// per leaf. Use groups (`use a::{b, c}`) are flattened — semantically +/// equivalent, but easier to rewrite without losing the formatting that +/// was going to be re-pretty-printed anyway. +/// +/// Substitution rule: when a leaf's path starts with `s.prefix`, the +/// matched prefix is replaced with `s.target`. If the rewritten path's +/// final segment differs from the original final segment, an `as` +/// rename preserves call-site references (e.g. `use crate::error::Error +/// as UError;`). +fn apply_replace_with(file: &mut syn::File, module: &Module) -> Result<()> { + let mut new_items: Vec = Vec::with_capacity(file.items.len()); + for item in file.items.drain(..) 
{ + match item { + Item::Use(u) => { + let mut leaves: Vec = Vec::new(); + collect_leaves(&u.tree, &mut Vec::new(), &mut leaves); + if leaves.is_empty() { + new_items.push(Item::Use(u)); + continue; + } + for leaf in leaves { + let rewritten = rewrite_leaf(leaf, &module.substitutions)?; + new_items.push(Item::Use(build_item_use(&u, rewritten))); + } + } + other => new_items.push(other), + } + } + file.items = new_items; + Ok(()) +} + +#[derive(Clone, Debug)] +struct UseLeaf { + /// Path segments excluding the final identifier (which becomes the + /// imported name or the source for a glob). + path: Vec, + /// Final segment: either an imported identifier or `*` for glob. + /// `Glob` is represented as `path = full path` and `tail = Glob`. + tail: LeafTail, +} + +#[derive(Clone, Debug)] +enum LeafTail { + /// `use a::b::c;` or `use a::b::c as d;` — `name` is the source + /// segment (`c`), `alias` is `d` (or None if no rename). + Name { name: String, alias: Option }, + /// `use a::b::*;` + Glob, +} + +fn collect_leaves(tree: &UseTree, prefix: &mut Vec, out: &mut Vec) { + match tree { + UseTree::Path(p) => { + prefix.push(p.ident.to_string()); + collect_leaves(&p.tree, prefix, out); + prefix.pop(); + } + UseTree::Name(n) => { + out.push(UseLeaf { + path: prefix.clone(), + tail: LeafTail::Name { + name: n.ident.to_string(), + alias: None, + }, + }); + } + UseTree::Rename(r) => { + out.push(UseLeaf { + path: prefix.clone(), + tail: LeafTail::Name { + name: r.ident.to_string(), + alias: Some(r.rename.to_string()), + }, + }); + } + UseTree::Glob(_) => { + out.push(UseLeaf { + path: prefix.clone(), + tail: LeafTail::Glob, + }); + } + UseTree::Group(g) => { + for t in &g.items { + collect_leaves(t, prefix, out); + } + } + } +} + +fn rewrite_leaf(leaf: UseLeaf, subs: &[Substitution]) -> Result { + // Build the full path representing this leaf's import target. For + // `Name { name }` the full path is `path + [name]`; for `Glob` + // it's just `path`. 
+ let mut full = leaf.path.clone(); + if let LeafTail::Name { ref name, .. } = leaf.tail { + full.push(name.clone()); + } + + let Some(sub) = find_replace_with(&full, subs) else { + return Ok(leaf); + }; + let target = sub + .target + .as_ref() + .expect("validated in enforce_use_policy"); + + // Replace the matched prefix with the target. The unmatched suffix + // is preserved. + let prefix_len = sub.prefix.split("::").count(); + let target_segs: Vec = target.split("::").map(String::from).collect(); + if target_segs.is_empty() { + bail!( + "manifest substitution prefix '{}' has empty target", + sub.prefix + ); + } + let suffix = &full[prefix_len..]; + let mut rewritten_full: Vec = target_segs; + rewritten_full.extend_from_slice(suffix); + + // Split rewritten_full back into (path, tail). For glob preservation, + // we keep the original tail kind. + match leaf.tail { + LeafTail::Glob => Ok(UseLeaf { + path: rewritten_full, + tail: LeafTail::Glob, + }), + LeafTail::Name { + name: orig_name, + alias: orig_alias, + } => { + // Final segment of rewritten_full is the new imported ident. + let new_name = rewritten_full + .pop() + .ok_or_else(|| anyhow!("rewritten path is empty for prefix '{}'", sub.prefix))?; + + // Preserve the original call-site name. If the user already + // had `as alias`, keep it. Otherwise, if rewriting changed + // the last segment, alias to the original name. 
+ let alias = match orig_alias { + Some(a) => Some(a), + None if new_name != orig_name => Some(orig_name), + None => None, + }; + + Ok(UseLeaf { + path: rewritten_full, + tail: LeafTail::Name { + name: new_name, + alias, + }, + }) + } + } +} + +fn find_replace_with<'a>(path: &[String], subs: &'a [Substitution]) -> Option<&'a Substitution> { + subs.iter() + .filter(|s| s.action == Action::ReplaceWith) + .find(|s| { + let segs: Vec<&str> = s.prefix.split("::").collect(); + path.len() >= segs.len() && path.iter().zip(&segs).all(|(a, b)| a == b) + }) +} + +fn build_item_use(template: &ItemUse, leaf: UseLeaf) -> ItemUse { + let tree = build_use_tree(&leaf); + ItemUse { + attrs: template.attrs.clone(), + vis: template.vis.clone(), + use_token: template.use_token, + leading_colon: template.leading_colon, + tree, + semi_token: template.semi_token, + } +} + +fn build_use_tree(leaf: &UseLeaf) -> UseTree { + let inner = match &leaf.tail { + LeafTail::Name { name, alias } => { + let ident = Ident::new(name, Span::call_site()); + match alias { + Some(rename) => UseTree::Rename(syn::UseRename { + ident, + as_token: syn::Token![as](Span::call_site()), + rename: Ident::new(rename, Span::call_site()), + }), + None => UseTree::Name(syn::UseName { ident }), + } + } + LeafTail::Glob => UseTree::Glob(syn::UseGlob { + star_token: syn::Token![*](Span::call_site()), + }), + }; + + leaf.path.iter().rev().fold(inner, |acc, seg| { + UseTree::Path(syn::UsePath { + ident: Ident::new(seg, Span::call_site()), + colon2_token: syn::Token![::](Span::call_site()), + tree: Box::new(acc), + }) + }) +} + fn is_internal(path: &[String]) -> bool { matches!( path.first().map(String::as_str), @@ -364,7 +595,7 @@ action = "error" } #[test] - fn replace_with_action_not_yet_implemented() { + fn replace_with_action_rewrites_use_path() { let (_tmp, uutils, manifest, out) = fixture( r#" [[modules]] @@ -376,12 +607,125 @@ out = "demo.rs" prefix = "uucore::error::UError" action = "replace_with" target = 
"crate::error::Error" +"#, + &[("lib/demo.rs", "use uucore::error::UError;\n")], + ); + let written = run(&uutils, "demo", "x", &manifest, &out).unwrap(); + assert_eq!(written.len(), 1); + let body = fs::read_to_string(&written[0]).unwrap(); + assert!( + body.contains("use crate::error::Error as UError;"), + "got: {body}" + ); + assert!(!body.contains("uucore::error::UError"), "got: {body}"); + } + + #[test] + fn replace_with_preserves_matching_leaf_without_alias() { + let (_tmp, uutils, manifest, out) = fixture( + r#" +[[modules]] +name = "demo" +source = "lib/demo.rs" +out = "demo.rs" + +[[modules.substitutions]] +prefix = "uucore::extendedbigdecimal" +action = "replace_with" +target = "crate::extendedbigdecimal" +"#, + &[( + "lib/demo.rs", + "use uucore::extendedbigdecimal::ExtendedBigDecimal;\n", + )], + ); + let written = run(&uutils, "demo", "x", &manifest, &out).unwrap(); + let body = fs::read_to_string(&written[0]).unwrap(); + assert!( + body.contains("use crate::extendedbigdecimal::ExtendedBigDecimal;"), + "got: {body}" + ); + assert!(!body.contains(" as "), "no alias needed; got: {body}"); + } + + #[test] + fn replace_with_flattens_use_groups() { + let (_tmp, uutils, manifest, out) = fixture( + r#" +[[modules]] +name = "demo" +source = "lib/demo.rs" +out = "demo.rs" + +[[modules.substitutions]] +prefix = "uucore::error::UError" +action = "replace_with" +target = "crate::error::Error" + +[[modules.substitutions]] +prefix = "uucore::extendedbigdecimal::ExtendedBigDecimal" +action = "replace_with" +target = "crate::extendedbigdecimal::ExtendedBigDecimal" +"#, + &[( + "lib/demo.rs", + "use uucore::{error::UError, extendedbigdecimal::ExtendedBigDecimal};\n", + )], + ); + let written = run(&uutils, "demo", "x", &manifest, &out).unwrap(); + let body = fs::read_to_string(&written[0]).unwrap(); + assert!( + body.contains("use crate::error::Error as UError;"), + "got: {body}" + ); + assert!( + body.contains("use crate::extendedbigdecimal::ExtendedBigDecimal;"), + 
"got: {body}" + ); + } + + #[test] + fn replace_with_preserves_existing_alias() { + let (_tmp, uutils, manifest, out) = fixture( + r#" +[[modules]] +name = "demo" +source = "lib/demo.rs" +out = "demo.rs" + +[[modules.substitutions]] +prefix = "uucore::error::UError" +action = "replace_with" +target = "crate::error::Error" +"#, + &[("lib/demo.rs", "use uucore::error::UError as MyErr;\n")], + ); + let written = run(&uutils, "demo", "x", &manifest, &out).unwrap(); + let body = fs::read_to_string(&written[0]).unwrap(); + assert!( + body.contains("use crate::error::Error as MyErr;"), + "got: {body}" + ); + } + + #[test] + fn replace_with_missing_target_fails() { + let (_tmp, uutils, manifest, out) = fixture( + r#" +[[modules]] +name = "demo" +source = "lib/demo.rs" +out = "demo.rs" + +[[modules.substitutions]] +prefix = "uucore::error::UError" +action = "replace_with" "#, &[("lib/demo.rs", "use uucore::error::UError;\n")], ); let err = run(&uutils, "demo", "x", &manifest, &out).unwrap_err(); let msg = format!("{err:#}"); - assert!(msg.contains("not yet implemented"), "got: {msg}"); + assert!(msg.contains("no 'target'"), "got: {msg}"); } #[test] diff --git a/specs/coreutils-args-port.md b/specs/coreutils-args-port.md index 07f635666..7b0fc41f4 100644 --- a/specs/coreutils-args-port.md +++ b/specs/coreutils-args-port.md @@ -234,15 +234,16 @@ Substitution `action`s: | Action | Behaviour | Status | |---|---|---| | `error` | Abort the port at this import. Use when the module references a uucore type that should not be vendored. | Implemented | -| `replace_with` | Rewrite the import to a bashkit-side equivalent (`target = "crate::error::Error"`). | Schema-only — runtime rewriting awaits the future `syn`-based rewriter (#1534) | -| `inline` | Vendor the source file defining the substituted type alongside (`inline_source = "..."`). 
| Schema-only — same future-rewriter dependency | +| `replace_with` | Rewrite the matched prefix in every `use` path to `target`; when the rewritten path's final segment differs from the original, an `as ` rename is inserted so call sites compile unchanged. | Implemented | +| `inline` | Vendor the source file defining the substituted type alongside (`inline_source = "..."`). | Schema-only — awaits a follow-up | The schema accepts all three so manifest stanzas don't change shape -when the rewriter lands. Today the tool emits sources verbatim -(banner-only) and any module relying on `replace_with`/`inline` errors -out with a "rewriter not yet implemented" message that points back to -this spec. The first user with that need (#1534's printf migration on -top of `uucore::format`) will drive landing the rewriter. +when `inline` lands. Modules that use only `error` and `replace_with` +port today; modules that need `inline` still error out with a +"rewriter not yet implemented" message pointing back to this spec. +Output goes through `prettyplease::unparse` whenever any +`replace_with` substitution is in scope, so use-group syntax may be +flattened into individual `use` items as a side effect of rewriting. ### Output banner From c1089483589a31437eb4e7c09b9a6e3342dc0256 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 8 May 2026 14:58:08 +0000 Subject: [PATCH 2/6] feat(coreutils-port): implement inline action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the rewriter to support `action = "inline"` substitutions. Behaviour: - Each `inline` substitution names an `inline_source` file under the uutils dir. After porting the primary tree, the tool ports each inlined file to `<out_base>/<leaf>.rs` (where `<leaf>` is the prefix's final segment). - The inlined file goes through the same enforce + rewrite pipeline so its own transitive uucore references either substitute via another manifest stanza or surface explicitly. 
- Use-paths matching an `inline` substitution prefix are rewritten to `super::<leaf>::…` so the vendored module compiles. - Validation: `inline` substitutions require a non-empty `inline_source` field; missing field is rejected at port time. Tests cover: inlined file is vendored alongside, primary use-path is rewritten to `super::<leaf>`, banner is added to the inlined file, and missing-inline_source validation. Spec and module-doc updated. With `replace_with` (prior commit) and `inline` both supported, the rewriter foundation in `bashkit-coreutils-port` is complete; the remaining pieces of #1534 (vendor uucore::format, migrate printf.rs) land in follow-up PRs against this primitive. --- crates/bashkit-coreutils-port/src/manifest.rs | 10 +- crates/bashkit-coreutils-port/src/module.rs | 229 ++++++++++++++---- specs/coreutils-args-port.md | 11 +- 3 files changed, 197 insertions(+), 53 deletions(-) diff --git a/crates/bashkit-coreutils-port/src/manifest.rs b/crates/bashkit-coreutils-port/src/manifest.rs index 096e98855..9f2c20dd9 100644 --- a/crates/bashkit-coreutils-port/src/manifest.rs +++ b/crates/bashkit-coreutils-port/src/manifest.rs @@ -42,8 +42,13 @@ //! final segment differs from the original, an `as ` rename is //! inserted so call sites compile unchanged. Use groups are //! flattened into individual `use` items as a side effect. -//! - `inline` — accepted in the schema, awaits a follow-up. The -//! manifest declarations stay forward-compatible. +//! - `inline` — fully implemented. The file at `inline_source` is +//! vendored next to the module's output dir (under +//! `/.rs` where `` is the prefix's final +//! segment), and matching `use` paths are rewritten to +//! `super::::…` so the vendored module compiles. The +//! inlined file goes through the same enforce + rewrite pipeline +//! so transitive uucore references either substitute or surface. 
use serde::Deserialize; @@ -89,6 +94,7 @@ pub enum Action { } impl Action { + #[allow(dead_code)] // Kept for diagnostic strings that don't currently consume it. pub fn as_str(self) -> &'static str { match self { Action::Inline => "inline", diff --git a/crates/bashkit-coreutils-port/src/module.rs b/crates/bashkit-coreutils-port/src/module.rs index d0b193a00..ebb95cc1c 100644 --- a/crates/bashkit-coreutils-port/src/module.rs +++ b/crates/bashkit-coreutils-port/src/module.rs @@ -14,13 +14,21 @@ //! so unexpected internal references surface explicitly. //! - matched `error` actions abort with a policy-rejection message. //! - matched `replace_with` actions are rewritten in place (see -//! [`apply_replace_with`]). -//! - matched `inline` actions still abort — inline vendoring awaits -//! a follow-up; manifest stanzas stay forward-compatible. -//! 4. If any `replace_with` substitutions are in scope, the rewritten -//! AST is emitted via `prettyplease::unparse` (use groups become -//! individual `use` items as a side effect). Otherwise the source -//! is written verbatim. A banner is prepended either way. +//! [`apply_substitutions`]). Use-paths starting with the prefix +//! have the matched segments swapped for `target`; if the leaf +//! changes an `as ` rename is added. +//! - matched `inline` actions vendor the file at `inline_source` +//! next to the module's output dir and rewrite the use-path to +//! `super::::…` so the vendored module compiles. +//! 4. If any `replace_with` or `inline` substitutions are in scope, +//! the rewritten AST is emitted via `prettyplease::unparse` (use +//! groups become individual `use` items as a side effect). +//! Otherwise the source is written verbatim. A banner is prepended +//! either way. +//! 5. After the primary tree, every `inline` substitution drives a +//! second port pass on its `inline_source`, with the same enforce +//! plus rewrite policy applied so transitive uucore references still +//! surface explicitly. 
use std::path::{Path, PathBuf}; @@ -77,6 +85,12 @@ pub fn run( &mut written, )?; } + + // Inline-vendor any `action = "inline"` substitutions alongside the + // module. The inlined files land next to the module's `out` dir so + // rewritten paths can resolve them as siblings. + port_inlined(uutils_dir, module, rev, out_base, &mut written)?; + Ok(written) } @@ -117,8 +131,8 @@ fn port_file(src: &Path, out: &Path, module: &Module, rev: &str, rel_path: &str) syn::parse_file(&text).with_context(|| format!("parse {} as rust", src.display()))?; enforce_use_policy(&parsed, module, rel_path)?; - let body_text = if has_replace_with(module) { - apply_replace_with(&mut parsed, module)?; + let body_text = if needs_rewrite(module) { + apply_substitutions(&mut parsed, module)?; prettyplease::unparse(&parsed) } else { text @@ -134,11 +148,67 @@ fn port_file(src: &Path, out: &Path, module: &Module, rev: &str, rel_path: &str) Ok(()) } -fn has_replace_with(module: &Module) -> bool { +fn needs_rewrite(module: &Module) -> bool { module .substitutions .iter() - .any(|s| s.action == Action::ReplaceWith) + .any(|s| matches!(s.action, Action::ReplaceWith | Action::Inline)) +} + +fn port_inlined( + uutils_dir: &Path, + module: &Module, + rev: &str, + out_base: &Path, + written: &mut Vec, +) -> Result<()> { + for sub in &module.substitutions { + if sub.action != Action::Inline { + continue; + } + let inline_source = sub.inline_source.as_ref().ok_or_else(|| { + anyhow!( + "manifest substitution prefix '{}' has action 'inline' but no 'inline_source' field", + sub.prefix + ) + })?; + let src = uutils_dir.join(inline_source); + if !src.exists() { + bail!( + "inline_source path does not exist: {} (uutils dir: {})", + src.display(), + uutils_dir.display() + ); + } + let inline_target = inline_target_path(sub)?; + let out = out_base.join(&inline_target); + + // Each inlined file gets the same enforce + rewrite treatment as + // the primary module so transitive uucore references either + // 
substitute or surface explicitly. + port_file(&src, &out, module, rev, inline_source)?; + written.push(out); + } + Ok(()) +} + +/// Where on disk the inlined file lands. By default, derive from the +/// substitution prefix's leaf segment (e.g. `crate::extendedbigdecimal` +/// → `extendedbigdecimal.rs`). Manifest stanzas may override the +/// derived path in the future via a new field; today we infer. +fn inline_target_path(sub: &Substitution) -> Result { + let leaf = sub + .prefix + .rsplit("::") + .next() + .filter(|s| !s.is_empty()) + .ok_or_else(|| { + anyhow!( + "inline substitution prefix '{}' has no leaf segment", + sub.prefix + ) + })?; + Ok(format!("{leaf}.rs")) } fn banner(rev: &str, module_name: &str, rel_path: &str) -> String { @@ -213,20 +283,22 @@ fn enforce_use_policy(file: &syn::File, module: &Module, rel_path: &str) -> Resu ); } } - Action::Inline => bail!( - "import '{}' in {} requires action '{}' (manifest prefix '{}'), but inline vendoring is not yet implemented (replace_with is supported; inline awaits a follow-up)", - path.join("::"), - rel_path, - s.action.as_str(), - s.prefix - ), + Action::Inline => { + if s.inline_source.is_none() { + bail!( + "manifest substitution prefix '{}' has action 'inline' but no 'inline_source' field", + s.prefix + ); + } + } }, } } Ok(()) } -/// Apply `replace_with` substitutions across all top-level `use` items. +/// Apply `replace_with` and `inline` substitutions across all top-level +/// `use` items. /// /// Strategy: flatten each use tree into its leaf paths (with optional /// renames), apply matching substitutions, then re-emit one `use` item @@ -234,12 +306,16 @@ fn enforce_use_policy(file: &syn::File, module: &Module, rel_path: &str) -> Resu /// equivalent, but easier to rewrite without losing the formatting that /// was going to be re-pretty-printed anyway. /// -/// Substitution rule: when a leaf's path starts with `s.prefix`, the -/// matched prefix is replaced with `s.target`. 
If the rewritten path's -/// final segment differs from the original final segment, an `as` -/// rename preserves call-site references (e.g. `use crate::error::Error -/// as UError;`). -fn apply_replace_with(file: &mut syn::File, module: &Module) -> Result<()> { +/// Substitution rules: +/// - `replace_with`: when a leaf's path starts with `s.prefix`, the +/// matched prefix is replaced with `s.target`. If the rewritten +/// path's final segment differs from the original, an `as` rename +/// preserves call-site references (e.g. `use crate::error::Error as +/// UError;`). +/// - `inline`: the inlined file lives next to the module's `out` dir, +/// so the path is rewritten to point at it via `super::`. The +/// leaf identifier in the use is preserved. +fn apply_substitutions(file: &mut syn::File, module: &Module) -> Result<()> { let mut new_items: Vec = Vec::with_capacity(file.items.len()); for item in file.items.drain(..) { match item { @@ -329,24 +405,43 @@ fn rewrite_leaf(leaf: UseLeaf, subs: &[Substitution]) -> Result { full.push(name.clone()); } - let Some(sub) = find_replace_with(&full, subs) else { + let Some(sub) = find_rewriting_match(&full, subs) else { return Ok(leaf); }; - let target = sub - .target - .as_ref() - .expect("validated in enforce_use_policy"); + + let target_segs: Vec = match sub.action { + Action::ReplaceWith => { + let target = sub + .target + .as_ref() + .expect("validated in enforce_use_policy"); + let segs: Vec = target.split("::").map(String::from).collect(); + if segs.is_empty() { + bail!( + "manifest substitution prefix '{}' has empty target", + sub.prefix + ); + } + segs + } + Action::Inline => { + // Inlined file is a sibling of the module out dir. Use + // `super::` to reach it from the vendored module's + // submodules. 
+ let leaf_seg = sub + .prefix + .rsplit("::") + .next() + .filter(|s| !s.is_empty()) + .ok_or_else(|| anyhow!("inline prefix '{}' has no leaf segment", sub.prefix))?; + vec!["super".to_string(), leaf_seg.to_string()] + } + Action::Error => unreachable!("error action does not reach the rewriter"), + }; // Replace the matched prefix with the target. The unmatched suffix // is preserved. let prefix_len = sub.prefix.split("::").count(); - let target_segs: Vec = target.split("::").map(String::from).collect(); - if target_segs.is_empty() { - bail!( - "manifest substitution prefix '{}' has empty target", - sub.prefix - ); - } let suffix = &full[prefix_len..]; let mut rewritten_full: Vec = target_segs; rewritten_full.extend_from_slice(suffix); @@ -387,9 +482,9 @@ fn rewrite_leaf(leaf: UseLeaf, subs: &[Substitution]) -> Result { } } -fn find_replace_with<'a>(path: &[String], subs: &'a [Substitution]) -> Option<&'a Substitution> { +fn find_rewriting_match<'a>(path: &[String], subs: &'a [Substitution]) -> Option<&'a Substitution> { subs.iter() - .filter(|s| s.action == Action::ReplaceWith) + .filter(|s| matches!(s.action, Action::ReplaceWith | Action::Inline)) .find(|s| { let segs: Vec<&str> = s.prefix.split("::").collect(); path.len() >= segs.len() && path.iter().zip(&segs).all(|(a, b)| a == b) @@ -777,18 +872,65 @@ out = "demo.rs" } #[test] - fn rejects_inline_until_rewriter_lands() { + fn inline_action_vendors_source_file_alongside() { let (_tmp, uutils, manifest, out) = fixture( r#" [[modules]] name = "demo" source = "lib/demo.rs" -out = "demo.rs" +out = "demo" + +[[modules.substitutions]] +prefix = "uucore::extendedbigdecimal" +action = "inline" +inline_source = "lib/extendedbigdecimal.rs" +"#, + &[ + ( + "lib/demo.rs", + "use uucore::extendedbigdecimal::ExtendedBigDecimal;\n", + ), + ( + "lib/extendedbigdecimal.rs", + "use std::fmt::Display;\npub struct ExtendedBigDecimal;\n", + ), + ], + ); + let written = run(&uutils, "demo", "x", &manifest, &out).unwrap(); + 
assert_eq!(written.len(), 2, "got: {written:?}"); + + // Module body uses super::extendedbigdecimal to reach the + // sibling-vendored file. + let module_body = fs::read_to_string(&written[0]).unwrap(); + assert!( + module_body.contains("use super::extendedbigdecimal::ExtendedBigDecimal;"), + "got: {module_body}" + ); + + // Inlined file is vendored next to the module with its own banner. + let inlined_body = fs::read_to_string(&written[1]).unwrap(); + assert!( + inlined_body.starts_with("// GENERATED by bashkit-coreutils-port"), + "got: {inlined_body}" + ); + assert!( + inlined_body.contains("pub struct ExtendedBigDecimal;"), + "got: {inlined_body}" + ); + } + + #[test] + fn inline_missing_inline_source_field_fails() { + let (_tmp, uutils, manifest, out) = fixture( + r#" +[[modules]] +name = "demo" +source = "lib/demo.rs" +out = "demo" [[modules.substitutions]] prefix = "uucore::extendedbigdecimal" action = "inline" -inline_source = "src/uucore/src/lib/features/extendedbigdecimal.rs" "#, &[( "lib/demo.rs", @@ -797,7 +939,6 @@ inline_source = "src/uucore/src/lib/features/extendedbigdecimal.rs" ); let err = run(&uutils, "demo", "x", &manifest, &out).unwrap_err(); let msg = format!("{err:#}"); - assert!(msg.contains("not yet implemented"), "got: {msg}"); - assert!(msg.contains("inline"), "got: {msg}"); + assert!(msg.contains("inline_source"), "got: {msg}"); } } diff --git a/specs/coreutils-args-port.md b/specs/coreutils-args-port.md index 7b0fc41f4..1d34a2e9a 100644 --- a/specs/coreutils-args-port.md +++ b/specs/coreutils-args-port.md @@ -235,15 +235,12 @@ Substitution `action`s: |---|---|---| | `error` | Abort the port at this import. Use when the module references a uucore type that should not be vendored. | Implemented | | `replace_with` | Rewrite the matched prefix in every `use` path to `target`; when the rewritten path's final segment differs from the original, an `as ` rename is inserted so call sites compile unchanged. 
| Implemented | -| `inline` | Vendor the source file defining the substituted type alongside (`inline_source = "..."`). | Schema-only — awaits a follow-up | +| `inline` | Vendor the file at `inline_source` next to the module's output dir (under `<out-parent>/<leaf>.rs` where `<leaf>` is the prefix's final segment), and rewrite matching `use` paths to `super::<leaf>::…`. The inlined file is processed through the same enforce + rewrite pipeline so transitive uucore references either substitute or surface explicitly. | Implemented | -The schema accepts all three so manifest stanzas don't change shape -when `inline` lands. Modules that use only `error` and `replace_with` -port today; modules that need `inline` still error out with a -"rewriter not yet implemented" message pointing back to this spec. Output goes through `prettyplease::unparse` whenever any -`replace_with` substitution is in scope, so use-group syntax may be -flattened into individual `use` items as a side effect of rewriting. +`replace_with` or `inline` substitution is in scope, so use-group +syntax may be flattened into individual `use` items as a side effect +of rewriting. ### Output banner From d7b939c88a76e632573f4296569a878ced113215 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 8 May 2026 15:04:56 +0000 Subject: [PATCH 3/6] feat(bashkit): add bigdecimal/num-traits/unit-prefix/os_display deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pin versions to uutils' workspace defaults. These deps back the upcoming vendored uucore::format module that #1534 will use for `printf`. Adding them in a separate commit so the vendor PR keeps to mechanical codegen output. Verified: - `cargo check -p bashkit` (default features): clean. - `cargo build -p bashkit --no-default-features`: clean. - `cargo test -p bashkit --lib`: 2230 passed. - `cargo clippy -p bashkit --all-targets -- -D warnings`: clean. 
(`cargo build --target wasm32-unknown-unknown` is broken on bashkit `main` independently of this change — `getrandom`'s wasm-js backend isn't enabled — so was not regression-tested here. Re-test once that pre-existing wasm build is restored.) --- Cargo.lock | 19 +++++++++++++++++++ crates/bashkit/Cargo.toml | 9 +++++++++ 2 files changed, 28 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index ff6dbd970..cbf168e42 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -350,6 +350,7 @@ dependencies = [ "anyhow", "async-trait", "base64", + "bigdecimal", "chrono", "clap", "criterion", @@ -367,6 +368,8 @@ dependencies = [ "jaq-std", "md-5", "monty", + "num-traits", + "os_display", "pretty_assertions", "proptest", "rand 0.10.1", @@ -387,6 +390,7 @@ dependencies = [ "tower", "tracing", "turso_core", + "unit-prefix", "url", "zapcode-core", "zeroize", @@ -3049,6 +3053,15 @@ dependencies = [ "indexmap", ] +[[package]] +name = "os_display" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad5fd71b79026fb918650dde6d125000a233764f1c2f1659a1c71118e33ea08f" +dependencies = [ + "unicode-width 0.2.2", +] + [[package]] name = "owo-colors" version = "3.5.0" @@ -5809,6 +5822,12 @@ dependencies = [ "rand 0.8.6", ] +[[package]] +name = "unit-prefix" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" + [[package]] name = "universal-hash" version = "0.5.1" diff --git a/crates/bashkit/Cargo.toml b/crates/bashkit/Cargo.toml index ce783bec6..aa928b689 100644 --- a/crates/bashkit/Cargo.toml +++ b/crates/bashkit/Cargo.toml @@ -84,6 +84,15 @@ tracing = { workspace = true, optional = true } # argument surfaces. Always on — see specs/coreutils-args-port.md. clap = { workspace = true } +# Pinned to uutils' workspace versions. Used by the vendored +# uucore::format module (see crates/bashkit/src/builtins/generated/format) +# that backs `printf`. 
Kept platform-clean — no rustix / errno +# transitive deps — so wasm32 stays buildable. +bigdecimal = "0.4" +num-traits = "0.2" +unit-prefix = "0.5" +os_display = "0.1.3" + # Embedded Python interpreter (optional) monty = { git = "https://github.com/pydantic/monty", rev = "49faa4c", optional = true } From 42979b4755bcc7d47f41e60b94508d7d8e12fb2e Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 8 May 2026 15:11:58 +0000 Subject: [PATCH 4/6] chore(supply-chain): exempt os_display 0.1.4 and unit-prefix 0.5.2 cargo-vet flagged the new format-vendor deps as unvetted. Add exemptions to match the existing entries for bigdecimal / num-traits in this manifest. --- supply-chain/config.toml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/supply-chain/config.toml b/supply-chain/config.toml index a372815ad..5b2d9bc6c 100644 --- a/supply-chain/config.toml +++ b/supply-chain/config.toml @@ -1200,6 +1200,10 @@ criteria = "safe-to-deploy" version = "1.2.0" criteria = "safe-to-deploy" +[[exemptions.os_display]] +version = "0.1.4" +criteria = "safe-to-deploy" + [[exemptions.owo-colors]] version = "3.5.0" criteria = "safe-to-deploy" @@ -2188,6 +2192,10 @@ criteria = "safe-to-deploy" version = "1.3.0" criteria = "safe-to-deploy" +[[exemptions.unit-prefix]] +version = "0.5.2" +criteria = "safe-to-deploy" + [[exemptions.unicode_names2_generator]] version = "1.3.0" criteria = "safe-to-deploy" From 3f5f368dac2c57161f4fb01dd0c4fead26d43160 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 8 May 2026 15:20:03 +0000 Subject: [PATCH 5/6] chore(supply-chain): cargo vet fmt sort exemptions --- supply-chain/config.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/supply-chain/config.toml b/supply-chain/config.toml index 5b2d9bc6c..e71d42127 100644 --- a/supply-chain/config.toml +++ b/supply-chain/config.toml @@ -2192,14 +2192,14 @@ criteria = "safe-to-deploy" version = "1.3.0" criteria = "safe-to-deploy" -[[exemptions.unit-prefix]] -version 
= "0.5.2" -criteria = "safe-to-deploy" - [[exemptions.unicode_names2_generator]] version = "1.3.0" criteria = "safe-to-deploy" +[[exemptions.unit-prefix]] +version = "0.5.2" +criteria = "safe-to-deploy" + [[exemptions.universal-hash]] version = "0.5.1" criteria = "safe-to-deploy" From 019816ec8201d8caef211b119dd6488581326799 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 8 May 2026 17:21:04 -0500 Subject: [PATCH 6/6] feat(printf): vendor uucore format --- Cargo.lock | 2 + Cargo.toml | 4 +- crates/bashkit-cli/src/main.rs | 12 + crates/bashkit-coreutils-port/src/module.rs | 225 +++- crates/bashkit-coreutils-port/vendored.toml | 61 +- .../builtins/generated/extendedbigdecimal.rs | 239 ++++ .../src/builtins/generated/format/argument.rs | 202 ++++ .../src/builtins/generated/format/escape.rs | 151 +++ .../src/builtins/generated/format/human.rs | 37 + .../src/builtins/generated/format/mod.rs | 347 ++++++ .../builtins/generated/format/num_format.rs | 544 +++++++++ .../src/builtins/generated/format/spec.rs | 527 +++++++++ .../src/builtins/generated/format_support.rs | 126 ++ crates/bashkit/src/builtins/generated/mod.rs | 8 + .../src/builtins/generated/num_parser.rs | 441 +++++++ crates/bashkit/src/builtins/printf.rs | 1033 +++++++---------- specs/coreutils-args-port.md | 22 +- 17 files changed, 3335 insertions(+), 646 deletions(-) create mode 100644 crates/bashkit/src/builtins/generated/extendedbigdecimal.rs create mode 100644 crates/bashkit/src/builtins/generated/format/argument.rs create mode 100644 crates/bashkit/src/builtins/generated/format/escape.rs create mode 100644 crates/bashkit/src/builtins/generated/format/human.rs create mode 100644 crates/bashkit/src/builtins/generated/format/mod.rs create mode 100644 crates/bashkit/src/builtins/generated/format/num_format.rs create mode 100644 crates/bashkit/src/builtins/generated/format/spec.rs create mode 100644 crates/bashkit/src/builtins/generated/format_support.rs create mode 100644 
crates/bashkit/src/builtins/generated/num_parser.rs diff --git a/Cargo.lock b/Cargo.lock index cbf168e42..0d905ee7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1809,9 +1809,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", + "js-sys", "libc", "r-efi 5.3.0", "wasip2", + "wasm-bindgen", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 33cee8354..59ba5f4f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,9 +70,11 @@ rand = "0.10" zeroize = "1" # CSPRNG for placeholder token generation (security-grade randomness). +# Important decision: enable `wasm_js` so the public `wasm32-unknown-unknown` +# build gate stays green; bashkit targets JS-backed wasm for that triple. # Used by the credential placeholder generator; getrandom is the lowest-level # CSPRNG primitive and is already transitively in the dep tree via reqwest/rustls. -getrandom = "0.3" +getrandom = { version = "0.3", features = ["wasm_js"] } # CLI # Intentionally NOT enabling clap's `env` feature: it makes `Arg::env(...)` diff --git a/crates/bashkit-cli/src/main.rs b/crates/bashkit-cli/src/main.rs index 27faf0f83..dcb08dff6 100644 --- a/crates/bashkit-cli/src/main.rs +++ b/crates/bashkit-cli/src/main.rs @@ -204,6 +204,18 @@ fn apply_real_mounts( ro_mounts: &[String], rw_mounts: &[String], ) -> bashkit::BashBuilder { + // Important decision: explicit CLI mount flags are the user's allowlist. + // The library keeps sensitive host paths closed by default for embedders; + // the CLI turns each requested host path into an audit-visible allow entry. 
+ let allowed_mount_paths = ro_mounts + .iter() + .chain(rw_mounts) + .map(|spec| spec.split_once(':').map_or(spec.as_str(), |(host, _)| host)) + .collect::>(); + if !allowed_mount_paths.is_empty() { + builder = builder.allowed_mount_paths(allowed_mount_paths); + } + for spec in ro_mounts { if let Some((host, vfs)) = spec.split_once(':') { builder = builder.mount_real_readonly_at(host, vfs); diff --git a/crates/bashkit-coreutils-port/src/module.rs b/crates/bashkit-coreutils-port/src/module.rs index ebb95cc1c..900e3b282 100644 --- a/crates/bashkit-coreutils-port/src/module.rs +++ b/crates/bashkit-coreutils-port/src/module.rs @@ -4,14 +4,18 @@ //! 1. Load the manifest and look up the requested module entry. //! 2. Walk every `.rs` file under the entry's `source` (single file //! or directory, depth-recursive). -//! 3. For each file, parse with syn and walk top-level `use` items: +//! 3. Strip upstream top-level `#[cfg(test)]` items and rustdoc attrs; +//! both assume the original uucore crate topology, while bashkit +//! tests and documents the integrated generated module. +//! 4. For each file, parse with syn and walk top-level `use` items: //! - `use fluent::*;` (or any `fluent::...`) → hard error: the //! module is not safely vendorable without code changes. //! - `use uucore::translate;` / `translate::*` → same hard error //! class (Fluent is the i18n boundary). -//! - any other internal path (`uucore::`, `crate::`) must match a -//! manifest substitution prefix. Unmatched paths abort the port -//! so unexpected internal references surface explicitly. +//! - any uucore-crate path (`uucore::`, `crate::`) must match a +//! manifest substitution prefix. Unmatched paths abort the port so +//! unexpected uucore runtime references surface explicitly. Relative +//! `self::`/`super::` paths stay inside the vendored module tree. //! - matched `error` actions abort with a policy-rejection message. //! - matched `replace_with` actions are rewritten in place (see //! 
[`apply_substitutions`]). Use-paths starting with the prefix @@ -19,13 +23,14 @@ //! changes an `as <orig>` rename is added. //! - matched `inline` actions vendor the file at `inline_source` //! next to the module's output dir and rewrite the use-path to -//! `super::<leaf>::…` so the vendored module compiles. -//! 4. If any `replace_with` or `inline` substitutions are in scope, +//! `crate::builtins::generated::<leaf>::…` so the vendored module +//! compiles from any nested depth. +//! 5. If any `replace_with` or `inline` substitutions are in scope, //! the rewritten AST is emitted via `prettyplease::unparse` (use //! groups become individual `use` items as a side effect). //! Otherwise the source is written verbatim. A banner is prepended //! either way. -//! 5. After the primary tree, every `inline` substitution drives a +//! 6. After the primary tree, every `inline` substitution drives a //! second port pass on its `inline_source`, with the same enforce //! plus rewrite policy applied so transitive uucore references still //! surface explicitly. 
@@ -34,6 +39,7 @@ use std::path::{Path, PathBuf}; use anyhow::{Context, Result, anyhow, bail}; use proc_macro2::Span; +use quote::ToTokens; use syn::{Ident, Item, ItemUse, UseTree}; use crate::manifest::{Action, Manifest, Module, Substitution}; @@ -129,9 +135,11 @@ fn port_file(src: &Path, out: &Path, module: &Module, rev: &str, rel_path: &str) std::fs::read_to_string(src).with_context(|| format!("read source {}", src.display()))?; let mut parsed = syn::parse_file(&text).with_context(|| format!("parse {} as rust", src.display()))?; + let stripped_test_items = strip_cfg_test_items(&mut parsed); + let stripped_doc_attrs = strip_doc_attrs(&mut parsed); enforce_use_policy(&parsed, module, rel_path)?; - let body_text = if needs_rewrite(module) { + let body_text = if needs_rewrite(module) || stripped_test_items || stripped_doc_attrs { apply_substitutions(&mut parsed, module)?; prettyplease::unparse(&parsed) } else { @@ -155,6 +163,74 @@ fn needs_rewrite(module: &Module) -> bool { .any(|s| matches!(s.action, Action::ReplaceWith | Action::Inline)) } +fn strip_cfg_test_items(file: &mut syn::File) -> bool { + let before = file.items.len(); + file.items.retain(|item| !has_cfg_test(item)); + before != file.items.len() +} + +fn has_cfg_test(item: &Item) -> bool { + item_attrs(item).iter().any(|attr| { + attr.path().is_ident("cfg") && attr.meta.to_token_stream().to_string().contains("test") + }) +} + +fn strip_doc_attrs(file: &mut syn::File) -> bool { + let mut stripped = false; + for item in &mut file.items { + if let Some(attrs) = item_attrs_mut(item) { + let before = attrs.len(); + attrs.retain(|attr| !attr.path().is_ident("doc")); + stripped |= before != attrs.len(); + } + } + stripped +} + +fn item_attrs(item: &Item) -> &[syn::Attribute] { + match item { + Item::Const(i) => &i.attrs, + Item::Enum(i) => &i.attrs, + Item::ExternCrate(i) => &i.attrs, + Item::Fn(i) => &i.attrs, + Item::ForeignMod(i) => &i.attrs, + Item::Impl(i) => &i.attrs, + Item::Macro(i) => &i.attrs, + 
Item::Mod(i) => &i.attrs, + Item::Static(i) => &i.attrs, + Item::Struct(i) => &i.attrs, + Item::Trait(i) => &i.attrs, + Item::TraitAlias(i) => &i.attrs, + Item::Type(i) => &i.attrs, + Item::Union(i) => &i.attrs, + Item::Use(i) => &i.attrs, + Item::Verbatim(_) => &[], + _ => &[], + } +} + +fn item_attrs_mut(item: &mut Item) -> Option<&mut Vec> { + match item { + Item::Const(i) => Some(&mut i.attrs), + Item::Enum(i) => Some(&mut i.attrs), + Item::ExternCrate(i) => Some(&mut i.attrs), + Item::Fn(i) => Some(&mut i.attrs), + Item::ForeignMod(i) => Some(&mut i.attrs), + Item::Impl(i) => Some(&mut i.attrs), + Item::Macro(i) => Some(&mut i.attrs), + Item::Mod(i) => Some(&mut i.attrs), + Item::Static(i) => Some(&mut i.attrs), + Item::Struct(i) => Some(&mut i.attrs), + Item::Trait(i) => Some(&mut i.attrs), + Item::TraitAlias(i) => Some(&mut i.attrs), + Item::Type(i) => Some(&mut i.attrs), + Item::Union(i) => Some(&mut i.attrs), + Item::Use(i) => Some(&mut i.attrs), + Item::Verbatim(_) => None, + _ => None, + } +} + fn port_inlined( uutils_dir: &Path, module: &Module, @@ -255,8 +331,10 @@ fn enforce_use_policy(file: &syn::File, module: &Module, rel_path: &str) -> Resu ); } - // External: pass through. Anything not rooted at uucore/crate/self/super - // is assumed to be a published crate (std, bigdecimal, …). + // External or module-local relative paths pass through. Anything not + // rooted at uucore/crate is assumed to be a published crate + // (std, bigdecimal, …) or a `self`/`super` reference within the + // vendored tree. if !is_internal(path) { continue; } @@ -313,8 +391,9 @@ fn enforce_use_policy(file: &syn::File, module: &Module, rel_path: &str) -> Resu /// preserves call-site references (e.g. `use crate::error::Error as /// UError;`). /// - `inline`: the inlined file lives next to the module's `out` dir, -/// so the path is rewritten to point at it via `super::`. The -/// leaf identifier in the use is preserved. 
+/// so the path is rewritten to point at it via +/// `crate::builtins::generated::`. The leaf identifier in the +/// use is preserved. fn apply_substitutions(file: &mut syn::File, module: &Module) -> Result<()> { let mut new_items: Vec = Vec::with_capacity(file.items.len()); for item in file.items.drain(..) { @@ -401,7 +480,9 @@ fn rewrite_leaf(leaf: UseLeaf, subs: &[Substitution]) -> Result { // `Name { name }` the full path is `path + [name]`; for `Glob` // it's just `path`. let mut full = leaf.path.clone(); - if let LeafTail::Name { ref name, .. } = leaf.tail { + if let LeafTail::Name { ref name, .. } = leaf.tail + && name != "self" + { full.push(name.clone()); } @@ -425,16 +506,21 @@ fn rewrite_leaf(leaf: UseLeaf, subs: &[Substitution]) -> Result { segs } Action::Inline => { - // Inlined file is a sibling of the module out dir. Use - // `super::` to reach it from the vendored module's - // submodules. + // Inlined files are siblings under `builtins::generated`. + // Use an absolute crate path so references work from both the + // primary module root and any nested submodules. let leaf_seg = sub .prefix .rsplit("::") .next() .filter(|s| !s.is_empty()) .ok_or_else(|| anyhow!("inline prefix '{}' has no leaf segment", sub.prefix))?; - vec!["super".to_string(), leaf_seg.to_string()] + vec![ + "crate".to_string(), + "builtins".to_string(), + "generated".to_string(), + leaf_seg.to_string(), + ] } Action::Error => unreachable!("error action does not reach the rewriter"), }; @@ -457,6 +543,15 @@ fn rewrite_leaf(leaf: UseLeaf, subs: &[Substitution]) -> Result { name: orig_name, alias: orig_alias, } => { + if orig_name == "self" { + return Ok(UseLeaf { + path: rewritten_full, + tail: LeafTail::Name { + name: orig_name, + alias: orig_alias, + }, + }); + } // Final segment of rewritten_full is the new imported ident. 
let new_name = rewritten_full .pop() @@ -504,6 +599,20 @@ fn build_item_use(template: &ItemUse, leaf: UseLeaf) -> ItemUse { } fn build_use_tree(leaf: &UseLeaf) -> UseTree { + if let LeafTail::Name { name, alias } = &leaf.tail + && name == "self" + && let Some((import_name, parent)) = leaf.path.split_last() + { + let normalized = UseLeaf { + path: parent.to_vec(), + tail: LeafTail::Name { + name: import_name.clone(), + alias: alias.clone(), + }, + }; + return build_use_tree(&normalized); + } + let inner = match &leaf.tail { LeafTail::Name { name, alias } => { let ident = Ident::new(name, Span::call_site()); @@ -531,10 +640,7 @@ fn build_use_tree(leaf: &UseLeaf) -> UseTree { } fn is_internal(path: &[String]) -> bool { - matches!( - path.first().map(String::as_str), - Some("uucore" | "crate" | "self" | "super") - ) + matches!(path.first().map(String::as_str), Some("uucore" | "crate")) } fn find_match<'a>(path: &[String], subs: &'a [Substitution]) -> Option<&'a Substitution> { @@ -899,11 +1005,13 @@ inline_source = "lib/extendedbigdecimal.rs" let written = run(&uutils, "demo", "x", &manifest, &out).unwrap(); assert_eq!(written.len(), 2, "got: {written:?}"); - // Module body uses super::extendedbigdecimal to reach the - // sibling-vendored file. + // Module body uses an absolute generated-module path so the + // sibling-vendored file is reachable from nested module depths. 
let module_body = fs::read_to_string(&written[0]).unwrap(); assert!( - module_body.contains("use super::extendedbigdecimal::ExtendedBigDecimal;"), + module_body.contains( + "use crate::builtins::generated::extendedbigdecimal::ExtendedBigDecimal;" + ), "got: {module_body}" ); @@ -919,6 +1027,75 @@ inline_source = "lib/extendedbigdecimal.rs" ); } + #[test] + fn strips_upstream_cfg_test_modules() { + let (_tmp, uutils, manifest, out) = fixture( + r#" +[[modules]] +name = "demo" +source = "lib/demo.rs" +out = "demo.rs" +"#, + &[( + "lib/demo.rs", + "#[cfg(test)]\nmod tests { use crate::original_topology::Thing; }\npub fn live() {}\n", + )], + ); + let written = run(&uutils, "demo", "x", &manifest, &out).unwrap(); + let body = fs::read_to_string(&written[0]).unwrap(); + assert!(body.contains("pub fn live() {}"), "got: {body}"); + assert!(!body.contains("original_topology"), "got: {body}"); + } + + #[test] + fn strips_upstream_rustdoc_attrs() { + let (_tmp, uutils, manifest, out) = fixture( + r#" +[[modules]] +name = "demo" +source = "lib/demo.rs" +out = "demo.rs" +"#, + &[( + "lib/demo.rs", + "/// Example assumes `use uucore::format::printf;`.\npub fn live() {}\n", + )], + ); + let written = run(&uutils, "demo", "x", &manifest, &out).unwrap(); + let body = fs::read_to_string(&written[0]).unwrap(); + assert!(body.contains("pub fn live() {}"), "got: {body}"); + assert!(!body.contains("uucore::format"), "got: {body}"); + } + + #[test] + fn relative_self_use_group_rewrites_to_module_import() { + let (_tmp, uutils, manifest, out) = fixture( + r#" +[[modules]] +name = "demo" +source = "lib/demo.rs" +out = "demo.rs" + +[[modules.substitutions]] +prefix = "crate::support" +action = "replace_with" +target = "crate::builtins::generated::support" +"#, + &[( + "lib/demo.rs", + "use super::num_format::{self, Formatter};\nuse crate::support::Thing;\n", + )], + ); + let written = run(&uutils, "demo", "x", &manifest, &out).unwrap(); + let body = 
fs::read_to_string(&written[0]).unwrap(); + assert!(body.contains("use super::num_format;"), "got: {body}"); + assert!( + body.contains("use super::num_format::Formatter;"), + "got: {body}" + ); + assert!(!body.contains("::self;"), "got: {body}"); + } + #[test] fn inline_missing_inline_source_field_fails() { let (_tmp, uutils, manifest, out) = fixture( diff --git a/crates/bashkit-coreutils-port/vendored.toml b/crates/bashkit-coreutils-port/vendored.toml index ad5cffd0d..852d30b4e 100644 --- a/crates/bashkit-coreutils-port/vendored.toml +++ b/crates/bashkit-coreutils-port/vendored.toml @@ -23,4 +23,63 @@ # prefix = "uucore::error::UError" # action = "error" -modules = [] +[[modules]] +name = "format" +source = "src/uucore/src/lib/features/format" +out = "format" + +# Self-references inside uucore::format now point at the vendored +# generated module path. +[[modules.substitutions]] +prefix = "crate::format" +action = "replace_with" +target = "crate::builtins::generated::format" + +# Small numeric helper module. Inlined so the generated code stays detached +# from uucore's broad runtime dependency surface. +[[modules.substitutions]] +prefix = "crate::extendedbigdecimal" +action = "inline" +inline_source = "src/uucore/src/lib/features/extendedbigdecimal.rs" + +# Numeric parser required by FormatArgument::Unparsed. +[[modules.substitutions]] +prefix = "crate::parser::num_parser" +action = "inline" +inline_source = "src/uucore/src/lib/features/parser/num_parser.rs" + +# Bashkit-local replacements for uucore runtime hooks and platform helpers. 
+[[modules.substitutions]] +prefix = "crate::NonUtf8OsStrError" +action = "replace_with" +target = "crate::builtins::generated::format_support::NonUtf8OsStrError" + +[[modules.substitutions]] +prefix = "crate::os_str_as_bytes" +action = "replace_with" +target = "crate::builtins::generated::format_support::os_str_as_bytes" + +[[modules.substitutions]] +prefix = "crate::error::UError" +action = "replace_with" +target = "crate::builtins::generated::format_support::UError" + +[[modules.substitutions]] +prefix = "crate::error::set_exit_code" +action = "replace_with" +target = "crate::builtins::generated::format_support::set_exit_code" + +[[modules.substitutions]] +prefix = "crate::quoting_style" +action = "replace_with" +target = "crate::builtins::generated::format_support" + +[[modules.substitutions]] +prefix = "crate::show_error" +action = "replace_with" +target = "crate::builtins::generated::format_support::show_error" + +[[modules.substitutions]] +prefix = "crate::show_warning" +action = "replace_with" +target = "crate::builtins::generated::format_support::show_warning" diff --git a/crates/bashkit/src/builtins/generated/extendedbigdecimal.rs b/crates/bashkit/src/builtins/generated/extendedbigdecimal.rs new file mode 100644 index 000000000..423bf54c4 --- /dev/null +++ b/crates/bashkit/src/builtins/generated/extendedbigdecimal.rs @@ -0,0 +1,239 @@ +// GENERATED by bashkit-coreutils-port. DO NOT EDIT. +// +// Source: uutils/coreutils@39364b6 src/uucore/src/lib/features/extendedbigdecimal.rs +// Regenerate: cargo run -p bashkit-coreutils-port -- port-module format +// +// Original uutils licensed MIT; see THIRD_PARTY_LICENSES. + +//! An arbitrary precision float that can also represent infinity, NaN, etc. +//! +//! The finite values are stored as [`BigDecimal`] instances. Because +//! the `bigdecimal` library does not represent infinity, NaN, etc., we +//! need to represent them explicitly ourselves. The +//! [`ExtendedBigDecimal`] enumeration does that. +//! +//! 
# Examples +//! +//! Addition works for [`ExtendedBigDecimal`] as it does for floats. For +//! example, adding infinity to any finite value results in infinity: +//! +//! ```rust,ignore +//! let summand1 = ExtendedBigDecimal::BigDecimal(BigDecimal::zero()); +//! let summand2 = ExtendedBigDecimal::Infinity; +//! assert_eq!(summand1 + summand2, ExtendedBigDecimal::Infinity); +//! ``` +use bigdecimal::BigDecimal; +use bigdecimal::num_bigint::BigUint; +use num_traits::FromPrimitive; +use num_traits::Signed; +use num_traits::Zero; +use std::cmp::Ordering; +use std::ops::Add; +use std::ops::Neg; +#[derive(Debug, Clone)] +pub enum ExtendedBigDecimal { + /// Arbitrary precision floating point number. + BigDecimal(BigDecimal), + /// Floating point positive infinity. + /// + /// This is represented as its own enumeration member instead of as + /// a [`BigDecimal`] because the `bigdecimal` library does not + /// support infinity, see [here][0]. + /// + /// [0]: https://github.com/akubera/bigdecimal-rs/issues/67 + Infinity, + /// Floating point negative infinity. + /// + /// This is represented as its own enumeration member instead of as + /// a [`BigDecimal`] because the `bigdecimal` library does not + /// support infinity, see [here][0]. + /// + /// [0]: https://github.com/akubera/bigdecimal-rs/issues/67 + MinusInfinity, + /// Floating point negative zero. + /// + /// This is represented as its own enumeration member instead of as + /// a [`BigDecimal`] because the `bigdecimal` library does not + /// support negative zero. + MinusZero, + /// Floating point NaN. + /// + /// This is represented as its own enumeration member instead of as + /// a [`BigDecimal`] because the `bigdecimal` library does not + /// support NaN, see [here][0]. + /// + /// [0]: https://github.com/akubera/bigdecimal-rs/issues/67 + Nan, + /// Floating point negative NaN. 
+ /// + /// This is represented as its own enumeration member instead of as + /// a [`BigDecimal`] because the `bigdecimal` library does not + /// support NaN, see [here][0]. + /// + /// [0]: https://github.com/akubera/bigdecimal-rs/issues/67 + MinusNan, +} +impl From for ExtendedBigDecimal { + fn from(val: f64) -> Self { + if val.is_nan() { + if val.is_sign_negative() { + Self::MinusNan + } else { + Self::Nan + } + } else if val.is_infinite() { + if val.is_sign_negative() { + Self::MinusInfinity + } else { + Self::Infinity + } + } else if val.is_zero() && val.is_sign_negative() { + Self::MinusZero + } else { + Self::BigDecimal(BigDecimal::from_f64(val).unwrap()) + } + } +} +impl From for ExtendedBigDecimal { + fn from(val: u8) -> Self { + Self::BigDecimal(val.into()) + } +} +impl From for ExtendedBigDecimal { + fn from(val: u32) -> Self { + Self::BigDecimal(val.into()) + } +} +impl ExtendedBigDecimal { + pub fn zero() -> Self { + Self::BigDecimal(0.into()) + } + pub fn one() -> Self { + Self::BigDecimal(1.into()) + } + pub fn to_biguint(&self) -> Option { + match self { + Self::BigDecimal(big_decimal) => { + let (bi, scale) = big_decimal.as_bigint_and_scale(); + if bi.is_negative() || scale > 0 || scale < -(u32::MAX as i64) { + return None; + } + bi.to_biguint() + .map(|bi| bi * BigUint::from(10u32).pow(-scale as u32)) + } + _ => None, + } + } +} +impl Zero for ExtendedBigDecimal { + fn zero() -> Self { + Self::BigDecimal(BigDecimal::zero()) + } + fn is_zero(&self) -> bool { + match self { + Self::BigDecimal(n) => n.is_zero(), + Self::MinusZero => true, + _ => false, + } + } +} +impl Default for ExtendedBigDecimal { + fn default() -> Self { + Self::zero() + } +} +impl Add for ExtendedBigDecimal { + type Output = Self; + fn add(self, other: Self) -> Self { + match (self, other) { + (Self::BigDecimal(m), Self::BigDecimal(n)) => Self::BigDecimal(m.add(n)), + (Self::BigDecimal(_), Self::MinusInfinity) => Self::MinusInfinity, + (Self::BigDecimal(_), Self::Infinity) => 
Self::Infinity, + (Self::BigDecimal(m), Self::MinusZero) => Self::BigDecimal(m), + (Self::Infinity, Self::BigDecimal(_)) => Self::Infinity, + (Self::Infinity, Self::Infinity) => Self::Infinity, + (Self::Infinity, Self::MinusZero) => Self::Infinity, + (Self::Infinity, Self::MinusInfinity) => Self::Nan, + (Self::MinusInfinity, Self::BigDecimal(_)) => Self::MinusInfinity, + (Self::MinusInfinity, Self::MinusInfinity) => Self::MinusInfinity, + (Self::MinusInfinity, Self::MinusZero) => Self::MinusInfinity, + (Self::MinusInfinity, Self::Infinity) => Self::Nan, + (Self::Nan, _) => Self::Nan, + (_, Self::Nan) => Self::Nan, + (Self::MinusNan, _) => Self::MinusNan, + (_, Self::MinusNan) => Self::MinusNan, + (Self::MinusZero, other) => other, + } + } +} +impl PartialEq for ExtendedBigDecimal { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::BigDecimal(m), Self::BigDecimal(n)) => m.eq(n), + (Self::BigDecimal(_), Self::MinusInfinity) => false, + (Self::BigDecimal(_), Self::Infinity) => false, + (Self::BigDecimal(_), Self::MinusZero) => false, + (Self::Infinity, Self::BigDecimal(_)) => false, + (Self::Infinity, Self::Infinity) => true, + (Self::Infinity, Self::MinusZero) => false, + (Self::Infinity, Self::MinusInfinity) => false, + (Self::MinusInfinity, Self::BigDecimal(_)) => false, + (Self::MinusInfinity, Self::Infinity) => false, + (Self::MinusInfinity, Self::MinusZero) => false, + (Self::MinusInfinity, Self::MinusInfinity) => true, + (Self::MinusZero, Self::BigDecimal(_)) => false, + (Self::MinusZero, Self::Infinity) => false, + (Self::MinusZero, Self::MinusZero) => true, + (Self::MinusZero, Self::MinusInfinity) => false, + (Self::Nan, _) => false, + (Self::MinusNan, _) => false, + (_, Self::Nan) => false, + (_, Self::MinusNan) => false, + } + } +} +impl PartialOrd for ExtendedBigDecimal { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (Self::BigDecimal(m), Self::BigDecimal(n)) => m.partial_cmp(n), + (Self::BigDecimal(_), 
Self::MinusInfinity) => Some(Ordering::Greater), + (Self::BigDecimal(_), Self::Infinity) => Some(Ordering::Less), + (Self::BigDecimal(m), Self::MinusZero) => m.partial_cmp(&BigDecimal::zero()), + (Self::Infinity, Self::BigDecimal(_)) => Some(Ordering::Greater), + (Self::Infinity, Self::Infinity) => Some(Ordering::Equal), + (Self::Infinity, Self::MinusZero) => Some(Ordering::Greater), + (Self::Infinity, Self::MinusInfinity) => Some(Ordering::Greater), + (Self::MinusInfinity, Self::BigDecimal(_)) => Some(Ordering::Less), + (Self::MinusInfinity, Self::Infinity) => Some(Ordering::Less), + (Self::MinusInfinity, Self::MinusZero) => Some(Ordering::Less), + (Self::MinusInfinity, Self::MinusInfinity) => Some(Ordering::Equal), + (Self::MinusZero, Self::BigDecimal(n)) => BigDecimal::zero().partial_cmp(n), + (Self::MinusZero, Self::Infinity) => Some(Ordering::Less), + (Self::MinusZero, Self::MinusZero) => Some(Ordering::Equal), + (Self::MinusZero, Self::MinusInfinity) => Some(Ordering::Greater), + (Self::Nan, _) => None, + (Self::MinusNan, _) => None, + (_, Self::Nan) => None, + (_, Self::MinusNan) => None, + } + } +} +impl Neg for ExtendedBigDecimal { + type Output = Self; + fn neg(self) -> Self::Output { + match self { + Self::BigDecimal(bd) => { + if bd.is_zero() { + Self::MinusZero + } else { + Self::BigDecimal(bd.neg()) + } + } + Self::MinusZero => Self::BigDecimal(BigDecimal::zero()), + Self::Infinity => Self::MinusInfinity, + Self::MinusInfinity => Self::Infinity, + Self::Nan => Self::MinusNan, + Self::MinusNan => Self::Nan, + } + } +} diff --git a/crates/bashkit/src/builtins/generated/format/argument.rs b/crates/bashkit/src/builtins/generated/format/argument.rs new file mode 100644 index 000000000..248f333a5 --- /dev/null +++ b/crates/bashkit/src/builtins/generated/format/argument.rs @@ -0,0 +1,202 @@ +// GENERATED by bashkit-coreutils-port. DO NOT EDIT. 
+// +// Source: uutils/coreutils@39364b6 src/uucore/src/lib/features/format/argument.rs +// Regenerate: cargo run -p bashkit-coreutils-port -- port-module format +// +// Original uutils licensed MIT; see THIRD_PARTY_LICENSES. + +use super::ExtendedBigDecimal; +use crate::builtins::generated::format::spec::ArgumentLocation; +use crate::builtins::generated::format_support::QuotingStyle; +use crate::builtins::generated::format_support::locale_aware_escape_name; +use crate::builtins::generated::format_support::os_str_as_bytes; +use crate::builtins::generated::format_support::set_exit_code; +use crate::builtins::generated::format_support::show_error; +use crate::builtins::generated::format_support::show_warning; +use crate::builtins::generated::num_parser::ExtendedParser; +use crate::builtins::generated::num_parser::ExtendedParserError; +use os_display::Quotable; +use std::ffi::OsStr; +use std::ffi::OsString; +use std::num::NonZero; +#[derive(Clone, Debug, PartialEq)] +pub enum FormatArgument { + Char(char), + String(OsString), + UnsignedInt(u64), + SignedInt(i64), + Float(ExtendedBigDecimal), + /// Special argument that gets coerced into the other variants + Unparsed(OsString), +} +#[derive(Debug, PartialEq)] +pub struct FormatArguments<'a> { + args: &'a [FormatArgument], + next_arg_position: usize, + highest_arg_position: Option, + current_offset: usize, +} +impl<'a> FormatArguments<'a> { + /// Create a new FormatArguments from a slice of FormatArgument + pub fn new(args: &'a [FormatArgument]) -> Self { + Self { + args, + next_arg_position: 0, + highest_arg_position: None, + current_offset: 0, + } + } + /// Get the next argument that would be used + pub fn peek_arg(&self) -> Option<&'a FormatArgument> { + self.args.get(self.next_arg_position) + } + /// Check if all arguments have been consumed + pub fn is_exhausted(&self) -> bool { + self.current_offset >= self.args.len() + } + pub fn start_next_batch(&mut self) { + self.current_offset = self + .next_arg_position + 
.max(self.highest_arg_position.map_or(0, |x| x.saturating_add(1))); + self.next_arg_position = self.current_offset; + } + pub fn next_char(&mut self, position: ArgumentLocation) -> u8 { + match self.next_arg(position) { + Some(FormatArgument::Char(c)) => *c as u8, + Some(FormatArgument::Unparsed(os)) => match os_str_as_bytes(os) { + Ok(bytes) => bytes.first().copied().unwrap_or(b'\0'), + Err(_) => b'\0', + }, + _ => b'\0', + } + } + pub fn next_string(&mut self, position: ArgumentLocation) -> &'a OsStr { + match self.next_arg(position) { + Some(FormatArgument::Unparsed(os) | FormatArgument::String(os)) => os, + _ => "".as_ref(), + } + } + pub fn next_i64(&mut self, position: ArgumentLocation) -> i64 { + match self.next_arg(position) { + Some(FormatArgument::SignedInt(n)) => *n, + Some(FormatArgument::Unparsed(os)) => Self::get_num::(os), + _ => 0, + } + } + pub fn next_u64(&mut self, position: ArgumentLocation) -> u64 { + match self.next_arg(position) { + Some(FormatArgument::UnsignedInt(n)) => *n, + Some(FormatArgument::Unparsed(os)) => Self::get_num::(os), + _ => 0, + } + } + pub fn next_extended_big_decimal(&mut self, position: ArgumentLocation) -> ExtendedBigDecimal { + match self.next_arg(position) { + Some(FormatArgument::Float(n)) => n.clone(), + Some(FormatArgument::Unparsed(os)) => Self::get_num::(os), + _ => ExtendedBigDecimal::zero(), + } + } + fn parse_quote_start(os: &OsStr) -> Result> + where + T: ExtendedParser + From + From + Default, + { + let Ok(s) = os_str_as_bytes(os) else { + return Err(ExtendedParserError::NotNumeric); + }; + let (Some((b'"', bytes)) | Some((b'\'', bytes))) = s.split_first() else { + debug_assert!(false); + return Err(ExtendedParserError::NotNumeric); + }; + if bytes.is_empty() { + return Err(ExtendedParserError::NotNumeric); + } + let (val, len) = if let Some(c) = bytes + .utf8_chunks() + .next() + .expect("bytes should not be empty") + .valid() + .chars() + .next() + { + ((c as u32).into(), c.len_utf8()) + } else { + 
(bytes[0].into(), 1) + }; + if bytes.len() > len { + return Err(ExtendedParserError::PartialMatch( + val, + String::from_utf8_lossy(&bytes[len..]).into_owned(), + )); + } + Ok(val) + } + fn get_num(os: &OsStr) -> T + where + T: ExtendedParser + From + From + Default, + { + let s = os.to_string_lossy(); + let first = s.as_bytes().first().copied(); + let quote_start = first == Some(b'"') || first == Some(b'\''); + let parsed = if quote_start { + Self::parse_quote_start(os) + } else { + T::extended_parse(&s) + }; + extract_value(parsed, &s, quote_start) + } + fn get_at_relative_position(&mut self, pos: NonZero) -> Option<&'a FormatArgument> { + let pos: usize = pos.into(); + let pos = (pos - 1).saturating_add(self.current_offset); + self.highest_arg_position = Some(self.highest_arg_position.map_or(pos, |x| x.max(pos))); + self.args.get(pos) + } + fn next_arg(&mut self, position: ArgumentLocation) -> Option<&'a FormatArgument> { + match position { + ArgumentLocation::NextArgument => { + let arg = self.args.get(self.next_arg_position); + self.next_arg_position += 1; + arg + } + ArgumentLocation::Position(pos) => self.get_at_relative_position(pos), + } + } +} +fn extract_value( + p: Result>, + input: &str, + quote_start: bool, +) -> T { + match p { + Ok(v) => v, + Err(e) => { + set_exit_code(1); + let input = locale_aware_escape_name(OsStr::new(input), QuotingStyle::C_NO_QUOTES); + match e { + ExtendedParserError::Overflow(v) => { + show_error!("{}: Numerical result out of range", input.quote()); + v + } + ExtendedParserError::Underflow(v) => { + show_error!("{}: Numerical result out of range", input.quote()); + v + } + ExtendedParserError::NotNumeric => { + show_error!("{}: expected a numeric value", input.quote()); + Default::default() + } + ExtendedParserError::PartialMatch(v, rest) => { + if quote_start { + set_exit_code(0); + show_warning!( + "{rest}: character(s) following character constant have been ignored" + ); + } else { + show_error!("{}: value not completely 
converted", input.quote()); + } + v + } + } + } + } +} diff --git a/crates/bashkit/src/builtins/generated/format/escape.rs b/crates/bashkit/src/builtins/generated/format/escape.rs new file mode 100644 index 000000000..7c2c33145 --- /dev/null +++ b/crates/bashkit/src/builtins/generated/format/escape.rs @@ -0,0 +1,151 @@ +// GENERATED by bashkit-coreutils-port. DO NOT EDIT. +// +// Source: uutils/coreutils@39364b6 src/uucore/src/lib/features/format/escape.rs +// Regenerate: cargo run -p bashkit-coreutils-port -- port-module format +// +// Original uutils licensed MIT; see THIRD_PARTY_LICENSES. + +//! Parsing of escape sequences +use crate::builtins::generated::format::FormatError; +#[derive(Debug)] +pub enum EscapedChar { + /// A single byte + Byte(u8), + /// A unicode character + Char(char), + /// A character prefixed with a backslash (i.e. an invalid escape sequence) + Backslash(u8), + /// Specifies that the string should stop (`\c`) + End, +} +#[derive(Clone, Copy, Default)] +pub enum OctalParsing { + #[default] + TwoDigits = 2, + ThreeDigits = 3, +} +#[derive(Clone, Copy)] +enum Base { + Oct(OctalParsing), + Hex, +} +impl Base { + fn as_base(self) -> u8 { + match self { + Self::Oct(_) => 8, + Self::Hex => 16, + } + } + fn max_digits(self) -> u8 { + match self { + Self::Oct(parsing) => parsing as u8, + Self::Hex => 2, + } + } + fn convert_digit(self, c: u8) -> Option { + match self { + Self::Oct(_) => { + if matches!(c, b'0'..=b'7') { + Some(c - b'0') + } else { + None + } + } + Self::Hex => match c { + b'0'..=b'9' => Some(c - b'0'), + b'A'..=b'F' => Some(c - b'A' + 10), + b'a'..=b'f' => Some(c - b'a' + 10), + _ => None, + }, + } + } +} +fn parse_code(input: &mut &[u8], base: Base) -> Option { + let [c, rest @ ..] = input else { return None }; + let mut ret = base.convert_digit(*c)?; + *input = rest; + for _ in 1..base.max_digits() { + let [c, rest @ ..] 
= input else { break }; + let Some(n) = base.convert_digit(*c) else { + break; + }; + ret = ret.wrapping_mul(base.as_base()).wrapping_add(n); + *input = rest; + } + Some(ret) +} +fn parse_unicode(input: &mut &[u8], digits: u8) -> Result { + if let Some((new_digits, rest)) = input.split_at_checked(digits as usize) { + *input = rest; + let ret = new_digits + .iter() + .map(|c| Base::Hex.convert_digit(*c)) + .collect::>>() + .ok_or(EscapeError::MissingHexadecimalNumber)? + .iter() + .map(|n| *n as u32) + .reduce(|ret, n| ret.wrapping_mul(Base::Hex.as_base() as u32).wrapping_add(n)) + .expect("must have multiple digits in unicode string"); + char::from_u32(ret).ok_or_else(|| EscapeError::InvalidCharacters(new_digits.to_vec())) + } else { + Err(EscapeError::MissingHexadecimalNumber) + } +} +#[derive(Debug, PartialEq)] +pub enum EscapeError { + InvalidCharacters(Vec), + MissingHexadecimalNumber, +} +pub fn parse_escape_code( + rest: &mut &[u8], + zero_octal_parsing: OctalParsing, +) -> Result { + if let [c, new_rest @ ..] 
= rest { + if let b'1'..=b'7' = c { + if let Some(parsed) = parse_code(rest, Base::Oct(OctalParsing::ThreeDigits)) { + return Ok(EscapedChar::Byte(parsed)); + } + } + *rest = new_rest; + match c { + b'\\' => Ok(EscapedChar::Byte(b'\\')), + b'"' => Ok(EscapedChar::Byte(b'"')), + b'a' => Ok(EscapedChar::Byte(b'\x07')), + b'b' => Ok(EscapedChar::Byte(b'\x08')), + b'c' => Ok(EscapedChar::End), + b'e' => Ok(EscapedChar::Byte(b'\x1b')), + b'f' => Ok(EscapedChar::Byte(b'\x0c')), + b'n' => Ok(EscapedChar::Byte(b'\n')), + b'r' => Ok(EscapedChar::Byte(b'\r')), + b't' => Ok(EscapedChar::Byte(b'\t')), + b'v' => Ok(EscapedChar::Byte(b'\x0b')), + b'x' => { + if let Some(c) = parse_code(rest, Base::Hex) { + Ok(EscapedChar::Byte(c)) + } else { + Err(FormatError::MissingHex) + } + } + b'0' => Ok(EscapedChar::Byte( + parse_code(rest, Base::Oct(zero_octal_parsing)).unwrap_or(b'\0'), + )), + b'u' => match parse_unicode(rest, 4) { + Ok(c) => Ok(EscapedChar::Char(c)), + Err(EscapeError::MissingHexadecimalNumber) => Err(FormatError::MissingHex), + Err(EscapeError::InvalidCharacters(chars)) => { + Err(FormatError::InvalidCharacter('u', chars)) + } + }, + b'U' => match parse_unicode(rest, 8) { + Ok(c) => Ok(EscapedChar::Char(c)), + Err(EscapeError::MissingHexadecimalNumber) => Err(FormatError::MissingHex), + Err(EscapeError::InvalidCharacters(chars)) => { + Err(FormatError::InvalidCharacter('U', chars)) + } + }, + c => Ok(EscapedChar::Backslash(*c)), + } + } else { + Ok(EscapedChar::Byte(b'\\')) + } +} diff --git a/crates/bashkit/src/builtins/generated/format/human.rs b/crates/bashkit/src/builtins/generated/format/human.rs new file mode 100644 index 000000000..6b4847d64 --- /dev/null +++ b/crates/bashkit/src/builtins/generated/format/human.rs @@ -0,0 +1,37 @@ +// GENERATED by bashkit-coreutils-port. DO NOT EDIT. 
+// +// Source: uutils/coreutils@39364b6 src/uucore/src/lib/features/format/human.rs +// Regenerate: cargo run -p bashkit-coreutils-port -- port-module format +// +// Original uutils licensed MIT; see THIRD_PARTY_LICENSES. + +//! `human`-size formatting +//! +//! Format sizes like gnulibs human_readable() would +use unit_prefix::NumberPrefix; +#[derive(Copy, Clone, PartialEq)] +pub enum SizeFormat { + Bytes, + Binary, + Decimal, +} +fn format_prefixed(prefixed: &NumberPrefix) -> String { + match prefixed { + NumberPrefix::Standalone(bytes) => bytes.to_string(), + NumberPrefix::Prefixed(prefix, bytes) => { + let prefix_str = prefix.symbol().trim_end_matches('i'); + if (10.0 * bytes).ceil() >= 100.0 { + format!("{:.0}{prefix_str}", bytes.ceil()) + } else { + format!("{:.1}{prefix_str}", (10.0 * bytes).ceil() / 10.0) + } + } + } +} +pub fn human_readable(size: u64, sfmt: SizeFormat) -> String { + match sfmt { + SizeFormat::Binary => format_prefixed(&NumberPrefix::binary(size as f64)), + SizeFormat::Decimal => format_prefixed(&NumberPrefix::decimal(size as f64)), + SizeFormat::Bytes => size.to_string(), + } +} diff --git a/crates/bashkit/src/builtins/generated/format/mod.rs b/crates/bashkit/src/builtins/generated/format/mod.rs new file mode 100644 index 000000000..b5037c41f --- /dev/null +++ b/crates/bashkit/src/builtins/generated/format/mod.rs @@ -0,0 +1,347 @@ +// GENERATED by bashkit-coreutils-port. DO NOT EDIT. +// +// Source: uutils/coreutils@39364b6 src/uucore/src/lib/features/format/mod.rs +// Regenerate: cargo run -p bashkit-coreutils-port -- port-module format +// +// Original uutils licensed MIT; see THIRD_PARTY_LICENSES. + +//! `printf`-style formatting +//! +//! Rust has excellent formatting capabilities, but the coreutils require very +//! specific formatting that needs to work exactly like the GNU utilities. +//! Naturally, the GNU behavior is based on the C `printf` functionality. +//! +//! 
Additionally, we need support for escape sequences for the `printf` utility. +//! +//! The [`printf`] and [`sprintf`] functions closely match the behavior of the +//! corresponding C functions: the former renders a formatted string +//! to stdout, the latter renders to a new [`String`] object. +//! +//! There are three kinds of parsing that we might want to do: +//! +//! 1. Parse only `printf` directives (for e.g. `seq`, `dd`) +//! 2. Parse only escape sequences (for e.g. `echo`) +//! 3. Parse both `printf` specifiers and escape sequences (for e.g. `printf`) +//! +//! This module aims to combine all three use cases. An iterator parsing each +//! of these cases is provided by [`parse_spec_only`], [`parse_escape_only`] +//! and [`parse_spec_and_escape`], respectively. +//! +//! There is a special [`Format`] type, which can be used to parse a format +//! string containing exactly one directive and does not use any `*` in that +//! directive. This format can be printed in a type-safe manner without failing +//! (modulo IO errors). 
+mod argument; +mod escape; +pub mod human; +pub mod num_format; +mod spec; +pub use self::escape::EscapedChar; +pub use self::escape::OctalParsing; +use self::escape::parse_escape_code; +use self::num_format::Formatter; +use crate::builtins::generated::extendedbigdecimal::ExtendedBigDecimal; +use crate::builtins::generated::format_support::NonUtf8OsStrError; +use crate::builtins::generated::format_support::UError; +pub use argument::FormatArgument; +pub use argument::FormatArguments; +use os_display::Quotable; +pub use spec::Spec; +use std::error::Error; +use std::fmt::Display; +use std::io::Write; +use std::io::stdout; +use std::marker::PhantomData; +use std::ops::ControlFlow; +#[derive(Debug)] +pub enum FormatError { + SpecError(Vec), + IoError(std::io::Error), + NoMoreArguments, + InvalidArgument(FormatArgument), + TooManySpecs(Vec), + NeedAtLeastOneSpec(Vec), + WrongSpecType, + InvalidPrecision(String), + /// The format specifier ends with a %, as in `%f%`. + EndsWithPercent(Vec), + /// The escape sequence `\x` appears without a literal hexadecimal value. 
+ MissingHex, + /// The hexadecimal characters represent a code point that cannot represent a + /// Unicode character (e.g., a surrogate code point) + InvalidCharacter(char, Vec), + InvalidEncoding(NonUtf8OsStrError), +} +impl Error for FormatError {} +impl UError for FormatError {} +impl From for FormatError { + fn from(value: std::io::Error) -> Self { + Self::IoError(value) + } +} +impl From for FormatError { + fn from(value: NonUtf8OsStrError) -> Self { + Self::InvalidEncoding(value) + } +} +impl Display for FormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::SpecError(s) => { + write!( + f, + "%{}: invalid conversion specification", + String::from_utf8_lossy(s) + ) + } + Self::TooManySpecs(s) => { + write!( + f, + "format '{}' has too many % directives", + String::from_utf8_lossy(s) + ) + } + Self::NeedAtLeastOneSpec(s) => { + write!( + f, + "format '{}' has no % directive", + String::from_utf8_lossy(s) + ) + } + Self::EndsWithPercent(s) => { + write!(f, "format {} ends in %", String::from_utf8_lossy(s).quote()) + } + Self::InvalidPrecision(precision) => { + write!(f, "invalid precision: '{precision}'") + } + Self::WrongSpecType => write!(f, "wrong % directive type was given"), + Self::IoError(e) => write!(f, "write error: {e}"), + Self::NoMoreArguments => write!(f, "no more arguments"), + Self::InvalidArgument(_) => write!(f, "invalid argument"), + Self::MissingHex => write!(f, "missing hexadecimal number in escape"), + Self::InvalidCharacter(escape_char, digits) => { + write!( + f, + "invalid universal character name \\{escape_char}{}", + String::from_utf8_lossy(digits) + ) + } + Self::InvalidEncoding(no) => no.fmt(f), + } + } +} +const MAX_FORMAT_WIDTH: usize = 1_000_000; +fn check_width(width: usize) -> std::io::Result<()> { + if width > MAX_FORMAT_WIDTH { + Err(std::io::Error::new( + std::io::ErrorKind::OutOfMemory, + "formatting width too large", + )) + } else { + Ok(()) + } +} +pub enum FormatItem { 
+ /// A format specifier + Spec(Spec), + /// A single character + Char(C), +} +pub trait FormatChar { + fn write(&self, writer: impl Write) -> std::io::Result>; +} +impl FormatChar for u8 { + fn write(&self, mut writer: impl Write) -> std::io::Result> { + writer.write_all(&[*self])?; + Ok(ControlFlow::Continue(())) + } +} +impl FormatChar for EscapedChar { + fn write(&self, mut writer: impl Write) -> std::io::Result> { + match self { + Self::Byte(c) => { + writer.write_all(&[*c])?; + } + Self::Char(c) => { + write!(writer, "{c}")?; + } + Self::Backslash(c) => { + writer.write_all(&[b'\\', *c])?; + } + Self::End => return Ok(ControlFlow::Break(())), + } + Ok(ControlFlow::Continue(())) + } +} +impl FormatItem { + pub fn write( + &self, + writer: impl Write, + args: &mut FormatArguments, + ) -> Result, FormatError> { + match self { + Self::Spec(spec) => spec.write(writer, args)?, + Self::Char(c) => return c.write(writer).map_err(FormatError::IoError), + } + Ok(ControlFlow::Continue(())) + } +} +pub fn parse_spec_and_escape( + fmt: &[u8], +) -> impl Iterator, FormatError>> + '_ { + let mut current = fmt; + std::iter::from_fn(move || match current { + [] => None, + [b'%', b'%', rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(EscapedChar::Byte(b'%')))) + } + [b'%', rest @ ..] => { + current = rest; + let spec = match Spec::parse(&mut current) { + Ok(spec) => spec, + Err(slice) => return Some(Err(FormatError::SpecError(slice.to_vec()))), + }; + Some(Ok(FormatItem::Spec(spec))) + } + [b'\\', rest @ ..] => { + current = rest; + Some(parse_escape_code(&mut current, OctalParsing::default()).map(FormatItem::Char)) + } + [c, rest @ ..] 
=> { + current = rest; + Some(Ok(FormatItem::Char(EscapedChar::Byte(*c)))) + } + }) +} +pub fn parse_spec_only( + fmt: &[u8], +) -> impl Iterator, FormatError>> + '_ { + let mut current = fmt; + std::iter::from_fn(move || match current { + [] => None, + [b'%'] => Some(Err(FormatError::EndsWithPercent(fmt.to_vec()))), + [b'%', b'%', rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(b'%'))) + } + [b'%', rest @ ..] => { + current = rest; + let spec = match Spec::parse(&mut current) { + Ok(spec) => spec, + Err(slice) => return Some(Err(FormatError::SpecError(slice.to_vec()))), + }; + Some(Ok(FormatItem::Spec(spec))) + } + [c, rest @ ..] => { + current = rest; + Some(Ok(FormatItem::Char(*c))) + } + }) +} +pub fn parse_escape_only( + fmt: &[u8], + zero_octal_parsing: OctalParsing, +) -> impl Iterator + '_ { + let mut current = fmt; + std::iter::from_fn(move || match current { + [] => None, + [b'\\', rest @ ..] => { + current = rest; + Some( + parse_escape_code(&mut current, zero_octal_parsing) + .unwrap_or(EscapedChar::Backslash(b'x')), + ) + } + [c, rest @ ..] 
=> { + current = rest; + Some(EscapedChar::Byte(*c)) + } + }) +} +pub fn printf<'a>( + format_string: impl AsRef<[u8]>, + arguments: impl IntoIterator, +) -> Result<(), FormatError> { + printf_writer(stdout(), format_string, arguments) +} +fn printf_writer<'a>( + mut writer: impl Write, + format_string: impl AsRef<[u8]>, + args: impl IntoIterator, +) -> Result<(), FormatError> { + let args = args.into_iter().cloned().collect::>(); + let mut args = FormatArguments::new(&args); + for item in parse_spec_only(format_string.as_ref()) { + if item?.write(&mut writer, &mut args)?.is_break() { + break; + } + } + Ok(()) +} +pub fn sprintf<'a>( + format_string: impl AsRef<[u8]>, + arguments: impl IntoIterator, +) -> Result, FormatError> { + let mut writer = Vec::new(); + printf_writer(&mut writer, format_string, arguments)?; + Ok(writer) +} +pub struct Format, T> { + prefix: Vec, + suffix: Vec, + formatter: F, + _marker: PhantomData, +} +impl, T> Format { + pub fn from_formatter(formatter: F) -> Self { + Self { + prefix: Vec::::new(), + suffix: Vec::::new(), + formatter, + _marker: PhantomData, + } + } + pub fn parse(format_string: impl AsRef<[u8]>) -> Result { + let mut iter = parse_spec_only(format_string.as_ref()); + let mut prefix = Vec::new(); + let mut spec = None; + for item in &mut iter { + match item? 
{ + FormatItem::Spec(s) => { + spec = Some(s); + break; + } + FormatItem::Char(c) => prefix.push(c), + } + } + let Some(spec) = spec else { + return Err(FormatError::NeedAtLeastOneSpec( + format_string.as_ref().to_vec(), + )); + }; + let formatter = F::try_from_spec(spec)?; + let mut suffix = Vec::new(); + for item in &mut iter { + match item { + Ok(FormatItem::Spec(_)) | Err(FormatError::EndsWithPercent(_)) => { + return Err(FormatError::TooManySpecs(format_string.as_ref().to_vec())); + } + Ok(FormatItem::Char(c)) => suffix.push(c), + Err(e) => return Err(e), + } + } + Ok(Self { + prefix, + suffix, + formatter, + _marker: PhantomData, + }) + } + pub fn fmt(&self, mut w: impl Write, f: T) -> std::io::Result<()> { + w.write_all(&self.prefix)?; + self.formatter.fmt(&mut w, f)?; + w.write_all(&self.suffix)?; + Ok(()) + } +} diff --git a/crates/bashkit/src/builtins/generated/format/num_format.rs b/crates/bashkit/src/builtins/generated/format/num_format.rs new file mode 100644 index 000000000..b6f20c4ce --- /dev/null +++ b/crates/bashkit/src/builtins/generated/format/num_format.rs @@ -0,0 +1,544 @@ +// GENERATED by bashkit-coreutils-port. DO NOT EDIT. +// +// Source: uutils/coreutils@39364b6 src/uucore/src/lib/features/format/num_format.rs +// Regenerate: cargo run -p bashkit-coreutils-port -- port-module format +// +// Original uutils licensed MIT; see THIRD_PARTY_LICENSES. + +//! 
Utilities for formatting numbers in various formats +use super::ExtendedBigDecimal; +use super::FormatError; +use super::spec::CanAsterisk; +use super::spec::Spec; +use bigdecimal::BigDecimal; +use bigdecimal::num_bigint::ToBigInt; +use num_traits::Signed; +use num_traits::Zero; +use std::cmp::min; +use std::io::Write; +pub trait Formatter { + fn fmt(&self, writer: impl Write, x: T) -> std::io::Result<()>; + fn try_from_spec(s: Spec) -> Result + where + Self: Sized; +} +#[derive(Clone, Copy, Debug)] +pub enum UnsignedIntVariant { + Decimal, + Octal(Prefix), + Hexadecimal(Case, Prefix), +} +#[derive(Clone, Copy, Debug)] +pub enum FloatVariant { + Decimal, + Scientific, + Shortest, + Hexadecimal, +} +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Case { + Lowercase, + Uppercase, +} +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Prefix { + No, + Yes, +} +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ForceDecimal { + No, + Yes, +} +#[derive(Clone, Copy, Debug)] +pub enum PositiveSign { + None, + Plus, + Space, +} +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum NumberAlignment { + Left, + RightSpace, + RightZero, +} +pub struct SignedInt { + pub width: usize, + pub precision: usize, + pub positive_sign: PositiveSign, + pub alignment: NumberAlignment, +} +impl Formatter for SignedInt { + fn fmt(&self, writer: impl Write, x: i64) -> std::io::Result<()> { + let abs = (x as i128).abs(); + let s = if self.precision > 0 { + format!("{abs:0>width$}", width = self.precision) + } else { + abs.to_string() + }; + let sign_indicator = get_sign_indicator(self.positive_sign, x.is_negative()); + write_output(writer, sign_indicator, s, self.width, self.alignment) + } + fn try_from_spec(s: Spec) -> Result { + let Spec::SignedInt { + width, + precision, + positive_sign, + alignment, + position: _position, + } = s + else { + return Err(FormatError::WrongSpecType); + }; + let width = match width { + Some(CanAsterisk::Fixed(x)) => x, + None => 0, + 
Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), + }; + let precision = match precision { + Some(CanAsterisk::Fixed(x)) => x, + None => 0, + Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), + }; + Ok(Self { + width, + precision, + positive_sign, + alignment, + }) + } +} +pub struct UnsignedInt { + pub variant: UnsignedIntVariant, + pub width: usize, + pub precision: usize, + pub alignment: NumberAlignment, +} +impl Formatter for UnsignedInt { + fn fmt(&self, writer: impl Write, x: u64) -> std::io::Result<()> { + let mut s = match self.variant { + UnsignedIntVariant::Decimal => format!("{x}"), + UnsignedIntVariant::Octal(_) => format!("{x:o}"), + UnsignedIntVariant::Hexadecimal(case, _) => match case { + Case::Lowercase => format!("{x:x}"), + Case::Uppercase => format!("{x:X}"), + }, + }; + let prefix = match (x, self.variant) { + (1.., UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::Yes)) => "0x", + (1.., UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::Yes)) => "0X", + (1.., UnsignedIntVariant::Octal(Prefix::Yes)) if s.len() >= self.precision => "0", + _ => "", + }; + s = format!("{prefix}{s:0>width$}", width = self.precision); + write_output(writer, String::new(), s, self.width, self.alignment) + } + fn try_from_spec(s: Spec) -> Result { + let s = if let Spec::SignedInt { + width, + precision, + positive_sign: PositiveSign::None, + alignment, + position, + } = s + { + Spec::UnsignedInt { + variant: UnsignedIntVariant::Decimal, + width, + precision, + alignment, + position, + } + } else { + s + }; + let Spec::UnsignedInt { + variant, + width, + precision, + alignment, + position: _position, + } = s + else { + return Err(FormatError::WrongSpecType); + }; + let width = match width { + Some(CanAsterisk::Fixed(x)) => x, + None => 0, + Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), + }; + let precision = match precision { + Some(CanAsterisk::Fixed(x)) => x, + None => 0, + 
Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), + }; + Ok(Self { + variant, + width, + precision, + alignment, + }) + } +} +pub struct Float { + pub variant: FloatVariant, + pub case: Case, + pub force_decimal: ForceDecimal, + pub width: usize, + pub positive_sign: PositiveSign, + pub alignment: NumberAlignment, + pub precision: Option, +} +impl Default for Float { + fn default() -> Self { + Self { + variant: FloatVariant::Decimal, + case: Case::Lowercase, + force_decimal: ForceDecimal::No, + width: 0, + positive_sign: PositiveSign::None, + alignment: NumberAlignment::Left, + precision: None, + } + } +} +impl Formatter<&ExtendedBigDecimal> for Float { + fn fmt(&self, writer: impl Write, e: &ExtendedBigDecimal) -> std::io::Result<()> { + let (abs, negative) = match e { + ExtendedBigDecimal::BigDecimal(bd) => { + (ExtendedBigDecimal::BigDecimal(bd.abs()), bd.is_negative()) + } + ExtendedBigDecimal::MinusZero => (ExtendedBigDecimal::zero(), true), + ExtendedBigDecimal::Infinity => (ExtendedBigDecimal::Infinity, false), + ExtendedBigDecimal::MinusInfinity => (ExtendedBigDecimal::Infinity, true), + ExtendedBigDecimal::Nan => (ExtendedBigDecimal::Nan, false), + ExtendedBigDecimal::MinusNan => (ExtendedBigDecimal::Nan, true), + }; + let mut alignment = self.alignment; + let s = if let ExtendedBigDecimal::BigDecimal(bd) = abs { + match self.variant { + FloatVariant::Decimal => { + format_float_decimal(&bd, self.precision, self.force_decimal) + } + FloatVariant::Scientific => { + format_float_scientific(&bd, self.precision, self.case, self.force_decimal) + } + FloatVariant::Shortest => { + format_float_shortest(&bd, self.precision, self.case, self.force_decimal) + } + FloatVariant::Hexadecimal => { + format_float_hexadecimal(&bd, self.precision, self.case, self.force_decimal) + } + } + } else { + if alignment == NumberAlignment::RightZero { + alignment = NumberAlignment::RightSpace; + } + format_float_non_finite(&abs, self.case) + }; + let 
sign_indicator = get_sign_indicator(self.positive_sign, negative); + write_output(writer, sign_indicator, s, self.width, alignment) + } + fn try_from_spec(s: Spec) -> Result + where + Self: Sized, + { + let Spec::Float { + variant, + case, + force_decimal, + width, + positive_sign, + alignment, + precision, + position: _position, + } = s + else { + return Err(FormatError::WrongSpecType); + }; + let width = match width { + Some(CanAsterisk::Fixed(x)) => x, + None => 0, + Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), + }; + let precision = match precision { + Some(CanAsterisk::Fixed(x)) => Some(x), + None => None, + Some(CanAsterisk::Asterisk(_)) => return Err(FormatError::WrongSpecType), + }; + Ok(Self { + variant, + case, + force_decimal, + width, + positive_sign, + alignment, + precision, + }) + } +} +fn get_sign_indicator(sign: PositiveSign, negative: bool) -> String { + if negative { + String::from("-") + } else { + match sign { + PositiveSign::None => String::new(), + PositiveSign::Plus => String::from("+"), + PositiveSign::Space => String::from(" "), + } + } +} +fn format_float_non_finite(e: &ExtendedBigDecimal, case: Case) -> String { + let mut s = match e { + ExtendedBigDecimal::Infinity => String::from("inf"), + ExtendedBigDecimal::Nan => String::from("nan"), + _ => { + debug_assert!(false); + String::from("INVALID") + } + }; + if case == Case::Uppercase { + s.make_ascii_uppercase(); + } + s +} +fn format_float_decimal( + bd: &BigDecimal, + precision: Option, + force_decimal: ForceDecimal, +) -> String { + debug_assert!(!bd.is_negative()); + let precision = precision.unwrap_or(6); + if precision == 0 { + let (bi, scale) = bd.as_bigint_and_scale(); + if scale == 0 && force_decimal != ForceDecimal::Yes { + return bi.to_str_radix(10); + } else if force_decimal == ForceDecimal::Yes { + return format!("{bd:.0}."); + } + } + format!("{bd:.precision$}") +} +fn bd_to_string_exp_with_prec(bd: &BigDecimal, precision: usize) -> (String, 
i64) { + let bd_round = bd.with_prec(precision as u64); + let (frac, mut p) = bd_round.as_bigint_and_exponent(); + let mut digits = frac.to_str_radix(10); + if digits.len() == precision + 1 { + debug_assert!(&digits[precision..] == "0"); + digits.truncate(precision); + p -= 1; + } + let exponent = -p + precision as i64 - 1; + (digits, exponent) +} +fn format_float_scientific( + bd: &BigDecimal, + precision: Option, + case: Case, + force_decimal: ForceDecimal, +) -> String { + debug_assert!(!bd.is_negative()); + let precision = precision.unwrap_or(6); + let exp_char = match case { + Case::Lowercase => 'e', + Case::Uppercase => 'E', + }; + if BigDecimal::zero().eq(bd) { + return if force_decimal == ForceDecimal::Yes && precision == 0 { + format!("0.{exp_char}+00") + } else { + format!("{:.precision$}{exp_char}+00", 0.0) + }; + } + let (digits, exponent) = bd_to_string_exp_with_prec(bd, precision + 1); + let (first_digit, remaining_digits) = digits.split_at(1); + let dot = + if !remaining_digits.is_empty() || (precision == 0 && ForceDecimal::Yes == force_decimal) { + "." 
+ } else { + "" + }; + format!("{first_digit}{dot}{remaining_digits}{exp_char}{exponent:+03}") +} +fn format_float_shortest( + bd: &BigDecimal, + precision: Option, + case: Case, + force_decimal: ForceDecimal, +) -> String { + debug_assert!(!bd.is_negative()); + let precision = precision.unwrap_or(6); + let precision = precision.max(1); + if BigDecimal::zero().eq(bd) { + return match (force_decimal, precision) { + (ForceDecimal::Yes, 1) => "0.".into(), + (ForceDecimal::Yes, _) => format!("{:.*}", precision - 1, 0.0), + (ForceDecimal::No, _) => "0".into(), + }; + } + let mut output = String::with_capacity(precision); + let (digits, exponent) = bd_to_string_exp_with_prec(bd, precision); + if exponent < -4 || exponent >= precision as i64 { + let (first_digit, remaining_digits) = digits.split_at(1); + output.push_str(first_digit); + output.push('.'); + output.push_str(remaining_digits); + if force_decimal == ForceDecimal::No { + strip_fractional_zeroes_and_dot(&mut output); + } + output.push(match case { + Case::Lowercase => 'e', + Case::Uppercase => 'E', + }); + let exponent_abs = exponent.abs(); + output.push(if exponent < 0 { '-' } else { '+' }); + if exponent_abs < 10 { + output.push('0'); + } + output.push_str(&exponent_abs.to_string()); + } else { + if exponent < 0 { + output.push_str("0."); + output.extend(std::iter::repeat_n('0', -exponent as usize - 1)); + output.push_str(&digits); + } else { + let (first_digits, remaining_digits) = digits.split_at(exponent as usize + 1); + output.push_str(first_digits); + output.push('.'); + output.push_str(remaining_digits); + } + if force_decimal == ForceDecimal::No { + strip_fractional_zeroes_and_dot(&mut output); + } + } + output +} +fn format_float_hexadecimal( + bd: &BigDecimal, + precision: Option, + case: Case, + force_decimal: ForceDecimal, +) -> String { + const BEFORE_BITS: usize = 4; + debug_assert!(!bd.is_negative()); + let max_precision = precision.unwrap_or(15); + let (prefix, exp_char) = match case { + 
Case::Lowercase => ("0x", 'p'), + Case::Uppercase => ("0X", 'P'), + }; + if BigDecimal::zero().eq(bd) { + return if force_decimal == ForceDecimal::Yes && precision.unwrap_or(0) == 0 { + format!("0x0.{exp_char}+0") + } else { + format!("0x{:.*}{exp_char}+0", precision.unwrap_or(0), 0.0) + }; + } + let (frac10, p) = bd.as_bigint_and_exponent(); + let exp10 = -p; + let (mut frac2, mut exp2) = if exp10 >= 0 { + (frac10 * 5.to_bigint().unwrap().pow(exp10 as u32), exp10) + } else { + let margin = + ((max_precision + 1) as i64 * 4 - frac10.bits() as i64).max(0) + -exp10 * 3 + 1; + ( + (frac10 << margin) / 5.to_bigint().unwrap().pow(-exp10 as u32), + exp10 - margin, + ) + }; + let wanted_bits = (BEFORE_BITS + max_precision * 4) as u64; + let bits = frac2.bits(); + exp2 += bits as i64 - wanted_bits as i64; + if bits > wanted_bits { + frac2 >>= bits - wanted_bits - 1; + let add = frac2.bit(0); + frac2 >>= 1; + if add { + frac2 += 0x1; + if frac2.bits() > wanted_bits { + frac2 >>= 4; + exp2 += 4; + } + } + } else { + frac2 <<= wanted_bits - bits; + } + let mut digits = frac2.to_str_radix(16); + if case == Case::Uppercase { + digits.make_ascii_uppercase(); + } + let (first_digit, remaining_digits) = digits.split_at(1); + let exponent = exp2 + (4 * max_precision) as i64; + let mut remaining_digits = remaining_digits.to_string(); + if precision.is_none() { + strip_fractional_zeroes(&mut remaining_digits); + } + let dot = if !remaining_digits.is_empty() + || (precision.unwrap_or(0) == 0 && ForceDecimal::Yes == force_decimal) + { + "." 
+ } else { + "" + }; + format!("{prefix}{first_digit}{dot}{remaining_digits}{exp_char}{exponent:+}") +} +fn strip_fractional_zeroes(s: &mut String) { + let mut trim_to = s.len(); + for (pos, c) in s.char_indices().rev() { + if pos + c.len_utf8() == trim_to { + if c == '0' { + trim_to = pos; + } else { + break; + } + } + } + s.truncate(trim_to); +} +fn strip_fractional_zeroes_and_dot(s: &mut String) { + let mut trim_to = s.len(); + for (pos, c) in s.char_indices().rev() { + if pos + c.len_utf8() == trim_to && (c == '0' || c == '.') { + trim_to = pos; + } + if c == '.' { + s.truncate(trim_to); + break; + } + } +} +fn write_output( + mut writer: impl Write, + sign_indicator: String, + s: String, + width: usize, + alignment: NumberAlignment, +) -> std::io::Result<()> { + if width == 0 { + writer.write_all(sign_indicator.as_bytes())?; + writer.write_all(s.as_bytes())?; + return Ok(()); + } + let remaining_width = width - min(width, sign_indicator.len()); + super::check_width(remaining_width)?; + match alignment { + NumberAlignment::Left => write!(writer, "{sign_indicator}{s: { + let is_sign = sign_indicator.starts_with('-') || sign_indicator.starts_with('+'); + if is_sign && remaining_width > 0 { + let s = sign_indicator + s.as_str(); + write!(writer, "{s:>width$}", width = remaining_width + 1) + } else { + write!(writer, "{sign_indicator}{s:>remaining_width$}") + } + } + NumberAlignment::RightZero => { + let (prefix, rest) = if s.len() >= 2 && s[..2].eq_ignore_ascii_case("0x") { + (&s[..2], &s[2..]) + } else { + ("", s.as_str()) + }; + let remaining_width = remaining_width.saturating_sub(prefix.len()); + write!(writer, "{sign_indicator}{prefix}{rest:0>remaining_width$}") + } + } +} diff --git a/crates/bashkit/src/builtins/generated/format/spec.rs b/crates/bashkit/src/builtins/generated/format/spec.rs new file mode 100644 index 000000000..549b30e18 --- /dev/null +++ b/crates/bashkit/src/builtins/generated/format/spec.rs @@ -0,0 +1,527 @@ +// GENERATED by 
bashkit-coreutils-port. DO NOT EDIT. +// +// Source: uutils/coreutils@39364b6 src/uucore/src/lib/features/format/spec.rs +// Regenerate: cargo run -p bashkit-coreutils-port -- port-module format +// +// Original uutils licensed MIT; see THIRD_PARTY_LICENSES. + +use super::ExtendedBigDecimal; +use super::FormatChar; +use super::FormatError; +use super::OctalParsing; +use super::num_format; +use super::num_format::Case; +use super::num_format::FloatVariant; +use super::num_format::ForceDecimal; +use super::num_format::Formatter; +use super::num_format::NumberAlignment; +use super::num_format::PositiveSign; +use super::num_format::Prefix; +use super::num_format::UnsignedIntVariant; +use super::parse_escape_only; +use crate::builtins::generated::format::FormatArguments; +use crate::builtins::generated::format_support::QuotingStyle; +use crate::builtins::generated::format_support::locale_aware_escape_name; +use crate::builtins::generated::format_support::os_str_as_bytes; +use std::io::Write; +use std::num::NonZero; +use std::ops::ControlFlow; +#[derive(Debug)] +pub enum Spec { + Char { + position: ArgumentLocation, + width: Option>, + align_left: bool, + }, + String { + position: ArgumentLocation, + precision: Option>, + width: Option>, + align_left: bool, + }, + EscapedString { + position: ArgumentLocation, + }, + QuotedString { + position: ArgumentLocation, + }, + SignedInt { + position: ArgumentLocation, + width: Option>, + precision: Option>, + positive_sign: PositiveSign, + alignment: NumberAlignment, + }, + UnsignedInt { + position: ArgumentLocation, + variant: UnsignedIntVariant, + width: Option>, + precision: Option>, + alignment: NumberAlignment, + }, + Float { + position: ArgumentLocation, + variant: FloatVariant, + case: Case, + force_decimal: ForceDecimal, + width: Option>, + positive_sign: PositiveSign, + alignment: NumberAlignment, + precision: Option>, + }, +} +#[derive(Clone, Copy, Debug)] +pub enum ArgumentLocation { + NextArgument, + Position(NonZero), 
+} +#[derive(Clone, Copy, Debug)] +pub enum CanAsterisk { + Fixed(T), + Asterisk(ArgumentLocation), +} +enum Length { + /// signed/unsigned char ("hh") + Char, + /// signed/unsigned short int ("h") + Short, + /// signed/unsigned long int ("l") + Long, + /// signed/unsigned long long int ("ll") + LongLong, + /// intmax_t ("j") + IntMaxT, + /// size_t ("z") + SizeT, + /// ptrdiff_t ("t") + PtfDiffT, + /// long double ("L") + LongDouble, +} +#[derive(Default, PartialEq, Eq)] +struct Flags { + minus: bool, + plus: bool, + space: bool, + hash: bool, + zero: bool, + quote: bool, +} +impl Flags { + pub fn parse(rest: &mut &[u8], index: &mut usize) -> Self { + let mut flags = Self::default(); + while let Some(x) = rest.get(*index) { + match x { + b'-' => flags.minus = true, + b'+' => flags.plus = true, + b' ' => flags.space = true, + b'#' => flags.hash = true, + b'0' => flags.zero = true, + b'\'' => { + flags.quote = true; + } + _ => break, + } + *index += 1; + } + flags + } + /// Whether any of the flags is set to true + fn any(&self) -> bool { + self != &Self::default() + } +} +impl Spec { + pub fn parse<'a>(rest: &mut &'a [u8]) -> Result { + let mut index = 0; + let start = *rest; + let Some(position) = eat_argument_position(rest, &mut index) else { + return Err(&start[..index]); + }; + let flags = Flags::parse(rest, &mut index); + let positive_sign = match flags { + Flags { plus: true, .. } => PositiveSign::Plus, + Flags { space: true, .. 
} => PositiveSign::Space, + _ => PositiveSign::None, + }; + let width = eat_asterisk_or_number(rest, &mut index); + let precision = if let Some(b'.') = rest.get(index) { + index += 1; + Some(eat_asterisk_or_number(rest, &mut index).unwrap_or(CanAsterisk::Fixed(0))) + } else { + None + }; + let alignment = if flags.minus { + NumberAlignment::Left + } else if flags.zero && precision.is_none() { + NumberAlignment::RightZero + } else { + NumberAlignment::RightSpace + }; + let _ = Self::parse_length(rest, &mut index); + let Some(type_spec) = rest.get(index) else { + return Err(&start[..index]); + }; + index += 1; + *rest = &start[index..]; + Ok(match type_spec { + b'c' => { + if flags.zero || flags.hash || precision.is_some() { + return Err(&start[..index]); + } + Self::Char { + position, + width, + align_left: flags.minus, + } + } + b's' => { + if flags.zero || flags.hash || flags.quote { + return Err(&start[..index]); + } + Self::String { + position, + precision, + width, + align_left: flags.minus, + } + } + b'b' => { + if flags.any() || width.is_some() || precision.is_some() { + return Err(&start[..index]); + } + Self::EscapedString { position } + } + b'q' => { + if flags.any() || width.is_some() || precision.is_some() { + return Err(&start[..index]); + } + Self::QuotedString { position } + } + b'd' | b'i' => { + if flags.hash { + return Err(&start[..index]); + } + Self::SignedInt { + position, + width, + precision, + alignment, + positive_sign, + } + } + c @ (b'u' | b'o' | b'x' | b'X') => { + if *c == b'u' && flags.hash { + return Err(&start[..index]); + } + let prefix = if flags.hash { Prefix::Yes } else { Prefix::No }; + let variant = match c { + b'u' => UnsignedIntVariant::Decimal, + b'o' => UnsignedIntVariant::Octal(prefix), + b'x' => UnsignedIntVariant::Hexadecimal(Case::Lowercase, prefix), + b'X' => UnsignedIntVariant::Hexadecimal(Case::Uppercase, prefix), + _ => unreachable!(), + }; + Self::UnsignedInt { + position, + variant, + precision, + width, + 
alignment, + } + } + c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Self::Float { + position, + width, + precision, + variant: match c { + b'f' | b'F' => FloatVariant::Decimal, + b'e' | b'E' => FloatVariant::Scientific, + b'g' | b'G' => FloatVariant::Shortest, + b'a' | b'A' => FloatVariant::Hexadecimal, + _ => unreachable!(), + }, + force_decimal: if flags.hash { + ForceDecimal::Yes + } else { + ForceDecimal::No + }, + case: if c.is_ascii_uppercase() { + Case::Uppercase + } else { + Case::Lowercase + }, + alignment: if flags.zero && !flags.minus { + NumberAlignment::RightZero + } else { + alignment + }, + positive_sign, + }, + _ => return Err(&start[..index]), + }) + } + fn parse_length(rest: &mut &[u8], index: &mut usize) -> Option { + let mut length = None; + loop { + let new_length = rest.get(*index).and_then(|c| { + Some(match c { + b'h' => { + if let Some(b'h') = rest.get(*index + 1) { + *index += 1; + Length::Char + } else { + Length::Short + } + } + b'l' => { + if let Some(b'l') = rest.get(*index + 1) { + *index += 1; + Length::Long + } else { + Length::LongLong + } + } + b'j' => Length::IntMaxT, + b'z' => Length::SizeT, + b't' => Length::PtfDiffT, + b'L' => Length::LongDouble, + _ => return None, + }) + }); + if new_length.is_some() { + *index += 1; + length = new_length; + } else { + break; + } + } + length + } + pub fn write( + &self, + mut writer: impl Write, + args: &mut FormatArguments, + ) -> Result<(), FormatError> { + match self { + Self::Char { + width, + align_left, + position, + } => { + let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or_default(); + write_padded( + writer, + &[args.next_char(*position)], + width, + *align_left || neg_width, + ) + } + Self::String { + width, + align_left, + precision, + position, + } => { + let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or_default(); + let precision = resolve_asterisk_precision(*precision, args); + let os_str = 
args.next_string(*position); + let bytes = os_str_as_bytes(os_str)?; + let truncated = match precision { + Some(p) if p < os_str.len() => &bytes[..p], + _ => bytes, + }; + write_padded(writer, truncated, width, *align_left || neg_width) + } + Self::EscapedString { position } => { + let os_str = args.next_string(*position); + let bytes = os_str_as_bytes(os_str)?; + let mut parsed = Vec::::new(); + for c in parse_escape_only(bytes, OctalParsing::ThreeDigits) { + match c.write(&mut parsed)? { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => { + break; + } + } + } + writer.write_all(&parsed).map_err(FormatError::IoError) + } + Self::QuotedString { position } => { + let s = locale_aware_escape_name( + args.next_string(*position), + QuotingStyle::SHELL_ESCAPE, + ); + let bytes = os_str_as_bytes(&s)?; + writer.write_all(bytes).map_err(FormatError::IoError) + } + Self::SignedInt { + width, + precision, + positive_sign, + alignment, + position, + } => { + let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or((0, false)); + let precision = resolve_asterisk_precision(*precision, args).unwrap_or_default(); + let i = args.next_i64(*position); + if precision as u64 > i32::MAX as u64 { + return Err(FormatError::InvalidPrecision(precision.to_string())); + } + num_format::SignedInt { + width, + precision, + positive_sign: *positive_sign, + alignment: if neg_width { + NumberAlignment::Left + } else { + *alignment + }, + } + .fmt(writer, i) + .map_err(FormatError::IoError) + } + Self::UnsignedInt { + variant, + width, + precision, + alignment, + position, + } => { + let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or((0, false)); + let precision = resolve_asterisk_precision(*precision, args).unwrap_or_default(); + let i = args.next_u64(*position); + if precision as u64 > i32::MAX as u64 { + return Err(FormatError::InvalidPrecision(precision.to_string())); + } + num_format::UnsignedInt { + variant: *variant, + precision, + width, + 
alignment: if neg_width { + NumberAlignment::Left + } else { + *alignment + }, + } + .fmt(writer, i) + .map_err(FormatError::IoError) + } + Self::Float { + variant, + case, + force_decimal, + width, + positive_sign, + alignment, + precision, + position, + } => { + let (width, neg_width) = resolve_asterisk_width(*width, args).unwrap_or((0, false)); + let precision = resolve_asterisk_precision(*precision, args); + let f: ExtendedBigDecimal = args.next_extended_big_decimal(*position); + if precision.is_some_and(|p| p as u64 > i32::MAX as u64) { + return Err(FormatError::InvalidPrecision( + precision.unwrap().to_string(), + )); + } + num_format::Float { + width, + precision, + variant: *variant, + case: *case, + force_decimal: *force_decimal, + positive_sign: *positive_sign, + alignment: if neg_width { + NumberAlignment::Left + } else { + *alignment + }, + } + .fmt(writer, &f) + .map_err(FormatError::IoError) + } + } + } +} +fn resolve_asterisk_width( + option: Option>, + args: &mut FormatArguments, +) -> Option<(usize, bool)> { + match option { + None => None, + Some(CanAsterisk::Asterisk(loc)) => { + let nb = args.next_i64(loc); + if nb < 0 { + Some((usize::try_from(-(nb as isize)).ok().unwrap_or(0), true)) + } else { + Some((usize::try_from(nb).ok().unwrap_or(0), false)) + } + } + Some(CanAsterisk::Fixed(w)) => Some((w, false)), + } +} +fn resolve_asterisk_precision( + option: Option>, + args: &mut FormatArguments, +) -> Option { + match option { + None => None, + Some(CanAsterisk::Asterisk(loc)) => match args.next_i64(loc) { + v if v >= 0 => usize::try_from(v).ok(), + v if v < 0 => Some(0usize), + _ => None, + }, + Some(CanAsterisk::Fixed(w)) => Some(w), + } +} +fn write_padded( + mut writer: impl Write, + text: &[u8], + width: usize, + left: bool, +) -> Result<(), FormatError> { + let padlen = width.saturating_sub(text.len()); + super::check_width(padlen).map_err(FormatError::IoError)?; + if left { + writer.write_all(text)?; + write!(writer, "{: padlen$}", "")?; + 
writer.write_all(text) + } + .map_err(FormatError::IoError) +} +fn eat_argument_position(rest: &mut &[u8], index: &mut usize) -> Option { + let original_index = *index; + if let Some(pos) = eat_number(rest, index) { + if let Some(&b'$') = rest.get(*index) { + *index += 1; + Some(ArgumentLocation::Position(NonZero::new(pos)?)) + } else { + *index = original_index; + Some(ArgumentLocation::NextArgument) + } + } else { + *index = original_index; + Some(ArgumentLocation::NextArgument) + } +} +fn eat_asterisk_or_number(rest: &mut &[u8], index: &mut usize) -> Option> { + if let Some(b'*') = rest.get(*index) { + *index += 1; + Some(CanAsterisk::Asterisk(eat_argument_position(rest, index)?)) + } else { + eat_number(rest, index).map(CanAsterisk::Fixed) + } +} +fn eat_number(rest: &mut &[u8], index: &mut usize) -> Option { + match rest[*index..].iter().position(|b| !b.is_ascii_digit()) { + None | Some(0) => None, + Some(i) => { + let num_str = std::str::from_utf8(&rest[*index..(*index + i)]).unwrap(); + *index += i; + Some(num_str.parse().unwrap_or(usize::MAX)) + } + } +} diff --git a/crates/bashkit/src/builtins/generated/format_support.rs b/crates/bashkit/src/builtins/generated/format_support.rs new file mode 100644 index 000000000..7b6d3defb --- /dev/null +++ b/crates/bashkit/src/builtins/generated/format_support.rs @@ -0,0 +1,126 @@ +//! Handwritten support shims for the vendored uucore `format` module. +//! +//! Keep uucore runtime hooks local and side-effect free: bashkit builtins +//! return structured `ExecResult`s, so generated formatting code must not +//! write diagnostics directly to host stderr or depend on uucore exit state. 
+ +use std::ffi::{OsStr, OsString}; + +#[cfg(any(unix, target_os = "wasi"))] +use std::os::unix::ffi::OsStrExt; + +#[derive(Debug)] +pub struct NonUtf8OsStrError { + input_lossy_string: String, +} + +impl NonUtf8OsStrError { + #[cfg(test)] + pub(crate) fn new_for_test(input_lossy_string: impl Into) -> Self { + Self { + input_lossy_string: input_lossy_string.into(), + } + } +} + +impl std::fmt::Display for NonUtf8OsStrError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use os_display::Quotable; + let quoted = self.input_lossy_string.quote(); + write!(f, "invalid UTF-8 input {quoted}") + } +} + +impl std::error::Error for NonUtf8OsStrError {} + +pub trait UError: std::error::Error {} + +#[cfg_attr(any(unix, target_os = "wasi"), expect(clippy::unnecessary_wraps))] +pub fn os_str_as_bytes(os_string: &OsStr) -> Result<&[u8], NonUtf8OsStrError> { + #[cfg(any(unix, target_os = "wasi"))] + return Ok(os_string.as_bytes()); + + #[cfg(not(any(unix, target_os = "wasi")))] + os_string + .to_str() + .ok_or_else(|| NonUtf8OsStrError { + input_lossy_string: os_string.to_string_lossy().into_owned(), + }) + .map(str::as_bytes) +} + +#[allow(clippy::needless_pass_by_value)] +pub fn set_exit_code(_code: i32) {} + +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug)] +pub enum QuotingStyle { + C_NO_QUOTES, + SHELL_ESCAPE, +} + +pub fn locale_aware_escape_name(input: &OsStr, style: QuotingStyle) -> OsString { + match style { + QuotingStyle::C_NO_QUOTES => input.to_os_string(), + QuotingStyle::SHELL_ESCAPE => shell_quote(&input.to_string_lossy()).into(), + } +} + +macro_rules! show_error { + ($($arg:tt)*) => {{ + let _ = format_args!($($arg)*); + }}; +} + +macro_rules! 
show_warning { + ($($arg:tt)*) => {{ + let _ = format_args!($($arg)*); + }}; +} + +pub(crate) use show_error; +pub(crate) use show_warning; + +fn shell_quote(s: &str) -> String { + if s.is_empty() { + return "''".to_string(); + } + + let needs_quoting = s + .chars() + .any(|c| !c.is_ascii_alphanumeric() && !"_/.:-=+@,%^".contains(c)); + if !needs_quoting { + return s.to_string(); + } + + let has_control = s.chars().any(|c| (c as u32) < 32 || c as u32 == 127); + if has_control { + let mut out = String::from("$'"); + for ch in s.chars() { + match ch { + '\'' => out.push_str("\\'"), + '\\' => out.push_str("\\\\"), + '\n' => out.push_str("\\n"), + '\t' => out.push_str("\\t"), + '\r' => out.push_str("\\r"), + c if (c as u32) < 32 || c as u32 == 127 => { + out.push_str(&format!("\\x{:02x}", c as u32)); + } + c => out.push(c), + } + } + out.push('\''); + return out; + } + + let mut out = String::new(); + for ch in s.chars() { + if ch.is_ascii_alphanumeric() || "_/.:-=+@,%^".contains(ch) { + out.push(ch); + } else { + out.push('\\'); + out.push(ch); + } + } + out +} diff --git a/crates/bashkit/src/builtins/generated/mod.rs b/crates/bashkit/src/builtins/generated/mod.rs index fc45ea821..90dba50b2 100644 --- a/crates/bashkit/src/builtins/generated/mod.rs +++ b/crates/bashkit/src/builtins/generated/mod.rs @@ -1,5 +1,6 @@ // GENERATED MODULE INDEX. Edit individual `*_args.rs` files only via the // `bashkit-coreutils-port` codegen tool — see crates/bashkit-coreutils-port. 
+#![allow(dead_code)] // // Each `_args.rs` exposes `pub fn _command() -> clap::Command` // derived from uutils/coreutils' `uu_app()` definitions, with `translate!()` @@ -28,8 +29,15 @@ pub const UUTILS_REVISION: &str = "39364b6"; pub mod cat_args; +#[allow(clippy::collapsible_if, clippy::unwrap_used)] +pub mod extendedbigdecimal; +#[allow(clippy::collapsible_if, clippy::unwrap_used)] +pub mod format; +pub mod format_support; pub mod ls_args; pub mod mktemp_args; +#[allow(clippy::collapsible_if, clippy::unwrap_used)] +pub mod num_parser; pub mod od_args; pub mod readlink_args; pub mod realpath_args; diff --git a/crates/bashkit/src/builtins/generated/num_parser.rs b/crates/bashkit/src/builtins/generated/num_parser.rs new file mode 100644 index 000000000..c121fee73 --- /dev/null +++ b/crates/bashkit/src/builtins/generated/num_parser.rs @@ -0,0 +1,441 @@ +// GENERATED by bashkit-coreutils-port. DO NOT EDIT. +// +// Source: uutils/coreutils@39364b6 src/uucore/src/lib/features/parser/num_parser.rs +// Regenerate: cargo run -p bashkit-coreutils-port -- port-module format +// +// Original uutils licensed MIT; see THIRD_PARTY_LICENSES. + +//! 
Utilities for parsing numbers in various formats +use crate::builtins::generated::extendedbigdecimal::ExtendedBigDecimal; +use bigdecimal::BigDecimal; +use bigdecimal::num_bigint::BigInt; +use bigdecimal::num_bigint::BigUint; +use bigdecimal::num_bigint::Sign; +use num_traits::Signed; +use num_traits::ToPrimitive; +use num_traits::Zero; +#[derive(Clone, Copy, PartialEq)] +enum Base { + /// Binary base + Binary = 2, + /// Octal base + Octal = 8, + /// Decimal base + Decimal = 10, + /// Hexadecimal base + Hexadecimal = 16, +} +impl Base { + /// Return the digit value of a character in the given base + fn digit(self, c: char) -> Option { + fn from_decimal(c: char) -> u64 { + u64::from(c) - u64::from('0') + } + match self { + Self::Binary => ('0'..='1').contains(&c).then(|| from_decimal(c)), + Self::Octal => ('0'..='7').contains(&c).then(|| from_decimal(c)), + Self::Decimal => c.is_ascii_digit().then(|| from_decimal(c)), + Self::Hexadecimal => match c.to_ascii_lowercase() { + '0'..='9' => Some(from_decimal(c)), + c @ 'a'..='f' => Some(u64::from(c) - u64::from('a') + 10), + _ => None, + }, + } + } + /// Greedily parse as many digits as possible from the string + /// Returns parsed digits (if any), and the rest of the string. + fn parse_digits(self, str: &str) -> (Option, &str) { + let (digits, _, rest) = self.parse_digits_count(str, None); + (digits, rest) + } + /// Greedily parse as many digits as possible from the string, adding to already parsed digits. + /// This is meant to be used (directly) for the part after a decimal point. + /// Returns parsed digits (if any), the number of parsed digits, and the rest of the string. 
+ fn parse_digits_count( + self, + str: &str, + digits: Option, + ) -> (Option, i64, &str) { + let mut digits: Option = digits; + let mut count: i64 = 0; + let mut rest = str; + let mut digits_tmp: u64 = 0; + let mut count_tmp: i64 = 0; + let mut mul_tmp: u64 = 1; + while let Some(d) = rest.chars().next().and_then(|c| self.digit(c)) { + (digits_tmp, count_tmp, mul_tmp) = ( + digits_tmp * self as u64 + d, + count_tmp + 1, + mul_tmp * self as u64, + ); + rest = &rest[1..]; + if count_tmp >= 15 { + (digits, count) = ( + Some(digits.unwrap_or_default() * mul_tmp + digits_tmp), + count + count_tmp, + ); + (digits_tmp, count_tmp, mul_tmp) = (0, 0, 1); + } + } + if mul_tmp > 1 { + (digits, count) = ( + Some(digits.unwrap_or_default() * mul_tmp + digits_tmp), + count + count_tmp, + ); + } + (digits, count, rest) + } +} +#[derive(Debug, PartialEq)] +pub enum ExtendedParserError { + /// The input as a whole makes no sense + NotNumeric, + /// The beginning of the input made sense and has been parsed, + /// while the remaining doesn't. + PartialMatch(T, String), + /// The value has overflowed the type storage. The returned value + /// is saturated (e.g. positive or negative infinity, or min/max + /// value for the integer type). + Overflow(T), + Underflow(T), +} +impl ExtendedParserError +where + T: Zero, +{ + /// Extract the value out of an error, if possible. + fn extract(self) -> T { + match self { + Self::NotNumeric => T::zero(), + Self::PartialMatch(v, _) => v, + Self::Overflow(v) => v, + Self::Underflow(v) => v, + } + } + /// Map an error to another, using the provided conversion function. + /// The error (self) takes precedence over errors happening during the + /// conversion. 
+ fn map( + self, + f: impl FnOnce(T) -> Result>, + ) -> ExtendedParserError + where + U: Zero, + { + fn extract(v: Result>) -> U + where + U: Zero, + { + v.unwrap_or_else(ExtendedParserError::extract) + } + match self { + Self::NotNumeric => ExtendedParserError::NotNumeric, + Self::PartialMatch(v, rest) => ExtendedParserError::PartialMatch(extract(f(v)), rest), + Self::Overflow(v) => ExtendedParserError::Overflow(extract(f(v))), + Self::Underflow(v) => ExtendedParserError::Underflow(extract(f(v))), + } + } +} +pub trait ExtendedParser { + fn extended_parse(input: &str) -> Result> + where + Self: Sized; +} +impl ExtendedParser for i64 { + /// Parse a number as i64. No fractional part is allowed. + fn extended_parse(input: &str) -> Result> { + fn into_i64(ebd: ExtendedBigDecimal) -> Result> { + match ebd { + ExtendedBigDecimal::BigDecimal(bd) => { + let (digits, scale) = bd.into_bigint_and_scale(); + if scale == 0 { + let negative = digits.sign() == Sign::Minus; + match i64::try_from(digits) { + Ok(i) => Ok(i), + _ => Err(ExtendedParserError::Overflow(if negative { + i64::MIN + } else { + i64::MAX + })), + } + } else { + Err(ExtendedParserError::NotNumeric) + } + } + ExtendedBigDecimal::MinusZero => Ok(0), + _ => Err(ExtendedParserError::NotNumeric), + } + } + match parse(input, ParseTarget::Integral, &[]) { + Ok(v) => into_i64(v), + Err(e) => Err(e.map(into_i64)), + } + } +} +impl ExtendedParser for u64 { + /// Parse a number as u64. No fractional part is allowed. 
+ fn extended_parse(input: &str) -> Result> { + fn into_u64(ebd: ExtendedBigDecimal) -> Result> { + match ebd { + ExtendedBigDecimal::BigDecimal(bd) => { + let (digits, scale) = bd.into_bigint_and_scale(); + if scale == 0 { + let (sign, digits) = digits.into_parts(); + match u64::try_from(digits) { + Ok(i) => { + if sign == Sign::Minus { + Ok(!i + 1) + } else { + Ok(i) + } + } + _ => Err(ExtendedParserError::Overflow(u64::MAX)), + } + } else { + Err(ExtendedParserError::NotNumeric) + } + } + ExtendedBigDecimal::MinusZero => Ok(0), + _ => Err(ExtendedParserError::NotNumeric), + } + } + match parse(input, ParseTarget::Integral, &[]) { + Ok(v) => into_u64(v), + Err(e) => Err(e.map(into_u64)), + } + } +} +impl ExtendedParser for f64 { + /// Parse a number as f64 + fn extended_parse(input: &str) -> Result> { + fn into_f64(ebd: ExtendedBigDecimal) -> Result> { + let v = match ebd { + ExtendedBigDecimal::BigDecimal(bd) => { + let f = bd.to_f64().unwrap(); + if f.is_infinite() { + return Err(ExtendedParserError::Overflow(f)); + } + if f.is_zero() && !bd.is_zero() { + return Err(ExtendedParserError::Underflow(f)); + } + f + } + ExtendedBigDecimal::MinusZero => -0.0, + ExtendedBigDecimal::Nan => f64::NAN, + ExtendedBigDecimal::MinusNan => -f64::NAN, + ExtendedBigDecimal::Infinity => f64::INFINITY, + ExtendedBigDecimal::MinusInfinity => -f64::INFINITY, + }; + Ok(v) + } + match parse(input, ParseTarget::Decimal, &[]) { + Ok(v) => into_f64(v), + Err(e) => Err(e.map(into_f64)), + } + } +} +impl ExtendedParser for ExtendedBigDecimal { + /// Parse a number as an ExtendedBigDecimal + fn extended_parse(input: &str) -> Result> { + parse(input, ParseTarget::Decimal, &[]) + } +} +fn parse_digits(base: Base, str: &str, fractional: bool) -> (Option, i64, &str) { + let (digits, rest) = base.parse_digits(str); + if fractional { + if let Some(rest) = rest.strip_prefix('.') { + return base.parse_digits_count(rest, digits); + } + } + (digits, 0, rest) +} +fn parse_exponent(base: Base, str: 
&str) -> (Option, &str) { + let exp_chars = match base { + Base::Decimal => ['e', 'E'], + Base::Hexadecimal => ['p', 'P'], + _ => unreachable!(), + }; + if let Some(rest) = str.strip_prefix(exp_chars) { + let (sign, rest) = if let Some(rest) = rest.strip_prefix('-') { + (Sign::Minus, rest) + } else if let Some(rest) = rest.strip_prefix('+') { + (Sign::Plus, rest) + } else { + (Sign::Plus, rest) + }; + let (exp_uint, rest) = Base::Decimal.parse_digits(rest); + if let Some(exp_uint) = exp_uint { + return (Some(BigInt::from_biguint(sign, exp_uint)), rest); + } + } + (None, str) +} +fn parse_suffix_multiplier<'a>(str: &'a str, allowed_suffixes: &[(char, u32)]) -> (u32, &'a str) { + if let Some(ch) = str.chars().next() { + if let Some(mul) = allowed_suffixes + .iter() + .find_map(|(c, t)| (ch == *c).then_some(*t)) + { + return (mul, &str[1..]); + } + } + (1, str) +} +fn parse_special_value( + input: &str, + negative: bool, + allowed_suffixes: &[(char, u32)], +) -> Result> { + const MATCH_TABLE: &[(&str, ExtendedBigDecimal)] = &[ + ("infinity", ExtendedBigDecimal::Infinity), + ("inf", ExtendedBigDecimal::Infinity), + ("nan", ExtendedBigDecimal::Nan), + ]; + let input_lc = input.to_ascii_lowercase(); + for (str, ebd) in MATCH_TABLE { + if input_lc.starts_with(str) { + let mut special = ebd.clone(); + if negative { + special = -special; + } + let (_, rest) = parse_suffix_multiplier(&input[str.len()..], allowed_suffixes); + return if rest.is_empty() { + Ok(special) + } else { + Err(ExtendedParserError::PartialMatch(special, rest.to_string())) + }; + } + } + Err(ExtendedParserError::NotNumeric) +} +fn make_error(overflow: bool, negative: bool) -> ExtendedParserError { + let mut v = if overflow { + ExtendedBigDecimal::Infinity + } else { + ExtendedBigDecimal::zero() + }; + if negative { + v = -v; + } + if overflow { + ExtendedParserError::Overflow(v) + } else { + ExtendedParserError::Underflow(v) + } +} +fn construct_extended_big_decimal( + digits: BigUint, + negative: bool, 
+ base: Base, + scale: i64, + exponent: BigInt, +) -> Result> { + if digits == BigUint::zero() { + return Ok(if negative { + ExtendedBigDecimal::MinusZero + } else { + ExtendedBigDecimal::zero() + }); + } + let sign = if negative { Sign::Minus } else { Sign::Plus }; + let signed_digits = BigInt::from_biguint(sign, digits); + let bd = if scale == 0 && exponent.is_zero() { + BigDecimal::from_bigint(signed_digits, 0) + } else if base == Base::Decimal { + if exponent.is_zero() { + BigDecimal::from_bigint(signed_digits, scale) + } else { + let new_scale = -exponent + scale; + if let Some(new_scale) = new_scale.to_i64() { + BigDecimal::from_bigint(signed_digits, new_scale) + } else { + return Err(make_error(new_scale.is_negative(), negative)); + } + } + } else if base == Base::Hexadecimal { + if scale > u32::MAX.into() { + return Err(ExtendedParserError::NotNumeric); + } + let bd = BigDecimal::from_bigint(signed_digits, 0) + / BigDecimal::from_bigint(BigInt::from(16).pow(scale as u32), 0); + let Some(exponent) = exponent.to_i64() else { + return Err(make_error(exponent.is_positive(), negative)); + }; + let base: BigDecimal = 2.into(); + let pow2 = base.powi(exponent); + bd * pow2 + } else { + unreachable!(); + }; + Ok(ExtendedBigDecimal::BigDecimal(bd)) +} +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum ParseTarget { + Decimal, + Integral, + Duration, +} +pub(crate) fn parse( + input: &str, + target: ParseTarget, + allowed_suffixes: &[(char, u32)], +) -> Result> { + let trimmed_input = input.trim_ascii_start(); + let (negative, unsigned) = if let Some(trimmed_input) = trimmed_input.strip_prefix('-') { + (true, trimmed_input) + } else if let Some(trimmed_input) = trimmed_input.strip_prefix('+') { + (false, trimmed_input) + } else { + (false, trimmed_input) + }; + let (base, rest) = if let Some(rest) = unsigned.strip_prefix('0') { + if let Some(rest) = rest.strip_prefix(['x', 'X']) { + (Base::Hexadecimal, rest) + } else if target == ParseTarget::Integral { + 
if let Some(rest) = rest.strip_prefix(['b', 'B']) { + (Base::Binary, rest) + } else { + (Base::Octal, unsigned) + } + } else { + (Base::Decimal, unsigned) + } + } else { + (Base::Decimal, unsigned) + }; + let parse_frac_exp = + matches!(base, Base::Decimal | Base::Hexadecimal) && target != ParseTarget::Integral; + let (digits, scale, rest) = parse_digits(base, rest, parse_frac_exp); + let (exponent, rest) = if parse_frac_exp { + parse_exponent(base, rest) + } else { + (None, rest) + }; + if digits.is_none() { + if let Some(partial) = unsigned.strip_prefix("0") { + let ebd = if negative { + ExtendedBigDecimal::MinusZero + } else { + ExtendedBigDecimal::zero() + }; + return Err(ExtendedParserError::PartialMatch(ebd, partial.to_string())); + } + return if target == ParseTarget::Integral { + Err(ExtendedParserError::NotNumeric) + } else { + parse_special_value(unsigned, negative, allowed_suffixes) + }; + } + let (mul, rest) = parse_suffix_multiplier(rest, allowed_suffixes); + let digits = digits.unwrap() * mul; + let ebd_result = + construct_extended_big_decimal(digits, negative, base, scale, exponent.unwrap_or_default()); + if rest.is_empty() { + ebd_result + } else { + Err(ExtendedParserError::PartialMatch( + ebd_result.unwrap_or_else(ExtendedParserError::extract), + rest.to_string(), + )) + } +} diff --git a/crates/bashkit/src/builtins/printf.rs b/crates/bashkit/src/builtins/printf.rs index 02df83b34..a0e96c055 100644 --- a/crates/bashkit/src/builtins/printf.rs +++ b/crates/bashkit/src/builtins/printf.rs @@ -1,11 +1,24 @@ //! printf builtin - formatted output +//! +//! Important decision: uucore owns printf parsing/formatting semantics here; +//! bashkit keeps only shell integration plus a preflight width/precision cap +//! outside generated code so regenerating `format/` cannot erase the DoS guard. 
+ +use std::borrow::Cow; +use std::ffi::OsString; +use std::ops::ControlFlow; use async_trait::async_trait; -use super::{Builtin, Context}; +use super::generated::format::{ + FormatArgument, FormatArguments, FormatError, FormatItem, parse_spec_and_escape, +}; +use super::{Builtin, Context, MAX_FORMAT_WIDTH}; use crate::error::Result; use crate::interpreter::{ExecResult, is_internal_variable}; +const MAX_PRINTF_DIAG_CHARS: usize = 1_024; + /// printf builtin - formatted string output pub struct Printf; @@ -26,7 +39,6 @@ impl Builtin for Printf { let mut args_iter = ctx.args.iter(); let mut var_name: Option = None; - // Check for -v varname flag let format = loop { match args_iter.next() { Some(arg) if arg == "-v" => { @@ -40,26 +52,16 @@ impl Builtin for Printf { }; let args: Vec = args_iter.cloned().collect(); - let mut arg_index = 0; - let mut output = String::new(); - - // Bash printf repeats the format string until all args are consumed - loop { - let start_index = arg_index; - output.push_str(&format_string(&format, &args, &mut arg_index)); - - // If no args were consumed or we've used all args, stop - if arg_index == start_index || arg_index >= args.len() { - break; - } - } + let output = match render_printf(&format, &args) { + Ok(output) => output, + Err(err) => return Ok(ExecResult::err(err, 1)), + }; if let Some(name) = var_name { // THREAT[TM-INJ-009]: Block internal variable prefix injection via printf -v if is_internal_variable(&name) { return Ok(ExecResult::ok(String::new())); } - // -v: assign to variable instead of printing ctx.variables.insert(name, output); Ok(ExecResult::ok(String::new())) } else { @@ -68,699 +70,498 @@ impl Builtin for Printf { } } -/// Parsed format specification -use super::MAX_FORMAT_WIDTH; +fn render_printf(format: &str, args: &[String]) -> std::result::Result { + let format = strip_zero_hex_escapes(format); + let format = format.as_ref(); + let values = format_arguments(args); + validate_format_caps(format.as_bytes(), args)?; 
+ + let mut out = Vec::new(); + let mut format_seen = false; + let mut fmt_args = FormatArguments::new(&values); + + let stopped = write_format_pass(format.as_bytes(), &mut fmt_args, &mut out, &mut format_seen)?; + fmt_args.start_next_batch(); + + if stopped || !format_seen { + return Ok(bytes_to_stdout_string(out)); + } + + while !fmt_args.is_exhausted() { + if write_format_pass(format.as_bytes(), &mut fmt_args, &mut out, &mut format_seen)? { + break; + } + fmt_args.start_next_batch(); + } -struct FormatSpec { - left_align: bool, - zero_pad: bool, - sign_plus: bool, - width: Option, - precision: Option, + Ok(bytes_to_stdout_string(out)) } -impl FormatSpec { - fn parse(spec: &str) -> Self { - let mut left_align = false; - let mut zero_pad = false; - let mut sign_plus = false; - let mut chars = spec.chars().peekable(); - - // Parse flags - while let Some(&c) = chars.peek() { - match c { - '-' => { - left_align = true; - chars.next(); - } - '0' if !zero_pad && chars.clone().nth(1).is_some() => { - // Only treat as flag if followed by more chars (width) - zero_pad = true; - chars.next(); - } - '+' => { - sign_plus = true; - chars.next(); - } - ' ' | '#' => { - chars.next(); - } - _ => break, +fn bytes_to_stdout_string(bytes: Vec) -> String { + String::from_utf8_lossy(&bytes).into_owned() +} + +fn strip_zero_hex_escapes(input: &str) -> Cow<'_, str> { + let bytes = input.as_bytes(); + let mut index = 0; + let mut output: Option> = None; + + while index < bytes.len() { + if bytes.get(index) == Some(&b'\\') && bytes.get(index + 1) == Some(&b'x') { + let first = bytes.get(index + 2).copied().filter(u8::is_ascii_hexdigit); + let second = bytes.get(index + 3).copied().filter(u8::is_ascii_hexdigit); + let digits = [first, second]; + let digit_count = digits.iter().flatten().count(); + let zero_hex = digit_count > 0 && digits.iter().flatten().all(|digit| *digit == b'0'); + if zero_hex { + output.get_or_insert_with(|| bytes[..index].to_vec()); + index += 2 + digit_count; + 
continue; } } - // Parse width - let mut width_str = String::new(); - while let Some(&c) = chars.peek() { - if c.is_ascii_digit() { - width_str.push( - chars - .next() - .expect("chars.next() valid: peek() confirmed char exists"), - ); - } else { - break; - } + if let Some(out) = &mut output { + out.push(bytes[index]); } - let width = if width_str.is_empty() { - None - } else { - width_str - .parse() - .ok() - .map(|w: usize| w.min(MAX_FORMAT_WIDTH)) - }; + index += 1; + } - // Parse precision - let precision = if chars.peek() == Some(&'.') { - chars.next(); - let mut prec_str = String::new(); - while let Some(&c) = chars.peek() { - if c.is_ascii_digit() { - prec_str.push( - chars - .next() - .expect("chars.next() valid: peek() confirmed char exists"), - ); - } else { - break; - } - } - if prec_str.is_empty() { - Some(0) - } else { - prec_str - .parse() - .ok() - .map(|p: usize| p.min(MAX_FORMAT_WIDTH)) - } - } else { - None - }; + match output { + Some(bytes) => Cow::Owned(String::from_utf8_lossy(&bytes).into_owned()), + None => Cow::Borrowed(input), + } +} - Self { - left_align, - zero_pad, - sign_plus, - width, - precision, +fn format_arguments(args: &[String]) -> Vec { + args.iter() + .map(|arg| FormatArgument::Unparsed(OsString::from(arg))) + .collect() +} + +fn write_format_pass( + format: &[u8], + args: &mut FormatArguments<'_>, + out: &mut Vec, + format_seen: &mut bool, +) -> std::result::Result { + for item in parse_spec_and_escape(format) { + let item = item.map_err(|err| render_printf_error(&err))?; + if matches!(item, FormatItem::Spec(_)) { + *format_seen = true; + } + match item + .write(&mut *out, args) + .map_err(|err| render_printf_error(&err))? 
+ { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => return Ok(true), } } + Ok(false) +} - /// Format an integer with the parsed spec - fn format_int(&self, n: i64) -> String { - let formatted = if self.sign_plus && n >= 0 { - format!("+{}", n) - } else { - n.to_string() - }; +fn render_printf_error(err: &FormatError) -> String { + format!( + "printf: {}\n", + truncate_text( + &err.to_string(), + MAX_PRINTF_DIAG_CHARS.saturating_sub("printf: \n".len()) + ) + ) +} - self.apply_width(&formatted, true) +fn truncate_text(input: &str, max_chars: usize) -> String { + if input.chars().count() <= max_chars { + return input.to_string(); } + let keep = max_chars.saturating_sub(3); + format!("{}...", input.chars().take(keep).collect::()) +} + +#[derive(Clone, Copy)] +enum CapArgLocation { + NextArgument, + Position(usize), +} + +struct CapArgs<'a> { + args: &'a [String], + next_arg_position: usize, + highest_arg_position: Option, + current_offset: usize, +} - /// Format an unsigned integer with the parsed spec - fn format_uint(&self, n: u64) -> String { - let formatted = n.to_string(); - self.apply_width(&formatted, true) +impl<'a> CapArgs<'a> { + fn new(args: &'a [String]) -> Self { + Self { + args, + next_arg_position: 0, + highest_arg_position: None, + current_offset: 0, + } } - /// Format a string with the parsed spec - fn format_str(&self, s: &str) -> String { - // TM-UNI-016: Use char-based truncation, not byte-based, to avoid - // panics when precision falls inside a multi-byte UTF-8 character. 
- let truncated; - let s = if let Some(prec) = self.precision { - truncated = s.chars().take(prec).collect::(); - truncated.as_str() - } else { - s - }; - self.apply_width(s, false) + fn is_exhausted(&self) -> bool { + self.current_offset >= self.args.len() } - /// Apply width padding - fn apply_width(&self, s: &str, is_numeric: bool) -> String { - let width = match self.width { - Some(w) => w, - None => return s.to_string(), - }; + fn start_next_batch(&mut self) { + self.current_offset = self + .next_arg_position + .max(self.highest_arg_position.map_or(0, |x| x.saturating_add(1))); + self.next_arg_position = self.current_offset; + } - if s.len() >= width { - return s.to_string(); - } + fn next_i64(&mut self, location: CapArgLocation) -> i64 { + self.next_arg(location).map(parse_leading_i64).unwrap_or(0) + } - let pad_char = if self.zero_pad && is_numeric && !self.left_align { - '0' - } else { - ' ' - }; - let padding = width - s.len(); - - if self.left_align { - format!("{}{}", s, " ".repeat(padding)) - } else if self.zero_pad && is_numeric && s.starts_with('-') { - // Handle negative numbers: put minus before zeros - format!("-{}{}", pad_char.to_string().repeat(padding), &s[1..]) - } else if self.zero_pad && is_numeric && s.starts_with('+') { - // Handle explicit plus sign - format!("+{}{}", pad_char.to_string().repeat(padding), &s[1..]) - } else { - format!("{}{}", pad_char.to_string().repeat(padding), s) + fn consume(&mut self, location: CapArgLocation) { + let _ = self.next_arg(location); + } + + fn next_arg(&mut self, location: CapArgLocation) -> Option<&'a str> { + match location { + CapArgLocation::NextArgument => { + let arg = self.args.get(self.next_arg_position).map(String::as_str); + self.next_arg_position += 1; + arg + } + CapArgLocation::Position(pos) => { + let pos = pos.saturating_sub(1).saturating_add(self.current_offset); + self.highest_arg_position = + Some(self.highest_arg_position.map_or(pos, |x| x.max(pos))); + 
self.args.get(pos).map(String::as_str) + } } } } -/// Format a string using printf-style format specifiers -#[allow(clippy::collapsible_if)] -fn format_string(format: &str, args: &[String], arg_index: &mut usize) -> String { - let mut output = String::new(); - let mut chars = format.chars().peekable(); - - while let Some(ch) = chars.next() { - if ch == '\\' { - // Handle escape sequences - if let Some(next) = chars.next() { - match next { - 'n' => output.push('\n'), - 't' => output.push('\t'), - 'r' => output.push('\r'), - '\\' => output.push('\\'), - '"' => output.push('"'), - '\'' => output.push('\''), - '0' => { - // Octal escape sequence - \0, \0N, \0NN, \0NNN - let mut octal = String::from("0"); - while let Some(&c) = chars.peek() { - if c.is_ascii_digit() && c != '8' && c != '9' && octal.len() < 4 { - octal.push( - chars - .next() - .expect("chars.next() valid: peek() confirmed char exists"), - ); - } else { - break; - } - } - if let Ok(val) = u8::from_str_radix(&octal, 8) { - output.push(val as char); - } - } - 'x' => { - // \xHH - hex escape (1-2 hex digits) - let mut hex = String::new(); - for _ in 0..2 { - if let Some(&c) = chars.peek() { - if c.is_ascii_hexdigit() { - hex.push(chars.next().expect( - "chars.next() valid: peek() confirmed char exists", - )); - } else { - break; - } - } - } - // NUL bytes are stripped (bash behavior in string context) - if let Ok(val) = u8::from_str_radix(&hex, 16) { - if val != 0 { - output.push(val as char); - } - } - } - 'u' => { - // \uHHHH - 4-digit unicode escape - if let Some(c) = parse_unicode_escape(&mut chars, 4) { - output.push(c); - } - } - 'U' => { - // \UHHHHHHHH - 8-digit unicode escape - if let Some(c) = parse_unicode_escape(&mut chars, 8) { - output.push(c); - } - } - _ => { - output.push('\\'); - output.push(next); - } - } - } else { - output.push('\\'); - } - } else if ch == '%' { - // Handle format specifiers - if let Some(&next) = chars.peek() { - if next == '%' { - chars.next(); - output.push('%'); - 
continue; - } +fn validate_format_caps(format: &[u8], args: &[String]) -> std::result::Result<(), String> { + let mut args = CapArgs::new(args); + let (format_seen, stopped) = validate_format_caps_pass(format, &mut args)?; + args.start_next_batch(); - // Parse optional flags, width, precision - let mut spec = String::new(); - while let Some(&c) = chars.peek() { - if c.is_ascii_digit() - || c == '-' - || c == '+' - || c == ' ' - || c == '#' - || c == '.' - { - spec.push( - chars - .next() - .expect("chars.next() valid: peek() confirmed char exists"), - ); - } else { - break; - } - } + if stopped || !format_seen { + return Ok(()); + } - let fmt_spec = FormatSpec::parse(&spec); - - // Get the format type - if let Some(fmt_type) = chars.next() { - let arg = args.get(*arg_index).map(|s| s.as_str()).unwrap_or(""); - *arg_index += 1; - - match fmt_type { - 's' => { - // String - output.push_str(&fmt_spec.format_str(arg)); - } - 'd' | 'i' => { - // Integer - if let Ok(n) = arg.parse::() { - output.push_str(&fmt_spec.format_int(n)); - } else { - output.push_str(&fmt_spec.format_int(0)); - } - } - 'u' => { - // Unsigned integer - if let Ok(n) = arg.parse::() { - output.push_str(&fmt_spec.format_uint(n)); - } else { - output.push_str(&fmt_spec.format_uint(0)); - } - } - 'o' => { - // Octal - if let Ok(n) = arg.parse::() { - let formatted = format!("{:o}", n); - output.push_str(&fmt_spec.apply_width(&formatted, true)); - } else { - output.push_str(&fmt_spec.apply_width("0", true)); - } - } - 'x' => { - // Lowercase hex - if let Ok(n) = arg.parse::() { - let formatted = format!("{:x}", n); - output.push_str(&fmt_spec.apply_width(&formatted, true)); - } else { - output.push_str(&fmt_spec.apply_width("0", true)); - } - } - 'X' => { - // Uppercase hex - if let Ok(n) = arg.parse::() { - let formatted = format!("{:X}", n); - output.push_str(&fmt_spec.apply_width(&formatted, true)); - } else { - output.push_str(&fmt_spec.apply_width("0", true)); - } - } - 'f' | 'e' | 'E' | 'g' | 'G' 
=> { - // Float - if let Ok(n) = arg.parse::() { - let formatted = if let Some(prec) = fmt_spec.precision { - format!("{:.prec$}", n, prec = prec) - } else { - format!("{}", n) - }; - output.push_str(&fmt_spec.apply_width(&formatted, true)); - } else { - output.push_str("0.0"); - } - } - 'c' => { - // Character - if let Some(c) = arg.chars().next() { - output.push(c); - } - } - 'b' => { - // String with escape sequences - output.push_str(&expand_escapes(arg)); - } - 'q' => { - // Shell-quoted string safe for reuse - output.push_str(&shell_quote(arg)); - } - _ => { - // Unknown format - output literally - output.push('%'); - output.push_str(&spec); - output.push(fmt_type); - *arg_index -= 1; // Don't consume arg - } - } - } - } else { - output.push('%'); - } - } else { - output.push(ch); + while !args.is_exhausted() { + let (_, stopped) = validate_format_caps_pass(format, &mut args)?; + args.start_next_batch(); + if stopped { + break; } } - - output + Ok(()) } -/// Quote a string for safe shell reuse (printf %q behavior). -/// -/// Matches bash behavior: -/// - Empty string → `''` -/// - Safe strings (only alnum/`_`/`.`/`-`/`:`/`=`/`+`/`@`/`,`/`%`/`^`/`/`) → unquoted -/// - Strings with control chars (tab, newline, etc.) → `$'...'` quoting -/// - Other strings → backslash-escape individual special characters -fn shell_quote(s: &str) -> String { - if s.is_empty() { - return "''".to_string(); - } - - // Check if the string needs quoting at all - let needs_quoting = s - .chars() - .any(|c| !c.is_ascii_alphanumeric() && !"_/.:-=+@,%^".contains(c)); - - if !needs_quoting { - return s.to_string(); - } - - // Check for control characters that require $'...' quoting - let has_control = s.chars().any(|c| (c as u32) < 32 || c as u32 == 127); - - if has_control { - // Use $'...' 
quoting - let mut out = String::from("$'"); - for ch in s.chars() { - match ch { - '\'' => out.push_str("\\'"), - '\\' => out.push_str("\\\\"), - '\n' => out.push_str("\\n"), - '\t' => out.push_str("\\t"), - '\r' => out.push_str("\\r"), - c if (c as u32) < 32 || c as u32 == 127 => { - out.push_str(&format!("\\x{:02x}", c as u32)); - } - c => out.push(c), +fn validate_format_caps_pass( + format: &[u8], + args: &mut CapArgs<'_>, +) -> std::result::Result<(bool, bool), String> { + let mut i = 0; + let mut format_seen = false; + while i < format.len() { + match format[i] { + b'\\' if format.get(i + 1) == Some(&b'c') => return Ok((format_seen, true)), + b'\\' => { + i = i.saturating_add(2); } - } - out.push('\''); - out - } else { - // Backslash-escape individual special characters - let mut out = String::new(); - for ch in s.chars() { - if ch.is_ascii_alphanumeric() || "_/.:-=+@,%^".contains(ch) { - out.push(ch); - } else { - out.push('\\'); - out.push(ch); + b'%' if format.get(i + 1) == Some(&b'%') => { + i += 2; } + b'%' => { + let Some(spec) = parse_cap_spec(format, i + 1) else { + i += 1; + continue; + }; + spec.validate(args)?; + format_seen = true; + i = spec.end; + } + _ => i += 1, } - out } + Ok((format_seen, false)) } -/// Expand escape sequences in a string -#[allow(clippy::collapsible_if)] -fn expand_escapes(s: &str) -> String { - let mut output = String::new(); - let mut chars = s.chars().peekable(); - - while let Some(ch) = chars.next() { - if ch == '\\' { - if let Some(next) = chars.next() { - match next { - 'n' => output.push('\n'), - 't' => output.push('\t'), - 'r' => output.push('\r'), - '\\' => output.push('\\'), - '0' => { - // Octal escape sequence - let mut octal = String::from("0"); - while let Some(&c) = chars.peek() { - if c.is_ascii_digit() && c != '8' && c != '9' && octal.len() < 4 { - octal.push( - chars - .next() - .expect("chars.next() valid: peek() confirmed char exists"), - ); - } else { - break; - } - } - if let Ok(val) = 
u8::from_str_radix(&octal, 8) { - output.push(val as char); - } - } - 'x' => { - // \xHH - hex escape (1-2 hex digits) - let mut hex = String::new(); - for _ in 0..2 { - if let Some(&c) = chars.peek() { - if c.is_ascii_hexdigit() { - hex.push(chars.next().expect( - "chars.next() valid: peek() confirmed char exists", - )); - } else { - break; - } - } - } - // NUL bytes are stripped (bash behavior in string context) - if let Ok(val) = u8::from_str_radix(&hex, 16) { - if val != 0 { - output.push(val as char); - } - } - } - 'u' => { - // \uHHHH - 4-digit unicode escape - if let Some(c) = parse_unicode_escape(&mut chars, 4) { - output.push(c); - } - } - 'U' => { - // \UHHHHHHHH - 8-digit unicode escape - if let Some(c) = parse_unicode_escape(&mut chars, 8) { - output.push(c); - } - } - _ => { - output.push('\\'); - output.push(next); - } - } - } else { - output.push('\\'); - } - } else { - output.push(ch); +struct CapSpec { + end: usize, + position: CapArgLocation, + width: Option, + precision: Option, +} + +enum CapValue { + Fixed(usize), + Asterisk(CapArgLocation), +} + +impl CapSpec { + fn validate(&self, args: &mut CapArgs<'_>) -> std::result::Result<(), String> { + if let Some(width) = &self.width { + let width = resolve_cap_value(width, args, true); + reject_over_cap("width", width)?; } + if let Some(precision) = &self.precision { + let precision = resolve_cap_value(precision, args, false); + reject_over_cap("precision", precision)?; + } + args.consume(self.position); + Ok(()) } +} - output +fn reject_over_cap(kind: &str, value: usize) -> std::result::Result<(), String> { + if value > MAX_FORMAT_WIDTH { + return Err(format!( + "printf: format {kind} {value} exceeds limit {MAX_FORMAT_WIDTH}\n" + )); + } + Ok(()) } -/// Parse a unicode escape sequence (\uHHHH or \UHHHHHHHH) from a char iterator. -/// `max_digits` is 4 for \u and 8 for \U. 
-fn parse_unicode_escape( - chars: &mut std::iter::Peekable>, - max_digits: usize, -) -> Option { - let mut hex = String::new(); - for _ in 0..max_digits { - if let Some(&c) = chars.peek() { - if c.is_ascii_hexdigit() { - hex.push( - chars - .next() - .expect("chars.next() valid: peek() confirmed char exists"), - ); +fn resolve_cap_value(value: &CapValue, args: &mut CapArgs<'_>, is_width: bool) -> usize { + match value { + CapValue::Fixed(value) => *value, + CapValue::Asterisk(location) => { + let value = args.next_i64(*location); + if is_width { + value + .checked_abs() + .and_then(|v| usize::try_from(v).ok()) + .unwrap_or(usize::MAX) + } else if value < 0 { + 0 } else { - break; + usize::try_from(value).unwrap_or(usize::MAX) } - } else { - break; } } - if hex.is_empty() { - return None; - } - u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) } -#[cfg(test)] -mod tests { - use super::*; +fn parse_cap_spec(format: &[u8], start: usize) -> Option { + let mut index = start; + let position = eat_argument_position(format, &mut index)?; - #[test] - fn test_zero_padding() { - let args = vec!["42".to_string()]; - let mut idx = 0; - assert_eq!(format_string("%05d", &args, &mut idx), "00042"); + while matches!( + format.get(index), + Some(b'-' | b'+' | b' ' | b'#' | b'0' | b'\'') + ) { + index += 1; } - #[test] - fn test_zero_padding_negative() { - let args = vec!["-42".to_string()]; - let mut idx = 0; - assert_eq!(format_string("%06d", &args, &mut idx), "-00042"); + let width = eat_asterisk_or_number(format, &mut index); + let precision = if format.get(index) == Some(&b'.') { + index += 1; + Some(eat_asterisk_or_number(format, &mut index).unwrap_or(CapValue::Fixed(0))) + } else { + None + }; + + while let Some(length) = parse_length(format, index) { + index += length; + } + + let specifier = *format.get(index)?; + index += 1; + if !matches!( + specifier, + b'c' | b's' + | b'b' + | b'q' + | b'd' + | b'i' + | b'u' + | b'o' + | b'x' + | b'X' + | b'f' + | b'F' + | 
b'e' + | b'E' + | b'g' + | b'G' + | b'a' + | b'A' + ) { + return None; } - #[test] - fn test_width_without_zero() { - let args = vec!["42".to_string()]; - let mut idx = 0; - assert_eq!(format_string("%5d", &args, &mut idx), " 42"); - } + Some(CapSpec { + end: index, + position, + width, + precision, + }) +} - #[test] - fn test_left_align() { - let args = vec!["42".to_string()]; - let mut idx = 0; - assert_eq!(format_string("%-5d", &args, &mut idx), "42 "); +fn eat_asterisk_or_number(format: &[u8], index: &mut usize) -> Option { + if format.get(*index) == Some(&b'*') { + *index += 1; + Some(CapValue::Asterisk(eat_argument_position(format, index)?)) + } else { + eat_number(format, index).map(CapValue::Fixed) } +} - #[test] - fn test_string_width() { - let args = vec!["hi".to_string()]; - let mut idx = 0; - assert_eq!(format_string("%5s", &args, &mut idx), " hi"); +fn eat_argument_position(format: &[u8], index: &mut usize) -> Option { + let original_index = *index; + let Some(pos) = eat_number(format, index) else { + return Some(CapArgLocation::NextArgument); + }; + if format.get(*index) == Some(&b'$') { + *index += 1; + Some(CapArgLocation::Position(pos)) + } else { + *index = original_index; + Some(CapArgLocation::NextArgument) } +} - #[test] - fn test_string_left_align() { - let args = vec!["hi".to_string()]; - let mut idx = 0; - assert_eq!(format_string("%-5s", &args, &mut idx), "hi "); +fn eat_number(format: &[u8], index: &mut usize) -> Option { + let start = *index; + let mut value = 0usize; + while let Some(byte) = format.get(*index) { + if !byte.is_ascii_digit() { + break; + } + value = value + .saturating_mul(10) + .saturating_add(usize::from(byte - b'0')); + *index += 1; } + (*index > start).then_some(value) +} - #[test] - fn test_precision_float() { - let args = vec!["3.14159".to_string()]; - let mut idx = 0; - assert_eq!(format_string("%.2f", &args, &mut idx), "3.14"); +fn parse_length(format: &[u8], index: usize) -> Option { + match format.get(index)? 
{ + b'h' | b'l' if format.get(index + 1) == format.get(index) => Some(2), + b'h' | b'l' | b'j' | b'z' | b't' | b'L' => Some(1), + _ => None, } +} - #[test] - fn test_width_and_precision() { - let args = vec!["3.14".to_string()]; - let mut idx = 0; - assert_eq!(format_string("%8.2f", &args, &mut idx), " 3.14"); - } +fn parse_leading_i64(input: &str) -> i64 { + let bytes = input.as_bytes(); + let mut index = 0; + let sign = match bytes.first() { + Some(b'-') => { + index = 1; + -1i128 + } + Some(b'+') => { + index = 1; + 1i128 + } + _ => 1i128, + }; - #[test] - fn test_hex_zero_padding() { - let args = vec!["255".to_string()]; - let mut idx = 0; - assert_eq!(format_string("%04x", &args, &mut idx), "00ff"); + let start_digits = index; + let mut value = 0i128; + while let Some(byte) = bytes.get(index) { + if !byte.is_ascii_digit() { + break; + } + value = value + .saturating_mul(10) + .saturating_add(i128::from(byte - b'0')); + index += 1; } - #[test] - fn test_unicode_escape_u() { - // \u03bc -> μ (Greek small letter mu) - let args = vec![]; - let mut idx = 0; - assert_eq!(format_string("\\u03bc", &args, &mut idx), "\u{03bc}"); + if index == start_digits { + return 0; } + let value = value.saturating_mul(sign); + value.clamp(i128::from(i64::MIN), i128::from(i64::MAX)) as i64 +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::interpreter::ExecResult; + #[test] - fn test_unicode_escape_big_u() { - // \U000003bc -> μ - let args = vec![]; - let mut idx = 0; - assert_eq!(format_string("\\U000003bc", &args, &mut idx), "\u{03bc}"); + fn generated_formatter_repeats_format_until_args_exhausted() { + assert_eq!( + render_printf("%s=%d ", &["a".into(), "1".into(), "b".into(), "2".into()]).unwrap(), + "a=1 b=2 " + ); } #[test] - fn test_unicode_escape_ascii() { - // \u0041 -> A - let args = vec![]; - let mut idx = 0; + fn generated_formatter_handles_escapes_and_quotes() { + assert_eq!(render_printf("a\\nb", &[]).unwrap(), "a\nb"); assert_eq!( - 
format_string("\\u0041\\u0042\\u0043", &args, &mut idx), - "ABC" + render_printf("%q", &["hello world".into()]).unwrap(), + "hello\\ world" ); } #[test] - fn test_unicode_escape_in_expand() { - // %b format also handles \u escapes - assert_eq!(expand_escapes("\\u03bc"), "\u{03bc}"); - assert_eq!(expand_escapes("\\U000003bc"), "\u{03bc}"); + fn strips_zero_hex_escapes_at_stdout_boundary() { + assert_eq!(render_printf("a\\x00b", &[]).unwrap(), "ab"); } #[test] - fn test_hex_escape() { - // \x41 -> A - let args = vec![]; - let mut idx = 0; - assert_eq!(format_string("\\x41\\x42\\x43", &args, &mut idx), "ABC"); - // \x00 -> NUL stripped - idx = 0; - assert_eq!(format_string("a\\x00b", &args, &mut idx), "ab"); + fn preserves_octal_nul_for_zero_delimited_pipelines() { + assert_eq!(render_printf("a\\0b", &[]).unwrap().as_bytes(), b"a\0b"); } #[test] - fn test_hex_escape_in_expand() { - assert_eq!(expand_escapes("\\x41"), "A"); - assert_eq!(expand_escapes("a\\x00b"), "ab"); + fn rejects_fixed_width_over_cap() { + let err = render_printf("%10001s", &["x".into()]).unwrap_err(); + assert!(err.contains("width 10001 exceeds limit")); } - // Issue #435: precision should use char count, not byte count #[test] - fn test_precision_multibyte_utf8() { - // "café" = 4 chars, 5 bytes. %.3s should give "caf", not panic. - let args = vec!["café".to_string()]; - let mut idx = 0; - assert_eq!( - format_string("%.3s", &args, &mut idx), - "caf", - "precision should truncate by chars" - ); + fn rejects_fixed_precision_over_cap() { + let err = render_printf("%.10001f", &["1".into()]).unwrap_err(); + assert!(err.contains("precision 10001 exceeds limit")); } #[test] - fn test_precision_cjk() { - // "日本語" = 3 chars, 9 bytes. 
%.2s should give "日本" - let args = vec!["日本語".to_string()]; - let mut idx = 0; - assert_eq!( - format_string("%.2s", &args, &mut idx), - "日本", - "should handle CJK chars" - ); + fn rejects_asterisk_width_over_cap() { + let err = render_printf("%*s", &["999999".into(), "x".into()]).unwrap_err(); + assert!(err.contains("width 999999 exceeds limit")); } #[test] - fn test_large_precision_no_panic() { - // Must not panic on precision > 65535 - let args = vec!["1.0".to_string()]; - let mut idx = 0; - let result = format_string("%.99999f", &args, &mut idx); - // Should produce output without panicking — precision clamped - assert!(!result.is_empty()); + fn rejects_nested_repeat_asterisk_width_over_cap() { + let err = render_printf("%s %*s", &["ok".into(), "999999".into(), "x".into()]).unwrap_err(); + assert!(err.contains("width 999999 exceeds limit")); + } + + #[tokio::test] + async fn no_leak_printf_format_errors() { + let r = crate::builtins::debug_leak_check::run("printf '%10001s' x").await; + crate::builtins::debug_leak_check::assert_no_leak(&r, "printf_width_cap", &[]); } #[test] - fn test_normal_precision_still_works() { - let args = vec!["3.14159".to_string()]; - let mut idx = 0; - let result = format_string("%.2f", &args, &mut idx); - assert_eq!(result, "3.14"); + fn no_leak_all_format_error_variants() { + let variants = vec![ + FormatError::SpecError(vec![b'?']), + FormatError::IoError(std::io::Error::other("io failed")), + FormatError::NoMoreArguments, + FormatError::InvalidArgument(FormatArgument::String("x".into())), + FormatError::TooManySpecs(b"%s %s".to_vec()), + FormatError::NeedAtLeastOneSpec(b"plain".to_vec()), + FormatError::WrongSpecType, + FormatError::InvalidPrecision("bad".into()), + FormatError::EndsWithPercent(b"%".to_vec()), + FormatError::MissingHex, + FormatError::InvalidCharacter('u', b"d800".to_vec()), + FormatError::InvalidEncoding( + super::super::generated::format_support::NonUtf8OsStrError::new_for_test("x"), + ), + ]; + + for err in 
variants { + let result = ExecResult::err(render_printf_error(&err), 1); + crate::testing::assert_no_leak(&result, "printf_format_error_variant", &[]); + } } } diff --git a/specs/coreutils-args-port.md b/specs/coreutils-args-port.md index 1d34a2e9a..bf6bb2935 100644 --- a/specs/coreutils-args-port.md +++ b/specs/coreutils-args-port.md @@ -217,9 +217,10 @@ output mirrors the structure. each ported file, flattens nested groups (`use a::{b, c}`), and classifies each path: -- **External** (anything not rooted at `uucore`/`crate`/`self`/`super`) +- **External or module-local** (anything not rooted at `uucore`/`crate`) passes through. `std`, `bigdecimal`, `num-traits`, etc. resolve at - bashkit's compile time. + bashkit's compile time; `self::`/`super::` references stay inside the + vendored module tree. - **Fluent boundary** — `use fluent::*;` and `use uucore::translate;` / `uucore::i18n::*` are hard errors regardless of manifest, with a message telling the operator the module is not safely vendorable @@ -235,12 +236,25 @@ Substitution `action`s: |---|---|---| | `error` | Abort the port at this import. Use when the module references a uucore type that should not be vendored. | Implemented | | `replace_with` | Rewrite the matched prefix in every `use` path to `target`; when the rewritten path's final segment differs from the original, an `as ` rename is inserted so call sites compile unchanged. | Implemented | -| `inline` | Vendor the file at `inline_source` next to the module's output dir (under `/.rs` where `` is the prefix's final segment), and rewrite matching `use` paths to `super::::…`. The inlined file is processed through the same enforce + rewrite pipeline so transitive uucore references either substitute or surface explicitly. 
| Implemented | +| `inline` | Vendor the file at `inline_source` next to the module's output dir (under `/.rs` where `` is the prefix's final segment), and rewrite matching `use` paths to `crate::builtins::generated::::…` so imports work from any nested module depth. The inlined file is processed through the same enforce + rewrite pipeline so transitive uucore references either substitute or surface explicitly. | Implemented | Output goes through `prettyplease::unparse` whenever any `replace_with` or `inline` substitution is in scope, so use-group syntax may be flattened into individual `use` items as a side effect -of rewriting. +of rewriting. `use module::{self, Item}` is normalized to +`use module;` plus `use module::Item;` so flattened relative imports +remain valid Rust. + +Top-level upstream `#[cfg(test)]` items and rustdoc attributes are +stripped during module vendoring. Bashkit tests and public docs cover +the integrated behavior, while upstream tests and examples assume the +original uucore crate topology. + +### Vendored Modules + +| Module | uutils source | Output | Substitution decisions | +|---|---|---|---| +| `format` | `src/uucore/src/lib/features/format` | `crates/bashkit/src/builtins/generated/format/` plus `extendedbigdecimal.rs` and `num_parser.rs` siblings | `crate::format` self-refs rewrite to `crate::builtins::generated::format`; `extendedbigdecimal` and `parser::num_parser` are inlined; `NonUtf8OsStrError`, `os_str_as_bytes`, `UError`, `set_exit_code`, `quoting_style`, `show_error`, and `show_warning` rewrite to bashkit-local `format_support` shims. | ### Output banner