diff --git a/crates/perry-codegen/src/codegen/closure.rs b/crates/perry-codegen/src/codegen/closure.rs index 0381aa1f1..b35e33786 100644 --- a/crates/perry-codegen/src/codegen/closure.rs +++ b/crates/perry-codegen/src/codegen/closure.rs @@ -296,6 +296,7 @@ pub(super) fn compile_closure( local_closure_func_ids: HashMap::new(), local_closure_param_counts: HashMap::new(), option_object_locals: HashMap::new(), + object_literal_locals: HashSet::new(), namespace_imports: &cross_module.namespace_imports, namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports, namespace_member_prefixes: &cross_module.namespace_member_prefixes, diff --git a/crates/perry-codegen/src/codegen/entry.rs b/crates/perry-codegen/src/codegen/entry.rs index 7e98a01bc..1451ded49 100644 --- a/crates/perry-codegen/src/codegen/entry.rs +++ b/crates/perry-codegen/src/codegen/entry.rs @@ -398,6 +398,7 @@ pub(super) fn compile_module_entry( local_closure_func_ids: HashMap::new(), local_closure_param_counts: HashMap::new(), option_object_locals: HashMap::new(), + object_literal_locals: HashSet::new(), namespace_imports: &cross_module.namespace_imports, namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports, namespace_member_prefixes: &cross_module.namespace_member_prefixes, @@ -836,6 +837,7 @@ pub(super) fn compile_module_entry( local_closure_func_ids: HashMap::new(), local_closure_param_counts: HashMap::new(), option_object_locals: HashMap::new(), + object_literal_locals: HashSet::new(), namespace_imports: &cross_module.namespace_imports, namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports, namespace_member_prefixes: &cross_module.namespace_member_prefixes, diff --git a/crates/perry-codegen/src/codegen/function.rs b/crates/perry-codegen/src/codegen/function.rs index bd151af6b..d78a02a3a 100644 --- a/crates/perry-codegen/src/codegen/function.rs +++ b/crates/perry-codegen/src/codegen/function.rs @@ -198,6 +198,7 @@ pub(super) fn compile_function( local_closure_func_ids: HashMap::new(), local_closure_param_counts: HashMap::new(), option_object_locals: HashMap::new(), + object_literal_locals: HashSet::new(), namespace_imports: &cross_module.namespace_imports, namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports, namespace_member_prefixes: &cross_module.namespace_member_prefixes, diff --git a/crates/perry-codegen/src/codegen/method.rs b/crates/perry-codegen/src/codegen/method.rs index 9cd62ad83..d9eddb90c 100644 --- a/crates/perry-codegen/src/codegen/method.rs +++ b/crates/perry-codegen/src/codegen/method.rs @@ -185,6 +185,7 @@ pub(super) fn compile_method( local_closure_func_ids: HashMap::new(), local_closure_param_counts: HashMap::new(), option_object_locals: HashMap::new(), + object_literal_locals: HashSet::new(), namespace_imports: &cross_module.namespace_imports, namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports, namespace_member_prefixes: &cross_module.namespace_member_prefixes, @@ -694,6 +695,7 @@ pub(super) fn compile_static_method( local_closure_func_ids: HashMap::new(), local_closure_param_counts: HashMap::new(), option_object_locals: HashMap::new(), + object_literal_locals: HashSet::new(), namespace_imports: &cross_module.namespace_imports, namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports, namespace_member_prefixes: &cross_module.namespace_member_prefixes, diff --git a/crates/perry-codegen/src/expr/mod.rs b/crates/perry-codegen/src/expr/mod.rs index 4e7dc0ae1..75b45cfab 100644 --- a/crates/perry-codegen/src/expr/mod.rs +++ b/crates/perry-codegen/src/expr/mod.rs @@ -399,6 +399,14 @@ pub(crate) struct FnCtx<'a> { /// native constructor lowering read `const init = {...}; new Request(url, /// init)` with the same field extractor used for inline object literals. pub option_object_locals: std::collections::HashMap>, + /// LocalIds of immutable locals provably initialized from an object + /// literal (`const o = { … }`, including method-bearing literals that + /// lower to an object-building IIFE). #5271: a builtin-named method on + /// such a receiver (`o.trim()`, joi's `internals.trim(v, s)`) is the + /// object's OWN method, never `String.prototype.` — so the static + /// String-method fast path must NOT claim it even when the call's arity + /// happens to match the String builtin. + pub object_literal_locals: std::collections::HashSet, // ── Cross-module import plumbing (Phase F) ────────────────────── /// Locals that are namespace imports (`import * as X from "./mod"`). diff --git a/crates/perry-codegen/src/lower_call/property_get.rs b/crates/perry-codegen/src/lower_call/property_get.rs index 528281f02..8561007e6 100644 --- a/crates/perry-codegen/src/lower_call/property_get.rs +++ b/crates/perry-codegen/src/lower_call/property_get.rs @@ -41,6 +41,48 @@ fn is_array_only_method_name(name: &str) -> bool { ) } +/// For the Any-typed-receiver string-method fallback only: is `argc` a +/// plausible argument count for the String.prototype builtin named +/// `name`? When a builtin-named method is invoked on a receiver that is +/// NOT provably a string (object literal, `any`, unknown) AND the arg +/// count can't match the String builtin's signature, the call is almost +/// certainly a user method that merely shares a name with a String +/// builtin — e.g. joi's `internals.trim(value, schema)` (#5271). Forcing +/// the String path there used to abort codegen with +/// "String.trim takes no args, got 2"; gating on arity here lets such +/// calls fall through to the runtime method dispatcher instead. +/// +/// The accepted ranges mirror `lower_string_method`'s per-arm arity +/// guards. Char-access methods (`charAt`/`charCodeAt`/`codePointAt`) +/// ignore surplus args per spec, so any count is fine for them. +fn string_only_method_arity_ok(name: &str, argc: usize) -> bool { + match name { + // No-arg string transforms. + "trim" | "trimStart" | "trimEnd" | "toLowerCase" | "toUpperCase" => argc == 0, + // Locale-aware case folding: optional `locales`. + "toLocaleLowerCase" | "toLocaleUpperCase" => argc <= 1, + // split(separator?, limit?). + "split" => argc <= 2, + // substring(start?, end?). + "substring" => argc <= 2, + // substr(start, length?) — start is required. + "substr" => argc == 1 || argc == 2, + // replaceAll(search, replace). + "replaceAll" => argc == 2, + // padStart/padEnd(targetLength, padString?). + "padStart" | "padEnd" => argc == 1 || argc == 2, + // repeat(count). + "repeat" => argc == 1, + // localeCompare(that, locales?, options?). + "localeCompare" => argc <= 3, + // Char-access ignores extra args (still evaluated for side effects). + "charAt" | "charCodeAt" | "codePointAt" => true, + // Conservative default: methods reaching this gate but not listed + // here keep their prior (already arity-checked) routing. + _ => true, + } +} + fn is_date_receiver(ctx: &FnCtx<'_>, object: &Expr) -> bool { matches!(object, Expr::DateNew(_)) || receiver_class_name(ctx, object).as_deref() == Some("Date") @@ -393,7 +435,34 @@ pub fn try_lower_property_get_method_call( // Falling through here routes it to the generic `js_native_call_method` // dispatch (→ `dispatch_native_module_method`); forcing the string path // hands the namespace pointer to a string FFI and SIGSEGVs. + // #5271: a builtin-named method on a receiver that is NOT provably a + // string (object literal, `any`, unknown) may be a USER method that + // merely shares the name — joi's `internals.trim(value, schema)`, or + // any `{ trim() {…} }.trim()`. Forcing the static String path there + // hands the object pointer to a string FFI: it either aborts codegen + // on the String arity guard (`String.trim takes no args, got 2`) or + // bit-casts the object as a string and returns "[object Object]". + // + // Take the static String fast path only when: + // * the receiver is NOT a known object-literal local — `o.trim()` + // on `const o = { trim() {…} }` is the object's OWN method, never + // `String.prototype.trim`, even when the arity matches; AND + // * the arg count is plausible for the String builtin — when it is + // NOT (joi's `internals.trim(value, schema)`: 2 args to a 0-arg + // builtin), the call is a user method sharing the name. + // Otherwise fall through to `js_native_call_method`, which resolves + // the receiver's own member at runtime and still services a genuine + // (Any-typed) string receiver via its `jsval.is_string()` arm — so + // the earlier "[object Object]" hazard the comment above warns about + // no longer applies (the runtime grew full string-method arms in + // #421/#514). See #5271. + let receiver_is_object_literal = matches!( + &**object, + Expr::LocalGet(id) if ctx.object_literal_locals.contains(id) + ) || matches!(&**object, Expr::Object(_)); if is_string_only_method + && string_only_method_arity_ok(property, args.len()) + && !receiver_is_object_literal && !is_array_expr(ctx, object) && !is_buffer && !is_native_module_dynamic_index(object) diff --git a/crates/perry-codegen/src/stmt/let_stmt.rs b/crates/perry-codegen/src/stmt/let_stmt.rs index 63dbeea68..e0ec3144f 100644 --- a/crates/perry-codegen/src/stmt/let_stmt.rs +++ b/crates/perry-codegen/src/stmt/let_stmt.rs @@ -10,6 +10,29 @@ use crate::native_value::{ }; use crate::types::{DOUBLE, I32, I64, I8, PTR}; +/// #5271: does `init` provably evaluate to a plain object literal? Two +/// shapes reach codegen: a data-only literal stays `Expr::Object`, while a +/// literal carrying methods/getters lowers to an immediately-invoked +/// object-building closure whose sole param is named `__perry_obj_iife` +/// and whose single argument is the seed `Object(..)`. Recognizing both +/// lets `o.trim()` / `internals.trim(v, s)` resolve to the receiver's own +/// member rather than `String.prototype.trim`. +fn is_object_literal_init(init: &perry_hir::Expr) -> bool { + use perry_hir::Expr; + match init { + Expr::Object(_) => true, + Expr::Call { callee, args, .. } => { + matches!(args.first(), Some(Expr::Object(_))) + && matches!( + callee.as_ref(), + Expr::Closure { params, .. } + if params.first().map_or(false, |p| p.name == "__perry_obj_iife") + ) + } + _ => false, + } +} + fn is_global_this_value(expr: &perry_hir::Expr) -> bool { matches!(expr, perry_hir::Expr::GlobalGet(_)) || matches!( @@ -45,6 +68,14 @@ pub(crate) fn lower_let( if let Some(props) = crate::lower_call::extract_options_fields(ctx, init_expr) { ctx.option_object_locals.insert(id, props); } + // #5271: remember object-literal locals so a builtin-named member + // call (`o.trim()`, joi's `internals.trim(v, s)`) resolves to the + // object's OWN method instead of being claimed by the static + // String-method fast path. Covers both plain literals and the + // method-bearing literals that lower to an object-building IIFE. + if is_object_literal_init(init_expr) { + ctx.object_literal_locals.insert(id); + } } } if let Some(init_expr) = init { diff --git a/crates/perry/tests/issue_5271_string_method_nonstring_receiver.rs b/crates/perry/tests/issue_5271_string_method_nonstring_receiver.rs new file mode 100644 index 000000000..e371aa131 --- /dev/null +++ b/crates/perry/tests/issue_5271_string_method_nonstring_receiver.rs @@ -0,0 +1,160 @@ +//! Regression tests for #5271 — a method whose name collides with a +//! `String.prototype` builtin (`trim`/`split`/`slice`/…) on a NON-string +//! receiver was eagerly lowered to the static `String.prototype.` fast +//! path in `lower_string_method`. +//! +//! Root cause: the "string-only method on an `any`-typed receiver" fallback +//! in `lower_call/property_get.rs` forced the String path for collision-prone +//! method names regardless of the receiver's real shape. joi's +//! `validator.js:359` calls its OWN `internals.trim(value, schema)` — 2 args +//! to a 0-arg String builtin — which aborted codegen with +//! `perry-codegen: String.trim takes no args, got 2`. Same-arity collisions +//! (`{ trim() {…} }.trim()`, `{ split(a,b){…} }.split(1,2)`) instead bit-cast +//! the object pointer as a string and returned "[object Object]". +//! +//! The fix gates the static String fast path on (a) the receiver NOT being a +//! known object-literal local and (b) the arg count being plausible for the +//! String builtin; otherwise it falls through to `js_native_call_method`, +//! which resolves the receiver's own member at runtime while still servicing a +//! genuine (any-typed) string receiver via its `jsval.is_string()` arm. + +use std::path::PathBuf; +use std::process::Command; + +fn perry_bin() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_perry")) +} + +fn compile_and_run(dir: &std::path::Path, entry: &std::path::Path) -> String { + let output = dir.join("main_bin"); + let compile = Command::new(perry_bin()) + .current_dir(dir) + .arg("compile") + .arg(entry) + .arg("-o") + .arg(&output) + .output() + .expect("run perry compile"); + assert!( + compile.status.success(), + "perry compile failed\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&compile.stdout), + String::from_utf8_lossy(&compile.stderr) + ); + let run = Command::new(&output).output().expect("run compiled binary"); + assert!( + run.status.success(), + "compiled binary failed\nstatus: {:?}\nstdout:\n{}\nstderr:\n{}", + run.status, + String::from_utf8_lossy(&run.stdout), + String::from_utf8_lossy(&run.stderr) + ); + String::from_utf8_lossy(&run.stdout).to_string() +} + +/// The exact joi shape: a user `trim` taking 2 args. Pre-fix this aborted +/// codegen ("String.trim takes no args, got 2"); it must compile and call the +/// object's own method. +#[test] +fn object_trim_two_args_resolves_to_user_method() { + let dir = tempfile::tempdir().expect("tempdir"); + let entry = dir.path().join("main.ts"); + std::fs::write( + &entry, + r#" +const internals = { trim(value: string, schema: any) { return value + ":" + schema; } }; +console.log(internals.trim("a", "b")); +"#, + ) + .expect("write entry"); + + let stdout = compile_and_run(dir.path(), &entry); + assert!( + stdout.contains("a:b"), + "internals.trim(value, schema) must call the object's own 2-arg method (got: {stdout})" + ); +} + +/// Same-arity collisions: a user method whose arg count matches the String +/// builtin must still resolve to the object's own member, not the builtin. +#[test] +fn object_methods_with_matching_arity_resolve_to_user_methods() { + let dir = tempfile::tempdir().expect("tempdir"); + let entry = dir.path().join("main.ts"); + std::fs::write( + &entry, + r#" +const a = { trim() { return "USERTRIM"; } }; // 0-arg, matches String.trim +const b = { split(x: number, y: number) { return x + y; } }; // 2-arg, matches String.split +const c = { slice() { return "USERSLICE"; } }; // 0-arg, matches String.slice +console.log(a.trim()); +console.log(b.split(1, 2)); +console.log(c.slice()); +"#, + ) + .expect("write entry"); + + let stdout = compile_and_run(dir.path(), &entry); + assert!( + stdout.contains("USERTRIM"), + "a.trim() must call the user method (got: {stdout})" + ); + assert!( + stdout.contains("\n3"), + "b.split(1, 2) must call the user method and return 3 (got: {stdout})" + ); + assert!( + stdout.contains("USERSLICE"), + "c.slice() must call the user method (got: {stdout})" + ); +} + +/// The fix must NOT regress genuine string receivers: literals and statically +/// string-typed values keep the fast String path with correct results. +#[test] +fn genuine_string_receivers_keep_string_semantics() { + let dir = tempfile::tempdir().expect("tempdir"); + let entry = dir.path().join("main.ts"); + std::fs::write( + &entry, + r#" +console.log(" hi ".trim()); +console.log(JSON.stringify("a,b".split(","))); +const s: string = "HeLLo"; +console.log(s.toLowerCase()); +console.log(JSON.stringify(s.split("L"))); +// any-typed value that is really a string still works via runtime dispatch +function id(x: any): any { return x; } +const t: any = id("X,Y,Z"); +console.log(JSON.stringify(t.split(","))); +console.log(t.trim()); +"#, + ) + .expect("write entry"); + + let stdout = compile_and_run(dir.path(), &entry); + assert!( + stdout.contains("hi\n"), + "string literal trim must work (got: {stdout})" + ); + assert!( + stdout.contains(r#"["a","b"]"#), + "string literal split must work (got: {stdout})" + ); + assert!( + stdout.contains("hello\n"), + "string-typed toLowerCase must work (got: {stdout})" + ); + assert!( + stdout.contains(r#"["He","","o"]"#), + "string-typed split must work (got: {stdout})" + ); + assert!( + stdout.contains(r#"["X","Y","Z"]"#), + "any-typed string split must work (got: {stdout})" + ); + assert!( + stdout.contains("X,Y,Z"), + "any-typed string trim must work (got: {stdout})" + ); +}