Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/perry-codegen/src/codegen/closure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ pub(super) fn compile_closure(
local_closure_func_ids: HashMap::new(),
local_closure_param_counts: HashMap::new(),
option_object_locals: HashMap::new(),
object_literal_locals: HashSet::new(),
namespace_imports: &cross_module.namespace_imports,
namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports,
namespace_member_prefixes: &cross_module.namespace_member_prefixes,
Expand Down
2 changes: 2 additions & 0 deletions crates/perry-codegen/src/codegen/entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ pub(super) fn compile_module_entry(
local_closure_func_ids: HashMap::new(),
local_closure_param_counts: HashMap::new(),
option_object_locals: HashMap::new(),
object_literal_locals: HashSet::new(),
namespace_imports: &cross_module.namespace_imports,
namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports,
namespace_member_prefixes: &cross_module.namespace_member_prefixes,
Expand Down Expand Up @@ -836,6 +837,7 @@ pub(super) fn compile_module_entry(
local_closure_func_ids: HashMap::new(),
local_closure_param_counts: HashMap::new(),
option_object_locals: HashMap::new(),
object_literal_locals: HashSet::new(),
namespace_imports: &cross_module.namespace_imports,
namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports,
namespace_member_prefixes: &cross_module.namespace_member_prefixes,
Expand Down
1 change: 1 addition & 0 deletions crates/perry-codegen/src/codegen/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ pub(super) fn compile_function(
local_closure_func_ids: HashMap::new(),
local_closure_param_counts: HashMap::new(),
option_object_locals: HashMap::new(),
object_literal_locals: HashSet::new(),
namespace_imports: &cross_module.namespace_imports,
namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports,
namespace_member_prefixes: &cross_module.namespace_member_prefixes,
Expand Down
2 changes: 2 additions & 0 deletions crates/perry-codegen/src/codegen/method.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ pub(super) fn compile_method(
local_closure_func_ids: HashMap::new(),
local_closure_param_counts: HashMap::new(),
option_object_locals: HashMap::new(),
object_literal_locals: HashSet::new(),
namespace_imports: &cross_module.namespace_imports,
namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports,
namespace_member_prefixes: &cross_module.namespace_member_prefixes,
Expand Down Expand Up @@ -694,6 +695,7 @@ pub(super) fn compile_static_method(
local_closure_func_ids: HashMap::new(),
local_closure_param_counts: HashMap::new(),
option_object_locals: HashMap::new(),
object_literal_locals: HashSet::new(),
namespace_imports: &cross_module.namespace_imports,
namespace_reexport_named_imports: &cross_module.namespace_reexport_named_imports,
namespace_member_prefixes: &cross_module.namespace_member_prefixes,
Expand Down
8 changes: 8 additions & 0 deletions crates/perry-codegen/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,14 @@ pub(crate) struct FnCtx<'a> {
/// native constructor lowering read `const init = {...}; new Request(url,
/// init)` with the same field extractor used for inline object literals.
pub option_object_locals: std::collections::HashMap<u32, Vec<(String, Expr)>>,
/// LocalIds of immutable locals provably initialized from an object
/// literal (`const o = { … }`, including method-bearing literals that
/// lower to an object-building IIFE). #5271: a builtin-named method on
/// such a receiver (`o.trim()`, joi's `internals.trim(v, s)`) is the
/// object's OWN method, never `String.prototype.<m>` — so the static
/// String-method fast path must NOT claim it even when the call's arity
/// happens to match the String builtin.
pub object_literal_locals: std::collections::HashSet<u32>,

// ── Cross-module import plumbing (Phase F) ──────────────────────
/// Locals that are namespace imports (`import * as X from "./mod"`).
Expand Down
69 changes: 69 additions & 0 deletions crates/perry-codegen/src/lower_call/property_get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,48 @@ fn is_array_only_method_name(name: &str) -> bool {
)
}

/// For the Any-typed-receiver string-method fallback only: is `argc` a
/// plausible argument count for the String.prototype builtin named
/// `name`? When a builtin-named method is invoked on a receiver that is
/// NOT provably a string (object literal, `any`, unknown) AND the arg
/// count can't match the String builtin's signature, the call is almost
/// certainly a user method that merely shares a name with a String
/// builtin — e.g. joi's `internals.trim(value, schema)` (#5271). Forcing
/// the String path there used to abort codegen with
/// "String.trim takes no args, got 2"; gating on arity here lets such
/// calls fall through to the runtime method dispatcher instead.
///
/// The accepted ranges mirror `lower_string_method`'s per-arm arity
/// guards. Char-access methods (`charAt`/`charCodeAt`/`codePointAt`)
/// ignore surplus args per spec, so any count is fine for them.
fn string_only_method_arity_ok(name: &str, argc: usize) -> bool {
match name {
// No-arg string transforms.
"trim" | "trimStart" | "trimEnd" | "toLowerCase" | "toUpperCase" => argc == 0,
// Locale-aware case folding: optional `locales`.
"toLocaleLowerCase" | "toLocaleUpperCase" => argc <= 1,
// split(separator?, limit?).
"split" => argc <= 2,
// substring(start?, end?).
"substring" => argc <= 2,
// substr(start, length?) — start is required.
"substr" => argc == 1 || argc == 2,
// replaceAll(search, replace).
"replaceAll" => argc == 2,
// padStart/padEnd(targetLength, padString?).
"padStart" | "padEnd" => argc == 1 || argc == 2,
// repeat(count).
"repeat" => argc == 1,
// localeCompare(that, locales?, options?).
"localeCompare" => argc <= 3,
// Char-access ignores extra args (still evaluated for side effects).
"charAt" | "charCodeAt" | "codePointAt" => true,
// Conservative default: methods reaching this gate but not listed
// here keep their prior (already arity-checked) routing.
_ => true,
}
}

fn is_date_receiver(ctx: &FnCtx<'_>, object: &Expr) -> bool {
matches!(object, Expr::DateNew(_))
|| receiver_class_name(ctx, object).as_deref() == Some("Date")
Expand Down Expand Up @@ -393,7 +435,34 @@ pub fn try_lower_property_get_method_call(
// Falling through here routes it to the generic `js_native_call_method`
// dispatch (→ `dispatch_native_module_method`); forcing the string path
// hands the namespace pointer to a string FFI and SIGSEGVs.
// #5271: a builtin-named method on a receiver that is NOT provably a
// string (object literal, `any`, unknown) may be a USER method that
// merely shares the name — joi's `internals.trim(value, schema)`, or
// any `{ trim() {…} }.trim()`. Forcing the static String path there
// hands the object pointer to a string FFI: it either aborts codegen
// on the String arity guard (`String.trim takes no args, got 2`) or
// bit-casts the object as a string and returns "[object Object]".
//
// Take the static String fast path only when:
// * the receiver is NOT a known object-literal local — `o.trim()`
// on `const o = { trim() {…} }` is the object's OWN method, never
// `String.prototype.trim`, even when the arity matches; AND
// * the arg count is plausible for the String builtin — when it is
// NOT (joi's `internals.trim(value, schema)`: 2 args to a 0-arg
// builtin), the call is a user method sharing the name.
// Otherwise fall through to `js_native_call_method`, which resolves
// the receiver's own member at runtime and still services a genuine
// (Any-typed) string receiver via its `jsval.is_string()` arm — so
// the earlier "[object Object]" hazard the comment above warns about
// no longer applies (the runtime grew full string-method arms in
// #421/#514). See #5271.
let receiver_is_object_literal = matches!(
&**object,
Expr::LocalGet(id) if ctx.object_literal_locals.contains(id)
) || matches!(&**object, Expr::Object(_));
if is_string_only_method
&& string_only_method_arity_ok(property, args.len())
&& !receiver_is_object_literal
&& !is_array_expr(ctx, object)
Comment on lines +459 to 466

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Receiver-shape guard misses IIFE-lowered object literals.

receiver_is_object_literal does not include the object-building IIFE shape, so inline method-bearing literals can still hit the String fast-path for matching-arity builtin names.

Suggested guard extension
 let receiver_is_object_literal = matches!(
     &**object,
     Expr::LocalGet(id) if ctx.object_literal_locals.contains(id)
-) || matches!(&**object, Expr::Object(_));
+) || matches!(&**object, Expr::Object(_))
+  || matches!(
+      &**object,
+      Expr::Call { callee, args, .. }
+          if matches!(args.first(), Some(Expr::Object(_)))
+              && matches!(
+                  callee.as_ref(),
+                  Expr::Closure { params, .. }
+                      if params.first().map_or(false, |p| p.name == "__perry_obj_iife")
+              )
+  );
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
let receiver_is_object_literal = matches!(
&**object,
Expr::LocalGet(id) if ctx.object_literal_locals.contains(id)
) || matches!(&**object, Expr::Object(_));
if is_string_only_method
&& string_only_method_arity_ok(property, args.len())
&& !receiver_is_object_literal
&& !is_array_expr(ctx, object)
let receiver_is_object_literal = matches!(
&**object,
Expr::LocalGet(id) if ctx.object_literal_locals.contains(id)
) || matches!(&**object, Expr::Object(_))
|| matches!(
&**object,
Expr::Call { callee, args, .. }
if matches!(args.first(), Some(Expr::Object(_)))
&& matches!(
callee.as_ref(),
Expr::Closure { params, .. }
if params.first().map_or(false, |p| p.name == "__perry_obj_iife")
)
);
if is_string_only_method
&& string_only_method_arity_ok(property, args.len())
&& !receiver_is_object_literal
&& !is_array_expr(ctx, object)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@crates/perry-codegen/src/lower_call/property_get.rs` around lines 459 - 466,
The receiver_is_object_literal check in the condition block is incomplete
because it only detects LocalGet references to object literal locals and direct
Object expressions, but misses IIFE-lowered object literals. Extend the
receiver_is_object_literal guard to also detect the object-building IIFE pattern
(a Call expression with a Function body that returns an object). This will
prevent incorrectly routing such patterns to the String fast-path when they
should not be treated as simple object literals.

&& !is_buffer
&& !is_native_module_dynamic_index(object)
Expand Down
31 changes: 31 additions & 0 deletions crates/perry-codegen/src/stmt/let_stmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,29 @@ use crate::native_value::{
};
use crate::types::{DOUBLE, I32, I64, I8, PTR};

/// #5271: does `init` provably evaluate to a plain object literal? Two
/// shapes reach codegen: a data-only literal stays `Expr::Object`, while a
/// literal carrying methods/getters lowers to an immediately-invoked
/// object-building closure whose sole param is named `__perry_obj_iife`
/// and whose single argument is the seed `Object(..)`. Recognizing both
/// lets `o.trim()` / `internals.trim(v, s)` resolve to the receiver's own
/// member rather than `String.prototype.trim`.
fn is_object_literal_init(init: &perry_hir::Expr) -> bool {
use perry_hir::Expr;
match init {
Expr::Object(_) => true,
Expr::Call { callee, args, .. } => {
matches!(args.first(), Some(Expr::Object(_)))
&& matches!(
callee.as_ref(),
Expr::Closure { params, .. }
if params.first().map_or(false, |p| p.name == "__perry_obj_iife")
)
}
_ => false,
}
}

fn is_global_this_value(expr: &perry_hir::Expr) -> bool {
matches!(expr, perry_hir::Expr::GlobalGet(_))
|| matches!(
Expand Down Expand Up @@ -45,6 +68,14 @@ pub(crate) fn lower_let(
if let Some(props) = crate::lower_call::extract_options_fields(ctx, init_expr) {
ctx.option_object_locals.insert(id, props);
}
// #5271: remember object-literal locals so a builtin-named member
// call (`o.trim()`, joi's `internals.trim(v, s)`) resolves to the
// object's OWN method instead of being claimed by the static
// String-method fast path. Covers both plain literals and the
// method-bearing literals that lower to an object-building IIFE.
if is_object_literal_init(init_expr) {
ctx.object_literal_locals.insert(id);
}
}
}
if let Some(init_expr) = init {
Expand Down
133 changes: 133 additions & 0 deletions crates/perry/tests/issue_5271_string_method_nonstring_receiver.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
//! Regression tests for #5271 — a method whose name collides with a
//! `String.prototype` builtin (`trim`/`split`/`slice`/…) on a NON-string
//! receiver was eagerly lowered to the static `String.prototype.<m>` fast
//! path in `lower_string_method`.
//!
//! Root cause: the "string-only method on an `any`-typed receiver" fallback
//! in `lower_call/property_get.rs` forced the String path for collision-prone
//! method names regardless of the receiver's real shape. joi's
//! `validator.js:359` calls its OWN `internals.trim(value, schema)` — 2 args
//! to a 0-arg String builtin — which aborted codegen with
//! `perry-codegen: String.trim takes no args, got 2`. Same-arity collisions
//! (`{ trim() {…} }.trim()`, `{ split(a,b){…} }.split(1,2)`) instead bit-cast
//! the object pointer as a string and returned "[object Object]".
//!
//! The fix gates the static String fast path on (a) the receiver NOT being a
//! known object-literal local and (b) the arg count being plausible for the
//! String builtin; otherwise it falls through to `js_native_call_method`,
//! which resolves the receiver's own member at runtime while still servicing a
//! genuine (any-typed) string receiver via its `jsval.is_string()` arm.

use std::path::PathBuf;
use std::process::Command;

fn perry_bin() -> PathBuf {
PathBuf::from(env!("CARGO_BIN_EXE_perry"))
}

fn compile_and_run(dir: &std::path::Path, entry: &std::path::Path) -> String {
let output = dir.join("main_bin");
let compile = Command::new(perry_bin())
.current_dir(dir)
.arg("compile")
.arg(entry)
.arg("-o")
.arg(&output)
.output()
.expect("run perry compile");
assert!(
compile.status.success(),
"perry compile failed\nstdout:\n{}\nstderr:\n{}",
String::from_utf8_lossy(&compile.stdout),
String::from_utf8_lossy(&compile.stderr)
);
let run = Command::new(&output).output().expect("run compiled binary");
assert!(
run.status.success(),
"compiled binary failed\nstatus: {:?}\nstdout:\n{}\nstderr:\n{}",
run.status,
String::from_utf8_lossy(&run.stdout),
String::from_utf8_lossy(&run.stderr)
);
String::from_utf8_lossy(&run.stdout).to_string()
}

/// The exact joi shape: a user `trim` taking 2 args. Pre-fix this aborted
/// codegen ("String.trim takes no args, got 2"); it must compile and call the
/// object's own method.
#[test]
fn object_trim_two_args_resolves_to_user_method() {
let dir = tempfile::tempdir().expect("tempdir");
let entry = dir.path().join("main.ts");
std::fs::write(
&entry,
r#"
const internals = { trim(value: string, schema: any) { return value + ":" + schema; } };
console.log(internals.trim("a", "b"));
"#,
)
.expect("write entry");

let stdout = compile_and_run(dir.path(), &entry);
assert!(
stdout.contains("a:b"),
"internals.trim(value, schema) must call the object's own 2-arg method (got: {stdout})"
);
}

/// Same-arity collisions: a user method whose arg count matches the String
/// builtin must still resolve to the object's own member, not the builtin.
#[test]
fn object_methods_with_matching_arity_resolve_to_user_methods() {
let dir = tempfile::tempdir().expect("tempdir");
let entry = dir.path().join("main.ts");
std::fs::write(
&entry,
r#"
const a = { trim() { return "USERTRIM"; } }; // 0-arg, matches String.trim
const b = { split(x: number, y: number) { return x + y; } }; // 2-arg, matches String.split
const c = { slice() { return "USERSLICE"; } }; // 0-arg, matches String.slice
console.log(a.trim());
console.log(b.split(1, 2));
console.log(c.slice());
"#,
)
.expect("write entry");

let stdout = compile_and_run(dir.path(), &entry);
assert!(stdout.contains("USERTRIM"), "a.trim() must call the user method (got: {stdout})");
assert!(stdout.contains("\n3"), "b.split(1, 2) must call the user method and return 3 (got: {stdout})");
assert!(stdout.contains("USERSLICE"), "c.slice() must call the user method (got: {stdout})");
}

/// The fix must NOT regress genuine string receivers: literals and statically
/// string-typed values keep the fast String path with correct results.
#[test]
fn genuine_string_receivers_keep_string_semantics() {
let dir = tempfile::tempdir().expect("tempdir");
let entry = dir.path().join("main.ts");
std::fs::write(
&entry,
r#"
console.log(" hi ".trim());
console.log(JSON.stringify("a,b".split(",")));
const s: string = "HeLLo";
console.log(s.toLowerCase());
console.log(JSON.stringify(s.split("L")));
// any-typed value that is really a string still works via runtime dispatch
function id(x: any): any { return x; }
const t: any = id("X,Y,Z");
console.log(JSON.stringify(t.split(",")));
console.log(t.trim());
"#,
)
.expect("write entry");

let stdout = compile_and_run(dir.path(), &entry);
assert!(stdout.contains("hi\n"), "string literal trim must work (got: {stdout})");
assert!(stdout.contains(r#"["a","b"]"#), "string literal split must work (got: {stdout})");
assert!(stdout.contains("hello\n"), "string-typed toLowerCase must work (got: {stdout})");
assert!(stdout.contains(r#"["He","","o"]"#), "string-typed split must work (got: {stdout})");
assert!(stdout.contains(r#"["X","Y","Z"]"#), "any-typed string split must work (got: {stdout})");
assert!(stdout.contains("X,Y,Z"), "any-typed string trim must work (got: {stdout})");
}
Loading