From ddef8e8f94a7e83e2eb690340a58409b36f28020 Mon Sep 17 00:00:00 2001 From: Ralph Date: Tue, 16 Jun 2026 06:33:57 -0700 Subject: [PATCH] =?UTF-8?q?perf(hir):=20O(1)=20scope=20resolution=20?= =?UTF-8?q?=E2=80=94=20index=20local=20bindings=20by=20name=20(#5267)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expression/declaration lowering resolved every identifier by a reverse linear scan of the per-scope `locals` stack (`iter().rev().find(name)` / `rposition`), so a scope with N bindings and N references lowered in O(n²). Real minified/bundled JS puts tens of thousands of bindings and references in one module/function scope, so `check-lower` stalled for minutes and got killed with no diagnostic on large bundles. Wrap `locals` in a `Locals` newtype (crates/perry-hir/src/lower/locals.rs) that keeps the same ordered `(name, id, type)` stack plus a side `name -> ascending positions` index. The innermost binding for a name is the back of its position list, so `lookup` / `lookup_index` / `lookup_type` are O(1). Every position-changing op (push, drain, extend, remove) maintains the index; read/in-place ops reach the slice via `Deref`/`DerefMut` (which hands out `&mut [..]`, not `&mut Vec`, so the index can't be bypassed). Also fix the per-declaration linear scans that the var/let-hoisting and var-redeclaration paths ran: - `predefine_var_bindings_in_function_body` (block.rs) and the function- body var/fn pre-passes (expr_function.rs) used `iter().enumerate().rev().any(|(idx, ..)| name && idx >= mark)` per binding; replaced with O(1) `lookup_index_in_scope` (max-position == innermost, so "exists idx >= mark" iff "max >= mark"). - the var-redeclaration reuse (var_decl.rs / stmt.rs) found the binding by name then re-found it by id to patch its type; now `iter_named` yields the position and `type_mut_at` patches it in place. Lowering of a single large scope is now linear: a 160k-binding/160k-ref function checks in ~0.5s (was killed >600s extrapolated); the issue's 40k repro drops from 2.2s to 0.12s. All perry-hir (200+) and perry CLI (560+) tests pass; var hoisting/redeclaration/shadowing/closure-capture output verified byte-identical to `node --experimental-strip-types`. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../perry-hir/src/destructuring/var_decl.rs | 25 +-- crates/perry-hir/src/lower/context.rs | 20 +- crates/perry-hir/src/lower/expr_function.rs | 27 +-- crates/perry-hir/src/lower/locals.rs | 175 ++++++++++++++++++ .../perry-hir/src/lower/lowering_context.rs | 5 +- crates/perry-hir/src/lower/mod.rs | 3 + crates/perry-hir/src/lower/stmt.rs | 19 +- crates/perry-hir/src/lower_decl/block.rs | 18 +- 8 files changed, 223 insertions(+), 69 deletions(-) create mode 100644 crates/perry-hir/src/lower/locals.rs diff --git a/crates/perry-hir/src/destructuring/var_decl.rs b/crates/perry-hir/src/destructuring/var_decl.rs index b05b04873a..9f88e59890 100644 --- a/crates/perry-hir/src/destructuring/var_decl.rs +++ b/crates/perry-hir/src/destructuring/var_decl.rs @@ -1502,9 +1502,7 @@ pub(crate) fn lower_var_decl_with_destructuring( // broke this way because a local `zipWith` (L1180) precedes it. let id = ctx.lookup_local(&name).unwrap(); // Update the type now that we have full inference - if let Some((_, _, existing_ty)) = - ctx.locals.iter_mut().rev().find(|(n, _, _)| n == &name) - { + if let Some(existing_ty) = ctx.locals.lookup_type_mut(&name) { *existing_ty = ty.clone(); } id @@ -1523,7 +1521,7 @@ pub(crate) fn lower_var_decl_with_destructuring( *existing_ty = ty.clone(); } fid - } else if let Some(id) = is_var_decl + } else if let Some((reuse_pos, id)) = is_var_decl .then(|| { // Issue #838 followup (b): when the closure-body hoist // in `lower_fn_expr` / `lower_arrow` pre-registered this @@ -1542,18 +1540,15 @@ pub(crate) fn lower_var_decl_with_destructuring( // lexical binding, and using `lookup_local(name)` here // would accidentally grab a shadowing catch parameter. ctx.locals - .iter() - .rev() - .find(|(n, lid, _)| n == &name && ctx.var_hoisted_ids.contains(lid)) - .map(|(_, lid, _)| *lid) + .iter_named(&name) + .find(|(_, (_, lid, _))| ctx.var_hoisted_ids.contains(lid)) + .map(|(pos, (_, lid, _))| (pos, *lid)) }) .flatten() { - if let Some((_, _, existing_ty)) = - ctx.locals.iter_mut().rev().find(|(_, lid, _)| *lid == id) - { - *existing_ty = ty.clone(); - } + // Patch the reused binding's type in place (O(1) by position) + // rather than re-finding it with an O(n) scan (#5267). + *ctx.locals.type_mut_at(reuse_pos) = ty.clone(); id } else { ctx.define_local(name.clone(), ty.clone()) @@ -1972,9 +1967,7 @@ pub(crate) fn lower_var_decl_with_destructuring( ctx.pre_registered_module_var_decls.remove(&name); // #1758: module-scope only — see the sibling guard above. let id = ctx.lookup_local(&name).unwrap(); - if let Some((_, _, existing_ty)) = - ctx.locals.iter_mut().rev().find(|(n, _, _)| n == &name) - { + if let Some(existing_ty) = ctx.locals.lookup_type_mut(&name) { *existing_ty = ty.clone(); } id diff --git a/crates/perry-hir/src/lower/context.rs b/crates/perry-hir/src/lower/context.rs index a42cf55feb..83bab8f500 100644 --- a/crates/perry-hir/src/lower/context.rs +++ b/crates/perry-hir/src/lower/context.rs @@ -48,7 +48,7 @@ impl LoweringContext { next_type_alias_id: 0, tagged_template_site_salt, next_tagged_template_site_id: 0, - locals: Vec::new(), + locals: crate::lower::Locals::new(), globals: Vec::new(), functions: Vec::new(), func_defaults: Vec::new(), @@ -705,15 +705,11 @@ impl LoweringContext { } pub(crate) fn lookup_local(&self, name: &str) -> Option { - self.locals - .iter() - .rev() - .find(|(n, _, _)| n == name) - .map(|(_, id, _)| *id) + self.locals.lookup(name) } fn lookup_local_index(&self, name: &str) -> Option { - self.locals.iter().rposition(|(n, _, _)| n == name) + self.locals.lookup_index(name) } /// #5216: drop the most-recently-bound local named `name` (if any), e.g. a @@ -762,11 +758,7 @@ impl LoweringContext { } pub(crate) fn lookup_local_type(&self, name: &str) -> Option<&Type> { - self.locals - .iter() - .rev() - .find(|(n, _, _)| n == name) - .map(|(_, _, ty)| ty) + self.locals.lookup_type(name) } pub(crate) fn lookup_func(&self, name: &str) -> Option { @@ -1242,7 +1234,7 @@ impl LoweringContext { self.scope_local_marks.pop(); if self.locals.len() > mark.0 { let mut kept: Vec<(String, LocalId, Type)> = Vec::new(); - for entry in self.locals.drain(mark.0..) { + for entry in self.locals.drain_from(mark.0) { if self.sloppy_implicit_global_ids.contains(&entry.1) { kept.push(entry); } @@ -1299,7 +1291,7 @@ impl LoweringContext { // module-scoped bindings too — keep them visible after the block. if self.locals.len() > locals_mark { let mut kept: Vec<(String, LocalId, Type)> = Vec::new(); - for entry in self.locals.drain(locals_mark..) { + for entry in self.locals.drain_from(locals_mark) { if self.var_hoisted_ids.contains(&entry.1) || self.sloppy_implicit_global_ids.contains(&entry.1) { diff --git a/crates/perry-hir/src/lower/expr_function.rs b/crates/perry-hir/src/lower/expr_function.rs index 2a9262eace..592d7908ad 100644 --- a/crates/perry-hir/src/lower/expr_function.rs +++ b/crates/perry-hir/src/lower/expr_function.rs @@ -742,10 +742,8 @@ fn lower_fn_expr_anon(ctx: &mut LoweringContext, fn_expr: &ast::FnExpr) -> Resul let name = ident.id.sym.to_string(); let already_in_scope = ctx .locals - .iter() - .enumerate() - .rev() - .any(|(idx, (n, _, _))| n == &name && idx >= outer_locals_len); + .lookup_index_in_scope(&name, outer_locals_len) + .is_some(); if !already_in_scope { let id = ctx.define_local(name, Type::Any); // Mark as hoisted so closures created @@ -786,11 +784,8 @@ fn lower_fn_expr_anon(ctx: &mut LoweringContext, fn_expr: &ast::FnExpr) -> Resul let name = fn_decl.ident.sym.to_string(); let existing_in_scope = ctx .locals - .iter() - .enumerate() - .rev() - .find(|(idx, (n, _, _))| n == &name && *idx >= outer_locals_len) - .map(|(_, (_, id, _))| *id); + .lookup_index_in_scope(&name, outer_locals_len) + .map(|pos| ctx.locals[pos].1); let local_id = if let Some(existing) = existing_in_scope { existing } else { @@ -831,10 +826,10 @@ fn lower_fn_expr_anon(ctx: &mut LoweringContext, fn_expr: &ast::FnExpr) -> Resul for decl in &var_decl.decls { if let ast::Pat::Ident(ident) = &decl.name { let name = ident.id.sym.to_string(); - let already_in_scope = - ctx.locals.iter().enumerate().rev().any(|(idx, (n, _, _))| { - n == &name && idx >= outer_locals_len - }); + let already_in_scope = ctx + .locals + .lookup_index_in_scope(&name, outer_locals_len) + .is_some(); if !already_in_scope { let id = ctx.define_local(name, Type::Any); // Boxed-capture semantics: a closure @@ -878,10 +873,8 @@ fn lower_fn_expr_anon(ctx: &mut LoweringContext, fn_expr: &ast::FnExpr) -> Resul for name in names { let already_in_scope = ctx .locals - .iter() - .enumerate() - .rev() - .any(|(idx, (n, _, _))| n == &name && idx >= outer_locals_len); + .lookup_index_in_scope(&name, outer_locals_len) + .is_some(); if !already_in_scope { let id = ctx.define_local(name.clone(), Type::Any); ctx.var_hoisted_ids.insert(id); diff --git a/crates/perry-hir/src/lower/locals.rs b/crates/perry-hir/src/lower/locals.rs new file mode 100644 index 0000000000..1912424f4c --- /dev/null +++ b/crates/perry-hir/src/lower/locals.rs @@ -0,0 +1,175 @@ +//! Scope-local bindings with an O(1) name→position index (#5267). +//! +//! The lowerer resolves every identifier reference by finding the +//! innermost (most-recently-pushed) binding with a given name. The old +//! representation was a bare `Vec<(String, LocalId, Type)>` scanned with a +//! reverse `find`/`rposition` per reference, so a scope holding N bindings +//! with N references lowered in **O(n²)** time. Real minified/bundled JS +//! puts tens of thousands of bindings and references in one module (or one +//! wrapper-function) scope, so `check-lower` stalled for minutes and got +//! killed with no diagnostic on large bundles. +//! +//! `Locals` keeps the same ordered stack (`entries`, the authoritative +//! source of truth) plus a side `index` mapping each name to the ascending +//! list of positions in `entries` that currently hold that name. The *last* +//! position in a name's list is the innermost binding, so `lookup`, +//! `lookup_index`, and `lookup_type` are O(1). +//! +//! Every operation that moves a binding's position (`push`, `drain_from`, +//! `extend`, `remove`) goes through an inherent method here that keeps +//! `index` in sync. Read-only and in-place ops (`iter`, `iter_mut`, +//! slicing, `len`) reach the underlying slice via `Deref`/`DerefMut` — +//! those never move a binding or rename it, so the index stays valid. Note +//! `DerefMut` hands out `&mut [..]` (a slice), **not** `&mut Vec`, so +//! callers cannot bypass the index with `Vec::push`/`remove`/`truncate`. + +use std::ops::{Deref, DerefMut}; + +use perry_types::{LocalId, Type}; +use std::collections::HashMap; + +/// Ordered stack of scope-local bindings with a name→positions index. +#[derive(Debug, Clone, Default)] +pub(crate) struct Locals { + /// Authoritative ordered list of `(name, id, type)` bindings. + entries: Vec<(String, LocalId, Type)>, + /// name -> ascending positions into `entries`. The last entry of each + /// list is the innermost (most-recent) binding for that name. + index: HashMap>, +} + +impl Locals { + pub(crate) fn new() -> Self { + Self::default() + } + + /// Push a new innermost binding. O(1) amortized. + pub(crate) fn push(&mut self, entry: (String, LocalId, Type)) { + let pos = self.entries.len(); + self.index.entry(entry.0.clone()).or_default().push(pos); + self.entries.push(entry); + } + + /// Position of the innermost binding named `name`, if any. O(1). + /// Equivalent to the old `iter().rposition(|(n, ..)| n == name)`. + pub(crate) fn lookup_index(&self, name: &str) -> Option { + self.index.get(name).and_then(|v| v.last()).copied() + } + + /// `LocalId` of the innermost binding named `name`, if any. O(1). + pub(crate) fn lookup(&self, name: &str) -> Option { + self.lookup_index(name).map(|i| self.entries[i].1) + } + + /// `Type` of the innermost binding named `name`, if any. O(1). + pub(crate) fn lookup_type(&self, name: &str) -> Option<&Type> { + self.lookup_index(name).map(|i| &self.entries[i].2) + } + + /// Mutable `Type` of the innermost binding named `name`, if any. O(1). + /// Replaces the old `iter_mut().rev().find(|(n, ..)| n == name)` type + /// patch-ups that were O(n) per declaration (#5267). + pub(crate) fn lookup_type_mut(&mut self, name: &str) -> Option<&mut Type> { + let idx = self.lookup_index(name)?; + Some(&mut self.entries[idx].2) + } + + /// Position of the innermost binding named `name` whose position is at + /// or past `min_pos` (i.e. introduced in the current scope), if any. + /// O(1): the innermost binding for a name has the maximal position, so + /// one exists at `>= min_pos` iff that maximum is `>= min_pos`. Replaces + /// the `iter().enumerate().rev().any(|(idx, (n, ..))| n == name && idx >= + /// min_pos)` scans that were O(n) per declaration (#5267). + pub(crate) fn lookup_index_in_scope(&self, name: &str, min_pos: usize) -> Option { + self.lookup_index(name).filter(|&pos| pos >= min_pos) + } + + /// Iterate the bindings named `name` from innermost to outermost + /// (descending position), yielding `(position, &binding)`. O(number of + /// bindings sharing `name`) — a single step for the common case of a + /// unique name — instead of the old O(n) reverse scan of the entire + /// stack (#5267). Used by the `var`-redeclaration reuse checks, which + /// need the innermost binding matching an extra predicate (e.g. + /// var-hoisted) plus its position for an O(1) type patch-up afterwards. + pub(crate) fn iter_named<'a>( + &'a self, + name: &'a str, + ) -> impl Iterator + 'a { + self.index + .get(name) + .into_iter() + .flat_map(move |positions| positions.iter().rev().map(move |&p| (p, &self.entries[p]))) + } + + /// Mutable `Type` of the binding at `pos`. Pairs with `iter_named` / + /// `lookup_index*` to patch a binding's inferred type without a second + /// O(n) scan to re-find it (#5267). + pub(crate) fn type_mut_at(&mut self, pos: usize) -> &mut Type { + &mut self.entries[pos].2 + } + + /// Drain every binding at position `>= mark` (a scope/block pop), + /// returning the drained entries in ascending-position order. The caller + /// may filter and `extend` survivors back (var-hoisted / sloppy globals). + pub(crate) fn drain_from(&mut self, mark: usize) -> Vec<(String, LocalId, Type)> { + if mark >= self.entries.len() { + return Vec::new(); + } + let drained: Vec<(String, LocalId, Type)> = self.entries.drain(mark..).collect(); + // Each drained entry sat at a position `>= mark`, and for any single + // name those positions are the highest in its index list. Walking the + // drained entries in reverse (descending positions) lets us pop them + // off the back one-for-one, leaving any `< mark` positions intact. + for (name, _, _) in drained.iter().rev() { + let now_empty = match self.index.get_mut(name) { + Some(positions) => { + positions.pop(); + positions.is_empty() + } + None => false, + }; + if now_empty { + self.index.remove(name); + } + } + drained + } + + /// Re-append previously-drained survivors, assigning fresh (compacted) + /// positions. Mirrors `Vec::extend`; keeps the index in sync. + pub(crate) fn extend>(&mut self, iter: I) { + for entry in iter { + self.push(entry); + } + } + + /// Remove the binding at `idx`, shifting later bindings down by one. + /// Rare (native-require dedup, #5216), so a full O(n) reindex is fine. + pub(crate) fn remove(&mut self, idx: usize) -> (String, LocalId, Type) { + let removed = self.entries.remove(idx); + self.reindex(); + removed + } + + /// Rebuild the whole name→positions index from `entries`. + fn reindex(&mut self) { + self.index.clear(); + for (pos, (name, _, _)) in self.entries.iter().enumerate() { + self.index.entry(name.clone()).or_default().push(pos); + } + } +} + +impl Deref for Locals { + type Target = [(String, LocalId, Type)]; + + fn deref(&self) -> &Self::Target { + &self.entries + } +} + +impl DerefMut for Locals { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.entries + } +} diff --git a/crates/perry-hir/src/lower/lowering_context.rs b/crates/perry-hir/src/lower/lowering_context.rs index 169b8ed8ae..94fefa452a 100644 --- a/crates/perry-hir/src/lower/lowering_context.rs +++ b/crates/perry-hir/src/lower/lowering_context.rs @@ -67,8 +67,9 @@ pub struct LoweringContext { pub(crate) tagged_template_site_salt: u64, /// Counter for generating unique tagged-template call-site IDs. pub(crate) next_tagged_template_site_id: u32, - /// Current scope's local variables: name -> (id, type) - pub(crate) locals: Vec<(String, LocalId, Type)>, + /// Current scope's local variables: name -> (id, type), with an O(1) + /// name→position index for innermost-binding resolution (#5267). + pub(crate) locals: crate::lower::Locals, /// LocalIds that represent immutable bindings (`const`, imports, and /// other lexical bindings that must throw when assigned). pub(crate) immutable_locals: HashSet, diff --git a/crates/perry-hir/src/lower/mod.rs b/crates/perry-hir/src/lower/mod.rs index ca5bcf0ea2..ed92009467 100644 --- a/crates/perry-hir/src/lower/mod.rs +++ b/crates/perry-hir/src/lower/mod.rs @@ -85,6 +85,9 @@ pub(crate) use lowering_context::{ LoweringContext, PrivKind, PrivMember, PrivateScope, WithEnvFrame, }; +mod locals; +pub(crate) use locals::Locals; + mod typed_parse; pub(crate) use typed_parse::{extract_typed_parse_source_order, resolve_typed_parse_ty}; diff --git a/crates/perry-hir/src/lower/stmt.rs b/crates/perry-hir/src/lower/stmt.rs index 9da2c90f8c..49ff32bf6a 100644 --- a/crates/perry-hir/src/lower/stmt.rs +++ b/crates/perry-hir/src/lower/stmt.rs @@ -69,18 +69,15 @@ fn emit_class_expression_value_binding( } id } else if is_var { - if let Some(id) = ctx + // Bind the lookup result first so the `iter_named` borrow of + // `ctx.locals` ends before the mutable reuse below (#5267). + let existing_var = ctx .locals - .iter() - .rev() - .find(|(name, id, _)| name == bind_name && ctx.var_hoisted_ids.contains(id)) - .map(|(_, id, _)| *id) - { - if let Some((_, _, existing_ty)) = - ctx.locals.iter_mut().rev().find(|(_, lid, _)| *lid == id) - { - *existing_ty = ty.clone(); - } + .iter_named(bind_name) + .find(|(_, (_, id, _))| ctx.var_hoisted_ids.contains(id)) + .map(|(pos, (_, id, _))| (pos, *id)); + if let Some((reuse_pos, id)) = existing_var { + *ctx.locals.type_mut_at(reuse_pos) = ty.clone(); id } else { ctx.define_local(bind_name.to_string(), ty.clone()) diff --git a/crates/perry-hir/src/lower_decl/block.rs b/crates/perry-hir/src/lower_decl/block.rs index c8afe0a7f9..f958ee77ac 100644 --- a/crates/perry-hir/src/lower_decl/block.rs +++ b/crates/perry-hir/src/lower_decl/block.rs @@ -72,10 +72,8 @@ pub(crate) fn pre_register_forward_captured_lets( } let already_in_scope = ctx .locals - .iter() - .enumerate() - .rev() - .any(|(idx, (n, _, _))| n == &name && idx >= body_entry_locals_len); + .lookup_index_in_scope(&name, body_entry_locals_len) + .is_some(); if !already_in_scope { let id = ctx.define_local(name, Type::Any); ctx.var_hoisted_ids.insert(id); @@ -558,11 +556,13 @@ fn predefine_var_bindings_in_function_body( let mut created = Vec::new(); let scope_start = ctx.scope_local_marks.last().copied().unwrap_or(0); for name in names { - let existing_current_scope = ctx.locals[scope_start..] - .iter() - .rev() - .find(|(n, _, _)| n == &name) - .map(|(_, id, _)| *id); + // O(1) innermost-in-scope lookup instead of an O(n) reverse scan of + // `locals[scope_start..]` per var name — the per-binding scan made a + // function body with N `var`s lower in O(n²) (#5267). + let existing_current_scope = ctx + .locals + .lookup_index_in_scope(&name, scope_start) + .map(|pos| ctx.locals[pos].1); let local_id = existing_current_scope.unwrap_or_else(|| { let id = ctx.define_local(name.clone(), Type::Any); created.push((name, id));