From e7f5610ed99ed7d3e976de9d072238ae00db3ce2 Mon Sep 17 00:00:00 2001
From: mrhapile <allinonegaming3456@gmail.com>
Date: Sun, 5 Apr 2026 17:19:52 +0530
Subject: [PATCH 1/3] fix(regexp): align lastIndex with code point boundary
 under /u

Signed-off-by: mrhapile <allinonegaming3456@gmail.com>
---
 core/engine/src/builtins/regexp/mod.rs   | 26 ++++++++++--
 core/engine/src/builtins/regexp/tests.rs | 54 ++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/core/engine/src/builtins/regexp/mod.rs b/core/engine/src/builtins/regexp/mod.rs
index 037eed6eb19..38ce09ae0ef 100644
--- a/core/engine/src/builtins/regexp/mod.rs
+++ b/core/engine/src/builtins/regexp/mod.rs
@@ -1136,6 +1136,26 @@ impl RegExp {
         // 9. If flags contains "u" or flags contains "v", let fullUnicode be true; else let fullUnicode be false.
         let full_unicode = flags.contains(b'u') || flags.contains(b'v');
 
+        // When the /u or /v flag is active, the input string is matched as a sequence
+        // of Unicode code points (§22.2.2). Since `last_index` is a UTF-16 code unit
+        // index, it may point at the trail surrogate of a surrogate pair, which is not
+        // a code point boundary. In that case, we leave `last_index` untouched and move
+        // only the matcher start position back to the lead surrogate, i.e. to a valid
+        // code point boundary.
+        // Ref: https://tc39.es/ecma262/#sec-pattern-semantics
+        let mut start_index = last_index;
+        if full_unicode && start_index > 0 {
+            if let Some(cu) = input.code_unit_at(start_index as usize) {
+                if (0xDC00..=0xDFFF).contains(&cu) {
+                    if let Some(prev_cu) = input.code_unit_at(start_index as usize - 1) {
+                        if (0xD800..=0xDBFF).contains(&prev_cu) {
+                            start_index -= 1;
+                        }
+                    }
+                }
+            }
+        }
+
         // NOTE: The following steps are take care of by regress:
         //
         // SKIP: 10. Let matchSucceeded be false.
@@ -1163,13 +1183,13 @@ impl RegExp {
                 let input = input.to_vec();
 
                 // NOTE: We can use the faster ucs2 variant since there will never be two byte unicode.
-                matcher.find_from_ucs2(&input, last_index as usize).next()
+                matcher.find_from_ucs2(&input, start_index as usize).next()
             }
             (true, JsStrVariant::Utf16(input)) => {
-                matcher.find_from_utf16(input, last_index as usize).next()
+                matcher.find_from_utf16(input, start_index as usize).next()
             }
             (false, JsStrVariant::Utf16(input)) => {
-                matcher.find_from_ucs2(input, last_index as usize).next()
+                matcher.find_from_ucs2(input, start_index as usize).next()
             }
         };
 
diff --git a/core/engine/src/builtins/regexp/tests.rs b/core/engine/src/builtins/regexp/tests.rs
index 0897b682720..c89a225098d 100644
--- a/core/engine/src/builtins/regexp/tests.rs
+++ b/core/engine/src/builtins/regexp/tests.rs
@@ -262,3 +262,57 @@ fn regexp_no_panic_on_empty_class_quantifier() {
     // It should return null without panicking.
     run_test_actions([TestAction::assert_eq("/[]*1/u.exec()", JsValue::null())]);
 }
+
+#[test]
+fn regexp_exec_coercion_order() {
+    // ECMAScript §21.2.5.2.1 — RegExpExec
+    // Ensures ToString(input) happens before accessing lastIndex
+    run_test_actions([
+        TestAction::assert_eq(
+            indoc! {r#"
+                let log = [];
+                let re = /a/g;
+
+                re.lastIndex = {
+                  valueOf() { log.push("lastIndex"); return 0; }
+                };
+
+                let str = {
+                  toString() { log.push("string"); return "a"; }
+                };
+
+                re.exec(str);
+                log.join(",");
+            "#},
+            js_str!("string,lastIndex"),
+        ),
+    ]);
+}
+
+#[test]
+fn regexp_unicode_lastindex_surrogate_boundary() {
+    run_test_actions([
+        TestAction::assert_eq(
+            indoc! {r#"
+                let re = /./gu;
+                re.lastIndex = 1;
+                re.exec("💩")[0];
+            "#},
+            js_str!("💩"),
+        ),
+    ]);
+}
+
+#[test]
+fn regexp_unicode_lastindex_no_adjustment() {
+    run_test_actions([
+        TestAction::assert_eq(
+            indoc! {r#"
+                let re = /./gu;
+                re.lastIndex = 0;
+                re.exec("💩")[0];
+            "#},
+            js_str!("💩"),
+        ),
+    ]);
+}

From bc1bb77c176a4ecb39c91fdf769fca125631c096 Mon Sep 17 00:00:00 2001
From: mrhapile <allinonegaming3456@gmail.com>
Date: Sun, 5 Apr 2026 17:21:32 +0530
Subject: [PATCH 2/3] style: apply rustfmt

Signed-off-by: mrhapile <allinonegaming3456@gmail.com>
---
 core/engine/src/builtins/regexp/tests.rs | 30 ++++++++++--------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/core/engine/src/builtins/regexp/tests.rs b/core/engine/src/builtins/regexp/tests.rs
index c89a225098d..1a6ede5db36 100644
--- a/core/engine/src/builtins/regexp/tests.rs
+++ b/core/engine/src/builtins/regexp/tests.rs
@@ -267,9 +267,8 @@ fn regexp_no_panic_on_empty_class_quantifier() {
 fn regexp_exec_coercion_order() {
     // ECMAScript §21.2.5.2.1 — RegExpExec
     // Ensures ToString(input) happens before accessing lastIndex
-    run_test_actions([
-        TestAction::assert_eq(
-            indoc! {r#"
+    run_test_actions([TestAction::assert_eq(
+        indoc! {r#"
                 let log = [];
                 let re = /a/g;
 
@@ -284,35 +283,30 @@ fn regexp_exec_coercion_order() {
                 re.exec(str);
                 log.join(",");
             "#},
-            js_str!("string,lastIndex"),
-        ),
-    ]);
+        js_str!("string,lastIndex"),
+    )]);
 }
 
 #[test]
 fn regexp_unicode_lastindex_surrogate_boundary() {
-    run_test_actions([
-        TestAction::assert_eq(
-            indoc! {r#"
+    run_test_actions([TestAction::assert_eq(
+        indoc! {r#"
                 let re = /./gu;
                 re.lastIndex = 1;
                 re.exec("💩")[0];
             "#},
-            js_str!("💩"),
-        ),
-    ]);
+        js_str!("💩"),
+    )]);
 }
 
 #[test]
 fn regexp_unicode_lastindex_no_adjustment() {
-    run_test_actions([
-        TestAction::assert_eq(
-            indoc! {r#"
+    run_test_actions([TestAction::assert_eq(
+        indoc! {r#"
                 let re = /./gu;
                 re.lastIndex = 0;
                 re.exec("💩")[0];
             "#},
-            js_str!("💩"),
-        ),
-    ]);
+        js_str!("💩"),
+    )]);
 }

From da29940ba354f9f771305484ac14a14f8e111a42 Mon Sep 17 00:00:00 2001
From: mrhapile <allinonegaming3456@gmail.com>
Date: Sun, 5 Apr 2026 17:24:06 +0530
Subject: [PATCH 3/3] fix: resolve clippy collapsible-if lint

---
 core/engine/src/builtins/regexp/mod.rs | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/core/engine/src/builtins/regexp/mod.rs b/core/engine/src/builtins/regexp/mod.rs
index 38ce09ae0ef..58c20803d58 100644
--- a/core/engine/src/builtins/regexp/mod.rs
+++ b/core/engine/src/builtins/regexp/mod.rs
@@ -1144,16 +1144,14 @@ impl RegExp {
         // code point boundary.
         // Ref: https://tc39.es/ecma262/#sec-pattern-semantics
         let mut start_index = last_index;
-        if full_unicode && start_index > 0 {
-            if let Some(cu) = input.code_unit_at(start_index as usize) {
-                if (0xDC00..=0xDFFF).contains(&cu) {
-                    if let Some(prev_cu) = input.code_unit_at(start_index as usize - 1) {
-                        if (0xD800..=0xDBFF).contains(&prev_cu) {
-                            start_index -= 1;
-                        }
-                    }
-                }
-            }
+        if full_unicode
+            && start_index > 0
+            && let Some(cu) = input.code_unit_at(start_index as usize)
+            && (0xDC00..=0xDFFF).contains(&cu)
+            && let Some(prev_cu) = input.code_unit_at(start_index as usize - 1)
+            && (0xD800..=0xDBFF).contains(&prev_cu)
+        {
+            start_index -= 1;
         }
 
         // NOTE: The following steps are take care of by regress: