boa-dev · mrhapile · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026 · Copilot
@@ -1136,6 +1136,24 @@ impl RegExp {
         // 9. If flags contains "u" or flags contains "v", let fullUnicode be true; else let fullUnicode be false.
         let full_unicode = flags.contains(b'u') || flags.contains(b'v');
 
+        // When the /u or /v flag is active, the input string is modeled as a sequence
+        // of Unicode code points (§22.2.2). Since `last_index` is a UTF-16 code unit
+        // index, it may point to the trailing half of a surrogate pair, which is not
+        // a valid code point boundary. In that case, we adjust the matcher start
+        // position to the preceding lead surrogate so matching begins at a valid
+        // code point boundary.
+        // Ref: https://tc39.es/ecma262/#sec-pattern-semantics
+        let mut start_index = last_index;
+        if full_unicode
+            && start_index > 0
+            && let Some(cu) = input.code_unit_at(start_index as usize)
+            && (0xDC00..=0xDFFF).contains(&cu)
+            && let Some(prev_cu) = input.code_unit_at(start_index as usize - 1)
-            && let Some(cu) = input.code_unit_at(start_index as usize)
-            && (0xDC00..=0xDFFF).contains(&cu)
-            && let Some(prev_cu) = input.code_unit_at(start_index as usize - 1)
+            && start_index <= length
+            && let Ok(start_index_usize) = usize::try_from(start_index)
+            && let Some(cu) = input.code_unit_at(start_index_usize)
+            && (0xDC00..=0xDFFF).contains(&cu)
+            && let Some(prev_cu) = input.code_unit_at(start_index_usize - 1)
-            && let Some(cu) = input.code_unit_at(start_index as usize)
-            && (0xDC00..=0xDFFF).contains(&cu)
-            && let Some(prev_cu) = input.code_unit_at(start_index as usize - 1)
+            && start_index <= length
+            && let Ok(start_index_usize) = usize::try_from(start_index)
+            && let Some(cu) = input.code_unit_at(start_index_usize)
+            && (0xDC00..=0xDFFF).contains(&cu)
+            && let Some(prev_cu) = input.code_unit_at(start_index_usize - 1)
+            && (0xD800..=0xDBFF).contains(&prev_cu)
+        {
+            start_index -= 1;
+        }
+
         // NOTE: The following steps are taken care of by regress:
         //
         // SKIP: 10. Let matchSucceeded be false.
@@ -1163,13 +1181,13 @@ impl RegExp {
                 let input = input.to_vec();
 
                 // NOTE: We can use the faster ucs2 variant since there will never be two byte unicode.
-                matcher.find_from_ucs2(&input, last_index as usize).next()
+                matcher.find_from_ucs2(&input, start_index as usize).next()
             }
             (true, JsStrVariant::Utf16(input)) => {
-                matcher.find_from_utf16(input, last_index as usize).next()
+                matcher.find_from_utf16(input, start_index as usize).next()
             }
             (false, JsStrVariant::Utf16(input)) => {
-                matcher.find_from_ucs2(input, last_index as usize).next()
+                matcher.find_from_ucs2(input, start_index as usize).next()
             }
         };
 

@@ -262,3 +262,51 @@ fn regexp_no_panic_on_empty_class_quantifier() {
     // It should return null without panicking.
     run_test_actions([TestAction::assert_eq("/[]*1/u.exec()", JsValue::null())]);
 }
+
+#[test]
+fn regexp_exec_coercion_order() {
+    // `exec` must convert its argument to a string before it reads `lastIndex`.
+    // Both coercions are made observable: the argument's `toString` and the
+    // `valueOf` of the object stored in `lastIndex` each push a marker onto
+    // `log`, so the joined log records the order in which they ran.
+    let script = indoc! {r#"
+                let log = [];
+                let re = /a/g;
+
+                re.lastIndex = {
+                  valueOf() { log.push("lastIndex"); return 0; }
+                };
+
+                let str = {
+                  toString() { log.push("string"); return "a"; }
+                };
+
+                re.exec(str);
+                log.join(",");
+            "#};
+    let expected_order = js_str!("string,lastIndex");
+
+    run_test_actions([TestAction::assert_eq(script, expected_order)]);
+}
+
+#[test]
+fn regexp_unicode_lastindex_surrogate_boundary() {
+    // "💩" is a single code point stored as two UTF-16 code units, so
+    // `lastIndex = 1` points at its trail surrogate. With the `u` flag set the
+    // match is expected to yield the whole character, not a lone surrogate.
+    let script = indoc! {r#"
+                let re = /./gu;
+                re.lastIndex = 1;
+                re.exec("💩")[0];
+            "#};
+    let whole_character = js_str!("💩");
+
+    run_test_actions([TestAction::assert_eq(script, whole_character)]);
+}
+
+#[test]
+fn regexp_unicode_lastindex_no_adjustment() {
+    // Baseline for the surrogate-boundary case: `lastIndex = 0` already sits at
+    // the start of the character, and the match must still be the whole "💩".
+    let script = indoc! {r#"
+                let re = /./gu;
+                re.lastIndex = 0;
+                re.exec("💩")[0];
+            "#};
+    let whole_character = js_str!("💩");
+
+    run_test_actions([TestAction::assert_eq(script, whole_character)]);
+}