From 3249453ca4f4a673ce5d80a4025e2a284ad7cb97 Mon Sep 17 00:00:00 2001 From: robobun Date: Wed, 8 Apr 2026 14:37:32 +0000 Subject: [PATCH] [JSC] Reject dangling hyphen in class set under /v flag In UnicodeSets mode (/v), - is a ClassSetSyntaxCharacter per ECMA-262 and is only legal between two ClassSetCharacters as part of a ClassSetRange. A bare or trailing - with no right-hand side (e.g. /[a-]/v, /[\d-]/v, /[\w-]/v, /[a-z\d-]/v) must be rejected. ClassSetParserDelegate previously silently accepted the CachedCharacterHyphen and AfterCharacterClassHyphen states in flushCachedCharacterIfNeeded() and end(), so these patterns parsed without error and matched both operands and - literally. Neither V8, SpiderMonkey, nor the spec agrees with that behavior. Add an InvalidClassSetCharacter error when either incomplete-range state is hit at a class-set transition point (nested class boundary, set operator, or closing ]). The valid-range path (CachedCharacter -> CachedCharacterHyphen -> completed range) is unaffected because it does not go through flushCachedCharacterIfNeeded or end() while the hyphen is pending. Fixes oven-sh/bun#29003. --- Source/JavaScriptCore/yarr/YarrParser.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/Source/JavaScriptCore/yarr/YarrParser.h b/Source/JavaScriptCore/yarr/YarrParser.h index fea2b052b25d..ca0bc31af313 100644 --- a/Source/JavaScriptCore/yarr/YarrParser.h +++ b/Source/JavaScriptCore/yarr/YarrParser.h @@ -537,6 +537,14 @@ class Parser { if (m_state == ClassSetConstructionState::CachedCharacter) { m_delegate.atomCharacterClassAtom(m_character); m_state = ClassSetConstructionState::Empty; + } else if (m_state == ClassSetConstructionState::CachedCharacterHyphen + || m_state == ClassSetConstructionState::AfterCharacterClassHyphen) { + // A '-' is a ClassSetSyntaxCharacter in /v and is only legal + // between two ClassSetCharacters as part of a ClassSetRange. 
+ // Reaching any other transition (nested class, set operator, + // end of class) while a hyphen is pending means the range + // has no right-hand side — reject it. + m_errorCode = ErrorCode::InvalidClassSetCharacter; } } @@ -757,9 +765,13 @@ class Parser { { if (m_state == ClassSetConstructionState::CachedCharacter) m_delegate.atomCharacterClassAtom(m_character); - else if (m_state == ClassSetConstructionState::CachedCharacterHyphen) { - m_delegate.atomCharacterClassAtom(m_character); - m_delegate.atomCharacterClassAtom('-'); + else if (m_state == ClassSetConstructionState::CachedCharacterHyphen + || m_state == ClassSetConstructionState::AfterCharacterClassHyphen) { + // A trailing '-' in /v mode (e.g. /[a-]/v, /[\d-]/v) leaves a + // ClassSetRange with no right-hand side. '-' is a + // ClassSetSyntaxCharacter that must be escaped unless it + // forms a full range, so this is a syntax error. + m_errorCode = ErrorCode::InvalidClassSetCharacter; } else if (m_state == ClassSetConstructionState::AfterSetOperator) m_errorCode = ErrorCode::InvalidClassSetCharacter;