Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true

# Node 24+ runs the .ts sources directly (native type stripping) — no build, no tsx.
- uses: actions/setup-node@v4
Expand Down Expand Up @@ -44,6 +46,7 @@ jobs:
node test/js-conformance.ts
node test/tsx-conformance.ts
node test/jsx-conformance.ts
node test/redcmd-tm-diagnostics.ts
node test/html-lexer-spike.ts
node test/html-conformance.ts
node test/html-monarch.ts
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "vendor/RedCMD-TmLanguage-Syntax-Highlighter"]
path = vendor/RedCMD-TmLanguage-Syntax-Highlighter
url = https://github.com/RedCMD/TmLanguage-Syntax-Highlighter.git
44 changes: 36 additions & 8 deletions javascript.tmLanguage.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
{
"include": "#blockcomment"
},
{
"include": "#regex-literal-prefix-ops"
},
{
"include": "#regex"
},
Expand Down Expand Up @@ -224,6 +227,35 @@
}
],
"repository": {
"regex-literal-prefix-ops": {
  "name": "string.regexp.js",
  "begin": "(?:(?<=[=|\\^&<>+\\-*%~(,.\\[?:{;])|(?<=\\binstanceof)|(?<=\\bin)|(?<=\\bnew)|(?<=\\bextends)|(?<=\\byield)|(?<=\\bget)|(?<=\\bset)|(?<=\\basync)|(?<=\\belse)|(?<=\\bdo)|(?<=\\breturn)|(?<=\\bthrow)|(?<=\\btry)|(?<=\\bfinally)|(?<=\\bcatch)|(?<=\\bof)|(?<=\\bcase)|(?<=\\bexport)|(?<=\\bdefault)|(?<=\\bimport)|(?<=\\bstatic)|(?<=\\baccessor)|(?<=\\btypeof)|(?<=\\bvoid)|(?<=\\bdelete)|(?<=\\bawait)|(?<=^))\\s*([!](?:\\s*[!])*)\\s*(?:((?:/\\*\\*(?!/)[\\s\\S]*?\\*/|/\\*[\\s\\S]*?\\*/)\\s*))?(/)(?![*/])",
  "beginCaptures": {
    "1": {
      "name": "keyword.operator.logical.prefix.js"
    },
    "2": {
      "name": "comment.block.js"
    },
    "3": {
      "name": "punctuation.definition.string.begin.regexp.js"
    }
  },
  "end": "(/)([gimsuydv]*)",
  "endCaptures": {
    "1": {
      "name": "punctuation.definition.string.end.regexp.js"
    },
    "2": {
      "name": "keyword.other.regexp.js"
    }
  },
  "patterns": [
    {
      "include": "#regexp"
    }
  ]
},
"regexp": {
"patterns": [
{
Expand Down Expand Up @@ -1922,6 +1954,9 @@
{
"include": "#blockcomment"
},
{
"include": "#regex-literal-prefix-ops"
},
{
"include": "#regex"
},
Expand Down Expand Up @@ -2086,13 +2121,6 @@
}
]
},
"type": {
"patterns": [
{
"include": "#simple-type"
}
]
},
"qstring-double": {
"name": "string.quoted.double.js",
"begin": "\"",
Expand Down Expand Up @@ -2149,7 +2177,7 @@
},
"regex": {
"name": "string.regexp.js",
"begin": "(?:(?<=[=|\\^&<>+\\-*%~(,.\\[?:{;])|(?<=\\binstanceof)|(?<=\\bin)|(?<=\\bnew)|(?<=\\bextends)|(?<=\\byield)|(?<=\\bget)|(?<=\\bset)|(?<=\\basync)|(?<=\\belse)|(?<=\\bdo)|(?<=\\breturn)|(?<=\\bthrow)|(?<=\\btry)|(?<=\\bfinally)|(?<=\\bcatch)|(?<=\\bof)|(?<=\\bcase)|(?<=\\bexport)|(?<=\\bdefault)|(?<=\\bimport)|(?<=\\bstatic)|(?<=\\baccessor)|(?<=\\btypeof)|(?<=\\bvoid)|(?<=\\bdelete)|(?<=\\bawait)|(?<=(?:[=|\\^&<>+\\-*%~(,.\\[?:{;]|\\binstanceof|\\bin|\\bnew|\\bextends|\\byield|\\bget|\\bset|\\basync|\\belse|\\bdo|\\breturn|\\bthrow|\\btry|\\bfinally|\\bcatch|\\bof|\\bcase|\\bexport|\\bdefault|\\bimport|\\bstatic|\\baccessor|\\btypeof|\\bvoid|\\bdelete|\\bawait|^)\\s*[!](?:\\s*[!])*)|(?<=^))\\s*(?:((?:/\\*\\*(?!/)[\\s\\S]*?\\*/|/\\*[\\s\\S]*?\\*/)\\s*))?(/)(?![*/])",
"begin": "(?:(?<=[=|\\^&<>+\\-*%~(,.\\[?:{;])|(?<=\\binstanceof)|(?<=\\bin)|(?<=\\bnew)|(?<=\\bextends)|(?<=\\byield)|(?<=\\bget)|(?<=\\bset)|(?<=\\basync)|(?<=\\belse)|(?<=\\bdo)|(?<=\\breturn)|(?<=\\bthrow)|(?<=\\btry)|(?<=\\bfinally)|(?<=\\bcatch)|(?<=\\bof)|(?<=\\bcase)|(?<=\\bexport)|(?<=\\bdefault)|(?<=\\bimport)|(?<=\\bstatic)|(?<=\\baccessor)|(?<=\\btypeof)|(?<=\\bvoid)|(?<=\\bdelete)|(?<=\\bawait)|(?<=^))\\s*(?:((?:/\\*\\*(?!/)[\\s\\S]*?\\*/|/\\*[\\s\\S]*?\\*/)\\s*))?(/)(?![*/])",
"beginCaptures": {
"1": {
"name": "comment.block.js"
Expand Down
44 changes: 36 additions & 8 deletions javascriptreact.tmLanguage.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
{
"include": "#blockcomment"
},
{
"include": "#regex-literal-prefix-ops"
},
{
"include": "#regex"
},
Expand Down Expand Up @@ -703,6 +706,35 @@
}
]
},
"regex-literal-prefix-ops": {
  "name": "string.regexp.js.jsx",
  "begin": "(?:(?<=[=|\\^&<>+\\-*%~(,.\\[?:{;])|(?<=\\binstanceof)|(?<=\\bin)|(?<=\\bnew)|(?<=\\bextends)|(?<=\\byield)|(?<=\\bget)|(?<=\\bset)|(?<=\\basync)|(?<=\\belse)|(?<=\\bdo)|(?<=\\breturn)|(?<=\\bthrow)|(?<=\\btry)|(?<=\\bfinally)|(?<=\\bcatch)|(?<=\\bof)|(?<=\\bcase)|(?<=\\bexport)|(?<=\\bdefault)|(?<=\\bimport)|(?<=\\bstatic)|(?<=\\baccessor)|(?<=\\btypeof)|(?<=\\bvoid)|(?<=\\bdelete)|(?<=\\bawait)|(?<=^))\\s*([!](?:\\s*[!])*)\\s*(?:((?:/\\*\\*(?!/)[\\s\\S]*?\\*/|/\\*[\\s\\S]*?\\*/)\\s*))?(/)(?![*/])",
  "beginCaptures": {
    "1": {
      "name": "keyword.operator.logical.prefix.js.jsx"
    },
    "2": {
      "name": "comment.block.js.jsx"
    },
    "3": {
      "name": "punctuation.definition.string.begin.regexp.js.jsx"
    }
  },
  "end": "(/)([gimsuydv]*)",
  "endCaptures": {
    "1": {
      "name": "punctuation.definition.string.end.regexp.js.jsx"
    },
    "2": {
      "name": "keyword.other.regexp.js.jsx"
    }
  },
  "patterns": [
    {
      "include": "#regexp"
    }
  ]
},
"regexp": {
"patterns": [
{
Expand Down Expand Up @@ -2410,6 +2442,9 @@
{
"include": "#blockcomment"
},
{
"include": "#regex-literal-prefix-ops"
},
{
"include": "#regex"
},
Expand Down Expand Up @@ -2574,13 +2609,6 @@
}
]
},
"type": {
"patterns": [
{
"include": "#simple-type"
}
]
},
"qstring-double": {
"name": "string.quoted.double.js.jsx",
"begin": "\"",
Expand Down Expand Up @@ -2637,7 +2665,7 @@
},
"regex": {
"name": "string.regexp.js.jsx",
"begin": "(?:(?<=[=|\\^&<>+\\-*%~(,.\\[?:{;])|(?<=\\binstanceof)|(?<=\\bin)|(?<=\\bnew)|(?<=\\bextends)|(?<=\\byield)|(?<=\\bget)|(?<=\\bset)|(?<=\\basync)|(?<=\\belse)|(?<=\\bdo)|(?<=\\breturn)|(?<=\\bthrow)|(?<=\\btry)|(?<=\\bfinally)|(?<=\\bcatch)|(?<=\\bof)|(?<=\\bcase)|(?<=\\bexport)|(?<=\\bdefault)|(?<=\\bimport)|(?<=\\bstatic)|(?<=\\baccessor)|(?<=\\btypeof)|(?<=\\bvoid)|(?<=\\bdelete)|(?<=\\bawait)|(?<=(?:[=|\\^&<>+\\-*%~(,.\\[?:{;]|\\binstanceof|\\bin|\\bnew|\\bextends|\\byield|\\bget|\\bset|\\basync|\\belse|\\bdo|\\breturn|\\bthrow|\\btry|\\bfinally|\\bcatch|\\bof|\\bcase|\\bexport|\\bdefault|\\bimport|\\bstatic|\\baccessor|\\btypeof|\\bvoid|\\bdelete|\\bawait|^)\\s*[!](?:\\s*[!])*)|(?<=^))\\s*(?:((?:/\\*\\*(?!/)[\\s\\S]*?\\*/|/\\*[\\s\\S]*?\\*/)\\s*))?(/)(?![*/])",
"begin": "(?:(?<=[=|\\^&<>+\\-*%~(,.\\[?:{;])|(?<=\\binstanceof)|(?<=\\bin)|(?<=\\bnew)|(?<=\\bextends)|(?<=\\byield)|(?<=\\bget)|(?<=\\bset)|(?<=\\basync)|(?<=\\belse)|(?<=\\bdo)|(?<=\\breturn)|(?<=\\bthrow)|(?<=\\btry)|(?<=\\bfinally)|(?<=\\bcatch)|(?<=\\bof)|(?<=\\bcase)|(?<=\\bexport)|(?<=\\bdefault)|(?<=\\bimport)|(?<=\\bstatic)|(?<=\\baccessor)|(?<=\\btypeof)|(?<=\\bvoid)|(?<=\\bdelete)|(?<=\\bawait)|(?<=^))\\s*(?:((?:/\\*\\*(?!/)[\\s\\S]*?\\*/|/\\*[\\s\\S]*?\\*/)\\s*))?(/)(?![*/])",
"beginCaptures": {
"1": {
"name": "comment.block.js.jsx"
Expand Down
8 changes: 8 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"conformance:tsx": "node test/tsx-conformance.ts",
"conformance:jsx": "node test/jsx-conformance.ts",
"conformance:html": "node test/html-conformance.ts",
"test:tm-diagnostics": "node test/redcmd-tm-diagnostics.ts",
"spike:html-lexer": "node test/html-lexer-spike.ts",
"bench:html-official": "node test/html-bench.ts",
"bench:html-embed": "node test/html-embed-js.ts",
Expand Down Expand Up @@ -49,6 +50,7 @@
"parse5": "^8.0.1",
"tree-sitter-cli": "^0.26.9",
"typescript": "^5.6.0",
"vscode-onigmo": "^2.0.1",
"vscode-oniguruma": "^2.0.1",
"vscode-textmate": "^9.3.2",
"vscode-tmlanguage-snapshot": "^1.0.1",
Expand Down
74 changes: 45 additions & 29 deletions src/gen-tm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,14 @@ function buildOperandStartClass(grammar: CstGrammar, identToken: TokenDecl | und
return `[[:alpha:][:digit:]${cls}]`;
}

/**
 * Builds a run of fixed-width negative lookbehinds that together reject a
 * position preceded by a "value" character followed by 0..`maxWhitespace`
 * whitespace characters.
 *
 * One assertion is emitted per whitespace width, in ascending order:
 * `(?<![cls])`, `(?<![cls]\s)`, `(?<![cls]\s\s)`, … Each assertion has a fixed
 * width, so the result avoids a variable-length `\s*` inside a lookbehind.
 * Whitespace gaps longer than `maxWhitespace` are not covered.
 *
 * @param valueCharClass Body of a regex character class (without the
 *   surrounding brackets), inserted verbatim — the caller is responsible for
 *   escaping.
 * @param maxWhitespace Largest whitespace gap to guard against (inclusive).
 * @returns The concatenated lookbehind assertions; empty when
 *   `maxWhitespace` is negative.
 */
function notAfterValueWithOptionalWhitespace(valueCharClass: string, maxWhitespace = 16): string {
  let guard = '';
  // Grows by one `\s` per iteration, so it always holds exactly `width` of them.
  let gap = '';
  for (let width = 0; width <= maxWhitespace; width += 1) {
    guard += `(?<![${valueCharClass}]${gap})`;
    gap += '\\s';
  }
  return guard;
}

// ── JSX detection ──

interface JsxInfo {
Expand Down Expand Up @@ -992,6 +1000,7 @@ interface JsxDisambigDelims {
topTypeParam: string; // "is a type-param list" body: top-level comma OR constraint keyword
balancedAngles: string; // recursive balanced `<…>` named group `(?<B>…)`
arrowParamShape: string; // the arrow-shaped `(` confirm after `>`
close: string; // the generic close delimiter (`>` for TS/TSX)
// Lookbehind body asserting the `>` just left closes a type-param LIST that carried a
// top-level comma or constraint keyword (`<T,>`, `<T extends X>`) — i.e. the SAME
// generic-arrow disambiguation signal as `topTypeParam`, but for matching a `(` that
Expand Down Expand Up @@ -1126,7 +1135,7 @@ function jsxDisambigDelims(grammar: CstGrammar, identRegex: string, separator: s
? `|${skip}\\b(?:${constraintKeywords.map(escapeRegex).join('|')})\\b`
: '';
const typeParamCloseBehind = `${escapeRegex(open)}(?:${topComma}${behindKw})${skip}${escapeRegex(close)}`;
return { topComma, topTypeParam, balancedAngles, arrowParamShape, typeParamCloseBehind };
return { topComma, topTypeParam, balancedAngles, arrowParamShape, close, typeParamCloseBehind };
}

/**
Expand Down Expand Up @@ -1916,12 +1925,12 @@ function generateTypeCastPattern(
const tpEnd = `punctuation.definition.typeparameters.end.${langName}`;
// `<` only at expression-start. A prefix cast's `<` is never preceded by a value
// OPERAND; a comparison's `<` always is (`a < b`). Reject the cast when `<` is
// preceded — across any whitespace — by an operand-ending char: an identifier
// preceded — across bounded whitespace — by an operand-ending char: an identifier
// char, `)`, `]`, a numeric/quote tail. This keeps `a < b > c`, `f() < g`,
// `x] < y` as comparisons (variable-length lookbehind; Oniguruma supports it).
// `x] < y` as comparisons while staying compatible with TextMate 2.0 Onigmo.
// Casts after a keyword that ends in a letter (`return <T>x`) stay a comparison
// here — rare, and never a regression (they were unhighlighted before too).
const notAfter = `(?<![\\w$)\\]]\\s*)`;
const notAfter = notAfterValueWithOptionalWhitespace('\\w$)\\]');
// Type-shaped, balanced-angle inner content (kept to type characters so an
// ordinary `a < b > c` comparison — whose operands are arbitrary expressions —
// is not swallowed). `\g<TC>` recurses for nested generics like `<Array<T>>`.
Expand Down Expand Up @@ -2445,26 +2454,7 @@ function generateRegexLiteralPatterns(
// Also match at start of line
const startOfLine = '(?<=^)';

// Ambiguous postfix/prefix op chars (TS `!`): a `/` may follow one ONLY when the op-run is
// the PREFIX form — i.e. the run is itself in a regex-start position (`= !/re/`, `!!/re/`,
// `return !/x/`), NOT the postfix non-null form (`x! / y` → division). We can't decide that
// from the single char before `/` (it's the op either way), so look back PAST the op-run and
// re-apply the same regex-start test there. The inner context is the SAME char-class +
// keywords + line-start used above, but un-wrapped (it sits inside this lookbehind), and the
// op-run is `[ops](?:\s*[ops])*` (chained `!!` allowed). Because these chars were excluded
// from `charLookbehind`, a postfix op (preceded by a value) matches NONE of the alternatives
// → the `/` falls through to the division operator.
const innerCtx = [
charEsc ? `[${charEsc}]` : null,
...info.preceedingKeywords.map(kw => `\\b${escapeRegex(kw)}`),
'^',
].filter(Boolean).join('|');
const opRun = info.postfixAmbiguousChars.map(escapeRegex).join('');
const postfixBangLookbehind = opRun
? `(?<=(?:${innerCtx})\\s*[${opRun}](?:\\s*[${opRun}])*)`
: '';

const lbAlts = [charLookbehind, keywordLookbehinds, postfixBangLookbehind, startOfLine]
const lbAlts = [charLookbehind, keywordLookbehinds, startOfLine]
.filter(Boolean).join('|');
const fullLookbehind = `(?:${lbAlts})`;

Expand All @@ -2491,6 +2481,31 @@ function generateRegexLiteralPatterns(
};
if (commentBody) beginCaptures['1'] = { name: `comment.block.${langName}` };

// Ambiguous postfix/prefix op chars (TS `!`): a `/` may follow one ONLY when the op-run is
// the PREFIX form (`= !/re/`, `return !!/x/`), not postfix non-null (`x! / y`). TextMate 2.0's
// Onigmo rejects the old variable-length lookbehind that looked past the whole op-run, so this
// separate pattern anchors on the fixed-width expression-start context and consumes the op-run.
const prefixOpClass = info.postfixAmbiguousChars.map(escapeForCharClass).join('');
if (prefixOpClass) {
  // Capture 1 is always the op-run. The opening `/` is capture 3 when a comment body is
  // configured (presumably `commentPrefix` then contributes capture 2 — confirm against
  // its definition), otherwise capture 2.
  const prefixSlashGroup = commentBody ? '3' : '2';
  const prefixCaptures: Record<string, { name: string }> = {
    '1': { name: `keyword.operator.logical.prefix.${langName}` },
    [prefixSlashGroup]: { name: `punctuation.definition.string.begin.regexp.${langName}` },
  };
  if (commentBody) prefixCaptures['2'] = { name: `comment.block.${langName}` };
  // One or more prefix ops, optionally separated by whitespace (`!`, `!!`, `! !`).
  // NOTE: the backslash must be doubled. In an untagged template literal `\s` is a
  // no-op escape that evaluates to a plain `s`, which would emit `s*` (zero or more
  // literal "s" characters) instead of the whitespace class `\s*`.
  const prefixOpRun = `[${prefixOpClass}](?:\\s*[${prefixOpClass}])*`;
  result['regex-literal-prefix-ops'] = {
    name: `string.regexp.${langName}`,
    begin: `${fullLookbehind}\\s*(${prefixOpRun})\\s*${commentPrefix}(/)${commentExclude}`,
    beginCaptures: prefixCaptures,
    end: `(/)(${info.flagsPattern})`,
    endCaptures: {
      '1': { name: `punctuation.definition.string.end.regexp.${langName}` },
      '2': { name: `keyword.other.regexp.${langName}` },
    },
    patterns: [{ include: '#regexp' }],
  };
}

result['regex-literal'] = {
name: `string.regexp.${langName}`,
begin: `${fullLookbehind}\\s*${commentPrefix}(/)${commentExclude}`,
Expand Down Expand Up @@ -4382,6 +4397,7 @@ export function generateTmLanguage(grammar: CstGrammar, langName: string): TmGra
for (const [key, pattern] of Object.entries(rlPatterns)) {
repository[key] = pattern;
}
if (rlPatterns['regex-literal-prefix-ops']) topPatterns.push({ include: '#regex-literal-prefix-ops' });
topPatterns.push({ include: '#regex-literal' });
}

Expand Down Expand Up @@ -4937,7 +4953,7 @@ export function generateTmLanguage(grammar: CstGrammar, langName: string): TmGra
// reference it via `{ include: '#type-inner' }`. No shared mutable array;
// later injections rebuild the patterns array non-destructively.
// Type operators are derived from @type rule literals.
const typeInnerPats: (TmPattern | { include: string })[] = [
const typeInnerPats: (TmPattern | { include: string })[] = hasTypeAnnotations ? [
...(repository['generic-type'] ? [{ include: '#generic-type' }] : []),
...(repository['type-object-type'] ? [{ include: '#type-object-type' }] : []),
...(repository['type-paren'] ? [{ include: '#type-paren' }] : []),
Expand All @@ -4947,7 +4963,7 @@ export function generateTmLanguage(grammar: CstGrammar, langName: string): TmGra
// swallowed by the surrounding type region's name.
...literalTypeIncludes,
{ include: '#simple-type' },
];
] : [];
// Union/intersection operators — only if present in @type rules
const typeUnionOps = ['|', '&'].filter(op => typeLiterals.has(op));
if (typeUnionOps.length > 0) {
Expand Down Expand Up @@ -5028,7 +5044,7 @@ export function generateTmLanguage(grammar: CstGrammar, langName: string): TmGra
typeInnerPats.splice(idx === -1 ? typeInnerPats.length : idx, 0, { include: '#type-conditional' });
}

repository['type-inner'] = { patterns: typeInnerPats };
if (hasTypeAnnotations) repository['type-inner'] = { patterns: typeInnerPats };

// Wire up deferred type-paren pattern (basic wiring; patched after type injections)
if (repository['type-paren']) {
Expand Down Expand Up @@ -5405,7 +5421,7 @@ export function generateTmLanguage(grammar: CstGrammar, langName: string): TmGra
if (angleBracket && angleDisambig) {
const balancedAngles = angleDisambig.balancedAngles;
const arrowParamShape = angleDisambig.arrowParamShape;
const arrowPos = `(?:(?<=\\basync\\s)|(?<![\\w$)\\]}]\\s*))`;
const arrowPos = `(?:(?<=\\basync\\s)|${notAfterValueWithOptionalWhitespace('\\w$)\\]}')})`;
// JSX-dialect disambiguator: in a `.tsx`/`.jsx` grammar a bare `<Foo>(…`
// is a JSX element, so a generic-arrow type-param list is only recognised
// when it carries a TOP-LEVEL comma inside the `<…>` (`<T,>`, `<T = X,>`,
Expand Down Expand Up @@ -6366,7 +6382,7 @@ export function generateTmLanguage(grammar: CstGrammar, langName: string): TmGra
if (angleBracket && angleDisambig) {
repository['arrow-function-params-generic'] = {
name: `meta.parameters.arrow.${langName}`,
begin: `(?<=${angleDisambig.typeParamCloseBehind})\\s*(\\()\\s*$`,
begin: `(?<=${escapeRegex(angleDisambig.close)})\\s*(\\()\\s*$`,
beginCaptures: {
'1': { name: `punctuation.definition.parameters.begin.${langName}` },
},
Expand Down
Loading
Loading