From c4ef34358deac72960dcdb12d3cf8928c7b4469e Mon Sep 17 00:00:00 2001 From: Kiro Agent <244629292+kiro-agent@users.noreply.github.com> Date: Sun, 14 Jun 2026 09:56:31 +0000 Subject: [PATCH] JavaScript: Reduce false positives in js/regex/missing-regexp-anchor for non-URL patterns Filter hasMisleadingAnchorPrecedence results to only flag patterns that contain a dot character (escaped or unescaped), since hostname/URL patterns inherently contain dots. This eliminates false positives for patterns matching simple strings, module names, CLI arguments, event names, and CSS properties that do not relate to URL/hostname validation. --- .../Security/CWE-020/MissingRegExpAnchor.ql | 22 +++++++- ...6-14-missing-regexp-anchor-fp-reduction.md | 4 ++ .../MissingRegExpAnchor.expected | 28 ---------- .../tst-SemiAnchoredRegExp.js | 56 +++++++++---------- 4 files changed, 53 insertions(+), 57 deletions(-) create mode 100644 javascript/ql/src/change-notes/2025-06-14-missing-regexp-anchor-fp-reduction.md diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql index 1057f9ccca50..925627100fac 100644 --- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql +++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql @@ -45,12 +45,32 @@ private module Impl implements import MissingRegExpAnchor::Make +/** + * Holds if `src` is a pattern that plausibly matches a hostname or URL, + * as indicated by the presence of a dot character in the pattern (either an + * escaped literal dot `\.` or an unescaped dot `.` used as a wildcard). + * + * Hostname patterns inherently contain dots (e.g., `example\.com` or `example.com`), + * whereas patterns matching simple strings, module names, or CLI arguments + * typically do not contain dots. 
+ */ +private predicate looksLikeHostnamePattern(RegExpPatternSource src) { + exists(RegExpTerm term | + term = src.getRegExpTerm().getAChild*() and + ( + term.(RegExpConstant).getValue() = "." + or + term instanceof RegExpDot + ) + ) +} + from DataFlow::Node nd, string msg where isUnanchoredHostnameRegExp(nd, msg) or isSemiAnchoredHostnameRegExp(nd, msg) or - hasMisleadingAnchorPrecedence(nd, msg) + hasMisleadingAnchorPrecedence(nd, msg) and looksLikeHostnamePattern(nd) // isLineAnchoredHostnameRegExp is not used here, as it is not relevant to JS. select nd, msg diff --git a/javascript/ql/src/change-notes/2025-06-14-missing-regexp-anchor-fp-reduction.md b/javascript/ql/src/change-notes/2025-06-14-missing-regexp-anchor-fp-reduction.md new file mode 100644 index 000000000000..5bb65ebdf3d1 --- /dev/null +++ b/javascript/ql/src/change-notes/2025-06-14-missing-regexp-anchor-fp-reduction.md @@ -0,0 +1,4 @@ +--- +category: majorAnalysis +--- +* Reduced false positives in the `js/regex/missing-regexp-anchor` query by filtering out `hasMisleadingAnchorPrecedence` results where the regular expression contains no dot at all, neither an escaped literal dot (`\.`) nor an unescaped wildcard dot (`.`). Patterns matching simple strings, module names, or CLI arguments (which do not contain dots) are no longer flagged, as the query targets URL/hostname validation bypasses which inherently involve dotted domain names. diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor/MissingRegExpAnchor.expected b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor/MissingRegExpAnchor.expected index ebff0b583c02..6959c79d0f5f 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor/MissingRegExpAnchor.expected +++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor/MissingRegExpAnchor.expected @@ -1,26 +1,8 @@ -| tst-SemiAnchoredRegExp.js:3:2:3:7 | /^a\|b/ | Misleading operator precedence. 
The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:6:2:6:9 | /^a\|b\|c/ | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:12:2:12:9 | /^a\|(b)/ | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:14:2:14:11 | /^(a)\|(b)/ | Misleading operator precedence. The subexpression '^(a)' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:17:2:17:7 | /a\|b$/ | Misleading operator precedence. The subexpression 'b$' is anchored at the end, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:20:2:20:9 | /a\|b\|c$/ | Misleading operator precedence. The subexpression 'c$' is anchored at the end, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:26:2:26:9 | /(a)\|b$/ | Misleading operator precedence. The subexpression 'b$' is anchored at the end, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:28:2:28:11 | /(a)\|(b)$/ | Misleading operator precedence. The subexpression '(b)$' is anchored at the end, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | Misleading operator precedence. The subexpression '^good.com' is anchored at the beginning, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:31:2:31:25 | /^good\\ ... r\\.com/ | Misleading operator precedence. The subexpression '^good\\.com' is anchored at the beginning, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:32:2:32:27 | /^good\\ ... \\\\.com/ | Misleading operator precedence. 
The subexpression '^good\\\\.com' is anchored at the beginning, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:33:2:33:29 | /^good\\ ... \\\\.com/ | Misleading operator precedence. The subexpression '^good\\\\\\.com' is anchored at the beginning, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:34:2:34:31 | /^good\\ ... \\\\.com/ | Misleading operator precedence. The subexpression '^good\\\\\\\\.com' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:36:2:36:16 | /^foo\|bar\|baz$/ | Misleading operator precedence. The subexpression '^foo' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:36:2:36:16 | /^foo\|bar\|baz$/ | Misleading operator precedence. The subexpression 'baz$' is anchored at the end, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:42:13:42:18 | "^a\|b" | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:45:13:45:20 | "^a\|b\|c" | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:51:13:51:20 | "^a\|(b)" | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:53:13:53:22 | "^(a)\|(b)" | Misleading operator precedence. The subexpression '^(a)' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:56:13:56:18 | "a\|b$" | Misleading operator precedence. 
The subexpression 'b$' is anchored at the end, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:59:13:59:20 | "a\|b\|c$" | Misleading operator precedence. The subexpression 'c$' is anchored at the end, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:65:13:65:20 | "(a)\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored at the end, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:67:13:67:22 | "(a)\|(b)$" | Misleading operator precedence. The subexpression '(b)$' is anchored at the end, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:69:13:69:34 | '^good. ... er.com' | Misleading operator precedence. The subexpression '^good.com' is anchored at the beginning, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:70:13:70:36 | '^good\\ ... r\\.com' | Misleading operator precedence. The subexpression '^good.com' is anchored at the beginning, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:71:13:71:38 | '^good\\ ... \\\\.com' | Misleading operator precedence. The subexpression '^good\\.com' is anchored at the beginning, but the other parts of this regular expression are not | @@ -29,18 +11,8 @@ | tst-SemiAnchoredRegExp.js:82:2:82:27 | /(\\.xxx ... .zzz)$/ | Misleading operator precedence. The subexpression '(\\.zzz)$' is anchored at the end, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:84:2:84:23 | /\\.xxx\| ... zzz$/ig | Misleading operator precedence. The subexpression '\\.zzz$' is anchored at the end, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:85:2:85:19 | /\\.xxx\|\\.yyy\|zzz$/ | Misleading operator precedence. 
The subexpression 'zzz$' is anchored at the end, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:87:2:87:28 | /^(xxx ... yyy)/i | Misleading operator precedence. The subexpression '^(xxx yyy zzz)' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:88:2:88:53 | /^(xxx ... x\|1st/i | Misleading operator precedence. The subexpression '^(xxx yyy zzz)' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:89:2:89:24 | /^(xxx: ... (zzz:)/ | Misleading operator precedence. The subexpression '^(xxx:)' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:90:2:90:23 | /^(xxx? ... zzz\\/)/ | Misleading operator precedence. The subexpression '^(xxx?:)' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:91:2:91:16 | /^@media\|@page/ | Misleading operator precedence. The subexpression '^@media' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:92:2:92:32 | /^\\s*(x ... :yyy\\// | Misleading operator precedence. The subexpression '^\\s*(xxx?\|yyy\|zzz):' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:93:2:93:21 | /^click\|mouse\|touch/ | Misleading operator precedence. The subexpression '^click' is anchored at the beginning, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:94:2:94:43 | /^http: ... r\\.com/ | Misleading operator precedence. The subexpression '^http:\\/\\/good\\.com' is anchored at the beginning, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:95:2:95:47 | /^https ... r\\.com/ | Misleading operator precedence. 
The subexpression '^https?:\\/\\/good\\.com' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:96:2:96:55 | /^mouse ... ragend/ | Misleading operator precedence. The subexpression '^mouse' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:97:2:97:14 | /^xxx:\|yyy:/i | Misleading operator precedence. The subexpression '^xxx:' is anchored at the beginning, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:98:2:98:18 | /_xxx\|_yyy\|_zzz$/ | Misleading operator precedence. The subexpression '_zzz$' is anchored at the end, but the other parts of this regular expression are not | | tst-UnanchoredUrlRegExp.js:3:47:3:65 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | | tst-UnanchoredUrlRegExp.js:4:58:4:76 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | | tst-UnanchoredUrlRegExp.js:5:47:5:66 | "^https?://good.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. 
| diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor/tst-SemiAnchoredRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor/tst-SemiAnchoredRegExp.js index 6fc6d6296204..4399780d76b2 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor/tst-SemiAnchoredRegExp.js +++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor/tst-SemiAnchoredRegExp.js @@ -1,31 +1,31 @@ (function coreRegExp() { /^a|/; - /^a|b/; // $ Alert + /^a|b/; // no dot, not flagged /a|^b/; /^a|^b/; - /^a|b|c/; // $ Alert + /^a|b|c/; // no dot, not flagged /a|^b|c/; /a|b|^c/; /^a|^b|c/; /(^a)|b/; - /^a|(b)/; // $ Alert + /^a|(b)/; // no dot, not flagged /^a|(^b)/; - /^(a)|(b)/; // $ Alert + /^(a)|(b)/; // no dot, not flagged - /a|b$/; // $ Alert + /a|b$/; // no dot, not flagged /a$|b/; /a$|b$/; - /a|b|c$/; // $ Alert + /a|b|c$/; // no dot, not flagged /a|b$|c/; /a$|b|c/; /a|b$|c$/; /a|(b$)/; - /(a)|b$/; // $ Alert + /(a)|b$/; // no dot, not flagged /(a$)|b$/; - /(a)|(b)$/; // $ Alert + /(a)|(b)$/; // no dot, not flagged /^good.com|better.com/; // $ Alert /^good\.com|better\.com/; // $ Alert @@ -33,38 +33,38 @@ /^good\\\.com|better\\\.com/; // $ Alert /^good\\\\.com|better\\\\.com/; // $ Alert - /^foo|bar|baz$/; // $ Alert + /^foo|bar|baz$/; // no dot, not flagged /^foo|%/; }); (function coreString() { new RegExp("^a|"); - new RegExp("^a|b"); // $ Alert + new RegExp("^a|b"); // no dot, not flagged new RegExp("a|^b"); new RegExp("^a|^b"); - new RegExp("^a|b|c"); // $ Alert + new RegExp("^a|b|c"); // no dot, not flagged new RegExp("a|^b|c"); new RegExp("a|b|^c"); new RegExp("^a|^b|c"); new RegExp("(^a)|b"); - new RegExp("^a|(b)"); // $ Alert + new RegExp("^a|(b)"); // no dot, not flagged new RegExp("^a|(^b)"); - new RegExp("^(a)|(b)"); // $ Alert + new RegExp("^(a)|(b)"); // no dot, not flagged - new RegExp("a|b$"); // $ Alert + new RegExp("a|b$"); // no dot, not flagged new RegExp("a$|b"); new 
RegExp("a$|b$"); - new RegExp("a|b|c$"); // $ Alert + new RegExp("a|b|c$"); // no dot, not flagged new RegExp("a|b$|c"); new RegExp("a$|b|c"); new RegExp("a|b$|c$"); new RegExp("a|(b$)"); - new RegExp("(a)|b$"); // $ Alert + new RegExp("(a)|b$"); // no dot, not flagged new RegExp("(a$)|b$"); - new RegExp("(a)|(b)$"); // $ Alert + new RegExp("(a)|(b)$"); // no dot, not flagged new RegExp('^good.com|better.com'); // $ Alert new RegExp('^good\.com|better\.com'); // $ Alert @@ -77,25 +77,25 @@ // real-world examples that have been anonymized a bit /* - * NOT OK: flagged + * NOT OK: flagged (patterns containing dots are plausibly hostname-related) */ /(\.xxx)|(\.yyy)|(\.zzz)$/; // $ Alert /(^left|right|center)\sbottom$/; // not flagged at the moment due to interior anchors /\.xxx|\.yyy|\.zzz$/ig; // $ Alert /\.xxx|\.yyy|zzz$/; // $ Alert /^([A-Z]|xxx[XY]$)/; // not flagged at the moment due to interior anchors - /^(xxx yyy zzz)|(xxx yyy)/i; // $ Alert - /^(xxx yyy zzz)|(xxx yyy)|(1st( xxx)? yyy)|xxx|1st/i; // $ Alert - /^(xxx:)|(yyy:)|(zzz:)/; // $ Alert - /^(xxx?:)|(yyy:zzz\/)/; // $ Alert - /^@media|@page/; // $ Alert - /^\s*(xxx?|yyy|zzz):|xxx:yyy\//; // $ Alert - /^click|mouse|touch/; // $ Alert + /^(xxx yyy zzz)|(xxx yyy)/i; // no dot, not flagged + /^(xxx yyy zzz)|(xxx yyy)|(1st( xxx)? 
yyy)|xxx|1st/i; // no dot, not flagged + /^(xxx:)|(yyy:)|(zzz:)/; // no dot, not flagged + /^(xxx?:)|(yyy:zzz\/)/; // no dot, not flagged + /^@media|@page/; // no dot, not flagged + /^\s*(xxx?|yyy|zzz):|xxx:yyy\//; // no dot, not flagged + /^click|mouse|touch/; // no dot, not flagged /^http:\/\/good\.com|http:\/\/better\.com/; // $ Alert /^https?:\/\/good\.com|https?:\/\/better\.com/; // $ Alert - /^mouse|touch|click|contextmenu|drop|dragover|dragend/; // $ Alert - /^xxx:|yyy:/i; // $ Alert - /_xxx|_yyy|_zzz$/; // $ Alert + /^mouse|touch|click|contextmenu|drop|dragover|dragend/; // no dot, not flagged + /^xxx:|yyy:/i; // no dot, not flagged + /_xxx|_yyy|_zzz$/; // no dot, not flagged /em|%$/; // not flagged at the moment due to the anchor not being for letters /*