From b97db67d1d184046899a0046af547d40c486df7f Mon Sep 17 00:00:00 2001 From: Jakub Vrana Date: Sat, 7 Feb 2026 19:44:18 +0100 Subject: [PATCH 1/2] Fix regular expression escaping `/` was unnecessarily escaped even though the delimiter was `~`. Using delimiter `~` required escaping it in `$boundaries` and elsewhere even though these are `preg_quote`'d later. Using `()` as the delimiter allows treating all characters the same. --- src/Tokenizer.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Tokenizer.php b/src/Tokenizer.php index 5682c49..8b76db4 100644 --- a/src/Tokenizer.php +++ b/src/Tokenizer.php @@ -820,10 +820,10 @@ private function makeRegexFromList(array $values, bool $sorted = false): string $prefix = (string) $prefix; } - $regex .= preg_quote($prefix, '/'); + $regex .= preg_quote($prefix); $regex .= count($items) === 1 - ? preg_quote(substr(reset($items), strlen($prefix)), '/') + ? preg_quote(substr(reset($items), strlen($prefix))) : $this->makeRegexFromList(array_map(static fn ($v) => substr($v, strlen($prefix)), $items), true); } @@ -887,7 +887,7 @@ private function makeTokenizeRegex(array $regexes): string $parts[] = '(?' . $regex . ')'; } - return '~\G(?:' . implode('|', $parts) . ')~'; + return '(\G(?:' . implode('|', $parts) . '))'; } /** From 6d638a78655cf83cf1b51047e222c9dd09c27f0a Mon Sep 17 00:00:00 2001 From: Jakub Vrana Date: Sat, 7 Feb 2026 19:44:34 +0100 Subject: [PATCH 2/2] Treat ~* as operator --- src/Tokenizer.php | 1 + tests/clihighlight.txt | 3 +++ tests/compress.txt | 2 ++ tests/format-highlight.html | 3 +++ tests/format.txt | 3 +++ tests/highlight.html | 2 ++ tests/sql.sql | 2 ++ 7 files changed, 16 insertions(+) diff --git a/src/Tokenizer.php b/src/Tokenizer.php index 8b76db4..a62bfe3 100644 --- a/src/Tokenizer.php +++ b/src/Tokenizer.php @@ -743,6 +743,7 @@ final class Tokenizer '>', '+', '-', + '~*', // https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-POSIX-REGEXP '*', '/', '!', diff --git a/tests/clihighlight.txt b/tests/clihighlight.txt index 75c8429..ceaf391 100644 --- a/tests/clihighlight.txt +++ b/tests/clihighlight.txt @@ -1211,3 +1211,6 @@ MY_NON_TOP_LEVEL_KEYWORD_FX_3(); --- SELECT vector1 <#> vector2 +--- +SELECT + text ~* '\w+' diff --git a/tests/compress.txt b/tests/compress.txt index f7fcba4..6ec44ca 100644 --- a/tests/compress.txt +++ b/tests/compress.txt @@ -115,3 +115,5 @@ CREATE TABLE t (c VARCHAR(20)) DEFAULT CHARACTER SET utf8mb4 ENGINE = InnoDB SELECT '{}'::json #> '{}' --- SELECT vector1 <#> vector2 +--- +SELECT text ~* '\w+' diff --git a/tests/format-highlight.html b/tests/format-highlight.html index a059de3..bafb6f2 100644 --- a/tests/format-highlight.html +++ b/tests/format-highlight.html @@ -1211,3 +1211,6 @@ ---
SELECT
   vector1 <#> vector2
+--- +
SELECT
+  text ~* '\w+'
diff --git a/tests/format.txt b/tests/format.txt index 868db4b..d3bea98 100644 --- a/tests/format.txt +++ b/tests/format.txt @@ -1209,3 +1209,6 @@ SELECT --- SELECT vector1 <#> vector2 +--- +SELECT + text ~* '\w+' diff --git a/tests/highlight.html b/tests/highlight.html index e988db0..9d24e6c 100644 --- a/tests/highlight.html +++ b/tests/highlight.html @@ -429,3 +429,5 @@
SELECT '{}'::json #> '{}'
---
SELECT vector1 <#> vector2
+--- +
SELECT text ~* '\w+'
diff --git a/tests/sql.sql b/tests/sql.sql index c0811e5..2c72c6f 100644 --- a/tests/sql.sql +++ b/tests/sql.sql @@ -429,3 +429,5 @@ CREATE TABLE t (c VARCHAR(20)) DEFAULT CHARACTER SET utf8mb4 ENGINE = InnoDB SELECT '{}'::json #> '{}' --- SELECT vector1 <#> vector2 +--- +SELECT text ~* '\w+'