From 639215a5c8fc3b3dc019b5a54d3e383c45f7f2d6 Mon Sep 17 00:00:00 2001
From: cemeceme <26171877+cemeceme@users.noreply.github.com>
Date: Wed, 11 Feb 2026 11:36:55 +0100
Subject: [PATCH 1/4] Implemented the matches string operation.
---
docs/lexicon.html | 9 +++++++++
rules/builtins.build_defs | 2 ++
src/parse/asp/builtins.go | 10 ++++++++++
3 files changed, 21 insertions(+)
diff --git a/docs/lexicon.html b/docs/lexicon.html
index 49ee4c266..7a2ecf710 100644
--- a/docs/lexicon.html
+++ b/docs/lexicon.html
@@ -509,6 +509,15 @@
- returns a copy of this string converted to lowercase.
+
+
+ matches(pattern)
+ - returns true if any part of the string matches the regular expression given by pattern (Go RE2 syntax); anchor the pattern with ^ and $ to require the whole string to match.
+
+
diff --git a/rules/builtins.build_defs b/rules/builtins.build_defs
index f405bff5b..1f62089b0 100644
--- a/rules/builtins.build_defs
+++ b/rules/builtins.build_defs
@@ -109,6 +109,8 @@ def upper(self:str) -> str:
pass
def lower(self:str) -> str:
pass
+def matches(self:str, pattern: str) -> bool:
+ pass
def fail(msg:str):
pass
diff --git a/src/parse/asp/builtins.go b/src/parse/asp/builtins.go
index d17cd7b2b..2f52243a4 100644
--- a/src/parse/asp/builtins.go
+++ b/src/parse/asp/builtins.go
@@ -7,6 +7,7 @@ import (
"io"
"path/filepath"
"reflect"
+ "regexp"
"slices"
"sort"
"strconv"
@@ -98,6 +99,7 @@ func registerBuiltins(s *scope) {
"count": setNativeCode(s, "count", strCount),
"upper": setNativeCode(s, "upper", strUpper),
"lower": setNativeCode(s, "lower", strLower),
+ "matches": setNativeCode(s, "matches", strMatches),
}
s.interpreter.stringMethods["format"].kwargs = true
s.interpreter.dictMethods = map[string]*pyFunc{
@@ -645,6 +647,14 @@ func strLower(s *scope, args []pyObject) pyObject {
return pyString(strings.ToLower(self))
}
+func strMatches(s *scope, args []pyObject) pyObject {
+ self := string(args[0].(pyString))
+ pattern := string(args[1].(pyString))
+ res, err := regexp.MatchString(pattern, self)
+ s.Assert(err == nil, "%s", err)
+ return newPyBool(res)
+}
+
func boolType(s *scope, args []pyObject) pyObject {
return newPyBool(args[0].IsTruthy())
}
From ea16d91308eec8e8a3cb706c08b1d0ae4b81ddaa Mon Sep 17 00:00:00 2001
From: cemeceme <26171877+cemeceme@users.noreply.github.com>
Date: Wed, 11 Feb 2026 14:47:06 +0100
Subject: [PATCH 2/4] Added string builtins tests.
---
test/builtins/BUILD | 7 +++++
test/builtins/strings/.plzconfig | 2 ++
test/builtins/strings/BUILD_FILE | 45 ++++++++++++++++++++++++++++++++
3 files changed, 54 insertions(+)
create mode 100644 test/builtins/BUILD
create mode 100644 test/builtins/strings/.plzconfig
create mode 100644 test/builtins/strings/BUILD_FILE
diff --git a/test/builtins/BUILD b/test/builtins/BUILD
new file mode 100644
index 000000000..953b9ef5e
--- /dev/null
+++ b/test/builtins/BUILD
@@ -0,0 +1,7 @@
+subinclude("//test/build_defs")
+
+please_repo_e2e_test(
+ name = "strings_test",
+ plz_command = "plz build",
+ repo = "strings",
+)
\ No newline at end of file
diff --git a/test/builtins/strings/.plzconfig b/test/builtins/strings/.plzconfig
new file mode 100644
index 000000000..ea85e4f73
--- /dev/null
+++ b/test/builtins/strings/.plzconfig
@@ -0,0 +1,2 @@
+[parse]
+BuildFileName = BUILD_FILE
diff --git a/test/builtins/strings/BUILD_FILE b/test/builtins/strings/BUILD_FILE
new file mode 100644
index 000000000..0de263a69
--- /dev/null
+++ b/test/builtins/strings/BUILD_FILE
@@ -0,0 +1,45 @@
+
+assert ",".join(["a","b","c"]) == "a,b,c"
+
+assert "a,b,c".split(",") == ["a", "b", "c"]
+
+assert "abc".replace("bc", "ab") == "aab"
+
+pre, sep, post = "a,b,c".partition(",")
+assert pre == "a" and sep == "," and post == "b,c"
+
+pre, sep, post = "a,b,c".rpartition(",")
+assert pre == "a,b" and sep == "," and post == "c"
+
+assert "abc".startswith("ab") == True
+
+assert "abc".endswith("bc") == True
+
+assert "a{var1},b{var2},c{var3}".format(var1="a", var2=2, var3=3)
+
+assert "abc".lstrip("a") == "bc"
+
+assert "abc".rstrip("c") == "ab"
+
+assert "abcba".strip("a") == "bcb"
+
+assert "abc".removeprefix("ab") == "c"
+
+assert "abc".removesuffix("bc") == "a"
+
+assert "abcba".find("b") == 1
+
+assert "abcba".rfind("b") == 3
+
+assert "abcba".count("b") == 2
+
+assert "abc".upper() == "ABC"
+
+assert "ABC".lower() == "abc"
+
+
+assert "abc".matches("a.c")
+
+assert "abbbbbbc".matches("a.*c")
+
+assert not "abc".matches("$b")
\ No newline at end of file
From 75cd2c3551954c2bdfd0f4b1f54dcf4ae95cd523 Mon Sep 17 00:00:00 2001
From: cemeceme <26171877+cemeceme@users.noreply.github.com>
Date: Wed, 11 Feb 2026 15:48:33 +0100
Subject: [PATCH 3/4] Added a cache map for regex matching.
---
src/parse/asp/builtins.go | 11 ++++++++---
src/parse/asp/interpreter.go | 12 ++++++++----
2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/src/parse/asp/builtins.go b/src/parse/asp/builtins.go
index 2f52243a4..bc7354468 100644
--- a/src/parse/asp/builtins.go
+++ b/src/parse/asp/builtins.go
@@ -650,9 +650,14 @@ func strLower(s *scope, args []pyObject) pyObject {
func strMatches(s *scope, args []pyObject) pyObject {
self := string(args[0].(pyString))
pattern := string(args[1].(pyString))
- res, err := regexp.MatchString(pattern, self)
- s.Assert(err == nil, "%s", err)
- return newPyBool(res)
+ compiledRegex, found := s.interpreter.regexCache[pattern]
+ if !found {
+ compiled, err := regexp.Compile(pattern)
+ s.Assert(err == nil, "%s", err)
+ s.interpreter.regexCache[pattern] = compiled
+ compiledRegex = compiled
+ }
+ return newPyBool(compiledRegex.MatchString(self))
}
func boolType(s *scope, args []pyObject) pyObject {
diff --git a/src/parse/asp/interpreter.go b/src/parse/asp/interpreter.go
index 4d8199439..cd01a6a40 100644
--- a/src/parse/asp/interpreter.go
+++ b/src/parse/asp/interpreter.go
@@ -6,6 +6,7 @@ import (
"iter"
"path/filepath"
"reflect"
+ "regexp"
"runtime/debug"
"runtime/pprof"
"strings"
@@ -31,6 +32,8 @@ type interpreter struct {
limiter semaphore
stringMethods, dictMethods, configMethods map[string]*pyFunc
+
+ regexCache map[string]*regexp.Regexp
}
// newInterpreter creates and returns a new interpreter instance.
@@ -42,10 +45,11 @@ func newInterpreter(state *core.BuildState, p *Parser) *interpreter {
locals: map[string]pyObject{},
}
i := &interpreter{
- scope: s,
- parser: p,
- configs: map[*core.BuildState]*pyConfig{},
- limiter: make(semaphore, state.Config.Parse.NumThreads),
+ scope: s,
+ parser: p,
+ configs: map[*core.BuildState]*pyConfig{},
+ limiter: make(semaphore, state.Config.Parse.NumThreads),
+ regexCache: map[string]*regexp.Regexp{},
}
// If we're creating an interpreter for a subrepo, we should share the subinclude cache.
if p.interpreter != nil {
From c337c17c0396d797500c8b7fde2c7969b7ab582d Mon Sep 17 00:00:00 2001
From: cemeceme <26171877+cemeceme@users.noreply.github.com>
Date: Thu, 12 Feb 2026 11:15:53 +0100
Subject: [PATCH 4/4] Use cmap for regex cache.
---
src/parse/asp/builtins.go | 7 ++++---
src/parse/asp/interpreter.go | 4 ++--
test/builtins/BUILD | 2 +-
test/builtins/strings/BUILD_FILE | 8 ++++----
4 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/src/parse/asp/builtins.go b/src/parse/asp/builtins.go
index bc7354468..353478b07 100644
--- a/src/parse/asp/builtins.go
+++ b/src/parse/asp/builtins.go
@@ -650,11 +650,12 @@ func strLower(s *scope, args []pyObject) pyObject {
func strMatches(s *scope, args []pyObject) pyObject {
self := string(args[0].(pyString))
pattern := string(args[1].(pyString))
- compiledRegex, found := s.interpreter.regexCache[pattern]
- if !found {
+ compiledRegex := s.interpreter.regexCache.Get(pattern)
+ if compiledRegex == nil {
compiled, err := regexp.Compile(pattern)
s.Assert(err == nil, "%s", err)
- s.interpreter.regexCache[pattern] = compiled
+ // No need to check whether another task cached this pattern first: compiling the same pattern always yields an equivalent regex.
+ s.interpreter.regexCache.Add(pattern, compiled)
compiledRegex = compiled
}
return newPyBool(compiledRegex.MatchString(self))
diff --git a/src/parse/asp/interpreter.go b/src/parse/asp/interpreter.go
index cd01a6a40..93e4ca0a3 100644
--- a/src/parse/asp/interpreter.go
+++ b/src/parse/asp/interpreter.go
@@ -33,7 +33,7 @@ type interpreter struct {
stringMethods, dictMethods, configMethods map[string]*pyFunc
- regexCache map[string]*regexp.Regexp
+ regexCache *cmap.Map[string, *regexp.Regexp]
}
// newInterpreter creates and returns a new interpreter instance.
@@ -49,7 +49,7 @@ func newInterpreter(state *core.BuildState, p *Parser) *interpreter {
parser: p,
configs: map[*core.BuildState]*pyConfig{},
limiter: make(semaphore, state.Config.Parse.NumThreads),
- regexCache: map[string]*regexp.Regexp{},
+ regexCache: cmap.New[string, *regexp.Regexp](cmap.SmallShardCount, cmap.XXHash),
}
// If we're creating an interpreter for a subrepo, we should share the subinclude cache.
if p.interpreter != nil {
diff --git a/test/builtins/BUILD b/test/builtins/BUILD
index 953b9ef5e..ccb21ec42 100644
--- a/test/builtins/BUILD
+++ b/test/builtins/BUILD
@@ -4,4 +4,4 @@ please_repo_e2e_test(
name = "strings_test",
plz_command = "plz build",
repo = "strings",
-)
\ No newline at end of file
+)
diff --git a/test/builtins/strings/BUILD_FILE b/test/builtins/strings/BUILD_FILE
index 0de263a69..8d7275107 100644
--- a/test/builtins/strings/BUILD_FILE
+++ b/test/builtins/strings/BUILD_FILE
@@ -15,11 +15,11 @@ assert "abc".startswith("ab") == True
assert "abc".endswith("bc") == True
-assert "a{var1},b{var2},c{var3}".format(var1="a", var2=2, var3=3)
+assert "a{var1},b{var2},c{var3}".format(var1="a", var2=2, var3=[3]) == "aa,b2,c[3]"
-assert "abc".lstrip("a") == "bc"
+assert "abcba".lstrip("a") == "bcba"
-assert "abc".rstrip("c") == "ab"
+assert "abcba".rstrip("a") == "abcb"
assert "abcba".strip("a") == "bcb"
@@ -42,4 +42,4 @@ assert "abc".matches("a.c")
assert "abbbbbbc".matches("a.*c")
-assert not "abc".matches("$b")
\ No newline at end of file
+assert not "abc".matches("$b")