From 639215a5c8fc3b3dc019b5a54d3e383c45f7f2d6 Mon Sep 17 00:00:00 2001 From: cemeceme <26171877+cemeceme@users.noreply.github.com> Date: Wed, 11 Feb 2026 11:36:55 +0100 Subject: [PATCH 1/4] Implemented the matches string operation. --- docs/lexicon.html | 9 +++++++++ rules/builtins.build_defs | 2 ++ src/parse/asp/builtins.go | 10 ++++++++++ 3 files changed, 21 insertions(+) diff --git a/docs/lexicon.html b/docs/lexicon.html index 49ee4c266..7a2ecf710 100644 --- a/docs/lexicon.html +++ b/docs/lexicon.html @@ -509,6 +509,15 @@

- returns a copy of this string converted to lowercase. +
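  • + + matches(pattern) + - returns true if the string contains a match for the regular expression given by pattern (Go RE2 syntax); the match is unanchored, so use ^ and $ in pattern to require that the whole string matches. + +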
  • diff --git a/rules/builtins.build_defs b/rules/builtins.build_defs index f405bff5b..1f62089b0 100644 --- a/rules/builtins.build_defs +++ b/rules/builtins.build_defs @@ -109,6 +109,8 @@ def upper(self:str) -> str: pass def lower(self:str) -> str: pass +def matches(self:str, pattern: str) -> bool: + pass def fail(msg:str): pass diff --git a/src/parse/asp/builtins.go b/src/parse/asp/builtins.go index d17cd7b2b..2f52243a4 100644 --- a/src/parse/asp/builtins.go +++ b/src/parse/asp/builtins.go @@ -7,6 +7,7 @@ import ( "io" "path/filepath" "reflect" + "regexp" "slices" "sort" "strconv" @@ -98,6 +99,7 @@ func registerBuiltins(s *scope) { "count": setNativeCode(s, "count", strCount), "upper": setNativeCode(s, "upper", strUpper), "lower": setNativeCode(s, "lower", strLower), + "matches": setNativeCode(s, "matches", strMatches), } s.interpreter.stringMethods["format"].kwargs = true s.interpreter.dictMethods = map[string]*pyFunc{ @@ -645,6 +647,14 @@ func strLower(s *scope, args []pyObject) pyObject { return pyString(strings.ToLower(self)) } +func strMatches(s *scope, args []pyObject) pyObject { + self := string(args[0].(pyString)) + pattern := string(args[1].(pyString)) + res, err := regexp.MatchString(pattern, self) + s.Assert(err == nil, "%s", err) + return newPyBool(res) +} + func boolType(s *scope, args []pyObject) pyObject { return newPyBool(args[0].IsTruthy()) } From ea16d91308eec8e8a3cb706c08b1d0ae4b81ddaa Mon Sep 17 00:00:00 2001 From: cemeceme <26171877+cemeceme@users.noreply.github.com> Date: Wed, 11 Feb 2026 14:47:06 +0100 Subject: [PATCH 2/4] Added string builtins tests. 
--- test/builtins/BUILD | 7 +++++ test/builtins/strings/.plzconfig | 2 ++ test/builtins/strings/BUILD_FILE | 45 ++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 test/builtins/BUILD create mode 100644 test/builtins/strings/.plzconfig create mode 100644 test/builtins/strings/BUILD_FILE diff --git a/test/builtins/BUILD b/test/builtins/BUILD new file mode 100644 index 000000000..953b9ef5e --- /dev/null +++ b/test/builtins/BUILD @@ -0,0 +1,7 @@ +subinclude("//test/build_defs") + +please_repo_e2e_test( + name = "strings_test", + plz_command = "plz build", + repo = "strings", +) \ No newline at end of file diff --git a/test/builtins/strings/.plzconfig b/test/builtins/strings/.plzconfig new file mode 100644 index 000000000..ea85e4f73 --- /dev/null +++ b/test/builtins/strings/.plzconfig @@ -0,0 +1,2 @@ +[parse] +BuildFileName = BUILD_FILE diff --git a/test/builtins/strings/BUILD_FILE b/test/builtins/strings/BUILD_FILE new file mode 100644 index 000000000..0de263a69 --- /dev/null +++ b/test/builtins/strings/BUILD_FILE @@ -0,0 +1,45 @@ + +assert ",".join(["a","b","c"]) == "a,b,c" + +assert "a,b,c".split(",") == ["a", "b", "c"] + +assert "abc".replace("bc", "ab") == "aab" + +pre, sep, post = "a,b,c".partition(",") +assert pre == "a" and sep == "," and post == "b,c" + +pre, sep, post = "a,b,c".rpartition(",") +assert pre == "a,b" and sep == "," and post == "c" + +assert "abc".startswith("ab") == True + +assert "abc".endswith("bc") == True + +assert "a{var1},b{var2},c{var3}".format(var1="a", var2=2, var3=3) + +assert "abc".lstrip("a") == "bc" + +assert "abc".rstrip("c") == "ab" + +assert "abcba".strip("a") == "bcb" + +assert "abc".removeprefix("ab") == "c" + +assert "abc".removesuffix("bc") == "a" + +assert "abcba".find("b") == 1 + +assert "abcba".rfind("b") == 3 + +assert "abcba".count("b") == 2 + +assert "abc".upper() == "ABC" + +assert "ABC".lower() == "abc" + + +assert "abc".matches("a.c") + +assert "abbbbbbc".matches("a.*c") + +assert 
not "abc".matches("$b") \ No newline at end of file From 75cd2c3551954c2bdfd0f4b1f54dcf4ae95cd523 Mon Sep 17 00:00:00 2001 From: cemeceme <26171877+cemeceme@users.noreply.github.com> Date: Wed, 11 Feb 2026 15:48:33 +0100 Subject: [PATCH 3/4] Added a cache map for regex matching. --- src/parse/asp/builtins.go | 11 ++++++++--- src/parse/asp/interpreter.go | 12 ++++++++---- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/parse/asp/builtins.go b/src/parse/asp/builtins.go index 2f52243a4..bc7354468 100644 --- a/src/parse/asp/builtins.go +++ b/src/parse/asp/builtins.go @@ -650,9 +650,14 @@ func strLower(s *scope, args []pyObject) pyObject { func strMatches(s *scope, args []pyObject) pyObject { self := string(args[0].(pyString)) pattern := string(args[1].(pyString)) - res, err := regexp.MatchString(pattern, self) - s.Assert(err == nil, "%s", err) - return newPyBool(res) + compiledRegex, found := s.interpreter.regexCache[pattern] + if !found { + compiled, err := regexp.Compile(pattern) + s.Assert(err == nil, "%s", err) + s.interpreter.regexCache[pattern] = compiled + compiledRegex = compiled + } + return newPyBool(compiledRegex.MatchString(self)) } func boolType(s *scope, args []pyObject) pyObject { diff --git a/src/parse/asp/interpreter.go b/src/parse/asp/interpreter.go index 4d8199439..cd01a6a40 100644 --- a/src/parse/asp/interpreter.go +++ b/src/parse/asp/interpreter.go @@ -6,6 +6,7 @@ import ( "iter" "path/filepath" "reflect" + "regexp" "runtime/debug" "runtime/pprof" "strings" @@ -31,6 +32,8 @@ type interpreter struct { limiter semaphore stringMethods, dictMethods, configMethods map[string]*pyFunc + + regexCache map[string]*regexp.Regexp } // newInterpreter creates and returns a new interpreter instance. 
@@ -42,10 +45,11 @@ func newInterpreter(state *core.BuildState, p *Parser) *interpreter { locals: map[string]pyObject{}, } i := &interpreter{ - scope: s, - parser: p, - configs: map[*core.BuildState]*pyConfig{}, - limiter: make(semaphore, state.Config.Parse.NumThreads), + scope: s, + parser: p, + configs: map[*core.BuildState]*pyConfig{}, + limiter: make(semaphore, state.Config.Parse.NumThreads), + regexCache: map[string]*regexp.Regexp{}, } // If we're creating an interpreter for a subrepo, we should share the subinclude cache. if p.interpreter != nil { From c337c17c0396d797500c8b7fde2c7969b7ab582d Mon Sep 17 00:00:00 2001 From: cemeceme <26171877+cemeceme@users.noreply.github.com> Date: Thu, 12 Feb 2026 11:15:53 +0100 Subject: [PATCH 4/4] Use cmap for regex cache. --- src/parse/asp/builtins.go | 7 ++++--- src/parse/asp/interpreter.go | 4 ++-- test/builtins/BUILD | 2 +- test/builtins/strings/BUILD_FILE | 8 ++++---- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/parse/asp/builtins.go b/src/parse/asp/builtins.go index bc7354468..353478b07 100644 --- a/src/parse/asp/builtins.go +++ b/src/parse/asp/builtins.go @@ -650,11 +650,12 @@ func strLower(s *scope, args []pyObject) pyObject { func strMatches(s *scope, args []pyObject) pyObject { self := string(args[0].(pyString)) pattern := string(args[1].(pyString)) - compiledRegex, found := s.interpreter.regexCache[pattern] - if !found { + compiledRegex := s.interpreter.regexCache.Get(pattern) + if compiledRegex == nil { compiled, err := regexp.Compile(pattern) s.Assert(err == nil, "%s", err) - s.interpreter.regexCache[pattern] = compiled + // We don't need to check if another task inserted the regex first, as it will be an identical result. 
+ s.interpreter.regexCache.Add(pattern, compiled) compiledRegex = compiled } return newPyBool(compiledRegex.MatchString(self)) diff --git a/src/parse/asp/interpreter.go b/src/parse/asp/interpreter.go index cd01a6a40..93e4ca0a3 100644 --- a/src/parse/asp/interpreter.go +++ b/src/parse/asp/interpreter.go @@ -33,7 +33,7 @@ type interpreter struct { stringMethods, dictMethods, configMethods map[string]*pyFunc - regexCache map[string]*regexp.Regexp + regexCache *cmap.Map[string, *regexp.Regexp] } // newInterpreter creates and returns a new interpreter instance. @@ -49,7 +49,7 @@ func newInterpreter(state *core.BuildState, p *Parser) *interpreter { parser: p, configs: map[*core.BuildState]*pyConfig{}, limiter: make(semaphore, state.Config.Parse.NumThreads), - regexCache: map[string]*regexp.Regexp{}, + regexCache: cmap.New[string, *regexp.Regexp](cmap.SmallShardCount, cmap.XXHash), } // If we're creating an interpreter for a subrepo, we should share the subinclude cache. if p.interpreter != nil { diff --git a/test/builtins/BUILD b/test/builtins/BUILD index 953b9ef5e..ccb21ec42 100644 --- a/test/builtins/BUILD +++ b/test/builtins/BUILD @@ -4,4 +4,4 @@ please_repo_e2e_test( name = "strings_test", plz_command = "plz build", repo = "strings", -) \ No newline at end of file +) diff --git a/test/builtins/strings/BUILD_FILE b/test/builtins/strings/BUILD_FILE index 0de263a69..8d7275107 100644 --- a/test/builtins/strings/BUILD_FILE +++ b/test/builtins/strings/BUILD_FILE @@ -15,11 +15,11 @@ assert "abc".startswith("ab") == True assert "abc".endswith("bc") == True -assert "a{var1},b{var2},c{var3}".format(var1="a", var2=2, var3=3) +assert "a{var1},b{var2},c{var3}".format(var1="a", var2=2, var3=[3]) == "aa,b2,c[3]" -assert "abc".lstrip("a") == "bc" +assert "abcba".lstrip("a") == "bcba" -assert "abc".rstrip("c") == "ab" +assert "abcba".rstrip("a") == "abcb" assert "abcba".strip("a") == "bcb" @@ -42,4 +42,4 @@ assert "abc".matches("a.c") assert "abbbbbbc".matches("a.*c") -assert not 
"abc".matches("$b") \ No newline at end of file +assert not "abc".matches("$b")