diff --git a/docs/lexicon.html b/docs/lexicon.html index 49ee4c266..7a2ecf710 100644 --- a/docs/lexicon.html +++ b/docs/lexicon.html @@ -509,6 +509,15 @@

- returns a copy of this string converted to lowercase. +
  • + + matches(pattern) + - returns true if any part of the string matches the regular expression given by pattern; anchor the pattern with ^ and $ to require a full-string match. + +
  • diff --git a/rules/builtins.build_defs b/rules/builtins.build_defs index f405bff5b..1f62089b0 100644 --- a/rules/builtins.build_defs +++ b/rules/builtins.build_defs @@ -109,6 +109,8 @@ def upper(self:str) -> str: pass def lower(self:str) -> str: pass +def matches(self:str, pattern: str) -> bool: + pass def fail(msg:str): pass diff --git a/src/parse/asp/builtins.go b/src/parse/asp/builtins.go index d17cd7b2b..353478b07 100644 --- a/src/parse/asp/builtins.go +++ b/src/parse/asp/builtins.go @@ -7,6 +7,7 @@ import ( "io" "path/filepath" "reflect" + "regexp" "slices" "sort" "strconv" @@ -98,6 +99,7 @@ func registerBuiltins(s *scope) { "count": setNativeCode(s, "count", strCount), "upper": setNativeCode(s, "upper", strUpper), "lower": setNativeCode(s, "lower", strLower), + "matches": setNativeCode(s, "matches", strMatches), } s.interpreter.stringMethods["format"].kwargs = true s.interpreter.dictMethods = map[string]*pyFunc{ @@ -645,6 +647,20 @@ func strLower(s *scope, args []pyObject) pyObject { return pyString(strings.ToLower(self)) } +func strMatches(s *scope, args []pyObject) pyObject { + self := string(args[0].(pyString)) + pattern := string(args[1].(pyString)) + compiledRegex := s.interpreter.regexCache.Get(pattern) + if compiledRegex == nil { + compiled, err := regexp.Compile(pattern) + s.Assert(err == nil, "%s", err) + // We don't need to check if another task inserted the regex first, as it will be an identical result. 
+ s.interpreter.regexCache.Add(pattern, compiled) + compiledRegex = compiled + } + return newPyBool(compiledRegex.MatchString(self)) +} + func boolType(s *scope, args []pyObject) pyObject { return newPyBool(args[0].IsTruthy()) } diff --git a/src/parse/asp/interpreter.go b/src/parse/asp/interpreter.go index 4d8199439..93e4ca0a3 100644 --- a/src/parse/asp/interpreter.go +++ b/src/parse/asp/interpreter.go @@ -6,6 +6,7 @@ import ( "iter" "path/filepath" "reflect" + "regexp" "runtime/debug" "runtime/pprof" "strings" @@ -31,6 +32,8 @@ type interpreter struct { limiter semaphore stringMethods, dictMethods, configMethods map[string]*pyFunc + + regexCache *cmap.Map[string, *regexp.Regexp] } // newInterpreter creates and returns a new interpreter instance. @@ -42,10 +45,11 @@ func newInterpreter(state *core.BuildState, p *Parser) *interpreter { locals: map[string]pyObject{}, } i := &interpreter{ - scope: s, - parser: p, - configs: map[*core.BuildState]*pyConfig{}, - limiter: make(semaphore, state.Config.Parse.NumThreads), + scope: s, + parser: p, + configs: map[*core.BuildState]*pyConfig{}, + limiter: make(semaphore, state.Config.Parse.NumThreads), + regexCache: cmap.New[string, *regexp.Regexp](cmap.SmallShardCount, cmap.XXHash), } // If we're creating an interpreter for a subrepo, we should share the subinclude cache. 
if p.interpreter != nil { diff --git a/test/builtins/BUILD b/test/builtins/BUILD new file mode 100644 index 000000000..ccb21ec42 --- /dev/null +++ b/test/builtins/BUILD @@ -0,0 +1,7 @@ +subinclude("//test/build_defs") + +please_repo_e2e_test( + name = "strings_test", + plz_command = "plz build", + repo = "strings", +) diff --git a/test/builtins/strings/.plzconfig b/test/builtins/strings/.plzconfig new file mode 100644 index 000000000..ea85e4f73 --- /dev/null +++ b/test/builtins/strings/.plzconfig @@ -0,0 +1,2 @@ +[parse] +BuildFileName = BUILD_FILE diff --git a/test/builtins/strings/BUILD_FILE b/test/builtins/strings/BUILD_FILE new file mode 100644 index 000000000..8d7275107 --- /dev/null +++ b/test/builtins/strings/BUILD_FILE @@ -0,0 +1,45 @@ + +assert ",".join(["a","b","c"]) == "a,b,c" + +assert "a,b,c".split(",") == ["a", "b", "c"] + +assert "abc".replace("bc", "ab") == "aab" + +pre, sep, post = "a,b,c".partition(",") +assert pre == "a" and sep == "," and post == "b,c" + +pre, sep, post = "a,b,c".rpartition(",") +assert pre == "a,b" and sep == "," and post == "c" + +assert "abc".startswith("ab") == True + +assert "abc".endswith("bc") == True + +assert "a{var1},b{var2},c{var3}".format(var1="a", var2=2, var3=[3]) == "aa,b2,c[3]" + +assert "abcba".lstrip("a") == "bcba" + +assert "abcba".rstrip("a") == "abcb" + +assert "abcba".strip("a") == "bcb" + +assert "abc".removeprefix("ab") == "c" + +assert "abc".removesuffix("bc") == "a" + +assert "abcba".find("b") == 1 + +assert "abcba".rfind("b") == 3 + +assert "abcba".count("b") == 2 + +assert "abc".upper() == "ABC" + +assert "ABC".lower() == "abc" + + +assert "abc".matches("a.c") + +assert "abbbbbbc".matches("a.*c") + +assert not "abc".matches("$b")