From 9fe339b4af9ee01324af84505666658d97a7f76a Mon Sep 17 00:00:00 2001 From: Patrick Boyd Date: Tue, 6 May 2025 13:04:06 -0500 Subject: [PATCH] Initial changes to optimize performance of govaluate --- EvaluableExpression.go | 26 +++++++- benchmarks_test.go | 29 +++++---- evaluationStage.go | 2 +- lexerStream.go | 38 +++++++---- parsing.go | 142 ++++++++++++++++++++++++++++------------- stagePlanner.go | 1 + tokenStream.go | 29 ++++++--- 7 files changed, 186 insertions(+), 81 deletions(-) diff --git a/EvaluableExpression.go b/EvaluableExpression.go index a5fe50d..3300a03 100644 --- a/EvaluableExpression.go +++ b/EvaluableExpression.go @@ -3,6 +3,7 @@ package govaluate import ( "errors" "fmt" + "sync" ) const isoDateFormat string = "2006-01-02T15:04:05.999999999Z0700" @@ -137,6 +138,12 @@ func (this EvaluableExpression) Evaluate(parameters map[string]interface{}) (int return this.Eval(MapParameters(parameters)) } +var sanitizedParamsPool = sync.Pool{ + New: func() interface{} { + return &sanitizedParameters{} + }, +} + /* Runs the entire expression using the given [parameters]. e.g., If the expression contains a reference to the variable "foo", it will be taken from `parameters.Get("foo")`. 
@@ -154,13 +161,21 @@ func (this EvaluableExpression) Eval(parameters Parameters) (interface{}, error) return nil, nil } + free := false if parameters != nil { - parameters = &sanitizedParameters{parameters} + free = true + tmp := sanitizedParamsPool.Get().(*sanitizedParameters) + tmp.orig = parameters + parameters = tmp } else { parameters = DUMMY_PARAMETERS } - return this.evaluateStage(this.evaluationStages, parameters) + ret, err := this.evaluateStage(this.evaluationStages, parameters) + if free { + sanitizedParamsPool.Put(parameters) + } + return ret, err } func (this EvaluableExpression) evaluateStage(stage *evaluationStage, parameters Parameters) (interface{}, error) { @@ -274,3 +289,10 @@ func (this EvaluableExpression) Vars() []string { } return varlist } + +/* +Removes the tokens from the EvaluableExpression. This will cause the Tokens() and Vars() functions to no longer operate, but will save memory. +*/ +func (this *EvaluableExpression) CleanupTokens() { + this.tokens = this.tokens[:0] +} diff --git a/benchmarks_test.go b/benchmarks_test.go index a7f99c8..98f62a6 100644 --- a/benchmarks_test.go +++ b/benchmarks_test.go @@ -8,7 +8,7 @@ import ( Serves as a "water test" to give an idea of the general overhead of parsing */ func BenchmarkSingleParse(bench *testing.B) { - + bench.ReportAllocs() for i := 0; i < bench.N; i++ { _, _ = NewEvaluableExpression("1") } @@ -19,7 +19,7 @@ The most common use case, a single variable, modified slightly, compared to a co This is the "expected" use case of govaluate. */ func BenchmarkSimpleParse(bench *testing.B) { - + bench.ReportAllocs() for i := 0; i < bench.N; i++ { _, _ = NewEvaluableExpression("(requests_made * requests_succeeded / 100) >= 90") } @@ -29,6 +29,7 @@ func BenchmarkSimpleParse(bench *testing.B) { Benchmarks all syntax possibilities in one expression. */ func BenchmarkFullParse(bench *testing.B) { + bench.ReportAllocs() // represents all the major syntax possibilities. 
expression := "2 > 1 &&" + "'something' != 'nothing' || " + @@ -45,7 +46,7 @@ func BenchmarkFullParse(bench *testing.B) { Benchmarks the bare-minimum evaluation time */ func BenchmarkEvaluationSingle(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("1") bench.ResetTimer() @@ -58,7 +59,7 @@ func BenchmarkEvaluationSingle(bench *testing.B) { Benchmarks evaluation times of literals (no variables, no modifiers) */ func BenchmarkEvaluationNumericLiteral(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("(2) > (1)") bench.ResetTimer() @@ -71,7 +72,7 @@ func BenchmarkEvaluationNumericLiteral(bench *testing.B) { Benchmarks evaluation times of literals with modifiers */ func BenchmarkEvaluationLiteralModifiers(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("(2) + (2) == (4)") bench.ResetTimer() @@ -81,7 +82,7 @@ func BenchmarkEvaluationLiteralModifiers(bench *testing.B) { } func BenchmarkEvaluationParameter(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("requests_made") parameters := map[string]interface{}{ "requests_made": 99.0, @@ -97,7 +98,7 @@ func BenchmarkEvaluationParameter(bench *testing.B) { Benchmarks evaluation times of parameters */ func BenchmarkEvaluationParameters(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("requests_made > requests_succeeded") parameters := map[string]interface{}{ "requests_made": 99.0, @@ -114,7 +115,7 @@ func BenchmarkEvaluationParameters(bench *testing.B) { Benchmarks evaluation times of parameters + literals with modifiers */ func BenchmarkEvaluationParametersModifiers(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("(requests_made * requests_succeeded / 100) >= 90") parameters := map[string]interface{}{ "requests_made": 99.0, @@ -134,6 +135,7 @@ This is largely a canary benchmark to make sure that any syntax 
additions don't unnecessarily bloat the evaluation time. */ func BenchmarkComplexExpression(bench *testing.B) { + bench.ReportAllocs() expressionString := "2 > 1 &&" + "'something' != 'nothing' || " + "'2014-01-20' < 'Wed Jul 8 23:07:35 MDT 2015' && " + @@ -160,6 +162,7 @@ and possible performance pitfalls. This test doesn't aim to be comprehensive aga it is primarily concerned with tracking how much longer it takes to compile a regex at evaluation-time than during parse-time. */ func BenchmarkRegexExpression(bench *testing.B) { + bench.ReportAllocs() expressionString := "(foo !~ bar) && (foobar =~ oba)" expression, _ := NewEvaluableExpression(expressionString) @@ -182,7 +185,7 @@ are actually being precompiled. Also demonstrates that (generally) compiling a regex at evaluation-time takes an order of magnitude more time than pre-compiling. */ func BenchmarkConstantRegexExpression(bench *testing.B) { - + bench.ReportAllocs() expressionString := "(foo !~ '[bB]az') && (bar =~ '[bB]ar')" expression, _ := NewEvaluableExpression(expressionString) @@ -198,7 +201,7 @@ func BenchmarkConstantRegexExpression(bench *testing.B) { } func BenchmarkAccessors(bench *testing.B) { - + bench.ReportAllocs() expressionString := "foo.Int" expression, _ := NewEvaluableExpression(expressionString) @@ -209,7 +212,7 @@ func BenchmarkAccessors(bench *testing.B) { } func BenchmarkAccessorMethod(bench *testing.B) { - + bench.ReportAllocs() expressionString := "foo.Func()" expression, _ := NewEvaluableExpression(expressionString) @@ -220,7 +223,7 @@ func BenchmarkAccessorMethod(bench *testing.B) { } func BenchmarkAccessorMethodParams(bench *testing.B) { - + bench.ReportAllocs() expressionString := "foo.FuncArgStr('bonk')" expression, _ := NewEvaluableExpression(expressionString) @@ -231,7 +234,7 @@ func BenchmarkAccessorMethodParams(bench *testing.B) { } func BenchmarkNestedAccessors(bench *testing.B) { - + bench.ReportAllocs() expressionString := "foo.Nested.Funk" expression, _ := 
NewEvaluableExpression(expressionString) diff --git a/evaluationStage.go b/evaluationStage.go index a22ade8..92e813a 100644 --- a/evaluationStage.go +++ b/evaluationStage.go @@ -184,7 +184,7 @@ func regexStage(left interface{}, right interface{}, parameters Parameters) (int pattern = right } - return pattern.Match([]byte(left.(string))), nil + return pattern.MatchString(left.(string)), nil } func notRegexStage(left interface{}, right interface{}, parameters Parameters) (interface{}, error) { diff --git a/lexerStream.go b/lexerStream.go index c6ed76e..b0be027 100644 --- a/lexerStream.go +++ b/lexerStream.go @@ -1,28 +1,35 @@ package govaluate +import "sync" + type lexerStream struct { - source []rune - position int - length int + sourceString string + source []rune + position int + length int } -func newLexerStream(source string) *lexerStream { - - var ret *lexerStream - var runes []rune +var lexerStreamPool = sync.Pool{ + New: func() interface{} { + return new(lexerStream) + }, +} +func newLexerStream(source string) *lexerStream { + ret := lexerStreamPool.Get().(*lexerStream) + if ret.source == nil { + ret.source = make([]rune, 0, len(source)) + } for _, character := range source { - runes = append(runes, character) + ret.source = append(ret.source, character) } - - ret = new(lexerStream) - ret.source = runes - ret.length = len(runes) + ret.sourceString = source + ret.position = 0 + ret.length = len(ret.source) return ret } func (this *lexerStream) readCharacter() rune { - character := this.source[this.position] this.position += 1 return character @@ -35,3 +42,8 @@ func (this *lexerStream) rewind(amount int) { func (this lexerStream) canRead() bool { return this.position < this.length } + +func (this *lexerStream) close() { + this.source = this.source[:0] + lexerStreamPool.Put(this) +} diff --git a/parsing.go b/parsing.go index dae78f7..a37f259 100644 --- a/parsing.go +++ b/parsing.go @@ -7,13 +7,19 @@ import ( "regexp" "strconv" "strings" + "sync" "time" "unicode" 
+ "unicode/utf8" ) -func parseTokens(expression string, functions map[string]ExpressionFunction) ([]ExpressionToken, error) { +var ( + averageTokens = 1 // FIXME(review): averageTokens and samples are package globals mutated on every parse; concurrent NewEvaluableExpression calls race — guard with a mutex or use atomics + samples = make([]int, 0, 10) +) - var ret []ExpressionToken +func parseTokens(expression string, functions map[string]ExpressionFunction) ([]ExpressionToken, error) { + ret := make([]ExpressionToken, 0, averageTokens) var token ExpressionToken var stream *lexerStream var state lexerState @@ -43,6 +49,18 @@ func parseTokens(expression string, functions map[string]ExpressionFunction) ([] // append this valid token ret = append(ret, token) } + stream.close() + if len(samples) == cap(samples) { + copy(samples, samples[1:]) + samples[len(samples)-1] = len(ret) + } else { + samples = append(samples, len(ret)) + } + total := 0 + for _, val := range samples { + total += val + } + averageTokens = total / len(samples) err = checkBalance(ret) if err != nil { @@ -140,30 +158,23 @@ func readToken(stream *lexerStream, state lexerState, functions map[string]Expre if unicode.IsLetter(character) { tokenString = readTokenUntilFalse(stream, isVariableName) - - tokenValue = tokenString - kind = VARIABLE - - // boolean? - if tokenValue == "true" { - + switch tokenString { + case "true": kind = BOOLEAN tokenValue = true - } else { - - if tokenValue == "false" { - - kind = BOOLEAN - tokenValue = false - } - } - - // textual operator? - if tokenValue == "in" || tokenValue == "IN" { - + case "false": + kind = BOOLEAN + tokenValue = false + case "in": + fallthrough + case "IN": // force lower case for consistency tokenValue = "in" kind = COMPARATOR + default: + // This causes an alloc, avoid it if we can + tokenValue = tokenString + kind = VARIABLE } // function?
@@ -284,37 +295,50 @@ func readTokenUntilFalse(stream *lexerStream, condition func(rune) bool) string return ret } +var tokenBufferPool = sync.Pool{ + New: func() interface{} { + return &bytes.Buffer{} + }, +} + /* Returns the string that was read until the given [condition] was false, or whitespace was broken. Returns false if the stream ended before whitespace was broken or condition was met. */ func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace bool, allowEscaping bool, condition func(rune) bool) (string, bool) { - var tokenBuffer bytes.Buffer + tokenBuffer := tokenBufferPool.Get().(*bytes.Buffer) var character rune - var conditioned bool - conditioned = false + startPosition := stream.position + reuseString := stream.length == len(stream.sourceString) // positions are rune indices; they only match byte offsets into sourceString when the whole source is ASCII + trimString := false + conditioned := false for stream.canRead() { character = stream.readCharacter() + if character > utf8.RuneSelf { + // International runes, we can't just grab from the string in this case + reuseString = false + } // Use backslashes to escape anything if allowEscaping && character == '\\' { - + reuseString = false character = stream.readCharacter() tokenBuffer.WriteString(string(character)) continue } if unicode.IsSpace(character) { - if breakWhitespace && tokenBuffer.Len() > 0 { conditioned = true + trimString = true break } if !includeWhitespace { + reuseString = false continue } } @@ -328,7 +352,21 @@ func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace } } - return tokenBuffer.String(), conditioned + // This reduces allocations by just reusing parts of the original source string if applicable + if reuseString { + tokenBuffer.Reset() + tokenBufferPool.Put(tokenBuffer) + ret := stream.sourceString[startPosition:stream.position] + if trimString { + ret = ret[:len(ret)-1] + } + return ret, conditioned + } + + ret := tokenBuffer.String() + tokenBuffer.Reset() + tokenBufferPool.Put(tokenBuffer) + return ret, conditioned } /* @@ -395,6 +433,8 @@ func checkBalance(tokens
[]ExpressionToken) error { } } + stream.close() + if parens != 0 { return errors.New("Unbalanced parenthesis") } @@ -448,6 +488,12 @@ func isNotClosingBracket(character rune) bool { return character != ']' } +type timeFormat struct { + format string + minLength int + maxLength int +} + /* Attempts to parse the [candidate] as a Time. Tries a series of standardized date formats, returns the Time if one applies, @@ -458,26 +504,34 @@ func tryParseTime(candidate string) (time.Time, bool) { var ret time.Time var found bool - timeFormats := [...]string{ - time.ANSIC, - time.UnixDate, - time.RubyDate, - time.Kitchen, - time.RFC3339, - time.RFC3339Nano, - "2006-01-02", // RFC 3339 - "2006-01-02 15:04", // RFC 3339 with minutes - "2006-01-02 15:04:05", // RFC 3339 with seconds - "2006-01-02 15:04:05-07:00", // RFC 3339 with seconds and timezone - "2006-01-02T15Z0700", // ISO8601 with hour - "2006-01-02T15:04Z0700", // ISO8601 with minutes - "2006-01-02T15:04:05Z0700", // ISO8601 with seconds - "2006-01-02T15:04:05.999999999Z0700", // ISO8601 with nanoseconds + if !strings.Contains(candidate, ":") && !strings.Contains(candidate, "-") { + // The below formats either have a : or a - in them.
 If the string contains neither it cannot be a time string + return time.Now(), false } - for _, format := range timeFormats { + timeFormats := [...]timeFormat{ + {time.ANSIC, len(time.ANSIC) - 1, len(time.ANSIC)}, + {time.UnixDate, len(time.UnixDate) - 1, len(time.UnixDate)}, + {time.RubyDate, len(time.RubyDate), len(time.RubyDate)}, + {time.Kitchen, len(time.Kitchen), len(time.Kitchen) + 1}, + {time.RFC3339, len(time.RFC3339) - 5, len(time.RFC3339)}, // "Z07:00" may render as just "Z" for UTC + {time.RFC3339Nano, len(time.RFC3339) - 5, len(time.RFC3339Nano)}, // fraction is optional and variable-width when parsing + {"2006-01-02", 10, 10}, // RFC 3339 + {"2006-01-02 15:04", 16, 16}, // RFC 3339 with minutes + {"2006-01-02 15:04:05", 19, 19}, // RFC 3339 with seconds + {"2006-01-02 15:04:05-07:00", 25, 25}, // RFC 3339 with seconds and timezone + {"2006-01-02T15Z0700", 14, 18}, // ISO8601 with hour; "Z0700" may be just "Z" + {"2006-01-02T15:04Z0700", 17, 21}, // ISO8601 with minutes + {"2006-01-02T15:04:05Z0700", 20, 24}, // ISO8601 with seconds + {"2006-01-02T15:04:05.999999999Z0700", 20, 34}, // ISO8601 with nanoseconds; fraction optional + } - ret, found = tryParseExactTime(candidate, format) + for _, format := range timeFormats { + // Avoid trying to parse formats it could not be to reduce allocation of time parse errors + if len(candidate) < format.minLength || len(candidate) > format.maxLength { + continue + } + ret, found = tryParseExactTime(candidate, format.format) if found { return ret, true } diff --git a/stagePlanner.go b/stagePlanner.go index d18be4b..85bbe60 100644 --- a/stagePlanner.go +++ b/stagePlanner.go @@ -186,6 +186,7 @@ func planStages(tokens []ExpressionToken) (*evaluationStage, error) { if err != nil { return nil, err } + stream.close() // while we're now fully-planned, we now need to re-order same-precedence operators.
// this could probably be avoided with a different planning method diff --git a/tokenStream.go b/tokenStream.go index 7c7c40a..b530fd7 100644 --- a/tokenStream.go +++ b/tokenStream.go @@ -1,30 +1,43 @@ package govaluate +import "sync" + type tokenStream struct { tokens []ExpressionToken index int tokenLength int } +var tokenStreamPool = sync.Pool{ + New: func() interface{} { + return new(tokenStream) + }, +} + func newTokenStream(tokens []ExpressionToken) *tokenStream { - ret := new(tokenStream) + ret := tokenStreamPool.Get().(*tokenStream) ret.tokens = tokens + ret.index = 0 ret.tokenLength = len(tokens) return ret } -func (this *tokenStream) rewind() { - this.index -= 1 +func (t *tokenStream) rewind() { + t.index -= 1 } -func (this *tokenStream) next() ExpressionToken { - token := this.tokens[this.index] +func (t *tokenStream) next() ExpressionToken { + token := t.tokens[t.index] - this.index += 1 + t.index += 1 return token } -func (this tokenStream) hasNext() bool { +func (t tokenStream) hasNext() bool { + + return t.index < t.tokenLength +} - return this.index < this.tokenLength +func (t *tokenStream) close() { + tokenStreamPool.Put(t) }