From 9fe339b4af9ee01324af84505666658d97a7f76a Mon Sep 17 00:00:00 2001 From: Patrick Boyd Date: Tue, 6 May 2025 13:04:06 -0500 Subject: [PATCH] Initial changes to optimize performance of govaluate --- EvaluableExpression.go | 26 +++++++- benchmarks_test.go | 29 +++++---- evaluationStage.go | 2 +- lexerStream.go | 38 +++++++---- parsing.go | 142 ++++++++++++++++++++++++++++------------- stagePlanner.go | 1 + tokenStream.go | 29 ++++++--- 7 files changed, 186 insertions(+), 81 deletions(-) diff --git a/EvaluableExpression.go b/EvaluableExpression.go index a5fe50d..3300a03 100644 --- a/EvaluableExpression.go +++ b/EvaluableExpression.go @@ -3,6 +3,7 @@ package govaluate import ( "errors" "fmt" + "sync" ) const isoDateFormat string = "2006-01-02T15:04:05.999999999Z0700" @@ -137,6 +138,12 @@ func (this EvaluableExpression) Evaluate(parameters map[string]interface{}) (int return this.Eval(MapParameters(parameters)) } +var sanitizedParamsPool = sync.Pool{ + New: func() interface{} { + return &sanitizedParameters{} + }, +} + /* Runs the entire expression using the given [parameters]. e.g., If the expression contains a reference to the variable "foo", it will be taken from `parameters.Get("foo")`. 
@@ -154,13 +161,21 @@ func (this EvaluableExpression) Eval(parameters Parameters) (interface{}, error) return nil, nil } + free := false if parameters != nil { - parameters = &sanitizedParameters{parameters} + free = true + tmp := sanitizedParamsPool.Get().(*sanitizedParameters) + tmp.orig = parameters + parameters = tmp } else { parameters = DUMMY_PARAMETERS } - return this.evaluateStage(this.evaluationStages, parameters) + ret, err := this.evaluateStage(this.evaluationStages, parameters) + if free { + sanitizedParamsPool.Put(parameters) + } + return ret, err } func (this EvaluableExpression) evaluateStage(stage *evaluationStage, parameters Parameters) (interface{}, error) { @@ -274,3 +289,10 @@ func (this EvaluableExpression) Vars() []string { } return varlist } + +/* +Removes the tokens from the EvaluableExpression. This will cause the Tokens() and Vars() functions to no longer operate, but will save memory. +*/ +func (this *EvaluableExpression) CleanupTokens() { + this.tokens = this.tokens[:0] +} diff --git a/benchmarks_test.go b/benchmarks_test.go index a7f99c8..98f62a6 100644 --- a/benchmarks_test.go +++ b/benchmarks_test.go @@ -8,7 +8,7 @@ import ( Serves as a "water test" to give an idea of the general overhead of parsing */ func BenchmarkSingleParse(bench *testing.B) { - + bench.ReportAllocs() for i := 0; i < bench.N; i++ { _, _ = NewEvaluableExpression("1") } @@ -19,7 +19,7 @@ The most common use case, a single variable, modified slightly, compared to a co This is the "expected" use case of govaluate. */ func BenchmarkSimpleParse(bench *testing.B) { - + bench.ReportAllocs() for i := 0; i < bench.N; i++ { _, _ = NewEvaluableExpression("(requests_made * requests_succeeded / 100) >= 90") } @@ -29,6 +29,7 @@ func BenchmarkSimpleParse(bench *testing.B) { Benchmarks all syntax possibilities in one expression. */ func BenchmarkFullParse(bench *testing.B) { + bench.ReportAllocs() // represents all the major syntax possibilities. 
expression := "2 > 1 &&" + "'something' != 'nothing' || " + @@ -45,7 +46,7 @@ func BenchmarkFullParse(bench *testing.B) { Benchmarks the bare-minimum evaluation time */ func BenchmarkEvaluationSingle(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("1") bench.ResetTimer() @@ -58,7 +59,7 @@ func BenchmarkEvaluationSingle(bench *testing.B) { Benchmarks evaluation times of literals (no variables, no modifiers) */ func BenchmarkEvaluationNumericLiteral(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("(2) > (1)") bench.ResetTimer() @@ -71,7 +72,7 @@ func BenchmarkEvaluationNumericLiteral(bench *testing.B) { Benchmarks evaluation times of literals with modifiers */ func BenchmarkEvaluationLiteralModifiers(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("(2) + (2) == (4)") bench.ResetTimer() @@ -81,7 +82,7 @@ func BenchmarkEvaluationLiteralModifiers(bench *testing.B) { } func BenchmarkEvaluationParameter(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("requests_made") parameters := map[string]interface{}{ "requests_made": 99.0, @@ -97,7 +98,7 @@ func BenchmarkEvaluationParameter(bench *testing.B) { Benchmarks evaluation times of parameters */ func BenchmarkEvaluationParameters(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("requests_made > requests_succeeded") parameters := map[string]interface{}{ "requests_made": 99.0, @@ -114,7 +115,7 @@ func BenchmarkEvaluationParameters(bench *testing.B) { Benchmarks evaluation times of parameters + literals with modifiers */ func BenchmarkEvaluationParametersModifiers(bench *testing.B) { - + bench.ReportAllocs() expression, _ := NewEvaluableExpression("(requests_made * requests_succeeded / 100) >= 90") parameters := map[string]interface{}{ "requests_made": 99.0, @@ -134,6 +135,7 @@ This is largely a canary benchmark to make sure that any syntax 
additions don't unnecessarily bloat the evaluation time. */ func BenchmarkComplexExpression(bench *testing.B) { + bench.ReportAllocs() expressionString := "2 > 1 &&" + "'something' != 'nothing' || " + "'2014-01-20' < 'Wed Jul 8 23:07:35 MDT 2015' && " + @@ -160,6 +162,7 @@ and possible performance pitfalls. This test doesn't aim to be comprehensive aga it is primarily concerned with tracking how much longer it takes to compile a regex at evaluation-time than during parse-time. */ func BenchmarkRegexExpression(bench *testing.B) { + bench.ReportAllocs() expressionString := "(foo !~ bar) && (foobar =~ oba)" expression, _ := NewEvaluableExpression(expressionString) @@ -182,7 +185,7 @@ are actually being precompiled. Also demonstrates that (generally) compiling a regex at evaluation-time takes an order of magnitude more time than pre-compiling. */ func BenchmarkConstantRegexExpression(bench *testing.B) { - + bench.ReportAllocs() expressionString := "(foo !~ '[bB]az') && (bar =~ '[bB]ar')" expression, _ := NewEvaluableExpression(expressionString) @@ -198,7 +201,7 @@ func BenchmarkConstantRegexExpression(bench *testing.B) { } func BenchmarkAccessors(bench *testing.B) { - + bench.ReportAllocs() expressionString := "foo.Int" expression, _ := NewEvaluableExpression(expressionString) @@ -209,7 +212,7 @@ func BenchmarkAccessors(bench *testing.B) { } func BenchmarkAccessorMethod(bench *testing.B) { - + bench.ReportAllocs() expressionString := "foo.Func()" expression, _ := NewEvaluableExpression(expressionString) @@ -220,7 +223,7 @@ func BenchmarkAccessorMethod(bench *testing.B) { } func BenchmarkAccessorMethodParams(bench *testing.B) { - + bench.ReportAllocs() expressionString := "foo.FuncArgStr('bonk')" expression, _ := NewEvaluableExpression(expressionString) @@ -231,7 +234,7 @@ func BenchmarkAccessorMethodParams(bench *testing.B) { } func BenchmarkNestedAccessors(bench *testing.B) { - + bench.ReportAllocs() expressionString := "foo.Nested.Funk" expression, _ := 
NewEvaluableExpression(expressionString) diff --git a/evaluationStage.go b/evaluationStage.go index a22ade8..92e813a 100644 --- a/evaluationStage.go +++ b/evaluationStage.go @@ -184,7 +184,7 @@ func regexStage(left interface{}, right interface{}, parameters Parameters) (int pattern = right } - return pattern.Match([]byte(left.(string))), nil + return pattern.MatchString(left.(string)), nil } func notRegexStage(left interface{}, right interface{}, parameters Parameters) (interface{}, error) { diff --git a/lexerStream.go b/lexerStream.go index c6ed76e..b0be027 100644 --- a/lexerStream.go +++ b/lexerStream.go @@ -1,28 +1,35 @@ package govaluate +import "sync" + type lexerStream struct { - source []rune - position int - length int + sourceString string + source []rune + position int + length int } -func newLexerStream(source string) *lexerStream { - - var ret *lexerStream - var runes []rune +var lexerStreamPool = sync.Pool{ + New: func() interface{} { + return new(lexerStream) + }, +} +func newLexerStream(source string) *lexerStream { + ret := lexerStreamPool.Get().(*lexerStream) + if ret.source == nil { + ret.source = make([]rune, 0, len(source)) + } for _, character := range source { - runes = append(runes, character) + ret.source = append(ret.source, character) } - - ret = new(lexerStream) - ret.source = runes - ret.length = len(runes) + ret.sourceString = source + ret.position = 0 + ret.length = len(ret.source) return ret } func (this *lexerStream) readCharacter() rune { - character := this.source[this.position] this.position += 1 return character @@ -35,3 +42,8 @@ func (this *lexerStream) rewind(amount int) { func (this lexerStream) canRead() bool { return this.position < this.length } + +func (this *lexerStream) close() { + this.source = this.source[:0] + lexerStreamPool.Put(this) +} diff --git a/parsing.go b/parsing.go index dae78f7..a37f259 100644 --- a/parsing.go +++ b/parsing.go @@ -7,13 +7,19 @@ import ( "regexp" "strconv" "strings" + "sync" "time" "unicode" 
+ "unicode/utf8" ) -func parseTokens(expression string, functions map[string]ExpressionFunction) ([]ExpressionToken, error) { +var ( + averageTokens = 1 // FIXME(review): averageTokens and samples are package globals mutated on every parse; concurrent NewEvaluableExpression calls race — guard with a mutex or use atomics + samples = make([]int, 0, 10) +) - var ret []ExpressionToken +func parseTokens(expression string, functions map[string]ExpressionFunction) ([]ExpressionToken, error) { + ret := make([]ExpressionToken, 0, averageTokens) var token ExpressionToken var stream *lexerStream var state lexerState @@ -43,6 +49,18 @@ func parseTokens(expression string, functions map[string]ExpressionFunction) ([] // append this valid token ret = append(ret, token) } + stream.close() + if len(samples) == cap(samples) { + copy(samples, samples[1:]) + samples[len(samples)-1] = len(ret) + } else { + samples = append(samples, len(ret)) + } + total := 0 + for _, val := range samples { + total += val + } + averageTokens = total / len(samples) err = checkBalance(ret) if err != nil { @@ -140,30 +158,23 @@ func readToken(stream *lexerStream, state lexerState, functions map[string]Expre if unicode.IsLetter(character) { tokenString = readTokenUntilFalse(stream, isVariableName) - - tokenValue = tokenString - kind = VARIABLE - - // boolean? - if tokenValue == "true" { - + switch tokenString { + case "true": kind = BOOLEAN tokenValue = true - } else { - - if tokenValue == "false" { - - kind = BOOLEAN - tokenValue = false - } - } - - // textual operator? - if tokenValue == "in" || tokenValue == "IN" { - + case "false": + kind = BOOLEAN + tokenValue = false + case "in": + fallthrough + case "IN": // force lower case for consistency tokenValue = "in" kind = COMPARATOR + default: + // This causes an alloc, avoid it if we can + tokenValue = tokenString + kind = VARIABLE } // function?
@@ -284,37 +295,50 @@ func readTokenUntilFalse(stream *lexerStream, condition func(rune) bool) string return ret } +var tokenBufferPool = sync.Pool{ + New: func() interface{} { + return &bytes.Buffer{} + }, +} + /* Returns the string that was read until the given [condition] was false, or whitespace was broken. Returns false if the stream ended before whitespace was broken or condition was met. */ func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace bool, allowEscaping bool, condition func(rune) bool) (string, bool) { - var tokenBuffer bytes.Buffer + tokenBuffer := tokenBufferPool.Get().(*bytes.Buffer) var character rune - var conditioned bool - conditioned = false + startPosition := stream.position + reuseString := stream.length == len(stream.sourceString) // positions are rune indices; they only match byte offsets into sourceString when the whole source is ASCII + trimString := false + conditioned := false for stream.canRead() { character = stream.readCharacter() + if character > utf8.RuneSelf { + // International runes, we can't just grab from the string in this case + reuseString = false + } // Use backslashes to escape anything if allowEscaping && character == '\\' { - + reuseString = false character = stream.readCharacter() tokenBuffer.WriteString(string(character)) continue } if unicode.IsSpace(character) { - if breakWhitespace && tokenBuffer.Len() > 0 { conditioned = true + trimString = true break } if !includeWhitespace { + reuseString = false continue } } @@ -328,7 +352,21 @@ func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace } } - return tokenBuffer.String(), conditioned + // This reduces allocations by just reusing parts of the original source string if applicable + if reuseString { + tokenBuffer.Reset() + tokenBufferPool.Put(tokenBuffer) + ret := stream.sourceString[startPosition:stream.position] + if trimString { + ret = ret[:len(ret)-1] + } + return ret, conditioned + } + + ret := tokenBuffer.String() + tokenBuffer.Reset() + tokenBufferPool.Put(tokenBuffer) + return ret, conditioned } /* @@ -395,6 +433,8 @@ func checkBalance(tokens
[]ExpressionToken) error { } } + stream.close() + if parens != 0 { return errors.New("Unbalanced parenthesis") } @@ -448,6 +488,12 @@ func isNotClosingBracket(character rune) bool { return character != ']' } +type timeFormat struct { + format string + minLength int + maxLength int +} + /* Attempts to parse the [candidate] as a Time. Tries a series of standardized date formats, returns the Time if one applies, @@ -458,26 +504,34 @@ func tryParseTime(candidate string) (time.Time, bool) { var ret time.Time var found bool - timeFormats := [...]string{ - time.ANSIC, - time.UnixDate, - time.RubyDate, - time.Kitchen, - time.RFC3339, - time.RFC3339Nano, - "2006-01-02", // RFC 3339 - "2006-01-02 15:04", // RFC 3339 with minutes - "2006-01-02 15:04:05", // RFC 3339 with seconds - "2006-01-02 15:04:05-07:00", // RFC 3339 with seconds and timezone - "2006-01-02T15Z0700", // ISO8601 with hour - "2006-01-02T15:04Z0700", // ISO8601 with minutes - "2006-01-02T15:04:05Z0700", // ISO8601 with seconds - "2006-01-02T15:04:05.999999999Z0700", // ISO8601 with nanoseconds + if !strings.Contains(candidate, ":") && !strings.Contains(candidate, "-") { + // The below formats either have a : or a - in them.
 If the string contains neither it cannot be a time string + return time.Now(), false } - for _, format := range timeFormats { + timeFormats := [...]timeFormat{ + {time.ANSIC, len(time.ANSIC) - 1, len(time.ANSIC)}, + {time.UnixDate, len(time.UnixDate) - 1, len(time.UnixDate)}, + {time.RubyDate, len(time.RubyDate), len(time.RubyDate)}, + {time.Kitchen, len(time.Kitchen), len(time.Kitchen) + 1}, + {time.RFC3339, len(time.RFC3339) - 5, len(time.RFC3339)}, // "Z07:00" may render as just "Z" for UTC + {time.RFC3339Nano, len(time.RFC3339) - 5, len(time.RFC3339Nano)}, // fraction is optional and variable-width when parsing + {"2006-01-02", 10, 10}, // RFC 3339 + {"2006-01-02 15:04", 16, 16}, // RFC 3339 with minutes + {"2006-01-02 15:04:05", 19, 19}, // RFC 3339 with seconds + {"2006-01-02 15:04:05-07:00", 25, 25}, // RFC 3339 with seconds and timezone + {"2006-01-02T15Z0700", 14, 18}, // ISO8601 with hour; "Z0700" may be just "Z" + {"2006-01-02T15:04Z0700", 17, 21}, // ISO8601 with minutes + {"2006-01-02T15:04:05Z0700", 20, 24}, // ISO8601 with seconds + {"2006-01-02T15:04:05.999999999Z0700", 20, 34}, // ISO8601 with nanoseconds; fraction optional + } - ret, found = tryParseExactTime(candidate, format) + for _, format := range timeFormats { + // Avoid trying to parse formats it could not be to reduce allocation of time parse errors + if len(candidate) < format.minLength || len(candidate) > format.maxLength { + continue + } + ret, found = tryParseExactTime(candidate, format.format) if found { return ret, true } diff --git a/stagePlanner.go b/stagePlanner.go index d18be4b..85bbe60 100644 --- a/stagePlanner.go +++ b/stagePlanner.go @@ -186,6 +186,7 @@ func planStages(tokens []ExpressionToken) (*evaluationStage, error) { if err != nil { return nil, err } + stream.close() // while we're now fully-planned, we now need to re-order same-precedence operators.
// this could probably be avoided with a different planning method diff --git a/tokenStream.go b/tokenStream.go index 7c7c40a..b530fd7 100644 --- a/tokenStream.go +++ b/tokenStream.go @@ -1,30 +1,43 @@ package govaluate +import "sync" + type tokenStream struct { tokens []ExpressionToken index int tokenLength int } +var tokenStreamPool = sync.Pool{ + New: func() interface{} { + return new(tokenStream) + }, +} + func newTokenStream(tokens []ExpressionToken) *tokenStream { - ret := new(tokenStream) + ret := tokenStreamPool.Get().(*tokenStream) ret.tokens = tokens + ret.index = 0 ret.tokenLength = len(tokens) return ret } -func (this *tokenStream) rewind() { - this.index -= 1 +func (t *tokenStream) rewind() { + t.index -= 1 } -func (this *tokenStream) next() ExpressionToken { - token := this.tokens[this.index] +func (t *tokenStream) next() ExpressionToken { + token := t.tokens[t.index] - this.index += 1 + t.index += 1 return token } -func (this tokenStream) hasNext() bool { +func (t tokenStream) hasNext() bool { + + return t.index < t.tokenLength +} - return this.index < this.tokenLength +func (t *tokenStream) close() { + tokenStreamPool.Put(t) }