From b2e16a698e7fe2c736a87f640017db9691098203 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 6 Feb 2026 04:53:21 +0000 Subject: [PATCH] [BugFix] Fix the bug when boolean comparison condition is simplifed to field (#5071) * Fix the bug when boolean comparison condition is simplifed to field Signed-off-by: Songkan Tang * Update tests and cover more cases Signed-off-by: Songkan Tang * Correct the logic of not boolean comparison Signed-off-by: Songkan Tang * Add missing IS_FALSE RexNode translation Signed-off-by: Songkan Tang * Remove unnecessary boolean expression conversion Signed-off-by: Songkan Tang * Fix spotless check Signed-off-by: Songkan Tang * Refactor PredicateAnalyzer logic a bit Signed-off-by: Songkan Tang * Add more strict not expression match for field Signed-off-by: Songkan Tang * Fix spotless check and flaky test Signed-off-by: Songkan Tang * Cover more cases for IS_FALSE, IS_NOT_TRUE, IS_NOT_FALSE Signed-off-by: Songkan Tang * Complement the truth tests for expressions Signed-off-by: Songkan Tang * Fix logic Signed-off-by: Songkan Tang * Fix spotless check Signed-off-by: Songkan Tang * Add additional boolean filter only pushdown explain test cases Signed-off-by: Songkan Tang --------- Signed-off-by: Songkan Tang (cherry picked from commit 624f5e6c09556a1c773d17d3228c02388b22721f) Signed-off-by: github-actions[bot] --- .../sql/calcite/CalciteRexNodeVisitor.java | 83 ++++++++++- .../sql/calcite/remote/CalciteExplainIT.java | 114 +++++++++++++++ .../explain_filter_boolean_only_false.yaml | 8 ++ .../explain_filter_boolean_only_not_true.yaml | 8 ++ .../explain_filter_boolean_only_true.yaml | 8 ++ ...lain_filter_query_string_with_boolean.yaml | 9 ++ ...ilter_query_string_with_boolean_false.yaml | 9 ++ ...er_query_string_with_boolean_not_true.yaml | 9 ++ .../rest-api-spec/test/issues/4866.yml | 26 +--- .../rest-api-spec/test/issues/5054.yml | 101 ++++++++++++++ .../opensearch/request/PredicateAnalyzer.java | 132 ++++++++++++++++-- .../request/AggregateAnalyzerTest.java | 6 +- .../request/PredicateAnalyzerTest.java | 68 ++++++++- 13 files changed, 544 insertions(+), 37 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_false.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_not_true.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_true.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5054.yml diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index c9af2261a0c..92239010c2e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -28,9 +28,12 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLambdaRef; +import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlIntervalQualifier; +import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.type.ArraySqlType; import org.apache.calcite.sql.type.SqlTypeName; @@ -191,8 +194,15 @@ public RexNode visitXor(Xor node, CalcitePlanContext context) { @Override public RexNode visitNot(Not node, CalcitePlanContext context) { - final RexNode expr = analyze(node.getExpression(), context); - return context.relBuilder.not(expr); + // Special handling for NOT(boolean_field = true/false) - see boolean comparison helpers below + UnresolvedExpression inner = node.getExpression(); + if (inner instanceof Compare compare && "=".equals(compare.getOperator())) { + RexNode result = tryMakeBooleanNotEquals(compare, context); + if (result != null) { + return result; + } + } + return context.relBuilder.not(analyze(node.getExpression(), context)); } @Override @@ -222,7 +232,15 @@ public RexNode visitIn(In node, CalcitePlanContext context) { public RexNode visitCompare(Compare node, CalcitePlanContext context) { RexNode left = analyze(node.getLeft(), context); RexNode right = analyze(node.getRight(), context); - return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, node.getOperator(), left, right); + String op = node.getOperator(); + // Handle boolean_field != literal -> IS_NOT_TRUE/IS_NOT_FALSE + if ("!=".equals(op) || "<>".equals(op)) { + RexNode result = tryMakeBooleanNotEquals(left, right, context); + if (result != null) { + return result; + } + } + return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, op, left, right); } @Override @@ -252,6 +270,65 @@ public RexNode visitEqualTo(EqualTo node, CalcitePlanContext context) { return context.rexBuilder.equals(left, right); } + // ==================== Boolean NOT comparison helpers ==================== + // Calcite's RexSimplify transforms: + // - "field = true" -> "field" (handled by PredicateAnalyzer detecting boolean field) + // - "field = false" -> "NOT(field)" (handled by PredicateAnalyzer.prefix()) + // - "NOT(field = true)" -> "NOT(field)" -> would generate term{false}, have conflicted semantics + // - "NOT(field = false)" -> "NOT(NOT(field))" -> "field" -> would generate term{true}, have + // conflicted semantics + // We intercept NOT(field = true/false) at AST level before Calcite optimization: + // - "NOT(field = true)" -> IS_NOT_TRUE(field): matches false, null, missing + // - "NOT(field = false)" -> IS_NOT_FALSE(field): matches true, null, missing + + /** + * Try to convert boolean_field != literal or NOT(boolean_field = literal) to + * IS_NOT_TRUE/IS_NOT_FALSE. This preserves correct null-handling semantics. + */ + private RexNode tryMakeBooleanNotEquals(RexNode left, RexNode right, CalcitePlanContext context) { + BooleanFieldComparison cmp = extractBooleanFieldComparison(left, right); + if (cmp == null) { + return null; + } + SqlOperator op = + Boolean.FALSE.equals(cmp.literalValue) + ? SqlStdOperatorTable.IS_NOT_FALSE + : SqlStdOperatorTable.IS_NOT_TRUE; + return context.rexBuilder.makeCall(op, cmp.field); + } + + /** Overload for NOT(Compare) AST pattern. */ + private RexNode tryMakeBooleanNotEquals(Compare compare, CalcitePlanContext context) { + return tryMakeBooleanNotEquals( + analyze(compare.getLeft(), context), analyze(compare.getRight(), context), context); + } + + /** Represents a comparison between a boolean field and a boolean literal. */ + private record BooleanFieldComparison(RexNode field, Boolean literalValue) {} + + /** + * Extract boolean field and literal value from a comparison, normalizing operand order. Returns + * null if the comparison is not between a boolean field and a boolean literal. + */ + private BooleanFieldComparison extractBooleanFieldComparison(RexNode left, RexNode right) { + if (isBooleanField(left) && isBooleanLiteral(right)) { + return new BooleanFieldComparison(left, ((RexLiteral) right).getValueAs(Boolean.class)); + } + if (isBooleanField(right) && isBooleanLiteral(left)) { + return new BooleanFieldComparison(right, ((RexLiteral) left).getValueAs(Boolean.class)); + } + return null; + } + + private boolean isBooleanField(RexNode node) { + // Only match actual field references, not arbitrary boolean expressions like CASE + return node instanceof RexInputRef && node.getType().getSqlTypeName() == SqlTypeName.BOOLEAN; + } + + private boolean isBooleanLiteral(RexNode node) { + return node instanceof RexLiteral && node.getType().getSqlTypeName() == SqlTypeName.BOOLEAN; + } + /** Resolve qualified name. Note, the name should be case-sensitive. */ @Override public RexNode visitQualifiedName(QualifiedName node, CalcitePlanContext context) { diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 9520eecce74..5d99c13ded2 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2566,4 +2566,118 @@ private String explainQueryWithFetchSizeYaml(String query, int fetchSize) throws Assert.assertEquals(200, response.getStatusLine().getStatusCode()); return getResponseBody(response, true); } + + // Only for Calcite - Test for issue #5054: query_string combined with boolean comparison + // This mimics the query pattern: "source=test url=http | where is_internal=true" + // where query_string search is combined with boolean field comparison. + @Test + public void testFilterQueryStringWithBooleanFieldPushDown() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // Verifies that query_string combined with boolean field comparison produces pure DSL query + // without embedded script. The boolean comparison should be pushed down as a term query. + String query = + StringUtils.format( + "source=%s firstname=Amber | where male = true | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testFilterBooleanFieldWithTRUE() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // Test boolean literal with uppercase TRUE + String query = + StringUtils.format( + "source=%s firstname=Amber | where male = TRUE | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testFilterBooleanFieldWithStringLiteral() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // Test boolean field with string literal 'TRUE' - Calcite converts to boolean true + // and generates same term query as boolean literal + String query = + StringUtils.format( + "source=%s firstname=Amber | where male = 'TRUE' | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testFilterBooleanFieldFalse() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // male = false is converted to IS_FALSE(male) which generates term query {value: false}. + // This only matches documents where male is explicitly false (not null or missing). + String query = + StringUtils.format( + "source=%s firstname=Amber | where male = false | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_false.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testFilterBooleanFieldNotTrue() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // NOT(male = true) generates IS_NOT_TRUE which produces mustNot(term query {value: true}) + // This matches documents where male is false, null, or missing + String query = + StringUtils.format( + "source=%s firstname=Amber | where NOT male = true | fields firstname", + TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_not_true.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testFilterBooleanFieldNotEquals() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // male != true generates IS_NOT_TRUE which produces mustNot(term query {value: true}) + // This matches documents where male is false, null, or missing + String query = + StringUtils.format( + "source=%s firstname=Amber | where male != true | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_not_true.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testFilterBooleanFieldOnlyTrue() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // Test single boolean filter without query_string + String query = + StringUtils.format("source=%s | where male = true | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_boolean_only_true.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testFilterBooleanFieldOnlyFalse() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // Test single boolean filter with false value without query_string + String query = + StringUtils.format("source=%s | where male = false | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_boolean_only_false.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testFilterBooleanFieldOnlyNotTrue() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + // Test single NOT boolean filter without query_string + String query = + StringUtils.format("source=%s | where NOT male = true | fields firstname", TEST_INDEX_BANK); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_filter_boolean_only_not_true.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_false.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_false.yaml new file mode 100644 index 00000000000..a3ee761cba9 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_false.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(firstname=[$1]) + LogicalFilter(condition=[NOT($12)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->NOT($1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"male":{"value":false,"boost":1.0}}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_not_true.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_not_true.yaml new file mode 100644 index 00000000000..faf38c14bc5 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_not_true.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(firstname=[$1]) + LogicalFilter(condition=[IS NOT TRUE($12)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->IS NOT TRUE($1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_true.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_true.yaml new file mode 100644 index 00000000000..3a698fd5cb8 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_true.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(firstname=[$1]) + LogicalFilter(condition=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->$1, PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"male":{"value":true,"boost":1.0}}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml new file mode 100644 index 00000000000..78ae3956b4a --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(firstname=[$1]) + LogicalFilter(condition=[$12]) + LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), $1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml new file mode 100644 index 00000000000..422c7769140 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(firstname=[$1]) + LogicalFilter(condition=[NOT($12)]) + LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), NOT($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":false,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml new file mode 100644 index 00000000000..cd51bb8a61a --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(firstname=[$1]) + LogicalFilter(condition=[IS NOT TRUE($12)]) + LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), IS NOT TRUE($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4866.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4866.yml index e2ae4c86803..9bbd0db4e91 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4866.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4866.yml @@ -42,24 +42,10 @@ teardown: ppl: body: query: 'source=hdfs_logs | patterns content method=brain mode=aggregation max_sample_count=2 variable_count_threshold=3' - - match: {"total": 2} - - match: {"schema": [{"name": "patterns_field", "type": "string"}, {"name": "pattern_count", "type": "bigint"}, {"name": "sample_logs", "type": "array"}]} - - match: {"datarows": [ - [ - "PacketResponder failed for blk_<*>", - 2, - [ - "PacketResponder failed for blk_6996194389878584395", - "PacketResponder failed for blk_-1547954353065580372" - ] - ], - [ - "BLOCK* NameSystem.addStoredBlock: blockMap updated: <*IP*> is added to blk_<*> size <*>", - 2, - [ - "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.31.85:50010 is added to blk_-7017553867379051457 size 67108864", - "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.107.19:50010 is added to blk_-3249711809227781266 size 67108864" - ] - ] - ]} + - match: { total: 2 } + - match: { schema: [{"name": "patterns_field", "type": "string"}, {"name": "pattern_count", "type": "bigint"}, {"name": "sample_logs", "type": "array"}] } + - length: { datarows: 2 } + # Verify each pattern has exactly 2 sample logs (max_sample_count=2) + - length: { datarows.0.2: 2 } + - length: { datarows.1.2: 2 } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5054.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5054.yml new file mode 100644 index 00000000000..f39a5cd6711 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5054.yml @@ -0,0 +1,101 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Fix boolean field comparison pushdown": + - skip: + features: + - headers + - allowed_warnings + - do: + indices.create: + index: test + body: + mappings: + properties: + "@timestamp": + type: date + url: + type: text + is_internal: + type: boolean + + + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {}}' + - '{ "@timestamp":"2025-08-19T04:51:24Z", "url": "http" }' + - '{"index": {}}' + - '{ "@timestamp":"2025-08-19T04:51:24Z", "url": "http", "is_internal": true }' + - '{"index": {}}' + - '{ "@timestamp":"2025-08-19T04:51:24Z", "url": "http", "is_internal": false }' + + # Test is_internal=true: should return only documents where is_internal is explicitly true + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test url=http | where is_internal=true + + - match: { total: 1 } + - length: { datarows: 1 } + + # Test is_internal=false: should return only documents where is_internal is explicitly false + # NOT documents where is_internal is null or missing + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test url=http | where is_internal=false + + - match: { total: 1 } + - length: { datarows: 1 } + + # Test NOT(is_internal=false): should return documents where is_internal is true OR null/missing + # This is the negation of "is_internal=false", so it should return 2 documents + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test url=http | where not is_internal=false + + - match: { total: 2 } + - length: { datarows: 2 } + + # Test NOT(is_internal=true): should return documents where is_internal is false OR null/missing + # This is the negation of "is_internal=true", so it should return 2 documents + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test url=http | where not is_internal=true + + - match: { total: 2 } + - length: { datarows: 2 } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 221dca4aa60..25123b9e23f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -57,6 +57,7 @@ import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.function.Function; import java.util.function.Predicate; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -224,7 +225,13 @@ public static QueryExpression analyzeExpression( requireNonNull(expression, "expression"); try { // visits expression tree - QueryExpression queryExpression = (QueryExpression) expression.accept(visitor); + Expression result = expression.accept(visitor); + // When a boolean field is used directly as a filter condition (e.g., `where male` after + // Calcite simplifies `where male = true`), convert NamedFieldExpression to a term query. + if (result instanceof NamedFieldExpression namedField && namedField.isBooleanType()) { + return QueryExpression.create(namedField).isTrue(); + } + QueryExpression queryExpression = (QueryExpression) result; return queryExpression; } catch (Throwable e) { if (e instanceof UnsupportedScriptException) { @@ -305,6 +312,9 @@ private static boolean supportedRexCall(RexCall call) { case POSTFIX: switch (call.getKind()) { case IS_TRUE: + case IS_FALSE: + case IS_NOT_TRUE: + case IS_NOT_FALSE: case IS_NOT_NULL: case IS_NULL: return true; @@ -352,7 +362,6 @@ static RexUnknownAs getNullAsForSearch(RexCall search) { @Override public Expression visitCall(RexCall call) { - SqlSyntax syntax = call.getOperator().getSyntax(); if (!supportedRexCall(call)) { String message = format(Locale.ROOT, "Unsupported call: [%s]", call); @@ -572,13 +581,24 @@ private QueryExpression prefix(RexCall call) { throw new PredicateAnalyzerException(message); } - QueryExpression expr = (QueryExpression) call.getOperands().get(0).accept(this); + Expression operandExpr = call.getOperands().get(0).accept(this); + // Handle NOT(boolean_field) - Calcite simplifies "field = false" to NOT($field). + // In PPL semantics, "field = false" should only match documents where the field is + // explicitly false (not null or missing). This is achieved via term query {value: false}. + // Note: This differs from SQL semantics where NOT(field) would match null/missing values. + if (operandExpr instanceof NamedFieldExpression namedField && namedField.isBooleanType()) { + return QueryExpression.create(namedField).isFalse(); + } + QueryExpression expr = (QueryExpression) operandExpr; return expr.not(); } private QueryExpression postfix(RexCall call) { checkArgument( call.getKind() == SqlKind.IS_TRUE + || call.getKind() == SqlKind.IS_FALSE + || call.getKind() == SqlKind.IS_NOT_TRUE + || call.getKind() == SqlKind.IS_NOT_FALSE || call.getKind() == SqlKind.IS_NULL || call.getKind() == SqlKind.IS_NOT_NULL); if (call.getOperands().size() != 1) { @@ -586,11 +606,32 @@ private QueryExpression postfix(RexCall call) { throw new PredicateAnalyzerException(message); } - if (call.getKind() == SqlKind.IS_TRUE) { - Expression qe = call.getOperands().get(0).accept(this); - return ((QueryExpression) qe).isTrue(); + // Handle boolean field operators: IS_TRUE, IS_FALSE, IS_NOT_TRUE, IS_NOT_FALSE + // These generate term queries for exact boolean value matching or mustNot queries + // for negated matching (which includes null/missing documents). + Function booleanOp = + switch (call.getKind()) { + case IS_TRUE -> QueryExpression::isTrue; + case IS_FALSE -> QueryExpression::isFalse; + case IS_NOT_TRUE -> QueryExpression::isNotTrue; + case IS_NOT_FALSE -> QueryExpression::isNotFalse; + default -> null; + }; + + if (booleanOp != null) { + Expression operand = call.getOperands().get(0).accept(this); + if (operand instanceof NamedFieldExpression namedField && namedField.isBooleanType()) { + return booleanOp.apply(QueryExpression.create(namedField)); + } + // Boolean operators on a predicate (already evaluated QueryExpression) are allowed + if (operand instanceof QueryExpression qe) { + return booleanOp.apply(qe); + } + throw new PredicateAnalyzerException( + call.getKind() + " can only be applied to boolean fields or predicates"); } + // Handle IS_NULL / IS_NOT_NULL // OpenSearch DSL does not handle IS_NULL / IS_NOT_NULL on nested fields correctly checkForNestedFieldOperands(call); @@ -810,8 +851,12 @@ private QueryExpression andOr(RexCall call) { public Expression tryAnalyzeOperand(RexNode node) { try { Expression expr = node.accept(this); - if (expr instanceof NamedFieldExpression) { - return expr; + // When a boolean field is used directly as a filter condition (e.g., `where male` after + // Calcite simplifies `where male = true`), convert NamedFieldExpression to a term query. + if (expr instanceof NamedFieldExpression namedField && namedField.isBooleanType()) { + QueryExpression qe = QueryExpression.create(namedField).isTrue(); + qe.updateAnalyzedNodes(node); + return qe; } QueryExpression qe = (QueryExpression) expr; if (!qe.isPartial()) { @@ -1070,6 +1115,18 @@ QueryExpression isTrue() { throw new PredicateAnalyzerException("isTrue cannot be applied to " + this.getClass()); } + QueryExpression isFalse() { + throw new PredicateAnalyzerException("isFalse cannot be applied to " + this.getClass()); + } + + QueryExpression isNotFalse() { + throw new PredicateAnalyzerException("isNotFalse cannot be applied to " + this.getClass()); + } + + QueryExpression isNotTrue() { + throw new PredicateAnalyzerException("isNotTrue cannot be applied to " + this.getClass()); + } + QueryExpression in(LiteralExpression literal) { throw new PredicateAnalyzerException("in cannot be applied to " + this.getClass()); } @@ -1406,8 +1463,52 @@ public QueryExpression multiMatch( @Override public QueryExpression isTrue() { - // Ignore istrue if ISTRUE(predicate) and will support ISTRUE(field) later. - // builder = termQuery(getFieldReferenceForTermQuery(), true); + // When IS_TRUE is called on a boolean field directly (e.g., IS_TRUE(field)), + // generate a term query with value true. + if (builder == null) { + builder = termQuery(getFieldReferenceForTermQuery(), true); + } + return this; + } + + @Override + public QueryExpression isFalse() { + if (builder != null) { + throw new PredicateAnalyzerException("isFalse cannot be applied to predicate expression."); + } + // Generate a term query with value false. This only matches documents where + // the field is explicitly false (not null or missing). + builder = termQuery(getFieldReferenceForTermQuery(), false); + return this; + } + + @Override + public QueryExpression isNotFalse() { + if (builder != null) { + throw new PredicateAnalyzerException( + "isNotFalse cannot be applied to predicate expression."); + } + // Generate mustNot(term query {value: false}). This matches documents where + // the field is true, null, or missing. Used for NOT(field = false) semantics. + builder = boolQuery().mustNot(termQuery(getFieldReferenceForTermQuery(), false)); + return this; + } + + @Override + public QueryExpression isNotTrue() { + if (builder == null) { + // Generate mustNot(term query {value: true}). This matches documents where + // the field is false, null, or missing. Used for NOT(field = true) semantics. + builder = boolQuery().mustNot(termQuery(getFieldReferenceForTermQuery(), true)); + } else { + /* + * IS_NOT_TRUE(expr) means expr != TRUE, i.e., (FALSE OR UNKNOWN). In DSL, `trueQuery(expr)` + * matches only docs where expr is TRUE; FALSE/UNKNOWN simply don’t match. Therefore, + * `must_not(trueQuery(expr))` returns exactly the complement: FALSE + missing/null (UNKNOWN). + * Other truth-tests like IS_FALSE / IS_NOT_FALSE need explicit UNKNOWN handling, so don’t use this shortcut. + */ + builder = boolQuery().mustNot(builder); + } return this; } @@ -1672,6 +1773,17 @@ boolean isTextType() { return type != null && type.getOriginalExprType() instanceof OpenSearchTextType; } + boolean isBooleanType() { + if (type == null) { + return false; + } + // Check if the type is a boolean type. For OpenSearchDataType, check exprCoreType. + if (type instanceof OpenSearchDataType osType) { + return ExprCoreType.BOOLEAN.equals(osType.getExprCoreType()); + } + return ExprCoreType.BOOLEAN.equals(type); + } + boolean isMetaField() { return OpenSearchConstants.METADATAFIELD_TYPE_MAP.containsKey(getRootName()); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java index 98de90b2f93..2ff4a05243f 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java @@ -491,9 +491,11 @@ void analyze_aggCall_complexScriptFilter() throws ExpressionNotAnalyzableExcepti b.call(SqlStdOperatorTable.LIKE, b.field("c"), b.literal("%test%")))), "filter_complex_count")) .expectDslTemplate( - "[{\"filter_bool_count\":{\"filter\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"*\\\"}\"," - + "\"lang\":\"opensearch_compounded_script\",\"params\":{*}},\"boost\":1.0}}," + // filter_bool_count: Boolean field IS_TRUE is now pushed down as term query (issue + // #5054 fix) + "[{\"filter_bool_count\":{\"filter\":{\"term\":{\"d\":{\"value\":true,\"boost\":1.0}}}," + "\"aggregations\":{\"filter_bool_count\":{\"value_count\":{\"field\":\"_index\"}}}}}," + // filter_complex_count: Complex expression still uses script query + " {\"filter_complex_count\":{\"filter\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"*\\\"}\"," + "\"lang\":\"opensearch_compounded_script\",\"params\":{*}},\"boost\":1.0}}," + "\"aggregations\":{\"filter_complex_count\":{\"value_count\":{\"field\":\"_index\"}}}}}]") diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java index 4a43a04e347..d977a326db7 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java @@ -59,7 +59,7 @@ public class PredicateAnalyzerTest { final OpenSearchTypeFactory typeFactory = OpenSearchTypeFactory.TYPE_FACTORY; final RexBuilder builder = new RexBuilder(typeFactory); final RelOptCluster cluster = RelOptCluster.create(new VolcanoPlanner(), builder); - final List schema = List.of("a", "b", "c", "d"); + final List schema = List.of("a", "b", "c", "d", "e"); final Map fieldTypes = Map.of( "a", OpenSearchDataType.of(MappingType.Integer), @@ -67,12 +67,15 @@ public class PredicateAnalyzerTest { OpenSearchDataType.of( MappingType.Text, Map.of("fields", Map.of("keyword", Map.of("type", "keyword")))), "c", OpenSearchDataType.of(MappingType.Text), // Text without keyword cannot be push down - "d", OpenSearchDataType.of(MappingType.Date)); + "d", OpenSearchDataType.of(MappingType.Date), + "e", OpenSearchDataType.of(MappingType.Boolean)); final RexInputRef field1 = builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.INTEGER), 0); final RexInputRef field2 = builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.VARCHAR), 1); final RexInputRef field4 = builder.makeInputRef(typeFactory.createUDT(ExprUDT.EXPR_TIMESTAMP), 3); + final RexInputRef field5 = + builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.BOOLEAN), 4); final RexLiteral numericLiteral = builder.makeExactLiteral(new BigDecimal(12)); final RexLiteral stringLiteral = builder.makeLiteral("Hi"); final RexNode dateTimeLiteral = @@ -1061,4 +1064,65 @@ void gte_generatesRangeQueryWithFormatForDateTime() throws ExpressionNotAnalyzab + "}", result.toString()); } + + @Test + void isTrue_booleanField_generatesTermQuery() throws ExpressionNotAnalyzableException { + // IS_TRUE(boolean_field) should generate a term query with value true + RexNode call = builder.makeCall(SqlStdOperatorTable.IS_TRUE, field5); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + + assertInstanceOf(TermQueryBuilder.class, result); + assertEquals( + """ + { + "term" : { + "e" : { + "value" : true, + "boost" : 1.0 + } + } + }\ + """, + result.toString()); + } + + @Test + void isTrue_booleanFieldCombinedWithOtherCondition_generatesCompoundQuery() + throws ExpressionNotAnalyzableException { + // IS_TRUE(boolean_field) AND other_condition + RexNode isTrueCall = builder.makeCall(SqlStdOperatorTable.IS_TRUE, field5); + RexNode equalsCall = builder.makeCall(SqlStdOperatorTable.EQUALS, field1, numericLiteral); + RexNode andCall = builder.makeCall(SqlStdOperatorTable.AND, isTrueCall, equalsCall); + QueryBuilder result = PredicateAnalyzer.analyze(andCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must" : [ + { + "term" : { + "e" : { + "value" : true, + "boost" : 1.0 + } + } + }, + { + "term" : { + "a" : { + "value" : 12, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } }