Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLambdaRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlIntervalQualifier;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.ArraySqlType;
import org.apache.calcite.sql.type.SqlTypeName;
Expand Down Expand Up @@ -191,8 +194,15 @@ public RexNode visitXor(Xor node, CalcitePlanContext context) {

@Override
public RexNode visitNot(Not node, CalcitePlanContext context) {
final RexNode expr = analyze(node.getExpression(), context);
return context.relBuilder.not(expr);
// Special handling for NOT(boolean_field = true/false) - see boolean comparison helpers below
UnresolvedExpression inner = node.getExpression();
if (inner instanceof Compare compare && "=".equals(compare.getOperator())) {
RexNode result = tryMakeBooleanNotEquals(compare, context);
if (result != null) {
return result;
}
}
return context.relBuilder.not(analyze(node.getExpression(), context));
}

@Override
Expand Down Expand Up @@ -222,7 +232,15 @@ public RexNode visitIn(In node, CalcitePlanContext context) {
public RexNode visitCompare(Compare node, CalcitePlanContext context) {
RexNode left = analyze(node.getLeft(), context);
RexNode right = analyze(node.getRight(), context);
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, node.getOperator(), left, right);
String op = node.getOperator();
// Handle boolean_field != literal -> IS_NOT_TRUE/IS_NOT_FALSE
if ("!=".equals(op) || "<>".equals(op)) {
RexNode result = tryMakeBooleanNotEquals(left, right, context);
if (result != null) {
return result;
}
}
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, op, left, right);
}

@Override
Expand Down Expand Up @@ -252,6 +270,65 @@ public RexNode visitEqualTo(EqualTo node, CalcitePlanContext context) {
return context.rexBuilder.equals(left, right);
}

// ==================== Boolean NOT comparison helpers ====================
// Calcite's RexSimplify transforms:
// - "field = true" -> "field" (handled by PredicateAnalyzer detecting boolean field)
// - "field = false" -> "NOT(field)" (handled by PredicateAnalyzer.prefix())
// - "NOT(field = true)" -> "NOT(field)" -> would generate term{false}, have conflicted semantics
// - "NOT(field = false)" -> "NOT(NOT(field))" -> "field" -> would generate term{true}, have
// conflicted semantics
// We intercept NOT(field = true/false) at AST level before Calcite optimization:
// - "NOT(field = true)" -> IS_NOT_TRUE(field): matches false, null, missing
// - "NOT(field = false)" -> IS_NOT_FALSE(field): matches true, null, missing

/**
* Try to convert boolean_field != literal or NOT(boolean_field = literal) to
* IS_NOT_TRUE/IS_NOT_FALSE. This preserves correct null-handling semantics.
*/
private RexNode tryMakeBooleanNotEquals(RexNode left, RexNode right, CalcitePlanContext context) {
BooleanFieldComparison cmp = extractBooleanFieldComparison(left, right);
if (cmp == null) {
return null;
}
SqlOperator op =
Boolean.FALSE.equals(cmp.literalValue)
? SqlStdOperatorTable.IS_NOT_FALSE
: SqlStdOperatorTable.IS_NOT_TRUE;
return context.rexBuilder.makeCall(op, cmp.field);
}

/** Overload for NOT(Compare) AST pattern. */
private RexNode tryMakeBooleanNotEquals(Compare compare, CalcitePlanContext context) {
return tryMakeBooleanNotEquals(
analyze(compare.getLeft(), context), analyze(compare.getRight(), context), context);
}

/** Represents a comparison between a boolean field and a boolean literal. */
private record BooleanFieldComparison(RexNode field, Boolean literalValue) {}

/**
* Extract boolean field and literal value from a comparison, normalizing operand order. Returns
* null if the comparison is not between a boolean field and a boolean literal.
*/
private BooleanFieldComparison extractBooleanFieldComparison(RexNode left, RexNode right) {
if (isBooleanField(left) && isBooleanLiteral(right)) {
return new BooleanFieldComparison(left, ((RexLiteral) right).getValueAs(Boolean.class));
}
if (isBooleanField(right) && isBooleanLiteral(left)) {
return new BooleanFieldComparison(right, ((RexLiteral) left).getValueAs(Boolean.class));
}
return null;
}

private boolean isBooleanField(RexNode node) {
// Only match actual field references, not arbitrary boolean expressions like CASE
return node instanceof RexInputRef && node.getType().getSqlTypeName() == SqlTypeName.BOOLEAN;
}

private boolean isBooleanLiteral(RexNode node) {
return node instanceof RexLiteral && node.getType().getSqlTypeName() == SqlTypeName.BOOLEAN;
}

/** Resolve qualified name. Note, the name should be case-sensitive. */
@Override
public RexNode visitQualifiedName(QualifiedName node, CalcitePlanContext context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2566,4 +2566,118 @@ private String explainQueryWithFetchSizeYaml(String query, int fetchSize) throws
Assert.assertEquals(200, response.getStatusLine().getStatusCode());
return getResponseBody(response, true);
}

// Only for Calcite - Test for issue #5054: query_string combined with boolean comparison
// This mimics the query pattern: "source=test url=http | where is_internal=true"
// where query_string search is combined with boolean field comparison.
@Test
public void testFilterQueryStringWithBooleanFieldPushDown() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Verifies that query_string combined with boolean field comparison produces pure DSL query
// without embedded script. The boolean comparison should be pushed down as a term query.
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = true | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldWithTRUE() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Test boolean literal with uppercase TRUE
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = TRUE | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldWithStringLiteral() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Test boolean field with string literal 'TRUE' - Calcite converts to boolean true
// and generates same term query as boolean literal
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = 'TRUE' | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldFalse() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// male = false is converted to IS_FALSE(male) which generates term query {value: false}.
// This only matches documents where male is explicitly false (not null or missing).
String query =
StringUtils.format(
"source=%s firstname=Amber | where male = false | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_false.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldNotTrue() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// NOT(male = true) generates IS_NOT_TRUE which produces mustNot(term query {value: true})
// This matches documents where male is false, null, or missing
String query =
StringUtils.format(
"source=%s firstname=Amber | where NOT male = true | fields firstname",
TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_not_true.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldNotEquals() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// male != true generates IS_NOT_TRUE which produces mustNot(term query {value: true})
// This matches documents where male is false, null, or missing
String query =
StringUtils.format(
"source=%s firstname=Amber | where male != true | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_not_true.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldOnlyTrue() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Test single boolean filter without query_string
String query =
StringUtils.format("source=%s | where male = true | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_boolean_only_true.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldOnlyFalse() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Test single boolean filter with false value without query_string
String query =
StringUtils.format("source=%s | where male = false | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_boolean_only_false.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testFilterBooleanFieldOnlyNotTrue() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
// Test single NOT boolean filter without query_string
String query =
StringUtils.format("source=%s | where NOT male = true | fields firstname", TEST_INDEX_BANK);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_filter_boolean_only_not_true.yaml");
assertYamlEqualsIgnoreId(expected, result);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
calcite:
logical: |
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(firstname=[$1])
LogicalFilter(condition=[NOT($12)])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->NOT($1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"male":{"value":false,"boost":1.0}}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
calcite:
logical: |
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(firstname=[$1])
LogicalFilter(condition=[IS NOT TRUE($12)])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->IS NOT TRUE($1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
calcite:
logical: |
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(firstname=[$1])
LogicalFilter(condition=[$12])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->$1, PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"male":{"value":true,"boost":1.0}}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
calcite:
logical: |
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(firstname=[$1])
LogicalFilter(condition=[$12])
LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), $1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
calcite:
logical: |
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(firstname=[$1])
LogicalFilter(condition=[NOT($12)])
LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), NOT($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":false,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
calcite:
logical: |
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(firstname=[$1])
LogicalFilter(condition=[IS NOT TRUE($12)])
LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
physical: |
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), IS NOT TRUE($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
Expand Up @@ -42,24 +42,10 @@ teardown:
ppl:
body:
query: 'source=hdfs_logs | patterns content method=brain mode=aggregation max_sample_count=2 variable_count_threshold=3'
- match: {"total": 2}
- match: {"schema": [{"name": "patterns_field", "type": "string"}, {"name": "pattern_count", "type": "bigint"}, {"name": "sample_logs", "type": "array"}]}
- match: {"datarows": [
[
"PacketResponder failed for blk_<*>",
2,
[
"PacketResponder failed for blk_6996194389878584395",
"PacketResponder failed for blk_-1547954353065580372"
]
],
[
"BLOCK* NameSystem.addStoredBlock: blockMap updated: <*IP*> is added to blk_<*> size <*>",
2,
[
"BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.31.85:50010 is added to blk_-7017553867379051457 size 67108864",
"BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.107.19:50010 is added to blk_-3249711809227781266 size 67108864"
]
]
]}
- match: { total: 2 }
- match: { schema: [{"name": "patterns_field", "type": "string"}, {"name": "pattern_count", "type": "bigint"}, {"name": "sample_logs", "type": "array"}] }
- length: { datarows: 2 }
# Verify each pattern has exactly 2 sample logs (max_sample_count=2)
- length: { datarows.0.2: 2 }
- length: { datarows.1.2: 2 }

Loading
Loading