diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 58d542538d..2fdd8d6b84 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -81,6 +81,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.NoMv; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -546,6 +547,11 @@ public LogicalPlan visitMvCombine(MvCombine node, AnalysisContext context) { throw getOnlyForCalciteException("mvcombine"); } + @Override + public LogicalPlan visitNoMv(NoMv node, AnalysisContext context) { + throw getOnlyForCalciteException("nomv"); + } + /** Build {@link ParseExpression} to context and skip to child nodes. */ @Override public LogicalPlan visitParse(Parse node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 2486b63791..9a8fac2588 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -69,6 +69,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.NoMv; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -475,4 +476,8 @@ public T visitAddColTotals(AddColTotals node, C context) { public T visitMvCombine(MvCombine node, C context) { return visitChildren(node, context); } + + public T visitNoMv(NoMv node, C context) { + return visitChildren(node, context); + } } diff --git 
a/core/src/main/java/org/opensearch/sql/ast/tree/NoMv.java b/core/src/main/java/org/opensearch/sql/ast/tree/NoMv.java new file mode 100644 index 0000000000..0b24576281 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/NoMv.java @@ -0,0 +1,79 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import javax.annotation.Nullable; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.Let; +import org.opensearch.sql.ast.expression.Literal; + +/** + * AST node for the NOMV command. Converts multi-value fields to single-value fields by joining + * array elements with newline delimiter. + */ +@Getter +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = false) +public class NoMv extends UnresolvedPlan { + + private final Field field; + @Nullable private UnresolvedPlan child; + + public NoMv(Field field) { + this.field = field; + } + + public NoMv attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List<UnresolvedPlan> getChild() { + return child == null ? ImmutableList.of() : ImmutableList.of(child); + } + + @Override + public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) { + return nodeVisitor.visitNoMv(this, context); + } + + /** + * Rewrites the nomv command as an eval command using mvjoin function with null filtering. nomv + * is rewritten to: {@code eval <field> = coalesce(mvjoin(array_compact(<field>), "\n"), "")} + * + *

The array_compact removes null elements from the array, and coalesce ensures empty arrays + * return empty string instead of null. + * + * @return an Eval node representing the equivalent mvjoin operation with null filtering + */ + public UnresolvedPlan rewriteAsEval() { + Function arrayCompactFunc = new Function("array_compact", ImmutableList.of(field)); + + Function mvjoinFunc = + new Function( + "mvjoin", ImmutableList.of(arrayCompactFunc, new Literal("\n", DataType.STRING))); + + Function coalesceFunc = + new Function("coalesce", ImmutableList.of(mvjoinFunc, new Literal("", DataType.STRING))); + + Let letExpr = new Let(field, coalesceFunc); + + Eval eval = new Eval(ImmutableList.of(letExpr)); + if (this.child != null) { + eval.attach(this.child); + } + return eval; + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 68a700b66b..2d46e972aa 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -126,6 +126,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.NoMv; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -3290,6 +3291,26 @@ private void restoreColumnOrderAfterArrayAgg( relBuilder.project(projections, projectionNames, /* force= */ true); } + /** + * Visits a NoMv (no multivalue) node by rewriting it as an Eval node. + * + *

The NoMv command converts multivalue (array) fields to single-value strings by joining array + * elements with newline delimiters. Internally, NoMv rewrites itself to an Eval node containing a + * coalesce/mvjoin expression: {@code eval field = coalesce(mvjoin(array_compact(field), "\n"), + * "")}, which drops null elements and yields an empty string for empty arrays. + * + *

The explicit cast to Eval is safe because {@link NoMv#rewriteAsEval()} always returns a + * newly constructed Eval instance and never returns null or other types. + * + * @param node the NoMv node to visit + * @param context the Calcite plan context containing schema and optimization information + * @return the RelNode resulting from visiting the rewritten Eval node + * @see NoMv#rewriteAsEval() + */ + @Override + public RelNode visitNoMv(NoMv node, CalcitePlanContext context) { + return visitEval((Eval) node.rewriteAsEval(), context); + } + @Override public RelNode visitValues(Values values, CalcitePlanContext context) { if (values.getValues() == null || values.getValues().isEmpty()) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index dce558bf7c..3171a09d39 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -71,6 +71,7 @@ public enum BuiltinFunctionName { ARRAY(FunctionName.of("array")), ARRAY_LENGTH(FunctionName.of("array_length")), ARRAY_SLICE(FunctionName.of("array_slice"), true), + ARRAY_COMPACT(FunctionName.of("array_compact")), MAP_APPEND(FunctionName.of("map_append"), true), MAP_CONCAT(FunctionName.of("map_concat"), true), MAP_REMOVE(FunctionName.of("map_remove"), true), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java index 2e9e53b9ac..9a77a0d5a7 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java @@ -35,7 +35,7 @@ */ public class ArrayFunctionImpl extends ImplementorUDF { public 
ArrayFunctionImpl() { - super(new ArrayImplementor(), NullPolicy.ANY); + super(new ArrayImplementor(), NullPolicy.NONE); } /** @@ -81,7 +81,8 @@ public Expression implement( /** * The asList will generate the List. We need to convert internally, otherwise, the - * calcite will directly cast like DOUBLE -> INTEGER, which throw error + * calcite will directly cast like DOUBLE -> INTEGER, which throw error. Null elements are + * preserved in the array. */ public static Object internalCast(Object... args) { List originalList = (List) args[0]; @@ -93,7 +94,9 @@ public static Object internalCast(Object... args) { originalList.stream() .map( num -> { - if (num instanceof BigDecimal) { + if (num == null) { + return null; + } else if (num instanceof BigDecimal) { return (BigDecimal) num; } else { return BigDecimal.valueOf(((Number) num).doubleValue()); @@ -104,17 +107,20 @@ public static Object internalCast(Object... args) { case DOUBLE: result = originalList.stream() - .map(i -> (Object) ((Number) i).doubleValue()) + .map(i -> i == null ? null : (Object) ((Number) i).doubleValue()) .collect(Collectors.toList()); break; case FLOAT: result = originalList.stream() - .map(i -> (Object) ((Number) i).floatValue()) + .map(i -> i == null ? null : (Object) ((Number) i).floatValue()) .collect(Collectors.toList()); break; case VARCHAR, CHAR: - result = originalList.stream().map(i -> (Object) i.toString()).collect(Collectors.toList()); + result = + originalList.stream() + .map(i -> i == null ? 
null : (Object) i.toString()) + .collect(Collectors.toList()); break; default: result = originalList; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 205f3a0f2e..29463c95f4 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -16,6 +16,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADDTIME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.AND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_COMPACT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_LENGTH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_SLICE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ASCII; @@ -990,12 +991,7 @@ void populate() { PPLTypeChecker.family(SqlTypeFamily.ANY)); // Register MVJOIN to use Calcite's ARRAY_JOIN - register( - MVJOIN, - (FunctionImp2) - (builder, array, delimiter) -> - builder.makeCall(SqlLibraryOperators.ARRAY_JOIN, array, delimiter), - PPLTypeChecker.family(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER)); + registerOperator(MVJOIN, SqlLibraryOperators.ARRAY_JOIN); // Register SPLIT with custom logic for empty delimiter // Case 1: Delimiter is not empty string, use SPLIT @@ -1048,6 +1044,7 @@ void populate() { registerOperator(MAP_REMOVE, PPLBuiltinOperators.MAP_REMOVE); registerOperator(ARRAY_LENGTH, SqlLibraryOperators.ARRAY_LENGTH); registerOperator(ARRAY_SLICE, SqlLibraryOperators.ARRAY_SLICE); + registerOperator(ARRAY_COMPACT, SqlLibraryOperators.ARRAY_COMPACT); registerOperator(FORALL, PPLBuiltinOperators.FORALL); registerOperator(EXISTS, 
PPLBuiltinOperators.EXISTS); registerOperator(FILTER, PPLBuiltinOperators.FILTER); diff --git a/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImplTest.java b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImplTest.java new file mode 100644 index 0000000000..6dbc1901fa --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImplTest.java @@ -0,0 +1,305 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.CollectionUDF; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for ArrayFunctionImpl. + * + *

These tests verify that the array() function correctly handles null elements inside arrays, + * which is critical for the NOMV command's null filtering functionality via ARRAY_COMPACT. + * + *

The array() function uses NullPolicy.NONE, meaning it accepts null arguments and preserves + * them inside the resulting array. This allows ARRAY_COMPACT to filter them out later. + */ +public class ArrayFunctionImplTest { + + @Test + public void testArrayWithNoArguments() { + Object result = ArrayFunctionImpl.internalCast(Collections.emptyList(), SqlTypeName.VARCHAR); + assertNotNull(result, "Empty array should not be null"); + assertTrue(result instanceof List, "Result should be a List"); + assertEquals(0, ((List) result).size(), "Empty array should have size 0"); + } + + @Test + public void testArrayWithSingleElement() { + Object result = ArrayFunctionImpl.internalCast(Arrays.asList("test"), SqlTypeName.VARCHAR); + assertNotNull(result); + assertTrue(result instanceof List); + assertEquals(Arrays.asList("test"), result); + } + + @Test + public void testArrayWithMultipleElements() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList("a", "b", "c"), SqlTypeName.VARCHAR); + assertNotNull(result); + assertEquals(Arrays.asList("a", "b", "c"), result); + } + + // ==================== NULL HANDLING TESTS ==================== + // These tests are critical for NOMV command's null filtering + + @Test + public void testArrayWithNullInMiddle() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList("a", null, "b"), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with null should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(3, list.size(), "Array should preserve null element"); + assertEquals("a", list.get(0)); + assertNull(list.get(1), "Middle element should be null"); + assertEquals("b", list.get(2)); + } + + @Test + public void testArrayWithNullAtBeginning() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList(null, "a", "b"), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with null should not return null"); + assertTrue(result instanceof List); + List list = (List) 
result; + assertEquals(3, list.size(), "Array should preserve null element"); + assertNull(list.get(0), "First element should be null"); + assertEquals("a", list.get(1)); + assertEquals("b", list.get(2)); + } + + @Test + public void testArrayWithNullAtEnd() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList("a", "b", null), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with null should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(3, list.size(), "Array should preserve null element"); + assertEquals("a", list.get(0)); + assertEquals("b", list.get(1)); + assertNull(list.get(2), "Last element should be null"); + } + + @Test + public void testArrayWithMultipleNulls() { + Object result = + ArrayFunctionImpl.internalCast( + Arrays.asList("a", null, "b", null, "c"), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with nulls should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(5, list.size(), "Array should preserve all null elements"); + assertEquals("a", list.get(0)); + assertNull(list.get(1), "Second element should be null"); + assertEquals("b", list.get(2)); + assertNull(list.get(3), "Fourth element should be null"); + assertEquals("c", list.get(4)); + } + + @Test + public void testArrayWithAllNulls() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList(null, null, null), SqlTypeName.VARCHAR); + assertNotNull(result, "Array of all nulls should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(3, list.size(), "Array should preserve all null elements"); + assertNull(list.get(0)); + assertNull(list.get(1)); + assertNull(list.get(2)); + } + + @Test + public void testArrayWithSingleNull() { + Object result = + ArrayFunctionImpl.internalCast(Collections.singletonList(null), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with single null should not return null"); + 
assertTrue(result instanceof List); + List list = (List) result; + assertEquals(1, list.size(), "Array should contain one null element"); + assertNull(list.get(0)); + } + + @Test + public void testArrayWithMixedTypesAndNulls() { + Object result = + ArrayFunctionImpl.internalCast( + Arrays.asList(1, null, "text", null, 3.14), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with mixed types and nulls should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(5, list.size()); + assertEquals("1", list.get(0)); // Converted to string + assertNull(list.get(1)); + assertEquals("text", list.get(2)); + assertNull(list.get(3)); + assertEquals("3.14", list.get(4)); // Converted to string + } + + // ==================== INTEGRATION WITH NOMV WORKFLOW ==================== + // These tests verify the array works correctly in the NOMV workflow: + // array(fields) -> array_compact(array) -> mvjoin(compacted, '\n') -> coalesce(result, '') + + @Test + public void testArrayOutputCanBeProcessedByArrayCompact() { + // Simulate: array(field1, null, field2) -> array_compact + Object arrayResult = + ArrayFunctionImpl.internalCast( + Arrays.asList("value1", null, "value2"), SqlTypeName.VARCHAR); + assertNotNull(arrayResult); + assertTrue(arrayResult instanceof List); + + // Verify the array has the structure expected by array_compact + List list = (List) arrayResult; + assertEquals(3, list.size(), "Array should have 3 elements before compacting"); + + // Simulate what array_compact would do (filter out nulls) + List compacted = list.stream().filter(item -> item != null).collect(Collectors.toList()); + assertEquals(2, compacted.size(), "After compacting, array should have 2 elements"); + assertEquals("value1", compacted.get(0)); + assertEquals("value2", compacted.get(1)); + } + + @Test + public void testArrayWithAllNullsForNomvWorkflow() { + // RFC Example 9: array(null, null, null) should allow NOMV to return "" + Object arrayResult = + 
ArrayFunctionImpl.internalCast(Arrays.asList(null, null, null), SqlTypeName.VARCHAR); + assertNotNull(arrayResult); + assertTrue(arrayResult instanceof List); + + List list = (List) arrayResult; + assertEquals(3, list.size(), "Array should preserve all nulls"); + + // Simulate array_compact - should result in empty array + List compacted = + list.stream().filter(item -> item != null).collect(java.util.stream.Collectors.toList()); + assertEquals( + 0, compacted.size(), "After compacting all nulls, array should be empty for NOMV to use"); + } + + @Test + public void testArrayPreservesNullsForRFCExample5() { + // RFC Example 5: nomv should filter nulls + // array('a', null, 'b') -> array_compact -> ['a', 'b'] -> mvjoin -> "a\nb" + Object arrayResult = + ArrayFunctionImpl.internalCast(Arrays.asList("a", null, "b"), SqlTypeName.VARCHAR); + assertNotNull(arrayResult); + + List list = (List) arrayResult; + assertEquals(3, list.size(), "Original array should have 3 elements"); + assertNull(list.get(1), "Middle element should be null"); + + // After array_compact + List compacted = + list.stream().filter(item -> item != null).collect(java.util.stream.Collectors.toList()); + assertEquals(2, compacted.size()); + assertEquals("a", compacted.get(0)); + assertEquals("b", compacted.get(1)); + } + + // ==================== EDGE CASE TESTS ==================== + + @Test + public void testArrayWithNumericTypes() { + Object result = ArrayFunctionImpl.internalCast(Arrays.asList(1, 2, 3), SqlTypeName.INTEGER); + assertNotNull(result); + assertEquals(Arrays.asList(1, 2, 3), result); + } + + @Test + public void testArrayWithMixedNumericAndString() { + Object result = ArrayFunctionImpl.internalCast(Arrays.asList(1, "two", 3), SqlTypeName.VARCHAR); + assertNotNull(result); + assertEquals(Arrays.asList("1", "two", "3"), result); + } + + @Test + public void testArrayWithEmptyStrings() { + // Empty strings should be preserved (they are not null) + Object result = + 
ArrayFunctionImpl.internalCast(Arrays.asList("a", "", "b"), SqlTypeName.VARCHAR); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals("a", list.get(0)); + assertEquals("", list.get(1), "Empty string should be preserved"); + assertEquals("b", list.get(2)); + } + + @Test + public void testArrayWithBooleanValues() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList(true, false, null), SqlTypeName.BOOLEAN); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals(true, list.get(0)); + assertEquals(false, list.get(1)); + assertNull(list.get(2)); + } + + // ==================== TYPE CONVERSION TESTS ==================== + // Test that internalCast correctly handles type conversions while preserving nulls + + @Test + public void testArrayWithDoubleTypePreservesNulls() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList(1.5, null, 2.7), SqlTypeName.DOUBLE); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals(1.5, list.get(0)); + assertNull(list.get(1), "Null should be preserved during DOUBLE type conversion"); + assertEquals(2.7, list.get(2)); + } + + @Test + public void testArrayWithFloatTypePreservesNulls() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList(1.5f, null, 2.7f), SqlTypeName.FLOAT); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals(1.5f, list.get(0)); + assertNull(list.get(1), "Null should be preserved during FLOAT type conversion"); + assertEquals(2.7f, list.get(2)); + } + + @Test + public void testArrayWithVarcharTypePreservesNulls() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList("a", null, "b"), SqlTypeName.VARCHAR); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals("a", list.get(0)); + assertNull(list.get(1), "Null should be preserved during VARCHAR 
type conversion"); + assertEquals("b", list.get(2)); + } + + @Test + public void testArrayWithCharTypePreservesNulls() { + Object result = ArrayFunctionImpl.internalCast(Arrays.asList("x", null, "y"), SqlTypeName.CHAR); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals("x", list.get(0)); + assertNull(list.get(1), "Null should be preserved during CHAR type conversion"); + assertEquals("y", list.get(2)); + } +} diff --git a/docs/category.json b/docs/category.json index bcf73cb1a8..f90acc0369 100644 --- a/docs/category.json +++ b/docs/category.json @@ -25,6 +25,7 @@ "user/ppl/cmd/join.md", "user/ppl/cmd/lookup.md", "user/ppl/cmd/mvcombine.md", + "user/ppl/cmd/nomv.md", "user/ppl/cmd/parse.md", "user/ppl/cmd/patterns.md", "user/ppl/cmd/rare.md", diff --git a/docs/user/ppl/cmd/nomv.md b/docs/user/ppl/cmd/nomv.md new file mode 100644 index 0000000000..87c17d1247 --- /dev/null +++ b/docs/user/ppl/cmd/nomv.md @@ -0,0 +1,81 @@ +# nomv + +## Description + +The `nomv` command converts a multivalue (array) field into a single-value string field by joining all array elements with newline characters (`\n`). This operation is performed in-place, replacing the original field with its joined string representation. + +`nomv` is a transforming command: it modifies the specified field without changing the number of rows in the result set. + +### Key behaviors + +- The field must be **ARRAY type**. For scalar fields, use the `array()` function to create an array first. + +--- + +## Syntax + +```syntax +nomv <field> +``` + +### Arguments + +- **field** (required) + The name of the field whose multivalue content should be converted to a single-value string. 
+ +--- + +## Example 1: Basic nomv usage + +```ppl +source=accounts +| where account_number=1 +| eval names = array(firstname, lastname) +| nomv names +| fields account_number, names +``` + +Expected output: +```text +fetched rows / total rows = 1/1 ++----------------+-------+ +| account_number | names | +|----------------+-------| +| 1 | Amber | +| | Duke | ++----------------+-------+ +``` + +## Example 2: nomv with an eval-created field + +```ppl +source=accounts +| where account_number=1 +| eval location = array(city, state) +| nomv location +| fields account_number, location +``` + +Expected output: +```text +fetched rows / total rows = 1/1 ++----------------+----------+ +| account_number | location | +|----------------+----------| +| 1 | Brogan | +| | IL | ++----------------+----------+ +``` + +--- + +## Notes + +- The `nomv` command is only available when the Calcite query engine is enabled. +- This command is particularly useful when you need to export or display multivalue fields as single strings. +- The newline delimiter (`\n`) is fixed and cannot be customized. For custom delimiters, use the `mvjoin` function directly in an eval expression. +- NULL values within the array are automatically filtered out when converting the array to a string, so they do not appear in the output or contribute empty lines. + +## Related commands + +- `mvjoin()` -- Function used by nomv internally to join array elements with a custom delimiter diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md index 12afe96eea..262cdf4f02 100644 --- a/docs/user/ppl/index.md +++ b/docs/user/ppl/index.md @@ -78,11 +78,12 @@ source=accounts | [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index. | | [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. | | [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. 
| -| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. | +| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. | | [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. | | [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns. | | [mvcombine command](cmd/mvcombine.md) | 3.5 | stable (since 3.4) | Combines values of a specified field across rows identical on all other fields. | - +| [nomv command](cmd/nomv.md) | 3.6 | stable (since 3.6) | Converts a multivalue field to a single-value string by joining elements with newlines. | + - [Syntax](cmd/syntax.md) - PPL query structure and command syntax formatting * **Functions** - [Aggregation Functions](functions/aggregations.md) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index aa569629e2..c564c0b86c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -108,7 +108,8 @@ CalciteVisualizationFormatIT.class, CalciteWhereCommandIT.class, CalcitePPLTpchIT.class, - CalciteMvCombineCommandIT.class + CalciteMvCombineCommandIT.class, + CalciteNoMvCommandIT.class }) public class CalciteNoPushdownIT { private static boolean wasPushdownEnabled; diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 0bbc25f1d7..7a50a1ae44 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ 
-2529,6 +2529,19 @@ public void testExplainMvCombine() throws IOException { assertYamlEqualsIgnoreId(expected, actual); } + @Test + public void testExplainNoMv() throws IOException { + String query = + "source=opensearch-sql_test_index_account " + + "| fields state, city, age " + + "| eval location = array(state, city) " + + "| nomv location"; + + String actual = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_nomv.yaml"); + assertYamlEqualsIgnoreId(expected, actual); + } + // ==================== fetch_size explain tests ==================== @Test @@ -2706,4 +2719,30 @@ public void testFilterBooleanFieldOnlyNotTrue() throws IOException { String expected = loadExpectedPlan("explain_filter_boolean_only_not_true.yaml"); assertYamlEqualsIgnoreId(expected, result); } + + @Test + public void testNoMvBasic() throws IOException { + String query = + StringUtils.format( + "source=%s | fields firstname, age | eval names = array(firstname) | nomv names |" + + " fields names", + TEST_INDEX_BANK); + var result = explainQueryYaml(query); + Assert.assertTrue( + "Expected explain to contain ARRAY_JOIN function", + result.toLowerCase().contains("array_join")); + } + + @Test + public void testNoMvWithEval() throws IOException { + String query = + StringUtils.format( + "source=%s | eval full_name = concat(firstname, ' J.') | eval name_array =" + + " array(full_name) | nomv name_array | fields name_array", + TEST_INDEX_BANK); + var result = explainQueryYaml(query); + Assert.assertTrue( + "Expected explain to contain both CONCAT and ARRAY_JOIN", + result.toLowerCase().contains("concat") && result.toLowerCase().contains("array_join")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java new file mode 100644 index 0000000000..3ad50cdb4b --- /dev/null +++ 
b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java @@ -0,0 +1,359 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.opensearch.client.ResponseException; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteNoMvCommandIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.BANK); + loadIndex(Index.BANK_WITH_NULL_VALUES); + } + + // --------------------------- + // Sanity (precondition) + // --------------------------- + + @Test + public void testSanityDatasetIsLoaded() throws IOException { + JSONObject result = executeQuery("source=" + TEST_INDEX_BANK + " | head 5"); + int rows = result.getJSONArray("datarows").length(); + Assertions.assertTrue(rows > 0, "Expected bank dataset to have rows, got 0"); + } + + // --------------------------- + // Happy path (core nomv) + // --------------------------- + + @Test + public void testNoMvBasicUsageFromRFC() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | where account_number=1 | eval names = array(firstname, lastname) | nomv names |" + + " fields account_number, names"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("names", null, "string")); + + 
verifyDataRows(result, rows(1, "Amber JOHnny\nDuke Willmington")); + } + + @Test + public void testNoMvEvalCreatedFieldFromRFC() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | where account_number=1 | eval location = array(city, state) | nomv location |" + + " fields account_number, location"; + + JSONObject result = executeQuery(q); + + verifySchema( + result, schema("account_number", null, "bigint"), schema("location", null, "string")); + + verifyDataRows(result, rows(1, "Brogan\nIL")); + } + + // --------------------------- + // Additional nomv tests + // --------------------------- + + @Test + public void testNoMvMultipleArraysAppliedInSequence() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval arr1 = array('a', 'b'), arr2 = array('x', 'y') | nomv arr1 | nomv arr2 |" + + " head 1 | fields arr1, arr2"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("arr1", null, "string"), schema("arr2", null, "string")); + + verifyDataRows(result, rows("a\nb", "x\ny")); + } + + @Test + public void testNoMvInComplexPipelineWithWhereAndSort() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | where account_number < 20 | eval arr = array(firstname, 'test') | nomv arr |" + + " sort account_number | head 3 | fields account_number, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); + + verifyDataRows( + result, rows(1, "Amber JOHnny\ntest"), rows(6, "Hattie\ntest"), rows(13, "Nanette\ntest")); + } + + @Test + public void testNoMvFieldUsableInSubsequentOperations() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | where account_number = 6 | eval arr = array('test', 'data') | nomv arr | eval" + + " arr_len = length(arr) | fields account_number, arr, arr_len"; + + JSONObject result = executeQuery(q); + + verifySchema( + result, + schema("account_number", null, 
"bigint"), + schema("arr", null, "string"), + schema("arr_len", null, "int")); + + verifyDataRows(result, rows(6, "test\ndata", 9)); + } + + @Test + public void testNoMvWithStatsAfterAggregation() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | stats count() as cnt by age | eval age_str = cast(age as string) | eval arr =" + + " array(age_str, 'count') | nomv arr | fields cnt, age, arr | sort cnt | head 2"; + + JSONObject result = executeQuery(q); + + verifySchema( + result, + schema("cnt", null, "bigint"), + schema("age", null, "int"), + schema("arr", null, "string")); + + Assertions.assertTrue(result.getJSONArray("datarows").length() > 0); + } + + @Test + public void testNoMvWithEvalWorksOnComputedArrays() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | where account_number = 1 | eval full_name = concat(firstname, ' ', lastname) |" + + " eval arr = array(full_name, 'suffix') | nomv arr | fields full_name, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("full_name", null, "string"), schema("arr", null, "string")); + + verifyDataRows( + result, rows("Amber JOHnny Duke Willmington", "Amber JOHnny Duke Willmington\nsuffix")); + } + + @Test + public void testNoMvEmptyArray() throws IOException { + String q = + "source=" + TEST_INDEX_BANK + " | eval arr = array() | nomv arr | head 1 | fields arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("arr", null, "string")); + + verifyDataRows(result, rows("")); + } + + @Test + public void testNoMvScalarFieldError() throws IOException { + ResponseException ex = + Assertions.assertThrows( + ResponseException.class, + () -> + executeQuery("source=" + TEST_INDEX_BANK + " | fields firstname | nomv firstname")); + + int status = ex.getResponse().getStatusLine().getStatusCode(); + Assertions.assertEquals(400, status, "Expected 400 for type mismatch"); + + String msg = ex.getMessage(); + + Assertions.assertTrue( + 
msg.contains("MVJOIN") || msg.contains("ARRAY") || msg.contains("type"), msg); + } + + @Test + public void testNoMvResultUsedInComparison() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval arr = array('test') | nomv arr | where arr = 'test' | head 1 | fields" + + " account_number, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); + + Assertions.assertTrue(result.getJSONArray("datarows").length() > 0); + } + + @Test + public void testNoMvMissingFieldShouldReturn4xx() throws IOException { + ResponseException ex = + Assertions.assertThrows( + ResponseException.class, + () -> executeQuery("source=" + TEST_INDEX_BANK + " | nomv does_not_exist")); + + int status = ex.getResponse().getStatusLine().getStatusCode(); + + Assertions.assertEquals(400, status, "Unexpected status. ex=" + ex.getMessage()); + + String msg = ex.getMessage(); + Assertions.assertTrue( + msg.contains("does_not_exist") + || msg.contains("field") + || msg.contains("Field") + || msg.contains("ARRAY_COMPACT") + || msg.contains("ARRAY"), + msg); + } + + @Test + public void testNoMvWithNullInMiddleOfArray() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK_WITH_NULL_VALUES + + " | where account_number = 25 | eval arr = array(firstname, age, lastname) | nomv" + + " arr | fields account_number, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); + + verifyDataRows(result, rows(25, "Virginia\nAyala")); + } + + @Test + public void testNoMvWithNullAtBeginningAndEnd() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK_WITH_NULL_VALUES + + " | where account_number = 25 | eval arr = array(age, firstname, age) | nomv arr |" + + " fields account_number, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), 
schema("arr", null, "string")); + + verifyDataRows(result, rows(25, "Virginia")); + } + + @Test + public void testNoMvWithAllNulls() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK_WITH_NULL_VALUES + + " | where account_number = 25 | eval arr = array(age, age, age) | nomv arr | fields" + + " account_number, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); + + verifyDataRows(result, rows(25, "")); + } + + @Test + public void testNoMvArrayWithAllNulls() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK_WITH_NULL_VALUES + + " | where account_number = 25 | eval arr = array(age, age, age) | nomv arr | fields" + + " account_number, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); + + verifyDataRows(result, rows(25, "")); + } + + @Test + public void testNoMvMultipleRowsRowLocalBehavior() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval tags = array(firstname, lastname) | nomv tags | sort account_number | head" + + " 3 | fields account_number, tags"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("tags", null, "string")); + + verifyDataRows( + result, + rows(1, "Amber JOHnny\nDuke Willmington"), + rows(6, "Hattie\nBond"), + rows(13, "Nanette\nBates")); + } + + @Test + public void testNoMvNonConsecutiveRowsNoGrouping() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | where account_number = 1 or account_number = 6 or account_number = 13 | eval" + + " tags = array(firstname, city) | nomv tags | sort account_number | fields" + + " account_number, tags"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("tags", null, "string")); + + verifyDataRows( + result, + rows(1, 
"Amber JOHnny\nBrogan"), + rows(6, "Hattie\nDante"), + rows(13, "Nanette\nNogal")); + } + + @Test + public void testNoMvNullFieldValue() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK_WITH_NULL_VALUES + + " | where account_number = 6 | eval balance_str = cast(balance as string) | eval arr" + + " = array(balance_str) | nomv arr | fields account_number, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); + + verifyDataRows(result, rows(6, "")); + } + + @Test + public void testNoMvArrayWithEmptyStrings() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval tags = array('a', '', 'b') | nomv tags | head 1 | fields tags"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("tags", null, "string")); + + verifyDataRows(result, rows("a\n\nb")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index c5a1d08c37..5a7f7be922 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java @@ -252,4 +252,20 @@ public void testMvCombineUnsupportedInV2() throws IOException { } verifyQuery(result); } + + @Test + public void testNoMvUnsupportedInV2() throws IOException { + JSONObject result; + try { + result = + executeQuery( + String.format( + "source=%s | fields account_number, firstname | eval names = array(firstname) |" + + " nomv names", + TEST_INDEX_BANK)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + } + verifyQuery(result); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index 571d915517..0c101ce472 
100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -418,4 +418,21 @@ public void testCrossClusterFieldFormat() throws IOException { verifyDataRows( result, rows("Hattie", 36, 5686, "$5,686"), rows("Nanette", 28, 32838, "$32,838")); } + + /** CrossClusterSearchIT Test for nomv. */ + @Test + public void testCrossClusterNoMv() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | where firstname='Hattie' " + + "| eval names = array(firstname, lastname) | nomv names " + + "| fields firstname, names", + TEST_INDEX_BANK_REMOTE)); + + verifyColumn(result, columnName("firstname"), columnName("names")); + verifySchema(result, schema("firstname", "string"), schema("names", "string")); + + verifyDataRows(result, rows("Hattie", "Hattie\nBond")); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml new file mode 100644 index 0000000000..e522ceb639 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(state=[$7], city=[$5], age=[$8], location=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($7, $5)), ' + '), '':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[array($t0, $t1)], expr#4=[ARRAY_COMPACT($t3)], expr#5=[' + '], expr#6=[ARRAY_JOIN($t4, $t5)], expr#7=['':VARCHAR], expr#8=[COALESCE($t6, $t7)], proj#0..2=[{exprs}], location=[$t8]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state","city","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml new file mode 100644 index 0000000000..ace49cb6b9 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(state=[$7], city=[$5], age=[$8], location=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($7, $5)), ' + '), '':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[array($t7, $t5)], expr#18=[ARRAY_COMPACT($t17)], expr#19=[' + '], expr#20=[ARRAY_JOIN($t18, $t19)], expr#21=['':VARCHAR], expr#22=[COALESCE($t20, $t21)], state=[$t7], city=[$t5], age=[$t8], location=[$t22]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 9113663e47..54d69da39b 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -74,6 +74,7 @@ AGGREGATION: 'AGGREGATION'; APPENDPIPE: 'APPENDPIPE'; COLUMN_NAME: 'COLUMN_NAME'; MVCOMBINE: 'MVCOMBINE'; +NOMV: 'NOMV'; //Native JOIN KEYWORDS diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 8cc4ed932d..455fd92c3d 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -91,6 +91,7 @@ commands | replaceCommand | mvcombineCommand | fieldformatCommand + | nomvCommand ; commandName @@ -136,6 +137,7 @@ commandName 
| APPENDPIPE | REPLACE | MVCOMBINE + | NOMV | TRANSPOSE ; @@ -555,6 +557,10 @@ mvcombineCommand : MVCOMBINE fieldExpression (DELIM EQUAL stringLiteral)? ; +nomvCommand + : NOMV fieldExpression + ; + flattenCommand : FLATTEN fieldExpression (AS aliases = identifierSeq)? ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 1ff9d2818d..ed432dc903 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -93,6 +93,7 @@ import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.NoMv; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -908,6 +909,12 @@ public UnresolvedPlan visitMvcombineCommand(OpenSearchPPLParser.MvcombineCommand return new MvCombine(field, delim); } + @Override + public UnresolvedPlan visitNomvCommand(OpenSearchPPLParser.NomvCommandContext ctx) { + Field field = (Field) internalVisitExpression(ctx.fieldExpression()); + return new NoMv(field); + } + @Override public UnresolvedPlan visitGrokCommand(OpenSearchPPLParser.GrokCommandContext ctx) { UnresolvedExpression sourceField = internalVisitExpression(ctx.source_field); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 4376b5659d..a47db85c3c 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -83,6 +83,7 @@ import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import 
org.opensearch.sql.ast.tree.NoMv; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -473,6 +474,14 @@ public String visitMvCombine(MvCombine node, String context) { return StringUtils.format("%s | mvcombine delim=%s %s", child, MASK_LITERAL, field); } + @Override + public String visitNoMv(NoMv node, String context) { + String child = node.getChild().getFirst().accept(this, context); + String field = visitExpression(node.getField()); + + return StringUtils.format("%s | nomv %s", child, field); + } + /** Build {@link LogicalSort}. */ @Override public String visitSort(Sort node, String context) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java index 9513558952..4383acf40e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java @@ -298,6 +298,6 @@ public void testValuesFunctionWithArrayArgType() { public void testMvjoinRejectsNonStringValues() { verifyQueryThrowsException( "source=EMP | eval result = mvjoin(42, ',') | fields result | head 1", - "MVJOIN function expects {[ARRAY,STRING]}, but got [INTEGER,STRING]"); + "MVJOIN function expects {[ARRAY,STRING]|[ARRAY,STRING,STRING]}, but got [INTEGER,STRING]"); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java new file mode 100644 index 0000000000..5d7669d20a --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java @@ -0,0 +1,481 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import static org.junit.Assert.assertThrows; + +import 
org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.rel2sql.RelToSqlConverter; +import org.apache.calcite.rel.rel2sql.SqlImplementor; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLNoMvTest extends CalcitePPLAbstractTest { + + private static final String LS = System.lineSeparator(); + + public CalcitePPLNoMvTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + /** + * Override to avoid normalizing the '\n' delimiter inside ARRAY_JOIN. The base class's + * normalization replaces ALL \n with System.lineSeparator(), which incorrectly changes the + * delimiter from '\n' to '\r\n' on Windows. The delimiter should always be '\n' regardless of + * platform - it's a data value, not a line separator. + */ + @Override + public void verifyPPLToSparkSQL(RelNode rel, String expected) { + // Don't normalize - expect strings are written with explicit System.lineSeparator() + SqlImplementor.Result result = getConverter().visitRoot(rel); + final SqlNode sqlNode = result.asStatement(); + final String sql = sqlNode.toSqlString(OpenSearchSparkSqlDialect.DEFAULT).getSql(); + org.hamcrest.MatcherAssert.assertThat(sql, org.hamcrest.CoreMatchers.is(expected)); + } + + // Helper to access converter from parent + private RelToSqlConverter getConverter() { + return new RelToSqlConverter(OpenSearchSparkSqlDialect.DEFAULT); + } + + @Test + public void testNoMvBasic() { + String ppl = + "source=EMP | eval arr = array('web', 'production', 'east') | nomv arr | head 1 | fields" + + " arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('web':VARCHAR, 'production':VARCHAR," + + " 'east':VARCHAR)), '\n" + + "'), '':VARCHAR)])\n" + + 
" LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('web', 'production', 'east')), '\n" + + "'), '') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvMultipleDocuments() { + String ppl = + "source=EMP | eval arr = array('web', 'production') | nomv arr | head 2 | fields" + + " EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[2])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('web':VARCHAR, 'production':VARCHAR))," + + " '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('web', 'production')), '\n" + + "'), '') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 2"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvWithMultipleFields() { + String ppl = + "source=EMP | eval arr1 = array('a', 'b'), arr2 = array('x', 'y') | nomv arr1 | nomv arr2 |" + + " head 1 | fields arr1, arr2"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(arr1=[$8], arr2=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " arr1=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('a', 'b')), '\n" + + "'), '':VARCHAR)], arr2=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('x', 'y')), '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT 
COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('a', 'b')), '\n" + + "'), '') `arr1`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('x', 'y')), '\n" + + "'), '') `arr2`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvWithArrayFromFields() { + String ppl = + "source=EMP | eval tags = array(ENAME, JOB) | nomv tags | head 1 | fields EMPNO, tags"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], tags=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], tags=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($1," + + " $2)), '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(`ENAME`, `JOB`)), '\n" + + "'), '') `tags`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvInPipeline() { + String ppl = + "source=EMP | where DEPTNO = 10 | eval names = array(ENAME, JOB) | nomv names | head 1 |" + + " fields EMPNO, names"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], names=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " names=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($1, $2)), '\n" + + "'), '':VARCHAR)])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(`ENAME`, `JOB`)), '\n" + + "'), '') `names`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "WHERE `DEPTNO` = 10" + + LS + + "LIMIT 
1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvNonExistentField() { + String ppl = "source=EMP | eval arr = array('a', 'b') | nomv does_not_exist | head 1"; + + Exception ex = assertThrows(Exception.class, () -> getRelNode(ppl)); + + String msg = String.valueOf(ex.getMessage()); + org.junit.Assert.assertTrue( + "Expected error message to mention missing field or type error. Actual: " + msg, + msg.toLowerCase().contains("does_not_exist") + || msg.toLowerCase().contains("field") + || msg.contains("ARRAY_COMPACT") + || msg.contains("ARRAY")); + } + + @Test + public void testNoMvScalarFieldError() { + String ppl = "source=EMP | nomv EMPNO | head 1"; + + Exception ex = assertThrows(Exception.class, () -> getRelNode(ppl)); + + String msg = String.valueOf(ex.getMessage()); + org.junit.Assert.assertTrue( + "Expected error for non-array field. Actual: " + msg, + msg.toLowerCase().contains("array") || msg.toLowerCase().contains("type")); + } + + @Test + public void testNoMvNonDirectFieldReferenceError() { + String ppl = "source=EMP | eval arr = array('a', 'b') | nomv upper(arr) | head 1"; + + Exception ex = assertThrows(Exception.class, () -> getRelNode(ppl)); + + String msg = String.valueOf(ex.getMessage()); + org.junit.Assert.assertTrue( + "Expected parser error for non-direct field reference. 
Actual: " + msg, + msg.contains("(") + || msg.toLowerCase().contains("syntax") + || msg.toLowerCase().contains("parse")); + } + + @Test + public void testNoMvWithNestedArray() { + String ppl = + "source=EMP | eval arr = array('a', 'b', 'c') | nomv arr | eval arr_len = length(arr) |" + + " head 1 | fields EMPNO, arr, arr_len"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8], arr_len=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('a'," + + " 'b', 'c')), '\n" + + "'), '':VARCHAR)], arr_len=[CHAR_LENGTH(COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('a'," + + " 'b', 'c')), '\n" + + "'), '':VARCHAR))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('a', 'b', 'c')), '\n" + + "'), '') `arr`, CHAR_LENGTH(COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('a', 'b', 'c'))," + + " '\n" + + "'), '')) `arr_len`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvWithConcatInArray() { + String ppl = + "source=EMP | eval full_name = concat(ENAME, ' - ', JOB), arr = array(full_name) | nomv" + + " arr | head 1 | fields EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], full_name=[CONCAT($1, ' - ':VARCHAR, $2)]," + + " arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array(CONCAT($1, ' - ':VARCHAR, $2))), '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT 
`EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(CONCAT(`ENAME`, ' - ', `JOB`)))," + + " '\n" + + "'), '') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvSingleElementArray() { + String ppl = "source=EMP | eval arr = array('single') | nomv arr | head 1 | fields EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('single':VARCHAR)), '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('single')), '\n'), '') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvEmptyArray() { + String ppl = "source=EMP | eval arr = array() | nomv arr | head 1 | fields EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array())," + + " '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY()), '\n'), '') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvLargeArray() { + String ppl = + "source=EMP | eval arr = array('1', '2', '3', '4', '5', '6', '7', '8', '9', '10') | nomv" + + " 
arr | head 1 | fields arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('1'," + + " '2', '3', '4', '5', '6', '7', '8', '9', '10':VARCHAR)), '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('1', '2', '3', '4', '5', '6', '7', '8'," + + " '9', '10')), '\n" + + "'), '') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvChainedWithOtherOperations() { + String ppl = + "source=EMP | eval arr = array('a', 'b') | nomv arr | eval arr_upper = upper(arr) | head" + + " 1 | fields arr, arr_upper"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(arr=[$8], arr_upper=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('a'," + + " 'b')), '\n" + + "'), '':VARCHAR)], arr_upper=[UPPER(COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('a'," + + " 'b')), '\n" + + "'), '':VARCHAR))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('a', 'b')), '\n" + + "'), '') `arr`, UPPER(COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('a', 'b')), '\n" + + "'), '')) `arr_upper`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvWithNullableField() { + String ppl = + "source=EMP | eval arr = array(ENAME, COMM) | nomv arr | head 1 | fields 
EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($1," + + " $6)), '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(`ENAME`, `COMM`)), '\n" + + "'), '') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvWithMultipleNullableFields() { + String ppl = "source=EMP | eval arr = array(MGR, COMM) | nomv arr | head 1 | fields EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($3," + + " $6)), '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(`MGR`, `COMM`)), '\n'), '') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvWithMixedNullableAndNonNullableFields() { + String ppl = + "source=EMP | eval arr = array(ENAME, COMM, JOB) | nomv arr | head 1 | fields EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], 
arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($1," + + " $6, $2)), '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(`ENAME`, `COMM`, `JOB`)), '\n" + + "'), '') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 1e200eb092..bdb07ea6a9 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -1030,4 +1030,9 @@ public void testMvcombineCommandWithDelim() { "source=table | mvcombine delim=*** identifier", anonymize("source=t | mvcombine age delim=','")); } + + @Test + public void testNoMvCommand() { + assertEquals("source=table | nomv identifier", anonymize("source=t | nomv firstname")); + } }