From f6ccc31052459788ccf41800137a1bfed9ff145d Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 11 Feb 2026 16:03:52 -0800 Subject: [PATCH 1/7] Add auto extraction mode in spath command Signed-off-by: Chen Dai --- .../org/opensearch/sql/ast/tree/SPath.java | 23 +++++ .../sql/calcite/CalciteRelNodeVisitor.java | 6 +- docs/user/ppl/cmd/spath.md | 49 +++++++++- doctest/test_data/structured.json | 6 +- doctest/test_mapping/structured.json | 5 +- .../remote/CalcitePPLSpathCommandIT.java | 93 ++++++++++++++++++- .../opensearch/sql/ppl/parser/AstBuilder.java | 3 - .../sql/ppl/calcite/CalcitePPLSpathTest.java | 21 +++++ .../sql/ppl/parser/AstBuilderTest.java | 10 ++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 7 ++ .../sql/ppl/utils/SPathRewriteTest.java | 19 +++- 11 files changed, 227 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java index a1c0c08a15f..88f1b4d05f6 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java @@ -18,6 +18,21 @@ import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.dsl.AstDSL; +/** + * AST node for the PPL {@code spath} command. Supports two modes: + * + * + * + *

The {@code input} parameter is always required. When {@code output} is omitted, it defaults to + * the path value (path mode) or the input field name (extract-all mode). + */ @ToString @EqualsAndHashCode(callSuper = false) @RequiredArgsConstructor @@ -62,4 +77,12 @@ public Eval rewriteAsEval() { AstDSL.function( "json_extract", AstDSL.field(inField), AstDSL.stringLiteral(unquotedPath)))); } + + public Eval rewriteAsExtractAllEval() { + String outField = this.outField != null ? this.outField : this.inField; + return AstDSL.eval( + this.child, + AstDSL.let( + AstDSL.field(outField), AstDSL.function("json_extract_all", AstDSL.field(inField)))); + } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 68a700b66b5..d2114bad2e5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -795,7 +795,11 @@ public RelNode visitParse(Parse node, CalcitePlanContext context) { @Override public RelNode visitSpath(SPath node, CalcitePlanContext context) { - return visitEval(node.rewriteAsEval(), context); + if (node.getPath() != null) { + return visitEval(node.rewriteAsEval(), context); + } else { + return visitEval(node.rewriteAsExtractAllEval(), context); + } } @Override diff --git a/docs/user/ppl/cmd/spath.md b/docs/user/ppl/cmd/spath.md index d9293113fb0..86d4b9560c4 100644 --- a/docs/user/ppl/cmd/spath.md +++ b/docs/user/ppl/cmd/spath.md @@ -1,7 +1,10 @@ # spath -The `spath` command extracts fields from structured text data by allowing you to select JSON values using JSON paths. +The `spath` command extracts fields from structured JSON data. It operates in two modes: + +- **Path-based mode**: When `path` is specified, extracts a single value at the given JSON path. 
+- **Auto-extract mode**: When `path` is omitted, extracts all fields from the JSON into a map. > **Note**: The `spath` command is not executed on OpenSearch data nodes. It extracts fields from data after it has been returned to the coordinator node, which is slow on large datasets. We recommend indexing fields needed for filtering directly instead of using `spath` to filter nested fields. @@ -20,11 +23,21 @@ The `spath` command supports the following parameters. | Parameter | Required/Optional | Description | | --- | --- | --- | | `input` | Required | The field containing JSON data to parse. | -| `output` | Optional | The destination field in which the extracted data is stored. Default is the value of ``. | -| `` | Required | The JSON path that identifies the data to extract. | +| `output` | Optional | The destination field in which the extracted data is stored. Default is the value of `path` in path-based mode, or the value of `input` in auto-extract mode. | +| `path` | Optional | The JSON path that identifies the data to extract. When omitted, all fields are extracted into a map (auto-extract mode). | For more information about path syntax, see [json_extract](../functions/json.md#json_extract). +## Auto-extract mode + +When `path` is omitted, the `spath` command runs in auto-extract mode. Instead of extracting a single value, it flattens the entire JSON into a `map` column using the following rules: + +- Nested objects use dotted keys: `user.name`, `user.age` +- Arrays use `{}` suffix: `tags{}`, `users{}.name` +- Duplicate logical keys merge into arrays: `c{}.b = [2, 3]` + +> **Note**: Auto-extract mode processes the entire input field with no character limit. For large JSON payloads, consider using path-based extraction to target specific fields. + ## Example 1: Basic field extraction The basic use of `spath` extracts a single field from JSON data. 
The following query extracts the `n` field from JSON objects in the `doc_n` field: @@ -123,3 +136,33 @@ fetched rows / total rows = 3/3 +-------+---+ ``` + +## Example 5: Auto-extract mode + +When `path` is omitted, `spath` extracts all fields from the JSON into a map: + +```ppl +source=structured +| spath input=doc_auto output=result +| fields doc_auto result +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++---------------------------------------------------------------------------------+----------------------------------------------------------------------------------+ +| doc_auto | result | +|---------------------------------------------------------------------------------+----------------------------------------------------------------------------------| +| {"user":{"name":"John","age":30},"tags":["java","sql"]} | {'user.age': 30, 'tags{}': ['java', 'sql'], 'user.name': 'John'} | +| {"user":{"name":"Jane","age":25},"tags":["python"]} | {'user.age': 25, 'tags{}': 'python', 'user.name': 'Jane'} | +| {"user":{"name":"Bob","age":35},"tags":["go","rust","sql"],"user.name":"Bobby"} | {'user.age': 35, 'tags{}': ['go', 'rust', 'sql'], 'user.name': ['Bob', 'Bobby']} | ++---------------------------------------------------------------------------------+----------------------------------------------------------------------------------+ +``` + +The flattening rules demonstrated in this example: + +- Nested objects use dotted keys: `user.name` and `user.age` are extracted from `{"user": {"name": "John", "age": 30}}` +- Arrays use `{}` suffix: `tags{}` is extracted from `{"tags": ["java", "sql"]}` +- Duplicate logical keys merge into arrays: in the third row, both `"user": {"name": "Bob"}` (nested) and `"user.name": "Bobby"` (direct dotted key) resolve to the same key `user.name`, so their values merge into `[Bob, Bobby]` + diff --git a/doctest/test_data/structured.json b/doctest/test_data/structured.json index 
c0717c6f328..1c87b631a72 100644 --- a/doctest/test_data/structured.json +++ b/doctest/test_data/structured.json @@ -1,3 +1,3 @@ -{"doc_n":"{\"n\": 1}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 0}","doc_list":"{\"list\": [1, 2, 3, 4], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "a"}} -{"doc_n":"{\"n\": 2}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 1}","doc_list":"{\"list\": [], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "b"}} -{"doc_n":"{\"n\": 3}","doc_escape":"{\"a fancy field name\": false,\"a.b.c\": 2}","doc_list":"{\"list\": [5, 6], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "c"}} \ No newline at end of file +{"doc_n":"{\"n\": 1}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 0}","doc_list":"{\"list\": [1, 2, 3, 4], \"nest_out\": {\"nest_in\": \"a\"}}","doc_auto":"{\"user\":{\"name\":\"John\",\"age\":30},\"tags\":[\"java\",\"sql\"]}","obj_field":{"field": "a"}} +{"doc_n":"{\"n\": 2}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 1}","doc_list":"{\"list\": [], \"nest_out\": {\"nest_in\": \"a\"}}","doc_auto":"{\"user\":{\"name\":\"Jane\",\"age\":25},\"tags\":[\"python\"]}","obj_field":{"field": "b"}} +{"doc_n":"{\"n\": 3}","doc_escape":"{\"a fancy field name\": false,\"a.b.c\": 2}","doc_list":"{\"list\": [5, 6], \"nest_out\": {\"nest_in\": \"a\"}}","doc_auto":"{\"user\":{\"name\":\"Bob\",\"age\":35},\"tags\":[\"go\",\"rust\",\"sql\"],\"user.name\":\"Bobby\"}","obj_field":{"field": "c"}} diff --git a/doctest/test_mapping/structured.json b/doctest/test_mapping/structured.json index 5c79e53dc0a..dd255cc0c54 100644 --- a/doctest/test_mapping/structured.json +++ b/doctest/test_mapping/structured.json @@ -10,6 +10,9 @@ "doc_escape": { "type": "text" }, + "doc_auto": { + "type": "text" + }, "obj_field": { "properties": { "field": { "type": "text" } @@ -17,4 +20,4 @@ } } } -} \ No newline at end of file +} diff --git 
a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java index 51b5bd40304..7cc192cdf02 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java @@ -24,7 +24,7 @@ public void init() throws Exception { loadIndex(Index.BANK); - // Create test data for string concatenation + // Simple JSON docs for path-based extraction Request request1 = new Request("PUT", "/test_spath/_doc/1?refresh=true"); request1.setJsonEntity("{\"doc\": \"{\\\"n\\\": 1}\"}"); client().performRequest(request1); @@ -36,6 +36,24 @@ public void init() throws Exception { Request request3 = new Request("PUT", "/test_spath/_doc/3?refresh=true"); request3.setJsonEntity("{\"doc\": \"{\\\"n\\\": 3}\"}"); client().performRequest(request3); + + // Nested JSON doc for nested path extraction + Request request4 = new Request("PUT", "/test_spath_nested/_doc/1?refresh=true"); + request4.setJsonEntity( + "{\"doc\":" + + " \"{\\\"user\\\":{\\\"name\\\":\\\"John\\\",\\\"age\\\":30},\\\"active\\\":true}\"}"); + client().performRequest(request4); + + // JSON with arrays for array path extraction + Request request5 = new Request("PUT", "/test_spath_array/_doc/1?refresh=true"); + request5.setJsonEntity( + "{\"doc\": \"{\\\"items\\\":[{\\\"id\\\":1},{\\\"id\\\":2},{\\\"id\\\":3}]}\"}"); + client().performRequest(request5); + + // JSON with special field names for escaped path extraction + Request request6 = new Request("PUT", "/test_spath_escape/_doc/1?refresh=true"); + request6.setJsonEntity("{\"doc\": \"{\\\"a fancy field\\\":true,\\\"a.b.c\\\":42}\"}"); + client().performRequest(request6); } @Test @@ -45,4 +63,77 @@ public void testSimpleSpath() throws IOException { verifySchema(result, schema("result", "string")); verifyDataRows(result, rows("1"), 
rows("2"), rows("3")); } + + @Test + public void testSpathPathDefaultsOutputToPath() throws IOException { + JSONObject result = executeQuery("source=test_spath | spath input=doc path=n | fields n"); + verifySchema(result, schema("n", "string")); + verifyDataRows(result, rows("1"), rows("2"), rows("3")); + } + + @Test + public void testSpathNestedPath() throws IOException { + JSONObject result = + executeQuery( + "source=test_spath_nested | spath input=doc output=name path=user.name | fields name"); + verifySchema(result, schema("name", "string")); + verifyDataRows(result, rows("John")); + } + + @Test + public void testSpathMissingPath() throws IOException { + JSONObject result = + executeQuery( + "source=test_spath | spath input=doc output=result path=nonexistent | fields result"); + verifySchema(result, schema("result", "string")); + verifyDataRows(result, rows((Object) null), rows((Object) null), rows((Object) null)); + } + + @Test + public void testSpathNoPathExtractAll() throws IOException { + JSONObject result = + executeQuery("source=test_spath_nested | spath input=doc output=result | fields result"); + verifySchema(result, schema("result", "other")); + // extract-all returns a map with flattened keys + verifyDataRows(result, rows("{user.name=John, active=true, user.age=30}")); + } + + @Test + public void testSpathNoPathDefaultsOutputToInput() throws IOException { + JSONObject result = executeQuery("source=test_spath | spath input=doc | fields doc"); + verifySchema(result, schema("doc", "other")); + // output defaults to input field name, overwrites with map + verifyDataRows(result, rows("{n=1}"), rows("{n=2}"), rows("{n=3}")); + } + + @Test + public void testSpathArrayIndex() throws IOException { + JSONObject result = + executeQuery( + "source=test_spath_array | spath input=doc output=first path=items{0}.id" + + " | fields first"); + verifySchema(result, schema("first", "string")); + verifyDataRows(result, rows("1")); + } + + @Test + public void 
testSpathArrayWildcard() throws IOException { + JSONObject result = + executeQuery( + "source=test_spath_array | spath input=doc output=all_ids path=items{}.id" + + " | fields all_ids"); + verifySchema(result, schema("all_ids", "string")); + verifyDataRows(result, rows("[1,2,3]")); + } + + @Test + public void testSpathEscapedPath() throws IOException { + JSONObject result = + executeQuery( + "source=test_spath_escape | spath input=doc output=a path=\"['a fancy field']\"" + + " | spath input=doc output=b path=\"['a.b.c']\"" + + " | fields a b"); + verifySchema(result, schema("a", "string"), schema("b", "string")); + verifyDataRows(result, rows("true", "42")); + } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 1ff9d2818d9..4397eb6eb6a 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -945,9 +945,6 @@ public UnresolvedPlan visitSpathCommand(OpenSearchPPLParser.SpathCommandContext if (inField == null) { throw new IllegalArgumentException("`input` parameter is required for `spath`"); } - if (path == null) { - throw new IllegalArgumentException("`path` parameter is required for `spath`"); - } return new SPath(inField, outField, path); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java index 57b11d83150..150c0308ea6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java @@ -45,4 +45,25 @@ public void testEvalWithOutput() { "SELECT JSON_EXTRACT(`ENAME`, 'src.path') `custom`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testExtractAllNoPath() { + String ppl = "source=EMP | spath input=ENAME"; + 
RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], ENAME=[JSON_EXTRACT_ALL($1)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testExtractAllNoPathWithOutput() { + String ppl = "source=EMP | spath input=ENAME output=result | fields result"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(result=[JSON_EXTRACT_ALL($1)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 9e1cfe05a4b..4c1980ee54e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -937,6 +937,16 @@ public void testSpathWithNoPathKeyword() { "source=t | spath input=f simple.nested", spath(relation("t"), "f", null, "simple.nested")); } + @Test + public void testSpathWithNoPath() { + assertEqual("source=t | spath input=f", spath(relation("t"), "f", null, null)); + } + + @Test + public void testSpathWithNoPathButOutput() { + assertEqual("source=t | spath input=f output=o", spath(relation("t"), "f", "o", null)); + } + @Test public void testKmeansCommand() { assertEqual( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 1e200eb092b..4d8a7029be0 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -1009,6 +1009,13 @@ public void testSpath() { "search source=t | spath input=json_attr output=out path=foo.bar | fields id, out")); } + @Test + 
public void testSpathNoPath() { + assertEquals( + "source=table | spath input=identifier", + anonymize("search source=t | spath input=json_attr")); + } + @Test public void testMvfind() { assertEquals( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java index 73d282d1f64..aa14fcdd7ba 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java @@ -55,9 +55,22 @@ public void testSpathMissingInputArgumentHandling() { plan("source = t | spath path=a output=a"); } - @Test(expected = IllegalArgumentException.class) - public void testSpathMissingPathArgumentHandling() { - plan("source = t | spath input=a output=a"); + @Test + public void testSpathExtractAllRewrite() { + // spath input=a output=o (no path) -> eval o=json_extract_all(a) + SPath sp = (SPath) plan("source = t | spath input=a output=o"); + assertEquals( + eval(relation("t"), let(field("o"), function("json_extract_all", field("a")))), + sp.rewriteAsExtractAllEval()); + } + + @Test + public void testSpathExtractAllDefaultOutput() { + // spath input=a (no path, no output) -> eval a=json_extract_all(a) + SPath sp = (SPath) plan("source = t | spath input=a"); + assertEquals( + eval(relation("t"), let(field("a"), function("json_extract_all", field("a")))), + sp.rewriteAsExtractAllEval()); } @Test From d4e15c0ba26e46b0b5c0cf14d0787be4eb8eb711 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 11 Feb 2026 20:44:44 -0800 Subject: [PATCH 2/7] Change json_extract_all to return map and fix null preserve issue Signed-off-by: Chen Dai --- .../jsonUDF/JsonExtractAllFunctionImpl.java | 30 +++++++++-- .../JsonExtractAllFunctionImplTest.java | 52 +++++++++---------- docs/user/ppl/cmd/spath.md | 29 +++++++---- doctest/test_data/structured.json | 4 +- .../standalone/JsonExtractAllFunctionIT.java | 10 ++-- 5 files changed, 79
insertions(+), 46 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java index 1f91c87bb77..8768f4e69a8 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java @@ -51,7 +51,7 @@ public SqlReturnTypeInference getReturnTypeInference() { return ReturnTypes.explicit( TYPE_FACTORY.createMapType( TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR), - TYPE_FACTORY.createSqlType(SqlTypeName.ANY), + TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR), true)); } @@ -82,7 +82,31 @@ public static Object eval(Object... args) { return null; } - return parseJson(jsonStr); + Map parsed = parseJson(jsonStr); + return parsed == null ? null : stringifyMap(parsed); + } + + /** + * Convert all values in the map to strings to match the declared {@code map} + * return type. List values (from duplicate key merging) are stringified element-wise. 
+ */ + @SuppressWarnings("unchecked") + private static Map stringifyMap(Map map) { + Map result = new HashMap<>(map.size()); + for (Map.Entry entry : map.entrySet()) { + Object value = entry.getValue(); + if (value instanceof List) { + List original = (List) value; + List stringified = new LinkedList<>(); + for (Object item : original) { + stringified.add(String.valueOf(item)); + } + result.put(entry.getKey(), stringified); + } else { + result.put(entry.getKey(), String.valueOf(value)); + } + } + return result; } private static Map parseJson(String jsonStr) { @@ -150,7 +174,7 @@ private static Map parseJson(String jsonStr) { @SuppressWarnings("unchecked") private static void appendValue(Map resultMap, String path, Object value) { Object existingValue = resultMap.get(path); - if (existingValue == null) { + if (existingValue == null && !resultMap.containsKey(path)) { resultMap.put(path, value); } else if (existingValue instanceof List) { ((List) existingValue).add(value); diff --git a/core/src/test/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImplTest.java b/core/src/test/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImplTest.java index 5a010a17422..cebd097a785 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImplTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImplTest.java @@ -111,7 +111,7 @@ public void testSimpleJsonObject() throws Exception { Map map = eval("{\"name\": \"John\", \"age\": 30}"); assertEquals("John", map.get("name")); - assertEquals(30, map.get("age")); + assertEquals("30", map.get("age")); assertEquals(2, map.size()); } @@ -127,7 +127,7 @@ public void testInvalidJsonReturnResults() { public void testNonObjectJsonArray() { Map map = eval("[1, 2, 3]"); - assertMapListValue(map, "{}", 1, 2, 3); + assertMapListValue(map, "{}", "1", "2", "3"); assertEquals(1, map.size()); } @@ -135,7 +135,7 
@@ public void testNonObjectJsonArray() { public void testTopLevelArrayOfObjects() { Map map = eval("[{\"age\": 1}, {\"age\": 2}]"); - assertMapListValue(map, "{}.age", 1, 2); + assertMapListValue(map, "{}.age", "1", "2"); assertEquals(1, map.size()); } @@ -145,7 +145,7 @@ public void testTopLevelArrayOfComplexObjects() { eval("[{\"name\": \"John\", \"age\": 30}, {\"name\": \"Jane\", \"age\": 25}]"); assertMapListValue(map, "{}.name", "John", "Jane"); - assertMapListValue(map, "{}.age", 30, 25); + assertMapListValue(map, "{}.age", "30", "25"); assertEquals(2, map.size()); } @@ -220,7 +220,7 @@ public void testArrayOfObjects() { public void testNestedArray() { Map map = eval("{\"data\": {\"items\": [1, 2, 3]}}"); - assertMapListValue(map, "data.items{}", 1, 2, 3); + assertMapListValue(map, "data.items{}", "1", "2", "3"); assertEquals(1, map.size()); } @@ -231,12 +231,12 @@ public void testNested() { "{\"data\": {\"items\": [[1, 2, {\"hello\": 3}], 4], \"other\": 5}, \"another\": [6," + " [7, 8], 9]}"); - assertMapListValue(map, "data.items{}{}", 1, 2); - assertMapValue(map, "data.items{}{}.hello", 3); - assertMapValue(map, "data.items{}", 4); - assertMapValue(map, "data.other", 5); - assertMapListValue(map, "another{}", 6, 9); - assertMapListValue(map, "another{}{}", 7, 8); + assertMapListValue(map, "data.items{}{}", "1", "2"); + assertMapValue(map, "data.items{}{}.hello", "3"); + assertMapValue(map, "data.items{}", "4"); + assertMapValue(map, "data.other", "5"); + assertMapListValue(map, "another{}", "6", "9"); + assertMapListValue(map, "another{}{}", "7", "8"); assertEquals(6, map.size()); } @@ -265,18 +265,18 @@ public void testNumericValues() { + " \"double\": 3.14159}"); assertEquals(4, map.size()); - assertEquals(42, map.get("int")); - assertEquals(9223372036854775807L, map.get("long")); - assertEquals(9223372036854775808.0, map.get("hugeNumber")); - assertEquals(3.14159, map.get("double")); + assertEquals("42", map.get("int")); + 
assertEquals("9223372036854775807", map.get("long")); + assertEquals("9.223372036854776E18", map.get("hugeNumber")); + assertEquals("3.14159", map.get("double")); } @Test public void testBooleanValues() { Map map = eval("{\"isTrue\": true, \"isFalse\": false}"); - assertEquals(true, map.get("isTrue")); - assertEquals(false, map.get("isFalse")); + assertEquals("true", map.get("isTrue")); + assertEquals("false", map.get("isFalse")); assertEquals(2, map.size()); } @@ -284,7 +284,7 @@ public void testBooleanValues() { public void testNullValues() { Map map = eval("{\"nullValue\": null, \"notNull\": \"value\"}"); - assertNull(map.get("nullValue")); + assertEquals("null", map.get("nullValue")); assertEquals("value", map.get("notNull")); assertEquals(2, map.size()); } @@ -296,10 +296,10 @@ public void testMixedTypesInArray() { List mixed = (List) assertListValue(map, "mixed{}"); assertEquals(5, mixed.size()); assertEquals("string", mixed.get(0)); - assertEquals(42, mixed.get(1)); - assertEquals(true, mixed.get(2)); - assertNull(mixed.get(3)); - assertEquals(3.14, mixed.get(4)); + assertEquals("42", mixed.get(1)); + assertEquals("true", mixed.get(2)); + assertEquals("null", mixed.get(3)); + assertEquals("3.14", mixed.get(4)); assertEquals(1, map.size()); } @@ -322,7 +322,7 @@ public void testUnicodeCharacters() { assertEquals("こんにちは", map.get("unicode")); assertEquals("🚀", map.get("emoji")); - assertEquals(1, map.get("🚀")); + assertEquals("1", map.get("🚀")); assertEquals(3, map.size()); } @@ -339,7 +339,7 @@ public void testComplexNestedStructure() { assertMapListValue(map, "user.profile.contacts{}.type", "email", "phone"); assertMapListValue(map, "user.profile.contacts{}.value", "john@example.com", "123-456-7890"); assertEquals("dark", map.get("user.preferences.theme")); - assertEquals(true, map.get("user.preferences.notifications")); + assertEquals("true", map.get("user.preferences.notifications")); assertEquals(5, map.size()); } @@ -354,7 +354,7 @@ public void 
testLargeJsonObject() { Map map = eval(jsonBuilder.toString()); assertEquals(100, map.size()); - assertEquals(0, map.get("field0")); - assertEquals(99, map.get("field99")); + assertEquals("0", map.get("field0")); + assertEquals("99", map.get("field99")); } } diff --git a/docs/user/ppl/cmd/spath.md b/docs/user/ppl/cmd/spath.md index 86d4b9560c4..7decb623273 100644 --- a/docs/user/ppl/cmd/spath.md +++ b/docs/user/ppl/cmd/spath.md @@ -30,14 +30,21 @@ For more information about path syntax, see [json_extract](../functions/json.md# ## Auto-extract mode -When `path` is omitted, the `spath` command runs in auto-extract mode. Instead of extracting a single value, it flattens the entire JSON into a `map` column using the following rules: +When `path` is omitted, the `spath` command runs in auto-extract mode. Instead of extracting a single value, it flattens the entire JSON into a `map` column using the following rules: - Nested objects use dotted keys: `user.name`, `user.age` - Arrays use `{}` suffix: `tags{}`, `users{}.name` - Duplicate logical keys merge into arrays: `c{}.b = [2, 3]` +- Null values are preserved: a JSON `null` becomes the string `"null"` in the map +- All values are stringified: numbers and booleans are converted to their string representation (for example, `30` becomes `"30"`, `true` becomes `"true"`) > **Note**: Auto-extract mode processes the entire input field with no character limit. For large JSON payloads, consider using path-based extraction to target specific fields. +### Corner cases + +- Invalid or malformed JSON returns partial results containing any fields successfully parsed before the error. +- Empty JSON object (`{}`) returns an empty map. + ## Example 1: Basic field extraction The basic use of `spath` extracts a single field from JSON data. 
The following query extracts the `n` field from JSON objects in the `doc_n` field: @@ -139,7 +146,7 @@ fetched rows / total rows = 3/3 ## Example 5: Auto-extract mode -When `path` is omitted, `spath` extracts all fields from the JSON into a map: +When `path` is omitted, `spath` extracts all fields from the JSON into a map. All values are stringified, and null values are preserved: ```ppl source=structured @@ -151,18 +158,20 @@ The query returns the following results: ```text fetched rows / total rows = 3/3 -+---------------------------------------------------------------------------------+----------------------------------------------------------------------------------+ -| doc_auto | result | -|---------------------------------------------------------------------------------+----------------------------------------------------------------------------------| -| {"user":{"name":"John","age":30},"tags":["java","sql"]} | {'user.age': 30, 'tags{}': ['java', 'sql'], 'user.name': 'John'} | -| {"user":{"name":"Jane","age":25},"tags":["python"]} | {'user.age': 25, 'tags{}': 'python', 'user.name': 'Jane'} | -| {"user":{"name":"Bob","age":35},"tags":["go","rust","sql"],"user.name":"Bobby"} | {'user.age': 35, 'tags{}': ['go', 'rust', 'sql'], 'user.name': ['Bob', 'Bobby']} | -+---------------------------------------------------------------------------------+----------------------------------------------------------------------------------+ ++---------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+ +| doc_auto | result | +|---------------------------------------------------------------------------------+--------------------------------------------------------------------------------------| +| {"user":{"name":"John","age":30},"tags":["java","sql"],"active":true} | {'user.age': '30', 'tags{}': ['java', 'sql'], 'user.name': 'John', 'active': 'true'} | +| 
{"user":{"name":"Jane","age":25},"tags":["python"],"active":null} | {'user.age': '25', 'tags{}': 'python', 'user.name': 'Jane', 'active': 'null'} | +| {"user":{"name":"Bob","age":35},"tags":["go","rust","sql"],"user.name":"Bobby"} | {'user.age': '35', 'tags{}': ['go', 'rust', 'sql'], 'user.name': ['Bob', 'Bobby']} | ++---------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+ ``` The flattening rules demonstrated in this example: - Nested objects use dotted keys: `user.name` and `user.age` are extracted from `{"user": {"name": "John", "age": 30}}` - Arrays use `{}` suffix: `tags{}` is extracted from `{"tags": ["java", "sql"]}` -- Duplicate logical keys merge into arrays: in the third row, both `"user": {"name": "Bob"}` (nested) and `"user.name": "Bobby"` (direct dotted key) resolve to the same key `user.name`, so their values merge into `[Bob, Bobby]` +- Duplicate logical keys merge into arrays: in the third row, both `"user": {"name": "Bob"}` (nested) and `"user.name": "Bobby"` (direct dotted key) resolve to the same key `user.name`, so their values merge into `['Bob', 'Bobby']` +- All values are strings: numeric `30` becomes `'30'`, boolean `true` becomes `'true'` +- Null values are preserved: in the second row, `"active": null` is kept as `'active': 'null'` in the map diff --git a/doctest/test_data/structured.json b/doctest/test_data/structured.json index 1c87b631a72..d96995a44f9 100644 --- a/doctest/test_data/structured.json +++ b/doctest/test_data/structured.json @@ -1,3 +1,3 @@ -{"doc_n":"{\"n\": 1}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 0}","doc_list":"{\"list\": [1, 2, 3, 4], \"nest_out\": {\"nest_in\": \"a\"}}","doc_auto":"{\"user\":{\"name\":\"John\",\"age\":30},\"tags\":[\"java\",\"sql\"]}","obj_field":{"field": "a"}} -{"doc_n":"{\"n\": 2}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 1}","doc_list":"{\"list\": [], 
\"nest_out\": {\"nest_in\": \"a\"}}","doc_auto":"{\"user\":{\"name\":\"Jane\",\"age\":25},\"tags\":[\"python\"]}","obj_field":{"field": "b"}} +{"doc_n":"{\"n\": 1}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 0}","doc_list":"{\"list\": [1, 2, 3, 4], \"nest_out\": {\"nest_in\": \"a\"}}","doc_auto":"{\"user\":{\"name\":\"John\",\"age\":30},\"tags\":[\"java\",\"sql\"],\"active\":true}","obj_field":{"field": "a"}} +{"doc_n":"{\"n\": 2}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 1}","doc_list":"{\"list\": [], \"nest_out\": {\"nest_in\": \"a\"}}","doc_auto":"{\"user\":{\"name\":\"Jane\",\"age\":25},\"tags\":[\"python\"],\"active\":null}","obj_field":{"field": "b"}} {"doc_n":"{\"n\": 3}","doc_escape":"{\"a fancy field name\": false,\"a.b.c\": 2}","doc_list":"{\"list\": [5, 6], \"nest_out\": {\"nest_in\": \"a\"}}","doc_auto":"{\"user\":{\"name\":\"Bob\",\"age\":35},\"tags\":[\"go\",\"rust\",\"sql\"],\"user.name\":\"Bobby\"}","obj_field":{"field": "c"}} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java index 68bf57ea8dd..d1fdf03aa3b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java @@ -73,7 +73,7 @@ public void testJsonExtractAllWithSimpleObject() throws Exception { Map map = getMap(resultSet, 1); assertEquals("John", map.get("name")); - assertEquals(30, map.get("age")); + assertEquals("30", map.get("age")); assertEquals(2, map.size()); }); } @@ -111,8 +111,8 @@ public void testJsonExtractAllWithNestedObject() throws Exception { Map map = getMap(resultSet, 1); assertEquals("John", map.get("user.name")); - assertEquals(30, map.get("user.age")); - assertEquals(true, map.get("active")); + assertEquals("30", map.get("user.age")); + assertEquals("true", 
map.get("active")); assertEquals(3, map.size()); }); } @@ -208,8 +208,8 @@ public void testJsonExtractAllWithTopLevelArray() throws Exception { Map map = getMap(resultSet, 1); List ids = getList(map, "{}.id"); assertEquals(2, ids.size()); - assertEquals(1, ids.get(0)); - assertEquals(2, ids.get(1)); + assertEquals("1", ids.get(0)); + assertEquals("2", ids.get(1)); assertEquals(1, map.size()); }); } From 8a05c8b4ef2cca5c4e4277af0b9d9689cc41acb5 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Thu, 12 Feb 2026 10:28:32 -0800 Subject: [PATCH 3/7] Refactor all unit test and integration tests Signed-off-by: Chen Dai --- .../org/opensearch/sql/ast/tree/SPath.java | 44 +++----- .../sql/calcite/CalciteRelNodeVisitor.java | 6 +- .../remote/CalcitePPLSpathCommandIT.java | 101 +++++++----------- .../sql/ppl/calcite/CalcitePPLSpathTest.java | 74 ++++++------- .../sql/ppl/utils/SPathRewriteTest.java | 34 +++--- 5 files changed, 105 insertions(+), 154 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java index 88f1b4d05f6..1fa6448c892 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java @@ -18,21 +18,7 @@ import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.dsl.AstDSL; -/** - * AST node for the PPL {@code spath} command. Supports two modes: - * - *
    - *
  • Path-based extraction ({@code path} is non-null): rewrites to {@code eval output = - * json_extract(input, path)} via {@link #rewriteAsEval()}. - *
  • Extract-all mode ({@code path} is null): rewrites to {@code eval output = - * json_extract_all(input)} via {@link #rewriteAsExtractAllEval()}, returning a {@code - * map} with flattened keys (dotted for nested objects, {@code {}} suffix for - * arrays). - *
- * - *

The {@code input} parameter is always required. When {@code output} is omitted, it defaults to - * the path value (path mode) or the input field name (extract-all mode). - */ +/** AST node for the PPL {@code spath} command. */ @ToString @EqualsAndHashCode(callSuper = false) @RequiredArgsConstructor @@ -45,7 +31,7 @@ public class SPath extends UnresolvedPlan { @Nullable private final String outField; - private final String path; + @Nullable private final String path; @Override public UnresolvedPlan attach(UnresolvedPlan child) { @@ -55,7 +41,7 @@ public UnresolvedPlan attach(UnresolvedPlan child) { @Override public List getChild() { - return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); + return child == null ? ImmutableList.of() : ImmutableList.of(child); } @Override @@ -63,26 +49,30 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitSpath(this, context); } + /** Rewrites this spath node to an equivalent {@link Eval} node. */ public Eval rewriteAsEval() { - String outField = this.outField; - String unquotedPath = unquoteText(this.path); - if (outField == null) { - outField = unquotedPath; + if (path != null) { + return rewritePathMode(); } + return rewriteAutoExtractMode(); + } + private Eval rewritePathMode() { + String unquotedPath = unquoteText(path); + String output = outField != null ? outField : unquotedPath; return AstDSL.eval( - this.child, + child, AstDSL.let( - AstDSL.field(outField), + AstDSL.field(output), AstDSL.function( "json_extract", AstDSL.field(inField), AstDSL.stringLiteral(unquotedPath)))); } - public Eval rewriteAsExtractAllEval() { - String outField = this.outField != null ? this.outField : this.inField; + private Eval rewriteAutoExtractMode() { + String output = outField != null ? 
outField : inField; return AstDSL.eval( - this.child, + child, AstDSL.let( - AstDSL.field(outField), AstDSL.function("json_extract_all", AstDSL.field(inField)))); + AstDSL.field(output), AstDSL.function("json_extract_all", AstDSL.field(inField)))); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index d2114bad2e5..68a700b66b5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -795,11 +795,7 @@ public RelNode visitParse(Parse node, CalcitePlanContext context) { @Override public RelNode visitSpath(SPath node, CalcitePlanContext context) { - if (node.getPath() != null) { - return visitEval(node.rewriteAsEval(), context); - } else { - return visitEval(node.rewriteAsExtractAllEval(), context); - } + return visitEval(node.rewriteAsEval(), context); } @Override diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java index 7cc192cdf02..9fea01c911c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java @@ -37,23 +37,15 @@ public void init() throws Exception { request3.setJsonEntity("{\"doc\": \"{\\\"n\\\": 3}\"}"); client().performRequest(request3); - // Nested JSON doc for nested path extraction - Request request4 = new Request("PUT", "/test_spath_nested/_doc/1?refresh=true"); - request4.setJsonEntity( - "{\"doc\":" - + " \"{\\\"user\\\":{\\\"name\\\":\\\"John\\\",\\\"age\\\":30},\\\"active\\\":true}\"}"); - client().performRequest(request4); - - // JSON with arrays for array path extraction - Request request5 = new Request("PUT", 
"/test_spath_array/_doc/1?refresh=true"); - request5.setJsonEntity( - "{\"doc\": \"{\\\"items\\\":[{\\\"id\\\":1},{\\\"id\\\":2},{\\\"id\\\":3}]}\"}"); - client().performRequest(request5); - - // JSON with special field names for escaped path extraction - Request request6 = new Request("PUT", "/test_spath_escape/_doc/1?refresh=true"); - request6.setJsonEntity("{\"doc\": \"{\\\"a fancy field\\\":true,\\\"a.b.c\\\":42}\"}"); - client().performRequest(request6); + // Auto-extract mode: one doc field per flatten rule + Request autoExtractDoc = new Request("PUT", "/test_spath_auto/_doc/1?refresh=true"); + autoExtractDoc.setJsonEntity( + "{\"nested_doc\": \"{\\\"user\\\":{\\\"name\\\":\\\"John\\\"}}\"," + + " \"array_doc\": \"{\\\"tags\\\":[\\\"java\\\",\\\"sql\\\"]}\"," + + " \"merge_doc\": \"{\\\"a\\\":{\\\"b\\\":1},\\\"a.b\\\":2}\"," + + " \"stringify_doc\":" + + " \"{\\\"n\\\":30,\\\"b\\\":true,\\\"x\\\":null}\"}"); + client().performRequest(autoExtractDoc); } @Test @@ -65,75 +57,58 @@ public void testSimpleSpath() throws IOException { } @Test - public void testSpathPathDefaultsOutputToPath() throws IOException { - JSONObject result = executeQuery("source=test_spath | spath input=doc path=n | fields n"); - verifySchema(result, schema("n", "string")); - verifyDataRows(result, rows("1"), rows("2"), rows("3")); + public void testSpathAutoExtract() throws IOException { + JSONObject result = executeQuery("source=test_spath | spath input=doc"); + verifySchema(result, schema("doc", "struct")); } @Test - public void testSpathNestedPath() throws IOException { - JSONObject result = - executeQuery( - "source=test_spath_nested | spath input=doc output=name path=user.name | fields name"); - verifySchema(result, schema("name", "string")); - verifyDataRows(result, rows("John")); + public void testSpathAutoExtractWithOutput() throws IOException { + JSONObject result = executeQuery("source=test_spath | spath input=doc output=result"); + verifySchema(result, schema("doc", "string"), 
schema("result", "struct")); } @Test - public void testSpathMissingPath() throws IOException { + public void testSpathAutoExtractNestedFields() throws IOException { JSONObject result = executeQuery( - "source=test_spath | spath input=doc output=result path=nonexistent | fields result"); - verifySchema(result, schema("result", "string")); - verifyDataRows(result, rows((Object) null), rows((Object) null), rows((Object) null)); - } + "source=test_spath_auto | spath input=nested_doc output=result | fields result"); - @Test - public void testSpathNoPathExtractAll() throws IOException { - JSONObject result = - executeQuery("source=test_spath_nested | spath input=doc output=result | fields result"); - verifySchema(result, schema("result", "other")); - // extract-all returns a map with flattened keys - verifyDataRows(result, rows("{user.name=John, active=true, user.age=30}")); + // Nested objects flatten to dotted keys: user.name + verifySchema(result, schema("result", "struct")); + verifyDataRows(result, rows(new JSONObject("{\"user.name\":\"John\"}"))); } @Test - public void testSpathNoPathDefaultsOutputToInput() throws IOException { - JSONObject result = executeQuery("source=test_spath | spath input=doc | fields doc"); - verifySchema(result, schema("doc", "other")); - // output defaults to input field name, overwrites with map - verifyDataRows(result, rows("{n=1}"), rows("{n=2}"), rows("{n=3}")); - } - - @Test - public void testSpathArrayIndex() throws IOException { + public void testSpathAutoExtractArraySuffix() throws IOException { JSONObject result = executeQuery( - "source=test_spath_array | spath input=doc output=first path=items{0}.id" - + " | fields first"); - verifySchema(result, schema("first", "string")); - verifyDataRows(result, rows("1")); + "source=test_spath_auto | spath input=array_doc output=result | fields result"); + + // Arrays use {} suffix: tags{} + verifySchema(result, schema("result", "struct")); + verifyDataRows(result, rows(new 
JSONObject("{\"tags{}\":[\"java\",\"sql\"]}"))); } @Test - public void testSpathArrayWildcard() throws IOException { + public void testSpathAutoExtractDuplicateKeysMerge() throws IOException { JSONObject result = executeQuery( - "source=test_spath_array | spath input=doc output=all_ids path=items{}.id" - + " | fields all_ids"); - verifySchema(result, schema("all_ids", "string")); - verifyDataRows(result, rows("[1,2,3]")); + "source=test_spath_auto | spath input=merge_doc output=result | fields result"); + + // Duplicate logical keys merge into arrays: a.b from nested and dotted key + verifySchema(result, schema("result", "struct")); + verifyDataRows(result, rows(new JSONObject("{\"a.b\":[\"1\",\"2\"]}"))); } @Test - public void testSpathEscapedPath() throws IOException { + public void testSpathAutoExtractStringifyAndNull() throws IOException { JSONObject result = executeQuery( - "source=test_spath_escape | spath input=doc output=a path=\"['a fancy field']\"" - + " | spath input=doc output=b path=\"['a.b.c']\"" - + " | fields a b"); - verifySchema(result, schema("a", "string"), schema("b", "string")); - verifyDataRows(result, rows("true", "42")); + "source=test_spath_auto | spath input=stringify_doc output=result | fields result"); + + // All values stringified, null preserved + verifySchema(result, schema("result", "struct")); + verifyDataRows(result, rows(new JSONObject("{\"n\":\"30\",\"b\":\"true\",\"x\":\"null\"}"))); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java index 150c0308ea6..8970d593dcf 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java @@ -5,7 +5,6 @@ package org.opensearch.sql.ppl.calcite; -import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; @@ -16,54 +15,47 @@ 
public CalcitePPLSpathTest() { } @Test - public void testSimpleEval() { - String ppl = "source=EMP | spath src.path input=ENAME"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], src.path=[JSON_EXTRACT($1, 'src.path':VARCHAR)])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - - String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " JSON_EXTRACT(`ENAME`, 'src.path') `src.path`\n" - + "FROM `scott`.`EMP`"; - verifyPPLToSparkSQL(root, expectedSparkSql); + public void testSpathPathMode() { + withPPLQuery("source=EMP | spath src.path input=ENAME") + .expectLogical( + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], src.path=[JSON_EXTRACT($1, 'src.path':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n") + .expectSparkSQL( + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " JSON_EXTRACT(`ENAME`, 'src.path') `src.path`\n" + + "FROM `scott`.`EMP`"); } @Test - public void testEvalWithOutput() { - String ppl = "source=EMP | spath src.path input=ENAME output=custom | fields custom"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalProject(custom=[JSON_EXTRACT($1, 'src.path':VARCHAR)])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - - String expectedSparkSql = - "SELECT JSON_EXTRACT(`ENAME`, 'src.path') `custom`\n" + "FROM `scott`.`EMP`"; - verifyPPLToSparkSQL(root, expectedSparkSql); + public void testSpathPathModeWithOutput() { + withPPLQuery("source=EMP | spath src.path input=ENAME output=custom | fields custom") + .expectLogical( + "LogicalProject(custom=[JSON_EXTRACT($1, 'src.path':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n") + .expectSparkSQL( + 
"SELECT JSON_EXTRACT(`ENAME`, 'src.path') `custom`\n" + "FROM `scott`.`EMP`"); } @Test - public void testExtractAllNoPath() { - String ppl = "source=EMP | spath input=ENAME"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalProject(EMPNO=[$0], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], ENAME=[JSON_EXTRACT_ALL($1)])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); + public void testSpathAutoExtractMode() { + withPPLQuery("source=EMP | spath input=ENAME") + .expectLogical( + "LogicalProject(EMPNO=[$0], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], ENAME=[JSON_EXTRACT_ALL($1)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n") + .expectSparkSQL( + "SELECT `EMPNO`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " JSON_EXTRACT_ALL(`ENAME`) `ENAME`\n" + + "FROM `scott`.`EMP`"); } @Test - public void testExtractAllNoPathWithOutput() { - String ppl = "source=EMP | spath input=ENAME output=result | fields result"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalProject(result=[JSON_EXTRACT_ALL($1)])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); + public void testSpathAutoExtractModeWithOutput() { + withPPLQuery("source=EMP | spath input=ENAME output=result | fields result") + .expectLogical( + "LogicalProject(result=[JSON_EXTRACT_ALL($1)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n") + .expectSparkSQL("SELECT JSON_EXTRACT_ALL(`ENAME`) `result`\n" + "FROM `scott`.`EMP`"); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java index aa14fcdd7ba..0bc9357278d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java @@ -55,24 +55,6 @@ public void 
testSpathMissingInputArgumentHandling() { plan("source = t | spath path=a output=a"); } - @Test - public void testSpathExtractAllRewrite() { - // spath input=a output=o (no path) -> eval o=json_extract_all(a) - SPath sp = (SPath) plan("source = t | spath input=a output=o"); - assertEquals( - eval(relation("t"), let(field("o"), function("json_extract_all", field("a")))), - sp.rewriteAsExtractAllEval()); - } - - @Test - public void testSpathExtractAllDefaultOutput() { - // spath input=a (no path, no output) -> eval a=json_extract_all(a) - SPath sp = (SPath) plan("source = t | spath input=a"); - assertEquals( - eval(relation("t"), let(field("a"), function("json_extract_all", field("a")))), - sp.rewriteAsExtractAllEval()); - } - @Test public void testSpathArgumentDeshuffle() { assertEquals(plan("source = t | spath path=a input=a"), plan("source = t | spath input=a a")); @@ -94,4 +76,20 @@ public void testSpathEscapedSpaces() { assertEquals(ev, sp.rewriteAsEval()); } + + @Test + public void testSpathAutoExtractMode() { + SPath sp = (SPath) plan("source = t | spath input=a"); + assertEquals( + eval(relation("t"), let(field("a"), function("json_extract_all", field("a")))), + sp.rewriteAsEval()); + } + + @Test + public void testSpathAutoExtractModeWithOutput() { + SPath sp = (SPath) plan("source = t | spath input=a output=o"); + assertEquals( + eval(relation("t"), let(field("o"), function("json_extract_all", field("a")))), + sp.rewriteAsEval()); + } } From 51a395c3f24d026bf74c7e0ee3d9caac1061ac66 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Thu, 12 Feb 2026 13:09:29 -0800 Subject: [PATCH 4/7] Refactor json_extract_all and fix stringify issue Signed-off-by: Chen Dai --- .../org/opensearch/sql/ast/tree/SPath.java | 24 +- .../jsonUDF/JsonExtractAllFunctionImpl.java | 32 +- .../JsonExtractAllFunctionImplTest.java | 452 ++++++++++-------- docs/user/ppl/cmd/spath.md | 20 +- .../remote/CalcitePPLSpathCommandIT.java | 4 +- 5 files changed, 296 insertions(+), 236 deletions(-) diff 
--git a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java index 1fa6448c892..abfaf3cc0bc 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java @@ -18,7 +18,6 @@ import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.dsl.AstDSL; -/** AST node for the PPL {@code spath} command. */ @ToString @EqualsAndHashCode(callSuper = false) @RequiredArgsConstructor @@ -41,7 +40,7 @@ public UnresolvedPlan attach(UnresolvedPlan child) { @Override public List getChild() { - return child == null ? ImmutableList.of() : ImmutableList.of(child); + return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); } @Override @@ -49,7 +48,12 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitSpath(this, context); } - /** Rewrites this spath node to an equivalent {@link Eval} node. */ + /** + * Rewrites this spath node to an equivalent {@link Eval} node. + * + *

In path mode, rewrites to {@code eval output = json_extract(input, path)}. In auto-extract + * mode (path is null), rewrites to {@code eval output = json_extract_all(input)}. + */ public Eval rewriteAsEval() { if (path != null) { return rewritePathMode(); @@ -58,18 +62,22 @@ public Eval rewriteAsEval() { } private Eval rewritePathMode() { - String unquotedPath = unquoteText(path); - String output = outField != null ? outField : unquotedPath; + String outField = this.outField; + String unquotedPath = unquoteText(this.path); + if (outField == null) { + outField = unquotedPath; + } + return AstDSL.eval( - child, + this.child, AstDSL.let( - AstDSL.field(output), + AstDSL.field(outField), AstDSL.function( "json_extract", AstDSL.field(inField), AstDSL.stringLiteral(unquotedPath)))); } private Eval rewriteAutoExtractMode() { - String output = outField != null ? outField : inField; + String output = (outField != null) ? outField : inField; return AstDSL.eval( child, AstDSL.let( diff --git a/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java index 8768f4e69a8..00f8cbd7cd9 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java @@ -5,6 +5,7 @@ package org.opensearch.sql.expression.function.jsonUDF; +import static java.util.stream.Collectors.toMap; import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; import com.fasterxml.jackson.core.JsonFactory; @@ -72,6 +73,11 @@ public Expression implement( } } + /** + * Evaluate the JSON extract-all function. Returns a {@code Map} where keys are + * dot-separated JSON paths (with {@code {}} suffix for arrays) and values are stringified. 
List + * values are stringified via {@code String.valueOf}, which produces {@code [a, b, c]} format. + */ public static Object eval(Object... args) { if (args.length < 1) { return null; @@ -86,27 +92,9 @@ public static Object eval(Object... args) { return parsed == null ? null : stringifyMap(parsed); } - /** - * Convert all values in the map to strings to match the declared {@code map} - * return type. List values (from duplicate key merging) are stringified element-wise. - */ - @SuppressWarnings("unchecked") - private static Map stringifyMap(Map map) { - Map result = new HashMap<>(map.size()); - for (Map.Entry entry : map.entrySet()) { - Object value = entry.getValue(); - if (value instanceof List) { - List original = (List) value; - List stringified = new LinkedList<>(); - for (Object item : original) { - stringified.add(String.valueOf(item)); - } - result.put(entry.getKey(), stringified); - } else { - result.put(entry.getKey(), String.valueOf(value)); - } - } - return result; + private static Map stringifyMap(Map map) { + return map.entrySet().stream() + .collect(toMap(Map.Entry::getKey, e -> String.valueOf(e.getValue()))); } private static Map parseJson(String jsonStr) { @@ -174,7 +162,7 @@ private static Map parseJson(String jsonStr) { @SuppressWarnings("unchecked") private static void appendValue(Map resultMap, String path, Object value) { Object existingValue = resultMap.get(path); - if (existingValue == null && !resultMap.containsKey(path)) { + if (existingValue == null && !resultMap.containsKey(path)) { // key absent, not null value resultMap.put(path, value); } else if (existingValue instanceof List) { ((List) existingValue).add(value); diff --git a/core/src/test/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImplTest.java b/core/src/test/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImplTest.java index cebd097a785..449e851b81d 100644 --- 
a/core/src/test/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImplTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImplTest.java @@ -5,12 +5,13 @@ package org.opensearch.sql.expression.function.jsonUDF; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.anEmptyMap; +import static org.hamcrest.Matchers.is; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import java.util.List; import java.util.Map; import org.junit.jupiter.api.Test; @@ -19,39 +20,12 @@ public class JsonExtractAllFunctionImplTest { private final JsonExtractAllFunctionImpl function = new JsonExtractAllFunctionImpl(); @SuppressWarnings("unchecked") - private Map assertValidMapResult(Object result) { - assertNotNull(result); - assertTrue(result instanceof Map); - return (Map) result; - } - - @SuppressWarnings("unchecked") - private List assertListValue(Map map, String key) { - Object value = map.get(key); - assertNotNull(value); - assertTrue(value instanceof List); - return (List) value; - } - - private void assertListEquals(List actual, Object... expected) { - assertEquals(expected.length, actual.size()); - for (int i = 0; i < expected.length; i++) { - assertEquals(expected[i], actual.get(i)); - } - } - - private void assertMapListValue(Map map, String key, Object... 
expectedValues) { - List list = assertListValue(map, key); - assertListEquals(list, expectedValues); - } - - private void assertMapValue(Map map, String key, Object expectedValue) { - assertEquals(expectedValue, map.get(key)); - } - - private Map eval(String json) { + private Map jsonExtractAll(String json) { Object result = JsonExtractAllFunctionImpl.eval(json); - return assertValidMapResult(result); + if (result == null) { + return null; + } + return (Map) result; } @Test @@ -66,281 +40,371 @@ public void testOperandMetadata() { @Test public void testFunctionConstructor() { - JsonExtractAllFunctionImpl testFunction = new JsonExtractAllFunctionImpl(); - - assertNotNull(testFunction, "Function should be properly initialized"); + assertNotNull(new JsonExtractAllFunctionImpl(), "Function should be properly initialized"); } @Test public void testNoArguments() { - Object result = JsonExtractAllFunctionImpl.eval(); - - assertNull(result); + assertNull(JsonExtractAllFunctionImpl.eval()); } @Test public void testNullInput() { - Object result = JsonExtractAllFunctionImpl.eval((String) null); - - assertNull(result); + assertNull(jsonExtractAll(null)); } @Test public void testEmptyString() { - Object result = JsonExtractAllFunctionImpl.eval(""); - - assertNull(result); + assertNull(jsonExtractAll("")); } @Test public void testWhitespaceString() { - Object result = JsonExtractAllFunctionImpl.eval(" "); - - assertNull(result); + assertNull(jsonExtractAll(" ")); } @Test public void testEmptyJsonObject() { - Map map = eval("{}"); - - assertTrue(map.isEmpty()); + assertThat(jsonExtractAll("{}"), anEmptyMap()); } @Test - public void testSimpleJsonObject() throws Exception { - Map map = eval("{\"name\": \"John\", \"age\": 30}"); - - assertEquals("John", map.get("name")); - assertEquals("30", map.get("age")); - assertEquals(2, map.size()); + public void testSimpleJsonObject() { + assertThat( + jsonExtractAll( + """ + { + "name": "John", + "age": 30 + }\ + """), + is(Map.of("name", 
"John", "age", "30"))); } @Test public void testInvalidJsonReturnResults() { - Map map = eval("{\"name\": \"John\", \"age\":}"); - - assertEquals("John", map.get("name")); - assertEquals(1, map.size()); + assertThat(jsonExtractAll("{\"name\": \"John\", \"age\":}"), is(Map.of("name", "John"))); } @Test public void testNonObjectJsonArray() { - Map map = eval("[1, 2, 3]"); - - assertMapListValue(map, "{}", "1", "2", "3"); - assertEquals(1, map.size()); + assertThat(jsonExtractAll("[1, 2, 3]"), is(Map.of("{}", "[1, 2, 3]"))); } @Test public void testTopLevelArrayOfObjects() { - Map map = eval("[{\"age\": 1}, {\"age\": 2}]"); - - assertMapListValue(map, "{}.age", "1", "2"); - assertEquals(1, map.size()); + assertThat( + jsonExtractAll( + """ + [ + {"age": 1}, + {"age": 2} + ]\ + """), + is(Map.of("{}.age", "[1, 2]"))); } @Test public void testTopLevelArrayOfComplexObjects() { - Map map = - eval("[{\"name\": \"John\", \"age\": 30}, {\"name\": \"Jane\", \"age\": 25}]"); - - assertMapListValue(map, "{}.name", "John", "Jane"); - assertMapListValue(map, "{}.age", "30", "25"); - assertEquals(2, map.size()); + assertThat( + jsonExtractAll( + """ + [ + {"name": "John", "age": 30}, + {"name": "Jane", "age": 25} + ]\ + """), + is(Map.of("{}.name", "[John, Jane]", "{}.age", "[30, 25]"))); } @Test public void testNonObjectJsonPrimitive() { - Object result = JsonExtractAllFunctionImpl.eval("\"just a string\""); - - assertNull(result); + assertNull(jsonExtractAll("\"just a string\"")); } @Test public void testNonObjectJsonNumber() { - Object result = JsonExtractAllFunctionImpl.eval("42"); - - assertNull(result); + assertNull(jsonExtractAll("42")); } @Test public void testSingleLevelNesting() { - Map map = eval("{\"user\": {\"name\": \"John\"}, \"system\": \"linux\"}"); - - assertEquals("John", map.get("user.name")); - assertEquals("linux", map.get("system")); - assertEquals(2, map.size()); + assertThat( + jsonExtractAll( + """ + { + "user": {"name": "John"}, + "system": "linux" + }\ 
+ """), + is(Map.of("user.name", "John", "system", "linux"))); } @Test public void testMultiLevelNesting() { - Map map = eval("{\"a\": {\"b\": {\"c\": \"value\"}}}"); - - assertEquals("value", map.get("a.b.c")); - assertEquals(1, map.size()); + assertThat( + jsonExtractAll( + """ + { + "a": { + "b": { + "c": "value" + } + } + }\ + """), + is(Map.of("a.b.c", "value"))); } @Test public void testMixedNestedAndFlat() { - Map map = - eval("{\"name\": \"John\", \"address\": {\"city\": \"NYC\", \"zip\": \"10001\"}}"); - - assertEquals("John", map.get("name")); - assertEquals("NYC", map.get("address.city")); - assertEquals("10001", map.get("address.zip")); - assertEquals(3, map.size()); + assertThat( + jsonExtractAll( + """ + { + "name": "John", + "address": { + "city": "NYC", + "zip": "10001" + } + }\ + """), + is(Map.of("name", "John", "address.city", "NYC", "address.zip", "10001"))); } @Test public void testDeeplyNestedStructure() { - Map map = - eval("{\"level1\": {\"level2\": {\"level3\": {\"level4\": {\"level5\": \"deep\"}}}}}"); - - assertEquals("deep", map.get("level1.level2.level3.level4.level5")); - assertEquals(1, map.size()); + assertThat( + jsonExtractAll( + """ + { + "level1": { + "level2": { + "level3": { + "level4": { + "level5": "deep" + } + } + } + } + }\ + """), + is(Map.of("level1.level2.level3.level4.level5", "deep"))); } @Test public void testSimpleArray() { - Map map = eval("{\"tags\": [\"a\", \"b\", \"c\"]}"); - - assertMapListValue(map, "tags{}", "a", "b", "c"); - assertEquals(1, map.size()); + assertThat( + jsonExtractAll( + """ + { + "tags": ["a", "b", "c"] + }\ + """), + is(Map.of("tags{}", "[a, b, c]"))); } @Test public void testArrayOfObjects() { - Map map = eval("{\"users\": [{\"name\": \"John\"}, {\"name\": \"Jane\"}]}"); - - assertMapListValue(map, "users{}.name", "John", "Jane"); - assertEquals(1, map.size()); + assertThat( + jsonExtractAll( + """ + { + "users": [ + {"name": "John"}, + {"name": "Jane"} + ] + }\ + """), + 
is(Map.of("users{}.name", "[John, Jane]"))); } @Test public void testNestedArray() { - Map map = eval("{\"data\": {\"items\": [1, 2, 3]}}"); - - assertMapListValue(map, "data.items{}", "1", "2", "3"); - assertEquals(1, map.size()); + assertThat( + jsonExtractAll( + """ + { + "data": { + "items": [1, 2, 3] + } + }\ + """), + is(Map.of("data.items{}", "[1, 2, 3]"))); } @Test public void testNested() { - Map map = - eval( - "{\"data\": {\"items\": [[1, 2, {\"hello\": 3}], 4], \"other\": 5}, \"another\": [6," - + " [7, 8], 9]}"); - - assertMapListValue(map, "data.items{}{}", "1", "2"); - assertMapValue(map, "data.items{}{}.hello", "3"); - assertMapValue(map, "data.items{}", "4"); - assertMapValue(map, "data.other", "5"); - assertMapListValue(map, "another{}", "6", "9"); - assertMapListValue(map, "another{}{}", "7", "8"); - assertEquals(6, map.size()); + assertThat( + jsonExtractAll( + """ + { + "data": { + "items": [[1, 2, {"hello": 3}], 4], + "other": 5 + }, + "another": [6, [7, 8], 9] + }\ + """), + is( + Map.of( + "data.items{}{}", "[1, 2]", + "data.items{}{}.hello", "3", + "data.items{}", "4", + "data.other", "5", + "another{}", "[6, 9]", + "another{}{}", "[7, 8]"))); } @Test public void testEmptyArray() { - Map map = eval("{\"empty\": []}"); - - Object emptyValue = map.get("empty{}"); - assertNull(emptyValue); + assertNull(jsonExtractAll("{\"empty\": []}").get("empty{}")); } @Test public void testStringValues() { - Map map = eval("{\"text\": \"hello world\", \"empty\": \"\"}"); - - assertMapValue(map, "text", "hello world"); - assertMapValue(map, "empty", ""); - assertEquals(2, map.size()); + assertThat( + jsonExtractAll( + """ + { + "text": "hello world", + "empty": "" + }\ + """), + is(Map.of("text", "hello world", "empty", ""))); } @Test public void testNumericValues() { - Map map = - eval( - "{\"int\": 42, \"long\": 9223372036854775807, \"hugeNumber\": 9223372036854775808," - + " \"double\": 3.14159}"); - - assertEquals(4, map.size()); - assertEquals("42", 
map.get("int")); - assertEquals("9223372036854775807", map.get("long")); - assertEquals("9.223372036854776E18", map.get("hugeNumber")); - assertEquals("3.14159", map.get("double")); + assertThat( + jsonExtractAll( + """ + { + "int": 42, + "long": 9223372036854775807, + "hugeNumber": 9223372036854775808, + "double": 3.14159 + }\ + """), + is( + Map.of( + "int", "42", + "long", "9223372036854775807", + "hugeNumber", "9.223372036854776E18", + "double", "3.14159"))); } @Test public void testBooleanValues() { - Map map = eval("{\"isTrue\": true, \"isFalse\": false}"); - - assertEquals("true", map.get("isTrue")); - assertEquals("false", map.get("isFalse")); - assertEquals(2, map.size()); + assertThat( + jsonExtractAll( + """ + { + "isTrue": true, + "isFalse": false + }\ + """), + is(Map.of("isTrue", "true", "isFalse", "false"))); } @Test public void testNullValues() { - Map map = eval("{\"nullValue\": null, \"notNull\": \"value\"}"); + assertThat( + jsonExtractAll( + """ + { + "nullValue": null, + "notNull": "value" + }\ + """), + is(Map.of("nullValue", "null", "notNull", "value"))); + } - assertEquals("null", map.get("nullValue")); - assertEquals("value", map.get("notNull")); - assertEquals(2, map.size()); + @Test + public void testNullValuesInArray() { + assertThat( + jsonExtractAll( + """ + [ + {"a": null}, + {"a": 1} + ]\ + """), + is(Map.of("{}.a", "[null, 1]"))); } @Test public void testMixedTypesInArray() { - Map map = eval("{\"mixed\": [\"string\", 42, true, null, 3.14]}"); - - List mixed = (List) assertListValue(map, "mixed{}"); - assertEquals(5, mixed.size()); - assertEquals("string", mixed.get(0)); - assertEquals("42", mixed.get(1)); - assertEquals("true", mixed.get(2)); - assertEquals("null", mixed.get(3)); - assertEquals("3.14", mixed.get(4)); - assertEquals(1, map.size()); + assertThat( + jsonExtractAll( + """ + { + "mixed": ["string", 42, true, null, 3.14] + }\ + """), + is(Map.of("mixed{}", "[string, 42, true, null, 3.14]"))); } @Test public void 
testSpecialCharactersInKeys() { - Map map = - eval( - "{\"key.with.dots\": \"value1\", \"key-with-dashes\": \"value2\"," - + " \"key_with_underscores\": \"value3\"}"); - - assertEquals("value1", map.get("key.with.dots")); - assertEquals("value2", map.get("key-with-dashes")); - assertEquals("value3", map.get("key_with_underscores")); - assertEquals(3, map.size()); + assertThat( + jsonExtractAll( + """ + { + "key.with.dots": "value1", + "key-with-dashes": "value2", + "key_with_underscores": "value3" + }\ + """), + is( + Map.of( + "key.with.dots", "value1", + "key-with-dashes", "value2", + "key_with_underscores", "value3"))); } @Test public void testUnicodeCharacters() { - Map map = eval("{\"unicode\": \"こんにちは\", \"emoji\": \"🚀\", \"🚀\": 1}"); - - assertEquals("こんにちは", map.get("unicode")); - assertEquals("🚀", map.get("emoji")); - assertEquals("1", map.get("🚀")); - assertEquals(3, map.size()); + assertThat( + jsonExtractAll( + """ + { + "unicode": "こんにちは", + "emoji": "🚀", + "🚀": 1 + }\ + """), + is(Map.of("unicode", "こんにちは", "emoji", "🚀", "🚀", "1"))); } @Test public void testComplexNestedStructure() { - Map map = - eval( - "{\"user\": {\"profile\": {\"name\": \"John\", \"contacts\": [{\"type\": \"email\"," - + " \"value\": \"john@example.com\"}, {\"type\": \"phone\", \"value\":" - + " \"123-456-7890\"}]}, \"preferences\": {\"theme\": \"dark\", \"notifications\":" - + " true}}}"); - - assertEquals("John", map.get("user.profile.name")); - assertMapListValue(map, "user.profile.contacts{}.type", "email", "phone"); - assertMapListValue(map, "user.profile.contacts{}.value", "john@example.com", "123-456-7890"); - assertEquals("dark", map.get("user.preferences.theme")); - assertEquals("true", map.get("user.preferences.notifications")); - assertEquals(5, map.size()); + assertThat( + jsonExtractAll( + """ + { + "user": { + "profile": { + "name": "John", + "contacts": [ + {"type": "email", "value": "john@example.com"}, + {"type": "phone", "value": "123-456-7890"} + ] + }, + 
"preferences": { + "theme": "dark", + "notifications": true + } + } + }\ + """), + is( + Map.of( + "user.profile.name", "John", + "user.profile.contacts{}.type", "[email, phone]", + "user.profile.contacts{}.value", "[john@example.com, 123-456-7890]", + "user.preferences.theme", "dark", + "user.preferences.notifications", "true"))); } @Test @@ -352,7 +416,7 @@ public void testLargeJsonObject() { } jsonBuilder.append("}"); - Map map = eval(jsonBuilder.toString()); + Map map = jsonExtractAll(jsonBuilder.toString()); assertEquals(100, map.size()); assertEquals("0", map.get("field0")); assertEquals("99", map.get("field99")); diff --git a/docs/user/ppl/cmd/spath.md b/docs/user/ppl/cmd/spath.md index 7decb623273..3de7ed262dc 100644 --- a/docs/user/ppl/cmd/spath.md +++ b/docs/user/ppl/cmd/spath.md @@ -36,7 +36,7 @@ When `path` is omitted, the `spath` command runs in auto-extract mode. Instead o - Arrays use `{}` suffix: `tags{}`, `users{}.name` - Duplicate logical keys merge into arrays: `c{}.b = [2, 3]` - Null values are preserved: a JSON `null` becomes the string `"null"` in the map -- All values are stringified: numbers and booleans are converted to their string representation (for example, `30` becomes `"30"`, `true` becomes `"true"`) +- All values are stringified: numbers and booleans are converted to their string representation (for example, `30` becomes `"30"`, `true` becomes `"true"`, and arrays become `"[a, b, c]"`) > **Note**: Auto-extract mode processes the entire input field with no character limit. For large JSON payloads, consider using path-based extraction to target specific fields. 
@@ -158,20 +158,20 @@ The query returns the following results: ```text fetched rows / total rows = 3/3 -+---------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+ -| doc_auto | result | -|---------------------------------------------------------------------------------+--------------------------------------------------------------------------------------| -| {"user":{"name":"John","age":30},"tags":["java","sql"],"active":true} | {'user.age': '30', 'tags{}': ['java', 'sql'], 'user.name': 'John', 'active': 'true'} | -| {"user":{"name":"Jane","age":25},"tags":["python"],"active":null} | {'user.age': '25', 'tags{}': 'python', 'user.name': 'Jane', 'active': 'null'} | -| {"user":{"name":"Bob","age":35},"tags":["go","rust","sql"],"user.name":"Bobby"} | {'user.age': '35', 'tags{}': ['go', 'rust', 'sql'], 'user.name': ['Bob', 'Bobby']} | -+---------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+ ++---------------------------------------------------------------------------------+------------------------------------------------------------------------------------+ +| doc_auto | result | +|---------------------------------------------------------------------------------+------------------------------------------------------------------------------------| +| {"user":{"name":"John","age":30},"tags":["java","sql"],"active":true} | {'user.age': '30', 'tags{}': '[java, sql]', 'user.name': 'John', 'active': 'true'} | +| {"user":{"name":"Jane","age":25},"tags":["python"],"active":null} | {'user.age': '25', 'tags{}': 'python', 'user.name': 'Jane', 'active': 'null'} | +| {"user":{"name":"Bob","age":35},"tags":["go","rust","sql"],"user.name":"Bobby"} | {'user.age': '35', 'tags{}': '[go, rust, sql]', 'user.name': '[Bob, Bobby]'} | 
++---------------------------------------------------------------------------------+------------------------------------------------------------------------------------+ ``` The flattening rules demonstrated in this example: - Nested objects use dotted keys: `user.name` and `user.age` are extracted from `{"user": {"name": "John", "age": 30}}` - Arrays use `{}` suffix: `tags{}` is extracted from `{"tags": ["java", "sql"]}` -- Duplicate logical keys merge into arrays: in the third row, both `"user": {"name": "Bob"}` (nested) and `"user.name": "Bobby"` (direct dotted key) resolve to the same key `user.name`, so their values merge into `['Bob', 'Bobby']` -- All values are strings: numeric `30` becomes `'30'`, boolean `true` becomes `'true'` +- Duplicate logical keys merge into arrays: in the third row, both `"user": {"name": "Bob"}` (nested) and `"user.name": "Bobby"` (direct dotted key) resolve to the same key `user.name`, so their values merge into `'[Bob, Bobby]'` +- All values are strings: numeric `30` becomes `'30'`, boolean `true` becomes `'true'`, and arrays become strings like `'[java, sql]'` - Null values are preserved: in the second row, `"active": null` is kept as `'active': 'null'` in the map diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java index 9fea01c911c..04ec4007f05 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java @@ -87,7 +87,7 @@ public void testSpathAutoExtractArraySuffix() throws IOException { // Arrays use {} suffix: tags{} verifySchema(result, schema("result", "struct")); - verifyDataRows(result, rows(new JSONObject("{\"tags{}\":[\"java\",\"sql\"]}"))); + verifyDataRows(result, rows(new JSONObject("{\"tags{}\":\"[java, sql]\"}"))); } @Test @@ -98,7 +98,7 @@ 
public void testSpathAutoExtractDuplicateKeysMerge() throws IOException { // Duplicate logical keys merge into arrays: a.b from nested and dotted key verifySchema(result, schema("result", "struct")); - verifyDataRows(result, rows(new JSONObject("{\"a.b\":[\"1\",\"2\"]}"))); + verifyDataRows(result, rows(new JSONObject("{\"a.b\":\"[1, 2]\"}"))); } @Test From 2a455885cf4500d5e1377e64da211c535e65d970 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Thu, 12 Feb 2026 16:15:01 -0800 Subject: [PATCH 5/7] Fix broken IT and doctest Signed-off-by: Chen Dai --- .../jsonUDF/JsonExtractAllFunctionImpl.java | 11 +++-- docs/user/ppl/cmd/spath.md | 7 +-- .../standalone/JsonExtractAllFunctionIT.java | 46 ++++++------------- 3 files changed, 23 insertions(+), 41 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java index 00f8cbd7cd9..8168700b6da 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java @@ -75,8 +75,8 @@ public Expression implement( /** * Evaluate the JSON extract-all function. Returns a {@code Map} where keys are - * dot-separated JSON paths (with {@code {}} suffix for arrays) and values are stringified. List - * values are stringified via {@code String.valueOf}, which produces {@code [a, b, c]} format. + * dot-separated JSON paths (with {@code {}} suffix for arrays) and all values are strings. Merged + * array values use {@code [a, b, c]} format. */ public static Object eval(Object... args) { if (args.length < 1) { @@ -92,9 +92,14 @@ public static Object eval(Object... args) { return parsed == null ? 
null : stringifyMap(parsed); } + // TODO: JSON parsing dominates cost; consider stringifying scalars in place during parsing + // to avoid this extra pass. private static Map stringifyMap(Map map) { return map.entrySet().stream() - .collect(toMap(Map.Entry::getKey, e -> String.valueOf(e.getValue()))); + .collect( + toMap( + Map.Entry::getKey, + e -> String.valueOf(e.getValue()))); // relies on List.toString() for [a, b, c] } private static Map parseJson(String jsonStr) { diff --git a/docs/user/ppl/cmd/spath.md b/docs/user/ppl/cmd/spath.md index 3de7ed262dc..00eeb4d07cc 100644 --- a/docs/user/ppl/cmd/spath.md +++ b/docs/user/ppl/cmd/spath.md @@ -13,7 +13,7 @@ The `spath` command extracts fields from structured JSON data. It operates in tw The `spath` command has the following syntax: ```syntax -spath input=<field> [output=<field>] [path=]<path> +spath input=<field> [output=<field>] [[path=]<path>] ``` ## Parameters @@ -40,10 +40,7 @@ When `path` is omitted, the `spath` command runs in auto-extract mode. Instead o > **Note**: Auto-extract mode processes the entire input field with no character limit. For large JSON payloads, consider using path-based extraction to target specific fields. -### Corner cases - -- Invalid or malformed JSON returns partial results containing any fields successfully parsed before the error. -- Empty JSON object (`{}`) returns an empty map. +> **Note**: Invalid or malformed JSON returns partial results containing any fields successfully parsed before the error. Empty JSON object (`{}`) returns an empty map. 
## Example 1: Basic field extraction diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java index d1fdf03aa3b..7f1821c5ad8 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java @@ -7,7 +7,6 @@ import java.sql.ResultSet; import java.sql.SQLException; -import java.util.List; import java.util.Map; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.type.RelDataType; @@ -71,19 +70,19 @@ public void testJsonExtractAllWithSimpleObject() throws Exception { assertTrue(resultSet.next()); verifyColumns(resultSet, RESULT_FIELD); - Map map = getMap(resultSet, 1); + Map map = getMap(resultSet, 1); assertEquals("John", map.get("name")); assertEquals("30", map.get("age")); assertEquals(2, map.size()); }); } - private Map getMap(ResultSet resultSet, int columnIndex) throws SQLException { + private Map getMap(ResultSet resultSet, int columnIndex) throws SQLException { Object result = resultSet.getObject(columnIndex); assertNotNull(result); assertTrue(result instanceof Map); - return (Map) result; + return (Map) result; } @Test @@ -109,7 +108,7 @@ public void testJsonExtractAllWithNestedObject() throws Exception { assertTrue(resultSet.next()); verifyColumns(resultSet, RESULT_FIELD); - Map map = getMap(resultSet, 1); + Map map = getMap(resultSet, 1); assertEquals("John", map.get("user.name")); assertEquals("30", map.get("user.age")); assertEquals("true", map.get("active")); @@ -140,13 +139,9 @@ public void testJsonExtractAllWithArray() throws Exception { assertTrue(resultSet.next()); verifyColumns(resultSet, RESULT_FIELD); - Map map = getMap(resultSet, 1); - List tags = getList(map, "tags{}"); - - assertEquals(3, tags.size()); - assertEquals("java", tags.get(0)); - 
assertEquals("sql", tags.get(1)); - assertEquals("opensearch", tags.get(2)); + Map map = getMap(resultSet, 1); + assertEquals("[java, sql, opensearch]", map.get("tags{}")); + assertEquals(1, map.size()); }); } @@ -173,11 +168,8 @@ public void testJsonExtractAllWithArrayOfObjects() throws Exception { assertTrue(resultSet.next()); verifyColumns(resultSet, RESULT_FIELD); - Map map = getMap(resultSet, 1); - List names = getList(map, "users{}.name"); - assertEquals(2, names.size()); - assertEquals("John", names.get(0)); - assertEquals("Jane", names.get(1)); + Map map = getMap(resultSet, 1); + assertEquals("[John, Jane]", map.get("users{}.name")); assertEquals(1, map.size()); // Only flattened key should exist }); } @@ -205,24 +197,12 @@ public void testJsonExtractAllWithTopLevelArray() throws Exception { assertTrue(resultSet.next()); verifyColumns(resultSet, RESULT_FIELD); - Map map = getMap(resultSet, 1); - List ids = getList(map, "{}.id"); - assertEquals(2, ids.size()); - assertEquals("1", ids.get(0)); - assertEquals("2", ids.get(1)); + Map map = getMap(resultSet, 1); + assertEquals("[1, 2]", map.get("{}.id")); assertEquals(1, map.size()); }); } - @SuppressWarnings("unchecked") - private List getList(Map map, String key) { - Object value = map.get(key); - assertNotNull(value); - assertTrue(value instanceof List); - - return (List) value; - } - @Test public void testJsonExtractAllWithEmptyObject() throws Exception { String jsonString = "{}"; @@ -246,7 +226,7 @@ public void testJsonExtractAllWithEmptyObject() throws Exception { assertTrue(resultSet.next()); verifyColumns(resultSet, RESULT_FIELD); - Map map = getMap(resultSet, 1); + Map map = getMap(resultSet, 1); assertTrue(map.isEmpty()); }); } @@ -274,7 +254,7 @@ public void testJsonExtractAllWithInvalidJson() throws Exception { assertTrue(resultSet.next()); verifyColumns(resultSet, RESULT_FIELD); - Map map = getMap(resultSet, 1); + Map map = getMap(resultSet, 1); assertEquals("John", map.get("name")); assertEquals(1, 
map.size()); }); From 4890a4edf1631fd95613d3d827f9a433212b8e4d Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Fri, 13 Feb 2026 09:18:30 -0800 Subject: [PATCH 6/7] Address PR comments Signed-off-by: Chen Dai --- docs/user/ppl/cmd/spath.md | 4 +- .../remote/CalcitePPLSpathCommandIT.java | 58 ++++++++++++++++++- 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/docs/user/ppl/cmd/spath.md b/docs/user/ppl/cmd/spath.md index 00eeb4d07cc..124b2c619d3 100644 --- a/docs/user/ppl/cmd/spath.md +++ b/docs/user/ppl/cmd/spath.md @@ -39,8 +39,8 @@ When `path` is omitted, the `spath` command runs in auto-extract mode. Instead o - All values are stringified: numbers and booleans are converted to their string representation (for example, `30` becomes `"30"`, `true` becomes `"true"`, and arrays become `"[a, b, c]"`) > **Note**: Auto-extract mode processes the entire input field with no character limit. For large JSON payloads, consider using path-based extraction to target specific fields. - -> **Note**: Invalid or malformed JSON returns partial results containing any fields successfully parsed before the error. Empty JSON object (`{}`) returns an empty map. +> +> Invalid or malformed JSON returns partial results containing any fields successfully parsed before the error. Empty JSON object (`{}`) returns an empty map. 
## Example 1: Basic field extraction diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java index 04ec4007f05..f3247e968ba 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java @@ -37,15 +37,25 @@ public void init() throws Exception { request3.setJsonEntity("{\"doc\": \"{\\\"n\\\": 3}\"}"); client().performRequest(request3); - // Auto-extract mode: one doc field per flatten rule + // Auto-extract mode: flatten rules and edge cases (empty, malformed) Request autoExtractDoc = new Request("PUT", "/test_spath_auto/_doc/1?refresh=true"); autoExtractDoc.setJsonEntity( "{\"nested_doc\": \"{\\\"user\\\":{\\\"name\\\":\\\"John\\\"}}\"," + " \"array_doc\": \"{\\\"tags\\\":[\\\"java\\\",\\\"sql\\\"]}\"," + " \"merge_doc\": \"{\\\"a\\\":{\\\"b\\\":1},\\\"a.b\\\":2}\"," - + " \"stringify_doc\":" - + " \"{\\\"n\\\":30,\\\"b\\\":true,\\\"x\\\":null}\"}"); + + " \"stringify_doc\": \"{\\\"n\\\":30,\\\"b\\\":true,\\\"x\\\":null}\"," + + " \"empty_doc\": \"{}\"," + + " \"malformed_doc\": \"{\\\"user\\\":{\\\"name\\\":\"}"); client().performRequest(autoExtractDoc); + + // Auto-extract mode: null input handling (doc 1 establishes mapping, doc 2 has null) + Request nullDoc1 = new Request("PUT", "/test_spath_null/_doc/1?refresh=true"); + nullDoc1.setJsonEntity("{\"doc\": \"{\\\"n\\\": 1}\"}"); + client().performRequest(nullDoc1); + + Request nullDoc2 = new Request("PUT", "/test_spath_null/_doc/2?refresh=true"); + nullDoc2.setJsonEntity("{\"doc\": null}"); + client().performRequest(nullDoc2); } @Test @@ -60,12 +70,22 @@ public void testSimpleSpath() throws IOException { public void testSpathAutoExtract() throws IOException { JSONObject result = executeQuery("source=test_spath | spath input=doc"); 
verifySchema(result, schema("doc", "struct")); + verifyDataRows( + result, + rows(new JSONObject("{\"n\":\"1\"}")), + rows(new JSONObject("{\"n\":\"2\"}")), + rows(new JSONObject("{\"n\":\"3\"}"))); } @Test public void testSpathAutoExtractWithOutput() throws IOException { JSONObject result = executeQuery("source=test_spath | spath input=doc output=result"); verifySchema(result, schema("doc", "string"), schema("result", "struct")); + verifyDataRows( + result, + rows("{\"n\": 1}", new JSONObject("{\"n\":\"1\"}")), + rows("{\"n\": 2}", new JSONObject("{\"n\":\"2\"}")), + rows("{\"n\": 3}", new JSONObject("{\"n\":\"3\"}"))); } @Test @@ -111,4 +131,36 @@ public void testSpathAutoExtractStringifyAndNull() throws IOException { verifySchema(result, schema("result", "struct")); verifyDataRows(result, rows(new JSONObject("{\"n\":\"30\",\"b\":\"true\",\"x\":\"null\"}"))); } + + @Test + public void testSpathAutoExtractNullInput() throws IOException { + JSONObject result = + executeQuery("source=test_spath_null | spath input=doc output=result | fields result"); + + // Non-null doc extracts normally, null doc returns null + verifySchema(result, schema("result", "struct")); + verifyDataRows(result, rows(new JSONObject("{\"n\":\"1\"}")), rows((Object) null)); + } + + @Test + public void testSpathAutoExtractEmptyJson() throws IOException { + JSONObject result = + executeQuery( + "source=test_spath_auto | spath input=empty_doc output=result | fields result"); + + // Empty JSON object returns empty map + verifySchema(result, schema("result", "struct")); + verifyDataRows(result, rows(new JSONObject("{}"))); + } + + @Test + public void testSpathAutoExtractMalformedJson() throws IOException { + JSONObject result = + executeQuery( + "source=test_spath_auto | spath input=malformed_doc output=result | fields result"); + + // Malformed JSON returns partial results parsed before the error + verifySchema(result, schema("result", "struct")); + verifyDataRows(result, rows(new 
JSONObject("{}"))); + } } From 191c21f6f2196fed4618dac52201f1ef2cd057b3 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Fri, 13 Feb 2026 09:49:03 -0800 Subject: [PATCH 7/7] Mark auto extract mode as experimental Signed-off-by: Chen Dai --- docs/user/ppl/cmd/spath.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user/ppl/cmd/spath.md b/docs/user/ppl/cmd/spath.md index 124b2c619d3..d62d14fea65 100644 --- a/docs/user/ppl/cmd/spath.md +++ b/docs/user/ppl/cmd/spath.md @@ -4,7 +4,7 @@ The `spath` command extracts fields from structured JSON data. It operates in two modes: - **Path-based mode**: When `path` is specified, extracts a single value at the given JSON path. -- **Auto-extract mode**: When `path` is omitted, extracts all fields from the JSON into a map. +- **Auto-extract mode** (experimental): When `path` is omitted, extracts all fields from the JSON into a map. > **Note**: The `spath` command is not executed on OpenSearch data nodes. It extracts fields from data after it has been returned to the coordinator node, which is slow on large datasets. We recommend indexing fields needed for filtering directly instead of using `spath` to filter nested fields. @@ -28,7 +28,7 @@ The `spath` command supports the following parameters. For more information about path syntax, see [json_extract](../functions/json.md#json_extract). -## Auto-extract mode +## Auto-extract mode (experimental) When `path` is omitted, the `spath` command runs in auto-extract mode. Instead of extracting a single value, it flattens the entire JSON into a `map` column using the following rules: