diff --git a/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java b/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java
index 4b7752a9de5..a3f55fcd231 100644
--- a/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java
+++ b/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java
@@ -6,8 +6,10 @@
 package org.opensearch.sql.common.utils;
 
 import com.google.common.base.Strings;
+import java.util.Collection;
 import java.util.IllegalFormatException;
 import java.util.Locale;
+import java.util.Optional;
 
 public class StringUtils {
   /**
@@ -96,4 +98,94 @@ public static String format(final String format, Object... args) {
   private static boolean isQuoted(String text, String mark) {
     return !Strings.isNullOrEmpty(text) && text.startsWith(mark) && text.endsWith(mark);
   }
+
+  /**
+   * Calculates the Levenshtein distance between two strings, i.e. the minimum number of
+   * single-character insertions, deletions and substitutions that turn one into the other.
+   * Characters are compared as UTF-16 code units via {@link String#charAt(int)}.
+   *
+   * @param s1 first string
+   * @param s2 second string
+   * @return the Levenshtein distance between s1 and s2, or {@link Integer#MAX_VALUE} if either
+   *     argument is null
+   */
+  public static int levenshteinDistance(String s1, String s2) {
+    if (s1 == null || s2 == null) {
+      return Integer.MAX_VALUE;
+    }
+    if (s1.equals(s2)) {
+      return 0;
+    }
+
+    int len1 = s1.length();
+    int len2 = s2.length();
+
+    if (len1 == 0) {
+      return len2;
+    }
+    if (len2 == 0) {
+      return len1;
+    }
+
+    // Only two rows of the edit-distance table are kept: prev is row i - 1, curr is row i.
+    int[] prev = new int[len2 + 1];
+    int[] curr = new int[len2 + 1];
+
+    for (int j = 0; j <= len2; j++) {
+      prev[j] = j;
+    }
+
+    for (int i = 1; i <= len1; i++) {
+      curr[0] = i;
+      for (int j = 1; j <= len2; j++) {
+        int cost = (s1.charAt(i - 1) == s2.charAt(j - 1)) ? 0 : 1;
+        curr[j] = Math.min(Math.min(curr[j - 1] + 1, prev[j] + 1), prev[j - 1] + cost);
+      }
+      // Swap the rows so the row just computed becomes prev for the next iteration.
+      int[] temp = prev;
+      prev = curr;
+      curr = temp;
+    }
+
+    return prev[len2];
+  }
+
+  /**
+   * Finds the closest match to a target string from a collection of candidates using
+   * case-insensitive Levenshtein distance. Null candidates are skipped. On ties the candidate
+   * encountered first in iteration order wins.
+   *
+   * @param target the string to match against
+   * @param candidates the collection of candidate strings
+   * @return the closest candidate in its original casing, or empty if target or candidates is
+   *     null, candidates is empty, or the best distance exceeds max(4, target.length() / 2)
+   */
+  public static Optional<String> findClosestMatch(String target, Collection<String> candidates) {
+    if (target == null || candidates == null || candidates.isEmpty()) {
+      return Optional.empty();
+    }
+
+    // Locale.ROOT keeps the comparison independent of the JVM's default locale.
+    String normalizedTarget = target.toLowerCase(Locale.ROOT);
+    String bestMatch = null;
+    int bestDistance = Integer.MAX_VALUE;
+
+    for (String candidate : candidates) {
+      if (candidate == null) {
+        continue;
+      }
+      int distance = levenshteinDistance(normalizedTarget, candidate.toLowerCase(Locale.ROOT));
+      if (distance < bestDistance) {
+        bestDistance = distance;
+        bestMatch = candidate;
+      }
+    }
+
+    // Only return a suggestion if the distance is reasonable
+    if (bestMatch != null && bestDistance <= Math.max(4, target.length() / 2)) {
+      return Optional.of(bestMatch);
+    }
+
+    return Optional.empty();
+  }
 }
diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java
index 84fb486702a..3290c438cd5 100644
--- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java
+++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java
@@ -20,22 +20,41 @@
 import org.opensearch.sql.ast.AbstractNodeVisitor;
 
 @Getter
-@EqualsAndHashCode(callSuper = false)
+@EqualsAndHashCode(
+    callSuper = false,
+    exclude = {"line", "column"})
 public class QualifiedName extends UnresolvedExpression {
   public static final String DELIMITER = ".";
   private final List parts;
 
+  private final Integer line;
+
+  private final Integer column;
+
   public QualifiedName(String name) {
-    this.parts = Collections.singletonList(name);
+    this(Collections.singletonList(name), null, null);
   }
 
   /** QualifiedName Constructor. */
   public QualifiedName(Iterable parts) {
+    this(parts, null, null);
+  }
+
+  /**
+   * Constructor with source position.
+   *
+   * @param parts The parts of the qualified name
+   * @param line Line number (1-based), null if not available
+   * @param column Column position (0-based), null if not available
+   */
+  public QualifiedName(Iterable parts, Integer line, Integer column) {
     List partsList = StreamSupport.stream(parts.spliterator(), false).collect(toList());
     if (partsList.isEmpty()) {
       throw new IllegalArgumentException("parts is empty");
     }
     this.parts = partsList;
+    this.line = line;
+    this.column = column;
   }
 
   /** Construct {@link QualifiedName} from list of string. */
diff --git a/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java b/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java
index dba881b3fc3..c75c0829b5f 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java
@@ -7,6 +7,7 @@
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
@@ -18,6 +19,7 @@
 import org.opensearch.sql.ast.expression.QualifiedName;
 import org.opensearch.sql.common.error.ErrorCode;
 import org.opensearch.sql.common.error.ErrorReport;
+import org.opensearch.sql.common.utils.StringUtils;
 import org.opensearch.sql.expression.function.BuiltinFunctionName;
 import org.opensearch.sql.expression.function.PPLFuncImpTable;
 
@@ -58,7 +60,7 @@ private static RexNode resolveInJoinCondition(
 
     return resolveFieldWithAlias(nameNode, context, 2)
         .or(() -> resolveFieldWithoutAlias(nameNode, context, 2))
-        .orElseThrow(() -> getNotFoundException(nameNode));
+        .orElseThrow(() -> getNotFoundException(nameNode, context));
   }
 
   /** Resolves qualified name in non-join condition context. */
@@ -91,7 +93,7 @@ private static RexNode resolveInNonJoinCondition(
 
     return resolveCorrelationField(nameNode, context)
         .or(() -> replaceWithNullLiteralInCoalesce(context))
-        .orElseThrow(() -> getNotFoundException(nameNode));
+        .orElseThrow(() -> getNotFoundException(nameNode, context));
   }
 
   private static String joinParts(List parts, int start, int length) {
@@ -327,10 +329,29 @@ private static Optional replaceWithNullLiteralInCoalesce(CalcitePlanCon
     return Optional.empty();
   }
 
-  private static ErrorReport getNotFoundException(QualifiedName node) {
-    return ErrorReport.wrap(
-            new IllegalArgumentException(String.format("Field [%s] not found.", node.toString())))
-        .code(ErrorCode.FIELD_NOT_FOUND)
-        .build();
+  private static ErrorReport getNotFoundException(QualifiedName node, CalcitePlanContext context) {
+    // Collect all available fields from the current context
+    // NOTE(review): peek() only exposes the top relation; for join conditions (resolved against
+    // two inputs) this list may omit one side's fields - confirm against callers.
+    List availableFields = context.relBuilder.peek().getRowType().getFieldNames();
+
+    ErrorReport.Builder builder =
+        ErrorReport.wrap(
+                new IllegalArgumentException(
+                    String.format("Field [%s] not found.", node.toString())))
+            .code(ErrorCode.FIELD_NOT_FOUND)
+            .context("requested_field", node.toString())
+            .context("available_fields", availableFields);
+
+    // Add a suggestion based on Levenshtein distance
+    StringUtils.findClosestMatch(node.toString(), availableFields)
+        .ifPresent(suggestion -> builder.suggestion("Did you mean: " + suggestion));
+
+    // Add source position if available (populated by PPL parser)
+    if (node.getLine() != null && node.getColumn() != null) {
+      builder.context("query_pos", Map.of("line", node.getLine(), "column", node.getColumn()));
+    }
+
+    return builder.build();
   }
 }
diff --git a/core/src/test/java/org/opensearch/sql/common/utils/StringUtilsTest.java b/core/src/test/java/org/opensearch/sql/common/utils/StringUtilsTest.java
index 2a2c9de63ab..b47ec1861fe 100644
--- a/core/src/test/java/org/opensearch/sql/common/utils/StringUtilsTest.java
+++ b/core/src/test/java/org/opensearch/sql/common/utils/StringUtilsTest.java
@@ -6,8 +6,12 @@
 package org.opensearch.sql.common.utils;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.opensearch.sql.common.utils.StringUtils.unquoteText;
 
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
 import org.junit.jupiter.api.Test;
 
 class StringUtilsTest {
@@ -46,4 +50,52 @@ void unquoteTest() {
     assertEquals("hel\"lo", unquoteText("\"hel\"lo\""));
     assertEquals("hel\\'\\lo", unquoteText("'hel\\\\''\\\\lo'"));
   }
+
+  @Test
+  void levenshteinDistanceTest() {
+    assertEquals(0, StringUtils.levenshteinDistance("test", "test"));
+    assertEquals(1, StringUtils.levenshteinDistance("test", "text"));
+    assertEquals(1, StringUtils.levenshteinDistance("test", "tst"));
+    assertEquals(3, StringUtils.levenshteinDistance("kitten", "sitting"));
+    assertEquals(4, StringUtils.levenshteinDistance("hello", "world"));
+    assertEquals(4, StringUtils.levenshteinDistance("test", ""));
+    assertEquals(4, StringUtils.levenshteinDistance("", "test"));
+    assertEquals(0, StringUtils.levenshteinDistance("", ""));
+  }
+
+  @Test
+  void findClosestMatchTest() {
+    List fields = List.of("name", "age", "email", "address", "phone");
+
+    // Exact match or close typo
+    Optional match = StringUtils.findClosestMatch("nam", fields);
+    assertTrue(match.isPresent());
+    assertEquals("name", match.get());
+
+    match = StringUtils.findClosestMatch("emal", fields);
+    assertTrue(match.isPresent());
+    assertEquals("email", match.get());
+
+    match = StringUtils.findClosestMatch("addres", fields);
+    assertTrue(match.isPresent());
+    assertEquals("address", match.get());
+
+    // Case insensitive
+    match = StringUtils.findClosestMatch("NAME", fields);
+    assertTrue(match.isPresent());
+    assertEquals("name", match.get());
+
+    // Too far off - should not match (longer string with many edits)
+    match = StringUtils.findClosestMatch("xyzabc", fields);
+    assertTrue(match.isEmpty());
+
+    // Empty candidates
+    match = StringUtils.findClosestMatch("test", List.of());
+    assertTrue(match.isEmpty());
+
+    // Null candidates are skipped instead of throwing
+    match = StringUtils.findClosestMatch("nam", Arrays.asList(null, "name"));
+    assertTrue(match.isPresent());
+    assertEquals("name", match.get());
+  }
 }
diff --git a/docs/user/ppl/cmd/mvcombine.md b/docs/user/ppl/cmd/mvcombine.md
index 8951b0d7fed..f282217d96a 100644
--- a/docs/user/ppl/cmd/mvcombine.md
+++ b/docs/user/ppl/cmd/mvcombine.md
@@ -124,6 +124,6 @@ source=mvcombine_data
 Expected output:
 
 ```text
-{'context': {'stage': 'analyzing', 'stage_description': 'Parsing and validating the query'}, 'reason': 'Field [does_not_exist] not found.', 'details': 'Field [does_not_exist] not found.', 'location': ['while preparing and validating the query plan'], 'code': 'FIELD_NOT_FOUND', 'type': 'IllegalArgumentException'}
+ {'context': {'stage_description': 'Parsing and validating the query', 'stage': 'analyzing', 'requested_field': 'does_not_exist', 'available_fields': ['packets_str', 'bytes', 'case', 'letters', 'ip', 'tags', '_id', '_index', '_score', '_maxscore', '_sort', '_routing'], 'query_pos': {'column': 34, 'line': 1}}, 'reason': 'Field [does_not_exist] not found.', 'details': 'Field [does_not_exist] not found.', 'location': ['while preparing and validating the query plan'], 'code': 'FIELD_NOT_FOUND', 'type': 'IllegalArgumentException'}
 Error: Query returned no data
-```
\ No newline at end of file
+```
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java
index 4c2c8176690..99d6e2314af 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java
@@ -98,6 +98,19 @@ public void testFieldsShouldBeCaseSensitive() {
     verifyErrorMessageContains(e, "Field [NAME] not found.");
   }
 
+  @Test
+  public void testFieldNotFoundWithSuggestion() {
+    // Typo: "nam" instead of "name"
+    Throwable e =
+        assertThrowsWithReplace(
+            IllegalStateException.class, () -> executeQuery("source=test | fields nam"));
+    verifyErrorMessageContains(e, "Field [nam] not found.");
+    // Verify suggestion based on Levenshtein distance
+    verifyErrorMessageContains(e, "Did you mean: name");
+    // Verify available fields are listed
+    verifyErrorMessageContains(e, "available_fields");
+  }
+
   @Test
   public void testFilterQuery1() throws IOException {
     JSONObject actual = executeQuery("source=test | where age = 30 | fields name, age");
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java
index 77d5c77a635..7d2f3d9056b 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java
@@ -903,11 +903,21 @@ public UnresolvedExpression visitMaxOption(OpenSearchPPLParser.MaxOptionContext
   }
 
   public QualifiedName visitIdentifiers(List ctx) {
-    return new QualifiedName(
+    List parts =
         ctx.stream()
             .map(RuleContext::getText)
             .map(StringUtils::unquoteIdentifier)
-            .collect(Collectors.toList()));
+            .collect(Collectors.toList());
+
+    // Capture source position from the first identifier for error reporting
+    if (!ctx.isEmpty()) {
+      ParserRuleContext first = ctx.get(0);
+      int line = first.getStart().getLine();
+      int column = first.getStart().getCharPositionInLine();
+      return new QualifiedName(parts, line, column);
+    }
+
+    return new QualifiedName(parts);
   }
 
   private List singleFieldRelevanceArguments(