Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
package org.opensearch.sql.common.utils;

import com.google.common.base.Strings;
import java.util.Collection;
import java.util.IllegalFormatException;
import java.util.Locale;
import java.util.Optional;

public class StringUtils {
/**
Expand Down Expand Up @@ -96,4 +98,83 @@ public static String format(final String format, Object... args) {
private static boolean isQuoted(String text, String mark) {
  // A null or empty text can never be wrapped in quote marks.
  if (Strings.isNullOrEmpty(text)) {
    return false;
  }
  // Quoted means the same mark both opens and closes the text.
  return text.startsWith(mark) && text.endsWith(mark);
}

/**
 * Computes the Levenshtein (edit) distance between two strings, i.e. the minimum number of
 * single-character insertions, deletions and substitutions needed to turn one into the other.
 * The comparison is case-sensitive and operates on {@code char} units.
 *
 * @param s1 first string
 * @param s2 second string
 * @return the edit distance between s1 and s2, or {@link Integer#MAX_VALUE} if either argument
 *     is null
 */
public static int levenshteinDistance(String s1, String s2) {
  if (s1 == null || s2 == null) {
    return Integer.MAX_VALUE;
  }
  // When one side is empty the distance is simply the length of the other side.
  if (s1.isEmpty()) {
    return s2.length();
  }
  if (s2.isEmpty()) {
    return s1.length();
  }
  if (s1.equals(s2)) {
    return 0;
  }

  final char[] source = s1.toCharArray();
  final char[] target = s2.toCharArray();

  // Only two rows of the DP table are kept: the row above and the row being filled.
  int[] rowAbove = new int[target.length + 1];
  int[] rowHere = new int[target.length + 1];
  for (int col = 1; col <= target.length; col++) {
    rowAbove[col] = col;
  }

  for (int row = 1; row <= source.length; row++) {
    rowHere[0] = row;
    for (int col = 1; col <= target.length; col++) {
      int fromLeft = rowHere[col - 1] + 1;
      int fromAbove = rowAbove[col] + 1;
      int fromDiagonal = rowAbove[col - 1] + (source[row - 1] == target[col - 1] ? 0 : 1);
      rowHere[col] = Math.min(Math.min(fromLeft, fromAbove), fromDiagonal);
    }
    // Swap the buffers so the row just filled becomes the "row above" for the next pass.
    int[] swap = rowAbove;
    rowAbove = rowHere;
    rowHere = swap;
  }

  return rowAbove[target.length];
}

/**
 * Finds the closest match to a target string from a collection of candidates using
 * case-insensitive Levenshtein distance. When several candidates share the smallest distance,
 * the first one in iteration order wins. Null candidates are ignored.
 *
 * <p>A match is only returned when its distance is at most {@code max(4, target.length() / 2)}.
 *
 * @param target the string to match against
 * @param candidates the collection of candidate strings
 * @return the closest candidate (in its original casing), or empty if target or candidates is
 *     null, candidates is empty, or the best distance exceeds the threshold
 */
public static Optional<String> findClosestMatch(String target, Collection<String> candidates) {
  if (target == null || candidates == null || candidates.isEmpty()) {
    return Optional.empty();
  }

  // Fold case with Locale.ROOT so the result does not depend on the JVM default locale
  // (e.g. under a Turkish locale "I" would otherwise lower-case to a dotless i).
  // Computed once because it does not change between candidates.
  final String normalizedTarget = target.toLowerCase(Locale.ROOT);

  String bestMatch = null;
  int bestDistance = Integer.MAX_VALUE;

  for (String candidate : candidates) {
    if (candidate == null) {
      // A null entry cannot be a suggestion; skip it instead of failing the whole lookup.
      continue;
    }
    int distance = levenshteinDistance(normalizedTarget, candidate.toLowerCase(Locale.ROOT));
    if (distance < bestDistance) {
      bestDistance = distance;
      bestMatch = candidate;
    }
  }

  // Only return a suggestion if the distance is reasonable
  final int maxDistance = Math.max(4, target.length() / 2);
  if (bestMatch != null && bestDistance <= maxDistance) {
    return Optional.of(bestMatch);
  }

  return Optional.empty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,41 @@
import org.opensearch.sql.ast.AbstractNodeVisitor;

@Getter
@EqualsAndHashCode(callSuper = false)
@EqualsAndHashCode(
callSuper = false,
exclude = {"line", "column"})
public class QualifiedName extends UnresolvedExpression {
public static final String DELIMITER = ".";
private final List<String> parts;

private final Integer line;

private final Integer column;

public QualifiedName(String name) {
this.parts = Collections.singletonList(name);
this(Collections.singletonList(name), null, null);
}

/**
* Constructs a qualified name from its parts without source position information: line and
* column are both passed as null to the three-argument constructor.
*
* @param parts The parts of the qualified name
*/
public QualifiedName(Iterable<String> parts) {
this(parts, null, null);
}

/**
 * Creates a qualified name together with the position at which it appeared in the query.
 *
 * @param parts The parts of the qualified name
 * @param line Line number (1-based), null if not available
 * @param column Column position (0-based), null if not available
 * @throws IllegalArgumentException if {@code parts} yields no elements
 */
public QualifiedName(Iterable<String> parts, Integer line, Integer column) {
  this.line = line;
  this.column = column;
  // Materialize the iterable so the parts can be stored as a list.
  this.parts = StreamSupport.stream(parts.spliterator(), false).collect(toList());
  if (this.parts.isEmpty()) {
    throw new IllegalArgumentException("parts is empty");
  }
}

/** Construct {@link QualifiedName} from list of string. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
Expand All @@ -18,6 +19,7 @@
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.common.error.ErrorCode;
import org.opensearch.sql.common.error.ErrorReport;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.expression.function.BuiltinFunctionName;
import org.opensearch.sql.expression.function.PPLFuncImpTable;

Expand Down Expand Up @@ -58,7 +60,7 @@ private static RexNode resolveInJoinCondition(

return resolveFieldWithAlias(nameNode, context, 2)
.or(() -> resolveFieldWithoutAlias(nameNode, context, 2))
.orElseThrow(() -> getNotFoundException(nameNode));
.orElseThrow(() -> getNotFoundException(nameNode, context));
}

/** Resolves qualified name in non-join condition context. */
Expand Down Expand Up @@ -91,7 +93,7 @@ private static RexNode resolveInNonJoinCondition(

return resolveCorrelationField(nameNode, context)
.or(() -> replaceWithNullLiteralInCoalesce(context))
.orElseThrow(() -> getNotFoundException(nameNode));
.orElseThrow(() -> getNotFoundException(nameNode, context));
}

private static String joinParts(List<String> parts, int start, int length) {
Expand Down Expand Up @@ -327,10 +329,27 @@ private static Optional<RexNode> replaceWithNullLiteralInCoalesce(CalcitePlanCon
return Optional.empty();
}

private static ErrorReport getNotFoundException(QualifiedName node) {
return ErrorReport.wrap(
new IllegalArgumentException(String.format("Field [%s] not found.", node.toString())))
.code(ErrorCode.FIELD_NOT_FOUND)
.build();
/**
 * Builds the FIELD_NOT_FOUND error report for a field reference that could not be resolved.
 *
 * <p>The report carries the requested field name, the field names of the relation on top of the
 * builder stack, an optional "Did you mean" suggestion, and the query position when the node
 * has both a line and a column.
 */
private static ErrorReport getNotFoundException(QualifiedName node, CalcitePlanContext context) {
  // Field names of the relation currently on top of the builder stack.
  // NOTE(review): peek() looks at the top of the stack only; when this is reached from
  // join-condition resolution (which resolves against 2 inputs) the list may cover just one
  // side of the join - confirm whether both inputs should be listed.
  final List<String> availableFields = context.relBuilder.peek().getRowType().getFieldNames();
  final String requestedField = node.toString();

  ErrorReport.Builder builder =
      ErrorReport.wrap(
              new IllegalArgumentException(
                  String.format("Field [%s] not found.", requestedField)))
          .code(ErrorCode.FIELD_NOT_FOUND)
          .context("requested_field", requestedField)
          .context("available_fields", availableFields);

  // Offer the nearest available field name, if one is close enough.
  Optional<String> closest = StringUtils.findClosestMatch(requestedField, availableFields);
  if (closest.isPresent()) {
    builder.suggestion("Did you mean: " + closest.get());
  }

  // Source position is attached only when both coordinates are present on the node.
  Integer line = node.getLine();
  Integer column = node.getColumn();
  if (line != null && column != null) {
    builder.context("query_pos", Map.of("line", line, "column", column));
  }

  return builder.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
package org.opensearch.sql.common.utils;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.opensearch.sql.common.utils.StringUtils.unquoteText;

import java.util.List;
import java.util.Optional;
import org.junit.jupiter.api.Test;

class StringUtilsTest {
Expand Down Expand Up @@ -46,4 +49,47 @@ void unquoteTest() {
assertEquals("hel\"lo", unquoteText("\"hel\"lo\""));
assertEquals("hel\\'\\lo", unquoteText("'hel\\\\''\\\\lo'"));
}

@Test
void levenshteinDistanceTest() {
  // Identical strings
  assertEquals(0, StringUtils.levenshteinDistance("test", "test"));
  assertEquals(0, StringUtils.levenshteinDistance("", ""));

  // Single substitution / single deletion
  assertEquals(1, StringUtils.levenshteinDistance("test", "text"));
  assertEquals(1, StringUtils.levenshteinDistance("test", "tst"));

  // Multiple edits
  assertEquals(3, StringUtils.levenshteinDistance("kitten", "sitting"));
  assertEquals(4, StringUtils.levenshteinDistance("hello", "world"));

  // One side empty: distance is the length of the other side
  assertEquals(4, StringUtils.levenshteinDistance("test", ""));
  assertEquals(4, StringUtils.levenshteinDistance("", "test"));

  // The comparison is case-sensitive
  assertEquals(1, StringUtils.levenshteinDistance("Test", "test"));

  // Null input yields the maximum distance instead of throwing
  assertEquals(Integer.MAX_VALUE, StringUtils.levenshteinDistance(null, "test"));
  assertEquals(Integer.MAX_VALUE, StringUtils.levenshteinDistance("test", null));
  assertEquals(Integer.MAX_VALUE, StringUtils.levenshteinDistance(null, null));
}

@Test
void findClosestMatchTest() {
  List<String> fields = List.of("name", "age", "email", "address", "phone");

  // Exact match
  assertEquals(Optional.of("email"), StringUtils.findClosestMatch("email", fields));

  // Close typos
  assertEquals(Optional.of("name"), StringUtils.findClosestMatch("nam", fields));
  assertEquals(Optional.of("email"), StringUtils.findClosestMatch("emal", fields));
  assertEquals(Optional.of("address"), StringUtils.findClosestMatch("addres", fields));

  // Case insensitive: the candidate is returned in its original casing
  assertEquals(Optional.of("name"), StringUtils.findClosestMatch("NAME", fields));

  // Too far off - should not match (longer string with many edits)
  assertTrue(StringUtils.findClosestMatch("xyzabc", fields).isEmpty());

  // Empty candidates
  assertTrue(StringUtils.findClosestMatch("test", List.of()).isEmpty());

  // Null target or null candidates yield no suggestion instead of throwing
  assertTrue(StringUtils.findClosestMatch(null, fields).isEmpty());
  assertTrue(StringUtils.findClosestMatch("test", null).isEmpty());
}
}
4 changes: 2 additions & 2 deletions docs/user/ppl/cmd/mvcombine.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,6 @@ source=mvcombine_data

Expected output:
```text
{'context': {'stage': 'analyzing', 'stage_description': 'Parsing and validating the query'}, 'reason': 'Field [does_not_exist] not found.', 'details': 'Field [does_not_exist] not found.', 'location': ['while preparing and validating the query plan'], 'code': 'FIELD_NOT_FOUND', 'type': 'IllegalArgumentException'}
{'context': {'stage_description': 'Parsing and validating the query', 'stage': 'analyzing', 'requested_field': 'does_not_exist', 'available_fields': ['packets_str', 'bytes', 'case', 'letters', 'ip', 'tags', '_id', '_index', '_score', '_maxscore', '_sort', '_routing'], 'query_pos': {'column': 34, 'line': 1}}, 'reason': 'Field [does_not_exist] not found.', 'details': 'Field [does_not_exist] not found.', 'location': ['while preparing and validating the query plan'], 'code': 'FIELD_NOT_FOUND', 'type': 'IllegalArgumentException'}
Error: Query returned no data
```
```
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@ public void testFieldsShouldBeCaseSensitive() {
verifyErrorMessageContains(e, "Field [NAME] not found.");
}

@Test
public void testFieldNotFoundWithSuggestion() {
  // Typo: "nam" instead of "name"
  Throwable e =
      assertThrowsWithReplace(
          IllegalStateException.class, () -> executeQuery("source=test | fields nam"));
  verifyErrorMessageContains(e, "Field [nam] not found.");
  // Verify suggestion based on Levenshtein distance
  verifyErrorMessageContains(e, "Did you mean: name");
  // Verify available fields are listed
  verifyErrorMessageContains(e, "available_fields");
}

@Test
public void testFilterQuery1() throws IOException {
JSONObject actual = executeQuery("source=test | where age = 30 | fields name, age");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -903,11 +903,21 @@ public UnresolvedExpression visitMaxOption(OpenSearchPPLParser.MaxOptionContext
}

public QualifiedName visitIdentifiers(List<? extends ParserRuleContext> ctx) {
return new QualifiedName(
List<String> parts =
ctx.stream()
.map(RuleContext::getText)
.map(StringUtils::unquoteIdentifier)
.collect(Collectors.toList()));
.collect(Collectors.toList());

// Capture source position from the first identifier for error reporting
if (!ctx.isEmpty()) {
ParserRuleContext first = ctx.get(0);
int line = first.getStart().getLine();
int column = first.getStart().getCharPositionInLine();
return new QualifiedName(parts, line, column);
}

return new QualifiedName(parts);
}

private List<UnresolvedExpression> singleFieldRelevanceArguments(
Expand Down
Loading