diff --git a/.claude/commands/ppl-bugfix.md b/.claude/commands/ppl-bugfix.md new file mode 100644 index 00000000000..f2ac6d6807d --- /dev/null +++ b/.claude/commands/ppl-bugfix.md @@ -0,0 +1,162 @@ +--- +allowed-tools: Agent, Read, Bash(gh:*), Bash(git:*) +description: Run the PPL bugfix harness for a GitHub issue or follow up on an existing PR +--- + +Fix a PPL bug or follow up on an existing PR using the harness in `.claude/harness/ppl-bugfix-harness.md`. + +## Input + +Accepts one or more issue/PR references. Multiple references are processed in parallel (each gets its own subagent + worktree). + +- `/ppl-bugfix #1234` — single issue +- `/ppl-bugfix PR#5678` — single PR +- `/ppl-bugfix #1234 #5678 PR#9012` — multiple in parallel +- `/ppl-bugfix https://github.com/opensearch-project/sql/issues/1234` — URL + +Optional mode flag (append to any of the above): +- `--safe` — `acceptEdits` mode. Auto-approve file edits only, Bash commands require manual approval. (Most conservative) +- `--yolo` — `bypassPermissions` mode. Fully trusted, no prompts. Subagent runs in an isolated worktree so this is safe. (Default) + +> **Note**: `bypassPermissions` skips the interactive prompt but still respects the allow-list in `~/.claude/settings.json`. Ensure git/gh write commands are in the global allow-list. + +Examples: +- `/ppl-bugfix #1234` — single issue, defaults to yolo +- `/ppl-bugfix #1234 #5678 --yolo` — two issues in parallel +- `/ppl-bugfix PR#5293 PR#5300` — two PRs in parallel +- `/ppl-bugfix #1234 PR#5678 --safe` — mix of issue and PR + +If no argument given, ask for an issue or PR number. + +## Step 0: Resolve Permission Mode + +Parse the mode flag from the input arguments: + +| Flag | Mode | +|------|------| +| `--safe` | `acceptEdits` | +| `--yolo` | `bypassPermissions` | +| _(no flag)_ | `bypassPermissions` (default) | + +Use the resolved mode as the `mode` parameter when dispatching the subagent in Step 2A/2B. + +## Step 1: Resolve Each Reference + +For each issue/PR reference in the input, resolve its state. Run these lookups in parallel when there are multiple references. + +```bash +# Issue → PR (check multiple closing keyword variants) +gh pr list --search "Resolves #" --json number,url,state --limit 5 +gh pr list --search "Fixes #" --json number,url,state --limit 5 +gh pr list --search "Closes #" --json number,url,state --limit 5 + +# PR → Issue +gh pr view --json body | jq -r '.body' | grep -oiE '(resolves|fixes|closes) #[0-9]+' | grep -oE '[0-9]+' +``` + +| State | Action | +|-------|--------| +| Issue exists, no PR | **Initial Fix** (Step 2A) | +| Issue exists, open PR found | **Follow-up** (Step 2B) | +| PR provided directly | **Follow-up** (Step 2B) | + +## Step 2: Dispatch Subagents + +Dispatch one subagent per reference. When there are multiple references, dispatch all subagents in a single message (parallel execution). + +### 2A: Initial Fix + +``` +Agent( + mode: "", + isolation: "worktree", + name: "bugfix-", + description: "PPL bugfix #", + prompt: "Read .claude/harness/ppl-bugfix-harness.md and follow it to fix GitHub issue #. + Follow Phase 0 through Phase 3 in order. + Phase 0.3 defines TDD execution flow. Do NOT skip any phase. + CRITICAL: If Phase 0.1 determines the bug is already fixed on main, HARD STOP. + Do NOT write tests, do NOT create a PR — just comment/close the issue and report back. + If the bug IS reproducible, post the Decision Log (Phase 3.4) before completing." 
+) +``` + +### 2B: Follow-up + +Before dispatching, check if an existing worktree already has the PR branch checked out: + +```bash +# List worktrees and find one on the PR branch +for wt in .claude/worktrees/agent-*/; do + branch=$(git -C "$wt" branch --show-current 2>/dev/null) + if [ "$branch" = "" ]; then + echo "REUSE: $wt (branch: $branch)" + fi +done +``` + +**If existing worktree found**: Do NOT use `isolation: "worktree"`. Pass the worktree path in the prompt so the subagent works there directly. + +``` +Agent( + mode: "", + name: "bugfix-", + description: "PPL bugfix # followup", + prompt: "cd first, then read .claude/harness/ppl-bugfix-followup.md and follow it. + PR: (), Issue: # + Working directory: " +) +``` + +**If no existing worktree**: Create a new one. + +``` +Agent( + mode: "", + isolation: "worktree", + name: "bugfix-", + description: "PPL bugfix # followup", + prompt: "Read .claude/harness/ppl-bugfix-followup.md and follow it. + PR: (), Issue: #" +) +``` + +## Step 3: Report Back + +After all subagents complete, report a summary for each: +- Classification, fix summary, PR URL, worktree path and branch, items needing human attention (2A) +- What was addressed, current PR state, whether another round is needed (2B) + +**Always include the worktree→PR mapping** from the subagent's output, e.g.: + +``` +Worktree: /path/to/.claude/worktrees/agent-xxxx +Branch: bugfix-1234 +PR: #5678 +``` + +**Important**: After reporting, the main agent must remember this mapping. When the user later asks to make changes to the PR (e.g., "commit this to PR #5678"), operate in the worktree directory — not the main session directory. + +## Subagent Lifecycle + +Subagents are task-scoped. They complete and release context — they cannot poll for events. + +``` +Agent A (Phase 0-3) → creates PR → completes + (CI runs, reviewers comment, conflicts arise) +Agent B (Phase 3.5) → handles feedback → completes + (repeat as needed) +Agent N (Phase 3.5) → gh pr ready → done +``` + +Context is preserved across agents via: +- **Decision Log** (PR comment) — single source of truth for rejected alternatives, pitfalls, design rationale +- **GitHub state** (PR diff, review comments, CI logs) — reconstructed by each follow-up agent + +## Rules + +- Subagent reads `.claude/harness/ppl-bugfix-harness.md` and fetches issue/PR details itself — do NOT inline content into the prompt +- If bug is not reproducible (Phase 0.1), stop and report — do not proceed +- Issue ↔ PR auto-resolution means the user never needs to track PR numbers manually +- **Do NOT use `mode: "auto"` for subagents** — `auto` mode does not work for subagents; Bash commands still require manual approval. Only `bypassPermissions` reliably skips permission checks. +- **Always dispatch subagent** — even for trivial follow-ups (remove co-author, force push). Do NOT run commands directly in the main session; subagents with `bypassPermissions` skip permission prompts, the main session does not. diff --git a/.claude/harness/ppl-bugfix-followup.md b/.claude/harness/ppl-bugfix-followup.md new file mode 100644 index 00000000000..4b31ab6029e --- /dev/null +++ b/.claude/harness/ppl-bugfix-followup.md @@ -0,0 +1,125 @@ +# PPL Bugfix Follow-up + +## Rules + +- Do NOT add `Co-Authored-By` lines in commits — only DCO `Signed-off-by` + +--- + +## Report Working Directory + +```bash +echo "Worktree: $(pwd)" +echo "Branch: $(git branch --show-current)" +``` + +Include this in your output so the caller knows where changes are happening. 
+ +## Reconstruct Context + +First checkout the PR branch, then load state: + +```bash +# Checkout the PR branch in this worktree +gh pr checkout + +# Resolve fork remote — the worktree may only have origin (upstream) +git remote -v +# If no fork remote exists, add it: +git remote add fork https://github.com//sql.git + +# Load PR state — reviews, CI, mergeability +gh pr view --json title,body,state,reviews,statusCheckRollup,mergeable +gh pr checks + +# Load ALL comments — includes bot comments (Code-Diff-Analyzer, PR Reviewer Guide, Code Suggestions) and human comments +gh pr view --json comments --jq '.comments[] | {author: .author.login, body: .body}' +``` + +Categorize ALL signals — not just CI and human reviews: + +| Signal | Type | +|--------|------| +| `statusCheckRollup` has failures | CI failure | +| `reviews` has CHANGES_REQUESTED | Review feedback | +| `mergeable` is CONFLICTING | Merge conflict | +| Bot comments with actionable suggestions | Review feedback (treat like human review) | +| All pass + approved | Ready — run `gh pr ready` | + +## Handle Review Feedback + +For each comment (human OR bot), **cross-check against the Decision Log first**: + +| Type | Action | +|------|--------| +| Code change | If already rejected in Decision Log, reply with reasoning. Otherwise make the change, new commit, push | +| Question | Reply with explanation — Decision Log often has the answer | +| Nit | Fix if trivial | +| Disagreement | Reply with Decision Log reasoning; if reviewer insists, escalate to user | + +```bash +git add && git commit -s -m "Address review feedback: " +git push -u fork +``` + +## Clean Up Commit History + +When you need to amend a commit (e.g. remove Co-Authored-By, reword message) and the branch has a merge commit on top, don't try `git reset --soft origin/main` — it will include unrelated changes if main has moved. Instead cherry-pick the fix onto latest main: + +```bash +git checkout -B clean-branch origin/main +git cherry-pick +git commit --amend -s -m "" +git push fork clean-branch: --force-with-lease +``` + +## Handle CI Failures + +```bash +gh pr checks # Identify failures +gh run view --log-failed # Read logs +# Test failure → fix locally, push new commit +# Spotless → ./gradlew spotlessApply, push +# Flaky → gh run rerun --failed +``` + +## Handle Merge Conflicts + +```bash +git fetch origin && git merge origin/main # Resolve conflicts +./gradlew spotlessApply && ./gradlew test && ./gradlew :integ-test:integTest # Re-verify +git commit -s -m "Resolve merge conflicts with main" +git push -u fork +``` + +## Mark Ready + +```bash +gh pr ready +``` + +## Retrospective + +After handling follow-up, reflect on the feedback received and check if it reveals gaps in the harness or command: + +For each comment addressed (bot or human): +- **Does the feedback point to a pattern the harness should have prevented?** → Add guidance to the relevant Phase in `ppl-bugfix-harness.md` +- **Was this a repeated mistake across PRs?** → Add to Quick Reference or Case Index +- **Did the harness template produce the problematic code?** → Fix the template directly +- **Was a permission or tool missing?** → Add to `.claude/settings.json` +- **Did the follow-up workflow itself miss this signal?** → Update this file + +If any improvement is needed, make the edit and include it in the same commit. + +## Completion Gate + +Before reporting "done": + +1. Run `git status --porcelain` — if any uncommitted changes remain, commit and push them. This includes harness edits from Retrospective. 
+2. Report in your final output: + +``` +Worktree: +Branch: +PR: +``` diff --git a/.claude/harness/ppl-bugfix-harness.md b/.claude/harness/ppl-bugfix-harness.md new file mode 100644 index 00000000000..54c2103fc9b --- /dev/null +++ b/.claude/harness/ppl-bugfix-harness.md @@ -0,0 +1,169 @@ +# PPL Bugfix Harness + +## Phase 0: Triage + +### 0.0 Report Working Directory + +```bash +echo "Worktree: $(pwd)" +echo "Branch: $(git branch --show-current)" +``` + +Include this in your output so the caller knows where changes are happening. + +### 0.1 Load & Reproduce + +```bash +gh issue view --repo opensearch-project/sql +``` + +Write a failing test or run an existing one to reproduce the bug on `main`. + +If the bug **does not reproduce** (correct results, not infra failure): + +| Finding | Action | +|---------|--------| +| Already fixed | `gh issue comment` + `gh issue close` | +| Older version only | `gh issue comment` + `gh issue close` | +| Intermittent | Label `flaky` or `needs-info`, do NOT close | +| Can't reproduce | Comment asking for repro steps, label `needs-info` | + +**HARD STOP** — do not proceed. Report back. + +### 0.2 Classify + +Identify the bug layer (Grammar, AST/Functions, Type System, Optimizer, Execution, DI/Resource) and record it. Consult `.claude/harness/ppl-bugfix-reference.md` for fix-path-specific guidance if needed. + +### 0.3 Guardrails + +Stop and report back if: +- Root cause unclear after reading 15+ source files +- Fix breaks 5+ unrelated tests +- Same build error 3 times in a row + +### 0.4 Execution Flow + +``` +Triage → Write FAILING test → Fix → Remaining tests → Verify → Commit → PR → Decision Log → Completion Gate +``` + +--- + +## Phase 1: Fix + +Find and fix the root cause. Consult `.claude/harness/ppl-bugfix-reference.md` for path-specific patterns and examples. + +--- + +## Phase 2: Tests + +Consult `.claude/harness/ppl-bugfix-reference.md` for test templates. + +Required deliverables: +- Failing test reproducing the bug (written BEFORE the fix) +- Unit tests covering happy path and edge cases +- Integration test — add to an existing `*IT.java` when possible; if creating a new one, add it to `CalciteNoPushdownIT` +- YAML REST test at `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/.yml` + +--- + +## Phase 3: Verify & Submit + +### 3.1 Verify + +```bash +./gradlew spotlessApply +./gradlew ::test --tests "" +./gradlew test +./gradlew :integ-test:integTest -Dtests.class="*" +``` + +Run `./gradlew :integ-test:yamlRestTest` if YAML tests were added. Run `./gradlew generateGrammarSource && ./gradlew :ppl:test` if grammar was modified. + +### 3.2 Commit & PR + +```bash +git add +git commit -s -m "[BugFix] Fix (#)" +git fetch origin && git merge origin/main +./gradlew test && ./gradlew :integ-test:integTest -Dtests.class="*" + +# Resolve fork remote (check git remote -v; add if missing) +git remote add fork https://github.com//sql.git +git push -u fork +``` + +Do NOT add Co-Authored-By lines. Use the git user name to infer the fork owner, or fall back to "qianheng-aws". 
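A minimal sketch of that fork-owner resolution — it assumes the git user name matches the GitHub handle of the fork owner, which may not hold for every contributor:

```bash
# Infer the fork owner from the git user name (assumption), falling back to the default owner
fork_owner=$(git config user.name)
fork_owner=${fork_owner:-qianheng-aws}
# Add the fork remote only if it is not already configured
git remote get-url fork >/dev/null 2>&1 || git remote add fork "https://github.com/${fork_owner}/sql.git"
```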
+ +```bash +gh pr create --draft --repo opensearch-project/sql \ + --title "[BugFix] Fix (#)" \ + --body "$(cat <<'EOF' +### Description + + +### Related Issues +Resolves # + +### Check List +- [x] New functionality includes testing +- [x] Commits signed per DCO (`-s`) +- [x] `spotlessCheck` passed +- [x] Unit tests passed +- [x] Integration tests passed +EOF +)" +``` + +### 3.3 Decision Log + +Post as a PR comment: + +```bash +gh pr comment --body "$(cat <<'EOF' +## Decision Log +**Root Cause**: +**Approach**: +**Alternatives Rejected**: +**Pitfalls**: +**Things to Watch**: +EOF +)" +``` + +--- + +## Completion Gate + +Run `git status --porcelain` — if any uncommitted changes remain, commit and push them before proceeding. + +Do NOT report "done" until every item below is checked. List each in your final report: + +- [ ] **Unit tests**: New test class or methods +- [ ] **Integration test**: New `*IT.java` test +- [ ] **YAML REST test**: `issues/.yml` +- [ ] **spotlessApply**: Ran successfully +- [ ] **Tests pass**: Affected modules +- [ ] **Commit**: DCO sign-off, `[BugFix]` prefix, no Co-Authored-By +- [ ] **Draft PR**: `--draft`, body contains `Resolves #` +- [ ] **Decision Log**: PR comment posted + +If any item is blocked, report which and why. + +--- + +## Phase 4: Retrospective + +- [ ] Symptom in Quick Reference? Add if missing. +- [ ] Classification correct? Fix routing if misleading. +- [ ] Test template worked as-is? Fix if broken. +- [ ] New pattern? Add to Case Index. + +Include harness improvements in the same PR. + +Report in your final output: +``` +Worktree: +Branch: +PR: +``` diff --git a/.claude/harness/ppl-bugfix-reference.md b/.claude/harness/ppl-bugfix-reference.md new file mode 100644 index 00000000000..20697c83ecb --- /dev/null +++ b/.claude/harness/ppl-bugfix-reference.md @@ -0,0 +1,157 @@ +# PPL Bugfix Reference + +Consult this file when you need fix-path-specific guidance or test templates. + +--- + +## Fix Path Reference + +### Path A — Grammar / Parser + +1. Update grammar files (must stay in sync): + - `language-grammar/src/main/antlr4/OpenSearchPPLParser.g4` (primary) + - `ppl/src/main/antlr/OpenSearchPPLParser.g4` + - `async-query-core/src/main/antlr/OpenSearchPPLParser.g4` (if applicable) +2. Regenerate: `./gradlew generateGrammarSource` +3. Update AstBuilder: `ppl/.../parser/AstBuilder.java` +4. Test: `AstBuilderTest` + +### Path B — AST / Function Implementation + +1. AST nodes in `core/.../ast/tree/`, functions in `core/.../expression/function/` or `PPLBuiltinOperators` +2. Watch Visitor pattern — sync `AbstractNodeVisitor`, `Analyzer`, `CalciteRelNodeVisitor`, `PPLQueryDataAnonymizer` +3. Test: `verifyLogical()`, `verifyPPLToSparkSQL()`, `verifyResult()` +4. **Before writing a new function-name → Calcite-op switch, try to reuse the existing visitor** + (`aggVisitor` / `rexVisitor` / `CalciteAggCallVisitor` / `CalciteRexNodeVisitor`). If the issue + is that a shared visitor resolves field references against the wrong row (e.g., wrong side of a + join), rewrite the AST field references to reference the correct names and delegate instead of + duplicating the AVG/SUM/MIN/MAX/STDDEV/... mapping by hand. + +### Path C — Type System / Semantic Analysis + +1. `OpenSearchTypeFactory.java`, `Analyzer.java`, `ExpressionAnalyzer.java` +2. Preserve nullable semantics; protect UDT from `leastRestrictive()` downgrade +3. Test: type preservation, nullable propagation, mixed types + +### Path D — Optimizer / Predicate Pushdown + +1. 
`PredicateAnalyzer.java`, `LogicalPlanOptimizer`, `QueryService.java` +2. Watch `nullAs` semantics; for plan bloat consider `FilterMergeRule` +3. Verify: `EXPLAIN` output + integration test correctness + +### Path E — Execution / Resource Management + +1. `OpenSearchExecutionEngine.java`, `SQLPlugin.java`, `OpenSearchPluginModule.java` +2. Common patterns: cache key collision, memory leak, unbounded growth, non-singleton, DI not injected + +--- + +## Test Templates + +**Unit test** (extend `CalcitePPLAbstractTest`): +```java +public class CalcitePPLYourFixTest extends CalcitePPLAbstractTest { + public CalcitePPLYourFixTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Before + public void init() { + doReturn(true).when(settings) + .getSettingValue(Settings.Key.CALCITE_ENGINE_ENABLED); + } + + @Test + public void testBugScenario() { + verifyLogical("source=EMP | where SAL > 1000", + "LogicalFilter(condition=[>($5, 1000)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"); + } +} +``` + +**Integration test** (extend `CalcitePPLIT`): +```java +public class CalcitePPLYourFixIT extends CalcitePPLIT { + @Override + public void init() throws IOException { + super.init(); + enableCalcite(); + } + + @Test + public void testBugFixEndToEnd() throws IOException { + JSONObject result = executeQuery("source= | "); + verifySchema(result, schema("field", "alias", "type")); + verifyDataRows(result, rows("expected_value_1"), rows("expected_value_2")); + } +} +``` + +**YAML REST test** — place at `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/.yml`: +```yaml +setup: + - do: + indices.create: + index: test_issue_ + body: + settings: { number_of_shards: 1, number_of_replicas: 0 } + mappings: { properties: { : { type: } } } + - do: + query.settings: + body: { transient: { plugins.calcite.enabled: true } } +--- +teardown: + - do: + query.settings: + body: { transient: { plugins.calcite.enabled: false } } +--- +"": + - skip: { features: [headers, allowed_warnings] } + - do: + bulk: { index: test_issue_, refresh: true, body: ['{"index": {}}', '{"": ""}'] } + - do: + headers: { Content-Type: 'application/json' } + ppl: { body: { query: "source=test_issue_ | " } } + - match: { total: } + - match: { datarows: [ [ , ], [ , ] ] } +``` + +> **Always include `datarows` assertions** — verifying only `total` and `schema` will miss +> wrong values. Count the expected output groups carefully (e.g., for `chart ... by `, +> count distinct (row_split, col_split) groups after null filtering, not the number of input rows). 
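Optionally, sanity-check that the new YAML file parses before running the suite — a sketch assuming PyYAML is available, with `1234` as a stand-in issue number:

```bash
# Parse the multi-document YAML test file, then run the yamlRestTest suite
python -c 'import sys, yaml; list(yaml.safe_load_all(open(sys.argv[1])))' \
  integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/1234.yml
./gradlew :integ-test:yamlRestTest
```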
+ +--- + +## Symptom → Fix Path + +``` +SyntaxCheckException / unrecognized syntax → Path A +SemanticCheckException / type mismatch → Path C +Field type wrong (timestamp→string) → Path C +EXPLAIN shows predicate not pushed down → Path D +Multi-condition query: missing/extra rows → Path D +OOM / memory growth over time → Path E +NPE in Transport layer → Path E +"node must be boolean/number, found XXX" → Path B +Regex/function extraction offset → Path B +``` + +--- + +## Case Index + +| Commit | Bug | Layer | Tests | +|--------|-----|-------|-------| +| `ada2e34` | UNION loses UDT type | Type System | 8 UT + 4 IT | +| `26674f9` | rex capture group index shift | AST/Functions | Multiple UTs | +| `b4df010` | isnotnull not pushed down with != | Optimizer | 2 UT + IT | +| `e045d15` | Multiple filters OOM | Optimizer | 26 output updates | +| `f024b4f` | High-cardinality GROUP BY OOM | Execution | Benchmark | +| `97d5d26` | OrdinalMap cache collision + leak | Execution | — | +| `90393bf` | Non-singleton ExecutionEngine leak | Resource | — | +| `f6be830` | Transport extensions not injected | DI | — | +| `734394d` | Grammar rule typo | Grammar | — | +| `246ed0d` | Float precision flaky test | Test Infra | — | +| `d56b8fa` | Wildcard index type conflict | Value Parsing | 3 UT + 1 IT + 1 YAML | +| `5a78b78` | Boolean coercion from numeric in wildcard queries | Value Parsing | 3 UT + 1 IT + 1 YAML | diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000000..eae8ab7e33d --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,28 @@ +{ + "permissions": { + "allow": [ + "Bash(./gradlew *)", + "Bash(gh issue:*)", + "Bash(gh pr:*)", + "Bash(gh api:*)", + "Bash(gh search:*)", + "Bash(gh run:*)", + "Bash(git add:*)", + "Bash(git commit:*)", + "Bash(git stash:*)", + "Bash(git show:*)", + "Bash(git diff:*)", + "Bash(git status:*)", + "Bash(git log:*)", + "Bash(git branch:*)", + "Bash(git remote:*)", + "Bash(git fetch:*)", + "Bash(git checkout:*)", + "Bash(git push -u:*)", + "Bash(git push --force-with-lease:*)", + "Bash(git merge:*)", + "Bash(git cherry-pick:*)", + "Bash(git reset --soft:*)" + ] + } +} diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml new file mode 100644 index 00000000000..249bbc766b6 --- /dev/null +++ b/.github/workflows/dependabot.yml @@ -0,0 +1,15 @@ +version: 2 +updates: + - package-ecosystem: "gradle" + directory: "/" + schedule: + interval: "weekly" + day: "monday" + time: "08:00" + timezone: "America/Los_Angeles" + labels: + - "skip-changelog" + group: + all-dependencies: + patterns: + - "*" diff --git a/.github/workflows/issue-dedupe.yml b/.github/workflows/issue-dedupe.yml new file mode 100644 index 00000000000..b31dd85b6cc --- /dev/null +++ b/.github/workflows/issue-dedupe.yml @@ -0,0 +1,43 @@ +--- +name: Issue Dedupe Main +on: + issues: + types: [opened] + schedule: + - cron: '0 0 * * *' + workflow_dispatch: + inputs: + job: + description: 'Job to run' + required: true + type: choice + options: + - detect + - auto-close + default: detect + issue_number: + description: 'Issue number to check for duplicates (detect only)' + required: false + type: string + +jobs: + detect: + if: (github.event_name == 'issues' && github.event.issue.user.type != 'Bot') || (github.event_name == 'workflow_dispatch' && inputs.job == 'detect') + uses: opensearch-project/opensearch-build/.github/workflows/issue-dedupe-detect.yml@main + permissions: + contents: read + issues: write + id-token: write + secrets: + 
BEDROCK_ACCESS_ROLE_ISSUE_DEDUPE: ${{ secrets.BEDROCK_ACCESS_ROLE_ISSUE_DEDUPE }} + with: + issue_number: ${{ inputs.issue_number || '' }} + grace_days: ${{ vars.DUPLICATE_GRACE_DAYS || '7' }} + + auto-close: + if: github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && inputs.job == 'auto-close') + uses: opensearch-project/opensearch-build/.github/workflows/issue-dedupe-autoclose.yml@main + permissions: + issues: write + with: + grace_days: ${{ vars.DUPLICATE_GRACE_DAYS || '7' }} diff --git a/.github/workflows/sql-cli-integration-test.yml b/.github/workflows/sql-cli-integration-test.yml index 63f3e91d334..0a0695bbd76 100644 --- a/.github/workflows/sql-cli-integration-test.yml +++ b/.github/workflows/sql-cli-integration-test.yml @@ -69,6 +69,7 @@ jobs: echo "Building SQL modules from current branch..." ./gradlew publishToMavenLocal -x test -x integTest echo "SQL modules published to Maven Local" + ./gradlew clean - name: Run SQL CLI tests with local SQL modules working-directory: sql-cli diff --git a/.gitignore b/.gitignore index 329348a7c12..bf9002f999d 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,10 @@ http-client.env.json .factorypath # Coding agent files (could be symlinks) -.claude +.claude/* +!.claude/settings.json +!.claude/commands/ +!.claude/harness/ +.claude/settings.local.json .clinerules memory-bank \ No newline at end of file diff --git a/.whitesource b/.whitesource index db4b0fec82c..9765a1d58b9 100644 --- a/.whitesource +++ b/.whitesource @@ -11,5 +11,11 @@ }, "issueSettings": { "minSeverityLevel": "LOW" + }, + "remediateSettings": { + "addLabels": ["skip-changelog"], + "workflowRules": { + "enabled": true + } } -} \ No newline at end of file +} diff --git a/CLAUDE.md b/CLAUDE.md index 4629dcf5fe7..03f88a60042 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,46 +9,16 @@ OpenSearch SQL plugin — enables SQL and PPL (Piped Processing Language) querie ## Build Commands ```bash -# Full build (compiles, tests, checks) -./gradlew build - -# Fast build (skip integration tests) -./gradlew build -x integTest - -# Build specific module -./gradlew :core:build -./gradlew :sql:build -./gradlew :ppl:build - -# Run unit tests only -./gradlew test - -# Run a single unit test class -./gradlew :core:test --tests "org.opensearch.sql.analysis.AnalyzerTest" - -# Run integration tests -./gradlew :integ-test:integTest - -# Run a single integration test -./gradlew :integ-test:integTest -Dtests.class="*QueryIT" - -# Skip Prometheus if unavailable -./gradlew :integ-test:integTest -DignorePrometheus - -# Code formatting -./gradlew spotlessCheck # Check -./gradlew spotlessApply # Auto-fix - -# Regenerate ANTLR parsers from grammar files -./gradlew generateGrammarSource - -# Run plugin locally with OpenSearch -./gradlew :opensearch-sql-plugin:run -./gradlew :opensearch-sql-plugin:run -DdebugJVM # With remote debug on port 5005 - -# Run doctests -./gradlew :doctest:doctest -./gradlew :doctest:doctest -Pdocs=search # Single file +./gradlew build # Full build (compiles, tests, checks) +./gradlew build -x integTest # Fast build (skip integration tests) +./gradlew :core:build # Build specific module +./gradlew test # Unit tests only +./gradlew :core:test --tests "*.AnalyzerTest" # Single test class +./gradlew :integ-test:integTest # Integration tests +./gradlew :integ-test:integTest -Dtests.class="*QueryIT" # Single IT +./gradlew spotlessCheck # Check formatting +./gradlew spotlessApply # Auto-fix formatting +./gradlew generateGrammarSource # Regenerate ANTLR parsers ``` ## Code 
Style @@ -123,6 +93,10 @@ plugin (OpenSearch plugin entry point, Guice DI wiring) - **PhysicalPlan** implements `Iterator` for streaming execution - **Guice** dependency injection in `OpenSearchPluginModule` +## Fixing PPL Bugs + +Use `/ppl-bugfix #` to fix PPL bugs. It dispatches a subagent in an isolated worktree with a structured harness covering triage, fix, tests, and PR creation. + ## Adding New PPL Commands Follow the checklist in `docs/dev/ppl-commands.md`: @@ -142,11 +116,11 @@ Follow `docs/dev/ppl-functions.md`. Three approaches: ## Calcite Engine -The project has two execution engines: the legacy **v2 engine** and the newer **Calcite engine** (Apache Calcite-based). Calcite is toggled via `plugins.calcite.enabled` setting (default: off in production, toggled per-test in integration tests). +The execution engine is Apache Calcite-based, toggled via `plugins.calcite.enabled` (default: off in production, toggled per-test in integration tests). - In integration tests, call `enableCalcite()` in `init()` to activate the Calcite path -- Some features (e.g., graphLookup) require pushdown optimization — use `enabledOnlyWhenPushdownIsEnabled()` to skip tests in the `CalciteNoPushdownIT` suite -- `CalciteNoPushdownIT` is a JUnit `@Suite` that re-runs Calcite test classes with pushdown disabled; add new test classes to its `@Suite.SuiteClasses` list +- Some features require pushdown optimization — use `enabledOnlyWhenPushdownIsEnabled()` to skip tests in `CalciteNoPushdownIT` +- `CalciteNoPushdownIT` re-runs Calcite test classes with pushdown disabled; add new test classes to its `@Suite.SuiteClasses` list ## Integration Tests diff --git a/CLAUDE_GUIDE.md b/CLAUDE_GUIDE.md new file mode 100644 index 00000000000..034b403f5c6 --- /dev/null +++ b/CLAUDE_GUIDE.md @@ -0,0 +1,32 @@ +# Claude Commands + +Slash commands for Claude Code in this repository. Use them in any Claude Code session. + +## `/ppl-bugfix` + +Fix a PPL bug end-to-end or follow up on an existing PR. + +**Usage:** + +``` +/ppl-bugfix #1234 # Single issue +/ppl-bugfix PR#5678 # Single PR follow-up +/ppl-bugfix #1234 #5678 PR#9012 # Multiple in parallel +/ppl-bugfix # By URL +``` + +**Permission mode flags** (optional, append to any input): + +| Flag | Mode | Description | +|------|------|-------------| +| `--safe` | `acceptEdits` | File edits auto-approved, Bash commands need manual approval | +| `--yolo` | `bypassPermissions` | No prompts at all — subagent runs in isolated worktree (default) | + +**What it does:** + +1. Resolves issue/PR linkage automatically +2. For new issues: dispatches a subagent in an isolated git worktree that follows the full bugfix harness (triage → fix → test → PR) +3. 
For existing PRs: handles CI failures, review feedback, merge conflicts, or marks as ready + +**Related files:** [`.claude/harness/ppl-bugfix-harness.md`](.claude/harness/ppl-bugfix-harness.md) + diff --git a/api/build.gradle b/api/build.gradle index fb4cafe79d8..570efc6bb0e 100644 --- a/api/build.gradle +++ b/api/build.gradle @@ -13,6 +13,7 @@ plugins { dependencies { api project(':ppl') + api group: 'org.apache.calcite', name: 'calcite-babel', version: '1.41.0' testImplementation testFixtures(project(':api')) testImplementation group: 'junit', name: 'junit', version: '4.13.2' diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 4332ff17660..4169963da54 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -17,19 +17,20 @@ import java.util.concurrent.Callable; import lombok.AllArgsConstructor; import lombok.Getter; -import org.apache.calcite.avatica.util.Casing; import org.apache.calcite.jdbc.CalciteSchema; import org.apache.calcite.plan.RelTraitDef; import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider; import org.apache.calcite.schema.Schema; import org.apache.calcite.schema.SchemaPlus; -import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.tools.FrameworkConfig; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Programs; import org.opensearch.sql.api.parser.CalciteSqlQueryParser; import org.opensearch.sql.api.parser.PPLQueryParser; import org.opensearch.sql.api.parser.UnifiedQueryParser; +import org.opensearch.sql.api.spec.LanguageSpec; +import org.opensearch.sql.api.spec.UnifiedPplSpec; +import org.opensearch.sql.api.spec.UnifiedSqlSpec; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.SysLimit; import org.opensearch.sql.common.setting.Settings; @@ -57,6 +58,9 @@ public class UnifiedQueryContext implements AutoCloseable { /** Query parser created eagerly from this context's configuration. */ private final UnifiedQueryParser parser; + /** Language spec for the query's frontend (SQL or PPL). */ + private final LanguageSpec langSpec; + /** * Returns the profiling result. Call after query execution to retrieve collected metrics. Returns * empty if profiling was not enabled. 
@@ -205,12 +209,18 @@ public Builder setting(String name, Object value) { public UnifiedQueryContext build() { Objects.requireNonNull(queryType, "Must specify language before build"); + LanguageSpec langSpec = + switch (queryType) { + case SQL -> UnifiedSqlSpec.extended(); + case PPL -> UnifiedPplSpec.create(); + }; Settings settings = buildSettings(); CalcitePlanContext planContext = CalcitePlanContext.create( - buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); + buildFrameworkConfig(langSpec), SysLimit.fromSettings(settings), queryType); QueryProfiling.activate(profiling); - return new UnifiedQueryContext(planContext, settings, createParser(planContext, settings)); + return new UnifiedQueryContext( + planContext, settings, createParser(planContext, settings), langSpec); } private UnifiedQueryParser createParser(CalcitePlanContext planContext, Settings settings) { @@ -236,22 +246,22 @@ public List getSettings() { } @SuppressWarnings({"rawtypes"}) - private FrameworkConfig buildFrameworkConfig() { + private FrameworkConfig buildFrameworkConfig(LanguageSpec langSpec) { SchemaPlus rootSchema = CalciteSchema.createRootSchema(true, cacheMetadata).plus(); catalogs.forEach(rootSchema::add); SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace); - return Frameworks.newConfigBuilder() - .parserConfig(buildParserConfig()) - .defaultSchema(defaultSchema) - .traitDefs((List) null) - .programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE)) - .build(); - } + Frameworks.ConfigBuilder builder = + Frameworks.newConfigBuilder() + .defaultSchema(defaultSchema) + .traitDefs((List) null) + .programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE)); - private SqlParser.Config buildParserConfig() { - // Preserve identifier case for lowercase OpenSearch index names - return SqlParser.Config.DEFAULT.withUnquotedCasing(Casing.UNCHANGED); + return builder + .parserConfig(langSpec.parserConfig()) + .sqlValidatorConfig(langSpec.validatorConfig()) + .operatorTable(langSpec.operatorTable()) + .build(); } private SchemaPlus findSchemaByPath(SchemaPlus rootSchema, String defaultPath) { diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java index af4d9f518ad..edf9ae50e18 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java @@ -14,7 +14,9 @@ import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.util.SqlVisitor; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Planner; import org.opensearch.sql.api.parser.UnifiedQueryParser; @@ -59,8 +61,7 @@ public UnifiedQueryPlanner(UnifiedQueryContext context) { public RelNode plan(String query) { try { return context.measure(ANALYZE, () -> strategy.plan(query)); - } catch (SyntaxCheckException e) { - // Re-throw syntax error without wrapping + } catch (SyntaxCheckException | UnsupportedOperationException e) { throw e; } catch (Exception e) { throw new IllegalStateException("Failed to plan query", e); @@ -81,7 +82,18 @@ private static class CalciteNativeStrategy implements PlanningStrategy { public RelNode plan(String query) throws Exception { try (Planner planner = Frameworks.getPlanner(context.getPlanContext().config)) { SqlNode parsed = 
planner.parse(query); - SqlNode validated = planner.validate(parsed); + if (!parsed.isA(SqlKind.QUERY)) { + throw new UnsupportedOperationException( + "Only query statements are supported. Got: " + parsed.getKind()); + } + + // TODO: move post-parse rewriting into CalciteSqlQueryParser + SqlNode rewritten = parsed; + for (SqlVisitor visitor : context.getLangSpec().postParseRules()) { + rewritten = rewritten.accept(visitor); + } + + SqlNode validated = planner.validate(rewritten); RelRoot relRoot = planner.rel(validated); return relRoot.project(); } diff --git a/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java new file mode 100644 index 00000000000..89167dc27a5 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java @@ -0,0 +1,89 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import java.util.ArrayList; +import java.util.List; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.util.SqlOperatorTables; +import org.apache.calcite.sql.util.SqlVisitor; +import org.apache.calcite.sql.validate.SqlValidator; + +/** + * Language specification defining the dialect the engine accepts. Provides parser configuration, + * validator configuration, and composable {@link LanguageExtension}s that contribute operators and + * post-parse rewrite rules. + * + *
Implementations define a complete language surface — for example, {@link UnifiedSqlSpec} + * provides ANSI and extended SQL modes. A future PPL spec would implement this same interface once + * PPL converges on the Calcite pipeline. + */ +public interface LanguageSpec { + + /** + * A composable language extension that contributes operators and post-parse rewrite rules. All + * methods have defaults so extensions only override what they need. + */ + interface LanguageExtension { + + /** + * Operators (functions, aggregates) this extension adds. Chained with the standard operator + * table during validation. + */ + default SqlOperatorTable operators() { + return SqlOperatorTables.of(); + } + + /** + * AST rewrite rules applied after parsing and before validation. Each visitor transforms the + * parse tree (e.g., rewriting named arguments into MAP literals). + */ + default List> postParseRules() { + return List.of(); + } + } + + /** + * Parser configuration controlling how SQL text is tokenized and parsed into a parse tree, + * including parser factory, lexical rules, and conformance. + */ + SqlParser.Config parserConfig(); + + /** + * Validator configuration controlling what SQL semantics the validator accepts, such as GROUP BY + * behavior, LIMIT syntax, and type coercion. + */ + SqlValidator.Config validatorConfig(); + + /** + * Language extensions registered with this spec. Each extension contributes operators and + * post-parse rewrite rules that are composed by {@link #operatorTable()} and {@link + * #postParseRules()}. + */ + List extensions(); + + /** + * Chained operator table combining the standard Calcite operators with all operators contributed + * by registered extensions. + */ + default SqlOperatorTable operatorTable() { + List tables = new ArrayList<>(); + tables.add(SqlStdOperatorTable.instance()); + extensions().forEach(ext -> tables.add(ext.operators())); + return SqlOperatorTables.chain(tables); + } + + /** + * All post-parse rewrite rules from registered extensions, flattened in registration order. + * Applied to the parse tree after parsing and before validation. 
+ */ + default List> postParseRules() { + return extensions().stream().flatMap(ext -> ext.postParseRules().stream()).toList(); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java new file mode 100644 index 00000000000..f60fc61a50c --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java @@ -0,0 +1,171 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN; + +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import lombok.AccessLevel; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.SqlOperandCountRanges; +import org.apache.calcite.sql.type.SqlOperandMetadata; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.util.SqlOperatorTables; +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; + +/** + * Declarative registry of language-level functions for the unified query engine. Functions defined + * here are part of the language spec — always resolvable regardless of the underlying data source. + * They are grouped into {@link Category categories} that callers chain into Calcite's operator + * table. Data-source capability is enforced at optimization time by pushdown rules. + */ +@Getter +@ToString(of = "funcName") +@EqualsAndHashCode(of = "funcName") +@RequiredArgsConstructor(access = AccessLevel.PRIVATE) +public final class UnifiedFunctionSpec { + + /** Function name as registered in the operator table (e.g., "match", "multi_match"). */ + private final String funcName; + + /** Calcite operator for chaining into the framework config's operator table. */ + private final SqlOperator operator; + + /** Full-text search functions. */ + public static final Category RELEVANCE = + new Category( + List.of( + function("match").vararg("field", "query").returnType(BOOLEAN).build(), + function("match_phrase").vararg("field", "query").returnType(BOOLEAN).build(), + function("match_bool_prefix").vararg("field", "query").returnType(BOOLEAN).build(), + function("match_phrase_prefix").vararg("field", "query").returnType(BOOLEAN).build(), + function("multi_match").vararg("fields", "query").returnType(BOOLEAN).build(), + function("simple_query_string").vararg("fields", "query").returnType(BOOLEAN).build(), + function("query_string").vararg("fields", "query").returnType(BOOLEAN).build())); + + /** All registered function specs, keyed by function name. 
*/ + private static final Map ALL_SPECS = + Stream.of(RELEVANCE) + .flatMap(c -> c.specs().stream()) + .collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s)); + + /** + * Looks up a function spec by name across all categories. + * + * @param name function name (case-insensitive) + * @return the spec, or empty if not found + */ + public static Optional of(String name) { + return Optional.ofNullable(ALL_SPECS.get(name.toLowerCase())); + } + + /** + * @return required param names from {@link SqlOperandMetadata}, or empty if not available. + */ + public List getParamNames() { + return operator.getOperandTypeChecker() instanceof SqlOperandMetadata metadata + ? metadata.paramNames() + : List.of(); + } + + /** A group of function specs that can be chained into Calcite's operator table. */ + public record Category(List specs) { + public SqlOperatorTable operatorTable() { + return SqlOperatorTables.of(specs.stream().map(UnifiedFunctionSpec::getOperator).toList()); + } + + /** Returns true if this category contains the given spec. */ + public boolean contains(UnifiedFunctionSpec spec) { + return specs.contains(spec); + } + } + + public static Builder function(String name) { + return new Builder(name); + } + + /** Fluent builder for function specs. */ + @RequiredArgsConstructor(access = AccessLevel.PRIVATE) + public static class Builder { + private final String funcName; + private List paramNames = List.of(); + private SqlReturnTypeInference returnType; + + public Builder vararg(String... names) { + this.paramNames = List.of(names); + return this; + } + + public Builder returnType(SqlReturnTypeInference type) { + this.returnType = type; + return this; + } + + public UnifiedFunctionSpec build() { + Objects.requireNonNull(returnType, "returnType is required"); + return new UnifiedFunctionSpec( + funcName, + new SqlUserDefinedFunction( + new SqlIdentifier(funcName, SqlParserPos.ZERO), + SqlKind.OTHER_FUNCTION, + returnType, + InferTypes.ANY_NULLABLE, + new VariadicOperandMetadata(paramNames), + List::of)); // Pushdown-only: no local implementation + } + } + + /** + * Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code + * FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts + * any operand types and delegates validation to pushdown. 
+ */ + private record VariadicOperandMetadata(List paramNames) implements SqlOperandMetadata { + + @Override + public List paramNames() { + return paramNames; + } + + @Override + public List paramTypes(RelDataTypeFactory tf) { + return List.of(); + } + + @Override + public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) { + return true; // Bypass: CALCITE-5366 breaks optional argument type checking + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.from(paramNames.size()); + } + + @Override + public String getAllowedSignatures(SqlOperator op, String opName) { + return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])"; + } + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedPplSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedPplSpec.java new file mode 100644 index 00000000000..763f6ded540 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedPplSpec.java @@ -0,0 +1,42 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import java.util.List; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.validate.SqlValidator; + +/** + * PPL language specification. + * + *
Note: PPL currently has its own parsing and analyzing pipeline, so only configuration and + * extensions applied after RelNode construction are in use. The parser and validator configs + * returned here are inert for the PPL path. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class UnifiedPplSpec implements LanguageSpec { + + public static UnifiedPplSpec create() { + return new UnifiedPplSpec(); + } + + @Override + public SqlParser.Config parserConfig() { + return SqlParser.config(); + } + + @Override + public SqlValidator.Config validatorConfig() { + return SqlValidator.Config.DEFAULT; + } + + @Override + public List extensions() { + return List.of(); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java new file mode 100644 index 00000000000..a5433f015fa --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java @@ -0,0 +1,68 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import java.util.List; +import lombok.AccessLevel; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.experimental.Accessors; +import org.apache.calcite.config.Lex; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.parser.SqlParserImplFactory; +import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl; +import org.apache.calcite.sql.validate.SqlConformanceEnum; +import org.apache.calcite.sql.validate.SqlValidator; +import org.opensearch.sql.api.spec.search.SearchExtension; + +/** + * SQL language specification. Configures Calcite's parser, validator, and composable extensions for + * OpenSearch SQL compatibility. + * + *
Use {@link #extended()} for the default configuration with lenient syntax, hyphenated + * identifiers, and search functions. + */ +@RequiredArgsConstructor(access = AccessLevel.PRIVATE) +@Accessors(fluent = true) +public class UnifiedSqlSpec implements LanguageSpec { + + /** Lexical rules: identifier quoting, character escaping, and special identifier support. */ + private final Lex lex; + + /** Parser implementation: controls keyword reservation and grammar extensions. */ + private final SqlParserImplFactory parserFactory; + + /** Validation rules: what SQL semantics the validator accepts (GROUP BY, LIMIT, coercion). */ + private final SqlConformanceEnum conformance; + + /** Composable extensions contributing operators and post-parse rewrite rules. */ + @Getter private final List extensions; + + /** + * Extended SQL spec: Babel parser, BIG_QUERY lex (hyphenated identifiers, backtick quoting), + * BABEL conformance (lenient GROUP BY, LIMIT, optional FROM), and search functions. + */ + public static UnifiedSqlSpec extended() { + return new UnifiedSqlSpec( + Lex.BIG_QUERY, + SqlBabelParserImpl.FACTORY, + SqlConformanceEnum.BABEL, + List.of(new SearchExtension())); + } + + @Override + public SqlParser.Config parserConfig() { + return SqlParser.config() + .withParserFactory(parserFactory) + .withLex(lex) + .withConformance(conformance); + } + + @Override + public SqlValidator.Config validatorConfig() { + return SqlValidator.Config.DEFAULT.withConformance(conformance); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/search/NamedArgRewriter.java b/api/src/main/java/org/opensearch/sql/api/spec/search/NamedArgRewriter.java new file mode 100644 index 00000000000..8627a76f2cf --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/search/NamedArgRewriter.java @@ -0,0 +1,75 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.search; + +import java.util.List; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.util.SqlShuttle; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; + +/** + * Pre-validation rewriter for backward compatibility with non-standard named-argument syntax (e.g., + * {@code operator='AND'} instead of {@code operator => 'AND'}). Normalizes relevance function calls + * into MAP-based form so SQL and PPL paths produce identical query plans for pushdown rules. + * + *
This rewriter is subject to removal if we adopt standard SQL named-argument syntax. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public final class NamedArgRewriter extends SqlShuttle { + + public static final NamedArgRewriter INSTANCE = new NamedArgRewriter(); + + @Override + public @Nullable SqlNode visit(SqlCall call) { + SqlCall visited = (SqlCall) super.visit(call); + return UnifiedFunctionSpec.of(visited.getOperator().getName()) + .filter(UnifiedFunctionSpec.RELEVANCE::contains) + .map(spec -> (SqlNode) rewriteToMaps(visited, spec.getParamNames())) + .orElse(visited); + } + + /** + * Rewrites each argument into a MAP entry. For match(name, 'John', operator='AND'): + *
  • Positional arg: name → MAP('field', name) + *
  • Named arg: operator='AND' → MAP('operator', 'AND') + */ + private static SqlCall rewriteToMaps(SqlCall call, List paramNames) { + List operands = call.getOperandList(); + SqlNode[] maps = new SqlNode[operands.size()]; + for (int i = 0; i < operands.size(); i++) { + SqlNode op = operands.get(i); + if (op instanceof SqlCall eq && op.getKind() == SqlKind.EQUALS) { + SqlNode key = eq.operand(0); + String name = + key instanceof SqlIdentifier ident + ? ident.getSimple() + : key.toString(); // avoid backtick-decorated keys for reserved words + maps[i] = toMap(name, eq.operand(1)); + } else { + if (i >= paramNames.size()) { + throw new IllegalArgumentException( + String.format("Invalid arguments for function '%s'", call.getOperator().getName())); + } + maps[i] = toMap(paramNames.get(i), op); + } + } + return call.getOperator().createCall(call.getParserPosition(), maps); + } + + private static SqlNode toMap(String key, SqlNode value) { + return SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR.createCall( + SqlParserPos.ZERO, SqlLiteral.createCharString(key, SqlParserPos.ZERO), value); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/search/SearchExtension.java b/api/src/main/java/org/opensearch/sql/api/spec/search/SearchExtension.java new file mode 100644 index 00000000000..159560067c5 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/search/SearchExtension.java @@ -0,0 +1,27 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.search; + +import java.util.List; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.calcite.sql.util.SqlVisitor; +import org.opensearch.sql.api.spec.LanguageSpec; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; + +/** Search Extension: relevance functions and named argument rewriting. 
*/ +public class SearchExtension implements LanguageSpec.LanguageExtension { + + @Override + public SqlOperatorTable operators() { + return UnifiedFunctionSpec.RELEVANCE.operatorTable(); + } + + @Override + public List> postParseRules() { + return List.of(NamedArgRewriter.INSTANCE); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlTest.java index 53accd49715..855d3d2788d 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlTest.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlTest.java @@ -8,6 +8,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThrows; +import java.util.List; import java.util.Map; import org.apache.calcite.schema.Schema; import org.apache.calcite.schema.impl.AbstractSchema; @@ -211,4 +212,49 @@ public void testSqlQueryPlanningWithMultipleCatalogs() { public void testInvalidSqlThrowsException() { assertThrows(IllegalStateException.class, () -> planner.plan("SELECT FROM")); } + + @Test + public void testNonQueryStatementsBlockedByWhitelist() { + List.of( + """ + INSERT INTO catalog.employees (id, name, age, department) + VALUES (99, 'injected', 0, 'hacked')\ + """, + """ + DELETE FROM catalog.employees + WHERE age > 30\ + """, + """ + UPDATE catalog.employees + SET department = 'Fired' + WHERE age > 50\ + """, + """ + EXPLAIN PLAN FOR + SELECT * FROM catalog.employees\ + """, + """ + MERGE INTO catalog.employees AS t + USING (SELECT 99 AS id) AS s ON t.id = s.id + WHEN MATCHED THEN UPDATE SET name = 'hacked'\ + """, + """ + SHOW TABLES\ + """) + .forEach( + sql -> + givenInvalidQuery(sql).assertErrorMessage("Only query statements are supported")); + } + + @Test + public void testNonQueryStatementsBlockedByParser() { + givenInvalidQuery( + """ + CREATE MATERIALIZED VIEW mv AS + SELECT department, count(*) + FROM catalog.employees + GROUP BY department\ + """) + .assertErrorMessage("Encountered"); + } } diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java index 9ad7aa42155..41ed12670f8 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java @@ -106,7 +106,7 @@ public void testPPLQueryPlanningWithMultipleCatalogsAndDefaultNamespace() { assertNotNull("Plan should be created with multiple catalogs", plan); } - @Test(expected = IllegalStateException.class) + @Test(expected = UnsupportedOperationException.class) public void testUnsupportedStatementType() { planner.plan("explain source = catalog.employees"); // explain statement } diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java new file mode 100644 index 00000000000..66df9c2e075 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java @@ -0,0 +1,187 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import org.junit.Test; +import org.opensearch.sql.executor.QueryType; + +/** + * Tests for relevance search functions in SQL planning path using V2/PPL syntax. Mirrors the PPL + * tests in {@link UnifiedRelevanceSearchTest} with equivalent SQL queries. 
Both paths produce + * identical MAP-based plans for pushdown rules. + */ +public class UnifiedRelevanceSearchSqlTest extends UnifiedQueryTestBase { + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Test + public void testMatch() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match(name, 'John')\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) + LogicalFilter(condition=[match(MAP('field', $1), MAP('query', 'John'))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMatchPhrase() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match_phrase(name, 'John Doe')\ + """) + .assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'John Doe'))"); + } + + @Test + public void testMatchBoolPrefix() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match_bool_prefix(name, 'John')\ + """) + .assertPlanContains("match_bool_prefix(MAP('field', $1), MAP('query', 'John'))"); + } + + @Test + public void testMatchPhrasePrefix() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match_phrase_prefix(name, 'John')\ + """) + .assertPlanContains("match_phrase_prefix(MAP('field', $1), MAP('query', 'John'))"); + } + + @Test + public void testMultiMatch() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE multi_match(name, 'John')\ + """) + .assertPlanContains("multi_match(MAP('fields', $1), MAP('query', 'John'))"); + } + + @Test + public void testSimpleQueryString() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE simple_query_string(name, 'John')\ + """) + .assertPlanContains("simple_query_string(MAP('fields', $1), MAP('query', 'John'))"); + } + + @Test + public void testQueryString() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE query_string(name, 'John')\ + """) + .assertPlanContains("query_string(MAP('fields', $1), MAP('query', 'John'))"); + } + + @Test + public void testMatchWithOptions() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match(name, 'John', operator='AND', boost=2.0)\ + """) + .assertPlanContains( + "match(MAP('field', $1), MAP('query', 'John')," + + " MAP('operator', 'AND'), MAP('boost', 2.0:DECIMAL(2, 1)))"); + } + + @Test + public void testMatchMissingArguments() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE match('John')\ + """) + .assertErrorMessage( + "No match found for function signature match(<(CHAR(5), CHAR(4)) MAP>)"); + } + + @Test + public void testUnknownRelevanceFunction() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE unknown_relevance(name, 'John')\ + """) + .assertErrorMessage( + "No match found for function signature unknown_relevance(, )"); + } + + @Test + public void testNonRelevanceFunctionUnaffectedByRewriter() { + givenQuery( + """ + SELECT upper(name) FROM catalog.employees\ + """) + .assertPlan( + """ + LogicalProject(EXPR$0=[UPPER($1)]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + // FIXME: Calcite's SQL parser does not support V2 bracket field list syntax ['field1', 'field2']. + // Multi-field relevance functions only accept a single column reference in the Calcite SQL path. 
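+  // The bracket form remains covered on the PPL planning path (see
+  // UnifiedRelevanceSearchTest#testMultiMatch); the tests below only pin down the SQL-path
+  // parser error message.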
+ + @Test + public void testMultiMatchBracketSyntaxNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE multi_match(['name', 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); + } + + @Test + public void testMultiMatchFieldBoostNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE multi_match(['name' ^ 2.0, 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); + } + + @Test + public void testSimpleQueryStringBracketSyntaxNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE simple_query_string(['name', 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); + } + + @Test + public void testQueryStringBracketSyntaxNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE query_string(['name', 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java new file mode 100644 index 00000000000..a80ae190868 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java @@ -0,0 +1,78 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import org.junit.Test; + +/** Tests for relevance search functions in PPL planning path. */ +public class UnifiedRelevanceSearchTest extends UnifiedQueryTestBase { + + @Test + public void testMatch() { + givenQuery("source=catalog.employees | where match(name, 'John')") + .assertPlan( + """ + LogicalFilter(condition=[match(MAP('field', $1), MAP('query', 'John':VARCHAR))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMatchPhrase() { + givenQuery("source=catalog.employees | where match_phrase(name, 'John Doe')") + .assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'John Doe':VARCHAR))"); + } + + @Test + public void testMatchBoolPrefix() { + givenQuery("source=catalog.employees | where match_bool_prefix(name, 'John')") + .assertPlanContains("match_bool_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))"); + } + + @Test + public void testMatchPhrasePrefix() { + givenQuery("source=catalog.employees | where match_phrase_prefix(name, 'John')") + .assertPlanContains("match_phrase_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))"); + } + + @Test + public void testMultiMatch() { + givenQuery("source=catalog.employees | where multi_match(['name', 'department'], 'John')") + .assertPlanContains( + "multi_match(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE," + + " 'department':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))"); + } + + @Test + public void testSimpleQueryString() { + givenQuery("source=catalog.employees | where simple_query_string(['name'], 'John')") + .assertPlanContains( + "simple_query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE))," + + " MAP('query', 'John':VARCHAR))"); + } + + @Test + public void testQueryString() { + givenQuery("source=catalog.employees | where query_string(['name'], 'John')") + .assertPlanContains( + "query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE))," + + " MAP('query', 'John':VARCHAR))"); + } + + @Test + public void testMatchMissingArguments() { + givenInvalidQuery("source=catalog.employees | where match('John')") + .assertErrorMessage( + "[)] 
is not a valid term at this part of the query:" + + " '...| where match('John')' <-- HERE. Expecting tokens: ','"); + } + + @Test + public void testUnknownRelevanceFunction() { + givenInvalidQuery("source=catalog.employees | where unknown_relevance(name, 'John')") + .assertErrorMessage("[(] is not a valid term at this part of the query"); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedSqlSpecTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedSqlSpecTest.java new file mode 100644 index 00000000000..97ddd07d0ac --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedSqlSpecTest.java @@ -0,0 +1,118 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import java.util.Map; +import org.apache.calcite.schema.Table; +import org.apache.calcite.schema.impl.AbstractSchema; +import org.junit.Test; +import org.opensearch.sql.executor.QueryType; + +public class UnifiedSqlSpecTest extends UnifiedQueryTestBase { + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Override + protected UnifiedQueryContext.Builder contextBuilder() { + AbstractSchema schema = + new AbstractSchema() { + @Override + protected Map getTableMap() { + return Map.of( + "employees", createEmployeesTable(), + "logs-2024-01", createEmployeesTable()); + } + }; + return UnifiedQueryContext.builder() + .language(queryType()) + .catalog(DEFAULT_CATALOG, schema) + .defaultNamespace(DEFAULT_CATALOG); + } + + @Test + public void hyphenatedTableIdentifier() { + givenQuery("SELECT * FROM logs-2024-01") + .assertPlanContains("LogicalTableScan(table=[[catalog, logs-2024-01]])"); + } + + @Test + public void backtickQuotedIdentifiers() { + givenQuery("SELECT `name` FROM employees").assertPlanContains("LogicalProject(name=[$1])"); + } + + @Test + public void doubleQuotedStringLiteral() { + givenQuery("SELECT \"Hello\" AS greeting FROM employees") + .assertPlanContains("LogicalProject(greeting=['Hello'])"); + } + + @Test + public void matchNotReserved() { + givenQuery("SELECT * FROM employees WHERE match(name, 'Hattie')") + .assertPlanContains("match(MAP('field', $1), MAP('query', 'Hattie'))"); + } + + @Test + public void reservedWordAsAlias() { + givenQuery("SELECT age AS year FROM employees").assertPlanContains("LogicalProject(year=[$2])"); + } + + @Test + public void limitSyntax() { + givenQuery("SELECT * FROM employees LIMIT 10").assertPlanContains("LogicalSort(fetch=[10])"); + } + + @Test + public void selectWithoutFrom() { + givenQuery("SELECT 1").assertPlanContains("LogicalValues(tuples=[[{ 1 }]])"); + } + + @Test + public void groupByAlias() { + givenQuery("SELECT department AS dept, COUNT(*) AS cnt FROM employees GROUP BY dept") + .assertPlanContains("LogicalAggregate(group=[{0}]"); + } + + @Test + public void groupByOrdinal() { + givenQuery("SELECT name, COUNT(*) AS cnt FROM employees GROUP BY 1") + .assertPlanContains("LogicalAggregate(group=[{0}], cnt=[COUNT()])") + .assertPlanContains("LogicalProject(name=[$1])"); + } + + @Test + public void castBooleanToInteger() { + givenQuery("SELECT CAST(true AS INTEGER) AS val FROM employees") + .assertPlanContains("LogicalProject(val=[1])"); + } + + @Test + public void integerComparedToString() { + givenQuery("SELECT * FROM employees WHERE age > '30'") + .assertPlanContains("condition=[>($2, CAST('30'):INTEGER NOT NULL)]"); + } + + @Test + public void matchFunction() { + givenQuery("SELECT * FROM employees WHERE match(name, 'John')") + 
.assertPlanContains("match(MAP('field', $1), MAP('query', 'John'))"); + } + + @Test + public void matchPhraseFunction() { + givenQuery("SELECT * FROM employees WHERE match_phrase(name, 'quick fox')") + .assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'quick fox'))"); + } + + @Test + public void namedParametersSyntax() { + givenQuery("SELECT * FROM employees WHERE match_phrase(name, 'quick fox', slop=2)") + .assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'quick fox')"); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/spec/search/NamedArgRewriterTest.java b/api/src/test/java/org/opensearch/sql/api/spec/search/NamedArgRewriterTest.java new file mode 100644 index 00000000000..52395865548 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/spec/search/NamedArgRewriterTest.java @@ -0,0 +1,108 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.search; + +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import org.apache.calcite.avatica.util.Casing; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.junit.Test; + +/** Unit tests for {@link NamedArgRewriter}. */ +public class NamedArgRewriterTest { + + /** Match production parser config in UnifiedQueryContext. */ + private static final SqlParser.Config PARSER_CONFIG = + SqlParser.Config.DEFAULT.withUnquotedCasing(Casing.UNCHANGED); + + @Test + public void testPositionalArgsRewrittenToMaps() throws Exception { + SqlNode result = rewrite("SELECT * FROM t WHERE \"match\"(name, 'John')"); + assertContains(result, "MAP['field', `name`], MAP['query', 'John']"); + } + + @Test + public void testEqualsArgRewrittenToMap() throws Exception { + SqlNode result = rewrite("SELECT * FROM t WHERE \"match\"(name, 'John', operator='AND')"); + assertContains(result, "MAP['query', 'John'], MAP['operator', 'AND']"); + } + + @Test + public void testMultipleEqualsArgs() throws Exception { + SqlNode result = + rewrite("SELECT * FROM t WHERE \"match\"(name, 'John', operator='AND', boost=2.0)"); + assertContains(result, "MAP['operator', 'AND'], MAP['boost', 2.0]"); + } + + @Test + public void testMultiMatchUsesFieldsParamName() throws Exception { + SqlNode result = rewrite("SELECT * FROM t WHERE multi_match(name, 'John')"); + assertContains(result, "MAP['fields', `name`], MAP['query', 'John']"); + } + + @Test + public void testNonRelevanceFunctionUntouched() throws Exception { + SqlNode parsed = parse("SELECT upper(name) FROM t"); + SqlNode result = parsed.accept(NamedArgRewriter.INSTANCE); + assertSame(parsed, result); + } + + @Test + public void testAllEqualsArgsNoPositional() throws Exception { + // Not valid V2 match syntax, but multi_match supports this form. + // Shuttle treats all = as named options — no positional wrapping. + SqlNode result = rewrite("SELECT * FROM t WHERE multi_match(fields=name, query='John')"); + assertContains(result, "MAP['fields', `name`], MAP['query', 'John']"); + } + + @Test + public void testReservedWordAsNamedArgKey() throws Exception { + // 'escape' is a SQL reserved word and a valid query_string parameter. + // getSimple() must be used instead of toString() to avoid backtick-decorated keys. 
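+    // (toString() on the identifier would produce a backtick-quoted key such as `escape`
+    // instead of the bare parameter name expected in the MAP keys)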
+ SqlNode result = rewrite("SELECT * FROM t WHERE query_string(name, 'test*', \"escape\"=true)"); + assertContains(result, "MAP['escape', TRUE]"); + } + + @Test + public void testEqualsBeforePositionalThrows() throws Exception { + // Not valid V2 syntax — positional must come first. + // = at index 0 goes to EQUALS branch, but remaining positional args exceed paramNames. + try { + rewrite("SELECT * FROM t WHERE \"match\"(operator='AND', name, 'John')"); + fail("Expected IllegalArgumentException for mixed order"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Invalid arguments for function")); + } + } + + @Test + public void testExtraPositionalArgsBeyondParamNamesThrows() throws Exception { + // match has 2 param names (field, query); 3 positional args causes IndexOutOfBounds + try { + rewrite("SELECT * FROM t WHERE \"match\"(a, b, c)"); + fail("Expected IllegalArgumentException for extra positional args"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Invalid arguments for function")); + } + } + + private static SqlNode rewrite(String sql) throws Exception { + return parse(sql).accept(NamedArgRewriter.INSTANCE); + } + + private static SqlNode parse(String sql) throws Exception { + return SqlParser.create(sql, PARSER_CONFIG).parseStmt(); + } + + private static void assertContains(SqlNode node, String expected) { + String actual = node.toString().replaceAll("\\n", " "); + assertTrue( + "Expected to contain: " + expected + "\nActual: " + actual, actual.contains(expected)); + } +} diff --git a/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java b/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java index eaaaccbdbf8..42df6c5a7ee 100644 --- a/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java +++ b/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java @@ -8,6 +8,7 @@ import static org.apache.calcite.sql.type.SqlTypeName.INTEGER; import static org.apache.calcite.sql.type.SqlTypeName.VARCHAR; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.util.List; import java.util.Map; @@ -148,6 +149,37 @@ protected QueryAssert givenQuery(String query) { return new QueryAssert(planner.plan(query)); } + /** Fluent helper for asserting query planning errors. */ + protected QueryErrorAssert givenInvalidQuery(String query) { + try { + planner.plan(query); + throw new AssertionError("Expected query to fail: " + query); + } catch (Exception e) { + return new QueryErrorAssert(e); + } + } + + /** Fluent assertion on a query planning error. */ + protected static class QueryErrorAssert { + private final Exception error; + + QueryErrorAssert(Exception error) { + this.error = error; + } + + /** Assert the root cause error message contains the expected substring. */ + public QueryErrorAssert assertErrorMessage(String expected) { + Throwable cause = error; + while (cause.getCause() != null) { + cause = cause.getCause(); + } + String msg = cause.getMessage() != null ? cause.getMessage() : cause.getClass().getName(); + assertTrue( + "Expected error to contain: " + expected + "\nActual: " + msg, msg.contains(expected)); + return this; + } + } + /** Fluent assertion on a query's logical plan. */ protected static class QueryAssert { private final RelNode plan; @@ -164,6 +196,15 @@ public QueryAssert assertPlan(String expected) { return this; } + /** Assert the logical plan contains the expected substring. 
*/ + public QueryAssert assertPlanContains(String expected) { + String planStr = RelOptUtil.toString(plan).replaceAll("\\r\\n", "\n"); + assertTrue( + "Expected plan to contain: " + expected + "\nActual plan:\n" + planStr, + planStr.contains(expected)); + return this; + } + /** Assert the output field names match. */ public QueryAssert assertFields(String... names) { assertEquals(List.of(names), plan.getRowType().getFieldNames()); diff --git a/common/build.gradle b/common/build.gradle index d839466f886..233eb30d797 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -32,6 +32,15 @@ repositories { mavenCentral() } +test { + maxParallelForks = Runtime.runtime.availableProcessors() + useJUnitPlatform() + testLogging { + events "passed", "skipped", "failed" + exceptionFormat "full" + } +} + dependencies { api "org.antlr:antlr4-runtime:4.13.2" api group: 'com.google.guava', name: 'guava', version: "${guava_version}" @@ -52,6 +61,8 @@ dependencies { testImplementation group: 'org.mockito', name: 'mockito-core', version: "${mockito_version}" testImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: "${mockito_version}" testImplementation group: 'com.squareup.okhttp3', name: 'mockwebserver', version: '4.12.0' + + testRuntimeOnly('org.junit.platform:junit-platform-launcher') } diff --git a/common/src/main/java/org/opensearch/sql/common/error/ErrorCode.java b/common/src/main/java/org/opensearch/sql/common/error/ErrorCode.java new file mode 100644 index 00000000000..c86acd0d4e5 --- /dev/null +++ b/common/src/main/java/org/opensearch/sql/common/error/ErrorCode.java @@ -0,0 +1,57 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.common.error; + +/** + * Machine-readable error codes for categorizing exceptions. These codes help clients handle + * specific error types programmatically.
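+ * Codes are typically attached to an {@link ErrorReport} via
+ * {@code ErrorReport.wrap(e).code(...)}.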
    + *
    + * Not a complete list, currently seeded with some initial values. Feel free to add variants or + * remove dead variants over time. + */ +public enum ErrorCode { + /** Field not found in the index mapping */ + FIELD_NOT_FOUND, + + /** Syntax error in query parsing */ + SYNTAX_ERROR, + + /** Ambiguous field reference (multiple fields with same name) */ + AMBIGUOUS_FIELD, + + /** Generic semantic validation error */ + SEMANTIC_ERROR, + + /** Expression evaluation failed */ + EVALUATION_ERROR, + + /** Type mismatch or type validation error */ + TYPE_ERROR, + + /** Unsupported feature or operation */ + UNSUPPORTED_OPERATION, + + /** Resource limit exceeded (memory, CPU, etc.) */ + RESOURCE_LIMIT_EXCEEDED, + + /** Index or datasource not found */ + INDEX_NOT_FOUND, + + /** Permission denied or insufficient privileges */ + PERMISSION_DENIED, + + /** Query planning failed */ + PLANNING_ERROR, + + /** Query execution failed */ + EXECUTION_ERROR, + + /** + * Unknown or unclassified error -- don't set this manually, it's filled in as the default if no + * other code applies. + */ + UNKNOWN +} diff --git a/common/src/main/java/org/opensearch/sql/common/error/ErrorReport.java b/common/src/main/java/org/opensearch/sql/common/error/ErrorReport.java new file mode 100644 index 00000000000..1430af5ed16 --- /dev/null +++ b/common/src/main/java/org/opensearch/sql/common/error/ErrorReport.java @@ -0,0 +1,282 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.common.error; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import lombok.Getter; + +/** + * Error report that wraps exceptions and accumulates contextual information as errors bubble up + * through system layers. + * + *

+ * <p>Inspired by Rust's anyhow/eyre libraries, this class allows each layer to add context without
+ * modifying the original exception message.
+ *
+ * <p>Example usage:
+ *
+ * <pre>
+ * try {
+ *   resolveField(fieldName);
+ * } catch (IllegalArgumentException e) {
+ *   throw ErrorReport.wrap(e)
+ *     .code(ErrorCode.FIELD_NOT_FOUND)
+ *     .stage(QueryProcessingStage.ANALYZING)
+ *     .location("while resolving fields in the index mapping")
+ *     .suggestion("Did you mean: '" + suggestedField + "'?")
+ *     .context("index_pattern", indexPattern)
+ *     .context("position", cursorPosition)
+ *     .build();
+ * }
+ * </pre>
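+ *
+ * <p>The catching layer decides how to render the report: {@code toDetailedMessage()} produces a
+ * human-readable, multi-line summary for logs, while {@code toJsonMap()} produces a structured
+ * body for REST error responses.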
    + */ +public class ErrorReport extends RuntimeException { + + @Getter private final Exception cause; + @Getter private final ErrorCode code; + @Getter private final QueryProcessingStage stage; + private final List locationChain; + private final Map context; + @Getter private final String suggestion; + @Getter private final String details; + + private ErrorReport(Builder builder) { + super(builder.cause.getMessage(), builder.cause); + this.cause = builder.cause; + this.code = builder.code; + this.stage = builder.stage; + this.locationChain = new ArrayList<>(builder.locationChain); + this.context = new LinkedHashMap<>(builder.context); + this.suggestion = builder.suggestion; + this.details = builder.details; + } + + /** + * Wraps an exception with an error report builder. If the exception is already an ErrorReport, + * returns a builder initialized with the existing report's data. + * + * @param cause The underlying exception + * @return A builder for constructing the error report + */ + public static Builder wrap(Exception cause) { + if (cause instanceof ErrorReport existing) { + return new Builder(existing.cause) + .code(existing.code) + .stage(existing.stage) + .details(existing.details) + .suggestion(existing.suggestion) + .addLocationChain(existing.locationChain) + .addContext(existing.context); + } + return new Builder(cause); + } + + public List getLocationChain() { + return new ArrayList<>(locationChain); + } + + public Map getContext() { + return new LinkedHashMap<>(context); + } + + /** Get the original exception type name. */ + public String getExceptionType() { + return cause.getClass().getSimpleName(); + } + + /** + * Format as a detailed message with all context information. This is suitable for logging or + * detailed error displays. + */ + public String toDetailedMessage() { + StringBuilder sb = new StringBuilder(); + + sb.append("Error"); + if (code != null && code != ErrorCode.UNKNOWN) { + sb.append(" [").append(code).append("]"); + } + if (stage != null) { + sb.append(" at stage: ").append(stage.getDisplayName()); + } + sb.append("\n"); + + if (details != null) { + sb.append("Details: ").append(details).append("\n"); + } + + if (!locationChain.isEmpty()) { + sb.append("\nLocation chain:\n"); + for (int i = 0; i < locationChain.size(); i++) { + // The location chain is typically appended to as we traverse up the stack, but for reading + // the error it makes more sense to go down the stack. So we reverse it. + sb.append(" ") + .append(i + 1) + .append(". ") + .append(locationChain.get(locationChain.size() - i - 1)) + .append("\n"); + } + } + + if (!context.isEmpty()) { + sb.append("\nContext:\n"); + context.forEach( + (key, value) -> sb.append(" ").append(key).append(": ").append(value).append("\n")); + } + + if (suggestion != null) { + sb.append("\nSuggestion: ").append(suggestion).append("\n"); + } + + return sb.toString(); + } + + /** + * Convert to JSON-compatible map structure for REST API responses. + * + * @return Map containing error information in structured format + */ + public Map toJsonMap() { + Map json = new LinkedHashMap<>(); + + json.put("type", getExceptionType()); + + if (code != null) { + json.put("code", code.name()); + } + + if (details != null) { + json.put("details", details); + } + + if (!locationChain.isEmpty()) { + // The location chain is typically appended to as we traverse up the stack, but for reading + // the error it makes more sense to go down the stack. So we reverse it. 
+ json.put("location", locationChain.reversed()); + } + + // Build context with stage information included + Map contextMap = new LinkedHashMap<>(context); + if (stage != null) { + contextMap.put("stage", stage.toJsonKey()); + contextMap.put("stage_description", stage.getDisplayName()); + } + if (!contextMap.isEmpty()) { + json.put("context", contextMap); + } + + if (suggestion != null) { + json.put("suggestion", suggestion); + } + + return json; + } + + /** Builder for constructing error reports with contextual information. */ + public static class Builder { + private final Exception cause; + private ErrorCode code = ErrorCode.UNKNOWN; + private QueryProcessingStage stage = null; + private final List locationChain = new ArrayList<>(); + private final Map context = new LinkedHashMap<>(); + private String suggestion = null; + private String details = null; + + private Builder(Exception cause) { + this.cause = cause; + // Default details to the original exception message + this.details = + cause.getLocalizedMessage() != null ? cause.getLocalizedMessage() : cause.getMessage(); + } + + /** Set the machine-readable error code. */ + public Builder code(ErrorCode code) { + this.code = code; + return this; + } + + /** Set the query processing stage where the error occurred. */ + public Builder stage(QueryProcessingStage stage) { + // Don't overwrite more-specific stages with less-specific ones + if (this.stage == null) { + this.stage = stage; + } + return this; + } + + /** + * Add a location to the chain describing where the error occurred. Locations are added in order + * from innermost to outermost layer. + * + * @param location Description like "while resolving fields in index mapping" + */ + public Builder location(String location) { + this.locationChain.add(location); + return this; + } + + /** + * Add multiple locations from an existing chain. + * + * @param locations List of location descriptions + */ + private Builder addLocationChain(List locations) { + this.locationChain.addAll(locations); + return this; + } + + /** + * Add structured context data (index name, query, position, etc). + * + * @param key Context key + * @param value Context value (will be converted to string for serialization) + */ + public Builder context(String key, Object value) { + this.context.put(key, value); + return this; + } + + /** + * Add multiple context entries from an existing map. + * + * @param contextMap Map of context key-value pairs + */ + private Builder addContext(Map contextMap) { + this.context.putAll(contextMap); + return this; + } + + /** + * Set a suggestion for how to fix the error. + * + * @param suggestion User-facing suggestion like "Did you mean: 'foo'?" + */ + public Builder suggestion(String suggestion) { + this.suggestion = suggestion; + return this; + } + + /** + * Override the default details message. By default, uses the wrapped exception's message. + * + * @param details Custom details message + */ + public Builder details(String details) { + this.details = details; + return this; + } + + /** + * Build and throw the error report as an exception. 
+ * + * @return The constructed error report (can be thrown) + */ + public ErrorReport build() { + return new ErrorReport(this); + } + } +} diff --git a/common/src/main/java/org/opensearch/sql/common/error/QueryProcessingStage.java b/common/src/main/java/org/opensearch/sql/common/error/QueryProcessingStage.java new file mode 100644 index 00000000000..98da1db5880 --- /dev/null +++ b/common/src/main/java/org/opensearch/sql/common/error/QueryProcessingStage.java @@ -0,0 +1,46 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.common.error; + +import lombok.Getter; + +/** + * Enumeration of query processing stages for error location tracking. These stages represent the + * major phases of query execution in the Calcite query planner. May not be a complete list, add + * stages if needed. + */ +@Getter +public enum QueryProcessingStage { + /** + * ANALYZING stage: Semantic validation and type checking. Errors: Field not found, type + * mismatches, semantic violations. + */ + ANALYZING("Parsing and validating the query"), + + /** + * PLAN_CONVERSION stage: Conversion to Calcite execution plan with system limits. Errors: + * Unsupported operations, plan conversion failures. + */ + PLAN_CONVERSION("Preparing the query for physical execution"), + + /** + * EXECUTING stage: Query execution via OpenSearch engine. Errors: Execution failures, index + * access errors, resource limits. + */ + EXECUTING("Running the query"); + + /** -- GETTER -- Get human-readable display name for this stage. */ + private final String displayName; + + QueryProcessingStage(String displayName) { + this.displayName = displayName; + } + + /** Get lowercase name suitable for JSON serialization. */ + public String toJsonKey() { + return name().toLowerCase(); + } +} diff --git a/common/src/main/java/org/opensearch/sql/common/error/StageErrorHandler.java b/common/src/main/java/org/opensearch/sql/common/error/StageErrorHandler.java new file mode 100644 index 00000000000..2827293a9e2 --- /dev/null +++ b/common/src/main/java/org/opensearch/sql/common/error/StageErrorHandler.java @@ -0,0 +1,103 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.common.error; + +import java.util.function.Supplier; + +/** + * Utility class for handling errors at specific query processing stages. This provides a consistent + * way to wrap operations with stage-specific error context. + * + *

+ * <p>Example usage in QueryService:
+ *
+ * <pre>
+ * RelNode relNode = StageErrorHandler.executeStage(
+ *   QueryProcessingStage.ANALYZING,
+ *   () -> analyze(plan, context),
+ *   "while analyzing query plan"
+ * );
+ * </pre>
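+ *
+ * <p>Side-effecting steps can be wrapped the same way with the void overload (the {@code
+ * executor} name below is illustrative, not an existing field):
+ *
+ * <pre>
+ * StageErrorHandler.executeStageVoid(
+ *   QueryProcessingStage.EXECUTING,
+ *   () -> executor.run(plan),
+ *   "while executing the physical plan");
+ * </pre>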
    + */ +public class StageErrorHandler { + + /** + * Execute an operation and wrap any thrown exceptions with stage context. + * + * @param stage The query processing stage + * @param operation The operation to execute + * @param location Optional location description for error context + * @param Return type of the operation + * @return The result of the operation + * @throws ErrorReport if the operation throws an exception + */ + public static T executeStage( + QueryProcessingStage stage, Supplier operation, String location) { + try { + return operation.get(); + } catch (Exception e) { + throw ErrorReport.wrap(e).stage(stage).location(location).build(); + } + } + + /** + * Execute an operation and wrap any thrown exceptions with stage context (no location). + * + * @param stage The query processing stage + * @param operation The operation to execute + * @param Return type of the operation + * @return The result of the operation + * @throws ErrorReport if the operation throws an exception + */ + public static T executeStage(QueryProcessingStage stage, Supplier operation) { + return executeStage(stage, operation, null); + } + + /** + * Execute a void operation and wrap any thrown exceptions with stage context. + * + * @param stage The query processing stage + * @param operation The operation to execute + * @param location Optional location description for error context + * @throws ErrorReport if the operation throws an exception + */ + public static void executeStageVoid( + QueryProcessingStage stage, Runnable operation, String location) { + try { + operation.run(); + } catch (Exception e) { + throw ErrorReport.wrap(e).stage(stage).location(location).build(); + } + } + + /** + * Execute a void operation and wrap any thrown exceptions with stage context (no location). + * + * @param stage The query processing stage + * @param operation The operation to execute + * @throws ErrorReport if the operation throws an exception + */ + public static void executeStageVoid(QueryProcessingStage stage, Runnable operation) { + executeStageVoid(stage, operation, null); + } + + /** + * Wrap an exception with stage context without executing an operation. Useful for re-throwing + * exceptions with additional context. + * + * @param stage The query processing stage + * @param e The exception to wrap + * @param location Optional location description + * @return ErrorReport with stage context + */ + public static ErrorReport wrapWithStage( + QueryProcessingStage stage, Exception e, String location) { + ErrorReport.Builder builder = ErrorReport.wrap(e).stage(stage); + if (location != null) { + builder.location(location); + } + return builder.build(); + } +} diff --git a/common/src/test/java/org/opensearch/sql/common/error/ErrorReportTest.java b/common/src/test/java/org/opensearch/sql/common/error/ErrorReportTest.java new file mode 100644 index 00000000000..e3460d7a703 --- /dev/null +++ b/common/src/test/java/org/opensearch/sql/common/error/ErrorReportTest.java @@ -0,0 +1,152 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.common.error; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.Map; +import org.hamcrest.CoreMatchers; +import org.hamcrest.MatcherAssert; +import org.junit.jupiter.api.Test; + +/** Unit tests for ErrorReport. 
*/ +public class ErrorReportTest { + + @Test + public void testBasicErrorReport() { + Exception cause = new IllegalArgumentException("Field not found"); + + ErrorReport report = + ErrorReport.wrap(cause) + .code(ErrorCode.FIELD_NOT_FOUND) + .stage(QueryProcessingStage.ANALYZING) + .location("while resolving fields in projection") + .context("field_name", "timestamp") + .context("table", "logs") + .suggestion("Check that field exists") + .build(); + + assertEquals(ErrorCode.FIELD_NOT_FOUND, report.getCode()); + assertEquals(QueryProcessingStage.ANALYZING, report.getStage()); + assertEquals(1, report.getLocationChain().size()); + assertEquals("while resolving fields in projection", report.getLocationChain().get(0)); + assertEquals("timestamp", report.getContext().get("field_name")); + assertEquals("logs", report.getContext().get("table")); + assertEquals("Check that field exists", report.getSuggestion()); + assertEquals("Field not found", report.getDetails()); + } + + @Test + public void testErrorReportJsonMapWithStageInContext() { + Exception cause = new IllegalArgumentException("Field not found"); + + ErrorReport report = + ErrorReport.wrap(cause) + .code(ErrorCode.FIELD_NOT_FOUND) + .stage(QueryProcessingStage.ANALYZING) + .location("while analyzing query") + .context("field_name", "test") + .build(); + + Map json = report.toJsonMap(); + + // Check top-level fields + assertEquals("IllegalArgumentException", json.get("type")); + assertEquals("FIELD_NOT_FOUND", json.get("code")); + assertEquals("Field not found", json.get("details")); + + // Check location + assertTrue(json.containsKey("location")); + + // Check that stage is in context + assertTrue(json.containsKey("context")); + @SuppressWarnings("unchecked") + Map context = (Map) json.get("context"); + assertEquals("analyzing", context.get("stage")); + assertEquals("Parsing and validating the query", context.get("stage_description")); + assertEquals("test", context.get("field_name")); + } + + @Test + public void testIdempotentWrapping() { + Exception originalCause = new IllegalArgumentException("Original error"); + + ErrorReport firstWrap = + ErrorReport.wrap(originalCause) + .code(ErrorCode.FIELD_NOT_FOUND) + .stage(QueryProcessingStage.ANALYZING) + .context("field_name", "test") + .build(); + + // Wrap again with additional context + ErrorReport secondWrap = + ErrorReport.wrap(firstWrap) + .stage(QueryProcessingStage.PLAN_CONVERSION) + .location("during plan conversion") + .context("additional_context", "value") + .build(); + + // Original cause should still be the IllegalArgumentException + assertEquals("Original error", secondWrap.getDetails()); + + // Should have accumulated context + Map context = secondWrap.getContext(); + assertEquals("test", context.get("field_name")); + assertEquals("value", context.get("additional_context")); + + // Should have location from second wrap + assertTrue(secondWrap.getLocationChain().contains("during plan conversion")); + } + + @Test + public void testStageErrorHandler() { + // Test successful execution + String result = + StageErrorHandler.executeStage( + QueryProcessingStage.ANALYZING, () -> "success", "test operation"); + + assertEquals("success", result); + + // Test error wrapping + Exception thrown = + assertThrows( + ErrorReport.class, + () -> + StageErrorHandler.executeStage( + QueryProcessingStage.ANALYZING, + () -> { + throw new IllegalArgumentException("Test error"); + }, + "while testing")); + + ErrorReport report = (ErrorReport) thrown; + assertEquals(QueryProcessingStage.ANALYZING, 
report.getStage()); + assertTrue(report.getLocationChain().contains("while testing")); + } + + @Test + public void testToDetailedMessage() { + Exception cause = new IllegalArgumentException("Field not found"); + + ErrorReport report = + ErrorReport.wrap(cause) + .code(ErrorCode.FIELD_NOT_FOUND) + .stage(QueryProcessingStage.ANALYZING) + .location("while resolving fields") + .context("field_name", "test") + .suggestion("Check field name") + .build(); + + String message = report.toDetailedMessage(); + + MatcherAssert.assertThat(message, CoreMatchers.containsString("FIELD_NOT_FOUND")); + MatcherAssert.assertThat(message, CoreMatchers.containsString("validating the query")); + MatcherAssert.assertThat(message, CoreMatchers.containsString("Field not found")); + MatcherAssert.assertThat(message, CoreMatchers.containsString("while resolving fields")); + MatcherAssert.assertThat(message, CoreMatchers.containsString("field_name")); + MatcherAssert.assertThat(message, CoreMatchers.containsString("Check field name")); + } +} diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index fc96f2f389c..c25b027e4ec 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -106,6 +106,7 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -897,6 +898,11 @@ public LogicalPlan visitMultisearch(Multisearch node, AnalysisContext context) { throw getOnlyForCalciteException("Multisearch"); } + @Override + public LogicalPlan visitUnion(Union node, AnalysisContext context) { + throw getOnlyForCalciteException("Union"); + } + private LogicalSort buildSort( LogicalPlan child, AnalysisContext context, Integer count, List sortFields) { ExpressionReferenceOptimizer optimizer = diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 7f02bb3ef1b..be02547a2da 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -93,6 +93,7 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -472,6 +473,10 @@ public T visitMultisearch(Multisearch node, C context) { return visitChildren(node, context); } + public T visitUnion(Union node, C context) { + return visitChildren(node, context); + } + public T visitAddTotals(AddTotals node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Convert.java b/core/src/main/java/org/opensearch/sql/ast/tree/Convert.java index 74406b0daf2..259330b2dba 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Convert.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Convert.java @@ -23,6 +23,7 @@ @RequiredArgsConstructor public class Convert extends UnresolvedPlan { private final List conversions; + private final String timeFormat; private UnresolvedPlan child; @Override diff --git 
a/core/src/main/java/org/opensearch/sql/ast/tree/Union.java b/core/src/main/java/org/opensearch/sql/ast/tree/Union.java new file mode 100644 index 00000000000..a96831567cb --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Union.java @@ -0,0 +1,44 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; + +/** Logical plan node for Union operation. Combines results from multiple datasets (UNION ALL). */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +@AllArgsConstructor +public class Union extends UnresolvedPlan { + private final List datasets; + + private Integer maxout; + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + List newDatasets = + ImmutableList.builder().add(child).addAll(datasets).build(); + return new Union(newDatasets, maxout); + } + + @Override + public List getChild() { + return datasets; + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitUnion(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index e4e036da3a6..15bfece5f46 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -22,6 +22,7 @@ import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_SUBSEARCH; import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation; import static org.opensearch.sql.calcite.utils.PlanUtils.getRexCall; +import static org.opensearch.sql.calcite.utils.PlanUtils.stripInputSort; import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild; import static org.opensearch.sql.utils.SystemIndexUtils.DATASOURCES_TABLE_NAME; @@ -35,6 +36,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -47,16 +49,12 @@ import org.apache.calcite.adapter.enumerable.RexToLixTranslator; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.ViewExpanders; -import org.apache.calcite.rel.BiRel; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelHomogeneousShuttle; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.SetOp; import org.apache.calcite.rel.core.Sort; -import org.apache.calcite.rel.core.Uncollect; -import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.logical.LogicalSort; import org.apache.calcite.rel.logical.LogicalValues; import org.apache.calcite.rel.type.RelDataType; @@ -103,6 +101,7 @@ import org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.PatternMethod; import org.opensearch.sql.ast.expression.PatternMode; +import org.opensearch.sql.ast.expression.QualifiedName; import org.opensearch.sql.ast.expression.Span; import org.opensearch.sql.ast.expression.SpanUnit; 
import org.opensearch.sql.ast.expression.UnresolvedExpression; @@ -159,6 +158,7 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.Trendline.TrendlineType; +import org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -174,6 +174,8 @@ import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils; import org.opensearch.sql.calcite.utils.WildcardUtils; +import org.opensearch.sql.common.error.ErrorCode; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.common.patterns.PatternUtils; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.datasource.DataSourceService; @@ -188,6 +190,18 @@ public class CalciteRelNodeVisitor extends AbstractNodeVisitor { + /** + * Prefix/suffix applied to right-side fields in the streamstats self-join plan to avoid name + * collisions with the left side and to make the renaming reversible. + */ + private static final String RIGHT_SIDE_FIELD_PREFIX = "__r_"; + + private static final String RIGHT_SIDE_FIELD_SUFFIX = "__"; + + /** Name of the right-side sequence column in the streamstats self-join plan. */ + private static final String RIGHT_SIDE_SEQ_COLUMN = + RIGHT_SIDE_FIELD_PREFIX + "seq" + RIGHT_SIDE_FIELD_SUFFIX; + private final CalciteRexNodeVisitor rexVisitor; private final CalciteAggCallVisitor aggVisitor; private final DataSourceService dataSourceService; @@ -344,7 +358,7 @@ public RelNode visitRegex(Regex node, CalcitePlanContext context) { return context.relBuilder.peek(); } - public RelNode visitRex(Rex node, CalcitePlanContext context) { + private RelNode innerRex(Rex node, CalcitePlanContext context) { visitChildren(node, context); RexNode fieldRex = rexVisitor.analyze(node.getField(), context); @@ -409,6 +423,17 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) { return context.relBuilder.peek(); } + public RelNode visitRex(Rex node, CalcitePlanContext context) { + try { + return innerRex(node, context); + } catch (RuntimeException ex) { + throw ErrorReport.wrap(ex) + .location("while processing the rex command") + .context("command", "rex") + .build(); + } + } + private boolean containsSubqueryExpression(Node expr) { if (expr == null) { return false; @@ -656,6 +681,10 @@ public RelNode visitRename(Rename node, CalcitePlanContext context) { } List matchingFields = WildcardRenameUtils.matchFieldNames(sourcePattern, newNames); + // Exclude metadata fields from wildcard rename (issue #5099) + if (WildcardRenameUtils.isWildcardPattern(sourcePattern)) { + matchingFields.removeIf(this::isMetadataField); + } for (String fieldName : matchingFields) { String newName = @@ -734,57 +763,6 @@ public RelNode visitHead(Head node, CalcitePlanContext context) { return context.relBuilder.peek(); } - /** - * Backtrack through the RelNode tree to find the first Sort node with non-empty collation. Stops - * at blocking operators that break ordering: - * - *
      - *
- * <ul>
- *   <li>Aggregate - aggregation destroys input ordering
- *   <li>BiRel - covers Join, Correlate, and other binary relations
- *   <li>SetOp - covers Union, Intersect, Except
- *   <li>Uncollect - unnesting operation that may change ordering
- *   <li>Project with window functions (RexOver) - ordering determined by window's ORDER BY
- * </ul>
    - * - * @param node the starting RelNode to backtrack from - * @return the collation found, or null if no sort or blocking operator encountered - */ - private RelCollation backtrackForCollation(RelNode node) { - while (node != null) { - // Check for blocking operators that destroy collation - // BiRel covers Join, Correlate, and other binary relations - // SetOp covers Union, Intersect, Except - // Uncollect unnests arrays/multisets which may change ordering - if (node instanceof Aggregate - || node instanceof BiRel - || node instanceof SetOp - || node instanceof Uncollect) { - return null; - } - - // Project with window functions has ordering determined by the window's ORDER BY clause - // We should not destroy its output order by inserting a reversed sort - if (node instanceof LogicalProject && ((LogicalProject) node).containsOver()) { - return null; - } - - // Check for Sort node with collation - if (node instanceof Sort) { - Sort sort = (Sort) node; - if (sort.getCollation() != null && !sort.getCollation().getFieldCollations().isEmpty()) { - return sort.getCollation(); - } - } - - // Continue to child node - if (node.getInputs().isEmpty()) { - break; - } - node = node.getInput(0); - } - return null; - } - /** * Insert a reversed sort node after finding the original sort in the tree. This rebuilds the tree * with the reversed sort inserted right after the original sort. @@ -867,7 +845,7 @@ public RelNode visitReverse( } else { // Collation not found on current node - try backtracking RelNode currentNode = context.relBuilder.peek(); - RelCollation backtrackCollation = backtrackForCollation(currentNode); + RelCollation backtrackCollation = PlanUtils.findInputCollation(currentNode); if (backtrackCollation != null && !backtrackCollation.getFieldCollations().isEmpty()) { // Found collation through backtracking - rebuild tree with reversed sort @@ -923,7 +901,7 @@ public RelNode visitTranspose( // Step 2: UNPIVOT b.unpivot( false, - ImmutableList.of("value"), + ImmutableList.of(PlanUtils.VALUE_COLUMN_FOR_TRANSPOSE), ImmutableList.of(columnName), fieldNames.stream() .map( @@ -945,7 +923,7 @@ public RelNode visitTranspose( // Step 4: PIVOT b.pivot( b.groupKey(trimmedColumnName), - ImmutableList.of(b.max(b.field("value"))), + ImmutableList.of(b.max(b.field(PlanUtils.VALUE_COLUMN_FOR_TRANSPOSE))), ImmutableList.of(b.field(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)), IntStream.rangeClosed(1, maxRows) .mapToObj(i -> Map.entry("row " + i, ImmutableList.of((RexNode) b.literal(i)))) @@ -973,10 +951,60 @@ public RelNode visitBin(Bin node, CalcitePlanContext context) { String alias = node.getAlias() != null ? node.getAlias() : fieldName; projectPlusOverriding(List.of(binExpression), List.of(alias), context); + dropStructParentsFor(alias, context); return context.relBuilder.peek(); } + /** + * If {@code dottedName} addresses a nested leaf inside a struct that OpenSearch has exposed + * through both its struct-parent columns and its flattened leaf columns (e.g. the telemetry + * mapping exposes {@code resource}, {@code resource.attributes}, ..., {@code + * resource.attributes.telemetry.sdk.version} side-by-side), drop the struct-parent prefixes from + * the current row. This keeps a subsequent {@link #tryToRemoveNestedFields(CalcitePlanContext)} + * pass from collapsing the flattened leaves back into the parents when the final implicit {@code + * fields *} projection runs. + * + *
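+ * <p>Sketch of the row-schema shape this guards against (field names follow the telemetry
+ * example above; not tied to any concrete mapping):
+ *
+ * <pre>
+ * before : resource, resource.attributes, resource.attributes.telemetry.sdk.version, ...
+ * command: bin/eval writes resource.attributes.telemetry.sdk.version
+ * after  : struct parents dropped, flattened leaves survive the implicit fields * projection
+ * </pre>
+ *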

    This preserves the behaviour that issue #4482 originally required for {@code bin} on a + * nested field without an explicit {@code fields} projection. It is invoked from two places: + * + *

+ * <ul>
+ *   <li>{@link #projectPlusOverriding(List, List, CalcitePlanContext)} — for every override whose
+ *       new name exactly matched a pre-existing column. This catches {@code eval} (and every
+ *       other command that funnels through {@code projectPlusOverriding}) assigning to an
+ *       existing flattened nested leaf.
+ *   <li>{@link #visitBin(Bin, CalcitePlanContext)} — defensively, so that {@code bin} keeps
+ *       dropping struct parents even when the alias happens not to match an existing field name
+ *       (e.g. when the user supplied a custom alias). This is also what the regression test in
+ *       {@code CalciteBinCommandIT#testBinWithNestedFieldWithoutExplicitProjection} exercises.
+ * </ul>
+ *
+ * <p>Using this narrowly-scoped pruning instead of a global prefix-override in {@link
+ * #shouldOverrideField} is what keeps issue #5185 and the reviewer's {@code eval agent.name =
+ * ...} case safe.
+ *

    No-op when no such struct-parent columns exist (e.g. flat columns or MAP roots from {@code + * spath}). + */ + private void dropStructParentsFor(String dottedName, CalcitePlanContext context) { + if (dottedName == null || dottedName.indexOf('.') < 0) { + return; + } + List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); + List parentsToDrop = new ArrayList<>(); + int dotIdx = dottedName.indexOf('.'); + while (dotIdx >= 0) { + String prefix = dottedName.substring(0, dotIdx); + if (fieldNames.contains(prefix)) { + parentsToDrop.add(context.relBuilder.field(prefix)); + } + dotIdx = dottedName.indexOf('.', dotIdx + 1); + } + if (!parentsToDrop.isEmpty()) { + context.relBuilder.projectExcept(parentsToDrop); + } + } + @Override public RelNode visitParse(Parse node, CalcitePlanContext context) { visitChildren(node, context); @@ -1170,7 +1198,7 @@ public RelNode visitConvert(Convert node, CalcitePlanContext context) { ConversionState state = new ConversionState(); for (Let conversion : node.getConversions()) { - processConversion(conversion, state, context); + processConversion(conversion, node.getTimeFormat(), state, context); } return buildConversionProjection(state, context); @@ -1183,14 +1211,14 @@ private static class ConversionState { } private void processConversion( - Let conversion, ConversionState state, CalcitePlanContext context) { + Let conversion, String timeFormat, ConversionState state, CalcitePlanContext context) { String target = conversion.getVar().getField().toString(); UnresolvedExpression expression = conversion.getExpression(); if (expression instanceof Field) { processFieldCopyConversion(target, (Field) expression, state, context); } else if (expression instanceof Function) { - processFunctionConversion(target, (Function) expression, state, context); + processFunctionConversion(target, (Function) expression, timeFormat, state, context); } else { throw new SemanticCheckException("Convert command requires function call expressions"); } @@ -1213,7 +1241,11 @@ private void processFieldCopyConversion( } private void processFunctionConversion( - String target, Function function, ConversionState state, CalcitePlanContext context) { + String target, + Function function, + String timeFormat, + ConversionState state, + CalcitePlanContext context) { String functionName = function.getFuncName(); List args = function.getFuncArgs(); @@ -1230,8 +1262,7 @@ private void processFunctionConversion( state.seenFields.add(source); RexNode sourceField = context.relBuilder.field(source); - RexNode convertCall = - PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, functionName, sourceField); + RexNode convertCall = resolveConvertFunction(functionName, sourceField, timeFormat, context); if (!target.equals(source)) { state.additions.add(Pair.of(target, context.relBuilder.alias(convertCall, target))); @@ -1240,6 +1271,23 @@ private void processFunctionConversion( } } + private RexNode resolveConvertFunction( + String functionName, RexNode sourceField, String timeFormat, CalcitePlanContext context) { + + // Time functions that support timeformat parameter + Set timeFunctions = Set.of("ctime", "mktime"); + + if (timeFunctions.contains(functionName.toLowerCase()) && timeFormat != null) { + // For time functions with custom timeformat, pass the format as a second parameter + RexNode timeFormatLiteral = context.rexBuilder.makeLiteral(timeFormat); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, functionName, sourceField, timeFormatLiteral); + } else { + // Regular 
conversion functions or time functions without custom format + return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, functionName, sourceField); + } + } + private RelNode buildConversionProjection(ConversionState state, CalcitePlanContext context) { List originalFields = context.relBuilder.peek().getRowType().getFieldNames(); List projectList = new ArrayList<>(); @@ -1272,12 +1320,12 @@ private RelNode buildConversionProjection(ConversionState state, CalcitePlanCont private void projectPlusOverriding( List newFields, List newNames, CalcitePlanContext context) { - List originalFieldNames = context.relBuilder.peek().getRowType().getFieldNames(); + Set originalFieldNameSet = + new HashSet<>(context.relBuilder.peek().getRowType().getFieldNames()); + List overriddenNames = + newNames.stream().filter(originalFieldNameSet::contains).toList(); List toOverrideList = - originalFieldNames.stream() - .filter(originalName -> shouldOverrideField(originalName, newNames)) - .map(a -> (RexNode) context.relBuilder.field(a)) - .toList(); + overriddenNames.stream().map(a -> (RexNode) context.relBuilder.field(a)).toList(); // 1. add the new fields, For example "age0, country0" context.relBuilder.projectPlus(newFields); // 2. drop the overriding field list, it's duplicated now. For example "age, country" @@ -1293,17 +1341,49 @@ private void projectPlusOverriding( expectedRenameFields.addAll(newNames); // 5. rename context.relBuilder.rename(expectedRenameFields); + // 6. For each overridden dotted-path name that matched an existing flattened nested leaf, + // prune the struct-parent columns that OpenSearch exposed side-by-side with that leaf. Without + // this, a downstream implicit `fields *` invokes `tryToRemoveNestedFields`, which would drop + // the freshly-assigned dotted leaf back out again because its struct-parent prefix is still in + // the row schema (see issue #4482 and the scratch coverage in CalciteEvalCommandIT). + // + // Gating on "the override actually fired" is what keeps the reviewer's PR #5351 case safe: + // `source=idx | fields agent | eval agent.name = "test"` has no pre-existing `agent.name` + // column, so overriddenNames is empty and the struct-parent `agent` survives untouched. + // It also keeps issue #5185 safe — spath introduces a MAP root and subsequent eval assigns + // to brand-new dotted paths that were not already in the row schema. + for (String overridden : overriddenNames) { + dropStructParentsFor(overridden, context); + } } + /** + * Determine whether the column {@code originalName} should be replaced when a batch of new + * columns named {@code newNames} is being added. Only exact-name matches count as overrides — + * {@code eval foo.bar = ...} creates a brand new field literally named {@code foo.bar} and must + * never drop sibling or parent fields. This mirrors SPL1 semantics, where assigning a dotted name + * introduces a literal column of that name without touching any other field. + * + *

    Earlier revisions (see PR #4606 / #5351) attempted to broaden this to a {@code + * newName.startsWith(originalName + ".")} prefix match. That prefix branch silently dropped any + * column that happened to be a prefix of an eval target, which caused two regressions: + * + *

      + *
    • Issue #5185 — a MAP-typed root column produced by {@code spath} got dropped when eval + * introduced multiple dotted-path fields under it. + *
    • The reviewer's case on PR #5351 — {@code source=big5 | fields agent | eval agent.name = + * "test"} dropped the {@code agent} column entirely. + *
    + * + * Struct-parent pruning for the "override on a real flattened nested leaf" case is handled + * uniformly in {@link #projectPlusOverriding(List, List, CalcitePlanContext)}, which invokes + * {@link #dropStructParentsFor(String, CalcitePlanContext)} only for overrides that actually + * replaced an existing column. This keeps issue #4482 fixed across every command that funnels + * through {@code projectPlusOverriding} (bin, eval, rex/sed, trendline, expand, flatten, + * patterns) without reintroducing the #5185 / reviewer regressions here. + */ private boolean shouldOverrideField(String originalName, List newNames) { - return newNames.stream() - .anyMatch( - newName -> - // Match exact field names (e.g., "age" == "age") for flat fields - newName.equals(originalName) - // OR match nested paths (e.g., "resource.attributes..." starts with - // "resource.") - || newName.startsWith(originalName + ".")); + return newNames.contains(originalName); } private List> extractInputRefList(List aggCalls) { @@ -1713,7 +1793,7 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { : duplicatedFieldNames.stream() .map(a -> (RexNode) context.relBuilder.field(a)) .toList(); - buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication); + buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication, null); } // add LogicalSystemLimit after dedup addSysLimitForJoinSubsearch(context); @@ -1771,7 +1851,7 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { List dedupeFields = getRightColumnsInJoinCriteria(context.relBuilder, joinCondition); - buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication); + buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication, null); } // add LogicalSystemLimit after dedup addSysLimitForJoinSubsearch(context); @@ -1947,10 +2027,11 @@ public RelNode visitDedupe(Dedupe node, CalcitePlanContext context) { // Columns to deduplicate List dedupeFields = node.getFields().stream().map(f -> rexVisitor.analyze(f, context)).toList(); + RelCollation inputCollation = stripInputSort(context.relBuilder); if (keepEmpty) { - buildDedupOrNull(context.relBuilder, dedupeFields, allowedDuplication); + buildDedupOrNull(context.relBuilder, dedupeFields, allowedDuplication, inputCollation); } else { - buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication); + buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication, inputCollation); } return context.relBuilder.peek(); } @@ -2059,14 +2140,14 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) context.relBuilder.projectPlus(streamSeq); RelNode left = context.relBuilder.build(); - // 2. Run correlate + aggregate - return buildStreamWindowJoinPlan( + // 2. Use self-join approach to avoid nested correlates (which cause NPE + // in Calcite's RelDecorrelator when chaining multiple streamstats) + return buildStreamWindowSelfJoinPlan( context, left, node, groupList, ROW_NUMBER_COLUMN_FOR_STREAMSTATS, - null, new String[] {ROW_NUMBER_COLUMN_FOR_STREAMSTATS}); } @@ -2197,6 +2278,229 @@ private RelNode buildStreamWindowJoinPlan( return context.relBuilder.peek(); } + /** + * Builds a self-join based plan for streamstats with global=true + window + group. This avoids + * using LogicalCorrelate which causes NPE in Calcite's RelDecorrelator when chaining multiple + * streamstats commands. + * + *
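As a standalone aside (not part of this diff), the frame and group matching that the self-join encodes as a join condition can be restated over an in-memory list. The row data, window size, and class name below are invented for illustration; only the filtering logic mirrors the description here.

```java
import java.util.List;

// Plain-Java restatement of the self-join framing: for each left row, aggregate the
// right rows whose sequence number falls inside the window frame and whose group key
// matches. With current=true the frame is [seq - window + 1, seq].
public class SelfJoinWindowSketch {
  record Row(int seq, String group, int value) {}

  public static void main(String[] args) {
    List<Row> rows =
        List.of(new Row(1, "a", 10), new Row(2, "a", 20), new Row(3, "b", 5), new Row(4, "a", 30));
    int window = 2;

    for (Row left : rows) {
      int sum =
          rows.stream()
              .filter(right -> right.group().equals(left.group())) // group condition
              .filter(right -> right.seq() > left.seq() - window && right.seq() <= left.seq())
              .mapToInt(Row::value)
              .sum();
      System.out.println(left.seq() + " " + left.group() + " -> " + sum);
    }
  }
}
```

Step 3 of the plan structure below expresses these same two filters as the join condition between the left and right inputs.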

    Plan structure: + * + *

      + *
    1. left = input + __stream_seq__ + *
    2. right = trim to the aggregate input fields + group columns + __stream_seq__ + *
    3. Join left and right on window frame + group conditions + *
    4. Group by all left field indices, compute AGG(right.X) + *
    5. Sort by __stream_seq__, then remove it + *
    + */ + private RelNode buildStreamWindowSelfJoinPlan( + CalcitePlanContext context, + RelNode leftWithHelpers, + StreamWindow node, + List groupList, + String seqCol, + String[] helperColsToCleanup) { + + int leftFieldCount = leftWithHelpers.getRowType().getFieldCount(); + + // Build right side: project only the fields needed for aggregation + seq + group columns + // This avoids field name collisions and keeps the right side minimal + context.relBuilder.push(leftWithHelpers); + + // Collect fields needed on right side: seq col + group cols + aggregate input fields + List rightFields = new ArrayList<>(); + List rightFieldNames = new ArrayList<>(); + + // Always include seq col + rightFields.add(context.relBuilder.field(seqCol)); + rightFieldNames.add(RIGHT_SIDE_SEQ_COLUMN); + + // Include group columns + for (UnresolvedExpression groupExpr : groupList) { + String groupName = extractGroupFieldName(groupExpr); + rightFields.add(context.relBuilder.field(groupName)); + rightFieldNames.add(toRightSideFieldName(groupName)); + } + + // Include aggregate input fields (extract field names from window functions) + Set aggInputFields = new LinkedHashSet<>(); + for (UnresolvedExpression wfExpr : node.getWindowFunctionList()) { + collectFieldNames(wfExpr, aggInputFields); + } + // Remove already-included fields + aggInputFields.remove(seqCol); + for (UnresolvedExpression groupExpr : groupList) { + aggInputFields.remove(extractGroupFieldName(groupExpr)); + } + for (String aggField : aggInputFields) { + rightFields.add(context.relBuilder.field(aggField)); + rightFieldNames.add(toRightSideFieldName(aggField)); + } + + context.relBuilder.project(rightFields, rightFieldNames); + RelNode rightProjected = context.relBuilder.build(); + + // Push left and right + context.relBuilder.push(leftWithHelpers); + context.relBuilder.push(rightProjected); + + // Build join condition using 2-input references + RexNode leftSeq = context.relBuilder.field(2, 0, seqCol); + RexNode rightSeq = context.relBuilder.field(2, 1, RIGHT_SIDE_SEQ_COLUMN); + + // Frame filter + RexNode frameFilter; + if (node.isCurrent()) { + RexNode lower = + context.relBuilder.call( + SqlStdOperatorTable.MINUS, leftSeq, context.relBuilder.literal(node.getWindow() - 1)); + frameFilter = context.relBuilder.between(rightSeq, lower, leftSeq); + } else { + RexNode lower = + context.relBuilder.call( + SqlStdOperatorTable.MINUS, leftSeq, context.relBuilder.literal(node.getWindow())); + RexNode upper = + context.relBuilder.call( + SqlStdOperatorTable.MINUS, leftSeq, context.relBuilder.literal(1)); + frameFilter = context.relBuilder.between(rightSeq, lower, upper); + } + + // Group filter + List groupFilters = new ArrayList<>(); + for (UnresolvedExpression groupExpr : groupList) { + String groupName = extractGroupFieldName(groupExpr); + RexNode leftGroup = context.relBuilder.field(2, 0, groupName); + RexNode rightGroup = context.relBuilder.field(2, 1, toRightSideFieldName(groupName)); + RexNode equalCondition = context.relBuilder.equals(leftGroup, rightGroup); + if (node.isBucketNullable()) { + RexNode bothNull = + context.relBuilder.and( + context.relBuilder.isNull(leftGroup), context.relBuilder.isNull(rightGroup)); + groupFilters.add(context.relBuilder.or(equalCondition, bothNull)); + } else { + groupFilters.add(equalCondition); + } + } + + RexNode joinCondition = + groupFilters.isEmpty() + ? 
frameFilter + : context.relBuilder.and(frameFilter, context.relBuilder.and(groupFilters)); + context.relBuilder.join(JoinRelType.LEFT, joinCondition); + + // After join: [left_fields(0..leftFieldCount-1), right_fields(leftFieldCount..)] + // Aggregate: group by all left fields, compute AGG on right fields + // The aggregate functions need to reference the right-side fields in the joined row + + // Build aggregate calls using the right-side field references + List aggCalls = buildAggCallsFromJoinedRight(node.getWindowFunctionList(), context); + + RelBuilder.GroupKey groupKey = + context.relBuilder.groupKey( + IntStream.range(0, leftFieldCount).mapToObj(context.relBuilder::field).toList()); + + context.relBuilder.aggregate(groupKey, aggCalls); + + // Resort by the sequence column + context.relBuilder.sort(context.relBuilder.field(seqCol)); + + // Cleanup helper columns + List cleanup = new ArrayList<>(); + for (String c : helperColsToCleanup) { + cleanup.add(context.relBuilder.field(c)); + } + context.relBuilder.projectExcept(cleanup); + return context.relBuilder.peek(); + } + + /** Collect field names referenced by an expression tree. */ + private void collectFieldNames(UnresolvedExpression expr, Set fieldNames) { + if (expr instanceof Field f) { + fieldNames.add(f.getField().toString()); + } else if (expr instanceof Alias a) { + collectFieldNames(a.getDelegated(), fieldNames); + } else if (expr instanceof WindowFunction wf) { + collectFieldNames(wf.getFunction(), fieldNames); + } else if (expr instanceof Function func) { + for (UnresolvedExpression arg : func.getFuncArgs()) { + collectFieldNames(arg, fieldNames); + } + } + } + + /** + * Build AggCall list for the self-join plan. The aggregate functions reference fields from the + * right side of the join, which carry the {@code __r___} prefix applied during right-side + * projection. This method rewrites the window function's field references to those prefixed + * names, unwraps the {@link WindowFunction} to its inner {@link Function}, and then delegates to + * the shared {@link #aggVisitor} so the self-join path reuses the same aggregate-resolution logic + * as regular {@code stats}/{@code eventstats} aggregations. + */ + private List buildAggCallsFromJoinedRight( + List windowFunctionList, CalcitePlanContext context) { + List aggCalls = new ArrayList<>(); + for (UnresolvedExpression wfExpr : windowFunctionList) { + UnresolvedExpression rewritten = rewriteWindowFunctionForSelfJoin(wfExpr); + aggCalls.add(aggVisitor.analyze(rewritten, context)); + } + return aggCalls; + } + + /** + * Rewrites a streamstats window function expression so that {@link #aggVisitor} can resolve it + * against the joined row type, where right-side fields carry the {@code __r___} prefix: + * + *
      + *
    • Unwraps {@link WindowFunction} to expose its inner {@link Function} (the aggregate). + *
    • Preserves the outer {@link Alias} so the aggregate output keeps its user-visible name. + *
    • Renames every {@link QualifiedName} / {@link Field} reference inside the function body to + * the prefixed right-side column name. + *
    + */ + private UnresolvedExpression rewriteWindowFunctionForSelfJoin(UnresolvedExpression expr) { + if (expr instanceof Alias a) { + return new Alias(a.getName(), rewriteWindowFunctionForSelfJoin(a.getDelegated())); + } + if (expr instanceof WindowFunction wf) { + return rewriteWindowFunctionForSelfJoin(wf.getFunction()); + } + if (expr instanceof Function func) { + List rewrittenArgs = + func.getFuncArgs().stream().map(this::rewriteFieldNamesToRightSide).toList(); + return new Function(func.getFuncName(), rewrittenArgs); + } + return expr; + } + + /** + * Recursively renames field references within an aggregate argument to their right-side alias. + */ + private UnresolvedExpression rewriteFieldNamesToRightSide(UnresolvedExpression expr) { + if (expr instanceof Field f && f.getField() instanceof QualifiedName qn) { + return new Field(toRightSideQualifiedName(qn), f.getFieldArgs()); + } + if (expr instanceof QualifiedName qn) { + return toRightSideQualifiedName(qn); + } + if (expr instanceof Alias a) { + return new Alias(a.getName(), rewriteFieldNamesToRightSide(a.getDelegated())); + } + if (expr instanceof Function func) { + List rewrittenArgs = + func.getFuncArgs().stream().map(this::rewriteFieldNamesToRightSide).toList(); + return new Function(func.getFuncName(), rewrittenArgs); + } + return expr; + } + + private static QualifiedName toRightSideQualifiedName(QualifiedName original) { + return new QualifiedName(toRightSideFieldName(original.toString())); + } + + private static String toRightSideFieldName(String originalName) { + return RIGHT_SIDE_FIELD_PREFIX + originalName + RIGHT_SIDE_FIELD_SUFFIX; + } + private RelNode buildResetHelperColumns(CalcitePlanContext context, StreamWindow node) { // 1. global sequence to define order RexNode rowNum = @@ -2607,6 +2911,40 @@ private String findTimestampField(RelDataType rowType) { return null; } + @Override + public RelNode visitUnion(Union node, CalcitePlanContext context) { + List inputNodes = new ArrayList<>(); + + for (UnresolvedPlan dataset : node.getDatasets()) { + UnresolvedPlan prunedDataset = dataset.accept(new EmptySourcePropagateVisitor(), null); + prunedDataset.accept(this, context); + inputNodes.add(context.relBuilder.build()); + } + + if (inputNodes.size() < 2) { + throw new IllegalArgumentException( + "Union command requires at least two datasets. 
Provided: " + inputNodes.size()); + } + + List unifiedInputs = + SchemaUnifier.buildUnifiedSchemaWithTypeCoercion(inputNodes, context); + + for (RelNode input : unifiedInputs) { + context.relBuilder.push(input); + } + context.relBuilder.union(true, unifiedInputs.size()); // true = UNION ALL + + if (node.getMaxout() != null) { + context.relBuilder.push( + LogicalSystemLimit.create( + LogicalSystemLimit.SystemLimitType.SUBSEARCH_MAXOUT, + context.relBuilder.build(), + context.relBuilder.literal(node.getMaxout()))); + } + + return context.relBuilder.peek(); + } + /* * Unsupported Commands of PPL with Calcite for OpenSearch 3.0.0-beta */ @@ -3205,7 +3543,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { || node.getColumnSplit() == null || Objects.equals(config.limit, 0)) { // The output of chart is expected to be ordered by row split names - relBuilder.sort(relBuilder.field(0)); + relBuilder.sort(relBuilder.nullsLast(relBuilder.field(0))); return relBuilder.peek(); } @@ -3275,7 +3613,8 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { relBuilder.field(2)) .as(aggFieldName)); // The output of chart is expected to be ordered by row and column split names - relBuilder.sort(relBuilder.field(0), relBuilder.field(1)); + relBuilder.sort( + relBuilder.nullsLast(relBuilder.field(0)), relBuilder.nullsLast(relBuilder.field(1))); return relBuilder.peek(); } @@ -3743,8 +4082,13 @@ public RelNode visitMvExpand(MvExpand mvExpand, CalcitePlanContext context) { inputType.getField(fieldName, /*caseSensitive*/ true, /*elideRecord*/ false); if (inputField == null) { - throw new SemanticCheckException( - String.format("Field '%s' not found in the schema", fieldName)); + throw ErrorReport.wrap( + new SemanticCheckException( + String.format("Field '%s' not found in the schema", fieldName))) + .code(ErrorCode.FIELD_NOT_FOUND) + .location("while evaluating the input field for mvexpand") + .context("command", "mvexpand") + .build(); } final RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(field, context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java b/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java index 0e5ac4a6e05..dba881b3fc3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java +++ b/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java @@ -16,6 +16,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.common.error.ErrorCode; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLFuncImpTable; @@ -315,14 +317,20 @@ private static Optional resolveLambdaVariable( private static Optional replaceWithNullLiteralInCoalesce(CalcitePlanContext context) { log.debug("replaceWithNullLiteralInCoalesce() called"); if (context.isInCoalesceFunction()) { + // Use SqlTypeName.NULL so the resulting literal does not bias the least-restrictive + // common-type computation toward VARCHAR. See issue #5175: previously VARCHAR was used, + // which caused COALESCE(null, 42) to be inferred as VARCHAR and returned as "42". 
return Optional.of( context.rexBuilder.makeNullLiteral( - context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR))); + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.NULL))); } return Optional.empty(); } - private static RuntimeException getNotFoundException(QualifiedName node) { - return new IllegalArgumentException(String.format("Field [%s] not found.", node.toString())); + private static ErrorReport getNotFoundException(QualifiedName node) { + return ErrorReport.wrap( + new IllegalArgumentException(String.format("Field [%s] not found.", node.toString()))) + .code(ErrorCode.FIELD_NOT_FOUND) + .build(); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java b/core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java index 05380ce8c48..e01cbe3992d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java +++ b/core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java @@ -14,10 +14,16 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.type.SqlTypeName; /** - * Utility class for unifying schemas across multiple RelNodes. Throws an exception when type - * conflicts are detected. + * Utility class for unifying schemas across multiple RelNodes. Supports two strategies: + * + *
      + *
    • Conflict resolution (multisearch): throws on type mismatch, fills missing fields with NULL + *
    • Type coercion (union): widens compatible types (e.g. INTEGER→BIGINT), falls back to VARCHAR + * for incompatible types, fills missing fields with NULL + *
    */ public class SchemaUnifier { @@ -147,4 +153,236 @@ RelDataType getType() { return type; } } + + /** + * Builds unified schema with type coercion for UNION command. Coerces compatible types to a + * common supertype (e.g. int+float→float), falls back to VARCHAR for incompatible types, and + * fills missing fields with NULL. + */ + public static List buildUnifiedSchemaWithTypeCoercion( + List inputs, CalcitePlanContext context) { + if (inputs.isEmpty() || inputs.size() == 1) { + return inputs; + } + + List coercedInputs = coerceUnionTypes(inputs, context); + return unifySchemasForUnion(coercedInputs, context); + } + + /** + * Aligns schemas by projecting NULL for missing fields and CAST for type mismatches. Uses + * force=true to clear collation traits and prevent EnumerableMergeUnion cast exception. + */ + private static List unifySchemasForUnion( + List inputs, CalcitePlanContext context) { + List unifiedSchema = buildUnifiedSchemaForUnion(inputs); + List fieldNames = + unifiedSchema.stream().map(SchemaField::getName).collect(Collectors.toList()); + + List projectedNodes = new ArrayList<>(); + for (RelNode node : inputs) { + List projection = buildProjectionForUnion(node, unifiedSchema, context); + RelNode projectedNode = + context.relBuilder.push(node).project(projection, fieldNames, true).build(); + projectedNodes.add(projectedNode); + } + return projectedNodes; + } + + private static List buildUnifiedSchemaForUnion(List nodes) { + List schema = new ArrayList<>(); + Map seenFields = new HashMap<>(); + + for (RelNode node : nodes) { + for (RelDataTypeField field : node.getRowType().getFieldList()) { + if (!seenFields.containsKey(field.getName())) { + schema.add(new SchemaField(field.getName(), field.getType())); + seenFields.put(field.getName(), field.getType()); + } + } + } + return schema; + } + + private static List buildProjectionForUnion( + RelNode node, List unifiedSchema, CalcitePlanContext context) { + Map nodeFieldMap = + node.getRowType().getFieldList().stream() + .collect(Collectors.toMap(RelDataTypeField::getName, field -> field)); + + List projection = new ArrayList<>(); + for (SchemaField schemaField : unifiedSchema) { + RelDataTypeField nodeField = nodeFieldMap.get(schemaField.getName()); + + if (nodeField != null) { + RexNode fieldRef = context.rexBuilder.makeInputRef(node, nodeField.getIndex()); + if (!nodeField.getType().equals(schemaField.getType())) { + projection.add(context.rexBuilder.makeCast(schemaField.getType(), fieldRef)); + } else { + projection.add(fieldRef); + } + } else { + projection.add(context.rexBuilder.makeNullLiteral(schemaField.getType())); + } + } + return projection; + } + + /** Casts fields to their common supertypes across all inputs when types differ. 
*/ + private static List coerceUnionTypes(List inputs, CalcitePlanContext context) { + Map> fieldTypeMap = new HashMap<>(); + for (RelNode input : inputs) { + for (RelDataTypeField field : input.getRowType().getFieldList()) { + String fieldName = field.getName(); + SqlTypeName typeName = field.getType().getSqlTypeName(); + if (typeName != null) { + fieldTypeMap.computeIfAbsent(fieldName, k -> new ArrayList<>()).add(typeName); + } + } + } + + Map targetTypeMap = new HashMap<>(); + for (Map.Entry> entry : fieldTypeMap.entrySet()) { + String fieldName = entry.getKey(); + List types = entry.getValue(); + + SqlTypeName commonType = types.getFirst(); + for (int i = 1; i < types.size(); i++) { + commonType = findCommonTypeForUnion(commonType, types.get(i)); + } + targetTypeMap.put(fieldName, commonType); + } + + boolean needsCoercion = false; + for (RelNode input : inputs) { + for (RelDataTypeField field : input.getRowType().getFieldList()) { + SqlTypeName targetType = targetTypeMap.get(field.getName()); + if (targetType != null && field.getType().getSqlTypeName() != targetType) { + needsCoercion = true; + break; + } + } + if (needsCoercion) break; + } + + if (!needsCoercion) { + return inputs; + } + + List coercedInputs = new ArrayList<>(); + for (RelNode input : inputs) { + List projections = new ArrayList<>(); + List projectionNames = new ArrayList<>(); + boolean needsProjection = false; + + for (RelDataTypeField field : input.getRowType().getFieldList()) { + String fieldName = field.getName(); + SqlTypeName currentType = field.getType().getSqlTypeName(); + SqlTypeName targetType = targetTypeMap.get(fieldName); + + RexNode fieldRef = context.rexBuilder.makeInputRef(input, field.getIndex()); + + if (currentType != targetType && targetType != null) { + projections.add(context.relBuilder.cast(fieldRef, targetType)); + needsProjection = true; + } else { + projections.add(fieldRef); + } + projectionNames.add(fieldName); + } + + if (needsProjection) { + context.relBuilder.push(input); + context.relBuilder.project(projections, projectionNames, true); + coercedInputs.add(context.relBuilder.build()); + } else { + coercedInputs.add(input); + } + } + + return coercedInputs; + } + + /** + * Returns the wider type for two SqlTypeNames. Within the same family, returns the wider type + * (e.g. INTEGER+BIGINT-->BIGINT). Across families, falls back to VARCHAR. 
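As an illustration of the widening rules just described (same-family widening by rank, VARCHAR fallback across families), a plain-Java restatement that runs without Calcite. The class name and the string type labels are invented for the sketch; the rank ordering mirrors `getNumericTypeRankForUnion` below.

```java
import java.util.Map;

public class UnionWideningSketch {
  // Same relative ordering as the SqlTypeName ranks used for UNION coercion.
  private static final Map<String, Integer> NUMERIC_RANK =
      Map.of(
          "TINYINT", 1, "SMALLINT", 2, "INTEGER", 3, "BIGINT", 4,
          "DECIMAL", 5, "REAL", 6, "FLOAT", 7, "DOUBLE", 8);

  static String common(String t1, String t2) {
    if (t1.equals(t2)) {
      return t1;
    }
    if (NUMERIC_RANK.containsKey(t1) && NUMERIC_RANK.containsKey(t2)) {
      return NUMERIC_RANK.get(t1) >= NUMERIC_RANK.get(t2) ? t1 : t2; // widen within the family
    }
    return "VARCHAR"; // incompatible families fall back to VARCHAR
  }

  public static void main(String[] args) {
    System.out.println(common("INTEGER", "BIGINT")); // BIGINT
    System.out.println(common("INTEGER", "DOUBLE")); // DOUBLE
    System.out.println(common("INTEGER", "VARCHAR")); // VARCHAR
  }
}
```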
+ */ + private static SqlTypeName findCommonTypeForUnion(SqlTypeName type1, SqlTypeName type2) { + if (type1 == type2) { + return type1; + } + + if (type1 == SqlTypeName.NULL) { + return type2; + } + if (type2 == SqlTypeName.NULL) { + return type1; + } + + if (isNumericTypeForUnion(type1) && isNumericTypeForUnion(type2)) { + return getWiderNumericTypeForUnion(type1, type2); + } + + if (isStringTypeForUnion(type1) && isStringTypeForUnion(type2)) { + return SqlTypeName.VARCHAR; + } + + if (isTemporalTypeForUnion(type1) && isTemporalTypeForUnion(type2)) { + return getWiderTemporalTypeForUnion(type1, type2); + } + + return SqlTypeName.VARCHAR; + } + + private static boolean isNumericTypeForUnion(SqlTypeName typeName) { + return typeName == SqlTypeName.TINYINT + || typeName == SqlTypeName.SMALLINT + || typeName == SqlTypeName.INTEGER + || typeName == SqlTypeName.BIGINT + || typeName == SqlTypeName.FLOAT + || typeName == SqlTypeName.REAL + || typeName == SqlTypeName.DOUBLE + || typeName == SqlTypeName.DECIMAL; + } + + private static boolean isStringTypeForUnion(SqlTypeName typeName) { + return typeName == SqlTypeName.CHAR || typeName == SqlTypeName.VARCHAR; + } + + private static boolean isTemporalTypeForUnion(SqlTypeName typeName) { + return typeName == SqlTypeName.DATE + || typeName == SqlTypeName.TIMESTAMP + || typeName == SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE; + } + + private static SqlTypeName getWiderNumericTypeForUnion(SqlTypeName type1, SqlTypeName type2) { + int rank1 = getNumericTypeRankForUnion(type1); + int rank2 = getNumericTypeRankForUnion(type2); + return rank1 >= rank2 ? type1 : type2; + } + + private static int getNumericTypeRankForUnion(SqlTypeName typeName) { + return switch (typeName) { + case TINYINT -> 1; + case SMALLINT -> 2; + case INTEGER -> 3; + case BIGINT -> 4; + case DECIMAL -> 5; + case REAL -> 6; + case FLOAT -> 7; + case DOUBLE -> 8; + default -> 0; + }; + } + + private static SqlTypeName getWiderTemporalTypeForUnion(SqlTypeName type1, SqlTypeName type2) { + if (type1 == SqlTypeName.TIMESTAMP || type2 == SqlTypeName.TIMESTAMP) { + return SqlTypeName.TIMESTAMP; + } + if (type1 == SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE + || type2 == SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE) { + return SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE; + } + return SqlTypeName.DATE; + } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/Dedup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/Dedup.java index 6d593787eb5..f30678b5531 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/Dedup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/Dedup.java @@ -6,10 +6,12 @@ package org.opensearch.sql.calcite.plan.rel; import java.util.List; +import javax.annotation.Nullable; import lombok.Getter; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.SingleRel; @@ -23,8 +25,23 @@ public abstract class Dedup extends SingleRel { final Integer allowedDuplication; final Boolean keepEmpty; final Boolean consecutive; + final @Nullable RelCollation inputCollation; + + /** + * Field names of the row type that {@link #inputCollation} was captured against. 
Used as a + * name-based anchor so callers can resolve the collation's stale indices after a planner rule has + * narrowed or replaced the dedup's input (typically a scan absorbing a narrowing project). + * + *

    Renames are handled by Calcite's own {@code Project.getMapping} propagation when a {@code + * Project} sits between dedup's old and new input — see {@code Dedup.copy}. This name list is + * only the fallback for cases where the replacement is not a {@code Project} (e.g. a scan that + * swaps in a narrower row type without a {@code Project} RelNode). Scans don't rename, so name + * equality is a stable identifier for that specific fallback. + * + *

    {@code null} iff {@link #inputCollation} is {@code null}. + */ + final @Nullable List inputCollationFieldNames; - /** */ protected Dedup( RelOptCluster cluster, RelTraitSet traitSet, @@ -32,7 +49,9 @@ protected Dedup( List dedupeFields, Integer allowedDuplication, Boolean keepEmpty, - Boolean consecutive) { + Boolean consecutive, + @Nullable RelCollation inputCollation, + @Nullable List inputCollationFieldNames) { super(cluster, traitSet, input); if (allowedDuplication <= 0) { throw new IllegalArgumentException("Number of duplicate events must be greater than 0"); @@ -44,6 +63,8 @@ protected Dedup( this.allowedDuplication = allowedDuplication; this.keepEmpty = keepEmpty; this.consecutive = consecutive; + this.inputCollation = inputCollation; + this.inputCollationFieldNames = inputCollationFieldNames; } @Override @@ -54,7 +75,9 @@ public final RelNode copy(RelTraitSet traitSet, List inputs) { this.dedupeFields, this.allowedDuplication, this.keepEmpty, - this.consecutive); + this.consecutive, + this.inputCollation, + this.inputCollationFieldNames); } public abstract Dedup copy( @@ -63,7 +86,9 @@ public abstract Dedup copy( List dedupeFields, Integer allowedDuplication, Boolean keepEmpty, - Boolean consecutive); + Boolean consecutive, + @Nullable RelCollation inputCollation, + @Nullable List inputCollationFieldNames); public Dedup copy(RelNode input, List dedupeFields) { return this.copy( @@ -72,7 +97,9 @@ public Dedup copy(RelNode input, List dedupeFields) { dedupeFields, this.allowedDuplication, this.keepEmpty, - this.consecutive); + this.consecutive, + this.inputCollation, + this.inputCollationFieldNames); } @Override @@ -81,7 +108,8 @@ public RelWriter explainTerms(RelWriter pw) { .item("dedup_fields", dedupeFields) .item("allowed_dedup", allowedDuplication) .item("keepEmpty", keepEmpty) - .item("consecutive", consecutive); + .item("consecutive", consecutive) + .itemIf("inputCollation", inputCollation, inputCollation != null); } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalDedup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalDedup.java index 2a8eb5038d6..8d1d60bb783 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalDedup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalDedup.java @@ -8,10 +8,12 @@ import static org.opensearch.sql.calcite.plan.rule.PPLDedupConvertRule.DEDUP_CONVERT_RULE; import java.util.List; +import javax.annotation.Nullable; import org.apache.calcite.plan.Convention; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rex.RexNode; @@ -24,8 +26,19 @@ protected LogicalDedup( List dedupeFields, Integer allowedDuplication, Boolean keepEmpty, - Boolean consecutive) { - super(cluster, traitSet, input, dedupeFields, allowedDuplication, keepEmpty, consecutive); + Boolean consecutive, + @Nullable RelCollation inputCollation, + @Nullable List inputCollationFieldNames) { + super( + cluster, + traitSet, + input, + dedupeFields, + allowedDuplication, + keepEmpty, + consecutive, + inputCollation, + inputCollationFieldNames); } @Override @@ -35,10 +48,20 @@ public Dedup copy( List dedupeFields, Integer allowedDuplication, Boolean keepEmpty, - Boolean consecutive) { + Boolean consecutive, + @Nullable RelCollation inputCollation, + @Nullable List inputCollationFieldNames) { assert 
traitSet.containsIfApplicable(Convention.NONE); return new LogicalDedup( - getCluster(), traitSet, input, dedupeFields, allowedDuplication, keepEmpty, consecutive); + getCluster(), + traitSet, + input, + dedupeFields, + allowedDuplication, + keepEmpty, + consecutive, + inputCollation, + inputCollationFieldNames); } public static LogicalDedup create( @@ -47,10 +70,33 @@ public static LogicalDedup create( Integer allowedDuplication, Boolean keepEmpty, Boolean consecutive) { + return create(input, dedupeFields, allowedDuplication, keepEmpty, consecutive, null); + } + + public static LogicalDedup create( + RelNode input, + List dedupeFields, + Integer allowedDuplication, + Boolean keepEmpty, + Boolean consecutive, + @Nullable RelCollation inputCollation) { + // Record the field names from the current input's row type so callers that encounter a stale + // collation (after a planner rule has swapped in a different, non-Project-derived input) can + // still resolve the sort keys to positions in the new input by name. See + // Dedup.inputCollationFieldNames. + List fieldNames = inputCollation == null ? null : input.getRowType().getFieldNames(); final RelOptCluster cluster = input.getCluster(); RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE); return new LogicalDedup( - cluster, traitSet, input, dedupeFields, allowedDuplication, keepEmpty, consecutive); + cluster, + traitSet, + input, + dedupeFields, + allowedDuplication, + keepEmpty, + consecutive, + inputCollation, + fieldNames); } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java index c1b452a2ac0..39bd243ea5d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java @@ -7,10 +7,15 @@ import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP; +import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; +import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelRule; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexWindowBounds; import org.apache.calcite.sql.fun.SqlStdOperatorTable; @@ -47,28 +52,85 @@ public void onMatch(RelOptRuleCall call) { final LogicalDedup dedup = call.rel(0); RelBuilder relBuilder = call.builder(); relBuilder.push(dedup.getInput()); + RelCollation inputCollation = + resolveCollationToCurrentInput( + dedup.getInputCollation(), + dedup.getInputCollationFieldNames(), + dedup.getInput().getRowType().getFieldNames()); if (dedup.getKeepEmpty()) { - buildDedupOrNull(relBuilder, dedup.getDedupeFields(), dedup.getAllowedDuplication()); + buildDedupOrNull( + relBuilder, dedup.getDedupeFields(), dedup.getAllowedDuplication(), inputCollation); } else { - buildDedupNotNull(relBuilder, dedup.getDedupeFields(), dedup.getAllowedDuplication()); + buildDedupNotNull( + relBuilder, dedup.getDedupeFields(), dedup.getAllowedDuplication(), inputCollation); } call.transformTo(relBuilder.build()); } + /** + * Resolve {@code collation}'s indices against {@code currentNames} (dedup's current input row + * type). If the indices are still valid against {@code currentNames}, return {@code collation} + * unchanged. 
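A tiny standalone sketch of that name-based fallback; the field names are hypothetical. The stale index is resolved through the captured name list into the current row type, or the sort key is dropped when the name is no longer present.

```java
import java.util.List;

public class CollationRemapSketch {
  public static void main(String[] args) {
    // Row type captured when the LogicalDedup was created.
    List<String> originalNames = List.of("firstname", "gender", "state", "salary");
    // Row type after a planner rule let the scan absorb a narrowing project.
    List<String> currentNames = List.of("gender", "state");

    int staleIndex = 2; // "state" in the original row type
    int remapped = currentNames.indexOf(originalNames.get(staleIndex));
    System.out.println(remapped >= 0 ? "remapped to " + remapped : "sort key dropped");
  }
}
```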
Otherwise, look each collation field up by name in {@code originalNames} (the row + * type captured at LogicalDedup creation time) and find its position in {@code currentNames}; if + * any field is no longer present, drop that key. + */ + private static @Nullable RelCollation resolveCollationToCurrentInput( + @Nullable RelCollation collation, + @Nullable List originalNames, + List currentNames) { + if (collation == null || collation.getFieldCollations().isEmpty()) { + return collation; + } + int currentSize = currentNames.size(); + int maxIdx = -1; + for (RelFieldCollation fc : collation.getFieldCollations()) { + maxIdx = Math.max(maxIdx, fc.getFieldIndex()); + } + if (maxIdx < currentSize) { + // Collation is already in the current input's index space — nothing to do. + return collation; + } + if (originalNames == null) { + return null; + } + List remapped = new ArrayList<>(); + for (RelFieldCollation fc : collation.getFieldCollations()) { + int oldIdx = fc.getFieldIndex(); + if (oldIdx < 0 || oldIdx >= originalNames.size()) { + continue; + } + int newIdx = currentNames.indexOf(originalNames.get(oldIdx)); + if (newIdx < 0) { + continue; + } + remapped.add(fc.withFieldIndex(newIdx)); + } + if (remapped.isEmpty()) { + return null; + } + return RelCollations.of(remapped); + } + public static void buildDedupOrNull( - RelBuilder relBuilder, List dedupeFields, Integer allowedDuplication) { + RelBuilder relBuilder, + List dedupeFields, + Integer allowedDuplication, + RelCollation inputCollation) { /* * | dedup 2 a, b keepempty=true - * LogicalProject(...) - * +- LogicalFilter(condition=[OR(IS NULL(a), IS NULL(b), <=(_row_number_dedup_, 1))]) - * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b ORDER BY a, b)]) - * +- ... + * LogicalSort(...) -- re-sort to restore input order + * +- LogicalProject(...) + * +- LogicalFilter(condition=[OR(IS NULL(a), IS NULL(b), <=(_row_number_dedup_, 1))]) + * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b)]) + * +- ... (input with Sort stripped) */ + List orderKeys = collationToOrderKeys(relBuilder, inputCollation); RexNode rowNumber = relBuilder .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() .partitionBy(dedupeFields) + .orderBy(orderKeys) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(ROW_NUMBER_COLUMN_FOR_DEDUP); relBuilder.projectPlus(rowNumber); @@ -82,31 +144,36 @@ public static void buildDedupOrNull( _row_number_dedup_, relBuilder.literal(allowedDuplication)))); // DropColumns('_row_number_dedup_) relBuilder.projectExcept(_row_number_dedup_); + // Re-sort to restore the input order that was stripped before the window + restoreInputOrder(relBuilder, inputCollation); } public static void buildDedupNotNull( - RelBuilder relBuilder, List dedupeFields, Integer allowedDuplication) { + RelBuilder relBuilder, + List dedupeFields, + Integer allowedDuplication, + RelCollation inputCollation) { /* * | dedup 2 a, b keepempty=false - * LogicalProject(...) - * +- LogicalFilter(condition=[<=(_row_number_dedup_, n)])) - * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b ORDER BY a, b)]) - * +- LogicalFilter(condition=[AND(IS NOT NULL(a), IS NOT NULL(b))]) - * +- ... + * LogicalSort(...) -- re-sort to restore input order + * +- LogicalProject(...) + * +- LogicalFilter(condition=[<=(_row_number_dedup_, n)])) + * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b)]) + * +- LogicalFilter(condition=[AND(IS NOT NULL(a), IS NOT NULL(b))]) + * +- ... 
(input with Sort stripped) */ + List orderKeys = collationToOrderKeys(relBuilder, inputCollation); // Filter (isnotnull('a) AND isnotnull('b)) String rowNumberAlias = ROW_NUMBER_COLUMN_FOR_DEDUP; relBuilder.filter( relBuilder.and( dedupeFields.stream().map(relBuilder::isNotNull).collect(Collectors.toList()))); - // Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, - // specifiedwindowoundedpreceding$(), currentrow$())) AS _row_number_dedup_], ['a, 'b], ['a ASC - // NULLS FIRST, 'b ASC NULLS FIRST] RexNode rowNumber = relBuilder .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() .partitionBy(dedupeFields) + .orderBy(orderKeys) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(rowNumberAlias); relBuilder.projectPlus(rowNumber); @@ -116,6 +183,44 @@ public static void buildDedupNotNull( relBuilder.lessThanOrEqual(rowNumberField, relBuilder.literal(allowedDuplication))); // DropColumns('_row_number_dedup_) relBuilder.projectExcept(rowNumberField); + // Re-sort to restore the input order that was stripped before the window + restoreInputOrder(relBuilder, inputCollation); + } + + /** + * Convert a RelCollation to a list of RexNode order keys using the RelBuilder's field references. + */ + private static List collationToOrderKeys(RelBuilder relBuilder, RelCollation collation) { + if (collation == null || collation.getFieldCollations().isEmpty()) { + return List.of(); + } + List orderKeys = new ArrayList<>(); + for (RelFieldCollation fieldCollation : collation.getFieldCollations()) { + RexNode fieldRef = relBuilder.field(fieldCollation.getFieldIndex()); + if (fieldCollation.direction.isDescending()) { + fieldRef = relBuilder.desc(fieldRef); + } + if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.LAST) { + fieldRef = relBuilder.nullsLast(fieldRef); + } else if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.FIRST) { + fieldRef = relBuilder.nullsFirst(fieldRef); + } + orderKeys.add(fieldRef); + } + return orderKeys; + } + + /** + * Re-apply a sort after dedup to restore the input order that may have been disrupted by the + * window operator. EnumerableWindow can re-partition data by the PARTITION BY key, destroying any + * upstream sort order. This explicit re-sort ensures the final output preserves the original + * order. + */ + private static void restoreInputOrder(RelBuilder relBuilder, RelCollation inputCollation) { + if (inputCollation != null && !inputCollation.getFieldCollations().isEmpty()) { + List sortKeys = collationToOrderKeys(relBuilder, inputCollation); + relBuilder.sort(sortKeys); + } } /** Rule configuration. 
*/ diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java index 054141371b9..11eabfd483c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java @@ -5,14 +5,21 @@ package org.opensearch.sql.calcite.plan.rule; +import java.util.ArrayList; import java.util.List; import java.util.function.Predicate; import java.util.stream.Collectors; +import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelRule; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexFieldCollation; +import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexWindow; @@ -106,6 +113,8 @@ protected void apply( return; } + RelCollation inputCollation = extractCollationFromWindow(windows.get(0)); + RelBuilder relBuilder = call.builder(); relBuilder.push(bucketNonNullFilter.getInput()); List> targetProjections = @@ -117,13 +126,33 @@ protected void apply( targetProjections.stream().map(Pair::getValue).collect(Collectors.toList())); LogicalDedup dedup = - LogicalDedup.create(relBuilder.build(), dedupColumns, dedupNumber, false, false); + LogicalDedup.create( + relBuilder.build(), dedupColumns, dedupNumber, false, false, inputCollation); relBuilder.push(dedup); relBuilder.project(finalProject.getProjects(), finalProject.getRowType().getFieldNames()); call.transformTo(relBuilder.build()); } + private static @Nullable RelCollation extractCollationFromWindow(RexWindow window) { + if (window.orderKeys.isEmpty()) { + return null; + } + List fieldCollations = new ArrayList<>(); + for (RexFieldCollation rfc : window.orderKeys) { + if (!(rfc.left instanceof RexInputRef ref)) { + return null; + } + fieldCollations.add( + new RelFieldCollation(ref.getIndex(), rfc.getDirection(), rfc.getNullDirection())); + } + RelCollation collation = RelCollations.of(fieldCollations); + if (collation.equals(RelCollations.EMPTY)) { + return null; + } + return collation; + } + /** Rule configuration. 
*/ @Value.Immutable public interface Config extends OpenSearchRuleConfig { diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java index a6d57ea01f6..54b9d4ffbaf 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java @@ -63,6 +63,7 @@ import org.apache.calcite.plan.RelOptSchema; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptTable.ViewExpander; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.plan.hep.HepProgramBuilder; @@ -74,6 +75,7 @@ import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.hint.HintStrategyTable; import org.apache.calcite.rel.logical.LogicalTableScan; import org.apache.calcite.rel.rules.FilterMergeRule; import org.apache.calcite.rel.type.RelDataType; @@ -104,6 +106,8 @@ import org.opensearch.sql.calcite.plan.rule.OpenSearchRules; import org.opensearch.sql.calcite.plan.rule.PPLSimplifyDedupRule; import org.opensearch.sql.calcite.profile.PlanProfileBuilder; +import org.opensearch.sql.common.error.ErrorCode; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.expression.function.PPLBuiltinOperators; import org.opensearch.sql.monitor.profile.ProfileContext; import org.opensearch.sql.monitor.profile.ProfileMetric; @@ -367,6 +371,36 @@ protected SqlToRelConverter getSqlToRelConverter( return new OpenSearchSqlToRelConverter( this, validator, catalogReader, this.cluster, convertletTable, config); } + + @Override + protected RelRoot trimUnusedFields(RelRoot root) { + final SqlToRelConverter.Config config = + SqlToRelConverter.config() + .withTrimUnusedFields(shouldTrim(root.rel)) + .withExpand(THREAD_EXPAND.get()) + .withInSubQueryThreshold(requireNonNull(THREAD_INSUBQUERY_THRESHOLD.get())); + // PPL analyzes into a pre-built RelNode before prepareStatement(rel). Reuse the incoming + // RelNode's cluster here so prepare-time trimming does not create replacement nodes under a + // different planner than the rest of the tree. + final SqlToRelConverter converter = + new OpenSearchSqlToRelConverter( + this, + getSqlValidator(), + catalogReader, + root.rel.getCluster(), + convertletTable, + config); + final boolean ordered = !root.collation.getFieldCollations().isEmpty(); + final boolean dml = SqlKind.DML.contains(root.kind); + return root.withRel(converter.trimUnusedFields(dml || ordered, root.rel)); + } + + private static boolean shouldTrim(RelNode rootRel) { + // For now, don't trim if there are more than 3 joins. The projects + // near the leaves created by trim migrate past joins and seem to + // prevent join-reordering. 
+ return THREAD_TRIM.get() || RelOptUtil.countJoins(rootRel) < 2; + } } public static class OpenSearchSqlToRelConverter extends SqlToRelConverter { @@ -379,25 +413,102 @@ public OpenSearchSqlToRelConverter( RelOptCluster cluster, SqlRexConvertletTable convertletTable, Config config) { - super(viewExpander, validator, catalogReader, cluster, convertletTable, config); + this( + viewExpander, + validator, + catalogReader, + cluster, + convertletTable, + preserveHintStrategies(cluster, config), + true); + } + + private OpenSearchSqlToRelConverter( + ViewExpander viewExpander, + @Nullable SqlValidator validator, + CatalogReader catalogReader, + RelOptCluster cluster, + SqlRexConvertletTable convertletTable, + Config effectiveConfig, + boolean ignored) { + super(viewExpander, validator, catalogReader, cluster, convertletTable, effectiveConfig); this.relBuilder = - config + effectiveConfig .getRelBuilderFactory() .create( cluster, validator != null ? validator.getCatalogReader().unwrap(RelOptSchema.class) : null) - .transform(config.getRelBuilderConfigTransform()); + .transform(effectiveConfig.getRelBuilderConfigTransform()); } @Override protected RelFieldTrimmer newFieldTrimmer() { return new OpenSearchRelFieldTrimmer(validator, this.relBuilder); } + + // SqlToRelConverter always installs the hint strategy table from its config onto the cluster. + // When prepare-time trimming reuses an incoming RelNode cluster, preserve any PPL-specific + // aggregate hint strategies that were already registered during analysis. + private static Config preserveHintStrategies(RelOptCluster cluster, Config config) { + if (config.getHintStrategyTable() == HintStrategyTable.EMPTY + && cluster.getHintStrategies() != HintStrategyTable.EMPTY) { + return config.withHintStrategyTable(cluster.getHintStrategies()); + } + return config; + } } public static class OpenSearchRelRunners { + private static boolean isNonPushdownEnumerableAggregate(String message) { + return message.contains("Error while preparing plan") + && message.contains("CalciteEnumerableNestedAggregate"); + } + + // Detect if error is due to window functions in unsupported context (bins on time fields) + private static boolean isWindowBinOnTimeField(SQLException e) { + String errorMsg = e.getMessage(); + return errorMsg != null + && errorMsg.contains("Error while preparing plan") + && errorMsg.contains("WIDTH_BUCKET"); + } + + // Traverse Calcite SQL exceptions in search of the root cause, since Calcite's outer error + // messages aren't really usable for users + private static String rootCauseMessage(Throwable e) { + String rc = null; + if (e.getCause() != null) { + rc = rootCauseMessage(e.getCause()); + } + for (int i = 0; rc == null && i < e.getSuppressed().length; i++) { + rc = rootCauseMessage(e.getSuppressed()[i]); + } + return rc != null ? 
rc : e.getMessage(); + } + + private static void enrichErrorsForSpecialCases(ErrorReport.Builder report, SQLException e) { + if (e.getMessage().contains("Error while preparing plan [") && e.getCause() != null) { + // Generic 'something went wrong' planning error, try to get the cause + int planStart = e.getMessage().indexOf('['); + int planEnd = e.getMessage().lastIndexOf(']'); + report + .context("plan", e.getMessage().substring(planStart + 1, planEnd)) + .details(rootCauseMessage(e)); + } + if (isWindowBinOnTimeField(e)) { + report + .details( + "The 'bins' parameter on timestamp fields requires: (1) pushdown to be enabled" + + " (controlled by plugins.calcite.pushdown.enabled, enabled by default), and" + + " (2) the timestamp field to be used as an aggregation bucket (e.g., 'stats" + + " count() by @timestamp').") + .code(ErrorCode.UNSUPPORTED_OPERATION) + .context("is_window_bin_on_time_field", true) + .suggestion("check pushdown is enabled and review the aggregation"); + } + } + /** * Runs a relational expression by existing connection. This class copied from {@link * org.apache.calcite.tools.RelRunners#run(RelNode)} @@ -430,17 +541,12 @@ public RelNode visit(TableScan scan) { return preparedStatement; } catch (SQLException e) { // Detect if error is due to window functions in unsupported context (bins on time fields) - String errorMsg = e.getMessage(); - if (errorMsg != null - && errorMsg.contains("Error while preparing plan") - && errorMsg.contains("WIDTH_BUCKET")) { - throw new UnsupportedOperationException( - "The 'bins' parameter on timestamp fields requires: (1) pushdown to be enabled" - + " (controlled by plugins.calcite.pushdown.enabled, enabled by default), and" - + " (2) the timestamp field to be used as an aggregation bucket (e.g., 'stats" - + " count() by @timestamp')."); - } - throw Util.throwAsRuntime(e); + ErrorReport.Builder report = + ErrorReport.wrap(e) + .location("while compiling the optimized query plan for physical execution") + .code(ErrorCode.PLANNING_ERROR); + enrichErrorsForSpecialCases(report, e); + throw report.build(); } } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java index 915c45e7083..0326d3ee61d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java @@ -6,8 +6,14 @@ package org.opensearch.sql.calcite.utils; import com.google.common.base.Suppliers; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Objects; import java.util.function.Supplier; import lombok.experimental.UtilityClass; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.hint.HintStrategyTable; import org.apache.calcite.rel.hint.RelHint; @@ -20,6 +26,17 @@ public class PPLHintUtils { private static final String KEY_IGNORE_NULL_BUCKET = "ignoreNullBucket"; private static final String KEY_HAS_NESTED_AGG_CALL = "hasNestedAggCall"; + /** + * Encoded list of dedup sort keys, one per field, in pipe-separated {@code field:ORDER} form, + * e.g. {@code "gender:ASC|state:DESC"}. Each entry preserves the sort order from the original PPL + * {@code sort} collation so the pushed-down {@code top_hits} can emit a full multi-field sort + * array instead of only the first field. 
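A minimal round-trip sketch of that pipe-separated encoding in plain Java; the class and record names are invented, while the `field:ORDER` format and the `lastIndexOf` split mirror the encode/decode helpers below.

```java
import java.util.ArrayList;
import java.util.List;

public class DedupSortHintSketch {
  record SortKey(String field, String order) {}

  static List<SortKey> decode(String encoded) {
    List<SortKey> keys = new ArrayList<>();
    for (String entry : encoded.split("\\|")) {
      int sep = entry.lastIndexOf(':'); // lastIndexOf tolerates ':' inside field names
      if (sep <= 0 || sep == entry.length() - 1) {
        continue;
      }
      keys.add(new SortKey(entry.substring(0, sep), entry.substring(sep + 1)));
    }
    return keys;
  }

  public static void main(String[] args) {
    // Prints [SortKey[field=gender, order=ASC], SortKey[field=state, order=DESC]]
    System.out.println(decode("gender:ASC|state:DESC"));
  }
}
```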
+ */ + private static final String KEY_DEDUP_SORT_FIELDS = "dedupSortFields"; + + private static final String DEDUP_SORT_ENTRY_SEP = "|"; + private static final String DEDUP_SORT_FIELD_ORDER_SEP = ":"; + private static final Supplier HINT_STRATEGY_TABLE = Suppliers.memoize( () -> @@ -81,4 +98,75 @@ public static boolean hasNestedAggCall(Aggregate aggregate) { .getOrDefault(KEY_HAS_NESTED_AGG_CALL, "false") .equals("true")); } + + /** + * Add dedup sort info hint to aggregate so that AggregateAnalyzer can set top_hits sort. All + * field collations are propagated so a multi-field PPL {@code sort} ({@code sort state, -city | + * dedup ...}) is pushed down as a multi-field {@code top_hits} sort. + */ + public static void addDedupSortHintToAggregate( + RelBuilder relBuilder, RelCollation collation, java.util.List fieldNames) { + assert relBuilder.peek() instanceof LogicalAggregate + : "Hint HINT_AGG_ARGUMENTS can be added to LogicalAggregate only"; + String encoded = encodeDedupSortFields(collation, fieldNames); + if (encoded.isEmpty()) { + return; + } + final RelHint sortHint = + RelHint.builder(HINT_AGG_ARGUMENTS).hintOption(KEY_DEDUP_SORT_FIELDS, encoded).build(); + relBuilder.hints(sortHint); + if (relBuilder.getCluster().getHintStrategies() == HintStrategyTable.EMPTY) { + relBuilder.getCluster().setHintStrategies(HINT_STRATEGY_TABLE.get()); + } + } + + /** A single (field, order) entry from the dedup sort hint. */ + public record DedupSortKey(String field, String order) {} + + /** + * Return the dedup sort keys from aggregate hints, preserving the order from the original PPL + * {@code sort}. Empty list if not present. + */ + public static List getDedupSortKeys(Aggregate aggregate) { + return aggregate.getHints().stream() + .filter(hint -> hint.hintName.equals(HINT_AGG_ARGUMENTS)) + .map(hint -> hint.kvOptions.get(KEY_DEDUP_SORT_FIELDS)) + .filter(Objects::nonNull) + .findFirst() + .map(PPLHintUtils::decodeDedupSortFields) + .orElse(Collections.emptyList()); + } + + private static String encodeDedupSortFields(RelCollation collation, List fieldNames) { + StringBuilder sb = new StringBuilder(); + for (RelFieldCollation fc : collation.getFieldCollations()) { + int idx = fc.getFieldIndex(); + if (idx < 0 || idx >= fieldNames.size()) { + throw new IllegalStateException( + "Dedup sort collation index " + idx + " out of range for scan fields " + fieldNames); + } + if (sb.length() > 0) { + sb.append(DEDUP_SORT_ENTRY_SEP); + } + sb.append(fieldNames.get(idx)) + .append(DEDUP_SORT_FIELD_ORDER_SEP) + .append(fc.direction.isDescending() ? 
"DESC" : "ASC"); + } + return sb.toString(); + } + + private static List decodeDedupSortFields(String encoded) { + if (encoded == null || encoded.isEmpty()) { + return Collections.emptyList(); + } + List keys = new ArrayList<>(); + for (String entry : encoded.split("\\" + DEDUP_SORT_ENTRY_SEP)) { + int sep = entry.lastIndexOf(DEDUP_SORT_FIELD_ORDER_SEP); + if (sep <= 0 || sep == entry.length() - 1) { + continue; + } + keys.add(new DedupSortKey(entry.substring(0, sep), entry.substring(sep + 1))); + } + return keys; + } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java index abf37e68392..fcd361ba229 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java @@ -84,6 +84,10 @@ private PPLOperandTypes() {} UDFOperandMetadata.wrap( (CompositeOperandTypeChecker) OperandTypes.ANY.or(OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.INTEGER))); + public static final UDFOperandMetadata ANY_OPTIONAL_STRING = + UDFOperandMetadata.wrap( + (CompositeOperandTypeChecker) + OperandTypes.ANY.or(OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER))); public static final UDFOperandMetadata ANY_OPTIONAL_TIMESTAMP = UDFOperandMetadata.wrap( (CompositeOperandTypeChecker) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 39f3a6f2d05..4d2dae4bd60 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -28,6 +28,7 @@ import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.volcano.VolcanoPlanner; +import org.apache.calcite.rel.BiRel; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelFieldCollation; @@ -37,11 +38,14 @@ import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SetOp; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.core.Uncollect; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCorrelVariable; @@ -84,6 +88,7 @@ public interface PlanUtils { String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__"; String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_"; String ROW_NUMBER_COLUMN_FOR_TRANSPOSE = "_row_number_transpose_"; + String VALUE_COLUMN_FOR_TRANSPOSE = "_value_transpose_"; static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) { return switch (unit) { @@ -596,6 +601,113 @@ public Void visitCorrelVariable(RexCorrelVariable correlVar) { } } + /** + * Walk down the plan tree to find the first Sort node with non-empty collation. Stops at blocking + * operators that destroy ordering: + * + *

+   * <ul>
+   *   <li>Aggregate - aggregation destroys input ordering
+   *   <li>BiRel - covers Join, Correlate, and other binary relations
+   *   <li>SetOp - covers Union, Intersect, Except
+   *   <li>Uncollect - unnesting operation that may change ordering
+   *   <li>Project with window functions (RexOver) - ordering determined by window's ORDER BY
+   * </ul>
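+   *
+   * <p>Illustrative example (not taken from the sources): for a plan shaped like {@code Filter <-
+   * Sort[state ASC] <- Scan} this returns the {@code [state ASC]} collation, while {@code Aggregate
+   * <- Sort[state ASC] <- Scan} returns {@code null} because the aggregate is a blocking operator.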
    + * + * @param node the starting RelNode to backtrack from + * @return the collation found, or null if no sort or blocking operator encountered + */ + public static @Nullable RelCollation findInputCollation(RelNode node) { + while (node != null) { + if (node instanceof Aggregate + || node instanceof BiRel + || node instanceof SetOp + || node instanceof Uncollect) { + return null; + } + if (node instanceof LogicalProject && ((LogicalProject) node).containsOver()) { + return null; + } + if (node instanceof Sort sort) { + if (sort.getCollation() != null && !sort.getCollation().getFieldCollations().isEmpty()) { + return sort.getCollation(); + } + } + if (node.getInputs().isEmpty()) { + break; + } + node = node.getInput(0); + } + return null; + } + + /** + * Strip the Sort node from the input on the RelBuilder stack, returning its collation (remapped + * through any intermediate Projects). This is necessary because EnumerableWindow re-partitions + * data by PARTITION BY key, which can destroy input sort order. Calcite's metadata system + * (RelMdCollation) incorrectly propagates the input's collation through the Window, causing the + * optimizer to eliminate a post-dedup Sort as "redundant". By stripping the Sort before the + * window and re-adding it after, we break this incorrect metadata chain. + * + * @return the remapped collation of the stripped Sort, or null if no Sort was found or the sort + * field was projected away + */ + public static @Nullable RelCollation stripInputSort(RelBuilder relBuilder) { + RelNode input = relBuilder.peek(); + // First check whether a Sort exists in the (single-input) prefix of the subtree. If there is + // no Sort, there is nothing to strip and the index-space remapping below would be pointless. + if (findInputCollation(input) == null) { + return null; + } + // Ask Calcite's RelMdCollation for the subtree's output collation. This already accounts for + // intermediate Projects (they rewrite collation via a `Mappings.TargetMapping`), so we don't + // need to hand-roll an index remapper. + RelMetadataQuery mq = input.getCluster().getMetadataQuery(); + List collations = mq.collations(input); + RelCollation outputCollation = null; + if (collations != null) { + for (RelCollation c : collations) { + if (c != null && !c.getFieldCollations().isEmpty()) { + outputCollation = c; + break; + } + } + } + if (outputCollation == null) { + // Any collation field was projected away (or RelMdCollation couldn't propagate through the + // subtree). Leave the tree untouched and report no collation. + return null; + } + RelNode stripped = removeSortFromTree(input); + if (stripped != input) { + relBuilder.clear(); + relBuilder.push(stripped); + } + return outputCollation; + } + + /** + * Remove the first Sort node found in the tree, replacing it with its input. Only traverses + * through single-input operators (Filter, Project) that preserve order. + */ + private static RelNode removeSortFromTree(RelNode node) { + if (node instanceof Sort sort) { + if (sort.getCollation() != null + && !sort.getCollation().getFieldCollations().isEmpty() + && sort.fetch == null + && sort.offset == null) { + return sort.getInput(); + } + } + if (node.getInputs().size() == 1) { + RelNode child = node.getInput(0); + RelNode newChild = removeSortFromTree(child); + if (newChild != child) { + return node.copy(node.getTraitSet(), List.of(newChild)); + } + } + return node; + } + /** * Reverses the direction of a RelCollation. 
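+   * For example (assuming the reversal is applied per field), a collation {@code [state ASC, city
+   * DESC]} would become {@code [state DESC, city ASC]}.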
* diff --git a/core/src/main/java/org/opensearch/sql/executor/DelegatingExecutionEngine.java b/core/src/main/java/org/opensearch/sql/executor/DelegatingExecutionEngine.java new file mode 100644 index 00000000000..b38251233a0 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/executor/DelegatingExecutionEngine.java @@ -0,0 +1,81 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.executor; + +import java.util.List; +import java.util.Optional; +import lombok.RequiredArgsConstructor; +import lombok.extern.log4j.Log4j2; +import org.apache.calcite.rel.RelNode; +import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.common.response.ResponseListener; +import org.opensearch.sql.planner.physical.PhysicalPlan; + +/** + * An {@link ExecutionEngine} that delegates Calcite RelNode execution to the first extension whose + * {@link ExecutionEngine#canVectorize(RelNode)} returns {@code true}, falling back to the default + * engine otherwise. Non-Calcite ({@link PhysicalPlan}) methods and unmatched RelNode plans are + * forwarded to the default engine. + */ +@RequiredArgsConstructor +@Log4j2 +public class DelegatingExecutionEngine implements ExecutionEngine { + + private final ExecutionEngine defaultEngine; + private final List extensions; + + @Override + public void execute(PhysicalPlan plan, ResponseListener listener) { + defaultEngine.execute(plan, listener); + } + + @Override + public void execute( + PhysicalPlan plan, ExecutionContext context, ResponseListener listener) { + defaultEngine.execute(plan, context, listener); + } + + @Override + public void explain(PhysicalPlan plan, ResponseListener listener) { + defaultEngine.explain(plan, listener); + } + + @Override + public boolean canVectorize(RelNode plan) { + return findExtension(plan).isPresent(); + } + + @Override + public void execute( + RelNode plan, CalcitePlanContext context, ResponseListener listener) { + Optional ext = findExtension(plan); + if (ext.isPresent()) { + log.info("Routing query to extension engine : {}", ext.get().getClass().getSimpleName()); + ext.get().execute(plan, context, listener); + } else { + defaultEngine.execute(plan, context, listener); + } + } + + @Override + public void explain( + RelNode plan, + ExplainMode mode, + CalcitePlanContext context, + ResponseListener listener) { + Optional ext = findExtension(plan); + if (ext.isPresent()) { + ext.get().explain(plan, mode, context, listener); + } else { + defaultEngine.explain(plan, mode, context, listener); + } + } + + private Optional findExtension(RelNode plan) { + return extensions.stream().filter(ext -> ext.canVectorize(plan)).findFirst(); + } +} diff --git a/core/src/main/java/org/opensearch/sql/executor/ExecutionEngine.java b/core/src/main/java/org/opensearch/sql/executor/ExecutionEngine.java index e65db7b4065..da8eae41355 100644 --- a/core/src/main/java/org/opensearch/sql/executor/ExecutionEngine.java +++ b/core/src/main/java/org/opensearch/sql/executor/ExecutionEngine.java @@ -47,15 +47,32 @@ void execute( */ void explain(PhysicalPlan plan, ResponseListener listener); + /** + * Check if this engine supports vectorized execution of the given Calcite RelNode plan. + * Vectorized execution engines (e.g. Velox) override this to advertise support for specific plan + * shapes. The default returns {@code false}. 
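+   * A vectorized engine would typically override this with a conservative structural check; as an
+   * illustration only, it might return {@code true} for a plain {@code Project} over a {@code
+   * TableScan} that it knows how to translate, and {@code false} for anything else.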
+ */ + default boolean canVectorize(RelNode plan) { + return false; + } + /** Execute calcite RelNode plan with {@link ExecutionContext} and call back response listener. */ default void execute( - RelNode plan, CalcitePlanContext context, ResponseListener listener) {} + RelNode plan, CalcitePlanContext context, ResponseListener listener) { + listener.onFailure( + new UnsupportedOperationException( + getClass().getSimpleName() + " does not support RelNode execution")); + } default void explain( RelNode plan, ExplainMode mode, CalcitePlanContext context, - ResponseListener listener) {} + ResponseListener listener) { + listener.onFailure( + new UnsupportedOperationException( + getClass().getSimpleName() + " does not support RelNode explain")); + } /** Data class that encapsulates ExprValue. */ @Data diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index d205505dcf8..fe9d3e55dc1 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -36,6 +36,9 @@ import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit.SystemLimitType; import org.opensearch.sql.calcite.utils.CalciteClassLoaderHelper; +import org.opensearch.sql.common.error.ErrorReport; +import org.opensearch.sql.common.error.QueryProcessingStage; +import org.opensearch.sql.common.error.StageErrorHandler; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.utils.QueryContext; @@ -145,11 +148,30 @@ public void executeWithCalcite( CalcitePlanContext context = CalcitePlanContext.create( buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); + context.setHighlightConfig(highlightConfig); - RelNode relNode = analyze(plan, context); - RelNode calcitePlan = convertToCalcitePlan(relNode, context); + + // Wrap analyze with ANALYZING stage tracking + RelNode relNode = + StageErrorHandler.executeStage( + QueryProcessingStage.ANALYZING, + () -> analyze(plan, context), + "while preparing and validating the query plan"); + + // Wrap plan conversion with PLAN_CONVERSION stage tracking + RelNode calcitePlan = + StageErrorHandler.executeStage( + QueryProcessingStage.PLAN_CONVERSION, + () -> convertToCalcitePlan(relNode, context), + "while converting the query to an executable plan"); + analyzeMetric.set(System.nanoTime() - analyzeStart); - executionEngine.execute(calcitePlan, context, listener); + + // Wrap execution with EXECUTING stage tracking + StageErrorHandler.executeStageVoid( + QueryProcessingStage.EXECUTING, + () -> executionEngine.execute(calcitePlan, context, listener), + "while running the query"); }, QueryService.class); } catch (Throwable t) { @@ -300,22 +322,31 @@ public PhysicalPlan plan(LogicalPlan plan) { return planner.plan(plan); } + private boolean isCalciteUnsupportedError(@Nullable Throwable t) { + return switch (t) { + case null -> false; + case CalciteUnsupportedException calciteUnsupportedException -> true; + case ErrorReport errorReport when t.getCause() instanceof CalciteUnsupportedException -> true; + default -> false; + }; + } + private boolean isCalciteFallbackAllowed(@Nullable Throwable t) { // We always allow fallback the query failed with CalciteUnsupportedException. // This is for avoiding breaking changes when enable Calcite by default. 
- if (t instanceof CalciteUnsupportedException) { + if (isCalciteUnsupportedError(t)) { return true; - } else { - if (settings != null) { - Boolean fallback_allowed = settings.getSettingValue(Settings.Key.CALCITE_FALLBACK_ALLOWED); - if (fallback_allowed == null) { - return false; - } - return fallback_allowed; - } else { - return true; + } + + if (settings != null) { + Boolean fallback_allowed = settings.getSettingValue(Settings.Key.CALCITE_FALLBACK_ALLOWED); + if (fallback_allowed == null) { + return false; } + return fallback_allowed; } + + return true; } private boolean isCalciteEnabled(Settings settings) { diff --git a/core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java b/core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java index f42d376f649..bd0796b05af 100644 --- a/core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java +++ b/core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java @@ -249,4 +249,40 @@ private static long extractFirstNDigits(double value, int digits) { return isNegative ? -result : result; } + + /** Mapping from strftime specifiers to Java DateTimeFormatter patterns for parsing. */ + private static final Map STRFTIME_TO_JAVA_PARSE = + ImmutableMap.builder() + .put("%Y", "yyyy") + .put("%y", "yy") + .put("%m", "MM") + .put("%B", "MMMM") + .put("%b", "MMM") + .put("%d", "dd") + .put("%H", "HH") + .put("%I", "hh") + .put("%M", "mm") + .put("%S", "ss") + .put("%p", "a") + .put("%T", "HH:mm:ss") + .put("%F", "yyyy-MM-dd") + .put("%%", "'%'") + .build(); + + /** + * Convert a strftime format string to a Java DateTimeFormatter pattern suitable for parsing. + * + * @param strftimeFormat the strftime-style format string (e.g. {@code %Y-%m-%d %H:%M:%S}) + * @return a Java DateTimeFormatter pattern (e.g. 
{@code yyyy-MM-dd HH:mm:ss}) + */ + public static String toJavaPattern(String strftimeFormat) { + Matcher m = Pattern.compile("%[A-Za-z%]").matcher(strftimeFormat); + StringBuilder sb = new StringBuilder(); + while (m.find()) { + String replacement = STRFTIME_TO_JAVA_PARSE.getOrDefault(m.group(), m.group()); + m.appendReplacement(sb, Matcher.quoteReplacement(replacement)); + } + m.appendTail(sb); + return sb.toString(); + } } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 2aebf7efe34..0a5b0fe0e03 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -63,8 +63,12 @@ import org.opensearch.sql.expression.function.jsonUDF.JsonKeysFunctionImpl; import org.opensearch.sql.expression.function.jsonUDF.JsonSetFunctionImpl; import org.opensearch.sql.expression.function.udf.AutoConvertFunction; +import org.opensearch.sql.expression.function.udf.CTimeConvertFunction; import org.opensearch.sql.expression.function.udf.CryptographicFunction; +import org.opensearch.sql.expression.function.udf.Dur2SecConvertFunction; import org.opensearch.sql.expression.function.udf.MemkConvertFunction; +import org.opensearch.sql.expression.function.udf.MkTimeConvertFunction; +import org.opensearch.sql.expression.function.udf.MsTimeConvertFunction; import org.opensearch.sql.expression.function.udf.NumConvertFunction; import org.opensearch.sql.expression.function.udf.ParseFunction; import org.opensearch.sql.expression.function.udf.RelevanceQueryFunction; @@ -431,6 +435,10 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { public static final SqlOperator RMCOMMA = new RmcommaConvertFunction().toUDF("RMCOMMA"); public static final SqlOperator RMUNIT = new RmunitConvertFunction().toUDF("RMUNIT"); public static final SqlOperator MEMK = new MemkConvertFunction().toUDF("MEMK"); + public static final SqlOperator CTIME = new CTimeConvertFunction().toUDF("CTIME"); + public static final SqlOperator MKTIME = new MkTimeConvertFunction().toUDF("MKTIME"); + public static final SqlOperator MSTIME = new MsTimeConvertFunction().toUDF("MSTIME"); + public static final SqlOperator DUR2SEC = new Dur2SecConvertFunction().toUDF("DUR2SEC"); public static final SqlOperator WIDTH_BUCKET = new org.opensearch.sql.expression.function.udf.binning.WidthBucketFunction() diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 30d7c055470..849c60fe4eb 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -39,6 +39,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.COT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.COUNT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CRC32; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CTIME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CURDATE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CURRENT_DATE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CURRENT_TIME; @@ -61,6 +62,7 @@ import static 
org.opensearch.sql.expression.function.BuiltinFunctionName.DEGREES; import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDEFUNCTION; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DUR2SEC; import static org.opensearch.sql.expression.function.BuiltinFunctionName.E; import static org.opensearch.sql.expression.function.BuiltinFunctionName.EARLIEST; import static org.opensearch.sql.expression.function.BuiltinFunctionName.EQUAL; @@ -144,12 +146,14 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_DAY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_HOUR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MKTIME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MOD; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MODULUS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MODULUSFUNCTION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTHNAME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTH_OF_YEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MSTIME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLYFUNCTION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH; @@ -991,6 +995,10 @@ void populate() { registerOperator(RMCOMMA, PPLBuiltinOperators.RMCOMMA); registerOperator(RMUNIT, PPLBuiltinOperators.RMUNIT); registerOperator(MEMK, PPLBuiltinOperators.MEMK); + registerOperator(CTIME, PPLBuiltinOperators.CTIME); + registerOperator(MKTIME, PPLBuiltinOperators.MKTIME); + registerOperator(MSTIME, PPLBuiltinOperators.MSTIME); + registerOperator(DUR2SEC, PPLBuiltinOperators.DUR2SEC); register( TOSTRING, diff --git a/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonUtils.java b/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonUtils.java index da8dc2a2413..16727295fea 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonUtils.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonUtils.java @@ -23,6 +23,15 @@ public class JsonUtils { public static String convertToJsonPath(String input) { if (input == null || input.isEmpty()) return "$"; + // Strip leading "$." 
or "$" to avoid double-prefixing (issue #5167) + if (input.startsWith("$.")) { + input = input.substring(2); + } else if (input.startsWith("$")) { + input = input.substring(1); + } + + if (input.isEmpty()) return "$"; + StringBuilder sb = new StringBuilder("$."); int i = 0; while (i < input.length()) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/CTimeConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/CTimeConvertFunction.java new file mode 100644 index 00000000000..6b507936348 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/CTimeConvertFunction.java @@ -0,0 +1,105 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import java.time.Instant; +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.util.List; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.opensearch.sql.calcite.utils.PPLOperandTypes; +import org.opensearch.sql.calcite.utils.PPLReturnTypes; +import org.opensearch.sql.expression.datetime.StrftimeFormatterUtil; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** + * PPL ctime() conversion function. Converts UNIX epoch timestamps to human-readable time strings + * using strftime format specifiers. Default format: {@code %m/%d/%Y %H:%M:%S}. + */ +public class CTimeConvertFunction extends ImplementorUDF { + + private static final String DEFAULT_FORMAT = "%m/%d/%Y %H:%M:%S"; + + public CTimeConvertFunction() { + super(new CTimeImplementor(), NullPolicy.ANY); + } + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return PPLReturnTypes.STRING_FORCE_NULLABLE; + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + return PPLOperandTypes.ANY_OPTIONAL_STRING; + } + + public static class CTimeImplementor implements NotNullImplementor { + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + if (translatedOperands.isEmpty()) { + return Expressions.constant(null, String.class); + } + Expression fieldValue = Expressions.box(translatedOperands.get(0)); + if (translatedOperands.size() == 1) { + return Expressions.call(CTimeConvertFunction.class, "convert", fieldValue); + } + Expression timeFormat = Expressions.box(translatedOperands.get(1)); + return Expressions.call( + CTimeConvertFunction.class, "convertWithFormat", fieldValue, timeFormat); + } + } + + public static String convert(Object value) { + return convertWithFormat(value, null); + } + + public static String convertWithFormat(Object value, Object timeFormatObj) { + Double timestamp = toEpochSeconds(value); + if (timestamp == null) { + return null; + } + String format = (timeFormatObj != null) ? 
timeFormatObj.toString().trim() : DEFAULT_FORMAT; + if (format.isEmpty()) { + return null; + } + try { + long seconds = timestamp.longValue(); + int nanos = (int) ((timestamp - seconds) * 1_000_000_000); + Instant instant = Instant.ofEpochSecond(seconds, nanos); + ZonedDateTime zdt = ZonedDateTime.ofInstant(instant, ZoneId.of("UTC")); + return StrftimeFormatterUtil.formatZonedDateTime(zdt, format).stringValue(); + } catch (Exception e) { + return null; + } + } + + public static Double toEpochSeconds(Object value) { + if (value == null) { + return null; + } + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + String str = value.toString().trim(); + if (str.isEmpty()) { + return null; + } + try { + return Double.parseDouble(str); + } catch (NumberFormatException e) { + return null; + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/Dur2SecConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/Dur2SecConvertFunction.java new file mode 100644 index 00000000000..78facf743be --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/Dur2SecConvertFunction.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** PPL dur2sec() conversion function. Converts duration format {@code [D+]HH:MM:SS} to seconds */ +public class Dur2SecConvertFunction extends BaseConversionUDF { + + public static final Dur2SecConvertFunction INSTANCE = new Dur2SecConvertFunction(); + + // Matches [D+]HH:MM:SS — optional days prefix with + separator + private static final Pattern DURATION_PATTERN = + Pattern.compile("^(?:(\\d+)\\+)?(\\d{1,2}):(\\d{1,2}):(\\d{1,2})$"); + + public Dur2SecConvertFunction() { + super(Dur2SecConvertFunction.class); + } + + public static Object convert(Object value) { + return INSTANCE.convertValue(value); + } + + @Override + protected Object applyConversion(String preprocessedValue) { + Double existingSeconds = tryParseDouble(preprocessedValue); + if (existingSeconds != null) { + return existingSeconds; + } + + Matcher matcher = DURATION_PATTERN.matcher(preprocessedValue); + if (!matcher.matches()) { + return null; + } + + try { + int days = matcher.group(1) != null ? 
Integer.parseInt(matcher.group(1)) : 0; + int hours = Integer.parseInt(matcher.group(2)); + int minutes = Integer.parseInt(matcher.group(3)); + int seconds = Integer.parseInt(matcher.group(4)); + + if (hours >= 24 || minutes >= 60 || seconds >= 60) { + return null; + } + + return (double) (days * 86400 + hours * 3600 + minutes * 60 + seconds); + } catch (NumberFormatException e) { + return null; + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/MkTimeConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/MkTimeConvertFunction.java new file mode 100644 index 00000000000..0127d63e9cd --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/MkTimeConvertFunction.java @@ -0,0 +1,106 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.List; +import java.util.Locale; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.calcite.utils.PPLOperandTypes; +import org.opensearch.sql.expression.datetime.StrftimeFormatterUtil; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** + * PPL mktime() conversion function. Parses a human-readable time string into UNIX epoch seconds + * using strftime format specifiers. Default format: {@code %m/%d/%Y %H:%M:%S}. 
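+ * <p>For example, {@code convert("10/18/2003 20:07:13")} returns {@code 1066507633.0}, and {@code
+ * convertWithFormat("2003-10-18 20:07:13", "%Y-%m-%d %H:%M:%S")} returns the same value (these
+ * values come from the unit tests added in this change).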
+ */ +public class MkTimeConvertFunction extends ImplementorUDF { + + public static final MkTimeConvertFunction INSTANCE = new MkTimeConvertFunction(); + + private static final String DEFAULT_FORMAT = "%m/%d/%Y %H:%M:%S"; + + public MkTimeConvertFunction() { + super(new MkTimeImplementor(), NullPolicy.ANY); + } + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return ReturnTypes.explicit( + factory -> + factory.createTypeWithNullability(factory.createSqlType(SqlTypeName.DOUBLE), true)); + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + return PPLOperandTypes.ANY_OPTIONAL_STRING; + } + + public static class MkTimeImplementor implements NotNullImplementor { + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + if (translatedOperands.isEmpty()) { + return Expressions.constant(null, Double.class); + } + Expression fieldValue = Expressions.box(translatedOperands.get(0)); + if (translatedOperands.size() == 1) { + return Expressions.call(MkTimeConvertFunction.class, "convert", fieldValue); + } + Expression timeFormat = Expressions.box(translatedOperands.get(1)); + return Expressions.call( + MkTimeConvertFunction.class, "convertWithFormat", fieldValue, timeFormat); + } + } + + public static Object convert(Object value) { + return convertWithFormat(value, null); + } + + public static Object convertWithFormat(Object value, Object timeFormatObj) { + Double numeric = CTimeConvertFunction.toEpochSeconds(value); + if (numeric != null) { + return numeric; + } + if (value == null) { + return null; + } + String str = value instanceof String ? ((String) value).trim() : value.toString().trim(); + if (str.isEmpty()) { + return null; + } + + String strftimeFormat = + (timeFormatObj != null) ? timeFormatObj.toString().trim() : DEFAULT_FORMAT; + if (strftimeFormat.isEmpty()) { + return null; + } + return parseWithFormat(str, strftimeFormat); + } + + private static Object parseWithFormat(String dateStr, String strftimeFormat) { + try { + String javaPattern = StrftimeFormatterUtil.toJavaPattern(strftimeFormat); + DateTimeFormatter formatter = DateTimeFormatter.ofPattern(javaPattern, Locale.ROOT); + LocalDateTime dateTime = LocalDateTime.parse(dateStr, formatter); + return (double) dateTime.toEpochSecond(ZoneOffset.UTC); + } catch (DateTimeParseException | IllegalArgumentException e) { + return null; + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/MsTimeConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/MsTimeConvertFunction.java new file mode 100644 index 00000000000..362896b06b9 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/MsTimeConvertFunction.java @@ -0,0 +1,66 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * PPL mstime() conversion function. Converts {@code [MM:]SS.SSS} format to seconds The minutes + * portion is optional. 
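+ * <p>For example, {@code convert("03:45.123")} returns {@code 225.123} and {@code
+ * convert("45.123")} returns {@code 45.123} (values taken from the unit tests added in this
+ * change).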
+ */ +public class MsTimeConvertFunction extends BaseConversionUDF { + + public static final MsTimeConvertFunction INSTANCE = new MsTimeConvertFunction(); + + // Matches optional MM: prefix, required SS, optional .SSS + private static final Pattern MSTIME_PATTERN = + Pattern.compile("^(?:(\\d{1,2}):)?(\\d{1,2})(?:\\.(\\d{1,3}))?$"); + + public MsTimeConvertFunction() { + super(MsTimeConvertFunction.class); + } + + public static Object convert(Object value) { + return INSTANCE.convertValue(value); + } + + @Override + protected Object applyConversion(String preprocessedValue) { + Double existingSeconds = tryParseDouble(preprocessedValue); + if (existingSeconds != null) { + return existingSeconds; + } + + Matcher matcher = MSTIME_PATTERN.matcher(preprocessedValue); + if (!matcher.matches()) { + return null; + } + + try { + int minutes = matcher.group(1) != null ? Integer.parseInt(matcher.group(1)) : 0; + int seconds = Integer.parseInt(matcher.group(2)); + + if (seconds >= 60) { + return null; + } + + double millis = 0.0; + if (matcher.group(3) != null) { + String milliStr = matcher.group(3); + // Pad to 3 digits + while (milliStr.length() < 3) { + milliStr += "0"; + } + millis = Double.parseDouble(milliStr.substring(0, 3)) / 1000.0; + } + + return (double) (minutes * 60 + seconds) + millis; + } catch (NumberFormatException e) { + return null; + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java index 11e1a33afbd..fcc7d1a4640 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java @@ -14,6 +14,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeTransforms; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -43,7 +44,7 @@ public MinspanBucketFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.VARCHAR_2000; + return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE); } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java index a8f2625b20f..e0b10803ea4 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java @@ -14,6 +14,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeTransforms; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -47,7 +48,7 @@ public RangeBucketFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.VARCHAR_2000; + return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE); } 
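// Illustrative sketch, not part of the diff itself: the nullable-VARCHAR return-type inference now
// shared by the bin/bucket UDFs, composed from Calcite's built-ins. VARCHAR_2000 alone infers a
// non-nullable VARCHAR(2000); adding FORCE_NULLABLE lets the UDF legally evaluate to null.
SqlReturnTypeInference nullableVarchar2000 =
    ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE);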
@Override diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java index 6970e485525..8610eb8ee9c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java @@ -14,6 +14,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeTransforms; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -41,7 +42,7 @@ public SpanBucketFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.VARCHAR_2000; + return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE); } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java b/core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java index 7e194dfbf22..599f0cce410 100644 --- a/core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java +++ b/core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java @@ -13,6 +13,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import org.opensearch.sql.common.error.ErrorCode; +import org.opensearch.sql.common.error.ErrorReport; /** * Common utilities for regex operations. Provides pattern caching and consistent matching behavior. @@ -69,11 +71,15 @@ public static List getNamedGroupCandidates(String pattern) { String groupName = anyGroupMatcher.group(1); if (!isValidJavaRegexGroupName(groupName)) { - throw new IllegalArgumentException( - String.format( - "Invalid capture group name '%s'. Java regex group names must start with a letter" - + " and contain only letters and digits.", - groupName)); + throw ErrorReport.wrap( + new IllegalArgumentException( + String.format("Invalid capture group name '%s'.", groupName))) + .code(ErrorCode.SYNTAX_ERROR) + .location("while validating the capture groups for the pattern") + .suggestion( + "Java Regex capture groups must be alphanumeric and start with a letter. Update the" + + " capture group to be alphanumeric.") + .build(); } } diff --git a/core/src/main/java/org/opensearch/sql/planner/Planner.java b/core/src/main/java/org/opensearch/sql/planner/Planner.java index 4625d72d3fc..8a015bc072b 100644 --- a/core/src/main/java/org/opensearch/sql/planner/Planner.java +++ b/core/src/main/java/org/opensearch/sql/planner/Planner.java @@ -14,6 +14,7 @@ import org.opensearch.sql.planner.optimizer.LogicalPlanOptimizer; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.storage.Table; +import org.opensearch.sql.storage.read.TableScanBuilder; /** Planner that plans and chooses the optimal physical plan. */ @RequiredArgsConstructor @@ -34,7 +35,35 @@ public PhysicalPlan plan(LogicalPlan plan) { if (table == null) { return plan.accept(new DefaultImplementor<>(), null); } - return table.implement(table.optimize(optimize(plan))); + LogicalPlan optimized = table.optimize(optimize(plan)); + // Give scan builders a chance to reject shapes that push-down alone cannot express safely + // (e.g. 
operators that land above the scan but outside its push-down contract). + validateScanBuilders(optimized); + return table.implement(optimized); + } + + /** + * Walk the optimized plan and invoke {@link TableScanBuilder#validatePlan(LogicalPlan)} on every + * scan builder, passing the fully optimized root so scan builders can inspect their ancestors. + */ + private void validateScanBuilders(LogicalPlan optimized) { + optimized.accept( + new LogicalPlanNodeVisitor() { + @Override + public Void visitNode(LogicalPlan node, Object context) { + for (LogicalPlan child : node.getChild()) { + child.accept(this, context); + } + return null; + } + + @Override + public Void visitTableScanBuilder(TableScanBuilder node, Object context) { + node.validatePlan(optimized); + return null; + } + }, + null); } private Table findTable(LogicalPlan plan) { diff --git a/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java b/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java index b2da0b67a4b..3d2fb2872e5 100644 --- a/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java +++ b/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java @@ -119,6 +119,19 @@ public boolean pushDownPageSize(LogicalPaginate paginate) { return false; } + /** + * Post-optimization validation hook. Called once by the planner after all push-down rules have + * run, with the fully optimized plan root. Subclasses may inspect the ancestors of this scan + * builder to reject planner shapes that push-down alone cannot express safely (for example, + * operators that land above the scan but outside its push-down contract and would be executed + * after the scan has already returned a bounded result set). Default is no-op. + * + * @param root the fully optimized logical plan containing this scan builder + */ + public void validatePlan(LogicalPlan root) { + // no-op by default + } + @Override public R accept(LogicalPlanNodeVisitor visitor, C context) { return visitor.visitTableScanBuilder(this, context); diff --git a/core/src/test/java/org/opensearch/sql/executor/DelegatingExecutionEngineTest.java b/core/src/test/java/org/opensearch/sql/executor/DelegatingExecutionEngineTest.java new file mode 100644 index 00000000000..6e7c59d6ac6 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/executor/DelegatingExecutionEngineTest.java @@ -0,0 +1,164 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.executor; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.List; +import org.apache.calcite.rel.RelNode; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.common.response.ResponseListener; +import org.opensearch.sql.planner.physical.PhysicalPlan; + +@ExtendWith(MockitoExtension.class) +class DelegatingExecutionEngineTest { + + @Mock private ExecutionEngine defaultEngine; + + @Mock private ExecutionEngine extension1; + + @Mock private ExecutionEngine extension2; + + @Mock private RelNode relNode; + + @Mock private CalcitePlanContext calciteContext; + + @Mock private PhysicalPlan 
physicalPlan; + + @Mock private ExecutionContext executionContext; + + @Mock private ResponseListener queryListener; + + @Mock private ResponseListener explainListener; + + @Test + void executeRelNodeRoutesToMatchingExtension() { + when(extension1.canVectorize(relNode)).thenReturn(true); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1, extension2)); + + engine.execute(relNode, calciteContext, queryListener); + + verify(extension1).execute(relNode, calciteContext, queryListener); + verify(defaultEngine, never()).execute(any(RelNode.class), any(), eq(queryListener)); + } + + @Test + void executeRelNodeFallsBackToDefaultWhenNoExtensionMatches() { + when(extension1.canVectorize(relNode)).thenReturn(false); + when(extension2.canVectorize(relNode)).thenReturn(false); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1, extension2)); + + engine.execute(relNode, calciteContext, queryListener); + + verify(defaultEngine).execute(relNode, calciteContext, queryListener); + verify(extension1, never()).execute(any(RelNode.class), any(), eq(queryListener)); + verify(extension2, never()).execute(any(RelNode.class), any(), eq(queryListener)); + } + + @Test + void executeRelNodeRoutesToFirstMatchingExtension() { + when(extension1.canVectorize(relNode)).thenReturn(true); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1, extension2)); + + engine.execute(relNode, calciteContext, queryListener); + + verify(extension1).execute(relNode, calciteContext, queryListener); + verify(extension2, never()).execute(any(RelNode.class), any(), eq(queryListener)); + } + + @Test + void explainRelNodeRoutesToMatchingExtension() { + when(extension1.canVectorize(relNode)).thenReturn(true); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + engine.explain(relNode, ExplainMode.STANDARD, calciteContext, explainListener); + + verify(extension1).explain(relNode, ExplainMode.STANDARD, calciteContext, explainListener); + verify(defaultEngine, never()).explain(any(RelNode.class), any(), any(), eq(explainListener)); + } + + @Test + void explainRelNodeFallsBackToDefaultWhenNoExtensionMatches() { + when(extension1.canVectorize(relNode)).thenReturn(false); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + engine.explain(relNode, ExplainMode.STANDARD, calciteContext, explainListener); + + verify(defaultEngine).explain(relNode, ExplainMode.STANDARD, calciteContext, explainListener); + } + + @Test + void canVectorizeReturnsTrueWhenExtensionMatches() { + when(extension1.canVectorize(relNode)).thenReturn(false); + when(extension2.canVectorize(relNode)).thenReturn(true); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1, extension2)); + + assert engine.canVectorize(relNode); + } + + @Test + void canVectorizeReturnsFalseWhenNoExtensionMatches() { + when(extension1.canVectorize(relNode)).thenReturn(false); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + assert !engine.canVectorize(relNode); + } + + @Test + void physicalPlanExecuteDelegatesToDefault() { + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + engine.execute(physicalPlan, queryListener); + + verify(defaultEngine).execute(physicalPlan, 
queryListener); + } + + @Test + void physicalPlanExecuteWithContextDelegatesToDefault() { + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + engine.execute(physicalPlan, executionContext, queryListener); + + verify(defaultEngine).execute(physicalPlan, executionContext, queryListener); + } + + @Test + void physicalPlanExplainDelegatesToDefault() { + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + engine.explain(physicalPlan, explainListener); + + verify(defaultEngine).explain(physicalPlan, explainListener); + } + + @Test + void emptyExtensionsListAlwaysFallsBackToDefault() { + DelegatingExecutionEngine engine = new DelegatingExecutionEngine(defaultEngine, List.of()); + + engine.execute(relNode, calciteContext, queryListener); + + verify(defaultEngine).execute(relNode, calciteContext, queryListener); + } +} diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ConversionFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ConversionFunctionsTest.java index 163d6508445..490f72ba346 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/udf/ConversionFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ConversionFunctionsTest.java @@ -336,4 +336,132 @@ public void testRmunitConvertNumericExtremes() { assertEquals(1.7e308, RmunitConvertFunction.convert("1.7e308")); assertEquals(-1.7e308, RmunitConvertFunction.convert("-1.7e308")); } + + // ctime() Function Tests + @Test + public void testCtimeConvertBasic() { + // Default format is %m/%d/%Y %H:%M:%S + assertEquals("10/18/2003 20:07:13", CTimeConvertFunction.convert(1066507633)); + assertEquals("01/01/1970 00:00:00", CTimeConvertFunction.convert(0)); + assertEquals("10/18/2003 20:07:13", CTimeConvertFunction.convert("1066507633")); + } + + @Test + public void testCtimeConvertInvalid() { + assertNull(CTimeConvertFunction.convert("invalid")); + assertNull(CTimeConvertFunction.convert(null)); + assertNull(CTimeConvertFunction.convert("")); + assertNull(CTimeConvertFunction.convert("abc123")); + } + + // mktime() Function Tests + @Test + public void testMktimeConvertBasic() { + // Default format is %m/%d/%Y %H:%M:%S + assertEquals(1066507633.0, MkTimeConvertFunction.convert("10/18/2003 20:07:13")); + assertEquals(946684800.0, MkTimeConvertFunction.convert("01/01/2000 00:00:00")); + assertEquals(1066473433.0, MkTimeConvertFunction.convert(1066473433)); + assertEquals(1066473433.0, MkTimeConvertFunction.convert("1066473433")); + } + + @Test + public void testMktimeConvertInvalid() { + assertNull(MkTimeConvertFunction.convert("invalid")); + assertNull(MkTimeConvertFunction.convert(null)); + assertNull(MkTimeConvertFunction.convert("")); + assertNull(MkTimeConvertFunction.convert("not-a-date")); + } + + // mstime() Function Tests + @Test + public void testMstimeConvertBasic() { + assertEquals(225.0, MsTimeConvertFunction.convert("03:45")); + assertEquals(225.123, MsTimeConvertFunction.convert("03:45.123")); + assertEquals(90.5, MsTimeConvertFunction.convert("01:30.5")); + assertEquals(3661.0, MsTimeConvertFunction.convert("61:01")); + + // SS.SSS without MM: prefix + assertEquals(45.123, MsTimeConvertFunction.convert("45.123")); + assertEquals(30.0, MsTimeConvertFunction.convert("30")); + + // Test already numeric + assertEquals(225.0, MsTimeConvertFunction.convert(225)); + assertEquals(225.0, MsTimeConvertFunction.convert("225")); + } + 
+ @Test + public void testMstimeConvertEdgeCases() { + assertEquals(0.0, MsTimeConvertFunction.convert("00:00")); + assertEquals(0.001, MsTimeConvertFunction.convert("00:00.001")); + assertEquals(59.999, MsTimeConvertFunction.convert("00:59.999")); + } + + @Test + public void testMstimeConvertInvalid() { + assertNull(MsTimeConvertFunction.convert("invalid")); + assertNull(MsTimeConvertFunction.convert(null)); + assertNull(MsTimeConvertFunction.convert("")); + assertNull(MsTimeConvertFunction.convert("25:70")); + assertNull(MsTimeConvertFunction.convert("1:2:3")); + } + + // dur2sec() Function Tests + @Test + public void testDur2secConvertBasic() { + assertEquals(5025.0, Dur2SecConvertFunction.convert("01:23:45")); + assertEquals(3661.0, Dur2SecConvertFunction.convert("01:01:01")); + assertEquals(217815.0, Dur2SecConvertFunction.convert("2+12:30:15")); + assertEquals(90061.0, Dur2SecConvertFunction.convert("1+01:01:01")); + assertEquals(5025.0, Dur2SecConvertFunction.convert(5025)); + assertEquals(5025.0, Dur2SecConvertFunction.convert("5025")); + } + + @Test + public void testDur2secConvertEdgeCases() { + assertEquals(0.0, Dur2SecConvertFunction.convert("00:00:00")); + assertEquals(86400.0, Dur2SecConvertFunction.convert("1+00:00:00")); + assertEquals(3599.0, Dur2SecConvertFunction.convert("00:59:59")); + } + + @Test + public void testDur2secConvertInvalid() { + assertNull(Dur2SecConvertFunction.convert("invalid")); + assertNull(Dur2SecConvertFunction.convert(null)); + assertNull(Dur2SecConvertFunction.convert("")); + assertNull(Dur2SecConvertFunction.convert("25:70:80")); + assertNull(Dur2SecConvertFunction.convert("1:2")); + assertNull(Dur2SecConvertFunction.convert("1+2")); + } + + // timeformat tests for mktime() and ctime() + @Test + public void testMktimeWithCustomTimeformat() { + // Strftime format specifiers + assertEquals( + 1066507633.0, + MkTimeConvertFunction.convertWithFormat("18/10/2003 20:07:13", "%d/%m/%Y %H:%M:%S")); + assertEquals( + 1066507633.0, + MkTimeConvertFunction.convertWithFormat("2003-10-18 20:07:13", "%Y-%m-%d %H:%M:%S")); + assertEquals( + 946684800.0, + MkTimeConvertFunction.convertWithFormat("01/01/2000 00:00:00", "%d/%m/%Y %H:%M:%S")); + + // Invalid format returns null + assertNull(MkTimeConvertFunction.convertWithFormat("2003-10-18 20:07:13", "invalid format")); + + assertNull(MkTimeConvertFunction.convertWithFormat("10/18/2003 20:07:13", "")); + } + + @Test + public void testCtimeWithCustomTimeformat() { + // Strftime format specifiers + assertEquals( + "2003-10-18 20:07:13", + CTimeConvertFunction.convertWithFormat(1066507633, "%Y-%m-%d %H:%M:%S")); + assertEquals("18/10/2003", CTimeConvertFunction.convertWithFormat(1066507633, "%d/%m/%Y")); + assertEquals("1970", CTimeConvertFunction.convertWithFormat(0, "%Y")); + + assertNull(CTimeConvertFunction.convertWithFormat(1066507633, "")); + } } diff --git a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java index 8159fd6c115..5916205e0f4 100644 --- a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java @@ -19,6 +19,8 @@ import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.LiteralExpression; +import org.opensearch.sql.expression.function.jsonUDF.JsonDeleteFunctionImpl; +import 
org.opensearch.sql.expression.function.jsonUDF.JsonSetFunctionImpl; import org.opensearch.sql.expression.function.jsonUDF.JsonUtils; @ExtendWith(MockitoExtension.class) @@ -65,6 +67,18 @@ void test_convertToJsonPath() { assertEquals(targetJsonPath, convertedJsonPath); } + @Test + void test_convertToJsonPathWithDollarPrefix() { + // Issue #5167: paths already starting with $ or $. should not be double-prefixed + assertEquals("$.name", convertToJsonPath("$.name")); + assertEquals("$.a.b.c", convertToJsonPath("$.a.b.c")); + assertEquals("$.[*]", convertToJsonPath("$.[*]")); + assertEquals("$.a[2].c", convertToJsonPath("$.a[2].c")); + assertEquals("$.[3].bc[*].d[1]", convertToJsonPath("$.[3].bc[*].d[1]")); + // Bare $ should return $ + assertEquals("$", convertToJsonPath("$")); + } + @Test void test_convertToJsonPathWithWrongPath() { IllegalArgumentException e = @@ -100,6 +114,23 @@ void test_jsonPathExpand() { assertEquals(expandJsonPath(node, candidate4), target4); } + @Test + void test_jsonSetWithDollarPrefixedPath() { + // Issue #5167: json_set with $.key path should work correctly + Object result = + JsonSetFunctionImpl.eval( + "{\"name\":\"alice\",\"scores\":[90,85,92]}", "$.name", "modified_alice"); + assertEquals("{\"name\":\"modified_alice\",\"scores\":[90,85,92]}", result); + } + + @Test + void test_jsonDeleteWithDollarPrefixedPath() throws Exception { + // Issue #5167: json_delete with $.key path should remove the key + Object result = + JsonDeleteFunctionImpl.eval("{\"name\":\"alice\",\"scores\":[90,85,92]}", "$.name"); + assertEquals("{\"scores\":[90,85,92]}", result); + } + @Test void test_jsonPathExpandAtArray() { String jsonStr = "[{\"c\": 1}, {\"c\": 1}, {\"c\": 1}]"; diff --git a/core/src/test/java/org/opensearch/sql/expression/parse/RegexCommonUtilsTest.java b/core/src/test/java/org/opensearch/sql/expression/parse/RegexCommonUtilsTest.java index 2503b3929f1..e20c149d86b 100644 --- a/core/src/test/java/org/opensearch/sql/expression/parse/RegexCommonUtilsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/parse/RegexCommonUtilsTest.java @@ -11,6 +11,7 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.error.ErrorReport; public class RegexCommonUtilsTest { @@ -197,10 +198,8 @@ public void testGetNamedGroupCandidatesWithNumericNames() { public void testGetNamedGroupCandidatesWithInvalidCharactersThrowsException() { // Test that groups with invalid characters throw exception (even if some are valid) String pattern = "(?[a-z]+)\\s+(?<123invalid>[0-9]+)\\s+(?.*)"; - IllegalArgumentException exception = - assertThrows( - IllegalArgumentException.class, - () -> RegexCommonUtils.getNamedGroupCandidates(pattern)); + ErrorReport exception = + assertThrows(ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(pattern)); // Should fail on the first invalid group name found assertTrue(exception.getMessage().contains("Invalid capture group name")); } @@ -217,74 +216,65 @@ public void testGetNamedGroupCandidatesValidAlphanumeric() { @Test public void testGetNamedGroupCandidatesWithUnderscore() { - // Test that underscores in named groups throw IllegalArgumentException + // Test that underscores in named groups throw ErrorReport String patternWithUnderscore = ".+@(?.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> 
RegexCommonUtils.getNamedGroupCandidates(patternWithUnderscore)); assertTrue(exception.getMessage().contains("Invalid capture group name 'domain_name'")); - assertTrue( - exception - .getMessage() - .contains("must start with a letter and contain only letters and digits")); + assertTrue(exception.getSuggestion().contains("must be alphanumeric")); } @Test public void testGetNamedGroupCandidatesWithHyphen() { - // Test that hyphens in named groups throw IllegalArgumentException + // Test that hyphens in named groups throw ErrorReport String patternWithHyphen = ".+@(?.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, - () -> RegexCommonUtils.getNamedGroupCandidates(patternWithHyphen)); + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternWithHyphen)); assertTrue(exception.getMessage().contains("Invalid capture group name 'domain-name'")); - assertTrue( - exception - .getMessage() - .contains("must start with a letter and contain only letters and digits")); + assertTrue(exception.getSuggestion().contains("must be alphanumeric")); } @Test public void testGetNamedGroupCandidatesWithDot() { - // Test that dots in named groups throw IllegalArgumentException + // Test that dots in named groups throw ErrorReport String patternWithDot = ".+@(?.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, - () -> RegexCommonUtils.getNamedGroupCandidates(patternWithDot)); + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternWithDot)); assertTrue(exception.getMessage().contains("Invalid capture group name 'domain.name'")); } @Test public void testGetNamedGroupCandidatesWithSpace() { - // Test that spaces in named groups throw IllegalArgumentException + // Test that spaces in named groups throw ErrorReport String patternWithSpace = ".+@(?.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, - () -> RegexCommonUtils.getNamedGroupCandidates(patternWithSpace)); + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternWithSpace)); assertTrue(exception.getMessage().contains("Invalid capture group name 'domain name'")); } @Test public void testGetNamedGroupCandidatesStartingWithDigit() { - // Test that group names starting with digit throw IllegalArgumentException + // Test that group names starting with digit throw ErrorReport String patternStartingWithDigit = ".+@(?<1domain>.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternStartingWithDigit)); assertTrue(exception.getMessage().contains("Invalid capture group name '1domain'")); } @Test public void testGetNamedGroupCandidatesWithSpecialCharacters() { - // Test that special characters in named groups throw IllegalArgumentException + // Test that special characters in named groups throw ErrorReport String patternWithSpecialChar = ".+@(?.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternWithSpecialChar)); assertTrue(exception.getMessage().contains("Invalid capture group name 'domain@name'")); } @@ -304,10 +294,9 @@ public void testGetNamedGroupCandidatesWithMixedInvalidValid() { // Test that even one invalid group name fails the entire 
validation String patternWithMixed = "(?[a-z]+)\\s+(?[0-9]+)\\s+(?.*)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, - () -> RegexCommonUtils.getNamedGroupCandidates(patternWithMixed)); + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternWithMixed)); assertTrue(exception.getMessage().contains("Invalid capture group name 'invalid_name'")); } } diff --git a/docs/category.json b/docs/category.json index 5e9b6f954a5..2342ada464d 100644 --- a/docs/category.json +++ b/docs/category.json @@ -48,6 +48,7 @@ "user/ppl/cmd/top.md", "user/ppl/cmd/trendline.md", "user/ppl/cmd/transpose.md", + "user/ppl/cmd/union.md", "user/ppl/cmd/where.md", "user/ppl/functions/aggregations.md", "user/ppl/functions/collection.md", diff --git a/docs/user/dql/vector-search.rst b/docs/user/dql/vector-search.rst new file mode 100644 index 00000000000..8b0237a6ef0 --- /dev/null +++ b/docs/user/dql/vector-search.rst @@ -0,0 +1,331 @@ + +============================== +Vector Search [Experimental] +============================== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + +Introduction +============ + +``vectorSearch()`` is an experimental feature. Syntax, options, and +pushdown behavior may change in future releases based on feedback. + +The ``vectorSearch()`` table function runs a k-NN query against a ``knn_vector`` +field and exposes the matching documents as a relation in the ``FROM`` clause. +It relies on the OpenSearch `k-NN plugin +`_. The target index must +map the vector field as ``knn_vector`` and the index must be created with +``index.knn: true``. + +The SQL layer translates ``vectorSearch()`` into an OpenSearch search +request whose body is native k-NN query DSL; the query vector is parsed +into a numeric array before that DSL is emitted. + +Relevance is expressed through the OpenSearch ``_score`` metadata field, and +results are returned ordered by ``_score DESC`` by default. + +vectorSearch +============ + +Description +----------- + +``vectorSearch(table='', field='', vector='', option='')`` + +All four arguments are required and must be passed by name as string +literals. Positional arguments, or a mix of positional and named +arguments, are not supported. For example, the following is invalid:: + + FROM vectorSearch('my_vectors', field='embedding', + vector='[0.1,0.2]', option='k=5') AS v + +A table alias is required. Projected fields are referenced through the +alias (``v._id``, ``v._score``, ``v.category``). + +If the ``opensearch-knn`` plugin is not installed on the target cluster, +query execution fails with a ``vectorSearch() requires the k-NN plugin`` +error. ``_explain`` continues to work without the plugin. + +Arguments +--------- + +- ``table``: single concrete index or alias to search. Wildcards + (``*``), comma-separated multi-index targets, ``_all``, ``.``, and + ``..`` are not supported. The target index must have + ``index.knn: true`` and map the target field as ``knn_vector``. A + normal alias name is accepted. If the alias resolves to multiple + backing indices, the SQL layer does not prevalidate that every + backing index has a compatible ``knn_vector`` mapping, dimension, or + engine; OpenSearch execution remains the source of truth for those + checks. +- ``field``: name of the ``knn_vector`` field. +- ``vector``: query vector as a JSON-style array of numbers, passed as a + string (for example, ``'[0.1, 0.2, 0.3]'``). Components must be + comma-separated finite numbers. 
Semicolon, colon, and pipe separators + are not supported, and empty components (for example, ``'[1.0,,2.0]'`` + or ``'[1.0,]'``) return an error. The vector dimension must match the + ``knn_vector`` mapping on the target index. +- ``option``: comma-separated ``key=value`` pairs. Exactly one of ``k``, + ``max_distance``, or ``min_score`` is required. ``filter_type`` is + optional. + +Supported option keys +--------------------- + +Option keys are lower-case and case-sensitive. ``K=5`` or +``Filter_Type=post`` returns an "Unknown option key" error. + +- ``k``: top-k mode. Integer between 1 and 10000. The query returns up to + ``k`` nearest neighbors. +- ``max_distance``: radial mode. Non-negative number. Matches documents + within the given distance of the query vector. ``LIMIT`` is required and + caps the returned rows. +- ``min_score``: radial mode. Non-negative number. Matches documents with + score at or above the given threshold. ``LIMIT`` is required and caps + the returned rows. +- ``filter_type``: ``post`` or ``efficient``. Controls how a ``WHERE`` + clause is applied. See `Filtering`_. + +``k``, ``max_distance``, and ``min_score`` are mutually exclusive; specify +exactly one. + +Native k-NN tuning options (for example, ``method_parameters.ef_search``, +``method_parameters.nprobes``, ``rescore.oversample_factor``) are not +supported through ``vectorSearch()`` and return an "Unknown option +key" error. + +Syntax +------ + +:: + + SELECT + FROM vectorSearch( + table='', + field='', + vector='', + option='' + ) AS + [WHERE ] + [ORDER BY ._score DESC] + [LIMIT ] + +Example 1: Top-k +---------------- + +Return the five nearest neighbors of a query vector:: + + POST /_plugins/_sql + { + "query" : """ + SELECT v._id, v._score + FROM vectorSearch( + table='my_vectors', + field='embedding', + vector='[0.1, 0.2, 0.3]', + option='k=5' + ) AS v + """ + } + +In top-k mode, the request size defaults to ``k``; adding ``LIMIT n`` further +reduces the row count, but ``n`` must not exceed ``k``. + +Example 2: Radial search (``max_distance``) +------------------------------------------- + +Return up to the specified ``LIMIT`` documents within a maximum distance +of the query vector. ``LIMIT`` is required for radial searches; without +it the result set would be unbounded:: + + POST /_plugins/_sql + { + "query" : """ + SELECT v._id, v._score + FROM vectorSearch( + table='my_vectors', + field='embedding', + vector='[0.1, 0.2, 0.3]', + option='max_distance=0.5' + ) AS v + LIMIT 100 + """ + } + +Example 3: Radial search (``min_score``) +---------------------------------------- + +Return up to the specified ``LIMIT`` documents whose score is at or +above the given threshold. ``LIMIT`` is required for radial searches; +without it the result set would be unbounded:: + + POST /_plugins/_sql + { + "query" : """ + SELECT v._id, v._score + FROM vectorSearch( + table='my_vectors', + field='embedding', + vector='[0.1, 0.2, 0.3]', + option='min_score=0.8' + ) AS v + LIMIT 100 + """ + } + +Filtering +========= + +A ``WHERE`` clause on non-vector fields of the ``vectorSearch()`` alias is +pushed down to OpenSearch when it can be translated to an OpenSearch filter. +Two placement strategies are available via the ``filter_type`` option: + +- ``efficient`` (default): the ``WHERE`` predicate is embedded directly + inside the k-NN query (``knn.filter``), enabling native efficient + k-NN filtering during vector search. 
Efficient filtering depends on + native k-NN engine and method support; if the target index does not + support ``knn.filter`` for the configured engine and method, set + ``filter_type=post``. See the `k-NN filtering guide + `_ + for engine and method requirements. +- ``post``: the k-NN query is placed in a scoring (``bool.must``) + context and the ``WHERE`` predicate is placed as a non-scoring + ``bool.filter`` outside the k-NN clause. This is Boolean filter + placement, not the REST ``post_filter`` parameter, and may return + fewer than ``k`` rows when the filter is selective. + +Full-text predicates (``match``, ``match_phrase``, ``multi_match``, and +the rest of the full-text family) under a ``WHERE`` clause are used as +filters, not as hybrid keyword-vector score fusion. Their placement +follows ``filter_type``: the default (``efficient``) embeds supported +full-text predicates under ``knn.filter``, while ``post`` places them +in ``bool.filter`` outside the k-NN clause. In both cases they restrict +which candidates are retained but their text relevance score does not +combine with the vector ``_score``. ``vectorSearch()`` is not a hybrid +vector + text relevance scorer. + +Behavior depends on whether ``filter_type`` is specified: + +- **Omitted (default, ``efficient``)**: the ``WHERE`` predicate is + embedded under ``knn.filter`` so the k-NN engine applies native + efficient filtering during vector search. A query with no ``WHERE`` + clause is valid. ``efficient`` supports simple native filters: + ``term``, ``range``, ``wildcard``, ``exists``, full-text family + (``match``, ``match_phrase``, ``match_phrase_prefix``, + ``match_bool_prefix``, ``multi_match``, ``query_string``, + ``simple_query_string``), and boolean combinations of those filters. + Predicates that compile to script queries (arithmetic, function calls + on indexed fields, ``CASE``, date math), nested predicates, and other + query shapes are not supported under ``knn.filter`` and return an + error. Set ``filter_type=post`` to apply such predicates after the + k-NN search. If the predicate cannot be translated to an OpenSearch + filter query at all (a distinct translation failure from the + unsupported-shape cases above), the default path falls back to + evaluating the ``WHERE`` clause in memory after the k-NN results are + returned. +- **Explicit ``efficient``**: same contract as the default. Specifying + it is useful when a query should be explicit about the placement + strategy and should fail if the predicate cannot be safely embedded + under ``knn.filter``. +- **Explicit ``post``**: a ``WHERE`` clause is required and must be + translatable to an OpenSearch filter query. Predicates that translate + to native OpenSearch queries are pushed down as a ``bool.filter`` + alongside the k-NN query. Predicates that do not have a native + equivalent (for example, arithmetic or function calls on indexed + fields) are pushed down as an OpenSearch script query and evaluated + server-side. If predicate translation itself fails, the query returns + an error; there is no silent in-memory fallback under explicit + ``post``. Use ``filter_type=post`` when the predicate shape is not + supported by efficient filtering. 
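As a rough illustration of the two placements described above, the following sketches show only the intended shape of the emitted search body; the field and predicate names are taken from the filtering examples below, and the exact DSL produced by the SQL layer may differ. With ``filter_type=efficient`` (the default) the predicate is embedded under ``knn.filter``::

    # illustrative shape only, not the exact request body
    { "query": { "knn": { "embedding": {
        "vector": [0.1, 0.2, 0.3], "k": 10,
        "filter": { "term": { "category": "books" } } } } } }

With ``filter_type=post`` the k-NN clause stays in a scoring context and the predicate is applied as a separate non-scoring filter::

    # illustrative shape only, not the exact request body
    { "query": { "bool": {
        "must":   [ { "knn": { "embedding": { "vector": [0.1, 0.2, 0.3], "k": 10 } } } ],
        "filter": [ { "term": { "category": "books" } } ] } } }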
+ +Example 4: Default efficient filtering (no ``filter_type``) +----------------------------------------------------------- + +:: + + POST /_plugins/_sql + { + "query" : """ + SELECT v._id, v._score, v.category + FROM vectorSearch( + table='my_vectors', + field='embedding', + vector='[0.1, 0.2, 0.3]', + option='k=10' + ) AS v + WHERE v.category = 'books' + """ + } + +The predicate is embedded under ``knn.filter`` so the k-NN engine +applies native efficient filtering during vector search. + +Example 5: Post-filtering for predicates not supported by efficient mode +------------------------------------------------------------------------ + +Use ``filter_type=post`` for predicates that do not fit the ``efficient`` +allow-list, such as arithmetic or function calls on indexed fields:: + + POST /_plugins/_sql + { + "query" : """ + SELECT v._id, v._score, v.category + FROM vectorSearch( + table='my_vectors', + field='embedding', + vector='[0.1, 0.2, 0.3]', + option='k=10,filter_type=post' + ) AS v + WHERE v.price * 1.1 < 100 + """ + } + +Scoring, sorting, and limits +============================ + +- ``vectorSearch()`` exposes the OpenSearch ``_score`` metadata field on the + alias. For an alias ``v``, select it as ``v._score``. +- ``_score`` can be selected and referenced in ``ORDER BY``, but it cannot + appear in ``WHERE``. Use ``option='min_score=...'`` for score-threshold + vector search. +- Results are returned in ``_score DESC`` order by default. The only + supported ``ORDER BY`` expression is ``._score DESC`` (for + example, ``v._score DESC``). +- In top-k mode (``k=N``), ``LIMIT n`` is optional; when present, ``n`` must + be ``≤ k``. +- In radial mode (``max_distance`` or ``min_score``), ``LIMIT`` is required. +- ``OFFSET`` is not supported on ``vectorSearch()``. Use ``LIMIT`` only. + +Limitations +=========== + +The following are not supported on ``vectorSearch()``: + +- ``GROUP BY`` and aggregations directly over a ``vectorSearch()`` + relation are not supported and return an error. +- Operators wrapped around a ``vectorSearch()`` subquery are rejected + when they would run after ``vectorSearch()`` has already produced a + finite result set, because they can silently yield zero, skipped, or + incorrectly ordered rows. Specifically, an outer ``WHERE``, + ``ORDER BY``, ``OFFSET`` (non-zero), ``GROUP BY``, aggregation, or + ``DISTINCT`` applied to a ``vectorSearch()`` subquery returns an + error. Place ``WHERE`` predicates inside the subquery, directly on + the ``vectorSearch()`` alias, so that they participate in ``WHERE`` + pushdown. A plain outer ``LIMIT`` (without ``OFFSET``) wrapping a + ``vectorSearch()`` subquery is allowed and caps the returned rows. +- ``JOIN`` between a ``vectorSearch()`` relation and another relation is + not supported. +- ``UNION`` / ``INTERSECT`` / ``EXCEPT`` combining a ``vectorSearch()`` + relation with another relation is not supported. +- Multiple ``vectorSearch()`` calls in the same query are not supported. +- The query vector must be supplied as a literal. Parameterized vectors + (for example, values bound from another column) are not supported. +- Indexes that define a user field named ``_score`` cannot be queried + with ``vectorSearch()`` because ``_score`` is reserved for the + synthetic vector score exposed on the alias. Rename the field or query + the index with a plain ``SELECT``. 
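As noted in the limitations above, a plain outer ``LIMIT`` is the one operator that may wrap a ``vectorSearch()`` subquery. A minimal sketch, reusing the index, field, and predicate from the earlier examples::

    -- Accepted: the WHERE predicate stays inside the subquery, on the
    -- vectorSearch() alias, and the outer query only caps the row count.
    SELECT *
    FROM (
      SELECT v._id, v._score
      FROM vectorSearch(
        table='my_vectors',
        field='embedding',
        vector='[0.1, 0.2, 0.3]',
        option='k=10'
      ) AS v
      WHERE v.category = 'books'
    ) AS t
    LIMIT 5

    -- Rejected: moving the WHERE predicate (or adding ORDER BY, OFFSET,
    -- GROUP BY, or DISTINCT) to the outer query returns an error.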
diff --git a/docs/user/general/identifiers.rst b/docs/user/general/identifiers.rst index f4d455deb5c..49921d1fd79 100644 --- a/docs/user/general/identifiers.rst +++ b/docs/user/general/identifiers.rst @@ -150,7 +150,7 @@ Description To query multiple indices, you could 1. Include ``*`` in index name, this is an index pattern for wildcard match. -2. Delimited multiple indices and seperated them by ``,``. Note: no space allowed between each index. +2. Delimit multiple indices with ``,`` and enclose the entire comma-separated list in backticks. Note: no space allowed between each index. Examples diff --git a/docs/user/index.rst b/docs/user/index.rst index bb4b6399198..32ce39ed93d 100644 --- a/docs/user/index.rst +++ b/docs/user/index.rst @@ -43,6 +43,8 @@ OpenSearch SQL enables you to extract insights out of OpenSearch using the famil - `Window Functions `_ + - `Vector Search `_ + * **Beyond SQL** - `PartiQL (JSON) Support `_ diff --git a/docs/user/ppl/cmd/convert.md b/docs/user/ppl/cmd/convert.md index b3fbb7d3577..457ec2563e0 100644 --- a/docs/user/ppl/cmd/convert.md +++ b/docs/user/ppl/cmd/convert.md @@ -7,29 +7,34 @@ The `convert` command uses conversion functions to transform field values into n The `convert` command has the following syntax: ```syntax -convert () [AS ] [, () [AS ]]... +convert [timeformat=] () [AS ] [, () [AS ]]... ``` ## Parameters The `convert` command supports the following parameters. -| Parameter | Required/Optional | Description | -| --- | --- | --- | -| `` | Required | One of the conversion functions: `auto()`, `num()`, `rmcomma()`, `rmunit()`, `memk()`, or `none()`. | -| `` | Required | Single field name to convert. | -| `AS ` | Optional | Create new field with converted value, preserving original field. | +| Parameter | Required/Optional | Description | Default | +| --- | --- | --- | --- | +| `` | Required | One of the conversion functions: `auto()`, `ctime()`, `dur2sec()`, `memk()`, `mktime()`, `mstime()`, `none()`, `num()`, `rmcomma()`, or `rmunit()`. | N/A | +| `` | Required | Single field name to convert. | N/A | +| `AS ` | Optional | Create new field with converted value, preserving original field. | N/A | +| `timeformat=` | Optional | A strftime format string used by `ctime()` and `mktime()`. | `%m/%d/%Y %H:%M:%S`. | ## Conversion Functions | Function | Description | | --- | --- | | `auto(field)` | Automatically converts fields to numbers using intelligent conversion. Handles memory sizes (k/m/g), commas, units, and scientific notation. Returns `null` for non-convertible values. | +| `ctime(field)` | Converts a UNIX epoch timestamp to a human-readable time string. Uses the `timeformat` parameter if specified, otherwise defaults to `%m/%d/%Y %H:%M:%S`. All timestamps are interpreted in UTC timezone. | +| `dur2sec(field)` | Converts a duration string in `HH:MM:SS` format to total seconds. Hours must be less than 24. Returns `null` for invalid formats. | +| `memk(field)` | Converts memory size strings to kilobytes. Accepts numbers with optional k/m/g suffix (case-insensitive). Default unit is kilobytes. Returns `null` for invalid formats. | +| `mktime(field)` | Converts a human-readable time string to a UNIX epoch timestamp. Uses the `timeformat` parameter if specified, otherwise defaults to `%m/%d/%Y %H:%M:%S`. Input strings are interpreted as UTC timezone. | +| `mstime(field)` | Converts a time string in `[MM:]SS.SSS` format to total seconds. The minutes portion is optional. Returns `null` for invalid formats. 
| +| `none(field)` | No-op function that preserves the original field value. | | `num(field)` | Extracts leading numbers from strings. For strings without letters: removes commas as thousands separators. For strings with letters: extracts leading number, stops at letters or commas. Returns `null` for non-convertible values. | | `rmcomma(field)` | Removes commas from field values and converts to a number. Returns `null` if the value contains letters. | | `rmunit(field)` | Extracts leading numeric values from strings. Stops at the first non-numeric character (including commas). Returns `null` for non-convertible values. | -| `memk(field)` | Converts memory size strings to kilobytes. Accepts numbers with optional k/m/g suffix (case-insensitive). Default unit is kilobytes. Returns `null` for invalid formats. | -| `none(field)` | No-op function that preserves the original field value. Used for excluding specific fields from wildcard conversions. | ## Example 1: Basic auto() conversion @@ -241,6 +246,128 @@ fetched rows / total rows = 3/3 **Note:** The `none()` function is particularly useful when wildcard support is implemented, allowing you to exclude specific fields from bulk conversions. +## Example 9: Convert epoch timestamp to time string with ctime() + +```ppl +source=accounts +| eval timestamp = 1066507633 +| convert ctime(timestamp) +| fields timestamp +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++---------------------+ +| timestamp | +|---------------------| +| 10/18/2003 20:07:13 | ++---------------------+ +``` + +## Example 10: Convert time string to epoch with mktime() + +```ppl +source=accounts +| eval date_str = '10/18/2003 20:07:13' +| convert mktime(date_str) +| fields date_str +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++--------------+ +| date_str | +|--------------| +| 1.066507633E9| ++--------------+ +``` + +## Example 11: Using timeformat with ctime() and mktime() + +The `timeformat` parameter specifies a strftime format string for `ctime()` and `mktime()`: + +```ppl +source=accounts +| eval timestamp = 1066507633 +| convert timeformat="%Y-%m-%d %H:%M:%S" ctime(timestamp) +| fields timestamp +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++---------------------+ +| timestamp | +|---------------------| +| 2003-10-18 20:07:13 | ++---------------------+ +``` + +Similarly, you can use `timeformat` with `mktime()` to parse dates in custom formats: + +```ppl +source=accounts +| eval date_str = '2000-01-01 00:00:00' +| convert timeformat="%Y-%m-%d %H:%M:%S" mktime(date_str) +| fields date_str +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++------------+ +| date_str | +|------------| +| 9.466848E8 | ++------------+ +``` + +## Example 12: Convert duration to seconds with dur2sec() + +```ppl +source=accounts +| eval duration = '01:23:45' +| convert dur2sec(duration) +| fields duration +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++----------+ +| duration | +|----------| +| 5025.0 | ++----------+ +``` + +## Example 13: Convert minutes and seconds with mstime() + +```ppl +source=accounts +| eval time_str = '03:45.5' +| convert mstime(time_str) +| fields time_str +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++----------+ +| time_str | +|----------| +| 225.5 | ++----------+ +``` + ## Notes - All 
conversion functions return `null` for values that cannot be converted to a number diff --git a/docs/user/ppl/cmd/mvcombine.md b/docs/user/ppl/cmd/mvcombine.md index 4ccad724ca7..8951b0d7fed 100644 --- a/docs/user/ppl/cmd/mvcombine.md +++ b/docs/user/ppl/cmd/mvcombine.md @@ -124,6 +124,6 @@ source=mvcombine_data Expected output: ```text -{'reason': 'Invalid Query', 'details': 'Field [does_not_exist] not found.', 'type': 'IllegalArgumentException'} +{'context': {'stage': 'analyzing', 'stage_description': 'Parsing and validating the query'}, 'reason': 'Field [does_not_exist] not found.', 'details': 'Field [does_not_exist] not found.', 'location': ['while preparing and validating the query plan'], 'code': 'FIELD_NOT_FOUND', 'type': 'IllegalArgumentException'} Error: Query returned no data ``` \ No newline at end of file diff --git a/docs/user/ppl/cmd/mvexpand.md b/docs/user/ppl/cmd/mvexpand.md index 6fdd9bca365..2bb01f2d0e0 100644 --- a/docs/user/ppl/cmd/mvexpand.md +++ b/docs/user/ppl/cmd/mvexpand.md @@ -132,6 +132,6 @@ source=people Expected output: ```text -{'reason': 'Invalid Query', 'details': "Field 'tags' not found in the schema", 'type': 'SemanticCheckException'} +{'context': {'stage': 'analyzing', 'stage_description': 'Parsing and validating the query', 'command': 'mvexpand'}, 'reason': "Field 'tags' not found in the schema", 'details': "Field 'tags' not found in the schema", 'location': ['while preparing and validating the query plan', 'while evaluating the input field for mvexpand'], 'code': 'FIELD_NOT_FOUND', 'type': 'SemanticCheckException'} Error: Query returned no data ``` \ No newline at end of file diff --git a/docs/user/ppl/cmd/rex.md b/docs/user/ppl/cmd/rex.md index b4fe706f489..6d50a875dd6 100644 --- a/docs/user/ppl/cmd/rex.md +++ b/docs/user/ppl/cmd/rex.md @@ -228,7 +228,7 @@ source=accounts The query returns the following results: ```text -{'reason': 'Invalid Query', 'details': "Invalid capture group name 'user_name'. Java regex group names must start with a letter and contain only letters and digits.", 'type': 'IllegalArgumentException'} +{'reason': "Invalid capture group name 'user_name'.", 'code': 'SYNTAX_ERROR', 'suggestion': 'Java Regex capture groups must be alphanumeric and start with a letter. Update the capture group to be alphanumeric.', 'context': {'stage': 'analyzing', 'stage_description': 'Parsing and validating the query', 'command': 'rex'}, 'details': "Invalid capture group name 'user_name'.", 'location': ['while preparing and validating the query plan', 'while processing the rex command', 'while validating the capture groups for the pattern'], 'type': 'IllegalArgumentException'} Error: Query returned no data ``` diff --git a/docs/user/ppl/cmd/union.md b/docs/user/ppl/cmd/union.md new file mode 100644 index 00000000000..8c148b998c2 --- /dev/null +++ b/docs/user/ppl/cmd/union.md @@ -0,0 +1,197 @@ + +# union + +The `union` command combines results from multiple datasets using UNION ALL semantics. It merges rows from two or more sources into a single result set, preserving all rows including duplicates. You can optionally apply subsequent processing, such as aggregation or sorting, to the combined results. Each dataset can be a subsearch with different filtering criteria, data transformations, and field selections, or a direct index reference. 
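As a quick sketch of a typical pipeline (the index names `web-logs` and `app-logs` and the field `level` are hypothetical placeholders; the runnable examples later in this page use the `accounts` sample index), two direct index references can be merged and the combined rows aggregated in one pass:

```ppl
| union web-logs, app-logs
| stats count() by level
```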
+ +Union is particularly useful for combining data from multiple sources, creating comprehensive datasets from different criteria, and consolidating results while handling schema differences through automatic type coercion. + +Use union for: + +* **Multi-source data combination**: Merge data from different indexes or apply different filters to the same source. +* **Dataset consolidation**: Combine results from different queries while preserving all rows including duplicates. +* **Flexible dataset patterns**: Use subsearches or direct index references with optional maxout control. +* **Schema unification**: Automatically handle different schemas with type coercion for conflicting field types and NULL-fill for missing fields. + +## Syntax + +The `union` command has the following syntax: + +```syntax +union [maxout=] [ ...] +``` + +Each dataset can be: +- **Direct index reference**: `index_name`, `index_pattern*`, `index_alias` +- **Subsearch**: `[search source=index | ]` + +The following are examples of the `union` command syntax: + +```syntax +| union logs-*, security-logs +| union [search source=accounts | where age > 30], [search source=accounts | where age < 30] +| union maxout=100 [search source=logs | fields user, action], [search source=events | fields user, action] +| union [search source=accounts | where status="active"], [search source=accounts | where status="pending"] +``` + +## Parameters + +The `union` command supports the following parameters. + +| Parameter | Required/Optional | Description | +| --- | --- | --- | +| `maxout` | Optional | Maximum number of results to return from the union operation. Default: unlimited (0). | +| `` | Required | At least two datasets are required. Each dataset can be either a subsearch enclosed in square brackets (`[search source=index | ]`) or a direct index reference (for example, `accounts`, `logs-*`). All PPL commands are supported within subsearches. | +| `` | Optional | Commands applied to the merged results after the union operation (for example, `stats`, `sort`, or `head`). | + +## Example 1: Combining age groups for demographic analysis + +This example demonstrates how to merge customers from different age segments into a unified dataset. 
It combines `young` and `adult` customers into a single result set and adds categorization labels for further analysis: + +```ppl +| union [search source=accounts +| where age < 30 +| eval age_group = "young" +| fields firstname, age, age_group] [search source=accounts +| where age >= 30 +| eval age_group = "adult" +| fields firstname, age, age_group] +| sort age +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++-----------+-----+-----------+ +| firstname | age | age_group | +|-----------+-----+-----------| +| Nanette | 28 | young | +| Amber | 32 | adult | +| Dale | 33 | adult | +| Hattie | 36 | adult | ++-----------+-----+-----------+ +``` + + +## Example 2: Combining filtered subsets from the same index + +This example demonstrates how to combine multiple filtered subsets from the same index using union: + +```ppl +| union [search source=accounts | where balance > 30000] [search source=accounts | where age < 30] +| fields firstname, age, balance +| sort balance desc +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++-----------+-----+---------+ +| firstname | age | balance | +|-----------+-----+---------| +| Amber | 32 | 39225 | +| Nanette | 28 | 32838 | +| Nanette | 28 | 32838 | ++-----------+-----+---------+ +``` + +Note: Nanette appears twice because she meets both conditions (balance > 30000 AND age < 30), demonstrating UNION ALL semantics which preserve all rows including duplicates. + + +## Example 3: Mid-pipeline union (implicit first dataset) + +This example demonstrates using union mid-pipeline where the upstream result is implicitly included as the first dataset: + +```ppl +search source=accounts | where age > 30 | union [search source=accounts | where age < 30] +| fields firstname, age +| sort age +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++-----------+-----+ +| firstname | age | +|-----------+-----| +| Nanette | 28 | +| Amber | 32 | +| Dale | 33 | +| Hattie | 36 | ++-----------+-----+ +``` + +Note: The upstream result `where age > 30` is automatically the first dataset, then unioned with `where age < 30`. + + +## Example 4: Using maxout option to limit results + +This example demonstrates how to limit the total number of results returned from a union operation using the `maxout` option. Note that UNION ALL semantics preserve duplicate rows: + +```ppl +| union maxout=3 [search source=accounts +| where balance > 20000] [search source=accounts +| where age > 30] +| fields firstname, age, balance +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++-----------+-----+---------+ +| firstname | age | balance | +|-----------+-----+---------| +| Amber | 32 | 39225 | +| Nanette | 28 | 32838 | +| Amber | 32 | 39225 | ++-----------+-----+---------+ +``` + +Note: Amber appears twice because she meets both conditions (balance > 20000 AND age > 30), demonstrating UNION ALL semantics which preserve all rows including duplicates. + + +## Example 5: Segmenting accounts by balance tier + +This example demonstrates how to create account segments based on balance thresholds for comparative analysis. 
It separates `high_balance` accounts from `regular` accounts and labels them for easy comparison: + +```ppl +| union [search source=accounts +| where balance > 20000 +| eval query_type = "high_balance" +| fields firstname, balance, query_type] [search source=accounts +| where balance > 0 AND balance <= 20000 +| eval query_type = "regular" +| fields firstname, balance, query_type] +| sort balance desc +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++-----------+---------+--------------+ +| firstname | balance | query_type | +|-----------+---------+--------------| +| Amber | 39225 | high_balance | +| Nanette | 32838 | high_balance | +| Hattie | 5686 | regular | +| Dale | 4180 | regular | ++-----------+---------+--------------+ +``` + + +## Limitations + +The `union` command has the following limitations: + +* At least two datasets must be specified. +* When fields with the same name exist across datasets but have different types, the system automatically performs type coercion to find a common supertype: + * **Compatible numeric types** → wider numeric type (for example, `INTEGER` and `BIGINT` coerce to `BIGINT`; `INTEGER` and `FLOAT` coerce to `FLOAT`) + * **String types** → `VARCHAR` (for example, `CHAR` and `VARCHAR` coerce to `VARCHAR`) + * **Temporal types** → wider temporal type (for example, `DATE` and `TIMESTAMP` coerce to `TIMESTAMP`) + * **Incompatible types** (different type families) → `VARCHAR` fallback (for example, `INTEGER` and `VARCHAR` coerce to `VARCHAR`) +* Missing fields across datasets are automatically filled with `NULL` values to unify schemas. +* Direct index references must be valid index names, patterns, or aliases (for example, `accounts`, `logs-*`, `security-alias`). diff --git a/docs/user/ppl/functions/condition.md b/docs/user/ppl/functions/condition.md index 759fa09a1e5..95f805ade94 100644 --- a/docs/user/ppl/functions/condition.md +++ b/docs/user/ppl/functions/condition.md @@ -7,6 +7,8 @@ PPL conditional functions enable global filtering of query results based on spec Returns `TRUE` if the field is `NULL`, `FALSE` otherwise. +The `field IS NULL` predicate syntax is also supported as a synonym. + The `isnull()` function is commonly used: - In `eval` expressions to create conditional fields. - With the `if()` function to provide default values. @@ -69,6 +71,14 @@ source=accounts | where isnull(employer) | fields account_number, firstname, employer ``` + +The `IS NULL` predicate syntax can be used as an equivalent alternative: + +```ppl +source=accounts +| where employer IS NULL +| fields account_number, firstname, employer +``` The query returns the following results: @@ -87,6 +97,8 @@ fetched rows / total rows = 1/1 Returns `TRUE` if the field is NOT `NULL`, `FALSE` otherwise. +The `field IS NOT NULL` predicate syntax is also supported as a synonym. + The `isnotnull()` function is commonly used: - In `eval` expressions to create Boolean flags. - In `where` clauses to filter out null values. 
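For instance, a minimal sketch of the `eval` flag usage mentioned above (it reuses the `accounts`/`employer` sample data from the other examples on this page; the flag name `has_employer` is illustrative):

```ppl
source=accounts
| eval has_employer = isnotnull(employer)
| fields account_number, employer, has_employer
```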
@@ -141,6 +153,27 @@ fetched rows / total rows = 1/1 | 18 | null | +----------------+----------+ ``` + +The `IS NOT NULL` predicate syntax is equivalent to `isnotnull()`: + +```ppl +source=accounts +| where employer IS NOT NULL +| fields account_number, employer +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++----------------+----------+ +| account_number | employer | +|----------------+----------| +| 1 | Pyrami | +| 6 | Netagy | +| 13 | Quility | ++----------------+----------+ +``` The following example demonstrates using `isnotnull` with the `if` function to create validation messages: diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md index 27f59fa4b95..37947113800 100644 --- a/docs/user/ppl/index.md +++ b/docs/user/ppl/index.md @@ -73,6 +73,7 @@ source=accounts | [appendcol command](cmd/appendcol.md) | 3.1 | experimental (since 3.1) | Append the result of a sub-search and attach it alongside the input search results. | | [lookup command](cmd/lookup.md) | 3.0 | experimental (since 3.0) | Add or replace data from a lookup index. | | [multisearch command](cmd/multisearch.md) | 3.4 | experimental (since 3.4) | Execute multiple search queries and combine their results. | +| [union command](cmd/union.md) | 3.7 | experimental (since 3.7) | Combine results from multiple datasets using UNION ALL semantics. | | [ml command](cmd/ml.md) | 2.5 | stable (since 2.5) | Apply machine learning algorithms to analyze data. | | [kmeans command](cmd/kmeans.md) | 1.3 | stable (since 1.3) | Apply the kmeans algorithm on the search result returned by a PPL command. | | [ad command](cmd/ad.md) | 1.3 | deprecated (since 2.5) | Apply Random Cut Forest algorithm on the search result returned by a PPL command. | diff --git a/docs/user/ppl/interfaces/protocol.md b/docs/user/ppl/interfaces/protocol.md index 680f01fd379..d0f62e60c17 100644 --- a/docs/user/ppl/interfaces/protocol.md +++ b/docs/user/ppl/interfaces/protocol.md @@ -120,8 +120,18 @@ Expected output: ```json { "error": { - "reason": "Error occurred in OpenSearch engine: no such index [unknown]", - "details": "[unknown] IndexNotFoundException[no such index [unknown]]\nFor more details, please send request for Json format to see the raw response from OpenSearch engine.", + "context": { + "stage": "analyzing", + "index_name": "unknown", + "stage_description": "Parsing and validating the query" + }, + "reason": "no such index [unknown]", + "details": "no such index [unknown]", + "location": [ + "while preparing and validating the query plan", + "while fetching index mappings" + ], + "code": "INDEX_NOT_FOUND", "type": "IndexNotFoundException" }, "status": 404 diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index a56715845ab..29c2ffd582f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -25,6 +25,7 @@ CalciteAddColTotalsCommandIT.class, CalciteConvertCommandIT.class, CalciteArrayFunctionIT.class, + CalciteBinChartNullIT.class, CalciteBinCommandIT.class, CalciteConvertTZFunctionIT.class, CalciteCsvFormatIT.class, @@ -53,6 +54,7 @@ CalciteMultisearchCommandIT.class, CalciteMultiValueStatsIT.class, CalciteNewAddedCommandsIT.class, + CalciteNotLikeNullIT.class, CalciteNowLikeFunctionIT.class, CalciteObjectFieldOperateIT.class, CalciteOperatorIT.class, @@ -107,6 +109,7 
@@ CalciteTopCommandIT.class, CalciteTrendlineCommandIT.class, CalciteTransposeCommandIT.class, + CalciteUnionCommandIT.class, CalciteVisualizationFormatIT.class, CalciteWhereCommandIT.class, CalcitePPLTpchIT.class, @@ -114,6 +117,7 @@ CalciteNoMvCommandIT.class, CalciteMvExpandCommandIT.class, CalcitePPLGraphLookupIT.class, + CalciteMixedFieldTypeIT.class, }) public class CalciteNoPushdownIT { private static boolean wasPushdownEnabled; diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinChartNullIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinChartNullIT.java new file mode 100644 index 00000000000..83b3807ff14 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinChartNullIT.java @@ -0,0 +1,63 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** Integration test for GitHub issue #5174: bin/chart NPE with null values. */ +public class CalciteBinChartNullIT extends PPLIntegTestCase { + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.BANK_WITH_NULL_VALUES); + } + + @Test + public void testBinThenChartWithNullValuesShouldNotCauseNPE() throws IOException { + // bin balance span=10000 produces null for documents without a balance field. + // chart count() over bal_bin by gender should handle these null bin values safely. 
+ JSONObject result = + executeQuery( + String.format( + "source=%s | bin balance span=10000 as bal_bin" + + " | chart count() over bal_bin by gender", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema( + result, + schema("bal_bin", "string"), + schema("gender", "string"), + schema("count()", "bigint")); + // Should only contain rows for non-null balance values (4 records with balance) + verifyDataRows( + result, + rows("0-10000", "M", 1), + rows("30000-40000", "F", 1), + rows("30000-40000", "M", 1), + rows("40000-50000", "F", 1)); + } + + @Test + public void testBinThenChartSingleGroupWithNullValues() throws IOException { + // chart with only row split (no column split): the simpler sort path + JSONObject result = + executeQuery( + String.format( + "source=%s | bin balance span=10000 as bal_bin | chart count() over bal_bin", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema(result, schema("bal_bin", "string"), schema("count()", "bigint")); + verifyDataRows(result, rows("0-10000", 1), rows("30000-40000", 2), rows("40000-50000", 1)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteConvertCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteConvertCommandIT.java index 1c9b6de3454..7d666951b1a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteConvertCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteConvertCommandIT.java @@ -5,6 +5,7 @@ package org.opensearch.sql.calcite.remote; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -260,4 +261,118 @@ public void testConvertAutoWithMemorySizesGigabytes() throws IOException { verifySchema(result, schema("memory", null, "double")); verifyDataRows(result, rows(2097152.0)); } + + @Test + public void testConvertMktimeWithDefaultFormat() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval date_str = '10/18/2003 20:07:13' | convert" + + " mktime(date_str) | fields date_str | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("date_str", null, "double")); + verifyDataRows(result, rows(1066507633.0)); + } + + @Test + public void testConvertMktimeWithCustomTimeformat() throws IOException { + JSONObject result = + executeQuery( + "search source=" + + TEST_INDEX_BANK + + " | eval date_str = '18/10/2003 20:07:13' | convert timeformat=\\\"%d/%m/%Y" + + " %H:%M:%S\\\" mktime(date_str) | fields date_str | head 1"); + verifySchema(result, schema("date_str", null, "double")); + verifyDataRows(result, rows(1066507633.0)); + } + + @Test + public void testConvertCtimeWithDefaultFormat() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval timestamp = 1066507633 | convert ctime(timestamp) |" + + " fields timestamp | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("timestamp", null, "string")); + verifyDataRows(result, rows("10/18/2003 20:07:13")); + } + + @Test + public void testConvertCtimeWithCustomTimeformat() throws IOException { + JSONObject result = + executeQuery( + "search source=" + + TEST_INDEX_BANK + + " | eval timestamp = 1066507633 | convert timeformat=\\\"%Y-%m-%d %H:%M:%S\\\"" + + " ctime(timestamp) | fields timestamp | head 1"); + verifySchema(result, schema("timestamp", null, "string")); + 
verifyDataRows(result, rows("2003-10-18 20:07:13")); + } + + @Test + public void testConvertDur2secFunction() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval duration = '01:23:45' | convert dur2sec(duration) |" + + " fields duration | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("duration", null, "double")); + verifyDataRows(result, rows(5025.0)); + } + + @Test + public void testConvertMstimeFunction() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval time_str = '03:45' | convert mstime(time_str) |" + + " fields time_str | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("time_str", null, "double")); + verifyDataRows(result, rows(225.0)); + } + + @Test + public void testConvertTimeformatWithMultipleFunctions() throws IOException { + JSONObject result = + executeQuery( + "search source=" + + TEST_INDEX_BANK + + " | eval date_str = '18/10/2003 20:07:13', timestamp = 1066507633 | convert" + + " timeformat=\\\"%d/%m/%Y %H:%M:%S\\\" mktime(date_str), ctime(timestamp) |" + + " fields date_str, timestamp | head 1"); + verifySchema(result, schema("date_str", null, "double"), schema("timestamp", null, "string")); + verifyNumOfRows(result, 1); + assertEquals(1066507633.0, result.getJSONArray("datarows").getJSONArray(0).getDouble(0), 0.001); + assertEquals( + "18/10/2003 20:07:13", result.getJSONArray("datarows").getJSONArray(0).getString(1)); + } + + @Test + public void testConvertTimeformatWithWhere() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval date_str = '10/18/2003 20:07:13' |" + + " convert mktime(date_str) | where date_str > 1000000000 |" + + " fields date_str | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("date_str", null, "double")); + verifyDataRows(result, rows(1066507633.0)); + } + + @Test + public void testConvertTimeformatWithStats() throws IOException { + JSONObject result = + executeQuery( + "search source=" + + TEST_INDEX_BANK + + " | eval timestamp = 1066507633 |" + + " convert timeformat=\\\"%Y\\\" ctime(timestamp) |" + + " stats count() by timestamp"); + verifySchema(result, schema("count()", null, "bigint"), schema("timestamp", "string")); + verifyDataRows(result, rows(7, "2003")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteErrorReportStageIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteErrorReportStageIT.java new file mode 100644 index 00000000000..f51ffabdc35 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteErrorReportStageIT.java @@ -0,0 +1,217 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; +import static org.opensearch.sql.util.TestUtils.getResponseBody; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.client.ResponseException; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** + * Integration tests for error report builder with stage tracking. Validates that errors include + * stage information and user-friendly messages. 
+ */ +public class CalciteErrorReportStageIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + loadIndex(Index.ACCOUNT); + enableCalcite(); + } + + @Test + public void testFieldNotFoundErrorIncludesStage() throws IOException { + ResponseException exception = + assertThrows( + ResponseException.class, + () -> executeQuery("source=" + TEST_INDEX_ACCOUNT + " | fields nonexistent_field")); + + String responseBody = getResponseBody(exception.getResponse()); + JSONObject response = new JSONObject(responseBody); + JSONObject error = response.getJSONObject("error"); + + // Verify error has context with stage information + assertTrue("Error should have context", error.has("context")); + JSONObject context = error.getJSONObject("context"); + + assertTrue("Context should have stage", context.has("stage")); + assertEquals("Stage should be 'analyzing'", "analyzing", context.getString("stage")); + + assertTrue("Context should have stage_description", context.has("stage_description")); + String stageDescription = context.getString("stage_description"); + assertTrue( + "Stage description should be user-friendly", + stageDescription.toLowerCase().contains("checking") + || stageDescription.toLowerCase().contains("query")); + + // Verify error has location chain + assertTrue("Error should have location", error.has("location")); + assertTrue("Location should be an array", error.get("location") instanceof org.json.JSONArray); + + // Verify location message is user-friendly (not technical) + org.json.JSONArray locationArray = error.getJSONArray("location"); + assertTrue("Location array should not be empty", locationArray.length() > 0); + String location = locationArray.getString(0); + assertFalse( + "Location should not mention internal terms like 'Calcite'", location.contains("Calcite")); + assertFalse( + "Location should not mention internal terms like 'RelNode'", location.contains("RelNode")); + } + + @Test + public void testIndexNotFoundErrorIncludesStage() throws IOException { + ResponseException exception = + assertThrows( + ResponseException.class, () -> executeQuery("source=nonexistent_index | fields age")); + + String responseBody = getResponseBody(exception.getResponse()); + JSONObject response = new JSONObject(responseBody); + JSONObject error = response.getJSONObject("error"); + + // Verify error has context with stage + assertTrue("Error should have context", error.has("context")); + JSONObject context = error.getJSONObject("context"); + assertTrue("Context should have stage", context.has("stage")); + + // Verify error has location + assertTrue("Error should have location", error.has("location")); + } + + @Test + public void testMultipleFieldErrorsIncludeStage() throws IOException { + ResponseException exception = + assertThrows( + ResponseException.class, + () -> + executeQuery( + "source=" + + TEST_INDEX_ACCOUNT + + " | fields nonexistent1, nonexistent2, nonexistent3")); + + String responseBody = getResponseBody(exception.getResponse()); + JSONObject response = new JSONObject(responseBody); + JSONObject error = response.getJSONObject("error"); + + // Verify stage information is present + assertTrue("Error should have context", error.has("context")); + JSONObject context = error.getJSONObject("context"); + assertTrue("Context should have stage", context.has("stage")); + assertTrue("Context should have stage_description", context.has("stage_description")); + } + + @Test + public void testErrorReportTypeMatchesExceptionType() throws IOException { + 
ResponseException exception = + assertThrows( + ResponseException.class, + () -> executeQuery("source=" + TEST_INDEX_ACCOUNT + " | fields bad_field_name")); + + String responseBody = getResponseBody(exception.getResponse()); + JSONObject response = new JSONObject(responseBody); + JSONObject error = response.getJSONObject("error"); + + // Verify error has type field + assertTrue("Error should have type", error.has("type")); + + // Verify error has details + assertTrue("Error should have details", error.has("details")); + } + + @Test + public void testFieldNotFoundIncludesErrorCode() throws IOException { + ResponseException exception = + assertThrows( + ResponseException.class, + () -> executeQuery("source=" + TEST_INDEX_ACCOUNT + " | fields missing_field")); + + String responseBody = getResponseBody(exception.getResponse()); + JSONObject response = new JSONObject(responseBody); + JSONObject error = response.getJSONObject("error"); + + String code = error.getString("code"); + assertFalse("Error code should not be empty", code.isEmpty()); + assertFalse("Error code should not be UNKNOWN", code.equals("UNKNOWN")); + } + + @Test + public void testLocationMessagesAreUserFriendly() throws IOException { + ResponseException exception = + assertThrows( + ResponseException.class, + () -> executeQuery("source=" + TEST_INDEX_ACCOUNT + " | fields xyz123")); + + String responseBody = getResponseBody(exception.getResponse()); + JSONObject response = new JSONObject(responseBody); + JSONObject error = response.getJSONObject("error"); + + assertTrue("Error should have location", error.has("location")); + org.json.JSONArray locationArray = error.getJSONArray("location"); + + // Verify all location messages are user-friendly + for (int i = 0; i < locationArray.length(); i++) { + String location = locationArray.getString(i); + + // Should not contain technical terms + assertFalse( + "Location should not contain 'AST'", + location.toLowerCase().contains("ast") && !location.toLowerCase().contains("last")); + assertFalse("Location should not contain 'RelNode'", location.contains("RelNode")); + assertFalse( + "Location should not contain 'semantic analysis' (too technical)", + location.contains("semantic analysis")); + + // Should use user-friendly language + assertTrue( + "Location should mention query, fields, data, cluster, or execution", + location.toLowerCase().contains("query") + || location.toLowerCase().contains("field") + || location.toLowerCase().contains("data") + || location.toLowerCase().contains("cluster") + || location.toLowerCase().contains("execut")); + } + } + + @Test + public void testStageDescriptionIsUserFriendly() throws IOException { + ResponseException exception = + assertThrows( + ResponseException.class, + () -> executeQuery("source=" + TEST_INDEX_ACCOUNT + " | fields undefined_field")); + + String responseBody = getResponseBody(exception.getResponse()); + JSONObject response = new JSONObject(responseBody); + JSONObject error = response.getJSONObject("error"); + + assertTrue("Error should have context", error.has("context")); + JSONObject context = error.getJSONObject("context"); + assertTrue("Context should have stage_description", context.has("stage_description")); + + String stageDescription = context.getString("stage_description"); + + // Stage description should not use compiler/technical terminology + assertFalse( + "Stage description should not contain 'Semantic'", stageDescription.contains("Semantic")); + assertFalse( + "Stage description should not contain 'Calcite'", 
stageDescription.contains("Calcite")); + assertFalse( + "Stage description should not contain 'AST'", + stageDescription.contains("AST") && !stageDescription.contains("Last")); + + // Should use analyst-friendly language + assertTrue( + "Stage description should be user-friendly", + stageDescription.toLowerCase().contains("check") + || stageDescription.toLowerCase().contains("validat") + || stageDescription.toLowerCase().contains("prepar") + || stageDescription.toLowerCase().contains("run") + || stageDescription.toLowerCase().contains("query")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteEvalCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteEvalCommandIT.java index 588a4a784f9..219020b1650 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteEvalCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteEvalCommandIT.java @@ -6,11 +6,13 @@ package org.opensearch.sql.calcite.remote; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TELEMETRY; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; +import com.google.common.collect.ImmutableMap; import java.io.IOException; import org.json.JSONObject; import org.junit.jupiter.api.Test; @@ -25,6 +27,7 @@ public void init() throws Exception { enableCalcite(); loadIndex(Index.BANK); + loadIndex(Index.TELEMETRY); // Create test data for string concatenation Request request1 = new Request("PUT", "/test_eval/_doc/1?refresh=true"); @@ -38,6 +41,21 @@ public void init() throws Exception { Request request3 = new Request("PUT", "/test_eval/_doc/3?refresh=true"); request3.setJsonEntity("{\"name\": \"Charlie\", \"age\": null, \"title\": \"Analyst\"}"); client().performRequest(request3); + + // Index with a struct field `agent` to reproduce the reviewer's case from PR #5351: + // source= | fields agent | eval agent.name = "test" + // Rely on dynamic mapping — OpenSearch infers `agent` as an object with string children + // from the document contents. Using dynamic mapping keeps the init idempotent across + // repeated `@Before` invocations in the preserved cluster. + Request agentDoc1 = new Request("PUT", "/test_eval_agent/_doc/1?refresh=true"); + agentDoc1.setJsonEntity( + "{\"agent\": {\"name\": \"winlogbeat\", \"version\": \"7.0\"}, \"message\": \"hello\"}"); + client().performRequest(agentDoc1); + + Request agentDoc2 = new Request("PUT", "/test_eval_agent/_doc/2?refresh=true"); + agentDoc2.setJsonEntity( + "{\"agent\": {\"name\": \"filebeat\", \"version\": \"8.1\"}, \"message\": \"world\"}"); + client().performRequest(agentDoc2); } @Test @@ -86,6 +104,90 @@ public void testEvalStringConcatenationWithLiterals() throws IOException { rows("Charlie", "Analyst", "Name: Charlie, Title: Analyst")); } + @Test + public void testEvalDottedNameDoesNotDropStructParent() throws IOException { + // Reviewer's case from PR #5351: assigning a new dotted-path column must not remove the + // struct-parent column that happens to be a prefix of the eval target. 
+ // Equivalent SPL1 query: + // source= | fields agent | eval agent.name = "test" + // Before the fix, the prefix-override in shouldOverrideField silently dropped the `agent` + // column entirely from the result schema. With the fix, `agent` is preserved. + // The newly-created literal column `agent.name` is also available (verified via an + // explicit trailing `fields` projection that bypasses tryToRemoveNestedFields). + JSONObject result = + executeQuery( + "source=test_eval_agent | fields agent | eval `agent.name` = 'test' | fields agent," + + " `agent.name`"); + verifySchema(result, schema("agent", "struct"), schema("agent.name", "string")); + verifyDataRows( + result, + rows(ImmutableMap.of("name", "winlogbeat", "version", "7.0"), "test"), + rows(ImmutableMap.of("name", "filebeat", "version", "8.1"), "test")); + } + + @Test + public void testEvalDottedNamePreservesStructParent_ImplicitProject() throws IOException { + // Complementary coverage for the reviewer's case without the explicit trailing projection. + // With the implicit `fields *` (AllFields) that the PPL parser appends, the downstream + // `tryToRemoveNestedFields` pass still collapses the flattened leaf back into its struct + // parent -- but the important regression guard is that the struct parent `agent` is no + // longer dropped by `shouldOverrideField`'s prefix branch. + JSONObject result = + executeQuery("source=test_eval_agent | fields agent | eval `agent.name` = 'test'"); + verifySchema(result, schema("agent", "struct")); + } + + @Test + public void testEvalOverrideOfFlattenedNestedLeafSurvivesImplicitProject() throws IOException { + // Regression guard for PR #5351: eval assigning a new value to a dotted name that matches an + // existing OpenSearch flattened nested leaf must not have that value silently eaten by the + // implicit `fields *` pass. + // + // The telemetry mapping exposes struct parents (resource, resource.attributes, ..., + // resource.attributes.telemetry.sdk) side-by-side with the flattened leaves. When eval + // overrides the leaf, projectPlusOverriding now prunes those struct parents so a subsequent + // tryToRemoveNestedFields pass does not delete the overridden leaf on the way out. + // + // Before the fix, this query returned rows with the original `resource` struct (still + // containing the pre-override integer version) and no `resource.attributes.telemetry.sdk.*` + // flattened leaves at all -- the "OVERRIDE" string was completely lost. + JSONObject result = + executeQuery( + String.format( + "source=%s | eval `resource.attributes.telemetry.sdk.version` = 'OVERRIDE'", + TEST_INDEX_TELEMETRY)); + + verifyDataRows( + result, + rows(true, "java", "opentelemetry", 9, "OVERRIDE"), + rows(false, "python", "opentelemetry", 12, "OVERRIDE"), + rows(true, "javascript", "opentelemetry", 9, "OVERRIDE"), + rows(false, "go", "opentelemetry", 16, "OVERRIDE"), + rows(true, "rust", "opentelemetry", 12, "OVERRIDE")); + } + + @Test + public void testEvalOverrideOfFlattenedNestedLeafWithExplicitProject() throws IOException { + // Control for the test above: with an explicit trailing `fields` projection, the implicit + // `fields *` codepath (and tryToRemoveNestedFields) does not run, so eval always returned + // the overridden value even before the fix. This test pins the user-facing contract for the + // explicit-projection variant regardless of internal pruning behaviour. 
+ JSONObject result = + executeQuery( + String.format( + "source=%s | eval `resource.attributes.telemetry.sdk.version` = 'OVERRIDE' | fields" + + " `resource.attributes.telemetry.sdk.version`", + TEST_INDEX_TELEMETRY)); + verifySchema(result, schema("resource.attributes.telemetry.sdk.version", "string")); + verifyDataRows( + result, + rows("OVERRIDE"), + rows("OVERRIDE"), + rows("OVERRIDE"), + rows("OVERRIDE"), + rows("OVERRIDE")); + } + @Test public void testEvalStringConcatenationWithExistingData() throws IOException { JSONObject result = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 1b9e289e2e4..8ad0be5cc88 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2468,6 +2468,46 @@ public void testConvertMultipleFunctionsExplain() throws IOException { + " balance, age")); } + @Test + public void testConvertCtimeExplain() throws IOException { + String expected = loadExpectedPlan("explain_convert_ctime.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_bank | eval ts=1066507633 | convert ctime(ts) |" + + " fields ts")); + } + + @Test + public void testConvertMktimeExplain() throws IOException { + String expected = loadExpectedPlan("explain_convert_mktime.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_bank | eval d='10/18/2003 20:07:13' | convert" + + " mktime(d) | fields d")); + } + + @Test + public void testConvertDur2secExplain() throws IOException { + String expected = loadExpectedPlan("explain_convert_dur2sec.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_bank | eval d='01:23:45' | convert dur2sec(d) |" + + " fields d")); + } + + @Test + public void testConvertMstimeExplain() throws IOException { + String expected = loadExpectedPlan("explain_convert_mstime.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_bank | eval t='03:45.5' | convert mstime(t) |" + + " fields t")); + } + @Test public void testNotBetweenPushDownExplain() throws Exception { // test for issue https://github.com/opensearch-project/sql/issues/4903 @@ -2905,4 +2945,29 @@ public void testHighlightOsdObjectFormatExplain() throws IOException { String expected = loadExpectedPlan("explain_highlight_osd_format.yaml"); assertYamlEqualsIgnoreId(expected, result); } + + @Test + public void testExplainConsecutiveSortsAfterAggIssue5125() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_agg_consecutive_sorts_issue_5125.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | stats count() as c by gender | sort gender | sort - gender", + TEST_INDEX_BANK))); + } + + @Test + public void testExplainUnion() throws IOException { + String query = + "| union " + + "[search source=opensearch-sql_test_index_account | where age < 30] " + + "[search source=opensearch-sql_test_index_account | where age >= 30] " + + "| stats count() by gender"; + + String actual = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_union.yaml"); + assertYamlEqualsIgnoreId(expected, actual); + } } diff --git 
a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMixedFieldTypeIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMixedFieldTypeIT.java new file mode 100644 index 00000000000..84c53738ae3 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMixedFieldTypeIT.java @@ -0,0 +1,110 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; +import static org.opensearch.sql.util.TestUtils.createIndexByRestClient; +import static org.opensearch.sql.util.TestUtils.isIndexExist; +import static org.opensearch.sql.util.TestUtils.performRequest; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** + * Integration tests for querying wildcard indices where a field has conflicting types (e.g., text + * vs keyword) across different indices. See GitHub issue #4659. + */ +public class CalciteMixedFieldTypeIT extends PPLIntegTestCase { + + private static final String LOG_TEXT_INDEX = "test_log_text_4659"; + private static final String LOG_KEYWORD_INDEX = "test_log_keyword_4659"; + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + createTestIndices(); + } + + private void createTestIndices() throws IOException { + // Create index with msg as text type + if (!isIndexExist(client(), LOG_TEXT_INDEX)) { + String textMapping = + "{\"mappings\":{\"properties\":{\"msg\":{\"type\":\"text\"}," + + "\"idx\":{\"type\":\"integer\"}}}}"; + createIndexByRestClient(client(), LOG_TEXT_INDEX, textMapping); + Request bulkReq = new Request("POST", "/" + LOG_TEXT_INDEX + "/_bulk?refresh=true"); + bulkReq.setJsonEntity( + "{\"index\":{\"_id\":\"1\"}}\n" + "{\"msg\":\"status=200\",\"idx\":1}\n"); + performRequest(client(), bulkReq); + } + + // Create index with msg as keyword type + if (!isIndexExist(client(), LOG_KEYWORD_INDEX)) { + String keywordMapping = + "{\"mappings\":{\"properties\":{\"msg\":{\"type\":\"keyword\"}," + + "\"idx\":{\"type\":\"integer\"}}}}"; + createIndexByRestClient(client(), LOG_KEYWORD_INDEX, keywordMapping); + Request bulkReq = new Request("POST", "/" + LOG_KEYWORD_INDEX + "/_bulk?refresh=true"); + bulkReq.setJsonEntity( + "{\"index\":{\"_id\":\"1\"}}\n" + "{\"msg\":\"status=200\",\"idx\":2}\n"); + performRequest(client(), bulkReq); + } + } + + @Test + public void testWildcardQueryWithMixedTextAndKeywordField() throws IOException { + // Query using wildcard index pattern that matches both indices + // Both documents should be returned regardless of field type conflict + JSONObject result = executeQuery("source=test_log_*_4659 | fields msg, idx | sort idx"); + verifySchema(result, schema("msg", "string"), schema("idx", "int")); + verifyDataRows(result, rows("status=200", 1), rows("status=200", 2)); + } + + @Test + public void testWildcardQueryWithEvalOnMixedField() throws IOException { + // Eval uses the Calcite script engine to compute expression on each shard. 
+ // When the merged type is keyword, DOC_VALUE is used, but text shards have no doc_values + // which returns null and causes the eval to produce null for the text-typed shard. + JSONObject result = + executeQuery( + "source=test_log_*_4659 | eval upper_msg = upper(msg) | fields idx, upper_msg" + + " | sort idx"); + verifySchema(result, schema("idx", "int"), schema("upper_msg", "string")); + verifyDataRows(result, rows(1, "STATUS=200"), rows(2, "STATUS=200")); + } + + @Test + public void testWildcardQueryWithScriptFilterOnMixedField() throws IOException { + // Script-based filter pushed down to each shard uses DOC_VALUE retrieval. + // When the merged type is keyword but the field is text on some shards, + // doc_values are not available, causing shard failures and missing results. + JSONObject result = + executeQuery( + "source=test_log_*_4659 | where upper(msg) = 'STATUS=200' | fields msg, idx" + + " | sort idx"); + verifySchema(result, schema("msg", "string"), schema("idx", "int")); + verifyDataRows(result, rows("status=200", 1), rows("status=200", 2)); + } + + @Test + public void testWildcardQueryWithRexOnMixedField() throws IOException { + // Rex command on the conflicting field should work across both indices + JSONObject result = + executeQuery( + "source=test_log_*_4659 | rex field=msg 'status=(?<statusCode>\\\\d+)'" + + " | fields msg, idx, statusCode | sort idx"); + verifySchema( + result, schema("msg", "string"), schema("idx", "int"), schema("statusCode", "string")); + verifyDataRows(result, rows("status=200", 1, "200"), rows("status=200", 2, "200")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java index 3ad50cdb4b0..53ef8ebe8b6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java @@ -209,24 +209,15 @@ public void testNoMvResultUsedInComparison() throws IOException { } @Test - public void testNoMvMissingFieldShouldReturn4xx() throws IOException { - ResponseException ex = - Assertions.assertThrows( - ResponseException.class, - () -> executeQuery("source=" + TEST_INDEX_BANK + " | nomv does_not_exist")); - - int status = ex.getResponse().getStatusLine().getStatusCode(); + public void testNoMvMissingField() throws IOException { + // After issue #5175 was fixed, unresolved identifiers inside COALESCE (the rewrite target + // of `nomv`) resolve to a null literal of SqlTypeName.NULL. Calcite can then promote that + // null to the array type expected by ARRAY_COMPACT, so the query succeeds instead of + // returning 4xx. The resulting column is the COALESCE empty-string fallback. + JSONObject result = + executeQuery("source=" + TEST_INDEX_BANK + " | nomv does_not_exist | head 1"); - Assertions.assertEquals(400, status, "Unexpected status. 
ex=" + ex.getMessage()); - - String msg = ex.getMessage(); - Assertions.assertTrue( - msg.contains("does_not_exist") - || msg.contains("field") - || msg.contains("Field") - || msg.contains("ARRAY_COMPACT") - || msg.contains("ARRAY"), - msg); + Assertions.assertTrue(result.getJSONArray("datarows").length() > 0); } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNotInNullFilterIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNotInNullFilterIT.java new file mode 100644 index 00000000000..267a750e0cc --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNotInNullFilterIT.java @@ -0,0 +1,61 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** Integration test for NOT IN excluding null/missing rows (issue #5165). */ +public class CalciteNotInNullFilterIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.BANK_WITH_NULL_VALUES); + } + + @Test + public void testNotInExcludesNullRows() throws IOException { + // age values: 32, 36, 28, 33, 36, null, 34 + // NOT IN (32, 28) should return 36, 33, 36, 34 — excluding the null row + JSONObject result = + executeQuery( + String.format( + "source=%s | where age NOT IN (32, 28) | fields age | sort age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRowsInOrder(result, rows(33), rows(34), rows(36), rows(36)); + } + + @Test + public void testNotInExcludesNullAndMissingRows() throws IOException { + // balance values: 39225, null, 32838, 4180, null, null, 48086 + // NOT IN (39225) should return 32838, 4180, 48086 — excluding null/missing rows + JSONObject result = + executeQuery( + String.format( + "source=%s | where balance NOT IN (39225) | fields balance | sort balance", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRowsInOrder(result, rows(4180), rows(32838), rows(48086)); + } + + @Test + public void testInWithNullRowsIsUnaffected() throws IOException { + // IN should naturally exclude nulls (positive match never matches null) + JSONObject result = + executeQuery( + String.format( + "source=%s | where age IN (32, 28) | fields age | sort age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRowsInOrder(result, rows(28), rows(32)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNotLikeNullIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNotLikeNullIT.java new file mode 100644 index 00000000000..ae4476b59f4 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNotLikeNullIT.java @@ -0,0 +1,107 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; +import org.opensearch.client.ResponseException; 
+import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** + * Integration tests for NOT LIKE with null/missing field values. Tests the fix for issue #5169: NOT + * LIKE should exclude rows where the field is null or missing. + */ +public class CalciteNotLikeNullIT extends PPLIntegTestCase { + + private static final String TEST_INDEX = "issue5169_not_like_null"; + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + createTestIndex(); + } + + private void createTestIndex() throws IOException { + try { + Request deleteIndex = new Request("DELETE", "/" + TEST_INDEX); + client().performRequest(deleteIndex); + } catch (ResponseException e) { + // Index doesn't exist, which is fine + } + + Request createIndex = new Request("PUT", "/" + TEST_INDEX); + createIndex.setJsonEntity( + "{\n" + + " \"settings\": {\"number_of_shards\": 1, \"number_of_replicas\": 0},\n" + + " \"mappings\": {\n" + + " \"properties\": {\n" + + " \"keyword_field\": {\"type\": \"keyword\"},\n" + + " \"int_field\": {\"type\": \"integer\"}\n" + + " }\n" + + " }\n" + + "}"); + client().performRequest(createIndex); + + Request bulkRequest = new Request("POST", "/" + TEST_INDEX + "/_bulk?refresh=true"); + bulkRequest.setJsonEntity( + "{\"index\":{\"_id\":\"1\"}}\n" + + "{\"keyword_field\": \"hello\", \"int_field\": 1}\n" + + "{\"index\":{\"_id\":\"2\"}}\n" + + "{\"keyword_field\": \"world\", \"int_field\": 2}\n" + + "{\"index\":{\"_id\":\"3\"}}\n" + + "{\"keyword_field\": \"\", \"int_field\": 3}\n" + + "{\"index\":{\"_id\":\"4\"}}\n" + + "{\"keyword_field\": \"special chars...\", \"int_field\": 4}\n" + + "{\"index\":{\"_id\":\"5\"}}\n" + + "{\"keyword_field\": null, \"int_field\": null}\n"); + client().performRequest(bulkRequest); + } + + @Test + public void testNotLikeExcludesNull() throws IOException { + // NOT LIKE '%ello%' should match 'world', '', 'special chars...' 
but NOT null + JSONObject result = + executeQuery( + "source=" + + TEST_INDEX + + " | where NOT keyword_field LIKE '%ello%'" + + " | sort keyword_field" + + " | fields keyword_field"); + verifyDataRowsInOrder(result, rows(""), rows("special chars..."), rows("world")); + } + + @Test + public void testNotLikeWithNoMatch() throws IOException { + // NOT LIKE '%zzz%' should return all non-null rows (4 rows) + JSONObject result = + executeQuery( + "source=" + + TEST_INDEX + + " | where NOT keyword_field LIKE '%zzz%'" + + " | sort keyword_field" + + " | fields keyword_field"); + verifyDataRowsInOrder(result, rows(""), rows("hello"), rows("special chars..."), rows("world")); + } + + @Test + public void testNotGreaterThanExcludesNull() throws IOException { + // NOT int_field > 2 should return rows with int_field 1, 2 but NOT null + JSONObject result = + executeQuery( + "source=" + + TEST_INDEX + + " | where NOT int_field > 2" + + " | sort int_field" + + " | fields int_field"); + verifyDataRowsInOrder(result, rows(1), rows(2)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendPipeCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendPipeCommandIT.java index d25d3ca80db..6ae37a027ba 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendPipeCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendPipeCommandIT.java @@ -87,4 +87,74 @@ public void testAppendpipeWithConflictTypeColumn() throws IOException { TEST_INDEX_ACCOUNT))); assertTrue(exception.getMessage().contains("due to incompatible types")); } + + /** Regression test: double appendpipe with different aggregations (issue #5173). */ + @Test + public void testDoubleAppendPipe() throws IOException { + JSONObject actual = + executeQuery( + String.format( + Locale.ROOT, + "source=%s | stats sum(age) as sum_age by gender" + + " | appendpipe [ stats avg(sum_age) as avg_sum_age ]" + + " | appendpipe [ stats max(sum_age) as max_sum_age ]", + TEST_INDEX_ACCOUNT)); + verifySchemaInOrder( + actual, + schema("sum_age", "bigint"), + schema("gender", "string"), + schema("avg_sum_age", "double"), + schema("max_sum_age", "bigint")); + // 2 original rows + 1 avg row + 1 max row + verifyDataRows( + actual, + rows(14947, "F", null, null), + rows(15224, "M", null, null), + rows(null, null, 15085.5, null), + rows(null, null, null, 15224)); + } + + /** Regression test: triple appendpipe with different aggregations (issue #5173). */ + @Test + public void testTripleAppendPipe() throws IOException { + JSONObject actual = + executeQuery( + String.format( + Locale.ROOT, + "source=%s | stats sum(age) as sum_age by gender" + + " | appendpipe [ stats avg(sum_age) as avg_sum_age ]" + + " | appendpipe [ stats max(sum_age) as max_sum_age ]" + + " | appendpipe [ stats min(sum_age) as min_sum_age ]", + TEST_INDEX_ACCOUNT)); + verifySchemaInOrder( + actual, + schema("sum_age", "bigint"), + schema("gender", "string"), + schema("avg_sum_age", "double"), + schema("max_sum_age", "bigint"), + schema("min_sum_age", "bigint")); + // 2 original rows + 1 avg + 1 max + 1 min + verifyDataRows( + actual, + rows(14947, "F", null, null, null), + rows(15224, "M", null, null, null), + rows(null, null, 15085.5, null, null), + rows(null, null, null, 15224, null), + rows(null, null, null, null, 14947)); + } + + /** Regression test: double appendpipe with non-aggregation (filter) subpipeline. 
*/ + @Test + public void testDoubleAppendPipeWithFilter() throws IOException { + JSONObject actual = + executeQuery( + String.format( + Locale.ROOT, + "source=%s | stats sum(age) as sum_age by gender" + + " | appendpipe [ where gender = 'F' ]" + + " | appendpipe [ where gender = 'M' ]", + TEST_INDEX_ACCOUNT)); + // 2 original + 1 (F filter from original) + 1 (M filter from cumulative 3 rows) + verifyDataRows(actual, rows(14947, "F"), rows(15224, "M"), rows(14947, "F"), rows(15224, "M")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java index 3a2e6d69f3b..71e9e69e3ae 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java @@ -5,6 +5,7 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DUPLICATION_NULLABLE; import static org.opensearch.sql.util.MatcherUtils.*; @@ -21,6 +22,7 @@ public void init() throws Exception { enableCalcite(); loadIndex(Index.DUPLICATION_NULLABLE); + loadIndex(Index.ACCOUNT); } @Test @@ -297,6 +299,70 @@ public void testDedupComplex() throws IOException { rows("Z", 1, "D")); } + /** Regression test for https://github.com/opensearch-project/sql/issues/3922 */ + @Test + public void testSortThenDedup() throws IOException { + // Verify sort order is preserved through dedup + JSONObject actual = + executeQuery( + String.format( + "source=%s | sort category | dedup 1 name | fields category, name", + TEST_INDEX_DUPLICATION_NULLABLE)); + // PPL default sort is ASC NULLS FIRST, so null-category rows come first in the sort. + // For each name, dedup keeps the first row in sort order: + // name=A first cat=X, name=B first cat=null (row #14), name=C first cat=X, + // name=D first cat=Z, name=E first cat=null. + verifyDataRows( + actual, rows(null, "B"), rows(null, "E"), rows("X", "A"), rows("X", "C"), rows("Z", "D")); + } + + /** + * Regression test for multi-field sort pushed through dedup. + * + *

<p>Verifies that when a PPL {@code sort} has multiple fields before a {@code dedup}, every + * field is preserved through the pushdown (not only the first one). A single-field pushdown would + * lose the tie-breaker and return a non-deterministic row for each dedup group. + * + *

    Data used: the {@code accounts} test index. In state {@code AK} there are multiple F and M + * accounts; under {@code sort state, age, account_number} the first M row is {@code (state=AK, + * age=20, account_number=23)} and the first F row is {@code (state=AK, age=21, + * account_number=334)}. Only a correct multi-field pushdown produces these exact rows. + */ + @Test + public void testMultiColumnSortThenDedup() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | sort state, age, account_number | dedup 1 gender | fields gender," + + " state, age, account_number", + TEST_INDEX_ACCOUNT)); + verifyDataRows(actual, rows("M", "AK", 20, 23), rows("F", "AK", 21, 334)); + } + + /** Regression test for https://github.com/opensearch-project/sql/issues/3922 */ + @Test + public void testSortThenDedupKeepEmpty() throws IOException { + // Verify sort order is preserved through dedup with keepempty=true + JSONObject actual = + executeQuery( + String.format( + "source=%s | sort category | dedup 1 name KEEPEMPTY=true | fields category, name", + TEST_INDEX_DUPLICATION_NULLABLE)); + // category should be in ascending order (with nulls first due to ASC-nulls-first) + // dedup 1 name KEEPEMPTY=true: keep first occurrence of each name, plus ALL null-name rows + verifyDataRows( + actual, + rows(null, null), + rows(null, "B"), + rows(null, "E"), + rows("X", null), + rows("X", "A"), + rows("X", "C"), + rows("Y", null), + rows("Z", null), + rows("Z", "D")); + } + @Test public void testDedupExpr() throws IOException { JSONObject actual = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEnhancedCoalesceIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEnhancedCoalesceIT.java index f1b546a5681..fd9a5cff774 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEnhancedCoalesceIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEnhancedCoalesceIT.java @@ -171,10 +171,63 @@ public void testCoalesceWithAllNonExistentFields() throws IOException { + " head 1", TEST_INDEX_STATE_COUNTRY_WITH_NULL)); - verifySchema(actual, schema("name", "string"), schema("result", "string")); + // When every COALESCE operand is missing/null, the result has no known type (see #5175). + verifySchema(actual, schema("name", "string"), schema("result", "undefined")); verifyDataRows(actual, rows("Jake", null)); } + @Test + public void testCoalesceWithNullLiteralAndInteger() throws IOException { + // Bug #5175: COALESCE(null, 42) must return the integer 42, not the string "42". + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = coalesce(null, 42) | fields result | head 1", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows(42)); + } + + @Test + public void testCoalesceWithIntegerAndNullLiteral() throws IOException { + // Bug #5175: COALESCE(42, null) must return the integer 42, not the string "42". + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = coalesce(42, null) | fields result | head 1", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows(42)); + } + + @Test + public void testCoalesceWithNullLiteralAndDouble() throws IOException { + // Bug #5175: COALESCE(null, 3.14) must return a numeric double, not a string. 
+ JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = coalesce(null, 3.14) | fields result | head 1", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchema(actual, schema("result", "double")); + verifyDataRows(actual, rows(3.14)); + } + + @Test + public void testCoalesceWithNullLiteralAndIntegerField() throws IOException { + // Bug #5175: COALESCE(null, age) on an int field must keep the integer type. + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = coalesce(null, age) | fields age, result | head 3", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchema(actual, schema("age", "int"), schema("result", "int")); + verifyDataRows(actual, rows(70, 70), rows(30, 30), rows(25, 25)); + } + @Test public void testCoalesceWithEmptyString() throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJsonBuiltinFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJsonBuiltinFunctionIT.java index 0ec367aa318..99af10302ae 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJsonBuiltinFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJsonBuiltinFunctionIT.java @@ -296,6 +296,38 @@ public void testJsonSetPartialSet() throws IOException { verifyDataRows(actual, rows("{\"a\":[{\"b\":1},{\"b\":{\"c\":\"3\"}}]}")); } + @Test + public void testJsonSetWithDollarPrefixedPath() throws IOException { + // Issue #5167: json_set with $.key path should not double-prefix + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval a" + + " =json_set('{\\\"name\\\":\\\"alice\\\",\\\"scores\\\":[90,85,92]}'," + + " '$.name', 'modified_alice')| fields a | head 1", + TEST_INDEX_PEOPLE2)); + + verifySchema(actual, schema("a", "string")); + + verifyDataRows(actual, rows("{\"name\":\"modified_alice\",\"scores\":[90,85,92]}")); + } + + @Test + public void testJsonDeleteWithDollarPrefixedPath() throws IOException { + // Issue #5167: json_delete with $.key path should remove the key + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval a" + + " =json_delete('{\\\"name\\\":\\\"alice\\\",\\\"scores\\\":[90,85,92]}'," + + " '$.name')| fields a | head 1", + TEST_INDEX_PEOPLE2)); + + verifySchema(actual, schema("a", "string")); + + verifyDataRows(actual, rows("{\"scores\":[90,85,92]}")); + } + @Test public void testJsonDelete() throws IOException { JSONObject actual = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLNestedAggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLNestedAggregationIT.java index faaae541d1e..c7ec6434744 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLNestedAggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLNestedAggregationIT.java @@ -17,6 +17,7 @@ import java.io.IOException; import org.json.JSONObject; import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalcitePPLNestedAggregationIT extends PPLIntegTestCase { @@ -175,7 +176,7 @@ public void testNestedAggregationThrowExceptionIfPushdownCannotApplied() throws enabledOnlyWhenPushdownIsEnabled(); Throwable t = assertThrowsWithReplace( - UnsupportedOperationException.class, + ErrorReport.class, () -> executeQuery( String.format( diff --git 
a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java index 6cd0674a2dc..24401444457 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java @@ -199,6 +199,41 @@ public void testRenameFullWildcard() throws IOException { verifyDataRows(result, rows("Jake", 70), rows("Hello", 30), rows("John", 25), rows("Jane", 20)); } + @Test + public void testRenameFullWildcardExcludesMetadataFields() throws IOException { + JSONObject result = + executeQuery(String.format("source = %s | rename * as old_*", TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("old_name", "string"), + schema("old_age", "int"), + schema("old_state", "string"), + schema("old_country", "string"), + schema("old_year", "int"), + schema("old_month", "int")); + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70), + rows("Hello", "USA", "New York", 4, 2023, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20)); + } + + @Test + public void testRenamePartialWildcardExcludesMetadataFields() throws IOException { + JSONObject result = + executeQuery(String.format("source = %s | rename _* as meta_*", TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int")); + verifyStandardDataRows(result); + } + @Test public void testRenameMultipleWildcards() throws IOException { JSONObject result = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java index ec6f8583b23..0bd7ac803f9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java @@ -216,4 +216,27 @@ public void testSpathAutoExtractWithSort() throws IOException { verifySchema(result, schema("doc.user.name", "string")); verifyDataRowsInOrder(result, rows("Alice"), rows("John")); } + + @Test + public void testSpathAutoExtractWithMultiFieldEval() throws IOException { + JSONObject result = + executeQuery( + "source=test_spath_cmd | spath input=doc" + + " | eval doc.user.name=doc.user.name, doc.user.age=doc.user.age" + + " | fields doc.user.name, doc.user.age"); + verifySchema(result, schema("doc.user.name", "string"), schema("doc.user.age", "string")); + verifyDataRows(result, rows("Alice", "25"), rows("John", "30")); + } + + @Test + public void testSpathAutoExtractWithSeparateEvalCommands() throws IOException { + JSONObject result = + executeQuery( + "source=test_spath_cmd | spath input=doc" + + " | eval doc.user.name=doc.user.name" + + " | eval doc.user.age=doc.user.age" + + " | fields doc.user.name, doc.user.age"); + verifySchema(result, schema("doc.user.name", "string"), schema("doc.user.age", "string")); + verifyDataRows(result, rows("Alice", "25"), rows("John", "30")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteParseCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteParseCommandIT.java index e25470a6e53..d5030ffa181 100644 --- 
a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteParseCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteParseCommandIT.java @@ -9,9 +9,12 @@ import java.io.IOException; import org.junit.Test; +import org.opensearch.client.ResponseException; import org.opensearch.sql.ppl.ParseCommandIT; public class CalciteParseCommandIT extends ParseCommandIT { + private static final String SUGGESTION_MATCHING_CONTENT = "capture groups must be alphanumeric"; + @Override public void init() throws Exception { super.init(); @@ -25,10 +28,9 @@ public void testParseErrorInvalidGroupNameUnderscore() throws IOException { String.format( "source=%s | parse email '.+@(?<host_name>.+)' | fields email", TEST_INDEX_BANK)); fail("Should have thrown an exception for underscore in named capture group"); - } catch (Exception e) { + } catch (ResponseException e) { assertTrue(e.getMessage().contains("Invalid capture group name 'host_name'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -39,10 +41,9 @@ public void testParseErrorInvalidGroupNameHyphen() throws IOException { String.format( "source=%s | parse email '.+@(?<host-name>.+)' | fields email", TEST_INDEX_BANK)); fail("Should have thrown an exception for hyphen in named capture group"); - } catch (Exception e) { + } catch (ResponseException e) { assertTrue(e.getMessage().contains("Invalid capture group name 'host-name'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -53,10 +54,9 @@ public void testParseErrorInvalidGroupNameStartingWithDigit() throws IOException String.format( "source=%s | parse email '.+@(?<1host>.+)' | fields email", TEST_INDEX_BANK)); fail("Should have thrown an exception for group name starting with digit"); - } catch (Exception e) { + } catch (ResponseException e) { assertTrue(e.getMessage().contains("Invalid capture group name '1host'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -67,10 +67,9 @@ public void testParseErrorInvalidGroupNameSpecialCharacter() throws IOException String.format( "source=%s | parse email '.+@(?<host@name>.+)' | fields email", TEST_INDEX_BANK)); fail("Should have thrown an exception for special character in named capture group"); - } catch (Exception e) { + } catch (ResponseException e) { assertTrue(e.getMessage().contains("Invalid capture group name 'host@name'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRexCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRexCommandIT.java index f7a50ee0676..eca08b1fc11 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRexCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRexCommandIT.java @@ -14,6 +14,8 @@ import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalciteRexCommandIT extends PPLIntegTestCase { + private static final String SUGGESTION_MATCHING_CONTENT = "capture groups must be alphanumeric"; + @Override public void init() throws 
Exception { super.init(); @@ -61,8 +63,7 @@ public void testRexErrorInvalidGroupNameUnderscore() throws IOException { fail("Should have thrown an exception for underscore in named capture group"); } catch (Exception e) { assertTrue(e.getMessage().contains("Invalid capture group name 'user_name'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -77,8 +78,7 @@ public void testRexErrorInvalidGroupNameHyphen() throws IOException { fail("Should have thrown an exception for hyphen in named capture group"); } catch (Exception e) { assertTrue(e.getMessage().contains("Invalid capture group name 'user-name'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -93,8 +93,7 @@ public void testRexErrorInvalidGroupNameStartingWithDigit() throws IOException { fail("Should have thrown an exception for group name starting with digit"); } catch (Exception e) { assertTrue(e.getMessage().contains("Invalid capture group name '1user'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -109,8 +108,7 @@ public void testRexErrorInvalidGroupNameSpecialCharacter() throws IOException { fail("Should have thrown an exception for special character in named capture group"); } catch (Exception e) { assertTrue(e.getMessage().contains("Invalid capture group name 'user@name'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java index dcf36f510bf..fa0b21e622f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java @@ -823,6 +823,41 @@ public void testMultipleStreamstats() throws IOException { rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20, 22.5)); } + @Test + public void testMultipleStreamstatsWithWindow() throws IOException { + // Test case from GitHub issue #4800: chained streamstats with window=2 + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats window=2 avg(age) as avg_age by state, country" + + " | streamstats window=2 avg(avg_age) as avg_state_age by country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchemaInOrder( + actual, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("avg_age", "double"), + schema("avg_state_age", "double")); + + verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 70, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20, 22.5), + rows(null, "Canada", null, 4, 2023, 10, 10, 15), + rows("Kevin", null, null, 4, 2023, null, null, null)); + } + + // TODO: Fix chained reset_before + window streamstats (nested correlate issue, see #4800) + // The 
reset path still uses correlate, and the window self-join copies it into the right side, + // causing Calcite's RelDecorrelator to fail on duplicate correlate references. + @Test public void testMultipleStreamstatsWithNull1() throws IOException { JSONObject actual = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java index 44df58b7ab8..676cf162b03 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java @@ -5,6 +5,9 @@ package org.opensearch.sql.calcite.remote; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.util.MatcherUtils.*; import static org.opensearch.sql.util.MatcherUtils.rows; @@ -141,6 +144,50 @@ public void testTransposeLowerLimit() throws IOException { rows("age", "32", "36", "28", "33", "36")); } + /** + * Regression test for #5172: transpose fails when input has a field named 'value', because the + * internal unpivot column was also hardcoded as 'value'. + */ + @Test + public void testTransposeWithValueFieldNameCollision() throws IOException { + var result = + executeQuery( + String.format( + "source=%s | stats count() as value, avg(age) as avg_age | transpose", + TEST_INDEX_ACCOUNT)); + + verifySchema( + result, + schema("column", "string"), + schema("row 1", "string"), + schema("row 2", "string"), + schema("row 3", "string"), + schema("row 4", "string"), + schema("row 5", "string")); + + var dataRows = result.getJSONArray("datarows"); + // Verify that each transposed row has distinct correct values + // (not all duplicated from the 'value' field) + assertEquals(2, dataRows.length()); + boolean foundValue = false; + boolean foundAvgAge = false; + for (int i = 0; i < dataRows.length(); i++) { + var row = dataRows.getJSONArray(i); + String colName = row.getString(0); + if ("value".equals(colName)) { + foundValue = true; + // count should be 1000 (total accounts) + assertEquals("1000", row.getString(1)); + } else if ("avg_age".equals(colName)) { + foundAvgAge = true; + // avg_age should not equal the count value + assertNotEquals("1000", row.getString(1)); + } + } + assertTrue("Should have 'value' row in transposed result", foundValue); + assertTrue("Should have 'avg_age' row in transposed result", foundAvgAge); + } + @Test public void testTransposeColumnName() throws IOException { var result = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteUnionCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteUnionCommandIT.java new file mode 100644 index 00000000000..1dbd34357ab --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteUnionCommandIT.java @@ -0,0 +1,270 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOCATIONS_TYPE_CONFLICT; +import static 
org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.client.ResponseException; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteUnionCommandIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.ACCOUNT); + loadIndex(Index.BANK); + loadIndex(Index.TIME_TEST_DATA); + loadIndex(Index.TIME_TEST_DATA2); + loadIndex(Index.LOCATIONS_TYPE_CONFLICT); + } + + @Test + public void testBasicUnionTwoSubsearches() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union " + + "[search source=%s | where age < 30 | eval age_group = \\\"young\\\"] " + + "[search source=%s | where age >= 30 | eval age_group = \\\"adult\\\"] " + + "| stats count by age_group | sort age_group", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("count", null, "bigint"), schema("age_group", null, "string")); + verifyDataRows(result, rows(549L, "adult"), rows(451L, "young")); + } + + @Test + public void testUnionThreeSubsearches() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where state = \\\"IL\\\" | eval region" + + " = \\\"Illinois\\\"] [search source=%s | where state = \\\"TN\\\" | eval" + + " region = \\\"Tennessee\\\"] [search source=%s | where state = \\\"CA\\\" |" + + " eval region = \\\"California\\\"] | stats count by region | sort region", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("count", null, "bigint"), schema("region", null, "string")); + verifyDataRows(result, rows(17L, "California"), rows(22L, "Illinois"), rows(25L, "Tennessee")); + } + + @Test + public void testUnionDirectTableNames() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union %s, %s | where account_number = 1 | fields firstname, city", + TEST_INDEX_ACCOUNT, TEST_INDEX_BANK)); + + verifySchema(result, schema("firstname", null, "string"), schema("city", null, "string")); + + verifyDataRows(result, rows("Amber", "Brogan"), rows("Amber JOHnny", "Brogan")); + } + + @Test + public void testUnionMixedDirectTableAndSubsearch() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union %s, [search source=%s | where age > 30] | stats count() as total", + TEST_INDEX_ACCOUNT, TEST_INDEX_BANK)); + + verifySchema(result, schema("total", null, "bigint")); + verifyDataRows(result, rows(1006L)); + } + + @Test + public void testUnionWithDifferentIndicesSchemaMerge() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where age > 35 | fields account_number," + + " firstname, balance] [search source=%s | where age > 35 | fields" + + " account_number, balance] | stats count() as total_count", + TEST_INDEX_ACCOUNT, TEST_INDEX_BANK)); + + verifySchema(result, schema("total_count", null, "bigint")); + verifyDataRows(result, rows(241L)); + } + + @Test + public void testUnionNumericCoercion_BigIntPlusInteger() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where account_number = 1 | fields balance] [search" + + " source=%s | where 
account_number = 1 | eval balance = 100 | fields balance]" + + " | head 2", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("balance", null, "bigint")); + + assertEquals(2, result.getJSONArray("datarows").length()); + } + + @Test + public void testUnionIncompatibleTypes_MultipleFieldConflicts() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where account_number = 1 | fields firstname, age," + + " balance] [search source=%s | where place_id = 1001 | fields description," + + " age, place_id] | head 2", + TEST_INDEX_ACCOUNT, TEST_INDEX_LOCATIONS_TYPE_CONFLICT)); + + verifySchema( + result, + schema("firstname", null, "string"), + schema("age", null, "string"), + schema("balance", null, "bigint"), + schema("description", null, "string"), + schema("place_id", null, "int")); + + assertEquals(2, result.getJSONArray("datarows").length()); + } + + @Test + public void testUnionAllDatasetsDifferentSchemas() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where account_number = 1 | fields account_number," + + " balance] [search source=%s | where place_id = 1001 | fields description," + + " place_id] [search source=%s | where category = \\\"A\\\" | fields category," + + " value] | stats count() as total", + TEST_INDEX_ACCOUNT, + TEST_INDEX_LOCATIONS_TYPE_CONFLICT, + "opensearch-sql_test_index_time_data")); + + verifySchema(result, schema("total", null, "bigint")); + verifyDataRows(result, rows(28L)); + } + + @Test + public void testUnionMidPipeline_SingleExplicitDataset() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | where gender = \\\"M\\\" " + + "| union [search source=%s | where gender = \\\"F\\\"] " + + "| stats count() as total", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("total", null, "bigint")); + verifyDataRows(result, rows(1000L)); + } + + @Test + public void testUnionWithExplicitOrdering() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where account_number = 1 | fields account_number," + + " balance] [search source=%s | where account_number = 6 | fields" + + " account_number, balance] | sort balance desc", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema( + result, schema("account_number", null, "bigint"), schema("balance", null, "bigint")); + + verifyDataRows(result, rows(1L, 39225L), rows(6L, 5686L)); + } + + @Test + public void testUnionWithMaxout() throws IOException { + String ppl = + "| union maxout=5 " + + "[search source=%s | where gender = \\\"M\\\"] " + + "[search source=%s | where gender = \\\"F\\\"]"; + JSONObject result = executeQuery(String.format(ppl, TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema( + result, + schema("account_number", null, "bigint"), + schema("firstname", null, "string"), + schema("address", null, "string"), + schema("balance", null, "bigint"), + schema("gender", null, "string"), + schema("city", null, "string"), + schema("employer", null, "string"), + schema("state", null, "string"), + schema("age", null, "bigint"), + schema("email", null, "string"), + schema("lastname", null, "string")); + + assertEquals(5, result.getJSONArray("datarows").length()); + } + + @Test + public void testUnionWithEmptySubsearch() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union " + + "[search source=%s | where age > 25] " + + 
"[search source=%s | where age > 200 | eval impossible = \\\"yes\\\"] " + + "| stats count", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("count", null, "bigint")); + verifyDataRows(result, rows(733L)); + } + + @Test + public void testUnionWithAllEmptyDatasets() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union " + + "[search source=%s | where age > 1000] " + + "[search source=%s | where age > 1000] " + + "| stats count() as total", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("total", null, "bigint")); + verifyDataRows(result, rows(0L)); + } + + @Test + public void testUnionPreservesDuplicatesExactCopy() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union " + + "[search source=%s | where account_number = 1] " + + "[search source=%s | where account_number = 1] " + + "[search source=%s | where account_number = 1] " + + "| stats count() as total", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("total", null, "bigint")); + verifyDataRows(result, rows(3L)); + } + + @Test + public void testUnionWithSingleSubsearchThrowsError() { + Exception exception = + assertThrows( + ResponseException.class, + () -> + executeQuery( + String.format( + "| union " + "[search source=%s | where age > 30]", TEST_INDEX_ACCOUNT))); + + assertTrue(exception.getMessage().contains("Union command requires at least two datasets")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java index d47656471b0..22e12e71556 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java @@ -30,6 +30,7 @@ import org.opensearch.sql.analysis.Analyzer; import org.opensearch.sql.analysis.ExpressionAnalyzer; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasource.DataSourceService; @@ -187,21 +188,19 @@ public void onResponse(ExecutionEngine.QueryResponse response) { @Override public void onFailure(Exception e) { - if (e instanceof SyntaxCheckException) { - throw (SyntaxCheckException) e; - } else if (e instanceof QueryEngineException) { - throw (QueryEngineException) e; - } else if (e instanceof UnsupportedCursorRequestException) { - throw (UnsupportedCursorRequestException) e; - } else if (e instanceof NoCursorException) { - throw (NoCursorException) e; - } else if (e instanceof UnsupportedOperationException) { - throw (UnsupportedOperationException) e; - } else if (e instanceof IllegalArgumentException) { - // most exceptions thrown by Calcite when resolve a plan. 
- throw (IllegalArgumentException) e; - } else { - throw new IllegalStateException("Exception happened during execution", e); + switch (e) { + case ErrorReport errorReport -> throw errorReport; + case SyntaxCheckException syntaxCheckException -> throw syntaxCheckException; + case QueryEngineException queryEngineException -> throw queryEngineException; + case UnsupportedCursorRequestException unsupportedCursorRequestException -> + throw unsupportedCursorRequestException; + case NoCursorException noCursorException -> throw noCursorException; + case UnsupportedOperationException unsupportedOperationException -> + throw unsupportedOperationException; + case IllegalArgumentException illegalArgumentException -> + // most exceptions thrown by Calcite when resolve a plan. + throw illegalArgumentException; + default -> throw new IllegalStateException("Exception happened during execution", e); } } }, diff --git a/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceEnabledIT.java b/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceEnabledIT.java index a53c04d8710..f014ab587de 100644 --- a/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceEnabledIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceEnabledIT.java @@ -9,6 +9,7 @@ import java.io.IOException; import lombok.SneakyThrows; +import org.apache.hc.core5.http.io.entity.EntityUtils; import org.json.JSONObject; import org.junit.After; import org.junit.Assert; @@ -150,8 +151,11 @@ private void assertDataSourceCount(int expected) { @SneakyThrows private Response performRequest(Request request) { try { - return client().performRequest(request); + Response response = client().performRequest(request); + System.err.println("Successful response: " + EntityUtils.toString(response.getEntity())); + return response; } catch (ResponseException e) { + System.err.println("Failed response: " + EntityUtils.toString(e.getResponse().getEntity())); return e.getResponse(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ConvertCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ConvertCommandIT.java index 099992c9298..b1c794130b0 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ConvertCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ConvertCommandIT.java @@ -69,6 +69,38 @@ public void testConvertWithStats() { "source=%s | convert auto(balance) | stats avg(balance) by gender"); } + @Test + public void testConvertMktimeFunction() { + verifyQueryThrowsCalciteError( + "source=%s | eval date_str = '2003-10-18 20:07:13' | convert mktime(date_str) | fields" + + " date_str"); + } + + @Test + public void testConvertCtimeFunction() { + verifyQueryThrowsCalciteError( + "source=%s | eval timestamp = 1066507633 | convert ctime(timestamp) | fields timestamp"); + } + + @Test + public void testConvertDur2secFunction() { + verifyQueryThrowsCalciteError( + "source=%s | eval duration = '01:23:45' | convert dur2sec(duration) | fields duration"); + } + + @Test + public void testConvertMstimeFunction() { + verifyQueryThrowsCalciteError( + "source=%s | eval time_str = '03:45' | convert mstime(time_str) | fields time_str"); + } + + @Test + public void testConvertWithTimeformat() { + verifyQueryThrowsCalciteError( + "source=%s | eval date_str = '18/10/2003 20:07:13' | convert" + + " timeformat=\\\"%%d/%%m/%%Y %%H:%%M:%%S\\\" mktime(date_str) | fields date_str"); + } + private void verifyQueryThrowsCalciteError(String query) { Exception e = 
assertThrows(Exception.class, () -> executeQuery(String.format(query, TEST_INDEX_BANK))); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java index 1a2f5337998..25e7c12ffff 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java @@ -145,6 +145,47 @@ public void testNumericFieldFromString() throws Exception { client().performRequest(deleteRequest); } + @Test + public void testBooleanFieldFromNumberAcrossWildcardIndices() throws Exception { + // Reproduce issue #5269: querying across indices where same field has conflicting types + // (boolean vs text) and the text-typed index stores a numeric value like 0. + String indexBool = "repro_bool_test_bb"; + String indexText = "repro_bool_test_aa"; + + try { + // Create index with boolean mapping + Request createBool = new Request("PUT", "/" + indexBool); + createBool.setJsonEntity( + "{\"mappings\":{\"properties\":{\"flag\":{\"type\":\"boolean\"}," + + "\"startTime\":{\"type\":\"date_nanos\"}}}}"); + client().performRequest(createBool); + + // Create index with text mapping + Request createText = new Request("PUT", "/" + indexText); + createText.setJsonEntity( + "{\"mappings\":{\"properties\":{\"flag\":{\"type\":\"text\"}," + + "\"startTime\":{\"type\":\"date_nanos\"}}}}"); + client().performRequest(createText); + + // Insert boolean value into boolean-typed index + Request insertBool = new Request("PUT", "/" + indexBool + "/_doc/1?refresh=true"); + insertBool.setJsonEntity("{\"startTime\":\"2026-03-25T20:25:00.000Z\",\"flag\":false}"); + client().performRequest(insertBool); + + // Insert numeric value into text-typed index + Request insertText = new Request("PUT", "/" + indexText + "/_doc/1?refresh=true"); + insertText.setJsonEntity("{\"startTime\":\"2026-03-24T20:25:00.000Z\",\"flag\":0}"); + client().performRequest(insertText); + + // Query across both indices with wildcard — should not throw an error + JSONObject result = executeQuery("source=repro_bool_test_* | fields flag"); + assertEquals(2, result.getJSONArray("datarows").length()); + } finally { + client().performRequest(new Request("DELETE", "/" + indexBool)); + client().performRequest(new Request("DELETE", "/" + indexText)); + } + } + @Test public void testBooleanFieldFromString() throws Exception { final int docId = 2; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index ded727765f7..837865a3585 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java @@ -530,4 +530,20 @@ public void testMvExpandInvalidLimitNegative() throws IOException { assertThat(error.getString("type"), equalTo("SyntaxCheckException")); } } + + @Test + public void testUnionUnsupportedInV2() throws IOException { + JSONObject result; + try { + result = + executeQuery( + String.format( + "| union [search source=%s | where age < 30] [search source=%s | where age >=" + + " 30]", + TEST_INDEX_BANK, TEST_INDEX_BANK)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + } + verifyQuery(result); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java index 
a386987e532..ad4c3475818 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java @@ -193,6 +193,27 @@ public void testIsNotNullFunction() throws IOException { verifyDataRows(result, rows("Amber JOHnny")); } + @Test + public void testIsNullPredicate() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where age IS NULL | fields firstname", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRows(result, rows("Virginia")); + } + + @Test + public void testIsNotNullPredicate() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where age IS NOT NULL and like(firstname, 'Ambe_%%') | fields" + + " firstname", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRows(result, rows("Amber JOHnny")); + } + @Test public void testWhereWithMetadataFields() throws IOException { JSONObject result = diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index e55e406de7b..24e67f43f13 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -262,27 +262,12 @@ public void testCrossClusterRenameFullWildcard() throws IOException { JSONObject result = executeQuery(String.format("search source=%s | rename * as old_*", TEST_INDEX_DOG_REMOTE)); verifyColumn( - result, - columnName("old_dog_name"), - columnName("old_holdersName"), - columnName("old_age"), - columnName("old__id"), - columnName("old__index"), - columnName("old__score"), - columnName("old__maxscore"), - columnName("old__sort"), - columnName("old__routing")); + result, columnName("old_dog_name"), columnName("old_holdersName"), columnName("old_age")); verifySchema( result, schema("old_dog_name", "string"), schema("old_holdersName", "string"), - schema("old_age", "bigint"), - schema("old__id", "string"), - schema("old__index", "string"), - schema("old__score", "float"), - schema("old__maxscore", "float"), - schema("old__sort", "bigint"), - schema("old__routing", "string")); + schema("old_age", "bigint")); } @Test @@ -528,4 +513,15 @@ public void testCrossClusterMvExpandWithLimit() throws IOException { verifySchema(result, schema("username", "string"), schema("skills.name", "string")); verifyDataRows(result, rows("limituser", "a"), rows("limituser", "b")); } + + @Test + public void testCrossClusterUnion() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where age < 30] [search source=%s | where age >= 30] |" + + " stats count() by gender", + TEST_INDEX_BANK_REMOTE, TEST_INDEX_BANK_REMOTE)); + verifyColumn(result, columnName("count()"), columnName("gender")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java index ed25a1df2d9..4c90b7dce03 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java @@ -613,6 +613,29 @@ public void testUserWithoutMappingPermissionCannotGetFieldMappings() throws IOEx } } + @Test + public void testUserWithoutMappingPermissionGetsPermissionDeniedErrorCode() throws IOException { + // Test that security 
exceptions return PERMISSION_DENIED error code, not INDEX_NOT_FOUND + try { + executeQueryAsUser(String.format("describe %s", TEST_INDEX_BANK), NO_MAPPING_USER); + fail("Expected security exception for user without mapping permission"); + } catch (ResponseException e) { + assertEquals(403, e.getResponse().getStatusLine().getStatusCode()); + String responseBody = + org.opensearch.sql.legacy.TestUtils.getResponseBody(e.getResponse(), false); + JSONObject responseJson = new JSONObject(responseBody); + + // Verify the error code is PERMISSION_DENIED, not INDEX_NOT_FOUND + assertTrue("Response should have error field", responseJson.has("error")); + JSONObject error = responseJson.getJSONObject("error"); + assertTrue("Error should have code field", error.has("code")); + assertEquals( + "Security exception should return PERMISSION_DENIED error code", + "PERMISSION_DENIED", + error.getString("code")); + } + } + @Test public void testUserWithoutSettingsPermissionCannotGetSettings() throws IOException { // Test that user without settings permission gets 403 error diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/ExistsPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/ExistsPushdownIT.java new file mode 100644 index 00000000000..08ceb8c35f9 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/ExistsPushdownIT.java @@ -0,0 +1,83 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.legacy.TestsConstants; + +/** + * Explain-plan integration tests asserting that {@code IS NOT NULL} / {@code IS NULL} predicates + * push down as native OpenSearch {@code exists} DSL rather than as serialized script queries. + * + *
<p>
    Before this change both predicates serialized through the compounded script engine, producing + * a {@code "script"} clause in the pushdown DSL. After this change the v2 filter builder emits + * {@code {"exists": {"field": ...}}} directly for {@code IS NOT NULL}, and a {@code bool} query + * with a single {@code must_not[exists]} child for {@code IS NULL}. This matches what downstream + * tooling, serverless / AOSS, and the Calcite path already produce. + */ +public class ExistsPushdownIT extends SQLIntegTestCase { + + // Anchored on the surrounding `sourceBuilder=...`, `pitId=` tokens in OpenSearchRequest's + // toString() output. Test-only coupling: if that request-string format changes (token renamed, + // pitId removed), this helper breaks even when the DSL shape is still correct. Update the regex + // anchors if that happens. + private static final Pattern SOURCE_BUILDER_JSON = + Pattern.compile("sourceBuilder=(\\{.*?\\}), pitId=", Pattern.DOTALL); + + /** Extracts and unescapes the sourceBuilder JSON embedded in the explain request string. */ + private static String extractSourceBuilderJson(String explain) { + Matcher m = SOURCE_BUILDER_JSON.matcher(explain); + assertTrue("Explain should contain sourceBuilder JSON:\n" + explain, m.find()); + return m.group(1).replace("\\\"", "\""); + } + + @Override + protected void init() throws Exception { + loadIndex(Index.ACCOUNT); + } + + private static final String TEST_INDEX = TestsConstants.TEST_INDEX_ACCOUNT; + + @Test + public void testIsNotNullPushesDownAsExistsQuery() throws IOException { + String explain = + explainQuery("SELECT age FROM " + TEST_INDEX + " WHERE age IS NOT NULL LIMIT 1"); + String sourceBuilder = extractSourceBuilderJson(explain); + + assertTrue( + "IS NOT NULL should push down as native exists DSL:\n" + sourceBuilder, + sourceBuilder.contains("\"exists\"")); + assertTrue( + "IS NOT NULL exists DSL should target the 'age' field:\n" + sourceBuilder, + sourceBuilder.contains("\"field\":\"age\"")); + assertFalse( + "IS NOT NULL should not fall through to a script query:\n" + sourceBuilder, + sourceBuilder.contains("\"script\"")); + } + + @Test + public void testIsNullPushesDownAsMustNotExistsQuery() throws IOException { + String explain = explainQuery("SELECT age FROM " + TEST_INDEX + " WHERE age IS NULL LIMIT 1"); + String sourceBuilder = extractSourceBuilderJson(explain); + + assertTrue( + "IS NULL should push down as bool/must_not[exists] DSL:\n" + sourceBuilder, + sourceBuilder.contains("\"must_not\"")); + assertTrue( + "IS NULL should wrap a native exists clause:\n" + sourceBuilder, + sourceBuilder.contains("\"exists\"")); + assertTrue( + "IS NULL exists DSL should target the 'age' field:\n" + sourceBuilder, + sourceBuilder.contains("\"field\":\"age\"")); + assertFalse( + "IS NULL should not fall through to a script query:\n" + sourceBuilder, + sourceBuilder.contains("\"script\"")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExecutionIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExecutionIT.java new file mode 100644 index 00000000000..36e78567d54 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExecutionIT.java @@ -0,0 +1,227 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.util.TestUtils.createIndexByRestClient; +import static org.opensearch.sql.util.TestUtils.isIndexExist; +import static 
org.opensearch.sql.util.TestUtils.performRequest; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import org.json.JSONArray; +import org.json.JSONObject; +import org.junit.Assume; +import org.junit.Test; +import org.opensearch.client.Request; +import org.opensearch.client.Response; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +/** + * Happy-path execution tests for the vectorSearch() SQL table function. These tests run an actual + * k-NN query against a small in-memory knn_vector index and assert that results come back ordered + * by score and respect any WHERE filters. + * + *
<p>
    The k-NN plugin is not provisioned by the default integ-test cluster — each test calls {@link + * Assume#assumeTrue} on {@link #isKnnPluginInstalled()} so the class is silently skipped when k-NN + * is absent. Run locally against a cluster that has opensearch-knn installed. Provisioning k-NN in + * CI is a separate follow-up. + */ +public class VectorSearchExecutionIT extends SQLIntegTestCase { + + private static final String TEST_INDEX = "vector_exec_test"; + + // 6 docs in 2D — two clusters so filter/radial tests have distinguishable results. + // Cluster A near [1, 1]: docs 1-3 (state=TX, ages 25/30/40). + // Cluster B near [9, 9]: docs 4-6 (state=CA, ages 28/35/45). + // Pin Lucene HNSW + L2 so efficient filtering is deterministic (k-NN supports efficient + // filtering only on lucene+hnsw and faiss+hnsw/ivf) and the L2 → 1/(1+d) scoring used by the + // radial min_score test is well-defined. + private static final String MAPPING = + "{" + + " \"settings\": {\"index\": {\"knn\": true}}," + + " \"mappings\": {" + + " \"properties\": {" + + " \"embedding\": {" + + " \"type\": \"knn_vector\"," + + " \"dimension\": 2," + + " \"method\": {" + + " \"name\": \"hnsw\"," + + " \"engine\": \"lucene\"," + + " \"space_type\": \"l2\"" + + " }" + + " }," + + " \"state\": {\"type\": \"keyword\"}," + + " \"age\": {\"type\": \"integer\"}" + + " }" + + " }" + + "}"; + + private static final String BULK_BODY = + "{\"index\":{\"_id\":\"1\"}}\n" + + "{\"embedding\":[1.0,1.0],\"state\":\"TX\",\"age\":25}\n" + + "{\"index\":{\"_id\":\"2\"}}\n" + + "{\"embedding\":[1.1,0.9],\"state\":\"TX\",\"age\":30}\n" + + "{\"index\":{\"_id\":\"3\"}}\n" + + "{\"embedding\":[0.9,1.2],\"state\":\"TX\",\"age\":40}\n" + + "{\"index\":{\"_id\":\"4\"}}\n" + + "{\"embedding\":[9.0,9.0],\"state\":\"CA\",\"age\":28}\n" + + "{\"index\":{\"_id\":\"5\"}}\n" + + "{\"embedding\":[9.1,8.8],\"state\":\"CA\",\"age\":35}\n" + + "{\"index\":{\"_id\":\"6\"}}\n" + + "{\"embedding\":[8.7,9.3],\"state\":\"CA\",\"age\":45}\n"; + + @Override + protected void init() throws Exception { + Assume.assumeTrue("k-NN plugin not installed on test cluster", isKnnPluginInstalled()); + if (!isIndexExist(client(), TEST_INDEX)) { + createIndexByRestClient(client(), TEST_INDEX, MAPPING); + Request bulk = new Request("POST", "/" + TEST_INDEX + "/_bulk?refresh=true"); + bulk.setJsonEntity(BULK_BODY); + performRequest(client(), bulk); + } + } + + private static boolean isKnnPluginInstalled() { + try { + Response response = client().performRequest(new Request("GET", "/_cat/plugins?h=component")); + String body = new String(response.getEntity().getContent().readAllBytes()); + return body.contains("opensearch-knn"); + } catch (IOException e) { + return false; + } + } + + // ── Top-k happy path ──────────────────────────────────────────────── + + @Test + public void testTopKReturnsNearestSortedByScore() throws IOException { + JSONObject result = + executeJdbcRequest( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 1.0]', option='k=3') AS v " + + "LIMIT 3"); + + // All 3 returned docs should be from cluster A (ids 1-3), ordered by score desc. 
+ JSONArray rows = result.getJSONArray("datarows"); + assertEquals("Expected 3 rows:\n" + result, 3, rows.length()); + for (int i = 0; i < rows.length(); i++) { + String id = rows.getJSONArray(i).getString(0); + assertTrue( + "Row " + i + " id=" + id + " should be from cluster A (1,2,3):\n" + result, + id.equals("1") || id.equals("2") || id.equals("3")); + } + // Scores must be non-increasing. + double prev = Double.POSITIVE_INFINITY; + for (int i = 0; i < rows.length(); i++) { + double score = rows.getJSONArray(i).getDouble(1); + assertTrue( + "Scores must be sorted desc, got " + score + " after " + prev + ":\n" + result, + score <= prev); + prev = score; + } + } + + // ── POST filter happy path ────────────────────────────────────────── + + @Test + public void testPostFilterReturnsOnlyMatchingDocs() throws IOException { + // Query from cluster B with WHERE state='TX' forces POST filtering to surface TX docs + // (cluster A) even though the vector is closer to cluster B. k=10 covers all 6 docs so + // post-filtering to state='TX' deterministically yields exactly {1,2,3}. filter_type=post + // is specified explicitly because the default placement is EFFICIENT — this test + // guarantees POST continues to work when the user opts into it. + JSONObject result = + executeJdbcRequest( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[9.0, 9.0]', option='k=10,filter_type=post') AS v " + + "WHERE v.state = 'TX' " + + "LIMIT 10"); + + assertRowIdsEqual(result, "1", "2", "3"); + } + + // ── EFFICIENT filter happy path ───────────────────────────────────── + + @Test + public void testEfficientFilterReturnsOnlyMatchingDocs() throws IOException { + // Query vector sits on cluster A (TX) but WHERE state='CA' forces EFFICIENT filtering to + // navigate HNSW toward CA docs. With k=3, a POST-filter implementation would return 0 rows + // (the 3 nearest candidates are all TX, which get filtered out); an efficient-filter + // implementation returns exactly the 3 CA docs {4,5,6}. This asymmetry makes the test + // discriminate between the two filter modes. + JSONObject result = + executeJdbcRequest( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 1.0]', option='k=3,filter_type=efficient') AS v " + + "WHERE v.state = 'CA' " + + "LIMIT 3"); + + assertRowIdsEqual(result, "4", "5", "6"); + } + + // ── Radial happy paths ────────────────────────────────────────────── + + @Test + public void testRadialMaxDistanceReturnsOnlyNearDocs() throws IOException { + // max_distance=1.0 (L2) centered on [1,1] includes all 3 cluster A docs (max L2 ≈ 0.22) + // and excludes cluster B which is ~11 units away. + JSONObject result = + executeJdbcRequest( + "SELECT v._id " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 1.0]', option='max_distance=1.0') AS v " + + "LIMIT 10"); + + assertRowIdsEqual(result, "1", "2", "3"); + } + + @Test + public void testRadialMinScoreReturnsOnlyHighScoreDocs() throws IOException { + // For L2 space, OpenSearch score = 1/(1+distance). Centered on [1,1], cluster A docs + // score ~0.82-1.0 and cluster B scores ~0.08. min_score=0.5 yields exactly {1,2,3}. 
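+ // Worked example (illustrative): doc 3 at [0.9, 1.2] has d = sqrt(0.01 + 0.04) ≈ 0.22, so
+ // score ≈ 1 / (1 + 0.22) ≈ 0.82 >= 0.5; doc 4 at [9.0, 9.0] has d ≈ 11.3, so score ≈ 0.08 < 0.5.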
+ JSONObject result = + executeJdbcRequest( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 1.0]', option='min_score=0.5') AS v " + + "LIMIT 10"); + + JSONArray rows = result.getJSONArray("datarows"); + for (int i = 0; i < rows.length(); i++) { + double score = rows.getJSONArray(i).getDouble(1); + assertTrue("Row " + i + " score=" + score + " should be >= 0.5:\n" + result, score >= 0.5); + } + assertRowIdsEqual(result, "1", "2", "3"); + } + + /** Asserts the result's datarows column 0 contains exactly the given ids (as a set). */ + private static void assertRowIdsEqual(JSONObject result, String... expectedIds) { + JSONArray rows = result.getJSONArray("datarows"); + assertEquals( + "Expected " + expectedIds.length + " rows:\n" + result, expectedIds.length, rows.length()); + Set expected = new HashSet<>(Arrays.asList(expectedIds)); + Set actual = new HashSet<>(); + for (int i = 0; i < rows.length(); i++) { + actual.add(rows.getJSONArray(i).getString(0)); + } + assertEquals("Row id set mismatch:\n" + result, expected, actual); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExplainIT.java new file mode 100644 index 00000000000..8719189b13a --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExplainIT.java @@ -0,0 +1,559 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Base64; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.legacy.TestsConstants; + +/** + * Explain-plan integration tests for vectorSearch SQL table function. These tests verify DSL + * push-down shape via _explain. They do NOT require the k-NN plugin since _explain only parses and + * plans the query without executing it against a knn index. + */ +public class VectorSearchExplainIT extends SQLIntegTestCase { + + // Matches WrapperQueryBuilder's base64 payload in explain JSON. The explain output escapes + // quotes as \", so the regex tolerates both \" and " forms around the query key/value. + private static final Pattern WRAPPER_PAYLOAD = + Pattern.compile("\\\\?\"query\\\\?\":\\\\?\"([A-Za-z0-9+/=]+)\\\\?\""); + // Anchored on the surrounding `sourceBuilder=...`, `pitId=` tokens in OpenSearchRequest's + // toString() output. Test-only coupling: if that request-string format changes (token renamed, + // pitId removed), this helper breaks even when the DSL shape is still correct. Update the regex + // anchors if that happens. + private static final Pattern SOURCE_BUILDER_JSON = + Pattern.compile("sourceBuilder=(\\{.*?\\}), pitId=", Pattern.DOTALL); + + /** Decodes every base64-encoded wrapper payload in the explain output into its knn JSON. */ + private static List decodeWrapperKnnJsons(String explain) { + List payloads = new ArrayList<>(); + Matcher m = WRAPPER_PAYLOAD.matcher(explain); + while (m.find()) { + payloads.add(new String(Base64.getDecoder().decode(m.group(1)), StandardCharsets.UTF_8)); + } + return payloads; + } + + /** Returns the single wrapper knn JSON, asserting exactly one is present. 
*/ + private static String decodeSoleKnnJson(String explain) { + List payloads = decodeWrapperKnnJsons(explain); + assertEquals( + "Expected exactly one wrapper query payload in explain:\n" + explain, 1, payloads.size()); + return payloads.get(0); + } + + /** Extracts and unescapes the sourceBuilder JSON embedded in the explain request string. */ + private static String extractSourceBuilderJson(String explain) { + Matcher m = SOURCE_BUILDER_JSON.matcher(explain); + assertTrue("Explain should contain sourceBuilder JSON:\n" + explain, m.find()); + return m.group(1).replace("\\\"", "\""); + } + + @Override + protected void init() throws Exception { + // _explain needs the index to exist for field resolution. + loadIndex(Index.ACCOUNT); + } + + private static final String TEST_INDEX = TestsConstants.TEST_INDEX_ACCOUNT; + + // ── Top-k / radial DSL shape ───────────────────────────────────────── + + @Test + public void testExplainTopKProducesKnnQuery() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=5') AS v " + + "LIMIT 5"); + + assertTrue( + "Explain should contain track_scores:\n" + explain, explain.contains("track_scores")); + + // Top-k without WHERE should have the knn at the root, not wrapped in an outer bool. + String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Top-k without WHERE should not wrap knn in an outer bool:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue( + "knn JSON should contain the vector values:\n" + knnJson, + knnJson.contains("[1.0,2.0,3.0]")); + assertTrue("knn JSON should contain k=5:\n" + knnJson, knnJson.contains("\"k\":5")); + assertFalse( + "Top-k without WHERE should not embed a filter:\n" + knnJson, knnJson.contains("filter")); + } + + @Test + public void testExplainRadialMaxDistanceProducesKnnQuery() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='max_distance=10.5') AS v " + + "LIMIT 100"); + + // Radial without WHERE should have the knn at the root, not wrapped in an outer bool. 
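+ // Illustrative decoded wrapper payload (shape only, exact key layout assumed, not asserted
+ // verbatim): {"knn":{"embedding":{"vector":[1.0,2.0],"max_distance":10.5}}}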
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Radial without WHERE should not wrap knn in an outer bool:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue( + "knn JSON should contain the vector values:\n" + knnJson, knnJson.contains("[1.0,2.0]")); + assertTrue( + "knn JSON should contain max_distance=10.5:\n" + knnJson, + knnJson.contains("\"max_distance\":10.5")); + assertFalse( + "Radial without WHERE should not embed a filter:\n" + knnJson, knnJson.contains("filter")); + } + + @Test + public void testExplainRadialMinScoreProducesKnnQuery() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='min_score=0.8') AS v " + + "LIMIT 100"); + + // Radial without WHERE should have the knn at the root, not wrapped in an outer bool. + String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Radial without WHERE should not wrap knn in an outer bool:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue( + "knn JSON should contain the vector values:\n" + knnJson, knnJson.contains("[1.0,2.0]")); + assertTrue( + "knn JSON should contain min_score=0.8:\n" + knnJson, + knnJson.contains("\"min_score\":0.8")); + assertFalse( + "Radial without WHERE should not embed a filter:\n" + knnJson, knnJson.contains("filter")); + } + + // ── Default (EFFICIENT) pre-filter DSL shape ──────────────────────── + + @Test + public void testExplainDefaultFilterProducesKnnWithFilter() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=10') AS v " + + "WHERE v.state = 'TX' " + + "LIMIT 10"); + + // Default (EFFICIENT) shape: WHERE is embedded inside knn.filter, the knn JSON is base64- + // encoded inside a WrapperQueryBuilder, and there is no outer bool/must wrapping. 
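+ // Illustrative decoded payload (shape only, inner filter clause assumed, not asserted verbatim):
+ // {"knn":{"embedding":{"vector":[1.0,2.0,3.0],"k":10,"filter":{"term":{"state.keyword":"TX"}}}}}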
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Default EFFICIENT mode should not produce bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertFalse( + "Default EFFICIENT mode should not contain must clause:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue("knn JSON should contain k=10:\n" + knnJson, knnJson.contains("\"k\":10")); + assertTrue( + "Default EFFICIENT mode must embed filter inside knn:\n" + knnJson, + knnJson.contains("filter")); + assertTrue( + "Default EFFICIENT mode must embed the WHERE predicate inside knn:\n" + knnJson, + knnJson.contains("state")); + } + + @Test + public void testExplainDefaultCompoundPredicateProducesKnnWithFilter() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=10') AS v " + + "WHERE v.state = 'TX' AND v.age > 30 " + + "LIMIT 10"); + + // Compound default-mode WHERE must also route through knn.filter: no outer bool/must, and + // both predicate fields embedded inside the knn payload. + String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Default EFFICIENT mode should not produce bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertFalse( + "Default EFFICIENT mode should not contain must clause:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue("knn JSON should contain k=10:\n" + knnJson, knnJson.contains("\"k\":10")); + assertTrue( + "Compound default EFFICIENT must embed filter inside knn:\n" + knnJson, + knnJson.contains("filter")); + assertTrue( + "Compound default EFFICIENT must embed the state predicate inside knn:\n" + knnJson, + knnJson.contains("state")); + assertTrue( + "Compound default EFFICIENT must embed the age predicate inside knn:\n" + knnJson, + knnJson.contains("age")); + } + + @Test + public void testExplainDefaultRadialWithWhereProducesKnnWithFilter() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='max_distance=10.5') AS v " + + "WHERE v.state = 'TX' " + + "LIMIT 100"); + + // Radial + default WHERE must also use the EFFICIENT shape: no outer bool/must, radial + // parameters preserved inside the knn payload, and the WHERE predicate embedded alongside. 
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Default EFFICIENT mode should not produce bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertFalse( + "Default EFFICIENT mode should not contain must clause:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue( + "knn JSON should contain max_distance=10.5:\n" + knnJson, + knnJson.contains("\"max_distance\":10.5")); + assertTrue( + "Radial default EFFICIENT must embed filter inside knn:\n" + knnJson, + knnJson.contains("filter")); + assertTrue( + "Radial default EFFICIENT must embed the WHERE predicate inside knn:\n" + knnJson, + knnJson.contains("state")); + } + + // ── Sort + LIMIT explain ───────────────────────────────────────────── + + @Test + public void testOrderByScoreDescExplainSucceeds() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "ORDER BY v._score DESC " + + "LIMIT 5"); + + assertTrue( + "Explain should succeed with ORDER BY _score DESC:\n" + explain, + explain.contains("wrapper")); + } + + @Test + public void testExplainLimitWithinKSucceeds() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=10') AS v " + + "LIMIT 5"); + + assertTrue("Explain should succeed with LIMIT <= k:\n" + explain, explain.contains("wrapper")); + } + + // ── filter_type explain ───────────────────────────────────────────── + + @Test + public void testExplainFilterTypePostProducesBoolQuery() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=10,filter_type=post') AS v " + + "WHERE v.state = 'TX' " + + "LIMIT 10"); + + // Explicit filter_type=post must produce the same bool.must=[knn]/bool.filter=[term] shape as + // the default, and the WHERE predicate must NOT leak into the knn payload (that would be + // efficient mode). This is the key false-positive guard: substring-only checks would pass for + // efficient mode too. 
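+ // Illustrative outer shape (not asserted verbatim): the knn wrapper sits in the scoring clause
+ // while the WHERE lands in the non-scoring filter, roughly
+ // {"bool":{"must":[{"wrapper":{"query":"<base64 knn>"}}],"filter":[{"term":{"state.keyword":"TX"}}]}}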
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertTrue( + "Explain should contain bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertTrue( + "Explain should contain must:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + assertTrue( + "Explain should contain filter:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"filter\"")); + assertTrue( + "Explain should contain the outer state predicate:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"state.keyword\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertFalse( + "filter_type=post must not embed the WHERE predicate inside knn:\n" + knnJson, + knnJson.contains("state")); + assertFalse( + "filter_type=post must not embed a filter inside knn:\n" + knnJson, + knnJson.contains("filter")); + } + + @Test + public void testExplainFilterTypeEfficientProducesKnnWithFilter() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5,filter_type=efficient') AS v " + + "WHERE v.state = 'TX' " + + "LIMIT 5"); + + // Efficient mode: knn rebuilt with filter inside, wrapped in WrapperQueryBuilder. + // The knn JSON (including the embedded filter) is base64-encoded inside the wrapper, + // so we verify structure by: (1) no bool/must in plaintext (that would be post-filter shape), + // (2) decode the base64 payload to confirm the filter and predicate field are embedded inside + // the knn query. + String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Efficient mode should not produce bool query (that is post-filter shape):\n" + + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertFalse( + "Efficient mode should not contain must clause:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue( + "Efficient mode knn JSON should contain filter:\n" + knnJson, knnJson.contains("filter")); + assertTrue( + "Efficient mode knn JSON should contain the WHERE predicate field:\n" + knnJson, + knnJson.contains("state")); + } + + @Test + public void testEfficientFilterWithOrderByScoreDescSucceeds() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5,filter_type=efficient') AS v " + + "WHERE v.state = 'TX' " + + "ORDER BY v._score DESC " + + "LIMIT 5"); + + // Same efficient-mode shape guarantee as testExplainFilterTypeEfficientProducesKnnWithFilter, + // with an added ORDER BY _score DESC: no outer bool/must, and the WHERE predicate must be + // embedded inside the knn payload (efficient filtering, not post-filter). 
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Efficient mode should not produce bool query (that is post-filter shape):\n" + + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertFalse( + "Efficient mode should not contain must clause:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue( + "Efficient mode knn JSON should contain filter:\n" + knnJson, knnJson.contains("filter")); + assertTrue( + "Efficient mode knn JSON should contain the WHERE predicate field:\n" + knnJson, + knnJson.contains("state")); + } + + // ── BETWEEN / NOT IN pushdown regression guards ───────────────────── + // These tests lock in the DSL shape currently produced for BETWEEN and NOT IN predicates + // when pushed down through vectorSearch(). They exist to catch silent regressions where a + // change in the v2 FilterQueryBuilder pipeline would fall back to a serialized script query + // instead of the native range/bool shape the cluster can index-accelerate. + + @Test + public void testBetweenPushesAsRange() throws IOException { + // Pin filter_type=post to keep the regression guard aimed at the post-filter serialization + // path: these assertions lock in the outer bool/must/filter shape that only appears when + // WHERE is applied alongside knn rather than embedded under knn.filter. + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=10,filter_type=post') AS v " + + "WHERE v.balance BETWEEN 50 AND 200 " + + "LIMIT 10"); + + // BETWEEN is desugared by the analyzer into AND(>=, <=), which FilterQueryBuilder renders as + // two range clauses combined under a bool. The goal here is regression lock-in: ensure the + // pushed filter is native range DSL, not a serialized script query. + String sourceBuilderJson = extractSourceBuilderJson(explain); + assertTrue( + "Explain should contain bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertTrue( + "Explain should contain must clause (knn in scoring context):\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + assertTrue( + "Explain should contain filter clause (WHERE in non-scoring context):\n" + + sourceBuilderJson, + sourceBuilderJson.contains("\"filter\"")); + assertTrue( + "BETWEEN should push as native range DSL:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"range\"")); + assertTrue( + "Range should target balance field:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"balance\"")); + // RangeQueryBuilder serializes inclusive bounds as from/to + include_lower/include_upper. Lock + // both the lower bound (50) and upper bound (200) are present in the pushed DSL. + assertTrue( + "Range should contain lower bound 50:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"from\" : 50") || sourceBuilderJson.contains("\"from\":50")); + assertTrue( + "Range should contain upper bound 200:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"to\" : 200") || sourceBuilderJson.contains("\"to\":200")); + // Script-query fallback sentinel: the CompoundedScriptEngine lang marker must NOT appear when + // BETWEEN is pushed down natively. 
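+ // Illustrative contrast (shape only, not asserted verbatim): expect a native clause like
+ // {"range":{"balance":{"from":50,"to":200,"include_lower":true,"include_upper":true}}}
+ // rather than a {"script":{...}} clause carrying serialized source.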
+ assertFalse( + "BETWEEN must not fall back to a serialized script query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"script\"")); + + // POST-filter mode (default): the WHERE predicate must live OUTSIDE the knn payload. + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertFalse( + "Post-filter mode must not embed the balance predicate inside knn:\n" + knnJson, + knnJson.contains("balance")); + assertFalse( + "Post-filter mode must not embed a range inside knn:\n" + knnJson, + knnJson.contains("range")); + } + + @Test + public void testNotInPushesAsMustNotTerms() throws IOException { + // Pin filter_type=post to keep the regression guard aimed at the post-filter serialization + // path: these assertions lock in the outer bool/must/filter shape that only appears when + // WHERE is applied alongside knn rather than embedded under knn.filter. + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=10,filter_type=post') AS v " + + "WHERE v.gender NOT IN ('M', 'F') " + + "LIMIT 10"); + + // v2 analyzer desugars `x NOT IN (a, b)` into `NOT(x = a OR x = b)`. FilterQueryBuilder maps + // NOT to bool.must_not and OR to bool.should, so the pushed DSL is must_not[should[term,term]] + // rather than a single terms clause. The shape we're locking in is: native bool with must_not + // on the keyword subfield, *not* a serialized script query. + String sourceBuilderJson = extractSourceBuilderJson(explain); + assertTrue( + "Explain should contain bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertTrue( + "Explain should contain must clause (knn in scoring context):\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + assertTrue( + "Explain should contain filter clause (WHERE in non-scoring context):\n" + + sourceBuilderJson, + sourceBuilderJson.contains("\"filter\"")); + assertTrue( + "NOT IN should push as bool.must_not:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must_not\"")); + // OR-of-equals desugaring means the two literals land in a bool.should of term clauses. + assertTrue( + "NOT IN should contain should clause for OR-of-equals desugaring:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"should\"")); + assertTrue( + "NOT IN should produce term clauses for each literal:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"term\"")); + // Terms target the keyword subfield of gender (text field with .keyword multi-field). + assertTrue( + "NOT IN term clauses should target gender.keyword:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"gender.keyword\"")); + // Both literals must be present in the pushed DSL. + assertTrue( + "NOT IN should contain the 'M' literal:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"M\"")); + assertTrue( + "NOT IN should contain the 'F' literal:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"F\"")); + // Script-query fallback sentinel: native pushdown must not degrade to a serialized script. + assertFalse( + "NOT IN must not fall back to a serialized script query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"script\"")); + + // POST-filter mode (default): the WHERE predicate must live OUTSIDE the knn payload. 
+ String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertFalse( + "Post-filter mode must not embed the gender predicate inside knn:\n" + knnJson, + knnJson.contains("gender")); + assertFalse( + "Post-filter mode must not embed must_not inside knn:\n" + knnJson, + knnJson.contains("must_not")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchIT.java new file mode 100644 index 00000000000..c10b3a219f6 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchIT.java @@ -0,0 +1,755 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.hamcrest.Matchers.containsString; + +import java.io.IOException; +import org.junit.Test; +import org.opensearch.client.Request; +import org.opensearch.client.ResponseException; +import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.legacy.TestsConstants; + +/** + * Integration tests for vectorSearch SQL table function — validation and error paths. These tests + * verify that invalid inputs are rejected with clear error messages. Explain-plan DSL shape tests + * live in {@link VectorSearchExplainIT}. + */ +public class VectorSearchIT extends SQLIntegTestCase { + + @Override + protected void init() throws Exception { + loadIndex(Index.ACCOUNT); + } + + private static final String TEST_INDEX = TestsConstants.TEST_INDEX_ACCOUNT; + + // ── Validation error paths ──────────────────────────────────────────── + + @Test + public void testMutualExclusivityRejectsKAndMaxDistance() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='k=5,max_distance=10') AS v")); + + assertThat(ex.getMessage(), containsString("Only one of")); + } + + @Test + public void testMutualExclusivityRejectsKAndMinScore() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='k=5,min_score=0.5') AS v")); + + assertThat(ex.getMessage(), containsString("Only one of")); + } + + @Test + public void testKTooLargeRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='k=10001') AS v")); + + assertThat(ex.getMessage(), containsString("k must be between 1 and 10000")); + } + + @Test + public void testKZeroRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='k=0') AS v")); + + assertThat(ex.getMessage(), containsString("k must be between 1 and 10000")); + } + + @Test + public void testUnknownOptionKeyRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='k=5,method.ef_search=100') AS v")); + + assertThat(ex.getMessage(), containsString("Unknown option key")); + } + + @Test + public void testEmptyVectorRejects() throws IOException { + 
ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("must not be empty")); + } + + @Test + public void testInvalidFieldNameRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', " + + "field='field\\\"injection', vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid field name")); + } + + @Test + public void testMissingRequiredOptionRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='') AS v")); + + assertThat(ex.getMessage(), containsString("Missing required option")); + } + + @Test + public void testRadialWithoutLimitRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='max_distance=10.5') AS v")); + + assertThat(ex.getMessage(), containsString("LIMIT is required for radial vector search")); + } + + // ── Sort restriction validation ───────────────────────────────────────── + + @Test + public void testOrderByNonScoreFieldRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "ORDER BY v.firstname ASC " + + "LIMIT 5")); + + assertThat(ex.getMessage(), containsString("unsupported sort expression")); + } + + @Test + public void testOrderByScoreAscRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "ORDER BY v._score ASC " + + "LIMIT 5")); + + assertThat(ex.getMessage(), containsString("_score ASC is not supported")); + } + + // ── filter_type validation ──────────────────────────────────────────── + + @Test + public void testFilterTypeEfficientWithoutWhereRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5,filter_type=efficient') AS v " + + "LIMIT 5")); + + assertThat(ex.getMessage(), containsString("filter_type requires a pushdownable WHERE clause")); + } + + @Test + public void testFilterTypePostWithoutWhereRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5,filter_type=post') AS v " + + "LIMIT 5")); + + assertThat(ex.getMessage(), containsString("filter_type requires a pushdownable WHERE clause")); + } + + @Test + public void testInvalidFilterTypeRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " 
+ + "vector='[1.0]', option='k=5,filter_type=bogus') AS v")); + + assertThat(ex.getMessage(), containsString("filter_type must be one of")); + } + + @Test + public void testGroupByRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v.gender, COUNT(*) FROM vectorSearch(table='" + + TEST_INDEX + + "', field='f', vector='[1.0]', option='k=5') AS v GROUP BY v.gender")); + + assertThat( + ex.getMessage(), + containsString("Aggregations are not supported on vectorSearch() relations")); + } + + @Test + public void testBareAggregateRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT COUNT(*) FROM vectorSearch(table='" + + TEST_INDEX + + "', field='f', vector='[1.0]', option='k=5') AS v")); + + assertThat( + ex.getMessage(), + containsString("Aggregations are not supported on vectorSearch() relations")); + } + + // ── OFFSET / WHERE _score / filter_type=efficient script rejection ─── + + @Test + public void testOffsetRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "LIMIT 5 OFFSET 2")); + + assertThat(ex.getMessage(), containsString("OFFSET is not supported on vectorSearch()")); + assertThat(ex.getMessage(), containsString("LIMIT only")); + } + + @Test + public void testScoreInWhereRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "WHERE v._score > 0.5 " + + "LIMIT 5")); + + assertThat(ex.getMessage(), containsString("WHERE on _score is not supported")); + assertThat(ex.getMessage(), containsString("min_score")); + } + + @Test + public void testOrderByScoreDescLimitOffsetRejected() throws IOException { + // The natural user shape pairs sort with pagination: ORDER BY _score DESC LIMIT N OFFSET M. + // The planner's pushDownSort() path can collapse the sort+limit into a top-k size, so OFFSET + // must still be rejected by pushDownLimit when the combined form is used. Without this guard + // the parent builder would push `from: ` and silently shift the top-k window. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "ORDER BY v._score DESC " + + "LIMIT 5 OFFSET 2")); + + assertThat(ex.getMessage(), containsString("OFFSET is not supported on vectorSearch()")); + } + + @Test + public void testEfficientModeRejectsScriptPredicate() throws IOException { + // WHERE age + 1 > 30 compiles to a ScriptQueryBuilder under the hood because the outer > + // is applied to an arithmetic expression, not a direct field reference. Efficient mode + // cannot embed script queries under knn.filter, so this must be rejected up front with a + // clear remediation hint instead of a cluster-side failure. 
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5,filter_type=efficient') AS v " + + "WHERE v.age + 1 > 30 " + + "LIMIT 5")); + + assertThat( + ex.getMessage(), containsString("vectorSearch WHERE pre-filtering does not support")); + assertThat(ex.getMessage(), containsString("script queries")); + } + + // ── k-NN plugin capability check ────────────────────────────────────── + // The default integ-test cluster does not have the k-NN plugin installed. Execution-path + // queries against vectorSearch() should therefore fail with the clear "k-NN plugin missing" + // error from KnnPluginCapability, while _explain continues to work because the capability + // probe is deferred to scan open() and does not run during analysis/planning. + + @Test + public void testExecutionWithoutKnnPluginReturnsCapabilityError() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "LIMIT 5")); + + // Lock in the full user-facing sentence, not just loose substrings. The exact wording is + // part of the contract and regressions should fail loudly rather than keep passing on a + // subtly reworded message. + assertThat( + ex.getMessage(), + containsString( + "vectorSearch() requires the k-NN plugin, which is not installed on this cluster.")); + } + + @Test + public void testExplainWithoutKnnPluginStillWorks() throws IOException { + // _explain only parses and plans the query. It must NOT require the k-NN plugin — the + // capability probe is intentionally deferred to scan open() so pluginless clusters can + // still inspect query plans. If this test starts failing with "k-NN plugin not installed", + // the probe has leaked back into an analysis-time path. + String explain = + explainQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "LIMIT 5"); + + // Assert the scan-operator name, not just "wrapper": the name confirms the plan reached + // the vectorSearch scan builder rather than some other scan shape. + assertThat(explain, containsString("VectorSearchIndexScan")); + assertThat(explain, containsString("wrapper")); + } + + // ── Argument shape validation ───────────────────────────────────────── + + @Test + public void testInvalidTableNameRejected() throws IOException { + // A slash is outside the SAFE_FIELD_NAME regex and is not a valid OpenSearch index character, + // so it should be rejected at the SQL layer before any cluster call is attempted. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='idx/evil', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + } + + @Test + public void testWildcardTableRejectedWithDedicatedMessage() throws IOException { + // Wildcards in a table name fan out to multiple indices, which vectorSearch() does not + // support (top-k semantics, dimension checks, and embedded filter JSON are not defined + // across heterogeneous shards). Surface a dedicated user-facing error instead of the + // generic "must contain only alphanumeric..." fallback. 
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='sql_vector_*', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + assertThat(ex.getMessage(), containsString("wildcards")); + assertThat(ex.getMessage(), containsString("single concrete index")); + } + + @Test + public void testMultiTargetTableRejectedWithDedicatedMessage() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='idx_a,idx_b', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + assertThat(ex.getMessage(), containsString("multi-target")); + } + + @Test + public void testDuplicateNamedArgRejected() throws IOException { + // Previously this crashed the server with 500 ArrayIndexOutOfBoundsException. Must now + // surface as a clean 400 with a user-facing message. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='a', table='b', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Duplicate argument name")); + } + + @Test + public void testUnknownNamedArgRejected() throws IOException { + // A grammar-legal but unknown name must surface as a clean 400 from the resolver. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(bogus='idx', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Unknown argument name")); + } + + @Test + public void testPositionalArgRejected() throws IOException { + // The real shape a user would hit: `vectorSearch('idx', field=..., vector=..., option=...)`. + // The V2 grammar now accepts this form so the AstBuilder can surface a clean + // SemanticCheckException instead of letting the request fall back to the legacy SQL engine, + // which previously returned 200 with zero rows. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch('idx', field='embedding', " + + "vector='[1.0, 1.0]', option='k=3') AS v LIMIT 3")); + + assertThat(ex.getMessage(), containsString("requires named arguments")); + } + + @Test + public void testCaseInsensitiveDuplicateArgRejected() throws IOException { + // Argument names are normalized to lower-case, so `table` and `TABLE` must be treated as the + // same key and rejected as a duplicate rather than silently keeping one of the two values. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='a', TABLE='b', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Duplicate argument name")); + } + + @Test + public void testTableNameAllRejected() throws IOException { + // `_all` would fan out to every index. The preview contract is a single concrete index or + // alias, so it must be rejected explicitly rather than allowed to route broadly. 
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='_all', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + } + + @Test + public void testTableNameSingleDotRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='.', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + } + + @Test + public void testTableNameDoubleDotRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='..', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + } + + @Test + public void testMissingRequiredArgRejected() throws IOException { + // Omitting a required named argument (here: `field`) must produce a clean 400 rather than a + // NullPointerException or a legacy-engine fallback. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='a', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("requires 4 arguments")); + } + + /** + * Users running FROM vectorSearch(...) without an AS alias previously received an opaque parser + * error from the legacy SQL engine fallback. The clearer SemanticCheckException from the v2 + * engine must surface to the user instead. + */ + @Test + public void testVectorSearchRequiresAlias() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT * FROM vectorSearch(" + + "table='t', field='f', vector='[1.0]', option='k=5') " + + "LIMIT 3")); + + String body = ex.getMessage(); + assertThat(body, containsString("requires a table alias")); + assertThat(body, containsString("vectorSearch")); + } + + // Synthetic column collision (metadata vs. user field). + // vectorSearch() exposes synthetic v._id and v._score columns. A user mapping property of the + // same name would collide on the response tuple key. OpenSearch blocks _id at mapping time; + // _score is not blocked, so VectorSearchIndex rejects it at scan-build time. + + @Test + public void testUserMappingWithIdFieldIsRejectedByOpenSearch() throws IOException { + // Locks in OpenSearch's rejection of a user property named _id: without it, v._id could + // collide with a user field at response time. The exact error message belongs to OpenSearch. + String indexName = "vs_collision_id"; + deleteIndexIfExists(indexName); + + Request createIndex = new Request("PUT", "/" + indexName); + createIndex.setJsonEntity("{\"mappings\":{\"properties\":{\"_id\":{\"type\":\"keyword\"}}}}"); + + expectThrows(ResponseException.class, () -> client().performRequest(createIndex)); + } + + @Test + public void testVectorSearchAgainstIndexWithScoreFieldRejects() throws IOException { + // _explain exercises planning (where the guard runs) without needing the k-NN plugin. 
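+    // Expect a 400 whose message names the colliding '_score' field (asserted below).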
+ String indexName = "vs_collision_score"; + deleteIndexIfExists(indexName); + + Request createIndex = new Request("PUT", "/" + indexName); + createIndex.setJsonEntity("{\"mappings\":{\"properties\":{\"_score\":{\"type\":\"float\"}}}}"); + client().performRequest(createIndex); + + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT v._score FROM vectorSearch(table='" + + indexName + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "LIMIT 5")); + + assertEquals(400, ex.getResponse().getStatusLine().getStatusCode()); + assertThat(ex.getMessage(), containsString("_score")); + assertThat(ex.getMessage(), containsString("collides")); + } + + @Test + public void testSemicolonSeparatorInVectorRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0;2.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("vector=")); + assertThat(ex.getMessage(), containsString("comma-separated")); + } + + @Test + public void testNegativeMinScoreRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='min_score=-0.5') AS v")); + + assertThat(ex.getMessage(), containsString("min_score")); + assertThat(ex.getMessage(), containsString("non-negative")); + } + + @Test + public void testNegativeMaxDistanceRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='max_distance=-1.0') AS v")); + + assertThat(ex.getMessage(), containsString("max_distance")); + assertThat(ex.getMessage(), containsString("non-negative")); + } + + @Test + public void testTrailingCommaInVectorRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0,2.0,]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid vector component")); + assertThat(ex.getMessage(), containsString("trailing or consecutive commas")); + } + + // ── Alias with multiple backing indices ─────────────────────────────── + // vectorSearch() accepts an alias as `table=`. When the alias points at multiple backing + // indices, planning must accept the alias string instead of treating it as a wildcard or + // multi-target. Execution correctness over compatible knn_vector mappings is a separate + // concern covered by k-NN-enabled tests/follow-up; these tests lock in planning acceptance + // only, via _explain on the default no-kNN cluster. + + @Test + public void testExplainOverAliasWithMultipleBackingIndices() throws IOException { + // Create two indices with identical keyword mappings (no knn_vector, since the plugin is + // not installed) and a shared alias. We only assert the planner accepts the alias; whether + // k-NN accepts the alias at execution is a separate concern tested on a k-NN-enabled + // cluster. + // Randomized names so a stale alias/index left by an aborted prior run of this class does + // not shadow a fresh setup, which is a concrete risk on local reruns. 
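+    // The 8-character suffix below comes from a random UUID, so each run gets fresh names.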
+ String suffix = java.util.UUID.randomUUID().toString().replace("-", "").substring(0, 8); + String idx1 = "vector_alias_backing_1_" + suffix; + String idx2 = "vector_alias_backing_2_" + suffix; + String alias = "vector_alias_combined_" + suffix; + try { + createSimpleIndex(idx1); + createSimpleIndex(idx2); + addToAlias(idx1, alias); + addToAlias(idx2, alias); + + String explain = + explainQuery( + "SELECT v._id FROM vectorSearch(table='" + + alias + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v"); + + assertThat(explain, containsString("VectorSearchIndexScan")); + assertThat(explain, containsString(alias)); + } finally { + // Deleting the backing indices removes the alias automatically, but delete the alias + // first for robustness against partial setup failures. + deleteAliasIfExists(alias); + deleteIndexIfExists(idx1); + deleteIndexIfExists(idx2); + } + } + + private void createSimpleIndex(String indexName) throws IOException { + Request create = new Request("PUT", "/" + indexName); + create.setJsonEntity("{\"mappings\":{\"properties\":{\"state\":{\"type\":\"keyword\"}}}}"); + client().performRequest(create); + } + + private void addToAlias(String indexName, String aliasName) throws IOException { + Request req = new Request("POST", "/_aliases"); + req.setJsonEntity( + "{\"actions\":[{\"add\":{\"index\":\"" + + indexName + + "\",\"alias\":\"" + + aliasName + + "\"}}]}"); + client().performRequest(req); + } + + private void deleteIndexIfExists(String indexName) { + try { + client().performRequest(new Request("DELETE", "/" + indexName)); + } catch (IOException ignored) { + // Index does not exist, which is fine. + } + } + + private void deleteAliasIfExists(String aliasName) { + try { + client().performRequest(new Request("DELETE", "/_all/_alias/" + aliasName)); + } catch (IOException ignored) { + // Alias does not exist, which is fine. + } + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchSubqueryIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchSubqueryIT.java new file mode 100644 index 00000000000..04346f87a76 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchSubqueryIT.java @@ -0,0 +1,306 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.hamcrest.Matchers.containsString; + +import java.io.IOException; +import org.junit.Test; +import org.opensearch.client.ResponseException; +import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.legacy.TestsConstants; + +/** + * Integration tests for vectorSearch() used inside subqueries. Locks in the rejection of outer + * WHERE on a vectorSearch() subquery, which would otherwise silently yield zero rows because the + * outer predicate is applied only after the k-NN search has already selected top-k documents by + * vector distance. + * + *
    Uses _explain-only plus error-path queries, so the k-NN plugin is not required — the planner + * validation fires during planning, before any k-NN execution. + */ +public class VectorSearchSubqueryIT extends SQLIntegTestCase { + + @Override + protected void init() throws Exception { + loadIndex(Index.ACCOUNT); + } + + private static final String TEST_INDEX = TestsConstants.TEST_INDEX_ACCOUNT; + + @Test + public void testOuterWhereOnSubqueryRejected() throws IOException { + // Without the guard the outer predicate is dropped from the pushed DSL and applied only in + // memory after k-NN returned top-k, which can yield silent zero rows. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT * FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "WHERE t.state = 'TX'")); + + assertThat( + ex.getMessage(), + containsString("Outer WHERE on a vectorSearch() subquery is not supported")); + assertThat(ex.getMessage(), containsString("silently yield zero rows")); + } + + @Test + public void testOuterWhereOnSubqueryRejectedWithLimit() throws IOException { + // Same shape with an outer LIMIT — exercises a second planner path (LogicalLimit above + // LogicalFilter above LogicalProject above scan builder). + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT * FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "WHERE t.state = 'TX' " + + "LIMIT 3")); + + assertThat( + ex.getMessage(), + containsString("Outer WHERE on a vectorSearch() subquery is not supported")); + } + + @Test + public void testOuterWhereOnSubqueryRejectedExplain() throws IOException { + // The guard must fire during planning, before any k-NN execution — so _explain must also + // return the validation error rather than a silently dropped predicate in the DSL. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT * FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "WHERE t.state = 'TX'")); + + assertThat( + ex.getMessage(), + containsString("Outer WHERE on a vectorSearch() subquery is not supported")); + } + + @Test + public void testOuterWhereWithInnerWhereStillRejected() throws IOException { + // Outer WHERE must be rejected even when the subquery already has its own inner WHERE. + // The shape reaches the planner as Filter(outer) -> Project -> Filter(inner) -> Scan, and + // the outer predicate is still separated from the k-NN search by the subquery project + // boundary. Without preserving the project marker across the inner filter, the walker + // would miss this shape and the outer predicate would silently produce zero rows. 
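+    // Plan shape under test: Filter(t.state = 'TX') -> Project -> Filter(v.age > 10) -> scan.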
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT * FROM (SELECT v.firstname, v.state, v.age " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "WHERE v.age > 10) t " + + "WHERE t.state = 'TX'")); + + assertThat( + ex.getMessage(), + containsString("Outer WHERE on a vectorSearch() subquery is not supported")); + } + + @Test + public void testInnerWhereStillWorks() throws IOException { + // Positive control: WHERE directly on vectorSearch() inside the subquery must still plan + // successfully — the rejection is scoped to OUTER filters that cannot reach the push-down + // contract. We use _explain because the default integ-test cluster has no k-NN plugin. + String explain = + explainQuery( + "SELECT * FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "WHERE v.state = 'TX') t"); + + assertThat(explain, containsString("wrapper")); + // Inner WHERE should push down, so the state predicate appears in the DSL. + assertThat(explain, containsString("state")); + } + + @Test + public void testInnerWhereWithOuterProjectStillWorks() throws IOException { + // Another positive control: the outer layer can still project and limit columns from the + // subquery without the guard firing — only outer WHERE is rejected. + String explain = + explainQuery( + "SELECT t.firstname FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "WHERE v.state = 'TX') t " + + "LIMIT 3"); + + assertThat(explain, containsString("wrapper")); + } + + @Test + public void testSubqueryNoWhereStillWorks() throws IOException { + // Baseline: a subquery with no WHERE anywhere must not be rejected — the guard fires only + // when an outer LogicalFilter sits above a subquery project boundary. + String explain = + explainQuery( + "SELECT * FROM (SELECT v.firstname " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "LIMIT 3"); + + assertThat(explain, containsString("wrapper")); + } + + @Test + public void testInnerOrderByScoreDescInSubqueryAllowed() throws IOException { + // Positive control: inner ORDER BY _score DESC on the vectorSearch() relation inside the + // subquery is the only supported sort, and must continue to plan successfully even when + // wrapped in an outer SELECT. Proves the walker does not over-reject sort shapes that are + // below the subquery Project rather than above it. + String explain = + explainQuery( + "SELECT * FROM (SELECT v.firstname, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "ORDER BY v._score DESC) t " + + "LIMIT 3"); + + assertThat(explain, containsString("wrapper")); + } + + @Test + public void testOuterOrderByOnSubqueryRejected() throws IOException { + // Outer ORDER BY over a vectorSearch() subquery would run on a truncated top-k slice rather + // than the full relation, silently reordering only the already-ANN-selected rows. 
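+    // e.g. with k=5, ORDER BY t.state would reorder only those five ANN hits.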
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT * FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "ORDER BY t.state")); + + assertThat( + ex.getMessage(), + containsString("Outer ORDER BY on a vectorSearch() subquery is not supported")); + } + + @Test + public void testOuterOffsetOnSubqueryRejected() throws IOException { + // Outer OFFSET silently drops top-k rows by vector distance. The inner query already caps at + // k and any outer OFFSET shifts that window in an opaque way, so reject it. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT * FROM (SELECT v.firstname " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "LIMIT 3 OFFSET 2")); + + assertThat( + ex.getMessage(), + containsString("Outer OFFSET on a vectorSearch() subquery is not supported")); + } + + @Test + public void testOuterLimitWithoutOffsetOnSubqueryAllowed() throws IOException { + // Positive control: outer LIMIT without OFFSET just caps the row count and must plan without + // error. Locks in the offset==0 boundary of the OFFSET rejection. + String explain = + explainQuery( + "SELECT * FROM (SELECT v.firstname " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "LIMIT 3"); + + assertThat(explain, containsString("wrapper")); + } + + @Test + public void testOuterAggregationOnSubqueryRejected() throws IOException { + // Outer aggregation (here COUNT(*)) over a vectorSearch() subquery would run on the + // truncated top-k slice, producing a count that silently depends on k rather than the full + // population. vectorSearch() does not support aggregations, so reject the outer-subquery + // variant with the same subquery-boundary walker that catches outer WHERE. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT COUNT(*) FROM (SELECT v.firstname " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t")); + + assertThat( + ex.getMessage(), + containsString( + "Outer GROUP BY / aggregation / DISTINCT on a vectorSearch() subquery is not" + + " supported")); + } + + @Test + public void testOuterGroupByOnSubqueryRejected() throws IOException { + // GROUP BY rewrites to LogicalAggregation and is caught by the same subquery-boundary walker. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT t.state, COUNT(*) FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "GROUP BY t.state")); + + assertThat( + ex.getMessage(), + containsString( + "Outer GROUP BY / aggregation / DISTINCT on a vectorSearch() subquery is not" + + " supported")); + } + + @Test + public void testOuterDistinctOnSubqueryRejected() throws IOException { + // SELECT DISTINCT rewrites to a LogicalAggregation with empty aggregator list and the select + // items as the group-by list. The subquery-boundary walker must catch this shape too. 
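+    // i.e. SELECT DISTINCT t.state plans like GROUP BY t.state with no aggregators.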
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT DISTINCT t.state FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t")); + + assertThat( + ex.getMessage(), + containsString( + "Outer GROUP BY / aggregation / DISTINCT on a vectorSearch() subquery is not" + + " supported")); + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml index 7718e89b02f..a73f4f508d2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($34)]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},station=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[station, aws.cloudwatch.log_stream], LIMIT->500, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":500,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},station=COUNT()), PROJECT->[station, aws.cloudwatch.log_stream], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->500, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":500,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml index 2ce4f996236..b0808ca93f4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($34)]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},country=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[country, aws.cloudwatch.log_stream], LIMIT->50, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":50,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},country=COUNT()), PROJECT->[country, aws.cloudwatch.log_stream], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->50, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":50,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml index b0c896f61fb..c277bb44fb8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-05 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-05 05:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml index ae4cade06e5..f900b2ccbec 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($68)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(AdvEngineID)=SUM($0),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(AdvEngineID)=SUM($0),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID], SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml index 7a8da847554..aa43e743192 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($31, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, MobilePhoneModel], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel":{"terms":{"field":"MobilePhoneModel","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, 
startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), PROJECT->[u, MobilePhoneModel], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel":{"terms":{"field":"MobilePhoneModel","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml index be24923eeea..a4691fd7e38 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($31, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},u=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[u, MobilePhone, MobilePhoneModel], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"MobilePhone","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel|MobilePhone":{"multi_terms":{"terms":[{"field":"MobilePhoneModel"},{"field":"MobilePhone"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},u=COUNT(DISTINCT $2)), PROJECT->[u, MobilePhone, MobilePhoneModel], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"MobilePhone","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel|MobilePhone":{"multi_terms":{"terms":[{"field":"MobilePhoneModel"},{"field":"MobilePhone"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml index dd4f502bbde..0110be323ee 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml index d5c34e6a7f2..7dbe85eb016 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), PROJECT->[u, 
SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml index b13cc1a62ca..1c275e53363 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, SearchEngineID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase|SearchEngineID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"SearchEngineID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), PROJECT->[c, SearchEngineID, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase|SearchEngineID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"SearchEngineID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml index 3f0fb7644a9..f18539393d7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($84)]) 
CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[count(), UserID], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID":{"terms":{"field":"UserID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), UserID], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID":{"terms":{"field":"UserID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml index d9efea667c2..54dfa746d4d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($84), IS NOT NULL($63))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), UserID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"SearchPhrase|UserID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"UserID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), UserID, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"SearchPhrase|UserID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"UserID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml index c4005b2ea4f..74f1ebab575 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(EventDate=[$0], URLRegionID=[$1], HasGCLID=[$2], Income=[$3], Interests=[$4], Robotness=[$5], BrowserLanguage=[$6], CounterClass=[$7], 
BrowserCountry=[$8], OriginalURL=[$9], ClientTimeZone=[$10], RefererHash=[$11], TraficSourceID=[$12], HitColor=[$13], RefererRegionID=[$14], URLCategoryID=[$15], LocalEventTime=[$16], EventTime=[$17], UTMTerm=[$18], AdvEngineID=[$19], UserAgentMinor=[$20], UserAgentMajor=[$21], RemoteIP=[$22], Sex=[$23], JavaEnable=[$24], URLHash=[$25], URL=[$26], ParamOrderID=[$27], OpenstatSourceID=[$28], HTTPError=[$29], SilverlightVersion3=[$30], MobilePhoneModel=[$31], SilverlightVersion4=[$32], SilverlightVersion1=[$33], SilverlightVersion2=[$34], IsDownload=[$35], IsParameter=[$36], CLID=[$37], FlashMajor=[$38], FlashMinor=[$39], UTMMedium=[$40], WatchID=[$41], DontCountHits=[$42], CookieEnable=[$43], HID=[$44], SocialAction=[$45], WindowName=[$46], ConnectTiming=[$47], PageCharset=[$48], IsLink=[$49], IsArtifical=[$50], JavascriptEnable=[$51], ClientEventTime=[$52], DNSTiming=[$53], CodeVersion=[$54], ResponseEndTiming=[$55], FUniqID=[$56], WindowClientHeight=[$57], OpenstatServiceName=[$58], UTMContent=[$59], HistoryLength=[$60], IsOldCounter=[$61], MobilePhone=[$62], SearchPhrase=[$63], FlashMinor2=[$64], SearchEngineID=[$65], IsEvent=[$66], UTMSource=[$67], RegionID=[$68], OpenstatAdID=[$69], UTMCampaign=[$70], GoodEvent=[$71], IsRefresh=[$72], ParamCurrency=[$73], Params=[$74], ResolutionHeight=[$75], ClientIP=[$76], FromTag=[$77], ParamCurrencyID=[$78], ResponseStartTiming=[$79], ResolutionWidth=[$80], SendTiming=[$81], RefererCategoryID=[$82], OpenstatCampaignID=[$83], UserID=[$84], WithHash=[$85], UserAgent=[$86], ParamPrice=[$87], ResolutionDepth=[$88], IsMobile=[$89], Age=[$90], SocialSourceNetworkID=[$91], OpenerName=[$92], OS=[$93], IsNotBounce=[$94], Referer=[$95], NetMinor=[$96], Title=[$97], NetMajor=[$98], IPNetworkID=[$99], FetchTiming=[$100], SocialNetwork=[$101], SocialSourcePage=[$102], CounterID=[$103], WindowClientWidth=[$104], _id=[$105], _index=[$106], _score=[$107], _maxscore=[$108], _sort=[$109], _routing=[$110], m=[EXTRACT('minute':VARCHAR, $17)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), SORT_AGG_METRICS->[3 DESC LAST], PROJECT->[count(), UserID, m, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|m|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCZXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJFWFRSQUNUIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[2,0],"DIGESTS":["minute","EventTime"]}},"value_type":"long"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), UserID, m, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|m|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCZXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJFWFRSQUNUIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":["minute","EventTime"]}},"value_type":"long"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml index a28945e87d0..edd3dabd8d8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(LIKE($26, '%google%', '\'), <>($63, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($0, '%google%', '\'), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($0, '%google%', '\'), <>($1, '')), 
AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT()), PROJECT->[c, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml index f5b8ec9c184..6f6b5056a9f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(LIKE($97, '%Google%', '\'), <>($63, ''), NOT(LIKE($26, '%.google.%', '\')))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),dc(UserID)=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, dc(UserID), SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),dc(UserID)=COUNT(DISTINCT $2)), PROJECT->[c, dc(UserID), SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml index a8ac7eaf9b7..bf40fe857ed 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml index 5cf8f54b258..81236b33d51 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml index e9b5c203f20..ccda84ba38a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($41), IS NOT NULL($76))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),sum(IsRefresh)=SUM($1),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),sum(IsRefresh)=SUM($1),avg(ResolutionWidth)=AVG($3)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml index e2fd395e0ec..69dc8c94239 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($26)]) 
CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, URL], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml index 44a4218baf5..6ea79300182 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($26, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), PROJECT->[PageViews, URL], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml index cd15e03f941..d4cbbe1fc48 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($97, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, Title], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3},PageViews=COUNT()), PROJECT->[PageViews, Title], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml index c4bc303bfb2..6b85d93899f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}},{"exists":{"field":"URL","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), PROJECT->[PageViews, URL], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}},{"exists":{"field":"URL","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml index e9eefc046b2..d9eb1a4c263 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml @@ -12,4 +12,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], PROJECT->[PageViews, TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), PROJECT->[PageViews, TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml index c23839c1674..6482c38bddb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[100], fetch=[10]) - 
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate], LIMIT->[10 from 100]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"EventDate|URLHash":{"multi_terms":{"terms":[{"field":"EventDate","value_type":"long"},{"field":"URLHash"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},PageViews=COUNT()), PROJECT->[PageViews, URLHash, EventDate], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->[10 from 100]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"EventDate|URLHash":{"multi_terms":{"terms":[{"field":"EventDate","value_type":"long"},{"field":"URLHash"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml index 7a7d97c857a..96ee9b2a304 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[10000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3, 6},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC 
LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WindowClientHeight|WindowClientWidth":{"multi_terms":{"terms":[{"field":"WindowClientHeight"},{"field":"WindowClientWidth"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3, 6},PageViews=COUNT()), PROJECT->[PageViews, WindowClientWidth, WindowClientHeight], SORT_AGG_METRICS->[0 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WindowClientHeight|WindowClientWidth":{"multi_terms":{"terms":[{"field":"WindowClientHeight"},{"field":"WindowClientWidth"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml index a71532d4271..05de3cbdcf9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($19, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[count(), AdvEngineID], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"AdvEngineID","boost":1.0}}],"must_not":[{"term":{"AdvEngineID":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"AdvEngineID":{"terms":{"field":"AdvEngineID","size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), AdvEngineID], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"AdvEngineID","boost":1.0}}],"must_not":[{"term":{"AdvEngineID":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"AdvEngineID":{"terms":{"field":"AdvEngineID","size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml index 6ea001905ce..5e6bc1617c5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($68)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, RegionID], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), PROJECT->[u, RegionID], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_consecutive_sorts_issue_5125.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_consecutive_sorts_issue_5125.yaml new file mode 100644 index 00000000000..349251ab4fc --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_consecutive_sorts_issue_5125.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + 
LogicalSort(sort0=[$1], dir0=[DESC-nulls-last]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(c=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], c=[COUNT()]) + LogicalProject(gender=[$4]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, gender], SORT->[1 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"last","order":"desc"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml index 1326030ea7e..9bda406291c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml @@ -15,5 +15,5 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableHashJoin(condition=[=($1, $2)], joinType=[semi]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0}), SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml index 36bf1245a2d..cc79c0dc2f4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml @@ -17,5 +17,5 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..2=[{inputs}], c=[$t1], state=[$t2]) 
EnumerableHashJoin(condition=[=($0, $2)], joinType=[inner]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0}), LIMIT->10], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0}), LIMIT->10], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml index 75389120405..c08c533bc60 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT_AGG_METRICS->[0 
ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml index be021c55e23..9c41efa9139 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum=SUM($0)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[sum, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum=SUM($0)), PROJECT->[sum, state], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml index e60bbe90fdc..b48e10e20c8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml @@ -9,4 +9,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[cnt, span(birthdate,1d)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"_count":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), PROJECT->[cnt, span(birthdate,1d)], SORT_AGG_METRICS->[0 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"_count":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":1}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml index 57132615c41..f2105ce0d3c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml @@ -9,4 +9,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(balance)=SUM($0)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[sum(balance), span(age,5)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(age,5)":{"histogram":{"field":"age","interval":5.0,"offset":0.0,"order":[{"sum(balance)":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(balance)=SUM($0)), PROJECT->[sum(balance), span(age,5)], SORT_AGG_METRICS->[0 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(age,5)":{"histogram":{"field":"age","interval":5.0,"offset":0.0,"order":[{"sum(balance)":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":1},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml index 3215115297a..cd0355241fe 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(balance), c, dc(employer), state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), PROJECT->[sum(balance), c, dc(employer), state], SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml index 211aa979ce0..59cd137ca59 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},sum(balance)=SUM($2),count()=COUNT(),d=COUNT(DISTINCT $3)), SORT_AGG_METRICS->[4 DESC LAST], PROJECT->[sum(balance), count(), d, gender, new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},sum(balance)=SUM($2),count()=COUNT(),d=COUNT(DISTINCT $3)), PROJECT->[sum(balance), count(), d, gender, new_state], SORT_AGG_METRICS->[2 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml index 4caf7759fc6..b584249d91a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), gender, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), gender, state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml index 13d8350c11f..44a51b2171d 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), new_gender, new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), new_gender, new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml index 7e010cba2ad..e24043592fe 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_ctime.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_ctime.yaml new file mode 100644 index 00000000000..dd3c53dc0da --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_ctime.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(ts=[CTIME(1066507633)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1066507633], expr#20=[CTIME($t19)], ts=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m"}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_dur2sec.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_dur2sec.yaml new file mode 100644 index 00000000000..fdbe6f1e8b7 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_dur2sec.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(d=[DUR2SEC('01:23:45':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['01:23:45':VARCHAR], expr#20=[DUR2SEC($t19)], d=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m"}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mktime.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mktime.yaml new file mode 100644 index 00000000000..a817226a708 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mktime.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(d=[MKTIME('10/18/2003 20:07:13':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['10/18/2003 20:07:13':VARCHAR], expr#20=[MKTIME($t19)], d=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m"}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mstime.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mstime.yaml new file mode 100644 index 00000000000..43cc390ac77 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mstime.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(t=[MSTIME('03:45.5':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['03:45.5':VARCHAR], expr#20=[MSTIME($t19)], t=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m"}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml index 1948dfba6c3..3978b92e393 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml @@ -1,12 +1,13 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) - LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) - LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5 ORDER BY $1 NULLS FIRST, $3 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"account_number"},{"field":"gender"},{"field":"age"},{"field":"state"}],"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableTopK(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2))], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"account_number"},{"field":"gender"},{"field":"age"},{"field":"state"}],"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}},"sort":[{"gender":{"order":"asc","missing":"_first"}},{"state":{"order":"desc","missing":"_last"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml index b04fef7e1bc..9f99d5ec747 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml @@ -1,12 +1,13 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], 
new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) - LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) - LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5 ORDER BY $1 NULLS FIRST, $3 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"account_number"},{"field":"gender"},{"field":"age"},{"field":"state"}],"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableTopK(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"account_number"},{"field":"gender"},{"field":"age"},{"field":"state"}],"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}},"sort":[{"gender":{"order":"asc","missing":"_first"}},{"state":{"order":"desc","missing":"_last"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml index f61eced9dea..dceae301b9c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml @@ -1,11 +1,11 @@ calcite: logical: | LogicalSystemLimit(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) - LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) - LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5 ORDER BY $4 NULLS FIRST, $5 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml index c17eb382c17..9545f4220e0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml @@ -1,11 +1,11 @@ calcite: logical: | LogicalSystemLimit(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) - 
LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) - LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5 ORDER BY $4 NULLS FIRST, $5 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml index 93d488e69de..e523be80bed 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml @@ -1,12 +1,12 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3)]) - LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) - LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3 ORDER BY $1 NULLS FIRST, $3 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"gender"},{"field":"state"},{"field":"account_number"},{"field":"age"}],"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[1 ASC FIRST, 3 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"gender"},{"field":"state"},{"field":"account_number"},{"field":"age"}],"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}},"sort":[{"gender":{"order":"asc","missing":"_first"}},{"state":{"order":"desc","missing":"_last"}}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml index 8cc9db58e9e..616d50a732a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml @@ -1,12 +1,12 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3)]) - LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) - LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3 ORDER BY $1 NULLS FIRST, $3 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"gender"},{"field":"state"},{"field":"account_number"},{"field":"age"}],"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[1 ASC FIRST, 3 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"gender"},{"field":"state"},{"field":"account_number"},{"field":"age"}],"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}},"sort":[{"gender":{"order":"asc","missing":"_first"}},{"state":{"order":"desc","missing":"_last"}}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml index 862a45dc617..b8e0f7ed6d5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml @@ -2,20 +2,21 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age2=[$2]) - LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]) - LogicalFilter(condition=[IS NOT NULL($2)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) - LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) - LogicalProject(avg_age=[$2], state=[$0], city=[$1]) - LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) - LogicalProject(state=[$7], city=[$5], age=[$8]) - LogicalFilter(condition=[>($8, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]) + LogicalFilter(condition=[IS NOT NULL($2)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) + LogicalProject(avg_age=[$2], state=[$0], city=[$1]) + LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) + LogicalProject(state=[$7], city=[$5], age=[$8]) + LogicalFilter(condition=[>($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0]) - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]) - EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]) + CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]) + EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.json index 6235593dc6f..6daddb8ee76 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.json @@ -1,6 +1,6 @@ { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]): rowcount = 56.25, cumulative cost = {165681.25 rows, 105156.5471810663 cpu, 0.0 io}, id = 9401\n LogicalProject(age2=[$2]): rowcount = 56.25, cumulative cost = {165625.0 rows, 104256.5471810663 cpu, 0.0 io}, id = 9400\n LogicalFilter(condition=[<=($3, 1)]): rowcount = 56.25, cumulative cost = {165568.75 rows, 104200.2971810663 cpu, 0.0 io}, id = 9398\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]): rowcount = 225.0, cumulative cost = {165512.5 rows, 103975.2971810663 cpu, 0.0 io}, id = 9397\n LogicalFilter(condition=[IS NOT NULL($2)]): rowcount = 225.0, cumulative cost = {165287.5 rows, 103075.2971810663 cpu, 0.0 io}, id = 9396\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]): rowcount = 500.0, cumulative cost = {165062.5 rows, 102575.2971810663 cpu, 0.0 io}, id = 9395\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]): rowcount = 500.0, cumulative cost = {164562.5 rows, 101075.2971810663 cpu, 0.0 io}, id = 9393\n LogicalProject(avg_age=[$2], state=[$0], city=[$1]): rowcount = 500.0, cumulative cost = {164062.5 rows, 26500.0 cpu, 0.0 io}, id = 9392\n 
LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]): rowcount = 500.0, cumulative cost = {163562.5 rows, 25000.0 cpu, 0.0 io}, id = 9391\n LogicalProject(state=[$7], city=[$5], age=[$8]): rowcount = 5000.0, cumulative cost = {163000.0 rows, 25000.0 cpu, 0.0 io}, id = 9390\n LogicalFilter(condition=[>($8, 30)]): rowcount = 5000.0, cumulative cost = {158000.0 rows, 10000.0 cpu, 0.0 io}, id = 9389\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]): rowcount = 10000.0, cumulative cost = {153000.0 rows, 0.0 cpu, 0.0 io}, id = 9388\n", - "physical": "EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0]): rowcount = 225.0, cumulative cost = {2981.25 rows, 7950.0 cpu, 0.0 io}, id = 11254\n EnumerableLimit(fetch=[10000]): rowcount = 225.0, cumulative cost = {2756.25 rows, 7275.0 cpu, 0.0 io}, id = 11246\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]): rowcount = 225.0, cumulative cost = {2531.25 rows, 7050.0 cpu, 0.0 io}, id = 11250\n EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]): rowcount = 450.0, cumulative cost = {2306.25 rows, 3900.0 cpu, 0.0 io}, id = 11242\n EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3]): rowcount = 450.0, cumulative cost = {1856.25 rows, 3000.0 cpu, 0.0 io}, id = 11258\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]): rowcount = 500.0, cumulative cost = {1406.25 rows, 0.0 cpu, 0.0 io}, id = 10962\n" + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]): rowcount = 56.25, cumulative cost = {165293.75 rows, 36190.23815546412 cpu, 0.0 io}, id = 4117\n LogicalProject(age2=[$2]): rowcount = 56.25, cumulative cost = {165237.5 rows, 35290.23815546412 cpu, 0.0 io}, id = 4116\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]): rowcount = 56.25, cumulative cost = {165181.25 rows, 35233.98815546412 cpu, 0.0 io}, id = 4115\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2]): rowcount = 56.25, cumulative cost = {165125.0 rows, 29793.75 cpu, 0.0 io}, id = 4114\n LogicalFilter(condition=[<=($3, 1)]): rowcount = 56.25, cumulative cost = {165068.75 rows, 29625.0 cpu, 0.0 io}, id = 4113\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]): rowcount = 225.0, cumulative cost = {165012.5 rows, 29400.0 cpu, 0.0 io}, id = 4112\n LogicalFilter(condition=[IS NOT NULL($2)]): rowcount = 225.0, cumulative cost = {164787.5 rows, 28500.0 cpu, 0.0 io}, id = 4111\n LogicalProject(avg_age=[$0], 
state=[$1], age2=[+($0, 2)]): rowcount = 500.0, cumulative cost = {164562.5 rows, 28000.0 cpu, 0.0 io}, id = 4110\n LogicalProject(avg_age=[$2], state=[$0], city=[$1]): rowcount = 500.0, cumulative cost = {164062.5 rows, 26500.0 cpu, 0.0 io}, id = 4106\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]): rowcount = 500.0, cumulative cost = {163562.5 rows, 25000.0 cpu, 0.0 io}, id = 4105\n LogicalProject(state=[$7], city=[$5], age=[$8]): rowcount = 5000.0, cumulative cost = {163000.0 rows, 25000.0 cpu, 0.0 io}, id = 4104\n LogicalFilter(condition=[>($8, 30)]): rowcount = 5000.0, cumulative cost = {158000.0 rows, 10000.0 cpu, 0.0 io}, id = 4103\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]): rowcount = 10000.0, cumulative cost = {153000.0 rows, 0.0 cpu, 0.0 io}, id = 4102\n", + "physical": "EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]): rowcount = 225.0, cumulative cost = {3201.75 rows, 39096.942171903866 cpu, 0.0 io}, id = 4910\n CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000]): rowcount = 225.0, cumulative cost = {2976.75 rows, 38196.942171903866 cpu, 0.0 io}, id = 4902\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]): rowcount = 225.0, cumulative cost = {2531.25 rows, 8950.0 cpu, 0.0 io}, id = 4906\n EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]): rowcount = 450.0, cumulative cost = {2306.25 rows, 4900.0 cpu, 0.0 io}, id = 4898\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4]): rowcount = 450.0, cumulative cost = {1856.25 rows, 4000.0 cpu, 0.0 io}, id = 4914\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#4218:LogicalAggregate.NONE.[](input=RelSubset#4186,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]): rowcount = 500.0, cumulative cost = {1406.25 rows, 0.0 cpu, 0.0 io}, id = 4505\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.yaml index 63b402833fc..c760955d27b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.yaml @@ -1,21 +1,22 @@ calcite: logical: | - LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]): rowcount = 56.25, cumulative cost = {165681.25 rows, 105156.5471810663 cpu, 0.0 io}, id = 7529 - LogicalProject(age2=[$2]): rowcount = 56.25, cumulative cost = {165625.0 rows, 104256.5471810663 cpu, 0.0 io}, id = 7528 - LogicalFilter(condition=[<=($3, 1)]): rowcount = 56.25, 
cumulative cost = {165568.75 rows, 104200.2971810663 cpu, 0.0 io}, id = 7526 - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]): rowcount = 225.0, cumulative cost = {165512.5 rows, 103975.2971810663 cpu, 0.0 io}, id = 7525 - LogicalFilter(condition=[IS NOT NULL($2)]): rowcount = 225.0, cumulative cost = {165287.5 rows, 103075.2971810663 cpu, 0.0 io}, id = 7524 - LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]): rowcount = 500.0, cumulative cost = {165062.5 rows, 102575.2971810663 cpu, 0.0 io}, id = 7523 - LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]): rowcount = 500.0, cumulative cost = {164562.5 rows, 101075.2971810663 cpu, 0.0 io}, id = 7521 - LogicalProject(avg_age=[$2], state=[$0], city=[$1]): rowcount = 500.0, cumulative cost = {164062.5 rows, 26500.0 cpu, 0.0 io}, id = 7520 - LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]): rowcount = 500.0, cumulative cost = {163562.5 rows, 25000.0 cpu, 0.0 io}, id = 7519 - LogicalProject(state=[$7], city=[$5], age=[$8]): rowcount = 5000.0, cumulative cost = {163000.0 rows, 25000.0 cpu, 0.0 io}, id = 7518 - LogicalFilter(condition=[>($8, 30)]): rowcount = 5000.0, cumulative cost = {158000.0 rows, 10000.0 cpu, 0.0 io}, id = 7517 - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]): rowcount = 10000.0, cumulative cost = {153000.0 rows, 0.0 cpu, 0.0 io}, id = 7516 + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]): rowcount = 56.25, cumulative cost = {165293.75 rows, 36190.23815546412 cpu, 0.0 io}, id = 3303 + LogicalProject(age2=[$2]): rowcount = 56.25, cumulative cost = {165237.5 rows, 35290.23815546412 cpu, 0.0 io}, id = 3302 + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]): rowcount = 56.25, cumulative cost = {165181.25 rows, 35233.98815546412 cpu, 0.0 io}, id = 3301 + LogicalProject(avg_age=[$0], state=[$1], age2=[$2]): rowcount = 56.25, cumulative cost = {165125.0 rows, 29793.75 cpu, 0.0 io}, id = 3300 + LogicalFilter(condition=[<=($3, 1)]): rowcount = 56.25, cumulative cost = {165068.75 rows, 29625.0 cpu, 0.0 io}, id = 3299 + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]): rowcount = 225.0, cumulative cost = {165012.5 rows, 29400.0 cpu, 0.0 io}, id = 3298 + LogicalFilter(condition=[IS NOT NULL($2)]): rowcount = 225.0, cumulative cost = {164787.5 rows, 28500.0 cpu, 0.0 io}, id = 3297 + LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]): rowcount = 500.0, cumulative cost = {164562.5 rows, 28000.0 cpu, 0.0 io}, id = 3296 + LogicalProject(avg_age=[$2], state=[$0], city=[$1]): rowcount = 500.0, cumulative cost = {164062.5 rows, 26500.0 cpu, 0.0 io}, id = 3292 + LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]): rowcount = 500.0, cumulative cost = {163562.5 rows, 25000.0 cpu, 0.0 io}, id = 3291 + LogicalProject(state=[$7], city=[$5], age=[$8]): rowcount = 5000.0, cumulative cost = {163000.0 rows, 25000.0 cpu, 0.0 io}, id = 3290 + LogicalFilter(condition=[>($8, 30)]): rowcount = 5000.0, cumulative cost = {158000.0 rows, 10000.0 cpu, 0.0 io}, id = 3289 + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]): rowcount = 10000.0, cumulative cost = {153000.0 rows, 0.0 cpu, 0.0 io}, id = 3288 physical: | - EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0]): rowcount = 225.0, cumulative cost = {2981.25 rows, 7950.0 cpu, 0.0 io}, id = 9382 - EnumerableLimit(fetch=[10000]): rowcount = 225.0, cumulative cost = {2756.25 rows, 
7275.0 cpu, 0.0 io}, id = 9374 - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]): rowcount = 225.0, cumulative cost = {2531.25 rows, 7050.0 cpu, 0.0 io}, id = 9378 - EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]): rowcount = 450.0, cumulative cost = {2306.25 rows, 3900.0 cpu, 0.0 io}, id = 9370 - EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3]): rowcount = 450.0, cumulative cost = {1856.25 rows, 3000.0 cpu, 0.0 io}, id = 9386 - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]): rowcount = 500.0, cumulative cost = {1406.25 rows, 0.0 cpu, 0.0 io}, id = 9090 + EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]): rowcount = 225.0, cumulative cost = {3201.75 rows, 39096.942171903866 cpu, 0.0 io}, id = 4096 + CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000]): rowcount = 225.0, cumulative cost = {2976.75 rows, 38196.942171903866 cpu, 0.0 io}, id = 4088 + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]): rowcount = 225.0, cumulative cost = {2531.25 rows, 8950.0 cpu, 0.0 io}, id = 4092 + EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]): rowcount = 450.0, cumulative cost = {2306.25 rows, 4900.0 cpu, 0.0 io}, id = 4084 + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4]): rowcount = 450.0, cumulative cost = {1856.25 rows, 4000.0 cpu, 0.0 io}, id = 4100 + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#3404:LogicalAggregate.NONE.[](input=RelSubset#3372,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]): rowcount = 500.0, cumulative cost = {1406.25 rows, 0.0 cpu, 0.0 io}, id = 3691 diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.json 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.json index 5ce93804ca4..45ec4243593 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.json @@ -1,7 +1,7 @@ { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age2=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)])\n LogicalFilter(condition=[IS NOT NULL($2)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)])\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n LogicalProject(avg_age=[$2], state=[$0], city=[$1])\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)])\n LogicalProject(state=[$7], city=[$5], age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0])\n EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3])\n EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n", - "extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable = v1stashed.scan();\n final org.apache.calcite.linq4j.AbstractEnumerable source = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while (inputEnumerator.moveNext()) {\n if ((Double) inputEnumerator.current() != null) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n return (Double) inputEnumerator.current() == null ? 
null : Double.valueOf(((Double) inputEnumerator.current()).doubleValue() + (double) 2);\n }\n\n };\n }\n\n };\n int prevStart;\n int prevEnd;\n final java.util.Comparator comparator = new java.util.Comparator(){\n public int compare(Double v0, Double v1) {\n int c;\n return 0;\n }\n\n public int compare(Object o0, Object o1) {\n return this.compare((Double) o0, (Double) o1);\n }\n\n };\n final org.apache.calcite.runtime.SortedMultiMap multiMap = new org.apache.calcite.runtime.SortedMultiMap();\n source.foreach(new org.apache.calcite.linq4j.function.Function1() {\n public Object apply(Double v) {\n Double key = v;\n multiMap.putMulti(key, v);\n return null;\n }\n public Object apply(Object v) {\n return apply(\n (Double) v);\n }\n }\n );\n final java.util.Iterator iterator = multiMap.arrays(comparator);\n final java.util.ArrayList _list = new java.util.ArrayList(\n multiMap.size());\n Long a0w0 = (Long) null;\n while (iterator.hasNext()) {\n final Object[] _rows = (Object[]) iterator.next();\n prevStart = -1;\n prevEnd = 2147483647;\n for (int i = 0; i < _rows.length; (++i)) {\n if (i != prevEnd) {\n int actualStart = i < prevEnd ? 0 : prevEnd + 1;\n prevEnd = i;\n a0w0 = Long.valueOf(((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown((i - 0 + 1))).longValue());\n }\n _list.add(new Object[] {\n (Double) _rows[i],\n a0w0});\n }\n }\n multiMap.clear();\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable0 = org.apache.calcite.linq4j.Linq4j.asEnumerable(_list);\n final org.apache.calcite.linq4j.AbstractEnumerable child = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable0.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while (inputEnumerator.moveNext()) {\n if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[1]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n final Object[] current = (Object[]) inputEnumerator.current();\n final Object input_value = current[0];\n final Object input_value0 = current[1];\n return new Object[] {\n input_value,\n input_value0};\n }\n\n static final long $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b = ((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown(1)).longValue();\n };\n }\n\n };\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable1 = child.take(10000);\n return new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable1.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n return inputEnumerator.moveNext();\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n return (Double) ((Object[]) inputEnumerator.current())[0];\n }\n\n };\n }\n\n };\n}\n\n\npublic Class getElementType() {\n return java.lang.Double.class;\n}\n\n\n" + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n 
LogicalProject(age2=[$2])\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)])\n LogicalFilter(condition=[IS NOT NULL($2)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)])\n LogicalProject(avg_age=[$2], state=[$0], city=[$1])\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)])\n LogicalProject(state=[$7], city=[$5], age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1])\n CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4])\n EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#2590:LogicalAggregate.NONE.[](input=RelSubset#2558,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n", + "extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable = v1stashed.scan();\n final org.apache.calcite.linq4j.AbstractEnumerable source = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while (inputEnumerator.moveNext()) {\n if ((Double) ((Object[]) inputEnumerator.current())[1] != null) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n final Object[] current = (Object[]) inputEnumerator.current();\n final Double input_value0 = (Double) current[1];\n return new Object[] {\n current[0],\n input_value0 == null ? 
null : Double.valueOf(input_value0.doubleValue() + (double) 2)};\n }\n\n };\n }\n\n };\n int prevStart;\n int prevEnd;\n final java.util.Comparator comparator = new java.util.Comparator(){\n public int compare(Object[] v0, Object[] v1) {\n final int c;\n c = org.apache.calcite.runtime.Utilities.compareNullsFirst((Double) v0[1], (Double) v1[1]);\n if (c != 0) {\n return c;\n }\n return 0;\n }\n\n public int compare(Object o0, Object o1) {\n return this.compare((Object[]) o0, (Object[]) o1);\n }\n\n };\n final org.apache.calcite.runtime.SortedMultiMap multiMap = new org.apache.calcite.runtime.SortedMultiMap();\n source.foreach(new org.apache.calcite.linq4j.function.Function1() {\n public Object apply(Object[] v) {\n Double key = (Double) v[1];\n multiMap.putMulti(key, v);\n return null;\n }\n public Object apply(Object v) {\n return apply(\n (Object[]) v);\n }\n }\n );\n final java.util.Iterator iterator = multiMap.arrays(comparator);\n final java.util.ArrayList _list = new java.util.ArrayList(\n multiMap.size());\n Long a0w0 = (Long) null;\n while (iterator.hasNext()) {\n final Object[] _rows = (Object[]) iterator.next();\n prevStart = -1;\n prevEnd = 2147483647;\n for (int i = 0; i < _rows.length; (++i)) {\n final Object[] row = (Object[]) _rows[i];\n if (i != prevEnd) {\n int actualStart = i < prevEnd ? 0 : prevEnd + 1;\n prevEnd = i;\n a0w0 = Long.valueOf(((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown((i - 0 + 1))).longValue());\n }\n _list.add(new Object[] {\n row[0],\n row[1],\n a0w0});\n }\n }\n multiMap.clear();\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable0 = org.apache.calcite.linq4j.Linq4j.asEnumerable(_list);\n final org.apache.calcite.linq4j.AbstractEnumerable child = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable0.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while (inputEnumerator.moveNext()) {\n if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[2]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n final Object[] current = (Object[]) inputEnumerator.current();\n final Object input_value = current[0];\n final Object input_value0 = current[1];\n final Object input_value1 = current[2];\n return new Object[] {\n input_value,\n input_value0,\n input_value1};\n }\n\n static final long $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b = ((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown(1)).longValue();\n };\n }\n\n };\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable1 = org.apache.calcite.linq4j.EnumerableDefaults.orderBy(child, new org.apache.calcite.linq4j.function.Function1() {\n public String apply(Object[] v) {\n return v[0] == null ? 
null : v[0].toString();\n }\n public Object apply(Object v) {\n return apply(\n (Object[]) v);\n }\n }\n , org.apache.calcite.linq4j.function.Functions.nullsComparator(true, false), 0, 10000);\n return new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable1.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n return inputEnumerator.moveNext();\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n return (Double) ((Object[]) inputEnumerator.current())[1];\n }\n\n };\n }\n\n };\n}\n\n\npublic Class getElementType() {\n return java.lang.Double.class;\n}\n\n\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.yaml index 89137cfc835..f91ea4ba4d8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.yaml @@ -2,23 +2,24 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age2=[$2]) - LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]) - LogicalFilter(condition=[IS NOT NULL($2)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) - LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) - LogicalProject(avg_age=[$2], state=[$0], city=[$1]) - LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) - LogicalProject(state=[$7], city=[$5], age=[$8]) - LogicalFilter(condition=[>($8, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]) + LogicalFilter(condition=[IS NOT NULL($2)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) + LogicalProject(avg_age=[$2], state=[$0], city=[$1]) + LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) + LogicalProject(state=[$7], city=[$5], age=[$8]) + LogicalFilter(condition=[>($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0]) - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]) - EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]) + CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]) + EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#1776:LogicalAggregate.NONE.[](input=RelSubset#1744,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) extended: |+ public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) { final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get("v1stashed"); @@ -33,7 +34,7 @@ calcite: public boolean moveNext() { while (inputEnumerator.moveNext()) { - if ((Double) inputEnumerator.current() != null) { + if ((Double) ((Object[]) inputEnumerator.current())[1] != null) { return true; } } @@ -45,7 +46,11 @@ calcite: } public Object current() { - return (Double) inputEnumerator.current() == null ? null : Double.valueOf(((Double) inputEnumerator.current()).doubleValue() + (double) 2); + final Object[] current = (Object[]) inputEnumerator.current(); + final Double input_value0 = (Double) current[1]; + return new Object[] { + current[0], + input_value0 == null ? 
null : Double.valueOf(input_value0.doubleValue() + (double) 2)}; } }; @@ -55,26 +60,30 @@ calcite: int prevStart; int prevEnd; final java.util.Comparator comparator = new java.util.Comparator(){ - public int compare(Double v0, Double v1) { - int c; + public int compare(Object[] v0, Object[] v1) { + final int c; + c = org.apache.calcite.runtime.Utilities.compareNullsFirst((Double) v0[1], (Double) v1[1]); + if (c != 0) { + return c; + } return 0; } public int compare(Object o0, Object o1) { - return this.compare((Double) o0, (Double) o1); + return this.compare((Object[]) o0, (Object[]) o1); } }; final org.apache.calcite.runtime.SortedMultiMap multiMap = new org.apache.calcite.runtime.SortedMultiMap(); source.foreach(new org.apache.calcite.linq4j.function.Function1() { - public Object apply(Double v) { - Double key = v; + public Object apply(Object[] v) { + Double key = (Double) v[1]; multiMap.putMulti(key, v); return null; } public Object apply(Object v) { return apply( - (Double) v); + (Object[]) v); } } ); @@ -87,13 +96,15 @@ calcite: prevStart = -1; prevEnd = 2147483647; for (int i = 0; i < _rows.length; (++i)) { + final Object[] row = (Object[]) _rows[i]; if (i != prevEnd) { int actualStart = i < prevEnd ? 0 : prevEnd + 1; prevEnd = i; a0w0 = Long.valueOf(((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown((i - 0 + 1))).longValue()); } _list.add(new Object[] { - (Double) _rows[i], + row[0], + row[1], a0w0}); } } @@ -109,7 +120,7 @@ calcite: public boolean moveNext() { while (inputEnumerator.moveNext()) { - if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[1]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) { + if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[2]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) { return true; } } @@ -124,9 +135,11 @@ calcite: final Object[] current = (Object[]) inputEnumerator.current(); final Object input_value = current[0]; final Object input_value0 = current[1]; + final Object input_value1 = current[2]; return new Object[] { input_value, - input_value0}; + input_value0, + input_value1}; } static final long $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b = ((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown(1)).longValue(); @@ -134,7 +147,16 @@ calcite: } }; - final org.apache.calcite.linq4j.Enumerable _inputEnumerable1 = child.take(10000); + final org.apache.calcite.linq4j.Enumerable _inputEnumerable1 = org.apache.calcite.linq4j.EnumerableDefaults.orderBy(child, new org.apache.calcite.linq4j.function.Function1() { + public String apply(Object[] v) { + return v[0] == null ? 
null : v[0].toString(); + } + public Object apply(Object v) { + return apply( + (Object[]) v); + } + } + , org.apache.calcite.linq4j.function.Functions.nullsComparator(true, false), 0, 10000); return new org.apache.calcite.linq4j.AbstractEnumerable(){ public org.apache.calcite.linq4j.Enumerator enumerator() { return new org.apache.calcite.linq4j.Enumerator(){ @@ -152,7 +174,7 @@ calcite: } public Object current() { - return (Double) ((Object[]) inputEnumerator.current())[0]; + return (Double) ((Object[]) inputEnumerator.current())[1]; } }; @@ -164,4 +186,4 @@ calcite: public Class getElementType() { return java.lang.Double.class; - } + } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.json index 96e070954ec..5a3103178bb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.json @@ -1,5 +1,5 @@ { "calcite": { - "logical": "LogicalSystemLimit\n LogicalProject\n LogicalFilter\n LogicalProject\n LogicalFilter\n LogicalProject\n LogicalSort\n LogicalProject\n LogicalAggregate\n LogicalProject\n LogicalFilter\n CalciteLogicalIndexScan\n" + "logical": "LogicalSystemLimit\n LogicalProject\n LogicalSort\n LogicalProject\n LogicalFilter\n LogicalProject\n LogicalFilter\n LogicalProject\n LogicalProject\n LogicalAggregate\n LogicalProject\n LogicalFilter\n CalciteLogicalIndexScan\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.yaml index f58ffa560a7..c6b8c57b0f0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.yaml @@ -2,13 +2,14 @@ calcite: logical: | LogicalSystemLimit LogicalProject - LogicalFilter + LogicalSort LogicalProject LogicalFilter LogicalProject - LogicalSort + LogicalFilter LogicalProject - LogicalAggregate - LogicalProject - LogicalFilter - CalciteLogicalIndexScan + LogicalProject + LogicalAggregate + LogicalProject + LogicalFilter + CalciteLogicalIndexScan diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.json index a50b0baa104..1874fc81ec7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.json @@ -1,6 +1,6 @@ { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age2=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)])\n LogicalFilter(condition=[IS NOT NULL($2)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)])\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n LogicalProject(avg_age=[$2], state=[$0], city=[$1])\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)])\n LogicalProject(state=[$7], city=[$5], age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0])\n EnumerableLimit(fetch=[10000])\n 
EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3])\n EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age2=[$2])\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)])\n LogicalFilter(condition=[IS NOT NULL($2)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)])\n LogicalProject(avg_age=[$2], state=[$0], city=[$1])\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)])\n LogicalProject(state=[$7], city=[$5], age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1])\n CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4])\n EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.yaml index 862a45dc617..b8e0f7ed6d5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.yaml @@ -2,20 +2,21 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age2=[$2]) - LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]) - LogicalFilter(condition=[IS NOT NULL($2)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) - LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) - LogicalProject(avg_age=[$2], state=[$0], city=[$1]) - LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) - LogicalProject(state=[$7], city=[$5], age=[$8]) - LogicalFilter(condition=[>($8, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]) + LogicalFilter(condition=[IS NOT NULL($2)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) + LogicalProject(avg_age=[$2], state=[$0], city=[$1]) + LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) + LogicalProject(state=[$7], city=[$5], age=[$8]) + LogicalFilter(condition=[>($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0]) - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]) - EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]) + CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]) + EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT 
NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml index d0c57f3b08a..124539b9d4c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml @@ -3,29 +3,19 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - LogicalAggregate(group=[{}], avg_age=[AVG($0)]) - LogicalProject(age=[$8]) - LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), OR(=($4, $cor0.gender), AND(IS NULL($4), IS NULL($cor0.gender))))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], avg_age=[AVG($20)]) + LogicalJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), IS NOT DISTINCT FROM($4, $19))], joinType=[left]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(__r_seq__=[ROW_NUMBER() OVER ()], __r_gender__=[$4], __r_age__=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t18]) - 
EnumerableLimit(fetch=[10000]) - EnumerableMergeJoin(condition=[AND(=($11, $15), =($12, $16), =($13, $17), IS NOT DISTINCT FROM($4, $14))], joinType=[left]) - EnumerableSort(sort0=[$11], sort1=[$12], sort2=[$13], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..11=[{inputs}], expr#12=[1], expr#13=[-($t11, $t12)], expr#14=[IS NULL($t4)], proj#0..11=[{exprs}], $f12=[$t13], $f15=[$t14]) + EnumerableCalc(expr#0..19=[{inputs}], expr#20=[0], expr#21=[=($t19, $t20)], expr#22=[null:BIGINT], expr#23=[CASE($t21, $t22, $t18)], expr#24=[CAST($t23):DOUBLE], expr#25=[/($t24, $t19)], proj#0..10=[{exprs}], avg_age=[$t25]) + CalciteEnumerableTopK(sort0=[$17], dir0=[ASC], fetch=[10000]) + EnumerableAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], agg#0=[$SUM0($20)], agg#1=[COUNT($20)]) + EnumerableNestedLoopJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), IS NOT DISTINCT FROM($4, $19))], joinType=[left]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..2=[{inputs}], __r_seq__=[$t2], __r_gender__=[$t0], __r_age__=[$t1]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, $t6)], expr#8=[null:BIGINT], expr#9=[CASE($t7, $t8, $t4)], expr#10=[CAST($t9):DOUBLE], expr#11=[/($t10, $t5)], proj#0..3=[{exprs}], avg_age=[$t11]) - EnumerableAggregate(group=[{0, 1, 2, 3}], agg#0=[$SUM0($5)], agg#1=[COUNT($5)]) - EnumerableNestedLoopJoin(condition=[AND(>=($6, $2), <=($6, $1), OR(=($4, $0), AND(IS NULL($4), $3)))], joinType=[inner]) - EnumerableAggregate(group=[{0, 1, 2, 3}]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[-($t1, $t2)], expr#4=[IS NULL($t0)], proj#0..1=[{exprs}], $f12=[$t3], $f15=[$t4]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml index 52d485482b7..a1cf6ae00e9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml @@ -3,29 +3,21 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - LogicalAggregate(group=[{}], avg_age=[AVG($0)]) - LogicalProject(age=[$8]) - LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), =($4, $cor0.gender))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], avg_age=[AVG($20)]) + LogicalJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), =($4, $19))], joinType=[left]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(__r_seq__=[ROW_NUMBER() OVER ()], __r_gender__=[$4], __r_age__=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t16]) - CalciteEnumerableTopK(sort0=[$11], dir0=[ASC], fetch=[10000]) - EnumerableMergeJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) - EnumerableSort(sort0=[$4], sort1=[$11], sort2=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..11=[{inputs}], expr#12=[1], expr#13=[-($t11, $t12)], proj#0..11=[{exprs}], $f12=[$t13]) + EnumerableCalc(expr#0..19=[{inputs}], expr#20=[0], expr#21=[=($t19, $t20)], expr#22=[null:BIGINT], expr#23=[CASE($t21, $t22, $t18)], expr#24=[CAST($t23):DOUBLE], expr#25=[/($t24, $t19)], proj#0..10=[{exprs}], avg_age=[$t25]) + CalciteEnumerableTopK(sort0=[$17], dir0=[ASC], fetch=[10000]) + EnumerableAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], agg#0=[$SUM0($20)], agg#1=[COUNT($20)]) + EnumerableMergeJoin(condition=[AND(=($4, $19), >=($18, -($17, 
1)), <=($18, $17))], joinType=[left]) + EnumerableSort(sort0=[$4], dir0=[ASC]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) - EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) - EnumerableHashJoin(condition=[AND(=($0, $3), >=($5, $2), <=($5, $1))], joinType=[inner]) - EnumerableAggregate(group=[{0, 1, 2}]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[-($t1, $t2)], proj#0..1=[{exprs}], $f12=[$t3]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], __r_seq__=[$t2], __r_gender__=[$t0], __r_age__=[$t1]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml index c1234439efb..d0a2f80d866 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml @@ -3,9 +3,9 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4]) LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) - LogicalProject(value=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)]) + 
LogicalProject(_value_transpose_=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)]) LogicalFilter(condition=[IS NOT NULL($19)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, null:NULL)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], _value_transpose_=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, null:NULL)]) LogicalJoin(condition=[true], joinType=[inner]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[ROW_NUMBER() OVER ()]) LogicalSort(fetch=[5]) @@ -14,7 +14,7 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) - EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname'], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address'], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], expr#22=['balance'], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], expr#25=['gender'], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city'], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer'], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state'], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age'], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email'], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname'], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, 
$t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], value=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname'], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address'], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], expr#22=['balance'], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], expr#25=['gender'], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city'], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer'], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state'], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age'], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email'], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname'], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], _value_transpose_=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59]) EnumerableNestedLoopJoin(condition=[true], joinType=[inner]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_union.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_union.yaml new file mode 100644 index 00000000000..3ae4928bf42 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_union.yaml @@ -0,0 +1,20 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(gender=[$4]) + LogicalUnion(all=[true]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[<($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], 
state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[>=($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], gender=[$t0]) + EnumerableAggregate(group=[{0}], count()=[COUNT()]) + EnumerableUnion(all=[true]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age], FILTER-><($1, 30), PROJECT->[gender]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"range":{"age":{"from":null,"to":30,"include_lower":true,"include_upper":false,"boost":1.0}}},"_source":{"includes":["gender"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age], FILTER->>=($1, 30), PROJECT->[gender]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":true,"include_upper":true,"boost":1.0}}},"_source":{"includes":["gender"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_ctime.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_ctime.yaml new file mode 100644 index 00000000000..e93685e279d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_ctime.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(ts=[CTIME(1066507633)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1066507633], expr#20=[CTIME($t19)], ts=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_dur2sec.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_dur2sec.yaml new file mode 100644 index 00000000000..3f01b02a624 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_dur2sec.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(d=[DUR2SEC('01:23:45':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['01:23:45':VARCHAR], expr#20=[DUR2SEC($t19)], d=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mktime.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mktime.yaml new file mode 100644 index 00000000000..2367ea48feb --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mktime.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(d=[MKTIME('10/18/2003 20:07:13':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + 
EnumerableCalc(expr#0..18=[{inputs}], expr#19=['10/18/2003 20:07:13':VARCHAR], expr#20=[MKTIME($t19)], d=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mstime.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mstime.yaml new file mode 100644 index 00000000000..9bd873d1b3e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mstime.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(t=[MSTIME('03:45.5':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['03:45.5':VARCHAR], expr#20=[MSTIME($t19)], t=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml index 4cb67a380a5..42e82eca514 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml @@ -2,22 +2,24 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age2=[$2]) - LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]) - LogicalFilter(condition=[IS NOT NULL($2)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) - LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) - LogicalProject(avg_age=[$2], state=[$0], city=[$1]) - LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) - LogicalProject(state=[$7], city=[$5], age=[$8]) - LogicalFilter(condition=[>($8, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]) + LogicalFilter(condition=[IS NOT NULL($2)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) + LogicalProject(avg_age=[$2], state=[$0], city=[$1]) + LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) + LogicalProject(state=[$7], city=[$5], age=[$8]) + LogicalFilter(condition=[>($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], age2=[$t1], $condition=[$t4]) - EnumerableWindow(window#0=[window(partition {1} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[2], expr#11=[+($t9, $t10)], expr#12=[IS NOT NULL($t8)], state=[$t1], age2=[$t11], $condition=[$t12]) - EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) - EnumerableAggregate(group=[{5, 7}], agg#0=[$SUM0($8)], agg#1=[COUNT($8)]) - EnumerableCalc(expr#0..16=[{inputs}], expr#17=[30], expr#18=[>($t8, 
$t17)], proj#0..16=[{exprs}], $condition=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]) + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]) + EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[2], expr#11=[+($t9, $t10)], expr#12=[IS NOT NULL($t8)], state=[$t1], age2=[$t11], $condition=[$t12]) + EnumerableAggregate(group=[{5, 7}], agg#0=[$SUM0($8)], agg#1=[COUNT($8)]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[30], expr#18=[>($t8, $t17)], proj#0..16=[{exprs}], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml index 522e7922e68..c56cd5d1bce 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml @@ -3,30 +3,20 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - LogicalAggregate(group=[{}], avg_age=[AVG($0)]) - LogicalProject(age=[$8]) - LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), OR(=($4, $cor0.gender), AND(IS NULL($4), IS NULL($cor0.gender))))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], avg_age=[AVG($20)]) + LogicalJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), IS NOT DISTINCT FROM($4, $19))], joinType=[left]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + 
LogicalProject(__r_seq__=[ROW_NUMBER() OVER ()], __r_gender__=[$4], __r_age__=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t18]) + EnumerableCalc(expr#0..19=[{inputs}], expr#20=[0], expr#21=[=($t19, $t20)], expr#22=[null:BIGINT], expr#23=[CASE($t21, $t22, $t18)], expr#24=[CAST($t23):DOUBLE], expr#25=[/($t24, $t19)], proj#0..10=[{exprs}], avg_age=[$t25]) EnumerableLimit(fetch=[10000]) - EnumerableMergeJoin(condition=[AND(=($11, $15), =($12, $16), =($13, $17), IS NOT DISTINCT FROM($4, $14))], joinType=[left]) - EnumerableSort(sort0=[$11], sort1=[$12], sort2=[$13], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], expr#20=[IS NULL($t4)], proj#0..10=[{exprs}], __stream_seq__=[$t17], $f12=[$t19], $f15=[$t20]) + EnumerableSort(sort0=[$17], dir0=[ASC]) + EnumerableAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], agg#0=[$SUM0($20)], agg#1=[COUNT($20)]) + EnumerableNestedLoopJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), IS NOT DISTINCT FROM($4, $19))], joinType=[left]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, $t6)], expr#8=[null:BIGINT], expr#9=[CASE($t7, $t8, $t4)], expr#10=[CAST($t9):DOUBLE], expr#11=[/($t10, $t5)], proj#0..3=[{exprs}], avg_age=[$t11]) - EnumerableAggregate(group=[{0, 1, 2, 3}], agg#0=[$SUM0($5)], agg#1=[COUNT($5)]) - EnumerableNestedLoopJoin(condition=[AND(>=($6, $2), <=($6, $1), OR(=($4, $0), AND(IS NULL($4), $3)))], joinType=[inner]) - EnumerableAggregate(group=[{0, 1, 2, 3}]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], expr#20=[IS NULL($t4)], gender=[$t4], __stream_seq__=[$t17], $f12=[$t19], $f15=[$t20]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableCalc(expr#0..17=[{inputs}], gender=[$t4], age=[$t8], $2=[$t17]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + EnumerableCalc(expr#0..17=[{inputs}], __r_seq__=[$t17], __r_gender__=[$t4], __r_age__=[$t8]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml index a0634448b5e..d72bf7b429f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml @@ -3,29 +3,22 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], 
balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - LogicalAggregate(group=[{}], avg_age=[AVG($0)]) - LogicalProject(age=[$8]) - LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), =($4, $cor0.gender))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], avg_age=[AVG($20)]) + LogicalJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), =($4, $19))], joinType=[left]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(__r_seq__=[ROW_NUMBER() OVER ()], __r_gender__=[$4], __r_age__=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t16]) + EnumerableCalc(expr#0..19=[{inputs}], expr#20=[0], expr#21=[=($t19, $t20)], expr#22=[null:BIGINT], expr#23=[CASE($t21, $t22, $t18)], expr#24=[CAST($t23):DOUBLE], expr#25=[/($t24, $t19)], proj#0..10=[{exprs}], avg_age=[$t25]) EnumerableLimit(fetch=[10000]) - EnumerableHashJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) - EnumerableSort(sort0=[$11], dir0=[ASC]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], proj#0..10=[{exprs}], __stream_seq__=[$t17], $f12=[$t19]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) - EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) - EnumerableHashJoin(condition=[AND(=($0, $3), >=($5, $2), <=($5, $1))], joinType=[inner]) - EnumerableAggregate(group=[{0, 1, 2}]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], gender=[$t4], __stream_seq__=[$t17], $f12=[$t19]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableCalc(expr#0..17=[{inputs}], 
gender=[$t4], age=[$t8], $2=[$t17]) + EnumerableSort(sort0=[$17], dir0=[ASC]) + EnumerableAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], agg#0=[$SUM0($20)], agg#1=[COUNT($20)]) + EnumerableMergeJoin(condition=[AND(=($4, $19), >=($18, -($17, 1)), <=($18, $17))], joinType=[left]) + EnumerableSort(sort0=[$4], dir0=[ASC]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..17=[{inputs}], __r_seq__=[$t17], __r_gender__=[$t4], __r_age__=[$t8]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_union.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_union.yaml new file mode 100644 index 00000000000..22a9bb6b5bd --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_union.yaml @@ -0,0 +1,22 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(gender=[$4]) + LogicalUnion(all=[true]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[<($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[>=($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], gender=[$t0]) + EnumerableAggregate(group=[{0}], count()=[COUNT()]) + EnumerableUnion(all=[true]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[30], expr#18=[<($t8, $t17)], gender=[$t4], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[30], expr#18=[>=($t8, $t17)], gender=[$t4], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3922.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3922.yml new file mode 100644 index 00000000000..dfa26f3dc50 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3922.yml @@ -0,0 +1,71 @@ +setup: + - do: + indices.create: + index: test_issue_3922 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + name: + type: keyword + category: + type: keyword + value: + type: integer + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Sort order preserved through dedup (#3922)": + - 
skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: test_issue_3922 + refresh: true + body: + - '{"index": {}}' + - '{"name": "A", "category": "X", "value": 1}' + - '{"index": {}}' + - '{"name": "B", "category": "X", "value": 2}' + - '{"index": {}}' + - '{"name": "A", "category": "Y", "value": 3}' + - '{"index": {}}' + - '{"name": "C", "category": "Z", "value": 4}' + - '{"index": {}}' + - '{"name": "B", "category": "Z", "value": 5}' + - '{"index": {}}' + - '{"name": "D", "category": "A", "value": 10}' + - '{"index": {}}' + - '{"name": "E", "category": "B", "value": 11}' + - '{"index": {}}' + - '{"name": "F", "category": "C", "value": 12}' + - '{"index": {}}' + - '{"name": "G", "category": "D", "value": 13}' + - '{"index": {}}' + - '{"name": "D", "category": "E", "value": 14}' + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=test_issue_3922 | sort category | dedup 1 name | fields category, name' + - match: {"total": 7} + - match: {"datarows": [["A", "D"], ["B", "E"], ["C", "F"], ["D", "G"], ["X", "A"], ["X", "B"], ["Z", "C"]]} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4659.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4659.yml new file mode 100644 index 00000000000..f111f0175f9 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4659.yml @@ -0,0 +1,90 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + - do: + indices.create: + index: log_text_4659 + body: + mappings: + properties: + msg: + type: text + idx: + type: integer + - do: + indices.create: + index: log_keyword_4659 + body: + mappings: + properties: + msg: + type: keyword + idx: + type: integer + - do: + bulk: + index: log_text_4659 + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"msg": "status=200", "idx": 1}' + - do: + bulk: + index: log_keyword_4659 + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"msg": "status=200", "idx": 2}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + - do: + indices.delete: + index: log_text_4659 + ignore: 404 + - do: + indices.delete: + index: log_keyword_4659 + ignore: 404 + +--- +"PPL wildcard query returns all documents across indices with mixed text/keyword field types": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=log_*_4659 | fields msg, idx | sort idx' + - match: {"total": 2} + - match: {"datarows": [["status=200", 1], ["status=200", 2]]} + +--- +"PPL script filter works across indices with mixed text/keyword field types": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. 
If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=log_*_4659 | where upper(msg) = 'STATUS=200' | fields msg, idx | sort idx" + - match: {"total": 2} + - match: {"datarows": [["status=200", 1], ["status=200", 2]]} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4800.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4800.yml new file mode 100644 index 00000000000..ef4c4769191 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4800.yml @@ -0,0 +1,57 @@ +setup: + - skip: + features: + - headers + - allowed_warnings + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: stream_test_null + body: + mappings: + properties: + name: { type: keyword } + age: { type: integer } + state: { type: keyword } + country: { type: keyword } + year: { type: integer } + month: { type: integer } + - do: + bulk: + index: stream_test_null + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"name": "Jake", "age": 70, "state": "California", "country": "USA", "year": 2023, "month": 4}' + - '{"index": {"_id": "2"}}' + - '{"name": "Hello", "age": 30, "state": "New York", "country": "USA", "year": 2023, "month": 4}' + - '{"index": {"_id": "3"}}' + - '{"name": "John", "age": 25, "state": "Ontario", "country": "Canada", "year": 2023, "month": 4}' + - '{"index": {"_id": "4"}}' + - '{"name": "Jane", "age": 20, "state": "Quebec", "country": "Canada", "year": 2023, "month": 4}' + - '{"index": {"_id": "5"}}' + - '{"name": null, "age": 10, "state": null, "country": "Canada", "year": 2023, "month": 4}' + - '{"index": {"_id": "6"}}' + - '{"name": "Kevin", "year": 2023, "month": 4}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Chained streamstats with window should not cause NPE (#4800)": + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=stream_test_null | streamstats window=2 avg(age) as avg_age by state, country | streamstats window=2 avg(avg_age) as avg_state_age by country' + - match: {"total": 6} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5099.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5099.yml new file mode 100644 index 00000000000..a30381ecf50 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5099.yml @@ -0,0 +1,59 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: issue5099 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + name: + type: keyword + age: + type: integer + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5099", "_id": "1"}}' + - '{"name": "Alice", "age": 30}' + - '{"index": {"_index": "issue5099", "_id": "2"}}' + - '{"name": "Bob", "age": 25}' + +--- +teardown: + - do: + indices.delete: + index: issue5099 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5099: rename with wildcard should not apply on hidden fields": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and 
will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5099 | rename * as old_* + + - match: { total: 2 } + - length: { schema: 2 } + - match: { schema: [ { name: "old_name", type: "string" }, { name: "old_age", type: "int" } ] } + - length: { datarows: 2 } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5125.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5125.yml new file mode 100644 index 00000000000..dd0335f73d5 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5125.yml @@ -0,0 +1,66 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: issue5125 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + gender: + type: keyword + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5125", "_id": "1"}}' + - '{"gender": "F"}' + - '{"index": {"_index": "issue5125", "_id": "2"}}' + - '{"gender": "F"}' + - '{"index": {"_index": "issue5125", "_id": "3"}}' + - '{"gender": "F"}' + - '{"index": {"_index": "issue5125", "_id": "4"}}' + - '{"gender": "M"}' + - '{"index": {"_index": "issue5125", "_id": "5"}}' + - '{"gender": "M"}' + - '{"index": {"_index": "issue5125", "_id": "6"}}' + - '{"gender": "M"}' + - '{"index": {"_index": "issue5125", "_id": "7"}}' + - '{"gender": "M"}' + +--- +teardown: + - do: + indices.delete: + index: issue5125 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5125: consecutive sorts after agg should honor latest sort direction": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. 
If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5125 | stats count() as c by gender | sort gender | sort - gender + + - match: { total: 2 } + - match: { schema: [ { name: c, type: bigint }, { name: gender, type: string } ] } + - match: { datarows: [ [ 4, "M" ], [ 3, "F" ] ] } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5165.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5165.yml new file mode 100644 index 00000000000..34cb9f24370 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5165.yml @@ -0,0 +1,62 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: issue5165 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + int_field: + type: integer + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5165", "_id": "1"}}' + - '{"int_field": 42}' + - '{"index": {"_index": "issue5165", "_id": "2"}}' + - '{"int_field": -1}' + - '{"index": {"_index": "issue5165", "_id": "3"}}' + - '{"int_field": 0}' + - '{"index": {"_index": "issue5165", "_id": "4"}}' + - '{"int_field": 2147483647}' + - '{"index": {"_index": "issue5165", "_id": "5"}}' + - '{"int_field": null}' + +--- +teardown: + - do: + indices.delete: + index: issue5165 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5165: NOT IN should exclude null/missing rows": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. 
If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5165 | where int_field NOT IN (42, -1, 0) | fields int_field + + - match: { total: 1 } + - length: { datarows: 1 } + - match: { datarows: [ [ 2147483647 ] ] } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5167.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5167.yml new file mode 100644 index 00000000000..cf18c4c425c --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5167.yml @@ -0,0 +1,84 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5167 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + int_field: + type: integer + json_data: + type: keyword + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5167", "_id": "1"}}' + - '{"int_field": 42, "json_data": "{\"name\":\"alice\",\"scores\":[90,85,92]}"}' + +--- +teardown: + - do: + indices.delete: + index: issue5167 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5167: json_set with $.key path should update the value": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5167 | where int_field = 42 | eval modified = json_set(json_data, '$.name', 'modified_alice') | fields modified" + + - match: { total: 1 } + - match: { datarows: [ [ "{\"name\":\"modified_alice\",\"scores\":[90,85,92]}" ] ] } + +--- +"Issue 5167: json_delete with $.key path should remove the key": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5167 | where int_field = 42 | eval deleted = json_delete(json_data, '$.name') | fields deleted" + + - match: { total: 1 } + - match: { datarows: [ [ "{\"scores\":[90,85,92]}" ] ] } + +--- +"Issue 5167: json_set with unprefixed path still works": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5167 | where int_field = 42 | eval modified = json_set(json_data, 'name', 'bob') | fields modified" + + - match: { total: 1 } + - match: { datarows: [ [ "{\"name\":\"bob\",\"scores\":[90,85,92]}" ] ] } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5169.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5169.yml new file mode 100644 index 00000000000..478fda45d46 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5169.yml @@ -0,0 +1,49 @@ +setup: + - do: + indices.create: + index: issue5169_keyword + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + keyword_field: + type: keyword + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5169_keyword", "_id": "1"}}' + - '{"keyword_field": "hello"}' + - '{"index": {"_index": "issue5169_keyword", "_id": "2"}}' + - '{"keyword_field": "world"}' + - '{"index": {"_index": "issue5169_keyword", "_id": "3"}}' + - '{"keyword_field": ""}' + - '{"index": {"_index": "issue5169_keyword", "_id": "4"}}' + - '{"keyword_field": "special chars..."}' + - '{"index": {"_index": "issue5169_keyword", 
"_id": "5"}}' + - '{"keyword_field": null}' + +--- +teardown: + - do: + indices.delete: + index: issue5169_keyword + ignore_unavailable: true + +--- +"Issue 5169: NOT LIKE should exclude null/missing field rows": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5169_keyword | where NOT keyword_field LIKE '%ello%' | fields keyword_field + + - match: { total: 3 } + - length: { datarows: 3 } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5172.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5172.yml new file mode 100644 index 00000000000..239d06ec968 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5172.yml @@ -0,0 +1,78 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5172 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + count: + type: integer + category: + type: keyword + subcategory: + type: keyword + value: + type: double + ts: + type: date + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5172", "_id": "1"}}' + - '{"count": 1, "category": "A", "subcategory": "X", "value": 10.5, "ts": "2024-01-01"}' + - '{"index": {"_index": "issue5172", "_id": "2"}}' + - '{"count": 2, "category": "A", "subcategory": "Y", "value": 20.3, "ts": "2024-01-02"}' + +--- +teardown: + - do: + indices.delete: + index: issue5172 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5172: transpose with value field name collision": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5172 | where category = "A" | fields category, value | transpose 2 + + - match: { total: 2 } + - match: { schema: [ { name: column, type: string }, { name: "row 1", type: string }, { name: "row 2", type: string } ] } + - length: { datarows: 2 } + +--- +"Issue 5172: transpose with stats alias named value": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5172 | stats count() as value, avg(value) as avg_val | transpose + + - match: { total: 2 } + - length: { datarows: 2 } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5173.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5173.yml new file mode 100644 index 00000000000..3db25e24f56 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5173.yml @@ -0,0 +1,99 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5173 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + gender: + type: keyword + age: + type: integer + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5173", "_id": "1"}}' + - '{"gender": "F", "age": 10}' + - '{"index": {"_index": "issue5173", "_id": "2"}}' + - '{"gender": "F", "age": 20}' + - '{"index": {"_index": "issue5173", "_id": "3"}}' + - '{"gender": "M", "age": 30}' + - '{"index": {"_index": "issue5173", "_id": "4"}}' + - '{"gender": "M", "age": 40}' + +--- +teardown: + - do: + indices.delete: + index: issue5173 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 
5173: double appendpipe with different aggregations should succeed": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5173 | stats sum(age) as sum_age by gender | appendpipe [ stats avg(sum_age) as avg_sum_age ] | appendpipe [ stats max(sum_age) as max_sum_age ]" + + - match: { total: 4 } + - match: + schema: + - { name: sum_age, type: bigint } + - { name: gender, type: string } + - { name: avg_sum_age, type: double } + - { name: max_sum_age, type: bigint } + - match: + datarows: + - [ 30, "F", null, null ] + - [ 70, "M", null, null ] + - [ null, null, 50.0, null ] + - [ null, null, null, 70 ] + +--- +"Issue 5173: triple appendpipe with different aggregations should succeed": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5173 | stats sum(age) as sum_age by gender | appendpipe [ stats avg(sum_age) as avg_sum_age ] | appendpipe [ stats max(sum_age) as max_sum_age ] | appendpipe [ stats min(sum_age) as min_sum_age ]" + + - match: { total: 5 } + - match: + schema: + - { name: sum_age, type: bigint } + - { name: gender, type: string } + - { name: avg_sum_age, type: double } + - { name: max_sum_age, type: bigint } + - { name: min_sum_age, type: bigint } + - match: + datarows: + - [ 30, "F", null, null, null ] + - [ 70, "M", null, null, null ] + - [ null, null, 50.0, null, null ] + - [ null, null, null, 70, null ] + - [ null, null, null, null, 30 ] diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml new file mode 100644 index 00000000000..c2f861b8194 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml @@ -0,0 +1,83 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5174 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + count: + type: integer + category: + type: keyword + subcategory: + type: keyword + value: + type: double + ts: + type: date + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5174", "_id": "1"}}' + - '{"count": 1, "category": "A", "subcategory": "X", "value": 10.5, "ts": "2024-01-01"}' + - '{"index": {"_index": "issue5174", "_id": "2"}}' + - '{"count": 2, "category": "A", "subcategory": "Y", "value": 20.3, "ts": "2024-01-02"}' + - '{"index": {"_index": "issue5174", "_id": "3"}}' + - '{"count": 10, "category": "B", "subcategory": "X", "value": 100.0, "ts": "2024-01-03"}' + - '{"index": {"_index": "issue5174", "_id": "4"}}' + - '{"count": null, "category": "B", "subcategory": "Y", "value": null, "ts": "2024-01-04"}' + +--- +teardown: + - do: + indices.delete: + index: issue5174 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5174: bin then chart with null values should not cause NPE": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5174 | bin value span=50 as val_bin | chart count() over val_bin by category + + - match: { total: 2 } + - match: { schema: [ { name: val_bin, type: string }, { name: category, type: string }, { name: "count()", type: bigint } ] } + - match: { datarows: [ [ "0-50", "A", 2 ], [ "100-150", "B", 1 ] ] } + +--- +"Issue 5174: bin then chart with single group and null 
values": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5174 | bin value span=50 as val_bin | chart count() over val_bin + + - match: { total: 2 } + - match: { schema: [ { name: val_bin, type: string }, { name: "count()", type: bigint } ] } + - match: { datarows: [ [ "0-50", 2 ], [ "100-150", 1 ] ] } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5175.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5175.yml new file mode 100644 index 00000000000..34139cbbf4d --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5175.yml @@ -0,0 +1,85 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5175 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + dummy: + type: keyword + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5175", "_id": "1"}}' + - '{"dummy": "row"}' + +--- +teardown: + - do: + indices.delete: + index: issue5175 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5175: COALESCE(null, 42) returns integer 42": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5175 | eval x = COALESCE(null, 42) | fields x | head 1 + + - match: { total: 1 } + - match: { schema: [ { name: x, type: int } ] } + - match: { datarows: [ [ 42 ] ] } + +--- +"Issue 5175: COALESCE(42, null) returns integer 42": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5175 | eval x = COALESCE(42, null) | fields x | head 1 + + - match: { total: 1 } + - match: { schema: [ { name: x, type: int } ] } + - match: { datarows: [ [ 42 ] ] } + +--- +"Issue 5175: COALESCE(null, 3.14) returns double": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5175 | eval x = COALESCE(null, 3.14) | fields x | head 1 + + - match: { total: 1 } + - match: { schema: [ { name: x, type: double } ] } + - match: { datarows: [ [ 3.14 ] ] } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5185.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5185.yml new file mode 100644 index 00000000000..0f939a03585 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5185.yml @@ -0,0 +1,69 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: issue5185 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + doc: + type: text + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5185", "_id": "1"}}' + - '{"doc": "{\"user\":{\"name\":\"John\",\"age\":30}}"}' + - '{"index": {"_index": "issue5185", "_id": "2"}}' + - '{"doc": "{\"user\":{\"name\":\"Alice\",\"age\":25}}"}' + +--- +teardown: + - do: + indices.delete: + index: issue5185 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5185: eval with multiple dotted-path assignments from MAP column": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5185 | spath input=doc 
| eval doc.user.name=doc.user.name, doc.user.age=doc.user.age | fields doc.user.name, doc.user.age" + + - match: { total: 2 } + - length: { datarows: 2 } + +--- +"Issue 5185: separate eval commands with dotted-path from MAP column": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5185 | spath input=doc | eval doc.user.name=doc.user.name | eval doc.user.age=doc.user.age | fields doc.user.name, doc.user.age" + + - match: { total: 2 } + - length: { datarows: 2 } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5269.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5269.yml new file mode 100644 index 00000000000..8c49825e6e2 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5269.yml @@ -0,0 +1,63 @@ +setup: + - do: + indices.create: + index: issue5269_bool + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + flag: + type: boolean + startTime: + type: date_nanos + + - do: + indices.create: + index: issue5269_text + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + flag: + type: text + startTime: + type: date_nanos + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5269_bool", "_id": "1"}}' + - '{"startTime": "2026-03-25T20:25:00.000Z", "flag": false}' + - '{"index": {"_index": "issue5269_text", "_id": "1"}}' + - '{"startTime": "2026-03-24T20:25:00.000Z", "flag": 0}' + +--- +teardown: + - do: + indices.delete: + index: issue5269_bool + ignore_unavailable: true + - do: + indices.delete: + index: issue5269_text + ignore_unavailable: true + +--- +"Issue 5269: PPL wildcard query across indices with boolean/text mapping conflict should not error": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5269_* | fields flag + + - match: { total: 2 } + - length: { datarows: 2 } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl/error_handling.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl/error_handling.yml new file mode 100644 index 00000000000..74219956c3c --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl/error_handling.yml @@ -0,0 +1,116 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"Test field not found returns 400 bad_request": + - skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: test_error_handling + refresh: true + body: + - '{"index": {}}' + - '{"age": 25, "name": "John"}' + - '{"index": {}}' + - '{"age": 30, "name": "Jane"}' + + - do: + catch: bad_request + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_error_handling | fields nonexistent_field + - match: {"$body": "/[Ff]ield|[Cc]olumn/"} + +--- +"Test index not found returns 404 missing": + - skip: + features: + - headers + - do: + catch: missing + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=nonexistent_index_12345 | fields age + +--- +"Test syntax error returns 400 bad_request": + - skip: + features: + - headers + - do: + bulk: + index: test_error_syntax + refresh: true + body: + - '{"index": {}}' + - '{"age": 25}' + + - do: + catch: bad_request + headers: + Content-Type: 
'application/json' + ppl: + body: + query: source=test_error_syntax | invalid_command_xyz + +--- +"Test semantic error returns 400 bad_request": + - skip: + features: + - headers + - do: + bulk: + index: test_error_semantic + refresh: true + body: + - '{"index": {}}' + - '{"age": 25, "name": "John"}' + + - do: + catch: bad_request + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_error_semantic | where age IN ('a', 'b', 'c') | fields age + +--- +"Test aggregation validation error returns 400 bad_request": + - skip: + features: + - headers + - do: + bulk: + index: test_error_agg + refresh: true + body: + - '{"index": {}}' + - '{"age": 25, "name": "John"}' + + - do: + catch: bad_request + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_error_agg | stats count(eval(age)) as cnt + - match: {"$body": "/[Cc]ondition.*boolean|[Bb]oolean.*expected/"} diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java index 216815b1e89..2950f6c9b85 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java @@ -5,8 +5,6 @@ package org.opensearch.sql.legacy.plugin; -import static org.opensearch.core.rest.RestStatus.BAD_REQUEST; -import static org.opensearch.core.rest.RestStatus.INTERNAL_SERVER_ERROR; import static org.opensearch.core.rest.RestStatus.OK; import com.alibaba.druid.sql.parser.ParserException; @@ -34,6 +32,7 @@ import org.opensearch.rest.RestChannel; import org.opensearch.rest.RestRequest; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.common.utils.QueryContext; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; @@ -206,14 +205,28 @@ private void delegateToV2Engine( } private void handleException(RestChannel restChannel, Exception exception) { - logAndPublishMetrics(exception); - if (exception instanceof OpenSearchException) { - OpenSearchException openSearchException = (OpenSearchException) exception; - reportError(restChannel, openSearchException, openSearchException.status()); - } else { - reportError( - restChannel, exception, isClientError(exception) ? 
BAD_REQUEST : INTERNAL_SERVER_ERROR); + RestStatus status = getRestStatus(exception); + logAndPublishMetrics(status, exception); + reportError(restChannel, exception, status); + } + + private static RestStatus getRestStatus(Exception ex) { + int code = getRawErrorCode(ex); + return RestStatus.fromCode(code); + } + + private static int getRawErrorCode(Exception ex) { + // Recursively unwrap ErrorReport to get to the underlying cause + if (ex instanceof ErrorReport) { + return getRawErrorCode(((ErrorReport) ex).getCause()); } + if (ex instanceof OpenSearchException) { + return ((OpenSearchException) ex).status().getStatus(); + } + if (isClientError(ex)) { + return 400; + } + return 500; } /** @@ -246,13 +259,15 @@ private void handleCursorRequest( cursorRestExecutor.execute(client, request.params(), channel); } - private static void logAndPublishMetrics(final Exception e) { - if (isClientError(e)) { + private static void logAndPublishMetrics(final RestStatus status, final Exception e) { + if (400 <= status.getStatus() && status.getStatus() < 500) { LOG.error(QueryContext.getRequestId() + " Client side error during query execution", e); Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_CUS).increment(); - } else { + } else if (500 <= status.getStatus() && status.getStatus() < 600) { LOG.error(QueryContext.getRequestId() + " Server side error during query execution", e); Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); + } else { + LOG.warn("Got an exception returning non-error status {}", status, e); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java index dab4b1e8ff1..b491f38ef80 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java @@ -30,6 +30,8 @@ import org.opensearch.common.settings.Settings; import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexSettings; +import org.opensearch.sql.common.error.ErrorCode; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.opensearch.mapping.IndexMapping; import org.opensearch.sql.opensearch.request.OpenSearchRequest; import org.opensearch.sql.opensearch.request.OpenSearchScrollRequest; @@ -97,12 +99,27 @@ public Map getIndexMappings(String... 
indexExpression) { .collect( Collectors.toUnmodifiableMap( Map.Entry::getKey, cursor -> new IndexMapping(cursor.getValue()))); - } catch (IndexNotFoundException | OpenSearchSecurityException e) { + } catch (IndexNotFoundException e) { // Re-throw directly to be treated as client error finally - throw e; + throw ErrorReport.wrap(e) + .code(ErrorCode.INDEX_NOT_FOUND) + .location("while fetching index mappings") + .context("index_name", indexExpression[0]) + .build(); + } catch (OpenSearchSecurityException e) { + // Re-throw with permission denied code + throw ErrorReport.wrap(e) + .code(ErrorCode.PERMISSION_DENIED) + .location("while fetching index mappings") + .context("index_name", indexExpression[0]) + .build(); } catch (Exception e) { throw new IllegalStateException( - "Failed to read mapping for index pattern [" + indexExpression + "]", e); + "Failed to read mapping for index pattern [" + + String.join(",", indexExpression) + + "]: " + + e.getMessage(), + e); } } @@ -132,7 +149,11 @@ public Map getIndexMaxResultWindows(String... indexExpression) throw e; } catch (Exception e) { throw new IllegalStateException( - "Failed to read setting for index pattern [" + indexExpression + "]", e); + "Failed to read setting for index pattern [" + + String.join(",", indexExpression) + + "]: " + + e.getMessage(), + e); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java index 427eb7d6b03..f369c0003b8 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java @@ -79,7 +79,12 @@ public Map getIndexMappings(String... indexExpression) { return response.mappings().entrySet().stream() .collect(Collectors.toMap(Map.Entry::getKey, e -> new IndexMapping(e.getValue()))); } catch (IOException e) { - throw new IllegalStateException("Failed to get index mappings for " + indexExpression, e); + throw new IllegalStateException( + "Failed to get index mappings for " + + String.join(",", indexExpression) + + ": " + + e.getMessage(), + e); } } @@ -111,7 +116,12 @@ public Map getIndexMaxResultWindows(String... 
indexExpression) return result; } catch (IOException e) { - throw new IllegalStateException("Failed to get max result window for " + indexExpression, e); + throw new IllegalStateException( + "Failed to get max result window for " + + String.join(",", indexExpression) + + ": " + + e.getMessage(), + e); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java index 837a2a062ef..79d49a143de 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java @@ -43,7 +43,8 @@ public enum MappingType { ScaledFloat("scaled_float", ExprCoreType.DOUBLE), Double("double", ExprCoreType.DOUBLE), Boolean("boolean", ExprCoreType.BOOLEAN), - Alias("alias", ExprCoreType.UNKNOWN); + Alias("alias", ExprCoreType.UNKNOWN), + KnnVector("knn_vector", ExprCoreType.ARRAY); // TODO: ranges, geo shape, point, shape private final String name; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java index 86fe03b43b9..93c2c6b1584 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java @@ -212,6 +212,8 @@ private boolean parseBooleanValue(JsonNode node) { return node.booleanValue(); } else if (node.isTextual()) { return Boolean.parseBoolean(node.textValue()); + } else if (node.isNumber()) { + return node.intValue() != 0; } else { if (LOG.isDebugEnabled()) { LOG.debug("node '{}' must be a boolean", node); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableNestedAggregate.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableNestedAggregate.java index ef569fc2989..58dc62a8469 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableNestedAggregate.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableNestedAggregate.java @@ -79,7 +79,8 @@ public Result implement(EnumerableRelImplementor implementor, Prefer pref) { // TODO implement an enumerable nested aggregate throw new UnsupportedOperationException( String.format( - "Cannot execute nested aggregation on %s since pushdown cannot be applied.", aggCalls)); + "Cannot execute nested aggregation on %s since plugins.calcite.pushdown is disabled.", + aggCalls)); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index f8899ead2eb..d37957d4a9f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -11,7 +11,11 @@ import java.util.Set; import java.util.function.Predicate; import java.util.stream.IntStream; +import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; import 
org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.rules.SubstitutionRule; @@ -112,6 +116,21 @@ protected void apply( // add bucket_nullable = false hint PPLHintUtils.addIgnoreNullBucketHintToAggregate(relBuilder); + // add dedup sort hint if input collation is available. + // + // The collation's field indices refer to the dedup's input row type (i.e. `project`'s output). + // Before handing the hint off to AggregateAnalyzer — which resolves indices against the SCAN's + // row type (`project.getInput()`) — permute each collation index through `project`'s source + // mapping. If any sort key is a computed column (not a bare `RexInputRef`) we can't push it as + // an OS `top_hits` sort, so drop the hint entirely and let Calcite restore order post-dedup. + if (dedup.getInputCollation() != null + && !dedup.getInputCollation().getFieldCollations().isEmpty()) { + RelCollation scanCollation = resolveCollationToScanSchema(dedup, project); + if (scanCollation != null) { + PPLHintUtils.addDedupSortHintToAggregate( + relBuilder, scanCollation, project.getInput().getRowType().getFieldNames()); + } + } // peek the aggregate after hint being added LogicalAggregate aggregate = (LogicalAggregate) relBuilder.build(); assert aggregate.getGroupSet().asList().equals(newGroupByList) @@ -126,6 +145,64 @@ protected void apply( } } + /** + * Rewrite {@code dedup.inputCollation} into scan-schema indices. The collation was captured in + * {@link org.opensearch.sql.calcite.plan.rule.PPLSimplifyDedupRule} against a specific row type; + * by the time we reach this rule Calcite may have swapped in a different input, so the + * collation's indices may be stale. Strategy: + * + *

+ * <ol>
+ *   <li>If the collation's indices are all valid in {@code project}'s output, permute them
+ *       through {@code project.getProjects()} into scan indices (mirrors {@code
+ *       Project.getMapping} + {@code RelCollations.permute}).
+ *   <li>Otherwise, resolve each collation position by name: look up {@code
+ *       dedup.inputCollationFieldNames[idx]} in the scan's row type.
+ * </ol>
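+ *
+ * <p>Illustrative example (hypothetical indices): if the captured collation is {@code [2 DESC]}
+ * and {@code project} output 2 is {@code RexInputRef($5)}, path 1 rewrites it to scan collation
+ * {@code [5 DESC]}.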
    + * + * A computed-column sort key (non-{@code RexInputRef}) is not pushable as an OS field sort, so + * returns {@code null} in that case. Returns {@code null} also if any sort key cannot be resolved + * by either path. + */ + private static @Nullable RelCollation resolveCollationToScanSchema( + LogicalDedup dedup, LogicalProject project) { + RelCollation collation = dedup.getInputCollation(); + int projectOutputSize = project.getRowType().getFieldCount(); + int maxIdx = -1; + for (RelFieldCollation fc : collation.getFieldCollations()) { + maxIdx = Math.max(maxIdx, fc.getFieldIndex()); + } + if (maxIdx < projectOutputSize) { + List projections = project.getProjects(); + List remapped = new ArrayList<>(); + for (RelFieldCollation fc : collation.getFieldCollations()) { + RexNode expr = projections.get(fc.getFieldIndex()); + if (!(expr instanceof RexInputRef ref)) { + return null; + } + remapped.add(fc.withFieldIndex(ref.getIndex())); + } + return RelCollations.of(remapped); + } + List originalNames = dedup.getInputCollationFieldNames(); + if (originalNames == null) { + return null; + } + List scanNames = project.getInput().getRowType().getFieldNames(); + List remapped = new ArrayList<>(); + for (RelFieldCollation fc : collation.getFieldCollations()) { + int oldIdx = fc.getFieldIndex(); + if (oldIdx < 0 || oldIdx >= originalNames.size()) { + return null; + } + int scanIdx = scanNames.indexOf(originalNames.get(oldIdx)); + if (scanIdx < 0) { + return null; + } + remapped.add(fc.withFieldIndex(scanIdx)); + } + return RelCollations.of(remapped); + } + @Value.Immutable public interface Config extends OpenSearchRuleConfig { // +- LogicalDedup diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 7d8cb8826cd..f919fdc0e30 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -83,6 +83,7 @@ import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.PPLHintUtils; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -210,9 +211,12 @@ public static Pair, OpenSearchAggregationResponseParser try { final List groupList = aggregate.getGroupSet().asList(); List aggFieldNames = outputFields.subList(groupList.size(), outputFields.size()); + // Extract dedup sort hint if present (may be a multi-field sort) + List dedupSortKeys = PPLHintUtils.getDedupSortKeys(aggregate); // Process all aggregate calls Pair> builderAndParser = - processAggregateCalls(aggFieldNames, aggregate.getAggCallList(), project, helper); + processAggregateCalls( + aggFieldNames, aggregate.getAggCallList(), project, helper, dedupSortKeys); Builder metricBuilder = builderAndParser.getLeft(); List metricParsers = builderAndParser.getRight(); @@ -370,7 +374,8 @@ private static Pair> processAggregateCalls( List aggNames, List aggCalls, Project project, - AggregateAnalyzer.AggregateBuilderHelper helper) + AggregateAnalyzer.AggregateBuilderHelper helper, + List dedupSortKeys) throws PredicateAnalyzer.ExpressionNotAnalyzableException { Builder metricBuilder = new AggregatorFactories.Builder(); List metricParserList = new ArrayList<>(); 
@@ -382,7 +387,7 @@ private static Pair> processAggregateCalls( String aggName = aggNames.get(i); Pair builderAndParser = - createAggregationBuilderAndParser(aggCall, args, aggName, helper); + createAggregationBuilderAndParser(aggCall, args, aggName, helper, dedupSortKeys); builderAndParser = aggFilterAnalyzer.analyze(builderAndParser, aggCall, aggName); // Nested aggregation (https://docs.opensearch.org/docs/latest/aggregations/bucket/nested/) String nestedPath = @@ -436,11 +441,12 @@ private static Pair createAggregationBuilderAn AggregateCall aggCall, List> args, String aggName, - AggregateAnalyzer.AggregateBuilderHelper helper) { + AggregateAnalyzer.AggregateBuilderHelper helper, + List dedupSortKeys) { if (aggCall.isDistinct()) { return createDistinctAggregation(aggCall, args, aggName, helper); } else { - return createRegularAggregation(aggCall, args, aggName, helper); + return createRegularAggregation(aggCall, args, aggName, helper, dedupSortKeys); } } @@ -467,7 +473,8 @@ private static Pair createRegularAggregation( AggregateCall aggCall, List> args, String aggName, - AggregateBuilderHelper helper) { + AggregateBuilderHelper helper, + List dedupSortKeys) { return switch (aggCall.getAggregation().kind) { case AVG -> @@ -601,6 +608,16 @@ yield switch (functionName) { TopHitsAggregationBuilder topHitsAggregationBuilder = createTopHitsBuilder( aggCall, args, aggName, helper, dedupNumber, false, false, null, null); + // Emit a top_hits sort array that mirrors the original PPL sort collation + // (all fields, in order). Align NULL ordering with PPL/Calcite defaults + // (ASC -> NULLS FIRST, DESC -> NULLS LAST) so dedup picks the same row whether + // pushdown is on or off. + for (PPLHintUtils.DedupSortKey key : dedupSortKeys) { + SortOrder order = "DESC".equals(key.order()) ? SortOrder.DESC : SortOrder.ASC; + String missing = order == SortOrder.ASC ? "_first" : "_last"; + topHitsAggregationBuilder.sort( + SortBuilders.fieldSort(key.field()).order(order).missing(missing)); + } yield Pair.of(topHitsAggregationBuilder, new TopHitsParser(aggName, false, false)); } default -> diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 435cef22ef4..a694e0fea06 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -78,6 +78,10 @@ public static class PushDownUnSupportedException extends RuntimeException { public PushDownUnSupportedException(String message) { super(message); } + + public PushDownUnSupportedException(String message, Throwable cause) { + super(message, cause); + } } /** Constructor. 
*/ diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 8e6dbede58e..cc13c3a4a7a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -577,7 +577,8 @@ private QueryExpression prefix(RexCall call) { throw new PredicateAnalyzerException(message); } - Expression operandExpr = call.getOperands().get(0).accept(this); + RexNode innerOperand = call.getOperands().get(0); + Expression operandExpr = innerOperand.accept(this); // Handle NOT(boolean_field) - Calcite simplifies "field = false" to NOT($field). // In PPL semantics, "field = false" should only match documents where the field is // explicitly false (not null or missing). This is achieved via term query {value: false}. @@ -586,9 +587,36 @@ private QueryExpression prefix(RexCall call) { return QueryExpression.create(namedField).isFalse(); } QueryExpression expr = (QueryExpression) operandExpr; + // For null-intolerant predicates (LIKE, comparisons, equality, etc.), + // negation must also exclude documents where the field is NULL/missing. + // Truth-test operators (IS_TRUE, IS_NULL, etc.) already encode null + // semantics and must NOT get an additional exists filter. + if (isNullIntolerantPredicate(innerOperand) && expr instanceof SimpleQueryExpression sqe) { + return sqe.notWithExistsFilter(); + } return expr.not(); } + /** Returns true if the given RexNode is a null-intolerant predicate (value comparison). */ + private static boolean isNullIntolerantPredicate(RexNode node) { + if (!(node instanceof RexCall innerCall)) { + return false; + } + return switch (innerCall.getKind()) { + case LIKE, + EQUALS, + NOT_EQUALS, + GREATER_THAN, + GREATER_THAN_OR_EQUAL, + LESS_THAN, + LESS_THAN_OR_EQUAL, + BETWEEN, + SEARCH -> + true; + default -> false; + }; + } + private QueryExpression postfix(RexCall call) { checkArgument( call.getKind() == SqlKind.IS_TRUE @@ -725,7 +753,14 @@ private QueryExpression binary(RexCall call) { CompoundQueryExpression.or( expression, QueryExpression.create(pair.getKey()).notExists()); // e.g. where a = 1 or a = 2 - case UNKNOWN -> expression; + // For NOT IN (complemented points), SQL three-valued logic dictates + // NULL NOT IN (...) evaluates to UNKNOWN (not TRUE), so null rows + // must be excluded via an exists filter. + case UNKNOWN -> + isSearchWithComplementedPoints(call) + ? CompoundQueryExpression.and( + false, expression, QueryExpression.create(pair.getKey()).exists()) + : expression; }; finalExpression.updateAnalyzedNodes(call); return finalExpression; @@ -1302,6 +1337,12 @@ public QueryExpression not() { return this; } + /** Negate with an exists filter to exclude null/missing documents. 
*/ + QueryExpression notWithExistsFilter() { + builder = boolQuery().must(existsQuery(getFieldReference())).mustNot(builder()); + return this; + } + @Override public QueryExpression exists() { builder = existsQuery(getFieldReference()); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java index fbe6d3cd723..b09f9346627 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java @@ -8,6 +8,7 @@ import lombok.Getter; import org.json.JSONObject; import org.opensearch.core.rest.RestStatus; +import org.opensearch.sql.common.error.ErrorReport; /** Error Message. */ public class ErrorMessage { @@ -62,12 +63,26 @@ public String toString() { } private JSONObject getErrorAsJson() { - JSONObject errorJson = new JSONObject(); + if (exception instanceof ErrorReport errorReport) { + JSONObject errorJson = new JSONObject(errorReport.toJsonMap()); + // Add 'reason' field for backward compatibility with existing clients + // Use the underlying exception message as 'reason' (broad error description) + // while 'details' contains the more precise handwritten message + if (!errorJson.has("reason")) { + Exception cause = errorReport.getCause(); + String reasonMessage = + cause.getLocalizedMessage() != null ? cause.getLocalizedMessage() : cause.getMessage(); + if (reasonMessage != null) { + errorJson.put("reason", reasonMessage); + } + } + return errorJson; + } + JSONObject errorJson = new JSONObject(); errorJson.put("type", type); errorJson.put("reason", reason); errorJson.put("details", details); - return errorJson; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java index 8617f264f06..b569276e3ee 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java @@ -7,19 +7,25 @@ import lombok.experimental.UtilityClass; import org.opensearch.OpenSearchException; +import org.opensearch.sql.common.error.ErrorReport; @UtilityClass public class ErrorMessageFactory { /** * Create error message based on the exception type. Exceptions of OpenSearch exception type and * exceptions with wrapped OpenSearch exception causes should create {@link - * OpenSearchErrorMessage} + * OpenSearchErrorMessage}. ErrorReport exceptions preserve their context information. 
* * @param e exception to create error message * @param status exception status code * @return error message */ public static ErrorMessage createErrorMessage(Throwable e, int status) { + // Check for ErrorReport BEFORE unwrapping - we want to preserve the context + if (e instanceof ErrorReport) { + return new ErrorMessage(e, status); + } + Throwable cause = unwrapCause(e); if (cause instanceof OpenSearchException) { OpenSearchException exception = (OpenSearchException) cause; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/FilterType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/FilterType.java new file mode 100644 index 00000000000..cc42bb35f58 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/FilterType.java @@ -0,0 +1,43 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import java.util.Arrays; +import java.util.Set; +import java.util.stream.Collectors; +import org.opensearch.sql.exception.ExpressionEvaluationException; + +/** Filter placement strategy for vectorSearch() WHERE clauses. */ +public enum FilterType { + /** WHERE placed in bool.filter outside the knn clause (post-filtering). */ + POST("post"), + + /** WHERE placed inside knn.filter for efficient pre-filtering. */ + EFFICIENT("efficient"); + + private final String value; + + FilterType(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + + private static final Set VALID_VALUES = + Arrays.stream(values()).map(FilterType::getValue).collect(Collectors.toSet()); + + public static FilterType fromString(String str) { + for (FilterType ft : values()) { + if (ft.value.equals(str)) { + return ft; + } + } + throw new ExpressionEvaluationException( + String.format("filter_type must be one of %s, got '%s'", VALID_VALUES, str)); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngine.java index ce6740cd784..1b7de315fb6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngine.java @@ -7,10 +7,13 @@ import static org.opensearch.sql.utils.SystemIndexUtils.isSystemIndex; +import java.util.Collection; +import java.util.List; import lombok.Getter; import lombok.RequiredArgsConstructor; import org.opensearch.sql.DataSourceSchemaName; import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.expression.function.FunctionResolver; import org.opensearch.sql.opensearch.client.OpenSearchClient; import org.opensearch.sql.opensearch.storage.system.OpenSearchSystemIndex; import org.opensearch.sql.storage.StorageEngine; @@ -25,6 +28,11 @@ public class OpenSearchStorageEngine implements StorageEngine { @Getter private final Settings settings; + @Override + public Collection getFunctions() { + return List.of(new VectorSearchTableFunctionResolver(client, settings)); + } + @Override public Table getTable(DataSourceSchemaName dataSourceSchemaName, String name) { if (isSystemIndex(name)) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java new file mode 100644 index 00000000000..06727a5462b --- /dev/null +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java @@ -0,0 +1,191 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import java.util.Map; +import java.util.function.Function; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.WrapperQueryBuilder; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; +import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexScan; +import org.opensearch.sql.opensearch.storage.scan.VectorSearchIndexScan; +import org.opensearch.sql.opensearch.storage.scan.VectorSearchIndexScanBuilder; +import org.opensearch.sql.opensearch.storage.scan.VectorSearchQueryBuilder; +import org.opensearch.sql.storage.read.TableScanBuilder; + +/** + * Vector-search-aware OpenSearch index. Seeds the scan with a knn query and enables score tracking. + */ +public class VectorSearchIndex extends OpenSearchIndex { + + private final String field; + private final float[] vector; + private final Map options; + private final FilterType filterType; // null means default (EFFICIENT) + // Nullable for back-compat with existing tests and the non-vector-search constructor. When + // present, the scan defers a lazy k-NN plugin probe to open() so execution fails fast with a + // clear SQL error if the plugin is missing. + private final KnnPluginCapability knnCapability; + + public VectorSearchIndex( + OpenSearchClient client, + Settings settings, + String indexName, + String field, + float[] vector, + Map options, + FilterType filterType, + KnnPluginCapability knnCapability) { + super(client, settings, indexName); + this.field = field; + this.vector = vector; + this.options = options; + this.filterType = filterType; + this.knnCapability = knnCapability; + } + + public VectorSearchIndex( + OpenSearchClient client, + Settings settings, + String indexName, + String field, + float[] vector, + Map options, + FilterType filterType) { + this(client, settings, indexName, field, vector, options, filterType, null); + } + + /** + * Default constructor — preserves existing call sites; uses no explicit filter type, so the scan + * falls back to the default placement ({@link FilterType#EFFICIENT}). + */ + public VectorSearchIndex( + OpenSearchClient client, + Settings settings, + String indexName, + String field, + float[] vector, + Map options) { + this(client, settings, indexName, field, vector, options, null, null); + } + + @Override + public TableScanBuilder createScanBuilder() { + // _score is not blocked at mapping time, so a user field named _score would collide with the + // synthetic v._score column on the response tuple and fail with an opaque duplicate-key error. + // Reject here so the user sees a clear SQL error (and _explain surfaces the problem without a + // k-NN request). + if (getFieldTypes().containsKey(METADATA_FIELD_SCORE)) { + throw new IllegalArgumentException( + String.format( + "Index '%s' defines a user field named '_score' that collides with the synthetic" + + " _score column exposed by vectorSearch(). 
Rename the field or query the index" + + " without vectorSearch().", + getIndexName())); + } + final TimeValue cursorKeepAlive = + getSettings().getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE); + var requestBuilder = createRequestBuilder(); + + // Callback for efficient filtering: serialize WHERE QueryBuilder to JSON, + // rebuild knn query with filter embedded. JSON handling stays in this class. + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder(buildKnnQueryJson(whereQuery.toString())); + + boolean filterTypeExplicit = filterType != null; + FilterType effectiveFilterType = filterType != null ? filterType : FilterType.EFFICIENT; + + var queryBuilder = + new VectorSearchQueryBuilder( + requestBuilder, + buildKnnQuery(), + options, + effectiveFilterType, + filterTypeExplicit, + rebuildWithFilter); + requestBuilder.pushDownTrackedScore(true); + + // Default size policy: LIMIT pushdown will further reduce if present. + if (options.containsKey("k")) { + // Top-k mode: default size to k so queries without LIMIT return k results. + requestBuilder.pushDownLimitToRequestTotal(Integer.parseInt(options.get("k")), 0); + } else { + // Radial mode (max_distance/min_score): cap at maxResultWindow. + // Without an explicit cap, radial queries could return unbounded results. + requestBuilder.pushDownLimitToRequestTotal(getMaxResultWindow(), 0); + } + + Function createScanOperator = + rb -> { + var request = + rb.build(getIndexName(), cursorKeepAlive, getClient(), getFieldTypes().isEmpty()); + if (knnCapability != null) { + return new VectorSearchIndexScan( + getClient(), rb.getMaxResponseSize(), request, knnCapability); + } + return new OpenSearchIndexScan(getClient(), rb.getMaxResponseSize(), request); + }; + return new VectorSearchIndexScanBuilder(queryBuilder, createScanOperator); + } + + private QueryBuilder buildKnnQuery() { + return new WrapperQueryBuilder(buildKnnQueryJson()); + } + + // Package-private for testing + String buildKnnQueryJson() { + return buildKnnQueryJson(null); + } + + /** + * Builds knn query JSON, optionally embedding a filter clause for efficient filtering. + * + * @param filterJson serialized filter JSON to embed in knn.field.filter, or null for no filter + */ + String buildKnnQueryJson(String filterJson) { + StringBuilder vectorJson = new StringBuilder("["); + for (int i = 0; i < vector.length; i++) { + if (i > 0) vectorJson.append(","); + vectorJson.append(vector[i]); + } + vectorJson.append("]"); + + StringBuilder optionsJson = new StringBuilder(); + for (Map.Entry entry : options.entrySet()) { + optionsJson.append(","); + String value = entry.getValue(); + // All P0 option values are canonicalized to numeric strings by validateOptions(). + // The quoted fallback is retained for forward compatibility with future non-numeric options. 
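+        // Illustrative only (hypothetical values): a canonicalized numeric option such as k=5 is
+        // appended as ,"k":5 while a hypothetical future non-numeric value would be appended
+        // quoted, e.g. ,"mode":"strict".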
+ if (isNumeric(value)) { + optionsJson.append(String.format("\"%s\":%s", entry.getKey(), value)); + } else { + optionsJson.append(String.format("\"%s\":\"%s\"", entry.getKey(), value)); + } + } + + String filterClause = ""; + if (filterJson != null) { + filterClause = String.format(",\"filter\":%s", filterJson); + } + + return String.format( + "{\"knn\":{\"%s\":{\"vector\":%s%s%s}}}", + field, vectorJson.toString(), optionsJson.toString(), filterClause); + } + + private static boolean isNumeric(String str) { + try { + Double.parseDouble(str); + return true; + } catch (NumberFormatException e) { + return false; + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementation.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementation.java new file mode 100644 index 00000000000..c1b5354f4b1 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementation.java @@ -0,0 +1,370 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import static org.opensearch.sql.opensearch.storage.VectorSearchTableFunctionResolver.FIELD; +import static org.opensearch.sql.opensearch.storage.VectorSearchTableFunctionResolver.OPTION; +import static org.opensearch.sql.opensearch.storage.VectorSearchTableFunctionResolver.TABLE; +import static org.opensearch.sql.opensearch.storage.VectorSearchTableFunctionResolver.VECTOR; + +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.NamedArgumentExpression; +import org.opensearch.sql.expression.env.Environment; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.expression.function.TableFunctionImplementation; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; +import org.opensearch.sql.storage.Table; + +public class VectorSearchTableFunctionImplementation extends FunctionExpression + implements TableFunctionImplementation { + + /** + * P0 allowed option keys. Rejects unknown/future keys to prevent unvalidated DSL injection. A + * {@link List} (rather than a {@link Set}) so the unknown-key error message renders the supported + * keys in a stable, user-friendly order. + */ + static final List ALLOWED_OPTION_KEYS = + List.of("k", "max_distance", "min_score", "filter_type"); + + /** + * Field names must be safe for JSON interpolation: alphanumeric, dots (nested), underscores, + * hyphens. Rejects characters that could corrupt the WrapperQueryBuilder JSON. The same regex is + * reused for table names so user-supplied identifiers cannot break out of the JSON context. 
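+ * <p>Illustrative examples (hypothetical names): {@code title_embedding} and
+ * {@code nested.vector_field-v2} are accepted, while names containing quotes, braces, or
+ * whitespace are rejected.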
+ */ + private static final Pattern SAFE_FIELD_NAME = Pattern.compile("^[a-zA-Z0-9._\\-]+$"); + + private final FunctionName functionName; + private final List arguments; + private final OpenSearchClient client; + private final Settings settings; + private final KnnPluginCapability knnCapability; + + public VectorSearchTableFunctionImplementation( + FunctionName functionName, + List arguments, + OpenSearchClient client, + Settings settings, + KnnPluginCapability knnCapability) { + super(functionName, arguments); + this.functionName = functionName; + this.arguments = arguments; + this.client = client; + this.settings = settings; + this.knnCapability = knnCapability; + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + throw new UnsupportedOperationException( + String.format("vectorSearch function [%s] is only supported in FROM clause", functionName)); + } + + @Override + public ExprType type() { + return ExprCoreType.STRUCT; + } + + @Override + public String toString() { + List args = + arguments.stream() + .map( + arg -> { + if (arg instanceof NamedArgumentExpression) { + NamedArgumentExpression named = (NamedArgumentExpression) arg; + return String.format("%s=%s", named.getArgName(), named.getValue().toString()); + } + return arg.toString(); + }) + .collect(Collectors.toList()); + return String.format("%s(%s)", functionName, String.join(", ", args)); + } + + @Override + public Table applyArguments() { + // Local validation runs first so that malformed queries return stable SQL validation errors + // regardless of cluster state. The k-NN plugin presence is checked later, lazily at scan + // open() time, so analysis-time paths (_explain, local validation) stay functional on + // clusters without k-NN. + validateNamedArgs(); + String tableName = getArgumentValue(TABLE); + validateTableName(tableName); + String fieldName = getArgumentValue(FIELD); + validateFieldName(fieldName); + String vectorLiteral = getArgumentValue(VECTOR); + String optionStr = getArgumentValue(OPTION); + + float[] vector = parseVector(vectorLiteral); + Map options = parseOptions(optionStr); + validateOptions(options); + + // Strip filter_type — it's a SQL-layer directive, not a knn parameter + FilterType filterType = null; + if (options.containsKey("filter_type")) { + filterType = FilterType.fromString(options.remove("filter_type")); + } + + return new VectorSearchIndex( + client, settings, tableName, fieldName, vector, options, filterType, knnCapability); + } + + private float[] parseVector(String vectorLiteral) { + String cleaned = vectorLiteral.replaceAll("[\\[\\]]", "").trim(); + if (cleaned.isEmpty()) { + throw new ExpressionEvaluationException("Vector literal must not be empty"); + } + // Reject common non-comma separators before Float.parseFloat fails with a generic + // "Invalid vector component" that doesn't hint the user at the separator. + if (cleaned.indexOf(';') >= 0 || cleaned.indexOf(':') >= 0 || cleaned.indexOf('|') >= 0) { + throw new ExpressionEvaluationException( + String.format( + "Invalid vector literal '%s': vector= requires comma-separated components," + + " e.g., vector='[1.0,2.0,3.0]'", + vectorLiteral)); + } + // Preserve trailing empties (split(",", -1)) so malformed literals like "[1.0,]" or + // "[1.0,,2.0]" surface an explicit error instead of silently shrinking the vector. 
+ String[] parts = cleaned.split(",", -1); + float[] vector = new float[parts.length]; + for (int i = 0; i < parts.length; i++) { + String component = parts[i].trim(); + if (component.isEmpty()) { + throw new ExpressionEvaluationException( + String.format( + "Invalid vector component at position %d: must be a number (check for" + + " trailing or consecutive commas in '%s')", + i, vectorLiteral)); + } + try { + vector[i] = Float.parseFloat(component); + } catch (NumberFormatException e) { + throw new ExpressionEvaluationException( + String.format("Invalid vector component '%s': must be a number", component)); + } + if (!Float.isFinite(vector[i])) { + throw new ExpressionEvaluationException( + String.format("Invalid vector component '%s': must be a finite number", component)); + } + } + return vector; + } + + static Map parseOptions(String optionStr) { + Map options = new LinkedHashMap<>(); + // A wholly empty option string is handled downstream with a clearer "missing required option" + // message than a generic malformed-segment error. + if (optionStr.trim().isEmpty()) { + return options; + } + // split(",", -1) preserves trailing empties so malformed inputs like "k=5," or "k=5,,k2=v" + // surface an explicit error instead of being silently dropped. + String[] pairs = optionStr.split(",", -1); + for (String pair : pairs) { + String trimmed = pair.trim(); + if (trimmed.isEmpty()) { + throw new ExpressionEvaluationException( + "Malformed option segment '': expected key=value (check for trailing or" + + " consecutive commas)"); + } + String[] kv = trimmed.split("=", 2); + if (kv.length != 2 || kv[0].trim().isEmpty() || kv[1].trim().isEmpty()) { + throw new ExpressionEvaluationException( + String.format("Malformed option segment '%s': expected key=value", trimmed)); + } + String key = kv[0].trim(); + if (options.containsKey(key)) { + throw new ExpressionEvaluationException(String.format("Duplicate option key '%s'", key)); + } + options.put(key, kv[1].trim()); + } + return options; + } + + /** + * Reject non-named arguments, null arg names, and duplicate named arguments early. Runs before + * any list-index-based lookup so a malformed argument list can never cause an AIOOBE downstream. + */ + private void validateNamedArgs() { + HashSet seen = new HashSet<>(); + for (Expression arg : arguments) { + if (!(arg instanceof NamedArgumentExpression)) { + throw new ExpressionEvaluationException( + "vectorSearch() requires named arguments (e.g., table='index'), " + + "but received: " + + arg.getClass().getSimpleName()); + } + String name = ((NamedArgumentExpression) arg).getArgName(); + if (name == null || name.isEmpty()) { + throw new ExpressionEvaluationException( + "vectorSearch() requires named arguments (e.g., table='index'), " + + "but received an argument with no name"); + } + if (!seen.add(name.toLowerCase(java.util.Locale.ROOT))) { + throw new ExpressionEvaluationException( + "Duplicate argument name '" + + name + + "' in vectorSearch(); each named argument may appear at most once"); + } + } + } + + /** + * Reject table names with characters that could corrupt the WrapperQueryBuilder JSON or escape + * the target index name. Allows alphanumeric, dots, underscores, and hyphens (the characters + * OpenSearch index names already permit). Explicitly rejects wildcards ('*') and multi-target + * patterns (comma-separated) with a dedicated message, because vectorSearch() targets a single + * concrete index or alias and fan-out patterns would otherwise fall through to the generic regex + * message. 
Also rejects the `_all` routing target and the pathologic `.` / `..` names because + * those either fan out to every index or are not valid concrete index names. Other native-invalid + * names (leading dot, leading hyphen, bare underscore, uppercase, and so on) are intentionally + * passed through for the OpenSearch client to reject with its own error message. + */ + private void validateTableName(String tableName) { + // Dedicated error for fan-out patterns ('*' and ',') before the generic regex; see Javadoc + // for why vectorSearch() targets a single index. + if (tableName.indexOf('*') >= 0 || tableName.indexOf(',') >= 0) { + throw new ExpressionEvaluationException( + String.format( + "Invalid table name '%s': vectorSearch() requires a single concrete index or alias;" + + " wildcards ('*') and multi-target patterns (comma-separated) are not" + + " supported", + tableName)); + } + if (!SAFE_FIELD_NAME.matcher(tableName).matches()) { + throw new ExpressionEvaluationException( + String.format( + "Invalid table name '%s': must contain only alphanumeric characters," + + " dots, underscores, or hyphens", + tableName)); + } + String lower = tableName.toLowerCase(java.util.Locale.ROOT); + if (lower.equals("_all") || tableName.equals(".") || tableName.equals("..")) { + throw new ExpressionEvaluationException( + String.format( + "Invalid table name '%s': vectorSearch() requires a single concrete index or alias;" + + " '_all', '.', and '..' are not supported", + tableName)); + } + } + + /** + * Reject field names with characters that could corrupt the WrapperQueryBuilder JSON. Allows + * alphanumeric, dots (nested fields), underscores, and hyphens. + */ + private void validateFieldName(String fieldName) { + if (!SAFE_FIELD_NAME.matcher(fieldName).matches()) { + throw new ExpressionEvaluationException( + String.format( + "Invalid field name '%s': must contain only alphanumeric characters," + + " dots, underscores, or hyphens", + fieldName)); + } + } + + /** + * Validates and canonicalizes option values. All P0 option values must be numeric. Parsing them + * here prevents non-numeric strings from reaching the raw JSON construction in buildKnnQuery(). + */ + private void validateOptions(Map options) { + // Reject unknown option keys — only P0 keys are allowed + for (String key : options.keySet()) { + if (!ALLOWED_OPTION_KEYS.contains(key)) { + throw new ExpressionEvaluationException( + String.format("Unknown option key '%s'. Supported keys: %s", key, ALLOWED_OPTION_KEYS)); + } + } + if (options.containsKey("filter_type")) { + // Validate early — fromString throws if invalid + FilterType.fromString(options.get("filter_type")); + } + boolean hasK = options.containsKey("k"); + boolean hasMaxDistance = options.containsKey("max_distance"); + boolean hasMinScore = options.containsKey("min_score"); + if (!hasK && !hasMaxDistance && !hasMinScore) { + throw new ExpressionEvaluationException( + "Missing required option: one of k, max_distance, or min_score"); + } + // Mutual exclusivity: exactly one search mode allowed + int modeCount = (hasK ? 1 : 0) + (hasMaxDistance ? 1 : 0) + (hasMinScore ? 
1 : 0); + if (modeCount > 1) { + throw new ExpressionEvaluationException( + "Only one of k, max_distance, or min_score may be specified"); + } + // Parse and canonicalize numeric values — closes JSON injection via option values + if (hasK) { + int k = parseIntOption(options, "k"); + if (k < 1 || k > 10000) { + throw new ExpressionEvaluationException( + String.format("k must be between 1 and 10000, got %d", k)); + } + } + if (hasMaxDistance) { + double maxDistance = parseDoubleOption(options, "max_distance"); + if (maxDistance < 0) { + throw new ExpressionEvaluationException( + String.format( + "max_distance must be non-negative, got %s", options.get("max_distance"))); + } + } + if (hasMinScore) { + double minScore = parseDoubleOption(options, "min_score"); + if (minScore < 0) { + throw new ExpressionEvaluationException( + String.format("min_score must be non-negative, got %s", options.get("min_score"))); + } + } + } + + private int parseIntOption(Map options, String key) { + try { + int value = Integer.parseInt(options.get(key)); + options.put(key, Integer.toString(value)); + return value; + } catch (NumberFormatException e) { + throw new ExpressionEvaluationException( + String.format("Option '%s' must be an integer, got '%s'", key, options.get(key))); + } + } + + private double parseDoubleOption(Map options, String key) { + try { + double value = Double.parseDouble(options.get(key)); + if (!Double.isFinite(value)) { + throw new ExpressionEvaluationException( + String.format("Option '%s' must be a finite number, got '%s'", key, options.get(key))); + } + options.put(key, Double.toString(value)); + return value; + } catch (NumberFormatException e) { + throw new ExpressionEvaluationException( + String.format("Option '%s' must be a number, got '%s'", key, options.get(key))); + } + } + + private String getArgumentValue(String name) { + return arguments.stream() + .filter(arg -> ((NamedArgumentExpression) arg).getArgName().equalsIgnoreCase(name)) + .map(arg -> ((NamedArgumentExpression) arg).getValue().valueOf().stringValue()) + .findFirst() + .orElseThrow( + () -> + new ExpressionEvaluationException( + String.format("Missing required argument: %s", name))); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolver.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolver.java new file mode 100644 index 00000000000..8db1f270afd --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolver.java @@ -0,0 +1,109 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.util.HashSet; +import java.util.List; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.NamedArgumentExpression; +import org.opensearch.sql.expression.function.FunctionBuilder; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.expression.function.FunctionResolver; +import org.opensearch.sql.expression.function.FunctionSignature; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; + +public 
class VectorSearchTableFunctionResolver implements FunctionResolver { + + public static final String VECTOR_SEARCH = "vectorsearch"; + public static final String TABLE = "table"; + public static final String FIELD = "field"; + public static final String VECTOR = "vector"; + public static final String OPTION = "option"; + public static final List ARGUMENT_NAMES = List.of(TABLE, FIELD, VECTOR, OPTION); + + private final OpenSearchClient client; + private final Settings settings; + private final KnnPluginCapability knnCapability; + + public VectorSearchTableFunctionResolver(OpenSearchClient client, Settings settings) { + this(client, settings, new KnnPluginCapability(client)); + } + + VectorSearchTableFunctionResolver( + OpenSearchClient client, Settings settings, KnnPluginCapability knnCapability) { + this.client = client; + this.settings = settings; + this.knnCapability = knnCapability; + } + + @Override + public Pair resolve(FunctionSignature unresolvedSignature) { + FunctionName functionName = FunctionName.of(VECTOR_SEARCH); + FunctionSignature functionSignature = + new FunctionSignature(functionName, List.of(STRING, STRING, STRING, STRING)); + FunctionBuilder functionBuilder = + (functionProperties, arguments) -> { + validateArguments(arguments); + return new VectorSearchTableFunctionImplementation( + functionName, arguments, client, settings, knnCapability); + }; + return Pair.of(functionSignature, functionBuilder); + } + + @Override + public FunctionName getFunctionName() { + return FunctionName.of(VECTOR_SEARCH); + } + + private void validateArguments(List arguments) { + if (arguments.size() != ARGUMENT_NAMES.size()) { + throw new ExpressionEvaluationException( + String.format( + "vectorSearch requires %d arguments (%s), got %d", + ARGUMENT_NAMES.size(), String.join(", ", ARGUMENT_NAMES), arguments.size())); + } + // Shape check at the resolver so positional or unknown-named args produce a clean 400 before + // planning proceeds. The Implementation layer repeats the non-named and duplicate-name checks + // as defense-in-depth; the unknown-name allowlist is enforced only here because the + // Implementation looks up values by known keys and does not need to re-validate the allowlist. + HashSet seen = new HashSet<>(); + for (Expression arg : arguments) { + if (!(arg instanceof NamedArgumentExpression)) { + throw new ExpressionEvaluationException( + "vectorSearch() requires named arguments (e.g., table='index'), " + + "but received: " + + arg.getClass().getSimpleName()); + } + String name = ((NamedArgumentExpression) arg).getArgName(); + if (name == null || name.isEmpty()) { + throw new ExpressionEvaluationException( + "vectorSearch() requires named arguments (e.g., table='index'), " + + "but received an argument with no name"); + } + String lower = name.toLowerCase(java.util.Locale.ROOT); + if (!ARGUMENT_NAMES.contains(lower)) { + throw new ExpressionEvaluationException( + String.format( + "Unknown argument name '%s' in vectorSearch(); allowed names are %s", + name, ARGUMENT_NAMES)); + } + if (!seen.add(lower)) { + throw new ExpressionEvaluationException( + "Duplicate argument name '" + + name + + "' in vectorSearch(); each named argument may appear at most once"); + } + } + // At this point `seen` holds exactly ARGUMENT_NAMES.size() entries (no duplicates, no unknowns, + // and arity matches), so every required name is present. No separate missing-name check needed. 
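+    // Illustrative call shape (hypothetical index, field, and values) that passes this check:
+    //   vectorSearch(table='products', field='embedding', vector='[0.1,0.2,0.3]', option='k=5')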
+ } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapability.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapability.java new file mode 100644 index 00000000000..9ba59915e1d --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapability.java @@ -0,0 +1,92 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.capability; + +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicReference; +import org.opensearch.action.admin.cluster.node.info.NodesInfoRequest; +import org.opensearch.action.admin.cluster.node.info.NodesInfoResponse; +import org.opensearch.action.admin.cluster.node.info.PluginsAndModules; +import org.opensearch.plugins.PluginInfo; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.transport.client.node.NodeClient; + +/** + * Probes the cluster's Nodes Info API once and caches whether the k-NN plugin is installed, so + * vectorSearch() fails fast with a clear error when the plugin is absent instead of surfacing a + * native OpenSearch error deep in execution. + * + *

+ * <p>The probe requires a {@link NodeClient}. In REST-client mode (standalone SQL service) the
+ * node client is absent and the check is skipped — execution-time errors remain the signal there.
+ *

    The check runs lazily at scan open() — i.e. only when a vectorSearch() query is actually + * executed — so analysis-time paths like _explain and local argument validation keep working on + * clusters without k-NN. + */ +public class KnnPluginCapability { + + /** + * Canonical k-NN plugin class. Using the class name (not artifact name) so the check is stable + * across packaging variants. + */ + private static final String KNN_PLUGIN_CLASSNAME = "org.opensearch.knn.plugin.KNNPlugin"; + + private final OpenSearchClient client; + private final AtomicReference cached = new AtomicReference<>(); + + public KnnPluginCapability(OpenSearchClient client) { + this.client = client; + } + + /** + * Throws {@link ExpressionEvaluationException} with a user-facing message if the k-NN plugin is + * not installed on any node in the cluster. The result is cached after the first successful + * probe; probe failures are not cached so the next call retries. + */ + public void requireInstalled() { + Boolean hit = cached.get(); + if (hit == null) { + Optional probed = probe(); + if (probed.isEmpty()) { + // Probe unavailable (REST-client mode, no NodeClient). Don't block — execution-time + // errors will surface if k-NN is genuinely missing. + return; + } + hit = probed.get(); + cached.set(hit); + } + if (!hit) { + throw new ExpressionEvaluationException( + "vectorSearch() requires the k-NN plugin, which is not installed on this cluster." + + " Install opensearch-knn or use a cluster that has it."); + } + } + + private Optional probe() { + Optional maybeNode = client.getNodeClient(); + if (maybeNode.isEmpty()) { + return Optional.empty(); + } + NodeClient node = maybeNode.get(); + try { + NodesInfoRequest request = new NodesInfoRequest().clear().addMetric("plugins"); + NodesInfoResponse response = node.admin().cluster().nodesInfo(request).actionGet(); + boolean installed = + response.getNodes().stream() + .map(info -> info.getInfo(PluginsAndModules.class)) + .filter(Objects::nonNull) + .flatMap(p -> p.getPluginInfos().stream()) + .map(PluginInfo::getClassname) + .anyMatch(KNN_PLUGIN_CLASSNAME::equals); + return Optional.of(installed); + } catch (Exception e) { + // Probe failed (IO error, timeout). Don't cache — let the next call retry. 
+ return Optional.empty(); + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index edbd1b06393..609a5aaa92f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -60,8 +60,6 @@ import org.opensearch.sql.opensearch.request.PredicateAnalyzer; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; import org.opensearch.sql.opensearch.storage.scan.context.AbstractAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggPushDownAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggregationBuilderAction; import org.opensearch.sql.opensearch.storage.scan.context.FilterDigest; import org.opensearch.sql.opensearch.storage.scan.context.LimitDigest; import org.opensearch.sql.opensearch.storage.scan.context.OSRequestBuilderAction; @@ -177,7 +175,7 @@ public double estimateRowCount(RelMetadataQuery mq) { switch (operation.type()) { case AGGREGATION -> { dRows = mq.getRowCount((RelNode) operation.digest()); - dCpu += dRows * getAggMultiplier(operation); + dCpu += dRows * getAggMultiplier(operation, pushDownContext); } // Ignored Project and Highlight in cost accumulation, but they affect the external cost case PROJECT, HIGHLIGHT -> {} @@ -236,7 +234,8 @@ public double estimateRowCount(RelMetadataQuery mq) { } /** See source in {@link org.apache.calcite.rel.core.Aggregate::computeSelfCost} */ - private static float getAggMultiplier(PushDownOperation operation) { + private static float getAggMultiplier( + PushDownOperation operation, PushDownContext pushDownContext) { // START CALCITE List aggCalls = ((Aggregate) operation.digest()).getAggCallList(); float multiplier = 1f + (float) aggCalls.size() * 0.125f; @@ -251,7 +250,9 @@ private static float getAggMultiplier(PushDownOperation operation) { // For script aggregation, we need to multiply the multiplier by 1.1 to make up the cost. As we // prefer to have non-script agg push down after optimized by {@link PPLAggregateConvertRule} - multiplier *= (float) Math.pow(1.1f, ((AggPushDownAction) operation.action()).getScriptCount()); + long scriptCount = + pushDownContext.getAggSpec() == null ? 
0 : pushDownContext.getAggSpec().getScriptCount(); + multiplier *= (float) Math.pow(1.1f, scriptCount); return multiplier; } @@ -328,10 +329,11 @@ && isAnyCollationNameInAggregators(collationNames)) { Object digest; if (pushDownContext.isAggregatePushed()) { // Push down the sort into the aggregation bucket - action = - (AggregationBuilderAction) - aggAction -> - aggAction.pushDownSortIntoAggBucket(collations, getRowType().getFieldNames()); + pushDownContextWithoutSort.setAggSpec( + pushDownContextWithoutSort + .getAggSpec() + .withBucketSort(collations, getRowType().getFieldNames())); + action = (OSRequestBuilderAction) requestBuilder -> {}; digest = collations; pushDownContextWithoutSort.add(PushDownType.SORT, digest, action); return buildScan( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 4d32562f2fd..740801ff418 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -41,7 +41,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; -import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.calcite.plan.HighlightPushDown; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; @@ -59,8 +58,7 @@ import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; import org.opensearch.sql.opensearch.storage.scan.context.AbstractAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggPushDownAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggregationBuilderAction; +import org.opensearch.sql.opensearch.storage.scan.context.AggSpec; import org.opensearch.sql.opensearch.storage.scan.context.FilterDigest; import org.opensearch.sql.opensearch.storage.scan.context.LimitDigest; import org.opensearch.sql.opensearch.storage.scan.context.OSRequestBuilderAction; @@ -296,7 +294,7 @@ public CalciteLogicalIndexScan pushDownProject(List selectedColumns) { AbstractAction action; if (pushDownContext.isAggregatePushed()) { // For aggregate, we do nothing on query builder but only change the schema of the scan. 
- action = (AggregationBuilderAction) aggAction -> {}; + action = (OSRequestBuilderAction) requestBuilder -> {}; } else { action = (OSRequestBuilderAction) @@ -332,13 +330,8 @@ private RelTraitSet reIndexCollations(List selectedColumns) { public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { try { - if (!pushDownContext.isAggregatePushed()) return null; - List aggregationBuilders = - pushDownContext.getAggPushDownAction().getBuilderAndParser().getLeft(); - if (aggregationBuilders.size() != 1) { - return null; - } - if (!(aggregationBuilders.getFirst() instanceof CompositeAggregationBuilder)) { + AggSpec aggSpec = pushDownContext.getAggSpec(); + if (aggSpec == null || !aggSpec.isCompositeAggregation()) { return null; } List collationNames = getCollationNames(sort.getCollation().getFieldCollations()); @@ -346,11 +339,9 @@ public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { return null; } CalciteLogicalIndexScan newScan = copyWithNewTraitSet(sort.getTraitSet()); - newScan - .pushDownContext - .getAggPushDownAction() - .rePushDownSortAggMeasure( - sort.getCollation().getFieldCollations(), rowType.getFieldNames()); + newScan.pushDownContext.setAggSpec( + aggSpec.withSortMeasure( + sort.getCollation().getFieldCollations(), rowType.getFieldNames())); AbstractAction action = (OSRequestBuilderAction) requestAction -> requestAction.resetRequestTotal(); Object digest = sort.getCollation().getFieldCollations(); @@ -367,7 +358,7 @@ public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { public CalciteLogicalIndexScan pushDownRareTop(Project project, RareTopDigest digest) { try { CalciteLogicalIndexScan newScan = copyWithNewSchema(project.getRowType()); - newScan.pushDownContext.getAggPushDownAction().rePushDownRareTop(digest); + newScan.pushDownContext.setAggSpec(pushDownContext.getAggSpec().withRareTop(digest)); AbstractAction action = (OSRequestBuilderAction) requestAction -> requestAction.resetRequestTotal(); newScan.pushDownContext.add(PushDownType.RARE_TOP, digest, action); @@ -424,9 +415,13 @@ public AbstractRelNode pushDownAggregate(Aggregate aggregate, @Nullable Project OpenSearchDataType.of( OpenSearchTypeFactory.convertRelDataTypeToExprType( field.getType())))); - AggPushDownAction action = - new AggPushDownAction(builderAndParser, extendedTypeMapping, bucketNames); - newScan.pushDownContext.add(PushDownType.AGGREGATION, aggregate, action); + AggSpec aggSpec = AggSpec.create(extendedTypeMapping, bucketNames, builderAndParser); + // AggPushDownAction is lazily materialized by AggSpec.buildAction() and then this action + // will materialize agg request builder. + // The AGGREGATION pushdown operation in PushDownContext remains a no-op marker here. + newScan.pushDownContext.setAggSpec(aggSpec); + newScan.pushDownContext.add( + PushDownType.AGGREGATION, aggregate, (OSRequestBuilderAction) requestBuilder -> {}); return newScan; } catch (Exception e) { if (LOG.isDebugEnabled()) { @@ -440,9 +435,7 @@ public AbstractRelNode pushDownLimit(LogicalSort sort, Integer limit, Integer of try { if (pushDownContext.isAggregatePushed()) { int totalSize = limit + offset; - // Since the AggPushDownAction is shared among different PushDownContext, its size() may be - // inaccurate(<= the actual size). - // So take the previous limit into account to decide whether it can update the context. 
+ AggSpec aggSpec = pushDownContext.getAggSpec(); boolean canReduceEstimatedRowsCount = !pushDownContext.isLimitPushed() || pushDownContext.getQueue().reversed().stream() @@ -452,27 +445,20 @@ public AbstractRelNode pushDownLimit(LogicalSort sort, Integer limit, Integer of .map(op -> (LimitDigest) op.digest()) .map(d -> totalSize < d.offset() + d.limit()) .orElse(true); - - // Push down the limit into the aggregation bucket in advance to detect whether the limit - // can update the aggregation builder boolean canUpdate = - canReduceEstimatedRowsCount - || pushDownContext.getAggPushDownAction().pushDownLimitIntoBucketSize(totalSize); + canReduceEstimatedRowsCount || aggSpec.canPushDownLimitIntoBucketSize(totalSize); if (!canUpdate && offset > 0) return null; CalciteLogicalIndexScan newScan = this.copyWithNewSchema(getRowType()); if (canUpdate) { - newScan - .pushDownContext - .getAggPushDownAction() - .pushDownLimitIntoBucketSize(limit + offset); + newScan.pushDownContext.setAggSpec(aggSpec.withLimit(limit + offset)); } AbstractAction action; - if (pushDownContext.getAggPushDownAction().isCompositeAggregation()) { + if (newScan.pushDownContext.getAggSpec().isCompositeAggregation()) { action = (OSRequestBuilderAction) requestBuilder -> requestBuilder.pushDownLimitToRequestTotal(limit, offset); } else { - action = (AggregationBuilderAction) aggAction -> {}; + action = (OSRequestBuilderAction) requestBuilder -> {}; } newScan.pushDownContext.add(PushDownType.LIMIT, new LimitDigest(limit, offset), action); return offset > 0 ? sort.copy(sort.getTraitSet(), List.of(newScan)) : newScan; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java index 70e6f0f2157..af9d46cd745 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java @@ -45,8 +45,8 @@ public OpenSearchIndexScanBuilder( this.scanFactory = scanFactory; } - /** Constructor used for unit tests. */ - protected OpenSearchIndexScanBuilder( + /** Constructor that accepts a custom PushDownQueryBuilder delegate. */ + public OpenSearchIndexScanBuilder( PushDownQueryBuilder translator, Function scanFactory) { this.delegate = translator; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScan.java new file mode 100644 index 00000000000..86d1934f132 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScan.java @@ -0,0 +1,37 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.request.OpenSearchRequest; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; + +/** + * OpenSearch scan for vector-search relations. Delegates everything to {@link OpenSearchIndexScan} + * except for {@link #open()}, where it first verifies the k-NN plugin is installed so we fail fast + * with a clear SQL error before the native request would fail deep in execution. 
The check is + * deferred to open() (not applyArguments() or the scan builder) so that analysis-time paths like + * _explain continue to work on clusters without k-NN. + */ +public class VectorSearchIndexScan extends OpenSearchIndexScan { + + private final KnnPluginCapability knnCapability; + + public VectorSearchIndexScan( + OpenSearchClient client, + int maxResponseSize, + OpenSearchRequest request, + KnnPluginCapability knnCapability) { + super(client, maxResponseSize, request); + this.knnCapability = knnCapability; + } + + @Override + public void open() { + knnCapability.requireInstalled(); + super.open(); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilder.java new file mode 100644 index 00000000000..a898ac41299 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilder.java @@ -0,0 +1,160 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import java.util.function.Function; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.planner.logical.LogicalAggregation; +import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalPlan; +import org.opensearch.sql.planner.logical.LogicalProject; +import org.opensearch.sql.planner.logical.LogicalSort; + +/** + * Scan builder for vector search relations. + * + *

    Rejects planner shapes that the SQL surface cannot express safely: + * + *

      + *
    • Aggregations — native OpenSearch k-NN supports aggregations alongside similarity + * search, but the SQL layer does not plumb them through, so we fail fast rather than return + * silently unaggregated results. + *
    • Outer operators over a vectorSearch() subquery — when vectorSearch() is wrapped in a + * subquery (e.g. {@code SELECT * FROM (SELECT v.id FROM vectorSearch(...) AS v) t WHERE + * t.price < 150}), outer WHERE / ORDER BY / OFFSET / GROUP BY / aggregation / DISTINCT do not + * participate in the vectorSearch pushdown contract (the inner {@link LogicalProject} sits + * between the outer operator and this scan builder, so those nodes never match the + * direct-adjacency push-down patterns). They would then be applied in memory after + * top-k results have been selected by vector distance, which can silently yield zero rows or + * mis-ordered results. We detect these shapes in {@link #validatePlan(LogicalPlan)} and + * reject with a clear error. + *
    + */ +public class VectorSearchIndexScanBuilder extends OpenSearchIndexScanBuilder { + + public VectorSearchIndexScanBuilder( + PushDownQueryBuilder translator, + Function scanFactory) { + super(translator, scanFactory); + } + + @Override + public boolean pushDownAggregation(LogicalAggregation aggregation) { + throw new ExpressionEvaluationException( + "Aggregations are not supported on vectorSearch() relations."); + } + + /** + * Walk the fully-optimized plan and reject outer-operator-over-subquery shapes. We look for an + * outer {@link LogicalFilter}, {@link LogicalSort}, {@link LogicalLimit} with non-zero offset, or + * {@link LogicalAggregation} whose descendant chain reaches this scan builder through one or more + * {@link LogicalProject} nodes (the subquery-boundary marker). An operator directly above this + * scan builder is fine — those go through the push-down contract in the query builder. + */ + @Override + public void validatePlan(LogicalPlan root) { + checkForOuterOperator(root, null, false); + } + + /** + * Recursive walker that tracks the outermost "risky" operator seen on the current walk path and + * whether a {@link LogicalProject} has been crossed since then: + * + *
      + *
    • {@code outerOp} — name of the outermost filter/sort/offset/aggregation ancestor, or + * {@code null} if none. Projects only matter below such an operator — without one, a + * project is just the outer SELECT and should not trigger rejection. + *
    • {@code sawProjectSinceOuter} — true iff a {@link LogicalProject} has been seen between + * the outermost risky ancestor and the current position. Once separation by a Project has + * been established, it is permanent — a lower {@link LogicalFilter} below the Project does + * not undo the outer boundary. + *
    + * + *
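As a purely illustrative aid, the sketch below replays the same walk on a toy plan model (the `Node` and `Kind` types are hypothetical stand-ins, not the project's `LogicalPlan` classes); it shows that once a Project has been crossed below a risky operator, the boundary stays set:

```java
import java.util.List;

final class OuterOperatorWalkSketch {
  enum Kind { FILTER, SORT, PROJECT, SCAN }

  record Node(Kind kind, List<Node> children) {
    static Node of(Kind kind, Node... children) { return new Node(kind, List.of(children)); }
  }

  // Returns true when a risky outer operator is separated from the scan by a Project.
  static boolean rejected(Node node, String outerOp, boolean sawProjectSinceOuter) {
    if (node.kind() == Kind.SCAN) {
      return outerOp != null && sawProjectSinceOuter;
    }
    String nextOuterOp = outerOp;
    boolean nextSawProject = sawProjectSinceOuter;
    if (outerOp == null) {
      if (node.kind() == Kind.FILTER || node.kind() == Kind.SORT) {
        nextOuterOp = node.kind().name(); // first risky ancestor on this path
      }
    } else if (node.kind() == Kind.PROJECT) {
      nextSawProject = true; // boundary stays set, even past lower filters
    }
    boolean result = false;
    for (Node child : node.children()) {
      result |= rejected(child, nextOuterOp, nextSawProject);
    }
    return result;
  }

  public static void main(String[] args) {
    // Outer filter separated from the scan by a subquery Project: rejected.
    Node blocked =
        Node.of(Kind.FILTER, Node.of(Kind.PROJECT, Node.of(Kind.FILTER, Node.of(Kind.SCAN))));
    System.out.println(rejected(blocked, null, false)); // true
    // Filter directly above the scan, no Project in between: allowed.
    System.out.println(rejected(Node.of(Kind.FILTER, Node.of(Kind.SCAN)), null, false)); // false
  }
}
```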

    This matters for shapes like {@code Filter(outer) -> Project(subquery) -> Filter(inner) -> + * Scan}, where the outer predicate is still blocked from reaching the push-down contract by the + * subquery Project regardless of the inner filter. Resetting on the inner filter would make the + * walker miss this shape. + */ + private void checkForOuterOperator( + LogicalPlan node, String outerOp, boolean sawProjectSinceOuter) { + if (node == this) { + if (outerOp != null && sawProjectSinceOuter) { + throw new ExpressionEvaluationException(rejectionMessage(outerOp)); + } + return; + } + String nextOuterOp = outerOp; + boolean nextSawProject = sawProjectSinceOuter; + if (outerOp == null) { + String operator = classifyOuterOperator(node); + if (operator != null) { + nextOuterOp = operator; + } + } else if (node instanceof LogicalProject) { + nextSawProject = true; + } + for (LogicalPlan child : node.getChild()) { + checkForOuterOperator(child, nextOuterOp, nextSawProject); + } + } + + /** + * Returns a user-facing label for operators that cannot safely sit above a vectorSearch() + * subquery, or {@code null} for operators that are fine (Project, scan, etc.). {@link + * LogicalLimit} with {@code offset == 0} is safe — plain LIMIT wrapping a subquery just caps the + * row count. Non-zero OFFSET skips top-k rows by distance and is rejected. + */ + private static String classifyOuterOperator(LogicalPlan node) { + if (node instanceof LogicalFilter) { + return "WHERE"; + } + if (node instanceof LogicalSort) { + return "ORDER BY"; + } + if (node instanceof LogicalAggregation) { + return "GROUP BY / aggregation / DISTINCT"; + } + if (node instanceof LogicalLimit) { + Integer offset = ((LogicalLimit) node).getOffset(); + if (offset != null && offset != 0) { + return "OFFSET"; + } + } + return null; + } + + // Operator-specific messages: the generic "move it inside the subquery" advice is only right + // for WHERE and for ORDER BY _score DESC. OFFSET, aggregation, GROUP BY, and DISTINCT are + // themselves unsupported on vectorSearch() directly, so the message must not claim a workaround + // that would only trip the user on a second validation error. + private static String rejectionMessage(String outerOp) { + switch (outerOp) { + case "WHERE": + return "Outer WHERE on a vectorSearch() subquery is not supported: the predicate does not" + + " participate in the vectorSearch pushdown contract and would be applied only" + + " after top-k results have been selected by vector distance, which can silently" + + " yield zero rows. Move the WHERE into the same SELECT block as vectorSearch() so" + + " it participates in the vectorSearch WHERE pushdown contract."; + case "ORDER BY": + return "Outer ORDER BY on a vectorSearch() subquery is not supported: sorting does not" + + " participate in the vectorSearch pushdown contract and would be applied only" + + " after top-k results have been selected by vector distance, which can yield" + + " mis-ordered results. Use ORDER BY ._score DESC in the same SELECT block" + + " as vectorSearch(), or omit ORDER BY."; + case "OFFSET": + return "Outer OFFSET on a vectorSearch() subquery is not supported. OFFSET is not" + + " supported on vectorSearch(); use LIMIT only."; + case "GROUP BY / aggregation / DISTINCT": + return "Outer GROUP BY / aggregation / DISTINCT on a vectorSearch() subquery is not" + + " supported. 
Aggregations and DISTINCT are not supported on vectorSearch()" + + " relations."; + default: + return "Outer " + outerOp + " on a vectorSearch() subquery is not supported."; + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilder.java new file mode 100644 index 00000000000..33714a793ab --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilder.java @@ -0,0 +1,285 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Function; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ConstantScoreQueryBuilder; +import org.opensearch.index.query.ExistsQueryBuilder; +import org.opensearch.index.query.MatchBoolPrefixQueryBuilder; +import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; +import org.opensearch.index.query.MatchPhraseQueryBuilder; +import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.MultiMatchQueryBuilder; +import org.opensearch.index.query.NestedQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.QueryStringQueryBuilder; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.ScriptQueryBuilder; +import org.opensearch.index.query.SimpleQueryStringBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.query.WildcardQueryBuilder; +import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.ast.tree.Sort.SortOption; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.ExpressionNodeVisitor; +import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.storage.FilterType; +import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder; +import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder.ScriptQueryUnSupportedException; +import org.opensearch.sql.opensearch.storage.serde.DefaultExpressionSerializer; +import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalSort; + +/** + * Query builder for vector search. The knn relevance score is preserved regardless of placement + * strategy — in {@code EFFICIENT} mode the knn query carries its own scores, and in {@code POST} + * mode the knn query sits in a scoring ({@code must}) context while the WHERE clause is applied as + * a non-scoring ({@code filter}) clause. + * + *
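As a rough sketch of the two placements (illustrative only; `matchAllQuery()` stands in for the real knn clause, which is built by the relation implementation and not shown here):

```java
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilders;

final class FilterPlacementSketch {
  public static void main(String[] args) {
    // Stand-ins: matchAllQuery() takes the place of the knn clause, a term query plays the WHERE.
    QueryBuilder knnStandIn = QueryBuilders.matchAllQuery();
    QueryBuilder where = QueryBuilders.termQuery("category", "shoes");

    // POST mode: knn keeps scoring in `must`, WHERE is a non-scoring `filter` applied after the
    // ANN search has already picked its candidates.
    QueryBuilder post = QueryBuilders.boolQuery().must(knnStandIn).filter(where);
    System.out.println(post);

    // EFFICIENT mode (conceptually): the WHERE clause is handed back through rebuildKnnWithFilter
    // and ends up inside knn.filter, so candidates are filtered during the ANN search itself.
    // There is no plain QueryBuilders equivalent, so it is only described here.
  }
}
```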

    Supports two filter placement strategies via {@link FilterType}: + * + *

      + *
    • {@code EFFICIENT} — WHERE inside {@code knn.filter} for pre-filtering during ANN search + * (default). + *
    • {@code POST} — WHERE in {@code bool.filter} outside knn (post-filtering fallback, used when + * the WHERE shape is not compatible with pre-filtering). + *
    + */ +public class VectorSearchQueryBuilder extends OpenSearchIndexScanQueryBuilder { + + private final QueryBuilder knnQuery; + private final Map options; + private final FilterType filterType; + private final boolean filterTypeExplicit; + private final Function rebuildKnnWithFilter; + private boolean filterPushed = false; + private boolean limitPushed = false; + + /** Full constructor with filter type support. */ + public VectorSearchQueryBuilder( + OpenSearchRequestBuilder requestBuilder, + QueryBuilder knnQuery, + Map options, + FilterType filterType, + boolean filterTypeExplicit, + Function rebuildKnnWithFilter) { + super(requestBuilder); + requestBuilder.getSourceBuilder().query(knnQuery); + this.knnQuery = knnQuery; + this.options = options; + this.filterType = filterType != null ? filterType : FilterType.EFFICIENT; + this.filterTypeExplicit = filterTypeExplicit; + if (this.filterType == FilterType.EFFICIENT && rebuildKnnWithFilter == null) { + throw new IllegalArgumentException( + "EFFICIENT filter mode requires a non-null rebuildKnnWithFilter callback"); + } + this.rebuildKnnWithFilter = rebuildKnnWithFilter; + } + + /** + * Test-only constructor — pins {@link FilterType#POST} so callers that do not wire a {@code + * rebuildKnnWithFilter} callback (unit tests) can still exercise the push-down contract. + * Production callers always go through the full constructor, which defaults to {@link + * FilterType#EFFICIENT}. + */ + public VectorSearchQueryBuilder( + OpenSearchRequestBuilder requestBuilder, QueryBuilder knnQuery, Map options) { + this(requestBuilder, knnQuery, options, FilterType.POST, false, null); + } + + @Override + public boolean pushDownFilter(LogicalFilter filter) { + FilterQueryBuilder queryBuilder = new FilterQueryBuilder(new DefaultExpressionSerializer()); + Expression queryCondition = filter.getCondition(); + + // _score is synthetic, not a stored field; a range query on it silently returns 0 rows. + // Users who want a score floor should use option='min_score=...'. + if (containsScoreReference(queryCondition)) { + throw new ExpressionEvaluationException( + "WHERE on _score is not supported on vectorSearch()." + + " Use option='min_score=...' for score-floor filtering."); + } + + QueryBuilder whereQuery; + try { + whereQuery = queryBuilder.build(queryCondition); + } catch (ScriptQueryUnSupportedException e) { + if (filterTypeExplicit) { + throw new ExpressionEvaluationException( + "filter_type only works when the WHERE clause can be translated to an" + + " OpenSearch filter. Rewrite the WHERE clause or omit filter_type."); + } + // Default mode: fall back to in-memory filtering (matches base class behavior) + return false; + } + filterPushed = true; + + if (filterType == FilterType.EFFICIENT) { + // Fail closed: knn.filter on AOSS rejects script queries and nested predicates expand the + // preview contract. Allow-list validator beats a blacklist walker. 
+ validateEfficientFilterSafe(whereQuery); + QueryBuilder rebuiltKnn = rebuildKnnWithFilter.apply(whereQuery); + requestBuilder.getSourceBuilder().query(rebuiltKnn); + } else { + // POST mode: knn in must (scores), WHERE in filter (no scoring impact) + BoolQueryBuilder combined = QueryBuilders.boolQuery().must(knnQuery).filter(whereQuery); + requestBuilder.getSourceBuilder().query(combined); + } + return true; + } + + @Override + public boolean pushDownLimit(LogicalLimit limit) { + // OFFSET would shift the search window and silently drop top results; reject with a clear + // error rather than have the parent path push `from: ` into the request. + if (limit.getOffset() != null && limit.getOffset() != 0) { + throw new ExpressionEvaluationException( + "OFFSET is not supported on vectorSearch(). Remove OFFSET and use LIMIT only."); + } + validateLimitWithinK(limit.getLimit()); + limitPushed = true; + return super.pushDownLimit(limit); + } + + @Override + public boolean pushDownSort(LogicalSort sort) { + // Vector search returns results sorted by _score DESC by default. + // Only _score DESC is meaningful; reject all other sort expressions. + for (Pair sortItem : sort.getSortList()) { + Expression expr = sortItem.getRight(); + if (!(expr instanceof ReferenceExpression) + || !"_score".equals(((ReferenceExpression) expr).getAttr())) { + throw new ExpressionEvaluationException( + String.format( + "vectorSearch only supports ORDER BY _score DESC; " + + "unsupported sort expression: %s", + expr)); + } + if (sortItem.getLeft().getSortOrder() != Sort.SortOrder.DESC) { + throw new ExpressionEvaluationException( + "vectorSearch only supports ORDER BY _score DESC; _score ASC is not supported"); + } + } + // _score DESC is knn's natural order, so the sort itself is not pushed. Preserve the + // parent's sort.getCount() → limit contract; SQL sends 0, PPL may combine sort+limit. + if (sort.getCount() != 0) { + validateLimitWithinK(sort.getCount()); + limitPushed = true; + requestBuilder.pushDownLimit(sort.getCount(), 0); + } + return true; + } + + /** Validates that the requested limit does not exceed k in top-k mode. */ + private void validateLimitWithinK(int limit) { + if (options.containsKey("k")) { + int k = Integer.parseInt(options.get("k")); + if (limit > k) { + throw new ExpressionEvaluationException( + String.format("LIMIT %d exceeds k=%d in top-k vector search", limit, k)); + } + } + } + + // True if any ReferenceExpression in the tree names _score (case-insensitive, so quoted/ + // backticked variants cannot bypass the guard). + private static boolean containsScoreReference(Expression expr) { + AtomicBoolean found = new AtomicBoolean(false); + expr.accept( + new ExpressionNodeVisitor() { + @Override + public Void visitReference(ReferenceExpression node, Void context) { + if (node.getAttr() != null && "_score".equalsIgnoreCase(node.getAttr())) { + found.set(true); + } + return null; + } + }, + null); + return found.get(); + } + + // Allow-list of leaf query types FilterQueryBuilder emits today. Any new wrapper or container + // appearing here must fail closed rather than silently embed under knn.filter. 
+ private static final Set> SAFE_EFFICIENT_FILTER_LEAVES = + Set.of( + TermQueryBuilder.class, + RangeQueryBuilder.class, + WildcardQueryBuilder.class, + MatchQueryBuilder.class, + MatchPhraseQueryBuilder.class, + MatchPhrasePrefixQueryBuilder.class, + MultiMatchQueryBuilder.class, + QueryStringQueryBuilder.class, + SimpleQueryStringBuilder.class, + MatchBoolPrefixQueryBuilder.class, + ExistsQueryBuilder.class); + + // Package-private for direct branch coverage in unit tests. Fail-closed: recurse known + // containers, reject ScriptQueryBuilder/NestedQueryBuilder with targeted messages, allow + // listed leaves, reject everything else as unsupported shape. + static void validateEfficientFilterSafe(QueryBuilder qb) { + if (qb == null) { + return; + } + if (qb instanceof ScriptQueryBuilder) { + throw new ExpressionEvaluationException( + "vectorSearch WHERE pre-filtering does not support predicates that compile to" + + " script queries (arithmetic, function calls, CASE, date math). Rewrite the" + + " WHERE clause to use term/range/bool predicates, or set filter_type=post to" + + " apply the predicate after the k-NN search."); + } + if (qb instanceof BoolQueryBuilder) { + BoolQueryBuilder bool = (BoolQueryBuilder) qb; + bool.must().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe); + bool.filter().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe); + bool.should().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe); + bool.mustNot().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe); + return; + } + if (qb instanceof ConstantScoreQueryBuilder) { + validateEfficientFilterSafe(((ConstantScoreQueryBuilder) qb).innerQuery()); + return; + } + if (qb instanceof NestedQueryBuilder) { + throw new ExpressionEvaluationException( + "vectorSearch WHERE pre-filtering does not support nested predicates in this" + + " preview. Rewrite the WHERE clause using non-nested fields, or set" + + " filter_type=post to apply the predicate after the k-NN search."); + } + if (SAFE_EFFICIENT_FILTER_LEAVES.contains(qb.getClass())) { + return; + } + throw new ExpressionEvaluationException( + "vectorSearch WHERE pre-filtering encountered an unsupported filter query shape: " + + qb.getClass().getSimpleName() + + ". Rewrite the WHERE clause using simple term/range/bool predicates, or set" + + " filter_type=post to apply the predicate after the k-NN search."); + } + + @Override + public OpenSearchRequestBuilder build() { + if (filterTypeExplicit && !filterPushed) { + throw new ExpressionEvaluationException("filter_type requires a pushdownable WHERE clause"); + } + boolean isRadial = !options.containsKey("k"); + if (isRadial && !limitPushed) { + throw new ExpressionEvaluationException( + "LIMIT is required for radial vector search (max_distance or min_score)." 
+ + " Without LIMIT, the result set size is unbounded."); + } + return super.build(); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java index 7c15586d143..35a6c1f26cf 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java @@ -12,6 +12,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.function.Consumer; import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -27,6 +28,7 @@ import org.opensearch.search.aggregations.bucket.composite.HistogramValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; @@ -49,7 +51,6 @@ public class AggPushDownAction implements OSRequestBuilderAction { private Pair, OpenSearchAggregationResponseParser> builderAndParser; private final Map extendedTypeMapping; - private final long scriptCount; // Record the output field names of all buckets as the sequence of buckets private List bucketNames; @@ -59,12 +60,10 @@ public AggPushDownAction( List bucketNames) { this.builderAndParser = builderAndParser; this.extendedTypeMapping = extendedTypeMapping; - this.scriptCount = - builderAndParser.getLeft().stream().mapToInt(AggPushDownAction::getScriptCount).sum(); this.bucketNames = bucketNames; } - private static int getScriptCount(AggregationBuilder aggBuilder) { + static int getScriptCount(AggregationBuilder aggBuilder) { if (aggBuilder instanceof NestedAggregationBuilder) { aggBuilder = aggBuilder.getSubAggregations().iterator().next(); } @@ -85,6 +84,25 @@ private static int getScriptCount(AggregationBuilder aggBuilder) { return 0; } + private static AggregatorFactories.Builder copySubAggregations(AggregationBuilder source) { + AggregatorFactories.Builder copiedFactories = new AggregatorFactories.Builder(); + source.getSubAggregations().forEach(copiedFactories::addAggregator); + source.getPipelineAggregations().forEach(copiedFactories::addPipelineAggregator); + return copiedFactories; + } + + private void replaceRootBuilder( + AggregationBuilder originalRoot, AggregationBuilder newInnerBuilder) { + AggregationBuilder finalBuilder = newInnerBuilder; + if (originalRoot instanceof NestedAggregationBuilder nested) { + finalBuilder = + AggregationBuilders.nested(nested.getName(), nested.path()) + .subAggregation(newInnerBuilder); + } + builderAndParser = + Pair.of(Collections.singletonList(finalBuilder), builderAndParser.getRight()); + } + @Override public void apply(OpenSearchRequestBuilder requestBuilder) { requestBuilder.pushDownAggregation(builderAndParser); @@ -257,24 +275,74 @@ private TermsAggregationBuilder buildTermsAggregationBuilder( } /** Build a {@link DateHistogramAggregationBuilder} by {@link DateHistogramValuesSourceBuilder} */ - private DateHistogramAggregationBuilder 
buildDateHistogramAggregationBuilder( - DateHistogramValuesSourceBuilder dateHisto, BucketOrder bucketOrder) { - DateHistogramAggregationBuilder dateHistoBuilder = - new DateHistogramAggregationBuilder(dateHisto.name()); - if (dateHisto.field() != null) { - dateHistoBuilder.field(dateHisto.field()); - } - if (dateHisto.script() != null) { - dateHistoBuilder.script(dateHisto.script()); + private static void copyDateHistogramInterval( + DateHistogramValuesSourceBuilder source, + Consumer fixedIntervalSetter, + Consumer calendarIntervalSetter) { + try { + fixedIntervalSetter.accept(source.getIntervalAsFixed()); + return; + } catch (IllegalArgumentException | IllegalStateException ignored) { + // Fallback to calendar interval. } try { - dateHistoBuilder.fixedInterval(dateHisto.getIntervalAsFixed()); - } catch (IllegalArgumentException e) { - dateHistoBuilder.calendarInterval(dateHisto.getIntervalAsCalendar()); + calendarIntervalSetter.accept(source.getIntervalAsCalendar()); + return; + } catch (IllegalArgumentException | IllegalStateException ignored) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot copy interval for date histogram bucket " + source.name()); + } + } + + private static void copyDateHistogramBucketOptions( + DateHistogramValuesSourceBuilder source, DateHistogramAggregationBuilder target) { + if (source.field() != null) { + target.field(source.field()); + } + if (source.script() != null) { + target.script(source.script()); + } + copyDateHistogramInterval(source, target::fixedInterval, target::calendarInterval); + if (source.userValuetypeHint() != null) { + target.userValueTypeHint(source.userValuetypeHint()); + } + if (source.timeZone() != null) { + target.timeZone(source.timeZone()); + } + if (source.offset() != 0) { + target.offset(source.offset()); + } + if (source.format() != null) { + target.format(source.format()); } - if (dateHisto.userValuetypeHint() != null) { - dateHistoBuilder.userValueTypeHint(dateHisto.userValuetypeHint()); + // Composite group-by only returns buckets with documents. Preserve that when rewriting. + target.minDocCount(1); + } + + private static void copyHistogramBucketOptions( + HistogramValuesSourceBuilder source, HistogramAggregationBuilder target) { + if (source.field() != null) { + target.field(source.field()); + } + if (source.script() != null) { + target.script(source.script()); + } + target.interval(source.interval()); + if (source.userValuetypeHint() != null) { + target.userValueTypeHint(source.userValuetypeHint()); + } + if (source.format() != null) { + target.format(source.format()); } + // Composite group-by only returns buckets with documents. Preserve that when rewriting. 
+ target.minDocCount(1); + } + + private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( + DateHistogramValuesSourceBuilder dateHisto, BucketOrder bucketOrder) { + DateHistogramAggregationBuilder dateHistoBuilder = + new DateHistogramAggregationBuilder(dateHisto.name()); + copyDateHistogramBucketOptions(dateHisto, dateHistoBuilder); dateHistoBuilder.order(bucketOrder); return dateHistoBuilder; } @@ -283,16 +351,7 @@ private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( private HistogramAggregationBuilder buildHistogramAggregationBuilder( HistogramValuesSourceBuilder histo, BucketOrder bucketOrder) { HistogramAggregationBuilder histoBuilder = new HistogramAggregationBuilder(histo.name()); - if (histo.field() != null) { - histoBuilder.field(histo.field()); - } - if (histo.script() != null) { - histoBuilder.script(histo.script()); - } - histoBuilder.interval(histo.interval()); - if (histo.userValuetypeHint() != null) { - histoBuilder.userValueTypeHint(histo.userValuetypeHint()); - } + copyHistogramBucketOptions(histo, histoBuilder); histoBuilder.order(bucketOrder); return histoBuilder; } @@ -408,19 +467,11 @@ public void pushDownSortIntoAggBucket( newBuckets.add(buckets.get(bucketNames.indexOf(name))); newBucketNames.add(name); }); - AggregatorFactories.Builder newAggBuilder = new AggregatorFactories.Builder(); - compositeAggBuilder.getSubAggregations().forEach(newAggBuilder::addAggregator); AggregationBuilder finalBuilder = - AggregationBuilders.composite("composite_buckets", newBuckets) - .subAggregations(newAggBuilder) + AggregationBuilders.composite(compositeAggBuilder.getName(), newBuckets) + .subAggregations(copySubAggregations(compositeAggBuilder)) .size(compositeAggBuilder.size()); - if (original instanceof NestedAggregationBuilder nested) { - finalBuilder = - AggregationBuilders.nested(nested.getName(), nested.path()) - .subAggregation(finalBuilder); - } - builderAndParser = - Pair.of(Collections.singletonList(finalBuilder), builderAndParser.getRight()); + replaceRootBuilder(original, finalBuilder); bucketNames = newBucketNames; } if (builder instanceof TermsAggregationBuilder termsAggBuilder) { @@ -429,16 +480,6 @@ public void pushDownSortIntoAggBucket( // TODO for MultiTermsAggregationBuilder } - public boolean isCompositeAggregation() { - return builderAndParser.getLeft().stream() - .anyMatch( - builder -> - builder instanceof CompositeAggregationBuilder - || (builder instanceof NestedAggregationBuilder - && builder.getSubAggregations().iterator().next() - instanceof CompositeAggregationBuilder)); - } - /** * Check if the limit can be pushed down into aggregation bucket when the limit size is less than * bucket number. 
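A small aside on the interval-copy helper introduced above: the try-fixed-then-fall-back-to-calendar pattern can be reproduced against a plain `DateHistogramAggregationBuilder`. The sketch below assumes, as the helper's own catch clause does, that a calendar-only string such as `1M` makes `fixedInterval(...)` throw:

```java
import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval;

final class IntervalCopySketch {
  // Mirrors the try-fixed-then-calendar pattern of copyDateHistogramInterval: "2h" parses as a
  // fixed interval; "1M" (one month) is calendar-only, so fixedInterval(...) is expected to throw
  // and the fallback applies calendarInterval(...) instead.
  static DateHistogramAggregationBuilder withInterval(String name, String interval) {
    DateHistogramAggregationBuilder builder = new DateHistogramAggregationBuilder(name);
    try {
      builder.fixedInterval(new DateHistogramInterval(interval));
    } catch (IllegalArgumentException e) {
      builder.calendarInterval(new DateHistogramInterval(interval));
    }
    return builder;
  }

  public static void main(String[] args) {
    withInterval("by_2h", "2h");     // takes the fixedInterval(...) path
    withInterval("by_month", "1M");  // fixedInterval(...) throws, calendarInterval(...) applies
    System.out.println("both interval styles copied without error");
  }
}
```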
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java new file mode 100644 index 00000000000..7c0b0f15f8b --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java @@ -0,0 +1,386 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan.context; + +import static org.opensearch.search.aggregations.MultiBucketConsumerService.DEFAULT_MAX_BUCKETS; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import javax.annotation.Nullable; +import lombok.Getter; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.search.aggregations.AggregationBuilder; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.composite.DateHistogramValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.composite.HistogramValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder; +import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; + +/** Immutable aggregation pushdown state and ordered replay plan. */ +@Getter +public final class AggSpec { + private enum AggKind { + OTHER, + COMPOSITE, + TERMS, + MULTI_TERMS, + DATE_HISTOGRAM, + HISTOGRAM, + TOP_HITS, + RARE_TOP + } + + private enum LimitPushdownMode { + UNSUPPORTED, + ESTIMATE_ONLY, + LEAF_METRIC, + BUCKET_SIZE + } + + private interface BuildAction extends AbstractAction { + @Override + default void pushOperation(PushDownContext context, PushDownOperation operation) { + throw new UnsupportedOperationException("Internal aggregation build action cannot be queued"); + } + } + + private final Pair, OpenSearchAggregationResponseParser> + baseBuilderAndParser; + private final Map extendedTypeMapping; + private final List initialBucketNames; + // Cost model uses the script count of the base logical aggregation. Supported rewrites keep the + // same scripted sources/metrics semantically, while replay-time builders are request-scoped and + // may not preserve a structure that can be re-counted accurately after rewrite. 
+ private final long scriptCount; + private final AggKind kind; + private final LimitPushdownMode limitPushdownMode; + // The pushdown operation queue to rewrite base agg + private final List operationsForAgg; + @Nullable private final Integer bucketSize; + + private AggSpec( + Pair, OpenSearchAggregationResponseParser> baseBuilderAndParser, + Map extendedTypeMapping, + List initialBucketNames, + long scriptCount, + AggKind kind, + LimitPushdownMode limitPushdownMode, + List operationsForAgg, + @Nullable Integer bucketSize) { + this.baseBuilderAndParser = baseBuilderAndParser; + this.extendedTypeMapping = Map.copyOf(extendedTypeMapping); + this.initialBucketNames = List.copyOf(initialBucketNames); + this.scriptCount = scriptCount; + this.kind = kind; + this.limitPushdownMode = limitPushdownMode; + this.operationsForAgg = List.copyOf(operationsForAgg); + this.bucketSize = bucketSize; + } + + public static AggSpec create( + Map extendedTypeMapping, + List bucketNames, + Pair, OpenSearchAggregationResponseParser> builderAndParser) { + AggregationBuilder rootBuilder = + builderAndParser.getLeft().isEmpty() ? null : builderAndParser.getLeft().getFirst(); + AggKind kind = inferKind(rootBuilder); + return new AggSpec( + builderAndParser, + extendedTypeMapping, + bucketNames, + builderAndParser.getLeft().stream().mapToInt(AggPushDownAction::getScriptCount).sum(), + kind, + inferBaseLimitPushdownMode(rootBuilder, kind), + List.of(), + inferBucketSize(rootBuilder)); + } + + public boolean isCompositeAggregation() { + return kind == AggKind.COMPOSITE; + } + + public boolean canPushDownLimitIntoBucketSize(int size) { + return switch (limitPushdownMode) { + case BUCKET_SIZE -> bucketSize != null && size < bucketSize; + case LEAF_METRIC -> true; + case ESTIMATE_ONLY -> false; + case UNSUPPORTED -> + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown limit into aggregation bucket"); + }; + } + + public AggSpec withBucketSort(List collations, List fieldNames) { + if (kind != AggKind.COMPOSITE && kind != AggKind.TERMS) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown sort into aggregation bucket"); + } + if (kind == AggKind.COMPOSITE) { + for (RelFieldCollation collation : collations) { + String bucketName = fieldNames.get(collation.getFieldIndex()); + if (!initialBucketNames.contains(bucketName)) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown sort into aggregation bucket"); + } + } + } + return new AggSpec( + baseBuilderAndParser, + extendedTypeMapping, + initialBucketNames, + scriptCount, + kind, + limitPushdownMode, + replaceOperations( + PushDownType.SORT, + collations, + action -> action.pushDownSortIntoAggBucket(collations, fieldNames)), + bucketSize); + } + + public AggSpec withoutBucketSort() { + if (operationsForAgg.stream().noneMatch(operation -> operation.type() == PushDownType.SORT)) { + return this; + } + return new AggSpec( + baseBuilderAndParser, + extendedTypeMapping, + initialBucketNames, + scriptCount, + kind, + limitPushdownMode, + removeOperations(PushDownType.SORT), + bucketSize); + } + + public AggSpec withSortMeasure(List collations, List fieldNames) { + AggKind rewriteTarget = inferMeasureSortTarget(); + if (rewriteTarget == null) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown sort aggregate measure"); + } + Integer resizedBucketSize = + switch (rewriteTarget) { + case TERMS, MULTI_TERMS -> bucketSize; + default -> null; + }; + 
return new AggSpec( + baseBuilderAndParser, + extendedTypeMapping, + initialBucketNames, + scriptCount, + rewriteTarget, + inferLimitPushdownMode(rewriteTarget), + replaceOperations( + PushDownType.SORT_AGG_METRICS, + collations, + action -> action.rePushDownSortAggMeasure(collations, fieldNames)), + resizedBucketSize); + } + + public AggSpec withRareTop(RareTopDigest digest) { + if (!supportsCurrentRareTop()) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException("Cannot pushdown " + digest); + } + return new AggSpec( + baseBuilderAndParser, + extendedTypeMapping, + initialBucketNames, + scriptCount, + AggKind.RARE_TOP, + inferLimitPushdownMode(AggKind.RARE_TOP), + replaceOperations( + PushDownType.RARE_TOP, digest, action -> action.rePushDownRareTop(digest)), + digest.byList().isEmpty() ? digest.number() : DEFAULT_MAX_BUCKETS); + } + + public AggSpec withLimit(int size) { + switch (limitPushdownMode) { + case ESTIMATE_ONLY, LEAF_METRIC: + return this; + case UNSUPPORTED: + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown limit into aggregation bucket"); + case BUCKET_SIZE: + if (!canPushDownLimitIntoBucketSize(size)) { + return this; + } + break; + } + return new AggSpec( + baseBuilderAndParser, + extendedTypeMapping, + initialBucketNames, + scriptCount, + kind, + limitPushdownMode, + replaceOperations( + PushDownType.LIMIT, + new LimitDigest(size, 0), + action -> action.pushDownLimitIntoBucketSize(size)), + size); + } + + public AggPushDownAction buildAction() { + AggPushDownAction action = + new AggPushDownAction( + baseBuilderAndParser, extendedTypeMapping, new ArrayList<>(initialBucketNames)); + operationsForAgg.forEach(operation -> ((BuildAction) operation.action()).apply(action)); + return action; + } + + private List replaceOperations( + PushDownType type, Object digest, BuildAction action) { + List newOperations = removeOperations(type); + newOperations.add(new PushDownOperation(type, digest, action)); + return newOperations; + } + + private List removeOperations(PushDownType type) { + return new ArrayList<>( + operationsForAgg.stream().filter(operation -> operation.type() != type).toList()); + } + + private static AggKind inferKind(@Nullable AggregationBuilder rootBuilder) { + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (builder instanceof CompositeAggregationBuilder) { + return AggKind.COMPOSITE; + } + if (builder instanceof TermsAggregationBuilder) { + return AggKind.TERMS; + } + if (builder instanceof MultiTermsAggregationBuilder) { + return AggKind.MULTI_TERMS; + } + if (builder instanceof DateHistogramAggregationBuilder) { + return AggKind.DATE_HISTOGRAM; + } + if (builder instanceof HistogramAggregationBuilder) { + return AggKind.HISTOGRAM; + } + if (builder instanceof TopHitsAggregationBuilder) { + return AggKind.TOP_HITS; + } + return AggKind.OTHER; + } + + private static LimitPushdownMode inferLimitPushdownMode(AggKind kind) { + return switch (kind) { + case COMPOSITE, TERMS, MULTI_TERMS, TOP_HITS, RARE_TOP -> LimitPushdownMode.BUCKET_SIZE; + case OTHER, DATE_HISTOGRAM, HISTOGRAM -> LimitPushdownMode.UNSUPPORTED; + }; + } + + private static LimitPushdownMode inferBaseLimitPushdownMode( + @Nullable AggregationBuilder rootBuilder, AggKind kind) { + if (rootBuilder == null) { + // count() optimization uses hits.total and leaves the builder list empty. Keeps + // LIMIT in PushDownContext for these cases even though no request-side limit is applied. 
+ return LimitPushdownMode.ESTIMATE_ONLY; + } + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (builder instanceof ValuesSourceAggregationBuilder.LeafOnly) { + // Treats leaf metric aggregations as limit-pushable because they produce a single row. + return LimitPushdownMode.LEAF_METRIC; + } + return inferLimitPushdownMode(kind); + } + + private static boolean supportsBaseRareTop(@Nullable AggregationBuilder rootBuilder) { + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (!(builder instanceof CompositeAggregationBuilder composite)) { + return false; + } + if (composite.sources().size() == 1) { + return composite.sources().getFirst() instanceof TermsValuesSourceBuilder terms + && !terms.missingBucket(); + } + return composite.sources().stream() + .allMatch(src -> src instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()); + } + + @Nullable + private AggKind inferMeasureSortTarget() { + if (kind != AggKind.COMPOSITE) { + return null; + } + AggregationBuilder rootBuilder = + baseBuilderAndParser.getLeft().isEmpty() ? null : baseBuilderAndParser.getLeft().getFirst(); + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (!(builder instanceof CompositeAggregationBuilder composite)) { + return null; + } + if (composite.getSubAggregations().stream() + .anyMatch(metric -> !(metric instanceof ValuesSourceAggregationBuilder.LeafOnly))) { + return null; + } + if (composite.sources().size() == 1) { + CompositeValuesSourceBuilder source = composite.sources().getFirst(); + if (source instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()) { + return AggKind.TERMS; + } + if (source instanceof DateHistogramValuesSourceBuilder) { + return AggKind.DATE_HISTOGRAM; + } + if (source instanceof HistogramValuesSourceBuilder histo && !histo.missingBucket()) { + return AggKind.HISTOGRAM; + } + return null; + } + return composite.sources().stream() + .allMatch( + src -> src instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()) + ? AggKind.MULTI_TERMS + : null; + } + + private boolean supportsCurrentRareTop() { + return kind == AggKind.COMPOSITE + && supportsBaseRareTop( + baseBuilderAndParser.getLeft().isEmpty() + ? 
null + : baseBuilderAndParser.getLeft().getFirst()); + } + + @Nullable + private static Integer inferBucketSize(@Nullable AggregationBuilder rootBuilder) { + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (builder instanceof CompositeAggregationBuilder composite) { + return composite.size(); + } + if (builder instanceof TermsAggregationBuilder terms) { + return terms.size(); + } + if (builder instanceof MultiTermsAggregationBuilder multiTerms) { + return multiTerms.size(); + } + if (builder instanceof TopHitsAggregationBuilder topHits) { + return topHits.size(); + } + return null; + } + + @Nullable + private static AggregationBuilder unwrapNestedBuilder(@Nullable AggregationBuilder rootBuilder) { + if (rootBuilder instanceof NestedAggregationBuilder nested + && !nested.getSubAggregations().isEmpty()) { + return nested.getSubAggregations().iterator().next(); + } + return rootBuilder; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java deleted file mode 100644 index f9f43c89a7b..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.storage.scan.context; - -/** A lambda action to apply on the {@link AggPushDownAction} */ -public interface AggregationBuilderAction extends AbstractAction { - default void pushOperation(PushDownContext context, PushDownOperation operation) { - // Apply transformation to aggregation builder in the optimization phase as some transformation - // may cause exception. We need to detect that exception in advance. - apply(context.getAggPushDownAction()); - context.addOperationForAgg(operation); - } -} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 2d236207c10..a622f948efb 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -11,6 +11,7 @@ import java.util.List; import javax.annotation.Nullable; import lombok.Getter; +import lombok.Setter; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Project; import org.jetbrains.annotations.NotNull; @@ -27,8 +28,7 @@ public class PushDownContext extends AbstractCollection { private ArrayDeque operationsForRequestBuilder; private boolean isAggregatePushed = false; - private AggPushDownAction aggPushDownAction; - private ArrayDeque operationsForAgg; + @Setter private AggSpec aggSpec; // Records the start pos of the query, which is updated by new added limit operations. private int startFrom = 0; @@ -49,7 +49,10 @@ public PushDownContext(OpenSearchIndex osIndex) { @Override public PushDownContext clone() { PushDownContext newContext = new PushDownContext(osIndex); - newContext.addAll(this); + for (PushDownOperation operation : this) { + newContext.add(operation); + } + newContext.aggSpec = aggSpec; return newContext; } @@ -65,6 +68,7 @@ public PushDownContext cloneWithoutSort() { newContext.add(action); } } + newContext.aggSpec = aggSpec == null ? 
null : aggSpec.withoutBucketSort(); return newContext; } @@ -132,20 +136,11 @@ void addOperationForRequestBuilder(PushDownOperation operation) { queue.add(operation); } - void addOperationForAgg(PushDownOperation operation) { - if (operationsForAgg == null) { - this.operationsForAgg = new ArrayDeque<>(); - } - operationsForAgg.add(operation); - queue.add(operation); - } - @Override public boolean add(PushDownOperation operation) { operation.action().pushOperation(this, operation); if (operation.type() == PushDownType.AGGREGATION) { isAggregatePushed = true; - this.aggPushDownAction = (AggPushDownAction) operation.action(); } if (operation.type() == PushDownType.LIMIT) { startFrom += ((LimitDigest) operation.digest()).offset(); @@ -214,6 +209,9 @@ public OpenSearchRequestBuilder createRequestBuilder() { operationsForRequestBuilder.forEach( operation -> ((OSRequestBuilderAction) operation.action()).apply(newRequestBuilder)); } + if (aggSpec != null) { + aggSpec.buildAction().apply(newRequestBuilder); + } return newRequestBuilder; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java index 6ca25b7e9b7..2ff0dfa4a50 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java @@ -29,6 +29,7 @@ import org.opensearch.sql.expression.function.FunctionName; import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine.ScriptEngineType; import org.opensearch.sql.opensearch.storage.script.core.ExpressionScript; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.ExistsQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.LikeQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.LuceneQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.NestedQuery; @@ -86,6 +87,8 @@ public ScriptQueryUnSupportedException(String message) { .put(BuiltinFunctionName.WILDCARD_QUERY.getName(), new WildcardQuery()) .put(BuiltinFunctionName.WILDCARDQUERY.getName(), new WildcardQuery()) .put(BuiltinFunctionName.NESTED.getName(), new NestedQuery()) + .put(BuiltinFunctionName.IS_NULL.getName(), new ExistsQuery(true /* negated */)) + .put(BuiltinFunctionName.IS_NOT_NULL.getName(), new ExistsQuery(false)) .build(); /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/ExistsQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/ExistsQuery.java new file mode 100644 index 00000000000..5822f2f416a --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/ExistsQuery.java @@ -0,0 +1,69 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.filter.lucene; + +import static org.opensearch.sql.analysis.NestedAnalyzer.isNestedFunction; + +import lombok.RequiredArgsConstructor; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.ReferenceExpression; + +/** + * Lucene query that builds a native {@code exists} DSL fragment for {@code IS NULL} / {@code IS NOT + * NULL} 
predicates. + * + *

    This replaces the previous behavior of serializing these unary predicates as compounded script + * queries. The native {@code exists} query is cheaper, compatible with AOSS / serverless, and is + * the DSL shape downstream consumers expect. + * + *
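For reference, the two target shapes are straightforward to reproduce with stock query builders (the field name here is an arbitrary example):

```java
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilders;

final class ExistsShapeSketch {
  public static void main(String[] args) {
    // IS NOT NULL  ->  { "exists": { "field": "status" } }
    QueryBuilder isNotNull = QueryBuilders.existsQuery("status");

    // IS NULL  ->  { "bool": { "must_not": [ { "exists": { "field": "status" } } ] } }
    QueryBuilder isNull = QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery("status"));

    System.out.println(isNotNull);
    System.out.println(isNull);
  }
}
```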

    Unlike most {@link LuceneQuery} subclasses, this predicate family is unary (a single reference + * argument) rather than the standard {ref, literal} pair, so this class overrides both {@link + * #canSupport(FunctionExpression)} and {@link #build(FunctionExpression)}. + * + *

    Nested-field predicates are intentionally NOT supported here: OpenSearch DSL does not handle + * {@code IS_NULL} / {@code IS_NOT_NULL} on nested fields correctly (see the equivalent guard in + * {@code PredicateAnalyzer} for the Calcite path). When the reference is a nested function, {@link + * #canSupport} returns {@code false} and {@link + * org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder} falls back to the script + * query path, preserving correctness. + */ +@RequiredArgsConstructor +public class ExistsQuery extends LuceneQuery { + + /** When true, the predicate is {@code IS NULL} and the exists query is wrapped in must_not. */ + private final boolean negated; + + @Override + public boolean canSupport(FunctionExpression func) { + return func.getArguments().size() == 1 + && func.getArguments().get(0) instanceof ReferenceExpression + && !isNestedFunction(func.getArguments().get(0)); + } + + /** + * Unary IS NULL / IS NOT NULL has no {@code arg[1]}, so we must never route through {@link + * org.opensearch.sql.opensearch.storage.script.filter.lucene.NestedQuery#buildNested} — that path + * reads {@code func.getArguments().get(1)} and would throw. Returning {@code false} here forces + * {@code FilterQueryBuilder} to fall back to the script-query path for nested-field predicates. + */ + @Override + public boolean isNestedPredicate(FunctionExpression func) { + return false; + } + + @Override + public QueryBuilder build(FunctionExpression func) { + ReferenceExpression ref = (ReferenceExpression) func.getArguments().get(0); + String fieldName = ref.getRawPath(); + QueryBuilder existsQuery = QueryBuilders.existsQuery(fieldName); + if (negated) { + return QueryBuilders.boolQuery().mustNot(existsQuery); + } + return existsQuery; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/MergeRuleHelper.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/MergeRuleHelper.java index b2b851adec7..6cc6f1803a7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/MergeRuleHelper.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/MergeRuleHelper.java @@ -12,7 +12,7 @@ public class MergeRuleHelper { private static final List RULES = List.of( - new DeepMergeRule(), new LatestRule() // must come last + new DeepMergeRule(), new TextKeywordConflictRule(), new LatestRule() // must come last ); public static MergeRule selectRule(OpenSearchDataType source, OpenSearchDataType target) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRule.java new file mode 100644 index 00000000000..04d98e1a60f --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRule.java @@ -0,0 +1,72 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.util.MergeRules; + +import java.util.Map; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; +import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; + +/** + * Merge rule for text/keyword type conflicts across indices. 
When a field is text in one index and + * keyword in another, or text-with-keyword-subfield in one and text-without in another, we merge to + * text WITHOUT keyword subfields. This forces _source retrieval instead of doc_values, which works + * universally across all shards regardless of the actual field type. + * + *
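A minimal sketch of the rule in isolation (it assumes `OpenSearchDataType.of(MappingType.Keyword)` is available as a factory, as in the existing mapping code; everything else comes from this change):

```java
import java.util.HashMap;
import java.util.Map;
import org.opensearch.sql.opensearch.data.type.OpenSearchDataType;
import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType;
import org.opensearch.sql.opensearch.data.type.OpenSearchTextType;
import org.opensearch.sql.opensearch.util.MergeRules.TextKeywordConflictRule;

final class TextKeywordMergeSketch {
  public static void main(String[] args) {
    TextKeywordConflictRule rule = new TextKeywordConflictRule();
    OpenSearchDataType textSide = OpenSearchTextType.of();                      // text, no subfields
    OpenSearchDataType keywordSide = OpenSearchDataType.of(MappingType.Keyword); // assumed factory

    Map<String, OpenSearchDataType> merged = new HashMap<>();
    if (rule.isMatch(textSide, keywordSide)) {
      rule.mergeInto("title", textSide, merged);
    }
    // merged.get("title") is text WITHOUT a keyword subfield, so reads go through _source on
    // every shard regardless of which mapping that shard actually has.
    System.out.println(merged);
  }
}
```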

    See GitHub issue #4659. + */ +public class TextKeywordConflictRule implements MergeRule { + + @Override + public boolean isMatch(OpenSearchDataType source, OpenSearchDataType target) { + if (source == null || target == null) { + return false; + } + MappingType sourceMapping = source.getMappingType(); + MappingType targetMapping = target.getMappingType(); + if (sourceMapping == null || targetMapping == null) { + return false; + } + // Match when one is text and the other is keyword + if (isTextLike(sourceMapping) && isKeyword(targetMapping)) { + return true; + } + if (isKeyword(sourceMapping) && isTextLike(targetMapping)) { + return true; + } + // Match when both are text but one has keyword subfields and the other does not + if (isTextLike(sourceMapping) && isTextLike(targetMapping)) { + boolean sourceHasKeywordSub = hasKeywordSubField(source); + boolean targetHasKeywordSub = hasKeywordSubField(target); + return sourceHasKeywordSub != targetHasKeywordSub; + } + return false; + } + + @Override + public void mergeInto( + String key, OpenSearchDataType source, Map target) { + // Always merge to text WITHOUT keyword subfields. + // This forces _source retrieval, which works for both text and keyword fields. + target.put(key, OpenSearchTextType.of()); + } + + private static boolean isTextLike(MappingType mappingType) { + return mappingType == MappingType.Text || mappingType == MappingType.MatchOnlyText; + } + + private static boolean isKeyword(MappingType mappingType) { + return mappingType == MappingType.Keyword; + } + + private static boolean hasKeywordSubField(OpenSearchDataType type) { + if (type instanceof OpenSearchTextType textType) { + return textType.getFields().values().stream() + .anyMatch(f -> f.getMappingType() == MappingType.Keyword); + } + return false; + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java index 81261aa7a70..5885db1427b 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java @@ -66,6 +66,7 @@ import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.data.model.ExprIntegerValue; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; @@ -244,7 +245,12 @@ void get_index_mappings_with_IOException() { @Test void get_index_mappings_with_index_patterns() { mockNodeClientIndicesMappings("", null); - assertThrows(IndexNotFoundException.class, () -> client.getIndexMappings("test*")); + ErrorReport report = assertThrows(ErrorReport.class, () -> client.getIndexMappings("test*")); + assertTrue( + report.getMessage().contains("test*") && report.getMessage().contains("no such index"), + "expected index-not-found error message \"" + + report.getMessage() + + "\" to resemble \"no such index [index]\""); } @Test @@ -252,7 +258,7 @@ void get_index_mappings_with_non_exist_index() { when(nodeClient.admin().indices().prepareGetMappings(any()).setLocal(anyBoolean()).get()) .thenThrow(IndexNotFoundException.class); - assertThrows(IndexNotFoundException.class, () -> client.getIndexMappings("non_exist_index")); + assertThrows(ErrorReport.class, () -> 
client.getIndexMappings("non_exist_index")); } @Test @@ -493,6 +499,66 @@ void ml() { assertNotNull(client.getNodeClient()); } + @Test + void get_index_mappings_error_message_includes_single_index() { + String underlyingError = "Connection timeout"; + when(nodeClient.admin().indices()).thenThrow(new RuntimeException(underlyingError)); + + IllegalStateException exception = + assertThrows(IllegalStateException.class, () -> client.getIndexMappings("test_index")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("test_index")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_mappings_error_message_includes_multiple_indices() { + String underlyingError = "Access denied"; + when(nodeClient.admin().indices()).thenThrow(new RuntimeException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, + () -> client.getIndexMappings("index1", "index2", "index3")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("index1")), + () -> assertTrue(exception.getMessage().contains("index2")), + () -> assertTrue(exception.getMessage().contains("index3")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_max_result_windows_error_message_includes_single_index() { + String underlyingError = "Network error"; + when(nodeClient.admin().indices()).thenThrow(new RuntimeException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, () -> client.getIndexMaxResultWindows("test_index")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("test_index")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_max_result_windows_error_message_includes_multiple_indices() { + String underlyingError = "Permission denied"; + when(nodeClient.admin().indices()).thenThrow(new RuntimeException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, + () -> client.getIndexMaxResultWindows("logs-2024", "metrics-2024")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("logs-2024")), + () -> assertTrue(exception.getMessage().contains("metrics-2024")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + public void mockNodeClientIndicesMappings(String indexName, String mappings) { GetMappingsResponse mockResponse = mock(GetMappingsResponse.class); MappingMetadata emptyMapping = mock(MappingMetadata.class); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java index afd210da1ff..6b101a0107a 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java @@ -531,6 +531,70 @@ void ml_with_exception() { assertEquals(Optional.empty(), client.getNodeClient()); } + @Test + void get_index_mappings_error_message_includes_single_index() throws IOException { + String underlyingError = "Network timeout"; + when(restClient.indices().getMapping(any(GetMappingsRequest.class), any())) + .thenThrow(new IOException(underlyingError)); + + IllegalStateException exception = + assertThrows(IllegalStateException.class, () -> client.getIndexMappings("test_index")); + + assertAll( + () -> 
assertTrue(exception.getMessage().contains("test_index")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_mappings_error_message_includes_multiple_indices() throws IOException { + String underlyingError = "Connection refused"; + when(restClient.indices().getMapping(any(GetMappingsRequest.class), any())) + .thenThrow(new IOException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, + () -> client.getIndexMappings("index1", "index2", "index3")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("index1")), + () -> assertTrue(exception.getMessage().contains("index2")), + () -> assertTrue(exception.getMessage().contains("index3")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_max_result_windows_error_message_includes_single_index() throws IOException { + String underlyingError = "Authentication failed"; + when(restClient.indices().getSettings(any(GetSettingsRequest.class), any())) + .thenThrow(new IOException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, () -> client.getIndexMaxResultWindows("test_index")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("test_index")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_max_result_windows_error_message_includes_multiple_indices() throws IOException { + String underlyingError = "Timeout"; + when(restClient.indices().getSettings(any(GetSettingsRequest.class), any())) + .thenThrow(new IOException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, + () -> client.getIndexMaxResultWindows("logs-2024", "metrics-2024")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("logs-2024")), + () -> assertTrue(exception.getMessage().contains("metrics-2024")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + private Map mockFieldMappings(String indexName, String mappings) throws IOException { return ImmutableMap.of(indexName, IndexMetadata.fromXContent(createParser(mappings)).mapping()); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 0734613e522..031b9243f38 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -234,6 +234,9 @@ public void constructIp() { public void constructBoolean() { assertAll( () -> assertEquals(booleanValue(true), tupleValue("{\"boolV\":true}").get("boolV")), + () -> assertEquals(booleanValue(false), tupleValue("{\"boolV\":false}").get("boolV")), + () -> assertEquals(booleanValue(true), tupleValue("{\"boolV\":1}").get("boolV")), + () -> assertEquals(booleanValue(false), tupleValue("{\"boolV\":0}").get("boolV")), () -> assertEquals(booleanValue(true), constructFromObject("boolV", true)), () -> assertEquals(booleanValue(true), constructFromObject("boolV", "true")), () -> assertEquals(booleanValue(true), constructFromObject("boolV", 1)), diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java 
b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java index 572f748fd03..733c2de5213 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java @@ -1245,4 +1245,233 @@ void search_complementedPointsWithNullAsFalse_generatesExistsAndNotInQuery() """, result.toString()); } + + @Test + void search_complementedPointsWithNullAsUnknown_generatesExistsAndNotInQuery() + throws ExpressionNotAnalyzableException { + // Simulates: a NOT IN (12, 13) + // Calcite represents this as SEARCH($0, Sarg[...; NULL AS UNKNOWN]) with complemented points + // SQL three-valued logic: NULL NOT IN (...) evaluates to UNKNOWN (not TRUE), + // so null rows must be excluded. + Sarg sarg = + Sarg.of( + RexUnknownAs.UNKNOWN, + ImmutableRangeSet.builder() + .add(Range.lessThan(BigDecimal.valueOf(12))) + .add(Range.open(BigDecimal.valueOf(12), BigDecimal.valueOf(13))) + .add(Range.greaterThan(BigDecimal.valueOf(13))) + .build()); + RexNode sargLiteral = + builder.makeSearchArgumentLiteral(sarg, typeFactory.createSqlType(SqlTypeName.DECIMAL)); + RexNode call = builder.makeCall(SqlStdOperatorTable.SEARCH, field1, sargLiteral); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must" : [ + { + "bool" : { + "must_not" : [ + { + "terms" : { + "a" : [ + 12.0, + 13.0 + ], + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "exists" : { + "field" : "a", + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } + + @Test + void notLike_keywordField_generatesBoolWithExistsAndMustNot() + throws ExpressionNotAnalyzableException { + // NOT(LIKE(field, pattern)) should generate bool query with must(exists) + mustNot(wildcard) + List arguments = + Arrays.asList(field2, builder.makeLiteral("%Hi%"), builder.makeLiteral(true)); + RexNode likeCall = + PPLFuncImpTable.INSTANCE.resolve(builder, "like", arguments.toArray(new RexNode[0])); + RexNode notCall = builder.makeCall(SqlStdOperatorTable.NOT, likeCall); + QueryBuilder result = PredicateAnalyzer.analyze(notCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must" : [ + { + "exists" : { + "field" : "b", + "boost" : 1.0 + } + } + ], + "must_not" : [ + { + "wildcard" : { + "b.keyword" : { + "wildcard" : "*Hi*", + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } + + @Test + void notGreaterThan_generatesExistsAndMustNotRange() throws ExpressionNotAnalyzableException { + // NOT(a > 12) should generate bool query with must(exists) + mustNot(range) + RexNode gtCall = builder.makeCall(SqlStdOperatorTable.GREATER_THAN, field1, numericLiteral); + RexNode notCall = builder.makeCall(SqlStdOperatorTable.NOT, gtCall); + QueryBuilder result = PredicateAnalyzer.analyze(notCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must" : [ + { + "exists" : { + "field" : "a", + "boost" : 1.0 + } + } + ], + "must_not" : [ + { + "range" : { + "a" : { + "from" : 12, + "to" : null, + "include_lower" : false, + "include_upper" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 
1.0 + } + }\ + """, + result.toString()); + } + + @Test + void notIsNotNull_generatesOnlyMustNotExists() throws ExpressionNotAnalyzableException { + // NOT(IS_NOT_NULL(a)) = IS_NULL(a) should generate must_not(exists) WITHOUT an exists in must + RexNode isNotNullCall = builder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, field1); + RexNode notCall = builder.makeCall(SqlStdOperatorTable.NOT, isNotNullCall); + QueryBuilder result = PredicateAnalyzer.analyze(notCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must_not" : [ + { + "exists" : { + "field" : "a", + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } + + @Test + void notIsTrue_generatesOnlyMustNotTerm() throws ExpressionNotAnalyzableException { + // NOT(IS_TRUE(e)) should generate must_not(term(e, true)) WITHOUT an exists filter + RexNode isTrueCall = builder.makeCall(SqlStdOperatorTable.IS_TRUE, field5); + RexNode notCall = builder.makeCall(SqlStdOperatorTable.NOT, isTrueCall); + QueryBuilder result = PredicateAnalyzer.analyze(notCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must_not" : [ + { + "term" : { + "e" : { + "value" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } + + @Test + void notIsFalse_generatesOnlyMustNotTerm() throws ExpressionNotAnalyzableException { + // NOT(IS_FALSE(e)) should generate must_not(term(e, false)) WITHOUT an exists filter + RexNode isFalseCall = builder.makeCall(SqlStdOperatorTable.IS_FALSE, field5); + RexNode notCall = builder.makeCall(SqlStdOperatorTable.NOT, isFalseCall); + QueryBuilder result = PredicateAnalyzer.analyze(notCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must_not" : [ + { + "term" : { + "e" : { + "value" : false, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngineTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngineTest.java index 38f2ae495e0..fa04395e065 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngineTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngineTest.java @@ -11,12 +11,14 @@ import static org.opensearch.sql.analysis.DataSourceSchemaIdentifierNameResolver.DEFAULT_DATASOURCE_NAME; import static org.opensearch.sql.utils.SystemIndexUtils.TABLE_INFO; +import java.util.Collection; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.DataSourceSchemaName; import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.expression.function.FunctionResolver; import org.opensearch.sql.opensearch.client.OpenSearchClient; import org.opensearch.sql.opensearch.storage.system.OpenSearchSystemIndex; import org.opensearch.sql.storage.Table; @@ -36,6 +38,15 @@ public void getTable() { assertAll(() -> assertNotNull(table), () -> assertTrue(table instanceof OpenSearchIndex)); } + @Test + public void getFunctionsReturnsVectorSearchResolver() { + 
OpenSearchStorageEngine engine = new OpenSearchStorageEngine(client, settings); + Collection functions = engine.getFunctions(); + assertTrue( + functions.stream().anyMatch(f -> f instanceof VectorSearchTableFunctionResolver), + "getFunctions() should contain a VectorSearchTableFunctionResolver"); + } + @Test public void getSystemTable() { OpenSearchStorageEngine engine = new OpenSearchStorageEngine(client, settings); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchIndexTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchIndexTest.java new file mode 100644 index 00000000000..6a9a76a48f0 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchIndexTest.java @@ -0,0 +1,266 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.when; + +import com.google.common.collect.ImmutableMap; +import java.util.LinkedHashMap; +import java.util.Map; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; +import org.opensearch.sql.opensearch.mapping.IndexMapping; + +@ExtendWith(MockitoExtension.class) +class VectorSearchIndexTest { + + @Mock private OpenSearchClient client; + + @Mock private Settings settings; + + @Mock private IndexMapping indexMapping; + + @Test + void buildKnnQueryJsonTopK() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f, 2.0f, 3.0f}, + Map.of("k", "5")); + + String json = index.buildKnnQueryJson(); + assertEquals("{\"knn\":{\"embedding\":{\"vector\":[1.0,2.0,3.0],\"k\":5}}}", json); + } + + @Test + void buildKnnQueryJsonRadialMaxDistance() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f, 2.0f}, + Map.of("max_distance", "10.5")); + + String json = index.buildKnnQueryJson(); + assertEquals("{\"knn\":{\"embedding\":{\"vector\":[1.0,2.0],\"max_distance\":10.5}}}", json); + } + + @Test + void buildKnnQueryJsonRadialMinScore() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {0.5f}, + Map.of("min_score", "0.8")); + + String json = index.buildKnnQueryJson(); + assertEquals("{\"knn\":{\"embedding\":{\"vector\":[0.5],\"min_score\":0.8}}}", json); + } + + @Test + void buildKnnQueryJsonNestedFieldName() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "doc.embedding", + new float[] {1.0f, 2.0f}, + Map.of("k", "10")); + + String json = index.buildKnnQueryJson(); + assertTrue(json.contains("\"doc.embedding\""), "Should contain nested field name with dot"); + } + + @Test + void 
buildKnnQueryJsonMultiElementVector() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f, -2.5f, 0.0f, 3.14f, 100.0f}, + Map.of("k", "3")); + + String json = index.buildKnnQueryJson(); + assertTrue( + json.contains("[1.0,-2.5,0.0,3.14,100.0]"), + "Should contain all vector components with correct comma separation"); + } + + @Test + void buildKnnQueryJsonSingleElementVector() { + VectorSearchIndex index = + new VectorSearchIndex( + client, settings, "test-index", "embedding", new float[] {42.0f}, Map.of("k", "1")); + + String json = index.buildKnnQueryJson(); + assertTrue(json.contains("[42.0]"), "Should contain single-element vector"); + } + + @Test + void buildKnnQueryJsonNumericOptionRenderedUnquoted() { + LinkedHashMap options = new LinkedHashMap<>(); + options.put("k", "5"); + + VectorSearchIndex index = + new VectorSearchIndex( + client, settings, "test-index", "embedding", new float[] {1.0f}, options); + + String json = index.buildKnnQueryJson(); + assertTrue(json.contains("\"k\":5"), "Numeric option should be unquoted"); + } + + @Test + void buildKnnQueryJsonNonNumericOptionRenderedQuoted() { + LinkedHashMap options = new LinkedHashMap<>(); + options.put("k", "5"); + options.put("method", "hnsw"); + + VectorSearchIndex index = + new VectorSearchIndex( + client, settings, "test-index", "embedding", new float[] {1.0f}, options); + + String json = index.buildKnnQueryJson(); + assertTrue(json.contains("\"method\":\"hnsw\""), "Non-numeric option should be quoted"); + assertTrue(json.contains("\"k\":5"), "Numeric option should be unquoted"); + } + + @Test + void buildKnnQueryJsonWithFilterEmbeds() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f, 2.0f}, + Map.of("k", "5"), + FilterType.EFFICIENT); + + String filterJson = "{\"term\":{\"city\":{\"value\":\"Miami\"}}}"; + String json = index.buildKnnQueryJson(filterJson); + + assertTrue(json.contains("\"filter\""), "Should contain filter field"); + assertTrue(json.contains("\"term\""), "Should contain the filter content"); + assertTrue(json.contains("\"k\":5"), "Should still contain k"); + assertTrue(json.contains("\"vector\":[1.0,2.0]"), "Should contain vector"); + } + + @Test + void buildKnnQueryJsonWithFilterRadial() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f}, + Map.of("max_distance", "10.5"), + FilterType.EFFICIENT); + + String filterJson = "{\"range\":{\"rating\":{\"gte\":4.0}}}"; + String json = index.buildKnnQueryJson(filterJson); + + assertTrue(json.contains("\"max_distance\":10.5"), "Should contain max_distance"); + assertTrue(json.contains("\"filter\""), "Should contain filter"); + } + + @Test + void buildKnnQueryJsonNullFilterProducesBaseJson() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f}, + Map.of("k", "5"), + null); + + String json = index.buildKnnQueryJson(null); + String baseJson = index.buildKnnQueryJson(); + + assertEquals(baseJson, json, "null filter should produce same JSON as no-arg version"); + assertFalse(json.contains("\"filter\""), "Should not contain filter field"); + } + + @Test + void buildKnnQueryJsonExcludesFilterType() { + LinkedHashMap options = new LinkedHashMap<>(); + options.put("k", "5"); + + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + 
"embedding", + new float[] {1.0f}, + options, + FilterType.EFFICIENT); + + String json = index.buildKnnQueryJson(); + assertFalse(json.contains("filter_type"), "filter_type should not appear in knn JSON"); + assertTrue(json.contains("\"k\":5"), "k should still be present"); + } + + @Test + void isInstanceOfOpenSearchIndex() { + VectorSearchIndex index = + new VectorSearchIndex( + client, settings, "test-index", "embedding", new float[] {1.0f}, Map.of("k", "5")); + assertTrue(index instanceof OpenSearchIndex); + } + + @Test + void createScanBuilderRejectsIndexWithScoreField() { + // A mapping that declares a user field named _score cannot coexist with the synthetic + // v._score column exposed by vectorSearch(); the guard in createScanBuilder should reject + // it with a clear, user-facing error. + lenient() + .when(settings.getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE)) + .thenReturn(TimeValue.timeValueMinutes(1)); + when(indexMapping.getFieldMappings()) + .thenReturn(Map.of("_score", OpenSearchDataType.of(MappingType.Float))); + when(client.getIndexMappings("test-index")) + .thenReturn(ImmutableMap.of("test-index", indexMapping)); + + VectorSearchIndex index = + new VectorSearchIndex( + client, settings, "test-index", "embedding", new float[] {1.0f}, Map.of("k", "5")); + + IllegalArgumentException ex = + assertThrows(IllegalArgumentException.class, index::createScanBuilder); + assertTrue( + ex.getMessage().contains("_score"), + "error message should mention the colliding _score field"); + assertTrue( + ex.getMessage().contains("collides"), + "error message should describe the collision, got: " + ex.getMessage()); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementationTest.java new file mode 100644 index 00000000000..7bd64838876 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementationTest.java @@ -0,0 +1,778 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; +import org.opensearch.sql.storage.Table; + +@ExtendWith(MockitoExtension.class) +class VectorSearchTableFunctionImplementationTest { + + @Mock private OpenSearchClient client; + + @Mock private Settings settings; + + // No-op capability — tests in this class don't exercise the k-NN plugin probe. + // Dedicated tests for the probe live in KnnPluginCapabilityTest. 
+ private final KnnPluginCapability knnCapability = + org.mockito.Mockito.mock(KnnPluginCapability.class); + + @Test + void testValueOfThrows() { + VectorSearchTableFunctionImplementation impl = createImpl(); + UnsupportedOperationException ex = + assertThrows(UnsupportedOperationException.class, () -> impl.valueOf()); + assertTrue(ex.getMessage().contains("only supported in FROM clause")); + } + + @Test + void testType() { + VectorSearchTableFunctionImplementation impl = createImpl(); + assertEquals(ExprCoreType.STRUCT, impl.type()); + } + + @Test + void testToString() { + VectorSearchTableFunctionImplementation impl = createImpl(); + String str = impl.toString(); + assertTrue(str.contains("vectorsearch")); + assertTrue(str.contains("table=")); + assertTrue(str.contains("my-index")); + } + + @Test + void testApplyArguments() { + VectorSearchTableFunctionImplementation impl = createImpl(); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testApplyArgumentsDoesNotProbeKnnCapability() { + // Contract: applyArguments() runs during analysis (including _explain) and must NOT invoke + // the k-NN plugin probe. The probe is deferred to scan open() so pluginless clusters can + // still explain and validate vectorSearch() queries locally. + KnnPluginCapability observingCapability = org.mockito.Mockito.mock(KnnPluginCapability.class); + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, observingCapability); + impl.applyArguments(); + org.mockito.Mockito.verify(observingCapability, org.mockito.Mockito.never()).requireInstalled(); + } + + @Test + void testApplyArgumentsWithBracketedVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0, 3.0]", "k=5"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testApplyArgumentsWithUnbracketedVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "1.0, 2.0, 3.0", "k=5"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testUnknownOptionKeyThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=10,method.ef_search=100"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Unknown option key")); + assertTrue(ex.getMessage().contains("method.ef_search")); + } + + @Test + void testApplyArgumentsWithMaxDistance() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "max_distance=10.0"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testApplyArgumentsWithMinScore() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "min_score=0.5"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void 
testUnknownOptionKeyOnlyThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "not_a_key=100"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Unknown option key")); + } + + @Test + void testParseOptionsMultiple() { + Map opts = + VectorSearchTableFunctionImplementation.parseOptions("k=5,max_distance=10.0"); + assertEquals("5", opts.get("k")); + assertEquals("10.0", opts.get("max_distance")); + } + + @Test + void testMalformedOptionSegmentThrows() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=5,badoption")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void testDuplicateOptionKeyThrows() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=5,k=10")); + assertTrue(ex.getMessage().contains("Duplicate option key")); + } + + @Test + void testNoRequiredOptionThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", ""); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Missing required option")); + } + + @Test + void testEmptyVectorThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must not be empty")); + } + + @Test + void testMalformedVectorComponentThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, abc, 3.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid vector component")); + } + + @Test + void testNonFiniteVectorComponentThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, Infinity, 3.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a finite number")); + } + + @Test + void testMissingArgumentThrows() { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertEquals("Missing required argument: option", ex.getMessage()); + } + + @Test + void testInvalidFieldNameThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "field\"injection", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + 
assertTrue(ex.getMessage().contains("Invalid field name")); + } + + @Test + void testNestedFieldNameAllowed() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "doc.embedding", "[1.0, 2.0]", "k=5"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testNonNumericKThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=abc"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be an integer")); + } + + @Test + void testNonNumericMaxDistanceThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "max_distance=notanumber"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a number")); + } + + @Test + void testInfiniteMinScoreThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "min_score=Infinity"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a finite number")); + } + + @Test + void testMutualExclusivityKAndMaxDistanceThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=5,max_distance=10.0"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Only one of")); + } + + @Test + void testMutualExclusivityKAndMinScoreThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=5,min_score=0.5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Only one of")); + } + + @Test + void testMutualExclusivityAllThreeThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs( + "my-index", "embedding", "[1.0, 2.0]", "k=5,max_distance=10.0,min_score=0.5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Only one of")); + } + + @Test + void testKTooSmallThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=0"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("k must be between 1 and 10000")); + } + + @Test + void testKTooLargeThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=10001"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("k must be between 1 and 10000")); + } + + @Test + void testKBoundaryValuesAllowed() { + // k=1 should work + VectorSearchTableFunctionImplementation impl1 = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=1"); + assertTrue(impl1.applyArguments() instanceof VectorSearchIndex); + + // k=10000 should work + 
VectorSearchTableFunctionImplementation impl2 = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=10000"); + assertTrue(impl2.applyArguments() instanceof VectorSearchIndex); + } + + @Test + void testNonNamedArgThrows() { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = List.of(DSL.literal("my-index")); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("requires named arguments")); + } + + @Test + void testNullArgNameThrows() { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument(null, DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("requires named arguments")); + } + + @Test + void testNaNVectorComponentThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, NaN, 3.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a finite number")); + } + + @Test + void testEmptyOptionKeyThrows() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("=value")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void testEmptyOptionValueThrows() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("key=")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void testNegativeKThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=-1"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("k must be between 1 and 10000")); + } + + @Test + void testNaNMaxDistanceThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "max_distance=NaN"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a finite number")); + } + + @Test + void testNaNMinScoreThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "min_score=NaN"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a finite number")); + } + + @Test + void testCaseInsensitiveArgLookup() { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + 
DSL.namedArgument("TABLE", DSL.literal("my-index")), + DSL.namedArgument("FIELD", DSL.literal("embedding")), + DSL.namedArgument("VECTOR", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("OPTION", DSL.literal("k=5"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testInvalidFilterTypeRejects() { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("option", DSL.literal("k=5,filter_type=invalid"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, impl::applyArguments); + assertTrue(ex.getMessage().contains("filter_type must be one of")); + } + + @Test + void testFilterTypePostAccepted() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=5,filter_type=post"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testFilterTypeEfficientAccepted() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=5,filter_type=efficient"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testParseOptionsPreservesFilterTypeValue() { + Map options = + VectorSearchTableFunctionImplementation.parseOptions("k=5,filter_type=post"); + assertEquals("post", options.get("filter_type")); + } + + @Test + void applyArguments_rejectsInvalidTableName() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("idx\"; DROP", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + assertTrue( + ex.getMessage() + .contains("must contain only alphanumeric characters, dots, underscores, or hyphens")); + } + + @Test + void applyArguments_rejectsAllRoutingTarget() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("_all", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + assertTrue(ex.getMessage().contains("_all")); + } + + @Test + void applyArguments_rejectsSingleDotTable() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs(".", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + } + + @Test + void applyArguments_rejectsDoubleDotTable() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("..", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + } 
+ + @Test + void applyArguments_rejectsWildcardTableWithDedicatedMessage() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("sql_vector_*", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + assertTrue(ex.getMessage().contains("wildcards ('*')")); + assertTrue(ex.getMessage().contains("single concrete index")); + } + + @Test + void applyArguments_rejectsBareStarTableWithDedicatedMessage() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("*", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("wildcards ('*')")); + } + + @Test + void applyArguments_rejectsMultiTargetTableWithDedicatedMessage() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("idx_a,idx_b", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + assertTrue(ex.getMessage().contains("multi-target")); + assertTrue(ex.getMessage().contains("single concrete index")); + } + + @Test + void applyArguments_rejectsMidNameStarTable() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("foo*bar", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("wildcards ('*')")); + } + + @Test + void validateNamedArgs_rejectsDuplicateNames() { + // Two occurrences of "table" reach the Implementation layer directly (bypassing the resolver). 
+ FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument("table", DSL.literal("a")), + DSL.namedArgument("table", DSL.literal("b")), + DSL.namedArgument("vector", DSL.literal("[1.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Duplicate argument name")); + assertTrue(ex.getMessage().contains("table")); + } + + // ── Option parsing: empty value, whitespace, unknown keys ──────────── + + @Test + void parseOptions_rejectsEmptyValue() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void parseOptions_rejectsEmptyValueInMidSegment() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=,filter_type=post")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void parseOptions_trimsWhitespaceAroundKeyAndValue() { + Map options = + VectorSearchTableFunctionImplementation.parseOptions(" k = 5 , filter_type = post "); + assertEquals("5", options.get("k")); + assertEquals("post", options.get("filter_type")); + } + + @Test + void applyArguments_rejectsUnknownOptionKey() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs( + "my-index", "embedding", "[1.0, 2.0]", "k=5,method_parameters.ef_search=100"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Unknown option key")); + assertTrue(ex.getMessage().contains("method_parameters.ef_search")); + } + + // ── Vector parsing: non-comma separator ───────────────────────────── + + @Test + void applyArguments_rejectsSemicolonSeparatorInVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0;2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("vector=")); + assertTrue(ex.getMessage().contains("comma-separated")); + } + + @Test + void applyArguments_rejectsColonSeparatorInVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0:2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("vector=")); + } + + @Test + void applyArguments_rejectsPipeSeparatorInVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0|2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("vector=")); + } + + // ── Option bounds: negative k, min_score, max_distance ────────────── + + @Test + void applyArguments_negativeKMessageCitesRange() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=-3"); + 
ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("1")); + assertTrue(ex.getMessage().contains("10000")); + } + + @Test + void applyArguments_rejectsNegativeMinScore() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "min_score=-0.5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("min_score")); + assertTrue(ex.getMessage().contains("non-negative")); + } + + @Test + void applyArguments_rejectsNegativeMaxDistance() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "max_distance=-1.0"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("max_distance")); + assertTrue(ex.getMessage().contains("non-negative")); + } + + @Test + void applyArguments_acceptsZeroMinScore() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "min_score=0"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void applyArguments_acceptsZeroMaxDistance() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "max_distance=0"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + // ── Vector parsing: trailing / empty components (PR #5381 review) ───── + + @Test + void applyArguments_rejectsTrailingCommaInVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0,2.0,]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid vector component")); + assertTrue(ex.getMessage().contains("trailing or consecutive commas")); + } + + @Test + void applyArguments_rejectsConsecutiveCommasInVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0,,2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid vector component")); + assertTrue(ex.getMessage().contains("trailing or consecutive commas")); + } + + @Test + void applyArguments_rejectsLeadingCommaInVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[,1.0,2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid vector component")); + } + + // ── Option parsing: empty segments (PR #5381 review) ───────────────── + + @Test + void parseOptions_rejectsTrailingEmptySegment() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=5,")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + assertTrue(ex.getMessage().contains("trailing or consecutive commas")); + } + + @Test + void parseOptions_rejectsLeadingEmptySegment() { + ExpressionEvaluationException ex = + assertThrows( + 
ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions(",k=5")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void parseOptions_rejectsConsecutiveCommas() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=5,,filter_type=post")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + // ── Unknown-key error lists supported keys in stable order (PR #5381 review) ── + + @Test + void applyArguments_unknownOptionKeyErrorListsSupportedKeysInStableOrder() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=5,bogus=1"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + // Match the rendered list literal (e.g. "[k, max_distance, min_score, filter_type]") rather + // than searching for the substring "k", which would match the first "k" in "Unknown option + // key" and reduce the assertion to a tautology. + assertTrue( + ex.getMessage().contains("[k, max_distance, min_score, filter_type]"), + "expected stable key order in error; got: " + ex.getMessage()); + } + + @Test + void parseOptions_emptyStringReturnsEmptyMap() { + // The wholly empty option string is explicitly allowed through parseOptions so it flows to + // the "Missing required option" gate in validateOptions. Pins that contract. + Map opts = VectorSearchTableFunctionImplementation.parseOptions(""); + assertTrue(opts.isEmpty()); + } + + private VectorSearchTableFunctionImplementation createImpl() { + return createImplWithArgs("my-index", "embedding", "[1.0, 2.0, 3.0]", "k=5"); + } + + private VectorSearchTableFunctionImplementation createImplWithArgs( + String table, String field, String vector, String option) { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument("table", DSL.literal(table)), + DSL.namedArgument("field", DSL.literal(field)), + DSL.namedArgument("vector", DSL.literal(vector)), + DSL.namedArgument("option", DSL.literal(option))); + return new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolverTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolverTest.java new file mode 100644 index 00000000000..c6fece7bf32 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolverTest.java @@ -0,0 +1,208 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.util.List; +import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import 
org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.function.FunctionBuilder; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.expression.function.FunctionProperties; +import org.opensearch.sql.expression.function.FunctionSignature; +import org.opensearch.sql.expression.function.TableFunctionImplementation; +import org.opensearch.sql.opensearch.client.OpenSearchClient; + +@ExtendWith(MockitoExtension.class) +class VectorSearchTableFunctionResolverTest { + + @Mock private OpenSearchClient client; + + @Mock private Settings settings; + + @Mock private FunctionProperties functionProperties; + + @Test + void testResolve() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0, 3.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + + Pair resolution = resolver.resolve(functionSignature); + + assertEquals(functionName, resolution.getKey().getFunctionName()); + assertEquals(functionName, resolver.getFunctionName()); + assertEquals(List.of(STRING, STRING, STRING, STRING), resolution.getKey().getParamTypeList()); + + TableFunctionImplementation impl = + (TableFunctionImplementation) resolution.getValue().apply(functionProperties, expressions); + assertTrue(impl instanceof VectorSearchTableFunctionImplementation); + } + + @Test + void testWrongArgumentCount() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + + Pair resolution = resolver.resolve(functionSignature); + FunctionBuilder builder = resolution.getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("requires 4 arguments")); + } + + @Test + void testTooManyArguments() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0]")), + DSL.namedArgument("option", DSL.literal("k=5")), + DSL.namedArgument("extra", DSL.literal("unexpected"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + + Pair resolution = resolver.resolve(functionSignature); + FunctionBuilder builder = resolution.getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> 
builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("requires 4 arguments")); + } + + @Test + void testZeroArguments() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = List.of(); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + + Pair resolution = resolver.resolve(functionSignature); + FunctionBuilder builder = resolution.getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("requires 4 arguments")); + } + + @Test + void resolve_rejectsPositionalArgument() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + // One positional literal mixed with three named arguments. Arity passes, but the resolver + // must reject this before planning so the SQL layer returns a clean 400 rather than a 200 + // with zero rows. + List expressions = + List.of( + DSL.literal("my-index"), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + FunctionBuilder builder = resolver.resolve(functionSignature).getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("requires named arguments")); + } + + @Test + void resolve_rejectsDuplicateNamedArgument() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = + List.of( + DSL.namedArgument("table", DSL.literal("a")), + DSL.namedArgument("table", DSL.literal("b")), + DSL.namedArgument("vector", DSL.literal("[1.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + FunctionBuilder builder = resolver.resolve(functionSignature).getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("Duplicate argument name")); + assertTrue(ex.getMessage().contains("table")); + } + + @Test + void resolve_rejectsUnknownArgumentName() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("bogus", DSL.literal("k=5"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + 
FunctionBuilder builder = resolver.resolve(functionSignature).getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("Unknown argument name")); + assertTrue(ex.getMessage().contains("bogus")); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapabilityTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapabilityTest.java new file mode 100644 index 00000000000..147a5a093ce --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapabilityTest.java @@ -0,0 +1,129 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.capability; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.List; +import java.util.Optional; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.action.admin.cluster.node.info.NodeInfo; +import org.opensearch.action.admin.cluster.node.info.NodesInfoRequest; +import org.opensearch.action.admin.cluster.node.info.NodesInfoResponse; +import org.opensearch.action.admin.cluster.node.info.PluginsAndModules; +import org.opensearch.common.action.ActionFuture; +import org.opensearch.plugins.PluginInfo; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.transport.client.AdminClient; +import org.opensearch.transport.client.ClusterAdminClient; +import org.opensearch.transport.client.node.NodeClient; + +@ExtendWith(MockitoExtension.class) +class KnnPluginCapabilityTest { + + @Mock private OpenSearchClient client; + @Mock private NodeClient nodeClient; + @Mock private AdminClient adminClient; + @Mock private ClusterAdminClient clusterAdminClient; + @Mock private ActionFuture nodesInfoFuture; + + @Test + void skipsWhenNodeClientAbsent() { + when(client.getNodeClient()).thenReturn(Optional.empty()); + KnnPluginCapability capability = new KnnPluginCapability(client); + // No exception — REST-client mode cannot probe; execution-time errors remain the signal. 
+ assertDoesNotThrow(capability::requireInstalled); + } + + @Test + void passesWhenKnnPluginInstalled() { + stubNodesInfo(pluginInfo("org.opensearch.knn.plugin.KNNPlugin")); + KnnPluginCapability capability = new KnnPluginCapability(client); + assertDoesNotThrow(capability::requireInstalled); + } + + @Test + void throwsWhenKnnPluginAbsent() { + stubNodesInfo(pluginInfo("org.opensearch.security.OpenSearchSecurityPlugin")); + KnnPluginCapability capability = new KnnPluginCapability(client); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, capability::requireInstalled); + assertTrue( + ex.getMessage().contains("k-NN plugin"), + "Expected k-NN plugin message, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("not installed"), + "Expected 'not installed' phrasing, got: " + ex.getMessage()); + } + + @Test + void cachesSuccessfulProbeResult() { + stubNodesInfo(pluginInfo("org.opensearch.knn.plugin.KNNPlugin")); + KnnPluginCapability capability = new KnnPluginCapability(client); + capability.requireInstalled(); + capability.requireInstalled(); + capability.requireInstalled(); + // Probe fires once regardless of how many times requireInstalled() is called. + verify(clusterAdminClient, times(1)).nodesInfo(any(NodesInfoRequest.class)); + } + + @Test + void cachesNegativeProbeResult() { + stubNodesInfo(pluginInfo("org.opensearch.security.OpenSearchSecurityPlugin")); + KnnPluginCapability capability = new KnnPluginCapability(client); + assertThrows(ExpressionEvaluationException.class, capability::requireInstalled); + assertThrows(ExpressionEvaluationException.class, capability::requireInstalled); + verify(clusterAdminClient, times(1)).nodesInfo(any(NodesInfoRequest.class)); + } + + @Test + void doesNotCacheOnProbeFailure() { + when(client.getNodeClient()).thenReturn(Optional.of(nodeClient)); + when(nodeClient.admin()).thenReturn(adminClient); + when(adminClient.cluster()).thenReturn(clusterAdminClient); + when(clusterAdminClient.nodesInfo(any(NodesInfoRequest.class))).thenReturn(nodesInfoFuture); + when(nodesInfoFuture.actionGet()).thenThrow(new RuntimeException("transport error")); + + KnnPluginCapability capability = new KnnPluginCapability(client); + assertDoesNotThrow(capability::requireInstalled); // probe failed — treat as unknown + assertDoesNotThrow(capability::requireInstalled); + // Probe retries on each call after a failure — failures are not cached. + verify(clusterAdminClient, times(2)).nodesInfo(any(NodesInfoRequest.class)); + } + + private void stubNodesInfo(PluginInfo... 
plugins) { + when(client.getNodeClient()).thenReturn(Optional.of(nodeClient)); + when(nodeClient.admin()).thenReturn(adminClient); + when(adminClient.cluster()).thenReturn(clusterAdminClient); + when(clusterAdminClient.nodesInfo(any(NodesInfoRequest.class))).thenReturn(nodesInfoFuture); + + NodeInfo nodeInfo = mock(NodeInfo.class); + PluginsAndModules pam = mock(PluginsAndModules.class); + when(nodeInfo.getInfo(PluginsAndModules.class)).thenReturn(pam); + when(pam.getPluginInfos()).thenReturn(List.of(plugins)); + + NodesInfoResponse response = mock(NodesInfoResponse.class); + when(response.getNodes()).thenReturn(List.of(nodeInfo)); + when(nodesInfoFuture.actionGet()).thenReturn(response); + } + + private PluginInfo pluginInfo(String classname) { + PluginInfo pluginInfo = mock(PluginInfo.class); + when(pluginInfo.getClassname()).thenReturn(classname); + return pluginInfo; + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java index a91c99e26cd..bf83d972dbe 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java @@ -39,7 +39,6 @@ import org.apache.calcite.sql.type.SqlTypeFactoryImpl; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.ImmutableBitSet; -import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -47,10 +46,8 @@ import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.setting.Settings.Key; -import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; -import org.opensearch.sql.opensearch.storage.scan.context.AggPushDownAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggregationBuilderAction; +import org.opensearch.sql.opensearch.storage.scan.context.AggSpec; import org.opensearch.sql.opensearch.storage.scan.context.FilterDigest; import org.opensearch.sql.opensearch.storage.scan.context.LimitDigest; import org.opensearch.sql.opensearch.storage.scan.context.OSRequestBuilderAction; @@ -60,6 +57,7 @@ @ExtendWith(MockitoExtension.class) public class CalciteIndexScanCostTest { static final RelDataTypeFactory typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT); + private static final OSRequestBuilderAction NO_OP_ACTION = req -> {}; final RexBuilder builder = new RexBuilder(typeFactory); @Mock private static RelOptCluster cluster; @@ -210,17 +208,12 @@ void test_cost_on_aggregate_pushdown() { null, List.of()); when(mq.getRowCount(aggregate)).thenReturn(1000d); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - }; lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(1)); lenient().when(relDataType.getFieldCount()).thenReturn(1); lenient().when(table.getRowType()).thenReturn(relDataType); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals(1800, 
Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -233,11 +226,6 @@ void test_cost_on_aggregate_pushdown_with_one_aggCall() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - }; AggregateCall countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -266,7 +254,7 @@ public void apply(OpenSearchRequestBuilder requestBuilder) {} lenient().when(table.getRowType()).thenReturn(relDataType); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals(2812.5, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -279,11 +267,6 @@ void test_cost_on_aggregate_pushdown_with_two_aggCall() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - }; AggregateCall countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -325,7 +308,7 @@ public void apply(OpenSearchRequestBuilder requestBuilder) {} lenient().when(table.getRowType()).thenReturn(relDataType); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals( 3836.2500429153442, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -339,16 +322,8 @@ void test_cost_on_aggregate_pushdown_with_one_aggCall_with_script() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - - @Override - public long getScriptCount() { - return 1; - } - }; + AggSpec aggSpec = mock(AggSpec.class); + when(aggSpec.getScriptCount()).thenReturn(1L); AggregateCall countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -375,9 +350,10 @@ public long getScriptCount() { lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(2)); lenient().when(relDataType.getFieldCount()).thenReturn(2); lenient().when(table.getRowType()).thenReturn(relDataType); + scan.getPushDownContext().setAggSpec(aggSpec); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals( 2913.7500643730164, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -474,16 +450,8 @@ void test_cost_on_aggregate_pushdown_along_with_others() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - - @Override - public long getScriptCount() { 
- return 1; - } - }; + AggSpec aggSpec = mock(AggSpec.class); + when(aggSpec.getScriptCount()).thenReturn(1L); AggregateCall countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -510,6 +478,7 @@ public long getScriptCount() { lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(2)); lenient().when(relDataType.getFieldCount()).thenReturn(2); lenient().when(table.getRowType()).thenReturn(relDataType); + scan.getPushDownContext().setAggSpec(aggSpec); List projectDigest1 = List.of("A", "B"); scan.getPushDownContext() @@ -517,19 +486,15 @@ public long getScriptCount() { new PushDownOperation( PushDownType.PROJECT, projectDigest1, (OSRequestBuilderAction) req -> {})); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); List projectDigest2 = List.of("COUNT"); scan.getPushDownContext() - .add( - new PushDownOperation( - PushDownType.PROJECT, projectDigest2, (AggregationBuilderAction) req -> {})); + .add(new PushDownOperation(PushDownType.PROJECT, projectDigest2, NO_OP_ACTION)); scan.getPushDownContext() .add(new PushDownOperation(PushDownType.SORT, null, (OSRequestBuilderAction) req -> {})); LimitDigest limitDigest = new LimitDigest(100, 0); scan.getPushDownContext() - .add( - new PushDownOperation( - PushDownType.LIMIT, limitDigest, (AggregationBuilderAction) req -> {})); + .add(new PushDownOperation(PushDownType.LIMIT, limitDigest, NO_OP_ACTION)); lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(projectDigest2.size())); assertEquals( 2102.8500643730163, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilderTest.java new file mode 100644 index 00000000000..ce2f2efb824 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilderTest.java @@ -0,0 +1,234 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; + +import com.google.common.collect.ImmutableList; +import java.util.Collections; +import org.junit.jupiter.api.Test; +import org.opensearch.index.query.WrapperQueryBuilder; +import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.planner.logical.LogicalAggregation; +import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalPlan; +import org.opensearch.sql.planner.logical.LogicalProject; +import org.opensearch.sql.planner.logical.LogicalSort; +import 
org.opensearch.sql.planner.logical.LogicalValues; + +class VectorSearchIndexScanBuilderTest { + + private VectorSearchIndexScanBuilder newScanBuilder() { + var requestBuilder = + new OpenSearchRequestBuilder( + mock(OpenSearchExprValueFactory.class), 10000, mock(Settings.class)); + var queryBuilder = + new VectorSearchQueryBuilder( + requestBuilder, new WrapperQueryBuilder("{\"knn\":{}}"), java.util.Map.of("k", "5")); + return new VectorSearchIndexScanBuilder(queryBuilder, rb -> mock(OpenSearchIndexScan.class)); + } + + private static LogicalProject project(LogicalPlan input) { + NamedExpression field = DSL.named("id", DSL.ref("id", ExprCoreType.STRING)); + return new LogicalProject(input, ImmutableList.of(field), ImmutableList.of()); + } + + private static LogicalFilter filter(LogicalPlan input) { + return new LogicalFilter( + input, DSL.less(DSL.ref("price", ExprCoreType.INTEGER), DSL.literal(150))); + } + + private static LogicalSort sort(LogicalPlan input) { + return new LogicalSort( + input, + ImmutableList.of( + org.apache.commons.lang3.tuple.Pair.of( + Sort.SortOption.DEFAULT_DESC, DSL.ref("price", ExprCoreType.INTEGER)))); + } + + private static LogicalLimit limit(LogicalPlan input, int offset) { + return new LogicalLimit(input, 10, offset); + } + + private static LogicalAggregation aggregation(LogicalPlan input) { + return new LogicalAggregation(input, Collections.emptyList(), Collections.emptyList(), false); + } + + @Test + void pushDownAggregationIsRejected() { + var scanBuilder = newScanBuilder(); + + var agg = + new LogicalAggregation( + new LogicalValues(Collections.emptyList()), + Collections.emptyList(), + Collections.emptyList(), + false); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, () -> scanBuilder.pushDownAggregation(agg)); + assertTrue( + ex.getMessage().contains("Aggregations are not supported"), + "Error should state aggregations are not supported; actual: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("vectorSearch"), + "Error should mention vectorSearch; actual: " + ex.getMessage()); + } + + @Test + void validatePlanRejectsOuterFilterOverSubqueryProject() { + // Models: SELECT * FROM (SELECT v.id FROM vs(...) AS v) t WHERE t.price < 150 + // Shape after optimizer: Project(outer) → Filter → Project(inner) → scanBuilder + var scanBuilder = newScanBuilder(); + LogicalPlan root = project(filter(project(scanBuilder))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + assertTrue( + ex.getMessage().contains("Outer WHERE on a vectorSearch() subquery"), + "Error should mention outer WHERE on subquery; actual: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("silently yield zero rows"), + "Error should explain silent zero rows; actual: " + ex.getMessage()); + } + + @Test + void validatePlanRejectsDoubleWrappedOuterFilter() { + // Models nested subqueries: + // SELECT * FROM (SELECT * FROM (SELECT v.id FROM vs(...) AS v) t1) t2 WHERE t2.price < 150 + var scanBuilder = newScanBuilder(); + LogicalPlan root = filter(project(project(scanBuilder))); + + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + } + + @Test + void validatePlanAllowsFilterDirectlyAboveScanBuilder() { + // Models: SELECT v.id FROM vs(...) AS v WHERE v.gender='M' + // Here the filter would normally be pushed down and removed, but if it were kept (e.g. 
a + // non-pushdownable predicate), validatePlan must not reject it — it is already at the + // vectorSearch level, not an outer filter. + var scanBuilder = newScanBuilder(); + LogicalPlan root = project(filter(scanBuilder)); + + assertDoesNotThrow(() -> scanBuilder.validatePlan(root)); + } + + @Test + void validatePlanAllowsInnerFilterWrappedInOuterProject() { + // Models: SELECT * FROM (SELECT v.id FROM vs(...) AS v WHERE v.gender='M') t + // After pushdown the inner filter may remain when non-pushdownable; importantly, there is no + // outer filter — only outer projects wrapping an inner filter directly on scanBuilder. + var scanBuilder = newScanBuilder(); + LogicalPlan root = project(project(filter(scanBuilder))); + + assertDoesNotThrow(() -> scanBuilder.validatePlan(root)); + } + + @Test + void validatePlanRejectsFilterProjectFilterShape() { + // Models: SELECT * FROM (SELECT v.id FROM vs(...) AS v WHERE v.gender='M') t + // WHERE t.price < 150 + // Shape: Filter(outer) → Project(subquery) → Filter(inner) → scanBuilder + // The outer filter is still separated from the scan by the subquery Project; the inner + // filter sitting between the Project and the scan does not erase that boundary. Without + // preserving the project marker across the inner filter, the walker would miss this shape. + var scanBuilder = newScanBuilder(); + LogicalPlan root = filter(project(filter(scanBuilder))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + assertTrue( + ex.getMessage().contains("Outer WHERE on a vectorSearch() subquery"), + "Error should mention outer WHERE on subquery; actual: " + ex.getMessage()); + } + + @Test + void validatePlanAllowsNoFilterAtAll() { + // Baseline: no WHERE anywhere. SELECT * FROM (SELECT v.id FROM vs(...) AS v) t + var scanBuilder = newScanBuilder(); + LogicalPlan root = project(project(scanBuilder)); + + assertDoesNotThrow(() -> scanBuilder.validatePlan(root)); + } + + @Test + void validatePlanAllowsBareScanBuilder() { + // Defensive: a plan that is just the scan builder itself. + var scanBuilder = newScanBuilder(); + + assertDoesNotThrow(() -> scanBuilder.validatePlan(scanBuilder)); + } + + @Test + void validatePlanRejectsOuterSortOverSubqueryProject() { + // Models: SELECT * FROM (SELECT v.id FROM vs(...) AS v) t ORDER BY t.price + // Shape: Sort(outer) → Project(subquery) → scanBuilder + // Outer ORDER BY would be applied only after top-k ANN results, producing an order the user + // did not ask for (vector distance ordering leaks through when rows are fewer than expected). + var scanBuilder = newScanBuilder(); + LogicalPlan root = sort(project(scanBuilder)); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + assertTrue( + ex.getMessage().contains("Outer ORDER BY on a vectorSearch() subquery"), + "Error should mention outer ORDER BY on subquery; actual: " + ex.getMessage()); + } + + @Test + void validatePlanRejectsOuterOffsetOverSubqueryProject() { + // Models: SELECT * FROM (SELECT v.id FROM vs(...) AS v) t LIMIT 10 OFFSET 5 + // Outer OFFSET silently skips the top-N nearest rows chosen by ANN, so the remaining rows + // would be a truncated tail of the k-NN result set rather than the user's intended window. 
+ var scanBuilder = newScanBuilder(); + LogicalPlan root = limit(project(scanBuilder), 5); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + assertTrue( + ex.getMessage().contains("Outer OFFSET on a vectorSearch() subquery"), + "Error should mention outer OFFSET on subquery; actual: " + ex.getMessage()); + } + + @Test + void validatePlanAllowsOuterLimitWithoutOffsetOverSubquery() { + // Outer LIMIT with offset=0 just caps row count and is safe over a subquery — reject only + // non-zero OFFSET. Locks in the offset==0 boundary of the guard. + var scanBuilder = newScanBuilder(); + LogicalPlan root = limit(project(scanBuilder), 0); + + assertDoesNotThrow(() -> scanBuilder.validatePlan(root)); + } + + @Test + void validatePlanRejectsOuterAggregationOverSubqueryProject() { + // Models: SELECT COUNT(*) FROM (SELECT v.id FROM vs(...) AS v) t + // (Or outer GROUP BY / DISTINCT, both of which rewrite to LogicalAggregation.) The outer + // aggregation would run on a truncated top-k slice rather than a meaningful population, + // masking the fact that aggregations are not supported on vectorSearch() in this preview. + var scanBuilder = newScanBuilder(); + LogicalPlan root = aggregation(project(scanBuilder)); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + assertTrue( + ex.getMessage().contains("Outer GROUP BY / aggregation / DISTINCT on a vectorSearch()"), + "Error should mention outer aggregation on subquery; actual: " + ex.getMessage()); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanTest.java new file mode 100644 index 00000000000..3fa2adec88a --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanTest.java @@ -0,0 +1,39 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; + +import org.junit.jupiter.api.Test; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.request.OpenSearchRequest; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; + +class VectorSearchIndexScanTest { + + @Test + void openProbesKnnPluginBeforeFetch() { + OpenSearchClient client = mock(OpenSearchClient.class); + OpenSearchRequest request = mock(OpenSearchRequest.class); + KnnPluginCapability capability = mock(KnnPluginCapability.class); + doThrow(new ExpressionEvaluationException("k-NN plugin missing")) + .when(capability) + .requireInstalled(); + + VectorSearchIndexScan scan = new VectorSearchIndexScan(client, 10, request, capability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, scan::open); + assertTrue(ex.getMessage().contains("k-NN plugin")); + // Capability threw, so the underlying client must not have been touched for this scan. 
+ verify(client, never()).search(request); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilderTest.java new file mode 100644 index 00000000000..b02d680af15 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilderTest.java @@ -0,0 +1,857 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import org.apache.lucene.search.join.ScoreMode; +import org.junit.jupiter.api.Test; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.WrapperQueryBuilder; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.storage.FilterType; +import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalValues; + +class VectorSearchQueryBuilderTest { + + @Test + void knnQuerySetAsScoringQuery() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + QueryBuilder query = requestBuilder.getSourceBuilder().query(); + assertTrue( + query instanceof WrapperQueryBuilder, + "knn query should be set directly as top-level query (scoring context)"); + } + + @Test + void pushDownFilterKeepsKnnInScoringContext() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // Simulate WHERE name = 'John' + var condition = DSL.equal(new ReferenceExpression("name", STRING), DSL.literal("John")); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + boolean pushed = builder.pushDownFilter(filter); + + assertTrue(pushed, "pushDownFilter should succeed"); + QueryBuilder resultQuery = requestBuilder.getSourceBuilder().query(); + assertTrue(resultQuery instanceof BoolQueryBuilder, "Result should be a BoolQuery"); + BoolQueryBuilder boolQuery = (BoolQueryBuilder) resultQuery; + assertEquals(1, boolQuery.must().size(), "knn query should be in must (scoring context)"); + assertEquals(1, boolQuery.filter().size(), "WHERE predicate should be 
in filter (non-scoring)"); + assertTrue( + boolQuery.must().get(0) instanceof WrapperQueryBuilder, + "must clause should contain the original knn WrapperQueryBuilder"); + } + + @Test + void pushDownLimitWithinKSucceeds() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 3, 0); + + boolean pushed = builder.pushDownLimit(limit); + assertTrue(pushed, "LIMIT within k should succeed"); + } + + @Test + void pushDownLimitExceedingKThrows() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 10, 0); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownLimit(limit)); + assertTrue(ex.getMessage().contains("LIMIT 10 exceeds k=5")); + } + + @Test + void pushDownLimitEqualToKSucceeds() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 5, 0); + + boolean pushed = builder.pushDownLimit(limit); + assertTrue(pushed, "LIMIT equal to k should succeed"); + } + + @Test + void pushDownLimitRadialModeNoRestriction() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("max_distance", "10.0")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 100, 0); + + boolean pushed = builder.pushDownLimit(limit); + assertTrue(pushed, "Radial mode should not restrict LIMIT"); + } + + @Test + void pushDownLimitMinScoreModeNoRestriction() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("min_score", "0.5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 100, 0); + + boolean pushed = builder.pushDownLimit(limit); + assertTrue(pushed, "min_score mode should not restrict LIMIT"); + } + + @Test + void pushDownSortScoreDescAccepted() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + boolean pushed = builder.pushDownSort(sort); + assertTrue(pushed, "ORDER BY _score DESC should be accepted"); + } + + @Test + void pushDownSortPreservesSortCountAsLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = 
new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "10")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + // LogicalSort with count=7 simulates a sort+limit combined node (PPL path) + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + 7, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + boolean pushed = builder.pushDownSort(sort); + assertTrue(pushed, "ORDER BY _score DESC with count should be accepted"); + assertEquals( + 7, + requestBuilder.getMaxResponseSize(), + "sort.getCount() should be pushed down as request size"); + } + + @Test + void pushDownSortCountExceedingKRejects() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + // LogicalSort with count=10 exceeds k=5 — should be rejected + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + 10, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort)); + assertTrue(ex.getMessage().contains("LIMIT 10 exceeds k=5")); + } + + @Test + void pushDownSortNonScoreFieldRejected() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC, + new ReferenceExpression("name", STRING)))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort)); + assertTrue(ex.getMessage().contains("unsupported sort expression")); + } + + @Test + void pushDownSortMultipleExpressionsRejectsNonScore() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)), + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC, + new ReferenceExpression("name", STRING)))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort)); + assertTrue(ex.getMessage().contains("unsupported sort expression")); + } + + @Test + void pushDownSortScoreAscRejected() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = 
new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort)); + assertTrue(ex.getMessage().contains("_score ASC is not supported")); + } + + @Test + void pushDownFilterCompoundPredicateSurvives() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // Simulate WHERE name = 'John' AND age > 30 + var condition = + DSL.and( + DSL.equal(new ReferenceExpression("name", STRING), DSL.literal("John")), + DSL.greater(new ReferenceExpression("age", ExprCoreType.INTEGER), DSL.literal(30))); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + boolean pushed = builder.pushDownFilter(filter); + + assertTrue(pushed, "pushDownFilter with compound predicate should succeed"); + QueryBuilder resultQuery = requestBuilder.getSourceBuilder().query(); + assertTrue(resultQuery instanceof BoolQueryBuilder, "Result should be a BoolQuery"); + BoolQueryBuilder boolQuery = (BoolQueryBuilder) resultQuery; + assertEquals(1, boolQuery.must().size(), "knn query should be in must (scoring context)"); + assertEquals(1, boolQuery.filter().size(), "compound WHERE should be in filter (non-scoring)"); + } + + @Test + void pushDownFilterEfficientPlacesInsideKnn() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + // Callback simulates VectorSearchIndex rebuilding knn with filter + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{\"filter\":\"embedded\"}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + var condition = DSL.equal(new ReferenceExpression("city", STRING), DSL.literal("Miami")); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + boolean pushed = builder.pushDownFilter(filter); + + assertTrue(pushed, "pushDownFilter should succeed"); + QueryBuilder resultQuery = requestBuilder.getSourceBuilder().query(); + assertTrue( + resultQuery instanceof WrapperQueryBuilder, + "Efficient filter should produce a WrapperQueryBuilder (rebuilt knn), not BoolQuery"); + } + + @Test + void pushDownFilterExplicitPostProducesBool() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, knnQuery, Map.of("k", "5"), FilterType.POST, true, null); + + var condition = DSL.equal(new ReferenceExpression("name", STRING), DSL.literal("John")); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + boolean pushed = builder.pushDownFilter(filter); + + assertTrue(pushed); + QueryBuilder resultQuery = requestBuilder.getSourceBuilder().query(); + assertTrue(resultQuery instanceof BoolQueryBuilder); + BoolQueryBuilder boolQuery = (BoolQueryBuilder) resultQuery; + assertEquals(1, boolQuery.must().size()); + assertEquals(1, 
boolQuery.filter().size()); + } + + // ── Constructor validation ────────────────────────────────────────── + + @Test + void constructorRejectsEfficientModeWithNullCallback() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + + assertThrows( + IllegalArgumentException.class, + () -> + new VectorSearchQueryBuilder( + requestBuilder, knnQuery, Map.of("k", "5"), FilterType.EFFICIENT, true, null)); + } + + // ── Build-time validation ──────────────────────────────────────────── + + @Test + void buildRejectsExplicitFilterTypePostWithoutWhere() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, knnQuery, Map.of("k", "5"), FilterType.POST, true, null); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, builder::build); + assertTrue(ex.getMessage().contains("filter_type requires a pushdownable WHERE clause")); + } + + @Test + void buildRejectsExplicitFilterTypeEfficientWithoutWhere() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{\"filter\":\"embedded\"}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, builder::build); + assertTrue(ex.getMessage().contains("filter_type requires a pushdownable WHERE clause")); + } + + @Test + void buildSucceedsWithNoFilterTypeAndNoWhere() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + OpenSearchRequestBuilder result = builder.build(); + assertNotNull(result); + } + + @Test + void buildSucceedsWithFilterTypeAndPushedWhere() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, knnQuery, Map.of("k", "5"), FilterType.POST, true, null); + + var condition = DSL.equal(new ReferenceExpression("name", STRING), DSL.literal("John")); + var dummyChild = new LogicalValues(Collections.emptyList()); + builder.pushDownFilter(new LogicalFilter(dummyChild, condition)); + + OpenSearchRequestBuilder result = builder.build(); + assertNotNull(result); + } + + // ── Radial without LIMIT rejection ───────────────────────────────── + + @Test + void buildRejectsRadialMaxDistanceWithoutLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("max_distance", "10.0")); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, builder::build); + assertTrue(ex.getMessage().contains("LIMIT is required for radial vector search")); + } + + @Test + void buildRejectsRadialMinScoreWithoutLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("min_score", "0.5")); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, builder::build); + 
assertTrue(ex.getMessage().contains("LIMIT is required for radial vector search")); + } + + @Test + void buildSucceedsRadialWithLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("max_distance", "10.0")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + builder.pushDownLimit(new LogicalLimit(dummyChild, 50, 0)); + + OpenSearchRequestBuilder result = builder.build(); + assertNotNull(result); + } + + @Test + void buildSucceedsTopKWithoutLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + OpenSearchRequestBuilder result = builder.build(); + assertNotNull(result); + } + + // ── Regression: LIMIT and sort invariants under efficient mode ────── + + @Test + void pushDownLimitExceedingKThrowsUnderEfficientMode() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 10, 0); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownLimit(limit)); + assertTrue(ex.getMessage().contains("LIMIT 10 exceeds k=5")); + } + + @Test + void pushDownSortScoreDescAcceptedUnderEfficientMode() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + boolean pushed = builder.pushDownSort(sort); + assertTrue(pushed, "ORDER BY _score DESC should be accepted under efficient mode"); + } + + @Test + void pushDownSortNonScoreRejectedUnderEfficientMode() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC, + new ReferenceExpression("name", STRING)))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort)); + assertTrue(ex.getMessage().contains("unsupported sort expression")); + } + + // ── Non-pushdownable filter handling 
────────────────────────────────── + + @Test + void pushDownFilterNonPushdownableWithExplicitFilterTypeThrows() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, knnQuery, Map.of("k", "5"), FilterType.POST, true, null); + + // STRUCT = STRUCT triggers ScriptQueryUnSupportedException in FilterQueryBuilder + var condition = + DSL.equal( + new ReferenceExpression("nested_field", ExprCoreType.STRUCT), + new ReferenceExpression("other_field", ExprCoreType.STRUCT)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter)); + assertTrue( + ex.getMessage().contains("filter_type only works when the WHERE clause can be translated")); + assertTrue(ex.getMessage().contains("Rewrite the WHERE clause or omit filter_type")); + } + + @Test + void pushDownFilterNonPushdownableWithoutExplicitFilterTypeFallsBack() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // STRUCT = STRUCT triggers ScriptQueryUnSupportedException in FilterQueryBuilder + var condition = + DSL.equal( + new ReferenceExpression("nested_field", ExprCoreType.STRUCT), + new ReferenceExpression("other_field", ExprCoreType.STRUCT)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + boolean pushed = builder.pushDownFilter(filter); + assertFalse(pushed, "Non-pushdownable filter should return false for in-memory fallback"); + } + + // ── OFFSET rejection ──────────────────────────────────────────────── + + @Test + void pushDownLimit_rejectsNonZeroOffset() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + // LIMIT 3 OFFSET 2: the planner passes both through LogicalLimit + var limit = new LogicalLimit(dummyChild, 3, 2); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownLimit(limit)); + assertTrue( + ex.getMessage().contains("OFFSET is not supported on vectorSearch()"), + "Expected OFFSET rejection message, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("LIMIT only"), + "Expected remediation guidance in message, got: " + ex.getMessage()); + } + + @Test + void pushDownLimit_acceptsZeroOffset() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 3, 0); + + // Zero offset is the normal case; must continue to succeed. 
+ assertTrue(builder.pushDownLimit(limit)); + } + + // ── WHERE on _score rejection ──────────────────────────────────────── + + @Test + void pushDownFilter_rejectsScoreReferenceInWhere() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // WHERE _score > 0.5 (note: _score is a synthetic column, not a stored field) + var condition = + DSL.greater(new ReferenceExpression("_score", ExprCoreType.FLOAT), DSL.literal(0.5)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter)); + assertTrue( + ex.getMessage().contains("WHERE on _score is not supported"), + "Expected _score rejection message, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("min_score"), + "Expected remediation guidance pointing at option='min_score=...', got: " + + ex.getMessage()); + } + + @Test + void pushDownFilter_rejectsScoreReferenceInsideCompound() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // WHERE state = 'TX' AND _score > 0.5: rejection must walk compound predicates + var condition = + DSL.and( + DSL.equal(new ReferenceExpression("state", STRING), DSL.literal("TX")), + DSL.greater(new ReferenceExpression("_score", ExprCoreType.FLOAT), DSL.literal(0.5))); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter)); + assertTrue( + ex.getMessage().contains("WHERE on _score is not supported"), + "Expected _score rejection message, got: " + ex.getMessage()); + } + + @Test + void pushDownFilter_rejectsUppercaseScoreReference() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // WHERE _SCORE > 0.5 must be rejected the same way as _score; the check is case-insensitive + // so variants that preserve original casing cannot bypass the guard. 
+ var condition = + DSL.greater(new ReferenceExpression("_SCORE", ExprCoreType.FLOAT), DSL.literal(0.5)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter)); + assertTrue( + ex.getMessage().contains("WHERE on _score is not supported"), + "Expected _score rejection message, got: " + ex.getMessage()); + } + + // ── filter_type=efficient rejects script subtrees ─────────────────── + + @Test + void pushDownFilter_efficient_rejectsScriptSubtree() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{\"filter\":\"embedded\"}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + // price + 1 > 100 lowers to a ScriptQueryBuilder; embedding it under knn.filter would + // trigger the AOSS rejection this PR guards against. + var condition = + DSL.greater( + DSL.add(new ReferenceExpression("price", ExprCoreType.INTEGER), DSL.literal(1)), + DSL.literal(100)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter)); + assertTrue( + ex.getMessage().contains("vectorSearch WHERE pre-filtering does not support"), + "Expected script rejection message, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("script queries"), + "Expected script queries guidance in message, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("filter_type=post"), + "Expected filter_type=post fallback guidance, got: " + ex.getMessage()); + } + + @Test + void pushDownFilter_post_allowsScriptSubtree() { + // POST puts WHERE in an outer bool.filter, not under knn.filter, so scripts are fine. 
+ var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var condition = + DSL.greater( + DSL.add(new ReferenceExpression("price", ExprCoreType.INTEGER), DSL.literal(1)), + DSL.literal(100)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + assertTrue(builder.pushDownFilter(filter), "POST mode must still accept script predicates"); + } + + @Test + void buildSucceedsRadialWithSortEmbeddedLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("max_distance", "10.0")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + // LogicalSort with count=50 simulates PPL sort-with-limit path + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + 50, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + builder.pushDownSort(sort); + + // build() should not reject — limitPushed must be true via pushDownSort's count path + OpenSearchRequestBuilder result = builder.build(); + assertNotNull(result); + } + + // ── filter_type=efficient allow-list validator ────────────────────── + + @Test + void validateEfficientFilterSafe_rejectsNestedQuery() { + // FilterQueryBuilder emits NestedQueryBuilder for SQL nested(field, pred); nested vector + // semantics are outside the P0 preview so rejection must be targeted, not generic. + QueryBuilder nested = + QueryBuilders.nestedQuery( + "parent", QueryBuilders.termQuery("parent.f", "v"), ScoreMode.None); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchQueryBuilder.validateEfficientFilterSafe(nested)); + assertTrue( + ex.getMessage().contains("vectorSearch WHERE pre-filtering does not support nested"), + "Expected targeted nested rejection, got: " + ex.getMessage()); + } + + @Test + void validateEfficientFilterSafe_rejectsNestedBuriedInBool() { + // AND-ing nested() with a term must still be caught; otherwise the guard is trivially bypassed. + QueryBuilder tree = + QueryBuilders.boolQuery() + .filter(QueryBuilders.termQuery("state", "CA")) + .filter( + QueryBuilders.nestedQuery( + "parent", QueryBuilders.termQuery("parent.f", "v"), ScoreMode.None)); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchQueryBuilder.validateEfficientFilterSafe(tree)); + assertTrue(ex.getMessage().contains("nested predicates")); + } + + @Test + void validateEfficientFilterSafe_acceptsBoolOfSafeLeaves() { + QueryBuilder tree = + QueryBuilders.boolQuery() + .filter(QueryBuilders.termQuery("category", "shoes")) + .filter(QueryBuilders.rangeQuery("price").gte(80).lte(150)); + + VectorSearchQueryBuilder.validateEfficientFilterSafe(tree); + } + + @Test + void validateEfficientFilterSafe_acceptsExistsLeaf() { + // IS NOT NULL lowers to ExistsQueryBuilder; locks in allow-list coverage for that path. 
+ QueryBuilder exists = QueryBuilders.existsQuery("brand"); + + VectorSearchQueryBuilder.validateEfficientFilterSafe(exists); + } + + @Test + void validateEfficientFilterSafe_rejectsUnknownWrapper() { + // Unknown shapes must fail closed so future FilterQueryBuilder additions cannot silently + // re-introduce the AOSS-rejection bug class this PR is guarding against. + QueryBuilder unknown = new WrapperQueryBuilder("{\"term\":{\"f\":\"v\"}}"); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchQueryBuilder.validateEfficientFilterSafe(unknown)); + assertTrue( + ex.getMessage().contains("unsupported filter query shape"), + "Expected unknown-shape rejection, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("WrapperQueryBuilder"), + "Expected class name in message, got: " + ex.getMessage()); + } + + private OpenSearchRequestBuilder createRequestBuilder() { + return new OpenSearchRequestBuilder( + mock(OpenSearchExprValueFactory.class), 10000, mock(Settings.class)); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java index 310bb5e73c5..e930056474a 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java @@ -174,20 +174,80 @@ void should_build_wildcard_query_for_like_expression() { } @Test - void should_build_script_query_for_unsupported_lucene_query() { + void should_build_exists_query_for_is_not_null() { + assertJsonEquals( + "{\n" + + " \"exists\" : {\n" + + " \"field\" : \"age\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + "}", + buildQuery(DSL.isnotnull(ref("age", INTEGER)))); + } + + @Test + void should_build_must_not_exists_query_for_is_null() { + assertJsonEquals( + "{\n" + + " \"bool\" : {\n" + + " \"must_not\" : [\n" + + " {\n" + + " \"exists\" : {\n" + + " \"field\" : \"age\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + "}", + buildQuery(DSL.is_null(ref("age", INTEGER)))); + } + + @Test + void should_fallback_to_script_for_nested_is_not_null() { + // Nested IS_NOT_NULL must NOT route through NestedQuery.buildNested(): that path reads + // arg[1] and unary IS_NOT_NULL only has arg[0]. ExistsQuery.isNestedPredicate() returns + // false precisely to force the script fallback here. mockToStringSerializer(); assertJsonEquals( "{\n" + " \"script\" : {\n" + " \"script\" : {\n" - + " \"source\" : \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"is not" - + " null(age)\\\"}\",\n" + + " \"source\" :" + + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"is" + + " not null(FunctionExpression(functionName=nested, arguments=[message.info," + + " message]))\\\"}\",\n" + " \"lang\" : \"opensearch_compounded_script\"\n" + " },\n" + " \"boost\" : 1.0\n" + " }\n" + "}", - buildQuery(DSL.isnotnull(ref("age", INTEGER)))); + buildQuery( + DSL.isnotnull( + DSL.nested(DSL.ref("message.info", STRING), DSL.ref("message", STRING))))); + } + + @Test + void should_fallback_to_script_for_nested_is_null() { + // Symmetric to the IS_NOT_NULL case: must not crash with an arg[1] lookup via NestedQuery. 
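+    // Note: as in the IS_NOT_NULL case above, the expected JSON below is the compounded-script
+    // form ("lang": "opensearch_compounded_script"), i.e. the whole predicate is evaluated by the
+    // script engine per document instead of being translated to a native exists/nested query.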
+ mockToStringSerializer(); + assertJsonEquals( + "{\n" + + " \"script\" : {\n" + + " \"script\" : {\n" + + " \"source\" :" + + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"is" + + " null(FunctionExpression(functionName=nested, arguments=[message.info," + + " message]))\\\"}\",\n" + + " \"lang\" : \"opensearch_compounded_script\"\n" + + " },\n" + + " \"boost\" : 1.0\n" + + " }\n" + + "}", + buildQuery( + DSL.is_null(DSL.nested(DSL.ref("message.info", STRING), DSL.ref("message", STRING))))); } @Test diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRuleTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRuleTest.java new file mode 100644 index 00000000000..22b1b36ca91 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRuleTest.java @@ -0,0 +1,146 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.util.MergeRules; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; +import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; + +class TextKeywordConflictRuleTest { + + private final TextKeywordConflictRule rule = new TextKeywordConflictRule(); + + @Test + void testMatchTextAndKeyword() { + OpenSearchDataType text = OpenSearchDataType.of(MappingType.Text); + OpenSearchDataType keyword = OpenSearchDataType.of(MappingType.Keyword); + assertTrue(rule.isMatch(text, keyword)); + assertTrue(rule.isMatch(keyword, text)); + } + + @Test + void testMatchMatchOnlyTextAndKeyword() { + OpenSearchDataType matchOnlyText = OpenSearchDataType.of(MappingType.MatchOnlyText); + OpenSearchDataType keyword = OpenSearchDataType.of(MappingType.Keyword); + assertTrue(rule.isMatch(matchOnlyText, keyword)); + assertTrue(rule.isMatch(keyword, matchOnlyText)); + } + + @Test + void testMatchTextWithKeywordSubfieldAndTextWithout() { + OpenSearchTextType textWithKeyword = + OpenSearchTextType.of(Map.of("keyword", OpenSearchDataType.of(MappingType.Keyword))); + OpenSearchTextType textWithout = OpenSearchTextType.of(); + assertTrue(rule.isMatch(textWithKeyword, textWithout)); + assertTrue(rule.isMatch(textWithout, textWithKeyword)); + } + + @Test + void testNoMatchSameTextWithoutSubfields() { + OpenSearchTextType text1 = OpenSearchTextType.of(); + OpenSearchTextType text2 = OpenSearchTextType.of(); + assertFalse(rule.isMatch(text1, text2)); + } + + @Test + void testNoMatchBothTextWithKeywordSubfields() { + OpenSearchTextType textWithKeyword1 = + OpenSearchTextType.of(Map.of("keyword", OpenSearchDataType.of(MappingType.Keyword))); + OpenSearchTextType textWithKeyword2 = + OpenSearchTextType.of(Map.of("keyword", OpenSearchDataType.of(MappingType.Keyword))); + assertFalse(rule.isMatch(textWithKeyword1, textWithKeyword2)); + } + + @Test + void testNoMatchKeywordAndKeyword() { + OpenSearchDataType keyword1 = OpenSearchDataType.of(MappingType.Keyword); + OpenSearchDataType keyword2 = 
OpenSearchDataType.of(MappingType.Keyword); + assertFalse(rule.isMatch(keyword1, keyword2)); + } + + @Test + void testNoMatchIntegerAndKeyword() { + OpenSearchDataType integer = OpenSearchDataType.of(MappingType.Integer); + OpenSearchDataType keyword = OpenSearchDataType.of(MappingType.Keyword); + assertFalse(rule.isMatch(integer, keyword)); + } + + @Test + void testNoMatchNullSource() { + OpenSearchDataType keyword = OpenSearchDataType.of(MappingType.Keyword); + assertFalse(rule.isMatch(null, keyword)); + } + + @Test + void testNoMatchNullTarget() { + OpenSearchDataType text = OpenSearchDataType.of(MappingType.Text); + assertFalse(rule.isMatch(text, null)); + } + + @Test + void testMergeProducesTextWithoutKeywordSubfields() { + OpenSearchDataType keyword = OpenSearchDataType.of(MappingType.Keyword); + Map target = new HashMap<>(); + target.put("msg", keyword); + + OpenSearchDataType text = OpenSearchDataType.of(MappingType.Text); + rule.mergeInto("msg", text, target); + + OpenSearchDataType merged = target.get("msg"); + assertInstanceOf(OpenSearchTextType.class, merged); + OpenSearchTextType mergedText = (OpenSearchTextType) merged; + assertTrue(mergedText.getFields().isEmpty(), "Merged type should have no keyword subfields"); + } + + @Test + void testMergeHelperIntegration() { + // Simulate merging two index mappings with conflicting text/keyword types + Map target = new HashMap<>(); + target.put("msg", OpenSearchDataType.of(MappingType.Keyword)); + target.put("idx", OpenSearchDataType.of(MappingType.Integer)); + + Map source = new HashMap<>(); + source.put("msg", OpenSearchDataType.of(MappingType.Text)); + source.put("idx", OpenSearchDataType.of(MappingType.Integer)); + + MergeRuleHelper.merge(target, source); + + // msg should be merged to text without keyword subfields + assertInstanceOf(OpenSearchTextType.class, target.get("msg")); + OpenSearchTextType mergedText = (OpenSearchTextType) target.get("msg"); + assertTrue(mergedText.getFields().isEmpty()); + + // idx should remain integer (same type in both, LatestRule applies) + assertEquals(MappingType.Integer, target.get("idx").getMappingType()); + } + + @Test + void testToKeywordSubFieldReturnsNullForMergedType() { + // After merging text and keyword, toKeywordSubField should return null, + // forcing SOURCE retrieval instead of DOC_VALUE + Map target = new HashMap<>(); + target.put("msg", OpenSearchDataType.of(MappingType.Keyword)); + + Map source = new HashMap<>(); + source.put("msg", OpenSearchDataType.of(MappingType.Text)); + + MergeRuleHelper.merge(target, source); + + OpenSearchDataType mergedType = target.get("msg"); + String result = OpenSearchTextType.toKeywordSubField("msg", mergedType.getExprType()); + // Should return null because the merged text type has no keyword subfield + assertNull(result); + } +} diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java index 41a6d8c486e..ed29bd03161 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java @@ -49,6 +49,7 @@ import org.opensearch.jobscheduler.spi.ScheduledJobParser; import org.opensearch.jobscheduler.spi.ScheduledJobRunner; import org.opensearch.plugins.ActionPlugin; +import org.opensearch.plugins.ExtensiblePlugin; import org.opensearch.plugins.Plugin; import org.opensearch.plugins.ScriptPlugin; import org.opensearch.plugins.SystemIndexPlugin; @@ -92,6 +93,7 @@ import 
org.opensearch.sql.directquery.transport.model.ExecuteDirectQueryActionResponse; import org.opensearch.sql.directquery.transport.model.ReadDirectQueryResourcesActionResponse; import org.opensearch.sql.directquery.transport.model.WriteDirectQueryResourcesActionResponse; +import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.ExecutionEngine.ExplainResponse; import org.opensearch.sql.executor.QueryType; import org.opensearch.sql.legacy.esdomain.LocalClusterState; @@ -102,6 +104,7 @@ import org.opensearch.sql.opensearch.setting.OpenSearchSettings; import org.opensearch.sql.opensearch.storage.OpenSearchDataSourceFactory; import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine; +import org.opensearch.sql.plugin.config.EngineExtensionsHolder; import org.opensearch.sql.plugin.config.OpenSearchPluginModule; import org.opensearch.sql.plugin.rest.AnalyticsExecutorHolder; import org.opensearch.sql.plugin.rest.RestPPLGrammarAction; @@ -140,10 +143,15 @@ import org.opensearch.watcher.ResourceWatcherService; public class SQLPlugin extends Plugin - implements ActionPlugin, ScriptPlugin, SystemIndexPlugin, JobSchedulerExtension { + implements ActionPlugin, + ScriptPlugin, + SystemIndexPlugin, + JobSchedulerExtension, + ExtensiblePlugin { private static final Logger LOGGER = LogManager.getLogger(SQLPlugin.class); + private List executionEngineExtensions = List.of(); private ClusterService clusterService; /** Settings should be inited when bootstrap the plugin. */ @@ -162,6 +170,18 @@ public String description() { return "Use sql to query OpenSearch."; } + @Override + public void loadExtensions(ExtensionLoader loader) { + List loaded = loader.loadExtensions(ExecutionEngine.class); + this.executionEngineExtensions = loaded != null ? 
List.copyOf(loaded) : List.of(); + if (!executionEngineExtensions.isEmpty()) { + LOGGER.info( + "Loaded {} execution engine extension(s): {}", + executionEngineExtensions.size(), + executionEngineExtensions.stream().map(e -> e.getClass().getSimpleName()).toList()); + } + } + @Override public List getRestHandlers( Settings settings, @@ -347,7 +367,7 @@ public Collection createComponents( LocalClusterState.state().setPluginSettings((OpenSearchSettings) pluginSettings); LocalClusterState.state().setClient(client); ModulesBuilder modules = new ModulesBuilder(); - modules.add(new OpenSearchPluginModule()); + modules.add(new OpenSearchPluginModule(executionEngineExtensions)); modules.add( b -> { b.bind(NodeClient.class).toInstance((NodeClient) client); @@ -382,12 +402,15 @@ public Collection createComponents( ScheduledAsyncQueryJobRunner.getJobRunnerInstance() .loadJobResource(client, clusterService, threadPool, asyncQueryExecutorService); + EngineExtensionsHolder extensionsHolder = new EngineExtensionsHolder(executionEngineExtensions); + return ImmutableList.of( dataSourceService, asyncQueryExecutorService, clusterManagerEventListener, pluginSettings, - directQueryExecutorService); + directQueryExecutorService, + extensionsHolder); } @Override diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/config/EngineExtensionsHolder.java b/plugin/src/main/java/org/opensearch/sql/plugin/config/EngineExtensionsHolder.java new file mode 100644 index 00000000000..70b3fd29011 --- /dev/null +++ b/plugin/src/main/java/org/opensearch/sql/plugin/config/EngineExtensionsHolder.java @@ -0,0 +1,20 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.plugin.config; + +import java.util.List; +import org.opensearch.sql.executor.ExecutionEngine; + +/** + * Holds execution engine extensions loaded via SPI. Returned from {@code SQLPlugin.createComponents()} + * so that OpenSearch's Guice injector can inject it into transport actions like {@code + * TransportPPLQueryAction}. + */ +public record EngineExtensionsHolder(List engines) { + public EngineExtensionsHolder(List engines) { + this.engines = engines != null ? List.copyOf(engines) : List.of(); + } +} diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java index 35504dd83c2..d9406935ee5 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java @@ -5,6 +5,7 @@ package org.opensearch.sql.plugin.config; +import java.util.List; import lombok.RequiredArgsConstructor; import org.opensearch.common.inject.AbstractModule; import org.opensearch.common.inject.Provides; @@ -13,6 +14,7 @@ import org.opensearch.sql.analysis.ExpressionAnalyzer; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasource.DataSourceService; +import org.opensearch.sql.executor.DelegatingExecutionEngine; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.QueryManager; import org.opensearch.sql.executor.QueryService; @@ -41,6 +43,13 @@ @RequiredArgsConstructor public class OpenSearchPluginModule extends AbstractModule { + private final List executionEngineExtensions; + + /** Default constructor for when no engines are available.
*/ + public OpenSearchPluginModule() { + this(List.of()); + } + private final BuiltinFunctionRepository functionRepository = BuiltinFunctionRepository.getInstance(); @@ -61,7 +70,12 @@ public StorageEngine storageEngine(OpenSearchClient client, Settings settings) { @Singleton public ExecutionEngine executionEngine( OpenSearchClient client, ExecutionProtector protector, PlanSerializer planSerializer) { - return new OpenSearchExecutionEngine(client, protector, planSerializer); + ExecutionEngine defaultEngine = + new OpenSearchExecutionEngine(client, protector, planSerializer); + if (executionEngineExtensions.isEmpty()) { + return defaultEngine; + } + return new DelegatingExecutionEngine(defaultEngine, executionEngineExtensions); } @Provides diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java index ffdd90504f7..5c6266beee1 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java @@ -5,8 +5,6 @@ package org.opensearch.sql.plugin.rest; -import static org.opensearch.core.rest.RestStatus.BAD_REQUEST; -import static org.opensearch.core.rest.RestStatus.INTERNAL_SERVER_ERROR; import static org.opensearch.core.rest.RestStatus.OK; import com.google.common.collect.ImmutableList; @@ -25,10 +23,9 @@ import org.opensearch.rest.RestChannel; import org.opensearch.rest.RestRequest; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.datasources.exceptions.DataSourceClientException; -import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.QueryEngineException; -import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.legacy.metrics.MetricName; import org.opensearch.sql.legacy.metrics.Metrics; import org.opensearch.sql.opensearch.response.error.ErrorMessageFactory; @@ -49,17 +46,47 @@ public RestPPLQueryAction() { super(); } - private static boolean isClientError(Exception e) { - return e instanceof NullPointerException - // NPE is hard to differentiate but more likely caused by bad query - || e instanceof IllegalArgumentException - || e instanceof IndexNotFoundException - || e instanceof SemanticCheckException - || e instanceof ExpressionEvaluationException - || e instanceof QueryEngineException - || e instanceof SyntaxCheckException - || e instanceof DataSourceClientException - || e instanceof IllegalAccessException; + private static boolean isClientError(Exception ex) { + // (Tombstone) NullPointerException has historically been treated as a client error, but + // nowadays they're rare and should be treated as system errors, since it represents a broken + // data model in our logic. 
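+    // For example, a malformed query surfacing as SyntaxCheckException still classifies as a
+    // client error (mapped to 400 by getRawErrorCode below), while an unexpected
+    // NullPointerException now falls through to the 500 (system error) path.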
+ return ex instanceof IllegalArgumentException + || ex instanceof IndexNotFoundException + || ex instanceof QueryEngineException + || ex instanceof SyntaxCheckException + || ex instanceof DataSourceClientException + || ex instanceof IllegalAccessException; + } + + private static int getRawErrorCode(Exception ex) { + if (ex instanceof ErrorReport) { + return getRawErrorCode(((ErrorReport) ex).getCause()); + } + if (ex instanceof OpenSearchException) { + return ((OpenSearchException) ex).status().getStatus(); + } + // Possible future work: We currently do this on exception types, when we have more robust + // ErrorCodes in more locations it may be worth switching this to be based on those instead. + // That lets us identify specific error cases at a granularity higher than exception types. + if (isClientError(ex)) { + return 400; + } + return 500; + } + + private static RestStatus loggedErrorCode(Exception ex) { + int code = getRawErrorCode(ex); + + // If we hit neither branch, no-op as false alarm error? I don't believe we can ever hit this + // scenario. + if (400 <= code && code < 500) { + Metrics.getInstance().getNumericalMetric(MetricName.PPL_FAILED_REQ_COUNT_CUS).increment(); + } else if (500 <= code && code < 600) { + Metrics.getInstance().getNumericalMetric(MetricName.PPL_FAILED_REQ_COUNT_SYS).increment(); + } else { + LOG.warn("Got an exception returning non-error status {}", RestStatus.fromCode(code), ex); + } + return RestStatus.fromCode(code); } @Override @@ -98,33 +125,13 @@ public void onResponse(TransportPPLQueryResponse response) { @Override public void onFailure(Exception e) { + RestStatus status = loggedErrorCode(e); if (transportPPLQueryRequest.isExplainRequest()) { - LOG.error("Error happened during explain", e); - if (isClientError(e)) { - reportError(channel, e, BAD_REQUEST); - } else { - reportError(channel, e, INTERNAL_SERVER_ERROR); - } - } else if (e instanceof OpenSearchException) { - Metrics.getInstance() - .getNumericalMetric(MetricName.PPL_FAILED_REQ_COUNT_CUS) - .increment(); - OpenSearchException exception = (OpenSearchException) e; - reportError(channel, exception, exception.status()); + LOG.error("Error happened during explain (status {})", status, e); } else { - LOG.error("Error happened during query handling", e); - if (isClientError(e)) { - Metrics.getInstance() - .getNumericalMetric(MetricName.PPL_FAILED_REQ_COUNT_CUS) - .increment(); - reportError(channel, e, BAD_REQUEST); - } else { - Metrics.getInstance() - .getNumericalMetric(MetricName.PPL_FAILED_REQ_COUNT_SYS) - .increment(); - reportError(channel, e, INTERNAL_SERVER_ERROR); - } + LOG.error("Error happened during query handling (status {})", status, e); } + reportError(channel, e, status); } }); } diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java index f83a1277753..b8559442631 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java @@ -36,6 +36,7 @@ import org.opensearch.sql.monitor.profile.QueryProfiling; import org.opensearch.sql.opensearch.executor.OpenSearchQueryManager; import org.opensearch.sql.opensearch.setting.OpenSearchSettings; +import org.opensearch.sql.plugin.config.EngineExtensionsHolder; import org.opensearch.sql.plugin.config.OpenSearchPluginModule; import org.opensearch.sql.plugin.rest.AnalyticsExecutorHolder; 
import org.opensearch.sql.plugin.rest.RestUnifiedQueryAction; @@ -73,11 +74,12 @@ public TransportPPLQueryAction( ClusterService clusterService, DataSourceServiceImpl dataSourceService, org.opensearch.common.settings.Settings clusterSettings, - QueryPlanExecutor> queryPlanExecutor) { + QueryPlanExecutor> queryPlanExecutor, + EngineExtensionsHolder extensionsHolder) { super(PPLQueryAction.NAME, transportService, actionFilters, TransportPPLQueryRequest::new); ModulesBuilder modules = new ModulesBuilder(); - modules.add(new OpenSearchPluginModule()); + modules.add(new OpenSearchPluginModule(extensionsHolder.engines())); modules.add( b -> { b.bind(NodeClient.class).toInstance(client); diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 36b52cf8a1b..4bc69a8f295 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -171,6 +171,8 @@ TRAINING_DATA_SIZE: 'TRAINING_DATA_SIZE'; ANOMALY_SCORE_THRESHOLD: 'ANOMALY_SCORE_THRESHOLD'; APPEND: 'APPEND'; MULTISEARCH: 'MULTISEARCH'; +UNION: 'UNION'; +MAXOUT: 'MAXOUT'; COUNTFIELD: 'COUNTFIELD'; SHOWCOUNT: 'SHOWCOUNT'; LIMIT: 'LIMIT'; @@ -187,7 +189,9 @@ PATH: 'PATH'; CASE: 'CASE'; ELSE: 'ELSE'; IN: 'IN'; +IS: 'IS'; EXISTS: 'EXISTS'; +NULL: 'NULL'; // Geo IP eval function GEOIP: 'GEOIP'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 7e3862a8683..bcaaa105774 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -49,6 +49,7 @@ pplCommands | searchCommand | multisearchCommand | graphLookupCommand + | unionCommand ; commands @@ -96,6 +97,7 @@ commands | fieldformatCommand | nomvCommand | graphLookupCommand + | unionCommand ; commandName @@ -139,6 +141,7 @@ commandName | ADDCOLTOTALS | APPEND | MULTISEARCH + | UNION | REX | APPENDPIPE | REPLACE @@ -544,7 +547,7 @@ replacementPair ; convertCommand - : CONVERT convertFunction (COMMA? convertFunction)* + : CONVERT (TIMEFORMAT EQUAL timeFormat=stringLiteral)? convertFunction (COMMA? convertFunction)* ; convertFunction @@ -596,6 +599,19 @@ multisearchCommand : MULTISEARCH (LT_SQR_PRTHS subSearch RT_SQR_PRTHS)+ ; +unionCommand + : UNION subsearchOptions? unionDataset (COMMA? unionDataset)* + ; + +subsearchOptions + : (MAXOUT EQUAL maxout=integerLiteral)? + ; + +unionDataset + : LT_SQR_PRTHS subSearch RT_SQR_PRTHS + | tableSource + ; + kmeansCommand : KMEANS (kmeansParameter)* ; @@ -911,6 +927,11 @@ expression | left = expression comparisonOperator right = expression # compareExpr | expression NOT? IN LT_PRTHS valueList RT_PRTHS # inExpr | expression NOT? BETWEEN expression AND expression # between + | expression IS nullNotnull # isNullPredicate + ; + +nullNotnull + : NOT? 
NULL ; @@ -1594,6 +1615,8 @@ wildcard keywordsCanBeId : searchableKeyWord | IN + | IS + | NULL ; searchableKeyWord @@ -1683,6 +1706,7 @@ searchableKeyWord | ANOMALY_SCORE_THRESHOLD | COUNTFIELD | SHOWCOUNT + | MAXOUT | PATH | INPUT | OUTPUT diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index ff0ef4bd8db..d4f5eea0fb7 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -120,6 +120,7 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Window; import org.opensearch.sql.calcite.plan.OpenSearchConstants; @@ -1212,12 +1213,20 @@ public UnresolvedPlan visitConvertCommand(OpenSearchPPLParser.ConvertCommandCont .map(this::buildConversion) .filter(conversion -> conversion != null) .collect(Collectors.toList()); - return new Convert(conversions); + + String timeFormat = null; + if (ctx.timeFormat != null) { + timeFormat = StringUtils.unquoteText(ctx.timeFormat.getText()); + } + + return new Convert(conversions, timeFormat); } /** Supported PPL convert function names (case-insensitive). */ private static final Set SUPPORTED_CONVERSION_FUNCTIONS = - Set.of("auto", "num", "rmcomma", "rmunit", "memk", "none"); + Set.of( + "auto", "num", "rmcomma", "rmunit", "memk", "none", "ctime", "mktime", "dur2sec", + "mstime"); private Let buildConversion(OpenSearchPPLParser.ConvertFunctionContext funcCtx) { if (funcCtx.fieldExpression().isEmpty()) { @@ -1339,6 +1348,37 @@ public UnresolvedPlan visitMultisearchCommand(OpenSearchPPLParser.MultisearchCom return new Multisearch(subsearches); } + @Override + public UnresolvedPlan visitUnionCommand(OpenSearchPPLParser.UnionCommandContext ctx) { + List datasets = new ArrayList<>(); + + Integer maxout = null; + if (ctx.subsearchOptions() != null) { + OpenSearchPPLParser.SubsearchOptionsContext opts = ctx.subsearchOptions(); + if (opts.maxout != null) { + maxout = Integer.parseInt(opts.maxout.getText()); + } + } + + for (OpenSearchPPLParser.UnionDatasetContext datasetCtx : ctx.unionDataset()) { + if (datasetCtx.subSearch() != null) { + datasets.add(visitSubSearch(datasetCtx.subSearch())); + } else if (datasetCtx.tableSource() != null) { + datasets.add( + new Relation( + Collections.singletonList(internalVisitExpression(datasetCtx.tableSource())))); + } + } + + // Allow 1+ here; total count (including implicit upstream) validated during planning + if (datasets.isEmpty()) { + throw new SyntaxCheckException( + "Union command requires at least one dataset. Provided: " + datasets.size()); + } + + return new Union(datasets, maxout); + } + @Override public UnresolvedPlan visitRexCommand(OpenSearchPPLParser.RexCommandContext ctx) { UnresolvedExpression field = internalVisitExpression(ctx.rexExpr().field); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index c58eca20575..77d5c77a635 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -249,6 +249,15 @@ public UnresolvedExpression visitInExpr(InExprContext ctx) { return ctx.NOT() != null ? 
new Not(expr) : expr; } + @Override + public UnresolvedExpression visitIsNullPredicate(OpenSearchPPLParser.IsNullPredicateContext ctx) { + return new Function( + ctx.nullNotnull().NOT() == null + ? IS_NULL.getName().getFunctionName() + : IS_NOT_NULL.getName().getFunctionName(), + Arrays.asList(visit(ctx.expression()))); + } + /** Value Expression. */ @Override public UnresolvedExpression visitBinaryArithmetic(BinaryArithmeticContext ctx) { @@ -300,18 +309,20 @@ public UnresolvedExpression visitRenameFieldExpression(RenameFieldExpressionCont @Override public UnresolvedExpression visitPrefixSortField(OpenSearchPPLParser.PrefixSortFieldContext ctx) { - return buildSortField(ctx.sortFieldExpression(), ctx); + boolean ascending = ctx.MINUS() == null; + return buildSortField(ctx.sortFieldExpression(), ascending); } @Override public UnresolvedExpression visitSuffixSortField(OpenSearchPPLParser.SuffixSortFieldContext ctx) { - return buildSortField(ctx.sortFieldExpression(), ctx); + boolean ascending = (ctx.DESC() == null && ctx.D() == null); + return buildSortField(ctx.sortFieldExpression(), ascending); } @Override public UnresolvedExpression visitDefaultSortField( OpenSearchPPLParser.DefaultSortFieldContext ctx) { - return buildSortField(ctx.sortFieldExpression(), ctx); + return buildSortField(ctx.sortFieldExpression(), true); } @Override @@ -334,8 +345,7 @@ public UnresolvedExpression visitInvalidMixedSortField( } private Field buildSortField( - OpenSearchPPLParser.SortFieldExpressionContext sortFieldExpr, - OpenSearchPPLParser.SortFieldContext parentCtx) { + OpenSearchPPLParser.SortFieldExpressionContext sortFieldExpr, boolean ascending) { UnresolvedExpression fieldExpression = visit(sortFieldExpr.fieldExpression().qualifiedName()); if (sortFieldExpr.IP() != null) { @@ -346,7 +356,12 @@ private Field buildSortField( fieldExpression = new Cast(fieldExpression, AstDSL.stringLiteral("string")); } // AUTO() case uses the field expression as-is - return new Field(fieldExpression, ArgumentFactory.getArgumentList(parentCtx)); + + List arguments = + Arrays.asList( + ArgumentFactory.createSortDirectionArgument(ascending), + ArgumentFactory.getTypeArgument(sortFieldExpr)); + return new Field(fieldExpression, arguments); } @Override diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index 72090e2f069..2cdc702b785 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -27,14 +27,10 @@ import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.ChartCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DecimalLiteralContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DedupCommandContext; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DefaultSortFieldContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.EventstatsCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldsCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.IntegerLiteralContext; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.PrefixSortFieldContext; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SortFieldContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StreamstatsCommandContext; -import 
org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SuffixSortFieldContext; import org.opensearch.sql.ppl.parser.AstExpressionBuilder; /** Util class to get all arguments as a list from the PPL command. */ @@ -155,63 +151,17 @@ public static List getArgumentList(DedupCommandContext ctx) { } /** - * Get list of {@link Argument}. + * Creates an "asc" argument for sort field direction. * - * @param ctx SortFieldContext instance - * @return the list of arguments fetched from the sort field in sort command + * @param ascending true for ascending sort, false for descending + * @return Argument representing the sort direction */ - public static List getArgumentList(SortFieldContext ctx) { - if (ctx instanceof PrefixSortFieldContext) { - return getArgumentList((PrefixSortFieldContext) ctx); - } else if (ctx instanceof SuffixSortFieldContext) { - return getArgumentList((SuffixSortFieldContext) ctx); - } else { - return getArgumentList((DefaultSortFieldContext) ctx); - } - } - - /** - * Get list of {@link Argument} for prefix sort field (+/- syntax). - * - * @param ctx PrefixSortFieldContext instance - * @return the list of arguments fetched from the prefix sort field - */ - public static List getArgumentList(PrefixSortFieldContext ctx) { - return Arrays.asList( - ctx.MINUS() != null - ? new Argument("asc", new Literal(false, DataType.BOOLEAN)) - : new Argument("asc", new Literal(true, DataType.BOOLEAN)), - getTypeArgument(ctx.sortFieldExpression())); - } - - /** - * Get list of {@link Argument} for suffix sort field (asc/desc syntax). - * - * @param ctx SuffixSortFieldContext instance - * @return the list of arguments fetched from the suffix sort field - */ - public static List getArgumentList(SuffixSortFieldContext ctx) { - return Arrays.asList( - (ctx.DESC() != null || ctx.D() != null) - ? new Argument("asc", new Literal(false, DataType.BOOLEAN)) - : new Argument("asc", new Literal(true, DataType.BOOLEAN)), - getTypeArgument(ctx.sortFieldExpression())); - } - - /** - * Get list of {@link Argument} for default sort field (no direction specified). - * - * @param ctx DefaultSortFieldContext instance - * @return the list of arguments fetched from the default sort field - */ - public static List getArgumentList(DefaultSortFieldContext ctx) { - return Arrays.asList( - new Argument("asc", new Literal(true, DataType.BOOLEAN)), - getTypeArgument(ctx.sortFieldExpression())); + public static Argument createSortDirectionArgument(boolean ascending) { + return new Argument("asc", new Literal(ascending, DataType.BOOLEAN)); } /** Helper method to get type argument from sortFieldExpression. 
*/ - private static Argument getTypeArgument(OpenSearchPPLParser.SortFieldExpressionContext ctx) { + public static Argument getTypeArgument(OpenSearchPPLParser.SortFieldExpressionContext ctx) { if (ctx.AUTO() != null) { return new Argument("type", new Literal("auto", DataType.STRING)); } else if (ctx.IP() != null) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index fd1c10fea9c..4b75d444467 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -107,6 +107,7 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -542,7 +543,11 @@ public String visitConvert(Convert node, String context) { return StringUtils.format("%s(%s)%s", functionName, fields, asClause); }) .collect(Collectors.joining(",")); - return StringUtils.format("%s | convert %s", child, conversions); + String timeformatClause = + node.getTimeFormat() != null + ? StringUtils.format("timeformat=\"%s\" ", node.getTimeFormat()) + : ""; + return StringUtils.format("%s | convert %s%s", child, timeformatClause, conversions); } @Override @@ -793,32 +798,37 @@ public String visitAppend(Append node, String context) { @Override public String visitMultisearch(Multisearch node, String context) { + return anonymizeSubsearchCommand("multisearch", node.getSubsearches()); + } + + @Override + public String visitUnion(Union node, String context) { + return anonymizeSubsearchCommand("union", node.getDatasets()); + } + + private String anonymizeSubsearchCommand(String commandName, List subsearches) { + String keywords = + "source|fields|where|stats|head|tail|sort|eval|rename|" + + commandName + + "|search|table|identifier|\\*\\*\\*"; List anonymizedSubsearches = new ArrayList<>(); - for (UnresolvedPlan subsearch : node.getSubsearches()) { + for (UnresolvedPlan subsearch : subsearches) { String anonymizedSubsearch = anonymizeData(subsearch); anonymizedSubsearch = "search " + anonymizedSubsearch; anonymizedSubsearch = anonymizedSubsearch - .replaceAll("\\bsource=\\w+", "source=table") // Replace table names after source= - .replaceAll( - "\\b(?!source|fields|where|stats|head|tail|sort|eval|rename|multisearch|search|table|identifier|\\*\\*\\*)\\w+(?=\\s*[<>=!])", - "identifier") // Replace field names before operators - .replaceAll( - "\\b(?!source|fields|where|stats|head|tail|sort|eval|rename|multisearch|search|table|identifier|\\*\\*\\*)\\w+(?=\\s*,)", - "identifier") // Replace field names before commas - .replaceAll( - "fields" - + " \\+\\s*\\b(?!source|fields|where|stats|head|tail|sort|eval|rename|multisearch|search|table|identifier|\\*\\*\\*)\\w+", - "fields + identifier") // Replace field names after 'fields +' + .replaceAll("\\bsource=\\w+", "source=table") + .replaceAll("\\b(?!" + keywords + ")\\w+(?=\\s*[<>=!])", "identifier") + .replaceAll("\\b(?!" + keywords + ")\\w+(?=\\s*,)", "identifier") + .replaceAll("fields \\+\\s*\\b(?!" 
+ keywords + ")\\w+", "fields + identifier") .replaceAll( - "fields" - + " \\+\\s*identifier,\\s*\\b(?!source|fields|where|stats|head|tail|sort|eval|rename|multisearch|search|table|identifier|\\*\\*\\*)\\w+", - "fields + identifier,identifier"); // Handle multiple fields + "fields \\+\\s*identifier,\\s*\\b(?!" + keywords + ")\\w+", + "fields + identifier,identifier"); anonymizedSubsearches.add(StringUtils.format("[%s]", anonymizedSubsearch)); } - return StringUtils.format("| multisearch %s", String.join(" ", anonymizedSubsearches)); + return StringUtils.format("| %s %s", commandName, String.join(" ", anonymizedSubsearches)); } @Override diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java index faf944da4a0..56ed409b4d7 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java @@ -59,4 +59,152 @@ public void testAppendPipeWithMergedColumns() { + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + /** + * Regression test: double appendpipe with different aggregations. Result count (16 = 14 + 1 avg + + * 1 max) is verified in integration tests only because RelRunners.run() creates a new planner + * that conflicts with shared RelNode subtrees — a test framework limitation that does not affect + * the production path. + */ + @Test + public void testDoubleAppendPipe() { + String ppl = + "source=EMP | appendpipe [stats avg(SAL) as avg_sal] | appendpipe [stats max(SAL) as" + + " max_sal]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], avg_sal=[$8], max_sal=[null:DECIMAL(7, 2)])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[null:DECIMAL(11, 6)], max_sal=[$0])\n" + + " LogicalAggregate(group=[{}], max_sal=[MAX($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, 
EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + /** + * Regression test: triple appendpipe with different aggregations. Result count (17 = 14 + 1 avg + + * 1 max + 1 min) is verified in integration tests only — see testDoubleAppendPipe for rationale. + */ + @Test + public void testTripleAppendPipe() { + String ppl = + "source=EMP | appendpipe [stats avg(SAL) as avg_sal] | appendpipe [stats max(SAL) as" + + " max_sal] | appendpipe [stats min(SAL) as min_sal]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], avg_sal=[$8], max_sal=[$9]," + + " min_sal=[null:DECIMAL(7, 2)])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], avg_sal=[$8]," + + " max_sal=[null:DECIMAL(7, 2)])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[null:DECIMAL(11, 6)], max_sal=[$0])\n" + + " LogicalAggregate(group=[{}], max_sal=[MAX($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[null:DECIMAL(11, 6)], max_sal=[null:DECIMAL(7, 2)], min_sal=[$0])\n" + + " LogicalAggregate(group=[{}], min_sal=[MIN($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], avg_sal=[$8]," + + " max_sal=[null:DECIMAL(7, 2)])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " 
JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[null:DECIMAL(11, 6)], max_sal=[$0])\n" + + " LogicalAggregate(group=[{}], max_sal=[MAX($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + /** Regression test: double appendpipe with non-aggregation (filter) subpipeline. */ + @Test + public void testDoubleAppendPipeWithFilter() { + String ppl = "source=EMP | appendpipe [where DEPTNO = 20] | appendpipe [where DEPTNO = 30]"; + RelNode root = getRelNode(ppl); + verifyResultCount(root, 25); // 14 original + 5 (dept 20) + 6 (dept 30) + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java index 784fedc2ede..472e77e2d29 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java @@ -14,6 +14,7 @@ import org.apache.calcite.test.CalciteAssert; import org.junit.Ignore; import org.junit.Test; +import org.opensearch.sql.common.error.ErrorReport; public class CalcitePPLBasicTest extends CalcitePPLAbstractTest { @@ -201,9 +202,9 @@ public void testFieldsPlusThenMinus() { @Test public void testFieldsMinusThenPlusShouldThrowException() { String ppl = "source=EMP | fields - DEPTNO, SAL | fields + EMPNO, DEPTNO, SAL"; - IllegalArgumentException e = + ErrorReport e = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> { RelNode root = getRelNode(ppl); }); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java new file mode 100644 index 00000000000..6d62dbb2cde --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java @@ -0,0 +1,170 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import 
org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Test; + +/** + * Unit test for GitHub issue #5174: bin/chart NPE with null values. + * + *
    Verifies that the chart command generates correct logical plans when the input contains null + * values from binning, and that the sort operations properly handle nulls. + */ +public class CalcitePPLChartNullTest extends CalcitePPLAbstractTest { + + public CalcitePPLChartNullTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + // Table with null values matching the issue's bounty-numbers schema + ImmutableList rows = + ImmutableList.of( + new Object[] {1, "A", "X", 10.5}, + new Object[] {2, "A", "Y", 20.3}, + new Object[] {10, "B", "X", 100.0}, + new Object[] {null, "B", "Y", null}); + schema.add("bounty_numbers", new BountyNumbersTable(rows)); + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + @Test + public void testBinThenChartWithNullValuesLogicalPlan() { + String ppl = + "source=bounty_numbers | bin value span=50 as val_bin" + + " | chart count() over val_bin by category"; + RelNode root = getRelNode(ppl); + // Verify the SQL plan contains WHERE val_bin IS NOT NULL to filter null bin values, + // and NULLS LAST in ORDER BY for proper null handling in sort + String expectedSparkSql = + "SELECT `t2`.`val_bin`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN" + + " `t10`.`_row_number_chart_` <= 10 THEN `t2`.`category` ELSE 'OTHER' END" + + " `category`, SUM(`t2`.`count()`) `count()`\n" + + "FROM (SELECT `val_bin`, `category`, COUNT(*) `count()`\n" + + "FROM (SELECT `count`, `category`, `subcategory`, `value`," + + " SPAN_BUCKET(`value`, 50) `val_bin`\n" + + "FROM `scott`.`bounty_numbers`) `t`\n" + + "WHERE `val_bin` IS NOT NULL\n" + + "GROUP BY `val_bin`, `category`) `t2`\n" + + "LEFT JOIN (SELECT `category`, SUM(`count()`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY SUM(`count()`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT `category`, COUNT(*) `count()`\n" + + "FROM (SELECT `count`, `category`, `subcategory`, `value`," + + " SPAN_BUCKET(`value`, 50) `val_bin`\n" + + "FROM `scott`.`bounty_numbers`) `t3`\n" + + "WHERE `val_bin` IS NOT NULL\n" + + "GROUP BY `val_bin`, `category`) `t7`\n" + + "WHERE `category` IS NOT NULL\n" + + "GROUP BY `category`) `t10` ON `t2`.`category` = `t10`.`category`\n" + + "GROUP BY `t2`.`val_bin`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN" + + " `t10`.`_row_number_chart_` <= 10 THEN `t2`.`category` ELSE 'OTHER' END\n" + + "ORDER BY `t2`.`val_bin` NULLS LAST, 2 NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testBinThenChartSingleGroupWithNullValuesLogicalPlan() { + String ppl = + "source=bounty_numbers | bin value span=50 as val_bin | chart count() over val_bin"; + RelNode root = getRelNode(ppl); + // Verify null bin values are filtered and sort uses NULLS LAST + String expectedSparkSql = + "SELECT `val_bin`, COUNT(*) `count()`\n" + + "FROM (SELECT `count`, `category`, `subcategory`, `value`," + + " SPAN_BUCKET(`value`, 50) `val_bin`\n" + + "FROM `scott`.`bounty_numbers`) `t`\n" + + "WHERE `val_bin` IS NOT NULL\n" + + "GROUP BY `val_bin`\n" + + "ORDER BY `val_bin` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @RequiredArgsConstructor + public static class 
BountyNumbersTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("count", SqlTypeName.INTEGER) + .nullable(true) + .add("category", SqlTypeName.VARCHAR) + .nullable(true) + .add("subcategory", SqlTypeName.VARCHAR) + .nullable(true) + .add("value", SqlTypeName.DOUBLE) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(4d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLConvertTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLConvertTest.java index 936b4212f4f..f49a967aa86 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLConvertTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLConvertTest.java @@ -269,4 +269,139 @@ public void testConvertAutoWithMemoryField() { + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testConvertMktimeFunction() { + String ppl = "source=EMP | convert mktime(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[MKTIME($1)], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, MKTIME(`ENAME`) `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`," + + " `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertCtimeFunction() { + String ppl = "source=EMP | convert ctime(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[CTIME($5)]," + + " COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, CTIME(`SAL`) `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertDur2secFunction() { + String ppl = "source=EMP | convert dur2sec(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[DUR2SEC($1)], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, DUR2SEC(`ENAME`) `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`," + + " `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertMstimeFunction() { + String ppl = "source=EMP | convert 
mstime(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[MSTIME($1)], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, MSTIME(`ENAME`) `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`," + + " `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertWithTimeformatMktime() { + String ppl = "source=EMP | convert timeformat=\"%Y-%m-%d\" mktime(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[MKTIME($1, '%Y-%m-%d')], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, MKTIME(`ENAME`, '%Y-%m-%d') `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`," + + " `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertWithTimeformatCtime() { + String ppl = "source=EMP | convert timeformat=\"%Y-%m-%d %H:%M:%S\" ctime(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[CTIME($5," + + " '%Y-%m-%d %H:%M:%S')], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, CTIME(`SAL`, '%Y-%m-%d %H:%M:%S')" + + " `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertTimeformatWithMultipleFunctions() { + String ppl = "source=EMP | convert timeformat=\"%Y-%m-%d\" mktime(ENAME), ctime(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[MKTIME($1, '%Y-%m-%d')], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[CTIME($5, '%Y-%m-%d')], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, MKTIME(`ENAME`, '%Y-%m-%d') `ENAME`, `JOB`, `MGR`, `HIREDATE`," + + " CTIME(`SAL`, '%Y-%m-%d') `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertTimeformatMixedWithNonTimeFunctions() { + String ppl = "source=EMP | convert timeformat=\"%Y-%m-%d\" mktime(ENAME), auto(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[MKTIME($1, '%Y-%m-%d')], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[AUTO($5)], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, MKTIME(`ENAME`, '%Y-%m-%d') `ENAME`, `JOB`, `MGR`, `HIREDATE`, AUTO(`SAL`)" + + " `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java index 13a116a1a00..ca1a789b0f4 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java +++ 
b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java @@ -217,18 +217,75 @@ public void testDedupExpr() { "source=EMP | eval NEW_DEPTNO = DEPTNO + 1 | fields NEW_DEPTNO, EMPNO, ENAME, JOB | sort" + " NEW_DEPTNO | dedup 1 NEW_DEPTNO"; root = getRelNode(ppl); + // Sort is stripped from below the window and moved to the top to ensure order is preserved expectedLogical = - "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" - + " LogicalFilter(condition=[<=($4, 1)])\n" - + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n" - + " LogicalFilter(condition=[IS NOT NULL($0)])\n" - + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + "LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0 NULLS" + + " FIRST)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); } + /** Regression test for https://github.com/opensearch-project/sql/issues/3922 */ + @Test + public void testSortThenDedup() { + String ppl = "source=EMP | sort DEPTNO | dedup 1 JOB | fields DEPTNO, ENAME, JOB"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(DEPTNO=[$7], ENAME=[$1], JOB=[$2])\n" + + " LogicalSort(sort0=[$7], dir0=[ASC-nulls-first])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[<=($8, 1)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION" + + " BY $2 ORDER BY $7 NULLS FIRST)])\n" + + " LogicalFilter(condition=[IS NOT NULL($2)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + // After fix, the sort order (DEPTNO ASC) must be preserved through dedup. + // The correct result has DEPTNO in ascending order: 10, 10, 10, 20, 30. 
+ String expectedResult = + "DEPTNO=10; ENAME=MILLER; JOB=CLERK\n" + + "DEPTNO=10; ENAME=KING; JOB=PRESIDENT\n" + + "DEPTNO=10; ENAME=CLARK; JOB=MANAGER\n" + + "DEPTNO=20; ENAME=SCOTT; JOB=ANALYST\n" + + "DEPTNO=30; ENAME=ALLEN; JOB=SALESMAN\n"; + verifyResult(root, expectedResult); + } + + /** Regression test for https://github.com/opensearch-project/sql/issues/3922 */ + @Test + public void testSortThenDedupWithEval() { + String ppl = + "source=EMP | eval NEW_DEPTNO = DEPTNO + 1 | fields NEW_DEPTNO, EMPNO, ENAME, JOB | sort" + + " NEW_DEPTNO | dedup 1 NEW_DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0 NULLS" + + " FIRST)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + // After fix, the sort order (NEW_DEPTNO ASC) must be preserved through dedup. + // The correct result has NEW_DEPTNO in ascending order: 11, 21, 31. + String expectedResult = + "NEW_DEPTNO=11; EMPNO=7782; ENAME=CLARK; JOB=MANAGER\n" + + "NEW_DEPTNO=21; EMPNO=7369; ENAME=SMITH; JOB=CLERK\n" + + "NEW_DEPTNO=31; EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN\n"; + verifyResult(root, expectedResult); + } + @Test public void testRenameDedup() { String ppl = @@ -261,15 +318,39 @@ public void testRenameDedup() { "source=EMP | eval TEMP_DEPTNO = DEPTNO + 1 | rename TEMP_DEPTNO as NEW_DEPTNO | fields" + " NEW_DEPTNO, EMPNO, ENAME, JOB | sort NEW_DEPTNO | dedup 1 NEW_DEPTNO"; root = getRelNode(ppl); + // Sort is stripped from below the window and moved to the top to ensure order is preserved expectedLogical = - "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" - + " LogicalFilter(condition=[<=($4, 1)])\n" - + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n" - + " LogicalFilter(condition=[IS NOT NULL($0)])\n" - + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + "LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0 NULLS" + + " FIRST)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); } + + /** + * Edge case: sort field is projected away before dedup. The sort collation references a field + * (DEPTNO) that is no longer in the schema after the fields command. The dedup should still work + * correctly but without the sort-restore optimization since the sort field is unavailable. 
+ */ + @Test + public void testSortFieldProjectedAwayBeforeDedup() { + String ppl = "source=EMP | sort DEPTNO | fields ENAME, JOB | dedup 1 JOB"; + RelNode root = getRelNode(ppl); + // No restore Sort at top because DEPTNO was projected away + String expectedLogical = + "LogicalProject(ENAME=[$0], JOB=[$1])\n" + + " LogicalFilter(condition=[<=($2, 1)])\n" + + " LogicalProject(ENAME=[$0], JOB=[$1], _row_number_dedup_=[ROW_NUMBER() OVER" + + " (PARTITION BY $1)])\n" + + " LogicalFilter(condition=[IS NOT NULL($1)])\n" + + " LogicalProject(ENAME=[$1], JOB=[$2])\n" + + " LogicalSort(sort0=[$7], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEnhancedCoalesceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEnhancedCoalesceTest.java index 56141eae584..8e54d45ac98 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEnhancedCoalesceTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEnhancedCoalesceTest.java @@ -138,7 +138,7 @@ public void testCoalesceWithNonExistentField() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalSort(fetch=[2])\n" - + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:VARCHAR, $1)])\n" + + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:NULL, $1)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -155,7 +155,7 @@ public void testCoalesceWithMultipleNonExistentFields() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalSort(fetch=[1])\n" - + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:VARCHAR, null:VARCHAR, $1," + + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:NULL, null:NULL, $1," + " 'fallback':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -175,8 +175,8 @@ public void testCoalesceWithAllNonExistentFields() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalSort(fetch=[1])\n" - + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:VARCHAR, null:VARCHAR," - + " null:VARCHAR)])\n" + + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:NULL, null:NULL," + + " null:NULL)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -235,4 +235,38 @@ public void testCoalesceTypeInferenceWithNonNullableOperands() { + "LIMIT 2"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testCoalesceWithNullLiteralAndInteger() { + // Bug #5175: COALESCE(null, 42) previously inferred VARCHAR because the NULL identifier + // was replaced with null:VARCHAR. The result type should be INTEGER so the value comes + // back as an int. + String ppl = "source=EMP | eval result = coalesce(null, 42) | fields EMPNO, result | head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:NULL, 42)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(NULL, 42) `result`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testCoalesceWithIntegerAndNullLiteral() { + // Bug #5175: COALESCE(42, null) should also be typed as INTEGER, not VARCHAR. 
+ String ppl = "source=EMP | eval result = coalesce(42, null) | fields EMPNO, result | head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], result=[COALESCE(42, null:NULL)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(42, NULL) `result`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java index 70b53d3c6fc..9b37ab5b407 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java @@ -12,6 +12,7 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; +import org.opensearch.sql.common.error.ErrorReport; public class CalcitePPLEvalTest extends CalcitePPLAbstractTest { @@ -337,9 +338,9 @@ public void testComplexEvalCommands4() { "source=EMP | eval col1 = SAL | sort - col1 | head 3 | fields ENAME, col1 | eval col2 =" + " col1 | sort + col2 | fields ENAME, col2 | eval col3 = col2 | head 2 | fields" + " HIREDATE, col3"; - IllegalArgumentException e = + ErrorReport e = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> { RelNode root = getRelNode(ppl); }); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldFormatTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldFormatTest.java index e20bd1b0e47..5bef9c397eb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldFormatTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldFormatTest.java @@ -12,6 +12,7 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; +import org.opensearch.sql.common.error.ErrorReport; public class CalcitePPLFieldFormatTest extends CalcitePPLAbstractTest { @@ -218,9 +219,9 @@ public void testComplexFieldFormatCommands4() { "source=EMP | fieldformat col1 = SAL | sort - col1 | head 3 | fields ENAME, col1 |" + " fieldformat col2 = col1 | sort + col2 | fields ENAME, col2 | fieldformat col3 =" + " col2 | head 2 | fields HIREDATE, col3"; - IllegalArgumentException e = + ErrorReport e = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> { RelNode root = getRelNode(ppl); }); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java index 5d7669d20a1..d1310ce60dd 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java @@ -190,17 +190,21 @@ public void testNoMvInPipeline() { @Test public void testNoMvNonExistentField() { + // After issue #5175 was fixed, missing identifiers inside COALESCE resolve to a null + // literal of SqlTypeName.NULL (instead of VARCHAR). This lets Calcite promote the null + // to the expected array type in ARRAY_COMPACT, so the plan builds successfully and the + // nomv column evaluates to the empty-string fallback from COALESCE. 
String ppl = "source=EMP | eval arr = array('a', 'b') | nomv does_not_exist | head 1"; + RelNode root = getRelNode(ppl); - Exception ex = assertThrows(Exception.class, () -> getRelNode(ppl)); - - String msg = String.valueOf(ex.getMessage()); - org.junit.Assert.assertTrue( - "Expected error message to mention missing field or type error. Actual: " + msg, - msg.toLowerCase().contains("does_not_exist") - || msg.toLowerCase().contains("field") - || msg.contains("ARRAY_COMPACT") - || msg.contains("ARRAY")); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[array('a', 'b')]," + + " does_not_exist=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(null:ANY ARRAY), '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); } @Test diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java index 9967b10543e..879d48bc4de 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java @@ -123,6 +123,34 @@ public void testSpathAutoExtractModeWithFields() { + "FROM `scott`.`EMP`"); } + @Test + public void testSpathAutoExtractWithMultiFieldEval() { + // Issue #5185: eval with multiple dotted-path assignments from MAP column + // should not remove the MAP root field + withPPLQuery( + "source=EMP | spath input=ENAME" + + " | eval ENAME.user.name=ENAME.user.name, ENAME.user.age=ENAME.user.age" + + " | fields ENAME.user.name, ENAME.user.age") + .expectLogical( + "LogicalProject(ENAME.user.name=[ITEM(JSON_EXTRACT_ALL($1), 'user.name')]," + + " ENAME.user.age=[ITEM(JSON_EXTRACT_ALL($1), 'user.age')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"); + } + + @Test + public void testSpathAutoExtractWithSeparateEvalCommands() { + // Issue #5185: separate eval commands with dotted-path assignments from MAP column + withPPLQuery( + "source=EMP | spath input=ENAME" + + " | eval ENAME.user.name=ENAME.user.name" + + " | eval ENAME.user.age=ENAME.user.age" + + " | fields ENAME.user.name, ENAME.user.age") + .expectLogical( + "LogicalProject(ENAME.user.name=[ITEM(JSON_EXTRACT_ALL($1), 'user.name')]," + + " ENAME.user.age=[ITEM(JSON_EXTRACT_ALL($1), 'user.age')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"); + } + @Test public void testSpathAutoExtractModeWithSort() { withPPLQuery("source=EMP | spath input=ENAME output=result" + " | sort result.user.name") diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java index 637e8f19820..2e4b6a605dd 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java @@ -5,6 +5,10 @@ package org.opensearch.sql.ppl.calcite; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; @@ -92,41 +96,21 @@ public void testStreamstatsCurrent() { public void testStreamstatsWindow() { String ppl = "source=EMP | streamstats window = 5 max(SAL) by DEPTNO"; RelNode root = getRelNode(ppl); + // 
Uses self-join plan to avoid nested correlates that cause NPE in Calcite's decorrelator String expectedLogical = "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" + " LogicalSort(sort0=[$8], dir0=[ASC])\n" - + " LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{7," - + " 8}])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8}], max(SAL)=[MAX($11)])\n" + + " LogicalJoin(condition=[AND(>=($9, -($8, 4)), <=($9, $8), IS NOT DISTINCT" + + " FROM($7, $10))], joinType=[left])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n" - + " LogicalAggregate(group=[{}], max(SAL)=[MAX($0)])\n" - + " LogicalProject(SAL=[$5])\n" - + " LogicalFilter(condition=[AND(>=($8, -($cor0.__stream_seq__, 4)), <=($8," - + " $cor0.__stream_seq__), OR(=($7, $cor0.DEPTNO), AND(IS NULL($7), IS" - + " NULL($cor0.DEPTNO))))])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," - + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER" - + " ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(__r_seq__=[ROW_NUMBER() OVER ()], __r_DEPTNO__=[$7]," + + " __r_SAL__=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - - String expectedSparkSql = - "SELECT `$cor0`.`EMPNO`, `$cor0`.`ENAME`, `$cor0`.`JOB`, `$cor0`.`MGR`, `$cor0`.`HIREDATE`," - + " `$cor0`.`SAL`, `$cor0`.`COMM`, `$cor0`.`DEPTNO`, `t3`.`max(SAL)`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__stream_seq__`\n" - + "FROM `scott`.`EMP`) `$cor0`,\n" - + "LATERAL (SELECT MAX(`SAL`) `max(SAL)`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__stream_seq__`\n" - + "FROM `scott`.`EMP`) `t0`\n" - + "WHERE `__stream_seq__` >= `$cor0`.`__stream_seq__` - 4 AND `__stream_seq__` <=" - + " `$cor0`.`__stream_seq__` AND (`DEPTNO` = `$cor0`.`DEPTNO` OR `DEPTNO` IS NULL AND" - + " `$cor0`.`DEPTNO` IS NULL)) `t3`\n" - + "ORDER BY `$cor0`.`__stream_seq__` NULLS LAST"; - verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -223,6 +207,22 @@ public void testStreamstatsReset() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testMultipleStreamstatsWithWindow() { + String ppl = + "source=EMP | streamstats window=2 avg(SAL) as avg_sal by DEPTNO" + + " | streamstats window=2 avg(avg_sal) as avg_dept_sal by DEPTNO"; + RelNode root = getRelNode(ppl); + assertNotNull("Chained streamstats with window should produce a valid plan", root); + // Verify the plan uses self-join (LogicalJoin) instead of LogicalCorrelate + String plan = root.explain(); + assertTrue( + "Plan should contain LogicalJoin for self-join approach", plan.contains("LogicalJoin")); + assertFalse( + "Plan should not contain LogicalCorrelate for window+group streamstats", + plan.contains("LogicalCorrelate")); + } + @Test public void testStreamstatsWithReverse() { String ppl = "source=EMP | streamstats max(SAL) by DEPTNO | reverse"; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java 
b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java index b6b60c530e7..69bc1ae2638 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java @@ -24,12 +24,13 @@ public void testSimpleCountWithTranspose() { + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + " FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5], row" + " 5_null=[MAX($0) FILTER $6])\n" - + " LogicalProject(value=[CAST($3):VARCHAR NOT NULL], $f4=[TRIM(FLAG(BOTH), ' '," + + " LogicalProject(_value_transpose_=[CAST($3):VARCHAR NOT NULL]," + + " $f4=[TRIM(FLAG(BOTH), ' '," + " $2)], $f5=[=($1, 1)], $f6=[=($1, 2)], $f7=[=($1, 3)], $f8=[=($1, 4)], $f9=[=($1," + " 5)])\n" + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + " LogicalProject(c=[$0], _row_number_transpose_=[$1], column=[$2]," - + " value=[CASE(=($2, 'c'), CAST($0):VARCHAR NOT NULL, null:NULL)])\n" + + " _value_transpose_=[CASE(=($2, 'c'), CAST($0):VARCHAR NOT NULL, null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" + " LogicalProject(c=[$0], _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + " LogicalAggregate(group=[{}], c=[COUNT()])\n" @@ -40,18 +41,23 @@ public void testSimpleCountWithTranspose() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + "SELECT TRIM(`column`) `column`, MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + " `_row_number_transpose_` = 5) `row 5`\n" + "FROM (SELECT `t0`.`c`, `t0`.`_row_number_transpose_`, `t1`.`column`, CASE WHEN" - + " `t1`.`column` = 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END `value`\n" + + " `t1`.`column` = 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END" + + " `_value_transpose_`\n" + "FROM (SELECT COUNT(*) `c`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t0`\n" + "CROSS JOIN (VALUES ('c')) `t1` (`column`)) `t2`\n" - + "WHERE `t2`.`value` IS NOT NULL\n" + + "WHERE `t2`.`_value_transpose_` IS NOT NULL\n" + "GROUP BY TRIM(`column`)"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -68,12 +74,13 @@ public void testMultipleAggregatesWithAliasesTranspose() { + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + " FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5], row" + " 5_null=[MAX($0) FILTER $6])\n" - + " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], $f7=[TRIM(FLAG(BOTH), ' '," - + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)], $f11=[=($4, 4)], $f12=[=($4," - + " 5)])\n" + + " 
LogicalProject(_value_transpose_=[CAST($6):VARCHAR NOT NULL]," + + " $f7=[TRIM(FLAG(BOTH), ' '," + + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)], $f11=[=($4, 4)]," + + " $f12=[=($4, 5)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," - + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'avg_sal')," + + " _row_number_transpose_=[$4], column=[$5], _value_transpose_=[CASE(=($5, 'avg_sal')," + " NUMBER_TO_STRING($0), =($5, 'max_sal'), NUMBER_TO_STRING($1), =($5, 'min_sal')," + " NUMBER_TO_STRING($2), =($5, 'cnt'), CAST($3):VARCHAR NOT NULL, null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" @@ -95,18 +102,22 @@ public void testMultipleAggregatesWithAliasesTranspose() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + "SELECT TRIM(`column`) `column`, MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + " `_row_number_transpose_` = 5) `row 5`\n" + "FROM (SELECT `t1`.`avg_sal`, `t1`.`max_sal`, `t1`.`min_sal`, `t1`.`cnt`," + " `t1`.`_row_number_transpose_`, `t2`.`column`, CASE WHEN `t2`.`column` = 'avg_sal'" + " THEN NUMBER_TO_STRING(`t1`.`avg_sal`) WHEN `t2`.`column` = 'max_sal' THEN" + " NUMBER_TO_STRING(`t1`.`max_sal`) WHEN `t2`.`column` = 'min_sal' THEN" + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt' THEN CAST(`t1`.`cnt` AS" - + " STRING) ELSE NULL END `value`\n" + + " STRING) ELSE NULL END `_value_transpose_`\n" + "FROM (SELECT AVG(`SAL`) `avg_sal`, MAX(`SAL`) `max_sal`, MIN(`SAL`) `min_sal`," + " COUNT(*) `cnt`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t1`\n" @@ -114,7 +125,7 @@ public void testMultipleAggregatesWithAliasesTranspose() { + "('max_sal'),\n" + "('min_sal'),\n" + "('cnt')) `t2` (`column`)) `t3`\n" - + "WHERE `t3`.`value` IS NOT NULL\n" + + "WHERE `t3`.`_value_transpose_` IS NOT NULL\n" + "GROUP BY TRIM(`column`)"; /* @@ -152,11 +163,12 @@ public void testTransposeWithLimit() { "LogicalProject(column=[$0], row 1=[$1], row 2=[$2], row 3=[$3])\n" + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + " FILTER $3], row 3_null=[MAX($0) FILTER $4])\n" - + " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], $f7=[TRIM(FLAG(BOTH), ' '," + + " LogicalProject(_value_transpose_=[CAST($6):VARCHAR NOT NULL]," + + " $f7=[TRIM(FLAG(BOTH), ' '," + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," - + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'ENAME')," + + " 
_row_number_transpose_=[$4], column=[$5], _value_transpose_=[CASE(=($5, 'ENAME')," + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" @@ -176,16 +188,18 @@ public void testTransposeWithLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + "SELECT TRIM(`column`) `column`, MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + " `_row_number_transpose_` = 3) `row 3`\n" + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + " `t`.`_row_number_transpose_`, `t0`.`column`, CASE WHEN `t0`.`column` = 'ENAME' THEN" + " CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column` = 'COMM' THEN" + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` = 'JOB' THEN CAST(`t`.`JOB` AS" + " STRING) WHEN `t0`.`column` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END" - + " `value`\n" + + " `_value_transpose_`\n" + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + " `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t`\n" @@ -193,12 +207,26 @@ public void testTransposeWithLimit() { + "('COMM'),\n" + "('JOB'),\n" + "('SAL')) `t0` (`column`)) `t1`\n" - + "WHERE `t1`.`value` IS NOT NULL\n" + + "WHERE `t1`.`_value_transpose_` IS NOT NULL\n" + "GROUP BY TRIM(`column`)"; verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testTransposeWithValueFieldNameCollision() { + // Reproduce issue #5172: hardcoded 'value' unpivot column collides with + // input field named 'value' + String ppl = "source=EMP | stats count() as value, avg(SAL) as avg_sal | transpose"; + RelNode root = getRelNode(ppl); + // The 'value' field from stats should appear correctly in transposed output + // and not be confused with the internal unpivot 'value' column + String expectedResult = + "column=avg_sal; row 1=2073.214285; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=value; row 1=14; row 2=null; row 3=null; row 4=null; row 5=null\n"; + verifyResult(root, expectedResult); + } + @Test public void testTransposeWithLimitColumnName() { String ppl = @@ -208,11 +236,13 @@ public void testTransposeWithLimitColumnName() { "LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3])\n" + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + " FILTER $3], row 3_null=[MAX($0) FILTER $4])\n" - + " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], $f7=[TRIM(FLAG(BOTH), ' '," + + " LogicalProject(_value_transpose_=[CAST($6):VARCHAR NOT NULL]," + + " $f7=[TRIM(FLAG(BOTH), ' '," + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," - + " _row_number_transpose_=[$4], column_names=[$5], value=[CASE(=($5, 'ENAME')," + + " _row_number_transpose_=[$4], column_names=[$5]," + + " _value_transpose_=[CASE(=($5, 'ENAME')," + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 
'JOB')," + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" @@ -231,16 +261,19 @@ public void testTransposeWithLimitColumnName() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column_names`) `column_names`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + "SELECT TRIM(`column_names`) `column_names`," + + " MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`," + + " MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`," + + " MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + " `_row_number_transpose_` = 3) `row 3`\n" + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + " `t`.`_row_number_transpose_`, `t0`.`column_names`, CASE WHEN `t0`.`column_names` =" + " 'ENAME' THEN CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column_names` = 'COMM' THEN" + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column_names` = 'JOB' THEN CAST(`t`.`JOB`" + " AS STRING) WHEN `t0`.`column_names` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE" - + " NULL END `value`\n" + + " NULL END `_value_transpose_`\n" + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + " `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t`\n" @@ -248,7 +281,7 @@ public void testTransposeWithLimitColumnName() { + "('COMM'),\n" + "('JOB'),\n" + "('SAL')) `t0` (`column_names`)) `t1`\n" - + "WHERE `t1`.`value` IS NOT NULL\n" + + "WHERE `t1`.`_value_transpose_` IS NOT NULL\n" + "GROUP BY TRIM(`column_names`)"; verifyPPLToSparkSQL(root, expectedSparkSql); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLUnionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLUnionTest.java new file mode 100644 index 00000000000..a16e0e6a6be --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLUnionTest.java @@ -0,0 +1,591 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import com.google.common.collect.ImmutableList; +import java.sql.Timestamp; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Test; + +public 
class CalcitePPLUnionTest extends CalcitePPLAbstractTest { + + public CalcitePPLUnionTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + + ImmutableList timeData1 = + ImmutableList.of( + new Object[] { + Timestamp.valueOf("2025-08-01 03:47:41"), + 8762, + "A", + Timestamp.valueOf("2025-08-01 03:47:41") + }, + new Object[] { + Timestamp.valueOf("2025-08-01 01:14:11"), + 9015, + "B", + Timestamp.valueOf("2025-08-01 01:14:11") + }, + new Object[] { + Timestamp.valueOf("2025-07-31 23:40:33"), + 8676, + "A", + Timestamp.valueOf("2025-07-31 23:40:33") + }, + new Object[] { + Timestamp.valueOf("2025-07-31 21:07:03"), + 8490, + "B", + Timestamp.valueOf("2025-07-31 21:07:03") + }); + + ImmutableList timeData2 = + ImmutableList.of( + new Object[] { + Timestamp.valueOf("2025-08-01 04:00:00"), + 2001, + "E", + Timestamp.valueOf("2025-08-01 04:00:00") + }, + new Object[] { + Timestamp.valueOf("2025-08-01 02:30:00"), + 2002, + "F", + Timestamp.valueOf("2025-08-01 02:30:00") + }, + new Object[] { + Timestamp.valueOf("2025-08-01 01:00:00"), + 2003, + "E", + Timestamp.valueOf("2025-08-01 01:00:00") + }, + new Object[] { + Timestamp.valueOf("2025-07-31 22:15:00"), + 2004, + "F", + Timestamp.valueOf("2025-07-31 22:15:00") + }); + + ImmutableList nonTimeData = + ImmutableList.of( + new Object[] {1001, "Product A", 100.0}, new Object[] {1002, "Product B", 200.0}); + + schema.add("TIME_DATA1", new TimeDataTable(timeData1)); + schema.add("TIME_DATA2", new TimeDataTable(timeData2)); + schema.add("NON_TIME_DATA", new NonTimeDataTable(nonTimeData)); + + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + @Test + public void testBasicUnionTwoDatasets() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10] " + + "[search source=EMP | where DEPTNO = 20]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 10\n" + + "UNION ALL\n" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 20"; + verifyPPLToSparkSQL(root, expectedSparkSql); + verifyResultCount(root, 8); + } + + @Test + public void testUnionThreeDatasets() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10] " + + "[search source=EMP | where DEPTNO = 20] " + + "[search source=EMP | where DEPTNO = 30]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " 
LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 30)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 10\n" + + "UNION ALL\n" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 20\n" + + "UNION ALL\n" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 30"; + verifyPPLToSparkSQL(root, expectedSparkSql); + verifyResultCount(root, 14); + } + + @Test + public void testUnionCrossIndicesSchemaDifference() { + String ppl = + "| union [search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME," + + " JOB] [search source=DEPT | where DEPTNO = 10 | fields DEPTNO, DNAME, LOC]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], DEPTNO=[null:TINYINT]," + + " DNAME=[null:VARCHAR(14)], LOC=[null:VARCHAR(13)])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n" + + " LogicalFilter(condition=[=($0, 10)])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, CAST(NULL AS TINYINT) `DEPTNO`, CAST(NULL AS STRING)" + + " `DNAME`, CAST(NULL AS STRING) `LOC`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 10\n" + + "UNION ALL\n" + + "SELECT CAST(NULL AS SMALLINT) `EMPNO`, CAST(NULL AS STRING) `ENAME`, CAST(NULL AS" + + " STRING) `JOB`, `DEPTNO`, `DNAME`, `LOC`\n" + + "FROM `scott`.`DEPT`\n" + + "WHERE `DEPTNO` = 10"; + verifyPPLToSparkSQL(root, expectedSparkSql); + verifyResultCount(root, 4); + } + + @Test + public void testUnionWithStats() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | eval type = \"accounting\"] " + + "[search source=EMP | where DEPTNO = 20 | eval type = \"research\"] " + + "| stats count by type"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(count=[$1], type=[$0])\n" + + " LogicalAggregate(group=[{0}], count=[COUNT()])\n" + + " LogicalProject(type=[$8])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], type=['accounting':VARCHAR])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], type=['research':VARCHAR])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT COUNT(*) `count`, `type`\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " 'accounting' `type`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 10\n" + + "UNION ALL\n" + + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, 
`HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " 'research' `type`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 20) `t3`\n" + + "GROUP BY `type`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + verifyResultCount(root, 2); + } + + @Test + public void testUnionDirectTableNames() { + String ppl = "| union EMP, DEPT"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], DNAME=[null:VARCHAR(14)]," + + " LOC=[null:VARCHAR(13)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[CAST($0):TINYINT]," + + " DNAME=[$1], LOC=[$2])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testUnionNonStreamingModeAppend() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME] " + + "[search source=NON_TIME_DATA | fields id, name]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], id=[null:INTEGER], name=[null:VARCHAR])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)], id=[$0]," + + " name=[$1])\n" + + " LogicalTableScan(table=[[scott, NON_TIME_DATA]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testUnionWithMaxout() { + String ppl = + "| union maxout=5 " + + "[search source=EMP | where DEPTNO = 10] " + + "[search source=EMP | where DEPTNO = 20]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSystemLimit(fetch=[5], type=[SUBSEARCH_MAXOUT])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testUnionWithIdenticalSchemasAndFieldProjection() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME], " + + "[search source=EMP | where DEPTNO = 20 | fields EMPNO, ENAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 8); + } + + @Test + public void testUnionAsFirstCommand() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME] " + + "[search source=EMP | where DEPTNO = 20 | fields EMPNO, ENAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" 
+ + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 8); + } + + @Test + public void testUnionWithCompletelyDifferentSchemas() { + String ppl = + "| union " + + "[search source=EMP | fields EMPNO, ENAME] " + + "[search source=DEPT | fields DEPTNO, DNAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], DEPTNO=[null:TINYINT]," + + " DNAME=[null:VARCHAR(14)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)], DEPTNO=[$0]," + + " DNAME=[$1])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 18); + } + + @Test + public void testUnionWithPartialSchemaOverlap() { + String ppl = + "| union " + + "[search source=EMP | fields EMPNO, ENAME, JOB] " + + "[search source=EMP | fields EMPNO, ENAME, SAL]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], SAL=[null:DECIMAL(7, 2)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[null:VARCHAR(9)], SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 28); + } + + @Test + public void testUnionWithFilteredSubsearches() { + String ppl = + "| union " + + "[search source=EMP | where SAL > 2000 | fields EMPNO, ENAME] " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[>($5, 2000)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testUnionPreservesDuplicateRows() { + String ppl = + "| union " + + "[search source=EMP | where EMPNO = 7369 | fields EMPNO, ENAME] " + + "[search source=EMP | where EMPNO = 7369 | fields EMPNO, ENAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($0, 7369)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($0, 7369)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 2); + } + + @Test + public void testUnionWithEmptyDataset() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME] " + + "[search source=EMP | where DEPTNO = 99 | fields EMPNO, ENAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 99)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + 
verifyLogical(root, expectedLogical); + verifyResultCount(root, 3); + } + + @Test + public void testUnionFollowedByAggregation() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME], " + + "[search source=EMP | where DEPTNO = 20 | fields EMPNO, ENAME] " + + "| stats count()"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalAggregate(group=[{}], count()=[COUNT()])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 1); + } + + @Test + public void testUnionFollowedBySort() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME] " + + "[search source=EMP | where DEPTNO = 20 | fields EMPNO, ENAME] " + + "| sort ENAME"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 8); + } + + @RequiredArgsConstructor + static class TimeDataTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("timestamp", SqlTypeName.TIMESTAMP) + .nullable(true) + .add("value", SqlTypeName.INTEGER) + .nullable(true) + .add("category", SqlTypeName.VARCHAR) + .nullable(true) + .add("@timestamp", SqlTypeName.TIMESTAMP) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } + + @RequiredArgsConstructor + static class NonTimeDataTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("id", SqlTypeName.INTEGER) + .nullable(true) + .add("name", SqlTypeName.VARCHAR) + .nullable(true) + .add("value", SqlTypeName.DOUBLE) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + 
@Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index a53e4a5d8dd..e7f3f986752 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -1828,4 +1828,39 @@ public void testEmptyPipeAndTrailingPipeTogether() { public void testMalformedPipeProducesSyntaxError() { plan("source=t | invalidCmd |"); } + + @Test + public void testUnionWithSubsearches() { + plan("| union [search source=t1 | where age > 30] " + "[search source=t2 | where age < 20]"); + } + + @Test + public void testUnionWithDirectTableNames() { + plan("| union t1, t2"); + } + + @Test + public void testUnionWithDateSuffixIndex() { + plan("| union logs-2024.01.01, logs-2024.01.02"); + } + + @Test + public void testUnionWithDottedCatalogPath() { + plan("| union catalog.my_index, catalog.other_index"); + } + + @Test + public void testUnionMidPipeline() { + plan("source=t1 | union t2, t3"); + } + + @Test + public void testUnionWithMaxoutOption() { + plan("| union maxout=500 t1, t2"); + } + + @Test + public void testMaxoutAsFieldName() { + plan("source=t | eval maxout = 1"); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index af10b53defb..ce7a120ff56 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -299,6 +299,24 @@ public void testBooleanIsNotNullFunction() { filter(relation("t"), function("is not null", field("a")))); } + @Test + public void testIsNullPredicate() { + assertEqual( + "source=t | where a is null", filter(relation("t"), function("is null", field("a")))); + assertEqual( + "source=t | where a IS NULL", filter(relation("t"), function("is null", field("a")))); + } + + @Test + public void testIsNotNullPredicate() { + assertEqual( + "source=t | where a is not null", + filter(relation("t"), function("is not null", field("a")))); + assertEqual( + "source=t | where a IS NOT NULL", + filter(relation("t"), function("is not null", field("a")))); + } + /** Todo. search operator should not include functionCall, need to change antlr. 
*/ @Ignore("search operator should not include functionCall, need to change antlr") public void testEvalExpr() { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index de230d208bb..585575b2b24 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -1147,6 +1147,26 @@ public void testConvertCommand() { assertEquals( "source=table | convert (identifier) AS identifier", anonymize("source=t | convert none(empno) AS empno_same")); + assertEquals( + "source=table | convert dur2sec(identifier)", + anonymize("source=t | convert dur2sec(duration)")); + assertEquals( + "source=table | convert mstime(identifier)", + anonymize("source=t | convert mstime(elapsed)")); + assertEquals( + "source=table | convert memk(identifier) AS identifier", + anonymize("source=t | convert memk(virt) AS virt_kb")); + } + + @Test + public void testConvertCommandWithTimeformat() { + assertEquals( + "source=table | convert timeformat=\"%Y-%m-%d\" mktime(identifier)", + anonymize("source=t | convert timeformat=\"%Y-%m-%d\" mktime(date_str)")); + assertEquals( + "source=table | convert timeformat=\"%m/%d/%Y %H:%M:%S\" ctime(identifier) AS identifier", + anonymize( + "source=t | convert timeformat=\"%m/%d/%Y %H:%M:%S\" ctime(ts) AS formatted_time")); } @Test @@ -1160,4 +1180,28 @@ public void testMvexpandCommandWithLimit() { "source=table | mvexpand identifier limit=***", anonymize("source=t | mvexpand skills limit=5")); } + + @Test + public void testUnion() { + assertEquals( + "| union [search source=table | where identifier < ***] [search source=table |" + + " where identifier >= ***]", + anonymize( + "| union [search source=accounts | where age < 30] [search source=accounts" + + " | where age >= 30]")); + + assertEquals( + "| union [search source=table | where identifier > ***] [search source=table |" + + " where identifier = ***]", + anonymize( + "| union [search source=accounts | where balance > 20000] [search" + + " source=accounts | where state = 'CA']")); + + assertEquals( + "| union [search source=table | fields + identifier,identifier] [search" + + " source=table | where identifier = ***]", + anonymize( + "| union [search source=accounts | fields firstname, lastname] [search" + + " source=accounts | where age = 25]")); + } } diff --git a/release-notes/opensearch-sql.release-notes-3.6.0.0.md b/release-notes/opensearch-sql.release-notes-3.6.0.0.md new file mode 100644 index 00000000000..92136767c01 --- /dev/null +++ b/release-notes/opensearch-sql.release-notes-3.6.0.0.md @@ -0,0 +1,67 @@ +## Version 3.6.0.0 Release Notes + +Compatible with OpenSearch and OpenSearch Dashboards version 3.6.0.0 + +### Features +* Update mend config to allow remediation ([#5287](https://github.com/opensearch-project/sql/pull/5287)) +* Add unified query parser API ([#5274](https://github.com/opensearch-project/sql/pull/5274)) +* Add profiling support to unified query API ([#5268](https://github.com/opensearch-project/sql/pull/5268)) +* Add Calcite native SQL planning in UnifiedQueryPlanner ([#5257](https://github.com/opensearch-project/sql/pull/5257)) +* Add query cancellation support via _tasks/_cancel API for PPL queries ([#5254](https://github.com/opensearch-project/sql/pull/5254)) +* Support graphLookup with literal value as its start 
([#5253](https://github.com/opensearch-project/sql/pull/5253)) +* PPL Highlight Support ([#5234](https://github.com/opensearch-project/sql/pull/5234)) +* Support creating/updating prometheus rules ([#5228](https://github.com/opensearch-project/sql/pull/5228)) +* Change the final output result of struct from list to map ([#5227](https://github.com/opensearch-project/sql/pull/5227)) +* added cloudwatch style contains operator ([#5219](https://github.com/opensearch-project/sql/pull/5219)) +* Update graphlookup syntax ([#5209](https://github.com/opensearch-project/sql/pull/5209)) +* Onboard code diff analyzer and reviewer (sql) ([#5183](https://github.com/opensearch-project/sql/pull/5183)) +* Add grammar bundle generation API for PPL language features ([#5162](https://github.com/opensearch-project/sql/pull/5162)) +* Support PPL queries when having trailing pipes and/or empty pipes ([#5161](https://github.com/opensearch-project/sql/pull/5161)) +* Bump ANTLR Version to 4.13.2 ([#5159](https://github.com/opensearch-project/sql/pull/5159)) +* feat: Implement PPL convert command with 5 conversion functions ([#5157](https://github.com/opensearch-project/sql/pull/5157)) +* Make sql plugin aware of FIPS build param (-Pcrypto.standard=FIPS-140-3) ([#5155](https://github.com/opensearch-project/sql/pull/5155)) +* PPL Command: MvExpand ([#5144](https://github.com/opensearch-project/sql/pull/5144)) +* Add auto-extract mode for `spath` command ([#5140](https://github.com/opensearch-project/sql/pull/5140)) +* Support bi-directional graph traversal command `graphlookup` ([#5138](https://github.com/opensearch-project/sql/pull/5138)) +* Add nomv command ([#5130](https://github.com/opensearch-project/sql/pull/5130)) +* Improve resource monitor errors ([#5129](https://github.com/opensearch-project/sql/pull/5129)) +* Support fetch_size API for PPL ([#5109](https://github.com/opensearch-project/sql/pull/5109)) +* LAST/FIRST/TAKE aggregation should support TEXT type and Scripts ([#5091](https://github.com/opensearch-project/sql/pull/5091)) +* fieldformat command implementation ([#5080](https://github.com/opensearch-project/sql/pull/5080)) +* Implement `reverse` performance optimization ([#4775](https://github.com/opensearch-project/sql/pull/4775)) + +### Bug Fixes +* Fix flaky TPC-H Q1 test due to bugs in `MatcherUtils.closeTo()` ([#5283](https://github.com/opensearch-project/sql/pull/5283)) +* Fix typo: rename renameClasue to renameClause ([#5252](https://github.com/opensearch-project/sql/pull/5252)) +* Fix `isnotnull()` not being pushed down when combined with multiple `!=` conditions ([#5238](https://github.com/opensearch-project/sql/pull/5238)) +* Fix memory leak: ExecutionEngine recreated per query appending to global function registry ([#5222](https://github.com/opensearch-project/sql/pull/5222)) +* Fix PIT (Point in Time) resource leaks in v2 query engine ([#5221](https://github.com/opensearch-project/sql/pull/5221)) +* Fix MAP path resolution for `top/rare`, `join`, `lookup` and `streamstats` ([#5206](https://github.com/opensearch-project/sql/pull/5206)) +* Fix #5163: Return null for double overflow to Infinity in arithmetic ([#5202](https://github.com/opensearch-project/sql/pull/5202)) +* Fix MAP path resolution for symbol-based PPL commands ([#5198](https://github.com/opensearch-project/sql/pull/5198)) +* Fix #5176: Return actual null from JSON_EXTRACT for missing/null paths ([#5196](https://github.com/opensearch-project/sql/pull/5196)) +* Fix multisearch UDT type loss through UNION (#5145, #5146, #5147) 
([#5154](https://github.com/opensearch-project/sql/pull/5154)) +* Fix path navigation on map columns for `spath` command ([#5149](https://github.com/opensearch-project/sql/pull/5149)) +* Fix pitest dependency resolution with stable runtime version ([#5143](https://github.com/opensearch-project/sql/pull/5143)) +* Fix #5114: preserve head/TopK semantics for sort-expression pushdown ([#5135](https://github.com/opensearch-project/sql/pull/5135)) +* Fix fallback error handling to show original Calcite error ([#5133](https://github.com/opensearch-project/sql/pull/5133)) +* Fix the bug when boolean comparison condition is simplified to field ([#5071](https://github.com/opensearch-project/sql/pull/5071)) +* Fix issue connecting with prometheus by wrapping with AccessController.doPrivilegedChecked ([#5061](https://github.com/opensearch-project/sql/pull/5061)) + +### Infrastructure +* Add gradle.properties file to build sql with -Pcrypto.standard=FIPS-140-3 by default ([#5231](https://github.com/opensearch-project/sql/pull/5231)) +* Fix the flaky yamlRestTest caused by order of sample_logs ([#5119](https://github.com/opensearch-project/sql/pull/5119)) +* Fix the filter of integTestWithSecurity ([#5098](https://github.com/opensearch-project/sql/pull/5098)) + +### Documentation +* Apply docs website feedback to ppl functions ([#5207](https://github.com/opensearch-project/sql/pull/5207)) + +### Maintenance +* Move some maintainers from active to Emeritus ([#5260](https://github.com/opensearch-project/sql/pull/5260)) +* Add CLAUDE.md ([#5259](https://github.com/opensearch-project/sql/pull/5259)) +* Add songkant-aws as maintainer ([#5244](https://github.com/opensearch-project/sql/pull/5244)) +* Add ahkcs as maintainer ([#5223](https://github.com/opensearch-project/sql/pull/5223)) +* Fix bc-fips jar hell by marking dependency as compileOnly ([#5158](https://github.com/opensearch-project/sql/pull/5158)) +* Revert dynamic column support ([#5139](https://github.com/opensearch-project/sql/pull/5139)) +* Increment version to 3.6.0-SNAPSHOT ([#5115](https://github.com/opensearch-project/sql/pull/5115)) +* Upgrade assertj-core to 3.27.7 ([#5100](https://github.com/opensearch-project/sql/pull/5100)) diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 5f7361160b3..6b34507eacc 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -109,8 +109,18 @@ fromClause ; relation - : tableName (AS? alias)? # tableAsRelation - | LR_BRACKET subquery = querySpecification RR_BRACKET AS? alias # subqueryAsRelation + : tableName (AS? alias)? # tableAsRelation + | LR_BRACKET subquery = querySpecification RR_BRACKET AS? alias # subqueryAsRelation + | qualifiedName LR_BRACKET tableFunctionArgs RR_BRACKET (AS? alias)? 
# tableFunctionRelation + ; + +tableFunctionArgs + : tableFunctionArg (COMMA tableFunctionArg)* + ; + +tableFunctionArg + : ident EQUAL_SYMBOL functionArg + | functionArg ; whereClause diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java index bdbc360713c..5250ab7fb0f 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java @@ -13,6 +13,7 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.SelectElementContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.SubqueryAsRelationContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.TableAsRelationContext; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.TableFunctionRelationContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.WhereClauseContext; import static org.opensearch.sql.sql.parser.ParserUtils.getTextInQuery; import static org.opensearch.sql.utils.SystemIndexUtils.TABLE_INFO; @@ -20,12 +21,14 @@ import com.google.common.collect.ImmutableList; import java.util.Collections; +import java.util.Locale; import java.util.Optional; import lombok.RequiredArgsConstructor; import org.antlr.v4.runtime.tree.ParseTree; import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.tree.DescribeRelation; import org.opensearch.sql.ast.tree.Filter; @@ -34,10 +37,12 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.RelationSubquery; import org.opensearch.sql.ast.tree.SubqueryAlias; +import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.utils.StringUtils; +import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.QuerySpecificationContext; @@ -189,6 +194,57 @@ public UnresolvedPlan visitSubqueryAsRelation(SubqueryAsRelationContext ctx) { return new RelationSubquery(visit(ctx.subquery), subqueryAlias); } + @Override + public UnresolvedPlan visitTableFunctionRelation(TableFunctionRelationContext ctx) { + // The grammar accepts both `ident = value` and bare `value` forms for each table function + // argument so that the real positional shape (e.g. `vectorSearch('idx', field='f', ...)`) + // reaches this V2 builder instead of failing to parse and silently falling back to the + // legacy SQL engine. Reject the positional shape here with a SemanticCheckException so the + // user receives a clean 400 rather than an opaque legacy parser error. + ctx.tableFunctionArgs() + .tableFunctionArg() + .forEach( + arg -> { + if (arg.ident() == null) { + String functionName = ctx.qualifiedName().getText(); + throw new SemanticCheckException( + String.format( + Locale.ROOT, + "Table function '%s' requires named arguments (e.g. 
name='value')," + + " but received a positional argument: %s", + functionName, + arg.functionArg().getText())); + } + }); + ImmutableList.Builder args = ImmutableList.builder(); + ctx.tableFunctionArgs() + .tableFunctionArg() + .forEach( + arg -> { + String argName = + StringUtils.unquoteIdentifier(arg.ident().getText()).toLowerCase(Locale.ROOT); + UnresolvedExpression argValue = visitAstExpression(arg.functionArg()); + args.add(new UnresolvedArgument(argName, argValue)); + }); + TableFunction tableFunction = + new TableFunction(visitAstExpression(ctx.qualifiedName()), args.build()); + if (ctx.alias() == null) { + String functionName = ctx.qualifiedName().getText(); + // Use SemanticCheckException (not SyntaxCheckException) so the request does not fall back + // to the legacy SQL engine, whose opaque parser error would mask this message. + throw new SemanticCheckException( + String.format( + Locale.ROOT, + "Table function '%s' requires a table alias." + + " Add an alias after the closing parenthesis, for example:" + + " FROM %s(...) AS v", + functionName, + functionName)); + } + String alias = StringUtils.unquoteIdentifier(ctx.alias().getText()); + return new SubqueryAlias(alias, tableFunction); + } + @Override public UnresolvedPlan visitWhereClause(WhereClauseContext ctx) { return new Filter(visitAstExpression(ctx.expression())); diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java index 1ecaa181e6f..695cf85b144 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java @@ -6,6 +6,8 @@ package org.opensearch.sql.sql.parser; import static java.util.Collections.emptyList; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.ast.dsl.AstDSL.agg; @@ -40,7 +42,11 @@ import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.NestedAllTupleFields; +import org.opensearch.sql.ast.expression.UnresolvedArgument; +import org.opensearch.sql.ast.tree.SubqueryAlias; +import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.exception.SemanticCheckException; class AstBuilderTest extends AstBuilderTestBase { @@ -131,6 +137,142 @@ public void can_build_from_index_with_alias_quoted() { buildAST("SELECT `t`.name FROM test `t` WHERE `t`.age = 30")); } + @Test + public void can_build_from_table_function() { + assertEquals( + project( + new SubqueryAlias( + "v", + new TableFunction( + qualifiedName("vectorSearch"), + ImmutableList.of( + new UnresolvedArgument("table", stringLiteral("products")), + new UnresolvedArgument("field", stringLiteral("embedding")), + new UnresolvedArgument("vector", stringLiteral("[0.1,0.2]")), + new UnresolvedArgument("option", stringLiteral("k=10"))))), + AllFields.of()), + buildAST( + "SELECT * FROM vectorSearch(" + + "table='products', field='embedding', " + + "vector='[0.1,0.2]', option='k=10') AS v")); + } + + @Test + public void can_build_from_table_function_with_where_order_limit() { + assertEquals( + project( + limit( + sort( + filter( + new SubqueryAlias( + "s", + new TableFunction( + qualifiedName("vectorSearch"), + 
ImmutableList.of( + new UnresolvedArgument("table", stringLiteral("products")), + new UnresolvedArgument("field", stringLiteral("embedding")), + new UnresolvedArgument("vector", stringLiteral("[0.1,0.2]")), + new UnresolvedArgument("option", stringLiteral("k=10"))))), + function("=", qualifiedName("s", "category"), stringLiteral("shoes"))), + field(qualifiedName("s", "_score"), argument("asc", booleanLiteral(false)))), + 5, + 0), + alias("s.title", qualifiedName("s", "title")), + alias("s._score", qualifiedName("s", "_score"))), + buildAST( + "SELECT s.title, s._score FROM vectorSearch(" + + "table='products', field='embedding', " + + "vector='[0.1,0.2]', option='k=10') AS s " + + "WHERE s.category = 'shoes' " + + "ORDER BY s._score DESC " + + "LIMIT 5")); + } + + @Test + public void table_function_args_are_resolved_by_name_not_position() { + assertEquals( + project( + new SubqueryAlias( + "v", + new TableFunction( + qualifiedName("vectorSearch"), + ImmutableList.of( + new UnresolvedArgument("option", stringLiteral("k=10")), + new UnresolvedArgument("field", stringLiteral("embedding")), + new UnresolvedArgument("table", stringLiteral("products")), + new UnresolvedArgument("vector", stringLiteral("[0.1,0.2]"))))), + AllFields.of()), + buildAST( + "SELECT * FROM vectorSearch(" + + "option='k=10', field='embedding', " + + "table='products', vector='[0.1,0.2]') AS v")); + } + + @Test + public void table_function_arg_names_are_canonicalized() { + assertEquals( + project( + new SubqueryAlias( + "v", + new TableFunction( + qualifiedName("vectorSearch"), + ImmutableList.of( + new UnresolvedArgument("table", stringLiteral("products")), + new UnresolvedArgument("field", stringLiteral("embedding")), + new UnresolvedArgument("vector", stringLiteral("[0.1,0.2]")), + new UnresolvedArgument("option", stringLiteral("k=10"))))), + AllFields.of()), + buildAST( + "SELECT * FROM vectorSearch(" + + "TABLE='products', FIELD='embedding', " + + "VECTOR='[0.1,0.2]', OPTION='k=10') AS v")); + } + + @Test + public void table_function_allows_alias_without_as_keyword() { + assertEquals( + project( + new SubqueryAlias( + "v", + new TableFunction( + qualifiedName("vectorSearch"), + ImmutableList.of( + new UnresolvedArgument("table", stringLiteral("products")), + new UnresolvedArgument("vector", stringLiteral("[0.1]"))))), + AllFields.of()), + buildAST("SELECT * FROM vectorSearch(table='products', vector='[0.1]') v")); + } + + @Test + public void table_function_relation_requires_alias() { + SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> + buildAST( + "SELECT * FROM vectorSearch(" + + "table='products', field='embedding', " + + "vector='[0.1,0.2]', option='k=10')")); + assertThat(ex.getMessage(), containsString("requires a table alias")); + assertThat(ex.getMessage(), containsString("vectorSearch")); + } + + @Test + public void table_function_relation_rejects_positional_argument() { + // Grammar accepts both `ident=value` and bare `value` for each table function argument so + // the real positional shape reaches the V2 AstBuilder. The AstBuilder must reject it with a + // SemanticCheckException rather than let the request fall back to the legacy engine. 
+ SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> + buildAST( + "SELECT * FROM vectorSearch('products', field='embedding', " + + "vector='[0.1,0.2]', option='k=10') AS v")); + assertThat(ex.getMessage(), containsString("requires named arguments")); + } + @Test public void can_build_where_clause() { assertEquals(