diff --git a/CHANGELOG.md b/CHANGELOG.md index bf3008f..41598a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,77 @@ # Changelog +## [3.1.0] + +> New aggregate `COLLECT_OBJECT` for `GROUP BY` queries — collects rows in +> each group into an array of structured objects via an inner mini-SELECT +> with optional `ORDER BY`. Plugs into the existing aggregate pipeline +> alongside `COUNT` / `SUM` / `GROUP_CONCAT`; no breaking changes to other +> aggregates or to the public API. + +### Added + +- **`COLLECT_OBJECT(...)` aggregate function.** Per `GROUP BY` group, returns + `array<int, array<string, mixed>>` — each row in the group projected through an + inner SELECT (with field aliases) and optionally sorted by an inner + `ORDER BY` (ASC/DESC, multi-key). Inner items accept scalar functions + (`CONCAT`, `ROUND`, `IF`, `COALESCE`, `UPPER`, `LOWER`, …), arithmetic + expressions (`price * 1.21 AS price_with_vat`), and aliases. + FQL grammar: + `COLLECT_OBJECT(expr [AS alias], … [ORDER BY expr [ASC|DESC], …])`. +- **`FQL\Query\Builder\CollectObject` fluent builder.** Tiny chainable DSL — + `select(string ...$fields)` (full FQL expression syntax, accepts inline + `"expr AS alias"` and comma-separated lists via `FieldListSplitter`), + `as(string $alias)` for the idiomatic main-Query-style aliasing, plus the + `orderBy/asc/desc` triple inherited from `Sortable`. Used as + `$query->collectObject((new CollectObject())->select('id')->as('i')->orderBy('name'))->as('alias')`. +- **`FQL\Sql\Ast\Expression\WholeRowNode`.** New AST node that the + `ExpressionEvaluator` resolves to the entire source `$item`. Used as the + `spec.expression` of `COLLECT_OBJECT`, so the standard + `Stream::applyGrouping` path — `$evaluator->evaluate(spec.expression, $item)` + followed by `$class::accumulate($acc, $value)` — automatically delivers the + whole row as the aggregate's value, with no special case in the Stream + pipeline. 
`CollectObject` is a plain `AggregateFunction` and finalises by + running a one-off `Query` over a `ResultStreamProvider` of the collected + rows — full SELECT/ORDER BY pipeline reuse, no parallel evaluator state. + +### Changed + +- `ExpressionEvaluator` learned to evaluate `WholeRowNode` (returns the + source `$item`). `Stream` aggregate grouping path is unchanged. +- **`Traits\Sortable` return types changed from `Query` to `static`.** Same + for the corresponding `Interface\Query` signatures (`orderBy`, `sortBy`, + `asc`, `desc`). Existing fluent chains on `Query` keep their behaviour + (Query continues to return itself); the change unlocks `use Sortable;` in + builders that aren't full `Query` objects — `Builder\CollectObject` now + inherits the trio instead of duplicating it. +- `OrderByClauseParser::parseItem()` is now public, enabling `ORDER BY` item + reuse inside expression contexts (used by `COLLECT_OBJECT(... ORDER BY …)`). +- `ExpressionParser` gained a lazy `setOrderByParser()` setter, wired in + `Parser::create()` (full FQL statements) and + `Sql\Provider::freshExpressionParser()` (fragment parsers used by the + fluent API). `ExpressionCompiler` learned to render + `CollectObjectExpressionNode` so SELECT round-trip (compile → string → + re-parse) works for FQL-string inputs. + +### Notes + +- **Empty groups** produce no output row (consistent with the other + aggregates). +- **`ORDER BY` inside `COLLECT_OBJECT` recognises projected aliases** — + finalisation runs as a full `Query` over the accumulated rows, so the + ordering clause sees both source columns and the aliases declared inside + `COLLECT_OBJECT(...)`. Standard SQL semantics. +- **Null values propagate** into the produced objects (unlike `SUM` / `AVG`, + which skip them). +- **Stable sort** preserves accumulation order on ties. 
+- **Aggregates inside `COLLECT_OBJECT`** are supported but rarely useful — + inner aggregates collapse the accumulated rows to a single output object, + so `COLLECT_OBJECT(SUM(x))` yields an array of length 1. Prefer scalar + aggregates at the outer level alongside `COLLECT_OBJECT` for per-group + summary numbers. +- **Out of MVP scope** (rejected with a clear exception): `DISTINCT`, + `LIMIT`, `WHERE` inside `COLLECT_OBJECT`, and nested `COLLECT_OBJECT`. + ## [3.0.2] ### Fixed diff --git a/README.md b/README.md index f867436..7d3ce27 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,8 @@ ![Packagist Dependency Version](https://img.shields.io/packagist/dependency-v/1biot/fiquela/php) ![Packagist License](https://img.shields.io/packagist/l/1biot/fiquela) -![Coverage](https://img.shields.io/badge/coverage-90.22%25-lightgreen) -![PHPUnit Tests](https://img.shields.io/badge/PHPUnit-tests%3A_1286-lightgreen) +![Coverage](https://img.shields.io/badge/coverage-90.19%25-lightgreen) +![PHPUnit Tests](https://img.shields.io/badge/PHPUnit-tests%3A_1302-lightgreen) ![PHPStan](https://img.shields.io/badge/phpstan-level_8-lightgreen) **FiQueLa** brings SQL querying to structured files. Filter, join, group, aggregate, and export data from XML, CSV, JSON, NDJSON, YAML, NEON, XLSX, ODS, and HTTP access logs — using familiar SQL syntax or a fluent PHP API. 
@@ -356,15 +356,19 @@ fiquela-cli "SELECT name, price FROM csv(data.csv).* WHERE price > 100;" Full documentation at **[docs.fiquela.io](https://docs.fiquela.io)** -- [Quickstart](https://docs.fiquela.io/quickstart) -- [FQL Syntax](https://docs.fiquela.io/querying/fql-syntax) -- [Fluent API](https://docs.fiquela.io/querying/fluent-api) -- [Joins](https://docs.fiquela.io/querying/joins) -- [Conditions](https://docs.fiquela.io/querying/conditions) -- [Functions](https://docs.fiquela.io/functions/string-functions) -- [EXPLAIN ANALYZE](https://docs.fiquela.io/advanced/explain-analyze) -- [Export with INTO](https://docs.fiquela.io/advanced/export-into) -- [API Reference](docs/api-reference.md) +- [x] ~~**Operator BETWEEN**: Add operator `BETWEEN` for filtering data and add support for dates and ranges.~~ +- [x] ~~**XLS/XLSX**: Add Excel file support.~~ +- [x] ~~**Custom cast type**: Add support for custom cast type for `SELECT` clause.~~ +- [x] ~~**Add explain method**: Add method `explain()` for explaining query execution from actual query debugger and provide more complex information about query.~~ +- [x] ~~**PHPStan 8**: Fix all PHPStan 8 errors.~~ +- [x] ~~**Tests**: Increase test coverage (80%+).~~ +- [x] ~~**Optimize GROUP BY**: Optimize `GROUP BY` for more memory efficient data processing.~~ +- [x] ~~**DELETE, UPDATE, INSERT**: Support for manipulating data in files.~~ ~~- Instead of this, it will comes support +for exporting data to files (CSV, NDJson, MessagePack, and more...) 
by `INTO` clause.~~ +- [x] ~~**Documentation**: Create detailed guides and examples for advanced use cases.~~ - [docs.fiquela.io](https://docs.fiquela.io) +- [x] ~~**Tests**: Increase test coverage (90%+).~~ +- [ ] **Next file formats**: Add next file formats [MessagePack](https://msgpack.org/), [Parquet](https://parquet.apache.org/docs/file-format/), [INI](https://en.wikipedia.org/wiki/INI_file) and [TOML](https://toml.io/en/) +- [ ] **Hashmap cache**: Add hashmap cache (Redis, Memcache) for more memory efficient data processing. --- diff --git a/docs/file-query-language.md b/docs/file-query-language.md index ba30fa6..95c203b 100644 --- a/docs/file-query-language.md +++ b/docs/file-query-language.md @@ -503,14 +503,15 @@ WHERE ### Aggregations -| Function | Description | -|-----------------|--------------------| -| `COUNT` | Count rows | -| `SUM` | Sum values | -| `AVG` | Average values | -| `MIN` | Minimum value | -| `MAX` | Maximum value | -| `GROUP_CONCAT` | Concatenate values | +| Function | Description | +|-------------------|------------------------------------------------| +| `COUNT` | Count rows | +| `SUM` | Sum values | +| `AVG` | Average values | +| `MIN` | Minimum value | +| `MAX` | Maximum value | +| `GROUP_CONCAT` | Concatenate values | +| `COLLECT_OBJECT` | Collect rows as an array of structured objects | Aggregate functions support `DISTINCT` in the same way as SQL, for example `COUNT(DISTINCT id)`. @@ -533,6 +534,93 @@ HAVING OR maxPrice < 500 ``` +### COLLECT_OBJECT + +`COLLECT_OBJECT` accumulates source rows within a `GROUP BY` group into an **array of objects** +(`array<int, array<string, mixed>>`). Each object contains the fields (and optional aliases) declared +inside the function. You can apply scalar functions and an inner `ORDER BY` to shape and order the +collected rows. The result is suitable for producing nested, JSON-like structures directly from FQL. + +```sql +COLLECT_OBJECT( + field_expr [AS alias] [, field_expr [AS alias]] ... 
+ [ORDER BY order_expr [ASC | DESC] [, order_expr [ASC | DESC]] ...] +) [AS alias] +``` + +**Minimal example** — select a few fields with aliases: + +```sql +SELECT + categoryId, + categoryName, + COLLECT_OBJECT( + productId AS id, + productName AS name, + price + ) AS products +FROM csv(./examples/data/products.csv).* +GROUP BY categoryId +``` + +**Complex example** — scalar functions and multi-key ORDER BY: + +```sql +SELECT + categoryId, + COLLECT_OBJECT( + productId AS id, + CONCAT(productName, " (", code, ")") AS label, + ROUND(price, 2) AS price + ORDER BY price DESC, productName ASC + ) AS products +FROM csv(./examples/data/products.csv).* +GROUP BY categoryId +``` + +**Example output:** + +```json +[ + { + "categoryId": 1, + "categoryName": "Phones", + "products": [ + {"id": 100, "name": "iPhone", "price": 23990.0}, + {"id": 101, "name": "Samsung", "price": 21990.0} + ] + } +] +``` + +#### Semantics + +- **Empty group** — produces no output row (consistent with all other aggregate functions). +- **Single-row group** — produces an array of length 1, not a scalar or nullable value. +- **`null` propagation** — unlike `SUM`/`AVG`, which skip nulls, `COLLECT_OBJECT` stores whatever + the expression evaluator returns, including `null`. +- **Stable sort** — rows with equal sort keys preserve their accumulation (source) order. +- **ORDER BY recognises projected aliases** — the inner SELECT runs as a real query over the + accumulated rows, so `ORDER BY` sees both the source columns and the aliases declared inside + `COLLECT_OBJECT(...)`. Standard SQL semantics: + + ```sql + COLLECT_OBJECT( + ROUND(price, 2) AS roundedPrice + ORDER BY roundedPrice DESC + ) AS products + ``` + +#### Limitations (outside MVP) + +The following features are not supported. 
Attempting to use them raises a `ParseException` or +`InvalidArgumentException` at parse/build time: + +- `DISTINCT` inside `COLLECT_OBJECT` +- `LIMIT` inside `COLLECT_OBJECT` +- `WHERE` inside `COLLECT_OBJECT` +- Nested `COLLECT_OBJECT(COLLECT_OBJECT(...))` + ## 8. Sorting and Filtering ```sql diff --git a/docs/fluent-api.md b/docs/fluent-api.md index eeefa3c..4c0696a 100644 --- a/docs/fluent-api.md +++ b/docs/fluent-api.md @@ -459,7 +459,7 @@ AND ( ## 5. Grouping and Aggregations Use the `groupBy()` method to group the data in your query results. You can use the `having()` method to filter the grouped data. -Also, you can use these aggregations functions `count()`, `sum()`, `avg()`, `min()`, `max()` and `groupConcat()` methods to aggregate the data. +Also, you can use the aggregation methods `count()`, `sum()`, `avg()`, `min()`, `max()`, `groupConcat()` and `collectObject()` to aggregate the data. `count()`, `sum()`, `min()`, `max()` and `groupConcat()` accept a `bool $distinct` parameter. `groupBy()` is last method that using dot notation for nested fields. 
@@ -472,14 +472,15 @@ $query->groupBy('category.id'); ### Aggregations -| Function | Description | -|---------------|-----------------------------| -| `count` | Count rows | -| `sum` | Sum values | -| `avg` | Average values | -| `min` | Minimum value | -| `max` | Maximum value | -| `groupConcat` | Concatenate values | +| Function | Description | +|-----------------|------------------------------------------------| +| `count` | Count rows | +| `sum` | Sum values | +| `avg` | Average values | +| `min` | Minimum value | +| `max` | Maximum value | +| `groupConcat` | Concatenate values | +| `collectObject` | Collect rows as an array of structured objects | **Example:** @@ -503,6 +504,117 @@ $query->count('category.id', true)->as('COUNT_DISTINCT') ->groupConcat('name', ',', true)->as('GROUP_CONCAT_DISTINCT'); ``` +### COLLECT_OBJECT + +`collectObject()` accumulates source rows within a `GROUP BY` group into an **array of objects** +(`array<int, array<string, mixed>>`). Build the inner projection using `CollectObject` — a small +fluent builder with three surfaces: `select(...)` for inner SELECT items, `as(...)` to alias the +last selected item (mirrors the main Query pattern), and the `orderBy/asc/desc` trio inherited +from `Sortable` for optional inner ordering. 
+ +**Minimal example** — plain field selection with aliases: + +```php +use FQL\Query\Builder\CollectObject; + +$query->collectObject( + (new CollectObject()) + ->select('productId')->as('id') + ->select('productName')->as('name') + ->select('price') +)->as('products') + ->groupBy('categoryId'); +``` + +**Complex example** — scalar functions and multi-key ORDER BY: + +```php +use FQL\Query\Builder\CollectObject; + +$query->select('categoryId') + ->collectObject( + (new CollectObject()) + ->select('productId')->as('id') + ->select('CONCAT(productName, " (", code, ")")')->as('label') + ->select('ROUND(price, 2)')->as('price') + ->orderBy('price')->desc() + ->orderBy('productName')->asc() + )->as('products') + ->groupBy('categoryId'); +``` + +**Combined with other aggregations:** + +```php +use FQL\Query\Builder\CollectObject; + +$query->select('categoryId', 'categoryName') + ->count('productId')->as('total') + ->sum('price')->as('totalPrice') + ->collectObject( + (new CollectObject()) + ->select('productId')->as('id') + ->select('productName')->as('name') + ->select('ROUND(price, 2)')->as('price') + ->orderBy('price')->desc() + )->as('products') + ->groupBy('categoryId'); +``` + +**Compact form** — `select()` also accepts inline aliases and comma-separated multi-field +strings via `FieldListSplitter`, so a whole projection can fit in a single call: + +```php +(new CollectObject()) + ->select('productId AS id, productName AS name, ROUND(price, 2) AS price') + ->orderBy('price')->desc() +``` + +#### CollectObject builder methods + +| Method | Description | +|-------------------------------|---------------------------------------------------------------------------------------------------------------------| +| `select(string ...$fields)` | Add one or more inner SELECT items. Each argument can be a single expression, an `expr AS alias`, or a comma list. | +| `as(string $alias)` | Alias the most recently added select item. 
Throws `LogicException` if called before any `select()`. | +| `orderBy($field, Sort $dir)` | Add an ORDER BY key (`$dir` defaults to `Sort::ASC`). Inherited from `Sortable`. | +| `asc()` | Set the last ORDER BY key to ascending. | +| `desc()` | Set the last ORDER BY key to descending. | + +Any scalar function known to the parser — `CONCAT`, `ROUND`, `IF`, `COALESCE`, `UPPER`, `LOWER`, +arithmetic, … — works inside the `select()` string: + +```php +(new CollectObject()) + ->select('IF(stock > 0, "in stock", "sold out")')->as('availability') +``` + +#### Semantics + +- **Empty group** — produces no output row (consistent with all other aggregate functions). +- **Single-row group** — produces an array of length 1, not a scalar or nullable value. +- **`null` propagation** — unlike `sum()`/`avg()`, which skip nulls, `collectObject()` stores + whatever the expression evaluator returns, including `null`. +- **Stable sort** — rows with equal sort keys preserve their accumulation (source) order. +- **`orderBy()` recognises projected aliases** — internally the builder runs a full Query over + the accumulated rows, so `orderBy()` can reference both source columns and aliases declared + via `->as(...)` (or inline `AS` inside the `select()` string). Standard SQL semantics: + + ```php + (new CollectObject()) + ->select('ROUND(price, 2)')->as('roundedPrice') + ->orderBy('roundedPrice')->desc() + ``` + +#### Limitations (outside MVP) + +The following features are not supported. Attempting to use them raises an +`InvalidArgumentException` at build time: + +- `DISTINCT` inside the `CollectObject` builder +- `LIMIT` inside the `CollectObject` builder +- `WHERE` inside the `CollectObject` builder +- Nested `collectObject()` calls inside another `CollectObject` + ## 6. Sorting Use the `orderBy()` method to sort the data in your query results. You can use the `limit()` and `offset()` methods to filter the data. 
diff --git a/src/Functions/Aggregate/CollectObject.php b/src/Functions/Aggregate/CollectObject.php new file mode 100644 index 0000000..01ac58c --- /dev/null +++ b/src/Functions/Aggregate/CollectObject.php @@ -0,0 +1,114 @@ +>`. Empty group → `[]`. + * + * Implementation strategy: pure-lazy accumulation. The Stream pipeline hands + * each row in as the `accumulate()` value (because the aggregate's + * `spec.expression` is {@see \FQL\Sql\Ast\Expression\WholeRowNode}, which the + * evaluator resolves to the source `$item`). All real work happens in + * {@see finalize()}, which spins up a full {@see \FQL\Query\Query} pipeline + * over a {@see ResultStreamProvider} wrapping the accumulated rows and lets + * the existing engine handle expression evaluation, aliasing, and ordering. + * No bespoke evaluator/sort loop here, no marker interface — `CollectObject` + * is a perfectly ordinary `AggregateFunction`. + */ +final class CollectObject implements AggregateFunction +{ + /** + * Returns the FQL function name of this aggregate. + */ + public static function name(): string + { + return 'COLLECT_OBJECT'; + } + + /** + * Creates the empty accumulator. The inner SELECT items and ORDER BY keys + * travel inside the accumulator so {@see finalize()} can apply them; the + * `distinct` option is part of the accepted shape but is not read here. + * + * @param array{ + * selectItems?: list<array{key: string, expression: \FQL\Sql\Ast\Expression\ExpressionNode}>, + * orderings?: list<\FQL\Sql\Ast\Node\OrderByItemNode>, + * distinct?: bool + * } $options + * @return array{ + * rows: list<array<string, mixed>>, + * selectItems: list<array{key: string, expression: \FQL\Sql\Ast\Expression\ExpressionNode}>, + * orderings: list<\FQL\Sql\Ast\Node\OrderByItemNode> + * } + */ + public static function initial(array $options = []): array + { + return [ + 'rows' => [], + 'selectItems' => $options['selectItems'] ?? [], + 'orderings' => $options['orderings'] ?? [], + ]; + } + + /** + * Pushes the source row into the accumulator. `$value` is the entire + * `$item` row — the aggregate spec's `expression` is a + * {@see \FQL\Sql\Ast\Expression\WholeRowNode}, which the evaluator + * resolves to the row itself. 
+ * Values that are not arrays are ignored. + * + * @param array{rows: list<array<string, mixed>>, selectItems: list<array{key: string, expression: \FQL\Sql\Ast\Expression\ExpressionNode}>, orderings: list<\FQL\Sql\Ast\Node\OrderByItemNode>} $acc + * @return array{rows: list<array<string, mixed>>, selectItems: list<array{key: string, expression: \FQL\Sql\Ast\Expression\ExpressionNode}>, orderings: list<\FQL\Sql\Ast\Node\OrderByItemNode>} + */ + public static function accumulate(mixed $acc, mixed $value): array + { + if (is_array($value)) { + $acc['rows'][] = $value; + } + return $acc; + } + + /** + * Builds a one-off in-memory Query over the accumulated rows, applies the + * inner SELECT and ORDER BY, and returns the result as a plain list of + * objects. The whole projection/ordering pipeline is reused — no custom + * evaluator or sort code lives here. + * + * @param array{rows: list<array<string, mixed>>, selectItems: list<array{key: string, expression: \FQL\Sql\Ast\Expression\ExpressionNode}>, orderings: list<\FQL\Sql\Ast\Node\OrderByItemNode>} $acc + * @return list<array<string, mixed>> + */ + public static function finalize(mixed $acc): array + { + // Nothing collected, or no inner SELECT to apply: return the raw rows + // unchanged (an empty list when the group accumulated nothing). + if ($acc['rows'] === [] || $acc['selectItems'] === []) { + return $acc['rows']; + } + + $compiler = new ExpressionCompiler(); + /** @var \ArrayIterator<int, array<string, array<int|string, mixed>|scalar|null>> $iterator */ + $iterator = new \ArrayIterator($acc['rows']); + $stream = new ResultStreamProvider($iterator); + $query = $stream->query(); + + foreach ($acc['selectItems'] as $item) { + $rendered = $compiler->renderExpression($item['expression']); + $query->select($rendered); + // Alias only when the output key differs from the rendered expression. + if ($item['key'] !== $rendered) { + $query->as($item['key']); + } + } + + foreach ($acc['orderings'] as $ord) { + // orderBy() takes the direction itself, so no follow-up desc() call is needed. + $query->orderBy($compiler->renderExpression($ord->expression), $ord->direction); + } + + /** @var list<array<string, mixed>> $rows */ + $rows = iterator_to_array($query->execute()->fetchAll(), false); + return $rows; + } +} diff --git a/src/Functions/functions.neon b/src/Functions/functions.neon index 6cc7e53..f201248 100644 --- a/src/Functions/functions.neon +++ b/src/Functions/functions.neon @@ -81,6 +81,7 @@ scalar: aggregate: - FQL\Functions\Aggregate\Avg + - FQL\Functions\Aggregate\CollectObject - FQL\Functions\Aggregate\Count - FQL\Functions\Aggregate\GroupConcat - FQL\Functions\Aggregate\Max diff --git a/src/Interface/Query.php 
b/src/Interface/Query.php index 5cb8293..ba58b48 100644 --- a/src/Interface/Query.php +++ b/src/Interface/Query.php @@ -589,11 +589,12 @@ public function avg(string $field): Query; public function min(string $field, bool $distinct = false): Query; public function max(string $field, bool $distinct = false): Query; public function groupConcat(string $field, string $separator = ',', bool $distinct = false): Query; + public function collectObject(\FQL\Query\Builder\CollectObject $builder): Query; - public function orderBy(string $field, Sort $direction = Sort::ASC): Query; - public function sortBy(string $field, Sort $direction = Sort::ASC): Query; - public function asc(): Query; - public function desc(): Query; + public function orderBy(string $field, Sort $direction = Sort::ASC): static; + public function sortBy(string $field, Sort $direction = Sort::ASC): static; + public function asc(): static; + public function desc(): static; public function offset(int $offset): Query; public function limit(int $limit, ?int $offset = null): Query; diff --git a/src/Query/Builder/CollectObject.php b/src/Query/Builder/CollectObject.php new file mode 100644 index 0000000..13ec585 --- /dev/null +++ b/src/Query/Builder/CollectObject.php @@ -0,0 +1,90 @@ +select('productId AS id, productName AS name') + * ->select('ROUND(price, 2) AS price') + * ->orderBy('price')->desc(); + * ``` + */ +final class CollectObject +{ + use Sortable; + + /** @var list<array{expression: \FQL\Sql\Ast\Expression\ExpressionNode, alias: string|null}> */ + private array $selectItems = []; + + /** + * Adds inner SELECT items. Every argument is split with `FieldListSplitter::split()`, + * an `AS alias` suffix is separated by `FieldListSplitter::splitAlias()`, and the + * remaining field text is parsed with `Sql\Provider::parseExpression()`. + */ + public function select(string ...$fields): self + { + foreach (FieldListSplitter::split(...$fields) as $spec) { + $parsed = FieldListSplitter::splitAlias($spec); + $this->selectItems[] = [ + 'expression' => Sql\Provider::parseExpression($parsed['field']), + 'alias' => $parsed['alias'], + ]; + } + return $this; + } + + /** + * Aliases the most recently added select item — mirrors the main Query's + * `->select('foo')->as('bar')` pattern. 
+ * + * @throws \LogicException when no select item has been added yet + */ + public function as(string $alias): self + { + $last = array_key_last($this->selectItems); + if ($last === null) { + throw new \LogicException('->as() must follow ->select()'); + } + $this->selectItems[$last]['alias'] = $alias; + return $this; + } + + /** @return list<array{expression: \FQL\Sql\Ast\Expression\ExpressionNode, alias: string|null}> */ + public function getSelectItems(): array + { + return $this->selectItems; + } + + /** + * Adapter for the AST consumer (`Select::collectObject`): converts + * {@see Sortable}'s `{expression, sort}` entries into `OrderByItemNode`s + * that {@see \FQL\Sql\Ast\Expression\CollectObjectExpressionNode} expects. + * + * @return list<OrderByItemNode> + */ + public function getOrderings(): array + { + return array_map( + static fn (array $entry): OrderByItemNode => new OrderByItemNode( + $entry['expression'], + $entry['sort'], + Position::synthetic() + ), + $this->orderings + ); + } +} diff --git a/src/Sql/Ast/Expression/CollectObjectExpressionNode.php b/src/Sql/Ast/Expression/CollectObjectExpressionNode.php new file mode 100644 index 0000000..142a364 --- /dev/null +++ b/src/Sql/Ast/Expression/CollectObjectExpressionNode.php @@ -0,0 +1,35 @@ + $selectItems + * @param list<\FQL\Sql\Ast\Node\OrderByItemNode> $orderings + */ + public function __construct( + public array $selectItems, + public array $orderings, + public Position $position + ) { + } + + public function position(): Position + { + return $this->position; + } +} diff --git a/src/Sql/Ast/Expression/WholeRowNode.php b/src/Sql/Ast/Expression/WholeRowNode.php new file mode 100644 index 0000000..70cf8e0 --- /dev/null +++ b/src/Sql/Ast/Expression/WholeRowNode.php @@ -0,0 +1,30 @@ +position; + } +} diff --git a/src/Sql/Builder/ExpressionCompiler.php b/src/Sql/Builder/ExpressionCompiler.php index d0edd1a..dbb0e61 100644 --- a/src/Sql/Builder/ExpressionCompiler.php +++ b/src/Sql/Builder/ExpressionCompiler.php @@ -6,6 +6,7 @@ use FQL\Exception; use FQL\Sql\Ast\Expression\BinaryOpNode; use FQL\Sql\Ast\Expression\CastExpressionNode; +use FQL\Sql\Ast\Expression\CollectObjectExpressionNode; use 
FQL\Sql\Ast\Expression\ColumnReferenceNode; use FQL\Sql\Ast\Expression\ConditionExpressionNode; use FQL\Sql\Ast\Expression\ConditionGroupNode; @@ -87,6 +88,26 @@ public function renderExpression(ExpressionNode $node): string if ($node instanceof \FQL\Sql\Ast\Expression\ConditionExpressionNode) { return $this->renderCondition($node); } + if ($node instanceof CollectObjectExpressionNode) { + $parts = []; + foreach ($node->selectItems as $item) { + $rendered = $this->renderExpression($item['expression']); + if ($item['alias'] !== null) { + $rendered .= ' AS ' . $item['alias']; + } + $parts[] = $rendered; + } + $body = implode(', ', $parts); + if ($node->orderings !== []) { + $orderRendered = []; + foreach ($node->orderings as $ord) { + $orderRendered[] = $this->renderExpression($ord->expression) + . ' ' . strtoupper($ord->direction->value); + } + $body .= ' ORDER BY ' . implode(', ', $orderRendered); + } + return $body; + } throw new Exception\QueryLogicException( sprintf('Cannot render expression of type %s', get_class($node)) ); diff --git a/src/Sql/Parser/ExpressionParser.php b/src/Sql/Parser/ExpressionParser.php index 1365fdc..2a048dc 100644 --- a/src/Sql/Parser/ExpressionParser.php +++ b/src/Sql/Parser/ExpressionParser.php @@ -8,6 +8,7 @@ use FQL\Sql\Ast\Expression\BinaryOpNode; use FQL\Sql\Ast\Expression\CaseExpressionNode; use FQL\Sql\Ast\Expression\CastExpressionNode; +use FQL\Sql\Ast\Expression\CollectObjectExpressionNode; use FQL\Sql\Ast\Expression\ColumnReferenceNode; use FQL\Sql\Ast\Expression\ExpressionNode; use FQL\Sql\Ast\Expression\FunctionCallNode; @@ -15,6 +16,7 @@ use FQL\Sql\Ast\Expression\MatchAgainstNode; use FQL\Sql\Ast\Expression\StarNode; use FQL\Sql\Ast\Expression\WhenBranchNode; +use FQL\Sql\Ast\Node\OrderByItemNode; use FQL\Sql\Token\Token; use FQL\Sql\Token\TokenStream; use FQL\Sql\Token\TokenType; @@ -30,11 +32,23 @@ final class ExpressionParser { private ConditionGroupParser $conditionGroupParser; + private ?OrderByClauseParser 
$orderByParser = null; + public function setConditionGroupParser(ConditionGroupParser $parser): void { $this->conditionGroupParser = $parser; } + /** + * Optional dependency — required only for parsing aggregate calls with internal + * ORDER BY clauses (currently `COLLECT_OBJECT`). Wired in lazily because + * OrderByClauseParser itself depends on ExpressionParser. + */ + public function setOrderByParser(OrderByClauseParser $parser): void + { + $this->orderByParser = $parser; + } + /** * @throws ParseException */ @@ -187,6 +201,12 @@ public function parseFunctionCall(TokenStream $stream): ExpressionNode return $this->parseIfCall($stream, $nameToken); } + // Special case: COLLECT_OBJECT(expr [AS alias], … [ORDER BY ...]) — inner + // mini-SELECT plus optional ORDER BY. Carries a typed AST envelope. + if ($name === 'COLLECT_OBJECT') { + return $this->parseCollectObjectCall($stream, $nameToken); + } + $stream->expect(TokenType::PAREN_OPEN); $distinct = false; if ($stream->consumeIf(TokenType::KEYWORD_DISTINCT) !== null) { @@ -221,6 +241,80 @@ private function parseIfCall(TokenStream $stream, Token $nameToken): FunctionCal return new FunctionCallNode('IF', [$condition, $then, $else], false, $nameToken->position); } + /** + * Parses `COLLECT_OBJECT(expr [AS alias], … [ORDER BY expr [ASC|DESC], …])`. + * Wraps the parsed inner SELECT items and ORDER BY into a single + * {@see CollectObjectExpressionNode}, then returns it as the lone argument of a + * `FunctionCallNode('COLLECT_OBJECT')` so the existing `storeAggregate` pipeline + * picks it up. 
+ * + * @throws ParseException on DISTINCT, on an empty item list, or when an inner + * ORDER BY is used while no OrderByClauseParser has been wired in + */ + private function parseCollectObjectCall(TokenStream $stream, Token $nameToken): FunctionCallNode + { + $stream->expect(TokenType::PAREN_OPEN); + + if ($stream->consumeIf(TokenType::KEYWORD_DISTINCT) !== null) { + throw ParseException::context($nameToken, 'COLLECT_OBJECT does not support DISTINCT'); + } + + if ($stream->peekType() === TokenType::PAREN_CLOSE) { + throw ParseException::context( + $stream->peek(), + 'COLLECT_OBJECT requires at least one inner SELECT item' + ); + } + + $selectItems = [$this->parseCollectObjectItem($stream)]; + while ($stream->consumeIf(TokenType::COMMA) !== null) { + $selectItems[] = $this->parseCollectObjectItem($stream); + } + + $orderings = []; + if ($stream->consumeIf(TokenType::KEYWORD_ORDER) !== null) { + // The ORDER BY parser is an optional, lazily wired dependency: only demand it + // once an inner ORDER BY is actually present, so plain COLLECT_OBJECT(...) + // calls still parse when it has not been wired. + $orderByParser = $this->orderByParser; + if ($orderByParser === null) { + throw ParseException::context( + $nameToken, + 'COLLECT_OBJECT requires OrderByClauseParser to be wired into ExpressionParser' + ); + } + $stream->expect(TokenType::KEYWORD_BY); + $orderings[] = $orderByParser->parseItem($stream); + while ($stream->consumeIf(TokenType::COMMA) !== null) { + $orderings[] = $orderByParser->parseItem($stream); + } + } + + $stream->expect(TokenType::PAREN_CLOSE); + + $coNode = new CollectObjectExpressionNode($selectItems, $orderings, $nameToken->position); + return new FunctionCallNode('COLLECT_OBJECT', [$coNode], false, $nameToken->position); + } + + /** + * Parses one inner SELECT item: `expression [AS alias]`. 
+ * + * @return array{expression: ExpressionNode, alias: string|null} + * @throws ParseException + */ + private function parseCollectObjectItem(TokenStream $stream): array + { + $expression = $this->parseExpression($stream); + $alias = null; + if ($stream->consumeIf(TokenType::KEYWORD_AS) !== null) { + $aliasTok = $stream->peek(); + if (!$aliasTok->isAnyOf(TokenType::IDENTIFIER, TokenType::IDENTIFIER_QUOTED)) { + throw ParseException::context($aliasTok, 'expected COLLECT_OBJECT alias identifier'); + } + $stream->consume(); + $alias = IdentifierHelper::stripOuterBackticks($aliasTok->value); + } + return ['expression' => $expression, 'alias' => $alias]; + } + private function conditionParser(): ConditionParser { // Reusing the parser wired for CASE branches. It was attached via diff --git a/src/Sql/Parser/OrderByClauseParser.php b/src/Sql/Parser/OrderByClauseParser.php index 4ef6d15..3b507a0 100644 --- a/src/Sql/Parser/OrderByClauseParser.php +++ b/src/Sql/Parser/OrderByClauseParser.php @@ -42,9 +42,13 @@ public function parseClause(TokenStream $stream, Token $orderKeyword): OrderByCl } /** + * Parses a single `expr [ASC|DESC]` item. Public so callers that need + * ORDER BY items inside a non-clause context (e.g. `COLLECT_OBJECT(... ORDER BY ...)`) + * can drive the parser without the trailing control-keyword check of `parseClause`. 
+ * * @throws ParseException */ - private function parseItem(TokenStream $stream): OrderByItemNode + public function parseItem(TokenStream $stream): OrderByItemNode { $startPosition = $stream->peek()->position; $expression = $this->expressionParser->parseExpression($stream); diff --git a/src/Sql/Parser/Parser.php b/src/Sql/Parser/Parser.php index 8891d17..e25812d 100644 --- a/src/Sql/Parser/Parser.php +++ b/src/Sql/Parser/Parser.php @@ -37,6 +37,8 @@ public static function create(): self $havingParser = new HavingClauseParser($conditionGroupParser); $groupByParser = new GroupByClauseParser($expressionParser); $orderByParser = new OrderByClauseParser($expressionParser); + // ExpressionParser needs the order-by parser for COLLECT_OBJECT's internal ORDER BY. + $expressionParser->setOrderByParser($orderByParser); $limitParser = new LimitOffsetParser(); $unionParser = new UnionParser(); $intoParser = new IntoParser(); diff --git a/src/Sql/Provider.php b/src/Sql/Provider.php index 2b40b59..7e95d6a 100644 --- a/src/Sql/Provider.php +++ b/src/Sql/Provider.php @@ -14,6 +14,7 @@ use FQL\Sql\Parser\ConditionGroupParser; use FQL\Sql\Parser\ConditionParser; use FQL\Sql\Parser\ExpressionParser; +use FQL\Sql\Parser\OrderByClauseParser; use FQL\Sql\Parser\ParseException; use FQL\Sql\Token\Tokenizer; use FQL\Sql\Token\TokenStream; @@ -136,6 +137,7 @@ private static function freshExpressionParser(): ExpressionParser $conditionParser = new ConditionParser($expressionParser); $groupParser = new ConditionGroupParser($conditionParser); $expressionParser->setConditionGroupParser($groupParser); + $expressionParser->setOrderByParser(new OrderByClauseParser($expressionParser)); return $expressionParser; } } diff --git a/src/Sql/Runtime/ExpressionEvaluator.php b/src/Sql/Runtime/ExpressionEvaluator.php index 93fa9a9..27c0db1 100644 --- a/src/Sql/Runtime/ExpressionEvaluator.php +++ b/src/Sql/Runtime/ExpressionEvaluator.php @@ -5,7 +5,6 @@ use FQL\Enum; use FQL\Exception; use FQL\Functions; 
-use FQL\Query\FileQuery; use FQL\Sql\Ast\Expression\BinaryOperator; use FQL\Sql\Ast\Expression\BinaryOpNode; use FQL\Sql\Ast\Expression\CaseExpressionNode; @@ -20,6 +19,7 @@ use FQL\Sql\Ast\Expression\MatchAgainstNode; use FQL\Sql\Ast\Expression\StarNode; use FQL\Sql\Ast\Expression\SubQueryNode; +use FQL\Sql\Ast\Expression\WholeRowNode; use FQL\Traits\Helpers\EnhancedNestedArrayAccessor; use FQL\Traits\Helpers\StringOperations; @@ -63,6 +63,7 @@ public function evaluate(ExpressionNode $node, array $item, array $resultItem = $node instanceof LiteralNode => $node->value, $node instanceof ColumnReferenceNode => $this->evaluateColumn($node, $item, $resultItem), $node instanceof StarNode => '*', + $node instanceof WholeRowNode => $item, $node instanceof FunctionCallNode => $this->evaluateFunctionCall($node, $item, $resultItem), $node instanceof BinaryOpNode => $this->evaluateBinaryOp($node, $item, $resultItem), $node instanceof CastExpressionNode => $this->evaluateCast($node, $item, $resultItem), diff --git a/src/Traits/Select.php b/src/Traits/Select.php index 05c7069..c379214 100644 --- a/src/Traits/Select.php +++ b/src/Traits/Select.php @@ -349,6 +349,20 @@ public function groupConcat(string $field, string $separator = ',', bool $distin ); } + public function collectObject(\FQL\Query\Builder\CollectObject $builder): Interface\Query + { + $node = new \FQL\Sql\Ast\Expression\CollectObjectExpressionNode( + $builder->getSelectItems(), + $builder->getOrderings(), + Position::synthetic() + ); + return $this->storeAggregate( + new FunctionCallNode('COLLECT_OBJECT', [$node], false, Position::synthetic()), + null, + null + ); + } + public function min(string $field, bool $distinct = false): Interface\Query { return $this->storeAggregate( @@ -799,6 +813,25 @@ private function storeAggregate( $options['separator'] = (string) $args[1]->value; } + if ($name === 'COLLECT_OBJECT') { + $node = $args[0] ?? 
null; + if (!$node instanceof \FQL\Sql\Ast\Expression\CollectObjectExpressionNode) { + throw new Exception\UnexpectedValueException( + 'COLLECT_OBJECT requires a CollectObjectExpressionNode argument' + ); + } + if ($call->distinct) { + throw new Exception\InvalidArgumentException( + 'DISTINCT is not supported with COLLECT_OBJECT' + ); + } + $options['selectItems'] = $this->buildCollectObjectSelectItems($node); + $options['orderings'] = $node->orderings; + // Evaluating WholeRowNode against the current row yields the row itself, + // which is exactly what CollectObject::accumulate needs as the value. + $expression = new \FQL\Sql\Ast\Expression\WholeRowNode(Position::synthetic()); + } + /** @var AggregateSpec $spec */ $spec = [ 'class' => $class, @@ -812,6 +845,43 @@ private function storeAggregate( return $this; } + /** + * Resolves inner SELECT items of a `COLLECT_OBJECT(...)` call into the + * accumulator-ready shape `{key, expression}`, deriving keys from explicit + * aliases or from the rendered expression, and validating against duplicates + * and (MVP) nested `COLLECT_OBJECT`. + * + * @return list<array{key: string, expression: \FQL\Sql\Ast\Expression\ExpressionNode}> + */ + private function buildCollectObjectSelectItems( + \FQL\Sql\Ast\Expression\CollectObjectExpressionNode $node + ): array { + if ($node->selectItems === []) { + throw new Exception\SelectException('COLLECT_OBJECT requires at least one inner SELECT item'); + } + + $compiler = new ExpressionCompiler(); + $resolved = []; + $seenKeys = []; + foreach ($node->selectItems as $item) { + $rendered = $compiler->renderExpression($item['expression']); + if (stripos($rendered, 'COLLECT_OBJECT(') !== false) { + throw new Exception\InvalidArgumentException( + 'Nested COLLECT_OBJECT is not supported' + ); + } + $key = $item['alias'] ??
$rendered; + if (isset($seenKeys[$key])) { + throw new Exception\SelectException( + sprintf('COLLECT_OBJECT alias/key collision: "%s"', $key) + ); + } + $seenKeys[$key] = true; + $resolved[] = ['key' => $key, 'expression' => $item['expression']]; + } + return $resolved; + } + /** * Low-level writer into `$selectedFields`. Collapses the four legacy code * paths (function instance / expression / aggregate / plain column) into diff --git a/src/Traits/Sortable.php b/src/Traits/Sortable.php index 84ea597..4e9f47f 100644 --- a/src/Traits/Sortable.php +++ b/src/Traits/Sortable.php @@ -4,7 +4,6 @@ use FQL\Enum; use FQL\Exception; -use FQL\Interface\Query; use FQL\Sql; use FQL\Sql\Ast\Expression\ColumnReferenceNode; use FQL\Sql\Ast\Expression\ExpressionNode; @@ -37,7 +36,7 @@ public function isSortableEmpty(): bool return $this->orderings === []; } - public function sortBy(string $field, ?Enum\Sort $type = null): Query + public function sortBy(string $field, ?Enum\Sort $type = null): static { if ($this->sortableBlocked) { throw new Exception\QueryLogicException('ORDER BY is not allowed in DESCRIBE mode'); @@ -73,22 +72,22 @@ public function sortBy(string $field, ?Enum\Sort $type = null): Query return $this; } - public function orderBy(string $field, ?Enum\Sort $type = null): Query + public function orderBy(string $field, ?Enum\Sort $type = null): static { return $this->sortBy($field, $type); } - public function asc(): Query + public function asc(): static { return $this->setLastSortType(Enum\Sort::ASC); } - public function desc(): Query + public function desc(): static { return $this->setLastSortType(Enum\Sort::DESC); } - public function clearOrderings(): Query + public function clearOrderings(): static { $this->orderings = []; return $this; @@ -113,7 +112,7 @@ private function orderByToString(): string return PHP_EOL . 
sprintf('ORDER BY %s', implode(', ', $parts)); } - private function setLastSortType(Enum\Sort $type): Query + private function setLastSortType(Enum\Sort $type): static { $lastIndex = array_key_last($this->orderings); if ($lastIndex === null) { diff --git a/tests/Functions/CollectObjectTest.php b/tests/Functions/CollectObjectTest.php new file mode 100644 index 0000000..390a158 --- /dev/null +++ b/tests/Functions/CollectObjectTest.php @@ -0,0 +1,397 @@ +csv = (string) tempnam(sys_get_temp_dir(), 'fql-co-') . '.csv'; + file_put_contents($this->csv, <<csv)) { + @unlink($this->csv); + } + } + + public function testRegisteredInFunctionRegistry(): void + { + $this->assertSame(CollectObjectAggregate::class, FunctionRegistry::getAggregate('COLLECT_OBJECT')); + $this->assertTrue(FunctionRegistry::isAggregate('COLLECT_OBJECT')); + } + + public function testFqlBasicGroupBy(): void + { + $sql = sprintf( + "SELECT categoryId, COLLECT_OBJECT(productId AS id, productName AS name) AS products " + . "FROM csv(%s).* GROUP BY categoryId", + $this->csv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + $this->assertCount(2, $rows); + + $byCategory = $this->indexByCategory($rows); + $this->assertCount(3, $byCategory['10']['products']); + $this->assertCount(2, $byCategory['20']['products']); + // CSV reader hands values back as raw strings; we don't apply a scalar + // wrapper here so the literal types pass through. + $this->assertSame(['id' => '1', 'name' => 'Apple'], $byCategory['10']['products'][0]); + } + + public function testFqlWithOrderByDesc(): void + { + $sql = sprintf( + "SELECT categoryId, COLLECT_OBJECT(productName AS name, ROUND(price, 2) AS price ORDER BY price DESC) AS products " + . 
"FROM csv(%s).* GROUP BY categoryId", + $this->csv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + $byCategory = $this->indexByCategory($rows); + + $prices10 = array_column($byCategory['10']['products'], 'price'); + $this->assertSame([2.5, 1.24, 0.5], $prices10); + + $prices20 = array_column($byCategory['20']['products'], 'price'); + $this->assertSame([2.0, 1.99], $prices20); + } + + public function testFqlWithOrderByAsc(): void + { + $sql = sprintf( + "SELECT categoryId, COLLECT_OBJECT(productName AS name, price ORDER BY price ASC) AS products " + . "FROM csv(%s).* GROUP BY categoryId", + $this->csv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + $byCategory = $this->indexByCategory($rows); + + $names10 = array_column($byCategory['10']['products'], 'name'); + $this->assertSame(['Banana', 'Apple', 'Cherry'], $names10); + } + + public function testFqlWithMultipleOrderByKeys(): void + { + $sql = sprintf( + "SELECT categoryId, COLLECT_OBJECT(productName AS name, code AS code ORDER BY code ASC, name DESC) AS products " + . "FROM csv(%s).* GROUP BY categoryId", + $this->csv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + $byCategory = $this->indexByCategory($rows); + $codes10 = array_column($byCategory['10']['products'], 'code'); + $this->assertSame(['A1', 'B2', 'CH'], $codes10); + } + + public function testFqlWithScalarFunctions(): void + { + $sql = sprintf( + 'SELECT categoryId, ' + . 'COLLECT_OBJECT(' + . 'productId AS id, ' + . 'CONCAT(productName, " (", code, ")") AS label, ' + . 'ROUND(price, 2) AS price' + . ') AS products ' + . 
'FROM csv(%s).* GROUP BY categoryId', + $this->csv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + $byCategory = $this->indexByCategory($rows); + $first = $byCategory['10']['products'][0]; + $this->assertSame('Apple (A1)', $first['label']); + $this->assertSame(1.24, $first['price']); + } + + public function testFqlArithmeticInside(): void + { + $sql = sprintf( + "SELECT categoryId, COLLECT_OBJECT(productId AS id, ROUND(price * 1.21, 2) AS priceWithVat) AS products " + . "FROM csv(%s).* GROUP BY categoryId", + $this->csv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + $byCategory = $this->indexByCategory($rows); + $apple = $this->findById($byCategory['10']['products'], 1); + $this->assertSame(round(1.235 * 1.21, 2), $apple['priceWithVat']); + } + + public function testFqlCombinedWithOtherAggregates(): void + { + $sql = sprintf( + "SELECT categoryId, " + . "COUNT(productId) AS cnt, " + . "SUM(price) AS total, " + . "GROUP_CONCAT(productName, \"|\") AS names, " + . "COLLECT_OBJECT(productId AS id) AS items " + . "FROM csv(%s).* GROUP BY categoryId", + $this->csv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + $byCategory = $this->indexByCategory($rows); + $this->assertSame(3, $byCategory['10']['cnt']); + $this->assertEqualsWithDelta(1.235 + 0.50 + 2.50, $byCategory['10']['total'], 1e-9); + $this->assertSame('Apple|Banana|Cherry', $byCategory['10']['names']); + $this->assertCount(3, $byCategory['10']['items']); + } + + public function testFqlGroupOfOne(): void + { + $singleCsv = (string) tempnam(sys_get_temp_dir(), 'fql-co-1-') . 
'.csv'; + file_put_contents($singleCsv, "productId,categoryId,productName,price\n1,99,Only,5\n"); + try { + $sql = sprintf( + "SELECT categoryId, COLLECT_OBJECT(productName AS name) AS items FROM csv(%s).* GROUP BY categoryId", + $singleCsv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + $this->assertCount(1, $rows); + $this->assertCount(1, $rows[0]['items']); + $this->assertSame('Only', $rows[0]['items'][0]['name']); + } finally { + @unlink($singleCsv); + } + } + + public function testFluentParityWithFqlString(): void + { + $expectedSql = sprintf( + "SELECT categoryId, COLLECT_OBJECT(productId AS id, productName AS name, ROUND(price, 2) AS price ORDER BY price DESC) AS products " + . "FROM csv(%s).* GROUP BY categoryId", + $this->csv + ); + $expected = iterator_to_array(QueryProvider::fql($expectedSql)->execute()->fetchAll(), false); + + $fluent = Csv::openWithDelimiter($this->csv)->query() + ->select('categoryId') + ->collectObject( + (new CollectObject()) + ->select('productId')->as('id') + ->select('productName')->as('name') + ->select('ROUND(price, 2)')->as('price') + ->orderBy('price')->desc() + )->as('products') + ->groupBy('categoryId'); + $actual = iterator_to_array($fluent->execute()->fetchAll(), false); + + $this->assertEquals( + $this->indexByCategory($expected), + $this->indexByCategory($actual) + ); + } + + public function testFluentScalarFunctionsViaSelectString(): void + { + $fluent = Csv::openWithDelimiter($this->csv)->query() + ->select('categoryId') + ->collectObject( + (new CollectObject()) + ->select('productId AS id') + ->select('CONCAT(productName, " (", code, ")") AS label') + ->select('ROUND(price, 2) AS price') + ->orderBy('price', Sort::ASC) + )->as('products') + ->groupBy('categoryId'); + $rows = iterator_to_array($fluent->execute()->fetchAll(), false); + $byCategory = $this->indexByCategory($rows); + + $names10 = array_column($byCategory['10']['products'], 'label'); + 
$this->assertSame(['Banana (B2)', 'Apple (A1)', 'Cherry (CH)'], $names10); + } + + public function testFluentUpperLowerCoalesceViaSelectString(): void + { + $fluent = Csv::openWithDelimiter($this->csv)->query() + ->select('categoryId') + ->collectObject( + (new CollectObject()) + ->select( + 'UPPER(productName) AS upper', + 'LOWER(productName) AS lower', + 'COALESCE(code, "none") AS codeOrNone' + ) + )->as('products') + ->groupBy('categoryId'); + $rows = iterator_to_array($fluent->execute()->fetchAll(), false); + $byCategory = $this->indexByCategory($rows); + $first10 = $byCategory['10']['products'][0]; + $this->assertSame('APPLE', $first10['upper']); + $this->assertSame('apple', $first10['lower']); + $this->assertSame('A1', $first10['codeOrNone']); + } + + public function testInMemoryPathWithOuterOrderBy(): void + { + // Outer ORDER BY forces InMemory path; inner COLLECT_OBJECT ORDER BY still works. + $sql = sprintf( + "SELECT categoryId, COLLECT_OBJECT(productName AS name ORDER BY name ASC) AS products " + . "FROM csv(%s).* GROUP BY categoryId ORDER BY categoryId DESC", + $this->csv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + $this->assertSame('20', (string) $rows[0]['categoryId']); + $this->assertSame('10', (string) $rows[1]['categoryId']); + $names10 = array_column($rows[1]['products'], 'name'); + $this->assertSame(['Apple', 'Banana', 'Cherry'], $names10); + } + + public function testStreamPathWithoutOuterSort(): void + { + $sql = sprintf( + "SELECT categoryId, COLLECT_OBJECT(productName AS name ORDER BY name DESC) AS products " + . 
"FROM csv(%s).* GROUP BY categoryId", + $this->csv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + $byCategory = $this->indexByCategory($rows); + $this->assertSame(['Cherry', 'Banana', 'Apple'], array_column($byCategory['10']['products'], 'name')); + } + + public function testOrderByCanReferenceProjectedAliases(): void + { + // The inner SELECT inside COLLECT_OBJECT runs as a real Query over the + // accumulated rows; ORDER BY therefore sees the projected aliases, not + // just the raw source columns. This matches standard SQL semantics. + $sql = sprintf( + "SELECT categoryId, COLLECT_OBJECT(productName AS name, ROUND(price, 2) AS rounded ORDER BY rounded DESC) AS products " + . "FROM csv(%s).* GROUP BY categoryId", + $this->csv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + $byCategory = $this->indexByCategory($rows); + $this->assertSame([2.5, 1.24, 0.5], array_column($byCategory['10']['products'], 'rounded')); + } + + public function testNullPropagatesIntoObject(): void + { + $csv = (string) tempnam(sys_get_temp_dir(), 'fql-co-n-') . '.csv'; + file_put_contents($csv, "productId,categoryId,productName,price\n1,1,Apple,\n2,1,Banana,3.5\n"); + try { + $sql = sprintf( + "SELECT categoryId, COLLECT_OBJECT(productName AS name, price AS price ORDER BY name ASC) AS products " + . "FROM csv(%s).* GROUP BY categoryId", + $csv + ); + $rows = iterator_to_array(QueryProvider::fql($sql)->execute()->fetchAll(), false); + // CSV empty cell → empty string after type matching; the entry survives. 
+ $this->assertCount(2, $rows[0]['products']); + $this->assertSame('Apple', $rows[0]['products'][0]['name']); + } finally { + @unlink($csv); + } + } + + public function testAliasCollisionThrows(): void + { + $this->expectException(SelectException::class); + $this->expectExceptionMessage('alias/key collision'); + Csv::openWithDelimiter($this->csv)->query() + ->select('categoryId') + ->collectObject( + (new CollectObject()) + ->select('productId AS x, productName AS x') + )->as('products') + ->groupBy('categoryId'); + } + + public function testDistinctRejectedInFqlString(): void + { + $this->expectException(ParseException::class); + $sql = sprintf( + "SELECT COLLECT_OBJECT(DISTINCT productName) FROM csv(%s).*", + $this->csv + ); + QueryProvider::fql($sql); + } + + public function testNestedCollectObjectRejected(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Nested COLLECT_OBJECT'); + Csv::openWithDelimiter($this->csv)->query() + ->select('categoryId') + ->collectObject( + (new CollectObject()) + ->select('COLLECT_OBJECT(productId) AS inner') + )->as('products') + ->groupBy('categoryId'); + } + + public function testEmptyCollectObjectRejectedByParser(): void + { + $this->expectException(ParseException::class); + $sql = sprintf( + "SELECT COLLECT_OBJECT() FROM csv(%s).*", + $this->csv + ); + QueryProvider::fql($sql); + } + + public function testDescBeforeOrderByThrows(): void + { + // Inherited from the Sortable trait, which raises OrderByException + // when ->desc()/->asc() is called before any ->orderBy(). 
+ $this->expectException(\FQL\Exception\OrderByException::class); + (new CollectObject())->desc(); + } + + public function testAsBeforeSelectThrows(): void + { + $this->expectException(\LogicException::class); + (new CollectObject())->as('foo'); + } + + /** + * @param list<array<string, mixed>> $rows + * @return array<string, array<string, mixed>> + */ + private function indexByCategory(array $rows): array + { + $out = []; + foreach ($rows as $row) { + $out[(string) $row['categoryId']] = $row; + } + return $out; + } + + /** + * @param list<array<string, mixed>> $items + * @return array<string, mixed> + */ + private function findById(array $items, int $id): array + { + foreach ($items as $item) { + if ((int) $item['id'] === $id) { + return $item; + } + } + $this->fail("Item with id=$id not found"); + } +} diff --git a/tests/Functions/FunctionRegistryTest.php b/tests/Functions/FunctionRegistryTest.php index 3cf526c..df7149a 100644 --- a/tests/Functions/FunctionRegistryTest.php +++ b/tests/Functions/FunctionRegistryTest.php @@ -46,14 +46,15 @@ public function testBuiltinNeonDoesNotBootstrapBeforeAccess(): void public function testBootstrapLoadsBuiltinsFromNeon(): void { // Built-in neon is shipped with the library and must register all 60+ - // scalar functions and all 6 aggregates. + // scalar functions and all 7 aggregates. $all = FunctionRegistry::all(); $this->assertArrayHasKey('scalar', $all); $this->assertArrayHasKey('aggregate', $all); $this->assertGreaterThan(50, count($all['scalar'])); - $this->assertSame(6, count($all['aggregate'])); + $this->assertSame(7, count($all['aggregate'])); $this->assertArrayHasKey('LOWER', $all['scalar']); $this->assertArrayHasKey('SUM', $all['aggregate']); + $this->assertArrayHasKey('COLLECT_OBJECT', $all['aggregate']); } public function testCaseInsensitiveLookup(): void