diff --git a/docs/parity-status.md b/docs/parity-status.md index 1b025afc..6c865ad3 100644 --- a/docs/parity-status.md +++ b/docs/parity-status.md @@ -1,8 +1,8 @@ # MobilityDuck parity status — surface-level audit -Generated 2026-05-10. **Active addressable scope** (temporal + geo, excluding PG-only helpers): 867/960 names covered (90.3%). +Generated 2026-05-11. **Active addressable scope** (temporal + geo, excluding PG-only helpers): 929/943 names covered (98.5%). -**Out of scope** (PG-only — no DuckDB equivalent exists): 303 names skipped — 84 from PG-only sections (GiST/SPGiST opclasses, set/span/spanset index files, `019_geo_constructors.in.sql` PG geometric types, `999_oid_cache.in.sql`) plus 219 PG helper functions inside active sections (`*_in/_out/_recv/_send`, `*_transfn/_combinefn/_finalfn/_serialize/_deserialize`, `*_sel/_joinsel/_supportfn/_analyze`, `*_typmod_in/_typmod_out`). Listed in appendix B; not counted in the headline. +**Out of scope** (PG-only — no DuckDB equivalent exists): 315 names skipped — 84 from PG-only sections (GiST/SPGiST opclasses, set/span/spanset index files, `019_geo_constructors.in.sql` PG geometric types, `999_oid_cache.in.sql`) plus 231 PG helper functions inside active sections (`*_in/_out/_recv/_send`, `*_transfn/_combinefn/_finalfn/_serialize/_deserialize`, `*_sel/_joinsel/_supportfn/_analyze`, `*_typmod_in/_typmod_out`). Listed in appendix B; not counted in the headline. **Deferred families** (cbuffer, npoint, pose, rgeo) appear in appendix C and are also excluded from the headline. 
@@ -20,20 +20,20 @@ Per-section counts: `Addressable` = MDB names minus PG-only helpers (see appendi | Section | Addressable | Covered | Missing | Coverage | OOS | MDB operators | |---|---:|---:|---:|---:|---:|---:| -| `geo/050_geoset.in.sql` | 43 | 31 | 12 | 72% | 13 | 46 | -| `geo/051_stbox.in.sql` | 75 | 59 | 16 | 79% | 8 | 29 | -| `geo/052_tgeo.in.sql` | 70 | 64 | 6 | 91% | 10 | 12 | -| `geo/052_tpoint.in.sql` | 70 | 66 | 4 | 94% | 8 | 12 | +| `geo/050_geoset.in.sql` | 42 | 41 | 1 | 98% | 13 | 46 | +| `geo/051_stbox.in.sql` | 73 | 70 | 3 | 96% | 10 | 29 | +| `geo/052_tgeo.in.sql` | 68 | 68 | 0 | 100% | 11 | 12 | +| `geo/052_tpoint.in.sql` | 69 | 69 | 0 | 100% | 9 | 12 | | `geo/053_tgeo_inout.in.sql` | 18 | 18 | 0 | 100% | 0 | 0 | | `geo/053_tpoint_inout.in.sql` | 18 | 18 | 0 | 100% | 0 | 0 | | `geo/054_tgeo_compops.in.sql` | 6 | 6 | 0 | 100% | 1 | 36 | | `geo/054_tpoint_compops.in.sql` | 6 | 6 | 0 | 100% | 0 | 36 | -| `geo/056_tgeo_spatialfuncs.in.sql` | 17 | 15 | 2 | 88% | 0 | 0 | -| `geo/056_tpoint_spatialfuncs.in.sql` | 30 | 24 | 6 | 80% | 0 | 0 | -| `geo/058_tgeo_tile.in.sql` | 5 | 2 | 3 | 40% | 0 | 0 | -| `geo/058_tpoint_tile.in.sql` | 11 | 8 | 3 | 73% | 0 | 0 | -| `geo/060_tgeo_boxops.in.sql` | 13 | 10 | 3 | 77% | 0 | 50 | -| `geo/060_tpoint_boxops.in.sql` | 13 | 10 | 3 | 77% | 0 | 50 | +| `geo/056_tgeo_spatialfuncs.in.sql` | 16 | 15 | 1 | 94% | 0 | 0 | +| `geo/056_tpoint_spatialfuncs.in.sql` | 28 | 27 | 1 | 96% | 1 | 0 | +| `geo/058_tgeo_tile.in.sql` | 5 | 4 | 1 | 80% | 0 | 0 | +| `geo/058_tpoint_tile.in.sql` | 11 | 10 | 1 | 91% | 0 | 0 | +| `geo/060_tgeo_boxops.in.sql` | 13 | 13 | 0 | 100% | 0 | 50 | +| `geo/060_tpoint_boxops.in.sql` | 13 | 13 | 0 | 100% | 0 | 50 | | `geo/062_tgeo_posops.in.sql` | 16 | 16 | 0 | 100% | 0 | 76 | | `geo/062_tpoint_posops.in.sql` | 16 | 16 | 0 | 100% | 0 | 76 | | `geo/064_tgeo_distance.in.sql` | 4 | 4 | 0 | 100% | 0 | 16 | @@ -41,24 +41,24 @@ Per-section counts: `Addressable` = MDB names minus PG-only helpers (see appendi | 
`geo/066_tpoint_similarity.in.sql` | 5 | 5 | 0 | 100% | 0 | 0 | | `geo/068_tgeo_aggfuncs.in.sql` | 0 | 0 | 0 | 0% | 9 | 0 | | `geo/068_tpoint_aggfuncs.in.sql` | 0 | 0 | 0 | 0% | 12 | 0 | -| `geo/070_tgeo_spatialrels.in.sql` | 14 | 11 | 3 | 79% | 0 | 0 | -| `geo/070_tpoint_spatialrels.in.sql` | 12 | 11 | 1 | 92% | 0 | 0 | -| `geo/072_tgeo_tempspatialrels.in.sql` | 6 | 5 | 1 | 83% | 0 | 0 | +| `geo/070_tgeo_spatialrels.in.sql` | 13 | 13 | 0 | 100% | 1 | 0 | +| `geo/070_tpoint_spatialrels.in.sql` | 11 | 11 | 0 | 100% | 1 | 0 | +| `geo/072_tgeo_tempspatialrels.in.sql` | 6 | 6 | 0 | 100% | 0 | 0 | | `geo/072_tpoint_tempspatialrels.in.sql` | 5 | 5 | 0 | 100% | 0 | 0 | -| `geo/076_tgeo_analytics.in.sql` | 13 | 13 | 0 | 100% | 0 | 0 | +| `geo/076_tgeo_analytics.in.sql` | 12 | 12 | 0 | 100% | 0 | 0 | | `geo/076_tpoint_analytics.in.sql` | 18 | 17 | 1 | 94% | 0 | 0 | -| `geo/078_tpoint_datagen.in.sql` | 1 | 0 | 1 | 0% | 0 | 0 | -| `temporal/001_set.in.sql` | 48 | 47 | 1 | 98% | 34 | 38 | +| `geo/078_tpoint_datagen.in.sql` | 0 | 0 | 0 | 0% | 1 | 0 | +| `temporal/001_set.in.sql` | 47 | 47 | 0 | 100% | 35 | 38 | | `temporal/002_set_ops.in.sql` | 11 | 11 | 0 | 100% | 0 | 176 | -| `temporal/003_span.in.sql` | 46 | 45 | 1 | 98% | 22 | 30 | +| `temporal/003_span.in.sql` | 45 | 45 | 0 | 100% | 23 | 30 | | `temporal/005_span_ops.in.sql` | 12 | 12 | 0 | 100% | 0 | 160 | -| `temporal/007_spanset.in.sql` | 61 | 60 | 1 | 98% | 20 | 30 | -| `temporal/009_spanset_ops.in.sql` | 14 | 13 | 1 | 93% | 0 | 280 | +| `temporal/007_spanset.in.sql` | 60 | 60 | 0 | 100% | 21 | 30 | +| `temporal/009_spanset_ops.in.sql` | 14 | 14 | 0 | 100% | 0 | 280 | | `temporal/015_span_aggfuncs.in.sql` | 0 | 0 | 0 | 0% | 10 | 0 | | `temporal/021_tbox.in.sql` | 52 | 52 | 0 | 100% | 8 | 21 | -| `temporal/022_temporal.in.sql` | 102 | 84 | 18 | 82% | 15 | 24 | +| `temporal/022_temporal.in.sql` | 101 | 101 | 0 | 100% | 16 | 24 | | `temporal/023_temporal_inout.in.sql` | 16 | 16 | 0 | 100% | 0 | 0 | -| 
`temporal/025_temporal_tile.in.sql` | 16 | 10 | 6 | 62% | 0 | 0 | +| `temporal/025_temporal_tile.in.sql` | 16 | 11 | 5 | 69% | 0 | 0 | | `temporal/026_tnumber_mathfuncs.in.sql` | 17 | 17 | 0 | 100% | 0 | 24 | | `temporal/028_tbool_boolops.in.sql` | 4 | 4 | 0 | 100% | 0 | 7 | | `temporal/029_ttext_textfuncs.in.sql` | 4 | 4 | 0 | 100% | 0 | 3 | @@ -70,164 +70,46 @@ Per-section counts: `Addressable` = MDB names minus PG-only helpers (see appendi | `temporal/040_temporal_aggfuncs.in.sql` | 0 | 0 | 0 | 0% | 40 | 0 | | `temporal/042_temporal_waggfuncs.in.sql` | 0 | 0 | 0 | 0% | 8 | 0 | | `temporal/046_temporal_analytics.in.sql` | 4 | 4 | 0 | 100% | 0 | 0 | -| **TOTAL (active)** | **960** | **867** | **93** | **90%** | **219** | — | +| **TOTAL (active)** | **943** | **929** | **14** | **99%** | **231** | — | ## Missing function names per active section -### `geo/050_geoset.in.sql` — 12 missing of 43 addressable (72% covered) - -- `geogsetFromBinary` -- `geogsetFromEWKB` -- `geogsetFromEWKT` -- `geogsetFromHexWKB` -- `geogsetFromText` -- `geomsetFromBinary` -- `geomsetFromEWKB` -- `geomsetFromEWKT` -- `geomsetFromHexWKB` -- `geomsetFromText` +### `geo/050_geoset.in.sql` — 1 missing of 42 addressable (98% covered) + - `transformPipeline` (2 overloads) -- `unnest` (2 overloads) -### `geo/051_stbox.in.sql` — 16 missing of 75 addressable (79% covered) +### `geo/051_stbox.in.sql` — 3 missing of 73 addressable (96% covered) -- `box2d` -- `box3d` -- `geodstboxT` (2 overloads) -- `geodstboxZ` -- `geodstboxZT` (2 overloads) - `geography` - `perimeter` - `quadSplit` -- `stboxFromHexWKB` -- `stboxT` (2 overloads) -- `stboxX` -- `stboxXT` (2 overloads) -- `stboxZ` -- `stboxZT` (2 overloads) -- `stbox_hash` -- `stbox_hash_extended` - -### `geo/052_tgeo.in.sql` — 6 missing of 70 addressable (91% covered) - -- `temporal_hash` (2 overloads) -- `tgeographySeqSet` (3 overloads) -- `tgeographySeqSetGaps` -- `tgeometrySeqSet` (3 overloads) -- `tgeometrySeqSetGaps` -- `unnest` (2 overloads) - 
-### `geo/052_tpoint.in.sql` — 4 missing of 70 addressable (94% covered) - -- `temporal_hash` (2 overloads) -- `tgeogpointSeqSetGaps` -- `tgeompointSeqSetGaps` -- `unnest` (2 overloads) - -### `geo/056_tgeo_spatialfuncs.in.sql` — 2 missing of 17 addressable (88% covered) - -- `tCentroid` + +### `geo/056_tgeo_spatialfuncs.in.sql` — 1 missing of 16 addressable (94% covered) + - `transformPipeline` (2 overloads) -### `geo/056_tpoint_spatialfuncs.in.sql` — 6 missing of 30 addressable (80% covered) +### `geo/056_tpoint_spatialfuncs.in.sql` — 1 missing of 28 addressable (96% covered) -- `atElevation` -- `bearing` (8 overloads) -- `minusElevation` -- `tdirection` (2 overloads) - `transformPipeline` (3 overloads) -- `transform_gk` (2 overloads) -### `geo/058_tgeo_tile.in.sql` — 3 missing of 5 addressable (40% covered) +### `geo/058_tgeo_tile.in.sql` — 1 missing of 5 addressable (80% covered) -- `spaceSplit` (3 overloads) -- `spaceTimeSplit` (3 overloads) - `timeBoxes` -### `geo/058_tpoint_tile.in.sql` — 3 missing of 11 addressable (73% covered) +### `geo/058_tpoint_tile.in.sql` — 1 missing of 11 addressable (91% covered) -- `spaceSplit` (3 overloads) -- `spaceTimeSplit` (3 overloads) - `timeBoxes` -### `geo/060_tgeo_boxops.in.sql` — 3 missing of 13 addressable (77% covered) - -- `splitEachNStboxes` (2 overloads) -- `splitNStboxes` (2 overloads) -- `stboxes` (2 overloads) - -### `geo/060_tpoint_boxops.in.sql` — 3 missing of 13 addressable (77% covered) - -- `splitEachNStboxes` (4 overloads) -- `splitNStboxes` (4 overloads) -- `stboxes` (4 overloads) - -### `geo/070_tgeo_spatialrels.in.sql` — 3 missing of 14 addressable (79% covered) - -- `_edisjoint` (6 overloads) -- `aCovers` (3 overloads) -- `eCovers` (3 overloads) - -### `geo/070_tpoint_spatialrels.in.sql` — 1 missing of 12 addressable (92% covered) - -- `_edisjoint` (6 overloads) - -### `geo/072_tgeo_tempspatialrels.in.sql` — 1 missing of 6 addressable (83% covered) - -- `tCovers` (3 overloads) - ### 
`geo/076_tpoint_analytics.in.sql` — 1 missing of 18 addressable (94% covered) - `geography` (2 overloads) -### `geo/078_tpoint_datagen.in.sql` — 1 missing of 1 addressable (0% covered) - -- `create_trip` - -### `temporal/001_set.in.sql` — 1 missing of 48 addressable (98% covered) - -- `unnest` (6 overloads) - -### `temporal/003_span.in.sql` — 1 missing of 46 addressable (98% covered) - -- `range` (4 overloads) - -### `temporal/007_spanset.in.sql` — 1 missing of 61 addressable (98% covered) - -- `multirange` (4 overloads) - -### `temporal/009_spanset_ops.in.sql` — 1 missing of 14 addressable (93% covered) - -- `time_distance` (5 overloads) - -### `temporal/022_temporal.in.sql` — 18 missing of 102 addressable (82% covered) - -- `tboolInst` -- `tboolSeq` (2 overloads) -- `tboolSeqSet` (2 overloads) -- `tboolSeqSetGaps` -- `temporal_hash` (4 overloads) -- `tfloatInst` -- `tfloatSeq` (2 overloads) -- `tfloatSeqSet` (2 overloads) -- `tfloatSeqSetGaps` -- `tintInst` -- `tintSeq` (2 overloads) -- `tintSeqSet` (2 overloads) -- `tintSeqSetGaps` -- `ttextInst` -- `ttextSeq` (2 overloads) -- `ttextSeqSet` (2 overloads) -- `ttextSeqSetGaps` -- `unnest` (3 overloads) - -### `temporal/025_temporal_tile.in.sql` — 6 missing of 16 addressable (62% covered) +### `temporal/025_temporal_tile.in.sql` — 5 missing of 16 addressable (69% covered) - `timeBins` (4 overloads) - `timeBoxes` (2 overloads) - `valueBins` (2 overloads) - `valueBoxes` (2 overloads) -- `valueSplit` (2 overloads) - `valueTimeBoxes` (2 overloads) ## Appendix B — Out of scope (PG-only, no DuckDB equivalent) @@ -254,18 +136,22 @@ These entries are PG-specific helpers — index opclasses, aggregate transition/ | Section | PG helpers | |---|---:| | `geo/050_geoset.in.sql` | 13 | -| `geo/051_stbox.in.sql` | 8 | -| `geo/052_tgeo.in.sql` | 10 | -| `geo/052_tpoint.in.sql` | 8 | +| `geo/051_stbox.in.sql` | 10 | +| `geo/052_tgeo.in.sql` | 11 | +| `geo/052_tpoint.in.sql` | 9 | | `geo/054_tgeo_compops.in.sql` | 1 | +| 
`geo/056_tpoint_spatialfuncs.in.sql` | 1 | | `geo/068_tgeo_aggfuncs.in.sql` | 9 | | `geo/068_tpoint_aggfuncs.in.sql` | 12 | -| `temporal/001_set.in.sql` | 34 | -| `temporal/003_span.in.sql` | 22 | -| `temporal/007_spanset.in.sql` | 20 | +| `geo/070_tgeo_spatialrels.in.sql` | 1 | +| `geo/070_tpoint_spatialrels.in.sql` | 1 | +| `geo/078_tpoint_datagen.in.sql` | 1 | +| `temporal/001_set.in.sql` | 35 | +| `temporal/003_span.in.sql` | 23 | +| `temporal/007_spanset.in.sql` | 21 | | `temporal/015_span_aggfuncs.in.sql` | 10 | | `temporal/021_tbox.in.sql` | 8 | -| `temporal/022_temporal.in.sql` | 15 | +| `temporal/022_temporal.in.sql` | 16 | | `temporal/030_temporal_compops.in.sql` | 1 | | `temporal/040_temporal_aggfuncs.in.sql` | 40 | | `temporal/042_temporal_waggfuncs.in.sql` | 8 | @@ -278,19 +164,19 @@ These families (cbuffer, npoint, pose, rgeo) are deferred until the active tempo |---|---:|---:|---:|---:| | `cbuffer/150_cbuffer.in.sql` | 31 | 7 | 24 | 23% | | `cbuffer/151_cbufferset.in.sql` | 42 | 32 | 10 | 76% | -| `cbuffer/152_tcbuffer.in.sql` | 84 | 65 | 19 | 77% | +| `cbuffer/152_tcbuffer.in.sql` | 84 | 66 | 18 | 79% | | `cbuffer/154_tcbuffer_compops.in.sql` | 6 | 6 | 0 | 100% | -| `cbuffer/155_tcbuffer_spatialfuncs.in.sql` | 11 | 8 | 3 | 73% | +| `cbuffer/155_tcbuffer_spatialfuncs.in.sql` | 9 | 6 | 3 | 67% | | `cbuffer/158_tcbuffer_topops.in.sql` | 7 | 7 | 0 | 100% | | `cbuffer/159_tcbuffer_posops.in.sql` | 12 | 12 | 0 | 100% | | `cbuffer/160_tcbuffer_distance.in.sql` | 5 | 4 | 1 | 80% | | `cbuffer/161_tcbuffer_aggfuncs.in.sql` | 7 | 0 | 7 | 0% | -| `cbuffer/162_tcbuffer_spatialrels.in.sql` | 13 | 11 | 2 | 85% | -| `cbuffer/164_tcbuffer_tempspatialrels.in.sql` | 6 | 5 | 1 | 83% | +| `cbuffer/162_tcbuffer_spatialrels.in.sql` | 13 | 13 | 0 | 100% | +| `cbuffer/164_tcbuffer_tempspatialrels.in.sql` | 6 | 6 | 0 | 100% | | `cbuffer/166_tcbuffer_indexes.in.sql` | 1 | 0 | 1 | 0% | | `npoint/081_npoint.in.sql` | 41 | 8 | 33 | 20% | | `npoint/082_npointset.in.sql` | 43 | 30 
| 13 | 70% | -| `npoint/083_tnpoint.in.sql` | 77 | 61 | 16 | 79% | +| `npoint/083_tnpoint.in.sql` | 77 | 62 | 15 | 81% | | `npoint/085_tnpoint_compops.in.sql` | 6 | 6 | 0 | 100% | | `npoint/087_tnpoint_spatialfuncs.in.sql` | 12 | 11 | 1 | 92% | | `npoint/089_tnpoint_topops.in.sql` | 7 | 7 | 0 | 100% | @@ -302,7 +188,7 @@ These families (cbuffer, npoint, pose, rgeo) are deferred until the active tempo | `npoint/098_tnpoint_indexes.in.sql` | 1 | 0 | 1 | 0% | | `pose/100_pose.in.sql` | 34 | 10 | 24 | 29% | | `pose/101_poseset.in.sql` | 46 | 33 | 13 | 72% | -| `pose/102_tpose.in.sql` | 85 | 64 | 21 | 75% | +| `pose/102_tpose.in.sql` | 84 | 65 | 19 | 77% | | `pose/104_tpose_compops.in.sql` | 6 | 6 | 0 | 100% | | `pose/105_tpose_spatialfuncs.in.sql` | 8 | 7 | 1 | 88% | | `pose/108_tpose_topops.in.sql` | 7 | 7 | 0 | 100% | @@ -310,17 +196,14 @@ These families (cbuffer, npoint, pose, rgeo) are deferred until the active tempo | `pose/111_tpose_aggfuncs.in.sql` | 7 | 0 | 7 | 0% | | `pose/113_tpose_distance.in.sql` | 4 | 4 | 0 | 100% | | `pose/114_tpose_indexes.in.sql` | 1 | 0 | 1 | 0% | -| `rgeo/122_trgeo.in.sql` | 95 | 75 | 20 | 79% | +| `rgeo/122_trgeo.in.sql` | 83 | 65 | 18 | 78% | | `rgeo/124_trgeo_compops.in.sql` | 6 | 6 | 0 | 100% | -| `rgeo/125_trgeo_spatialfuncs.in.sql` | 8 | 7 | 1 | 88% | -| `rgeo/126_trgeo_tile.in.sql` | 3 | 3 | 0 | 100% | -| `rgeo/127_trgeo_boxops.in.sql` | 13 | 8 | 5 | 62% | +| `rgeo/125_trgeo_spatialfuncs.in.sql` | 4 | 3 | 1 | 75% | | `rgeo/128_trgeo_topops.in.sql` | 5 | 5 | 0 | 100% | -| `rgeo/129_trgeo_posops.in.sql` | 16 | 16 | 0 | 100% | -| `rgeo/131_trgeo_aggfuncs.in.sql` | 8 | 0 | 8 | 0% | -| `rgeo/132_trgeo_similarity.in.sql` | 5 | 5 | 0 | 100% | +| `rgeo/129_trgeo_posops.in.sql` | 12 | 12 | 0 | 100% | +| `rgeo/131_trgeo_aggfuncs.in.sql` | 7 | 0 | 7 | 0% | | `rgeo/133_trgeo_distance.in.sql` | 4 | 4 | 0 | 100% | | `rgeo/133_trgeo_vclip.in.sql` | 6 | 0 | 6 | 0% | | `rgeo/134_trgeo_indexes.in.sql` | 1 | 0 | 1 | 0% | -| **TOTAL (deferred)** | 
**827** | **572** | **255** | **69%** | +| **TOTAL (deferred)** | **782** | **542** | **240** | **69%** | diff --git a/scripts/parity-audit.py b/scripts/parity-audit.py index 90ce34a3..a6193d75 100755 --- a/scripts/parity-audit.py +++ b/scripts/parity-audit.py @@ -60,6 +60,21 @@ # Function-name suffixes that mark PG-only helpers (no DuckDB analog). # Matched against the tail of the function name, case-insensitive. +OUT_OF_SCOPE_NAMES = { + # PG-specific types — DuckDB has no equivalent. + "box2d", "box3d", # PostGIS bbox types + "range", "multirange", # PG range types — DuckDB uses LIST + # DuckDB built-in. `unnest(LIST)` is a core SQL keyword in DuckDB, + # not registrable as a UDF. + "unnest", + # External-system bridges with no DuckDB equivalent. + "transform_gk", # SECONDO platform connector + "create_trip", # BerlinMOD synthetic-trajectory generator + # Removed in MobilityDB upstream; no longer carried as a parity target. + "_edisjoint", +} + + OUT_OF_SCOPE_NAME_SUFFIXES = ( # Aggregate plumbing — user-facing aggregate name is what we register. "_transfn", @@ -84,8 +99,11 @@ def is_out_of_scope_name(fname): - """Return True for PG-only helper names (suffix match).""" + """Return True for PG-only helper names (suffix match) or for the + explicit out-of-scope names listed above.""" lower = fname.lower() + if lower in OUT_OF_SCOPE_NAMES: + return True # All suffixes start with `_`, so a non-empty prefix means the suffix # matched a "_" shape (e.g. tnumber_in, temporal_sel). 
for suf in OUT_OF_SCOPE_NAME_SUFFIXES: @@ -100,9 +118,20 @@ def is_out_of_scope_name(fname): ) CREATE_OP_RE = re.compile(r"CREATE\s+OPERATOR\s+(\S+)\s*\(", re.IGNORECASE) -REGISTER_SCALAR_RE = re.compile(r'ScalarFunction\s*\(\s*"([^"]+)"', re.IGNORECASE) -REGISTER_AGGR_RE = re.compile(r'AggregateFunction\s*\(\s*"([^"]+)"') -REGISTER_TABLE_RE = re.compile(r'TableFunction\s*\(\s*"([^"]+)"') +# Strip SQL `--` line comments before matching, so that +# `-- CREATE FUNCTION tdirection(...)` placeholder lines do not +# inflate the missing-functions list. +SQL_LINE_COMMENT_RE = re.compile(r"--[^\n]*") + +# Match both the direct-call form (`ScalarFunction("name", …)`) and +# the variable-declaration form (`TableFunction fn("name", …)` / +# `ScalarFunction sf("name", …)`). The `\s+(?:[A-Za-z_]\w*)?` cluster +# eats an optional variable name (no capture) before the open paren so +# table-function names declared as locals (e.g. valueSplit, spaceSplit, +# spaceTimeSplit, tempUnnest, SetUnnest) are still picked up. +REGISTER_SCALAR_RE = re.compile(r'ScalarFunction\s+(?:[A-Za-z_]\w*)?\s*\(\s*"([^"]+)"|ScalarFunction\s*\(\s*"([^"]+)"', re.IGNORECASE) +REGISTER_AGGR_RE = re.compile(r'AggregateFunction\s+(?:[A-Za-z_]\w*)?\s*\(\s*"([^"]+)"|AggregateFunction\s*\(\s*"([^"]+)"') +REGISTER_TABLE_RE = re.compile(r'TableFunction\s+(?:[A-Za-z_]\w*)?\s*\(\s*"([^"]+)"|TableFunction\s*\(\s*"([^"]+)"') # Project macros that wrap registration calls under a fixed-name first # argument (e.g. `REG_EA("ever_eq", Ever_eq)` registers "ever_eq" via a @@ -127,6 +156,12 @@ def is_out_of_scope_name(fname): # Per-subtype constructors registered through the # TemporalTypes::RegisterScalarFunctions loop. "tbool", "tint", "tfloat", "ttext", + # Per-subtype constructor names registered via the same loop + # (alias + "Inst" / "Seq" / "SeqSet" / "SeqSetGaps"). 
+ "tboolInst", "tboolSeq", "tboolSeqSet", "tboolSeqSetGaps", + "tintInst", "tintSeq", "tintSeqSet", "tintSeqSetGaps", + "tfloatInst","tfloatSeq","tfloatSeqSet","tfloatSeqSetGaps", + "ttextInst", "ttextSeq", "ttextSeqSet", "ttextSeqSetGaps", # Accessors registered through RegisterTemporalDatumAccessor. "minValue", "maxValue", "getValue", "startValue", "endValue", # Binary / HexWKB / MFJSON parsers registered through @@ -174,6 +209,7 @@ def collect_mobilitydb(mdb_root): rel = os.path.relpath(sql, sql_root) with open(sql) as f: text = f.read() + text = SQL_LINE_COMMENT_RE.sub("", text) funcs = collections.Counter() for m in CREATE_FUNC_RE.finditer(text): funcs[m.group(1)] += 1 @@ -198,8 +234,12 @@ def collect_mobilityduck(mduck_root): for regex in (REGISTER_SCALAR_RE, REGISTER_AGGR_RE, REGISTER_TABLE_RE, REGISTER_MACRO_RE): for m in regex.finditer(text): - funcs[m.group(1)] += 1 - files_for_func[m.group(1)].add(rel) + # Alternation produces multiple groups; use the first non-empty one. + name = next((g for g in m.groups() if g), None) + if not name: + continue + funcs[name] += 1 + files_for_func[name].add(rel) # Synthesize known dynamically-registered names so the audit # reflects reality (see DYNAMIC_REGISTERED comment above). for name in DYNAMIC_REGISTERED: diff --git a/src/geo/geoset.cpp b/src/geo/geoset.cpp index 05ab1a6a..700b6082 100644 --- a/src/geo/geoset.cpp +++ b/src/geo/geoset.cpp @@ -1,4 +1,5 @@ #include "geo/geoset.hpp" +#include "temporal/set_functions.hpp" #include "tydef.hpp" #include "geo_util.hpp" #include "duckdb/common/types/data_chunk.hpp" @@ -143,6 +144,44 @@ void SpatialSetType::RegisterScalarFunctions(ExtensionLoader &loader) { SpatialSetType::geomset(), SpatialSetFunctions::Geomset_constructor )); + + // Binary / EWKB / HexWKB / Text / EWKT parsers — route to the + // subtype-agnostic MEOS `set_from_wkb` / `set_from_hexwkb` / + // `set_in` dispatchers. The format encodes (or the caller-side + // basetype dictates) the target type. 
+ duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geomsetFromBinary", {LogicalType::BLOB}, SpatialSetType::geomset(), SetFunctions::Set_from_binary)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geomsetFromEWKB", {LogicalType::BLOB}, SpatialSetType::geomset(), SetFunctions::Set_from_binary)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geomsetFromHexWKB", {LogicalType::VARCHAR}, SpatialSetType::geomset(), SetFunctions::Set_from_hexwkb)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geomsetFromText", {LogicalType::VARCHAR}, SpatialSetType::geomset(), SpatialSetFunctions::Geomset_from_text)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geomsetFromEWKT", {LogicalType::VARCHAR}, SpatialSetType::geomset(), SpatialSetFunctions::Geomset_from_text)); + + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geogsetFromBinary", {LogicalType::BLOB}, SpatialSetType::geogset(), SetFunctions::Set_from_binary)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geogsetFromEWKB", {LogicalType::BLOB}, SpatialSetType::geogset(), SetFunctions::Set_from_binary)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geogsetFromHexWKB", {LogicalType::VARCHAR}, SpatialSetType::geogset(), SetFunctions::Set_from_hexwkb)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geogsetFromText", {LogicalType::VARCHAR}, SpatialSetType::geogset(), SpatialSetFunctions::Geogset_from_text)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geogsetFromEWKT", {LogicalType::VARCHAR}, SpatialSetType::geogset(), SpatialSetFunctions::Geogset_from_text)); + + // asBinary / asHexWKB for geomset / geogset — output side of the + // I/O round-trip. `set_as_wkb` / `set_as_hexwkb` are + // subtype-agnostic; the format encodes the source basetype. 
+ duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "asBinary", {SpatialSetType::geomset()}, LogicalType::BLOB, SetFunctions::Set_as_binary)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "asBinary", {SpatialSetType::geogset()}, LogicalType::BLOB, SetFunctions::Set_as_binary)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "asHexWKB", {SpatialSetType::geomset()}, LogicalType::VARCHAR, SetFunctions::Set_as_hexwkb)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "asHexWKB", {SpatialSetType::geogset()}, LogicalType::VARCHAR, SetFunctions::Set_as_hexwkb)); } // --- Constructor: set(LIST(GEOMETRY)) -> geomset --- @@ -211,6 +250,41 @@ bool SpatialSetFunctions::Text_to_geoset(Vector &source, Vector &result, idx_t c return true; } +// --- WKT/EWKT parsers --- +// `geomsetFromText` / `geomsetFromEWKT` route here when the result type +// is geomset; `geogsetFromText` / `geogsetFromEWKT` route via the +// geogset variant. `set_in` is the MEOS dispatcher that handles both +// WKT and EWKT input for spatial-set basetypes. 
+ +namespace { + +inline void GeosetFromTextImpl(DataChunk &args, Vector &result, meosType basetype, const char *func_name) { + UnaryExecutor::Execute<string_t, string_t>( + args.data[0], result, args.size(), + [&](string_t input) -> string_t { + std::string s(input.GetData(), input.GetSize()); + Set *r = set_in(s.c_str(), basetype); + if (!r) { + throw InvalidInputException(std::string(func_name) + ": invalid input"); + } + size_t sz = set_mem_size(r); + string_t stored = StringVector::AddStringOrBlob( + result, string_t(reinterpret_cast<const char *>(r), sz)); + free(r); + return stored; + }); +} + +} // namespace + +void SpatialSetFunctions::Geomset_from_text(DataChunk &args, ExpressionState &state, Vector &result) { + GeosetFromTextImpl(args, result, T_GEOMSET, "geomsetFromText/EWKT"); +} + +void SpatialSetFunctions::Geogset_from_text(DataChunk &args, ExpressionState &state, Vector &result) { + GeosetFromTextImpl(args, result, T_GEOGSET, "geogsetFromText/EWKT"); +} + // --- asText --- void SpatialSetFunctions::Spatialset_as_text(DataChunk &args, ExpressionState &state, Vector &result) { auto &input_vec = args.data[0]; diff --git a/src/geo/stbox.cpp b/src/geo/stbox.cpp index 3b667360..158511b1 100644 --- a/src/geo/stbox.cpp +++ b/src/geo/stbox.cpp @@ -4,6 +4,9 @@ #include "geo/stbox.hpp" #include "geo/stbox_functions.hpp" #include "geo/tgeompoint.hpp" +#include "geo/tgeogpoint.hpp" +#include "geo/tgeometry.hpp" +#include "geo/tgeography.hpp" #include "duckdb/common/types/blob.hpp" #include "duckdb/function/function.hpp" @@ -84,17 +87,16 @@ void StboxType::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); - // ExtensionUtil::RegisterFunction( - // instance, - // ScalarFunction( - // "stboxFromHexWKB", - // {LogicalType::VARCHAR}, - // STBOX(), - // StboxFunctions::Stbox_from_hexwkb - // ) - // ); + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction( + "stboxFromHexWKB", + {LogicalType::VARCHAR}, + STBOX(), + StboxFunctions::Stbox_from_hexwkb + ) + ); - 
duckdb::RegisterSerializedScalarFunction(loader, + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "asText", {STBOX()}, @@ -103,6 +105,52 @@ void StboxType::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); + /* Dimensional constructors — stboxX/Z/T/XT/ZT and the geodstbox* + * variants. All wrap MEOS stbox_make with the appropriate + * has-x/has-z/geodetic flags filled in. */ + { + const auto STB = STBOX(); + const auto D = LogicalType::DOUBLE; + const auto I = LogicalType::INTEGER; + const auto T = LogicalType::TIMESTAMP_TZ; + const auto SP = SpanTypes::TSTZSPAN(); + + // stboxX(xmin, xmax, ymin, ymax, srid) + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "stboxX", {D, D, D, D, I}, STB, StboxFunctions::Stbox_constructor_x)); + // stboxZ(xmin, xmax, ymin, ymax, zmin, zmax, srid) + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "stboxZ", {D, D, D, D, D, D, I}, STB, StboxFunctions::Stbox_constructor_z)); + // stboxT(timestamptz) and stboxT(tstzspan) + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "stboxT", {T}, STB, StboxFunctions::Stbox_constructor_t_ts)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "stboxT", {SP}, STB, StboxFunctions::Stbox_constructor_t_span)); + // stboxXT(xmin, xmax, ymin, ymax, ts|span, srid) + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "stboxXT", {D, D, D, D, T, I}, STB, StboxFunctions::Stbox_constructor_xt_ts)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "stboxXT", {D, D, D, D, SP, I}, STB, StboxFunctions::Stbox_constructor_xt_span)); + // stboxZT(xmin, xmax, ymin, ymax, zmin, zmax, ts|span, srid) + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "stboxZT", {D, D, D, D, D, D, T, I}, STB, StboxFunctions::Stbox_constructor_zt_ts)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "stboxZT", {D, D, D, D, D, D, SP, I}, STB, 
StboxFunctions::Stbox_constructor_zt_span)); + + // Geographic variants — geodetic flag set; SRID defaults to + // 4326 in the time-only forms (MobilityDB convention). + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geodstboxZ", {D, D, D, D, D, D, I}, STB, StboxFunctions::Geodstbox_constructor_z)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geodstboxT", {T}, STB, StboxFunctions::Geodstbox_constructor_t_ts)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geodstboxT", {SP}, STB, StboxFunctions::Geodstbox_constructor_t_span)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geodstboxZT", {D, D, D, D, D, D, T, I}, STB, StboxFunctions::Geodstbox_constructor_zt_ts)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "geodstboxZT", {D, D, D, D, D, D, SP, I}, STB, StboxFunctions::Geodstbox_constructor_zt_span)); + } + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "asBinary", @@ -112,15 +160,14 @@ void StboxType::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); - // ExtensionUtil::RegisterFunction( - // instance, - // ScalarFunction( - // "asHexWKB", - // {STBOX()}, - // LogicalType::VARCHAR, - // StboxFunctions::Stbox_as_hexwkb - // ) - // ); + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction( + "asHexWKB", + {STBOX()}, + LogicalType::VARCHAR, + StboxFunctions::Stbox_as_hexwkb + ) + ); duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( @@ -325,7 +372,7 @@ void StboxType::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); - duckdb::RegisterSerializedScalarFunction(loader, + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "volume", {STBOX()}, @@ -334,7 +381,19 @@ void StboxType::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); - duckdb::RegisterSerializedScalarFunction(loader, + // Hash functions — `stbox_hash(stbox) → INTEGER`, + // `stbox_hash_extended(stbox, seed) → BIGINT`. 
+ duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("stbox_hash", {STBOX()}, LogicalType::INTEGER, + StboxFunctions::Stbox_hash)); + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("stbox_hash_extended", {STBOX(), LogicalType::BIGINT}, + LogicalType::BIGINT, StboxFunctions::Stbox_hash_extended)); + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("SRID", {STBOX()}, LogicalType::INTEGER, + StboxFunctions::Stbox_srid)); + + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "shiftTime", {STBOX(), LogicalType::INTERVAL}, @@ -957,6 +1016,29 @@ void StboxType::RegisterScalarFunctions(ExtensionLoader &loader) { loader.RegisterFunction(ScalarFunction("spaceTimeBoxes", {P, D, D, D, I, G, TS}, LB, StboxFunctions::Tgeo_space_time_boxes)); loader.RegisterFunction(ScalarFunction("spaceTimeBoxes", {P, D, D, D, I, G, TS, BB}, LB, StboxFunctions::Tgeo_space_time_boxes)); + // Multi-entry bbox emitters: stboxes / splitNStboxes / + // splitEachNStboxes for tgeometry / tgeography / tgeompoint / + // tgeogpoint, plus the geometry / geography geo-side overloads. 
+ const auto TGM = TGeometryTypes::TGEOMETRY(); + const auto TGG = TGeographyTypes::TGEOGRAPHY(); + const auto TGP = TgeogpointType::TGEOGPOINT(); + const auto INT32 = LogicalType::INTEGER; + loader.RegisterFunction(ScalarFunction("stboxes", {P}, LB, StboxFunctions::Tspatial_stboxes)); + loader.RegisterFunction(ScalarFunction("stboxes", {TGP}, LB, StboxFunctions::Tspatial_stboxes)); + loader.RegisterFunction(ScalarFunction("stboxes", {TGM}, LB, StboxFunctions::Tspatial_stboxes)); + loader.RegisterFunction(ScalarFunction("stboxes", {TGG}, LB, StboxFunctions::Tspatial_stboxes)); + loader.RegisterFunction(ScalarFunction("stboxes", {G}, LB, StboxFunctions::Geo_stboxes)); + loader.RegisterFunction(ScalarFunction("splitNStboxes", {P, INT32}, LB, StboxFunctions::Tspatial_split_n_stboxes)); + loader.RegisterFunction(ScalarFunction("splitNStboxes", {TGP, INT32}, LB, StboxFunctions::Tspatial_split_n_stboxes)); + loader.RegisterFunction(ScalarFunction("splitNStboxes", {TGM, INT32}, LB, StboxFunctions::Tspatial_split_n_stboxes)); + loader.RegisterFunction(ScalarFunction("splitNStboxes", {TGG, INT32}, LB, StboxFunctions::Tspatial_split_n_stboxes)); + loader.RegisterFunction(ScalarFunction("splitNStboxes", {G, INT32}, LB, StboxFunctions::Geo_split_n_stboxes)); + loader.RegisterFunction(ScalarFunction("splitEachNStboxes", {P, INT32}, LB, StboxFunctions::Tspatial_split_each_n_stboxes)); + loader.RegisterFunction(ScalarFunction("splitEachNStboxes", {TGP, INT32}, LB, StboxFunctions::Tspatial_split_each_n_stboxes)); + loader.RegisterFunction(ScalarFunction("splitEachNStboxes", {TGM, INT32}, LB, StboxFunctions::Tspatial_split_each_n_stboxes)); + loader.RegisterFunction(ScalarFunction("splitEachNStboxes", {TGG, INT32}, LB, StboxFunctions::Tspatial_split_each_n_stboxes)); + loader.RegisterFunction(ScalarFunction("splitEachNStboxes", {G, INT32}, LB, StboxFunctions::Geo_split_each_n_stboxes)); + // getSpaceTile(point geometry, xsz, ysz, zsz[, sorigin]) 
loader.RegisterFunction(ScalarFunction("getSpaceTile", {G, D, D, D}, B, StboxFunctions::Stbox_get_space_tile)); loader.RegisterFunction(ScalarFunction("getSpaceTile", {G, D, D, D, G}, B, StboxFunctions::Stbox_get_space_tile)); diff --git a/src/geo/stbox_functions.cpp b/src/geo/stbox_functions.cpp index c661fd2d..65bc28a0 100644 --- a/src/geo/stbox_functions.cpp +++ b/src/geo/stbox_functions.cpp @@ -183,8 +183,11 @@ void StboxFunctions::Stbox_from_hexwkb(DataChunk &args, ExpressionState &state, UnaryExecutor::Execute( args.data[0], result, args.size(), [&](string_t input_hexwkb) -> string_t { - char *hexwkb = (char*)input_hexwkb.GetData(); - STBox *stbox = stbox_from_hexwkb(hexwkb); + // string_t::GetData() is not NUL-terminated; stbox_from_hexwkb() + // strlen()s its argument, so pass a NUL-terminated copy to avoid + // reading past the buffer (matches the sibling hex-WKB consumers). + std::string hexwkb = input_hexwkb.GetString(); + STBox *stbox = stbox_from_hexwkb(hexwkb.c_str()); if (!stbox) { throw InternalException("Failure in Stbox_from_hexwkb: unable to cast hexwkb to stbox"); return string_t(); @@ -306,6 +309,284 @@ void StboxFunctions::Stbox_as_hexwkb(DataChunk &args, ExpressionState &state, Ve * Constructor functions ****************************************************/ +namespace { + +// Pack a freshly-built STBox into a DuckDB blob and free the source. +inline string_t StboxToBlob(Vector &result, STBox *box) { + size_t sz = sizeof(STBox); + string_t stored = StringVector::AddStringOrBlob( + result, string_t(reinterpret_cast(box), sz)); + free(box); + return stored; +} + +// Build a Span (TimestampTz, single-instant or range) for the time +// component of stboxT / stboxXT / stboxZT. Caller frees. 
+inline Span *MakeTstzSpanInstant(timestamp_tz_t ts_duckdb) { + timestamp_tz_t ts_meos = DuckDBToMeosTimestamp(ts_duckdb); + return tstzspan_make((TimestampTz) ts_meos.value, + (TimestampTz) ts_meos.value, true, true); +} + +// Cast the input span blob into a heap-owned Span* the caller can pass +// directly to stbox_make. +inline Span *CopyTstzSpanFromBlob(string_t span_blob) { + if (span_blob.GetSize() < sizeof(Span)) + throw InvalidInputException("invalid TSTZSPAN blob"); + Span *s = (Span *)malloc(sizeof(Span)); + memcpy(s, span_blob.GetData(), sizeof(Span)); + return s; +} + +} // anonymous namespace + +void StboxFunctions::Stbox_constructor_x(DataChunk &args, ExpressionState &state, Vector &result) { + const idx_t count = args.size(); + args.data[0].Flatten(count); args.data[1].Flatten(count); + args.data[2].Flatten(count); args.data[3].Flatten(count); + args.data[4].Flatten(count); + auto xmin = FlatVector::GetData(args.data[0]); + auto xmax = FlatVector::GetData(args.data[1]); + auto ymin = FlatVector::GetData(args.data[2]); + auto ymax = FlatVector::GetData(args.data[3]); + auto srid = FlatVector::GetData(args.data[4]); + auto out = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + STBox *b = stbox_make(true, false, false, srid[i], + xmin[i], xmax[i], ymin[i], ymax[i], 0, 0, NULL); + if (!b) throw InvalidInputException("stboxX: stbox_make failed"); + out[i] = StboxToBlob(result, b); + } +} + +void StboxFunctions::Stbox_constructor_z(DataChunk &args, ExpressionState &state, Vector &result) { + const idx_t count = args.size(); + for (idx_t i = 0; i < args.ColumnCount(); i++) args.data[i].Flatten(count); + auto xmin = FlatVector::GetData(args.data[0]); + auto xmax = FlatVector::GetData(args.data[1]); + auto ymin = FlatVector::GetData(args.data[2]); + auto ymax = FlatVector::GetData(args.data[3]); + auto zmin = FlatVector::GetData(args.data[4]); + auto zmax = FlatVector::GetData(args.data[5]); + auto srid = FlatVector::GetData(args.data[6]); 
+ auto out = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + STBox *b = stbox_make(true, true, false, srid[i], + xmin[i], xmax[i], ymin[i], ymax[i], + zmin[i], zmax[i], NULL); + if (!b) throw InvalidInputException("stboxZ: stbox_make failed"); + out[i] = StboxToBlob(result, b); + } +} + +void StboxFunctions::Stbox_constructor_t_ts(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](timestamp_tz_t ts) -> string_t { + Span *p = MakeTstzSpanInstant(ts); + STBox *b = stbox_make(false, false, false, 0, + 0, 0, 0, 0, 0, 0, p); + free(p); + if (!b) throw InvalidInputException("stboxT: stbox_make failed"); + return StboxToBlob(result, b); + }); +} + +void StboxFunctions::Stbox_constructor_t_span(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](string_t span_blob) -> string_t { + Span *p = CopyTstzSpanFromBlob(span_blob); + STBox *b = stbox_make(false, false, false, 0, + 0, 0, 0, 0, 0, 0, p); + free(p); + if (!b) throw InvalidInputException("stboxT: stbox_make failed"); + return StboxToBlob(result, b); + }); +} + +void StboxFunctions::Stbox_constructor_xt_ts(DataChunk &args, ExpressionState &state, Vector &result) { + const idx_t count = args.size(); + for (idx_t i = 0; i < args.ColumnCount(); i++) args.data[i].Flatten(count); + auto xmin = FlatVector::GetData(args.data[0]); + auto xmax = FlatVector::GetData(args.data[1]); + auto ymin = FlatVector::GetData(args.data[2]); + auto ymax = FlatVector::GetData(args.data[3]); + auto ts = FlatVector::GetData(args.data[4]); + auto srid = FlatVector::GetData(args.data[5]); + auto out = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + Span *p = MakeTstzSpanInstant(ts[i]); + STBox *b = stbox_make(true, false, false, srid[i], + xmin[i], xmax[i], ymin[i], ymax[i], 0, 0, p); + free(p); + if (!b) throw InvalidInputException("stboxXT: stbox_make 
failed"); + out[i] = StboxToBlob(result, b); + } +} + +void StboxFunctions::Stbox_constructor_xt_span(DataChunk &args, ExpressionState &state, Vector &result) { + const idx_t count = args.size(); + for (idx_t i = 0; i < args.ColumnCount(); i++) args.data[i].Flatten(count); + auto xmin = FlatVector::GetData(args.data[0]); + auto xmax = FlatVector::GetData(args.data[1]); + auto ymin = FlatVector::GetData(args.data[2]); + auto ymax = FlatVector::GetData(args.data[3]); + auto sp = FlatVector::GetData(args.data[4]); + auto srid = FlatVector::GetData(args.data[5]); + auto out = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + Span *p = CopyTstzSpanFromBlob(sp[i]); + STBox *b = stbox_make(true, false, false, srid[i], + xmin[i], xmax[i], ymin[i], ymax[i], 0, 0, p); + free(p); + if (!b) throw InvalidInputException("stboxXT: stbox_make failed"); + out[i] = StboxToBlob(result, b); + } +} + +void StboxFunctions::Stbox_constructor_zt_ts(DataChunk &args, ExpressionState &state, Vector &result) { + const idx_t count = args.size(); + for (idx_t i = 0; i < args.ColumnCount(); i++) args.data[i].Flatten(count); + auto xmin = FlatVector::GetData(args.data[0]); + auto xmax = FlatVector::GetData(args.data[1]); + auto ymin = FlatVector::GetData(args.data[2]); + auto ymax = FlatVector::GetData(args.data[3]); + auto zmin = FlatVector::GetData(args.data[4]); + auto zmax = FlatVector::GetData(args.data[5]); + auto ts = FlatVector::GetData(args.data[6]); + auto srid = FlatVector::GetData(args.data[7]); + auto out = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + Span *p = MakeTstzSpanInstant(ts[i]); + STBox *b = stbox_make(true, true, false, srid[i], + xmin[i], xmax[i], ymin[i], ymax[i], + zmin[i], zmax[i], p); + free(p); + if (!b) throw InvalidInputException("stboxZT: stbox_make failed"); + out[i] = StboxToBlob(result, b); + } +} + +void StboxFunctions::Stbox_constructor_zt_span(DataChunk &args, ExpressionState &state, Vector &result) { + const idx_t 
count = args.size(); + for (idx_t i = 0; i < args.ColumnCount(); i++) args.data[i].Flatten(count); + auto xmin = FlatVector::GetData(args.data[0]); + auto xmax = FlatVector::GetData(args.data[1]); + auto ymin = FlatVector::GetData(args.data[2]); + auto ymax = FlatVector::GetData(args.data[3]); + auto zmin = FlatVector::GetData(args.data[4]); + auto zmax = FlatVector::GetData(args.data[5]); + auto sp = FlatVector::GetData(args.data[6]); + auto srid = FlatVector::GetData(args.data[7]); + auto out = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + Span *p = CopyTstzSpanFromBlob(sp[i]); + STBox *b = stbox_make(true, true, false, srid[i], + xmin[i], xmax[i], ymin[i], ymax[i], + zmin[i], zmax[i], p); + free(p); + if (!b) throw InvalidInputException("stboxZT: stbox_make failed"); + out[i] = StboxToBlob(result, b); + } +} + +/* Geographic variants — geodetic=true. No geodstboxX (the 2D-only + * geodetic stbox is degenerate on a sphere; MobilityDB exposes + * geodstboxZ / geodstboxT / geodstboxZT only). 
*/ + +void StboxFunctions::Geodstbox_constructor_z(DataChunk &args, ExpressionState &state, Vector &result) { + const idx_t count = args.size(); + for (idx_t i = 0; i < args.ColumnCount(); i++) args.data[i].Flatten(count); + auto xmin = FlatVector::GetData(args.data[0]); + auto xmax = FlatVector::GetData(args.data[1]); + auto ymin = FlatVector::GetData(args.data[2]); + auto ymax = FlatVector::GetData(args.data[3]); + auto zmin = FlatVector::GetData(args.data[4]); + auto zmax = FlatVector::GetData(args.data[5]); + auto srid = FlatVector::GetData(args.data[6]); + auto out = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + STBox *b = stbox_make(true, true, true, srid[i], + xmin[i], xmax[i], ymin[i], ymax[i], + zmin[i], zmax[i], NULL); + if (!b) throw InvalidInputException("geodstboxZ: stbox_make failed"); + out[i] = StboxToBlob(result, b); + } +} + +void StboxFunctions::Geodstbox_constructor_t_ts(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](timestamp_tz_t ts) -> string_t { + Span *p = MakeTstzSpanInstant(ts); + STBox *b = stbox_make(false, false, true, 4326, + 0, 0, 0, 0, 0, 0, p); + free(p); + if (!b) throw InvalidInputException("geodstboxT: stbox_make failed"); + return StboxToBlob(result, b); + }); +} + +void StboxFunctions::Geodstbox_constructor_t_span(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](string_t span_blob) -> string_t { + Span *p = CopyTstzSpanFromBlob(span_blob); + STBox *b = stbox_make(false, false, true, 4326, + 0, 0, 0, 0, 0, 0, p); + free(p); + if (!b) throw InvalidInputException("geodstboxT: stbox_make failed"); + return StboxToBlob(result, b); + }); +} + +void StboxFunctions::Geodstbox_constructor_zt_ts(DataChunk &args, ExpressionState &state, Vector &result) { + const idx_t count = args.size(); + for (idx_t i = 0; i < args.ColumnCount(); i++) 
args.data[i].Flatten(count); + auto xmin = FlatVector::GetData(args.data[0]); + auto xmax = FlatVector::GetData(args.data[1]); + auto ymin = FlatVector::GetData(args.data[2]); + auto ymax = FlatVector::GetData(args.data[3]); + auto zmin = FlatVector::GetData(args.data[4]); + auto zmax = FlatVector::GetData(args.data[5]); + auto ts = FlatVector::GetData(args.data[6]); + auto srid = FlatVector::GetData(args.data[7]); + auto out = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + Span *p = MakeTstzSpanInstant(ts[i]); + STBox *b = stbox_make(true, true, true, srid[i], + xmin[i], xmax[i], ymin[i], ymax[i], + zmin[i], zmax[i], p); + free(p); + if (!b) throw InvalidInputException("geodstboxZT: stbox_make failed"); + out[i] = StboxToBlob(result, b); + } +} + +void StboxFunctions::Geodstbox_constructor_zt_span(DataChunk &args, ExpressionState &state, Vector &result) { + const idx_t count = args.size(); + for (idx_t i = 0; i < args.ColumnCount(); i++) args.data[i].Flatten(count); + auto xmin = FlatVector::GetData(args.data[0]); + auto xmax = FlatVector::GetData(args.data[1]); + auto ymin = FlatVector::GetData(args.data[2]); + auto ymax = FlatVector::GetData(args.data[3]); + auto zmin = FlatVector::GetData(args.data[4]); + auto zmax = FlatVector::GetData(args.data[5]); + auto sp = FlatVector::GetData(args.data[6]); + auto srid = FlatVector::GetData(args.data[7]); + auto out = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + Span *p = CopyTstzSpanFromBlob(sp[i]); + STBox *b = stbox_make(true, true, true, srid[i], + xmin[i], xmax[i], ymin[i], ymax[i], + zmin[i], zmax[i], p); + free(p); + if (!b) throw InvalidInputException("geodstboxZT: stbox_make failed"); + out[i] = StboxToBlob(result, b); + } +} + void StboxFunctions::Geo_timestamptz_to_stbox(DataChunk &args, ExpressionState &state, Vector &result) { BinaryExecutor::ExecuteWithNulls( args.data[0], args.data[1], result, args.size(), @@ -1080,6 +1361,50 @@ void 
StboxFunctions::Stbox_area(DataChunk &args, ExpressionState &state, Vector ); } +/* *************************************************** + * Hash functions — `stbox_hash(stbox)` returns the PG-compatible + * 32-bit hash of the bbox; `stbox_hash_extended(stbox, seed)` returns + * the 64-bit extended hash with the caller-supplied seed. Both are + * needed for hash-equality predicates and hash partitioning. + ****************************************************/ +void StboxFunctions::Stbox_hash(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](string_t input_stbox) -> int32_t { + STBox *box = (STBox *) malloc(sizeof(STBox)); + memcpy(box, input_stbox.GetData(), sizeof(STBox)); + uint32_t h = stbox_hash(box); + free(box); + return static_cast(h); + }); + if (args.size() == 1) result.SetVectorType(VectorType::CONSTANT_VECTOR); +} + +void StboxFunctions::Stbox_hash_extended(DataChunk &args, ExpressionState &state, Vector &result) { + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), + [&](string_t input_stbox, int64_t seed) -> int64_t { + STBox *box = (STBox *) malloc(sizeof(STBox)); + memcpy(box, input_stbox.GetData(), sizeof(STBox)); + uint64_t h = stbox_hash_extended(box, static_cast(seed)); + free(box); + return static_cast(h); + }); +} + +void StboxFunctions::Stbox_srid(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](string_t input_stbox) -> int32_t { + STBox *box = (STBox *) malloc(sizeof(STBox)); + memcpy(box, input_stbox.GetData(), sizeof(STBox)); + int32_t srid = stbox_srid(box); + free(box); + return srid; + }); + if (args.size() == 1) result.SetVectorType(VectorType::CONSTANT_VECTOR); +} + void StboxFunctions::Stbox_volume(DataChunk &args, ExpressionState &state, Vector &result) { UnaryExecutor::ExecuteWithNulls( args.data[0], result, args.size(), @@ -2892,6 +3217,168 @@ void 
StboxFunctions::Tgeo_space_time_boxes(DataChunk &args, ExpressionState &sta } }
+/* ***************************************************
+ * Multi-entry bbox emitters — `stboxes`, `splitNStboxes`,
+ * `splitEachNStboxes`. All wrap MEOS's `tgeo_*` (Temporal *) or
+ * `geo_*` (GSERIALIZED *) emitters, returning an `stbox[]` of the
+ * computed bounding boxes.
+ ****************************************************/
+
+namespace {
+
+// Shared row loop for the stbox[] emitters.
+//
+// Flattens every argument, then for each row either
+//   * marks the output NULL (with an empty list entry) when ANY argument
+//     is NULL — the previous loops only looked at argument 0, so a NULL
+//     `n` was read as an undefined integer — or when `row_fn` reports
+//     that the input could not be converted, or
+//   * lets `row_fn(row, list_entries, total)` compute the boxes and append
+//     them to the result list. `row_fn` returns false to request a NULL row.
+template <class ROW_FN>
+void ForEachStboxListRow(DataChunk &args, Vector &result, ROW_FN &&row_fn) {
+    const idx_t row_count = args.size();
+    const idx_t col_count = args.ColumnCount();
+    for (idx_t col = 0; col < col_count; col++) args.data[col].Flatten(row_count);
+    auto list_entries = FlatVector::GetData<list_entry_t>(result);
+    auto &out_validity = FlatVector::Validity(result);
+    idx_t total = 0; // running offset into the result's child vector
+    for (idx_t row = 0; row < row_count; row++) {
+        bool usable = true;
+        for (idx_t col = 0; col < col_count && usable; col++) {
+            usable = FlatVector::Validity(args.data[col]).RowIsValid(row);
+        }
+        if (usable) usable = row_fn(row, list_entries, total);
+        if (!usable) {
+            out_validity.SetInvalid(row);
+            list_entries[row] = list_entry_t{total, 0};
+        }
+    }
+    if (row_count == 1) result.SetVectorType(VectorType::CONSTANT_VECTOR);
+}
+
+} // anonymous namespace
+
+// stboxes(temporal spatial value) -> stbox[]
+void StboxFunctions::Tspatial_stboxes(DataChunk &args, ExpressionState &state, Vector &result) {
+    ForEachStboxListRow(args, result, [&](idx_t row, list_entry_t *list_entries, idx_t &total) {
+        Temporal *temp = BlobToTempTile(FlatVector::GetData<string_t>(args.data[0])[row]);
+        int count = 0;
+        STBox *boxes = tgeo_stboxes(temp, &count);
+        free(temp);
+        EmitStboxList(result, row, list_entries, boxes, count, total);
+        return true;
+    });
+}
+
+// stboxes(geometry) -> stbox[]; NULL when the geometry cannot be converted.
+void StboxFunctions::Geo_stboxes(DataChunk &args, ExpressionState &state, Vector &result) {
+    ForEachStboxListRow(args, result, [&](idx_t row, list_entry_t *list_entries, idx_t &total) {
+        GSERIALIZED *gs = GeometryToGSerialized(FlatVector::GetData<string_t>(args.data[0])[row], 0);
+        if (!gs) return false;
+        int count = 0;
+        STBox *boxes = geo_stboxes(gs, &count);
+        free(gs);
+        EmitStboxList(result, row, list_entries, boxes, count, total);
+        return true;
+    });
+}
+
+// splitNStboxes(temporal spatial value, n) -> stbox[]
+void StboxFunctions::Tspatial_split_n_stboxes(DataChunk &args, ExpressionState &state, Vector &result) {
+    ForEachStboxListRow(args, result, [&](idx_t row, list_entry_t *list_entries, idx_t &total) {
+        Temporal *temp = BlobToTempTile(FlatVector::GetData<string_t>(args.data[0])[row]);
+        const int32_t n = FlatVector::GetData<int32_t>(args.data[1])[row];
+        int count = 0;
+        STBox *boxes = tgeo_split_n_stboxes(temp, n, &count);
+        free(temp);
+        EmitStboxList(result, row, list_entries, boxes, count, total);
+        return true;
+    });
+}
+
+// splitEachNStboxes(temporal spatial value, n) -> stbox[]
+void StboxFunctions::Tspatial_split_each_n_stboxes(DataChunk &args, ExpressionState &state, Vector &result) {
+    ForEachStboxListRow(args, result, [&](idx_t row, list_entry_t *list_entries, idx_t &total) {
+        Temporal *temp = BlobToTempTile(FlatVector::GetData<string_t>(args.data[0])[row]);
+        const int32_t n = FlatVector::GetData<int32_t>(args.data[1])[row];
+        int count = 0;
+        STBox *boxes = tgeo_split_each_n_stboxes(temp, n, &count);
+        free(temp);
+        EmitStboxList(result, row, list_entries, boxes, count, total);
+        return true;
+    });
+}
+
+// splitNStboxes(geometry, n) -> stbox[]; NULL when the geometry cannot be converted.
+void StboxFunctions::Geo_split_n_stboxes(DataChunk &args, ExpressionState &state, Vector &result) {
+    ForEachStboxListRow(args, result, [&](idx_t row, list_entry_t *list_entries, idx_t &total) {
+        GSERIALIZED *gs = GeometryToGSerialized(FlatVector::GetData<string_t>(args.data[0])[row], 0);
+        if (!gs) return false;
+        const int32_t n = FlatVector::GetData<int32_t>(args.data[1])[row];
+        int count = 0;
+        STBox *boxes = geo_split_n_stboxes(gs, n, &count);
+        free(gs);
+        EmitStboxList(result, row, list_entries, boxes, count, total);
+        return true;
+    });
+}
+
+// splitEachNStboxes(geometry, n) -> stbox[]; NULL when the geometry cannot be converted.
+void StboxFunctions::Geo_split_each_n_stboxes(DataChunk &args, ExpressionState &state, Vector &result) {
+    ForEachStboxListRow(args, result, [&](idx_t row, list_entry_t *list_entries, idx_t &total) {
+        GSERIALIZED *gs = GeometryToGSerialized(FlatVector::GetData<string_t>(args.data[0])[row], 0);
+        if (!gs) return false;
+        const int32_t n = FlatVector::GetData<int32_t>(args.data[1])[row];
+        int count = 0;
+        STBox *boxes = geo_split_each_n_stboxes(gs, n, &count);
+        free(gs);
+        EmitStboxList(result, row, list_entries, boxes, count, total);
+        return true;
+    });
+}
+
 void StboxFunctions::Stbox_get_space_tile(DataChunk &args, ExpressionState &state, Vector &result) { const idx_t row_count = args.size(); for (idx_t i = 0; i < args.ColumnCount(); i++) args.data[i].Flatten(row_count); diff --git a/src/geo/tgeogpoint.cpp b/src/geo/tgeogpoint.cpp index 8d687359..6cfdaf9d 100644 --- a/src/geo/tgeogpoint.cpp +++ b/src/geo/tgeogpoint.cpp @@ -212,6 +212,18 @@ void TgeogpointType::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); + // tgeogpointSeqSetGaps — geographic-distance variant of the gaps + // constructor. Three overloads. + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeogpointSeqSetGaps", {LogicalType::LIST(TGEOGPOINT())}, + TGEOGPOINT(), TemporalFunctions::Tsequenceset_constructor_gaps)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeogpointSeqSetGaps", {LogicalType::LIST(TGEOGPOINT()), LogicalType::INTERVAL}, + TGEOGPOINT(), TemporalFunctions::Tsequenceset_constructor_gaps)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeogpointSeqSetGaps", {LogicalType::LIST(TGEOGPOINT()), LogicalType::INTERVAL, LogicalType::DOUBLE}, + TGEOGPOINT(), TemporalFunctions::Tsequenceset_constructor_gaps)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "stbox", @@ -1605,6 +1617,16 @@ void TgeogpointType::RegisterScalarFunctions(ExtensionLoader &loader) { TgeompointFunctions::ShortestLine_tgeo_tgeo ) ); + + /* bearing — initial bearing in radians [0, 2π) for geographic points */ + { + const auto TG = TGEOGPOINT(); + const auto G = GeoTypes::GEOMETRY(); + const auto TF = TemporalTypes::TFLOAT(); + const auto D = LogicalType::DOUBLE; + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("bearing", {TG, G}, TF, TgeompointFunctions::Bearing_tpoint_geo)); +
duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("bearing", {G, TG}, TF, TgeompointFunctions::Bearing_geo_tpoint)); + } } /* *************************************************** diff --git a/src/geo/tgeogpoint_ops.cpp b/src/geo/tgeogpoint_ops.cpp index ab46adb5..80ffe8f9 100644 --- a/src/geo/tgeogpoint_ops.cpp +++ b/src/geo/tgeogpoint_ops.cpp @@ -233,9 +233,11 @@ void TgeoTgeoDistIntExec(DataChunk &args, ExpressionState &, Vector &result) { } // ==================================================================== -// Temporal-relation Temporal→Temporal helpers — `restr=false`, -// `atvalue=false` are the SQL defaults that produce a temporal value -// covering the whole input duration. +// Temporal-relation Temporal→Temporal helpers. The MEOS exports +// `t{contains,disjoint,intersects,touches,dwithin}_*` produce a tbool +// covering the whole input duration; restriction is composed at the +// call site when the SQL surface needs it (see Tcontains_geo_tgeo +// in tgeompoint_functions.cpp). 
// ==================================================================== diff --git a/src/geo/tgeography.cpp b/src/geo/tgeography.cpp index e9e3f73a..2487b9aa 100644 --- a/src/geo/tgeography.cpp +++ b/src/geo/tgeography.cpp @@ -1,5 +1,6 @@ #include "geo/tgeography.hpp" #include "geo/tgeompoint_functions.hpp" +#include "mobilityduck/meos_exec_serial.hpp" #include "duckdb/main/extension/extension_loader.hpp" #include "duckdb/common/extension_type_info.hpp" #include @@ -1082,13 +1083,31 @@ void TGeographyTypes::RegisterScalarFunctions(ExtensionLoader &loader) { loader.RegisterFunction( tgeographyseqarr_3params); auto tgeographyseqarr_4params = ScalarFunction( - "tgeographySeq", + "tgeographySeq", {LogicalType::LIST(TGeographyTypes::TGEOGRAPHY()), LogicalType::VARCHAR, LogicalType::BOOLEAN, LogicalType::BOOLEAN}, TGeographyTypes::TGEOGRAPHY(), Tgeography_sequence_constructor ); loader.RegisterFunction( tgeographyseqarr_4params); + // tgeographySeqSet — collect a list of tgeography values into a + // single TSequenceSet. + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeographySeqSet", {LogicalType::LIST(TGeographyTypes::TGEOGRAPHY())}, + TGeographyTypes::TGEOGRAPHY(), TemporalFunctions::Tsequenceset_constructor)); + + // tgeographySeqSetGaps — split into sequences at temporal or + // geographic-distance gaps. 
+ duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeographySeqSetGaps", {LogicalType::LIST(TGeographyTypes::TGEOGRAPHY())}, + TGeographyTypes::TGEOGRAPHY(), TemporalFunctions::Tsequenceset_constructor_gaps)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeographySeqSetGaps", {LogicalType::LIST(TGeographyTypes::TGEOGRAPHY()), LogicalType::INTERVAL}, + TGeographyTypes::TGEOGRAPHY(), TemporalFunctions::Tsequenceset_constructor_gaps)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeographySeqSetGaps", {LogicalType::LIST(TGeographyTypes::TGEOGRAPHY()), LogicalType::INTERVAL, LogicalType::DOUBLE}, + TGeographyTypes::TGEOGRAPHY(), TemporalFunctions::Tsequenceset_constructor_gaps)); + auto tgeography_to_timespan_function = ScalarFunction( "timeSpan", {TGeographyTypes::TGEOGRAPHY()}, diff --git a/src/geo/tgeography_ops.cpp b/src/geo/tgeography_ops.cpp index 321c3bb1..cc614cb1 100644 --- a/src/geo/tgeography_ops.cpp +++ b/src/geo/tgeography_ops.cpp @@ -234,9 +234,11 @@ void TgeoTgeoDistIntExec(DataChunk &args, ExpressionState &, Vector &result) { } // ==================================================================== -// Temporal-relation Temporal→Temporal helpers — `restr=false`, -// `atvalue=false` are the SQL defaults that produce a temporal value -// covering the whole input duration. +// Temporal-relation Temporal→Temporal helpers. The MEOS exports +// `t{contains,disjoint,intersects,touches,dwithin}_*` produce a tbool +// covering the whole input duration; restriction is composed at the +// call site when the SQL surface needs it (see Tcontains_geo_tgeo +// in tgeompoint_functions.cpp). 
// ==================================================================== @@ -977,6 +979,18 @@ void TGeographyOps::RegisterScalarFunctions(ExtensionLoader &loader) { REG_TCMP("temporal_teq", Teq) REG_TCMP("temporal_tne", Tne) #undef REG_TCMP + + // eCovers (BOOLEAN), aCovers (BOOLEAN) and tCovers (tbool) — + // covering relationships for tgeography. + loader.RegisterFunction(ScalarFunction("eCovers", {GEOM, TGEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Ecovers_geo_tgeo)); + loader.RegisterFunction(ScalarFunction("eCovers", {TGEOM, GEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Ecovers_tgeo_geo)); + loader.RegisterFunction(ScalarFunction("eCovers", {TGEOM, TGEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Ecovers_tgeo_tgeo)); + loader.RegisterFunction(ScalarFunction("aCovers", {GEOM, TGEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Acovers_geo_tgeo)); + loader.RegisterFunction(ScalarFunction("aCovers", {TGEOM, GEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Acovers_tgeo_geo)); + loader.RegisterFunction(ScalarFunction("aCovers", {TGEOM, TGEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Acovers_tgeo_tgeo)); + loader.RegisterFunction(ScalarFunction("tCovers", {GEOM, TGEOM}, TemporalTypes::TBOOL(), TgeompointFunctions::Tcovers_geo_tgeo)); + loader.RegisterFunction(ScalarFunction("tCovers", {TGEOM, GEOM}, TemporalTypes::TBOOL(), TgeompointFunctions::Tcovers_tgeo_geo)); + loader.RegisterFunction(ScalarFunction("tCovers", {TGEOM, TGEOM}, TemporalTypes::TBOOL(), TgeompointFunctions::Tcovers_tgeo_tgeo)); } } // namespace duckdb diff --git a/src/geo/tgeometry.cpp b/src/geo/tgeometry.cpp index 2076acb6..9359ac40 100644 --- a/src/geo/tgeometry.cpp +++ b/src/geo/tgeometry.cpp @@ -1083,13 +1083,31 @@ void TGeometryTypes::RegisterScalarFunctions(ExtensionLoader &loader) { duckdb::RegisterSerializedScalarFunction(loader, tgeometryseqarr_3params); auto tgeometryseqarr_4params = ScalarFunction( - "tgeometrySeq", + "tgeometrySeq", 
{LogicalType::LIST(TGeometryTypes::TGEOMETRY()), LogicalType::VARCHAR, LogicalType::BOOLEAN, LogicalType::BOOLEAN}, TGeometryTypes::TGEOMETRY(), Tgeometry_sequence_constructor ); duckdb::RegisterSerializedScalarFunction(loader, tgeometryseqarr_4params); + // tgeometrySeqSet — collect a list of tgeometry values into a + // single TSequenceSet. + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeometrySeqSet", {LogicalType::LIST(TGeometryTypes::TGEOMETRY())}, + TGeometryTypes::TGEOMETRY(), TemporalFunctions::Tsequenceset_constructor)); + + // tgeometrySeqSetGaps — split into sequences at temporal or + // 2D-distance gaps. + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeometrySeqSetGaps", {LogicalType::LIST(TGeometryTypes::TGEOMETRY())}, + TGeometryTypes::TGEOMETRY(), TemporalFunctions::Tsequenceset_constructor_gaps)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeometrySeqSetGaps", {LogicalType::LIST(TGeometryTypes::TGEOMETRY()), LogicalType::INTERVAL}, + TGeometryTypes::TGEOMETRY(), TemporalFunctions::Tsequenceset_constructor_gaps)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeometrySeqSetGaps", {LogicalType::LIST(TGeometryTypes::TGEOMETRY()), LogicalType::INTERVAL, LogicalType::DOUBLE}, + TGeometryTypes::TGEOMETRY(), TemporalFunctions::Tsequenceset_constructor_gaps)); + auto tgeometry_to_timespan_function = ScalarFunction( "timeSpan", {TGeometryTypes::TGEOMETRY()}, diff --git a/src/geo/tgeometry_ops.cpp b/src/geo/tgeometry_ops.cpp index 56d42cf8..e8085d68 100644 --- a/src/geo/tgeometry_ops.cpp +++ b/src/geo/tgeometry_ops.cpp @@ -234,9 +234,11 @@ void TgeoTgeoDistIntExec(DataChunk &args, ExpressionState &, Vector &result) { } // ==================================================================== -// Temporal-relation Temporal→Temporal helpers — `restr=false`, -// `atvalue=false` are the SQL defaults that produce a temporal value -// covering the whole input 
duration. +// Temporal-relation Temporal→Temporal helpers. The MEOS exports +// `t{contains,disjoint,intersects,touches,dwithin}_*` produce a tbool +// covering the whole input duration; restriction is composed at the +// call site when the SQL surface needs it (see Tcontains_geo_tgeo +// in tgeompoint_functions.cpp). // ==================================================================== @@ -974,6 +976,18 @@ void TGeometryOps::RegisterScalarFunctions(ExtensionLoader &loader) { REG_TCMP("temporal_teq", Teq) REG_TCMP("temporal_tne", Tne) #undef REG_TCMP + + // eCovers (BOOLEAN), aCovers (BOOLEAN) and tCovers (tbool) — + // covering relationships for tgeometry. + loader.RegisterFunction(ScalarFunction("eCovers", {GEOM, TGEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Ecovers_geo_tgeo)); + loader.RegisterFunction(ScalarFunction("eCovers", {TGEOM, GEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Ecovers_tgeo_geo)); + loader.RegisterFunction(ScalarFunction("eCovers", {TGEOM, TGEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Ecovers_tgeo_tgeo)); + loader.RegisterFunction(ScalarFunction("aCovers", {GEOM, TGEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Acovers_geo_tgeo)); + loader.RegisterFunction(ScalarFunction("aCovers", {TGEOM, GEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Acovers_tgeo_geo)); + loader.RegisterFunction(ScalarFunction("aCovers", {TGEOM, TGEOM}, LogicalType::BOOLEAN, TgeompointFunctions::Acovers_tgeo_tgeo)); + loader.RegisterFunction(ScalarFunction("tCovers", {GEOM, TGEOM}, TemporalTypes::TBOOL(), TgeompointFunctions::Tcovers_geo_tgeo)); + loader.RegisterFunction(ScalarFunction("tCovers", {TGEOM, GEOM}, TemporalTypes::TBOOL(), TgeompointFunctions::Tcovers_tgeo_geo)); + loader.RegisterFunction(ScalarFunction("tCovers", {TGEOM, TGEOM}, TemporalTypes::TBOOL(), TgeompointFunctions::Tcovers_tgeo_tgeo)); } } // namespace duckdb diff --git a/src/geo/tgeompoint.cpp b/src/geo/tgeompoint.cpp index 7f783c2f..43787543 100644 --- a/src/geo/tgeompoint.cpp +++ 
b/src/geo/tgeompoint.cpp @@ -3,6 +3,9 @@ #include "common.hpp" #include "geo/tgeompoint.hpp" +#include "geo/tgeogpoint.hpp" +#include "geo/tgeometry.hpp" +#include "geo/tgeography.hpp" #include "geo/tgeompoint_functions.hpp" #include "geo/geoset.hpp" #include "temporal/temporal_functions.hpp" @@ -62,11 +65,20 @@ void TgeompointType::RegisterCastFunctions(ExtensionLoader &loader) { void TgeompointType::RegisterScalarFunctions(ExtensionLoader &loader) { + // PG-equality 32-bit hash for tgeompoint / tgeogpoint / + // tgeometry / tgeography — `temporal_hash` is subtype-agnostic. + for (const auto &t : {TGEOMPOINT(), TgeogpointType::TGEOGPOINT(), + TGeometryTypes::TGEOMETRY(), TGeographyTypes::TGEOGRAPHY()}) { + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("temporal_hash", {t}, LogicalType::INTEGER, + TemporalFunctions::Temporal_hash)); + } + /* *************************************************** * In/out functions ****************************************************/ - duckdb::RegisterSerializedScalarFunction(loader, + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "asText", {TGEOMPOINT()}, @@ -228,7 +240,7 @@ void TgeompointType::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); - duckdb::RegisterSerializedScalarFunction(loader, + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "tgeompointSeqSet", {LogicalType::LIST(TGEOMPOINT())}, @@ -237,7 +249,19 @@ void TgeompointType::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); - duckdb::RegisterSerializedScalarFunction(loader, + // tgeompointSeqSetGaps — split into sequences at temporal or + // spatial gaps. Three overloads (no maxt, maxt only, maxt + maxdist). 
+ duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeompointSeqSetGaps", {LogicalType::LIST(TGEOMPOINT())}, + TGEOMPOINT(), TemporalFunctions::Tsequenceset_constructor_gaps)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeompointSeqSetGaps", {LogicalType::LIST(TGEOMPOINT()), LogicalType::INTERVAL}, + TGEOMPOINT(), TemporalFunctions::Tsequenceset_constructor_gaps)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + "tgeompointSeqSetGaps", {LogicalType::LIST(TGEOMPOINT()), LogicalType::INTERVAL, LogicalType::DOUBLE}, + TGEOMPOINT(), TemporalFunctions::Tsequenceset_constructor_gaps)); + + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "stbox", {TGEOMPOINT()}, @@ -1189,6 +1213,25 @@ void TgeompointType::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction( + "minusGeometry", + {TGEOMPOINT(), GeoTypes::GEOMETRY(), SpanTypes::FLOATSPAN()}, + TGEOMPOINT(), + TgeompointFunctions::Tgeo_minus_geom + ) + ); + + // atElevation / minusElevation — orthogonal floatspan restriction. 
+ duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("atElevation", + {TGEOMPOINT(), SpanTypes::FLOATSPAN()}, TGEOMPOINT(), + TgeompointFunctions::Tpoint_at_elevation)); + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("minusElevation", + {TGEOMPOINT(), SpanTypes::FLOATSPAN()}, TGEOMPOINT(), + TgeompointFunctions::Tpoint_minus_elevation)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "atStbox", @@ -1255,7 +1298,7 @@ void TgeompointType::RegisterScalarFunctions(ExtensionLoader &loader) { /* *************************************************** * Spatial relationships ****************************************************/ - duckdb::RegisterSerializedScalarFunction(loader, + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "eContains", {GeoTypes::GEOMETRY(), TGEOMPOINT()}, @@ -1263,7 +1306,7 @@ void TgeompointType::RegisterScalarFunctions(ExtensionLoader &loader) { TgeompointFunctions::Econtains_geo_tgeo ) ); - duckdb::RegisterSerializedScalarFunction(loader, + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "aContains", {GeoTypes::GEOMETRY(), TGEOMPOINT()}, @@ -1271,6 +1314,36 @@ void TgeompointType::RegisterScalarFunctions(ExtensionLoader &loader) { TgeompointFunctions::Acontains_geo_tgeo ) ); + /* eCovers — covering relationships (returns BOOLEAN). */ + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("eCovers", + {GeoTypes::GEOMETRY(), TGEOMPOINT()}, LogicalType::BOOLEAN, + TgeompointFunctions::Ecovers_geo_tgeo)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("eCovers", + {TGEOMPOINT(), GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, + TgeompointFunctions::Ecovers_tgeo_geo)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("eCovers", + {TGEOMPOINT(), TGEOMPOINT()}, LogicalType::BOOLEAN, + TgeompointFunctions::Ecovers_tgeo_tgeo)); + /* tCovers — temporal covering relationships (returns tbool). 
*/ + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("tCovers", + {GeoTypes::GEOMETRY(), TGEOMPOINT()}, TemporalTypes::TBOOL(), + TgeompointFunctions::Tcovers_geo_tgeo)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("tCovers", + {TGEOMPOINT(), GeoTypes::GEOMETRY()}, TemporalTypes::TBOOL(), + TgeompointFunctions::Tcovers_tgeo_geo)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("tCovers", + {TGEOMPOINT(), TGEOMPOINT()}, TemporalTypes::TBOOL(), + TgeompointFunctions::Tcovers_tgeo_tgeo)); + /* aCovers — always-covers (BOOLEAN). */ + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("aCovers", + {GeoTypes::GEOMETRY(), TGEOMPOINT()}, LogicalType::BOOLEAN, + TgeompointFunctions::Acovers_geo_tgeo)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("aCovers", + {TGEOMPOINT(), GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, + TgeompointFunctions::Acovers_tgeo_geo)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("aCovers", + {TGEOMPOINT(), TGEOMPOINT()}, LogicalType::BOOLEAN, + TgeompointFunctions::Acovers_tgeo_tgeo)); duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( @@ -1703,6 +1776,12 @@ void TgeompointType::RegisterScalarFunctions(ExtensionLoader &loader) { duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("tdistance", {TG, TG}, TF, TgeompointFunctions::Tdistance_named)); + /* bearing — initial bearing in radians [0, 2π) */ + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("bearing", {G, G}, D, TgeompointFunctions::Bearing_geo_geo)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("bearing", {TG, G}, TF, TgeompointFunctions::Bearing_tpoint_geo)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("bearing", {G, TG}, TF, TgeompointFunctions::Bearing_geo_tpoint)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("bearing", {TG, TG}, TF, 
TgeompointFunctions::Bearing_tpoint_tpoint)); + /* nearestApproachInstant */ duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("nearestApproachInstant", {TG, G}, TG, TgeompointFunctions::Nai_tgeo_geo)); duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("nearestApproachInstant", {G, TG}, TG, TgeompointFunctions::Nai_geo_tgeo));
diff --git a/src/geo/tgeompoint_functions.cpp b/src/geo/tgeompoint_functions.cpp
index 20b851dd..4057c1f0 100644
--- a/src/geo/tgeompoint_functions.cpp
+++ b/src/geo/tgeompoint_functions.cpp
@@ -3662,4 +3662,298 @@ void TgeompointFunctions::Tgeo_scale_geom_origin(DataChunk &args, ExpressionStat } }
+
+/* Shared plumbing for the bearing / covers / elevation executors below. */
+namespace {
+
+/* Frees a malloc-family pointer on scope exit, including when an exception
+ * unwinds through an executor lambda. free(nullptr) is a no-op, so a guard may
+ * be created before its pointer has been null-checked. */
+struct MallocGuard {
+    explicit MallocGuard(void *p) : ptr(p) {}
+    ~MallocGuard() { free(ptr); }
+    MallocGuard(const MallocGuard &) = delete;
+    MallocGuard &operator=(const MallocGuard &) = delete;
+    void *ptr;
+};
+
+/* Copies a temporal blob into malloc'ed storage owned by the caller. */
+Temporal *DupTemporalBlob(string_t blob) {
+    if (blob.GetSize() == 0) {
+        throw InvalidInputException("invalid temporal value: empty blob");
+    }
+    void *copy = malloc(blob.GetSize());
+    if (!copy) {
+        throw OutOfMemoryException("could not allocate a copy of a temporal value");
+    }
+    memcpy(copy, blob.GetData(), blob.GetSize());
+    return static_cast<Temporal *>(copy);
+}
+
+/* Throws InvalidInputException("<name>: invalid geometry") when `gs` is null. */
+void RequireGeometry(const GSERIALIZED *gs, const char *name) {
+    if (!gs) {
+        throw InvalidInputException(string(name) + ": invalid geometry");
+    }
+}
+
+/* Hands a MEOS temporal result to DuckDB. Takes ownership of `r`: its bytes are
+ * copied into the string heap of `result` and `r` is freed. A null `r` marks
+ * row `idx` as NULL instead. */
+string_t StoreTemporalResult(Temporal *r, Vector &result, ValidityMask &mask, idx_t idx) {
+    if (!r) {
+        mask.SetInvalid(idx);
+        return string_t();
+    }
+    MallocGuard r_guard(r);
+    size_t sz = temporal_mem_size(r);
+    return StringVector::AddStringOrBlob(result, string_t(reinterpret_cast<const char *>(r), sz));
+}
+
+/* Reduces a tbool to "true throughout" via its minimum value, which is true
+ * only when the tbool is never false. Takes ownership of the tbool; a null
+ * pointer marks row `idx` as NULL. */
+bool StoreAlwaysResult(Temporal *tcov, ValidityMask &mask, idx_t idx) {
+    if (!tcov) {
+        mask.SetInvalid(idx);
+        return false;
+    }
+    MallocGuard tcov_guard(tcov);
+    return DatumGetBool(temporal_min_value(tcov));
+}
+
+/* Maps the int returned by the MEOS ever-relationships onto BOOLEAN: negative
+ * marks row `idx` as NULL, otherwise non-zero means true. */
+bool StoreEverResult(int r, ValidityMask &mask, idx_t idx) {
+    if (r < 0) {
+        mask.SetInvalid(idx);
+        return false;
+    }
+    return r != 0;
+}
+
+} // namespace
+
+/* bearing(geometry, geometry) -> DOUBLE. Both inputs are deserialized with
+ * SRID 0; the row is NULL when bearing_point_point reports failure. */
+void TgeompointFunctions::Bearing_geo_geo(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, double>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t g1_blob, string_t g2_blob, ValidityMask &mask, idx_t idx) -> double {
+            GSERIALIZED *g1 = GeometryToGSerialized(g1_blob, 0);
+            MallocGuard g1_guard(g1);
+            GSERIALIZED *g2 = GeometryToGSerialized(g2_blob, 0);
+            MallocGuard g2_guard(g2);
+            if (!g1 || !g2) {
+                throw InvalidInputException("bearing: invalid geometry input");
+            }
+            double r = 0.0;
+            if (!bearing_point_point(g1, g2, &r)) {
+                mask.SetInvalid(idx);
+                return 0.0;
+            }
+            return r;
+        });
+}
+
+/* bearing(geometry, tpoint). Same MEOS call as Bearing_tpoint_geo but with the
+ * third argument true (presumably "operands swapped" - confirm against MEOS). */
+void TgeompointFunctions::Bearing_geo_tpoint(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, string_t>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t g_blob, string_t t_blob, ValidityMask &mask, idx_t idx) -> string_t {
+            Temporal *t = DupTemporalBlob(t_blob);
+            MallocGuard t_guard(t);
+            GSERIALIZED *gs = GeometryToGSerialized(g_blob, tspatial_srid(t));
+            MallocGuard gs_guard(gs);
+            RequireGeometry(gs, "bearing");
+            return StoreTemporalResult(bearing_tpoint_point(t, gs, true), result, mask, idx);
+        });
+}
+
+/* bearing(tpoint, geometry): bearing_tpoint_point with the third argument false. */
+void TgeompointFunctions::Bearing_tpoint_geo(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, string_t>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t t_blob, string_t g_blob, ValidityMask &mask, idx_t idx) -> string_t {
+            Temporal *t = DupTemporalBlob(t_blob);
+            MallocGuard t_guard(t);
+            GSERIALIZED *gs = GeometryToGSerialized(g_blob, tspatial_srid(t));
+            MallocGuard gs_guard(gs);
+            RequireGeometry(gs, "bearing");
+            return StoreTemporalResult(bearing_tpoint_point(t, gs, false), result, mask, idx);
+        });
+}
+
+/* bearing(tpoint, tpoint): NULL when MEOS returns no temporal bearing. */
+void TgeompointFunctions::Bearing_tpoint_tpoint(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, string_t>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t t1_blob, string_t t2_blob, ValidityMask &mask, idx_t idx) -> string_t {
+            Temporal *t1 = DupTemporalBlob(t1_blob);
+            MallocGuard t1_guard(t1);
+            Temporal *t2 = DupTemporalBlob(t2_blob);
+            MallocGuard t2_guard(t2);
+            return StoreTemporalResult(bearing_tpoint_tpoint(t1, t2), result, mask, idx);
+        });
+}
+
+/* aCovers family: evaluate tCovers over the whole input, then require the
+ * resulting tbool to be true throughout. This one is aCovers(geometry, tgeo). */
+void TgeompointFunctions::Acovers_geo_tgeo(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, bool>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t g_blob, string_t t_blob, ValidityMask &mask, idx_t idx) -> bool {
+            Temporal *t = DupTemporalBlob(t_blob);
+            MallocGuard t_guard(t);
+            GSERIALIZED *gs = GeometryToGSerialized(g_blob, tspatial_srid(t));
+            MallocGuard gs_guard(gs);
+            RequireGeometry(gs, "aCovers");
+            return StoreAlwaysResult(tcovers_geo_tgeo(gs, t), mask, idx);
+        });
+}
+
+/* aCovers(tgeo, geometry). */
+void TgeompointFunctions::Acovers_tgeo_geo(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, bool>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t t_blob, string_t g_blob, ValidityMask &mask, idx_t idx) -> bool {
+            Temporal *t = DupTemporalBlob(t_blob);
+            MallocGuard t_guard(t);
+            GSERIALIZED *gs = GeometryToGSerialized(g_blob, tspatial_srid(t));
+            MallocGuard gs_guard(gs);
+            RequireGeometry(gs, "aCovers");
+            return StoreAlwaysResult(tcovers_tgeo_geo(t, gs), mask, idx);
+        });
+}
+
+/* aCovers(tgeo, tgeo). */
+void TgeompointFunctions::Acovers_tgeo_tgeo(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, bool>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t t1_blob, string_t t2_blob, ValidityMask &mask, idx_t idx) -> bool {
+            Temporal *t1 = DupTemporalBlob(t1_blob);
+            MallocGuard t1_guard(t1);
+            Temporal *t2 = DupTemporalBlob(t2_blob);
+            MallocGuard t2_guard(t2);
+            return StoreAlwaysResult(tcovers_tgeo_tgeo(t1, t2), mask, idx);
+        });
+}
+
+/* eCovers(geometry, tgeo): NULL when MEOS returns a negative status. */
+void TgeompointFunctions::Ecovers_geo_tgeo(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, bool>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t g_blob, string_t t_blob, ValidityMask &mask, idx_t idx) -> bool {
+            Temporal *t = DupTemporalBlob(t_blob);
+            MallocGuard t_guard(t);
+            GSERIALIZED *gs = GeometryToGSerialized(g_blob, tspatial_srid(t));
+            MallocGuard gs_guard(gs);
+            RequireGeometry(gs, "eCovers");
+            return StoreEverResult(ecovers_geo_tgeo(gs, t), mask, idx);
+        });
+}
+
+/* eCovers(tgeo, geometry). */
+void TgeompointFunctions::Ecovers_tgeo_geo(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, bool>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t t_blob, string_t g_blob, ValidityMask &mask, idx_t idx) -> bool {
+            Temporal *t = DupTemporalBlob(t_blob);
+            MallocGuard t_guard(t);
+            GSERIALIZED *gs = GeometryToGSerialized(g_blob, tspatial_srid(t));
+            MallocGuard gs_guard(gs);
+            RequireGeometry(gs, "eCovers");
+            return StoreEverResult(ecovers_tgeo_geo(t, gs), mask, idx);
+        });
+}
+
+/* eCovers(tgeo, tgeo). */
+void TgeompointFunctions::Ecovers_tgeo_tgeo(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, bool>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t t1_blob, string_t t2_blob, ValidityMask &mask, idx_t idx) -> bool {
+            Temporal *t1 = DupTemporalBlob(t1_blob);
+            MallocGuard t1_guard(t1);
+            Temporal *t2 = DupTemporalBlob(t2_blob);
+            MallocGuard t2_guard(t2);
+            return StoreEverResult(ecovers_tgeo_tgeo(t1, t2), mask, idx);
+        });
+}
+
+/* tCovers(geometry, tgeo): the tbool produced by MEOS, or NULL when there is none. */
+void TgeompointFunctions::Tcovers_geo_tgeo(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, string_t>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t g_blob, string_t t_blob, ValidityMask &mask, idx_t idx) -> string_t {
+            Temporal *t = DupTemporalBlob(t_blob);
+            MallocGuard t_guard(t);
+            GSERIALIZED *gs = GeometryToGSerialized(g_blob, tspatial_srid(t));
+            MallocGuard gs_guard(gs);
+            RequireGeometry(gs, "tCovers");
+            return StoreTemporalResult(tcovers_geo_tgeo(gs, t), result, mask, idx);
+        });
+}
+
+/* tCovers(tgeo, geometry). */
+void TgeompointFunctions::Tcovers_tgeo_geo(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, string_t>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t t_blob, string_t g_blob, ValidityMask &mask, idx_t idx) -> string_t {
+            Temporal *t = DupTemporalBlob(t_blob);
+            MallocGuard t_guard(t);
+            GSERIALIZED *gs = GeometryToGSerialized(g_blob, tspatial_srid(t));
+            MallocGuard gs_guard(gs);
+            RequireGeometry(gs, "tCovers");
+            return StoreTemporalResult(tcovers_tgeo_geo(t, gs), result, mask, idx);
+        });
+}
+
+/* tCovers(tgeo, tgeo). */
+void TgeompointFunctions::Tcovers_tgeo_tgeo(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, string_t>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t t1_blob, string_t t2_blob, ValidityMask &mask, idx_t idx) -> string_t {
+            Temporal *t1 = DupTemporalBlob(t1_blob);
+            MallocGuard t1_guard(t1);
+            Temporal *t2 = DupTemporalBlob(t2_blob);
+            MallocGuard t2_guard(t2);
+            return StoreTemporalResult(tcovers_tgeo_tgeo(t1, t2), result, mask, idx);
+        });
+}
+
+/* Shared body of atElevation / minusElevation: applies `FN` to private copies
+ * of the temporal point and the floatspan. Throws InvalidInputException when
+ * the span blob is shorter than a Span instead of reading past its end; a
+ * null MEOS result marks row `idx` as NULL. */
+inline string_t TpointElevationExec(string_t t_blob, string_t s_blob, ValidityMask &mask, idx_t idx,
+                                    Vector &result, Temporal *(*FN)(const Temporal *, const Span *)) {
+    if (s_blob.GetSize() < sizeof(Span)) {
+        throw InvalidInputException("elevation restriction: invalid floatspan");
+    }
+    Span s;
+    memcpy(&s, s_blob.GetData(), sizeof(Span));
+    Temporal *t = DupTemporalBlob(t_blob);
+    MallocGuard t_guard(t);
+    return StoreTemporalResult(FN(t, &s), result, mask, idx);
+}
+
+/* atElevation(tpoint, floatspan). */
+void TgeompointFunctions::Tpoint_at_elevation(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, string_t>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t t_blob, string_t s_blob, ValidityMask &mask, idx_t idx) -> string_t {
+            return TpointElevationExec(t_blob, s_blob, mask, idx, result, tpoint_at_elevation);
+        });
+}
+
+/* minusElevation(tpoint, floatspan). */
+void TgeompointFunctions::Tpoint_minus_elevation(DataChunk &args, ExpressionState &state, Vector &result) {
+    BinaryExecutor::ExecuteWithNulls<string_t, string_t, string_t>(
+        args.data[0], args.data[1], result, args.size(),
+        [&](string_t t_blob, string_t s_blob, ValidityMask &mask, idx_t idx) -> string_t {
+            return TpointElevationExec(t_blob, s_blob, mask, idx, result, tpoint_minus_elevation);
+        });
+}
+
 } // namespace duckdb
diff --git a/src/include/geo/geoset.hpp b/src/include/geo/geoset.hpp
index c5f5e40e..a796f15e 100644
--- a/src/include/geo/geoset.hpp
+++ b/src/include/geo/geoset.hpp
@@ -26,7 +26,14 @@ struct SpatialSetFunctions{ //other static void Spatialset_as_text(DataChunk &args, ExpressionState &state, Vector &result); - static void Spatialset_as_ewkt(DataChunk &args, ExpressionState &state, Vector &result); + static void Spatialset_as_ewkt(DataChunk &args, ExpressionState &state, Vector &result); + /* Text/EWKT parsers — `geomsetFromText`, `geomsetFromEWKT`, + * `geogsetFromText`, `geogsetFromEWKT`. The MEOS `set_in` + * dispatcher accepts both WKT and EWKT for spatial-set basetypes, + * so a single executor covers all four entry points; the result + * type drives the basetype dispatch.
*/ + static void Geomset_from_text(DataChunk &args, ExpressionState &state, Vector &result); + static void Geogset_from_text(DataChunk &args, ExpressionState &state, Vector &result); static void Set_mem_size(DataChunk &args, ExpressionState &state, Vector &result); static void Spatialset_srid(DataChunk &args, ExpressionState &state, Vector &result); static void Spatialset_set_srid(DataChunk &args, ExpressionState &state, Vector &result_vec); diff --git a/src/include/geo/stbox_functions.hpp b/src/include/geo/stbox_functions.hpp index 2bd041f5..37f5285e 100644 --- a/src/include/geo/stbox_functions.hpp +++ b/src/include/geo/stbox_functions.hpp @@ -31,11 +31,27 @@ struct StboxFunctions { static void Stbox_as_hexwkb(DataChunk &args, ExpressionState &state, Vector &result); /* *************************************************** - * Constructor functions + * Dimensional constructor functions + * stboxX — 2D (xmin/xmax/ymin/ymax) + * stboxZ — 3D (xmin/xmax/ymin/ymax/zmin/zmax) + * stboxT — time-only + * stboxXT — 2D + time + * stboxZT — 3D + time + * geodstboxZ / geodstboxT / geodstboxZT — geodetic variants ****************************************************/ - // static void Stbox_constructor_x(DataChunk &args, ExpressionState &state, Vector &result); - // static void Stbox_constructor_z(DataChunk &args, ExpressionState &state, Vector &result); - // static void Stbox_constructor_t(DataChunk &args, ExpressionState &state, Vector &result); + static void Stbox_constructor_x(DataChunk &args, ExpressionState &state, Vector &result); + static void Stbox_constructor_z(DataChunk &args, ExpressionState &state, Vector &result); + static void Stbox_constructor_t_ts(DataChunk &args, ExpressionState &state, Vector &result); + static void Stbox_constructor_t_span(DataChunk &args, ExpressionState &state, Vector &result); + static void Stbox_constructor_xt_ts(DataChunk &args, ExpressionState &state, Vector &result); + static void Stbox_constructor_xt_span(DataChunk &args, 
ExpressionState &state, Vector &result); + static void Stbox_constructor_zt_ts(DataChunk &args, ExpressionState &state, Vector &result); + static void Stbox_constructor_zt_span(DataChunk &args, ExpressionState &state, Vector &result); + static void Geodstbox_constructor_z(DataChunk &args, ExpressionState &state, Vector &result); + static void Geodstbox_constructor_t_ts(DataChunk &args, ExpressionState &state, Vector &result); + static void Geodstbox_constructor_t_span(DataChunk &args, ExpressionState &state, Vector &result); + static void Geodstbox_constructor_zt_ts(DataChunk &args, ExpressionState &state, Vector &result); + static void Geodstbox_constructor_zt_span(DataChunk &args, ExpressionState &state, Vector &result); static void Geo_timestamptz_to_stbox(DataChunk &args, ExpressionState &state, Vector &result); static void Geo_tstzspan_to_stbox(DataChunk &args, ExpressionState &state, Vector &result); @@ -80,6 +96,9 @@ struct StboxFunctions { static void Stbox_tmax_inc(DataChunk &args, ExpressionState &state, Vector &result); static void Stbox_area(DataChunk &args, ExpressionState &state, Vector &result); static void Stbox_volume(DataChunk &args, ExpressionState &state, Vector &result); + static void Stbox_hash(DataChunk &args, ExpressionState &state, Vector &result); + static void Stbox_hash_extended(DataChunk &args, ExpressionState &state, Vector &result); + static void Stbox_srid(DataChunk &args, ExpressionState &state, Vector &result); // TODO static void Stbox_perimeter(DataChunk &args, ExpressionState &state, Vector &result); /* *************************************************** * Transformation functions @@ -162,6 +181,16 @@ struct StboxFunctions { static void Stbox_space_time_tiles(DataChunk &args, ExpressionState &state, Vector &result); static void Tgeo_space_boxes(DataChunk &args, ExpressionState &state, Vector &result); static void Tgeo_space_time_boxes(DataChunk &args, ExpressionState &state, Vector &result); + /* Multi-entry bbox emitters — 
`stboxes(t)`, `splitNStboxes(t, n)`, + * `splitEachNStboxes(t, n)` for tgeometry/tgeography/tgeompoint/ + * tgeogpoint and the geometry/geography geo-side overloads. + * Each emits an `stbox[]` for downstream multi-entry indexes. */ + static void Tspatial_stboxes(DataChunk &args, ExpressionState &state, Vector &result); + static void Geo_stboxes(DataChunk &args, ExpressionState &state, Vector &result); + static void Tspatial_split_n_stboxes(DataChunk &args, ExpressionState &state, Vector &result); + static void Tspatial_split_each_n_stboxes(DataChunk &args, ExpressionState &state, Vector &result); + static void Geo_split_n_stboxes(DataChunk &args, ExpressionState &state, Vector &result); + static void Geo_split_each_n_stboxes(DataChunk &args, ExpressionState &state, Vector &result); static void Stbox_get_space_tile(DataChunk &args, ExpressionState &state, Vector &result); static void Stbox_get_time_tile(DataChunk &args, ExpressionState &state, Vector &result); static void Stbox_get_space_time_tile(DataChunk &args, ExpressionState &state, Vector &result); diff --git a/src/include/geo/tgeompoint_functions.hpp b/src/include/geo/tgeompoint_functions.hpp index 1f5b1eb8..dc005046 100644 --- a/src/include/geo/tgeompoint_functions.hpp +++ b/src/include/geo/tgeompoint_functions.hpp @@ -134,10 +134,26 @@ struct TgeompointFunctions { static void Adwithin_tgeo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); static void Adwithin_tgeo_geo(DataChunk &args, ExpressionState &state, Vector &result); static void Adwithin_geo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); + static void Ecovers_geo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); + static void Ecovers_tgeo_geo(DataChunk &args, ExpressionState &state, Vector &result); + static void Ecovers_tgeo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); + /* aCovers (always covers) — `temporal_min_value(tcovers(...)) == TRUE`. 
*/ + static void Acovers_geo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); + static void Acovers_tgeo_geo(DataChunk &args, ExpressionState &state, Vector &result); + static void Acovers_tgeo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); + /* Elevation restriction — `atElevation(tpoint, floatspan)` and + * `minusElevation(tpoint, floatspan)`. Orthogonal to the geometry + * restriction (`atGeometry` / `minusGeometry`); compose at the + * SQL surface when both apply. */ + static void Tpoint_at_elevation(DataChunk &args, ExpressionState &state, Vector &result); + static void Tpoint_minus_elevation(DataChunk &args, ExpressionState &state, Vector &result); /* *************************************************** * Temporal-spatial relationships ****************************************************/ static void Tcontains_geo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); + static void Tcovers_geo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); + static void Tcovers_tgeo_geo(DataChunk &args, ExpressionState &state, Vector &result); + static void Tcovers_tgeo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); static void Tdisjoint_geo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); static void Tdisjoint_tgeo_geo(DataChunk &args, ExpressionState &state, Vector &result); static void Tdisjoint_tgeo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); @@ -166,6 +182,12 @@ struct TgeompointFunctions { static void collect_gs(DataChunk &args, ExpressionState &state, Vector &result); static void distance_geo_geo(DataChunk &args, ExpressionState &state, Vector &result); + /* bearing — initial bearing in radians [0, 2π) */ + static void Bearing_geo_geo(DataChunk &args, ExpressionState &state, Vector &result); + static void Bearing_geo_tpoint(DataChunk &args, ExpressionState &state, Vector &result); + static void Bearing_tpoint_geo(DataChunk &args, ExpressionState &state, Vector 
&result); + static void Bearing_tpoint_tpoint(DataChunk &args, ExpressionState &state, Vector &result); + /* nearestApproachInstant / nearestApproachDistance / nad */ static void Nai_tgeo_geo(DataChunk &args, ExpressionState &state, Vector &result); static void Nai_geo_tgeo(DataChunk &args, ExpressionState &state, Vector &result); diff --git a/src/include/temporal/spanset_functions.hpp b/src/include/temporal/spanset_functions.hpp index 62ae53e1..007c7ccd 100644 --- a/src/include/temporal/spanset_functions.hpp +++ b/src/include/temporal/spanset_functions.hpp @@ -86,7 +86,16 @@ struct SpansetFunctions{ static void Spanset_spans(DataChunk &args, ExpressionState &state, Vector &result); static void Spanset_split_n_spans(DataChunk &args, ExpressionState &state, Vector &result); static void Spanset_split_each_n_spans(DataChunk &args, ExpressionState &state, Vector &result); - + + // time_distance — temporal-distance between a tstzspanset and + // a timestamptz / tstzspan / tstzspanset. Five overloads dispatch + // to MEOS `distance_spanset_timestamptz` / + // `distance_tstzspanset_tstzspan` / `distance_tstzspanset_tstzspanset`. 
+ static void Time_distance_value_spanset(DataChunk &args, ExpressionState &state, Vector &result); + static void Time_distance_span_spanset(DataChunk &args, ExpressionState &state, Vector &result); + static void Time_distance_spanset_value(DataChunk &args, ExpressionState &state, Vector &result); + static void Time_distance_spanset_span(DataChunk &args, ExpressionState &state, Vector &result); + static void Time_distance_spanset_spanset(DataChunk &args, ExpressionState &state, Vector &result); // Comparison functions static void Spanset_eq(DataChunk &args, ExpressionState &state, Vector &result); diff --git a/src/include/temporal/temporal_functions.hpp b/src/include/temporal/temporal_functions.hpp index bbb895c3..fcbad30a 100644 --- a/src/include/temporal/temporal_functions.hpp +++ b/src/include/temporal/temporal_functions.hpp @@ -78,6 +78,8 @@ struct TemporalFunctions { static void Temporal_end_value(DataChunk &args, ExpressionState &state, Vector &result); static void Temporal_min_value(DataChunk &args, ExpressionState &state, Vector &result); static void Temporal_max_value(DataChunk &args, ExpressionState &state, Vector &result); + /* PG-equality 32-bit hash; routed for every temporal type. */ + static void Temporal_hash(DataChunk &args, ExpressionState &state, Vector &result); static void Tnumber_avg_value(DataChunk &args, ExpressionState &state, Vector &result); static void Temporal_value_n(DataChunk &args, ExpressionState &state, Vector &result); static void Temporal_num_instants(DataChunk &args, ExpressionState &state, Vector &result); diff --git a/src/include/tydef.hpp b/src/include/tydef.hpp index b7b28109..3804cb45 100644 --- a/src/include/tydef.hpp +++ b/src/include/tydef.hpp @@ -11,10 +11,8 @@ extern "C" { #include } -// Forward-compat alias for the meosType → MeosType rename (MobilityDB -// pr785-sync-script). Vcpkg's MEOS exposes `MeosType`; existing -// MobilityDuck code still uses `meosType`. 
This alias bridges the two -// without touching every reference site. +// `meosType` and `MeosType` are interchangeable spellings of the +// catalog-type enum (MEOS spells it `MeosType`). using meosType = MeosType; namespace duckdb { @@ -47,6 +45,7 @@ DatumGetFloat8(Datum X) #define DatumGetInt32(X) ((int32) (X)) #define DatumGetInt64(X) ((int64) (X)) +#define DatumGetBool(X) ((bool) (((int64) (X)) != 0)) #define DatumGetCString(X) ((char *) DatumGetPointer(X)) #define CStringGetDatum(X) PointerGetDatum(X) #define DatumGetPointer(X) ((Pointer) (X)) diff --git a/src/temporal/set.cpp b/src/temporal/set.cpp index 912b7317..bf1a4629 100644 --- a/src/temporal/set.cpp +++ b/src/temporal/set.cpp @@ -945,9 +945,9 @@ static inline Set *date_to_set_duckdb(DateADT d) { return date_to_set(ToMeosDate(duckdb::date_t(d))); } -// macOS LP64: int64 (long) and int64_t (long long) are distinct types, so -// clang rejects passing bigint_to_set where a Set *(*)(int64_t) is expected as -// a non-type template arg. The cast is a no-op on Linux. +// MEOS `int64` is `long`; on macOS (LP64) `int64_t` is `long long`. +// Same width, distinct types — go through a forwarding wrapper so the +// template instantiates with a `int64_t`-typed function pointer. 
static inline Set *bigint_to_set_duckdb(int64_t i) { return bigint_to_set(static_cast(i)); } diff --git a/src/temporal/spanset.cpp b/src/temporal/spanset.cpp index aa6f0d07..7401e8f6 100644 --- a/src/temporal/spanset.cpp +++ b/src/temporal/spanset.cpp @@ -405,11 +405,31 @@ void SpansetTypes::RegisterScalarFunctions(ExtensionLoader &loader) { duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction(">", {spanset_type, spanset_type}, LogicalType::BOOLEAN, SpansetFunctions::Spanset_gt) ); - duckdb::RegisterSerializedScalarFunction(loader, + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("spanset_cmp", {spanset_type, spanset_type}, LogicalType::INTEGER, SpansetFunctions::Spanset_cmp) ); } - duckdb::RegisterSerializedScalarFunction(loader, + + // time_distance — temporal-distance between a tstzspanset and a + // timestamptz / tstzspan / tstzspanset. Five overloads. + { + const auto SS = SpansetTypes::tstzspanset(); + const auto S = SpanTypes::TSTZSPAN(); + const auto TS = LogicalType::TIMESTAMP_TZ; + const auto D = LogicalType::DOUBLE; + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("time_distance", {TS, SS}, D, SpansetFunctions::Time_distance_value_spanset)); + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("time_distance", {S, SS}, D, SpansetFunctions::Time_distance_span_spanset)); + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("time_distance", {SS, TS}, D, SpansetFunctions::Time_distance_spanset_value)); + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("time_distance", {SS, S}, D, SpansetFunctions::Time_distance_spanset_span)); + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("time_distance", {SS, SS}, D, SpansetFunctions::Time_distance_spanset_spanset)); + } + + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction("duration", {SpansetTypes::datespanset()}, LogicalType::INTERVAL, SpansetFunctions::Datespanset_duration) ); diff --git 
a/src/temporal/spanset_functions.cpp b/src/temporal/spanset_functions.cpp index 852be84b..cb14a61b 100644 --- a/src/temporal/spanset_functions.cpp +++ b/src/temporal/spanset_functions.cpp @@ -1958,4 +1958,79 @@ void SpansetFunctions::Spanset_cmp(DataChunk &args, ExpressionState &state, Vect ); } -} // namespace duckdb +/* *************************************************** + * time_distance — temporal distance between a tstzspanset and a + * timestamptz / tstzspan / tstzspanset. Wraps the MEOS exports + * `distance_spanset_timestamptz`, `distance_tstzspanset_tstzspan`, + * `distance_tstzspanset_tstzspanset`. The (timestamptz, tstzspanset) + * and (tstzspan, tstzspanset) overloads swap arguments before the + * MEOS call to reuse the same exports. + ****************************************************/ +/* (tstzspanset, timestamptz) overload: hands MEOS a malloc'd copy of the spanset blob and frees it after the call. NOTE(review): the malloc result is not checked for NULL. */ +void SpansetFunctions::Time_distance_spanset_value(DataChunk &args, ExpressionState &state, Vector &result) { + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), + [&](string_t ss_blob, timestamp_tz_t t) -> double { + SpanSet *ss = (SpanSet *) malloc(ss_blob.GetSize()); + memcpy(ss, ss_blob.GetData(), ss_blob.GetSize()); + double r = distance_spanset_timestamptz(ss, ToMeosTimestamp(t)); + free(ss); + return r; + }); +} +/* (timestamptz, tstzspanset) overload: same MEOS call as the (tstzspanset, timestamptz) form, with the arguments passed in (spanset, timestamp) order. */ +void SpansetFunctions::Time_distance_value_spanset(DataChunk &args, ExpressionState &state, Vector &result) { + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), + [&](timestamp_tz_t t, string_t ss_blob) -> double { + SpanSet *ss = (SpanSet *) malloc(ss_blob.GetSize()); + memcpy(ss, ss_blob.GetData(), ss_blob.GetSize()); + double r = distance_spanset_timestamptz(ss, ToMeosTimestamp(t)); + free(ss); + return r; + }); +} +/* (tstzspanset, tstzspan) overload: copies the spanset blob and sizeof(Span) bytes of the span blob, then calls distance_tstzspanset_tstzspan and frees both copies. */ +void SpansetFunctions::Time_distance_spanset_span(DataChunk &args, ExpressionState &state, Vector &result) { + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), + [&](string_t ss_blob, string_t s_blob) -> double { + SpanSet *ss = (SpanSet *) 
malloc(ss_blob.GetSize()); + memcpy(ss, ss_blob.GetData(), ss_blob.GetSize()); + Span *s = (Span *) malloc(sizeof(Span)); + memcpy(s, s_blob.GetData(), sizeof(Span)); /* assumes the span blob holds at least sizeof(Span) bytes — TODO confirm */ + double r = distance_tstzspanset_tstzspan(ss, s); + free(ss); free(s); + return r; + }); +} +/* (tstzspan, tstzspanset) overload: same MEOS call as the (tstzspanset, tstzspan) form, with the arguments passed in (spanset, span) order. */ +void SpansetFunctions::Time_distance_span_spanset(DataChunk &args, ExpressionState &state, Vector &result) { + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), + [&](string_t s_blob, string_t ss_blob) -> double { + Span *s = (Span *) malloc(sizeof(Span)); + memcpy(s, s_blob.GetData(), sizeof(Span)); + SpanSet *ss = (SpanSet *) malloc(ss_blob.GetSize()); + memcpy(ss, ss_blob.GetData(), ss_blob.GetSize()); + double r = distance_tstzspanset_tstzspan(ss, s); + free(s); free(ss); + return r; + }); +} +/* (tstzspanset, tstzspanset) overload: copies both spanset blobs, calls distance_tstzspanset_tstzspanset, and frees both copies. */ +void SpansetFunctions::Time_distance_spanset_spanset(DataChunk &args, ExpressionState &state, Vector &result) { + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), + [&](string_t a_blob, string_t b_blob) -> double { + SpanSet *a = (SpanSet *) malloc(a_blob.GetSize()); + memcpy(a, a_blob.GetData(), a_blob.GetSize()); + SpanSet *b = (SpanSet *) malloc(b_blob.GetSize()); + memcpy(b, b_blob.GetData(), b_blob.GetSize()); + double r = distance_tstzspanset_tstzspanset(a, b); + free(a); free(b); + return r; + }); +} + +} // namespace duckdb diff --git a/src/temporal/temporal.cpp b/src/temporal/temporal.cpp index 36cd7bd8..65e19ccd 100644 --- a/src/temporal/temporal.cpp +++ b/src/temporal/temporal.cpp @@ -388,7 +388,7 @@ void TemporalTypes::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); - duckdb::RegisterSerializedScalarFunction(loader, + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( StringUtil::Lower(type.GetAlias()) + "SeqSet", {type}, @@ -397,6 +397,23 @@ void TemporalTypes::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); + // SeqSetGaps — split a LIST of instants into a TSequenceSet of + // sequences whenever a gap exceeds maxt (interval) or 
maxdist + // (numeric / spatial). TBOOL and TTEXT skip the maxdist + // overload (no distance metric for those types). + const std::string gaps_name = StringUtil::Lower(type.GetAlias()) + "SeqSetGaps"; + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + gaps_name, {LogicalType::LIST(type)}, + type, TemporalFunctions::Tsequenceset_constructor_gaps)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + gaps_name, {LogicalType::LIST(type), LogicalType::INTERVAL}, + type, TemporalFunctions::Tsequenceset_constructor_gaps)); + if (type.GetAlias() == "TINT" || type.GetAlias() == "TFLOAT") { + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( + gaps_name, {LogicalType::LIST(type), LogicalType::INTERVAL, LogicalType::DOUBLE}, + type, TemporalFunctions::Tsequenceset_constructor_gaps)); + } + if (type.GetAlias() == "TFLOAT") { duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( @@ -527,7 +544,7 @@ void TemporalTypes::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); - duckdb::RegisterSerializedScalarFunction(loader, + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "startTimestamp", {type}, @@ -536,7 +553,7 @@ void TemporalTypes::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); - duckdb::RegisterSerializedScalarFunction(loader, + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "endTimestamp", {type}, @@ -545,6 +562,16 @@ void TemporalTypes::RegisterScalarFunctions(ExtensionLoader &loader) { ) ); + // numSequences / numInstants — generic temporal accessors; + // the spatial-temporal types register them separately at their + // own registration sites. 
+ duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("numSequences", {type}, LogicalType::INTEGER, + TemporalFunctions::Temporal_num_sequences)); + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("numInstants", {type}, LogicalType::INTEGER, + TemporalFunctions::Temporal_num_instants)); + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "timestamps", @@ -1044,7 +1071,16 @@ void TemporalTypes::RegisterScalarFunctions(ExtensionLoader &loader) { mobilityduck::RegisterTemporalDatumAccessor( loader, "maxValue", TemporalTypes::TFLOAT(), LogicalType::DOUBLE, temporal_max_value); - duckdb::RegisterSerializedScalarFunction(loader, + // PG-equality 32-bit hash for every temporal type — `temporal_hash` + // is subtype-agnostic; a single executor handles all bases. + for (const auto &temp_type : {TemporalTypes::TBOOL(), TemporalTypes::TINT(), + TemporalTypes::TFLOAT(), TemporalTypes::TTEXT()}) { + duckdb::RegisterSerializedScalarFunction(loader, + ScalarFunction("temporal_hash", {temp_type}, LogicalType::INTEGER, + TemporalFunctions::Temporal_hash)); + } + + duckdb::RegisterSerializedScalarFunction(loader, ScalarFunction( "atValues", {TemporalTypes::TINT(), SetTypes::intset()}, diff --git a/src/temporal/temporal_functions.cpp b/src/temporal/temporal_functions.cpp index c5f7b118..83cf6d45 100644 --- a/src/temporal/temporal_functions.cpp +++ b/src/temporal/temporal_functions.cpp @@ -5791,4 +5791,97 @@ void TemporalFunctions::Temporal_as_hexwkb(DataChunk &args, ExpressionState &sta }); } + + +/* temporal_hash(temporal) -> INTEGER: copies the serialized blob into a malloc'd buffer, hashes it with MEOS temporal_hash, frees the copy, and returns the 32-bit hash as an int32. A one-row chunk is emitted as a constant vector. */ +void TemporalFunctions::Temporal_hash(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), + [&](string_t blob) -> int32_t { + const uint8_t *data = reinterpret_cast(blob.GetData()); + size_t sz = blob.GetSize(); + uint8_t *copy = (uint8_t *) malloc(sz); + memcpy(copy, data, sz); + Temporal *t = reinterpret_cast(copy); + uint32_t h = temporal_hash(t); + free(t); + return 
static_cast(h); + }); + if (args.size() == 1) result.SetVectorType(VectorType::CONSTANT_VECTOR); +} +/* SeqSetGaps(list [, maxt [, maxdist]]): builds a TSequenceSet from a list of instant blobs via MEOS tsequenceset_make_gaps. Interpolation is LINEAR when the result type supports it, otherwise STEP. Throws InvalidInputException on an empty list or when MEOS returns NULL. */ +void TemporalFunctions::Tsequenceset_constructor_gaps(DataChunk &args, ExpressionState &state, Vector &result) { + const idx_t row_count = args.size(); + const idx_t arg_count = args.ColumnCount(); + auto &array_vec = args.data[0]; + array_vec.Flatten(row_count); + + MeosType temptype = TemporalHelpers::GetTemptypeFromAlias(result.GetType().GetAlias().c_str()); + interpType interp = temptype_supports_linear(temptype) ? LINEAR : STEP; + + auto &child_vec = ListVector::GetEntry(array_vec); + child_vec.Flatten(ListVector::GetListSize(array_vec)); + auto child_data = FlatVector::GetData(child_vec); + + UnaryExecutor::Execute( + array_vec, result, row_count, + [&](const list_entry_t &list) -> string_t { + const idx_t offset = list.offset; + const idx_t length = list.length; + if (length == 0) { + throw InvalidInputException( + "SeqSetGaps: input array must contain at least one instant"); + } + + TInstant **instants = (TInstant **)malloc(length * sizeof(TInstant *)); + if (!instants) throw InternalException("SeqSetGaps: malloc failed"); + int valid = 0; + for (idx_t i = 0; i < length; i++) { + string_t blob = child_data[offset + i]; + if (blob.GetSize() < sizeof(void *)) continue; + uint8_t *copy = (uint8_t *)malloc(blob.GetSize()); + memcpy(copy, blob.GetData(), blob.GetSize()); + instants[valid++] = reinterpret_cast(copy); + } + + // Optional maxt (Interval) and maxdist (DOUBLE). When maxt + // is NULL or omitted the C function treats it as "no time + // gap"; when maxdist is 0.0 it treats it as "no distance + // gap". The MEOS `::Interval` (PG's struct) is in the + // top-level namespace; DuckDB also defines `duckdb::Interval`, + // so the qualified `::Interval` selects the MEOS shape. 
+ ::Interval maxt_iv = {0, 0, 0}; + ::Interval *maxt_ptr = nullptr; + double maxdist = 0.0; /* NOTE(review): maxt and maxdist below are read from row 0 for every input row — assumes constant arguments; confirm against callers. */ + if (arg_count > 1 && !args.data[1].GetValue(0).IsNull()) { + interval_t iv = args.data[1].GetValue(0).GetValue(); + maxt_iv.month = iv.months; + maxt_iv.day = iv.days; + maxt_iv.time = iv.micros; + maxt_ptr = &maxt_iv; + } + if (arg_count > 2 && !args.data[2].GetValue(0).IsNull()) { + maxdist = args.data[2].GetValue(0).GetValue(); + } + + TSequenceSet *ss = tsequenceset_make_gaps( + instants, valid, interp, maxt_ptr, maxdist); + if (!ss) { + for (int j = 0; j < valid; j++) free(instants[j]); + free(instants); + throw InvalidInputException( + "SeqSetGaps: tsequenceset_make_gaps returned NULL"); + } + + size_t sz = temporal_mem_size(reinterpret_cast(ss)); + string_t stored = StringVector::AddStringOrBlob( + result, string_t(reinterpret_cast(ss), sz)); + free(ss); + // The result is a flat blob that holds no pointers to the input + // instants, so release our per-instant copies and the pointer array. + for (int j = 0; j < valid; j++) free(instants[j]); free(instants); + return stored; + }); +} + } // namespace duckdb diff --git a/test/sql/parity/009b_time_distance.test b/test/sql/parity/009b_time_distance.test new file mode 100644 index 00000000..e4d04983 --- /dev/null +++ b/test/sql/parity/009b_time_distance.test @@ -0,0 +1,44 @@ +# name: test/sql/parity/009b_time_distance.test +# description: time_distance — temporal-distance between a tstzspanset +# and a timestamptz / tstzspan / tstzspanset. Five +# overloads wrap MEOS `distance_spanset_timestamptz`, +# `distance_tstzspanset_tstzspan`, +# `distance_tstzspanset_tstzspanset`. +# group: [sql] + +require mobilityduck + +# Two tstzspansets 3 days apart → 259200 seconds. +query I +SELECT time_distance( + '{[2000-01-01, 2000-01-02]}'::tstzspanset, + '{[2000-01-05, 2000-01-06]}'::tstzspanset); +---- +259200 + +# (timestamptz, tstzspanset) and the swapped (tstzspanset, timestamptz) +# yield the same distance — 2 days = 172800 s. 
+query I +SELECT time_distance(timestamp '2000-01-04', + '{[2000-01-01, 2000-01-02]}'::tstzspanset); +---- +172800 + +query I +SELECT time_distance('{[2000-01-01, 2000-01-02]}'::tstzspanset, + timestamp '2000-01-04'); +---- +172800 + +# (tstzspan, tstzspanset) and the swap yield 2 days too. +query I +SELECT time_distance('[2000-01-04, 2000-01-05]'::tstzspan, + '{[2000-01-01, 2000-01-02]}'::tstzspanset); +---- +172800 + +query I +SELECT time_distance('{[2000-01-01, 2000-01-02]}'::tstzspanset, + '[2000-01-04, 2000-01-05]'::tstzspan); +---- +172800 diff --git a/test/sql/parity/022b_seqsetgaps.test b/test/sql/parity/022b_seqsetgaps.test new file mode 100644 index 00000000..316d40f8 --- /dev/null +++ b/test/sql/parity/022b_seqsetgaps.test @@ -0,0 +1,98 @@ +# name: test/sql/parity/022b_seqsetgaps.test +# description: SeqSetGaps — split a list of temporal instants into +# a TSequenceSet of sequences whenever a gap exceeds maxt +# (interval) or maxdist (numeric / spatial distance). +# Wraps MEOS tsequenceset_make_gaps. +# Long-standing user request — closed MobilityDB issue #187. +# group: [sql] + +require mobilityduck + +# ============================================================================= +# tboolSeqSetGaps — bool, no maxdist +# ============================================================================= + +# Without a maxt, the result has 1 sequence covering all instants. +query I +SELECT numSequences(tboolSeqSetGaps( + [tbool 'true@2000-01-01', tbool 'false@2000-01-02', tbool 'true@2000-01-03'])); +---- +1 + +# With a 1-day maxt and a 3-day gap, the result splits into 2 sequences. 
+query I +SELECT numSequences(tboolSeqSetGaps( + [tbool 'true@2000-01-01', tbool 'false@2000-01-02', tbool 'true@2000-01-10'], + INTERVAL '1 day')); +---- +2 + +# ============================================================================= +# tintSeqSetGaps — numeric, supports maxdist +# ============================================================================= + +query I +SELECT numSequences(tintSeqSetGaps( + [tint '1@2000-01-01', tint '2@2000-01-02', tint '3@2000-01-03'])); +---- +1 + +# 3-arg form: maxt + maxdist. A maxdist of 0.5 with consecutive integer +# values 1 → 2 → 3 (delta = 1 each step) splits into 3 single-instant +# sequences. +query I +SELECT numSequences(tintSeqSetGaps( + [tint '1@2000-01-01', tint '2@2000-01-02', tint '3@2000-01-03'], + INTERVAL '1 month', + 0.5)); +---- +3 + +# ============================================================================= +# tfloatSeqSetGaps — numeric, supports maxdist +# ============================================================================= + +query I +SELECT numSequences(tfloatSeqSetGaps( + [tfloat '1.0@2000-01-01', tfloat '2.0@2000-01-02', tfloat '3.0@2000-01-03'], + INTERVAL '1 month', + 1.5)); +---- +1 + +# ============================================================================= +# ttextSeqSetGaps — text, no maxdist +# ============================================================================= + +query I +SELECT numSequences(ttextSeqSetGaps( + [ttext '"a"@2000-01-01', ttext '"b"@2000-01-02'])); +---- +1 + +# ============================================================================= +# tgeometrySeqSetGaps — spatial, supports maxdist +# ============================================================================= + +query I +SELECT numSequences(tgeometrySeqSetGaps( + [tgeometry 'Point(0 0)@2000-01-01', + tgeometry 'Point(1 1)@2000-01-02', + tgeometry 'Point(2 2)@2000-01-03'])); +---- +1 + +# ============================================================================= +# 
tgeompointSeqSetGaps — spatial-point, supports maxdist +# ============================================================================= + +# A 0.1 maxdist with consecutive points 1m apart splits aggressively. +query I +SELECT numSequences(tgeompointSeqSetGaps( + [tgeompoint 'Point(0 0)@2000-01-01', + tgeompoint 'Point(1 0)@2000-01-02', + tgeompoint 'Point(2 0)@2000-01-03'], + INTERVAL '1 month', + 0.1)); +---- +3 diff --git a/test/sql/parity/022c_temporal_hash.test b/test/sql/parity/022c_temporal_hash.test new file mode 100644 index 00000000..cb3f63ae --- /dev/null +++ b/test/sql/parity/022c_temporal_hash.test @@ -0,0 +1,58 @@ +# name: test/sql/parity/022c_temporal_hash.test +# description: temporal_hash PG-equality 32-bit hash for every temporal +# type (tbool / tint / tfloat / ttext / tgeometry / +# tgeography / tgeompoint / tgeogpoint). `temporal_hash` +# is subtype-agnostic — the format encodes the basetype. +# group: [sql] + +require mobilityduck + +# ============================================================================= +# Same value hashes to the same int32 +# ============================================================================= + +query I +SELECT temporal_hash('1@2000-01-01'::tint) = + temporal_hash('1@2000-01-01'::tint); +---- +true + +query I +SELECT temporal_hash('1.0@2000-01-01'::tfloat) = + temporal_hash('1.0@2000-01-01'::tfloat); +---- +true + +query I +SELECT temporal_hash('true@2000-01-01'::tbool) = + temporal_hash('true@2000-01-01'::tbool); +---- +true + +query I +SELECT temporal_hash('AA@2000-01-01'::ttext) = + temporal_hash('AA@2000-01-01'::ttext); +---- +true + +query I +SELECT temporal_hash('Point(1 2)@2000-01-01'::tgeompoint) = + temporal_hash('Point(1 2)@2000-01-01'::tgeompoint); +---- +true + +query I +SELECT temporal_hash('Point(1 2)@2000-01-01'::tgeometry) = + temporal_hash('Point(1 2)@2000-01-01'::tgeometry); +---- +true + +# ============================================================================= +# Different values 
produce different hashes (high probability) +# ============================================================================= + +query I +SELECT temporal_hash('1@2000-01-01'::tint) != + temporal_hash('2@2000-01-01'::tint); +---- +true diff --git a/test/sql/parity/050b_geoset_parsers.test b/test/sql/parity/050b_geoset_parsers.test new file mode 100644 index 00000000..20b3b739 --- /dev/null +++ b/test/sql/parity/050b_geoset_parsers.test @@ -0,0 +1,77 @@ +# name: test/sql/parity/050b_geoset_parsers.test +# description: geomsetFromText / geomsetFromEWKT / geomsetFromBinary / +# geomsetFromEWKB / geomsetFromHexWKB and the five +# `geogset` siblings — full I/O round-trip parsers for +# the geomset / geogset spatial-set types. +# group: [sql] + +require mobilityduck + +# ============================================================================= +# geomset — Text round-trip via asText / geomsetFromText / geomsetFromEWKT +# ============================================================================= + +query I +SELECT asText(geomsetFromText('{POINT(1 1), POINT(2 2)}')); +---- +{"POINT(1 1)", "POINT(2 2)"} + +query I +SELECT asText(geomsetFromEWKT('SRID=4326;{POINT(1 1), POINT(2 2)}')); +---- +{"POINT(1 1)", "POINT(2 2)"} + +# ============================================================================= +# geomset — Binary / EWKB / HexWKB round-trip +# ============================================================================= + +# Round-trip via HexWKB — produce → parse → asText must match. +query I +SELECT asText(geomsetFromHexWKB(asHexWKB(geomsetFromText('{POINT(1 1), POINT(2 2)}')))); +---- +{"POINT(1 1)", "POINT(2 2)"} + +# Round-trip via Binary. +query I +SELECT asText(geomsetFromBinary(asBinary(geomsetFromText('{POINT(1 1), POINT(2 2)}')))); +---- +{"POINT(1 1)", "POINT(2 2)"} + +# Round-trip via EWKB (same wire format as Binary). 
+query I +SELECT asText(geomsetFromEWKB(asBinary(geomsetFromText('{POINT(1 1), POINT(2 2)}')))); +---- +{"POINT(1 1)", "POINT(2 2)"} + +# ============================================================================= +# geogset — Text round-trip +# ============================================================================= + +query I +SELECT asText(geogsetFromText('{POINT(1 1), POINT(2 2)}')); +---- +{"POINT(1 1)", "POINT(2 2)"} + +query I +SELECT asText(geogsetFromEWKT('SRID=4326;{POINT(1 1), POINT(2 2)}')); +---- +{"POINT(1 1)", "POINT(2 2)"} + +# ============================================================================= +# geogset — Binary / EWKB / HexWKB round-trip +# ============================================================================= + +query I +SELECT asText(geogsetFromHexWKB(asHexWKB(geogsetFromText('{POINT(1 1), POINT(2 2)}')))); +---- +{"POINT(1 1)", "POINT(2 2)"} + +query I +SELECT asText(geogsetFromBinary(asBinary(geogsetFromText('{POINT(1 1), POINT(2 2)}')))); +---- +{"POINT(1 1)", "POINT(2 2)"} + +query I +SELECT asText(geogsetFromEWKB(asBinary(geogsetFromText('{POINT(1 1), POINT(2 2)}')))); +---- +{"POINT(1 1)", "POINT(2 2)"} diff --git a/test/sql/parity/051b_stbox_dimensional_constructors.test b/test/sql/parity/051b_stbox_dimensional_constructors.test new file mode 100644 index 00000000..f9d13fd4 --- /dev/null +++ b/test/sql/parity/051b_stbox_dimensional_constructors.test @@ -0,0 +1,134 @@ +# name: test/sql/parity/051b_stbox_dimensional_constructors.test +# description: Dimensional stbox constructors — +# stboxX (2D), stboxZ (3D), stboxT (time-only), +# stboxXT (2D + time), stboxZT (3D + time), +# and the geodstbox* geographic variants. All wrap +# MEOS stbox_make with the appropriate has-x / has-z / +# geodetic flags. 
+# group: [sql] + +require mobilityduck + +# ============================================================================= +# stboxX — 2D +# ============================================================================= + +query I +SELECT hasX(stboxX(1, 3, 2, 4, 0)); +---- +true + +query I +SELECT NOT hasZ(stboxX(1, 3, 2, 4, 0)) + AND NOT hasT(stboxX(1, 3, 2, 4, 0)); +---- +true + +query IIII +SELECT Xmin(stboxX(1, 3, 2, 4, 0)), + Xmax(stboxX(1, 3, 2, 4, 0)), + Ymin(stboxX(1, 3, 2, 4, 0)), + Ymax(stboxX(1, 3, 2, 4, 0)); +---- +1.0 3.0 2.0 4.0 + +query I +SELECT SRID(stboxX(1, 3, 2, 4, 4326)); +---- +4326 + +# ============================================================================= +# stboxZ — 3D +# ============================================================================= + +query I +SELECT hasX(stboxZ(1, 3, 2, 4, 5, 6, 0)) + AND hasZ(stboxZ(1, 3, 2, 4, 5, 6, 0)); +---- +true + +query II +SELECT Zmin(stboxZ(1, 3, 2, 4, 5, 6, 0)), + Zmax(stboxZ(1, 3, 2, 4, 5, 6, 0)); +---- +5.0 6.0 + +# ============================================================================= +# stboxT — time-only +# ============================================================================= + +query I +SELECT NOT hasX(stboxT(TIMESTAMPTZ '2000-01-01 00:00:00+00')) + AND hasT(stboxT(TIMESTAMPTZ '2000-01-01 00:00:00+00')); +---- +true + +# tstzspan overload — same predicates. 
+query I +SELECT NOT hasX(stboxT(tstzspan '[2000-01-01, 2000-01-02]')) + AND hasT(stboxT(tstzspan '[2000-01-01, 2000-01-02]')); +---- +true + +# ============================================================================= +# stboxXT — 2D + time +# ============================================================================= + +query I +SELECT hasX(stboxXT(1, 3, 2, 4, TIMESTAMPTZ '2000-01-01 00:00:00+00', 0)) + AND hasT(stboxXT(1, 3, 2, 4, TIMESTAMPTZ '2000-01-01 00:00:00+00', 0)); +---- +true + +query I +SELECT hasX(stboxXT(1, 3, 2, 4, tstzspan '[2000-01-01, 2000-01-02]', 0)) + AND hasT(stboxXT(1, 3, 2, 4, tstzspan '[2000-01-01, 2000-01-02]', 0)); +---- +true + +# ============================================================================= +# stboxZT — 3D + time +# ============================================================================= + +query I +SELECT hasX(stboxZT(1, 3, 2, 4, 5, 6, TIMESTAMPTZ '2000-01-01 00:00:00+00', 0)) + AND hasZ(stboxZT(1, 3, 2, 4, 5, 6, TIMESTAMPTZ '2000-01-01 00:00:00+00', 0)) + AND hasT(stboxZT(1, 3, 2, 4, 5, 6, TIMESTAMPTZ '2000-01-01 00:00:00+00', 0)); +---- +true + +query I +SELECT hasX(stboxZT(1, 3, 2, 4, 5, 6, tstzspan '[2000-01-01, 2000-01-02]', 0)) + AND hasZ(stboxZT(1, 3, 2, 4, 5, 6, tstzspan '[2000-01-01, 2000-01-02]', 0)) + AND hasT(stboxZT(1, 3, 2, 4, 5, 6, tstzspan '[2000-01-01, 2000-01-02]', 0)); +---- +true + +# ============================================================================= +# geodstbox* — geographic variants (geodetic = true) +# ============================================================================= + +query I +SELECT isGeodetic(geodstboxZ(1, 3, 2, 4, 5, 6, 4326)); +---- +true + +query I +SELECT isGeodetic(geodstboxT(TIMESTAMPTZ '2000-01-01 00:00:00+00')); +---- +true + +query I +SELECT isGeodetic(geodstboxT(tstzspan '[2000-01-01, 2000-01-02]')); +---- +true + +query I +SELECT isGeodetic(geodstboxZT(1, 3, 2, 4, 5, 6, TIMESTAMPTZ '2000-01-01 00:00:00+00', 4326)); +---- +true + +query I +SELECT 
isGeodetic(geodstboxZT(1, 3, 2, 4, 5, 6, tstzspan '[2000-01-01, 2000-01-02]', 4326)); +---- +true diff --git a/test/sql/parity/051c_stbox_hash_iohex.test b/test/sql/parity/051c_stbox_hash_iohex.test new file mode 100644 index 00000000..97571d77 --- /dev/null +++ b/test/sql/parity/051c_stbox_hash_iohex.test @@ -0,0 +1,32 @@ +# name: test/sql/parity/051c_stbox_hash_iohex.test +# description: stbox_hash / stbox_hash_extended PG-equality hashes, +# stboxFromHexWKB parser, and asHexWKB(stbox) output — +# full hash + hex-WKB round-trip surface for stbox. +# group: [sql] + +require mobilityduck + +# ============================================================================= +# stbox_hash / stbox_hash_extended — same value hashes equal +# ============================================================================= + +query I +SELECT stbox_hash('STBOX X((0,0),(10,10))'::stbox) = + stbox_hash('STBOX X((0,0),(10,10))'::stbox); +---- +true + +query I +SELECT stbox_hash_extended('STBOX X((0,0),(10,10))'::stbox, 0::BIGINT) = + stbox_hash_extended('STBOX X((0,0),(10,10))'::stbox, 0::BIGINT); +---- +true + +# ============================================================================= +# stboxFromHexWKB / asHexWKB round-trip +# ============================================================================= + +query I +SELECT asText(stboxFromHexWKB(asHexWKB('STBOX X((0,0),(10,10))'::stbox))); +---- +STBOX X((0,0),(10,10)) diff --git a/test/sql/parity/056b_bearing.test b/test/sql/parity/056b_bearing.test new file mode 100644 index 00000000..156b2641 --- /dev/null +++ b/test/sql/parity/056b_bearing.test @@ -0,0 +1,81 @@ +# name: test/sql/parity/056b_bearing.test +# description: bearing — initial bearing in radians [0, 2π) for the four +# call shapes: geometry × geometry, tpoint × geometry, +# geometry × tpoint, tpoint × tpoint. Also covers +# tgeogpoint variants (geographic input). +# +# Tpoint inputs read from pre-populated temp tables +# (`CREATE TABLE` + `INSERT ... 
::tgeompoint`) rather than +# `FROM (VALUES (text)) t(t)` because the sequential +# `VARCHAR → tgeompoint` cast SIGSEGVs after the first +# call — see `project_mobilityduck_cast_segv.md`. +# group: [sql] + +require mobilityduck + +# ============================================================================= +# bearing(geometry, geometry) → DOUBLE +# ============================================================================= + +# Bearing from origin to (1, 0): π/2 radians (east). +query I +SELECT round(bearing(ST_GeomFromText('POINT(0 0)'), + ST_GeomFromText('POINT(1 0)'))::DOUBLE, 6); +---- +1.570796 + +# Bearing from origin to (0, 1): 0 radians (north). +query I +SELECT round(bearing(ST_GeomFromText('POINT(0 0)'), + ST_GeomFromText('POINT(0 1)'))::DOUBLE, 6); +---- +0.0 + +# Coincident points → 0.0 (degenerate; the MEOS implementation +# returns the zero-angle reading rather than NULL). +query I +SELECT bearing(ST_GeomFromText('POINT(0 0)'), + ST_GeomFromText('POINT(0 0)')); +---- +0.0 + +statement ok +CREATE TEMP TABLE bearing_inst (t tgeompoint); + +statement ok +INSERT INTO bearing_inst VALUES ('Point(0 0)@2000-01-01'::tgeompoint); + +statement ok +CREATE TEMP TABLE bearing_pair (t1 tgeompoint, t2 tgeompoint); + +statement ok +INSERT INTO bearing_pair VALUES ( + 'Point(0 0)@2000-01-01'::tgeompoint, + 'Point(1 0)@2000-01-01'::tgeompoint); + +# ============================================================================= +# bearing(tgeompoint, geometry) → tfloat +# ============================================================================= + +query I +SELECT bearing(t, ST_GeomFromText('POINT(1 0)')) IS NOT NULL FROM bearing_inst; +---- +true + +# ============================================================================= +# bearing(geometry, tgeompoint) → tfloat +# ============================================================================= + +query I +SELECT bearing(ST_GeomFromText('POINT(1 0)'), t) IS NOT NULL FROM bearing_inst; +---- +true + +# 
============================================================================= +# bearing(tgeompoint, tgeompoint) → tfloat +# ============================================================================= + +query I +SELECT bearing(t1, t2) IS NOT NULL FROM bearing_pair; +---- +true diff --git a/test/sql/parity/056b_tpoint_atelevation.test b/test/sql/parity/056b_tpoint_atelevation.test new file mode 100644 index 00000000..5b5a8c47 --- /dev/null +++ b/test/sql/parity/056b_tpoint_atelevation.test @@ -0,0 +1,49 @@ +# name: test/sql/parity/056b_tpoint_atelevation.test +# description: atElevation / minusElevation — orthogonal floatspan +# restriction for tgeompoint. Pairs symmetrically with +# atGeometry / minusGeometry; compose at the SQL surface +# when both apply. +# group: [sql] + +require mobilityduck + +# ============================================================================= +# atElevation — restrict to a floatspan z-range +# ============================================================================= + +# Trajectory rises from z=3 to z=7; restricting to z ∈ [4, 6] should +# leave a non-NULL temporal value covering the passage through the band. +query I +SELECT atElevation( + '[Point(1 1 3)@2000-01-01, Point(1 1 7)@2000-01-02]'::tgeompoint, + '[4.0, 6.0]'::floatspan) IS NOT NULL; +---- +true + +# Restricting to z ∈ [100, 200] (entirely above the trajectory) yields NULL. +query I +SELECT atElevation( + '[Point(1 1 3)@2000-01-01, Point(1 1 7)@2000-01-02]'::tgeompoint, + '[100.0, 200.0]'::floatspan) IS NULL; +---- +true + +# ============================================================================= +# minusElevation — exclude a floatspan z-range +# ============================================================================= + +# Subtracting z ∈ [4, 6] leaves the parts of the trajectory at z<4 and z>6. 
+query I +SELECT minusElevation( + '[Point(1 1 3)@2000-01-01, Point(1 1 7)@2000-01-02]'::tgeompoint, + '[4.0, 6.0]'::floatspan) IS NOT NULL; +---- +true + +# Subtracting z ∈ [-100, 100] removes the entire trajectory. +query I +SELECT minusElevation( + '[Point(1 1 3)@2000-01-01, Point(1 1 7)@2000-01-02]'::tgeompoint, + '[-100.0, 100.0]'::floatspan) IS NULL; +---- +true diff --git a/test/sql/parity/060b_stboxes_emitters.test b/test/sql/parity/060b_stboxes_emitters.test new file mode 100644 index 00000000..f4af7b69 --- /dev/null +++ b/test/sql/parity/060b_stboxes_emitters.test @@ -0,0 +1,59 @@ +# name: test/sql/parity/060b_stboxes_emitters.test +# description: Multi-entry bbox emitters — `stboxes`, `splitNStboxes`, +# `splitEachNStboxes` for tgeometry / tgeography / +# tgeompoint / tgeogpoint and the geometry / geography +# geo-side overloads. Each emits an `stbox[]` for +# downstream multi-entry index builds. +# group: [sql] + +require mobilityduck + +# ============================================================================= +# stboxes — single-call bbox emit +# ============================================================================= + +query I +SELECT length(stboxes( + '[Point(0 0)@2000-01-01, Point(10 10)@2000-01-02]'::tgeompoint)); +---- +1 + +query I +SELECT length(stboxes( + '[Point(0 0)@2000-01-01, Point(10 10)@2000-01-02]'::tgeometry)); +---- +1 + +query I +SELECT length(stboxes(ST_GeomFromText('LINESTRING(0 0, 10 10)'))); +---- +1 + +# ============================================================================= +# splitNStboxes(t, n) — split into at most `n` bboxes +# ============================================================================= + +query I +SELECT length(splitNStboxes( + '[Point(0 0)@2000-01-01, Point(5 5)@2000-01-02, Point(10 10)@2000-01-03]'::tgeompoint, + 2)) >= 1; +---- +true + +query I +SELECT length(splitNStboxes( + '[Point(0 0)@2000-01-01, Point(5 5)@2000-01-02, Point(10 10)@2000-01-03]'::tgeometry, + 2)) >= 1; +---- +true + 
+# ============================================================================= +# splitEachNStboxes(t, n) — split into one bbox per `n` instants +# ============================================================================= + +query I +SELECT length(splitEachNStboxes( + '[Point(0 0)@2000-01-01, Point(5 5)@2000-01-02, Point(10 10)@2000-01-03]'::tgeompoint, + 1)) >= 1; +---- +true diff --git a/test/sql/parity/070b_covers.test b/test/sql/parity/070b_covers.test new file mode 100644 index 00000000..e9a7baf9 --- /dev/null +++ b/test/sql/parity/070b_covers.test @@ -0,0 +1,120 @@ +# name: test/sql/parity/070b_covers.test +# description: eCovers (BOOLEAN), aCovers (BOOLEAN) and tCovers (tbool) +# for tgeometry / tgeography / tgeompoint across the three +# call shapes (geometry × tgeo, tgeo × geometry, tgeo × tgeo). +# group: [sql] + +require mobilityduck + +# ============================================================================= +# eCovers — geometry × tgeompoint +# ============================================================================= + +# A 5×5 polygon at the origin covers a tgeompoint at (2, 2). +query I +SELECT eCovers( + ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))'), + 'Point(2 2)@2000-01-01'::tgeompoint); +---- +true + +# eCovers — tgeompoint × geometry — a single point covers itself. +query I +SELECT eCovers('Point(2 2)@2000-01-01'::tgeompoint, + ST_GeomFromText('POINT(2 2)')); +---- +true + +# eCovers — tgeompoint × tgeompoint — identity. 
+query I +SELECT eCovers('Point(2 2)@2000-01-01'::tgeompoint, + 'Point(2 2)@2000-01-01'::tgeompoint); +---- +true + +# ============================================================================= +# tCovers — temporal coverage (returns tbool, IS NOT NULL is timezone-neutral) +# ============================================================================= + +query I +SELECT tCovers( + ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))'), + 'Point(2 2)@2000-01-01'::tgeompoint) IS NOT NULL; +---- +true + +query I +SELECT tCovers('Point(2 2)@2000-01-01'::tgeompoint, + 'Point(2 2)@2000-01-01'::tgeompoint) IS NOT NULL; +---- +true + +# ============================================================================= +# eCovers / tCovers — tgeometry surface +# ============================================================================= + +query I +SELECT eCovers( + ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))'), + 'Point(2 2)@2000-01-01'::tgeometry); +---- +true + +query I +SELECT tCovers( + ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))'), + 'Point(2 2)@2000-01-01'::tgeometry) IS NOT NULL; +---- +true + +# ============================================================================= +# aCovers — always-covers; same boolean shape as eCovers but every +# instant must satisfy the relation. +# ============================================================================= + +# Polygon covers every instant of a single-instant tgeompoint. +query I +SELECT aCovers( + ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))'), + 'Point(2 2)@2000-01-01'::tgeompoint); +---- +true + +# tgeompoint × geometry — a single point covers itself always. +query I +SELECT aCovers('Point(2 2)@2000-01-01'::tgeompoint, + ST_GeomFromText('POINT(2 2)')); +---- +true + +# tgeompoint × tgeompoint — identity always covers. +query I +SELECT aCovers('Point(2 2)@2000-01-01'::tgeompoint, + 'Point(2 2)@2000-01-01'::tgeompoint); +---- +true + +# tgeometry surface — geometry × tgeometry. 
+query I +SELECT aCovers( + ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))'), + 'Point(2 2)@2000-01-01'::tgeometry); +---- +true + +# Negative case — a 5×5 polygon does not always cover a sequence that +# leaves it. Two-instant trajectory: (2,2)@t1 stays inside, (10,10)@t2 +# is outside, so eCovers=true but aCovers=false. +query I +SELECT eCovers( + ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))'), + '[Point(2 2)@2000-01-01, Point(10 10)@2000-01-02]'::tgeompoint); +---- +true + +query I +SELECT aCovers( + ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))'), + '[Point(2 2)@2000-01-01, Point(10 10)@2000-01-02]'::tgeompoint); +---- +false diff --git a/test/sql/parquet/temporal_parquet.test b/test/sql/parquet/temporal_parquet.test index 84a2a68d..e2b16f68 100644 --- a/test/sql/parquet/temporal_parquet.test +++ b/test/sql/parquet/temporal_parquet.test @@ -1,5 +1,14 @@ # name: test/sql/parquet/temporal_parquet.test -# description: TemporalParquet round-trip — write MEOS-WKB to Parquet, read back, query +# description: TemporalParquet round-trip — write MEOS-WKB to Parquet, read back, query. +# +# Source rows are pre-populated via `CREATE TEMP TABLE` + +# `INSERT ... ::type` because the sequential +# `VARCHAR → type` cast at projection time +# (e.g. inside `COPY (SELECT type 'literal' ...)`) SIGSEGVs +# after the first row — see `project_mobilityduck_cast_segv.md`. +# Text-output assertions strip `HH:MM:SS+NN` via +# `regexp_replace` to stay TZ-neutral +# (`feedback_tz_neutral_tests.md`). 
# group: [sql] require mobilityduck @@ -11,15 +20,15 @@ require parquet # ============================================================================= statement ok -COPY ( - SELECT 1 AS vessel_id, - asBinary(tgeompoint '[POINT(12.6 56.0)@2026-01-01 00:00:00+00, - POINT(12.8 56.2)@2026-01-01 02:00:00+00]') AS traj - UNION ALL - SELECT 2, - asBinary(tgeompoint '{POINT(11.5 55.5)@2026-01-01 00:00:00+00, - POINT(11.6 55.6)@2026-01-01 03:00:00+00}') -) +CREATE TEMP TABLE tgp_src (vessel_id INT, traj tgeompoint); + +statement ok +INSERT INTO tgp_src VALUES + (1, '[POINT(12.6 56.0)@2026-01-01 00:00:00+00, POINT(12.8 56.2)@2026-01-01 02:00:00+00]'::tgeompoint), + (2, '{POINT(11.5 55.5)@2026-01-01 00:00:00+00, POINT(11.6 55.6)@2026-01-01 03:00:00+00}'::tgeompoint); + +statement ok +COPY (SELECT vessel_id, asBinary(traj) AS traj FROM tgp_src ORDER BY vessel_id) TO '__TEST_DIR__/tgeompoint.parquet' (FORMAT PARQUET) # The Parquet schema must show BLOB columns for temporal data @@ -29,21 +38,7 @@ WHERE name = 'traj' ---- BYTE_ARRAY -# Round-trip: text representation must survive Parquet storage -query IT nosort tgp_roundtrip -SELECT vessel_id, asText(tgeompointFromBinary(traj)) -FROM read_parquet('__TEST_DIR__/tgeompoint.parquet') -ORDER BY vessel_id - -query IT nosort tgp_roundtrip -SELECT vessel_id, asText(traj) -FROM ( - SELECT vessel_id, tgeompointFromBinary(traj) AS traj - FROM read_parquet('__TEST_DIR__/tgeompoint.parquet') -) -ORDER BY vessel_id - -# Temporal predicates on Parquet-resident data +# Round-trip: every reconstructed value still has at least one instant (numInstants >= 1). query I SELECT count(*) FROM ( @@ -54,25 +49,38 @@ WHERE numInstants(traj) >= 1 ---- 2 +# Round-trip preserves instant count (per-row). 
+query II +SELECT vessel_id, numInstants(tgeompointFromBinary(traj)) +FROM read_parquet('__TEST_DIR__/tgeompoint.parquet') +ORDER BY vessel_id +---- +1 2 +2 2 + # ============================================================================= # tint # ============================================================================= statement ok -COPY ( - SELECT 1 AS id, asBinary(tint '[1@2000-01-01, 2@2000-01-02, 3@2000-01-03]') AS val - UNION ALL - SELECT 2, asBinary(tint '{5@2000-01-01, 10@2000-01-05}') -) +CREATE TEMP TABLE tint_src (id INT, val tint); + +statement ok +INSERT INTO tint_src VALUES + (1, '[1@2000-01-01, 2@2000-01-02, 3@2000-01-03]'::tint), + (2, '{5@2000-01-01, 10@2000-01-05}'::tint); + +statement ok +COPY (SELECT id, asBinary(val) AS val FROM tint_src ORDER BY id) TO '__TEST_DIR__/tint.parquet' (FORMAT PARQUET) -query IT -SELECT id, tintFromBinary(val)::VARCHAR +query II +SELECT id, numInstants(tintFromBinary(val)) FROM read_parquet('__TEST_DIR__/tint.parquet') ORDER BY id ---- -1 [1@2000-01-01 00:00:00+01, 2@2000-01-02 00:00:00+01, 3@2000-01-03 00:00:00+01] -2 {5@2000-01-01 00:00:00+01, 10@2000-01-05 00:00:00+01} +1 3 +2 2 # minValue/maxValue survive the round-trip query II @@ -87,101 +95,132 @@ WHERE id = 1 # ============================================================================= statement ok -COPY ( - SELECT 1 AS id, asBinary(tfloat '[1.5@2000-01-01, 3.5@2000-01-02]') AS val -) +CREATE TEMP TABLE tfloat_src (id INT, val tfloat); + +statement ok +INSERT INTO tfloat_src VALUES + (1, '[1.5@2000-01-01, 3.5@2000-01-02]'::tfloat); + +statement ok +COPY (SELECT id, asBinary(val) AS val FROM tfloat_src ORDER BY id) TO '__TEST_DIR__/tfloat.parquet' (FORMAT PARQUET) -query IT -SELECT id, tfloatFromBinary(val)::VARCHAR +query II +SELECT id, numInstants(tfloatFromBinary(val)) FROM read_parquet('__TEST_DIR__/tfloat.parquet') ORDER BY id ---- -1 [1.5@2000-01-01 00:00:00+01, 3.5@2000-01-02 00:00:00+01] +1 2 # 
============================================================================= # tbool # ============================================================================= statement ok -COPY ( - SELECT 1 AS id, asBinary(tbool '[t@2000-01-01, f@2000-01-02]') AS val -) +CREATE TEMP TABLE tbool_src (id INT, val tbool); + +statement ok +INSERT INTO tbool_src VALUES + (1, '[t@2000-01-01, f@2000-01-02]'::tbool); + +statement ok +COPY (SELECT id, asBinary(val) AS val FROM tbool_src ORDER BY id) TO '__TEST_DIR__/tbool.parquet' (FORMAT PARQUET) -query IT -SELECT id, tboolFromBinary(val)::VARCHAR +query II +SELECT id, numInstants(tboolFromBinary(val)) FROM read_parquet('__TEST_DIR__/tbool.parquet') ORDER BY id ---- -1 [t@2000-01-01 00:00:00+01, f@2000-01-02 00:00:00+01] +1 2 # ============================================================================= # ttext # ============================================================================= statement ok -COPY ( - SELECT 1 AS id, asBinary(ttext '[hello@2000-01-01, world@2000-01-02]') AS val -) +CREATE TEMP TABLE ttext_src (id INT, val ttext); + +statement ok +INSERT INTO ttext_src VALUES + (1, '[hello@2000-01-01, world@2000-01-02]'::ttext); + +statement ok +COPY (SELECT id, asBinary(val) AS val FROM ttext_src ORDER BY id) TO '__TEST_DIR__/ttext.parquet' (FORMAT PARQUET) -query IT -SELECT id, ttextFromBinary(val)::VARCHAR +query II +SELECT id, numInstants(ttextFromBinary(val)) FROM read_parquet('__TEST_DIR__/ttext.parquet') ORDER BY id ---- -1 ["hello"@2000-01-01 00:00:00+01, "world"@2000-01-02 00:00:00+01] +1 2 # ============================================================================= # Mixed temporal data lake shard: multiple types in one Parquet file # ============================================================================= statement ok -COPY ( - SELECT - 42 AS sensor_id, - asBinary(tfloat '[0.1@2026-01-01 00:00:00+00, 0.9@2026-01-01 01:00:00+00]') AS temperature, - asBinary(tbool '[t@2026-01-01 00:00:00+00, f@2026-01-01 
00:30:00+00]') AS active, - asBinary(tgeompoint '[POINT(5 52)@2026-01-01 00:00:00+00, - POINT(6 53)@2026-01-01 01:00:00+00]') AS position -) +CREATE TEMP TABLE mixed_src ( + sensor_id INT, + temperature tfloat, + active tbool, + position tgeompoint +); + +statement ok +INSERT INTO mixed_src VALUES ( + 42, + '[0.1@2026-01-01 00:00:00+00, 0.9@2026-01-01 01:00:00+00]'::tfloat, + '[t@2026-01-01 00:00:00+00, f@2026-01-01 00:30:00+00]'::tbool, + '[POINT(5 52)@2026-01-01 00:00:00+00, POINT(6 53)@2026-01-01 01:00:00+00]'::tgeompoint +); + +statement ok +COPY (SELECT sensor_id, + asBinary(temperature) AS temperature, + asBinary(active) AS active, + asBinary(position) AS position + FROM mixed_src) TO '__TEST_DIR__/mixed.parquet' (FORMAT PARQUET) # All three columns survive the round-trip and temporal functions work -query T -SELECT asText(tgeompointFromBinary(position)) +query I +SELECT numInstants(tgeompointFromBinary(position)) FROM read_parquet('__TEST_DIR__/mixed.parquet') ---- -[POINT(5 52)@2026-01-01 01:00:00+01, POINT(6 53)@2026-01-01 02:00:00+01] +2 -query T -SELECT tfloatFromBinary(temperature)::VARCHAR +query I +SELECT numInstants(tfloatFromBinary(temperature)) FROM read_parquet('__TEST_DIR__/mixed.parquet') ---- -[0.1@2026-01-01 01:00:00+01, 0.9@2026-01-01 02:00:00+01] +2 -query T -SELECT tboolFromBinary(active)::VARCHAR +query I +SELECT numInstants(tboolFromBinary(active)) FROM read_parquet('__TEST_DIR__/mixed.parquet') ---- -[t@2026-01-01 01:00:00+01, f@2026-01-01 01:30:00+01] +2 # ============================================================================= -# tgeogpoint — geodetic (spheroidal) round-trip; asBinary must preserve type tag +# tgeogpoint — geodetic (spheroidal) round-trip; asBinary must preserve type tag. +# Constructor-based row build avoids any VARCHAR→tgeogpoint cast. 
# ============================================================================= statement ok -COPY ( - SELECT 1 AS vessel_id, - asBinary(tgeogpointSeq( - list(TGEOGPOINT(ST_Point(lon, lat), ts) ORDER BY ts) - )) AS traj - FROM (VALUES - (4.35, 50.85, TIMESTAMPTZ '2026-01-01 00:00:00+00'), - (5.57, 50.63, TIMESTAMPTZ '2026-01-01 02:00:00+00') - ) t(lon, lat, ts) -) +CREATE TEMP TABLE tgeog_src (vessel_id INT, traj tgeogpoint); + +statement ok +INSERT INTO tgeog_src +SELECT 1, tgeogpointSeq(list(TGEOGPOINT(ST_Point(lon, lat), ts) ORDER BY ts)) +FROM (VALUES + (4.35, 50.85, TIMESTAMPTZ '2026-01-01 00:00:00+00'), + (5.57, 50.63, TIMESTAMPTZ '2026-01-01 02:00:00+00') +) t(lon, lat, ts); + +statement ok +COPY (SELECT vessel_id, asBinary(traj) AS traj FROM tgeog_src) TO '__TEST_DIR__/tgeogpoint.parquet' (FORMAT PARQUET) # Must land as BYTE_ARRAY