From c1723302df7dfcec16f59a79a6011795daeedeee Mon Sep 17 00:00:00 2001
From: Jia Yu
Date: Wed, 20 May 2026 00:41:16 -0700
Subject: [PATCH] [GH-3013] Box3D aggregate: ST_3DExtent

Final slice of the Box3D Phase 1 epic. Mirrors PostGIS's ST_3DExtent and
parallels Sedona's existing ST_Extent (which returns a Box2D).

- `Envelope3DBuffer` case class: aggregator buffer with six doubles + merge
  logic. It serves as the Spark-encodable aggregation buffer because JTS
  doesn't have a 3D envelope analog and Box3D itself isn't a Spark
  Encoder-friendly Product type.
- `ST_3DExtent` aggregator:
  Aggregator[Geometry, Option[Envelope3DBuffer], Box3D]. Skips null and empty
  geometries, returns null when no rows contributed. Geometries without a Z
  dimension fold into z=0 per-coordinate, matching PostGIS.
- Registered in `Catalog.aggregateExpressions` so SQL
  `SELECT ST_3DExtent(geom) FROM ...` resolves.
- `Box3DExtentSuite`: aggregation over mixed XY/XYZ rows, NULL on empty
  input, NULL- and empty-row skip.

Box3D Phase 1 is now complete across 5 PRs (foundation, constructors,
accessors + AsText, predicates, this aggregate).

--- python/sedona/spark/sql/st_aggregates.py | 16 ++++ python/tests/sql/test_dataframe_api.py | 9 +++ .../org/apache/sedona/sql/UDF/Catalog.scala | 1 + .../expressions/AggregateFunctions.scala | 81 ++++++++++++++++++- .../expressions/st_aggregates.scala | 10 +++ .../apache/sedona/sql/Box3DExtentSuite.scala | 61 ++++++++++++++ .../sedona/sql/dataFrameAPITestScala.scala | 7 ++ 7 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 spark/common/src/test/scala/org/apache/sedona/sql/Box3DExtentSuite.scala diff --git a/python/sedona/spark/sql/st_aggregates.py b/python/sedona/spark/sql/st_aggregates.py index 794871b3abf..562f750d220 100644 --- a/python/sedona/spark/sql/st_aggregates.py +++ b/python/sedona/spark/sql/st_aggregates.py @@ -56,6 +56,22 @@ def ST_Extent(geometry: ColumnOrName) -> Column: return _call_aggregate_function("ST_Extent", geometry) +@validate_argument_types +def ST_3DExtent(geometry: ColumnOrName) -> Column: + """Aggregate Function: Get the 3D bounding box (Box3D) of a geometry column. + + Returns NULL when the input contains no rows or all rows are null/empty + geometries. Geometries without a Z dimension contribute ``z = 0`` per + coordinate, matching PostGIS. Mirrors PostGIS ST_3DExtent. + + :param geometry: Geometry column to aggregate. + :type geometry: ColumnOrName + :return: Box3D representing the union of 3D bounding boxes of the geometry column. + :rtype: Column + """ + return _call_aggregate_function("ST_3DExtent", geometry) + + @validate_argument_types def ST_Intersection_Aggr(geometry: ColumnOrName) -> Column: """Aggregate Function: Get the aggregate intersection of a geometry column. 
diff --git a/python/tests/sql/test_dataframe_api.py b/python/tests/sql/test_dataframe_api.py index 276b3c22db3..a38274ef7ac 100644 --- a/python/tests/sql/test_dataframe_api.py +++ b/python/tests/sql/test_dataframe_api.py @@ -1355,6 +1355,15 @@ "", Box2D(0.0, 0.0, 1.0, 1.0), ), + ( + sta.ST_3DExtent, + ("geom",), + "exploded_points", + # Box3DType has no Python UDT yet; cast to STRING uses Box3D.toString for comparison. + "CAST(geom AS STRING)", + # 2D inputs fold Z=0 per PostGIS semantics. + "BOX3D(0.0 0.0 0.0, 1.0 1.0 0.0)", + ), # Test aliases for *_Aggr functions with *_Agg suffix ( sta.ST_Envelope_Agg, diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala index 2296501cc85..fb32517e62f 100644 --- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala +++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala @@ -548,6 +548,7 @@ object Catalog extends AbstractCatalog with Logging { Seq( new ST_Envelope_Aggr, new ST_Extent, + new ST_3DExtent, new ST_Intersection_Aggr, new ST_Union_Aggr(), new ST_Collect_Agg()) diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/AggregateFunctions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/AggregateFunctions.scala index 2140c8716b1..f7d73740c4b 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/AggregateFunctions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/AggregateFunctions.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.sedona_sql.expressions import org.apache.sedona.common.Functions -import org.apache.sedona.common.geometryObjects.Box2D +import org.apache.sedona.common.geometryObjects.{Box2D, Box3D} import org.apache.spark.sql.{Encoder, Encoders} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import 
org.apache.spark.sql.expressions.Aggregator @@ -210,6 +210,85 @@ private[apache] class ST_Extent extends Aggregator[Geometry, Option[EnvelopeBuff def zero: Option[EnvelopeBuffer] = None } +/** + * Aggregator-buffer for the 3D extent. Geometries without a Z dimension fold into the `z = 0` + * plane on a per-coordinate basis, matching PostGIS's flat-XY-treated-as-XY[Z=0] convention. + */ +case class Envelope3DBuffer( + minX: Double, + maxX: Double, + minY: Double, + maxY: Double, + minZ: Double, + maxZ: Double) { + def isNull: Boolean = minX > maxX + + def merge(other: Envelope3DBuffer): Envelope3DBuffer = { + if (this.isNull) other + else if (other.isNull) this + else + Envelope3DBuffer( + math.min(this.minX, other.minX), + math.max(this.maxX, other.maxX), + math.min(this.minY, other.minY), + math.max(this.maxY, other.maxY), + math.min(this.minZ, other.minZ), + math.max(this.maxZ, other.maxZ)) + } +} + +/** + * Return the 3D bounding box (Box3D) of all geometries in the given column. Returns NULL when the + * input contains no rows or all rows are null/empty geometries. Mirrors PostGIS `ST_3DExtent`. + * Geometries without a Z dimension are treated as having `z = 0`. 
+ */ +private[apache] class ST_3DExtent extends Aggregator[Geometry, Option[Envelope3DBuffer], Box3D] { + + val outputSerde: ExpressionEncoder[Box3D] = ExpressionEncoder[Box3D]() + + def reduce(buffer: Option[Envelope3DBuffer], input: Geometry): Option[Envelope3DBuffer] = { + if (input == null || input.isEmpty) return buffer + val box = Box3D.fromGeometry(input) + if (box == null) return buffer + val incoming = Envelope3DBuffer( + box.getXMin, + box.getXMax, + box.getYMin, + box.getYMax, + box.getZMin, + box.getZMax) + buffer match { + case Some(b) => Some(b.merge(incoming)) + case None => Some(incoming) + } + } + + def merge( + buffer1: Option[Envelope3DBuffer], + buffer2: Option[Envelope3DBuffer]): Option[Envelope3DBuffer] = { + (buffer1, buffer2) match { + case (Some(b1), Some(b2)) => Some(b1.merge(b2)) + case (Some(_), None) => buffer1 + case (None, Some(_)) => buffer2 + case (None, None) => None + } + } + + def finish(reduction: Option[Envelope3DBuffer]): Box3D = { + reduction match { + case Some(b) => new Box3D(b.minX, b.minY, b.minZ, b.maxX, b.maxY, b.maxZ) + case None => null + } + } + + def bufferEncoder: Encoder[Option[Envelope3DBuffer]] = + Encoders.product[Option[Envelope3DBuffer]] + + def outputEncoder: ExpressionEncoder[Box3D] = outputSerde + + def zero: Option[Envelope3DBuffer] = None +} + /** * Return the polygon intersection of all Polygon in the given column */ diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_aggregates.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_aggregates.scala index c2e891f0b83..422165f1be5 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_aggregates.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_aggregates.scala @@ -73,6 +73,16 @@ object st_aggregates { aggrFunc(col(geometry)) } + def ST_3DExtent(geometry: Column): Column = { + val aggrFunc = udaf(new ST_3DExtent) + 
aggrFunc(geometry) + } + + def ST_3DExtent(geometry: String): Column = { + val aggrFunc = udaf(new ST_3DExtent) + aggrFunc(col(geometry)) + } + // Aliases for *_Aggr functions with *_Agg suffix def ST_Envelope_Agg(geometry: Column): Column = ST_Envelope_Aggr(geometry) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/Box3DExtentSuite.scala b/spark/common/src/test/scala/org/apache/sedona/sql/Box3DExtentSuite.scala new file mode 100644 index 00000000000..6d1690c6b46 --- /dev/null +++ b/spark/common/src/test/scala/org/apache/sedona/sql/Box3DExtentSuite.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.sql + +class Box3DExtentSuite extends TestBaseScala { + + describe("ST_3DExtent aggregate") { + + it("aggregates 3D bbox over geometry rows, treating XY as Z=0") { + val row = sparkSession + .sql(""" + WITH t AS ( + SELECT ST_GeomFromText('POINT(1 1)') AS g UNION ALL + SELECT ST_GeomFromWKT('POINT Z(5 7 -2)') UNION ALL + SELECT ST_GeomFromWKT('LINESTRING Z(3 2 4, 6 4 9)') + ) + SELECT ST_AsText(ST_3DExtent(g)) AS s FROM t + """) + .collect()(0) + assert(row.getString(0) == "BOX3D(1.0 1.0 -2.0, 6.0 7.0 9.0)") + } + + it("returns NULL on empty input") { + val v = sparkSession + .sql("SELECT ST_3DExtent(g) FROM (SELECT ST_GeomFromText(NULL) AS g) WHERE false") + .collect() + assert(v.isEmpty || v(0).isNullAt(0)) + } + + it("skips NULL and empty geometry rows") { + val row = sparkSession + .sql(""" + WITH t AS ( + SELECT ST_GeomFromWKT('POINT Z(5 7 -2)') AS g UNION ALL + SELECT ST_GeomFromText(NULL) UNION ALL + SELECT ST_GeomFromText('LINESTRING EMPTY') UNION ALL + SELECT ST_GeomFromWKT('POINT Z(1 1 1)') + ) + SELECT ST_AsText(ST_3DExtent(g)) AS s FROM t + """) + .collect()(0) + assert(row.getString(0) == "BOX3D(1.0 1.0 -2.0, 5.0 7.0 1.0)") + } + } +} diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala index 8501d9e6e8b..82f5913746a 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala @@ -685,6 +685,13 @@ class dataFrameAPITestScala extends TestBaseScala { assert(!row.getBoolean(1)) } + it("Passed ST_3DExtent") { + val pointsDf = sparkSession.sql("SELECT explode(array(" + + "ST_PointZ(0.0, 0.0, -1.0), ST_PointZ(2.0, 4.0, 6.0), ST_PointZ(1.0, 1.0, 1.0))) AS geom") + val actual = pointsDf.select(ST_3DExtent("geom")).first().get(0).toString + assert(actual == "BOX3D(0.0 0.0 -1.0, 2.0 4.0 6.0)") + } + 
it("Passed ST_Expand") { val baseDf = sparkSession.sql( "SELECT ST_GeomFromWKT('POLYGON ((50 50 1, 50 80 2, 80 80 3, 80 50 2, 50 50 1))') as geom")