diff --git a/pom.xml b/pom.xml index b51818ac8..f0d5cf77e 100644 --- a/pom.xml +++ b/pom.xml @@ -96,6 +96,7 @@ 2.12.17 2.12 3.4.4 + 1.6.1 1.20.0 1.39.0 diff --git a/wayang-api/wayang-api-scala-java/src/main/scala/org/apache/wayang/api/DataQuanta.scala b/wayang-api/wayang-api-scala-java/src/main/scala/org/apache/wayang/api/DataQuanta.scala index 2a2f60cc0..e792d246b 100644 --- a/wayang-api/wayang-api-scala-java/src/main/scala/org/apache/wayang/api/DataQuanta.scala +++ b/wayang-api/wayang-api-scala-java/src/main/scala/org/apache/wayang/api/DataQuanta.scala @@ -34,12 +34,13 @@ import org.apache.wayang.core.optimizer.ProbabilisticDoubleInterval import org.apache.wayang.core.optimizer.cardinality.CardinalityEstimator import org.apache.wayang.core.optimizer.costs.LoadProfileEstimator import org.apache.wayang.core.plan.wayangplan._ +import org.apache.wayang.core.api.spatial.{SpatialGeometry, SpatialPredicate} import org.apache.wayang.core.platform.Platform import org.apache.wayang.core.util.{Tuple => WayangTuple} import org.apache.wayang.basic.data.{Record, Tuple2 => WayangTuple2} -import org.apache.wayang.basic.model.{DLModel, LogisticRegressionModel,DecisionTreeRegressionModel}; +import org.apache.wayang.basic.model.{DLModel, LogisticRegressionModel,DecisionTreeRegressionModel} import org.apache.wayang.commons.util.profiledb.model.Experiment -import com.google.protobuf.ByteString; +import com.google.protobuf.ByteString import org.apache.wayang.api.python.function._ import org.tensorflow.ndarray.NdArray @@ -632,6 +633,81 @@ class DataQuanta[Out: ClassTag](val operator: ElementaryOperator, outputIndex: I joinOperator } + /** + * Applies a spatial filter to this instance. 
+   *
+   * @param keySelector    UDF to extract spatial geometry from data quanta
+   * @param predicateType  the spatial predicate type
+   * @param filterGeometry the geometry to filter against
+   * @param columnName     optional SQL column name for database pushdown (`null` when absent)
+   * @return a new instance representing the filtered output
+   */
+  def spatialFilter(keySelector: Out => SpatialGeometry, predicateType: SpatialPredicate,
+                    filterGeometry: SpatialGeometry, columnName: String = null): DataQuanta[Out] = {
+    val geometryExtractor = toSerializableFunction(keySelector)
+    spatialFilterJava(geometryExtractor, predicateType, filterGeometry, columnName)
+  }
+
+  /**
+   * Java-friendly spatial filter: connects a [[SpatialFilterOperator]] behind this instance.
+   *
+   * @param keySelector    UDF to extract spatial geometry from data quanta
+   * @param predicateType  the spatial predicate type
+   * @param filterGeometry the geometry to filter against
+   * @param columnName     optional SQL column name; when non-null it is set on the operator's key descriptor
+   * @return a new instance representing the filtered output
+   */
+  def spatialFilterJava(keySelector: SerializableFunction[Out, _ <: SpatialGeometry],
+                        predicateType: SpatialPredicate, filterGeometry: SpatialGeometry,
+                        columnName: String = null): DataQuanta[Out] = {
+    val filterOperator = new SpatialFilterOperator(predicateType, keySelector, dataSetType[Out], filterGeometry)
+    // Register the SQL column on the key descriptor only when one was supplied.
+    Option(columnName).foreach(column => filterOperator.getKeyDescriptor.withSqlImplementation(null, column))
+    this.connectTo(filterOperator, 0)
+    wrap[Out](filterOperator)
+  }
+
+  /**
+   * Feeds this and a further instance into a [[SpatialJoinOperator]].
+   *
+   * @param thisKeyUdf    UDF to extract spatial geometry from this instance's elements
+   * @param that          the other instance
+   * @param thatKeyUdf    UDF to extract spatial geometry from `that` instance's elements
+   * @param predicateType the spatial predicate type for the join
+   * @return a new instance representing the SpatialJoinOperator's output
+   */
+  def spatialJoin[ThatOut: ClassTag](thisKeyUdf: Out => SpatialGeometry, that: DataQuanta[ThatOut],
+      thatKeyUdf: ThatOut => SpatialGeometry, predicateType: SpatialPredicate): DataQuanta[WayangTuple2[Out, ThatOut]] = {
+    val thisGeometryOf = toSerializableFunction(thisKeyUdf)
+    val thatGeometryOf = toSerializableFunction(thatKeyUdf)
+    spatialJoinJava(thisGeometryOf, that, thatGeometryOf, predicateType)
+  }
+
+  /**
+   * Java-friendly spatial join: connects this instance (input 0) and `that` (input 1) to a [[SpatialJoinOperator]].
+   *
+   * @param thisKeyUdf    UDF to extract spatial geometry from this instance's elements
+   * @param that          the other instance; it must come from the same plan builder as this one
+   * @param thatKeyUdf    UDF to extract spatial geometry from `that` instance's elements
+   * @param predicateType the spatial predicate type for the join
+   * @return a new instance representing the SpatialJoinOperator's output
+   */
+  def spatialJoinJava[ThatOut: ClassTag](thisKeyUdf: SerializableFunction[Out, _ <: SpatialGeometry],
+                                         that: DataQuanta[ThatOut],
+                                         thatKeyUdf: SerializableFunction[ThatOut, _ <: SpatialGeometry],
+                                         predicateType: SpatialPredicate): DataQuanta[WayangTuple2[Out, ThatOut]] = {
+    require(this.planBuilder eq that.planBuilder, s"$this and $that must use the same plan builders.")
+    // The wildcard-typed UDFs are cast so that both key descriptors are declared over SpatialGeometry.
+    val thisKeyDescriptor = new TransformationDescriptor(
+      thisKeyUdf.asInstanceOf[SerializableFunction[Out, SpatialGeometry]], basicDataUnitType[Out], basicDataUnitType[SpatialGeometry])
+    val thatKeyDescriptor = new TransformationDescriptor(
+      thatKeyUdf.asInstanceOf[SerializableFunction[ThatOut, SpatialGeometry]], basicDataUnitType[ThatOut], basicDataUnitType[SpatialGeometry])
+    val joinOperator = new SpatialJoinOperator(thisKeyDescriptor, thatKeyDescriptor, predicateType)
+    this.connectTo(joinOperator, 0)
+    that.connectTo(joinOperator, 1)
+    wrap[WayangTuple2[Out, ThatOut]](joinOperator)
+  }
+
   def predict[ThatOut:
ClassTag]( that: DataQuanta[ThatOut], inputType: Class[_ <: Any], diff --git a/wayang-api/wayang-api-scala-java/src/main/scala/org/apache/wayang/api/DataQuantaBuilder.scala b/wayang-api/wayang-api-scala-java/src/main/scala/org/apache/wayang/api/DataQuantaBuilder.scala index dad054a2f..391ae7a14 100644 --- a/wayang-api/wayang-api-scala-java/src/main/scala/org/apache/wayang/api/DataQuantaBuilder.scala +++ b/wayang-api/wayang-api-scala-java/src/main/scala/org/apache/wayang/api/DataQuantaBuilder.scala @@ -30,6 +30,7 @@ import org.apache.wayang.basic.data.{Record, Tuple2 => RT2} import org.apache.wayang.basic.model.{DLModel, Model, LogisticRegressionModel,DecisionTreeRegressionModel} import org.apache.wayang.basic.operators.{DLTrainingOperator, GlobalReduceOperator, LocalCallbackSink, MapOperator, SampleOperator, LogisticRegressionOperator,DecisionTreeRegressionOperator, LinearSVCOperator} import org.apache.wayang.commons.util.profiledb.model.Experiment +import org.apache.wayang.core.api.spatial.{SpatialGeometry, SpatialPredicate} import org.apache.wayang.core.function.FunctionDescriptor.{SerializableBiFunction, SerializableBinaryOperator, SerializableFunction, SerializableIntUnaryOperator, SerializablePredicate} import org.apache.wayang.core.optimizer.ProbabilisticDoubleInterval import org.apache.wayang.core.optimizer.cardinality.CardinalityEstimator @@ -281,6 +282,57 @@ trait DataQuantaBuilder[+This <: DataQuantaBuilder[_, Out], Out] extends Logging thatKeyUdf: SerializableFunction[ThatOut, Key]) = new JoinDataQuantaBuilder(this, that, thisKeyUdf, thatKeyUdf) + /** + * Feed the built [[DataQuanta]] into a spatial filter operator. + * Requires the wayang-spatial plugin to be loaded. 
+   *
+   * @param keyUdf         function to extract geometry from elements
+   * @param predicate      the spatial predicate type
+   * @param filterGeometry the geometry to filter against
+   * @return a [[SpatialFilterDataQuantaBuilder]] representing the filtered output
+   */
+  def spatialFilter(keyUdf: SerializableFunction[Out, _ <: SpatialGeometry],
+                    predicate: SpatialPredicate,
+                    filterGeometry: SpatialGeometry): SpatialFilterDataQuantaBuilder[Out] = {
+    // No SQL geometry column is configured by this overload.
+    new SpatialFilterDataQuantaBuilder(this, keyUdf, predicate, filterGeometry)
+  }
+
+  /**
+   * Feed the built [[DataQuanta]] into a spatial filter operator and name the SQL geometry column for pushdown.
+   *
+   * @param keyUdf            function to extract geometry from elements
+   * @param predicate         the spatial predicate type
+   * @param filterGeometry    the geometry to filter against
+   * @param sqlGeometryColumn the name of the geometry column in the database for SQL pushdown
+   * @return a [[SpatialFilterDataQuantaBuilder]] representing the filtered output
+   */
+  def spatialFilter(keyUdf: SerializableFunction[Out, _ <: SpatialGeometry],
+                    predicate: SpatialPredicate,
+                    filterGeometry: SpatialGeometry,
+                    sqlGeometryColumn: String): SpatialFilterDataQuantaBuilder[Out] = {
+    val filterBuilder = new SpatialFilterDataQuantaBuilder(this, keyUdf, predicate, filterGeometry)
+    // The column setter's return value is the result of this method.
+    filterBuilder.withSqlGeometryColumnName(sqlGeometryColumn)
+  }
+
+  /**
+   * Feed the built [[DataQuanta]] of this and the given instance into a spatial join operator.
+   *
+   * @param thisKeyUdf function to extract geometry from this instance's elements
+   * @param that the other [[DataQuantaBuilder]] to join with
+   * @param thatKeyUdf function to extract geometry from `that` instance's elements
+   * @param predicate the spatial predicate type
+   * @return a [[SpatialJoinDataQuantaBuilder]] representing the joined output as Tuple2
+   */
+  def spatialJoin[ThatOut](
+      thisKeyUdf: SerializableFunction[Out, _ <: SpatialGeometry],
+      that: DataQuantaBuilder[_, ThatOut],
+      thatKeyUdf: SerializableFunction[ThatOut, _ <: SpatialGeometry],
+      predicate: SpatialPredicate
+  ): SpatialJoinDataQuantaBuilder[Out, ThatOut] =
+    new SpatialJoinDataQuantaBuilder(this, that, thisKeyUdf, thatKeyUdf, predicate)
+
   /**
    * Feed the built [[DataQuanta]] of this and the given instance into a
    * [[org.apache.wayang.basic.operators.DLTrainingOperator]].
@@ -510,12 +562,12 @@ trait DataQuantaBuilder[+This <: DataQuantaBuilder[_, Out], Out] extends Logging
    * @param catalog Iceberg Catalog
    * @param schema Iceberg Schema of the table to create
    * @param tableIdentifier Iceberg Table Identifier of the table to create
-   * @param outputFileFormat File format of the output data files
+   * @param outputFileFormat File format of the output data files
    * @return the collected data quanta
    */
-  def writeIcebergTable(catalog: Catalog,
-                        schema: Schema,
+  def writeIcebergTable(catalog: Catalog,
+                        schema: Schema,
                         tableIdentifier: TableIdentifier,
                         outputFileFormat: FileFormat,
                         jobName: String): Unit = {
@@ -1929,3 +1981,41 @@ class KeyedDataQuantaBuilder[Out, Key](private val dataQuantaBuilder: DataQuanta
     dataQuantaBuilder.coGroup(this.keyExtractor, that.dataQuantaBuilder, that.keyExtractor)
 
 }
+/** Builder that applies `DataQuanta.spatialFilterJava` to the [[DataQuanta]] built by the wrapped input builder. */
+class SpatialFilterDataQuantaBuilder[T](inputDataQuanta: DataQuantaBuilder[_, T],
+                                        keySelector: SerializableFunction[T, _ <: SpatialGeometry],
+                                        predicateType: SpatialPredicate,
+                                        filterGeometry: SpatialGeometry)
+                                       (implicit javaPlanBuilder: JavaPlanBuilder)
+  extends BasicDataQuantaBuilder[SpatialFilterDataQuantaBuilder[T], T] {
+
+  private var columnName: String = _ // SQL geometry column; stays null until withSqlGeometryColumnName is called
+  /** Stores the SQL geometry column that `build` forwards to `spatialFilterJava`, and returns this builder. */
+  def withSqlGeometryColumnName(columnName: String): SpatialFilterDataQuantaBuilder[T] = {
+    this.columnName = columnName
+    this
+  }
+  // NOTE(review): no applyTargetPlatforms call here, unlike SpatialJoinDataQuantaBuilder.build - confirm this is intended.
+  override protected def build: DataQuanta[T] = {
+    val dq = inputDataQuanta.dataQuanta()
+    dq.spatialFilterJava(keySelector, predicateType, filterGeometry, this.columnName)
+  }
+}
+/** Builder that applies `DataQuanta.spatialJoinJava` to the [[DataQuanta]] built by the two wrapped input builders. */
+class SpatialJoinDataQuantaBuilder[In0, In1](inputDataQuanta0: DataQuantaBuilder[_, In0],
+                                             inputDataQuanta1: DataQuantaBuilder[_, In1],
+                                             keyUdf0: SerializableFunction[In0, _ <: SpatialGeometry],
+                                             keyUdf1: SerializableFunction[In1, _ <: SpatialGeometry],
+                                             predicateType: SpatialPredicate)
+                                            (implicit javaPlanBuilder: JavaPlanBuilder)
+  extends BasicDataQuantaBuilder[SpatialJoinDataQuantaBuilder[In0, In1], RT2[In0, In1]] {
+
+  override protected def build: DataQuanta[RT2[In0, In1]] = {
+    val dq0 = inputDataQuanta0.dataQuanta()
+    val dq1 = inputDataQuanta1.dataQuanta()
+    applyTargetPlatforms(
+      dq0.spatialJoinJava(keyUdf0, dq1, keyUdf1, predicateType)(inputDataQuanta1.classTag), // ClassTag of the second input, passed explicitly
+      this.getTargetPlatforms()
+    )
+  }
+}
diff --git a/wayang-api/wayang-api-scala-java/src/test/java/org/apache/wayang/api/JavaApiTest.java b/wayang-api/wayang-api-scala-java/src/test/java/org/apache/wayang/api/JavaApiTest.java
index 9240f2d9c..4ade8bc26 100644
--- a/wayang-api/wayang-api-scala-java/src/test/java/org/apache/wayang/api/JavaApiTest.java
+++ b/wayang-api/wayang-api-scala-java/src/test/java/org/apache/wayang/api/JavaApiTest.java
@@ -113,6 +113,22 @@ void testMapReduceBy() {
         assertEquals(WayangCollections.asSet(4 + 16, 1 + 9), WayangCollections.asSet(outputCollection));
     }
 
+    @Test
+    void testFilter() {
+        WayangContext wayangContext = new WayangContext().with(Java.basicPlugin());
+        JavaPlanBuilder builder = new JavaPlanBuilder(wayangContext);
+
+        final List inputValues = Arrays.asList(1, 2, 3, 4, 5, 6);
+
+        final Collection outputValues = builder
+
.loadCollection(inputValues).withName("Load input values") + .filter(i -> (i & 1) == 0).withName("Filter even numbers") + .collect(); + + Set expectedValues = WayangCollections.asSet(2, 4, 6); + assertEquals(expectedValues, WayangCollections.asSet(outputValues)); + } + @Test void testBroadcast2() { WayangContext wayangContext = new WayangContext().with(Java.basicPlugin()); diff --git a/wayang-api/wayang-api-sql/src/main/java/org/apache/wayang/api/sql/sources/fs/CsvRowConverter.java b/wayang-api/wayang-api-sql/src/main/java/org/apache/wayang/api/sql/sources/fs/CsvRowConverter.java index 4d8682b1b..af490acb1 100755 --- a/wayang-api/wayang-api-sql/src/main/java/org/apache/wayang/api/sql/sources/fs/CsvRowConverter.java +++ b/wayang-api/wayang-api-sql/src/main/java/org/apache/wayang/api/sql/sources/fs/CsvRowConverter.java @@ -132,6 +132,7 @@ public static Object convert(RelDataType fieldType, String string) { } catch (ParseException e) { return null; } + case GEOMETRY: case VARCHAR: default: return string; diff --git a/wayang-benchmark/pom.xml b/wayang-benchmark/pom.xml index ffdb42af8..e37b99b38 100644 --- a/wayang-benchmark/pom.xml +++ b/wayang-benchmark/pom.xml @@ -54,6 +54,11 @@ wayang-postgres 1.1.2-SNAPSHOT + + org.apache.wayang + wayang-spatial + 1.1.2-SNAPSHOT + org.apache.wayang wayang-sqlite3 diff --git a/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialFilter.java b/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialFilter.java new file mode 100644 index 000000000..65f678d3a --- /dev/null +++ b/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialFilter.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.apps.spatial;
+
+import org.apache.wayang.api.JavaPlanBuilder;
+import org.apache.wayang.core.api.spatial.SpatialGeometry;
+import org.apache.wayang.spark.Spark;
+import org.apache.wayang.spatial.data.WayangGeometry;
+import org.apache.wayang.core.api.Configuration;
+import org.apache.wayang.core.api.WayangContext;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.java.Java;
+import org.apache.wayang.spatial.Spatial;
+
+import java.util.Arrays;
+import java.util.Collection;
+/** Benchmark app: counts the lines of a text file whose geometry INTERSECTS a square query polygon. */
+public class SpatialFilter {
+    public static void main(String[] args) {
+        System.out.println("Running Spatial Filter Benchmark with args " + Arrays.toString(args));
+        // Positional arguments start at index 1; args[0] is not read by this program.
+        String fileUrl = args[1];
+        String platform = args[2];
+        String selectivity = args[3];
+
+        WayangContext wayangContext = new WayangContext(new Configuration())
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spark.basicPlugin())
+                .withPlugin(Spatial.plugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("filter test")
+                .withUdfJarOf(SpatialFilter.class);
+        // Query window: the closed ring (0 0) -> (s 0) -> (s s) -> (0 s) -> (0 0), with s = selectivity.
+        SpatialGeometry queryGeometry = WayangGeometry.fromStringInput(
+                "POLYGON((0.0 0.0, " + selectivity + " 0.0, " + selectivity + " " + selectivity + ", 0.0 " + selectivity + ", 0.0 0.0))"
+        );
+        // The geometry text of a line is whatever precedes the first `",`, with all double quotes removed.
+        Collection outputcount =
+                planBuilder.readTextFile(fileUrl)
+                        .spatialFilter(
+                                (input -> WayangGeometry.fromStringInput((input.split("\",")[0]).replace("\"", ""))),
+                                SpatialPredicate.INTERSECTS,
+                                queryGeometry
+                        ).withTargetPlatform(
+                                switch (platform) {
+                                    case "java" -> Java.platform();
+                                    case "spark" -> Spark.platform();
+                                    default -> Java.platform(); // any other value also selects the Java platform
+                                }
+                        )
+                        .count()
+                        .collect();
+
+        System.out.println("Spatial Filter Result Size: " + outputcount);
+    }
+}
\ No newline at end of file
diff --git a/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialFilterPostgis.java b/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialFilterPostgis.java
new file mode 100644
index 000000000..fa3568f69
--- /dev/null
+++ b/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialFilterPostgis.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.apps.spatial;
+
+import org.apache.wayang.api.JavaPlanBuilder;
+import org.apache.wayang.core.api.Configuration;
+import org.apache.wayang.core.api.WayangContext;
+import org.apache.wayang.core.api.spatial.SpatialGeometry;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.java.Java;
+import org.apache.wayang.postgres.Postgres;
+import org.apache.wayang.postgres.operators.PostgresTableSource;
+import org.apache.wayang.spatial.Spatial;
+import org.apache.wayang.spatial.data.WayangGeometry;
+
+import java.util.Arrays;
+import java.util.Collection;
+/** Benchmark app: counts the records of a Postgres table whose geometry INTERSECTS a square query polygon. */
+public class SpatialFilterPostgis {
+    public static void main(String[] args) {
+        System.out.println("Running Spatial Filter Benchmark with args " + Arrays.toString(args) + " on Postgres");
+
+        Configuration configuration = new Configuration();
+        // Positional arguments start at index 1; args[0] is not read by this program.
+        String tableName = args[1];
+        String node_name = args[2];
+        String database_name = args[3];
+        String selectivity = args[4];
+        // NOTE(review): the JDBC port, user and password below are hard-coded.
+        configuration.setProperty("wayang.postgres.jdbc.url", "jdbc:postgresql://" + node_name + ":5432/" + database_name); // Default port 5432
+        configuration.setProperty("wayang.postgres.jdbc.user", "wayang_user");
+        configuration.setProperty("wayang.postgres.jdbc.password", "wayang");
+
+        WayangContext wayangContext = new WayangContext(configuration)
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Postgres.plugin())
+                .withPlugin(Spatial.plugin());
+
+        JavaPlanBuilder builder = new JavaPlanBuilder(wayangContext);
+        // Query window: the closed ring (0 0) -> (s 0) -> (s s) -> (0 s) -> (0 0), with s = selectivity.
+        SpatialGeometry queryGeometry = WayangGeometry.fromStringInput(
+                "POLYGON((0.0 0.0, " + selectivity + " 0.0, " + selectivity + " " + selectivity + ", 0.0 " + selectivity + ", 0.0 0.0))"
+        );
+        // The UDF parses field 0 of each record; presumably that is the "ST_AsText(geom)" column requested below - TODO confirm.
+        final Collection outputcount = builder
+                .readTable(new PostgresTableSource(tableName, "ST_AsText(geom)"))
+                .spatialFilter(
+                        (input -> WayangGeometry.fromStringInput(input.getString(0))),
+                        SpatialPredicate.INTERSECTS,
+                        queryGeometry
+                ).withSqlGeometryColumnName("geom")
+                .withTargetPlatform(Postgres.platform())
+                .count()
+                .collect();
+
+        System.out.println("Spatial Filter Postgres Result Size: " + outputcount);
+    }
+}
diff --git a/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialJoin.java b/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialJoin.java
new file mode 100644
index 000000000..23a92ebdd
--- /dev/null
+++ b/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialJoin.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.apps.spatial;
+
+import org.apache.wayang.api.DataQuantaBuilder;
+import org.apache.wayang.api.JavaPlanBuilder;
+import org.apache.wayang.api.UnarySourceDataQuantaBuilder;
+import org.apache.wayang.core.api.Configuration;
+import org.apache.wayang.core.api.WayangContext;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.java.Java;
+import org.apache.wayang.spark.Spark;
+import org.apache.wayang.spatial.Spatial;
+import org.apache.wayang.spatial.data.WayangGeometry;
+
+import java.util.Arrays;
+import java.util.Collection;
+/** Benchmark app: counts the pairs of lines from two text files that the INTERSECTS spatial join produces. */
+public class SpatialJoin {
+
+    public static void main(String[] args) {
+        System.out.println("Running Spatial Join Benchmark with args " + Arrays.toString(args));
+
+        WayangContext wayangContext = new WayangContext(new Configuration())
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spark.basicPlugin())
+                .withPlugin(Spatial.plugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext);
+        // Positional arguments start at index 1; args[0] is not read by this program.
+        String file1Url = args[1];
+        String file2Url = args[2];
+        String platform = args[3];
+        DataQuantaBuilder, String> table1 = planBuilder.readTextFile(file1Url);
+        DataQuantaBuilder, String> table2 = planBuilder.readTextFile(file2Url);
+        // Each whole input line is handed to WayangGeometry.fromStringInput to obtain the join geometry.
+        Collection outputcount = table1
+                .spatialJoin(
+                        WayangGeometry::fromStringInput,
+                        table2,
+                        WayangGeometry::fromStringInput,
+                        SpatialPredicate.INTERSECTS
+                ).withTargetPlatform(
+                        switch (platform) {
+                            case "java" -> Java.platform();
+                            case "spark" -> Spark.platform();
+                            default -> Java.platform(); // any other value also selects the Java platform
+                        })
+                .count()
+                .collect();
+        System.out.println("Spatial Join Result Size: " + outputcount);
+    }
+}
diff --git a/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialJoinPostgis.java b/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialJoinPostgis.java
new file mode 100644
index 000000000..b892efc73
--- /dev/null
+++ b/wayang-benchmark/src/main/java/org/apache/wayang/apps/spatial/SpatialJoinPostgis.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.apps.spatial;
+
+import org.apache.wayang.basic.operators.SpatialJoinOperator;
+import org.apache.wayang.basic.operators.TableSource;
+import org.apache.wayang.core.api.Configuration;
+import org.apache.wayang.core.api.WayangContext;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.java.Java;
+import org.apache.wayang.core.plan.wayangplan.WayangPlan;
+import org.apache.wayang.postgres.Postgres;
+import org.apache.wayang.postgres.operators.PostgresTableSource;
+import org.apache.wayang.spark.Spark;
+import org.apache.wayang.spatial.Spatial;
+import org.apache.wayang.spatial.data.WayangGeometry;
+import org.apache.wayang.basic.data.Tuple2;
+import org.apache.wayang.basic.operators.*;
+import org.apache.wayang.core.types.DataSetType;
+import org.apache.wayang.basic.data.Record;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+/** Benchmark app: hand-wires a plan that spatially joins two Postgres tables (INTERSECTS) and counts the result. */
+public class SpatialJoinPostgis {
+    public static void main(String[] args) {
+        System.out.println("Running Spatial Join Benchmark with args " + Arrays.toString(args) + " on Postgres");
+
+        Configuration configuration = new Configuration();
+        // Positional arguments start at index 1; args[0] is not read by this program.
+        String tableName1 = args[1];
+        String tableName2 = args[2];
+        String node_name = args[3];
+        String database_name = args[4];
+        String platform = args[5];
+        // NOTE(review): the JDBC port, user and password below are hard-coded.
+        configuration.setProperty("wayang.postgres.jdbc.url", "jdbc:postgresql://" + node_name + ":5432/" + database_name);
+        configuration.setProperty("wayang.postgres.jdbc.user", "wayang_user");
+        configuration.setProperty("wayang.postgres.jdbc.password", "wayang");
+
+        WayangContext wayangContext = new WayangContext(configuration)
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Postgres.plugin())
+                .withPlugin(Spark.basicPlugin())
+                .withPlugin(Spatial.plugin());
+
+        TableSource table1 = new PostgresTableSource(tableName1, "ST_AsText(geom)");
+        TableSource table2 = new PostgresTableSource(tableName2, "ST_AsText(geom)");
+        // Both key UDFs parse field 0 of a record as geometry text.
+        SpatialJoinOperator spatialJoin = new SpatialJoinOperator<>(
+                (record -> WayangGeometry.fromStringInput(record.getString(0))),
+                (record -> WayangGeometry.fromStringInput(record.getString(0))),
+                Record.class, Record.class,
+                SpatialPredicate.INTERSECTS
+        );
+        // Set each table name and the "geom" column as the SQL implementation of the matching key descriptor.
+        spatialJoin.getKeyDescriptor0().withSqlImplementation(tableName1, "geom");
+        spatialJoin.getKeyDescriptor1().withSqlImplementation(tableName2, "geom");
+        spatialJoin.addTargetPlatform(switch (platform) {
+            case "java" -> Java.platform();
+            case "spark" -> Spark.platform();
+            default -> Postgres.platform(); // any other value targets Postgres
+        });
+        // Wire the plan: both table sources -> spatial join -> count -> collecting sink.
+        table1.connectTo(0, spatialJoin, 0);
+        table2.connectTo(0, spatialJoin, 1);
+
+        CountOperator> count = new CountOperator<>(
+                DataSetType.createDefaultUnchecked(Tuple2.class)
+        );
+        spatialJoin.connectTo(0, count, 0);
+
+        Collection outputcount = new ArrayList<>();
+        LocalCallbackSink sink = LocalCallbackSink.createCollectingSink(
+                outputcount,
+                DataSetType.createDefaultUnchecked(Long.class)
+        );
+
+        count.connectTo(0, sink, 0);
+
+        wayangContext.execute("Benchmark spatial_join", new WayangPlan(sink));
+
+        System.out.println("Spatial Join Postgres Result Size: " + outputcount);
+    }
+}
diff --git 
a/wayang-commons/wayang-basic/pom.xml b/wayang-commons/wayang-basic/pom.xml index 70de0debc..e76ca2408 100644 --- a/wayang-commons/wayang-basic/pom.xml +++ b/wayang-commons/wayang-basic/pom.xml @@ -175,7 +175,7 @@ slf4j-simple 2.0.16 - + diff --git a/wayang-commons/wayang-basic/src/main/java/org/apache/wayang/basic/operators/GeoJsonFileSource.java b/wayang-commons/wayang-basic/src/main/java/org/apache/wayang/basic/operators/GeoJsonFileSource.java new file mode 100644 index 000000000..0f47d4b5e --- /dev/null +++ b/wayang-commons/wayang-basic/src/main/java/org/apache/wayang/basic/operators/GeoJsonFileSource.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.basic.operators; + +import org.apache.wayang.basic.data.Record; +import org.apache.wayang.core.plan.wayangplan.UnarySource; +import org.apache.wayang.core.types.DataSetType; + +/** + * Logical operator representing a GeoJSON file source producing {@link Record} elements. 
+ */
+public class GeoJsonFileSource extends UnarySource {
+    /** Input URL handed to the constructor. */
+    private final String inputUrl;
+    /** Creates a source for {@code inputUrl}; the output type is the default DataSetType of Record. */
+    public GeoJsonFileSource(String inputUrl) {
+        super(DataSetType.createDefault(Record.class));
+        this.inputUrl = inputUrl;
+    }
+    /** Copy constructor: delegates to the superclass copy and takes the input URL from {@code that}. */
+    public GeoJsonFileSource(GeoJsonFileSource that) {
+        super(that);
+        this.inputUrl = that.getInputUrl();
+    }
+    /** @return the input URL this source was created with */
+    public String getInputUrl() {
+        return inputUrl;
+    }
+}
diff --git a/wayang-commons/wayang-basic/src/main/java/org/apache/wayang/basic/operators/SpatialFilterOperator.java b/wayang-commons/wayang-basic/src/main/java/org/apache/wayang/basic/operators/SpatialFilterOperator.java
new file mode 100644
index 000000000..ca54ff576
--- /dev/null
+++ b/wayang-commons/wayang-basic/src/main/java/org/apache/wayang/basic/operators/SpatialFilterOperator.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.basic.operators;
+
+import org.apache.wayang.core.api.spatial.SpatialGeometry;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.core.function.FunctionDescriptor;
+import org.apache.wayang.core.function.TransformationDescriptor;
+import org.apache.wayang.core.optimizer.OptimizationContext;
+import org.apache.wayang.core.optimizer.cardinality.CardinalityEstimate;
+import org.apache.wayang.core.plan.wayangplan.UnaryToUnaryOperator;
+import org.apache.wayang.core.types.DataSetType;
+
+
+/**
+ * This operator returns a new dataset after filtering by applying a spatial predicate.
+ */
+public class SpatialFilterOperator extends UnaryToUnaryOperator {
+
+    protected final SpatialPredicate predicateType;
+    protected final TransformationDescriptor keyDescriptor;
+    protected final SpatialGeometry referenceGeometry;
+    /** Creates the operator with identical input and output types; the key extractor is wrapped in a TransformationDescriptor. */
+    @SuppressWarnings("unchecked")
+    public SpatialFilterOperator(SpatialPredicate predicateType,
+                                 FunctionDescriptor.SerializableFunction keyExtractor,
+                                 DataSetType inputClassDatasetType,
+                                 SpatialGeometry geometry) {
+        super(inputClassDatasetType, inputClassDatasetType, true);
+        this.predicateType = predicateType;
+        this.keyDescriptor = new TransformationDescriptor<>(
+                (FunctionDescriptor.SerializableFunction) (FunctionDescriptor.SerializableFunction) keyExtractor,
+                inputClassDatasetType.getDataUnitType().getTypeClass(), SpatialGeometry.class);
+        this.referenceGeometry = geometry;
+    }
+
+    /**
+     * Copies an instance (exclusive of broadcasts).
+     *
+     * @param that that should be copied
+     */
+    public SpatialFilterOperator(SpatialFilterOperator that) {
+        super(that);
+        this.predicateType = that.predicateType;
+        this.keyDescriptor = that.keyDescriptor;
+        this.referenceGeometry = that.referenceGeometry;
+    }
+    /** @return the spatial predicate given at construction */
+    public SpatialPredicate getPredicateType() {
+        return this.predicateType;
+    }
+    /** @return the geometry given at construction (the {@code geometry} constructor argument) */
+    public SpatialGeometry getReferenceGeometry() {
+        return this.referenceGeometry;
+    }
+    /** @return the descriptor that wraps the key extractor */
+    public TransformationDescriptor getKeyDescriptor() {
+        return this.keyDescriptor;
+    }
+
+    /**
+     * Custom {@link org.apache.wayang.core.optimizer.cardinality.CardinalityEstimator} for {@link SpatialFilterOperator}s.
+     */
+    private class CardinalityEstimator implements org.apache.wayang.core.optimizer.cardinality.CardinalityEstimator {
+        // NOTE(review): nothing in this class instantiates this estimator - confirm whether it should be wired in.
+        @Override
+        public CardinalityEstimate estimate(OptimizationContext optimizationContext, CardinalityEstimate... inputEstimates) {
+            return new CardinalityEstimate(10, 800, 0.9); // constant estimate; the input estimates are not consulted
+        }
+    }
+}
diff --git a/wayang-commons/wayang-basic/src/main/java/org/apache/wayang/basic/operators/SpatialJoinOperator.java b/wayang-commons/wayang-basic/src/main/java/org/apache/wayang/basic/operators/SpatialJoinOperator.java
new file mode 100644
index 000000000..4d0e2f4b5
--- /dev/null
+++ b/wayang-commons/wayang-basic/src/main/java/org/apache/wayang/basic/operators/SpatialJoinOperator.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.basic.operators; + +import org.apache.wayang.basic.data.Tuple2; +import org.apache.wayang.core.api.spatial.SpatialGeometry; +import org.apache.wayang.core.api.spatial.SpatialPredicate; +import org.apache.wayang.core.function.FunctionDescriptor; +import org.apache.wayang.core.function.TransformationDescriptor; +import org.apache.wayang.core.optimizer.OptimizationContext; +import org.apache.wayang.core.optimizer.cardinality.CardinalityEstimate; +import org.apache.wayang.core.plan.wayangplan.BinaryToUnaryOperator; +import org.apache.wayang.core.types.DataSetType; + +/** + * This operator returns a new dataset after joining the input tables using the predicate. 
+ */ +public class SpatialJoinOperator extends BinaryToUnaryOperator> { + + private static DataSetType> createOutputDataSetType() { + return DataSetType.createDefaultUnchecked(Tuple2.class); + } + + protected final TransformationDescriptor keyDescriptor0; + + protected final TransformationDescriptor keyDescriptor1; + + protected final SpatialPredicate predicateType; + + public SpatialJoinOperator(TransformationDescriptor keyDescriptor0, + TransformationDescriptor keyDescriptor1, + SpatialPredicate predicateType) { + super(DataSetType.createDefault(keyDescriptor0.getInputType()), + DataSetType.createDefault(keyDescriptor1.getInputType()), + SpatialJoinOperator.createOutputDataSetType(), + true); + this.keyDescriptor0 = keyDescriptor0; + this.keyDescriptor1 = keyDescriptor1; + this.predicateType = predicateType; + } + + public SpatialJoinOperator(TransformationDescriptor keyDescriptor0, + TransformationDescriptor keyDescriptor1, + DataSetType inputType0, + DataSetType inputType1, + SpatialPredicate predicateType) { + super(inputType0, inputType1, SpatialJoinOperator.createOutputDataSetType(), true); + this.keyDescriptor0 = keyDescriptor0; + this.keyDescriptor1 = keyDescriptor1; + this.predicateType = predicateType; + } + + public SpatialJoinOperator(SpatialJoinOperator that) { + super(that); + this.keyDescriptor0 = that.keyDescriptor0; + this.keyDescriptor1 = that.keyDescriptor1; + this.predicateType = that.predicateType; + } + + @SuppressWarnings("unchecked") + public SpatialJoinOperator( + FunctionDescriptor.SerializableFunction keyExtractor0, + FunctionDescriptor.SerializableFunction keyExtractor1, + Class input0Class, + Class input1Class, + SpatialPredicate predicateType) { + this( + new TransformationDescriptor<>( + (FunctionDescriptor.SerializableFunction) keyExtractor0, + input0Class, SpatialGeometry.class), + new TransformationDescriptor<>( + (FunctionDescriptor.SerializableFunction) keyExtractor1, + input1Class, SpatialGeometry.class), + predicateType + ); + 
} + + public TransformationDescriptor getKeyDescriptor0() { + return this.keyDescriptor0; + } + + public TransformationDescriptor getKeyDescriptor1() { + return this.keyDescriptor1; + } + + public SpatialPredicate getPredicateType() { + return this.predicateType; + } + + private class CardinalityEstimator implements org.apache.wayang.core.optimizer.cardinality.CardinalityEstimator { + + @Override + public CardinalityEstimate estimate(OptimizationContext optimizationContext, CardinalityEstimate... inputEstimates) { + return new CardinalityEstimate(10, 800, 0.9); + } + } +} diff --git a/wayang-commons/wayang-core/src/main/java/org/apache/wayang/core/api/spatial/SpatialGeometry.java b/wayang-commons/wayang-core/src/main/java/org/apache/wayang/core/api/spatial/SpatialGeometry.java new file mode 100644 index 000000000..f7141d59f --- /dev/null +++ b/wayang-commons/wayang-core/src/main/java/org/apache/wayang/core/api/spatial/SpatialGeometry.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.core.api.spatial; + +import java.io.Serializable; + +/** + * Abstract geometry interface for spatial operations. 
+ * Implementations (e.g., WayangGeometry) provide JTS-backed functionality. + */ +public interface SpatialGeometry extends Serializable { + + /** + * Returns Well-Known Text (WKT) representation of this geometry. + * + * @return WKT string + */ + String toWKT(); + + /** + * Returns Well-Known Binary (WKB) representation of this geometry as hex string. + * + * @return WKB hex string + */ + String toWKB(); +} diff --git a/wayang-commons/wayang-core/src/main/java/org/apache/wayang/core/api/spatial/SpatialPredicate.java b/wayang-commons/wayang-core/src/main/java/org/apache/wayang/core/api/spatial/SpatialPredicate.java new file mode 100644 index 000000000..eabaa3def --- /dev/null +++ b/wayang-commons/wayang-core/src/main/java/org/apache/wayang/core/api/spatial/SpatialPredicate.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.core.api.spatial; + +/** + * Spatial relationship predicates for filtering and joining. 
+ */ +public enum SpatialPredicate { + INTERSECTS, + CONTAINS, + WITHIN, + OVERLAPS, + TOUCHES, + CROSSES, + EQUALS +} diff --git a/wayang-platforms/wayang-jdbc-template/src/main/java/org/apache/wayang/jdbc/execution/JdbcExecutor.java b/wayang-platforms/wayang-jdbc-template/src/main/java/org/apache/wayang/jdbc/execution/JdbcExecutor.java index f7a9d7c5a..9f3e7bbe9 100644 --- a/wayang-platforms/wayang-jdbc-template/src/main/java/org/apache/wayang/jdbc/execution/JdbcExecutor.java +++ b/wayang-platforms/wayang-jdbc-template/src/main/java/org/apache/wayang/jdbc/execution/JdbcExecutor.java @@ -20,6 +20,10 @@ import org.apache.wayang.basic.channels.FileChannel; import org.apache.wayang.basic.data.Tuple2; +import org.apache.wayang.basic.operators.SpatialFilterOperator; +import org.apache.wayang.basic.operators.SpatialJoinOperator; +import org.apache.wayang.basic.operators.FilterOperator; +import org.apache.wayang.basic.operators.JoinOperator; import org.apache.wayang.basic.operators.TableSource; import org.apache.wayang.core.api.Job; import org.apache.wayang.core.api.exception.WayangException; @@ -170,21 +174,22 @@ protected static Tuple2 createSqlQuery(final E // Extract the different types of ExecutionOperators from the stage. final JdbcTableSource tableOp = (JdbcTableSource) startTask.getOperator(); SqlQueryChannel.Instance tipChannelInstance = JdbcExecutor.instantiateOutboundChannel(startTask, context, jdbcExecutor); - final Collection filterTasks = new ArrayList<>(4); + final Collection filterTasks = new ArrayList<>(4); JdbcProjectionOperator projectionTask = null; - final Collection> joinTasks = new ArrayList<>(); + final Collection joinTasks = new ArrayList<>(); final Set allTasks = stage.getAllTasks(); assert allTasks.size() <= 3; ExecutionTask nextTask = JdbcExecutor.findJdbcExecutionOperatorTaskInStage(startTask, stage); while (nextTask != null) { // Evaluate the nextTask. 
- if (nextTask.getOperator() instanceof final JdbcFilterOperator filterOperator) { - filterTasks.add(filterOperator); - } else if (nextTask.getOperator() instanceof JdbcProjectionOperator projectionOperator) { + final var operator = nextTask.getOperator(); + if (operator instanceof FilterOperator || operator instanceof SpatialFilterOperator) { + filterTasks.add((JdbcExecutionOperator) operator); + } else if (operator instanceof JdbcProjectionOperator) { assert projectionTask == null; // Allow one projection operator per stage for now. - projectionTask = projectionOperator; - } else if (nextTask.getOperator() instanceof JdbcJoinOperator joinOperator) { - joinTasks.add(joinOperator); + projectionTask = (JdbcProjectionOperator) operator; + } else if (operator instanceof JoinOperator || (operator instanceof SpatialJoinOperator)) { + joinTasks.add((JdbcExecutionOperator) operator); } else { throw new WayangException(String.format("Unsupported JDBC execution task %s", nextTask.toString())); } @@ -202,8 +207,9 @@ protected static Tuple2 createSqlQuery(final E } public static StringBuilder createSqlString(final JdbcExecutor jdbcExecutor, final JdbcTableSource tableOp, - final Collection filterTasks, JdbcProjectionOperator projectionTask, - final Collection> joinTasks) { + final Collection filterTasks, + JdbcProjectionOperator projectionTask, + final Collection joinTasks) { final String tableName = tableOp.createSqlClause(jdbcExecutor.connection, jdbcExecutor.functionCompiler); final Collection conditions = filterTasks.stream() .map(op -> op.createSqlClause(jdbcExecutor.connection, jdbcExecutor.functionCompiler)) diff --git a/wayang-platforms/wayang-spark/src/main/java/org/apache/wayang/spark/platform/SparkPlatform.java b/wayang-platforms/wayang-spark/src/main/java/org/apache/wayang/spark/platform/SparkPlatform.java index 77fbcc1b5..de3fb612d 100644 --- a/wayang-platforms/wayang-spark/src/main/java/org/apache/wayang/spark/platform/SparkPlatform.java +++ 
b/wayang-platforms/wayang-spark/src/main/java/org/apache/wayang/spark/platform/SparkPlatform.java @@ -161,6 +161,7 @@ public SparkContextReference getSparkContext(Job job) { this.registerJarIfNotNull(ReflectionUtils.getDeclaringJar(SparkPlatform.class)); // wayang-spark this.registerJarIfNotNull(ReflectionUtils.getDeclaringJar(WayangBasic.class)); // wayang-basic this.registerJarIfNotNull(ReflectionUtils.getDeclaringJar(WayangContext.class)); // wayang-core + final Set udfJarPaths = job.getUdfJarPaths(); if (udfJarPaths.isEmpty()) { this.logger.warn("Non-local SparkContext but not UDF JARs have been declared."); diff --git a/wayang-plugins/pom.xml b/wayang-plugins/pom.xml index 6c6e597b3..1d11b2da0 100644 --- a/wayang-plugins/pom.xml +++ b/wayang-plugins/pom.xml @@ -38,6 +38,7 @@ wayang-iejoin + wayang-spatial diff --git a/wayang-plugins/wayang-spatial/pom.xml b/wayang-plugins/wayang-spatial/pom.xml new file mode 100644 index 000000000..3f959dcdc --- /dev/null +++ b/wayang-plugins/wayang-spatial/pom.xml @@ -0,0 +1,180 @@ + + + + + 4.0.0 + + + wayang-plugins + org.apache.wayang + 1.1.2-SNAPSHOT + + + wayang-spatial + 1.1.2-SNAPSHOT + + + org.apache.wayang.extensions.spatial + 1.19.0 + + + + + + org.apache.wayang + wayang-core + 1.1.2-SNAPSHOT + + + org.apache.wayang + wayang-basic + 1.1.2-SNAPSHOT + + + org.apache.wayang + wayang-java + 1.1.2-SNAPSHOT + + + org.apache.wayang + wayang-spark + 1.1.2-SNAPSHOT + + + org.apache.wayang + wayang-jdbc-template + 1.1.2-SNAPSHOT + + + org.apache.wayang + wayang-postgres + 1.1.2-SNAPSHOT + + + + + org.apache.wayang + wayang-api-scala-java + 1.1.2-SNAPSHOT + + + + + org.scala-lang + scala-library + ${scala.version} + + + + + org.locationtech.jts + jts-core + 1.19.0 + test + + + org.locationtech.jts.io + jts-io-common + ${jts.version} + + + + + org.apache.sedona + sedona-spark-shaded-3.4_2.12 + ${sedona.version} + + + + + org.apache.spark + spark-core_2.12 + ${spark.version} + + + + + com.fasterxml.jackson.core + jackson-core + 
+ + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + + + org.junit.jupiter + junit-jupiter + test + + + org.mockito + mockito-core + ${mockito.version} + test + + + org.hsqldb + hsqldb + 2.7.1 + test + + + + + + + + + net.alchim31.maven + scala-maven-plugin + 4.9.5 + + + compile-scala + process-resources + + add-source + compile + + + ${scala.version} + ${project.build.sourceDirectory}/../scala + + + + test-compile-scala + process-test-resources + + testCompile + + + ${scala.version} + ${project.build.testSourceDirectory}/../scala + + + + + + + diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/Spatial.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/Spatial.java new file mode 100644 index 000000000..6fc97554e --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/Spatial.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.wayang.spatial; + +import org.apache.wayang.basic.operators.GeoJsonFileSource; +import org.apache.wayang.basic.operators.SpatialFilterOperator; +import org.apache.wayang.basic.operators.SpatialJoinOperator; +import org.apache.wayang.core.api.Configuration; +import org.apache.wayang.core.mapping.Mapping; +import org.apache.wayang.core.optimizer.channels.ChannelConversion; +import org.apache.wayang.core.platform.Platform; +import org.apache.wayang.core.plugin.Plugin; +import org.apache.wayang.spatial.mapping.Mappings; +import org.apache.wayang.java.Java; +import org.apache.wayang.java.platform.JavaPlatform; +import org.apache.wayang.spark.Spark; +import org.apache.wayang.spark.platform.SparkPlatform; +import org.apache.wayang.postgres.Postgres; +import org.apache.wayang.postgres.platform.PostgresPlatform; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; + +/** + * Provides {@link Plugin}s that enable usage of the {@link SpatialFilterOperator}, {@link SpatialJoinOperator}, + * and {@link GeoJsonFileSource}. + */ +public class Spatial { + + /** + * Enables use with the {@link JavaPlatform}, {@link SparkPlatform}, and {@link PostgresPlatform}. 
+ */ + private static final Plugin PLUGIN = new Plugin() { + + @Override + public Collection getRequiredPlatforms() { + return Arrays.asList(Java.platform(), Spark.platform(), Postgres.platform()); + } + + @Override + public Collection getMappings() { + Collection mappings = new ArrayList<>(); + mappings.addAll(Mappings.javaMappings); + mappings.addAll(Mappings.sparkMappings); + mappings.addAll(Mappings.postgresMappings); + return mappings; + } + + @Override + public Collection getChannelConversions() { + return Collections.emptyList(); + } + + @Override + public void setProperties(Configuration configuration) { + } + }; + + /** + * Retrieve a {@link Plugin} to use spatial operators on the + * {@link JavaPlatform}, {@link SparkPlatform}, and {@link PostgresPlatform}. + * + * @return the {@link Plugin} + */ + public static Plugin plugin() { + return PLUGIN; + } + + /** + * Enables use with the {@link JavaPlatform}. + */ + private static final Plugin JAVA_PLUGIN = new Plugin() { + + @Override + public Collection getRequiredPlatforms() { + return Collections.singleton(Java.platform()); + } + + @Override + public Collection getMappings() { + return Mappings.javaMappings; + } + + @Override + public Collection getChannelConversions() { + return Collections.emptyList(); + } + + @Override + public void setProperties(Configuration configuration) { + } + }; + + /** + * Retrieve a {@link Plugin} to use spatial operators on the {@link JavaPlatform}. + * + * @return the {@link Plugin} + */ + public static Plugin javaPlugin() { + return JAVA_PLUGIN; + } + + /** + * Enables use with the {@link SparkPlatform}. 
+ */ + public static final Plugin SPARK_PLUGIN = new Plugin() { + + @Override + public Collection getRequiredPlatforms() { + return Collections.singleton(Spark.platform()); + } + + @Override + public Collection getMappings() { + return Mappings.sparkMappings; + } + + @Override + public Collection getChannelConversions() { + return Collections.emptyList(); + } + + @Override + public void setProperties(Configuration configuration) { + } + }; + + /** + * Retrieve a {@link Plugin} to use spatial operators on the {@link SparkPlatform}. + * + * @return the {@link Plugin} + */ + public static Plugin sparkPlugin() { + return SPARK_PLUGIN; + } + + /** + * Enables use with the {@link PostgresPlatform}. + */ + public static final Plugin POSTGRES_PLUGIN = new Plugin() { + + @Override + public Collection getRequiredPlatforms() { + return Collections.singleton(Postgres.platform()); + } + + @Override + public Collection getMappings() { + return Mappings.postgresMappings; + } + + @Override + public Collection getChannelConversions() { + return Collections.emptyList(); + } + + @Override + public void setProperties(Configuration configuration) { + } + }; + + /** + * Retrieve a {@link Plugin} to use spatial operators on the {@link PostgresPlatform}. + * + * @return the {@link Plugin} + */ + public static Plugin postgresPlugin() { + return POSTGRES_PLUGIN; + } + +} diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/data/WayangGeometry.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/data/WayangGeometry.java new file mode 100644 index 000000000..e08682462 --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/data/WayangGeometry.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.data; + +import org.apache.wayang.core.api.spatial.SpatialGeometry; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.geom.GeometryFactory; +import org.locationtech.jts.io.*; +import org.locationtech.jts.io.geojson.GeoJsonReader; +import org.locationtech.jts.io.geojson.GeoJsonWriter; + +import java.util.HashMap; + +public class WayangGeometry implements SpatialGeometry { + + private final HashMap data; + + public WayangGeometry() { + this.data = new HashMap<>(); + } + + /** + * Backwards-compatible constructor, treats input as WKT. + */ + public WayangGeometry(String wkt) { + this(); + this.data.put("wkt", wkt); + } + /** + * Create WayangGeometry from string input. + * Detects WKT, WKB-hex, or GeoJSON and stores only that + * representation initially. Other conversions are done lazily. 
+ * + * @param input geometry string (WKT / WKB-hex / GeoJSON) + * @return WayangGeometry instance + */ + public static WayangGeometry fromStringInput(String input) { + String trimmed = input.trim(); + WayangGeometry wg = new WayangGeometry(); + + if (wg.looksLikeWKT(trimmed)) { + wg.data.put("wkt", trimmed); + } else if (wg.looksLikeGeoJSON(trimmed)) { + wg.data.put("geojson", trimmed); + } else { + // Assume WKB hex string + wg.data.put("wkb", trimmed); + } + + return wg; + } + + /** + * Create WayangGeometry from an existing JTS Geometry object. + * The geometry is stored, and all other formats (WKT/WKB/GeoJSON) + * are generated lazily when their getters are called. + * + * @param geometry JTS Geometry instance + * @return WayangGeometry wrapper + */ + public static WayangGeometry fromGeometry(Geometry geometry) { + if (geometry == null) { + throw new IllegalArgumentException("Geometry must not be null."); + } + WayangGeometry wg = new WayangGeometry(); + wg.data.put("geometry", geometry); + return wg; + } + + public static WayangGeometry fromGeoJson(String geoJson) { + WayangGeometry wg = new WayangGeometry(); + wg.data.put("geojson", geoJson); + // could directly create the respective geometry with jts + return wg; + } + + /** + * Get the geometry as WKT. If WKT is not yet available, it is + * generated from another stored representation and cached. + * + * @return WKT string + */ + @Override + public String toWKT() { + return getWKT(); + } + + /** + * Get the geometry as WKB hex string. If WKB is not yet available, + * it is generated from another stored representation and cached. + * + * @return WKB hex string + */ + @Override + public String toWKB() { + return getWKB(); + } + + /** + * Get the geometry as WKT. If WKT is not yet available, it is + * generated from another stored representation and cached. 
+ * + * @return WKT string + */ + public String getWKT() { + Object wktObj = this.data.get("wkt"); + if (wktObj != null) { + return wktObj.toString(); + } + + Geometry geometry = getGeometry(); + WKTWriter writer = new WKTWriter(); + String wkt = writer.write(geometry); + this.data.put("wkt", wkt); + return wkt; + } + + /** + * Get the geometry as WKB hex string. If WKB is not yet available, + * it is generated from another stored representation and cached. + * + * @return WKB hex string + */ + public String getWKB() { + Object wkbObj = this.data.get("wkb"); + if (wkbObj != null) { + return wkbObj.toString(); + } + + Geometry geometry = getGeometry(); + WKBWriter writer = new WKBWriter(); + byte[] wkbBytes = writer.write(geometry); + String wkbHex = WKBWriter.toHex(wkbBytes); + this.data.put("wkb", wkbHex); + return wkbHex; + } + + /** + * Get the geometry as GeoJSON string. If GeoJSON is not yet + * available, it is generated from another stored representation + * and cached. + * + * @return GeoJSON string + */ + public String getGeoJSON() { + Object geoJsonObj = this.data.get("geojson"); + if (geoJsonObj != null) { + return geoJsonObj.toString(); + } + + Geometry geometry = getGeometry(); + GeoJsonWriter writer = new GeoJsonWriter(); + String geoJson = writer.write(geometry); + this.data.put("geojson", geoJson); + return geoJson; + } + + /** + * Convert one of the stored geometry representations (WKT, WKB-hex, + * or GeoJSON) into a JTS Geometry object. + * + * The first available representation is used in this order: + * WKT -> WKB-hex -> GeoJSON + * + * The resulting Geometry is cached in the data map under "geometry". 
+ * + * @return JTS Geometry instance + */ + public Geometry getGeometry() { + Object geomObj = this.data.get("geometry"); + if (geomObj instanceof Geometry) { + return (Geometry) geomObj; + } + + GeometryFactory gf = new GeometryFactory(); + Geometry geometry; + + try { + if (this.data.containsKey("wkt")) { + String wkt = cleanSRID(this.data.get("wkt").toString().trim()); + WKTReader reader = new WKTReader(gf); + geometry = reader.read(wkt); + + } else if (this.data.containsKey("wkb")) { + String wkbHex = this.data.get("wkb").toString().trim(); + byte[] wkbBytes = WKBReader.hexToBytes(wkbHex); + WKBReader reader = new WKBReader(gf); + geometry = reader.read(wkbBytes); + + } else if (this.data.containsKey("geojson")) { + String geoJson = this.data.get("geojson").toString().trim(); + GeoJsonReader reader = new GeoJsonReader(gf); + geometry = reader.read(geoJson); + + } else { + throw new IllegalStateException("No geometry representation available in WayangGeometry."); + } + } catch (ParseException e) { + throw new RuntimeException("Failed to parse geometry from stored representations.", e); + } + + this.data.put("geometry", geometry); + return geometry; + } + + // ---------- Helpers ---------- // + + private boolean looksLikeWKT(String s) { + return s.startsWith("SRID=") || + s.startsWith("POINT") || + s.startsWith("LINESTRING") || + s.startsWith("POLYGON") || + s.startsWith("MULTI") || + s.startsWith("GEOMETRYCOLLECTION"); + } + + private boolean looksLikeGeoJSON(String s) { + return s.startsWith("{") && s.contains("\"type\""); + } + + private String cleanSRID(String wkt) { + if (wkt.startsWith("SRID=")) { + int idx = wkt.indexOf(';'); + if (idx > 0 && idx < wkt.length() - 1) { + return wkt.substring(idx + 1); + } + } + return wkt; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof WayangGeometry)) return false; + + WayangGeometry that = (WayangGeometry) o; + + Geometry g1 = this.getGeometry(); + Geometry g2 = 
that.getGeometry(); + + if (g1 == null || g2 == null) { + return g1 == g2; + } + + // Delegate to JTS Geometry equality (structural / topological, depending on JTS version). + return g1.equals(g2); + } + + @Override + public int hashCode() { + Geometry geometry = this.getGeometry(); + return geometry != null ? geometry.hashCode() : 0; + } + +} diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/function/JtsSpatialPredicate.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/function/JtsSpatialPredicate.java new file mode 100644 index 000000000..3d95f450e --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/function/JtsSpatialPredicate.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.wayang.spatial.function; + +import org.apache.wayang.core.api.spatial.SpatialPredicate; +import org.locationtech.jts.geom.Geometry; + +import java.util.Arrays; +import java.util.function.BiPredicate; + +public enum JtsSpatialPredicate { + + INTERSECTS("INTERSECTS", "ST_Intersects", Geometry::intersects), + CONTAINS("CONTAINS", "ST_Contains", Geometry::contains), + WITHIN("WITHIN", "ST_Within", Geometry::within), + TOUCHES("TOUCHES", "ST_Touches", Geometry::touches), + OVERLAPS("OVERLAPS", "ST_Overlaps", Geometry::overlaps), + CROSSES("CROSSES", "ST_Crosses", Geometry::crosses), + EQUALS("EQUALS", "ST_Equals", Geometry::equalsTopo); + + private final String opName; + private final String sqlFunctionName; + private final BiPredicate predicate; + + JtsSpatialPredicate(String opName, + String sqlFunctionName, + BiPredicate predicate) { + this.opName = opName; + this.sqlFunctionName = sqlFunctionName; + this.predicate = predicate; + } + + public static JtsSpatialPredicate fromString(String opName) { + return Arrays.stream(values()) + .filter(r -> r.opName.equalsIgnoreCase(opName)) + .findFirst() + .orElseThrow(() -> new IllegalArgumentException( + "Unsupported spatial filter type: " + opName)); + } + + /** + * Convert from the core module's {@link SpatialPredicate} to this enum. 
+ * + * @param predicate the spatial predicate + * @return the corresponding JtsSpatialPredicate + */ + public static JtsSpatialPredicate of(SpatialPredicate predicate) { + return switch (predicate) { + case INTERSECTS -> INTERSECTS; + case CONTAINS -> CONTAINS; + case WITHIN -> WITHIN; + case OVERLAPS -> OVERLAPS; + case TOUCHES -> TOUCHES; + case CROSSES -> CROSSES; + case EQUALS -> EQUALS; + }; + } + + public boolean test(Geometry candidate, Geometry reference) { + return predicate.test(candidate, reference); + } + + public String toSql(String columnExpr, String geomLiteral) { + return String.format("%s(%s, %s)", this.sqlFunctionName, columnExpr, geomLiteral); + } + + public String toSql(String leftTable, String leftKey, String rightTable, String rightKey) { + return String.format("%s(%s.%s, %s.%s)", this.sqlFunctionName, leftTable, leftKey, rightTable, rightKey); + } +} diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/Mappings.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/Mappings.java new file mode 100644 index 000000000..7b70039cb --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/Mappings.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.mapping;
+
+import org.apache.wayang.basic.operators.GeoJsonFileSource;
+import org.apache.wayang.basic.operators.SpatialFilterOperator;
+import org.apache.wayang.basic.operators.SpatialJoinOperator;
+import org.apache.wayang.core.mapping.Mapping;
+import org.apache.wayang.java.platform.JavaPlatform;
+import org.apache.wayang.spark.platform.SparkPlatform;
+import org.apache.wayang.postgres.platform.PostgresPlatform;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+/**
+ * {@link Mapping}s for the {@link SpatialFilterOperator}, {@link SpatialJoinOperator}, and {@link GeoJsonFileSource}, grouped per target platform.
+ */
+public class Mappings {
+
+    /**
+     * {@link Mapping}s towards the {@link JavaPlatform}: spatial filter, spatial join, and GeoJSON file source.
+     */
+    public static Collection javaMappings = Arrays.asList(
+            new org.apache.wayang.spatial.mapping.java.SpatialFilterMapping(),
+            new org.apache.wayang.spatial.mapping.java.SpatialJoinMapping(),
+            new org.apache.wayang.spatial.mapping.java.GeoJsonFileSourceMapping()
+    );
+
+    /**
+     * {@link Mapping}s towards the {@link SparkPlatform}: spatial filter and spatial join (no GeoJSON source mapping is listed).
+     */
+    public static Collection sparkMappings = Arrays.asList(
+            new org.apache.wayang.spatial.mapping.spark.SpatialFilterMapping(),
+            new org.apache.wayang.spatial.mapping.spark.SpatialJoinMapping()
+    );
+
+    /**
+     * {@link Mapping}s towards the {@link PostgresPlatform}: spatial filter and spatial join (no GeoJSON source mapping is listed).
+     */
+    public static Collection postgresMappings = Arrays.asList(
+            new org.apache.wayang.spatial.mapping.postgres.SpatialFilterMapping(),
+            new org.apache.wayang.spatial.mapping.postgres.SpatialJoinMapping()
+    );
+
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/java/GeoJsonFileSourceMapping.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/java/GeoJsonFileSourceMapping.java
new file mode 100644
index 000000000..4f101a314
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/java/GeoJsonFileSourceMapping.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.mapping.java;
+
+import org.apache.wayang.basic.operators.GeoJsonFileSource;
+import org.apache.wayang.core.mapping.Mapping;
+import org.apache.wayang.core.mapping.OperatorPattern;
+import org.apache.wayang.core.mapping.SubplanPattern;
+import org.apache.wayang.core.mapping.PlanTransformation;
+import org.apache.wayang.core.mapping.ReplacementSubplanFactory;
+import org.apache.wayang.spatial.operators.java.JavaGeoJsonFileSource;
+import org.apache.wayang.java.platform.JavaPlatform;
+
+import java.util.Collection;
+import java.util.Collections;
+
+/**
+ * Mapping from {@link GeoJsonFileSource} to {@link JavaGeoJsonFileSource}, registered for the {@link JavaPlatform}.
+ */
+public class GeoJsonFileSourceMapping implements Mapping {
+    @Override
+    public Collection getTransformations() { // exactly one transformation is exposed
+        return Collections.singleton(new PlanTransformation(
+                this.createSubplanPattern(), // what to match
+                this.createReplacementSubplanFactory(), // what to substitute for a match
+                JavaPlatform.getInstance()
+        ));
+    }
+
+    private SubplanPattern createSubplanPattern() { // single-operator pattern named "source"
+        final OperatorPattern operatorPattern = new OperatorPattern(
+                "source", new GeoJsonFileSource((String) null), false // template operator built with a null input URL
+        );
+        return SubplanPattern.createSingleton(operatorPattern);
+    }
+
+    private ReplacementSubplanFactory createReplacementSubplanFactory() {
+        return new ReplacementSubplanFactory.OfSingleOperators(
+                (matchedOperator, epoch) -> new JavaGeoJsonFileSource(matchedOperator).at(epoch) // copy-construct from the match, then apply at(epoch)
+        );
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/java/SpatialFilterMapping.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/java/SpatialFilterMapping.java
new file mode 100644
index 000000000..8644fcb3b
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/java/SpatialFilterMapping.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.mapping.java; + +import org.apache.wayang.basic.operators.SpatialFilterOperator; +import org.apache.wayang.core.mapping.*; +import org.apache.wayang.core.types.DataSetType; +import org.apache.wayang.spatial.operators.java.JavaSpatialFilterOperator; +import org.apache.wayang.java.platform.JavaPlatform; + +import java.util.Collection; +import java.util.Collections; + +/** + * Mapping from {@link SpatialFilterOperator} to {@link JavaSpatialFilterOperator}. 
+ */
+@SuppressWarnings("unchecked")
+public class SpatialFilterMapping implements Mapping {
+
+    @Override
+    public Collection getTransformations() { // exactly one transformation is exposed
+        return Collections.singleton(
+                new PlanTransformation(
+                        this.createSubplanPattern(), // what to match
+                        this.createReplacementSubplanFactory(), // what to substitute for a match
+                        JavaPlatform.getInstance()
+                )
+        );
+    }
+
+    private SubplanPattern createSubplanPattern() { // single-operator pattern named "spatialFilter"
+        final OperatorPattern operatorPattern = new OperatorPattern(
+                "spatialFilter", new SpatialFilterOperator(null, null, DataSetType.none(), null), false); // template operator: null arguments, DataSetType.none()
+        return SubplanPattern.createSingleton(operatorPattern);
+    }
+
+    private ReplacementSubplanFactory createReplacementSubplanFactory() {
+        return new ReplacementSubplanFactory.OfSingleOperators(
+                (matchedOperator, epoch) -> new JavaSpatialFilterOperator(matchedOperator).at(epoch) // copy-construct from the match, then apply at(epoch)
+        );
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/java/SpatialJoinMapping.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/java/SpatialJoinMapping.java
new file mode 100644
index 000000000..e68196eaa
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/java/SpatialJoinMapping.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.mapping.java;
+
+import org.apache.wayang.basic.operators.SpatialJoinOperator;
+import org.apache.wayang.core.mapping.*;
+import org.apache.wayang.core.types.DataSetType;
+import org.apache.wayang.spatial.operators.java.JavaSpatialJoinOperator;
+import org.apache.wayang.java.platform.JavaPlatform;
+
+import java.util.Collection;
+import java.util.Collections;
+
+/**
+ * Mapping from {@link SpatialJoinOperator} to {@link JavaSpatialJoinOperator}, registered for the {@link JavaPlatform}.
+ */
+public class SpatialJoinMapping implements Mapping {
+
+    @Override
+    public Collection getTransformations() { // exactly one transformation is exposed
+        return Collections.singleton(new PlanTransformation(
+                this.createSubplanPattern(), // what to match
+                this.createReplacementSubplanFactory(), // what to substitute for a match
+                JavaPlatform.getInstance()
+        ));
+    }
+
+    private SubplanPattern createSubplanPattern() { // single-operator pattern named "spatialJoin"
+        final OperatorPattern operatorPattern = new OperatorPattern<>(
+                "spatialJoin", new SpatialJoinOperator<>(null, null, DataSetType.none(), DataSetType.none(), null), false // template operator: null arguments, DataSetType.none() on both inputs
+        );
+        return SubplanPattern.createSingleton(operatorPattern);
+    }
+
+    private ReplacementSubplanFactory createReplacementSubplanFactory() {
+        return new ReplacementSubplanFactory.OfSingleOperators>(
+                (matchedOperator, epoch) -> new JavaSpatialJoinOperator<>(matchedOperator).at(epoch) // copy-construct from the match, then apply at(epoch)
+        );
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/postgres/SpatialFilterMapping.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/postgres/SpatialFilterMapping.java
new file mode 100644
index 000000000..9841175b2
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/postgres/SpatialFilterMapping.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.mapping.postgres; + +import org.apache.wayang.basic.operators.SpatialFilterOperator; +import org.apache.wayang.core.mapping.*; +import org.apache.wayang.core.types.DataSetType; +import org.apache.wayang.spatial.operators.postgres.PostgresSpatialFilterOperator; +import org.apache.wayang.postgres.platform.PostgresPlatform; + +import java.util.Collection; +import java.util.Collections; + + +/** + * Mapping from {@link SpatialFilterOperator} to {@link PostgresSpatialFilterOperator}. 
+ */
+@SuppressWarnings("unchecked")
+public class SpatialFilterMapping implements Mapping {
+
+    @Override
+    public Collection getTransformations() { // exactly one transformation is exposed
+        return Collections.singleton(new PlanTransformation(
+                this.createSubplanPattern(), // what to match
+                this.createReplacementSubplanFactory(), // what to substitute for a match
+                PostgresPlatform.getInstance()
+        ));
+    }
+
+    private SubplanPattern createSubplanPattern() { // single-operator pattern named "spatialFilter"
+        final OperatorPattern operatorPattern = new OperatorPattern<>(
+                "spatialFilter", new SpatialFilterOperator(null, null, DataSetType.none(), null), false // template operator: null arguments, DataSetType.none()
+        ).withAdditionalTest(op -> op.getKeyDescriptor().getSqlImplementation() != null); // only accept operators whose key descriptor carries a SQL implementation
+        return SubplanPattern.createSingleton(operatorPattern);
+    }
+
+    private ReplacementSubplanFactory createReplacementSubplanFactory() {
+        return new ReplacementSubplanFactory.OfSingleOperators(
+                (matchedOperator, epoch) -> new PostgresSpatialFilterOperator(matchedOperator).at(epoch) // copy-construct from the match, then apply at(epoch)
+        );
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/postgres/SpatialJoinMapping.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/postgres/SpatialJoinMapping.java
new file mode 100644
index 000000000..510c77b96
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/postgres/SpatialJoinMapping.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.mapping.postgres; + +import org.apache.wayang.basic.operators.SpatialJoinOperator; +import org.apache.wayang.core.mapping.*; +import org.apache.wayang.core.types.DataSetType; +import org.apache.wayang.spatial.operators.postgres.PostgresSpatialJoinOperator; +import org.apache.wayang.postgres.platform.PostgresPlatform; + +import java.util.Collection; +import java.util.Collections; + + +/** + * Mapping from {@link SpatialJoinOperator} to {@link PostgresSpatialJoinOperator}. + */ +@SuppressWarnings("unchecked") +public class SpatialJoinMapping implements Mapping { + + @Override + public Collection getTransformations() { + return Collections.singleton(new PlanTransformation( + this.createSubplanPattern(), + this.createReplacementSubplanFactory(), + PostgresPlatform.getInstance() + )); + } + + private SubplanPattern createSubplanPattern() { + final OperatorPattern operatorPattern = new OperatorPattern<>( + "spatialFilter", new SpatialJoinOperator(null, null, DataSetType.none(), DataSetType.none(), null), false + ).withAdditionalTest(op -> op.getKeyDescriptor0().getSqlImplementation() != null + && op.getKeyDescriptor1().getSqlImplementation() != null); // require SQL pushdown support + return SubplanPattern.createSingleton(operatorPattern); + } + + private ReplacementSubplanFactory createReplacementSubplanFactory() { + return new ReplacementSubplanFactory.OfSingleOperators( + (matchedOperator, epoch) -> new PostgresSpatialJoinOperator(matchedOperator).at(epoch) + ); + } +} diff --git 
a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/spark/SpatialFilterMapping.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/spark/SpatialFilterMapping.java new file mode 100644 index 000000000..c8362720c --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/spark/SpatialFilterMapping.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.mapping.spark; + +import org.apache.wayang.basic.operators.SpatialFilterOperator; +import org.apache.wayang.core.mapping.*; +import org.apache.wayang.core.types.DataSetType; +import org.apache.wayang.spatial.operators.spark.SparkSpatialFilterOperator; +import org.apache.wayang.spark.platform.SparkPlatform; + +import java.util.Collection; +import java.util.Collections; + +/** + * Mapping from {@link SpatialFilterOperator} to {@link SparkSpatialFilterOperator}. 
+ */
+@SuppressWarnings("unchecked")
+public class SpatialFilterMapping implements Mapping {
+
+    @Override
+    public Collection getTransformations() { // exactly one transformation is exposed
+        return Collections.singleton(new PlanTransformation(
+                this.createSubplanPattern(), // what to match
+                this.createReplacementSubplanFactory(), // what to substitute for a match
+                SparkPlatform.getInstance()
+        ));
+    }
+
+    private SubplanPattern createSubplanPattern() { // single-operator pattern named "spatialFilter"
+        final OperatorPattern operatorPattern = new OperatorPattern<>(
+                "spatialFilter", new SpatialFilterOperator(null, null, DataSetType.none(), null), false // template operator: null arguments, DataSetType.none()
+        );
+        return SubplanPattern.createSingleton(operatorPattern);
+    }
+
+    private ReplacementSubplanFactory createReplacementSubplanFactory() {
+        return new ReplacementSubplanFactory.OfSingleOperators(
+                (matchedOperator, epoch) -> new SparkSpatialFilterOperator(matchedOperator).at(epoch) // copy-construct from the match, then apply at(epoch)
+        );
+    }
+
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/spark/SpatialJoinMapping.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/spark/SpatialJoinMapping.java
new file mode 100644
index 000000000..4c90f94ae
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/mapping/spark/SpatialJoinMapping.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.mapping.spark;
+
+import org.apache.wayang.basic.operators.SpatialJoinOperator;
+import org.apache.wayang.core.mapping.*;
+import org.apache.wayang.core.types.DataSetType;
+import org.apache.wayang.spatial.operators.spark.SparkSpatialJoinOperator;
+import org.apache.wayang.spark.platform.SparkPlatform;
+
+import java.util.Collection;
+import java.util.Collections;
+
+/**
+ * Mapping from {@link SpatialJoinOperator} to {@link SparkSpatialJoinOperator}, registered for the {@link SparkPlatform}.
+ */
+@SuppressWarnings("unchecked")
+public class SpatialJoinMapping implements Mapping {
+
+    @Override
+    public Collection getTransformations() { // exactly one transformation is exposed
+        return Collections.singleton(new PlanTransformation(
+                this.createSubplanPattern(), // what to match
+                this.createReplacementSubplanFactory(), // what to substitute for a match
+                SparkPlatform.getInstance()
+        ));
+    }
+
+    private SubplanPattern createSubplanPattern() { // single-operator pattern named "spatialJoin"
+        final OperatorPattern operatorPattern = new OperatorPattern<>(
+                "spatialJoin", new SpatialJoinOperator<>( // template operator: null arguments, DataSetType.none() on both inputs
+                        null,
+                        null,
+                        DataSetType.none(),
+                        DataSetType.none(),
+                        null), false
+        );
+        return SubplanPattern.createSingleton(operatorPattern);
+    }
+
+    private ReplacementSubplanFactory createReplacementSubplanFactory() {
+        return new ReplacementSubplanFactory.OfSingleOperators>(
+                (matchedOperator, epoch) -> new SparkSpatialJoinOperator<>( // copy-construct from the match, then apply at(epoch)
+                        matchedOperator
+                ).at(epoch)
+        );
+    }
+
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/java/JavaGeoJsonFileSource.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/java/JavaGeoJsonFileSource.java
new file mode 100644
index 000000000..661b2ff0a
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/java/JavaGeoJsonFileSource.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.operators.java; + +import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.wayang.basic.data.Record; +import org.apache.wayang.spatial.data.WayangGeometry; +import org.apache.wayang.basic.operators.GeoJsonFileSource; +import org.apache.wayang.core.api.exception.WayangException; +import org.apache.wayang.core.optimizer.OptimizationContext; +import org.apache.wayang.core.plan.wayangplan.ExecutionOperator; +import org.apache.wayang.core.platform.ChannelDescriptor; +import org.apache.wayang.core.platform.ChannelInstance; +import org.apache.wayang.core.platform.lineage.ExecutionLineageNode; +import org.apache.wayang.core.util.Tuple; +import org.apache.wayang.java.channels.StreamChannel; +import org.apache.wayang.java.execution.JavaExecutor; +import org.apache.wayang.java.operators.JavaExecutionOperator; + +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.io.InputStream; +import java.util.*; +import java.util.stream.Stream; + +/** + * Java execution operator that parses a GeoJSON 
document and emits each feature as a {@link Record}. + * Each emitted Record is created from the feature JSON text. The Record consists of the geometry and properties + * of the feature (i.e., the Record's schema has two fields: "geometry" and "properties", where "geometry" + * is of type {@link WayangGeometry} and "properties" is of type {@linkplain Map}). + */ +public class JavaGeoJsonFileSource extends GeoJsonFileSource implements JavaExecutionOperator { + + public JavaGeoJsonFileSource(String inputUrl) { + super(inputUrl); + } + + public JavaGeoJsonFileSource(GeoJsonFileSource that) { + super(that); + } + + public static Stream readFeatureCollectionFromFile(final String path) { + try { + final URI uri = URI.create(path); + + // use streaming parser to avoid loading entire file into memory + ObjectMapper objectMapper = new ObjectMapper(); + JsonFactory jsonFactory = objectMapper.getFactory(); + List records = new ArrayList<>(); + + try (InputStream in = Files.newInputStream(Paths.get(uri.getPath())); + JsonParser parser = jsonFactory.createParser(in)) { + + // advance to start object + if (parser.nextToken() != JsonToken.START_OBJECT) { + throw new WayangException("Expected JSON object at root"); + } + + // find the "features" array + while (parser.nextToken() != null) { + if (parser.currentToken() == JsonToken.FIELD_NAME + && "features".equals(parser.getCurrentName())) { + if (parser.nextToken() != JsonToken.START_ARRAY) { + throw new WayangException("Expected 'features' to be an array"); + } + // iterate features + while (parser.nextToken() != JsonToken.END_ARRAY) { + // parser is at START_OBJECT of a feature + JsonNode featureNode = objectMapper.readTree(parser); + JsonNode geometryNode = featureNode.path("geometry"); + JsonNode propertiesNode = featureNode.path("properties"); + + String geometryJsonString = objectMapper.writeValueAsString(geometryNode); + WayangGeometry wayangGeometry = WayangGeometry.fromGeoJson(geometryJsonString); + + Map propertiesMap = 
objectMapper.convertValue(propertiesNode, Map.class); + + Record record = new Record(); + record.addField(wayangGeometry); + record.addField(propertiesMap); + records.add(record); + } + break; + } + } + } + return records.stream(); + } catch (final Exception e) { + throw new WayangException(e); + } + } + + @Override + public Tuple, Collection> evaluate( + final ChannelInstance[] inputs, + final ChannelInstance[] outputs, + final JavaExecutor javaExecutor, + final OptimizationContext.OperatorContext operatorContext) { + + assert outputs.length == this.getNumOutputs(); + + final String path = this.getInputUrl(); + final Stream wayangGeometryStream = readFeatureCollectionFromFile(path); + + ((StreamChannel.Instance) outputs[0]).accept(wayangGeometryStream); + + return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext); + } + + @Override + public JavaGeoJsonFileSource copy() { + return new JavaGeoJsonFileSource(this.getInputUrl()); + } + + @Override + public List getSupportedInputChannels(final int index) { + throw new UnsupportedOperationException(String.format("%s does not have input channels.", this)); + } + + @Override + public List getSupportedOutputChannels(final int index) { + assert index <= this.getNumOutputs() || (index == 0 && this.getNumOutputs() == 0); + return Collections.singletonList(StreamChannel.DESCRIPTOR); + } +} diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/java/JavaSpatialFilterOperator.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/java/JavaSpatialFilterOperator.java new file mode 100644 index 000000000..5ec2ac845 --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/java/JavaSpatialFilterOperator.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.operators.java; + +import org.apache.wayang.basic.operators.SpatialFilterOperator; +import org.apache.wayang.core.api.spatial.SpatialGeometry; +import org.apache.wayang.core.api.spatial.SpatialPredicate; +import org.apache.wayang.core.function.FunctionDescriptor; +import org.apache.wayang.core.optimizer.OptimizationContext; +import org.apache.wayang.core.types.DataSetType; +import org.apache.wayang.core.plan.wayangplan.ExecutionOperator; +import org.apache.wayang.core.platform.ChannelDescriptor; +import org.apache.wayang.core.platform.ChannelInstance; +import org.apache.wayang.core.platform.lineage.ExecutionLineageNode; +import org.apache.wayang.core.util.Tuple; +import org.apache.wayang.java.channels.CollectionChannel; +import org.apache.wayang.java.channels.JavaChannelInstance; +import org.apache.wayang.java.channels.StreamChannel; +import org.apache.wayang.java.execution.JavaExecutor; +import org.apache.wayang.java.operators.JavaExecutionOperator; +import org.apache.wayang.spatial.data.WayangGeometry; +import org.apache.wayang.spatial.function.JtsSpatialPredicate; +import org.locationtech.jts.geom.Geometry; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import 
java.util.function.Function; +import java.util.function.Predicate; + +/** + * Java implementation of the {@link SpatialFilterOperator}. + */ +public class JavaSpatialFilterOperator + extends SpatialFilterOperator + implements JavaExecutionOperator { + + /** + * Creates a new instance. + * + * @param relation the type of spatial filter (e.g., "INTERSECTS", "CONTAINS", "WITHIN") + */ + public JavaSpatialFilterOperator(SpatialPredicate relation, + FunctionDescriptor.SerializableFunction keyExtractor, + DataSetType inputClassDatasetType, + SpatialGeometry geometry) { + super(relation, keyExtractor, inputClassDatasetType, geometry); + } + + public JavaSpatialFilterOperator(SpatialFilterOperator that) { + super(that); + } + + @Override + @SuppressWarnings("unchecked") + public Tuple, Collection> evaluate( + ChannelInstance[] inputs, + ChannelInstance[] outputs, + JavaExecutor javaExecutor, + OptimizationContext.OperatorContext operatorContext) { + + final Predicate filterPredicate = this.buildSpatialPredicate(javaExecutor); + ((StreamChannel.Instance) outputs[0]).accept( + ((JavaChannelInstance) inputs[0]).provideStream().filter(filterPredicate) + ); + + return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext); + } + + private Predicate buildSpatialPredicate(JavaExecutor javaExecutor) { + WayangGeometry wRef = (WayangGeometry) this.referenceGeometry; + final Geometry reference = wRef.getGeometry(); + final Function keyExtractor = javaExecutor.getCompiler().compile(this.keyDescriptor); + JtsSpatialPredicate predicate = JtsSpatialPredicate.of(this.predicateType); + + return input -> predicate.test(((WayangGeometry) keyExtractor.apply(input)).getGeometry(), reference); + } + + @Override + public List getSupportedInputChannels(int index) { + assert index <= this.getNumInputs() || (index == 0 && this.getNumInputs() == 0); + if (this.getInput(index).isBroadcast()) return Collections.singletonList(CollectionChannel.DESCRIPTOR); + return 
Arrays.asList(CollectionChannel.DESCRIPTOR, StreamChannel.DESCRIPTOR); + } + + @Override + public List getSupportedOutputChannels(int index) { + assert index <= this.getNumOutputs() || (index == 0 && this.getNumOutputs() == 0); + return Collections.singletonList(StreamChannel.DESCRIPTOR); + } + +} diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/java/JavaSpatialJoinOperator.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/java/JavaSpatialJoinOperator.java new file mode 100644 index 000000000..00a8f3d10 --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/java/JavaSpatialJoinOperator.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.wayang.spatial.operators.java;
+
+import org.apache.wayang.basic.data.Tuple2;
+import org.apache.wayang.basic.operators.SpatialJoinOperator;
+import org.apache.wayang.core.api.spatial.SpatialGeometry;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.core.function.FunctionDescriptor;
+import org.apache.wayang.core.function.TransformationDescriptor;
+import org.apache.wayang.core.optimizer.OptimizationContext;
+import org.apache.wayang.core.types.DataSetType;
+import org.apache.wayang.core.plan.wayangplan.ExecutionOperator;
+import org.apache.wayang.core.platform.ChannelDescriptor;
+import org.apache.wayang.core.platform.ChannelInstance;
+import org.apache.wayang.core.platform.lineage.ExecutionLineageNode;
+import org.apache.wayang.core.util.Tuple;
+import org.apache.wayang.java.channels.CollectionChannel;
+import org.apache.wayang.java.channels.StreamChannel;
+import org.apache.wayang.java.execution.JavaExecutor;
+import org.apache.wayang.java.operators.JavaExecutionOperator;
+import org.apache.wayang.spatial.data.WayangGeometry;
+import org.apache.wayang.spatial.function.JtsSpatialPredicate;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.index.strtree.STRtree;
+
+import java.util.*;
+import java.util.function.Function;
+import java.util.stream.Stream;
+
+/**
+ * Java implementation of the {@link SpatialJoinOperator}.
+ * <p>
+ * The second input is loaded into a JTS {@link STRtree}; every element of the first input
+ * then probes that index and the candidates are checked with the configured predicate.
+ */
+public class JavaSpatialJoinOperator
+        extends SpatialJoinOperator
+        implements JavaExecutionOperator {
+
+    /**
+     * Creates a new instance from key descriptors.
+     *
+     * @param keyDescriptor0 descriptor of the function yielding the geometry of a first-input element
+     * @param keyDescriptor1 descriptor of the function yielding the geometry of a second-input element
+     * @param inputType0     type of the first input
+     * @param inputType1     type of the second input
+     * @param predicate      the spatial predicate to join on
+     */
+    public JavaSpatialJoinOperator(TransformationDescriptor keyDescriptor0,
+                                   TransformationDescriptor keyDescriptor1,
+                                   DataSetType inputType0,
+                                   DataSetType inputType1,
+                                   SpatialPredicate predicate) {
+        super(keyDescriptor0, keyDescriptor1, inputType0, inputType1, predicate);
+    }
+
+    /**
+     * Creates a new instance from plain key-extractor functions and input classes.
+     *
+     * @param keyExtractor0 function yielding the geometry of a first-input element
+     * @param keyExtractor1 function yielding the geometry of a second-input element
+     * @param input0Class   class of the first input's elements
+     * @param input1Class   class of the second input's elements
+     * @param predicate     the spatial predicate to join on
+     */
+    public JavaSpatialJoinOperator(FunctionDescriptor.SerializableFunction keyExtractor0,
+                                   FunctionDescriptor.SerializableFunction keyExtractor1,
+                                   Class input0Class,
+                                   Class input1Class,
+                                   SpatialPredicate predicate) {
+        super(keyExtractor0, keyExtractor1, input0Class, input1Class, predicate);
+    }
+
+
+    /**
+     * Copies an instance.
+     *
+     * @param that the operator to copy
+     */
+    public JavaSpatialJoinOperator(SpatialJoinOperator that) {
+        super(that);
+    }
+
+    /**
+     * Joins the two inputs and hands the resulting stream of {@link Tuple2}s to the output channel.
+     *
+     * @param inputs          the two input channel instances (index 0 is probed, index 1 is indexed)
+     * @param outputs         the single output channel instance
+     * @param javaExecutor    provides the compiler for the key descriptors
+     * @param operatorContext passed on to the lineage modelling
+     * @return the result of {@link ExecutionOperator#modelLazyExecution}
+     */
+    @Override
+    public Tuple, Collection> evaluate(
+            ChannelInstance[] inputs,
+            ChannelInstance[] outputs,
+            JavaExecutor javaExecutor,
+            OptimizationContext.OperatorContext operatorContext) {
+
+        assert inputs.length == this.getNumInputs();
+        assert outputs.length == this.getNumOutputs();
+
+        final Function keyExtractor0 =
+                javaExecutor.getCompiler().compile(this.keyDescriptor0);
+        final Function keyExtractor1 =
+                javaExecutor.getCompiler().compile(this.keyDescriptor1);
+
+        final Stream leftStream =
+                ((org.apache.wayang.java.channels.JavaChannelInstance) inputs[0])
+                        .provideStream();
+        final Stream rightStream =
+                ((org.apache.wayang.java.channels.JavaChannelInstance) inputs[1])
+                        .provideStream();
+
+        JtsSpatialPredicate predicate = JtsSpatialPredicate.of(this.predicateType);
+
+        STRtree index = new STRtree();
+
+        // Build side: consume the whole right input now and index each element under the
+        // envelope of its geometry. Elements without a geometry are left out of the index.
+        rightStream.forEach(v1 -> {
+            WayangGeometry wGeom = (WayangGeometry) keyExtractor1.apply(v1);
+            Geometry geom = (wGeom == null) ? null : wGeom.getGeometry();
+            if (geom != null) {
+                index.insert(geom.getEnvelopeInternal(), new AbstractMap.SimpleEntry<>(v1, geom));
+            }
+        });
+
+        index.build();
+
+        // Probe side: for each left element, fetch index candidates by envelope and keep
+        // those for which the predicate holds on (left geometry, right geometry).
+        final Stream> joinStream = leftStream.flatMap(v0 -> {
+            Geometry geom0 = Optional.ofNullable((WayangGeometry) keyExtractor0.apply(v0))
+                    .map(WayangGeometry::getGeometry).orElse(null);
+            // Left elements without a geometry produce no join partners.
+            if (geom0 == null) return Stream.empty();
+
+            List> candidates = index.query(geom0.getEnvelopeInternal());
+
+            return candidates.stream()
+                    .filter(e -> predicate.test(geom0, e.getValue()))
+                    .map(e -> new Tuple2<>(v0, e.getKey()));
+        });
+
+        // Push the result into the output channel.
+        ((org.apache.wayang.java.channels.StreamChannel.Instance) outputs[0]).accept(joinStream);
+
+        // Use the standard lazy-execution lineage modeling.
+        // NOTE(review): the right input was already consumed eagerly above to build the index;
+        // confirm that purely lazy lineage modelling is what is intended here.
+        return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
+    }
+
+    /**
+     * Broadcast inputs are accepted as collections only; any other input may be a collection
+     * or a stream.
+     */
+    @Override
+    public List getSupportedInputChannels(int index) {
+        assert index <= this.getNumInputs() || (index == 0 && this.getNumInputs() == 0);
+        if (this.getInput(index).isBroadcast()) return Collections.singletonList(CollectionChannel.DESCRIPTOR);
+        return Arrays.asList(CollectionChannel.DESCRIPTOR, StreamChannel.DESCRIPTOR);
+    }
+
+    /**
+     * The join result is always offered as a stream.
+     */
+    @Override
+    public List getSupportedOutputChannels(int index) {
+        assert index <= this.getNumOutputs() || (index == 0 && this.getNumOutputs() == 0);
+        return Collections.singletonList(StreamChannel.DESCRIPTOR);
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialFilterOperator.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialFilterOperator.java
new file mode 100644
index 000000000..aab8d65c4
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialFilterOperator.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.operators.jdbc;
+
+import org.apache.wayang.basic.operators.SpatialFilterOperator;
+import org.apache.wayang.core.api.spatial.SpatialGeometry;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.core.function.FunctionDescriptor;
+import org.apache.wayang.core.types.DataSetType;
+import org.apache.wayang.spatial.function.JtsSpatialPredicate;
+import org.apache.wayang.jdbc.compiler.FunctionCompiler;
+import org.apache.wayang.jdbc.operators.JdbcExecutionOperator;
+
+import java.sql.Connection;
+
+
+/**
+ * Template for JDBC-based {@link SpatialFilterOperator}.
+ * <p>
+ * The filter is rendered as a SQL condition that compares a geometry column with an
+ * {@code ST_GeomFromText} literal built from the reference geometry.
+ */
+public abstract class JdbcSpatialFilterOperator extends SpatialFilterOperator implements JdbcExecutionOperator {
+
+    // TODO: Check which SRID to use.
+    /** SRID attached to the reference geometry literal. */
+    private static final int DEFAULT_SRID = 4326;
+
+    /**
+     * Creates a new instance.
+     *
+     * @param relation              the spatial predicate to evaluate (e.g., INTERSECTS, CONTAINS, WITHIN)
+     * @param keyExtractor          function yielding the geometry of an input element
+     * @param inputClassDatasetType type of the input
+     * @param geometry              the reference geometry to filter against
+     */
+    public JdbcSpatialFilterOperator(SpatialPredicate relation,
+                                     FunctionDescriptor.SerializableFunction keyExtractor,
+                                     DataSetType inputClassDatasetType,
+                                     SpatialGeometry geometry) {
+        super(relation, keyExtractor, inputClassDatasetType, geometry);
+    }
+
+    /**
+     * Copies an instance.
+     *
+     * @param that the operator to copy
+     */
+    public JdbcSpatialFilterOperator(SpatialFilterOperator that) {
+        super(that);
+    }
+
+    /**
+     * Renders this filter as a SQL condition.
+     *
+     * @param connection not used by this implementation
+     * @param compiler   not used by this implementation
+     * @return the SQL produced by {@link JtsSpatialPredicate#toSql} for the geometry column and
+     * the reference geometry literal
+     * @throws IllegalStateException if the reference geometry is missing, or if the key descriptor
+     *                               carries no SQL column to compare against
+     */
+    @Override
+    public String createSqlClause(Connection connection, FunctionCompiler compiler) {
+        if (this.referenceGeometry == null) {
+            throw new IllegalStateException("Geometry for spatial filter must not be null.");
+        }
+        // The SQL implementation is optional on the key descriptor (the API only sets it when a
+        // column name is supplied), so fail with a clear message instead of a NullPointerException.
+        if (this.keyDescriptor.getSqlImplementation() == null
+                || this.keyDescriptor.getSqlImplementation().getField1() == null) {
+            throw new IllegalStateException("Spatial filter requires a SQL column name for its geometry key.");
+        }
+
+        // Column expression (e.g. "geom" or "t.geom")
+        final String columnExpr = this.keyDescriptor.getSqlImplementation().getField1();
+
+        // Geometry literal as ST_GeomFromText('WKT', srid). Plain concatenation keeps the SRID
+        // digits independent of the JVM's default locale.
+        final String wkt = this.referenceGeometry.toWKT();
+        final String geomLiteral = "ST_GeomFromText('" + wkt + "', " + DEFAULT_SRID + ")";
+
+        JtsSpatialPredicate relation = JtsSpatialPredicate.of(this.predicateType);
+        return relation.toSql(columnExpr, geomLiteral);
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialJoinOperator.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialJoinOperator.java
new file mode 100644
index 000000000..f44e88638
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialJoinOperator.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.operators.jdbc;
+
+import org.apache.wayang.basic.operators.SpatialJoinOperator;
+import org.apache.wayang.basic.data.Record;
+import org.apache.wayang.core.api.spatial.SpatialGeometry;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.core.function.TransformationDescriptor;
+import org.apache.wayang.core.types.DataSetType;
+import org.apache.wayang.core.util.Tuple;
+import org.apache.wayang.spatial.function.JtsSpatialPredicate;
+import org.apache.wayang.jdbc.compiler.FunctionCompiler;
+import org.apache.wayang.jdbc.operators.JdbcExecutionOperator;
+
+import java.sql.Connection;
+
+/**
+ * Template for JDBC-based {@link SpatialJoinOperator}.
+ * <p>
+ * The join is rendered as a SQL {@code JOIN ... ON ...} fragment over the table and column
+ * names stored as SQL implementation of the two key descriptors.
+ */
+public abstract class JdbcSpatialJoinOperator
+        extends SpatialJoinOperator
+        implements JdbcExecutionOperator {
+
+
+    /**
+     * Creates a new instance.
+     *
+     * @param keyDescriptor0 key descriptor of the first input
+     * @param keyDescriptor1 key descriptor of the second input
+     * @param predicateType  the spatial predicate to join on
+     */
+    public JdbcSpatialJoinOperator(
+            TransformationDescriptor keyDescriptor0,
+            TransformationDescriptor keyDescriptor1,
+            SpatialPredicate predicateType
+    ) {
+        super(
+                keyDescriptor0,
+                keyDescriptor1,
+                predicateType
+        );
+    }
+
+    /**
+     * Copies an instance.
+     *
+     * @param that that should be copied
+     */
+    public JdbcSpatialJoinOperator(SpatialJoinOperator that) {
+        super(that);
+    }
+
+    /**
+     * Renders this join as a SQL fragment of the form {@code JOIN <right table> ON <condition>}.
+     *
+     * @param connection not used by this implementation
+     * @param compiler   not used by this implementation
+     * @return the SQL join fragment
+     * @throws IllegalStateException if either key descriptor lacks a SQL implementation, or if
+     *                               the right-hand table name is missing
+     */
+    @Override
+    public String createSqlClause(Connection connection, FunctionCompiler compiler) {
+        final Tuple left = this.keyDescriptor0.getSqlImplementation();
+        final Tuple right = this.keyDescriptor1.getSqlImplementation();
+        if (left == null || right == null) {
+            throw new IllegalStateException("Spatial join requires SQL implementations for both inputs.");
+        }
+        // field0 holds the table name, field1 the geometry column.
+        final String leftTableName = left.field0;
+        final String leftKey = left.field1;
+        final String rightTableName = right.field0;
+        final String rightKey = right.field1;
+        // Without this check a missing table name would be concatenated as the text "null".
+        if (rightTableName == null) {
+            throw new IllegalStateException("Spatial join requires a table name for the right input.");
+        }
+
+        JtsSpatialPredicate predicate = JtsSpatialPredicate.of(this.predicateType);
+        return "JOIN " + rightTableName + " ON "
+                + predicate.toSql(leftTableName, leftKey, rightTableName, rightKey);
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/postgres/PostgresSpatialFilterOperator.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/postgres/PostgresSpatialFilterOperator.java
new file mode 100644
index 000000000..eca725b45
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/postgres/PostgresSpatialFilterOperator.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.operators.postgres;
+
+import org.apache.wayang.basic.operators.SpatialFilterOperator;
+import org.apache.wayang.core.api.spatial.SpatialGeometry;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.core.function.FunctionDescriptor;
+import org.apache.wayang.core.types.DataSetType;
+import org.apache.wayang.spatial.operators.jdbc.JdbcSpatialFilterOperator;
+import org.apache.wayang.postgres.operators.PostgresExecutionOperator;
+
+
+/**
+ * PostgreSQL implementation of the {@link SpatialFilterOperator}.
+ * All behaviour is inherited from {@link JdbcSpatialFilterOperator}; this class only adds
+ * the constructors and the copy factory.
+ */
+public class PostgresSpatialFilterOperator extends JdbcSpatialFilterOperator implements PostgresExecutionOperator {
+
+    /**
+     * Creates a new instance.
+     *
+     * @param relation              the spatial predicate to evaluate (e.g., INTERSECTS, CONTAINS, WITHIN)
+     * @param keyExtractor          function yielding the geometry of an input element
+     * @param inputClassDatasetType type of the input
+     * @param geometry              the reference geometry to filter against
+     */
+    public PostgresSpatialFilterOperator(SpatialPredicate relation,
+                                         FunctionDescriptor.SerializableFunction keyExtractor,
+                                         DataSetType inputClassDatasetType,
+                                         SpatialGeometry geometry) {
+        super(relation, keyExtractor, inputClassDatasetType, geometry);
+    }
+
+    /**
+     * Copies an instance (exclusive of broadcasts).
+     *
+     * @param that that should be copied
+     */
+    public PostgresSpatialFilterOperator(SpatialFilterOperator that) {
+        super(that);
+    }
+
+    /**
+     * Creates a copy of this operator through the copy constructor.
+     *
+     * @return the new {@link PostgresSpatialFilterOperator}
+     */
+    @Override
+    protected PostgresSpatialFilterOperator createCopy() {
+        return new PostgresSpatialFilterOperator<>(this);
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/postgres/PostgresSpatialJoinOperator.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/postgres/PostgresSpatialJoinOperator.java
new file mode 100644
index 000000000..72489f234
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/postgres/PostgresSpatialJoinOperator.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.operators.postgres;
+
+import org.apache.wayang.basic.operators.SpatialJoinOperator;
+import org.apache.wayang.core.api.spatial.SpatialGeometry;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.core.function.TransformationDescriptor;
+import org.apache.wayang.spatial.operators.jdbc.JdbcSpatialJoinOperator;
+import org.apache.wayang.postgres.operators.PostgresExecutionOperator;
+
+/**
+ * PostgreSQL implementation of the {@link SpatialJoinOperator}.
+ * All behaviour is inherited from {@link JdbcSpatialJoinOperator}; this class only adds
+ * the constructors and the copy factory.
+ */
+public class PostgresSpatialJoinOperator extends JdbcSpatialJoinOperator implements PostgresExecutionOperator {
+    /**
+     * Creates a new instance.
+     *
+     * @param keyDescriptor0 key descriptor of the first input
+     * @param keyDescriptor1 key descriptor of the second input
+     * @param predicate      the spatial predicate to join on (e.g., INTERSECTS, CONTAINS, WITHIN)
+     */
+    public PostgresSpatialJoinOperator(TransformationDescriptor keyDescriptor0,
+                                       TransformationDescriptor keyDescriptor1,
+                                       SpatialPredicate predicate) {
+        super(keyDescriptor0, keyDescriptor1, predicate);
+    }
+
+    /**
+     * Copies an instance (exclusive of broadcasts).
+     *
+     * @param that that should be copied
+     */
+    public PostgresSpatialJoinOperator(SpatialJoinOperator that) {
+        super(that);
+    }
+
+    /**
+     * Creates a copy of this operator through the copy constructor.
+     *
+     * @return the new {@link PostgresSpatialJoinOperator}
+     */
+    @Override
+    protected PostgresSpatialJoinOperator createCopy() {
+        return new PostgresSpatialJoinOperator<>(this);
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/spark/SparkSpatialFilterOperator.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/spark/SparkSpatialFilterOperator.java
new file mode 100644
index 000000000..5fe582bb6
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/spark/SparkSpatialFilterOperator.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.operators.spark;
+
+import org.apache.sedona.core.spatialOperator.RangeQuery;
+import org.apache.sedona.core.spatialRDD.SpatialRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.wayang.basic.operators.SpatialFilterOperator;
+import org.apache.wayang.core.api.spatial.SpatialGeometry;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.core.function.FunctionDescriptor;
+import org.apache.wayang.core.optimizer.OptimizationContext;
+import org.apache.wayang.core.types.DataSetType;
+import org.apache.wayang.core.plan.wayangplan.ExecutionOperator;
+import org.apache.wayang.core.platform.ChannelDescriptor;
+import org.apache.wayang.core.platform.ChannelInstance;
+import org.apache.wayang.core.platform.lineage.ExecutionLineageNode;
+import org.apache.wayang.core.util.ReflectionUtils;
+import org.apache.wayang.core.util.Tuple;
+import org.apache.wayang.spark.channels.BroadcastChannel;
+import org.apache.wayang.spark.channels.RddChannel;
+import org.apache.wayang.spark.execution.SparkExecutor;
+import org.apache.wayang.spark.operators.SparkExecutionOperator;
+import org.apache.wayang.spatial.data.WayangGeometry;
+import org.locationtech.jts.geom.Geometry;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Spark implementation of the {@link SpatialFilterOperator}.
+ * <p>
+ * Each input element is turned into its JTS geometry (with the element attached as user data),
+ * the geometries are filtered with a Sedona range query against the reference geometry, and
+ * the surviving elements are recovered from the user data.
+ */
+public class SparkSpatialFilterOperator
+        extends SpatialFilterOperator
+        implements SparkExecutionOperator {
+
+    /**
+     * Creates a new instance.
+     *
+     * @param relation              the spatial predicate to evaluate (e.g., INTERSECTS, CONTAINS, WITHIN)
+     * @param keyExtractor          function yielding the geometry of an input element
+     * @param inputClassDatasetType type of the input
+     * @param geometry              the reference geometry to filter against
+     *
+     */
+    public SparkSpatialFilterOperator(SpatialPredicate relation,
+                                      FunctionDescriptor.SerializableFunction keyExtractor,
+                                      DataSetType inputClassDatasetType,
+                                      SpatialGeometry geometry) {
+        super(relation, keyExtractor, inputClassDatasetType, geometry);
+    }
+
+    /**
+     * Copies an instance (exclusive of broadcasts).
+     *
+     * @param that that should be copied
+     */
+    public SparkSpatialFilterOperator(SpatialFilterOperator that) {
+        super(that);
+    }
+
+    /**
+     * Filters the input RDD against the reference geometry and feeds the result to the output channel.
+     *
+     * @param inputs          the single input channel instance (an RDD channel)
+     * @param outputs         the single output channel instance (an RDD channel)
+     * @param sparkExecutor   gives access to the Spark context
+     * @param operatorContext passed on to the lineage modelling
+     * @return the result of {@link ExecutionOperator#modelLazyExecution}
+     * @throws IllegalStateException if no reference geometry is available
+     */
+    @Override
+    @SuppressWarnings("unchecked")
+    public Tuple, Collection> evaluate(
+            ChannelInstance[] inputs,
+            ChannelInstance[] outputs,
+            SparkExecutor sparkExecutor,
+            OptimizationContext.OperatorContext operatorContext) {
+        assert inputs.length == this.getNumInputs();
+        assert outputs.length == this.getNumOutputs();
+
+        // Register Sedona JAR with Spark executors if running in cluster mode.
+        if (!sparkExecutor.sc.isLocal()) {
+            String sedonaJar = ReflectionUtils.getDeclaringJar(SpatialRDD.class);
+            if (sedonaJar != null) {
+                sparkExecutor.sc.addJar(sedonaJar);
+            }
+        }
+
+        // Unwrap the JTS geometry of the reference; both a missing wrapper and a missing
+        // wrapped geometry are rejected below.
+        WayangGeometry wRef = (WayangGeometry) this.referenceGeometry;
+        final Geometry reference = wRef == null ? null : wRef.getGeometry();
+        if (reference == null) {
+            throw new IllegalStateException("Reference geometry must not be null for spatial filtering.");
+        }
+
+        final JavaRDD inputRdd = ((RddChannel.Instance) inputs[0]).provideRdd();
+
+        final FunctionDescriptor.SerializableFunction keyExtractor =
+                (FunctionDescriptor.SerializableFunction) this.keyDescriptor.getJavaImplementation();
+
+        // Build an RDD of Geometries where userData = original element (Type).
+        // Elements without a geometry map to null and are dropped by the filter at the end.
+        final JavaRDD geometryRdd = inputRdd
+                .map((Type value) -> {
+                    final WayangGeometry wGeom = (WayangGeometry) keyExtractor.apply(value);
+                    if (wGeom == null) {
+                        return null;
+                    }
+                    final Geometry geom = wGeom.getGeometry();
+                    if (geom != null) {
+                        geom.setUserData(value); // keep original object
+                    }
+                    return geom;
+                })
+                .filter(Objects::nonNull);
+
+        final SpatialRDD spatialRDD = new SpatialRDD<>();
+        spatialRDD.setRawSpatialRDD(geometryRdd);
+        // NOTE(review): analyze() presumably runs a Spark job over the data right here; confirm it
+        // is needed for a non-indexed range query and that it fits containsAction() returning false.
+        spatialRDD.analyze();
+
+        final JavaRDD outputRdd = this.applySedonaSpatialFilter(spatialRDD, reference);
+        this.name(outputRdd);
+        ((RddChannel.Instance) outputs[0]).accept(outputRdd, sparkExecutor);
+
+        return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
+    }
+
+
+    /**
+     * Runs the Sedona range query and maps the matching geometries back to the input elements
+     * stored in their user data.
+     *
+     * @param spatialRDD the geometries to filter
+     * @param reference  the geometry to filter against
+     * @return the input elements whose geometries matched
+     * @throws RuntimeException wrapping any exception raised by the range query
+     */
+    private JavaRDD applySedonaSpatialFilter(SpatialRDD spatialRDD, Geometry reference) {
+        final org.apache.sedona.core.spatialOperator.SpatialPredicate predicate = toSedonaPredicate(this.predicateType);
+
+        try {
+            // The trailing `false` is presumably Sedona's "use index" flag -- TODO confirm.
+            final JavaRDD matched =
+                    RangeQuery.SpatialRangeQuery(spatialRDD, reference, predicate, false);
+
+            // Extract original input object from userData
+            return matched.map(geom -> (Type) geom.getUserData());
+        } catch (Exception e) {
+            throw new RuntimeException("Sedona range query failed for spatial filter.", e);
+        }
+    }
+
+    /**
+     * Translates a Wayang {@link SpatialPredicate} into the Sedona predicate of the same name.
+     *
+     * @throws IllegalStateException for predicates without a mapping
+     */
+    private org.apache.sedona.core.spatialOperator.SpatialPredicate toSedonaPredicate(SpatialPredicate predicateType) {
+        return switch (predicateType) {
+            case INTERSECTS -> org.apache.sedona.core.spatialOperator.SpatialPredicate.INTERSECTS;
+            case CONTAINS -> org.apache.sedona.core.spatialOperator.SpatialPredicate.CONTAINS;
+            case WITHIN -> org.apache.sedona.core.spatialOperator.SpatialPredicate.WITHIN;
+            case TOUCHES -> org.apache.sedona.core.spatialOperator.SpatialPredicate.TOUCHES;
+            case OVERLAPS -> org.apache.sedona.core.spatialOperator.SpatialPredicate.OVERLAPS;
+            case CROSSES -> org.apache.sedona.core.spatialOperator.SpatialPredicate.CROSSES;
+            case EQUALS -> org.apache.sedona.core.spatialOperator.SpatialPredicate.EQUALS;
+            default -> throw new IllegalStateException("Unsupported spatial filter predicate: " + predicateType);
+        };
+    }
+
+
+    /** Configuration key under which the load profile estimator of this operator is looked up. */
+    @Override
+    public String getLoadProfileEstimatorConfigurationKey() {
+        return "wayang.spark.spatialfilter.load";
+    }
+
+    /**
+     * Input 0 accepts cached or uncached RDD channels; any other index accepts a broadcast channel.
+     */
+    @Override
+    public List getSupportedInputChannels(int index) {
+        if (index == 0) {
+            return Arrays.asList(RddChannel.UNCACHED_DESCRIPTOR, RddChannel.CACHED_DESCRIPTOR);
+        } else {
+            return Collections.singletonList(BroadcastChannel.DESCRIPTOR);
+        }
+    }
+
+    /** The result is always offered as an uncached RDD channel. */
+    @Override
+    public List getSupportedOutputChannels(int index) {
+        return Collections.singletonList(RddChannel.UNCACHED_DESCRIPTOR);
+    }
+
+    /** Declares that this operator contains no Spark action. */
+    @Override
+    public boolean containsAction() {
+        return false;
+    }
+
+}
diff --git a/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/spark/SparkSpatialJoinOperator.java b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/spark/SparkSpatialJoinOperator.java
new file mode 100644
index 000000000..7416ae1e0
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/main/java/org/apache/wayang/spatial/operators/spark/SparkSpatialJoinOperator.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.operators.spark;
+
+import org.apache.sedona.core.enums.GridType;
+import org.apache.sedona.core.spatialOperator.JoinQuery;
+import org.apache.sedona.core.spatialRDD.SpatialRDD;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.wayang.basic.data.Tuple2;
+import org.apache.wayang.basic.operators.SpatialJoinOperator;
+import org.apache.wayang.core.api.spatial.SpatialGeometry;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.core.function.FunctionDescriptor;
+import org.apache.wayang.core.function.TransformationDescriptor;
+import org.apache.wayang.core.optimizer.OptimizationContext;
+import org.apache.wayang.core.types.DataSetType;
+import org.apache.wayang.core.plan.wayangplan.ExecutionOperator;
+import org.apache.wayang.core.platform.ChannelDescriptor;
+import org.apache.wayang.core.platform.ChannelInstance;
+import org.apache.wayang.core.platform.lineage.ExecutionLineageNode;
+import org.apache.wayang.core.util.ReflectionUtils;
+import org.apache.wayang.core.util.Tuple;
+import org.apache.wayang.spark.channels.RddChannel;
+import org.apache.wayang.spark.execution.SparkExecutor;
+import org.apache.wayang.spark.operators.SparkExecutionOperator;
+import org.apache.wayang.spatial.data.WayangGeometry;
+import org.locationtech.jts.geom.Geometry;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Spark implementation of the {@link SpatialJoinOperator}.
+ * <p>
+ * Both inputs are turned into JTS geometries (with the original element attached as user data),
+ * spatially partitioned with a quadtree grid derived from the first input, and joined with
+ * Sedona's {@link JoinQuery}.
+ */
+public class SparkSpatialJoinOperator
+        extends SpatialJoinOperator
+        implements SparkExecutionOperator {
+
+    /**
+     * Copies an instance.
+     *
+     * @param that the operator to copy
+     */
+    public SparkSpatialJoinOperator(SparkSpatialJoinOperator that) {
+        super(that);
+    }
+
+    /**
+     * Copies an instance.
+     *
+     * @param that the operator to copy
+     */
+    public SparkSpatialJoinOperator(SpatialJoinOperator that) {
+        super(that);
+    }
+
+    /**
+     * Creates a new instance from key descriptors.
+     *
+     * @param keyDescriptor0 descriptor of the function yielding the geometry of a first-input element
+     * @param keyDescriptor1 descriptor of the function yielding the geometry of a second-input element
+     * @param inputType0     type of the first input
+     * @param inputType1     type of the second input
+     * @param predicateType  the spatial predicate to join on
+     */
+    public SparkSpatialJoinOperator(
+            TransformationDescriptor keyDescriptor0,
+            TransformationDescriptor keyDescriptor1,
+            DataSetType inputType0,
+            DataSetType inputType1,
+            SpatialPredicate predicateType) {
+        super(keyDescriptor0, keyDescriptor1, inputType0, inputType1, predicateType);
+    }
+
+    /**
+     * Creates a new instance from plain key-extractor functions and input classes.
+     *
+     * @param keyExtractor0 function yielding the geometry of a first-input element
+     * @param keyExtractor1 function yielding the geometry of a second-input element
+     * @param input0Class   class of the first input's elements
+     * @param input1Class   class of the second input's elements
+     * @param predicateType the spatial predicate to join on
+     */
+    public SparkSpatialJoinOperator(
+            FunctionDescriptor.SerializableFunction keyExtractor0,
+            FunctionDescriptor.SerializableFunction keyExtractor1,
+            Class input0Class,
+            Class input1Class,
+            SpatialPredicate predicateType) {
+        super(keyExtractor0, keyExtractor1, input0Class, input1Class, predicateType);
+    }
+
+    /**
+     * Joins the two input RDDs and feeds the resulting {@link Tuple2}s to the output channel.
+     * Elements for which no geometry can be extracted are skipped, in line with the spatial
+     * filter on Spark and the spatial join on Java.
+     *
+     * @param inputs          the two input channel instances (RDD channels)
+     * @param outputs         the single output channel instance (an RDD channel)
+     * @param sparkExecutor   gives access to the Spark context
+     * @param operatorContext passed on to the lineage modelling
+     * @return the result of {@link ExecutionOperator#modelLazyExecution}
+     * @throws RuntimeException wrapping any exception raised while preparing or running the join
+     */
+    @Override
+    @SuppressWarnings("unchecked")
+    public Tuple, Collection> evaluate(ChannelInstance[] inputs, ChannelInstance[] outputs, SparkExecutor sparkExecutor, OptimizationContext.OperatorContext operatorContext) {
+        // Register Sedona JAR with Spark executors if running in cluster mode.
+        if (!sparkExecutor.sc.isLocal()) {
+            String sedonaJar = ReflectionUtils.getDeclaringJar(SpatialRDD.class);
+            if (sedonaJar != null) {
+                sparkExecutor.sc.addJar(sedonaJar);
+            }
+        }
+
+        final JavaRDD leftIn = ((RddChannel.Instance) inputs[0]).provideRdd();
+        final JavaRDD rightIn = ((RddChannel.Instance) inputs[1]).provideRdd();
+
+        final FunctionDescriptor.SerializableFunction keyExtractor0 =
+                (FunctionDescriptor.SerializableFunction) this.keyDescriptor0.getJavaImplementation();
+        final FunctionDescriptor.SerializableFunction keyExtractor1 =
+                (FunctionDescriptor.SerializableFunction) this.keyDescriptor1.getJavaImplementation();
+
+
+        // Map each element to its geometry with the element attached as user data. A missing
+        // wrapper or geometry yields null, which is filtered out instead of raising an NPE.
+        final JavaRDD leftInGeometry = leftIn.map((InputType0 in1) -> {
+            final WayangGeometry wGeom = (WayangGeometry) keyExtractor0.apply(in1);
+            final Geometry geom = wGeom == null ? null : wGeom.getGeometry();
+            if (geom != null) {
+                geom.setUserData(in1);
+            }
+            return geom;
+        }).filter(Objects::nonNull);
+
+        final JavaRDD rightInGeometry = rightIn.map((InputType1 in2) -> {
+            final WayangGeometry wGeom = (WayangGeometry) keyExtractor1.apply(in2);
+            final Geometry geom = wGeom == null ? null : wGeom.getGeometry();
+            if (geom != null) {
+                geom.setUserData(in2);
+            }
+            return geom;
+        }).filter(Objects::nonNull);
+
+
+        final SpatialRDD spatialRDDLeft = new SpatialRDD<>();
+        final SpatialRDD spatialRDDRight = new SpatialRDD<>();
+
+        try {
+            spatialRDDLeft.setRawSpatialRDD(leftInGeometry);
+            spatialRDDRight.setRawSpatialRDD(rightInGeometry);
+
+            spatialRDDLeft.analyze();
+            spatialRDDRight.analyze();
+
+            // Partition count: half the approximate left-side count, clamped to [1, maxPartitions].
+            final int maxPartitions = 64; // constant for now, later depend on cluster size
+            final long estimatedCount = spatialRDDLeft.approximateTotalCount;
+            final int numPartitions = (int) Math.max(1, Math.min(estimatedCount / 2, maxPartitions));
+            // The right side reuses the partitioner computed for the left side.
+            spatialRDDLeft.spatialPartitioning(GridType.QUADTREE, numPartitions);
+            spatialRDDRight.spatialPartitioning(spatialRDDLeft.getPartitioner());
+
+            JavaPairRDD sedonaJoin = JoinQuery.spatialJoin(
+                    spatialRDDLeft,
+                    spatialRDDRight,
+                    new JoinQuery.JoinParams(false, toSedonaPredicate(this.predicateType))
+            );
+            // Recover the original elements from the user data of the joined geometries.
+            final JavaRDD> outputRdd =
+                    sedonaJoin.map(geoTuple ->
+                            new Tuple2<>(
+                                    (InputType0) geoTuple._1().getUserData(),
+                                    (InputType1) geoTuple._2().getUserData()
+                            )
+                    );
+
+            ((RddChannel.Instance) outputs[0]).accept(outputRdd, sparkExecutor);
+            return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
+
+        } catch (Exception e) {
+            throw new RuntimeException("Sedona spatial join failed.", e);
+        }
+    }
+
+    /**
+     * Translates a Wayang {@link SpatialPredicate} into the Sedona predicate of the same name.
+     *
+     * @throws IllegalStateException for predicates without a mapping
+     */
+    private org.apache.sedona.core.spatialOperator.SpatialPredicate toSedonaPredicate(SpatialPredicate predicateType) {
+        return switch (predicateType) {
+            case INTERSECTS -> org.apache.sedona.core.spatialOperator.SpatialPredicate.INTERSECTS;
+            case CONTAINS -> org.apache.sedona.core.spatialOperator.SpatialPredicate.CONTAINS;
+            case WITHIN -> org.apache.sedona.core.spatialOperator.SpatialPredicate.WITHIN;
+            case TOUCHES -> org.apache.sedona.core.spatialOperator.SpatialPredicate.TOUCHES;
+            case OVERLAPS -> org.apache.sedona.core.spatialOperator.SpatialPredicate.OVERLAPS;
+            case CROSSES -> org.apache.sedona.core.spatialOperator.SpatialPredicate.CROSSES;
+            case EQUALS -> org.apache.sedona.core.spatialOperator.SpatialPredicate.EQUALS;
+            default -> throw new IllegalStateException("Unsupported spatial join predicate: " + predicateType);
+        };
+    }
+
+    /** Configuration key under which the load profile estimator of this operator is looked up. */
+    @Override
+    public String getLoadProfileEstimatorConfigurationKey() {
+        return "wayang.spark.spatialjoin.load";
+    }
+
+    /** Every input accepts cached or uncached RDD channels. */
+    @Override
+    public List getSupportedInputChannels(int index) {
+        assert index <= this.getNumInputs() || (index == 0 && this.getNumInputs() == 0);
+        return Arrays.asList(RddChannel.UNCACHED_DESCRIPTOR, RddChannel.CACHED_DESCRIPTOR);
+    }
+
+    /** The result is always offered as an uncached RDD channel. */
+    @Override
+    public List getSupportedOutputChannels(int index) {
+        assert index <= this.getNumOutputs() || (index == 0 && this.getNumOutputs() == 0);
+        return Collections.singletonList(RddChannel.UNCACHED_DESCRIPTOR);
+    }
+
+    /** Declares that this operator contains no Spark action. */
+    @Override
+    public boolean containsAction() {
+        return false;
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/api/JavaApiSpatialTest.java
b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/api/JavaApiSpatialTest.java new file mode 100644 index 000000000..d2de4b5e7 --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/api/JavaApiSpatialTest.java @@ -0,0 +1,574 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.api; + +import org.apache.wayang.basic.data.Record; +import org.apache.wayang.core.api.Configuration; +import org.apache.wayang.core.api.WayangContext; +import org.apache.wayang.core.api.spatial.SpatialPredicate; +import org.apache.wayang.java.Java; +import org.apache.wayang.postgres.Postgres; +import org.apache.wayang.postgres.operators.PostgresTableSource; +import org.apache.wayang.spark.Spark; +import org.apache.wayang.spatial.Spatial; +import org.apache.wayang.spatial.data.WayangGeometry; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for the fluent spatial API on DataQuantaBuilder. 
+ */
+public class JavaApiSpatialTest {
+
+    // ==================== Java Platform Tests ====================
+
+    @Test
+    void testSpatialFilter() {
+        WayangContext wayangContext = new WayangContext(new Configuration())
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spatial.javaPlugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Spatial Filter Test");
+
+        List<String> testData = Arrays.asList(
+                "0.0,0.0,1.0,1.0", // Box at origin
+                "0.5,0.5,1.5,1.5", // Overlapping box
+                "2.0,2.0,3.0,3.0", // Non-overlapping box
+                "0.25,0.25,0.75,0.75" // Box inside first
+        );
+
+        WayangGeometry queryGeometry = WayangGeometry.fromStringInput(
+                "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))"
+        );
+
+        Collection<Long> result = planBuilder.loadCollection(testData)
+                .spatialFilter(
+                        (input -> {
+                            String[] parts = input.split(",");
+                            double xmin = Double.parseDouble(parts[0]);
+                            double ymin = Double.parseDouble(parts[1]);
+                            double xmax = Double.parseDouble(parts[2]);
+                            double ymax = Double.parseDouble(parts[3]);
+                            String wkt = String.format(java.util.Locale.ROOT, // WKT needs '.' decimals whatever the JVM locale
+                                    "POLYGON((%f %f, %f %f, %f %f, %f %f, %f %f))",
+                                    xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax, xmin, ymin
+                            );
+                            return WayangGeometry.fromStringInput(wkt);
+                        }),
+                        SpatialPredicate.INTERSECTS,
+                        queryGeometry
+                )
+                .count()
+                .collect();
+
+        // Should match 3 boxes (first overlaps, second overlaps, fourth is inside)
+        assertEquals(1, result.size());
+        Long count = result.iterator().next();
+        assertEquals(3L, count);
+    }
+
+    @Test
+    void testSpatialJoin() {
+        WayangContext wayangContext = new WayangContext(new Configuration())
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spatial.javaPlugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Spatial Join Test");
+
+        List<String> leftData = Arrays.asList(
+                "POINT(0.5 0.5)", // Inside first box
+                "POINT(1.5 1.5)", // Inside second box
+                "POINT(0.25 0.75)" // Inside first box
+        );
+
+        List<String> rightData = Arrays.asList(
+                "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))", // Contains first and third points
+                "POLYGON((1 1, 2 1, 2 2, 1 2, 1 1))" // Contains second point
+        );
+
+        Collection<Long> result = planBuilder.loadCollection(leftData)
+                .spatialJoin(
+                        (WayangGeometry::fromStringInput),
+                        planBuilder.loadCollection(rightData),
+                        (WayangGeometry::fromStringInput),
+                        SpatialPredicate.INTERSECTS
+                )
+                .count()
+                .collect();
+
+        // Should have 3 matches:
+        // - POINT(0.5 0.5) with first box
+        // - POINT(1.5 1.5) with second box
+        // - POINT(0.25 0.75) with first box
+        assertEquals(1, result.size());
+        Long count = result.iterator().next();
+        assertEquals(3L, count);
+    }
+
+    @Test
+    void testChainedOperations() {
+        WayangContext wayangContext = new WayangContext(new Configuration())
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spatial.javaPlugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Chained Operations Test");
+
+        // Data: "id,xmin,ymin,xmax,ymax"
+        List<String> testData = Arrays.asList(
+                "1,0.0,0.0,1.0,1.0",
+                "2,0.5,0.5,1.5,1.5",
+                "3,2.0,2.0,3.0,3.0",
+                "4,0.25,0.25,0.75,0.75"
+        );
+
+        WayangGeometry queryGeometry = WayangGeometry.fromStringInput(
+                "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))"
+        );
+
+        // Chain: spatialFilter -> map (extract id) -> filter (id > 1) -> collect
+        Collection<Integer> result = planBuilder.loadCollection(testData)
+                .spatialFilter(
+                        (input -> {
+                            String[] parts = input.split(",");
+                            double xmin = Double.parseDouble(parts[1]);
+                            double ymin = Double.parseDouble(parts[2]);
+                            double xmax = Double.parseDouble(parts[3]);
+                            double ymax = Double.parseDouble(parts[4]);
+                            String wkt = String.format(java.util.Locale.ROOT, // WKT needs '.' decimals whatever the JVM locale
+                                    "POLYGON((%f %f, %f %f, %f %f, %f %f, %f %f))",
+                                    xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax, xmin, ymin
+                            );
+                            return WayangGeometry.fromStringInput(wkt);
+                        }),
+                        SpatialPredicate.INTERSECTS,
+                        queryGeometry
+                )
+                .map(line -> Integer.parseInt(line.split(",")[0])) // Extract ID
+                .filter(id -> id > 1) // Keep only IDs > 1
+                .collect();
+
+        // Should match boxes 1, 2, 4 (intersect), then filter to IDs > 1 -> 2, 4
+        assertEquals(2, result.size());
+        assertTrue(result.contains(2));
+        assertTrue(result.contains(4));
+    }
+
+    @Test
+    void testSpatialJoinChainedWithMapAndReduce() {
+        WayangContext wayangContext = new WayangContext(new Configuration())
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spatial.javaPlugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Spatial Join Chained Test");
+
+        // Left data: points with values "wkt;value"
+        List<String> leftData = Arrays.asList(
+                "POINT(0.5 0.5);10",
+                "POINT(1.5 1.5);20",
+                "POINT(0.25 0.75);30"
+        );
+
+        // Right data: boxes with multipliers "wkt;multiplier"
+        List<String> rightData = Arrays.asList(
+                "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0));2",
+                "POLYGON((1 1, 2 1, 2 2, 1 2, 1 1));3"
+        );
+
+        // Chain: spatialJoin -> map (multiply values) -> reduce (sum)
+        Collection<Integer> result = planBuilder.loadCollection(leftData)
+                .spatialJoin(
+                        (input -> WayangGeometry.fromStringInput(input.split(";")[0])),
+                        planBuilder.loadCollection(rightData),
+                        (input -> WayangGeometry.fromStringInput(input.split(";")[0])),
+                        SpatialPredicate.INTERSECTS
+                )
+                .map(tuple -> {
+                    int leftValue = Integer.parseInt(tuple.field0.split(";")[1]);
+                    int rightMultiplier = Integer.parseInt(tuple.field1.split(";")[1]);
+                    return leftValue * rightMultiplier;
+                })
+                .reduce((a, b) -> a + b)
+                .collect();
+
+        // Matches:
+        // - POINT(0.5 0.5);10 with box;2 -> 10*2 = 20
+        // - POINT(1.5 1.5);20 with box;3 -> 20*3 = 60
+        // - POINT(0.25 0.75);30 with box;2 -> 30*2 = 60
+        // Sum = 20 + 60 + 60 = 140
+        assertEquals(1, result.size());
+        assertEquals(140, result.iterator().next());
+    }
+
+    @Test
+    void testChainedSpatialFilters() {
+        WayangContext wayangContext = new WayangContext(new Configuration())
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spatial.javaPlugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Chained Spatial Filters Test");
+
+        List<String> testData = Arrays.asList(
+                "POLYGON((0.1 0.1, 0.3 0.1, 0.3 0.3, 0.1 0.3, 0.1 0.1))", // Inside both query geometries
+                "POLYGON((0.6 0.6, 0.8 0.6, 0.8 0.8, 0.6 0.8, 0.6 0.6))", // Inside first, outside second
+                "POLYGON((2 2, 3 2, 3 3, 2 3, 2 2))" // Outside both
+        );
+
+        WayangGeometry queryGeometry1 = WayangGeometry.fromStringInput(
+                "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))" // Unit square
+        );
+        WayangGeometry queryGeometry2 = WayangGeometry.fromStringInput(
+                "POLYGON((0 0, 0.5 0, 0.5 0.5, 0 0.5, 0 0))" // Smaller square (0-0.5 range)
+        );
+
+        // Chain two spatial filters
+        Collection<Long> result = planBuilder.loadCollection(testData)
+                .spatialFilter(
+                        (WayangGeometry::fromStringInput),
+                        SpatialPredicate.INTERSECTS,
+                        queryGeometry1
+                )
+                .map(x -> x).withOutputClass(String.class) // Preserve type for chaining
+                .spatialFilter(
+                        (WayangGeometry::fromStringInput),
+                        SpatialPredicate.INTERSECTS,
+                        queryGeometry2
+                )
+                .count()
+                .collect();
+
+        // Only the first box (0.1-0.3) should pass both filters
+        assertEquals(1, result.size());
+        assertEquals(1L, result.iterator().next());
+    }
+
+    @Test
+    void testSpatialFilterFollowedBySpatialJoin() {
+        WayangContext wayangContext = new WayangContext(new Configuration())
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spatial.javaPlugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Spatial Filter then Join Test");
+
+        List<String> leftData = Arrays.asList(
+                "POINT(0.5 0.5)",
+                "POINT(1.5 1.5)",
+                "POINT(0.25 0.25)"
+        );
+
+        List<String> rightData = Arrays.asList(
+                "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
+                "POLYGON((1 1, 2 1, 2 2, 1 2, 1 1))"
+        );
+
+        WayangGeometry preFilterGeometry = WayangGeometry.fromStringInput(
+                "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))"
+        );
+
+        // Filter left data, then join with right
+        var filteredLeft = planBuilder.loadCollection(leftData)
+                .spatialFilter(
+                        (input -> WayangGeometry.fromStringInput(input)),
+                        SpatialPredicate.INTERSECTS,
+                        preFilterGeometry
+                )
+                .map(x -> x).withOutputClass(String.class);
+
+        Collection<Long> result = filteredLeft
+                .spatialJoin(
+                        (input -> WayangGeometry.fromStringInput(input)),
+                        planBuilder.loadCollection(rightData),
+                        (input -> WayangGeometry.fromStringInput(input)),
+                        SpatialPredicate.INTERSECTS
+                )
+                .count()
+                .collect();
+
+        // After filter: POINT(0.5 0.5) and POINT(0.25 0.25) remain
+        // Join matches: both with first box = 2 matches
+        assertEquals(1, result.size());
+        assertEquals(2L, result.iterator().next());
+    }
+
+    // ==================== Spark Platform Tests ====================
+
+    @Test
+    void testSpatialFilterWithSpark() {
+        WayangContext wayangContext = new WayangContext(new Configuration())
+                .withPlugin(Spark.basicPlugin())
+                .withPlugin(Spatial.sparkPlugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Spatial Filter Spark Test");
+
+        List<String> testData = Arrays.asList(
+                "0.0,0.0,1.0,1.0",
+                "0.5,0.5,1.5,1.5",
+                "2.0,2.0,3.0,3.0",
+                "0.25,0.25,0.75,0.75"
+        );
+
+        WayangGeometry queryGeometry = WayangGeometry.fromStringInput(
+                "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))"
+        );
+
+        Collection<Long> result = planBuilder.loadCollection(testData)
+                .spatialFilter(
+                        (input -> {
+                            String[] parts = input.split(",");
+                            double xmin = Double.parseDouble(parts[0]);
+                            double ymin = Double.parseDouble(parts[1]);
+                            double xmax = Double.parseDouble(parts[2]);
+                            double ymax = Double.parseDouble(parts[3]);
+                            String wkt = String.format(java.util.Locale.ROOT, // WKT needs '.' decimals whatever the JVM locale
+                                    "POLYGON((%f %f, %f %f, %f %f, %f %f, %f %f))",
+                                    xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax, xmin, ymin
+                            );
+                            return WayangGeometry.fromStringInput(wkt);
+                        }),
+                        SpatialPredicate.INTERSECTS,
+                        queryGeometry
+                )
+                .withTargetPlatform(Spark.platform())
+                .count()
+                .collect();
+
+        assertEquals(1, result.size());
+        assertEquals(3L, result.iterator().next());
+    }
+
+    @Test
+    void testSpatialJoinWithSpark() {
+        WayangContext wayangContext = new WayangContext(new Configuration())
+                .withPlugin(Spark.basicPlugin())
+                .withPlugin(Spatial.sparkPlugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Spatial Join Spark Test");
+
+        List<String> leftData = Arrays.asList(
+                "POINT(0.5 0.5)",
+                "POINT(1.5 1.5)",
+                "POINT(0.25 0.75)"
+        );
+
+        List<String> rightData = Arrays.asList(
+                "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
+                "POLYGON((1 1, 2 1, 2 2, 1 2, 1 1))"
+        );
+
+        Collection<Long> result = planBuilder.loadCollection(leftData)
+                .spatialJoin(
+                        (input -> WayangGeometry.fromStringInput(input)),
+                        planBuilder.loadCollection(rightData),
+                        (input -> WayangGeometry.fromStringInput(input)),
+                        SpatialPredicate.INTERSECTS
+                )
+                .withTargetPlatform(Spark.platform())
+                .count()
+                .collect();
+
+        assertEquals(1, result.size());
+        assertEquals(3L, result.iterator().next());
+    }
+
+    @Test
+    void testSpatialFilterWithJavaAndSpark() {
+        WayangContext wayangContext = new WayangContext(new Configuration())
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spark.basicPlugin())
+                .withPlugin(Spatial.plugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Spatial Filter Java+Spark Test");
+
+        List<String> testData = Arrays.asList(
+                "0.0,0.0,1.0,1.0",
+                "0.5,0.5,1.5,1.5",
+                "2.0,2.0,3.0,3.0",
+                "0.25,0.25,0.75,0.75"
+        );
+
+        WayangGeometry queryGeometry = WayangGeometry.fromStringInput(
+                "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))"
+        );
+
+        // Let Wayang choose the platform
+        Collection<Long> result = planBuilder.loadCollection(testData)
+                .spatialFilter(
+                        (input -> {
+                            String[] parts = input.split(",");
+                            double xmin = Double.parseDouble(parts[0]);
+                            double ymin = Double.parseDouble(parts[1]);
+                            double xmax = Double.parseDouble(parts[2]);
+                            double ymax = Double.parseDouble(parts[3]);
+                            String wkt = String.format(java.util.Locale.ROOT, // WKT needs '.' decimals whatever the JVM locale
+                                    "POLYGON((%f %f, %f %f, %f %f, %f %f, %f %f))",
+                                    xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax, xmin, ymin
+                            );
+                            return WayangGeometry.fromStringInput(wkt);
+                        }),
+                        SpatialPredicate.INTERSECTS,
+                        queryGeometry
+                )
+                .count()
+                .collect();
+
+        assertEquals(1, result.size());
+        assertEquals(3L, result.iterator().next());
+    }
+
+    // ==================== PostgreSQL Platform Tests ====================
+    // These tests use PostgreSQL spatial operators with ST_Intersects pushdown.
+
+    /**
+     * Builds the Configuration (JDBC URL, user, password) used by the PostgreSQL tests.
+     * Points at the spiderdb database on localhost:5433.
+     */
+    private Configuration getPostgresConfiguration() {
+        Configuration configuration = new Configuration();
+        configuration.setProperty("wayang.postgres.jdbc.url", "jdbc:postgresql://localhost:5433/spiderdb");
+        configuration.setProperty("wayang.postgres.jdbc.user", "postgres");
+        configuration.setProperty("wayang.postgres.jdbc.password", "postgres");
+        return configuration;
+    }
+
+    @Test
+    @Disabled("Requires local Postgres test database.")
+    void testSpatialFilterWithPostgres() {
+        Configuration configuration = getPostgresConfiguration();
+
+        WayangContext wayangContext = new WayangContext(configuration)
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Postgres.plugin())
+                .withPlugin(Spatial.postgresPlugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Spatial Filter with Postgres Test");
+
+        // Query geometry: a box in the lower-left quadrant (0,0) to (0.4, 0.4)
+        WayangGeometry queryGeometry = WayangGeometry.fromStringInput(
+                "POLYGON((0.0 0.0, 0.4 0.0, 0.4 0.4, 0.0 0.4, 0.0 0.0))"
+        );
+
+        // Read from spider_boxes table and apply spatial filter using PostgreSQL ST_Intersects
+        Collection<Long> result = planBuilder
+                .readTable(new PostgresTableSource("spider_boxes", "x_min", "y_min", "x_max", "y_max", "geom"))
+                .spatialFilter(
+                        (Record record) -> WayangGeometry.fromStringInput(record.getString(4)),
+                        SpatialPredicate.INTERSECTS,
+                        queryGeometry,
+                        "geom" // SQL geometry column name for PostgreSQL pushdown
+                )
+                .withTargetPlatform(Postgres.platform())
+                .count()
+                .collect();
+
+        // Verify we got results (exact count depends on data in spider_boxes)
+        assertEquals(1, result.size());
+        Long count = result.iterator().next();
+        assertTrue(count > 0, "Expected at least one box intersecting the query geometry");
+        System.out.println("PostgreSQL Spatial Filter (ST_Intersects): " + count + " boxes intersect the query geometry");
+    }
+
+    @Test
+    @Disabled("Requires local Postgres test database.")
+    void testSpatialFilterWithPostgresAndMapping() {
+        Configuration configuration = getPostgresConfiguration();
+
+        WayangContext wayangContext = new WayangContext(configuration)
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Postgres.plugin())
+                .withPlugin(Spatial.postgresPlugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Spatial Filter with Postgres and Mapping Test");
+
+        // Query geometry covering center area
+        WayangGeometry queryGeometry = WayangGeometry.fromStringInput(
+                "POLYGON((0.3 0.3, 0.7 0.3, 0.7 0.7, 0.3 0.7, 0.3 0.3))"
+        );
+
+        // Read from spider_boxes, filter spatially with PostgreSQL, then map to extract bounds
+        Collection<String> result = planBuilder
+                .readTable(new PostgresTableSource("spider_boxes", "x_min", "y_min", "x_max", "y_max", "geom"))
+                .spatialFilter(
+                        (Record record) -> WayangGeometry.fromStringInput(record.getString(4)),
+                        SpatialPredicate.INTERSECTS,
+                        queryGeometry,
+                        "geom" // SQL geometry column name for PostgreSQL pushdown
+                )
+                .withTargetPlatform(Postgres.platform())
+                .map((Record record) -> String.format("Box: (%.2f,%.2f)-(%.2f,%.2f)",
+                        record.getDouble(0), record.getDouble(1),
+                        record.getDouble(2), record.getDouble(3)))
+                .collect();
+
+        assertTrue(result.size() > 0, "Expected at least one box intersecting the query geometry");
+        System.out.println("PostgreSQL Spatial Filter + Mapping: " + result.size() + " results");
+        result.stream().limit(5).forEach(System.out::println);
+    }
+
+    @Test
+    @Disabled("Requires local Postgres test database.")
+    void testSpatialFilterWithPostgresContains() {
+        Configuration configuration = getPostgresConfiguration();
+
+        WayangContext wayangContext = new WayangContext(configuration)
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Postgres.plugin())
+                .withPlugin(Spatial.postgresPlugin());
+
+        JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
+                .withJobName("Spatial Filter with Postgres Contains Test");
+
+        // Query geometry: full unit square - should contain all boxes that are fully inside
+        WayangGeometry queryGeometry = WayangGeometry.fromStringInput(
+                "POLYGON((0.0 0.0, 1.0 0.0, 1.0 1.0, 0.0 1.0, 0.0 0.0))"
+        );
+
+        // Test WITHIN predicate - find boxes that are completely within the query geometry
+        Collection<Long> result = planBuilder
+                .readTable(new PostgresTableSource("spider_boxes", "x_min", "y_min", "x_max", "y_max", "geom"))
+                .spatialFilter(
+                        (Record record) -> WayangGeometry.fromStringInput(record.getString(4)),
+                        SpatialPredicate.WITHIN,
+                        queryGeometry,
+                        "geom" // SQL geometry column name for PostgreSQL pushdown
+                )
+                .withTargetPlatform(Postgres.platform())
+                .count()
+                .collect();
+
+        assertEquals(1, result.size());
+        Long count = result.iterator().next();
+        System.out.println("PostgreSQL Spatial Filter (ST_Within): " + count + " boxes within the query geometry");
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/data/WayangGeometryTest.java b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/data/WayangGeometryTest.java
new file mode 100644
index 000000000..02d2855bb
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/data/WayangGeometryTest.java
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.data;
+
+import org.apache.wayang.spatial.data.WayangGeometry; // NOTE(review): same package as this test, so this import is redundant
+import org.junit.Test; // JUnit 4 API; PostgresSpatialIntegrationTest in this patch uses org.junit.jupiter
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.GeometryFactory;
+import org.locationtech.jts.geom.Point;
+import org.locationtech.jts.io.WKBReader;
+import org.locationtech.jts.io.WKBWriter;
+import org.locationtech.jts.io.WKTWriter;
+
+import static org.junit.Assert.*;
+
+public class WayangGeometryTest { // Unit tests for WayangGeometry: construction from JTS / WKT / WKB hex / GeoJSON and the getters
+
+    private final GeometryFactory gf = new GeometryFactory(); // JTS factory used to build the reference points
+
+    @Test
+    public void testFromGeometryStoresAndCachesGeometry() { // fromGeometry: same instance back, all three encodings non-empty
+        Point point = gf.createPoint(new Coordinate(1.0, 2.0));
+
+        WayangGeometry wGeometry = WayangGeometry.fromGeometry(point);
+
+        // First call should give us exactly the same instance that was passed to fromGeometry
+        Geometry first = wGeometry.getGeometry();
+        assertSame("Geometry instance should be the same as the one passed in.",
+                point, first);
+
+        // Second call should return the same cached instance
+        Geometry second = wGeometry.getGeometry();
+        assertSame("Geometry instance should be cached and reused.", first, second);
+
+        // Derived representations should be non-null / non-empty (their content is not checked here)
+        String wkt = wGeometry.getWKT();
+        String wkb = wGeometry.getWKB();
+        String geoJson = wGeometry.getGeoJSON();
+
+        assertNotNull("WKT should not be null.", wkt);
+        assertFalse("WKT should not be empty.", wkt.isEmpty());
+        assertNotNull("WKB should not be null.", wkb);
+        assertFalse("WKB should not be empty.", wkb.isEmpty());
+        assertNotNull("GeoJSON should not be null.", geoJson);
+        assertFalse("GeoJSON should not be empty.", geoJson.isEmpty());
+    }
+
+    @Test
+    public void testFromStringInputWKTAndSRIDCleaning() { // "SRID=...;" prefixed WKT parses like the plain WKT
+        // WKT with SRID prefix
+        String wktWithSrid = "SRID=4326;POINT (1 2)";
+        WayangGeometry wGeometry = WayangGeometry.fromStringInput(wktWithSrid);
+
+        Geometry geom = wGeometry.getGeometry();
+        assertTrue("Geometry should be a Point.", geom instanceof Point);
+        Point p = (Point) geom;
+        assertEquals(1.0, p.getX(), 1e-9);
+        assertEquals(2.0, p.getY(), 1e-9);
+
+        // getWKT is expected to return the original stored WKT, including the SRID prefix
+        String wkt = wGeometry.getWKT();
+        assertTrue("Original WKT (with SRID) should be preserved.", wkt.startsWith("SRID="));
+
+        // Verify that parsing the same WKT without SRID gives an equal geometry,
+        // which indirectly asserts that cleanSRID() worked as expected.
+        String wktWithoutSrid = "POINT (1 2)";
+        WayangGeometry wGeometryNoSrid = WayangGeometry.fromStringInput(wktWithoutSrid);
+        Geometry geomNoSrid = wGeometryNoSrid.getGeometry();
+
+        assertTrue("Geometry from SRID-prefixed WKT should equal geometry from plain WKT.",
+                geom.equalsExact(geomNoSrid));
+    }
+
+
+    @Test
+    public void testFromStringInputPlainWKT() { // plain WKT parses to the same point and getWKT echoes the input
+        // Use JTS writer to generate canonical WKT string
+        Point point = gf.createPoint(new Coordinate(3.0, 4.0));
+        String canonicalWkt = new WKTWriter().write(point);
+
+        WayangGeometry wGeometry = WayangGeometry.fromStringInput(canonicalWkt);
+
+        Geometry geom = wGeometry.getGeometry();
+        assertTrue(geom instanceof Point);
+        assertEquals(point.getCoordinate().x, geom.getCoordinate().x, 1e-9);
+        assertEquals(point.getCoordinate().y, geom.getCoordinate().y, 1e-9);
+
+        // getWKT should match the canonical representation from JTS (the exact string that was passed in)
+        String wkt = wGeometry.getWKT();
+        assertEquals("WKT should match JTS canonical representation.", canonicalWkt, wkt);
+    }
+
+    @Test
+    public void testFromStringInputWKBHexRoundTrip() { // WKB hex in -> equal geometry -> identical WKB bytes out
+        Point original = gf.createPoint(new Coordinate(5.0, 6.0));
+
+        // Encode to WKB hex with JTS WKBWriter (presumably the mechanism WayangGeometry uses too - TODO confirm)
+        WKBWriter wkbWriter = new WKBWriter();
+        byte[] wkbBytes = wkbWriter.write(original);
+        String wkbHex = WKBWriter.toHex(wkbBytes);
+
+        WayangGeometry wGeometry = WayangGeometry.fromStringInput(wkbHex);
+        Geometry parsed = wGeometry.getGeometry();
+
+        assertTrue("Parsed geometry should be a Point.", parsed instanceof Point);
+        assertTrue("Parsed geometry should be exactly equal to original.",
+                original.equalsExact(parsed));
+
+        // getWKB should give back a hex string that decodes to the same WKB bytes
+        String producedHex = wGeometry.getWKB();
+        byte[] producedBytes = WKBReader.hexToBytes(producedHex);
+        assertArrayEquals("WKB bytes should be identical after round-trip.",
+                wkbBytes, producedBytes);
+    }
+
+    @Test
+    public void testFromStringInputGeoJSONAndRoundTripThroughGeometry() { // GeoJSON in -> geometry -> GeoJSON -> equal geometry
+        // Simple GeoJSON Point
+        String geoJson = "{\"type\":\"Point\",\"coordinates\":[7.0,8.0]}";
+
+        WayangGeometry wGeometry = WayangGeometry.fromStringInput(geoJson);
+        Geometry geom = wGeometry.getGeometry();
+
+        assertTrue("Geometry should be a Point.", geom instanceof Point);
+        Point p = (Point) geom;
+        assertEquals(7.0, p.getX(), 1e-9);
+        assertEquals(8.0, p.getY(), 1e-9);
+
+        // Now go back through fromGeometry + GeoJSON
+        WayangGeometry fromGeom = WayangGeometry.fromGeometry(geom);
+        String generatedGeoJson = fromGeom.getGeoJSON();
+
+        // We don't depend on exact string equality/ordering of JSON,
+        // but we do expect that parsing generated GeoJSON yields an equal geometry.
+        WayangGeometry reParsed = WayangGeometry.fromStringInput(generatedGeoJson);
+        Geometry geom2 = reParsed.getGeometry();
+
+        assertTrue("Geometry from re-parsed GeoJSON should be exactly equal.",
+                geom.equalsExact(geom2));
+    }
+
+    @Test
+    public void testPreferredRepresentationOrderWktThenWkbThenGeoJson() { // NOTE(review): each instance holds one encoding, so no preference order is exercised
+        // Start with WKT-only instance
+        Point point = gf.createPoint(new Coordinate(10.0, 20.0));
+        String wkt = new WKTWriter().write(point);
+        WayangGeometry wFromWkt = WayangGeometry.fromStringInput(wkt);
+
+        Geometry g1 = wFromWkt.getGeometry();
+        assertTrue(g1 instanceof Point);
+        assertEquals(point.getX(), g1.getCoordinate().x, 1e-9);
+        assertEquals(point.getY(), g1.getCoordinate().y, 1e-9);
+
+        // Now WKB-only instance
+        WKBWriter wkbWriter = new WKBWriter();
+        byte[] wkbBytes = wkbWriter.write(point);
+        String wkbHex = WKBWriter.toHex(wkbBytes);
+        WayangGeometry wFromWkb = WayangGeometry.fromStringInput(wkbHex);
+
+        Geometry g2 = wFromWkb.getGeometry();
+        assertTrue(g2 instanceof Point);
+        assertTrue(point.equalsExact(g2));
+
+        // And GeoJSON-only instance (the GeoJSON text is produced by WayangGeometry itself)
+        WayangGeometry wFromGeo = WayangGeometry.fromGeometry(point);
+        String geoJson = wFromGeo.getGeoJSON();
+        WayangGeometry wFromGeoOnly = WayangGeometry.fromStringInput(geoJson);
+
+        Geometry g3 = wFromGeoOnly.getGeometry();
+        assertTrue(g3 instanceof Point);
+        assertTrue(point.equalsExact(g3));
+    }
+
+    @Test(expected = RuntimeException.class)
+    public void testInvalidWKTThrowsRuntimeException() { // passes if any call in the body throws a RuntimeException
+        // This should cause JTS WKTReader to throw ParseException,
+        // which WayangGeometry wraps in a RuntimeException.
+        String invalidWkt = "POINT (1)";
+        WayangGeometry wGeometry = WayangGeometry.fromStringInput(invalidWkt);
+
+        // Should throw
+        wGeometry.getGeometry();
+    }
+
+    @Test(expected = IllegalStateException.class)
+    public void testNoRepresentationAvailableThrowsIllegalStateException() { // empty instance has nothing to build a geometry from
+        // Default constructor, no wkt/wkb/geojson/geometry set
+        WayangGeometry wGeometry = new WayangGeometry();
+
+        // Should hit the "No geometry representation available" branch
+        wGeometry.getGeometry();
+    }
+
+    @Test
+    public void testGetGeometryIsCached() { // NOTE(review): repeats the caching assertion of testFromGeometryStoresAndCachesGeometry
+        Point point = gf.createPoint(new Coordinate(11.0, 22.0));
+        WayangGeometry wGeometry = WayangGeometry.fromGeometry(point);
+
+        Geometry g1 = wGeometry.getGeometry();
+        Geometry g2 = wGeometry.getGeometry();
+
+        assertSame("getGeometry should cache and return the same instance.", g1, g2);
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/integration/PostgresSpatialIntegrationTest.java b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/integration/PostgresSpatialIntegrationTest.java
new file mode 100644
index 000000000..66721a3ad
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/integration/PostgresSpatialIntegrationTest.java
@@ -0,0 +1,284 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.wayang.spatial.integration;
+
+import org.apache.wayang.basic.data.Record;
+import org.apache.wayang.basic.data.Tuple2;
+import org.apache.wayang.spatial.data.WayangGeometry;
+import org.apache.wayang.basic.operators.*;
+import org.apache.wayang.core.api.Configuration;
+import org.apache.wayang.core.api.WayangContext;
+import org.apache.wayang.core.api.spatial.SpatialPredicate;
+import org.apache.wayang.core.plan.wayangplan.WayangPlan;
+import org.apache.wayang.core.types.DataSetType;
+import org.apache.wayang.core.util.ReflectionUtils;
+import org.apache.wayang.java.Java;
+import org.apache.wayang.postgres.Postgres;
+import org.apache.wayang.postgres.operators.PostgresTableSource;
+import org.apache.wayang.spark.Spark;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.locationtech.jts.geom.Envelope;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.GeometryFactory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+@Disabled("Requires local Postgres test database.")
+public class PostgresSpatialIntegrationTest {
+
+    // Manual smoke run: projects "name" from table "person" and prints up to 10 rows.
+    public static void main(String[] args) {
+        WayangPlan wayangPlan;
+        Configuration configuration = new Configuration();
+        configuration.setProperty("wayang.postgres.jdbc.url", "jdbc:postgresql://localhost:5432/imdb");
+        configuration.setProperty("wayang.postgres.jdbc.user", "postgres");
+        configuration.setProperty("wayang.postgres.jdbc.password", "password");
+
+        WayangContext wayangContext = new WayangContext(configuration)
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spark.basicPlugin())
+                .withPlugin(Postgres.plugin());
+
+        Collection<Record> collector = new ArrayList<>();
+
+        TableSource customer = new PostgresTableSource("person");
+        MapOperator<Record, Record> projection = MapOperator.createProjection(
+                Record.class,
+                Record.class,
+                "name");
+
+        LocalCallbackSink<Record> sink = LocalCallbackSink.createCollectingSink(collector, Record.class);
+        customer.connectTo(0,projection,0);
+        projection.connectTo(0,sink,0);
+
+
+        wayangPlan = new WayangPlan(sink);
+
+        wayangContext.execute("PostgreSql test", wayangPlan);
+
+
+        int count = 10;
+        for(Record r : collector) {
+            System.out.println(r.getField(0).toString());
+            if(--count == 0 ) {
+                break;
+            }
+        }
+        System.out.println("Done");
+    }
+
+    WayangContext getTestWayangContext() {
+        Configuration configuration = new Configuration();
+        configuration.setProperty("wayang.postgres.jdbc.url", "jdbc:postgresql://localhost:5433/postgres"); // Non-default port 5433 (PostgreSQL's default is 5432)
+        configuration.setProperty("wayang.postgres.jdbc.user", "postgres");
+        configuration.setProperty("wayang.postgres.jdbc.password", "postgres");
+
+        return new WayangContext(configuration);
+    }
+
+    @Test
+    @Disabled("Requires local Postgres test database.")
+    void testSpatialFilterOperator() {
+        WayangContext wayangContext = getTestWayangContext()
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spark.basicPlugin())
+                .withPlugin(Postgres.plugin());
+
+        // Scalar geometry (JTS form of the query box; not referenced below, the operator takes the WKT literal)
+        GeometryFactory geometryFactory = new GeometryFactory();
+        Envelope envelope = new Envelope(0.00, 0.4, 0.00, 0.40);
+        Geometry geom2 = geometryFactory.toGeometry(envelope);
+
+        TableSource spider =
+                new PostgresTableSource("spider_boxes", "id", "geom");
+
+        SpatialFilterOperator<Record> spatialFilterOperator = new SpatialFilterOperator<Record>(
+                SpatialPredicate.INTERSECTS,
+                (record -> (WayangGeometry.fromStringInput(record.getString(1)))),
+                DataSetType.createDefaultUnchecked(Record.class),
+                WayangGeometry.fromStringInput("POLYGON((0.00 0.00,0.4 0.00,0.4 0.4,0.00 0.4,0.00 0.00))"));
+
+        spatialFilterOperator.getKeyDescriptor().withSqlImplementation("spatialdb", "geom");
+        spatialFilterOperator.addTargetPlatform(Spark.platform());
+        spider.connectTo(0,spatialFilterOperator,0);
+
+        Collection<Record> collector = new ArrayList<>();
+        LocalCallbackSink<Record> sink
+                = LocalCallbackSink.createCollectingSink(collector, DataSetType.createDefaultUnchecked(Record.class));
+        spatialFilterOperator.connectTo(0, sink, 0);
+
+        wayangContext.execute("PostgreSql test", new WayangPlan(sink));
+
+        System.out.println(collector);
+
+        assertEquals(19, collector.size());
+    }
+
+    @Test
+    @Disabled("Requires local Postgres test database.")
+    void testSpatialFilterWithTuple() {
+        WayangContext wayangContext = getTestWayangContext()
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spark.basicPlugin())
+                .withPlugin(Postgres.plugin());
+
+        // Scalar geometry (JTS form of the query box; not referenced below, the operator takes the WKT literal)
+        GeometryFactory geometryFactory = new GeometryFactory();
+        Envelope envelope = new Envelope(0.00, 0.4, 0.00, 0.40);
+        Geometry geom2 = geometryFactory.toGeometry(envelope);
+
+        TableSource spider =
+                new PostgresTableSource("spider", "id", "geom");
+
+        MapOperator<Record, Tuple2<Integer, WayangGeometry>> mapToTuple = new MapOperator<Record, Tuple2<Integer, WayangGeometry>>(
+                record -> {
+                    Tuple2<Integer, WayangGeometry> tuple = new Tuple2<>();
+                    tuple.field0 = record.getInt(0);
+                    tuple.field1 = WayangGeometry.fromStringInput(record.getField(1).toString());
+                    return tuple;
+                },
+                Record.class,
+                ReflectionUtils.specify(Tuple2.class)
+        );
+
+        SpatialFilterOperator<Tuple2<Integer, WayangGeometry>> spatialFilterOperator = new SpatialFilterOperator<Tuple2<Integer, WayangGeometry>>(
+                SpatialPredicate.INTERSECTS,
+                Tuple2::getField1,
+                DataSetType.createDefaultUnchecked(Tuple2.class),
+                WayangGeometry.fromStringInput("POLYGON((0.00 0.00,0.4 0.00,0.4 0.4,0.00 0.4,0.00 0.00))"));
+
+        spatialFilterOperator.addTargetPlatform(Java.platform());
+        spider.connectTo(0,mapToTuple,0);
+        mapToTuple.connectTo(0,spatialFilterOperator,0);
+
+        Collection<Tuple2<Integer, WayangGeometry>> collector = new ArrayList<>();
+        LocalCallbackSink<Tuple2<Integer, WayangGeometry>> sink
+                = LocalCallbackSink.createCollectingSink(collector, DataSetType.createDefaultUnchecked(Tuple2.class));
+        spatialFilterOperator.connectTo(0, sink, 0);
+
+        wayangContext.execute("PostgreSql test", new WayangPlan(sink));
+
+        System.out.println(collector);
+        assertEquals(19, collector.size());
+    }
+
+    @Test
+    @Disabled("Requires local Postgres test database.")
+    void testSpatialJoin() {
+        WayangContext wayangContext = getTestWayangContext()
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spark.basicPlugin())
+                .withPlugin(Postgres.plugin());
+
+        TableSource table1 = new PostgresTableSource("spider_boxes", "id", "x_min", "y_min", "x_max", "y_max", "geom");
+
+        // Input polygons: nested axis-aligned squares.
+        final List<WayangGeometry> inputValues = Arrays.asList(
+                WayangGeometry.fromStringInput("POLYGON((0.00 0.00,0.40 0.00,0.40 0.40,0.00 0.40,0.00 0.00))"),
+                WayangGeometry.fromStringInput("POLYGON((0.00 0.00,0.30 0.00,0.30 0.30,0.00 0.30,0.00 0.00))"),
+                WayangGeometry.fromStringInput("POLYGON((0.00 0.00,0.20 0.00,0.20 0.20,0.00 0.20,0.00 0.00))"),
+                WayangGeometry.fromStringInput("POLYGON((0.00 0.00,0.10 0.00,0.10 0.10,0.00 0.10,0.00 0.00))")
+        );
+        CollectionSource<WayangGeometry> inputCollection = new CollectionSource<>(inputValues, WayangGeometry.class);
+
+
+        SpatialJoinOperator<Record, WayangGeometry> spatialJoinOperator = new SpatialJoinOperator<>(
+                record -> WayangGeometry.fromStringInput(record.getString(5)), // geom is the 6th projected column (index 5); index 4 is y_max
+                wgeometry -> wgeometry,
+                Record.class, WayangGeometry.class,
+                SpatialPredicate.INTERSECTS
+        );
+        table1.connectTo(0, spatialJoinOperator, 0);
+        inputCollection.connectTo(0, spatialJoinOperator, 1);
+
+        Collection<Tuple2<Record, WayangGeometry>> collector = new ArrayList<>();
+        LocalCallbackSink<Tuple2<Record, WayangGeometry>> sink
+                = LocalCallbackSink.createCollectingSink(collector, DataSetType.createDefaultUnchecked(Tuple2.class));
+        spatialJoinOperator.connectTo(0, sink, 0);
+        wayangContext.execute("PostgreSql test", new WayangPlan(sink));
+
+        System.out.println(collector);
+
+        assertEquals(30, collector.size());
+    }
+
+    @Test
+    @Disabled("Requires local Postgres test database.")
+    void testSpatialJoinDbSources() {
+        WayangContext wayangContext = getTestWayangContext()
+                .withPlugin(Java.basicPlugin())
+                .withPlugin(Spark.basicPlugin())
+                .withPlugin(Postgres.plugin());
+
+        // Two logical sources over the same table.
+        TableSource table1 = new PostgresTableSource("spider_boxes", "id", "x_min", "y_min", "x_max", "y_max", "geom");
+        TableSource table2 = new PostgresTableSource("spider_boxes", "id", "x_min", "y_min", "x_max", "y_max", "geom");
+
+        // Spatial join on INTERSECTS; both sides use the geom column (index 5).
+        SpatialJoinOperator<Record, Record> spatialJoinOperator =
+                new SpatialJoinOperator<>(
+                        record -> WayangGeometry.fromStringInput(record.getString(5)),
+                        record -> WayangGeometry.fromStringInput(record.getString(5)),
+                        Record.class, Record.class,
+                        SpatialPredicate.INTERSECTS
+                );
+
+        // Register SQL implementations for both inputs
+        spatialJoinOperator.getKeyDescriptor0()
+                .withSqlImplementation("spiderdb", "geom");
+        spatialJoinOperator.getKeyDescriptor1()
+                .withSqlImplementation("spiderdb", "geom");
+
+        spatialJoinOperator.addTargetPlatform(Postgres.platform());
+
+        // Wire up both DB sources as inputs to the spatial join.
+        table1.connectTo(0, spatialJoinOperator, 0);
+        table2.connectTo(0, spatialJoinOperator, 1);
+
+        // Collect results.
+        Collection<Tuple2<Record, Record>> collector = new ArrayList<>();
+        LocalCallbackSink<Tuple2<Record, Record>> sink =
+                LocalCallbackSink.createCollectingSink(
+                        collector,
+                        DataSetType.createDefaultUnchecked(Tuple2.class)
+                );
+        spatialJoinOperator.connectTo(0, sink, 0);
+
+        // Execute the plan.
+        wayangContext.execute("PostgreSql spatial join DB-DB", new WayangPlan(sink));
+
+        // Basic sanity check: we should get at least self-intersections.
+        assertFalse(collector.isEmpty(), "Spatial join result should not be empty.");
+
+        // Semantic check: every returned pair must actually intersect according to JTS (geom read at index 5, as in the key selectors).
+        for (Tuple2<Record, Record> pair : collector) {
+            Geometry g1 = WayangGeometry.fromStringInput(pair.field0.getString(5)).getGeometry();
+            Geometry g2 = WayangGeometry.fromStringInput(pair.field1.getString(5)).getGeometry();
+            assertTrue(
+                    g1.intersects(g2),
+                    "Found non-intersecting pair in spatial join result."
+            );
+        }
+    }
+}
diff --git a/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/java/JavaSpatialFilterOperatorTest.java b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/java/JavaSpatialFilterOperatorTest.java
new file mode 100644
index 000000000..a2962fc8f
--- /dev/null
+++ b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/java/JavaSpatialFilterOperatorTest.java
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.wayang.spatial.operators.java; + +import org.apache.wayang.core.api.Configuration; +import org.apache.wayang.core.api.Job; +import org.apache.wayang.core.api.spatial.SpatialPredicate; +import org.apache.wayang.core.optimizer.DefaultOptimizationContext; +import org.apache.wayang.core.optimizer.OptimizationContext; +import org.apache.wayang.core.optimizer.cardinality.CardinalityEstimate; +import org.apache.wayang.core.plan.wayangplan.Operator; +import org.apache.wayang.core.platform.CrossPlatformExecutor; +import org.apache.wayang.core.profiling.NoInstrumentationStrategy; +import org.apache.wayang.core.types.DataSetType; +import org.apache.wayang.java.channels.JavaChannelInstance; +import org.apache.wayang.java.channels.StreamChannel; +import org.apache.wayang.java.execution.JavaExecutor; +import org.apache.wayang.java.platform.JavaPlatform; +import org.apache.wayang.spatial.data.WayangGeometry; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Test suite for {@link JavaSpatialFilterOperator}. 
+ */ +class JavaSpatialFilterOperatorTest { + + private static Configuration configuration; + private static Job job; + + @BeforeAll + static void init() { + configuration = new Configuration(); + job = mock(Job.class); + when(job.getConfiguration()).thenReturn(configuration); + DefaultOptimizationContext optimizationContext = new DefaultOptimizationContext(job); + when(job.getCrossPlatformExecutor()).thenReturn(new CrossPlatformExecutor(job, new NoInstrumentationStrategy())); + when(job.getOptimizationContext()).thenReturn(optimizationContext); + } + + private static JavaExecutor createExecutor() { + return new JavaExecutor(JavaPlatform.getInstance(), job); + } + + private static OptimizationContext.OperatorContext createOperatorContext(Operator operator) { + OptimizationContext optimizationContext = job.getOptimizationContext(); + final OptimizationContext.OperatorContext operatorContext = optimizationContext.addOneTimeOperator(operator); + for (int i = 0; i < operator.getNumInputs(); i++) { + operatorContext.setInputCardinality(i, new CardinalityEstimate(100, 10000, 0.1)); + } + for (int i = 0; i < operator.getNumOutputs(); i++) { + operatorContext.setOutputCardinality(i, new CardinalityEstimate(100, 10000, 0.1)); + } + return operatorContext; + } + + private static StreamChannel.Instance createStreamChannelInstance() { + return (StreamChannel.Instance) StreamChannel.DESCRIPTOR + .createChannel(null, configuration) + .createInstance(mock(JavaExecutor.class), null, -1); + } + + private static StreamChannel.Instance createStreamChannelInstance(Stream stream) { + StreamChannel.Instance instance = createStreamChannelInstance(); + instance.accept(stream); + return instance; + } + + @Test + void testIntersectsFilter() { + // 4 polygons: larger than reference, overlapping, fully inside, fully outside + List input = Arrays.asList( + new WayangGeometry("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))"), + new WayangGeometry("POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5))"), 
+ new WayangGeometry("POLYGON ((0.2 0.2, 0.8 0.2, 0.8 0.8, 0.2 0.8, 0.2 0.2))"), + new WayangGeometry("POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))") + ); + + WayangGeometry reference = new WayangGeometry("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"); + + JavaSpatialFilterOperator filterOp = new JavaSpatialFilterOperator<>( + SpatialPredicate.INTERSECTS, + w -> w, + DataSetType.createDefault(WayangGeometry.class), + reference + ); + + JavaChannelInstance[] inputs = new JavaChannelInstance[]{createStreamChannelInstance(input.stream())}; + JavaChannelInstance[] outputs = new JavaChannelInstance[]{createStreamChannelInstance()}; + filterOp.evaluate(inputs, outputs, createExecutor(), createOperatorContext(filterOp)); + + List result = outputs[0].provideStream().collect(Collectors.toList()); + assertEquals(3, result.size()); + } + + @Test + void testWithinFilter() { + // Same 4 polygons; only the fully-inside one is WITHIN the unit square + List input = Arrays.asList( + new WayangGeometry("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))"), + new WayangGeometry("POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5))"), + new WayangGeometry("POLYGON ((0.2 0.2, 0.8 0.2, 0.8 0.8, 0.2 0.8, 0.2 0.2))"), + new WayangGeometry("POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))") + ); + + WayangGeometry reference = new WayangGeometry("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"); + + JavaSpatialFilterOperator filterOp = new JavaSpatialFilterOperator<>( + SpatialPredicate.WITHIN, + w -> w, + DataSetType.createDefault(WayangGeometry.class), + reference + ); + + JavaChannelInstance[] inputs = new JavaChannelInstance[]{createStreamChannelInstance(input.stream())}; + JavaChannelInstance[] outputs = new JavaChannelInstance[]{createStreamChannelInstance()}; + filterOp.evaluate(inputs, outputs, createExecutor(), createOperatorContext(filterOp)); + + List result = outputs[0].provideStream().collect(Collectors.toList()); + assertEquals(1, result.size()); + } + + @Test + void testFilterNoMatches() { + List input = Arrays.asList( + new 
WayangGeometry("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))"), + new WayangGeometry("POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5))"), + new WayangGeometry("POLYGON ((0.2 0.2, 0.8 0.2, 0.8 0.8, 0.2 0.8, 0.2 0.2))"), + new WayangGeometry("POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))") + ); + + // Distant geometry — no intersections + WayangGeometry reference = new WayangGeometry("POLYGON ((100 100, 101 100, 101 101, 100 101, 100 100))"); + + JavaSpatialFilterOperator filterOp = new JavaSpatialFilterOperator<>( + SpatialPredicate.INTERSECTS, + w -> w, + DataSetType.createDefault(WayangGeometry.class), + reference + ); + + JavaChannelInstance[] inputs = new JavaChannelInstance[]{createStreamChannelInstance(input.stream())}; + JavaChannelInstance[] outputs = new JavaChannelInstance[]{createStreamChannelInstance()}; + filterOp.evaluate(inputs, outputs, createExecutor(), createOperatorContext(filterOp)); + + List result = outputs[0].provideStream().collect(Collectors.toList()); + assertEquals(0, result.size()); + } +} diff --git a/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/java/JavaSpatialJoinOperatorTest.java b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/java/JavaSpatialJoinOperatorTest.java new file mode 100644 index 000000000..3b528adfe --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/java/JavaSpatialJoinOperatorTest.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.operators.java; + +import org.apache.wayang.basic.data.Tuple2; +import org.apache.wayang.core.api.Configuration; +import org.apache.wayang.core.api.Job; +import org.apache.wayang.core.api.spatial.SpatialPredicate; +import org.apache.wayang.core.optimizer.DefaultOptimizationContext; +import org.apache.wayang.core.optimizer.OptimizationContext; +import org.apache.wayang.core.optimizer.cardinality.CardinalityEstimate; +import org.apache.wayang.core.plan.wayangplan.Operator; +import org.apache.wayang.core.platform.CrossPlatformExecutor; +import org.apache.wayang.core.profiling.NoInstrumentationStrategy; +import org.apache.wayang.java.channels.JavaChannelInstance; +import org.apache.wayang.java.channels.StreamChannel; +import org.apache.wayang.java.execution.JavaExecutor; +import org.apache.wayang.java.platform.JavaPlatform; +import org.apache.wayang.spatial.data.WayangGeometry; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Test suite for {@link JavaSpatialJoinOperator}. 
+ */ +class JavaSpatialJoinOperatorTest { + + private static Configuration configuration; + private static Job job; + + @BeforeAll + static void init() { + configuration = new Configuration(); + job = mock(Job.class); + when(job.getConfiguration()).thenReturn(configuration); + DefaultOptimizationContext optimizationContext = new DefaultOptimizationContext(job); + when(job.getCrossPlatformExecutor()).thenReturn(new CrossPlatformExecutor(job, new NoInstrumentationStrategy())); + when(job.getOptimizationContext()).thenReturn(optimizationContext); + } + + private static JavaExecutor createExecutor() { + return new JavaExecutor(JavaPlatform.getInstance(), job); + } + + private static OptimizationContext.OperatorContext createOperatorContext(Operator operator) { + OptimizationContext optimizationContext = job.getOptimizationContext(); + final OptimizationContext.OperatorContext operatorContext = optimizationContext.addOneTimeOperator(operator); + for (int i = 0; i < operator.getNumInputs(); i++) { + operatorContext.setInputCardinality(i, new CardinalityEstimate(100, 10000, 0.1)); + } + for (int i = 0; i < operator.getNumOutputs(); i++) { + operatorContext.setOutputCardinality(i, new CardinalityEstimate(100, 10000, 0.1)); + } + return operatorContext; + } + + private static StreamChannel.Instance createStreamChannelInstance() { + return (StreamChannel.Instance) StreamChannel.DESCRIPTOR + .createChannel(null, configuration) + .createInstance(mock(JavaExecutor.class), null, -1); + } + + private static StreamChannel.Instance createStreamChannelInstance(Stream stream) { + StreamChannel.Instance instance = createStreamChannelInstance(); + instance.accept(stream); + return instance; + } + + @Test + void testIntersectsJoin() { + // Left: 3 points — two in box1, one in box2 + List left = Arrays.asList( + new WayangGeometry("POINT (0.5 0.5)"), + new WayangGeometry("POINT (0.5 0.8)"), + new WayangGeometry("POINT (5.5 5.5)") + ); + + // Right: 2 non-overlapping boxes + List right = 
Arrays.asList( + new WayangGeometry("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"), + new WayangGeometry("POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))") + ); + + JavaSpatialJoinOperator joinOp = new JavaSpatialJoinOperator<>( + w -> w, + w -> w, + WayangGeometry.class, + WayangGeometry.class, + SpatialPredicate.INTERSECTS + ); + + JavaChannelInstance[] inputs = new JavaChannelInstance[]{ + createStreamChannelInstance(left.stream()), + createStreamChannelInstance(right.stream()) + }; + JavaChannelInstance[] outputs = new JavaChannelInstance[]{createStreamChannelInstance()}; + joinOp.evaluate(inputs, outputs, createExecutor(), createOperatorContext(joinOp)); + + List> result = + outputs[0].>provideStream().collect(Collectors.toList()); + assertEquals(3, result.size()); + } + + @Test + void testJoinNoMatches() { + // Left: points far from right boxes + List left = Arrays.asList( + new WayangGeometry("POINT (100 100)"), + new WayangGeometry("POINT (200 200)") + ); + + List right = Arrays.asList( + new WayangGeometry("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"), + new WayangGeometry("POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))") + ); + + JavaSpatialJoinOperator joinOp = new JavaSpatialJoinOperator<>( + w -> w, + w -> w, + WayangGeometry.class, + WayangGeometry.class, + SpatialPredicate.INTERSECTS + ); + + JavaChannelInstance[] inputs = new JavaChannelInstance[]{ + createStreamChannelInstance(left.stream()), + createStreamChannelInstance(right.stream()) + }; + JavaChannelInstance[] outputs = new JavaChannelInstance[]{createStreamChannelInstance()}; + joinOp.evaluate(inputs, outputs, createExecutor(), createOperatorContext(joinOp)); + + List> result = + outputs[0].>provideStream().collect(Collectors.toList()); + assertEquals(0, result.size()); + } + + @Test + void testJoinWithStringKeyExtractor() { + // Input type is String (WKT), key extractors parse via WayangGeometry.fromStringInput + List left = Arrays.asList( + "POINT (0.5 0.5)", + "POINT (5.5 5.5)" + ); + + List right = Arrays.asList( + "POLYGON 
((0 0, 1 0, 1 1, 0 1, 0 0))", + "POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))" + ); + + JavaSpatialJoinOperator joinOp = new JavaSpatialJoinOperator<>( + WayangGeometry::fromStringInput, + WayangGeometry::fromStringInput, + String.class, + String.class, + SpatialPredicate.INTERSECTS + ); + + JavaChannelInstance[] inputs = new JavaChannelInstance[]{ + createStreamChannelInstance(left.stream()), + createStreamChannelInstance(right.stream()) + }; + JavaChannelInstance[] outputs = new JavaChannelInstance[]{createStreamChannelInstance()}; + joinOp.evaluate(inputs, outputs, createExecutor(), createOperatorContext(joinOp)); + + List> result = + outputs[0].>provideStream().collect(Collectors.toList()); + assertEquals(2, result.size()); + } +} diff --git a/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialFilterOperatorTest.java b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialFilterOperatorTest.java new file mode 100644 index 000000000..9fe8344d0 --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialFilterOperatorTest.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.operators.jdbc; + +import org.apache.wayang.basic.data.Record; +import org.apache.wayang.core.api.Configuration; +import org.apache.wayang.core.api.Job; +import org.apache.wayang.core.api.spatial.SpatialGeometry; +import org.apache.wayang.core.api.spatial.SpatialPredicate; +import org.apache.wayang.core.function.FunctionDescriptor; +import org.apache.wayang.core.optimizer.DefaultOptimizationContext; +import org.apache.wayang.core.plan.executionplan.ExecutionStage; +import org.apache.wayang.core.plan.executionplan.ExecutionTask; +import org.apache.wayang.core.platform.CrossPlatformExecutor; +import org.apache.wayang.core.profiling.NoInstrumentationStrategy; +import org.apache.wayang.core.types.DataSetType; +import org.apache.wayang.jdbc.channels.SqlQueryChannel; +import org.apache.wayang.jdbc.execution.JdbcExecutor; +import org.apache.wayang.jdbc.operators.JdbcTableSource; +import org.apache.wayang.jdbc.operators.SqlToStreamOperator; +import org.apache.wayang.jdbc.platform.JdbcPlatformTemplate; +import org.apache.wayang.spatial.data.WayangGeometry; +import org.apache.wayang.spatial.test.HsqldbPlatform; +import org.apache.wayang.spatial.test.HsqldbTableSource; +import org.junit.jupiter.api.Test; + +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Collections; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Test suite for {@link JdbcSpatialFilterOperator}. + * Verifies that the generated SQL contains the expected spatial predicate clause. + */ +class JdbcSpatialFilterOperatorTest { + + /** + * Concrete subclass for testing against HSQLDB. 
+ */ + private static class TestJdbcSpatialFilterOperator extends JdbcSpatialFilterOperator { + + TestJdbcSpatialFilterOperator(SpatialPredicate relation, + FunctionDescriptor.SerializableFunction keyExtractor, + DataSetType inputClassDatasetType, + SpatialGeometry geometry) { + super(relation, keyExtractor, inputClassDatasetType, geometry); + } + + @Override + public JdbcPlatformTemplate getPlatform() { + return HsqldbPlatform.getInstance(); + } + } + + @Test + void testSpatialFilterIntersectsGeneratesCorrectSql() throws SQLException { + String sql = buildSpatialFilterSql( + SpatialPredicate.INTERSECTS, + new WayangGeometry("POINT (0 0)") + ); + + assertTrue(sql.startsWith("SELECT"), + "SQL should be a SELECT statement, but was: " + sql); + assertTrue(sql.contains("FROM testGeom"), + "SQL should select from testGeom, but was: " + sql); + assertTrue(sql.contains("ST_Intersects(geom, ST_GeomFromText('POINT (0 0)', 4326))"), + "SQL should contain ST_Intersects predicate, but was: " + sql); + } + + @Test + void testSpatialFilterWithinGeneratesCorrectSql() throws SQLException { + WayangGeometry polygon = new WayangGeometry("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"); + String sql = buildSpatialFilterSql(SpatialPredicate.WITHIN, polygon); + + assertTrue(sql.contains("ST_Within(geom, ST_GeomFromText('" + polygon.getWKT() + "', 4326))"), + "SQL should contain ST_Within predicate, but was: " + sql); + } + + @Test + void testSpatialFilterContainsGeneratesCorrectSql() throws SQLException { + WayangGeometry polygon = new WayangGeometry("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"); + String sql = buildSpatialFilterSql(SpatialPredicate.CONTAINS, polygon); + + assertTrue(sql.contains("ST_Contains(geom, ST_GeomFromText('" + polygon.getWKT() + "', 4326))"), + "SQL should contain ST_Contains predicate, but was: " + sql); + } + + /** + * Sets up a JDBC execution pipeline (table source -> spatial filter -> SqlToStream) + * and returns the generated SQL query string. 
+ */ + private String buildSpatialFilterSql(SpatialPredicate predicateType, + WayangGeometry referenceGeometry) throws SQLException { + Configuration configuration = new Configuration(); + + Job job = mock(Job.class); + when(job.getConfiguration()).thenReturn(configuration); + when(job.getCrossPlatformExecutor()) + .thenReturn(new CrossPlatformExecutor(job, new NoInstrumentationStrategy())); + + HsqldbPlatform hsqldbPlatform = new HsqldbPlatform(); + SqlQueryChannel.Descriptor sqlChannelDescriptor = + HsqldbPlatform.getInstance().getSqlQueryChannelDescriptor(); + + ExecutionStage sqlStage = mock(ExecutionStage.class); + + // Create a simple test table with a "geom" column. + try (Connection jdbcConnection = + hsqldbPlatform.createDatabaseDescriptor(configuration).createJdbcConnection()) { + final Statement statement = jdbcConnection.createStatement(); + statement.execute("CREATE TABLE IF NOT EXISTS testGeom (id INT, geom VARCHAR(255));"); + } + + // Table source for testGeom. + JdbcTableSource tableSource = new HsqldbTableSource("testGeom"); + ExecutionTask tableSourceTask = new ExecutionTask(tableSource); + tableSourceTask.setOutputChannel(0, + new SqlQueryChannel(sqlChannelDescriptor, tableSource.getOutput(0))); + tableSourceTask.setStage(sqlStage); + + // Spatial filter operator with SQL implementation on the key descriptor. 
+ TestJdbcSpatialFilterOperator filterOp = new TestJdbcSpatialFilterOperator<>( + predicateType, + record -> WayangGeometry.fromStringInput((String) record.getField(1)), + DataSetType.createDefault(Record.class), + referenceGeometry + ); + filterOp.getKeyDescriptor().withSqlImplementation("testGeom", "geom"); + + ExecutionTask filterTask = new ExecutionTask(filterOp); + tableSourceTask.getOutputChannel(0).addConsumer(filterTask, 0); + filterTask.setOutputChannel(0, + new SqlQueryChannel(sqlChannelDescriptor, filterOp.getOutput(0))); + filterTask.setStage(sqlStage); + + when(sqlStage.getStartTasks()).thenReturn(Collections.singleton(tableSourceTask)); + when(sqlStage.getTerminalTasks()).thenReturn(Collections.singleton(filterTask)); + + // Next stage that consumes the SQL. + ExecutionStage nextStage = mock(ExecutionStage.class); + SqlToStreamOperator sqlToStreamOperator = new SqlToStreamOperator(HsqldbPlatform.getInstance()); + ExecutionTask sqlToStreamTask = new ExecutionTask(sqlToStreamOperator); + filterTask.getOutputChannel(0).addConsumer(sqlToStreamTask, 0); + sqlToStreamTask.setStage(nextStage); + + // Execute the SQL stage to build the SQL string. 
+ JdbcExecutor executor = new JdbcExecutor(HsqldbPlatform.getInstance(), job); + executor.execute(sqlStage, new DefaultOptimizationContext(job), job.getCrossPlatformExecutor()); + + SqlQueryChannel.Instance sqlQueryChannelInstance = + (SqlQueryChannel.Instance) job.getCrossPlatformExecutor() + .getChannelInstance(sqlToStreamTask.getInputChannel(0)); + + return sqlQueryChannelInstance.getSqlQuery(); + } +} diff --git a/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialJoinOperatorTest.java b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialJoinOperatorTest.java new file mode 100644 index 000000000..313679f65 --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/jdbc/JdbcSpatialJoinOperatorTest.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.wayang.spatial.operators.jdbc; + +import org.apache.wayang.basic.data.Record; +import org.apache.wayang.core.api.Configuration; +import org.apache.wayang.core.api.Job; +import org.apache.wayang.core.api.spatial.SpatialGeometry; +import org.apache.wayang.core.api.spatial.SpatialPredicate; +import org.apache.wayang.core.function.TransformationDescriptor; +import org.apache.wayang.core.optimizer.DefaultOptimizationContext; +import org.apache.wayang.core.plan.executionplan.ExecutionStage; +import org.apache.wayang.core.plan.executionplan.ExecutionTask; +import org.apache.wayang.core.plan.wayangplan.ExecutionOperator; +import org.apache.wayang.core.platform.CrossPlatformExecutor; +import org.apache.wayang.core.profiling.NoInstrumentationStrategy; +import org.apache.wayang.jdbc.channels.SqlQueryChannel; +import org.apache.wayang.jdbc.execution.JdbcExecutor; +import org.apache.wayang.jdbc.operators.JdbcTableSource; +import org.apache.wayang.jdbc.operators.SqlToStreamOperator; +import org.apache.wayang.jdbc.platform.JdbcPlatformTemplate; +import org.apache.wayang.spatial.data.WayangGeometry; +import org.apache.wayang.spatial.test.HsqldbPlatform; +import org.apache.wayang.spatial.test.HsqldbTableSource; +import org.junit.jupiter.api.Test; + +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Collections; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Test suite for {@link JdbcSpatialJoinOperator}. + * Verifies that the generated SQL contains the expected spatial JOIN clause. + */ +class JdbcSpatialJoinOperatorTest { + + /** + * Concrete subclass for testing against HSQLDB. 
+ */ + private static class TestJdbcSpatialJoinOperator extends JdbcSpatialJoinOperator { + + TestJdbcSpatialJoinOperator( + TransformationDescriptor keyDescriptor0, + TransformationDescriptor keyDescriptor1, + SpatialPredicate predicateType) { + super(keyDescriptor0, keyDescriptor1, predicateType); + } + + @Override + public JdbcPlatformTemplate getPlatform() { + return HsqldbPlatform.getInstance(); + } + } + + @Test + void testSpatialJoinIntersectsGeneratesCorrectSql() throws SQLException { + String sql = buildSpatialJoinSql(SpatialPredicate.INTERSECTS); + + assertEquals( + "SELECT * FROM testA JOIN testB ON ST_Intersects(testA.geom, testB.geom);", + sql + ); + } + + @Test + void testSpatialJoinContainsGeneratesCorrectSql() throws SQLException { + String sql = buildSpatialJoinSql(SpatialPredicate.CONTAINS); + + assertEquals( + "SELECT * FROM testA JOIN testB ON ST_Contains(testA.geom, testB.geom);", + sql + ); + } + + @Test + void testSpatialJoinWithinGeneratesCorrectSql() throws SQLException { + String sql = buildSpatialJoinSql(SpatialPredicate.WITHIN); + + assertEquals( + "SELECT * FROM testA JOIN testB ON ST_Within(testA.geom, testB.geom);", + sql + ); + } + + /** + * Sets up a JDBC execution pipeline (two table sources -> spatial join -> SqlToStream) + * and returns the generated SQL query string. + */ + private String buildSpatialJoinSql(SpatialPredicate predicateType) throws SQLException { + Configuration configuration = new Configuration(); + + Job job = mock(Job.class); + when(job.getConfiguration()).thenReturn(configuration); + when(job.getCrossPlatformExecutor()) + .thenReturn(new CrossPlatformExecutor(job, new NoInstrumentationStrategy())); + + HsqldbPlatform hsqldbPlatform = new HsqldbPlatform(); + SqlQueryChannel.Descriptor sqlChannelDescriptor = + HsqldbPlatform.getInstance().getSqlQueryChannelDescriptor(); + + ExecutionStage sqlStage = mock(ExecutionStage.class); + + // Create two test tables. 
+ try (Connection jdbcConnection = + hsqldbPlatform.createDatabaseDescriptor(configuration).createJdbcConnection()) { + final Statement statement = jdbcConnection.createStatement(); + statement.execute("DROP TABLE testA IF EXISTS;"); + statement.execute("DROP TABLE testB IF EXISTS;"); + statement.execute("CREATE TABLE testA (id INT, geom VARCHAR(255));"); + statement.execute("INSERT INTO testA VALUES (0, 'POINT (0 0)');"); + statement.execute("CREATE TABLE testB (id INT, geom VARCHAR(255));"); + statement.execute("INSERT INTO testB VALUES (0, 'POINT (0 0)');"); + } + + JdbcTableSource tableSourceA = new HsqldbTableSource("testA"); + JdbcTableSource tableSourceB = new HsqldbTableSource("testB"); + + ExecutionTask tableSourceATask = new ExecutionTask(tableSourceA); + tableSourceATask.setOutputChannel(0, + new SqlQueryChannel(sqlChannelDescriptor, tableSourceA.getOutput(0))); + tableSourceATask.setStage(sqlStage); + + ExecutionTask tableSourceBTask = new ExecutionTask(tableSourceB); + tableSourceBTask.setOutputChannel(0, + new SqlQueryChannel(sqlChannelDescriptor, tableSourceB.getOutput(0))); + tableSourceBTask.setStage(sqlStage); + + // Key descriptors with SQL implementation. 
+ TransformationDescriptor leftKey = + new TransformationDescriptor<>( + record -> WayangGeometry.fromStringInput((String) record.getField(1)), + Record.class, + WayangGeometry.class + ).withSqlImplementation("testA", "geom"); + + TransformationDescriptor rightKey = + new TransformationDescriptor<>( + record -> WayangGeometry.fromStringInput((String) record.getField(1)), + Record.class, + WayangGeometry.class + ).withSqlImplementation("testB", "geom"); + + final ExecutionOperator joinOp = new TestJdbcSpatialJoinOperator( + leftKey, rightKey, predicateType + ); + + ExecutionTask joinTask = new ExecutionTask(joinOp); + tableSourceATask.getOutputChannel(0).addConsumer(joinTask, 0); + tableSourceBTask.getOutputChannel(0).addConsumer(joinTask, 1); + joinTask.setOutputChannel(0, + new SqlQueryChannel(sqlChannelDescriptor, joinOp.getOutput(0))); + joinTask.setStage(sqlStage); + + when(sqlStage.getStartTasks()).thenReturn(Collections.singleton(tableSourceATask)); + when(sqlStage.getTerminalTasks()).thenReturn(Collections.singleton(joinTask)); + + // Next stage. + ExecutionStage nextStage = mock(ExecutionStage.class); + SqlToStreamOperator sqlToStreamOperator = new SqlToStreamOperator(HsqldbPlatform.getInstance()); + ExecutionTask sqlToStreamTask = new ExecutionTask(sqlToStreamOperator); + joinTask.getOutputChannel(0).addConsumer(sqlToStreamTask, 0); + sqlToStreamTask.setStage(nextStage); + + // Execute to build the SQL string. 
+ JdbcExecutor executor = new JdbcExecutor(HsqldbPlatform.getInstance(), job); + executor.execute(sqlStage, new DefaultOptimizationContext(job), job.getCrossPlatformExecutor()); + + SqlQueryChannel.Instance sqlQueryChannelInstance = + (SqlQueryChannel.Instance) job.getCrossPlatformExecutor() + .getChannelInstance(sqlToStreamTask.getInputChannel(0)); + + return sqlQueryChannelInstance.getSqlQuery(); + } +} diff --git a/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/spark/SparkSpatialFilterOperatorTest.java b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/spark/SparkSpatialFilterOperatorTest.java new file mode 100644 index 000000000..889339b06 --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/spark/SparkSpatialFilterOperatorTest.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.wayang.spatial.operators.spark; + +import org.apache.wayang.core.api.Configuration; +import org.apache.wayang.core.api.Job; +import org.apache.wayang.core.api.WayangContext; +import org.apache.wayang.core.api.spatial.SpatialPredicate; +import org.apache.wayang.core.optimizer.DefaultOptimizationContext; +import org.apache.wayang.core.optimizer.OptimizationContext; +import org.apache.wayang.core.plan.wayangplan.Operator; +import org.apache.wayang.core.plan.wayangplan.WayangPlan; +import org.apache.wayang.core.platform.ChannelInstance; +import org.apache.wayang.core.platform.CrossPlatformExecutor; +import org.apache.wayang.core.profiling.FullInstrumentationStrategy; +import org.apache.wayang.core.types.DataSetType; +import org.apache.wayang.core.util.WayangCollections; +import org.apache.wayang.spark.channels.RddChannel; +import org.apache.wayang.spark.execution.SparkExecutor; +import org.apache.wayang.spark.operators.SparkExecutionOperator; +import org.apache.wayang.spark.platform.SparkPlatform; +import org.apache.wayang.spatial.data.WayangGeometry; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.lang.reflect.Field; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; + +/** + * Test suite for {@link SparkSpatialFilterOperator}. 
+ */ +class SparkSpatialFilterOperatorTest { + + private Configuration configuration; + private SparkExecutor sparkExecutor; + private Job job; + + @BeforeEach + void setUp() { + WayangContext context = new WayangContext(new Configuration()); + this.job = context.createJob("spark-spatial-filter-test", new WayangPlan()); + this.configuration = this.job.getConfiguration(); + this.ensureCrossPlatformExecutor(); + this.sparkExecutor = (SparkExecutor) SparkPlatform.getInstance().getExecutorFactory().create(this.job); + } + + private void ensureCrossPlatformExecutor() { + try { + Field field = Job.class.getDeclaredField("crossPlatformExecutor"); + field.setAccessible(true); + if (field.get(this.job) == null) { + CrossPlatformExecutor executor = new CrossPlatformExecutor(this.job, new FullInstrumentationStrategy()); + field.set(this.job, executor); + } + } catch (ReflectiveOperationException e) { + throw new RuntimeException("Failed to initialize CrossPlatformExecutor for tests.", e); + } + } + + private OptimizationContext.OperatorContext createOperatorContext(Operator operator) { + OptimizationContext optimizationContext = new DefaultOptimizationContext(this.job); + return optimizationContext.addOneTimeOperator(operator); + } + + private void evaluate(SparkExecutionOperator operator, + ChannelInstance[] inputs, + ChannelInstance[] outputs) { + operator.evaluate(inputs, outputs, this.sparkExecutor, this.createOperatorContext(operator)); + } + + private RddChannel.Instance createRddChannelInstance() { + return (RddChannel.Instance) RddChannel.UNCACHED_DESCRIPTOR + .createChannel(null, this.configuration) + .createInstance(mock(SparkExecutor.class), null, -1); + } + + private RddChannel.Instance createRddChannelInstance(Collection collection) { + RddChannel.Instance instance = createRddChannelInstance(); + instance.accept(this.sparkExecutor.sc.parallelize(WayangCollections.asList(collection)), this.sparkExecutor); + return instance; + } + + @Test + void 
testIntersectsFilter() { + List input = Arrays.asList( + new WayangGeometry("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))"), + new WayangGeometry("POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5))"), + new WayangGeometry("POLYGON ((0.2 0.2, 0.8 0.2, 0.8 0.8, 0.2 0.8, 0.2 0.2))"), + new WayangGeometry("POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))") + ); + + WayangGeometry reference = new WayangGeometry("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"); + + SparkSpatialFilterOperator filterOp = new SparkSpatialFilterOperator<>( + SpatialPredicate.INTERSECTS, + w -> w, + DataSetType.createDefault(WayangGeometry.class), + reference + ); + + RddChannel.Instance inputChannel = this.createRddChannelInstance(input); + RddChannel.Instance outputChannel = this.createRddChannelInstance(); + + this.evaluate(filterOp, + new ChannelInstance[]{inputChannel}, + new ChannelInstance[]{outputChannel}); + + List result = outputChannel.provideRdd().collect(); + assertEquals(3, result.size()); + } + + @Test + void testWithinFilter() { + List input = Arrays.asList( + new WayangGeometry("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))"), + new WayangGeometry("POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5))"), + new WayangGeometry("POLYGON ((0.2 0.2, 0.8 0.2, 0.8 0.8, 0.2 0.8, 0.2 0.2))"), + new WayangGeometry("POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))") + ); + + WayangGeometry reference = new WayangGeometry("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"); + + SparkSpatialFilterOperator filterOp = new SparkSpatialFilterOperator<>( + SpatialPredicate.WITHIN, + w -> w, + DataSetType.createDefault(WayangGeometry.class), + reference + ); + + RddChannel.Instance inputChannel = this.createRddChannelInstance(input); + RddChannel.Instance outputChannel = this.createRddChannelInstance(); + + this.evaluate(filterOp, + new ChannelInstance[]{inputChannel}, + new ChannelInstance[]{outputChannel}); + + List result = outputChannel.provideRdd().collect(); + assertEquals(1, result.size()); + } + + @Test + void testFilterNoMatches() { + List input 
= Arrays.asList( + new WayangGeometry("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))"), + new WayangGeometry("POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5))"), + new WayangGeometry("POLYGON ((0.2 0.2, 0.8 0.2, 0.8 0.8, 0.2 0.8, 0.2 0.2))"), + new WayangGeometry("POLYGON ((5 5, 6 5, 6 6, 5 6, 5 5))") + ); + + WayangGeometry reference = new WayangGeometry("POLYGON ((100 100, 101 100, 101 101, 100 101, 100 100))"); + + SparkSpatialFilterOperator filterOp = new SparkSpatialFilterOperator<>( + SpatialPredicate.INTERSECTS, + w -> w, + DataSetType.createDefault(WayangGeometry.class), + reference + ); + + RddChannel.Instance inputChannel = this.createRddChannelInstance(input); + RddChannel.Instance outputChannel = this.createRddChannelInstance(); + + this.evaluate(filterOp, + new ChannelInstance[]{inputChannel}, + new ChannelInstance[]{outputChannel}); + + List result = outputChannel.provideRdd().collect(); + assertEquals(0, result.size()); + } +} diff --git a/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/spark/SparkSpatialJoinOperatorTest.java b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/spark/SparkSpatialJoinOperatorTest.java new file mode 100644 index 000000000..ff3ae059b --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/operators/spark/SparkSpatialJoinOperatorTest.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.operators.spark; + +import org.apache.wayang.basic.data.Tuple2; +import org.apache.wayang.core.api.Configuration; +import org.apache.wayang.core.api.Job; +import org.apache.wayang.core.api.WayangContext; +import org.apache.wayang.core.api.spatial.SpatialPredicate; +import org.apache.wayang.core.optimizer.DefaultOptimizationContext; +import org.apache.wayang.core.optimizer.OptimizationContext; +import org.apache.wayang.core.plan.wayangplan.Operator; +import org.apache.wayang.core.plan.wayangplan.WayangPlan; +import org.apache.wayang.core.platform.ChannelInstance; +import org.apache.wayang.core.platform.CrossPlatformExecutor; +import org.apache.wayang.core.profiling.FullInstrumentationStrategy; +import org.apache.wayang.core.util.WayangCollections; +import org.apache.wayang.spark.channels.RddChannel; +import org.apache.wayang.spark.execution.SparkExecutor; +import org.apache.wayang.spark.operators.SparkExecutionOperator; +import org.apache.wayang.spark.platform.SparkPlatform; +import org.apache.wayang.spatial.data.WayangGeometry; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.lang.reflect.Field; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; + +/** + * Test suite for {@link SparkSpatialJoinOperator}. 
+ */ +class SparkSpatialJoinOperatorTest { + + private Configuration configuration; + private SparkExecutor sparkExecutor; + private Job job; + + @BeforeEach + void setUp() { + WayangContext context = new WayangContext(new Configuration()); + this.job = context.createJob("spark-spatial-join-test", new WayangPlan()); + this.configuration = this.job.getConfiguration(); + this.ensureCrossPlatformExecutor(); + this.sparkExecutor = (SparkExecutor) SparkPlatform.getInstance().getExecutorFactory().create(this.job); + } + + private void ensureCrossPlatformExecutor() { + try { + Field field = Job.class.getDeclaredField("crossPlatformExecutor"); + field.setAccessible(true); + if (field.get(this.job) == null) { + CrossPlatformExecutor executor = new CrossPlatformExecutor(this.job, new FullInstrumentationStrategy()); + field.set(this.job, executor); + } + } catch (ReflectiveOperationException e) { + throw new RuntimeException("Failed to initialize CrossPlatformExecutor for tests.", e); + } + } + + private OptimizationContext.OperatorContext createOperatorContext(Operator operator) { + OptimizationContext optimizationContext = new DefaultOptimizationContext(this.job); + return optimizationContext.addOneTimeOperator(operator); + } + + private void evaluate(SparkExecutionOperator operator, + ChannelInstance[] inputs, + ChannelInstance[] outputs) { + operator.evaluate(inputs, outputs, this.sparkExecutor, this.createOperatorContext(operator)); + } + + private RddChannel.Instance createRddChannelInstance() { + return (RddChannel.Instance) RddChannel.UNCACHED_DESCRIPTOR + .createChannel(null, this.configuration) + .createInstance(mock(SparkExecutor.class), null, -1); + } + + private RddChannel.Instance createRddChannelInstance(Collection collection) { + RddChannel.Instance instance = createRddChannelInstance(); + instance.accept(this.sparkExecutor.sc.parallelize(WayangCollections.asList(collection)), this.sparkExecutor); + return instance; + } + + @Test + void testIntersectsJoin() { 
+ // Left: 3 polygons, two overlap with the right polygon, one doesn't + List left = Arrays.asList( + WayangGeometry.fromStringInput("POLYGON ((0 0, 0 0.1, 0.1 0.1, 0.1 0, 0 0))"), + WayangGeometry.fromStringInput("POLYGON ((0.2 0.2, 0.2 0.3, 0.3 0.3, 0.3 0.2, 0.2 0.2))"), + WayangGeometry.fromStringInput("POLYGON ((0.4 0, 0.4 0.5, 0.5 0.5, 0.5 0.4, 0.4 0))") + ); + + // Right: 1 polygon that overlaps with polygon #2 from left + List right = Arrays.asList( + WayangGeometry.fromStringInput("POLYGON ((0.9 0.9, 0.9 1, 1 1, 1 0.9, 0.9 0.9))"), + WayangGeometry.fromStringInput("POLYGON ((0.2 0.2, 0.2 0.3, 0.3 0.3, 0.3 0.2, 0.2 0.2))") + ); + + SparkSpatialJoinOperator joinOp = new SparkSpatialJoinOperator<>( + w -> w, + w -> w, + WayangGeometry.class, + WayangGeometry.class, + SpatialPredicate.INTERSECTS + ); + + RddChannel.Instance leftChannel = this.createRddChannelInstance(left); + RddChannel.Instance rightChannel = this.createRddChannelInstance(right); + RddChannel.Instance outputChannel = this.createRddChannelInstance(); + + this.evaluate(joinOp, + new ChannelInstance[]{leftChannel, rightChannel}, + new ChannelInstance[]{outputChannel}); + + List> result = + outputChannel.>provideRdd().collect(); + assertEquals(1, result.size()); + } + + @Test + void testJoinNoMatches() { + List left = Arrays.asList( + WayangGeometry.fromStringInput("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"), + WayangGeometry.fromStringInput("POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))") + ); + + List right = Arrays.asList( + WayangGeometry.fromStringInput("POLYGON ((10 10, 10 11, 11 11, 11 10, 10 10))"), + WayangGeometry.fromStringInput("POLYGON ((20 20, 20 21, 21 21, 21 20, 20 20))") + ); + + SparkSpatialJoinOperator joinOp = new SparkSpatialJoinOperator<>( + w -> w, + w -> w, + WayangGeometry.class, + WayangGeometry.class, + SpatialPredicate.INTERSECTS + ); + + RddChannel.Instance leftChannel = this.createRddChannelInstance(left); + RddChannel.Instance rightChannel = this.createRddChannelInstance(right); + 
RddChannel.Instance outputChannel = this.createRddChannelInstance(); + + this.evaluate(joinOp, + new ChannelInstance[]{leftChannel, rightChannel}, + new ChannelInstance[]{outputChannel}); + + List> result = + outputChannel.>provideRdd().collect(); + assertEquals(0, result.size()); + } +} diff --git a/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/test/HsqldbPlatform.java b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/test/HsqldbPlatform.java new file mode 100644 index 000000000..79f5bb51e --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/test/HsqldbPlatform.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.test; + +import org.apache.wayang.jdbc.platform.JdbcPlatformTemplate; + +/** + * {@link JdbcPlatformTemplate} implementation based on HSQLDB for test purposes. 
+ */ +public class HsqldbPlatform extends JdbcPlatformTemplate { + + private static final HsqldbPlatform instance = new HsqldbPlatform(); + + public HsqldbPlatform() { + super("HSQLDB (test)", "hsqldb"); + } + + public static HsqldbPlatform getInstance() { + return instance; + } + + @Override + protected String getJdbcDriverClassName() { + return org.hsqldb.jdbc.JDBCDriver.class.getName(); + } +} diff --git a/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/test/HsqldbTableSource.java b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/test/HsqldbTableSource.java new file mode 100644 index 000000000..7eca106a0 --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/test/java/org/apache/wayang/spatial/test/HsqldbTableSource.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.wayang.spatial.test; + +import org.apache.wayang.basic.operators.TableSource; +import org.apache.wayang.core.platform.ChannelDescriptor; +import org.apache.wayang.jdbc.operators.JdbcTableSource; + +import java.util.List; + +/** + * Test implementation of {@link JdbcTableSource} backed by HSQLDB. 
+ */ +public class HsqldbTableSource extends JdbcTableSource { + + public HsqldbTableSource(String tableName, String... columnNames) { + super(tableName, columnNames); + } + + @Override + public HsqldbPlatform getPlatform() { + return HsqldbPlatform.getInstance(); + } + + @Override + public List getSupportedInputChannels(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public List getSupportedOutputChannels(int index) { + throw new UnsupportedOperationException(); + } +} diff --git a/wayang-plugins/wayang-spatial/src/test/resources/geojson-sample.json b/wayang-plugins/wayang-spatial/src/test/resources/geojson-sample.json new file mode 100644 index 000000000..988623d3f --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/test/resources/geojson-sample.json @@ -0,0 +1,33 @@ +{ "type": "FeatureCollection", + "features": [ + { "type": "Feature", + "geometry": {"type": "Point", "coordinates": [102.0, 0.5]}, + "properties": {"prop0": "value0"} + }, + { "type": "Feature", + "geometry": { + "type": "LineString", + "coordinates": [ + [102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0] + ] + }, + "properties": { + "prop0": "value1", + "prop1": 0.0 + } + }, + { "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ [100.0, 0.0], [101.0, 0.0], [101.0, 1.0], + [100.0, 1.0], [100.0, 0.0] ] + ] + }, + "properties": { + "prop0": "value2", + "prop1": {"this": "that"} + } + } + ] +} \ No newline at end of file diff --git a/wayang-plugins/wayang-spatial/src/test/resources/uniform.csv b/wayang-plugins/wayang-spatial/src/test/resources/uniform.csv new file mode 100644 index 000000000..d7adc9555 --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/test/resources/uniform.csv @@ -0,0 +1,114 @@ +0.5990458879227311,0.6852399722341064,0.6467575018566728,0.7983340062873524 +0.7390284178914223,0.8943876374027306,0.7407687315884391,0.9502623559281034 +0.916329702245795,0.6421821952577356,0.9703837317508324,0.6557669110161128 
+0.4527562220050858,0.21213636134414426,0.48538187355924167,0.28100930389551637 +0.004729718608568894,0.16174907815533504,0.021498660569235512,0.2717105227723613 +0.7418110418862056,0.15127816725410403,0.7896398613720779,0.16793025746197243 +0.6173992746000406,0.0744149478668128,0.6175057663321927,0.17898351723372663 +0.17998374923034013,0.16313670329852414,0.23892901575989564,0.2678256351509404 +0.2731284636808022,0.920808160315048,0.3054818718130508,1.002147817585119 +0.1840602563005856,0.8829821423489673,0.2254987727670001,0.9989698598270308 +0.882905979534807,0.288831534426083,0.90457737561815,0.3087462612816525 +0.13666113651759865,0.028953113531216997,0.1547426825637664,0.10132631322013552 +-0.006753786474479197,0.6593367736515034,0.013520025223192712,0.696531725443879 +0.8090442814716775,0.45187208345582713,0.8279918678224641,0.5096182898042508 +0.6754161472465822,0.055628995740626706,0.7339221210352364,0.058372863330953186 +0.7492529962299174,0.7976165911493036,0.7503370483526292,0.8921451876269557 +0.3659121242372747,0.5757152019349381,0.36645682744644636,0.5813224561788111 +0.1750238378356576,0.9098357148440873,0.1868151380664381,1.0005240843382455 +0.9193238652059308,0.9207562371242246,0.9399867739891115,0.9633314217311741 +0.521460234621175,0.7306991313099155,0.527943406765065,0.8205068980880751 +0.7961277301930479,0.8029463080850491,0.7983256249896177,0.9164423302901111 +0.07285503802898988,0.28565530437222353,0.10950469031454732,0.39582576663622093 +0.32360520551289745,0.9054505972672162,0.35631384773879177,0.9429446415883651 +0.31445409869676,0.16854569282759363,0.3191458726299944,0.18640985818238348 +0.6843287034161367,0.9938136847237965,0.6940204712816136,0.9996399443496112 +0.9745224681473852,0.5192905451295771,0.9988757494211495,0.5477709375920916 +0.5802902676485769,0.8009900748839313,0.6076301737380169,0.8516009487133219 +0.05472559440881516,0.886455496536741,0.056688867606971706,0.9456832003472408 
+0.8164793484668319,0.07358391339580607,0.8603792138694325,0.18755974091043393 +0.6272003751354768,0.7619362383443308,0.6335982117173123,0.8140828612163832 +0.14040095675389871,0.8175446021953644,0.15808973232661697,0.8719231918153615 +0.970019720977069,0.8250402715182455,1.0285801751618668,0.8794650702692403 +0.47378772505088124,0.7120436627380357,0.5025302720595903,0.7469663767956726 +0.39247939279078553,0.08720310047757057,0.4150994376579084,0.20580962452639234 +0.9448362099629809,0.6066562453384494,0.9747955570029482,0.6472736965712287 +0.06567684714063256,0.22026692572988116,0.11259798540402226,0.3243533425431574 +0.3380792902591662,0.7443467746667766,0.38457317578628475,0.827698384600884 +0.6531154920722476,0.7173705619990872,0.674921397872999,0.8019069108798916 +0.25775963161739546,0.4442322464428792,0.3039444800068526,0.5271382012055632 +0.27436128133182175,0.9107079038691823,0.31334281028989625,0.9803872030316494 +0.004059888381709859,0.5066923166609996,0.019101425663240173,0.5872894675294915 +0.44351588054461305,0.7688221108835555,0.4823620971965466,0.864537224484558 +0.3257496874462827,0.5943717528959639,0.37001932860911707,0.6937543930865077 +0.3239478138560361,0.8015793697944729,0.3761424890037057,0.8841798046191041 +0.9605780053885201,0.9247560721302551,0.9916663712153616,0.9882768818225826 +0.13805116692958436,0.807985945381652,0.19429406839809443,0.8652546173259645 +0.6695149820770153,0.7093768706459661,0.7133361876110415,0.7299960812022174 +0.7604049413512042,0.5555977783970177,0.800338315101845,0.6060928220645722 +0.6046310934058325,0.7314798774374063,0.6428430935094431,0.8179296143866266 +0.014389963275765638,0.12101604398789736,0.04085406820715226,0.1990300915933607 +0.20011220945003086,0.6834455296497423,0.23796411917024116,0.6884689342954847 +0.4699612142369572,0.21823098256510312,0.4732097948827911,0.23425420874651678 +0.311550038766346,0.17940776018599028,0.3231516448738435,0.1836866662471834 
+0.4469763452480773,0.3448889862765522,0.4836839822922583,0.4157087145357278 +0.2378265911899478,0.8788597781724075,0.23786622813739505,0.9275048679943183 +0.27507583771041827,0.3601601111409564,0.2819803040215401,0.4599245183112906 +0.3554727331945985,0.0303728107528094,0.3922865786657642,0.04175141042210017 +0.5277939760226901,0.2818752823025421,0.5626477503139037,0.39687134274989355 +0.7941323042828005,0.3805641342471513,0.8429116952872079,0.457639719259336 +0.35156459045402616,0.10828093892450308,0.38732084899361924,0.1759440355405012 +0.9389542943718466,0.9469299537219775,0.9754708879581265,0.9890638835896335 +0.8902294489136134,-0.03300124252687952,0.8967045492786153,0.034896199083644046 +0.5962843501800941,0.08722357743667163,0.6340517279186739,0.1941773910768141 +0.36905822801886934,0.41419411858846206,0.3826383798867702,0.4491734683920064 +0.9436204547632455,0.32495569776259287,1.001288800566997,0.4346053698169677 +0.5663805613288762,0.23004632373125933,0.6252393950743569,0.2896029843098693 +0.38596341982816645,0.28964726544472114,0.44502001061212987,0.348657831556713 +0.2827417718879107,0.43963450242672164,0.29005493263224646,0.5142368970917521 +0.42001644485747136,0.2434947809311238,0.46691882267291474,0.3427114174032198 +0.00498966807866417,0.4764494637538396,0.021416925248509154,0.5886798395870034 +0.7738765829909259,0.23637424265467694,0.7899378936892775,0.25494612137530875 +0.9705840763893285,0.2647103697148771,1.0070663318448,0.32166614073035943 +0.6225724309431874,0.5967629843927131,0.6671808252508319,0.6109444636227948 +0.7443937418221795,0.2805224602336671,0.7764036170190063,0.32085705771417716 +0.28289206371294734,0.508205144745932,0.3107522681487271,0.5515298706853503 +0.7439243170064157,0.5756593202001297,0.7461100518161284,0.605950073940108 +0.42897550527088535,0.8838376825759551,0.48225193982091163,0.9493087570300056 +0.0017285704003554992,0.7438441578014942,0.027392462398985698,0.8129211721485264 
+0.6937248451076973,0.5775324268519313,0.7226375888215647,0.6869382643650208 +0.35991610895195186,0.3800776011377153,0.41103019220611453,0.403653780194089 +0.29446697090547647,0.7798468223065371,0.2984302276685399,0.8802002581834072 +0.6860306543823025,0.38597635160211935,0.7032123192284223,0.4796638502959875 +0.8963232390589541,0.10973845744228358,0.9250279957190725,0.17562989815153776 +0.4930760905903206,0.2955858718265622,0.502287755427425,0.36588814057729496 +0.8049224628548171,0.019243688788432557,0.8187236183626221,0.11759493706931198 +0.7909839058140369,0.6202454903454034,0.792517106256702,0.7069573206300014 +0.9576485459812274,0.9954009024639117,0.9997234349944095,1.0012680532981517 +0.822690515425995,0.162097408532248,0.8614347608170433,0.2763668100627128 +0.7036685785924889,0.07954601979087932,0.7212181531341354,0.18970575884231405 +0.1373119182811989,0.6009411457274286,0.16214809318741788,0.6202827329137902 +0.6191586047659252,0.020810942432131573,0.6256514878790465,0.06631482914270923 +0.05474626910924761,0.01302098786228515,0.08926161157653513,0.10210150282841288 +0.865507003672247,0.11545869580537309,0.8914072338228145,0.15320705019755274 +0.5720614887756044,0.4671278698631763,0.6283737695152194,0.5120316544792843 +0.05121794418374389,0.6594155229184584,0.0602844966597245,0.7351770348504265 +0.48919737744137676,0.8731698711867295,0.5224908295030375,0.9476752103507993 +0.2556210152671329,0.5066405822234284,0.2708724307132092,0.5967100159275444 +0.5099630960762344,0.11150800303242,0.5240283148810868,0.15605369802735083 +0.715354003670386,0.140019563963164,0.7581517822255519,0.21862121307610766 +0.08250800778958188,0.6604657182511552,0.08797870426719691,0.6754407133633958 +0.567666458931731,0.20975701922218556,0.6202820578767386,0.2674131708417098 +0.32240240679167814,0.7529631834300263,0.32417006547627497,0.8399639823569336 +0.02509794812920067,0.10993430880423688,0.08222043604495562,0.19166781553837592 
+0.1939071002026296,0.07617400880374292,0.2522689481597377,0.15598153899061573 +0.8018522582219899,0.11850732737288268,0.8393401066420746,0.1610231188806786 +0.2166061229902681,0.31234477611465383,0.2534315651726861,0.354185384946335 +0.3608149340554291,0.09755202177887976,0.41068096517688224,0.17529878557083958 +0.7789559006726312,0.40234502597947636,0.830050468908305,0.4644439716892567 +0.5704220092121891,0.5495278597703872,0.6148316108322683,0.5969890476664732 +0.0909265259000231,0.030798403448909405,0.1030709769798082,0.03553277727770419 +0.8664470318456158,0.480965690878243,0.9120678739813958,0.48101844673504396 +0.4516026021755871,0.8640581947208218,0.48877052499501394,0.9154955753237107 +0.460933295281955,0.09020598744847245,0.47021369093283133,0.10929091048146296 +0.34822380866838537,0.37653064575636896,0.4010443784531727,0.3947843159887213 diff --git a/wayang-plugins/wayang-spatial/src/test/resources/wayang-hsqldb-defaults.properties b/wayang-plugins/wayang-spatial/src/test/resources/wayang-hsqldb-defaults.properties new file mode 100644 index 000000000..a6acb4b8e --- /dev/null +++ b/wayang-plugins/wayang-spatial/src/test/resources/wayang-hsqldb-defaults.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Use in-memory HSQLDB to avoid messing with files in the tests. +wayang.hsqldb.jdbc.url = jdbc:hsqldb:mem:testdb +wayang.hsqldb.costs.fix = 0.0 +wayang.hsqldb.costs.per-ms = 1.0