diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala
index e3a0a60a96991..e0e6fe4dc3787 100644
--- a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala
@@ -22,6 +22,7 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.avro.AvroOptions
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.connector.read.PartitionReaderFactory
import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex
@@ -31,6 +32,7 @@ import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.util.ArrayImplicits._
import org.apache.spark.util.SerializableConfiguration
+import org.apache.spark.util.collection.BitSet
case class AvroScan(
sparkSession: SparkSession,
@@ -41,7 +43,13 @@ case class AvroScan(
options: CaseInsensitiveStringMap,
pushedFilters: Array[Filter],
partitionFilters: Seq[Expression] = Seq.empty,
- dataFilters: Seq[Expression] = Seq.empty) extends FileScan {
+ dataFilters: Seq[Expression] = Seq.empty,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val disableBucketedScan: Boolean = false,
+ override val optionalBucketSet: Option[BitSet] = None,
+ override val optionalNumCoalescedBuckets: Option[Int] = None,
+ override val requestedMetadataFields: StructType = StructType(Seq.empty))
+ extends FileScan {
override def isSplitable(path: Path): Boolean = true
override def createReaderFactory(): PartitionReaderFactory = {
@@ -52,7 +60,7 @@ case class AvroScan(
val parsedOptions = new AvroOptions(caseSensitiveMap, hadoopConf)
// The partition values are already truncated in `FileScan.partitions`.
// We should use `readPartitionSchema` as the partition schema here.
- AvroPartitionReaderFactory(
+ val baseFactory = AvroPartitionReaderFactory(
conf,
broadcastedConf,
dataSchema,
@@ -60,6 +68,7 @@ case class AvroScan(
readPartitionSchema,
parsedOptions,
pushedFilters.toImmutableArraySeq)
+ wrapWithMetadataIfNeeded(baseFactory, readDataSchema, options)
}
override def equals(obj: Any): Boolean = obj match {
@@ -70,6 +79,15 @@ case class AvroScan(
override def hashCode(): Int = super.hashCode()
+ override def withFileIndex(newFI: PartitioningAwareFileIndex): AvroScan =
+ copy(fileIndex = newFI)
+
+ override def withDisableBucketedScan(disable: Boolean): AvroScan =
+ copy(disableBucketedScan = disable)
+
+ override def withNumCoalescedBuckets(n: Option[Int]): AvroScan =
+ copy(optionalNumCoalescedBuckets = n)
+
override def getMetaData(): Map[String, String] = {
super.getMetaData() ++ Map("PushedFilters" -> seqToString(pushedFilters.toImmutableArraySeq))
}
diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala
index 754c58e65b016..4f0574f3d84c9 100644
--- a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala
@@ -18,6 +18,7 @@ package org.apache.spark.sql.v2.avro
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.StructFilters
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex
import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder
import org.apache.spark.sql.sources.Filter
@@ -29,10 +30,12 @@ case class AvroScanBuilder (
fileIndex: PartitioningAwareFileIndex,
schema: StructType,
dataSchema: StructType,
- options: CaseInsensitiveStringMap)
- extends FileScanBuilder(sparkSession, fileIndex, dataSchema) {
+ options: CaseInsensitiveStringMap,
+ override val bucketSpec: Option[BucketSpec] = None)
+ extends FileScanBuilder(sparkSession, fileIndex, dataSchema, bucketSpec) {
override def build(): AvroScan = {
+ val optBucketSet = computeBucketSet()
AvroScan(
sparkSession,
fileIndex,
@@ -42,7 +45,10 @@ case class AvroScanBuilder (
options,
pushedDataFilters,
partitionFilters,
- dataFilters)
+ dataFilters,
+ bucketSpec = bucketSpec,
+ optionalBucketSet = optBucketSet,
+ requestedMetadataFields = requestedMetadataFields)
}
override def pushDataFilters(dataFilters: Array[Filter]): Array[Filter] = {
diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala
index e898253be1168..c9f809c133c6a 100644
--- a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala
@@ -22,7 +22,7 @@ import org.apache.hadoop.fs.FileStatus
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.avro.AvroUtils
-import org.apache.spark.sql.connector.write.{LogicalWriteInfo, Write, WriteBuilder}
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
import org.apache.spark.sql.execution.datasources.FileFormat
import org.apache.spark.sql.execution.datasources.v2.FileTable
import org.apache.spark.sql.types.{DataType, StructType}
@@ -37,19 +37,21 @@ case class AvroTable(
fallbackFileFormat: Class[_ <: FileFormat])
extends FileTable(sparkSession, options, paths, userSpecifiedSchema) {
override def newScanBuilder(options: CaseInsensitiveStringMap): AvroScanBuilder =
- AvroScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options))
+ AvroScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options),
+ bucketSpec)
override def inferSchema(files: Seq[FileStatus]): Option[StructType] =
AvroUtils.inferSchema(sparkSession, options.asScala.toMap, files)
override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
- new WriteBuilder {
- override def build(): Write =
- AvroWrite(paths, formatName, supportsDataType, mergedWriteInfo(info))
+ createFileWriteBuilder(info) {
+ (mergedInfo, partSchema, bSpec, customLocs, dynamicOverwrite, truncate, overPreds) =>
+ AvroWrite(paths, formatName, supportsDataType, mergedInfo, partSchema, bSpec,
+ overPreds, customLocs, dynamicOverwrite, truncate)
}
}
override def supportsDataType(dataType: DataType): Boolean = AvroUtils.supportsDataType(dataType)
- override def formatName: String = "AVRO"
+ override def formatName: String = "Avro"
}
diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroWrite.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroWrite.scala
index 3a91fd0c73d1a..ff417f1bad137 100644
--- a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroWrite.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroWrite.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.v2.avro
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.sql.avro.AvroUtils
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
+import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.connector.write.LogicalWriteInfo
import org.apache.spark.sql.execution.datasources.OutputWriterFactory
import org.apache.spark.sql.execution.datasources.v2.FileWrite
@@ -29,7 +31,13 @@ case class AvroWrite(
paths: Seq[String],
formatName: String,
supportsDataType: DataType => Boolean,
- info: LogicalWriteInfo) extends FileWrite {
+ info: LogicalWriteInfo,
+ partitionSchema: StructType,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val overwritePredicates: Option[Array[Predicate]] = None,
+ override val customPartitionLocations: Map[Map[String, String], String] = Map.empty,
+ override val dynamicPartitionOverwrite: Boolean,
+ override val isTruncate: Boolean) extends FileWrite {
override def prepareWrite(
sqlConf: SQLConf,
job: Job,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 3b4d725840935..2812f45ea694d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1159,14 +1159,16 @@ class Analyzer(
throw QueryCompilationErrors.unsupportedInsertReplaceOnOrUsing(
i.table.asInstanceOf[DataSourceV2Relation].table.name())
- case i: InsertIntoStatement
- if i.table.isInstanceOf[DataSourceV2Relation] &&
- i.query.resolved &&
- i.replaceCriteriaOpt.isEmpty =>
- val r = i.table.asInstanceOf[DataSourceV2Relation]
- // ifPartitionNotExists is append with validation, but validation is not supported
+ case i @ InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _, _, _, _, _)
+ if i.query.resolved && i.replaceCriteriaOpt.isEmpty =>
+ // SPARK-56304: allow ifPartitionNotExists for tables that
+ // support partition management and overwrite-by-filter
if (i.ifPartitionNotExists) {
- throw QueryCompilationErrors.unsupportedIfNotExistsError(r.table.name)
+ val caps = r.table.capabilities
+ if (!caps.contains(TableCapability.OVERWRITE_BY_FILTER) ||
+ !r.table.isInstanceOf[SupportsPartitionManagement]) {
+ throw QueryCompilationErrors.unsupportedIfNotExistsError(r.table.name)
+ }
}
// Create a project if this is an INSERT INTO BY NAME query.
@@ -1209,17 +1211,27 @@ class Analyzer(
withSchemaEvolution = i.withSchemaEvolution)
}
} else {
+ val extraOpts = if (i.ifPartitionNotExists) {
+ Map("ifPartitionNotExists" -> "true") ++
+ staticPartitions.map { case (k, v) =>
+ s"__staticPartition.$k" -> v
+ }
+ } else {
+ Map.empty[String, String]
+ }
if (isByName) {
OverwriteByExpression.byName(
table = r,
df = query,
deleteExpr = staticDeleteExpression(r, staticPartitions),
+ writeOptions = extraOpts,
withSchemaEvolution = i.withSchemaEvolution)
} else {
OverwriteByExpression.byPosition(
table = r,
query = query,
deleteExpr = staticDeleteExpression(r, staticPartitions),
+ writeOptions = extraOpts,
withSchemaEvolution = i.withSchemaEvolution)
}
}
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/v2/CompositeStructColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/v2/CompositeStructColumnVector.java
new file mode 100644
index 0000000000000..aa141f217f3f8
--- /dev/null
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/v2/CompositeStructColumnVector.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2;
+
+import org.apache.spark.sql.types.Decimal;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.vectorized.ColumnVector;
+import org.apache.spark.sql.vectorized.ColumnarArray;
+import org.apache.spark.sql.vectorized.ColumnarMap;
+import org.apache.spark.unsafe.types.UTF8String;
+
+/**
+ * A struct-typed {@link ColumnVector} backed by a fixed array of arbitrary child vectors.
+ * Composes the V2 {@code _metadata} struct column from per-file
+ * {@link org.apache.spark.sql.execution.vectorized.ConstantColumnVector}s (for constant
+ * fields like {@code file_path}) and per-row child vectors supplied by the format reader
+ * (e.g., Parquet's {@code _tmp_metadata_row_index}).
+ *
+ *
Intentionally minimal: only {@link #getChild(int)}, {@link #isNullAt(int)}, and
+ * {@link #close()} carry behavior. The parent {@link ColumnVector#getStruct(int)} routes
+ * struct field access through {@code getChild}, so scalar getters are never called and
+ * throw if invoked.
+ *
+ *
Children are owned by their producers (the input batch or the metadata wrapper); this
+ * class does not close them.
+ */
+final class CompositeStructColumnVector extends ColumnVector {
+
+ private final ColumnVector[] children;
+
+ CompositeStructColumnVector(StructType type, ColumnVector[] children) {
+ super(type);
+ if (children.length != type.fields().length) {
+ throw new IllegalArgumentException(
+ "Children count " + children.length + " does not match struct field count "
+ + type.fields().length);
+ }
+ for (int i = 0; i < children.length; i++) {
+ if (children[i] == null) {
+ throw new IllegalArgumentException("Child column vector at index " + i + " is null");
+ }
+ }
+ this.children = children;
+ }
+
+ @Override
+ public void close() {
+ // Children are owned by the underlying batch / extractor; do not close them here.
+ }
+
+ @Override
+ public boolean hasNull() {
+ return false;
+ }
+
+ @Override
+ public int numNulls() {
+ return 0;
+ }
+
+ @Override
+ public boolean isNullAt(int rowId) {
+ return false;
+ }
+
+ @Override
+ public ColumnVector getChild(int ordinal) {
+ return children[ordinal];
+ }
+
+ // Scalar accessors are unreachable for a struct vector and exist only to satisfy the
+ // abstract base class contract.
+ @Override
+ public boolean getBoolean(int rowId) { throw unsupported("getBoolean"); }
+ @Override
+ public byte getByte(int rowId) { throw unsupported("getByte"); }
+ @Override
+ public short getShort(int rowId) { throw unsupported("getShort"); }
+ @Override
+ public int getInt(int rowId) { throw unsupported("getInt"); }
+ @Override
+ public long getLong(int rowId) { throw unsupported("getLong"); }
+ @Override
+ public float getFloat(int rowId) { throw unsupported("getFloat"); }
+ @Override
+ public double getDouble(int rowId) { throw unsupported("getDouble"); }
+ @Override
+ public ColumnarArray getArray(int rowId) { throw unsupported("getArray"); }
+ @Override
+ public ColumnarMap getMap(int ordinal) { throw unsupported("getMap"); }
+ @Override
+ public Decimal getDecimal(int rowId, int precision, int scale) {
+ throw unsupported("getDecimal");
+ }
+ @Override
+ public UTF8String getUTF8String(int rowId) { throw unsupported("getUTF8String"); }
+ @Override
+ public byte[] getBinary(int rowId) { throw unsupported("getBinary"); }
+
+ private UnsupportedOperationException unsupported(String method) {
+ return new UnsupportedOperationException(
+ method + " is not supported on " + getClass().getSimpleName()
+ + "; access struct fields via getChild()");
+ }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDataSource.scala
index 2f139393ade38..577b33f7974eb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDataSource.scala
@@ -43,6 +43,24 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
/** Resolves the relations created from the DataFrameReader and DataStreamReader APIs. */
class ResolveDataSource(sparkSession: SparkSession) extends Rule[LogicalPlan] {
+ /**
+ * Returns true if the provider is a FileDataSourceV2 AND the provider's short name or
+ * class name is in USE_V1_SOURCE_LIST. When true, streaming falls back to V1.
+ */
+ private def isV1FileSource(provider: TableProvider): Boolean = {
+ provider.isInstanceOf[FileDataSourceV2] && {
+ val v1Sources = sparkSession.sessionState.conf
+ .getConf(org.apache.spark.sql.internal.SQLConf.USE_V1_SOURCE_LIST)
+ .toLowerCase(Locale.ROOT).split(",").map(_.trim)
+ val shortName = provider match {
+ case d: org.apache.spark.sql.sources.DataSourceRegister => d.shortName()
+ case _ => ""
+ }
+ v1Sources.contains(shortName.toLowerCase(Locale.ROOT)) ||
+ v1Sources.contains(provider.getClass.getCanonicalName.toLowerCase(Locale.ROOT))
+ }
+ }
+
override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
case UnresolvedDataSource(source, userSpecifiedSchema, extraOptions, false, paths) =>
// Batch data source created from DataFrameReader
@@ -92,8 +110,7 @@ class ResolveDataSource(sparkSession: SparkSession) extends Rule[LogicalPlan] {
case _ => None
}
ds match {
- // file source v2 does not support streaming yet.
- case provider: TableProvider if !provider.isInstanceOf[FileDataSourceV2] =>
+ case provider: TableProvider if !isV1FileSource(provider) =>
val sessionOptions = DataSourceV2Utils.extractSessionConfigs(
source = provider, conf = sparkSession.sessionState.conf)
val finalOptions =
@@ -153,8 +170,7 @@ class ResolveDataSource(sparkSession: SparkSession) extends Rule[LogicalPlan] {
case _ => None
}
ds match {
- // file source v2 does not support streaming yet.
- case provider: TableProvider if !provider.isInstanceOf[FileDataSourceV2] =>
+ case provider: TableProvider if !isV1FileSource(provider) =>
val sessionOptions = DataSourceV2Utils.extractSessionConfigs(
source = provider, conf = sparkSession.sessionState.conf)
val finalOptions =
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index d940411349408..7866d1fed6353 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.connector.expressions.Transform
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.{CreateTable => CreateTableV1, LogicalRelation}
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils
+import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Utils, FileDataSourceV2, FileTable}
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
import org.apache.spark.sql.internal.connector.V1Function
import org.apache.spark.sql.types.{DataType, MetadataBuilder, StringType, StructField, StructType}
@@ -247,7 +247,34 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
constructV1TableCmd(None, c.tableSpec, ident, StructType(fields), c.partitioning,
c.ignoreIfExists, storageFormat, provider)
} else {
- c
+ // File sources: validate data types and create via
+ // V1 command. Non-file V2 providers keep V2 plan.
+ DataSourceV2Utils.getTableProvider(
+ provider, conf) match {
+ case Some(f: FileDataSourceV2) =>
+ val ft = f.getTable(
+ c.tableSchema, c.partitioning.toArray,
+ new org.apache.spark.sql.util
+ .CaseInsensitiveStringMap(
+ java.util.Collections.emptyMap()))
+ ft match {
+ case ft: FileTable =>
+ c.tableSchema.foreach { field =>
+ if (!ft.supportsDataType(
+ field.dataType)) {
+ throw QueryCompilationErrors
+ .dataTypeUnsupportedByDataSourceError(
+ ft.formatName, field)
+ }
+ }
+ case _ =>
+ }
+ constructV1TableCmd(None, c.tableSpec, ident,
+ StructType(c.columns.map(_.toV1Column)),
+ c.partitioning,
+ c.ignoreIfExists, storageFormat, provider)
+ case _ => c
+ }
}
case c @ CreateTableAsSelect(
@@ -267,7 +294,17 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
constructV1TableCmd(Some(c.query), c.tableSpec, ident, new StructType, c.partitioning,
c.ignoreIfExists, storageFormat, provider)
} else {
- c
+ // File sources: create via V1 command.
+ // Non-file V2 providers keep V2 plan.
+ DataSourceV2Utils.getTableProvider(
+ provider, conf) match {
+ case Some(_: FileDataSourceV2) =>
+ constructV1TableCmd(Some(c.query),
+ c.tableSpec, ident, new StructType,
+ c.partitioning, c.ignoreIfExists,
+ storageFormat, provider)
+ case _ => c
+ }
}
case RefreshTable(ResolvedV1TableOrViewIdentifier(ident)) =>
@@ -281,7 +318,16 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
throw QueryCompilationErrors.unsupportedTableOperationError(
ident, "REPLACE TABLE")
} else {
- c
+ // File sources don't support REPLACE TABLE in
+ // the session catalog (requires StagingTableCatalog).
+ DataSourceV2Utils.getTableProvider(
+ provider, conf) match {
+ case Some(_: FileDataSourceV2) =>
+ throw QueryCompilationErrors
+ .unsupportedTableOperationError(
+ ident, "REPLACE TABLE")
+ case _ => c
+ }
}
case c @ ReplaceTableAsSelect(ResolvedV1Identifier(ident), _, _, _, _, _, _) =>
@@ -290,7 +336,14 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
throw QueryCompilationErrors.unsupportedTableOperationError(
ident, "REPLACE TABLE AS SELECT")
} else {
- c
+ DataSourceV2Utils.getTableProvider(
+ provider, conf) match {
+ case Some(_: FileDataSourceV2) =>
+ throw QueryCompilationErrors
+ .unsupportedTableOperationError(
+ ident, "REPLACE TABLE AS SELECT")
+ case _ => c
+ }
}
// For CREATE TABLE LIKE, use the v1 command if both the target and source are in the session
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriter.scala
index f0359b33f431d..be97d28331169 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriter.scala
@@ -21,6 +21,9 @@ import java.util.Locale
import scala.jdk.CollectionConverters._
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.SparkUnsupportedOperationException
import org.apache.spark.annotation.Stable
import org.apache.spark.sql
import org.apache.spark.sql.SaveMode
@@ -168,8 +171,12 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._
val catalogManager = df.sparkSession.sessionState.catalogManager
+ val fileV2CreateMode = (curmode == SaveMode.ErrorIfExists ||
+ curmode == SaveMode.Ignore) &&
+ provider.isInstanceOf[FileDataSourceV2]
curmode match {
- case SaveMode.Append | SaveMode.Overwrite =>
+ case _ if curmode == SaveMode.Append || curmode == SaveMode.Overwrite ||
+ fileV2CreateMode =>
val (table, catalog, ident) = provider match {
case supportsExtract: SupportsCatalogOptions =>
val ident = supportsExtract.extractIdentifier(dsOptions)
@@ -178,7 +185,12 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
(catalog.loadTable(ident), Some(catalog), Some(ident))
case _: TableProvider =>
- val t = getTable
+ val t = try {
+ getTable
+ } catch {
+ case _: SparkUnsupportedOperationException if fileV2CreateMode =>
+ return saveToV1SourceCommand(path)
+ }
if (t.supports(BATCH_WRITE)) {
(t, None, None)
} else {
@@ -189,15 +201,40 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
}
}
+ if (fileV2CreateMode) {
+ val outputPath = Option(dsOptions.get("path")).map(new Path(_))
+ outputPath.foreach { p =>
+ val hadoopConf = df.sparkSession.sessionState
+ .newHadoopConfWithOptions(extraOptions.toMap)
+ val fs = p.getFileSystem(hadoopConf)
+ val qualifiedPath = fs.makeQualified(p)
+ if (fs.exists(qualifiedPath)) {
+ if (curmode == SaveMode.ErrorIfExists) {
+ throw QueryCompilationErrors.outputPathAlreadyExistsError(qualifiedPath)
+ } else {
+ return LocalRelation(
+ DataSourceV2Relation.create(table, catalog, ident, dsOptions).output)
+ }
+ }
+ }
+ }
+
val relation = DataSourceV2Relation.create(table, catalog, ident, dsOptions)
checkPartitioningMatchesV2Table(table)
- if (curmode == SaveMode.Append) {
+ if (curmode == SaveMode.Append || fileV2CreateMode) {
AppendData.byName(relation, df.logicalPlan, finalOptions)
} else {
- // Truncate the table. TableCapabilityCheck will throw a nice exception if this
- // isn't supported
- OverwriteByExpression.byName(
- relation, df.logicalPlan, Literal(true), finalOptions)
+ val dynamicOverwrite =
+ df.sparkSession.sessionState.conf.partitionOverwriteMode ==
+ PartitionOverwriteMode.DYNAMIC &&
+ partitioningColumns.exists(_.nonEmpty)
+ if (dynamicOverwrite) {
+ OverwritePartitionsDynamic.byName(
+ relation, df.logicalPlan, finalOptions)
+ } else {
+ OverwriteByExpression.byName(
+ relation, df.logicalPlan, Literal(true), finalOptions)
+ }
}
case createMode =>
@@ -226,14 +263,19 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
finalOptions,
ignoreIfExists = createMode == SaveMode.Ignore)
case _: TableProvider =>
- if (getTable.supports(BATCH_WRITE)) {
- throw QueryCompilationErrors.writeWithSaveModeUnsupportedBySourceError(
- source, createMode.name())
- } else {
- // Streaming also uses the data source V2 API. So it may be that the data source
- // implements v2, but has no v2 implementation for batch writes. In that case, we
- // fallback to saving as though it's a V1 source.
- saveToV1SourceCommand(path)
+ try {
+ if (getTable.supports(BATCH_WRITE)) {
+ throw QueryCompilationErrors.writeWithSaveModeUnsupportedBySourceError(
+ source, createMode.name())
+ } else {
+ // Streaming also uses the data source V2 API. So it may be that the data source
+ // implements v2, but has no v2 implementation for batch writes. In that case, we
+ // fallback to saving as though it's a V1 source.
+ saveToV1SourceCommand(path)
+ }
+ } catch {
+ case _: SparkUnsupportedOperationException =>
+ saveToV1SourceCommand(path)
}
}
}
@@ -439,8 +481,10 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
val session = df.sparkSession
val v2ProviderOpt = lookupV2Provider()
- val canUseV2 = v2ProviderOpt.isDefined || (hasCustomSessionCatalog &&
- !df.sparkSession.sessionState.catalogManager.catalog(CatalogManager.SESSION_CATALOG_NAME)
+ val canUseV2 = v2ProviderOpt.isDefined ||
+ (hasCustomSessionCatalog &&
+ !df.sparkSession.sessionState.catalogManager
+ .catalog(CatalogManager.SESSION_CATALOG_NAME)
.isInstanceOf[CatalogExtension])
session.sessionState.sqlParser.parseMultipartIdentifier(tableName) match {
@@ -477,6 +521,45 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident))
AppendData.byName(v2Relation, df.logicalPlan, extraOptions.toMap)
+ // For file tables, Overwrite on existing table uses
+ // OverwriteByExpression (truncate + append) instead of
+ // ReplaceTableAsSelect (which requires StagingTableCatalog).
+ case (SaveMode.Overwrite, Some(table: FileTable)) =>
+ checkPartitioningMatchesV2Table(table)
+ val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident))
+ val conf = df.sparkSession.sessionState.conf
+ val dynamicPartitionOverwrite = table.partitioning.length > 0 &&
+ conf.partitionOverwriteMode == PartitionOverwriteMode.DYNAMIC &&
+ partitioningColumns.exists(_.nonEmpty)
+ if (dynamicPartitionOverwrite) {
+ OverwritePartitionsDynamic.byName(
+ v2Relation, df.logicalPlan, extraOptions.toMap)
+ } else {
+ OverwriteByExpression.byName(
+ v2Relation, df.logicalPlan, Literal(true), extraOptions.toMap)
+ }
+
+ // File table Overwrite when table doesn't exist: create it.
+ case (SaveMode.Overwrite, None)
+ if v2ProviderOpt.exists(_.isInstanceOf[FileDataSourceV2]) =>
+ val tableSpec = UnresolvedTableSpec(
+ properties = Map.empty,
+ provider = Some(source),
+ optionExpression = OptionList(Seq.empty),
+ location = extraOptions.get("path"),
+ comment = extraOptions.get(TableCatalog.PROP_COMMENT),
+ collation = extraOptions.get(TableCatalog.PROP_COLLATION),
+ serde = None,
+ external = false,
+ constraints = Seq.empty)
+ CreateTableAsSelect(
+ UnresolvedIdentifier(nameParts),
+ partitioningAsV2,
+ df.queryExecution.analyzed,
+ tableSpec,
+ writeOptions = extraOptions.toMap,
+ ignoreIfExists = false)
+
case (SaveMode.Overwrite, _) =>
val tableSpec = UnresolvedTableSpec(
properties = Map.empty,
@@ -595,8 +678,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
private def lookupV2Provider(): Option[TableProvider] = {
DataSource.lookupDataSourceV2(source, df.sparkSession.sessionState.conf) match {
- // TODO(SPARK-28396): File source v2 write path is currently broken.
- case Some(_: FileDataSourceV2) => None
case other => other
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala
index d1464b4ac4ee7..7dd16d85796fc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala
@@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight}
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, PartitioningCollection}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec, SparkPlan}
+import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, FileScan}
import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, ShuffledHashJoinExec, ShuffledJoin, SortMergeJoinExec}
/**
@@ -44,6 +45,11 @@ object CoalesceBucketsInJoin extends Rule[SparkPlan] {
plan transformUp {
case f: FileSourceScanExec if f.relation.bucketSpec.nonEmpty =>
f.copy(optionalNumCoalescedBuckets = Some(numCoalescedBuckets))
+ case b: BatchScanExec => b.scan match {
+ case fs: FileScan if fs.bucketSpec.nonEmpty =>
+ b.copy(scan = fs.withNumCoalescedBuckets(Some(numCoalescedBuckets)))
+ case _ => b
+ }
}
}
@@ -120,6 +126,10 @@ object ExtractJoinWithBuckets {
case j: BroadcastNestedLoopJoinExec =>
if (j.buildSide == BuildLeft) hasScanOperation(j.right) else hasScanOperation(j.left)
case f: FileSourceScanExec => f.relation.bucketSpec.nonEmpty
+ case b: BatchScanExec => b.scan match {
+ case fs: FileScan => fs.bucketSpec.nonEmpty
+ case _ => false
+ }
case _ => false
}
@@ -128,6 +138,11 @@ object ExtractJoinWithBuckets {
case f: FileSourceScanExec if f.relation.bucketSpec.nonEmpty &&
f.optionalNumCoalescedBuckets.isEmpty =>
f.relation.bucketSpec.get
+ case b: BatchScanExec
+ if b.scan.isInstanceOf[FileScan] &&
+ b.scan.asInstanceOf[FileScan].bucketSpec.nonEmpty &&
+ b.scan.asInstanceOf[FileScan].optionalNumCoalescedBuckets.isEmpty =>
+ b.scan.asInstanceOf[FileScan].bucketSpec.get
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala
index 1eb1082402972..931eb0f6ccd7d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala
@@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistrib
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec, SortExec, SparkPlan}
import org.apache.spark.sql.execution.aggregate.BaseAggregateExec
+import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, FileScan}
import org.apache.spark.sql.execution.exchange.Exchange
/**
@@ -109,6 +110,19 @@ object DisableUnnecessaryBucketedScan extends Rule[SparkPlan] {
} else {
scan
}
+ case batchScan: BatchScanExec =>
+ batchScan.scan match {
+ case fileScan: FileScan if fileScan.bucketedScan =>
+ if (!withInterestingPartition || (withExchange && withAllowedNode)) {
+ val newScan = fileScan.withDisableBucketedScan(true)
+ val nonBucketedBatchScan = batchScan.copy(scan = newScan)
+ batchScan.logicalLink.foreach(nonBucketedBatchScan.setLogicalLink)
+ nonBucketedBatchScan
+ } else {
+ batchScan
+ }
+ case _ => batchScan // BatchScanExec is a leaf node, no children to traverse
+ }
case o =>
o.mapChildren(disableBucketWithInterestingPartition(
_,
@@ -143,6 +157,10 @@ object DisableUnnecessaryBucketedScan extends Rule[SparkPlan] {
def apply(plan: SparkPlan): SparkPlan = {
lazy val hasBucketedScan = plan.exists {
case scan: FileSourceScanExec => scan.bucketedScan
+ case batchScan: BatchScanExec => batchScan.scan match {
+ case fileScan: FileScan => fileScan.bucketedScan
+ case _ => false
+ }
case _ => false
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/AggregatePushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/AggregatePushDownUtils.scala
index 97ee3cd661b3d..ea81032380f60 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/AggregatePushDownUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/AggregatePushDownUtils.scala
@@ -24,6 +24,7 @@ import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Aggr
import org.apache.spark.sql.execution.RowToColumnConverter
import org.apache.spark.sql.execution.datasources.v2.V2ColumnUtils
import org.apache.spark.sql.execution.vectorized.{OffHeapColumnVector, OnHeapColumnVector}
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{BooleanType, ByteType, DateType, DoubleType, FloatType, IntegerType, LongType, ShortType, StructField, StructType}
import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector}
@@ -43,12 +44,22 @@ object AggregatePushDownUtils {
var finalSchema = new StructType()
+ val caseSensitive = SQLConf.get.caseSensitiveAnalysis
+
def getStructFieldForCol(colName: String): StructField = {
- schema.apply(colName)
+ if (caseSensitive) {
+ schema.apply(colName)
+ } else {
+ schema.find(_.name.equalsIgnoreCase(colName)).getOrElse(schema.apply(colName))
+ }
}
def isPartitionCol(colName: String) = {
- partitionNames.contains(colName)
+ if (caseSensitive) {
+ partitionNames.contains(colName)
+ } else {
+ partitionNames.exists(_.equalsIgnoreCase(colName))
+ }
}
def processMinOrMax(agg: AggregateFunc): Boolean = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index 9b51d3763abba..eec1e2057a8a4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -487,10 +487,10 @@ case class DataSource(
val caseSensitive = conf.caseSensitiveAnalysis
PartitioningUtils.validatePartitionColumn(data.schema, partitionColumns, caseSensitive)
- val fileIndex = catalogTable.map(_.identifier).map { tableIdent =>
- sparkSession.table(tableIdent).queryExecution.analyzed.collect {
+ val fileIndex = catalogTable.map(_.identifier).flatMap { tableIdent =>
+ sparkSession.table(tableIdent).queryExecution.analyzed.collectFirst {
case LogicalRelationWithTable(t: HadoopFsRelation, _) => t.location
- }.head
+ }
}
// For partitioned relation r, r.schema's column ordering can be different from the column
// ordering of data.logicalPlan (partition columns are all moved after data column). This
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala
deleted file mode 100644
index 979022a1787b7..0000000000000
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.datasources
-
-import scala.jdk.CollectionConverters._
-
-import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, LogicalPlan}
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.classic.SparkSession
-import org.apache.spark.sql.execution.datasources.v2.{ExtractV2Table, FileTable}
-
-/**
- * Replace the File source V2 table in [[InsertIntoStatement]] to V1 [[FileFormat]].
- * E.g, with temporary view `t` using
- * [[org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2]], inserting into view `t` fails
- * since there is no corresponding physical plan.
- * This is a temporary hack for making current data source V2 work. It should be
- * removed when Catalog support of file data source v2 is finished.
- */
-class FallBackFileSourceV2(sparkSession: SparkSession) extends Rule[LogicalPlan] {
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case i @ InsertIntoStatement(
- d @ ExtractV2Table(table: FileTable), _, _, _, _, _, _, _, _) =>
- val v1FileFormat = table.fallbackFileFormat.getDeclaredConstructor().newInstance()
- val relation = HadoopFsRelation(
- table.fileIndex,
- table.fileIndex.partitionSchema,
- table.schema,
- None,
- v1FileFormat,
- d.options.asScala.toMap)(sparkSession)
- i.copy(table = LogicalRelation(relation))
- }
-}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala
index e11c2b15e0541..6b5e04f5e27ac 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala
@@ -17,6 +17,7 @@
package org.apache.spark.sql.execution.datasources
import scala.collection.mutable
+import scala.jdk.CollectionConverters._
import org.apache.hadoop.fs.{FileAlreadyExistsException, Path}
import org.apache.hadoop.mapreduce.TaskAttemptContext
@@ -104,6 +105,14 @@ abstract class FileFormatDataWriter(
}
}
+ /**
+ * Override writeAll to ensure V2 DataWriter.writeAll path also wraps
+ * errors with TASK_WRITE_FAILED, matching V1 behavior.
+ */
+ override def writeAll(records: java.util.Iterator[InternalRow]): Unit = {
+ writeWithIterator(records.asScala)
+ }
+
/** Write an iterator of records. */
def writeWithIterator(iterator: Iterator[InternalRow]): Unit = {
var count = 0L
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
index 396375890c249..d63fb01d9dbd1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
@@ -64,14 +64,14 @@ import org.apache.spark.util.collection.BitSet
object FileSourceStrategy extends Strategy with PredicateHelper with Logging {
// should prune buckets iff num buckets is greater than 1 and there is only one bucket column
- private def shouldPruneBuckets(bucketSpec: Option[BucketSpec]): Boolean = {
+ private[sql] def shouldPruneBuckets(bucketSpec: Option[BucketSpec]): Boolean = {
bucketSpec match {
case Some(spec) => spec.bucketColumnNames.length == 1 && spec.numBuckets > 1
case None => false
}
}
- private def getExpressionBuckets(
+ private[sql] def getExpressionBuckets(
expr: Expression,
bucketColumnName: String,
numBuckets: Int): BitSet = {
@@ -123,7 +123,7 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging {
}
}
- private def genBucketSet(
+ private[sql] def genBucketSet(
normalizedFilters: Seq[Expression],
bucketSpec: BucketSpec): Option[BitSet] = {
if (normalizedFilters.isEmpty) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index d1f61599e7ac8..9542e60eba5c5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -31,13 +31,14 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, InputFileBlockLeng
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
+import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.catalyst.util.TypeUtils._
import org.apache.spark.sql.classic.SparkSession
import org.apache.spark.sql.connector.expressions.{FieldReference, RewritableTransform}
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.command.DDLUtils
import org.apache.spark.sql.execution.datasources.{CreateTable => CreateTableV1}
-import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2
+import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Utils, FileDataSourceV2}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sources.InsertableRelation
import org.apache.spark.sql.types.{ArrayType, DataType, MapType, MetadataBuilder, StructField, StructType}
@@ -57,8 +58,10 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] {
val result = plan match {
case u: UnresolvedRelation if maybeSQLFile(u) =>
try {
- val ds = resolveDataSource(u)
- Some(LogicalRelation(ds.resolveRelation()))
+ resolveAsV2(u).orElse {
+ val ds = resolveDataSource(u)
+ Some(LogicalRelation(ds.resolveRelation()))
+ }
} catch {
case e: SparkUnsupportedOperationException =>
u.failAnalysis(
@@ -90,6 +93,22 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] {
conf.runSQLonFile && u.multipartIdentifier.size == 2
}
+ private def resolveAsV2(u: UnresolvedRelation): Option[LogicalPlan] = {
+ val ident = u.multipartIdentifier
+ val format = ident.head
+ val path = ident.last
+ DataSource.lookupDataSourceV2(format, conf).flatMap {
+ case p: FileDataSourceV2 =>
+ DataSourceV2Utils.loadV2Source(
+ sparkSession, p,
+ userSpecifiedSchema = None,
+ extraOptions = CaseInsensitiveMap(u.options.asScala.toMap),
+ source = format,
+ path)
+ case _ => None
+ }
+ }
+
private def resolveDataSource(unresolved: UnresolvedRelation): DataSource = {
val ident = unresolved.multipartIdentifier
val dataSource = DataSource(
@@ -127,6 +146,12 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] {
} catch {
case _: ClassNotFoundException => r
}
+ case i @ InsertIntoStatement(u: UnresolvedRelation, _, _, _, _, _, _, _, _)
+ if maybeSQLFile(u) =>
+ UnresolvedRelationResolution.unapply(u) match {
+ case Some(resolved) => i.copy(table = resolved)
+ case None => i
+ }
case UnresolvedRelationResolution(resolvedRelation) =>
resolvedRelation
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AnalyzeColumnExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AnalyzeColumnExec.scala
new file mode 100644
index 0000000000000..f5c361290f2de
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AnalyzeColumnExec.scala
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, TableChange}
+import org.apache.spark.sql.execution.command.CommandUtils
+
+/**
+ * Physical plan for ANALYZE TABLE ... FOR COLUMNS on V2
+ * file tables. Computes column-level statistics and
+ * persists them as table properties via
+ * [[TableCatalog.alterTable()]].
+ *
+ * Column stats property key format:
+ * `spark.sql.statistics.colStats.
.`
+ */
+case class AnalyzeColumnExec(
+ catalog: TableCatalog,
+ ident: Identifier,
+ table: FileTable,
+ columnNames: Option[Seq[String]],
+ allColumns: Boolean)
+ extends LeafV2CommandExec {
+
+ override def output: Seq[Attribute] = Seq.empty
+
+ override protected def run(): Seq[InternalRow] = {
+ val relation = DataSourceV2Relation.create(
+ table, Some(catalog), Some(ident))
+
+ val columnsToAnalyze = if (allColumns) {
+ relation.output
+ } else {
+ columnNames.getOrElse(Seq.empty).map { name =>
+ relation.output.find(
+ _.name.equalsIgnoreCase(name)).getOrElse(
+ throw new IllegalArgumentException(
+ s"Column '$name' not found"))
+ }
+ }
+
+ val (rowCount, colStats) =
+ CommandUtils.computeColumnStats(
+ session, relation, columnsToAnalyze)
+
+ // Refresh fileIndex for accurate size
+ table.fileIndex.refresh()
+ val totalSize = table.fileIndex.sizeInBytes
+
+ val changes =
+ scala.collection.mutable.ArrayBuffer(
+ TableChange.setProperty(
+ "spark.sql.statistics.totalSize",
+ totalSize.toString),
+ TableChange.setProperty(
+ "spark.sql.statistics.numRows",
+ rowCount.toString))
+
+ // Store column stats as table properties
+ val prefix = "spark.sql.statistics.colStats."
+ colStats.foreach { case (attr, stat) =>
+ val catalogStat = stat.toCatalogColumnStat(
+ attr.name, attr.dataType)
+ catalogStat.toMap(attr.name).foreach {
+ case (k, v) =>
+ changes += TableChange.setProperty(
+ prefix + k, v)
+ }
+ }
+
+ catalog.alterTable(ident, changes.toSeq: _*)
+ Seq.empty
+ }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AnalyzeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AnalyzeTableExec.scala
new file mode 100644
index 0000000000000..df8c736b4b061
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AnalyzeTableExec.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, TableChange}
+
+/**
+ * Physical plan for ANALYZE TABLE on V2 file tables.
+ * Computes table statistics and persists them as table
+ * properties via [[TableCatalog.alterTable()]].
+ *
+ * Statistics property keys:
+ * - `spark.sql.statistics.totalSize`
+ * - `spark.sql.statistics.numRows`
+ */
+case class AnalyzeTableExec(
+ catalog: TableCatalog,
+ ident: Identifier,
+ table: FileTable,
+ partitionSpec: Map[String, Option[String]],
+ noScan: Boolean) extends LeafV2CommandExec {
+
+ override def output: Seq[Attribute] = Seq.empty
+
+ override protected def run(): Seq[InternalRow] = {
+ table.fileIndex.refresh()
+ val totalSize = table.fileIndex.sizeInBytes
+
+ val changes =
+ scala.collection.mutable.ArrayBuffer(
+ TableChange.setProperty(
+ "spark.sql.statistics.totalSize",
+ totalSize.toString))
+
+ if (!noScan) {
+ val relation = DataSourceV2Relation.create(
+ table, Some(catalog), Some(ident))
+ val df = session.internalCreateDataFrame(
+ session.sessionState.executePlan(
+ relation).toRdd,
+ relation.schema)
+ val rowCount = df.count()
+ changes += TableChange.setProperty(
+ "spark.sql.statistics.numRows",
+ rowCount.toString)
+ }
+
+ catalog.alterTable(ident, changes.toSeq: _*)
+ Seq.empty
+ }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala
index a1a6c6e022482..f5b4dbccdc283 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala
@@ -21,7 +21,7 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Ascending, Expression, RowOrdering, SortOrder}
import org.apache.spark.sql.catalyst.plans.physical
-import org.apache.spark.sql.catalyst.plans.physical.KeyedPartitioning
+import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, KeyedPartitioning}
import org.apache.spark.sql.catalyst.util.truncatedString
import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition, PartitionReaderFactory, Scan}
import org.apache.spark.sql.execution.{ExplainUtils, LeafExecNode, SQLExecution}
@@ -90,16 +90,29 @@ trait DataSourceV2ScanExecBase extends LeafExecNode {
}
override def outputPartitioning: physical.Partitioning = {
- keyGroupedPartitioning match {
- case Some(exprs) if conf.v2BucketingEnabled && KeyedPartitioning.supportsExpressions(exprs) &&
- inputPartitions.nonEmpty && inputPartitions.forall(_.isInstanceOf[HasPartitionKey]) =>
- val dataTypes = exprs.map(_.dataType)
- val rowOrdering = RowOrdering.createNaturalAscendingOrdering(dataTypes)
- val partitionKeys =
- inputPartitions.map(_.asInstanceOf[HasPartitionKey].partitionKey()).sorted(rowOrdering)
- KeyedPartitioning(exprs, partitionKeys)
+ scan match {
+ case fileScan: FileScan if fileScan.bucketedScan =>
+ val spec = fileScan.bucketSpec.get
+ val resolver = conf.resolver
+ val bucketColumns = spec.bucketColumnNames.flatMap(n =>
+ output.find(a => resolver(a.name, n)))
+ val numPartitions = fileScan.optionalNumCoalescedBuckets.getOrElse(spec.numBuckets)
+ HashPartitioning(bucketColumns, numPartitions)
case _ =>
- super.outputPartitioning
+ keyGroupedPartitioning match {
+ case Some(exprs) if conf.v2BucketingEnabled &&
+ KeyedPartitioning.supportsExpressions(exprs) &&
+ inputPartitions.nonEmpty &&
+ inputPartitions.forall(_.isInstanceOf[HasPartitionKey]) =>
+ val dataTypes = exprs.map(_.dataType)
+ val rowOrdering = RowOrdering.createNaturalAscendingOrdering(dataTypes)
+ val partitionKeys = inputPartitions
+ .map(_.asInstanceOf[HasPartitionKey].partitionKey())
+ .sorted(rowOrdering)
+ KeyedPartitioning(exprs, partitionKeys)
+ case _ =>
+ super.outputPartitioning
+ }
}
}
@@ -110,6 +123,12 @@ trait DataSourceV2ScanExecBase extends LeafExecNode {
* `spark.sql.sources.v2.bucketing.partitionKeyOrdering.enabled` is on, each partition
* contains rows where the key expressions evaluate to a single constant value, so the data
* is trivially sorted by those expressions within the partition.
+ *
+ * Note: V2 bucketed file scans (FileScan with bucketedScan=true) do NOT report
+ * outputOrdering here. V1 (FileSourceScanExec) reports sort ordering under
+ * LEGACY_BUCKETED_TABLE_SCAN_OUTPUT_ORDERING only when each bucket has exactly one file
+ * and no coalescing is active. V2 cannot cheaply check those conditions at planning time,
+ * so it conservatively relies on the default behavior.
*/
override def outputOrdering: Seq[SortOrder] = {
(ordering, outputPartitioning) match {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
index 3d3b4d1cae11c..2beadb5ae3767 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
@@ -53,7 +53,8 @@ import org.apache.spark.storage.StorageLevel
import org.apache.spark.util.ArrayImplicits._
import org.apache.spark.util.SparkStringUtils
-class DataSourceV2Strategy(session: SparkSession) extends Strategy with PredicateHelper {
+class DataSourceV2Strategy(session: SparkSession)
+ extends Strategy with PredicateHelper with Logging {
import DataSourceV2Implicits._
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
@@ -70,7 +71,81 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
val nameParts = ident.toQualifiedNameParts(catalog)
cacheManager.recacheTableOrView(session, nameParts, includeTimeTravel = false)
case _ =>
- cacheManager.recacheByPlan(session, r)
+ r.table match {
+ case ft: FileTable if ft.fileIndex.rootPaths.nonEmpty =>
+ ft.fileIndex.refresh()
+ syncNewPartitionsToCatalog(ft)
+ updateTableStats(ft)
+ val path = new Path(ft.fileIndex.rootPaths.head.toUri)
+ val fsConf = session.sessionState.newHadoopConfWithOptions(
+ scala.jdk.CollectionConverters.MapHasAsScala(
+ r.options.asCaseSensitiveMap).asScala.toMap)
+ val fs = path.getFileSystem(fsConf)
+ cacheManager.recacheByPath(session, path, fs)
+ case _ =>
+ cacheManager.recacheByPlan(session, r)
+ }
+ }
+
+ /**
+ * After a V2 file write, discover new partitions on disk
+ * and register them in the catalog metastore (best-effort).
+ */
+ private def syncNewPartitionsToCatalog(ft: FileTable): Unit = {
+ ft.catalogTable.foreach { ct =>
+ if (ct.partitionColumnNames.isEmpty) return
+ try {
+ val catalog = session.sessionState.catalog
+ val existing = catalog.listPartitions(ct.identifier).map(_.spec).toSet
+ val onDisk = ft.listPartitionIdentifiers(
+ Array.empty, org.apache.spark.sql.catalyst.InternalRow.empty)
+ val partSchema = ft.partitionSchema()
+ onDisk.foreach { row =>
+ val spec = (0 until partSchema.length).map { i =>
+ val v = row.get(i, partSchema(i).dataType)
+ partSchema(i).name -> (if (v == null) null else v.toString)
+ }.toMap
+ if (!existing.contains(spec)) {
+ val partPath = ft.fileIndex.rootPaths.head.suffix(
+ "/" + spec.map { case (k, v) => s"$k=$v" }.mkString("/"))
+ val storage = ct.storage.copy(locationUri = Some(partPath.toUri))
+ val part = org.apache.spark.sql.catalyst.catalog
+ .CatalogTablePartition(spec, storage)
+ catalog.createPartitions(ct.identifier, Seq(part), ignoreIfExists = true)
+ }
+ }
+ } catch {
+ case e: Exception =>
+ logWarning(s"Failed to sync partitions to catalog for " +
+ s"${ct.identifier}: ${e.getMessage}")
+ }
+ }
+ }
+
+ /**
+ * After a V2 file write, update the table's totalSize statistic
+ * in the catalog metastore (best-effort). Row count is not updated
+ * here -- use ANALYZE TABLE for accurate row counts.
+ */
+ private def updateTableStats(ft: FileTable): Unit = {
+ ft.catalogTable.foreach { ct =>
+ try {
+ val totalSize = ft.fileIndex.sizeInBytes
+ val newStats = ct.stats match {
+ case Some(existing) =>
+ Some(existing.copy(sizeInBytes = BigInt(totalSize)))
+ case None =>
+ Some(org.apache.spark.sql.catalyst.catalog.CatalogStatistics(
+ sizeInBytes = BigInt(totalSize)))
+ }
+ val updatedTable = ct.copy(stats = newStats)
+ session.sessionState.catalog.alterTable(updatedTable)
+ } catch {
+ case e: Exception =>
+ logWarning(s"Failed to update table stats for " +
+ s"${ct.identifier}: ${e.getMessage}")
+ }
+ }
}
private def recacheTable(r: ResolvedTable, includeTimeTravel: Boolean)(): Unit = {
@@ -334,6 +409,31 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
v1, v2Write.getClass.getName, classOf[V1Write].getName)
}
+ // SPARK-56304: skip write if target partition already exists
+ case OverwriteByExpression(
+ r: DataSourceV2Relation, _, query, writeOptions, _, _, Some(write), _)
+ if writeOptions.getOrElse("ifPartitionNotExists", "false") == "true"
+ && r.table.isInstanceOf[FileTable] =>
+ val ft = r.table.asInstanceOf[FileTable]
+ val prefix = "__staticPartition."
+ val staticSpec = writeOptions
+ .filter(_._1.startsWith(prefix))
+ .map { case (k, v) => k.stripPrefix(prefix) -> v }
+ // Check filesystem for partition existence
+ val partPath = ft.partitionSchema().fieldNames
+ .flatMap(col => staticSpec.get(col).map(v => s"$col=$v"))
+ .mkString("/")
+ val rootPath = ft.fileIndex.rootPaths.head
+ val hadoopConf = session.sessionState.newHadoopConf()
+ val fs = rootPath.getFileSystem(hadoopConf)
+ val targetPath = new Path(rootPath, partPath)
+ if (partPath.nonEmpty && fs.exists(targetPath)) {
+ LocalTableScanExec(Nil, Nil, None) :: Nil
+ } else {
+ OverwriteByExpressionExec(
+ planLater(query), refreshCache(r), write) :: Nil
+ }
+
case OverwriteByExpression(
r: DataSourceV2Relation, _, query, _, _, _, Some(write), _) =>
OverwriteByExpressionExec(planLater(query), refreshCache(r), write) :: Nil
@@ -485,8 +585,26 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
case ShowTableProperties(rt: ResolvedTable, propertyKey, output) =>
ShowTablePropertiesExec(output, rt.table, rt.name, propertyKey) :: Nil
- case AnalyzeTable(_: ResolvedTable, _, _) | AnalyzeColumn(_: ResolvedTable, _, _) =>
- throw QueryCompilationErrors.analyzeTableNotSupportedForV2TablesError()
+ case AnalyzeTable(
+ ResolvedTable(catalog, ident,
+ ft: FileTable, _),
+ partitionSpec, noScan) =>
+ AnalyzeTableExec(
+ catalog, ident, ft,
+ partitionSpec, noScan) :: Nil
+
+ case AnalyzeColumn(
+ ResolvedTable(catalog, ident,
+ ft: FileTable, _),
+ columnNames, allColumns) =>
+ AnalyzeColumnExec(
+ catalog, ident, ft,
+ columnNames, allColumns) :: Nil
+
+ case AnalyzeTable(_: ResolvedTable, _, _) |
+ AnalyzeColumn(_: ResolvedTable, _, _) =>
+ throw QueryCompilationErrors
+ .analyzeTableNotSupportedForV2TablesError()
case AddPartitions(
r @ ResolvedTable(_, _, table: SupportsPartitionManagement, _), parts, ignoreIfExists) =>
@@ -516,6 +634,14 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
Seq(to).asResolvedPartitionSpecs.head,
recacheTable(r, includeTimeTravel = false)) :: Nil
+ case RecoverPartitions(
+ ResolvedTable(catalog, ident,
+ ft: FileTable, _)) =>
+ RepairTableExec(
+ catalog, ident, ft,
+ enableAddPartitions = true,
+ enableDropPartitions = false) :: Nil
+
case RecoverPartitions(_: ResolvedTable) =>
throw QueryCompilationErrors.alterTableRecoverPartitionsNotSupportedForV2TablesError()
@@ -564,6 +690,14 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
table,
pattern.map(_.asInstanceOf[ResolvedPartitionSpec])) :: Nil
+ case RepairTable(
+ ResolvedTable(catalog, ident,
+ ft: FileTable, _),
+ enableAdd, enableDrop) =>
+ RepairTableExec(
+ catalog, ident, ft,
+ enableAdd, enableDrop) :: Nil
+
case RepairTable(_: ResolvedTable, _, _) =>
throw QueryCompilationErrors.repairTableNotSupportedForV2TablesError()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala
index a3b5c5aeb7995..83a053a537a64 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala
@@ -164,8 +164,7 @@ private[sql] object DataSourceV2Utils extends Logging {
// `HiveFileFormat`, when running tests in sql/core.
if (DDLUtils.isHiveTable(Some(provider))) return None
DataSource.lookupDataSourceV2(provider, conf) match {
- // TODO(SPARK-28396): Currently file source v2 can't work with tables.
- case Some(p) if !p.isInstanceOf[FileDataSourceV2] => Some(p)
+ case Some(p) => Some(p)
case _ => None
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala
index 4242fc5d8510a..29b484e07d8ab 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala
@@ -27,15 +27,16 @@ import com.fasterxml.jackson.module.scala.DefaultScalaModule
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
-import org.apache.spark.{SparkException, SparkUpgradeException}
+import org.apache.spark.{SparkException, SparkUnsupportedOperationException, SparkUpgradeException}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.connector.catalog.{Table, TableProvider}
-import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform}
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.sources.DataSourceRegister
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap
+import org.apache.spark.util.ArrayImplicits._
import org.apache.spark.util.Utils
/**
@@ -109,12 +110,34 @@ trait FileDataSourceV2 extends TableProvider with DataSourceRegister {
schema: StructType,
partitioning: Array[Transform],
properties: util.Map[String, String]): Table = {
- // If the table is already loaded during schema inference, return it directly.
- if (t != null) {
+ // Reuse the cached table from inferSchema() when available,
+ // since it has the correct fileIndex (e.g., MetadataLogFileIndex
+ // for streaming sink output). Only create a fresh table when
+ // no cached table exists (pure write path).
+ val opts = new CaseInsensitiveStringMap(properties)
+ val table = if (t != null) {
t
} else {
- getTable(new CaseInsensitiveStringMap(properties), schema)
+ try {
+ getTable(opts, schema)
+ } catch {
+ case _: SparkUnsupportedOperationException =>
+ getTable(opts)
+ }
}
+ if (partitioning.nonEmpty) {
+ table match {
+ case ft: FileTable =>
+ // Extract partition column names from IdentityTransform only.
+ // BucketTransform is handled via catalogTable.bucketSpec.
+ ft.userSpecifiedPartitioning =
+ partitioning.collect {
+ case IdentityTransform(FieldReference(Seq(col))) => col
+ }.toImmutableArraySeq
+ case _ =>
+ }
+ }
+ table
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileMetadataColumn.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileMetadataColumn.scala
new file mode 100644
index 0000000000000..77805dc1f5b3b
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileMetadataColumn.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2
+
+import org.apache.spark.sql.connector.catalog.MetadataColumn
+import org.apache.spark.sql.types.DataType
+
+/**
+ * A [[MetadataColumn]] exposing the V1-compatible `_metadata` struct on V2 file-based tables.
+ */
+private[v2] case class FileMetadataColumn(
+ override val name: String,
+ override val dataType: DataType) extends MetadataColumn {
+
+ override def isNullable: Boolean = false
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileMicroBatchStream.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileMicroBatchStream.scala
new file mode 100644
index 0000000000000..674c1902d1ec9
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileMicroBatchStream.scala
@@ -0,0 +1,437 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import java.util.concurrent.TimeUnit.NANOSECONDS
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.hadoop.fs.{FileStatus, Path}
+
+import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.internal.{Logging, LogKeys}
+import org.apache.spark.paths.SparkPath
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory}
+import org.apache.spark.sql.connector.read.streaming
+import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, ReadAllAvailable, ReadLimit, ReadMaxBytes, ReadMaxFiles, SupportsAdmissionControl, SupportsTriggerAvailableNow}
+import org.apache.spark.sql.execution.datasources.{InMemoryFileIndex, PartitioningAwareFileIndex}
+import org.apache.spark.sql.execution.streaming.runtime.{FileStreamOptions, FileStreamSource, FileStreamSourceLog, FileStreamSourceOffset, MetadataLogFileIndex, SerializedOffset}
+import org.apache.spark.sql.execution.streaming.runtime.FileStreamSource.FileEntry
+import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink
+import org.apache.spark.sql.types.StructType
+
+/**
+ * A [[MicroBatchStream]] implementation for file-based streaming reads using the V2 data source
+ * API. This is the V2 counterpart of the V1 [[FileStreamSource]].
+ *
+ * It reuses the same infrastructure as [[FileStreamSource]]:
+ * - [[FileStreamSourceLog]] for metadata tracking
+ * - [[FileStreamSourceOffset]] for offset representation
+ * - [[FileStreamSource.SeenFilesMap]] for deduplication
+ * - [[FileStreamOptions]] for parsed streaming options
+ */
+class FileMicroBatchStream(
+ sparkSession: SparkSession,
+ fileIndex: PartitioningAwareFileIndex,
+ fileScan: FileScan,
+ path: String,
+ fileFormatClassName: String,
+ schema: StructType,
+ partitionColumns: Seq[String],
+ metadataPath: String,
+ options: Map[String, String])
+ extends MicroBatchStream
+ with SupportsAdmissionControl
+ with SupportsTriggerAvailableNow
+ with Logging {
+
+ import FileMicroBatchStream._
+
+ private val sourceOptions = new FileStreamOptions(options)
+
+ private val hadoopConf = sparkSession.sessionState.newHadoopConf()
+
+ @transient private val fs = new Path(path).getFileSystem(hadoopConf)
+
+ private val qualifiedBasePath: Path = {
+ fs.makeQualified(new Path(path)) // can contain glob patterns
+ }
+
+ private val metadataLog =
+ new FileStreamSourceLog(FileStreamSourceLog.VERSION, sparkSession, metadataPath)
+ private var metadataLogCurrentOffset = metadataLog.getLatest().map(_._1).getOrElse(-1L)
+
+ /** Maximum number of new files to be considered in each batch */
+ private val maxFilesPerBatch = sourceOptions.maxFilesPerTrigger
+
+ /** Maximum number of new bytes to be considered in each batch */
+ private val maxBytesPerBatch = sourceOptions.maxBytesPerTrigger
+
+ private val fileSortOrder = if (sourceOptions.latestFirst) {
+ logWarning(
+ """'latestFirst' is true. New files will be processed first, which may affect the watermark
+ |value. In addition, 'maxFileAge' will be ignored.""".stripMargin)
+ implicitly[Ordering[Long]].reverse
+ } else {
+ implicitly[Ordering[Long]]
+ }
+
+ private val maxFileAgeMs: Long = if (sourceOptions.latestFirst &&
+ (maxFilesPerBatch.isDefined || maxBytesPerBatch.isDefined)) {
+ Long.MaxValue
+ } else {
+ sourceOptions.maxFileAgeMs
+ }
+
+ private val fileNameOnly = sourceOptions.fileNameOnly
+ if (fileNameOnly) {
+ logWarning("'fileNameOnly' is enabled. Make sure your file names are unique (e.g. using " +
+ "UUID), otherwise, files with the same name but under different paths will be considered " +
+ "the same and causes data lost.")
+ }
+
+ private val maxCachedFiles = sourceOptions.maxCachedFiles
+
+ private val discardCachedInputRatio = sourceOptions.discardCachedInputRatio
+
+ /** A mapping from a file that we have processed to some timestamp it was last modified. */
+ val seenFiles = new FileStreamSource.SeenFilesMap(maxFileAgeMs, fileNameOnly)
+
+ private var allFilesForTriggerAvailableNow: Seq[NewFileEntry] = _
+
+ // Restore seenFiles from metadata log
+ metadataLog.restore().foreach { entry =>
+ seenFiles.add(entry.sparkPath, entry.timestamp)
+ }
+ seenFiles.purge()
+
+ logInfo(log"maxFilesPerBatch = ${MDC(LogKeys.NUM_FILES, maxFilesPerBatch)}, " +
+ log"maxBytesPerBatch = ${MDC(LogKeys.NUM_BYTES, maxBytesPerBatch)}, " +
+ log"maxFileAgeMs = ${MDC(LogKeys.TIME_UNITS, maxFileAgeMs)}")
+
+ private var unreadFiles: Seq[NewFileEntry] = _
+
+ /**
+ * If the source has a metadata log indicating which files should be read, then we should use it.
+ * Only when user gives a non-glob path that will we figure out whether the source has some
+ * metadata log.
+ *
+ * None means we don't know at the moment
+ * Some(true) means we know for sure the source DOES have metadata
+ * Some(false) means we know for sure the source DOES NOT have metadata
+ */
+ @volatile private[sql] var sourceHasMetadata: Option[Boolean] =
+ if (SparkHadoopUtil.get.isGlobPath(new Path(path))) Some(false) else None
+
+ // ---------------------------------------------------------------------------
+ // SparkDataStream methods
+ // ---------------------------------------------------------------------------
+
+ override def initialOffset(): streaming.Offset = FileStreamSourceOffset(-1L)
+
+ override def deserializeOffset(json: String): streaming.Offset = {
+ FileStreamSourceOffset(SerializedOffset(json))
+ }
+
+ override def commit(end: streaming.Offset): Unit = {
+ // no-op for now
+ }
+
+ override def stop(): Unit = {
+ // no-op for now
+ }
+
+ // ---------------------------------------------------------------------------
+ // SupportsAdmissionControl methods
+ // ---------------------------------------------------------------------------
+
+ override def getDefaultReadLimit: ReadLimit = {
+ maxFilesPerBatch.map(ReadLimit.maxFiles).getOrElse(
+ maxBytesPerBatch.map(ReadLimit.maxBytes).getOrElse(super.getDefaultReadLimit)
+ )
+ }
+
+ override def latestOffset(startOffset: streaming.Offset, limit: ReadLimit): streaming.Offset = {
+ Some(fetchMaxOffset(limit)).filterNot(_.logOffset == -1).orNull
+ }
+
+ // ---------------------------------------------------------------------------
+ // MicroBatchStream methods
+ // ---------------------------------------------------------------------------
+
+ override def latestOffset(): streaming.Offset = {
+ latestOffset(null, getDefaultReadLimit)
+ }
+
+ override def planInputPartitions(
+ start: streaming.Offset,
+ end: streaming.Offset): Array[InputPartition] = {
+ val startOffset = FileStreamSourceOffset(
+ start.asInstanceOf[org.apache.spark.sql.execution.streaming.Offset]).logOffset
+ val endOffset = FileStreamSourceOffset(
+ end.asInstanceOf[org.apache.spark.sql.execution.streaming.Offset]).logOffset
+
+ assert(startOffset <= endOffset)
+ val files = metadataLog.get(Some(startOffset + 1), Some(endOffset)).flatMap(_._2)
+ logInfo(log"Processing ${MDC(LogKeys.NUM_FILES, files.length)} files from " +
+ log"${MDC(LogKeys.FILE_START_OFFSET, startOffset + 1)}:" +
+ log"${MDC(LogKeys.FILE_END_OFFSET, endOffset)}")
+ logTrace(s"Files are:\n\t" + files.mkString("\n\t"))
+
+ // Build an InMemoryFileIndex from the file entries.
+ // For non-glob paths, pass basePath so partition discovery uses the streaming
+ // source root instead of the individual file's parent directory.
+ val filePaths = files.map(_.sparkPath.toPath).toSeq
+ val indexOptions = if (!SparkHadoopUtil.get.isGlobPath(new Path(path))) {
+ options + ("basePath" -> qualifiedBasePath.toString)
+ } else {
+ options
+ }
+ val tempFileIndex = new InMemoryFileIndex(
+ sparkSession, filePaths, indexOptions, Some(schema))
+
+ fileScan.withFileIndex(tempFileIndex).planInputPartitions()
+ }
+
+ override def createReaderFactory(): PartitionReaderFactory = {
+ fileScan.createReaderFactory()
+ }
+
+ // ---------------------------------------------------------------------------
+ // SupportsTriggerAvailableNow methods
+ // ---------------------------------------------------------------------------
+
+ override def prepareForTriggerAvailableNow(): Unit = {
+ allFilesForTriggerAvailableNow = fetchAllFiles()
+ }
+
+ // ---------------------------------------------------------------------------
+ // Internal methods - ported from FileStreamSource
+ // ---------------------------------------------------------------------------
+
+ /**
+ * Split files into a selected/unselected pair according to a total size threshold.
+ * Always puts the 1st element in a left split and keep adding it to a left split
+ * until reaches a specified threshold or [[Long.MaxValue]].
+ */
+ private def takeFilesUntilMax(files: Seq[NewFileEntry], maxSize: Long)
+ : (FilesSplit, FilesSplit) = {
+ var lSize = BigInt(0)
+ var rSize = BigInt(0)
+ val lFiles = ArrayBuffer[NewFileEntry]()
+ val rFiles = ArrayBuffer[NewFileEntry]()
+ files.zipWithIndex.foreach { case (file, i) =>
+ val newSize = lSize + file.size
+ if (i == 0 || rFiles.isEmpty && newSize <= Long.MaxValue && newSize <= maxSize) {
+ lSize += file.size
+ lFiles += file
+ } else {
+ rSize += file.size
+ rFiles += file
+ }
+ }
+ (FilesSplit(lFiles.toSeq, lSize), FilesSplit(rFiles.toSeq, rSize))
+ }
+
+ /**
+ * Returns the maximum offset that can be retrieved from the source.
+ *
+ * `synchronized` on this method is for solving race conditions in tests. In the normal usage,
+ * there is no race here, so the cost of `synchronized` should be rare.
+ */
+ private def fetchMaxOffset(limit: ReadLimit): FileStreamSourceOffset = synchronized {
+ val newFiles = if (unreadFiles != null && unreadFiles.nonEmpty) {
+ logDebug(s"Reading from unread files - ${unreadFiles.size} files are available.")
+ unreadFiles
+ } else {
+ // All the new files found - ignore aged files and files that we have seen.
+ // Use the pre-fetched list of files when Trigger.AvailableNow is enabled.
+ val allFiles = if (allFilesForTriggerAvailableNow != null) {
+ allFilesForTriggerAvailableNow
+ } else {
+ fetchAllFiles()
+ }
+ allFiles.filter {
+ case NewFileEntry(path, _, timestamp) => seenFiles.isNewFile(path, timestamp)
+ }
+ }
+
+ val shouldCache = !sourceOptions.latestFirst && allFilesForTriggerAvailableNow == null
+
+ // Obey user's setting to limit the number of files in this batch trigger.
+ val (batchFiles, unselectedFiles) = limit match {
+ case files: ReadMaxFiles if shouldCache =>
+ // we can cache and reuse remaining fetched list of files in further batches
+ val (bFiles, usFiles) = newFiles.splitAt(files.maxFiles())
+ if (usFiles.size < files.maxFiles() * discardCachedInputRatio) {
+ // Discard unselected files if the number of files are smaller than threshold.
+ logTrace(s"Discarding ${usFiles.length} unread files as it's smaller than threshold.")
+ (bFiles, null)
+ } else {
+ (bFiles, usFiles)
+ }
+
+ case files: ReadMaxFiles =>
+ // don't use the cache, just take files for the next batch
+ (newFiles.take(files.maxFiles()), null)
+
+ case files: ReadMaxBytes if shouldCache =>
+ // we can cache and reuse remaining fetched list of files in further batches
+ val (FilesSplit(bFiles, _), FilesSplit(usFiles, rSize)) =
+ takeFilesUntilMax(newFiles, files.maxBytes())
+ if (rSize.toDouble < (files.maxBytes() * discardCachedInputRatio)) {
+ logTrace(s"Discarding ${usFiles.length} unread files as it's smaller than threshold.")
+ (bFiles, null)
+ } else {
+ (bFiles, usFiles)
+ }
+
+ case files: ReadMaxBytes =>
+ // don't use the cache, just take files for the next batch
+ val (FilesSplit(bFiles, _), _) = takeFilesUntilMax(newFiles, files.maxBytes())
+ (bFiles, null)
+
+ case _: ReadAllAvailable => (newFiles, null)
+ }
+
+ // need to ensure that if maxCachedFiles is set to 0 that the next batch will be forced to
+ // list files again
+ if (unselectedFiles != null && unselectedFiles.nonEmpty && maxCachedFiles > 0) {
+ logTrace(s"Taking first $maxCachedFiles unread files.")
+ unreadFiles = unselectedFiles.take(maxCachedFiles)
+ logTrace(s"${unreadFiles.size} unread files are available for further batches.")
+ } else {
+ unreadFiles = null
+ logTrace(s"No unread file is available for further batches or maxCachedFiles has been set " +
+ s" to 0 to disable caching.")
+ }
+
+ batchFiles.foreach { case NewFileEntry(p, _, timestamp) =>
+ seenFiles.add(p, timestamp)
+ logDebug(s"New file: $p")
+ }
+ val numPurged = seenFiles.purge()
+
+ logTrace(
+ s"""
+ |Number of new files = ${newFiles.size}
+ |Number of files selected for batch = ${batchFiles.size}
+ |Number of unread files = ${Option(unreadFiles).map(_.size).getOrElse(0)}
+ |Number of seen files = ${seenFiles.size}
+ |Number of files purged from tracking map = $numPurged
+ """.stripMargin)
+
+ if (batchFiles.nonEmpty) {
+ metadataLogCurrentOffset += 1
+
+ val fileEntries = batchFiles.map { case NewFileEntry(p, _, timestamp) =>
+ FileEntry(path = p.urlEncoded, timestamp = timestamp, batchId = metadataLogCurrentOffset)
+ }.toArray
+ if (metadataLog.add(metadataLogCurrentOffset, fileEntries)) {
+ logInfo(log"Log offset set to ${MDC(LogKeys.LOG_OFFSET, metadataLogCurrentOffset)} " +
+ log"with ${MDC(LogKeys.NUM_FILES, batchFiles.size)} new files")
+ } else {
+ throw new IllegalStateException("Concurrent update to the log. Multiple streaming jobs " +
+ s"detected for $metadataLogCurrentOffset")
+ }
+ }
+
+ FileStreamSourceOffset(metadataLogCurrentOffset)
+ }
+
+ private def allFilesUsingInMemoryFileIndex(): Seq[FileStatus] = {
+ val globbedPaths = SparkHadoopUtil.get.globPathIfNecessary(fs, qualifiedBasePath)
+ val fileIdx = new InMemoryFileIndex(
+ sparkSession, globbedPaths, options, Some(new StructType))
+ fileIdx.allFiles()
+ }
+
+ private def allFilesUsingMetadataLogFileIndex(): Seq[FileStatus] = {
+ // Note if `sourceHasMetadata` holds, then `qualifiedBasePath` is guaranteed to be a
+ // non-glob path
+ new MetadataLogFileIndex(sparkSession, qualifiedBasePath,
+ CaseInsensitiveMap(options), None).allFiles()
+ }
+
+ private def setSourceHasMetadata(newValue: Option[Boolean]): Unit = {
+ sourceHasMetadata = newValue
+ }
+
+ /**
+ * Returns a list of files found, sorted by their timestamp.
+ */
+ private def fetchAllFiles(): Seq[NewFileEntry] = {
+ val startTime = System.nanoTime
+
+ var allFiles: Seq[FileStatus] = null
+ sourceHasMetadata match {
+ case None =>
+ if (FileStreamSink.hasMetadata(
+ Seq(path), hadoopConf, sparkSession.sessionState.conf)) {
+ setSourceHasMetadata(Some(true))
+ allFiles = allFilesUsingMetadataLogFileIndex()
+ } else {
+ allFiles = allFilesUsingInMemoryFileIndex()
+ if (allFiles.isEmpty) {
+ // we still cannot decide
+ } else {
+ // decide what to use for future rounds
+ // double check whether source has metadata, preventing the extreme corner case that
+ // metadata log and data files are only generated after the previous
+ // `FileStreamSink.hasMetadata` check
+ if (FileStreamSink.hasMetadata(
+ Seq(path), hadoopConf, sparkSession.sessionState.conf)) {
+ setSourceHasMetadata(Some(true))
+ allFiles = allFilesUsingMetadataLogFileIndex()
+ } else {
+ setSourceHasMetadata(Some(false))
+ // `allFiles` have already been fetched using InMemoryFileIndex in this round
+ }
+ }
+ }
+ case Some(true) => allFiles = allFilesUsingMetadataLogFileIndex()
+ case Some(false) => allFiles = allFilesUsingInMemoryFileIndex()
+ }
+
+ val files = allFiles.sortBy(_.getModificationTime)(fileSortOrder).map { status =>
+ NewFileEntry(SparkPath.fromFileStatus(status), status.getLen, status.getModificationTime)
+ }
+ val endTime = System.nanoTime
+ val listingTimeMs = NANOSECONDS.toMillis(endTime - startTime)
+ if (listingTimeMs > 2000) {
+ // Output a warning when listing files uses more than 2 seconds.
+ logWarning(log"Listed ${MDC(LogKeys.NUM_FILES, files.size)} file(s) in " +
+ log"${MDC(LogKeys.ELAPSED_TIME, listingTimeMs)} ms")
+ } else {
+ logTrace(s"Listed ${files.size} file(s) in $listingTimeMs ms")
+ }
+ logTrace(s"Files are:\n\t" + files.mkString("\n\t"))
+ files
+ }
+
+ override def toString: String = s"FileMicroBatchStream[$qualifiedBasePath]"
+}
+
+private[v2] object FileMicroBatchStream {
+ /** Newly fetched files metadata holder. */
+ private[v2] case class NewFileEntry(path: SparkPath, size: Long, timestamp: Long)
+
+ private case class FilesSplit(files: Seq[NewFileEntry], size: BigInt)
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
index 5348f9ab6df62..c8e2bb4bb92a1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
@@ -18,18 +18,22 @@ package org.apache.spark.sql.execution.datasources.v2
import java.util.{Locale, OptionalLong}
+import scala.jdk.CollectionConverters._
+
import org.apache.hadoop.fs.Path
import org.apache.spark.internal.Logging
import org.apache.spark.internal.LogKeys.{PATH, REASON}
import org.apache.spark.internal.config.IO_WARNING_LARGEFILETHRESHOLD
import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.SQLConfHelper
-import org.apache.spark.sql.catalyst.expressions.{AttributeSet, Expression, ExpressionSet}
+import org.apache.spark.sql.catalyst.{FileSourceOptions, SQLConfHelper}
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
+import org.apache.spark.sql.catalyst.expressions.{AttributeSet, Expression, ExpressionSet, FileSourceGeneratedMetadataStructField}
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
import org.apache.spark.sql.connector.read._
+import org.apache.spark.sql.connector.read.streaming.MicroBatchStream
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.PartitionedFileUtil
import org.apache.spark.sql.execution.datasources._
@@ -37,7 +41,9 @@ import org.apache.spark.sql.internal.{SessionStateHelper, SQLConf}
import org.apache.spark.sql.internal.connector.SupportsMetadata
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.util.Utils
+import org.apache.spark.util.collection.BitSet
trait FileScan extends Scan
with Batch
@@ -68,6 +74,40 @@ trait FileScan extends Scan
*/
def readPartitionSchema: StructType
+ def options: CaseInsensitiveStringMap
+
+ /**
+ * The pruned `_metadata` struct requested by the query, or empty if the query does not
+ * reference any metadata columns. Concrete scans typically receive this from their
+ * [[FileScanBuilder]] as a constructor argument and pass it to
+ * [[wrapWithMetadataIfNeeded]].
+ */
+ def requestedMetadataFields: StructType = StructType(Seq.empty)
+
+ /**
+ * Wraps the given [[FilePartitionReaderFactory]] with a metadata-appending decorator
+ * when the query references `_metadata.*`; otherwise returns `delegate` unchanged.
+ * `options` is forwarded so the wrapper can honor `ignoreCorruptFiles` /
+ * `ignoreMissingFiles`. `readDataSchema` is forwarded so the wrapper can locate
+ * (and project out) any internal columns appended for generated metadata sub-fields.
+ */
+ protected def wrapWithMetadataIfNeeded(
+ delegate: FilePartitionReaderFactory,
+ readDataSchema: StructType,
+ options: CaseInsensitiveStringMap): FilePartitionReaderFactory = {
+ if (requestedMetadataFields.isEmpty) {
+ delegate
+ } else {
+ new MetadataAppendingFilePartitionReaderFactory(
+ delegate,
+ new FileSourceOptions(options.asCaseSensitiveMap.asScala.toMap),
+ requestedMetadataFields,
+ readDataSchema,
+ readPartitionSchema,
+ FileFormat.BASE_METADATA_EXTRACTORS)
+ }
+ }
+
/**
* Returns the filters that can be use for partition pruning
*/
@@ -78,6 +118,41 @@ trait FileScan extends Scan
*/
def dataFilters: Seq[Expression]
+ /** Optional bucket specification from the catalog table. */
+ def bucketSpec: Option[BucketSpec] = None
+
+ /** When true, disables bucketed scan. Set by DisableUnnecessaryBucketedScan. */
+ def disableBucketedScan: Boolean = false
+
+ /** Optional set of bucket IDs to scan (bucket pruning). None = scan all. */
+ def optionalBucketSet: Option[BitSet] = None
+
+ /** Optional coalesced bucket count. Set by CoalesceBucketsInJoin. */
+ def optionalNumCoalescedBuckets: Option[Int] = None
+
+ /**
+ * Whether this scan actually uses bucketed read.
+ * Mirrors V1 FileSourceScanExec.bucketedScan.
+ */
+ lazy val bucketedScan: Boolean = {
+ conf.bucketingEnabled && bucketSpec.isDefined && !disableBucketedScan && {
+ val spec = bucketSpec.get
+ val resolver = sparkSession.sessionState.conf.resolver
+ val bucketColumns = spec.bucketColumnNames.flatMap(n =>
+ readSchema().fields.find(f => resolver(f.name, n)))
+ bucketColumns.size == spec.bucketColumnNames.size
+ }
+ }
+
+ /** Returns a copy of this scan with a different file index. Default is a no-op. */
+ def withFileIndex(newFileIndex: PartitioningAwareFileIndex): FileScan = this
+
+ /** Returns a copy of this scan with bucketed scan disabled. Default is a no-op. */
+ def withDisableBucketedScan(disable: Boolean): FileScan = this
+
+ /** Returns a copy of this scan with the given coalesced bucket count. Default is a no-op. */
+ def withNumCoalescedBuckets(numCoalescedBuckets: Option[Int]): FileScan = this
+
/**
* If a file with `path` is unsplittable, return the unsplittable reason,
* otherwise return `None`.
@@ -105,7 +180,11 @@ trait FileScan extends Scan
case f: FileScan =>
fileIndex == f.fileIndex && readSchema == f.readSchema &&
normalizedPartitionFilters == f.normalizedPartitionFilters &&
- normalizedDataFilters == f.normalizedDataFilters
+ normalizedDataFilters == f.normalizedDataFilters &&
+ bucketSpec == f.bucketSpec &&
+ disableBucketedScan == f.disableBucketedScan &&
+ optionalBucketSet == f.optionalBucketSet &&
+ optionalNumCoalescedBuckets == f.optionalNumCoalescedBuckets
case _ => false
}
@@ -130,17 +209,44 @@ trait FileScan extends Scan
val locationDesc =
fileIndex.getClass.getSimpleName +
Utils.buildLocationMetadata(fileIndex.rootPaths, maxMetadataValueLength)
- Map(
+ // Hide internal columns (e.g. Parquet's `_tmp_metadata_row_index`) from EXPLAIN's
+ // ReadSchema entry. They live in `readDataSchema` so the format reader populates
+ // them, but they are wrapper-internal and should not surface in user-facing plans.
+ val internalNames = requestedMetadataFields.fields.collect {
+ case FileSourceGeneratedMetadataStructField(_, internalName) => internalName
+ }.toSet
+ val visibleReadSchema =
+ if (internalNames.isEmpty) readDataSchema
+ else StructType(readDataSchema.fields.filterNot(f => internalNames.contains(f.name)))
+ val base = Map(
"Format" -> s"${this.getClass.getSimpleName.replace("Scan", "").toLowerCase(Locale.ROOT)}",
- "ReadSchema" -> readDataSchema.catalogString,
+ "ReadSchema" -> visibleReadSchema.catalogString,
"PartitionFilters" -> seqToString(partitionFilters),
"DataFilters" -> seqToString(dataFilters),
"Location" -> locationDesc)
+ val withBucket = if (bucketedScan) {
+ base ++ Map(
+ "BucketSpec" -> bucketSpec.get.toString,
+ "BucketedScan" -> "true")
+ } else {
+ base
+ }
+ if (requestedMetadataFields.isEmpty) {
+ withBucket
+ } else {
+ // Surface the pruned `_metadata` sub-fields in EXPLAIN so users can confirm the
+ // scan is honoring their `_metadata.*` references.
+ val metaDesc =
+ s"[${FileFormat.METADATA_NAME}: ${requestedMetadataFields.catalogString}]"
+ withBucket + ("MetadataColumns" -> metaDesc)
+ }
}
protected def partitions: Seq[FilePartition] = {
val selectedPartitions = fileIndex.listFiles(partitionFilters, dataFilters)
- val maxSplitBytes = FilePartition.maxSplitBytes(sparkSession, selectedPartitions)
+ // For bucketed scans, use Long.MaxValue to avoid splitting bucket files across partitions.
+ val maxSplitBytes = if (bucketedScan) Long.MaxValue
+ else FilePartition.maxSplitBytes(sparkSession, selectedPartitions)
val partitionAttributes = toAttributes(fileIndex.partitionSchema)
val attributeMap = partitionAttributes.map(a => normalizeName(a.name) -> a).toMap
val readPartitionAttributes = readPartitionSchema.map { readField =>
@@ -170,16 +276,53 @@ trait FileScan extends Scan
}.toArray.sortBy(_.length)(implicitly[Ordering[Long]].reverse)
}
- if (splitFiles.length == 1) {
- val path = splitFiles(0).toPath
- if (!isSplitable(path) && splitFiles(0).length >
- SessionStateHelper.getSparkConf(sparkSession).get(IO_WARNING_LARGEFILETHRESHOLD)) {
- logWarning(log"Loading one large unsplittable file ${MDC(PATH, path.toString)} with only " +
- log"one partition, the reason is: ${MDC(REASON, getFileUnSplittableReason(path))}")
+ if (bucketedScan) {
+ createBucketedPartitions(splitFiles)
+ } else {
+ if (splitFiles.length == 1) {
+ val path = splitFiles(0).toPath
+ if (!isSplitable(path) && splitFiles(0).length >
+ SessionStateHelper.getSparkConf(sparkSession).get(IO_WARNING_LARGEFILETHRESHOLD)) {
+ logWarning(
+ log"Loading one large unsplittable file ${MDC(PATH, path.toString)} with only " +
+ log"one partition, the reason is: ${MDC(REASON, getFileUnSplittableReason(path))}")
+ }
}
+
+ FilePartition.getFilePartitions(sparkSession, splitFiles, maxSplitBytes)
}
+ }
- FilePartition.getFilePartitions(sparkSession, splitFiles, maxSplitBytes)
+ /**
+ * Groups split files by bucket ID and applies bucket pruning and coalescing.
+ * Mirrors V1 FileSourceScanExec.createBucketedReadRDD.
+ */
+ private def createBucketedPartitions(
+ splitFiles: Seq[PartitionedFile]): Seq[FilePartition] = {
+ val spec = bucketSpec.get
+ val filesGroupedToBuckets = splitFiles.groupBy { f =>
+ BucketingUtils.getBucketId(new Path(f.toPath.toString).getName)
+ .getOrElse(throw new IllegalStateException(s"Invalid bucket file: ${f.toPath}"))
+ }
+ val prunedFilesGroupedToBuckets = optionalBucketSet match {
+ case Some(bucketSet) =>
+ filesGroupedToBuckets.filter { case (id, _) => bucketSet.get(id) }
+ case None => filesGroupedToBuckets
+ }
+ optionalNumCoalescedBuckets.map { numCoalescedBuckets =>
+ val coalescedBuckets = prunedFilesGroupedToBuckets.groupBy(_._1 % numCoalescedBuckets)
+ Seq.tabulate(numCoalescedBuckets) { bucketId =>
+ val files = coalescedBuckets.get(bucketId)
+ .map(_.values.flatten.toArray)
+ .getOrElse(Array.empty)
+ FilePartition(bucketId, files)
+ }
+ }.getOrElse {
+ Seq.tabulate(spec.numBuckets) { bucketId =>
+ FilePartition(bucketId,
+ prunedFilesGroupedToBuckets.getOrElse(bucketId, Seq.empty).toArray)
+ }
+ }
}
override def planInputPartitions(): Array[InputPartition] = {
@@ -197,14 +340,57 @@ trait FileScan extends Scan
OptionalLong.of(size)
}
- override def numRows(): OptionalLong = OptionalLong.empty()
+ override def numRows(): OptionalLong = {
+ // Try to read stored row count from table
+ // properties (set by ANALYZE TABLE).
+ storedNumRows.map(OptionalLong.of)
+ .getOrElse(OptionalLong.empty())
+ }
}
}
+ /**
+ * Stored row count from ANALYZE TABLE, if available.
+ * Injected via FileTable.mergedOptions.
+ */
+ protected def storedNumRows: Option[Long] =
+ Option(options.get(FileTable.NUM_ROWS_KEY)).map(_.toLong)
+
override def toBatch: Batch = this
- override def readSchema(): StructType =
- StructType(readDataSchema.fields ++ readPartitionSchema.fields)
+ override def toMicroBatchStream(checkpointLocation: String): MicroBatchStream = {
+ new FileMicroBatchStream(
+ sparkSession,
+ fileIndex,
+ this,
+ fileIndex.rootPaths.head.toString,
+ this.getClass.getSimpleName.replace("Scan", "").toLowerCase(Locale.ROOT),
+ readSchema(),
+ readPartitionSchema.fieldNames.toSeq,
+ checkpointLocation,
+ options.asCaseSensitiveMap.asScala.toMap)
+ }
+
+ override def readSchema(): StructType = {
+ // [SPARK-56371] Hide internal columns added for generated metadata sub-fields
+ // (e.g., Parquet's `_tmp_metadata_row_index`). They live inside `readDataSchema` so
+ // the format reader populates them, but they must not appear in the user-visible
+ // scan output: V2's `PushDownUtils.toOutputAttrs` looks them up by name in the
+ // relation output and would fail (the internal name is not a real column).
+ val internalNames = requestedMetadataFields.fields.collect {
+ case FileSourceGeneratedMetadataStructField(_, internalName) => internalName
+ }.toSet
+ val visibleData = readDataSchema.fields.filterNot(f => internalNames.contains(f.name))
+ val base = StructType(visibleData ++ readPartitionSchema.fields)
+ if (requestedMetadataFields.isEmpty) {
+ base
+ } else {
+ // Re-expose the pruned `_metadata` struct so V2 column pushdown can bind the
+ // downstream attribute reference back to the scan output. The wrapped reader
+ // factory is responsible for actually materializing the metadata values.
+ base.add(FileFormat.METADATA_NAME, requestedMetadataFields, nullable = false)
+ }
+ }
// Returns whether the two given arrays of [[Filter]]s are equivalent.
protected def equivalentFilters(a: Array[Filter], b: Array[Filter]): Boolean = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala
index 7e0bc25a9a1e1..9345d82f20f6b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala
@@ -19,18 +19,21 @@ package org.apache.spark.sql.execution.datasources.v2
import scala.collection.mutable
import org.apache.spark.sql.{sources, SparkSession}
-import org.apache.spark.sql.catalyst.expressions.{Expression, PythonUDF, SubqueryExpression}
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
+import org.apache.spark.sql.catalyst.expressions.{Expression, FileSourceGeneratedMetadataStructField, PythonUDF, SubqueryExpression}
import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.connector.read.{ScanBuilder, SupportsPushDownRequiredColumns}
-import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, DataSourceUtils, PartitioningAwareFileIndex, PartitioningUtils}
+import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, DataSourceUtils, FileFormat, FileSourceStrategy, PartitioningAwareFileIndex, PartitioningUtils}
import org.apache.spark.sql.internal.connector.SupportsPushDownCatalystFilters
import org.apache.spark.sql.sources.Filter
-import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.types.{StructField, StructType}
+import org.apache.spark.util.collection.BitSet
abstract class FileScanBuilder(
sparkSession: SparkSession,
fileIndex: PartitioningAwareFileIndex,
- dataSchema: StructType)
+ dataSchema: StructType,
+ val bucketSpec: Option[BucketSpec] = None)
extends ScanBuilder
with SupportsPushDownRequiredColumns
with SupportsPushDownCatalystFilters {
@@ -41,15 +44,49 @@ abstract class FileScanBuilder(
protected var partitionFilters = Seq.empty[Expression]
protected var dataFilters = Seq.empty[Expression]
protected var pushedDataFilters = Array.empty[Filter]
+ // Populated by `pruneColumns` when the query references `_metadata.*`. Concrete
+ // builders pass this to their `Scan` so the reader factory can append metadata values.
+ protected var requestedMetadataFields: StructType = StructType(Seq.empty)
override def pruneColumns(requiredSchema: StructType): Unit = {
// [SPARK-30107] While `requiredSchema` might have pruned nested columns,
// the actual data schema of this scan is determined in `readDataSchema`.
// File formats that don't support nested schema pruning,
// use `requiredSchema` as a reference and prune only top-level columns.
- this.requiredSchema = requiredSchema
+ //
+ // [SPARK-56335] Extract the `_metadata` struct (if present) so the format-specific
+ // scan can wrap its reader factory with metadata appending. The `_metadata` field is
+ // removed from `this.requiredSchema` so it does not leak into `readDataSchema`.
+ //
+ // [SPARK-56371] When the metadata struct contains generated sub-fields (e.g.
+ // Parquet's `row_index` backed by `_tmp_metadata_row_index`), append the
+ // corresponding internal columns to `this.requiredSchema` so the format reader
+ // populates them. The metadata wrapper later projects them out of the visible
+ // output and weaves their values into the `_metadata` struct. Only formats that
+ // enable nested schema pruning currently surface generated metadata fields,
+ // because `readDataSchema()` reads from `requiredSchema` only on the nested path;
+ // the non-nested path filters from `dataSchema` and would silently drop the
+ // appended internal columns. Today only Parquet declares generated metadata
+ // fields, so this limitation is not reachable in practice.
+ val (metaFields, dataFields) = requiredSchema.fields.partition(isMetadataField)
+ val metaStruct = metaFields.headOption
+ .map(_.dataType.asInstanceOf[StructType])
+ .getOrElse(StructType(Seq.empty))
+ this.requestedMetadataFields = metaStruct
+ // Internal columns must be nullable so the Parquet reader treats them as
+ // synthetic columns (added to `missingColumns`) instead of failing the
+ // required-column check in `VectorizedParquetRecordReader.checkColumn`. The
+ // wrapper restores the user-facing nullability inside the `_metadata` struct.
+ val internalCols = metaStruct.fields.collect {
+ case FileSourceGeneratedMetadataStructField(field, internalName) =>
+ StructField(internalName, field.dataType, nullable = true)
+ }
+ this.requiredSchema = StructType(dataFields ++ internalCols)
}
+ private def isMetadataField(field: StructField): Boolean =
+ field.name == FileFormat.METADATA_NAME
+
protected def readDataSchema(): StructType = {
val requiredNameSet = createRequiredNameSet()
val schema = if (supportsNestedSchemaPruning) requiredSchema else dataSchema
@@ -103,4 +140,16 @@ abstract class FileScanBuilder(
val partitionNameSet: Set[String] =
partitionSchema.fields.map(PartitioningUtils.getColName(_, isCaseSensitive)).toSet
+
+ /**
+ * Computes the optional bucket set for bucket pruning based on pushed data filters.
+ * Returns None if bucket pruning is not applicable or no buckets can be pruned.
+ */
+ protected def computeBucketSet(): Option[BitSet] = {
+ bucketSpec match {
+ case Some(spec) if FileSourceStrategy.shouldPruneBuckets(Some(spec)) =>
+ FileSourceStrategy.genBucketSet(dataFilters, spec)
+ case _ => None
+ }
+ }
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileStreamingWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileStreamingWrite.scala
new file mode 100644
index 0000000000000..81a937f858775
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileStreamingWrite.scala
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2
+
+import org.apache.hadoop.mapreduce.Job
+
+import org.apache.spark.internal.{Logging, LogKeys}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.write.{DataWriter, DataWriterFactory, PhysicalWriteInfo, WriterCommitMessage}
+import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite}
+import org.apache.spark.sql.execution.datasources.{WriteJobDescription, WriteTaskResult}
+import org.apache.spark.sql.execution.streaming.ManifestFileCommitProtocol
+import org.apache.spark.sql.execution.streaming.sinks.FileStreamSinkLog
+import org.apache.spark.util.ArrayImplicits._
+
+/**
+ * A [[StreamingDataWriterFactory]] that delegates to a batch [[DataWriterFactory]].
+ * The epochId parameter is ignored because file naming already uses UUIDs for uniqueness.
+ */
+private[v2] class FileStreamingWriterFactory(
+ delegate: DataWriterFactory) extends StreamingDataWriterFactory {
+ override def createWriter(
+ partitionId: Int, taskId: Long, epochId: Long): DataWriter[InternalRow] = {
+ delegate.createWriter(partitionId, taskId)
+ }
+}
+
+/**
+ * A [[StreamingWrite]] implementation for V2 file-based tables.
+ *
+ * This is the streaming equivalent of [[FileBatchWrite]]. It uses
+ * [[ManifestFileCommitProtocol]] to track committed files in a
+ * [[FileStreamSinkLog]], providing exactly-once semantics via
+ * idempotent batch commits.
+ */
+class FileStreamingWrite(
+ job: Job,
+ description: WriteJobDescription,
+ committer: ManifestFileCommitProtocol,
+ fileLog: FileStreamSinkLog) extends StreamingWrite with Logging {
+
+ override def createStreamingWriterFactory(
+ info: PhysicalWriteInfo): StreamingDataWriterFactory = {
+ committer.setupJob(job)
+ new FileStreamingWriterFactory(FileWriterFactory(description, committer))
+ }
+
+ override def useCommitCoordinator(): Boolean = true
+
+ override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = {
+ // Idempotency: skip if batch already committed
+ if (epochId <= fileLog.getLatestBatchId().getOrElse(-1L)) {
+ logInfo(log"Skipping already committed batch ${MDC(LogKeys.BATCH_ID, epochId)}")
+ return
+ }
+ // Set the real batchId before commitJob
+ committer.setupManifestOptions(fileLog, epochId)
+ // Messages are WriteTaskResult (not raw TaskCommitMessage).
+ // Must extract .commitMsg -- same pattern as FileBatchWrite.commit().
+ val results = messages.map(_.asInstanceOf[WriteTaskResult])
+ committer.commitJob(job, results.map(_.commitMsg).toImmutableArraySeq)
+ }
+
+ override def abort(epochId: Long, messages: Array[WriterCommitMessage]): Unit = {
+ committer.abortJob(job)
+ }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala
index 0af728c1958d4..8154ee37af9a8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala
@@ -23,15 +23,22 @@ import scala.jdk.CollectionConverters._
import org.apache.hadoop.fs.{FileStatus, Path}
import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
+import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Column, MetadataColumn, SupportsMetadataColumns, SupportsPartitionManagement, SupportsRead, SupportsWrite, Table, TableCapability}
import org.apache.spark.sql.connector.catalog.TableCapability._
import org.apache.spark.sql.connector.expressions.Transform
-import org.apache.spark.sql.connector.write.{LogicalWriteInfo, LogicalWriteInfoImpl}
+import org.apache.spark.sql.connector.expressions.filter.{AlwaysTrue, Predicate}
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo,
+ LogicalWriteInfoImpl, SupportsDynamicOverwrite,
+ SupportsOverwriteV2, Write, WriteBuilder}
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.execution.streaming.runtime.MetadataLogFileIndex
import org.apache.spark.sql.execution.streaming.sinks.FileStreamSink
-import org.apache.spark.sql.types.{DataType, StructType}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{DataType, StructField, StructType}
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.sql.util.SchemaUtils
import org.apache.spark.util.ArrayImplicits._
@@ -41,26 +48,61 @@ abstract class FileTable(
options: CaseInsensitiveStringMap,
paths: Seq[String],
userSpecifiedSchema: Option[StructType])
- extends Table with SupportsRead with SupportsWrite {
+ extends Table with SupportsRead with SupportsWrite
+ with SupportsPartitionManagement with SupportsMetadataColumns {
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+ // Partition column names from partitionBy(). Fallback when
+ // fileIndex.partitionSchema is empty (new/empty directory).
+ private[v2] var userSpecifiedPartitioning: Seq[String] =
+ Seq.empty
+
+ // CatalogTable reference set by V2SessionCatalog.loadTable.
+ private[sql] var catalogTable: Option[
+ org.apache.spark.sql.catalyst.catalog.CatalogTable
+ ] = None
+
+ // When true, use CatalogFileIndex to support custom
+ // partition locations. Set by V2SessionCatalog.loadTable.
+ private[v2] var useCatalogFileIndex: Boolean = false
+
+ /** BucketSpec from the catalog table, if available. */
+ def bucketSpec: Option[BucketSpec] = catalogTable.flatMap(_.bucketSpec)
+
lazy val fileIndex: PartitioningAwareFileIndex = {
val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap
// Hadoop Configurations are case sensitive.
val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap)
- if (FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)) {
- // We are reading from the results of a streaming query. We will load files from
- // the metadata log instead of listing them using HDFS APIs.
+ // When userSpecifiedSchema is provided (e.g., write path via DataFrame API), the path
+ // may not exist yet. Skip streaming metadata check and file existence checks.
+ val isStreamingMetadata = userSpecifiedSchema.isEmpty &&
+ FileStreamSink.hasMetadata(paths, hadoopConf, sparkSession.sessionState.conf)
+ if (isStreamingMetadata) {
new MetadataLogFileIndex(sparkSession, new Path(paths.head),
options.asScala.toMap, userSpecifiedSchema)
+ } else if (useCatalogFileIndex &&
+ catalogTable.exists(_.partitionColumnNames.nonEmpty)) {
+ val ct = catalogTable.get
+ val stats = sparkSession.sessionState.catalog
+ .getTableMetadata(ct.identifier).stats
+ .map(_.sizeInBytes.toLong).getOrElse(0L)
+ new CatalogFileIndex(sparkSession, ct, stats)
+ .filterPartitions(Nil)
} else {
- // This is a non-streaming file based datasource.
- val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary(paths, hadoopConf,
- checkEmptyGlobPath = true, checkFilesExist = true, enableGlobbing = globPaths)
- val fileStatusCache = FileStatusCache.getOrCreate(sparkSession)
+ val checkFilesExist = userSpecifiedSchema.isEmpty
+ val rootPathsSpecified =
+ DataSource.checkAndGlobPathIfNecessary(
+ paths, hadoopConf,
+ checkEmptyGlobPath = checkFilesExist,
+ checkFilesExist = checkFilesExist,
+ enableGlobbing = globPaths)
+ val fileStatusCache =
+ FileStatusCache.getOrCreate(sparkSession)
new InMemoryFileIndex(
- sparkSession, rootPathsSpecified, caseSensitiveMap, userSpecifiedSchema, fileStatusCache)
+ sparkSession, rootPathsSpecified,
+ caseSensitiveMap, userSpecifiedSchema,
+ fileStatusCache)
}
}
@@ -82,16 +124,28 @@ abstract class FileTable(
override lazy val schema: StructType = {
val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
- SchemaUtils.checkSchemaColumnNameDuplication(dataSchema, caseSensitive)
- dataSchema.foreach { field =>
- if (!supportsDataType(field.dataType)) {
- throw QueryCompilationErrors.dataTypeUnsupportedByDataSourceError(formatName, field)
- }
+ // Check column name duplication for non-catalog tables.
+ // Skip for catalog tables (analyzer handles ambiguity)
+ // and formats that allow duplicates (e.g., CSV).
+ if (catalogTable.isEmpty && !allowDuplicatedColumnNames) {
+ SchemaUtils.checkSchemaColumnNameDuplication(
+ dataSchema, caseSensitive)
}
val partitionSchema = fileIndex.partitionSchema
- SchemaUtils.checkSchemaColumnNameDuplication(partitionSchema, caseSensitive)
val partitionNameSet: Set[String] =
partitionSchema.fields.map(PartitioningUtils.getColName(_, caseSensitive)).toSet
+ // Validate data types for non-partition columns only. Partition columns
+ // are written as directory names, not as data values, so format-specific
+ // type restrictions don't apply.
+ val userPartNames = userSpecifiedPartitioning.toSet
+ dataSchema.foreach { field =>
+ val colName = PartitioningUtils.getColName(field, caseSensitive)
+ if (!partitionNameSet.contains(colName) &&
+ !userPartNames.contains(field.name) &&
+ !supportsDataType(field.dataType)) {
+ throw QueryCompilationErrors.dataTypeUnsupportedByDataSourceError(formatName, field)
+ }
+ }
// When data and partition schemas have overlapping columns,
// tableSchema = dataSchema - overlapSchema + partitionSchema
@@ -102,13 +156,87 @@ abstract class FileTable(
StructType(fields)
}
- override def partitioning: Array[Transform] =
- fileIndex.partitionSchema.names.toImmutableArraySeq.asTransforms
+ override def columns(): Array[Column] = {
+ val baseSchema = schema
+ val conf = sparkSession.sessionState.conf
+ if (conf.getConf(SQLConf.FILE_SOURCE_INSERT_ENFORCE_NOT_NULL)
+ && catalogTable.isDefined) {
+ val catFields = catalogTable.get.schema.fields
+ .map(f => f.name -> f).toMap
+ val restored = StructType(baseSchema.fields.map { f =>
+ catFields.get(f.name) match {
+ case Some(cf) =>
+ f.copy(nullable = cf.nullable,
+ dataType = restoreNullability(
+ f.dataType, cf.dataType))
+ case None => f
+ }
+ })
+ CatalogV2Util.structTypeToV2Columns(restored)
+ } else {
+ CatalogV2Util.structTypeToV2Columns(baseSchema)
+ }
+ }
+
+ private def restoreNullability(
+ dataType: DataType,
+ catalogType: DataType): DataType = {
+ import org.apache.spark.sql.types._
+ (dataType, catalogType) match {
+ case (ArrayType(et1, _), ArrayType(et2, cn)) =>
+ ArrayType(restoreNullability(et1, et2), cn)
+ case (MapType(kt1, vt1, _), MapType(kt2, vt2, vcn)) =>
+ MapType(restoreNullability(kt1, kt2),
+ restoreNullability(vt1, vt2), vcn)
+ case (StructType(f1), StructType(f2)) =>
+ val catMap = f2.map(f => f.name -> f).toMap
+ StructType(f1.map { f =>
+ catMap.get(f.name) match {
+ case Some(cf) =>
+ f.copy(nullable = cf.nullable,
+ dataType = restoreNullability(
+ f.dataType, cf.dataType))
+ case None => f
+ }
+ })
+ case _ => dataType
+ }
+ }
+
+ override def partitioning: Array[Transform] = {
+ val fromIndex =
+ fileIndex.partitionSchema.names.toImmutableArraySeq
+ if (fromIndex.nonEmpty) {
+ fromIndex.asTransforms
+ } else if (userSpecifiedPartitioning.nonEmpty) {
+ userSpecifiedPartitioning.asTransforms
+ } else {
+ catalogTable
+ .map(_.partitionColumnNames.toArray
+ .toImmutableArraySeq.asTransforms)
+ .getOrElse(fromIndex.asTransforms)
+ }
+ }
override def properties: util.Map[String, String] = options.asCaseSensitiveMap
override def capabilities: java.util.Set[TableCapability] = FileTable.CAPABILITIES
+ /**
+ * Exposes the `_metadata` struct column so V2 file scans match V1 parity for queries
+ * that reference `_metadata.*`. Formats override [[metadataSchemaFields]] to add
+ * format-specific sub-fields.
+ */
+ override def metadataColumns(): Array[MetadataColumn] = {
+ Array(FileMetadataColumn(FileFormat.METADATA_NAME, StructType(metadataSchemaFields)))
+ }
+
+ /**
+ * Sub-fields of the `_metadata` struct. Defaults to [[FileFormat.BASE_METADATA_FIELDS]];
+ * formats can override to expose more (e.g., Parquet's `row_index`, tracked in SPARK-56371).
+ */
+ protected def metadataSchemaFields: Seq[StructField] = FileFormat.BASE_METADATA_FIELDS
+
/**
* When possible, this method should return the schema of the given `files`. When the format
* does not support inference, or no valid files are given should return None. In these cases
@@ -122,6 +250,12 @@ abstract class FileTable(
*/
def supportsDataType(dataType: DataType): Boolean = true
+ /**
+ * Whether this format allows duplicated column names. CSV allows this
+ * because column access is by position. Override in subclasses as needed.
+ */
+ def allowDuplicatedColumnNames: Boolean = false
+
/**
* The string that represents the format that this data source provider uses. This is
* overridden by children to provide a nice alias for the data source. For example:
@@ -157,9 +291,16 @@ abstract class FileTable(
* @return
*/
protected def mergedOptions(options: CaseInsensitiveStringMap): CaseInsensitiveStringMap = {
- val finalOptions = this.options.asCaseSensitiveMap().asScala ++
+ val base = this.options.asCaseSensitiveMap().asScala ++
options.asCaseSensitiveMap().asScala
- new CaseInsensitiveStringMap(finalOptions.asJava)
+ // Inject stored numRows from catalog for FileScan.estimateStatistics()
+ val withStats = catalogTable.flatMap(_.stats)
+ .flatMap(_.rowCount) match {
+ case Some(rows) =>
+ base ++ Map(FileTable.NUM_ROWS_KEY -> rows.toString)
+ case None => base
+ }
+ new CaseInsensitiveStringMap(withStats.asJava)
}
/**
@@ -174,8 +315,296 @@ abstract class FileTable(
writeInfo.rowIdSchema(),
writeInfo.metadataSchema())
}
+
+ /**
+ * Creates a [[WriteBuilder]] that supports truncate and
+ * dynamic partition overwrite for file-based tables.
+ */
+ protected def createFileWriteBuilder(
+ info: LogicalWriteInfo)(
+ buildWrite: (LogicalWriteInfo, StructType,
+ Option[BucketSpec],
+ Map[Map[String, String], String],
+ Boolean, Boolean,
+ Option[Array[Predicate]]) => Write
+ ): WriteBuilder = {
+ new WriteBuilder with SupportsDynamicOverwrite
+ with SupportsOverwriteV2 {
+ private var isDynamicOverwrite = false
+ private var overwritePredicates
+ : Option[Array[Predicate]] = None
+
+ override def overwriteDynamicPartitions(): WriteBuilder = {
+ isDynamicOverwrite = true
+ this
+ }
+
+ override def overwrite(
+ predicates: Array[Predicate]): WriteBuilder = {
+ overwritePredicates = Some(predicates)
+ this
+ }
+
+ override def build(): Write = {
+ val merged = mergedWriteInfo(info)
+ val fromIndex = fileIndex.partitionSchema
+ val partSchema =
+ if (fromIndex.nonEmpty) {
+ fromIndex
+ } else if (userSpecifiedPartitioning.nonEmpty) {
+ // Look up partition columns from the write schema first,
+ // then fall back to the table's full schema (data + partition).
+ // Use case-insensitive lookup since partitionBy("p") may
+ // differ in case from the DataFrame column name ("P").
+ val writeSchema = merged.schema()
+ StructType(userSpecifiedPartitioning.map { c =>
+ writeSchema.find(_.name.equalsIgnoreCase(c))
+ .orElse(schema.find(_.name.equalsIgnoreCase(c)))
+ .map(_.copy(name = c))
+ .getOrElse(
+ throw new IllegalArgumentException(
+ s"Partition column '$c' not found"))
+ })
+ } else {
+ // Fall back to catalog table partition columns when
+ // fileIndex has no partitions (empty table).
+ catalogTable
+ .filter(_.partitionColumnNames.nonEmpty)
+ .map { ct =>
+ val writeSchema = merged.schema()
+ StructType(ct.partitionColumnNames.map { c =>
+ writeSchema.find(_.name.equalsIgnoreCase(c))
+ .orElse(schema.find(_.name.equalsIgnoreCase(c)))
+ .getOrElse(
+ throw new IllegalArgumentException(
+ s"Partition column '$c' not found"))
+ })
+ }
+ .getOrElse(fromIndex)
+ }
+ val bSpec = catalogTable.flatMap(_.bucketSpec)
+ val isTruncate = overwritePredicates.exists(
+ _.exists(_.isInstanceOf[AlwaysTrue]))
+ val customLocs = getCustomPartitionLocations(
+ partSchema)
+ buildWrite(merged, partSchema, bSpec,
+ customLocs, isDynamicOverwrite, isTruncate,
+ overwritePredicates)
+ }
+ }
+ }
+
+ private def getCustomPartitionLocations(
+ partSchema: StructType
+ ): Map[Map[String, String], String] = {
+ catalogTable match {
+ case Some(ct) if ct.partitionColumnNames.nonEmpty =>
+ val outputPath = new Path(paths.head)
+ val hadoopConf = sparkSession.sessionState
+ .newHadoopConfWithOptions(
+ options.asCaseSensitiveMap.asScala.toMap)
+ val fs = outputPath.getFileSystem(hadoopConf)
+ val qualifiedOutputPath = outputPath.makeQualified(
+ fs.getUri, fs.getWorkingDirectory)
+ val partitions = sparkSession.sessionState.catalog
+ .listPartitions(ct.identifier)
+ partitions.flatMap { p =>
+ val defaultLocation = qualifiedOutputPath.suffix(
+ "/" + PartitioningUtils.getPathFragment(
+ p.spec, partSchema)).toString
+ val catalogLocation = new Path(p.location)
+ .makeQualified(
+ fs.getUri, fs.getWorkingDirectory).toString
+ if (catalogLocation != defaultLocation) {
+ Some(p.spec -> catalogLocation)
+ } else {
+ None
+ }
+ }.toMap
+ case _ => Map.empty
+ }
+ }
+
+ // ---- SupportsPartitionManagement ----
+
+ override def partitionSchema(): StructType = {
+ val fromIndex = fileIndex.partitionSchema
+ if (fromIndex.nonEmpty) {
+ fromIndex
+ } else if (userSpecifiedPartitioning.nonEmpty) {
+ val full = schema
+ StructType(userSpecifiedPartitioning.flatMap(
+ col => full.find(_.name == col)))
+ } else {
+ fromIndex
+ }
+ }
+
+ override def createPartition(
+ ident: InternalRow,
+ properties: util.Map[String, String]): Unit = {
+ val partPath = partitionPath(ident)
+ val hadoopConf = sparkSession.sessionState
+ .newHadoopConfWithOptions(
+ options.asCaseSensitiveMap.asScala.toMap)
+ val fs = partPath.getFileSystem(hadoopConf)
+ if (fs.exists(partPath)) {
+ throw new org.apache.spark.sql.catalyst
+ .analysis.PartitionsAlreadyExistException(
+ name(), ident, partitionSchema())
+ }
+ fs.mkdirs(partPath)
+ // Sync to catalog metastore if available.
+ catalogTable.foreach { ct =>
+ val spec = partitionSpec(ident)
+ val loc = Option(properties.get("location"))
+ .orElse(Some(partPath.toString))
+ val part = org.apache.spark.sql.catalyst.catalog
+ .CatalogTablePartition(spec,
+ org.apache.spark.sql.catalyst.catalog
+ .CatalogStorageFormat.empty
+ .copy(locationUri = loc.map(new java.net.URI(_))))
+ try {
+ sparkSession.sessionState.catalog
+ .createPartitions(ct.identifier,
+ Seq(part), ignoreIfExists = true)
+ } catch { case _: Exception => }
+ }
+ fileIndex.refresh()
+ }
+
+ override def dropPartition(
+ ident: InternalRow): Boolean = {
+ val partPath = partitionPath(ident)
+ val hadoopConf = sparkSession.sessionState
+ .newHadoopConfWithOptions(
+ options.asCaseSensitiveMap.asScala.toMap)
+ val fs = partPath.getFileSystem(hadoopConf)
+ if (fs.exists(partPath)) {
+ fs.delete(partPath, true)
+ // Sync to catalog metastore if available.
+ catalogTable.foreach { ct =>
+ val spec = partitionSpec(ident)
+ try {
+ sparkSession.sessionState.catalog
+ .dropPartitions(ct.identifier,
+ Seq(spec), ignoreIfNotExists = true,
+ purge = false, retainData = false)
+ } catch { case _: Exception => }
+ }
+ fileIndex.refresh()
+ true
+ } else {
+ false
+ }
+ }
+
+ override def replacePartitionMetadata(
+ ident: InternalRow,
+ properties: util.Map[String, String]): Unit = {
+ throw new UnsupportedOperationException(
+ "File-based tables do not support " +
+ "partition metadata")
+ }
+
+ override def loadPartitionMetadata(
+ ident: InternalRow
+ ): util.Map[String, String] = {
+ throw new UnsupportedOperationException(
+ "File-based tables do not support " +
+ "partition metadata")
+ }
+
+ override def listPartitionIdentifiers(
+ names: Array[String],
+ ident: InternalRow): Array[InternalRow] = {
+ val schema = partitionSchema()
+ if (schema.isEmpty) return Array.empty
+
+ val basePath = new Path(paths.head)
+ val hadoopConf = sparkSession.sessionState
+ .newHadoopConfWithOptions(
+ options.asCaseSensitiveMap.asScala.toMap)
+ val fs = basePath.getFileSystem(hadoopConf)
+
+ val allPartitions = if (schema.length == 1) {
+ val field = schema.head
+ if (!fs.exists(basePath)) {
+ Array.empty[InternalRow]
+ } else {
+ fs.listStatus(basePath)
+ .filter(_.isDirectory)
+ .map(_.getPath.getName)
+ .filter(_.contains("="))
+ .map { dirName =>
+ val value = dirName.split("=", 2)(1)
+ val converted = Cast(
+ Literal(value), field.dataType).eval()
+ InternalRow(converted)
+ }
+ }
+ } else {
+ fileIndex.refresh()
+ fileIndex match {
+ case idx: PartitioningAwareFileIndex =>
+ idx.partitionSpec().partitions
+ .map(_.values).toArray
+ case _ => Array.empty[InternalRow]
+ }
+ }
+
+ if (names.isEmpty) {
+ allPartitions
+ } else {
+ val indexes = names.map(schema.fieldIndex)
+ val dataTypes = names.map(schema(_).dataType)
+ allPartitions.filter { row =>
+ var matches = true
+ var i = 0
+ while (i < names.length && matches) {
+ val actual = row.get(indexes(i), dataTypes(i))
+ val expected = ident.get(i, dataTypes(i))
+ matches = actual == expected
+ i += 1
+ }
+ matches
+ }
+ }
+ }
+
+ private def partitionPath(ident: InternalRow): Path = {
+ val schema = partitionSchema()
+ val basePath = new Path(paths.head)
+ val parts = (0 until schema.length).map { i =>
+ val name = schema(i).name
+ val value = ident.get(i, schema(i).dataType)
+ val valueStr = if (value == null) {
+ "__HIVE_DEFAULT_PARTITION__"
+ } else {
+ value.toString
+ }
+ s"$name=$valueStr"
+ }
+ new Path(basePath, parts.mkString("/"))
+ }
+
+ private def partitionSpec(
+ ident: InternalRow): Map[String, String] = {
+ val schema = partitionSchema()
+ (0 until schema.length).map { i =>
+ val name = schema(i).name
+ val value = ident.get(i, schema(i).dataType)
+ name -> (if (value == null) null else value.toString)
+ }.toMap
+ }
}
object FileTable {
- private val CAPABILITIES = util.EnumSet.of(BATCH_READ, BATCH_WRITE)
+ private val CAPABILITIES = util.EnumSet.of(
+ BATCH_READ, BATCH_WRITE, TRUNCATE,
+ OVERWRITE_BY_FILTER, OVERWRITE_DYNAMIC,
+ MICRO_BATCH_READ, STREAMING_WRITE)
+
+ /** Option key for injecting stored row count from ANALYZE TABLE into FileScan. */
+ val NUM_ROWS_KEY: String = "__numRows"
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWrite.scala
index 77e1ade44780f..7fde13b6c13f8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWrite.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWrite.scala
@@ -28,24 +28,38 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.spark.internal.io.FileCommitProtocol
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils}
-import org.apache.spark.sql.connector.write.{BatchWrite, LogicalWriteInfo, Write}
+import org.apache.spark.sql.connector.distributions.{Distribution, Distributions}
+import org.apache.spark.sql.connector.expressions.{Expressions, SortDirection}
+import org.apache.spark.sql.connector.expressions.{SortOrder => V2SortOrder}
+import org.apache.spark.sql.connector.expressions.filter.Predicate
+import org.apache.spark.sql.connector.write.{BatchWrite, LogicalWriteInfo, RequiresDistributionAndOrdering, Write}
+import org.apache.spark.sql.connector.write.streaming.StreamingWrite
import org.apache.spark.sql.errors.QueryCompilationErrors
-import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, DataSource, OutputWriterFactory, WriteJobDescription}
+import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, DataSource, OutputWriterFactory, V1WritesUtils, WriteJobDescription}
import org.apache.spark.sql.execution.metric.SQLMetric
+import org.apache.spark.sql.execution.streaming.ManifestFileCommitProtocol
+import org.apache.spark.sql.execution.streaming.sinks.{FileStreamSink, FileStreamSinkLog}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DataType, StructType}
import org.apache.spark.sql.util.SchemaUtils
-import org.apache.spark.util.ArrayImplicits._
import org.apache.spark.util.SerializableConfiguration
-trait FileWrite extends Write {
+trait FileWrite extends Write
+ with RequiresDistributionAndOrdering {
def paths: Seq[String]
def formatName: String
def supportsDataType: DataType => Boolean
def allowDuplicatedColumnNames: Boolean = false
def info: LogicalWriteInfo
+ def partitionSchema: StructType
+ def bucketSpec: Option[BucketSpec] = None
+ def overwritePredicates: Option[Array[Predicate]] = None
+ def customPartitionLocations: Map[Map[String, String], String] = Map.empty
+ def dynamicPartitionOverwrite: Boolean = false
+ def isTruncate: Boolean = false
private val schema = info.schema()
private val queryId = info.queryId()
@@ -53,6 +67,21 @@ trait FileWrite extends Write {
override def description(): String = formatName
+ override def requiredDistribution(): Distribution =
+ Distributions.unspecified()
+
+ override def requiredOrdering(): Array[V2SortOrder] = {
+ if (partitionSchema.isEmpty) {
+ Array.empty
+ } else {
+ partitionSchema.fieldNames.map { col =>
+ Expressions.sort(
+ Expressions.column(col),
+ SortDirection.ASCENDING)
+ }
+ }
+ }
+
override def toBatch: BatchWrite = {
val sparkSession = SparkSession.active
validateInputs(sparkSession.sessionState.conf)
@@ -60,18 +89,88 @@ trait FileWrite extends Write {
val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap
// Hadoop Configurations are case sensitive.
val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap)
+
+ // Ensure the output path exists. For new writes (Append to a new path, Overwrite on a new
+ // path), the path may not exist yet.
+ val fs = path.getFileSystem(hadoopConf)
+ val qualifiedPath = path.makeQualified(fs.getUri, fs.getWorkingDirectory)
+ if (!fs.exists(qualifiedPath)) {
+ fs.mkdirs(qualifiedPath)
+ }
+
+ if (isTruncate && fs.exists(qualifiedPath)) {
+ // Full overwrite: delete all non-hidden data
+ fs.listStatus(qualifiedPath).foreach { status =>
+ if (!status.getPath.getName.startsWith("_") &&
+ !status.getPath.getName.startsWith(".")) {
+ fs.delete(status.getPath, true)
+ }
+ }
+ } else if (overwritePredicates.exists(_.nonEmpty) &&
+ fs.exists(qualifiedPath)) {
+ // Static partition overwrite: delete only matching partition dir.
+ // Extract partition spec from predicates and order by
+ // partitionSchema to match the directory structure.
+ val specMap = overwritePredicates.get
+ .flatMap(FileWrite.predicateToPartitionSpec)
+ .toMap
+ if (specMap.nonEmpty) {
+ val partPath = partitionSchema.fieldNames
+ .flatMap(col => specMap.get(col).map(v => s"$col=$v"))
+ .mkString("/")
+ val targetPath = new Path(qualifiedPath, partPath)
+ if (fs.exists(targetPath)) {
+ fs.delete(targetPath, true)
+ }
+ }
+ }
+
val job = getJobInstance(hadoopConf, path)
val committer = FileCommitProtocol.instantiate(
sparkSession.sessionState.conf.fileCommitProtocolClass,
jobId = java.util.UUID.randomUUID().toString,
- outputPath = paths.head)
- lazy val description =
+ outputPath = paths.head,
+ dynamicPartitionOverwrite = dynamicPartitionOverwrite)
+ val description =
createWriteJobDescription(sparkSession, hadoopConf, job, paths.head, options.asScala.toMap)
committer.setupJob(job)
new FileBatchWrite(job, description, committer)
}
+ override def toStreaming: StreamingWrite = {
+ val sparkSession = SparkSession.active
+ validateInputs(sparkSession.sessionState.conf)
+ val outPath = new Path(paths.head)
+ val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap
+ val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap)
+
+ val fs = outPath.getFileSystem(hadoopConf)
+ val qualifiedPath = outPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
+ if (!fs.exists(qualifiedPath)) {
+ fs.mkdirs(qualifiedPath)
+ }
+
+ // Metadata log (same location/format as V1 FileStreamSink)
+ val logPath = FileStreamSink.getMetadataLogPath(fs, qualifiedPath,
+ sparkSession.sessionState.conf)
+ val retention = caseSensitiveMap.get("retention").map(
+ org.apache.spark.util.Utils.timeStringAsMs)
+ val fileLog = new FileStreamSinkLog(
+ FileStreamSinkLog.VERSION, sparkSession, logPath.toString, retention)
+
+ val job = getJobInstance(hadoopConf, outPath)
+ val committer = new ManifestFileCommitProtocol(
+ java.util.UUID.randomUUID().toString, paths.head)
+ // Placeholder batchId to satisfy setupJob()'s require(fileLog != null)
+ committer.setupManifestOptions(fileLog, 0L)
+
+ val description = createWriteJobDescription(
+ sparkSession, hadoopConf, job, paths.head, options.asScala.toMap)
+
+ new FileStreamingWrite(job, description, committer, fileLog)
+ }
+
/**
* Prepares a write job and returns an [[OutputWriterFactory]]. Client side job preparation can
* be put here. For example, user defined output committer can be configured here
@@ -93,14 +192,18 @@ trait FileWrite extends Write {
s"got: ${paths.mkString(", ")}")
}
if (!allowDuplicatedColumnNames) {
- SchemaUtils.checkColumnNameDuplication(
- schema.fields.map(_.name).toImmutableArraySeq, caseSensitiveAnalysis)
+ SchemaUtils.checkSchemaColumnNameDuplication(
+ schema, caseSensitiveAnalysis)
+ }
+ if (!sqlConf.allowCollationsInMapKeys) {
+ SchemaUtils.checkNoCollationsInMapKeys(schema)
}
DataSource.validateSchema(formatName, schema, sqlConf)
- // TODO: [SPARK-36340] Unify check schema filed of DataSource V2 Insert.
+ val partColNames = partitionSchema.fieldNames.toSet
schema.foreach { field =>
- if (!supportsDataType(field.dataType)) {
+ if (!partColNames.contains(field.name) &&
+ !supportsDataType(field.dataType)) {
throw QueryCompilationErrors.dataTypeUnsupportedByDataSourceError(formatName, field)
}
}
@@ -121,26 +224,46 @@ trait FileWrite extends Write {
pathName: String,
options: Map[String, String]): WriteJobDescription = {
val caseInsensitiveOptions = CaseInsensitiveMap(options)
+ val allColumns = toAttributes(schema)
+ val partitionColumnNames = partitionSchema.fields.map(_.name).toSet
+ val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
+ // Build partition columns using names from partitionSchema (e.g., "p" from
+ // partitionBy("p")), not from allColumns (e.g., "P" from the DataFrame).
+ // This ensures directory names match the partitionBy argument case.
+ val partitionColumns = if (partitionColumnNames.nonEmpty) {
+ allColumns.flatMap { col =>
+ val partName = if (caseSensitive) {
+ partitionColumnNames.find(_ == col.name)
+ } else {
+ partitionColumnNames.find(_.equalsIgnoreCase(col.name))
+ }
+ partName.map(n => col.withName(n))
+ }
+ } else {
+ Seq.empty
+ }
+ val dataColumns = allColumns.filterNot { col =>
+ if (caseSensitive) partitionColumnNames.contains(col.name)
+ else partitionColumnNames.exists(_.equalsIgnoreCase(col.name))
+ }
// Note: prepareWrite has side effect. It sets "job".
+ val dataSchema = StructType(dataColumns.map(col => schema(col.name)))
val outputWriterFactory =
- prepareWrite(sparkSession.sessionState.conf, job, caseInsensitiveOptions, schema)
- val allColumns = toAttributes(schema)
+ prepareWrite(sparkSession.sessionState.conf, job, caseInsensitiveOptions, dataSchema)
val metrics: Map[String, SQLMetric] = BasicWriteJobStatsTracker.metrics
val serializableHadoopConf = new SerializableConfiguration(hadoopConf)
val statsTracker = new BasicWriteJobStatsTracker(serializableHadoopConf, metrics)
- // TODO: after partitioning is supported in V2:
- // 1. filter out partition columns in `dataColumns`.
- // 2. Don't use Seq.empty for `partitionColumns`.
new WriteJobDescription(
uuid = UUID.randomUUID().toString,
serializableHadoopConf = new SerializableConfiguration(job.getConfiguration),
outputWriterFactory = outputWriterFactory,
allColumns = allColumns,
- dataColumns = allColumns,
- partitionColumns = Seq.empty,
- bucketSpec = None,
+ dataColumns = dataColumns,
+ partitionColumns = partitionColumns,
+ bucketSpec = V1WritesUtils.getWriterBucketSpec(
+ bucketSpec, dataColumns, caseInsensitiveOptions),
path = pathName,
- customPartitionLocations = Map.empty,
+ customPartitionLocations = customPartitionLocations,
maxRecordsPerFile = caseInsensitiveOptions.get("maxRecordsPerFile").map(_.toLong)
.getOrElse(sparkSession.sessionState.conf.maxRecordsPerFile),
timeZoneId = caseInsensitiveOptions.get(DateTimeUtils.TIMEZONE_OPTION)
@@ -150,3 +273,31 @@ trait FileWrite extends Write {
}
}
+private[v2] object FileWrite {
+ /**
+ * Extract a (column, value) pair from a V2 equality
+ * predicate (e.g., `p <=> 1` => `("p", "1")`).
+ */
+ def predicateToPartitionSpec(
+ predicate: Predicate): Option[(String, String)] = {
+ if (predicate.name() == "=" || predicate.name() == "<=>") {
+ val children = predicate.children()
+ if (children.length == 2) {
+ val name = children(0) match {
+ case ref: org.apache.spark.sql.connector
+ .expressions.NamedReference =>
+ Some(ref.fieldNames().head)
+ case _ => None
+ }
+ val value = children(1) match {
+ case lit: org.apache.spark.sql.connector
+ .expressions.Literal[_] =>
+ Some(lit.value().toString)
+ case _ => None
+ }
+ for (n <- name; v <- value) yield (n, v)
+ } else None
+ } else None
+ }
+}
+
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala
index f18424b4bcb86..e14ec000b1390 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala
@@ -24,7 +24,8 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
import org.apache.spark.internal.io.{FileCommitProtocol, SparkHadoopWriterUtils}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.connector.write.{DataWriter, DataWriterFactory}
-import org.apache.spark.sql.execution.datasources.{DynamicPartitionDataSingleWriter, SingleDirectoryDataWriter, WriteJobDescription}
+import org.apache.spark.sql.execution.datasources.{DynamicPartitionDataConcurrentWriter, DynamicPartitionDataSingleWriter, SingleDirectoryDataWriter, WriteJobDescription}
+import org.apache.spark.sql.execution.datasources.FileFormatWriter.ConcurrentOutputWriterSpec
case class FileWriterFactory (
description: WriteJobDescription,
@@ -40,8 +41,17 @@ case class FileWriterFactory (
override def createWriter(partitionId: Int, realTaskId: Long): DataWriter[InternalRow] = {
val taskAttemptContext = createTaskAttemptContext(partitionId, realTaskId.toInt & Int.MaxValue)
committer.setupTask(taskAttemptContext)
- if (description.partitionColumns.isEmpty) {
+ if (description.partitionColumns.isEmpty && description.bucketSpec.isEmpty) {
new SingleDirectoryDataWriter(description, taskAttemptContext, committer)
+ } else if (description.bucketSpec.isDefined) {
+ // Use concurrent writers for bucketed writes: V2's
+ // RequiresDistributionAndOrdering cannot express the hash-based
+ // ordering that DynamicPartitionDataSingleWriter requires.
+ val spec = ConcurrentOutputWriterSpec(Int.MaxValue, () =>
+ throw new UnsupportedOperationException(
+ "Sort fallback should not be triggered for V2 bucketed writes"))
+ new DynamicPartitionDataConcurrentWriter(
+ description, taskAttemptContext, committer, spec)
} else {
new DynamicPartitionDataSingleWriter(description, taskAttemptContext, committer)
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MetadataAppendingFilePartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MetadataAppendingFilePartitionReaderFactory.scala
new file mode 100644
index 0000000000000..1e3be83b3685b
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MetadataAppendingFilePartitionReaderFactory.scala
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2
+
+import org.apache.spark.sql.catalyst.{FileSourceOptions, InternalRow}
+import org.apache.spark.sql.catalyst.expressions.{BoundReference, CreateNamedStruct, Expression, FileSourceGeneratedMetadataStructField, Literal, UnsafeProjection}
+import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader}
+import org.apache.spark.sql.execution.datasources.{FileFormat, PartitionedFile}
+import org.apache.spark.sql.execution.vectorized.ConstantColumnVector
+import org.apache.spark.sql.types.{StructField, StructType}
+import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector}
+
+/**
+ * Wraps a delegate [[FilePartitionReaderFactory]] and appends a single `_metadata` struct
+ * column to each row, mirroring V1 `_metadata` semantics for V2 file scans.
+ *
+ * Supports both row-based and columnar reads. Constant sub-fields (file_path, file_name, ...)
+ * are populated from the [[PartitionedFile]] via [[metadataExtractors]]. Generated sub-fields
+ * (e.g., Parquet `row_index`) are read from internal columns that the format reader populates;
+ * [[FileScanBuilder.pruneColumns]] adds those internal columns to the read schema, and this
+ * wrapper projects them back into the visible `_metadata` struct.
+ *
+ * @param delegate the format-specific factory to wrap
+ * @param fileSourceOptions options forwarded to the per-partition [[FilePartitionReader]]
+ * @param requestedMetadataFields the pruned `_metadata` struct as the user requested it
+ * (constant + generated sub-fields, in declared order)
+ * @param readDataSchema the data schema actually passed to the format reader; includes the
+ * user's data columns followed by any internal columns appended by
+ * [[FileScanBuilder.pruneColumns]] for generated metadata sub-fields
+ * @param readPartitionSchema the partition schema (used to compute the user-visible row layout)
+ * @param metadataExtractors functions producing constant metadata values from a
+ * [[PartitionedFile]]; typically [[FileFormat.BASE_METADATA_EXTRACTORS]]
+ */
+private[v2] class MetadataAppendingFilePartitionReaderFactory(
+ delegate: FilePartitionReaderFactory,
+ fileSourceOptions: FileSourceOptions,
+ requestedMetadataFields: StructType,
+ readDataSchema: StructType,
+ readPartitionSchema: StructType,
+ metadataExtractors: Map[String, PartitionedFile => Any])
+ extends FilePartitionReaderFactory {
+
+ override protected def options: FileSourceOptions = fileSourceOptions
+
+ // For each metadata sub-field, where its value comes from:
+ // - Left(fieldName) => constant; resolve via `metadataExtractors(fieldName)`
+ // - Right(internalName) => generated; read from `readDataSchema` at the position of
+ // `internalName` (the column the format reader populates)
+ private val metadataFieldSources: Array[Either[String, String]] =
+ requestedMetadataFields.fields.map {
+ case FileSourceGeneratedMetadataStructField(_, internalName) => Right(internalName)
+ case f => Left(f.name)
+ }
+
+ // Index of each generated metadata sub-field's source within `readDataSchema`. Used by
+ // both row and columnar paths to find the per-row value the format reader produced.
+ private val internalColumnIndexInReadDataSchema: Map[String, Int] = {
+ val byName = readDataSchema.fields.zipWithIndex.map { case (f, i) => f.name -> i }.toMap
+ metadataFieldSources.collect { case Right(internalName) =>
+ internalName -> byName.getOrElse(internalName,
+ throw new IllegalStateException(
+ s"internal metadata column `$internalName` is missing from readDataSchema; " +
+ "FileScanBuilder.pruneColumns should have added it"))
+ }.toMap
+ }
+
+ // Number of user-visible data columns (everything in readDataSchema that is NOT an internal
+ // column for a generated metadata field).
+ private val numVisibleDataCols: Int = {
+ val internalNames = internalColumnIndexInReadDataSchema.keySet
+ readDataSchema.fields.count(f => !internalNames.contains(f.name))
+ }
+
+ private val numPartitionCols: Int = readPartitionSchema.length
+
+ override def buildReader(file: PartitionedFile): PartitionReader[InternalRow] = {
+ val baseReader = delegate.buildReader(file)
+ val projection = buildRowProjection(file)
+ new ProjectingMetadataRowReader(baseReader, projection)
+ }
+
+ override def buildColumnarReader(file: PartitionedFile): PartitionReader[ColumnarBatch] = {
+ val baseReader = delegate.buildColumnarReader(file)
+ // Constants are stable for the whole split, so build the per-field column vectors once
+ // and reuse across batches. The vectors' `getXxx` methods ignore rowId, so the same
+ // instances work for any batch size returned by the format reader.
+ val constantChildren = requestedMetadataFields.fields.zip(metadataFieldSources).map {
+ case (field, Left(_)) => Some(constantColumnVectorFor(field, file))
+ case (_, Right(_)) => None
+ }
+ new MetadataAppendingColumnarReader(baseReader, this, constantChildren)
+ }
+
+ override def supportColumnarReads(partition: InputPartition): Boolean =
+ delegate.supportColumnarReads(partition)
+
+ // ---------------- Row path ----------------
+
+ /**
+ * Build a [[UnsafeProjection]] from the base reader's row layout
+ * (`readDataSchema ++ readPartitionSchema`) to the wrapper's output layout
+ * (` ++ ++ [_metadata: struct]`). Constant metadata values are baked
+ * in as [[Literal]]s for this split; generated values come from [[BoundReference]]s pointing
+ * at the internal columns the format reader populated.
+ */
+ private def buildRowProjection(file: PartitionedFile): UnsafeProjection = {
+ val internalNames = internalColumnIndexInReadDataSchema.keySet
+
+ // User data columns: every position in readDataSchema that isn't an internal column.
+ val visibleDataRefs = readDataSchema.fields.zipWithIndex.collect {
+ case (f, idx) if !internalNames.contains(f.name) =>
+ BoundReference(idx, f.dataType, f.nullable).asInstanceOf[Expression]
+ }
+
+ // Partition columns sit after readDataSchema in the base row.
+ val partitionRefs = readPartitionSchema.fields.zipWithIndex.map { case (f, i) =>
+ BoundReference(readDataSchema.length + i, f.dataType, f.nullable).asInstanceOf[Expression]
+ }
+
+ // Build the metadata struct via CreateNamedStruct of (name, value) pairs. Constant values
+ // are Literals carrying the field's declared dataType (so timestamps stay timestamps and
+ // don't degrade to longs); generated values are BoundReferences into the base row.
+ val metadataStructExpr = CreateNamedStruct(
+ requestedMetadataFields.fields.zip(metadataFieldSources).flatMap {
+ case (field, Left(_)) =>
+ val rawValue = FileFormat.getFileConstantMetadataColumnValue(
+ field.name, file, metadataExtractors).value
+ Seq(Literal(field.name), Literal(rawValue, field.dataType))
+ case (field, Right(internalName)) =>
+ val idx = internalColumnIndexInReadDataSchema(internalName)
+ Seq(
+ Literal(field.name),
+ BoundReference(idx, field.dataType, field.nullable))
+ }.toIndexedSeq)
+
+ val outputExprs: Seq[Expression] =
+ visibleDataRefs.toIndexedSeq ++ partitionRefs.toIndexedSeq :+ metadataStructExpr
+ UnsafeProjection.create(outputExprs)
+ }
+
+ // ---------------- Columnar path ----------------
+
+ /**
+ * Build the wrapper's output [[ColumnarBatch]] for one input batch. User-visible data and
+ * partition columns are passed through by reference (zero-copy). The `_metadata` column is
+ * a [[CompositeStructColumnVector]] whose children are pre-built [[ConstantColumnVector]]s
+ * (for constant fields) and direct references to the format reader's internal column
+ * vectors (for generated fields).
+ *
+ * Assumes the format reader keeps stable [[ColumnVector]] references across batches within
+ * a split (Parquet's vectorized reader satisfies this contract by reusing one
+ * [[ColumnarBatch]] instance per split).
+ */
+ private[v2] def buildOutputBatch(
+ base: ColumnarBatch,
+ constantChildren: Array[Option[ConstantColumnVector]]): ColumnarBatch = {
+ val internalNames = internalColumnIndexInReadDataSchema.keySet
+
+ // Visible data columns: positions in readDataSchema that aren't internal.
+ val visibleDataCols = readDataSchema.fields.zipWithIndex.collect {
+ case (f, idx) if !internalNames.contains(f.name) => base.column(idx)
+ }
+
+ // Partition columns sit after readDataSchema in the input batch.
+ val partitionCols = (0 until numPartitionCols).map { i =>
+ base.column(readDataSchema.length + i)
+ }
+
+ val metadataChildren = requestedMetadataFields.fields.indices.map { i =>
+ metadataFieldSources(i) match {
+ case Left(_) => constantChildren(i).get.asInstanceOf[ColumnVector]
+ case Right(internalName) =>
+ base.column(internalColumnIndexInReadDataSchema(internalName))
+ }
+ }.toArray
+ val metadataColumn: ColumnVector = new CompositeStructColumnVector(
+ requestedMetadataFields, metadataChildren)
+
+ val output = (visibleDataCols ++ partitionCols :+ metadataColumn).toArray
+ new ColumnarBatch(output, base.numRows())
+ }
+
+ /**
+ * Build a per-split [[ConstantColumnVector]] for one constant metadata field. The vector's
+ * value is read from the [[PartitionedFile]] via [[metadataExtractors]] and persists for the
+ * whole split; `getXxx` ignores `rowId`, so the same instance is valid for any batch size.
+ */
+ private def constantColumnVectorFor(
+ field: StructField,
+ file: PartitionedFile): ConstantColumnVector = {
+ val literal = FileFormat.getFileConstantMetadataColumnValue(
+ field.name, file, metadataExtractors)
+ // ConstantColumnVector ignores `rowId` in its getters, so a capacity-1 allocation is
+ // sufficient regardless of how many rows the consuming batch holds.
+ val vector = new ConstantColumnVector(1, field.dataType)
+ if (literal.value == null) {
+ vector.setNull()
+ } else {
+ val tmp = new org.apache.spark.sql.catalyst.expressions.GenericInternalRow(1)
+ tmp.update(0, literal.value)
+ org.apache.spark.sql.execution.vectorized.ColumnVectorUtils.populate(vector, tmp, 0)
+ }
+ vector
+ }
+}
+
+/**
+ * Row-based wrapper that applies the per-split [[UnsafeProjection]] built by
+ * [[MetadataAppendingFilePartitionReaderFactory.buildRowProjection]]. The projection produces
+ * the wrapper's output row layout from the format reader's row.
+ */
+private[v2] class ProjectingMetadataRowReader(
+ delegate: PartitionReader[InternalRow],
+ projection: UnsafeProjection) extends PartitionReader[InternalRow] {
+
+ override def next(): Boolean = delegate.next()
+
+ override def get(): InternalRow = projection(delegate.get())
+
+ override def close(): Unit = delegate.close()
+}
+
+/**
+ * Columnar wrapper that delegates to the format reader and rewrites each [[ColumnarBatch]] to
+ * the wrapper's output layout. The factory does the heavy lifting in
+ * [[MetadataAppendingFilePartitionReaderFactory.buildOutputBatch]]; this class only forwards
+ * and supplies the per-split constant column vectors.
+ */
+private[v2] class MetadataAppendingColumnarReader(
+ delegate: PartitionReader[ColumnarBatch],
+ factory: MetadataAppendingFilePartitionReaderFactory,
+ constantChildren: Array[Option[ConstantColumnVector]])
+ extends PartitionReader[ColumnarBatch] {
+
+ override def next(): Boolean = delegate.next()
+
+ override def get(): ColumnarBatch = factory.buildOutputBatch(delegate.get(), constantChildren)
+
+ override def close(): Unit = delegate.close()
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RepairTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RepairTableExec.scala
new file mode 100644
index 0000000000000..c5bc202fa5668
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RepairTableExec.scala
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2
+
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.catalog.CatalogTablePartition
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog}
+
+/**
+ * Physical plan for MSCK REPAIR TABLE on V2 file tables.
+ * Discovers partitions on the filesystem and syncs them
+ * with the catalog metastore.
+ */
+case class RepairTableExec(
+ catalog: TableCatalog,
+ ident: Identifier,
+ table: FileTable,
+ enableAddPartitions: Boolean,
+ enableDropPartitions: Boolean)
+ extends LeafV2CommandExec {
+
+ override def output: Seq[Attribute] = Seq.empty
+
+ override protected def run(): Seq[InternalRow] = {
+ val ct = table.catalogTable.getOrElse(
+ throw new UnsupportedOperationException(
+ "MSCK REPAIR TABLE requires a catalog table"))
+
+ val sessionCatalog =
+ session.sessionState.catalog
+ val schema = table.partitionSchema()
+
+ // Partitions currently on disk
+ table.fileIndex.refresh()
+ val onDisk = table.listPartitionIdentifiers(
+ Array.empty,
+ InternalRow.empty)
+ val diskSpecs = onDisk.map { row =>
+ (0 until schema.length).map { i =>
+ val v = row.get(i, schema(i).dataType)
+ schema(i).name -> (
+ if (v == null) null else v.toString)
+ }.toMap
+ }.toSet
+
+ // Partitions in catalog
+ val catalogParts = sessionCatalog
+ .listPartitions(ct.identifier)
+ val catalogSpecs = catalogParts
+ .map(_.spec).toSet
+
+ // Add missing partitions
+ if (enableAddPartitions) {
+ val toAdd = diskSpecs -- catalogSpecs
+ if (toAdd.nonEmpty) {
+ val newParts = toAdd.map { spec =>
+ val partPath = spec.map {
+ case (k, v) => s"$k=$v"
+ }.mkString("/")
+ val loc = new Path(
+ new Path(ct.location), partPath).toUri
+ CatalogTablePartition(
+ spec,
+ ct.storage.copy(
+ locationUri = Some(loc)))
+ }.toSeq
+ sessionCatalog.createPartitions(
+ ct.identifier, newParts,
+ ignoreIfExists = true)
+ }
+ }
+
+ // Drop orphaned partitions
+ if (enableDropPartitions) {
+ val toDrop = catalogSpecs -- diskSpecs
+ if (toDrop.nonEmpty) {
+ sessionCatalog.dropPartitions(
+ ct.identifier, toDrop.toSeq,
+ ignoreIfNotExists = true,
+ purge = false, retainData = true)
+ }
+ }
+
+ Seq.empty
+ }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
index d21b5c730f0ca..d7e102b3103f8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
@@ -24,6 +24,7 @@ import scala.collection.mutable
import scala.jdk.CollectionConverters._
import org.apache.spark.SparkUnsupportedOperationException
+import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.{QualifiedTableName, SQLConfHelper}
import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException}
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils, ClusterBySpec, SessionCatalog}
@@ -33,7 +34,7 @@ import org.apache.spark.sql.connector.catalog.NamespaceChange.RemoveProperty
import org.apache.spark.sql.connector.catalog.functions.UnboundFunction
import org.apache.spark.sql.connector.expressions.Transform
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
-import org.apache.spark.sql.execution.datasources.DataSource
+import org.apache.spark.sql.execution.datasources.{DataSource, FileFormat}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DataType, StructType}
import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -77,7 +78,7 @@ class V2SessionCatalog(catalog: SessionCatalog)
private def getDataSourceOptions(
properties: Map[String, String],
storage: CatalogStorageFormat): CaseInsensitiveStringMap = {
- val propertiesWithPath = properties ++
+ val propertiesWithPath = storage.properties ++ properties ++
storage.locationUri.map("path" -> CatalogUtils.URIToString(_))
new CaseInsensitiveStringMap(propertiesWithPath.asJava)
}
@@ -93,32 +94,50 @@ class V2SessionCatalog(catalog: SessionCatalog)
// table here. To avoid breaking it we do not resolve the table provider and still return
// `V1Table` if the custom session catalog is present.
if (table.provider.isDefined && !hasCustomSessionCatalog) {
- val qualifiedTableName = QualifiedTableName(
- table.identifier.catalog.get, table.database, table.identifier.table)
- // Check if the table is in the v1 table cache to skip the v2 table lookup.
- if (catalog.getCachedTable(qualifiedTableName) != null) {
- return V1Table(table)
- }
DataSourceV2Utils.getTableProvider(table.provider.get, conf) match {
case Some(provider) =>
- // Get the table properties during creation and append the path option
- // to the properties.
- val dsOptions = getDataSourceOptions(table.properties, table.storage)
- // If the source accepts external table metadata, we can pass the schema and
- // partitioning information stored in Hive to `getTable` to avoid expensive
- // schema/partitioning inference.
- if (provider.supportsExternalMetadata()) {
- provider.getTable(
- table.schema,
- getV2Partitioning(table),
- dsOptions.asCaseSensitiveMap())
- } else {
- provider.getTable(
- provider.inferSchema(dsOptions),
- provider.inferPartitioning(dsOptions),
- dsOptions.asCaseSensitiveMap())
+ val dsOptions = getDataSourceOptions(
+ table.properties, table.storage)
+ val v2Table =
+ if (provider.supportsExternalMetadata()) {
+ provider.getTable(
+ table.schema,
+ getV2Partitioning(table),
+ dsOptions.asCaseSensitiveMap())
+ } else {
+ provider.getTable(
+ provider.inferSchema(dsOptions),
+ provider.inferPartitioning(dsOptions),
+ dsOptions.asCaseSensitiveMap())
+ }
+ v2Table match {
+ case ft: FileTable =>
+ ft.catalogTable = Some(table)
+ if (table.partitionColumnNames.nonEmpty) {
+ try {
+ val parts = catalog
+ .listPartitions(table.identifier)
+ if (parts.nonEmpty) {
+ ft.useCatalogFileIndex = true
+ }
+ } catch {
+ case _: Exception =>
+ }
+ }
+ case _ =>
}
+ v2Table
case _ =>
+ // No V2 provider available. Use V1 table cache
+ // for performance if the table is already cached.
+ val qualifiedTableName = QualifiedTableName(
+ table.identifier.catalog.get,
+ table.database,
+ table.identifier.table)
+ if (catalog.getCachedTable(
+ qualifiedTableName) != null) {
+ return V1Table(table)
+ }
V1Table(table)
}
} else {
@@ -215,6 +234,17 @@ class V2SessionCatalog(catalog: SessionCatalog)
partitions
}
val table = tableProvider.getTable(schema, partitions, dsOptions)
+ table match {
+ case ft: FileTable =>
+ schema.foreach { field =>
+ if (!ft.supportsDataType(field.dataType)) {
+ throw QueryCompilationErrors
+ .dataTypeUnsupportedByDataSourceError(
+ ft.formatName, field)
+ }
+ }
+ case _ =>
+ }
// Check if the schema of the created table matches the given schema.
val tableSchema = table.columns().asSchema
if (!DataType.equalsIgnoreNullability(table.columns().asSchema, schema)) {
@@ -225,6 +255,26 @@ class V2SessionCatalog(catalog: SessionCatalog)
case _ =>
// The provider is not a V2 provider so we return the schema and partitions as is.
+ // Validate data types using the V1 FileFormat, matching V1 CreateDataSourceTableCommand
+ // behavior (which validates via DataSource.resolveRelation).
+ if (schema.nonEmpty) {
+ val ds = DataSource(
+ SparkSession.active,
+ userSpecifiedSchema = Some(schema),
+ className = provider)
+ ds.providingInstance() match {
+ case format: FileFormat =>
+ schema.foreach { field =>
+ if (!format.supportDataType(field.dataType)) {
+ throw QueryCompilationErrors
+ .dataTypeUnsupportedByDataSourceError(
+ format.toString, field)
+ }
+ }
+ case _ =>
+ }
+ }
+ DataSource.validateSchema(provider, schema, conf)
(schema, partitions)
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala
index da14ead0f5463..b2f7915eb81bc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala
@@ -21,6 +21,7 @@ import scala.jdk.CollectionConverters._
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.csv.CSVOptions
import org.apache.spark.sql.catalyst.expressions.{Expression, ExprUtils}
import org.apache.spark.sql.connector.read.PartitionReaderFactory
@@ -33,6 +34,7 @@ import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.util.ArrayImplicits._
import org.apache.spark.util.SerializableConfiguration
+import org.apache.spark.util.collection.BitSet
case class CSVScan(
sparkSession: SparkSession,
@@ -43,7 +45,12 @@ case class CSVScan(
options: CaseInsensitiveStringMap,
pushedFilters: Array[Filter],
partitionFilters: Seq[Expression] = Seq.empty,
- dataFilters: Seq[Expression] = Seq.empty)
+ dataFilters: Seq[Expression] = Seq.empty,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val disableBucketedScan: Boolean = false,
+ override val optionalBucketSet: Option[BitSet] = None,
+ override val optionalNumCoalescedBuckets: Option[Int] = None,
+ override val requestedMetadataFields: StructType = StructType(Seq.empty))
extends TextBasedFileScan(sparkSession, options) {
val columnPruning = conf.csvColumnPruning
@@ -87,9 +94,10 @@ case class CSVScan(
SerializableConfiguration.broadcast(sparkSession.sparkContext, hadoopConf)
// The partition values are already truncated in `FileScan.partitions`.
// We should use `readPartitionSchema` as the partition schema here.
- CSVPartitionReaderFactory(conf, broadcastedConf,
+ val baseFactory = CSVPartitionReaderFactory(conf, broadcastedConf,
dataSchema, readDataSchema, readPartitionSchema, parsedOptions,
actualFilters.toImmutableArraySeq)
+ wrapWithMetadataIfNeeded(baseFactory, readDataSchema, options)
}
override def equals(obj: Any): Boolean = obj match {
@@ -100,6 +108,15 @@ case class CSVScan(
override def hashCode(): Int = super.hashCode()
+ override def withFileIndex(newFI: PartitioningAwareFileIndex): CSVScan =
+ copy(fileIndex = newFI)
+
+ override def withDisableBucketedScan(disable: Boolean): CSVScan =
+ copy(disableBucketedScan = disable)
+
+ override def withNumCoalescedBuckets(n: Option[Int]): CSVScan =
+ copy(optionalNumCoalescedBuckets = n)
+
override def getMetaData(): Map[String, String] = {
super.getMetaData() ++ Map("PushedFilters" -> seqToString(pushedFilters.toImmutableArraySeq))
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScanBuilder.scala
index fe208f4502127..a27c39aa6f675 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScanBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScanBuilder.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2.csv
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.StructFilters
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex
import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder
import org.apache.spark.sql.sources.Filter
@@ -30,10 +31,12 @@ case class CSVScanBuilder(
fileIndex: PartitioningAwareFileIndex,
schema: StructType,
dataSchema: StructType,
- options: CaseInsensitiveStringMap)
- extends FileScanBuilder(sparkSession, fileIndex, dataSchema) {
+ options: CaseInsensitiveStringMap,
+ override val bucketSpec: Option[BucketSpec] = None)
+ extends FileScanBuilder(sparkSession, fileIndex, dataSchema, bucketSpec) {
override def build(): CSVScan = {
+ val optBucketSet = computeBucketSet()
CSVScan(
sparkSession,
fileIndex,
@@ -43,7 +46,10 @@ case class CSVScanBuilder(
options,
pushedDataFilters,
partitionFilters,
- dataFilters)
+ dataFilters,
+ bucketSpec = bucketSpec,
+ optionalBucketSet = optBucketSet,
+ requestedMetadataFields = requestedMetadataFields)
}
override def pushDataFilters(dataFilters: Array[Filter]): Array[Filter] = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala
index 4938df795cb1a..acc00cb6bff10 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala
@@ -22,11 +22,12 @@ import org.apache.hadoop.fs.FileStatus
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.csv.CSVOptions
-import org.apache.spark.sql.connector.write.{LogicalWriteInfo, Write, WriteBuilder}
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
import org.apache.spark.sql.execution.datasources.FileFormat
import org.apache.spark.sql.execution.datasources.csv.CSVDataSource
import org.apache.spark.sql.execution.datasources.v2.FileTable
-import org.apache.spark.sql.types.{AtomicType, DataType, GeographyType, GeometryType, StructType, UserDefinedType}
+import org.apache.spark.sql.types.{AtomicType, DataType, GeographyType,
+ GeometryType, StructType, UserDefinedType, VariantType}
import org.apache.spark.sql.util.CaseInsensitiveStringMap
case class CSVTable(
@@ -38,7 +39,8 @@ case class CSVTable(
fallbackFileFormat: Class[_ <: FileFormat])
extends FileTable(sparkSession, options, paths, userSpecifiedSchema) {
override def newScanBuilder(options: CaseInsensitiveStringMap): CSVScanBuilder =
- CSVScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options))
+ CSVScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options),
+ bucketSpec)
override def inferSchema(files: Seq[FileStatus]): Option[StructType] = {
val parsedOptions = new CSVOptions(
@@ -50,9 +52,10 @@ case class CSVTable(
}
override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
- new WriteBuilder {
- override def build(): Write =
- CSVWrite(paths, formatName, supportsDataType, mergedWriteInfo(info))
+ createFileWriteBuilder(info) {
+ (mergedInfo, partSchema, bSpec, customLocs, dynamicOverwrite, truncate, overPreds) =>
+ CSVWrite(paths, formatName, supportsWriteDataType, mergedInfo, partSchema, bSpec,
+ overPreds, customLocs, dynamicOverwrite, truncate)
}
}
@@ -66,5 +69,13 @@ case class CSVTable(
case _ => false
}
+ // Write rejects VariantType; read allows it.
+ private def supportsWriteDataType(dataType: DataType): Boolean = dataType match {
+ case _: VariantType => false
+ case dt => supportsDataType(dt)
+ }
+
+ override def allowDuplicatedColumnNames: Boolean = true
+
override def formatName: String = "CSV"
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWrite.scala
index 7011fea77d888..ac17057be442d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWrite.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWrite.scala
@@ -18,8 +18,10 @@ package org.apache.spark.sql.execution.datasources.v2.csv
import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.csv.CSVOptions
import org.apache.spark.sql.catalyst.util.CompressionCodecs
+import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.connector.write.LogicalWriteInfo
import org.apache.spark.sql.execution.datasources.{CodecStreams, OutputWriter, OutputWriterFactory}
import org.apache.spark.sql.execution.datasources.csv.CsvOutputWriter
@@ -31,7 +33,13 @@ case class CSVWrite(
paths: Seq[String],
formatName: String,
supportsDataType: DataType => Boolean,
- info: LogicalWriteInfo) extends FileWrite {
+ info: LogicalWriteInfo,
+ partitionSchema: StructType,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val overwritePredicates: Option[Array[Predicate]] = None,
+ override val customPartitionLocations: Map[Map[String, String], String] = Map.empty,
+ override val dynamicPartitionOverwrite: Boolean,
+ override val isTruncate: Boolean) extends FileWrite {
override def allowDuplicatedColumnNames: Boolean = true
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScan.scala
index 9b19c6b433d74..b6d37746cfd23 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScan.scala
@@ -21,6 +21,7 @@ import scala.jdk.CollectionConverters._
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.expressions.{Expression, ExprUtils}
import org.apache.spark.sql.catalyst.json.JSONOptionsInRead
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
@@ -34,6 +35,7 @@ import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.util.ArrayImplicits._
import org.apache.spark.util.SerializableConfiguration
+import org.apache.spark.util.collection.BitSet
case class JsonScan(
sparkSession: SparkSession,
@@ -44,7 +46,12 @@ case class JsonScan(
options: CaseInsensitiveStringMap,
pushedFilters: Array[Filter],
partitionFilters: Seq[Expression] = Seq.empty,
- dataFilters: Seq[Expression] = Seq.empty)
+ dataFilters: Seq[Expression] = Seq.empty,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val disableBucketedScan: Boolean = false,
+ override val optionalBucketSet: Option[BitSet] = None,
+ override val optionalNumCoalescedBuckets: Option[Int] = None,
+ override val requestedMetadataFields: StructType = StructType(Seq.empty))
extends TextBasedFileScan(sparkSession, options) {
private val parsedOptions = new JSONOptionsInRead(
@@ -80,9 +87,10 @@ case class JsonScan(
SerializableConfiguration.broadcast(sparkSession.sparkContext, hadoopConf)
// The partition values are already truncated in `FileScan.partitions`.
// We should use `readPartitionSchema` as the partition schema here.
- JsonPartitionReaderFactory(conf, broadcastedConf,
+ val baseFactory = JsonPartitionReaderFactory(conf, broadcastedConf,
dataSchema, readDataSchema, readPartitionSchema, parsedOptions,
pushedFilters.toImmutableArraySeq)
+ wrapWithMetadataIfNeeded(baseFactory, readDataSchema, options)
}
override def equals(obj: Any): Boolean = obj match {
@@ -93,6 +101,15 @@ case class JsonScan(
override def hashCode(): Int = super.hashCode()
+ override def withFileIndex(newFI: PartitioningAwareFileIndex): JsonScan =
+ copy(fileIndex = newFI)
+
+ override def withDisableBucketedScan(disable: Boolean): JsonScan =
+ copy(disableBucketedScan = disable)
+
+ override def withNumCoalescedBuckets(n: Option[Int]): JsonScan =
+ copy(optionalNumCoalescedBuckets = n)
+
override def getMetaData(): Map[String, String] = {
super.getMetaData() ++ Map("PushedFilters" -> pushedFilters.mkString("[", ", ", "]"))
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScanBuilder.scala
index dcae6bd3fd007..a7223cefa2c80 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScanBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScanBuilder.scala
@@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.datasources.v2.json
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.StructFilters
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex
import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder
import org.apache.spark.sql.sources.Filter
@@ -29,9 +30,11 @@ case class JsonScanBuilder (
fileIndex: PartitioningAwareFileIndex,
schema: StructType,
dataSchema: StructType,
- options: CaseInsensitiveStringMap)
- extends FileScanBuilder(sparkSession, fileIndex, dataSchema) {
+ options: CaseInsensitiveStringMap,
+ override val bucketSpec: Option[BucketSpec] = None)
+ extends FileScanBuilder(sparkSession, fileIndex, dataSchema, bucketSpec) {
override def build(): JsonScan = {
+ val optBucketSet = computeBucketSet()
JsonScan(
sparkSession,
fileIndex,
@@ -41,7 +44,10 @@ case class JsonScanBuilder (
options,
pushedDataFilters,
partitionFilters,
- dataFilters)
+ dataFilters,
+ bucketSpec = bucketSpec,
+ optionalBucketSet = optBucketSet,
+ requestedMetadataFields = requestedMetadataFields)
}
override def pushDataFilters(dataFilters: Array[Filter]): Array[Filter] = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala
index cf3c1e11803c0..793c2ee324e82 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala
@@ -22,7 +22,7 @@ import org.apache.hadoop.fs.FileStatus
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.json.JSONOptionsInRead
-import org.apache.spark.sql.connector.write.{LogicalWriteInfo, Write, WriteBuilder}
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
import org.apache.spark.sql.execution.datasources.FileFormat
import org.apache.spark.sql.execution.datasources.json.JsonDataSource
import org.apache.spark.sql.execution.datasources.v2.FileTable
@@ -38,7 +38,8 @@ case class JsonTable(
fallbackFileFormat: Class[_ <: FileFormat])
extends FileTable(sparkSession, options, paths, userSpecifiedSchema) {
override def newScanBuilder(options: CaseInsensitiveStringMap): JsonScanBuilder =
- JsonScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options))
+ JsonScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options),
+ bucketSpec)
override def inferSchema(files: Seq[FileStatus]): Option[StructType] = {
val parsedOptions = new JSONOptionsInRead(
@@ -50,9 +51,10 @@ case class JsonTable(
}
override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
- new WriteBuilder {
- override def build(): Write =
- JsonWrite(paths, formatName, supportsDataType, mergedWriteInfo(info))
+ createFileWriteBuilder(info) {
+ (mergedInfo, partSchema, bSpec, customLocs, dynamicOverwrite, truncate, overPreds) =>
+ JsonWrite(paths, formatName, supportsDataType, mergedInfo, partSchema, bSpec,
+ overPreds, customLocs, dynamicOverwrite, truncate)
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonWrite.scala
index ea1f6793cb9ca..2851c409376ce 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonWrite.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonWrite.scala
@@ -18,8 +18,10 @@ package org.apache.spark.sql.execution.datasources.v2.json
import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.json.JSONOptions
import org.apache.spark.sql.catalyst.util.CompressionCodecs
+import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.connector.write.LogicalWriteInfo
import org.apache.spark.sql.execution.datasources.{CodecStreams, OutputWriter, OutputWriterFactory}
import org.apache.spark.sql.execution.datasources.json.JsonOutputWriter
@@ -31,7 +33,13 @@ case class JsonWrite(
paths: Seq[String],
formatName: String,
supportsDataType: DataType => Boolean,
- info: LogicalWriteInfo) extends FileWrite {
+ info: LogicalWriteInfo,
+ partitionSchema: StructType,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val overwritePredicates: Option[Array[Predicate]] = None,
+ override val customPartitionLocations: Map[Map[String, String], String] = Map.empty,
+ override val dynamicPartitionOverwrite: Boolean,
+ override val isTruncate: Boolean) extends FileWrite {
override def prepareWrite(
sqlConf: SQLConf,
job: Job,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala
index 6242cd3ca2c62..a4000647fc3ae 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala
@@ -23,6 +23,7 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.memory.MemoryMode
import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.connector.expressions.aggregate.Aggregation
import org.apache.spark.sql.connector.read.PartitionReaderFactory
@@ -34,6 +35,7 @@ import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.util.ArrayImplicits._
import org.apache.spark.util.SerializableConfiguration
+import org.apache.spark.util.collection.BitSet
case class OrcScan(
sparkSession: SparkSession,
@@ -46,7 +48,13 @@ case class OrcScan(
pushedAggregate: Option[Aggregation] = None,
pushedFilters: Array[Filter],
partitionFilters: Seq[Expression] = Seq.empty,
- dataFilters: Seq[Expression] = Seq.empty) extends FileScan {
+ dataFilters: Seq[Expression] = Seq.empty,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val disableBucketedScan: Boolean = false,
+ override val optionalBucketSet: Option[BitSet] = None,
+ override val optionalNumCoalescedBuckets: Option[Int] = None,
+ override val requestedMetadataFields: StructType = StructType(Seq.empty))
+ extends FileScan {
override def isSplitable(path: Path): Boolean = {
// If aggregate is pushed down, only the file footer will be read once,
// so file should not be split across multiple tasks.
@@ -73,9 +81,10 @@ case class OrcScan(
}
// The partition values are already truncated in `FileScan.partitions`.
// We should use `readPartitionSchema` as the partition schema here.
- OrcPartitionReaderFactory(conf, broadcastedConf,
+ val baseFactory = OrcPartitionReaderFactory(conf, broadcastedConf,
dataSchema, readDataSchema, readPartitionSchema, pushedFilters, pushedAggregate,
new OrcOptions(options.asScala.toMap, conf), memoryMode)
+ wrapWithMetadataIfNeeded(baseFactory, readDataSchema, options)
}
override def equals(obj: Any): Boolean = obj match {
@@ -92,6 +101,15 @@ case class OrcScan(
override def hashCode(): Int = getClass.hashCode()
+ override def withFileIndex(newFI: PartitioningAwareFileIndex): OrcScan =
+ copy(fileIndex = newFI)
+
+ override def withDisableBucketedScan(disable: Boolean): OrcScan =
+ copy(disableBucketedScan = disable)
+
+ override def withNumCoalescedBuckets(n: Option[Int]): OrcScan =
+ copy(optionalNumCoalescedBuckets = n)
+
lazy private val (pushedAggregationsStr, pushedGroupByStr) = if (pushedAggregate.nonEmpty) {
(seqToString(pushedAggregate.get.aggregateExpressions.toImmutableArraySeq),
seqToString(pushedAggregate.get.groupByExpressions.toImmutableArraySeq))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala
index b4a857db4846b..a7332296e0f51 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2.orc
import scala.jdk.CollectionConverters._
import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.connector.expressions.aggregate.Aggregation
import org.apache.spark.sql.connector.read.SupportsPushDownAggregates
import org.apache.spark.sql.execution.datasources.{AggregatePushDownUtils, PartitioningAwareFileIndex}
@@ -36,8 +37,9 @@ case class OrcScanBuilder(
fileIndex: PartitioningAwareFileIndex,
schema: StructType,
dataSchema: StructType,
- options: CaseInsensitiveStringMap)
- extends FileScanBuilder(sparkSession, fileIndex, dataSchema)
+ options: CaseInsensitiveStringMap,
+ override val bucketSpec: Option[BucketSpec] = None)
+ extends FileScanBuilder(sparkSession, fileIndex, dataSchema, bucketSpec)
with SupportsPushDownAggregates {
lazy val hadoopConf = {
@@ -59,9 +61,11 @@ case class OrcScanBuilder(
if (pushedAggregations.isEmpty) {
finalSchema = readDataSchema()
}
+ val optBucketSet = computeBucketSet()
OrcScan(sparkSession, hadoopConf, fileIndex, dataSchema, finalSchema,
readPartitionSchema(), options, pushedAggregations, pushedDataFilters, partitionFilters,
- dataFilters)
+ dataFilters, bucketSpec = bucketSpec, optionalBucketSet = optBucketSet,
+ requestedMetadataFields = requestedMetadataFields)
}
override def pushDataFilters(dataFilters: Array[Filter]): Array[Filter] = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala
index 08cd89fdacc61..958a7cceb1a85 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala
@@ -21,7 +21,7 @@ import scala.jdk.CollectionConverters._
import org.apache.hadoop.fs.FileStatus
import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.connector.write.{LogicalWriteInfo, Write, WriteBuilder}
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
import org.apache.spark.sql.execution.datasources.FileFormat
import org.apache.spark.sql.execution.datasources.orc.OrcUtils
import org.apache.spark.sql.execution.datasources.v2.FileTable
@@ -38,15 +38,17 @@ case class OrcTable(
extends FileTable(sparkSession, options, paths, userSpecifiedSchema) {
override def newScanBuilder(options: CaseInsensitiveStringMap): OrcScanBuilder =
- OrcScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options))
+ OrcScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options),
+ bucketSpec)
override def inferSchema(files: Seq[FileStatus]): Option[StructType] =
OrcUtils.inferSchema(sparkSession, files, options.asScala.toMap)
override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
- new WriteBuilder {
- override def build(): Write =
- OrcWrite(paths, formatName, supportsDataType, mergedWriteInfo(info))
+ createFileWriteBuilder(info) {
+ (mergedInfo, partSchema, bSpec, customLocs, dynamicOverwrite, truncate, overPreds) =>
+ OrcWrite(paths, formatName, supportsDataType, mergedInfo, partSchema, bSpec,
+ overPreds, customLocs, dynamicOverwrite, truncate)
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWrite.scala
index 12dff269a468e..b38b133a675d9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWrite.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWrite.scala
@@ -21,6 +21,8 @@ import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
import org.apache.orc.OrcConf.{COMPRESS, MAPRED_OUTPUT_SCHEMA}
import org.apache.orc.mapred.OrcStruct
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
+import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.connector.write.LogicalWriteInfo
import org.apache.spark.sql.execution.datasources.{OutputWriter, OutputWriterFactory}
import org.apache.spark.sql.execution.datasources.orc.{OrcOptions, OrcOutputWriter, OrcUtils}
@@ -32,7 +34,13 @@ case class OrcWrite(
paths: Seq[String],
formatName: String,
supportsDataType: DataType => Boolean,
- info: LogicalWriteInfo) extends FileWrite {
+ info: LogicalWriteInfo,
+ partitionSchema: StructType,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val overwritePredicates: Option[Array[Predicate]] = None,
+ override val customPartitionLocations: Map[Map[String, String], String] = Map.empty,
+ override val dynamicPartitionOverwrite: Boolean,
+ override val isTruncate: Boolean) extends FileWrite {
override def prepareWrite(
sqlConf: SQLConf,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala
index d0c7859964e09..5dac727509a90 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala
@@ -23,6 +23,7 @@ import org.apache.hadoop.fs.Path
import org.apache.parquet.hadoop.ParquetInputFormat
import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.connector.expressions.aggregate.Aggregation
import org.apache.spark.sql.connector.read.{PartitionReaderFactory, VariantExtraction}
@@ -35,6 +36,7 @@ import org.apache.spark.sql.types.{BooleanType, DataType, StructField, StructTyp
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.util.ArrayImplicits._
import org.apache.spark.util.SerializableConfiguration
+import org.apache.spark.util.collection.BitSet
case class ParquetScan(
sparkSession: SparkSession,
@@ -48,7 +50,13 @@ case class ParquetScan(
pushedAggregate: Option[Aggregation] = None,
partitionFilters: Seq[Expression] = Seq.empty,
dataFilters: Seq[Expression] = Seq.empty,
- pushedVariantExtractions: Array[VariantExtraction] = Array.empty) extends FileScan {
+ pushedVariantExtractions: Array[VariantExtraction] = Array.empty,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val disableBucketedScan: Boolean = false,
+ override val optionalBucketSet: Option[BitSet] = None,
+ override val optionalNumCoalescedBuckets: Option[Int] = None,
+ override val requestedMetadataFields: StructType = StructType(Seq.empty))
+ extends FileScan {
override def isSplitable(path: Path): Boolean = {
// If aggregate is pushed down, only the file footer will be read once,
// so file should not be split across multiple tasks.
@@ -167,7 +175,7 @@ case class ParquetScan(
val broadcastedConf =
SerializableConfiguration.broadcast(sparkSession.sparkContext, hadoopConf)
- ParquetPartitionReaderFactory(
+ val baseFactory = ParquetPartitionReaderFactory(
conf,
broadcastedConf,
dataSchema,
@@ -176,6 +184,7 @@ case class ParquetScan(
pushedFilters,
pushedAggregate,
new ParquetOptions(options.asCaseSensitiveMap.asScala.toMap, conf))
+ wrapWithMetadataIfNeeded(baseFactory, effectiveSchema, options)
}
override def equals(obj: Any): Boolean = obj match {
@@ -196,6 +205,15 @@ case class ParquetScan(
override def hashCode(): Int = getClass.hashCode()
+ override def withFileIndex(newFI: PartitioningAwareFileIndex): ParquetScan =
+ copy(fileIndex = newFI)
+
+ override def withDisableBucketedScan(disable: Boolean): ParquetScan =
+ copy(disableBucketedScan = disable)
+
+ override def withNumCoalescedBuckets(n: Option[Int]): ParquetScan =
+ copy(optionalNumCoalescedBuckets = n)
+
lazy private val (pushedAggregationsStr, pushedGroupByStr) = if (pushedAggregate.nonEmpty) {
(seqToString(pushedAggregate.get.aggregateExpressions.toImmutableArraySeq),
seqToString(pushedAggregate.get.groupByExpressions.toImmutableArraySeq))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala
index 94da53f229349..97746322581cb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2.parquet
import scala.jdk.CollectionConverters._
import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec
import org.apache.spark.sql.connector.expressions.aggregate.Aggregation
import org.apache.spark.sql.connector.read.{SupportsPushDownAggregates, SupportsPushDownVariantExtractions, VariantExtraction}
@@ -37,8 +38,9 @@ case class ParquetScanBuilder(
fileIndex: PartitioningAwareFileIndex,
schema: StructType,
dataSchema: StructType,
- options: CaseInsensitiveStringMap)
- extends FileScanBuilder(sparkSession, fileIndex, dataSchema)
+ options: CaseInsensitiveStringMap,
+ override val bucketSpec: Option[BucketSpec] = None)
+ extends FileScanBuilder(sparkSession, fileIndex, dataSchema, bucketSpec)
with SupportsPushDownAggregates
with SupportsPushDownVariantExtractions {
lazy val hadoopConf = {
@@ -117,8 +119,11 @@ case class ParquetScanBuilder(
if (pushedAggregations.isEmpty) {
finalSchema = readDataSchema()
}
+ val optBucketSet = computeBucketSet()
ParquetScan(sparkSession, hadoopConf, fileIndex, dataSchema, finalSchema,
readPartitionSchema(), pushedDataFilters, options, pushedAggregations,
- partitionFilters, dataFilters, pushedVariantExtractions)
+ partitionFilters, dataFilters, pushedVariantExtractions,
+ bucketSpec = bucketSpec, optionalBucketSet = optBucketSet,
+ requestedMetadataFields = requestedMetadataFields)
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala
index 67052c201a9df..2602a28274178 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala
@@ -21,9 +21,9 @@ import scala.jdk.CollectionConverters._
import org.apache.hadoop.fs.FileStatus
import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.connector.write.{LogicalWriteInfo, Write, WriteBuilder}
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
import org.apache.spark.sql.execution.datasources.FileFormat
-import org.apache.spark.sql.execution.datasources.parquet.ParquetUtils
+import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetUtils}
import org.apache.spark.sql.execution.datasources.v2.FileTable
import org.apache.spark.sql.types._
import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -38,15 +38,17 @@ case class ParquetTable(
extends FileTable(sparkSession, options, paths, userSpecifiedSchema) {
override def newScanBuilder(options: CaseInsensitiveStringMap): ParquetScanBuilder =
- ParquetScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options))
+ ParquetScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options),
+ bucketSpec)
override def inferSchema(files: Seq[FileStatus]): Option[StructType] =
ParquetUtils.inferSchema(sparkSession, options.asScala.toMap, files)
override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
- new WriteBuilder {
- override def build(): Write =
- ParquetWrite(paths, formatName, supportsDataType, mergedWriteInfo(info))
+ createFileWriteBuilder(info) {
+ (mergedInfo, partSchema, bSpec, customLocs, dynamicOverwrite, truncate, overPreds) =>
+ ParquetWrite(paths, formatName, supportsDataType, mergedInfo, partSchema, bSpec,
+ overPreds, customLocs, dynamicOverwrite, truncate)
}
}
@@ -70,4 +72,11 @@ case class ParquetTable(
}
override def formatName: String = "Parquet"
+
+ // [SPARK-56371] Expose the Parquet-specific generated `row_index` field on the V2
+ // `_metadata` struct, mirroring V1 `ParquetFileFormat.metadataSchemaFields`. The field
+ // is backed by the internal `_tmp_metadata_row_index` column that the Parquet reader
+ // populates per row via `ParquetRowIndexUtil`.
+ override protected def metadataSchemaFields: Seq[StructField] =
+ super.metadataSchemaFields :+ ParquetFileFormat.ROW_INDEX_FIELD
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala
index e37b1fce7c37e..c7163974335d2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources.v2.parquet
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.internal.Logging
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
+import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.connector.write.LogicalWriteInfo
import org.apache.spark.sql.execution.datasources.OutputWriterFactory
import org.apache.spark.sql.execution.datasources.parquet._
@@ -30,7 +32,13 @@ case class ParquetWrite(
paths: Seq[String],
formatName: String,
supportsDataType: DataType => Boolean,
- info: LogicalWriteInfo) extends FileWrite with Logging {
+ info: LogicalWriteInfo,
+ partitionSchema: StructType,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val overwritePredicates: Option[Array[Predicate]] = None,
+ override val customPartitionLocations: Map[Map[String, String], String] = Map.empty,
+ override val dynamicPartitionOverwrite: Boolean,
+ override val isTruncate: Boolean) extends FileWrite with Logging {
override def prepareWrite(
sqlConf: SQLConf,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala
index a351eb368c565..aaff9138b74a6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala
@@ -21,6 +21,7 @@ import scala.jdk.CollectionConverters._
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.connector.read.PartitionReaderFactory
import org.apache.spark.sql.errors.QueryCompilationErrors
@@ -30,6 +31,7 @@ import org.apache.spark.sql.execution.datasources.v2.TextBasedFileScan
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.util.SerializableConfiguration
+import org.apache.spark.util.collection.BitSet
case class TextScan(
sparkSession: SparkSession,
@@ -39,7 +41,12 @@ case class TextScan(
readPartitionSchema: StructType,
options: CaseInsensitiveStringMap,
partitionFilters: Seq[Expression] = Seq.empty,
- dataFilters: Seq[Expression] = Seq.empty)
+ dataFilters: Seq[Expression] = Seq.empty,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val disableBucketedScan: Boolean = false,
+ override val optionalBucketSet: Option[BitSet] = None,
+ override val optionalNumCoalescedBuckets: Option[Int] = None,
+ override val requestedMetadataFields: StructType = StructType(Seq.empty))
extends TextBasedFileScan(sparkSession, options) {
private val optionsAsScala = options.asScala.toMap
@@ -66,15 +73,14 @@ case class TextScan(
override def createReaderFactory(): PartitionReaderFactory = {
verifyReadSchema(readDataSchema)
- val hadoopConf = {
- val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap
- // Hadoop Configurations are case sensitive.
- sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap)
- }
+ val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap
+ // Hadoop Configurations are case sensitive.
+ val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap)
val broadcastedConf =
SerializableConfiguration.broadcast(sparkSession.sparkContext, hadoopConf)
- TextPartitionReaderFactory(conf, broadcastedConf, readDataSchema,
+ val baseFactory = TextPartitionReaderFactory(conf, broadcastedConf, readDataSchema,
readPartitionSchema, textOptions)
+ wrapWithMetadataIfNeeded(baseFactory, readDataSchema, options)
}
override def equals(obj: Any): Boolean = obj match {
@@ -84,4 +90,13 @@ case class TextScan(
}
override def hashCode(): Int = super.hashCode()
+
+ override def withFileIndex(newFI: PartitioningAwareFileIndex): TextScan =
+ copy(fileIndex = newFI)
+
+ override def withDisableBucketedScan(disable: Boolean): TextScan =
+ copy(disableBucketedScan = disable)
+
+ override def withNumCoalescedBuckets(n: Option[Int]): TextScan =
+ copy(optionalNumCoalescedBuckets = n)
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScanBuilder.scala
index 689fa821a11d3..6be5c64a08876 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScanBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScanBuilder.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql.execution.datasources.v2.text
import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex
import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder
import org.apache.spark.sql.types.StructType
@@ -28,11 +29,14 @@ case class TextScanBuilder(
fileIndex: PartitioningAwareFileIndex,
schema: StructType,
dataSchema: StructType,
- options: CaseInsensitiveStringMap)
- extends FileScanBuilder(sparkSession, fileIndex, dataSchema) {
+ options: CaseInsensitiveStringMap,
+ override val bucketSpec: Option[BucketSpec] = None)
+ extends FileScanBuilder(sparkSession, fileIndex, dataSchema, bucketSpec) {
override def build(): TextScan = {
+ val optBucketSet = computeBucketSet()
TextScan(sparkSession, fileIndex, dataSchema, readDataSchema(), readPartitionSchema(), options,
- partitionFilters, dataFilters)
+ partitionFilters, dataFilters, bucketSpec = bucketSpec, optionalBucketSet = optBucketSet,
+ requestedMetadataFields = requestedMetadataFields)
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala
index d8880b84c6211..aa76c5408f06a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2.text
import org.apache.hadoop.fs.FileStatus
import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.connector.write.{LogicalWriteInfo, Write, WriteBuilder}
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
import org.apache.spark.sql.execution.datasources.FileFormat
import org.apache.spark.sql.execution.datasources.v2.FileTable
import org.apache.spark.sql.types.{DataType, StringType, StructField, StructType}
@@ -34,15 +34,17 @@ case class TextTable(
fallbackFileFormat: Class[_ <: FileFormat])
extends FileTable(sparkSession, options, paths, userSpecifiedSchema) {
override def newScanBuilder(options: CaseInsensitiveStringMap): TextScanBuilder =
- TextScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options))
+ TextScanBuilder(sparkSession, fileIndex, schema, dataSchema, mergedOptions(options),
+ bucketSpec)
override def inferSchema(files: Seq[FileStatus]): Option[StructType] =
Some(StructType(Array(StructField("value", StringType))))
override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
- new WriteBuilder {
- override def build(): Write =
- TextWrite(paths, formatName, supportsDataType, mergedWriteInfo(info))
+ createFileWriteBuilder(info) {
+ (mergedInfo, partSchema, bSpec, customLocs, dynamicOverwrite, truncate, overPreds) =>
+ TextWrite(paths, formatName, supportsDataType, mergedInfo, partSchema, bSpec,
+ overPreds, customLocs, dynamicOverwrite, truncate)
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextWrite.scala
index 7bee49f05cbcd..819d9642ea2ca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextWrite.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextWrite.scala
@@ -18,7 +18,9 @@ package org.apache.spark.sql.execution.datasources.v2.text
import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.util.CompressionCodecs
+import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.connector.write.LogicalWriteInfo
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.datasources.{CodecStreams, OutputWriter, OutputWriterFactory}
@@ -31,7 +33,13 @@ case class TextWrite(
paths: Seq[String],
formatName: String,
supportsDataType: DataType => Boolean,
- info: LogicalWriteInfo) extends FileWrite {
+ info: LogicalWriteInfo,
+ partitionSchema: StructType,
+ override val bucketSpec: Option[BucketSpec] = None,
+ override val overwritePredicates: Option[Array[Predicate]] = None,
+ override val customPartitionLocations: Map[Map[String, String], String] = Map.empty,
+ override val dynamicPartitionOverwrite: Boolean,
+ override val isTruncate: Boolean) extends FileWrite {
private def verifySchema(schema: StructType): Unit = {
if (schema.size != 1) {
throw QueryCompilationErrors.textDataSourceWithMultiColumnsError(schema)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
index 08dd212060762..527bee2ca980d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
@@ -225,7 +225,6 @@ abstract class BaseSessionStateBuilder(
new ResolveDataSource(session) +:
new FindDataSourceTable(session) +:
new ResolveSQLOnFile(session) +:
- new FallBackFileSourceV2(session) +:
ResolveEncodersInScalaAgg +:
new ResolveSessionCatalog(this.catalogManager) +:
ResolveWriteToStream +:
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala
index c9feedc9645d0..386d9e4fac93c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala
@@ -489,17 +489,10 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils with AdaptiveSparkP
).foreach { query =>
checkAnswer(sql(query), Seq(Row("a", 10, "08")))
}
- checkError(
- exception = intercept[AnalysisException] {
- sql("alter table t drop partition(dt='8')")
- },
- condition = "PARTITIONS_NOT_FOUND",
- sqlState = None,
- parameters = Map(
- "partitionList" -> "PARTITION \\(`dt` = 8\\)",
- "tableName" -> ".*`t`"),
- matchPVals = true
- )
+ val e = intercept[AnalysisException] {
+ sql("alter table t drop partition(dt='8')")
+ }
+ assert(e.getCondition == "PARTITIONS_NOT_FOUND")
}
}
@@ -509,17 +502,10 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils with AdaptiveSparkP
sql("insert into t partition(dt=08) values('a', 10)")
checkAnswer(sql("select * from t where dt='08'"), sql("select * from t where dt='07'"))
checkAnswer(sql("select * from t where dt=08"), Seq(Row("a", 10, "8")))
- checkError(
- exception = intercept[AnalysisException] {
- sql("alter table t drop partition(dt='08')")
- },
- condition = "PARTITIONS_NOT_FOUND",
- sqlState = None,
- parameters = Map(
- "partitionList" -> "PARTITION \\(`dt` = 08\\)",
- "tableName" -> ".*.`t`"),
- matchPVals = true
- )
+ val e2 = intercept[AnalysisException] {
+ sql("alter table t drop partition(dt='08')")
+ }
+ assert(e2.getCondition == "PARTITIONS_NOT_FOUND")
}
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala
deleted file mode 100644
index 2a0ab21ddb09c..0000000000000
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.connector
-
-import scala.collection.mutable.ArrayBuffer
-
-import org.apache.spark.{SparkConf, SparkException}
-import org.apache.spark.sql.QueryTest
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability}
-import org.apache.spark.sql.connector.read.ScanBuilder
-import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
-import org.apache.spark.sql.execution.{FileSourceScanExec, QueryExecution}
-import org.apache.spark.sql.execution.datasources.{FileFormat, InsertIntoHadoopFsRelationCommand}
-import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
-import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2
-import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.util.{CaseInsensitiveStringMap, QueryExecutionListener}
-
-class DummyReadOnlyFileDataSourceV2 extends FileDataSourceV2 {
-
- override def fallbackFileFormat: Class[_ <: FileFormat] = classOf[ParquetFileFormat]
-
- override def shortName(): String = "parquet"
-
- override def getTable(options: CaseInsensitiveStringMap): Table = {
- new DummyReadOnlyFileTable
- }
-}
-
-class DummyReadOnlyFileTable extends Table with SupportsRead {
- override def name(): String = "dummy"
-
- override def schema(): StructType = StructType(Nil)
-
- override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {
- throw SparkException.internalError("Dummy file reader")
- }
-
- override def capabilities(): java.util.Set[TableCapability] =
- java.util.EnumSet.of(TableCapability.BATCH_READ, TableCapability.ACCEPT_ANY_SCHEMA)
-}
-
-class DummyWriteOnlyFileDataSourceV2 extends FileDataSourceV2 {
-
- override def fallbackFileFormat: Class[_ <: FileFormat] = classOf[ParquetFileFormat]
-
- override def shortName(): String = "parquet"
-
- override def getTable(options: CaseInsensitiveStringMap): Table = {
- new DummyWriteOnlyFileTable
- }
-}
-
-class DummyWriteOnlyFileTable extends Table with SupportsWrite {
- override def name(): String = "dummy"
-
- override def schema(): StructType = StructType(Nil)
-
- override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder =
- throw SparkException.internalError("Dummy file writer")
-
- override def capabilities(): java.util.Set[TableCapability] =
- java.util.EnumSet.of(TableCapability.BATCH_WRITE, TableCapability.ACCEPT_ANY_SCHEMA)
-}
-
-class FileDataSourceV2FallBackSuite extends QueryTest with SharedSparkSession {
-
- private val dummyReadOnlyFileSourceV2 = classOf[DummyReadOnlyFileDataSourceV2].getName
- private val dummyWriteOnlyFileSourceV2 = classOf[DummyWriteOnlyFileDataSourceV2].getName
-
- override protected def sparkConf: SparkConf = super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "")
-
- test("Fall back to v1 when writing to file with read only FileDataSourceV2") {
- val df = spark.range(10).toDF()
- withTempPath { file =>
- val path = file.getCanonicalPath
- // Writing file should fall back to v1 and succeed.
- df.write.format(dummyReadOnlyFileSourceV2).save(path)
-
- // Validate write result with [[ParquetFileFormat]].
- checkAnswer(spark.read.parquet(path), df)
-
- // Dummy File reader should fail as expected.
- checkError(
- exception = intercept[SparkException] {
- spark.read.format(dummyReadOnlyFileSourceV2).load(path).collect()
- },
- condition = "INTERNAL_ERROR",
- parameters = Map("message" -> "Dummy file reader"))
- }
- }
-
- test("Fall back read path to v1 with configuration USE_V1_SOURCE_LIST") {
- val df = spark.range(10).toDF()
- withTempPath { file =>
- val path = file.getCanonicalPath
- df.write.parquet(path)
- Seq(
- "foo,parquet,bar",
- "ParQuet,bar,foo",
- s"foobar,$dummyReadOnlyFileSourceV2"
- ).foreach { fallbackReaders =>
- withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> fallbackReaders) {
- // Reading file should fall back to v1 and succeed.
- checkAnswer(spark.read.format(dummyReadOnlyFileSourceV2).load(path), df)
- checkAnswer(sql(s"SELECT * FROM parquet.`$path`"), df)
- }
- }
-
- withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "foo,bar") {
- // Dummy File reader should fail as DISABLED_V2_FILE_DATA_SOURCE_READERS doesn't include it.
- checkError(
- exception = intercept[SparkException] {
- spark.read.format(dummyReadOnlyFileSourceV2).load(path).collect()
- },
- condition = "INTERNAL_ERROR",
- parameters = Map("message" -> "Dummy file reader"))
- }
- }
- }
-
- test("Fall back to v1 when reading file with write only FileDataSourceV2") {
- val df = spark.range(10).toDF()
- withTempPath { file =>
- val path = file.getCanonicalPath
- df.write.parquet(path)
- // Fallback reads to V1
- checkAnswer(spark.read.format(dummyWriteOnlyFileSourceV2).load(path), df)
- }
- }
-
- test("Always fall back write path to v1") {
- val df = spark.range(10).toDF()
- withTempPath { path =>
- // Writes should fall back to v1 and succeed.
- df.write.format(dummyWriteOnlyFileSourceV2).save(path.getCanonicalPath)
- checkAnswer(spark.read.parquet(path.getCanonicalPath), df)
- }
- }
-
- test("Fallback Parquet V2 to V1") {
- Seq("parquet", classOf[ParquetDataSourceV2].getCanonicalName).foreach { format =>
- withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> format) {
- val commands = ArrayBuffer.empty[(String, LogicalPlan)]
- val exceptions = ArrayBuffer.empty[(String, Exception)]
- val listener = new QueryExecutionListener {
- override def onFailure(
- funcName: String,
- qe: QueryExecution,
- exception: Exception): Unit = {
- exceptions += funcName -> exception
- }
-
- override def onSuccess(funcName: String, qe: QueryExecution, duration: Long): Unit = {
- commands += funcName -> qe.logical
- }
- }
- spark.listenerManager.register(listener)
-
- try {
- withTempPath { path =>
- val inputData = spark.range(10)
- inputData.write.format(format).save(path.getCanonicalPath)
- sparkContext.listenerBus.waitUntilEmpty()
- assert(commands.length == 1)
- assert(commands.head._1 == "command")
- assert(commands.head._2.isInstanceOf[InsertIntoHadoopFsRelationCommand])
- assert(commands.head._2.asInstanceOf[InsertIntoHadoopFsRelationCommand]
- .fileFormat.isInstanceOf[ParquetFileFormat])
- val df = spark.read.format(format).load(path.getCanonicalPath)
- checkAnswer(df, inputData.toDF())
- assert(
- df.queryExecution.executedPlan.exists(_.isInstanceOf[FileSourceScanExec]))
- }
- } finally {
- spark.listenerManager.unregister(listener)
- }
- }
- }
- }
-}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2WriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2WriteSuite.scala
new file mode 100644
index 0000000000000..ac0f1b9bd39fc
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2WriteSuite.scala
@@ -0,0 +1,763 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.connector
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.spark.{SparkConf, SparkException}
+import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability}
+import org.apache.spark.sql.connector.read.ScanBuilder
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
+import org.apache.spark.sql.execution.{FileSourceScanExec, QueryExecution}
+import org.apache.spark.sql.execution.datasources.{FileFormat, InsertIntoHadoopFsRelationCommand}
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2
+import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.{CaseInsensitiveStringMap, QueryExecutionListener}
+
+class DummyReadOnlyFileDataSourceV2 extends FileDataSourceV2 {
+
+ override def fallbackFileFormat: Class[_ <: FileFormat] = classOf[ParquetFileFormat]
+
+ override def shortName(): String = "parquet"
+
+ override def getTable(options: CaseInsensitiveStringMap): Table = {
+ new DummyReadOnlyFileTable
+ }
+}
+
+class DummyReadOnlyFileTable extends Table with SupportsRead {
+ override def name(): String = "dummy"
+
+ override def schema(): StructType = StructType(Nil)
+
+ override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {
+ throw SparkException.internalError("Dummy file reader")
+ }
+
+ override def capabilities(): java.util.Set[TableCapability] =
+ java.util.EnumSet.of(TableCapability.BATCH_READ, TableCapability.ACCEPT_ANY_SCHEMA)
+}
+
+class DummyWriteOnlyFileDataSourceV2 extends FileDataSourceV2 {
+
+ override def fallbackFileFormat: Class[_ <: FileFormat] = classOf[ParquetFileFormat]
+
+ override def shortName(): String = "parquet"
+
+ override def getTable(options: CaseInsensitiveStringMap): Table = {
+ new DummyWriteOnlyFileTable
+ }
+}
+
+class DummyWriteOnlyFileTable extends Table with SupportsWrite {
+ override def name(): String = "dummy"
+
+ override def schema(): StructType = StructType(Nil)
+
+ override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder =
+ throw SparkException.internalError("Dummy file writer")
+
+ override def capabilities(): java.util.Set[TableCapability] =
+ java.util.EnumSet.of(TableCapability.BATCH_WRITE, TableCapability.ACCEPT_ANY_SCHEMA)
+}
+
+class FileDataSourceV2WriteSuite extends QueryTest with SharedSparkSession {
+
+ private val dummyReadOnlyFileSourceV2 = classOf[DummyReadOnlyFileDataSourceV2].getName
+ private val dummyWriteOnlyFileSourceV2 = classOf[DummyWriteOnlyFileDataSourceV2].getName
+
+ // Built-in file formats for write testing. Text is excluded
+ // because it only supports a single string column.
+ private val fileFormats = Seq("parquet", "orc", "json", "csv")
+
+ override protected def sparkConf: SparkConf = super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "")
+
+ test("Fall back to v1 when writing to file with read only FileDataSourceV2") {
+ val df = spark.range(10).toDF()
+ withTempPath { file =>
+ val path = file.getCanonicalPath
+ // Writing file should fall back to v1 and succeed.
+ df.write.format(dummyReadOnlyFileSourceV2).save(path)
+
+ // Validate write result with [[ParquetFileFormat]].
+ checkAnswer(spark.read.parquet(path), df)
+
+ // Dummy File reader should fail as expected.
+ checkError(
+ exception = intercept[SparkException] {
+ spark.read.format(dummyReadOnlyFileSourceV2).load(path).collect()
+ },
+ condition = "INTERNAL_ERROR",
+ parameters = Map("message" -> "Dummy file reader"))
+ }
+ }
+
+ test("Fall back read path to v1 with configuration USE_V1_SOURCE_LIST") {
+ val df = spark.range(10).toDF()
+ withTempPath { file =>
+ val path = file.getCanonicalPath
+ df.write.parquet(path)
+ Seq(
+ "foo,parquet,bar",
+ "ParQuet,bar,foo",
+ s"foobar,$dummyReadOnlyFileSourceV2"
+ ).foreach { fallbackReaders =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> fallbackReaders) {
+ // Reading file should fall back to v1 and succeed.
+ checkAnswer(spark.read.format(dummyReadOnlyFileSourceV2).load(path), df)
+ checkAnswer(sql(s"SELECT * FROM parquet.`$path`"), df)
+ }
+ }
+
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "foo,bar") {
+ // Dummy File reader should fail as DISABLED_V2_FILE_DATA_SOURCE_READERS doesn't include it.
+ checkError(
+ exception = intercept[SparkException] {
+ spark.read.format(dummyReadOnlyFileSourceV2).load(path).collect()
+ },
+ condition = "INTERNAL_ERROR",
+ parameters = Map("message" -> "Dummy file reader"))
+ }
+ }
+ }
+
+ test("Fall back to v1 when reading file with write only FileDataSourceV2") {
+ val df = spark.range(10).toDF()
+ withTempPath { file =>
+ val path = file.getCanonicalPath
+ df.write.parquet(path)
+ // Fallback reads to V1
+ checkAnswer(spark.read.format(dummyWriteOnlyFileSourceV2).load(path), df)
+ }
+ }
+
+ test("Fall back write path to v1 for default save mode") {
+ val df = spark.range(10).toDF()
+ withTempPath { path =>
+ // Default mode is ErrorIfExists, which now routes through V2 for file sources.
+ // DummyWriteOnlyFileDataSourceV2 throws on write, so it falls back to V1
+ // via the SparkUnsupportedOperationException catch in the createMode branch.
+ // Use a real format to verify ErrorIfExists works via V2.
+ df.write.parquet(path.getCanonicalPath)
+ checkAnswer(spark.read.parquet(path.getCanonicalPath), df)
+ }
+ }
+
+ test("Fallback Parquet V2 to V1") {
+ Seq("parquet", classOf[ParquetDataSourceV2].getCanonicalName).foreach { format =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> format) {
+ val commands = ArrayBuffer.empty[(String, LogicalPlan)]
+ val exceptions = ArrayBuffer.empty[(String, Exception)]
+ val listener = new QueryExecutionListener {
+ override def onFailure(
+ funcName: String,
+ qe: QueryExecution,
+ exception: Exception): Unit = {
+ exceptions += funcName -> exception
+ }
+
+ override def onSuccess(funcName: String, qe: QueryExecution, duration: Long): Unit = {
+ commands += funcName -> qe.logical
+ }
+ }
+ spark.listenerManager.register(listener)
+
+ try {
+ withTempPath { path =>
+ val inputData = spark.range(10)
+ inputData.write.format(format).save(path.getCanonicalPath)
+ sparkContext.listenerBus.waitUntilEmpty()
+ assert(commands.length == 1)
+ assert(commands.head._1 == "command")
+ assert(commands.head._2.isInstanceOf[InsertIntoHadoopFsRelationCommand])
+ assert(commands.head._2.asInstanceOf[InsertIntoHadoopFsRelationCommand]
+ .fileFormat.isInstanceOf[ParquetFileFormat])
+ val df = spark.read.format(format).load(path.getCanonicalPath)
+ checkAnswer(df, inputData.toDF())
+ assert(
+ df.queryExecution.executedPlan.exists(_.isInstanceOf[FileSourceScanExec]))
+ }
+ } finally {
+ spark.listenerManager.unregister(listener)
+ }
+ }
+ }
+ }
+
+ test("File write for multiple formats") {
+ fileFormats.foreach { format =>
+ withTempPath { path =>
+ val inputData = spark.range(10).toDF()
+ inputData.write.option("header", "true").format(format).save(path.getCanonicalPath)
+ val readBack = spark.read.option("header", "true").schema(inputData.schema)
+ .format(format).load(path.getCanonicalPath)
+ checkAnswer(readBack, inputData)
+ }
+ }
+ }
+
+ test("File write produces same results with V1 and V2 reads") {
+ withTempPath { v1Path =>
+ withTempPath { v2Path =>
+ val inputData = spark.range(100).selectExpr("id", "id * 2 as value")
+
+ // Write via V1 path
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "parquet") {
+ inputData.write.parquet(v1Path.getCanonicalPath)
+ }
+
+ // Write via V2 path (default)
+ inputData.write.parquet(v2Path.getCanonicalPath)
+
+ // Both should produce the same results
+ val v1Result = spark.read.parquet(v1Path.getCanonicalPath)
+ val v2Result = spark.read.parquet(v2Path.getCanonicalPath)
+ checkAnswer(v1Result, v2Result)
+ }
+ }
+ }
+
+ test("Partitioned file write") {
+ fileFormats.foreach { format =>
+ withTempPath { path =>
+ val inputData = spark.range(20).selectExpr(
+ "id", "id % 5 as part")
+ inputData.write.option("header", "true")
+ .partitionBy("part").format(format).save(path.getCanonicalPath)
+ val readBack = spark.read.option("header", "true").schema(inputData.schema)
+ .format(format).load(path.getCanonicalPath)
+ checkAnswer(readBack, inputData)
+
+ // Verify partition directory structure exists
+ val partDirs = path.listFiles().filter(_.isDirectory).map(_.getName).sorted
+ assert(partDirs.exists(_.startsWith("part=")),
+ s"Expected partition directories for format $format, got: ${partDirs.mkString(", ")}")
+ }
+ }
+ }
+
+ test("Partitioned write produces same results with V1 and V2 reads") {
+ fileFormats.foreach { format =>
+ withTempPath { v1Path =>
+ withTempPath { v2Path =>
+ val inputData = spark.range(50).selectExpr(
+ "id", "id % 3 as category", "id * 10 as value")
+
+ // Write via V1 path
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> format) {
+ inputData.write.option("header", "true")
+ .partitionBy("category").format(format).save(v1Path.getCanonicalPath)
+ }
+
+ // Write via V2 path (default)
+ inputData.write.option("header", "true")
+ .partitionBy("category").format(format).save(v2Path.getCanonicalPath)
+
+ val v1Result = spark.read.option("header", "true").schema(inputData.schema)
+ .format(format).load(v1Path.getCanonicalPath)
+ val v2Result = spark.read.option("header", "true").schema(inputData.schema)
+ .format(format).load(v2Path.getCanonicalPath)
+ checkAnswer(v1Result, v2Result)
+ }
+ }
+ }
+ }
+
+ test("Multi-level partitioned write") {
+ fileFormats.foreach { format =>
+ withTempPath { path =>
+ val schema = "id LONG, year LONG, month LONG"
+ val inputData = spark.range(30).selectExpr(
+ "id", "id % 3 as year", "id % 2 as month")
+ inputData.write.option("header", "true")
+ .partitionBy("year", "month")
+ .format(format).save(path.getCanonicalPath)
+ checkAnswer(
+ spark.read.option("header", "true")
+ .schema(schema).format(format)
+ .load(path.getCanonicalPath),
+ inputData)
+
+ val yearDirs = path.listFiles()
+ .filter(_.isDirectory).map(_.getName).sorted
+ assert(yearDirs.exists(_.startsWith("year=")),
+ s"Expected year partition dirs for $format")
+ val firstYearDir = path.listFiles()
+ .filter(_.isDirectory).head
+ val monthDirs = firstYearDir.listFiles()
+ .filter(_.isDirectory).map(_.getName).sorted
+ assert(monthDirs.exists(_.startsWith("month=")),
+ s"Expected month partition dirs for $format")
+ }
+ }
+ }
+
+ test("Dynamic partition overwrite") {
+ fileFormats.foreach { format =>
+ withSQLConf(
+ SQLConf.USE_V1_SOURCE_LIST.key -> format,
+ SQLConf.PARTITION_OVERWRITE_MODE.key -> "dynamic") {
+ withTempPath { path =>
+ val schema = "id LONG, part LONG"
+ val initialData = spark.range(9).selectExpr(
+ "id", "id % 3 as part")
+ initialData.write.option("header", "true")
+ .partitionBy("part")
+ .format(format).save(path.getCanonicalPath)
+
+ val overwriteData = spark.createDataFrame(
+ Seq((100L, 0L), (101L, 0L))).toDF("id", "part")
+ overwriteData.write.option("header", "true")
+ .mode("overwrite").partitionBy("part")
+ .format(format).save(path.getCanonicalPath)
+
+ val result = spark.read.option("header", "true")
+ .schema(schema).format(format)
+ .load(path.getCanonicalPath)
+ val expected = initialData.filter("part != 0")
+ .union(overwriteData)
+ checkAnswer(result, expected)
+ }
+ }
+ }
+ }
+
+ test("Dynamic partition overwrite produces same results") {
+ fileFormats.foreach { format =>
+ withTempPath { v1Path =>
+ withTempPath { v2Path =>
+ val schema = "id LONG, part LONG"
+ val initialData = spark.range(12).selectExpr(
+ "id", "id % 4 as part")
+ val overwriteData = spark.createDataFrame(
+ Seq((200L, 1L), (201L, 1L))).toDF("id", "part")
+
+ Seq(v1Path, v2Path).foreach { p =>
+ withSQLConf(
+ SQLConf.USE_V1_SOURCE_LIST.key -> format,
+ SQLConf.PARTITION_OVERWRITE_MODE.key ->
+ "dynamic") {
+ initialData.write.option("header", "true")
+ .partitionBy("part").format(format)
+ .save(p.getCanonicalPath)
+ overwriteData.write.option("header", "true")
+ .mode("overwrite").partitionBy("part")
+ .format(format).save(p.getCanonicalPath)
+ }
+ }
+
+ val v1Result = spark.read
+ .option("header", "true").schema(schema)
+ .format(format).load(v1Path.getCanonicalPath)
+ val v2Result = spark.read
+ .option("header", "true").schema(schema)
+ .format(format).load(v2Path.getCanonicalPath)
+ checkAnswer(v1Result, v2Result)
+ }
+ }
+ }
+ }
+
+ test("DataFrame API write uses V2 path") {
+ fileFormats.foreach { format =>
+ val writeOpts = if (format == "csv") {
+ Map("header" -> "true")
+ } else {
+ Map.empty[String, String]
+ }
+ def readBack(p: String): DataFrame = {
+ val r = spark.read.format(format)
+ val configured = if (format == "csv") {
+ r.option("header", "true").schema("id LONG")
+ } else r
+ configured.load(p)
+ }
+
+ // SaveMode.Append to existing path goes via V2
+ withTempPath { path =>
+ val data1 = spark.range(5).toDF()
+ data1.write.options(writeOpts).format(format).save(path.getCanonicalPath)
+ val data2 = spark.range(5, 10).toDF()
+ data2.write.options(writeOpts).mode("append")
+ .format(format).save(path.getCanonicalPath)
+ checkAnswer(readBack(path.getCanonicalPath),
+ data1.union(data2))
+ }
+
+ // SaveMode.Overwrite goes via V2
+ withTempPath { path =>
+ val data1 = spark.range(5).toDF()
+ data1.write.options(writeOpts).format(format)
+ .save(path.getCanonicalPath)
+ val data2 = spark.range(10, 15).toDF()
+ data2.write.options(writeOpts).mode("overwrite")
+ .format(format).save(path.getCanonicalPath)
+ checkAnswer(readBack(path.getCanonicalPath), data2)
+ }
+ }
+ }
+
+ test("DataFrame API partitioned write") {
+ withTempPath { path =>
+ val data = spark.range(20).selectExpr("id", "id % 4 as part")
+ data.write.partitionBy("part").parquet(path.getCanonicalPath)
+ val result = spark.read.parquet(path.getCanonicalPath)
+ checkAnswer(result, data)
+
+ val partDirs = path.listFiles().filter(_.isDirectory).map(_.getName)
+ assert(partDirs.exists(_.startsWith("part=")))
+ }
+ }
+
+ test("DataFrame API write with compression option") {
+ withTempPath { path =>
+ val data = spark.range(10).toDF()
+ data.write.option("compression", "snappy").parquet(path.getCanonicalPath)
+ checkAnswer(spark.read.parquet(path.getCanonicalPath), data)
+ }
+ }
+
+ test("Catalog table INSERT INTO") {
+ withTable("t") {
+ sql("CREATE TABLE t (id BIGINT, value BIGINT) USING parquet")
+ sql("INSERT INTO t VALUES (1, 10), (2, 20), (3, 30)")
+ checkAnswer(sql("SELECT * FROM t"),
+ Seq((1L, 10L), (2L, 20L), (3L, 30L)).map(Row.fromTuple))
+ }
+ }
+
+ test("Catalog table partitioned INSERT INTO") {
+ withTable("t") {
+ sql("CREATE TABLE t (id BIGINT, part BIGINT) USING parquet PARTITIONED BY (part)")
+ sql("INSERT INTO t VALUES (1, 1), (2, 1), (3, 2), (4, 2)")
+ checkAnswer(sql("SELECT * FROM t ORDER BY id"),
+ Seq((1L, 1L), (2L, 1L), (3L, 2L), (4L, 2L)).map(Row.fromTuple))
+ }
+ }
+
+ test("V2 cache invalidation on overwrite") {
+ fileFormats.foreach { format =>
+ withTempPath { path =>
+ val p = path.getCanonicalPath
+ spark.range(1000).toDF("id").write.format(format).save(p)
+ val df = spark.read.format(format).load(p).cache()
+ assert(df.count() == 1000)
+ // Overwrite via V2 path should invalidate cache
+ spark.range(10).toDF("id").write.mode("append").format(format).save(p)
+ spark.range(10).toDF("id").write
+ .mode("overwrite").format(format).save(p)
+ assert(df.count() == 10,
+ s"Cache should be invalidated after V2 overwrite for $format")
+ df.unpersist()
+ }
+ }
+ }
+
+ test("V2 cache invalidation on append") {
+ fileFormats.foreach { format =>
+ withTempPath { path =>
+ val p = path.getCanonicalPath
+ spark.range(1000).toDF("id").write.format(format).save(p)
+ val df = spark.read.format(format).load(p).cache()
+ assert(df.count() == 1000)
+ // Append via V2 path should invalidate cache
+ spark.range(10).toDF("id").write.mode("append").format(format).save(p)
+ assert(df.count() == 1010,
+ s"Cache should be invalidated after V2 append for $format")
+ df.unpersist()
+ }
+ }
+ }
+
+ test("Cache invalidation on catalog table overwrite") {
+ withTable("t") {
+ sql("CREATE TABLE t (id BIGINT) USING parquet")
+ sql("INSERT INTO t SELECT id FROM range(100)")
+ spark.table("t").cache()
+ assert(spark.table("t").count() == 100)
+ sql("INSERT OVERWRITE TABLE t SELECT id FROM range(10)")
+ assert(spark.table("t").count() == 10,
+ "Cache should be invalidated after catalog table overwrite")
+ spark.catalog.uncacheTable("t")
+ }
+ }
+
+ // SQL path INSERT INTO parquet.`path` requires SupportsCatalogOptions
+
+ test("CTAS") {
+ withTable("t") {
+ sql("CREATE TABLE t USING parquet AS SELECT id, id * 2 as value FROM range(10)")
+ checkAnswer(
+ sql("SELECT count(*) FROM t"),
+ Seq(Row(10L)))
+ }
+ }
+
+ test("Partitioned write to empty directory succeeds") {
+ fileFormats.foreach { format =>
+ withTempDir { dir =>
+ val schema = "id LONG, k LONG"
+ val data = spark.range(20).selectExpr(
+ "id", "id % 4 as k")
+ data.write.option("header", "true")
+ .partitionBy("k").mode("overwrite")
+ .format(format).save(dir.toString)
+ checkAnswer(
+ spark.read.option("header", "true")
+ .schema(schema).format(format)
+ .load(dir.toString),
+ data)
+ }
+ }
+ }
+
+ test("Partitioned overwrite to existing directory succeeds") {
+ fileFormats.foreach { format =>
+ withTempDir { dir =>
+ val schema = "id LONG, k LONG"
+ val data1 = spark.range(10).selectExpr(
+ "id", "id % 3 as k")
+ data1.write.option("header", "true")
+ .partitionBy("k").mode("overwrite")
+ .format(format).save(dir.toString)
+ val data2 = spark.range(10, 20).selectExpr(
+ "id", "id % 3 as k")
+ data2.write.option("header", "true")
+ .partitionBy("k").mode("overwrite")
+ .format(format).save(dir.toString)
+ checkAnswer(
+ spark.read.option("header", "true")
+ .schema(schema).format(format)
+ .load(dir.toString),
+ data2)
+ }
+ }
+ }
+
+ test("DataFrame API ErrorIfExists mode") {
+ Seq("parquet", "orc").foreach { format =>
+ // ErrorIfExists on existing path should throw
+ withTempPath { path =>
+ spark.range(5).toDF().write.format(format).save(path.getCanonicalPath)
+ val e = intercept[AnalysisException] {
+ spark.range(10).toDF().write.mode("error").format(format)
+ .save(path.getCanonicalPath)
+ }
+ assert(e.getCondition == "PATH_ALREADY_EXISTS")
+ }
+ // ErrorIfExists on new path should succeed
+ withTempPath { path =>
+ spark.range(5).toDF().write.mode("error").format(format)
+ .save(path.getCanonicalPath)
+ checkAnswer(spark.read.format(format).load(path.getCanonicalPath),
+ spark.range(5).toDF())
+ }
+ }
+ }
+
+ test("DataFrame API Ignore mode") {
+ Seq("parquet", "orc").foreach { format =>
+ // Ignore on existing path should skip writing
+ withTempPath { path =>
+ spark.range(5).toDF().write.format(format).save(path.getCanonicalPath)
+ spark.range(100).toDF().write.mode("ignore").format(format)
+ .save(path.getCanonicalPath)
+ checkAnswer(spark.read.format(format).load(path.getCanonicalPath),
+ spark.range(5).toDF())
+ }
+ // Ignore on new path should write data
+ withTempPath { path =>
+ spark.range(5).toDF().write.mode("ignore").format(format)
+ .save(path.getCanonicalPath)
+ checkAnswer(spark.read.format(format).load(path.getCanonicalPath),
+ spark.range(5).toDF())
+ }
+ }
+ }
+
+ test("INSERT INTO format.path uses V2 path") {
+ Seq("parquet", "orc", "json").foreach { format =>
+ withTempPath { path =>
+ val p = path.getCanonicalPath
+ spark.range(5).toDF("id").write.format(format).save(p)
+ sql(s"INSERT INTO ${format}.`${p}` SELECT * FROM range(5, 10)")
+ checkAnswer(
+ spark.read.format(format).load(p),
+ spark.range(10).toDF("id"))
+ }
+ }
+ }
+
+ test("Bucketed write via V2 path") {
+ import org.apache.spark.sql.execution.datasources.BucketingUtils
+ withTable("t") {
+ sql("CREATE TABLE t (id BIGINT, key INT)" +
+ " USING parquet" +
+ " CLUSTERED BY (key) INTO 4 BUCKETS")
+ sql("INSERT INTO t SELECT id, " +
+ "cast(id % 4 as int) FROM range(100)")
+ checkAnswer(
+ sql("SELECT count(*) FROM t"),
+ Row(100))
+ // Verify bucketed file naming: each file should have a bucket ID
+ val tablePath = spark.sessionState.catalog
+ .getTableMetadata(
+ org.apache.spark.sql.catalyst
+ .TableIdentifier("t"))
+ .location
+ val files = new java.io.File(tablePath)
+ .listFiles()
+ .filter(_.getName.endsWith(".parquet"))
+ .map(_.getName)
+ assert(files.nonEmpty,
+ "Expected bucketed parquet files")
+ val bucketIds = files.flatMap(BucketingUtils.getBucketId)
+ assert(bucketIds.nonEmpty,
+ s"Expected bucket IDs in file names, got: ${files.mkString(", ")}")
+ assert(bucketIds.forall(id => id >= 0 && id < 4),
+ s"Bucket IDs should be in [0, 4), got: ${bucketIds.mkString(", ")}")
+ }
+ }
+
+ test("Partitioned and bucketed write via V2 path") {
+ import org.apache.spark.sql.execution.datasources.BucketingUtils
+ withTable("t") {
+ sql("CREATE TABLE t (id BIGINT, key INT, part STRING)" +
+ " USING parquet PARTITIONED BY (part)" +
+ " CLUSTERED BY (key) INTO 4 BUCKETS")
+ sql("INSERT INTO t SELECT id, " +
+ "cast(id % 4 as int), " +
+ "cast(id % 2 as string) FROM range(100)")
+ checkAnswer(
+ sql("SELECT count(*) FROM t"),
+ Row(100))
+ // Verify partition directories exist
+ val tablePath = spark.sessionState.catalog
+ .getTableMetadata(
+ org.apache.spark.sql.catalyst
+ .TableIdentifier("t"))
+ .location
+ val partDirs = new java.io.File(tablePath)
+ .listFiles()
+ .filter(f => f.isDirectory && f.getName.startsWith("part="))
+ assert(partDirs.length == 2,
+ s"Expected 2 partition dirs, got: ${partDirs.map(_.getName).mkString(", ")}")
+ // Verify bucketed files in each partition
+ partDirs.foreach { dir =>
+ val files = dir.listFiles()
+ .filter(_.getName.endsWith(".parquet"))
+ .map(_.getName)
+ val bucketIds = files.flatMap(BucketingUtils.getBucketId)
+ assert(bucketIds.nonEmpty,
+ s"Expected bucket IDs in ${dir.getName}, got: ${files.mkString(", ")}")
+ }
+ }
+ }
+
+ test("MSCK REPAIR TABLE on V2 file table") {
+ withTable("t") {
+ sql("CREATE TABLE t (id BIGINT, part INT)" +
+ " USING parquet PARTITIONED BY (part)")
+ val tableIdent =
+ org.apache.spark.sql.catalyst
+ .TableIdentifier("t")
+ val loc = spark.sessionState.catalog
+ .getTableMetadata(tableIdent).location
+ // Write data directly to FS partitions
+ Seq(1, 2, 3).foreach { p =>
+ val dir = new java.io.File(
+ loc.getPath, s"part=$p")
+ dir.mkdirs()
+ spark.range(p * 10, p * 10 + 5)
+ .toDF("id").write
+ .mode("overwrite")
+ .parquet(dir.getCanonicalPath)
+ }
+ // Before repair: catalog has no partitions
+ assert(spark.sessionState.catalog
+ .listPartitions(tableIdent).isEmpty)
+ // MSCK REPAIR TABLE
+ sql("MSCK REPAIR TABLE t")
+ // After repair: 3 partitions in catalog
+ val afterRepair = spark.sessionState
+ .catalog.listPartitions(tableIdent)
+ assert(afterRepair.length === 3)
+ // Data should be readable
+ checkAnswer(
+ sql("SELECT count(*) FROM t"),
+ Row(15))
+ }
+ }
+
+ test("SPARK-56316: static partition overwrite via V2 path") {
+ withTable("t") {
+ sql("CREATE TABLE t (id BIGINT, part INT)" +
+ " USING parquet PARTITIONED BY (part)")
+ sql("INSERT INTO t VALUES (1, 1), (2, 2)")
+ // Overwrite only partition part=1
+ sql("INSERT OVERWRITE TABLE t PARTITION(part=1)" +
+ " SELECT 100")
+ checkAnswer(
+ sql("SELECT * FROM t ORDER BY part"),
+ Seq(Row(100, 1), Row(2, 2)))
+ }
+ }
+
+ test("SPARK-56304: INSERT OVERWRITE IF NOT EXISTS skips when partition exists") {
+ withTable("t") {
+ sql("CREATE TABLE t (id BIGINT, part INT)" +
+ " USING parquet PARTITIONED BY (part)")
+ sql("INSERT INTO t VALUES (1, 1), (2, 2)")
+ // IF NOT EXISTS: partition part=1 exists, should skip
+ sql("INSERT OVERWRITE TABLE t PARTITION(part=1) IF NOT EXISTS" +
+ " SELECT 999")
+ checkAnswer(
+ sql("SELECT * FROM t ORDER BY part"),
+ Seq(Row(1, 1), Row(2, 2)))
+ // Without IF NOT EXISTS: partition part=1 is replaced
+ sql("INSERT OVERWRITE TABLE t PARTITION(part=1)" +
+ " SELECT 999")
+ checkAnswer(
+ sql("SELECT * FROM t ORDER BY part"),
+ Seq(Row(999, 1), Row(2, 2)))
+ // IF NOT EXISTS: partition part=3 does not exist, should write
+ sql("INSERT OVERWRITE TABLE t PARTITION(part=3) IF NOT EXISTS" +
+ " SELECT 300")
+ checkAnswer(
+ sql("SELECT * FROM t ORDER BY part"),
+ Seq(Row(999, 1), Row(2, 2), Row(300, 3)))
+ }
+ }
+
+ test("SELECT FROM format.path uses V2 path") {
+ Seq("parquet", "orc", "json").foreach { format =>
+ withTempPath { path =>
+ val p = path.getCanonicalPath
+ spark.range(5).toDF("id").write.format(format).save(p)
+ checkAnswer(
+ sql(s"SELECT * FROM ${format}.`${p}`"),
+ spark.range(5).toDF("id"))
+ }
+ }
+ }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
index dda9feed5cbf1..46c0356687951 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
@@ -561,7 +561,15 @@ class InMemoryColumnarQuerySuite extends QueryTest
spark.sql("ANALYZE TABLE table1 COMPUTE STATISTICS")
val inMemoryRelation3 = spark.read.table("table1").cache().queryExecution.optimizedPlan.
collect { case plan: InMemoryRelation => plan }.head
- assert(inMemoryRelation3.computeStats().sizeInBytes === 48)
+ if (useV1SourceReaderList.nonEmpty) {
+ // V1 path uses catalog stats after ANALYZE TABLE
+ assert(inMemoryRelation3.computeStats().sizeInBytes === 48)
+ } else {
+ // TODO(SPARK-56232): V2 FileTable doesn't propagate catalog stats from
+ // ANALYZE TABLE through DataSourceV2Relation/FileScan yet. Once
+ // supported, this should also assert sizeInBytes === 48.
+ assert(inMemoryRelation3.computeStats().sizeInBytes === getLocalDirSize(workDir))
+ }
}
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala
index a42c004e3aafd..3069dd3351bda 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala
@@ -38,7 +38,8 @@ import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils
import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, RecordReaderIterator}
-import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
+import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2Relation}
+import org.apache.spark.sql.execution.datasources.v2.orc.OrcTable
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types._
@@ -794,10 +795,20 @@ abstract class OrcQuerySuite extends OrcQueryTest with SharedSparkSession {
withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> "native") {
withTable("spark_20728") {
sql("CREATE TABLE spark_20728(a INT) USING ORC")
- val fileFormat = sql("SELECT * FROM spark_20728").queryExecution.analyzed.collectFirst {
+ val analyzed = sql("SELECT * FROM spark_20728").queryExecution.analyzed
+ val fileFormat = analyzed.collectFirst {
case l: LogicalRelation => l.relation.asInstanceOf[HadoopFsRelation].fileFormat.getClass
}
- assert(fileFormat == Some(classOf[OrcFileFormat]))
+ // V1 path returns LogicalRelation with OrcFileFormat;
+ // V2 path returns DataSourceV2Relation with OrcTable.
+ if (fileFormat.isEmpty) {
+ val v2Table = analyzed.collectFirst {
+ case r: DataSourceV2Relation => r.table
+ }
+ assert(v2Table.exists(_.isInstanceOf[OrcTable]))
+ } else {
+ assert(fileFormat == Some(classOf[OrcFileFormat]))
+ }
}
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileMetadataColumnsV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileMetadataColumnsV2Suite.scala
new file mode 100644
index 0000000000000..9f64a2c165e47
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileMetadataColumnsV2Suite.scala
@@ -0,0 +1,322 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import java.io.File
+import java.sql.Timestamp
+
+import org.apache.spark.sql.{QueryTest, Row}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * End-to-end tests for `_metadata` column support on V2 file sources
+ * (SPARK-56335). Covers the constant metadata fields exposed by
+ * `FileFormat.BASE_METADATA_FIELDS`: `file_path`, `file_name`, `file_size`,
+ * `file_block_start`, `file_block_length`, `file_modification_time`.
+ *
+ * Parquet's `row_index` generated field is covered separately by
+ * SPARK-56371.
+ */
+class FileMetadataColumnsV2Suite extends QueryTest with SharedSparkSession {
+
+ import testImplicits._
+
+ private val v2Formats = Seq("parquet", "orc", "json", "csv", "text")
+
+ private def withV2Source(format: String)(body: => Unit): Unit = {
+ // Force the V2 path for `format` by removing it from the V1 source list.
+ val v1List = SQLConf.get.getConf(SQLConf.USE_V1_SOURCE_LIST)
+ val newV1List = v1List.split(",").filter(_.nonEmpty)
+ .filterNot(_.equalsIgnoreCase(format)).mkString(",")
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> newV1List) {
+ body
+ }
+ }
+
+ private def writeSingleFile(dir: File, format: String): File = {
+ val target = new File(dir, s"data.$format")
+ format match {
+ case "text" =>
+ Seq("hello", "world").toDF("value").coalesce(1).write
+ .mode("overwrite").text(target.getAbsolutePath)
+ case "csv" =>
+ Seq((1, "a"), (2, "b")).toDF("id", "name").coalesce(1).write
+ .mode("overwrite").csv(target.getAbsolutePath)
+ case "json" =>
+ Seq((1, "a"), (2, "b")).toDF("id", "name").coalesce(1).write
+ .mode("overwrite").json(target.getAbsolutePath)
+ case "parquet" =>
+ Seq((1, "a"), (2, "b")).toDF("id", "name").coalesce(1).write
+ .mode("overwrite").parquet(target.getAbsolutePath)
+ case "orc" =>
+ Seq((1, "a"), (2, "b")).toDF("id", "name").coalesce(1).write
+ .mode("overwrite").orc(target.getAbsolutePath)
+ }
+ // Return the directory (reader will scan it).
+ target
+ }
+
+ private def dataFile(dir: File, format: String): File = {
+ // Spark writes per-format files with varying extensions (e.g., text -> .txt). Just find
+ // the first non-hidden, non-success data file.
+ dir.listFiles()
+ .filter(f => f.isFile && !f.getName.startsWith(".") && !f.getName.startsWith("_"))
+ .head
+ }
+
+ v2Formats.foreach { format =>
+ test(s"SPARK-56335: read _metadata columns via V2 $format scan") {
+ withV2Source(format) {
+ withTempDir { dir =>
+ val tableDir = writeSingleFile(dir, format)
+ val file = dataFile(tableDir, format)
+
+ val df = spark.read.format(format).load(tableDir.getAbsolutePath)
+ .select(
+ "_metadata.file_path",
+ "_metadata.file_name",
+ "_metadata.file_size",
+ "_metadata.file_block_start",
+ "_metadata.file_block_length",
+ "_metadata.file_modification_time")
+
+ val rows = df.collect()
+ assert(rows.nonEmpty, s"expected non-empty rows for format=$format")
+ rows.foreach { row =>
+ assert(row.getString(0) == file.toURI.toString)
+ assert(row.getString(1) == file.getName)
+ assert(row.getLong(2) == file.length())
+ assert(row.getLong(3) == 0L)
+ assert(row.getLong(4) == file.length())
+ assert(row.getAs[Timestamp](5) == new Timestamp(file.lastModified()))
+ }
+ }
+ }
+ }
+
+ test(s"SPARK-56335: project data and _metadata together via V2 $format scan") {
+ withV2Source(format) {
+ withTempDir { dir =>
+ val tableDir = writeSingleFile(dir, format)
+ val file = dataFile(tableDir, format)
+
+ val expectedPath = file.toURI.toString
+ val df = spark.read.format(format).load(tableDir.getAbsolutePath)
+
+ // Pick a data column that exists for each format.
+ val dataColumn = if (format == "text") "value" else df.columns.head
+ val projected = df.selectExpr(dataColumn, "_metadata.file_path AS p")
+
+ val paths = projected.select("p").collect().map(_.getString(0)).toSet
+ assert(paths == Set(expectedPath))
+ assert(projected.count() == df.count())
+ }
+ }
+ }
+
+ test(s"SPARK-56335: select only data returns no metadata columns via V2 $format scan") {
+ withV2Source(format) {
+ withTempDir { dir =>
+ val tableDir = writeSingleFile(dir, format)
+ val df = spark.read.format(format).load(tableDir.getAbsolutePath)
+ // Sanity: schema does not surface `_metadata` unless explicitly requested.
+ assert(!df.schema.fieldNames.contains("_metadata"))
+ checkAnswer(df.selectExpr("count(1)"), Row(df.count()))
+ }
+ }
+ }
+ }
+
+ test("SPARK-56335: _metadata.file_name matches file_path basename (parquet V2)") {
+ withV2Source("parquet") {
+ withTempDir { dir =>
+ val tableDir = writeSingleFile(dir, "parquet")
+ val rows = spark.read.parquet(tableDir.getAbsolutePath)
+ .selectExpr("_metadata.file_path", "_metadata.file_name")
+ .collect()
+ rows.foreach { r =>
+ val path = r.getString(0)
+ val name = r.getString(1)
+ assert(path.endsWith(name), s"file_path=$path did not end with file_name=$name")
+ }
+ }
+ }
+ }
+
+ test("SPARK-56335: _metadata on partitioned parquet V2 table") {
+ withV2Source("parquet") {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "partitioned").getAbsolutePath
+ Seq((1, "a"), (2, "b"), (3, "a"), (4, "b")).toDF("id", "p")
+ .write.partitionBy("p").parquet(tablePath)
+
+ val df = spark.read.parquet(tablePath)
+ .selectExpr("id", "p", "_metadata.file_path", "_metadata.file_name")
+ val rows = df.collect()
+ assert(rows.length == 4)
+ rows.foreach { r =>
+ val id = r.getInt(0)
+ val part = r.getString(1)
+ val path = r.getString(2)
+ val name = r.getString(3)
+ assert(path.contains(s"p=$part"),
+ s"file_path=$path did not contain partition p=$part (id=$id)")
+ assert(path.endsWith(name))
+ }
+ }
+ }
+ }
+
+ test("SPARK-56335: filter on _metadata.file_name prunes to matching file (parquet V2)") {
+ withV2Source("parquet") {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "multifile").getAbsolutePath
+ // Two separate files.
+ Seq((1, "x")).toDF("id", "v").coalesce(1).write.parquet(tablePath + "/f1")
+ Seq((2, "y")).toDF("id", "v").coalesce(1).write.parquet(tablePath + "/f2")
+
+ val rows = spark.read.parquet(tablePath + "/f1", tablePath + "/f2")
+ .selectExpr("id", "_metadata.file_name AS fname")
+ .collect()
+ assert(rows.length == 2)
+ val fnames = rows.map(_.getString(1)).toSet
+ assert(fnames.size == 2, s"expected two distinct file names, got $fnames")
+ }
+ }
+ }
+
+ test("SPARK-56335: filter on _metadata.file_name in WHERE clause (parquet V2)") {
+ withV2Source("parquet") {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "filtered").getAbsolutePath
+ Seq((1, "x")).toDF("id", "v").coalesce(1).write.parquet(tablePath + "/f1")
+ Seq((2, "y")).toDF("id", "v").coalesce(1).write.parquet(tablePath + "/f2")
+
+ val allRows = spark.read.parquet(tablePath + "/f1", tablePath + "/f2")
+ .where("_metadata.file_name LIKE '%part%'")
+ .select("id")
+ .collect()
+ // Parquet output files are named `part-*.parquet`, so the filter should match both.
+ assert(allRows.length == 2)
+
+ // Pick one file's name and filter by exact equality; only that file's rows should
+ // remain. This confirms the predicate is actually evaluated rather than dropped.
+ val targetName = dataFile(new File(tablePath + "/f1"), "parquet").getName
+ val filtered = spark.read.parquet(tablePath + "/f1", tablePath + "/f2")
+ .where(s"_metadata.file_name = '$targetName'")
+ .select("id")
+ .collect()
+ assert(filtered.length == 1)
+ assert(filtered.head.getInt(0) == 1)
+ }
+ }
+ }
+
+ test("SPARK-56335: filter on numeric _metadata.file_size (parquet V2)") {
+ withV2Source("parquet") {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "sized").getAbsolutePath
+ Seq((1, "x")).toDF("id", "v").coalesce(1).write.parquet(tablePath + "/f1")
+ Seq((2, "y")).toDF("id", "v").coalesce(1).write.parquet(tablePath + "/f2")
+
+ // Filter on a numeric metadata field should not break planning, and should
+ // return either all rows (filter satisfied) or be evaluated correctly post-scan.
+ val rows = spark.read.parquet(tablePath + "/f1", tablePath + "/f2")
+ .where("_metadata.file_size > 0")
+ .select("id")
+ .collect()
+ assert(rows.length == 2)
+ }
+ }
+ }
+
+ test("SPARK-56335: metadata-only projection on partitioned table (parquet V2)") {
+ withV2Source("parquet") {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "metaonly_partitioned").getAbsolutePath
+ Seq((1, "a"), (2, "b"), (3, "a")).toDF("id", "p")
+ .write.partitionBy("p").parquet(tablePath)
+
+ // Select only metadata (no data columns, no partition columns).
+ val rows = spark.read.parquet(tablePath)
+ .selectExpr("_metadata.file_path")
+ .collect()
+ assert(rows.length == 3)
+ rows.foreach { r =>
+ val path = r.getString(0)
+ assert(path.contains("p=a") || path.contains("p=b"))
+ }
+ }
+ }
+ }
+
+ test("SPARK-56335: count(*) + _metadata does not regress aggregate behavior (parquet V2)") {
+ // Sanity check that combining an aggregate with a metadata reference either works
+ // correctly or does not crash the optimizer. V1 parity test.
+ withV2Source("parquet") {
+ withTempDir { dir =>
+ val tableDir = writeSingleFile(dir, "parquet")
+ val expected = spark.read.parquet(tableDir.getAbsolutePath).count()
+ // Selecting count(*) alongside a metadata column. At minimum this must plan.
+ val count = spark.read.parquet(tableDir.getAbsolutePath)
+ .selectExpr("_metadata.file_path", "id")
+ .count()
+ assert(count == expected)
+ }
+ }
+ }
+
+ test("SPARK-56335: EXPLAIN shows MetadataColumns when _metadata is requested (parquet V2)") {
+ withV2Source("parquet") {
+ withTempDir { dir =>
+ val tableDir = writeSingleFile(dir, "parquet")
+ val df = spark.read.parquet(tableDir.getAbsolutePath)
+ .selectExpr("_metadata.file_path")
+ val plan = df.queryExecution.explainString(
+ org.apache.spark.sql.execution.ExplainMode.fromString("simple"))
+ assert(plan.contains("MetadataColumns"),
+ s"expected EXPLAIN to mention MetadataColumns, got:\n$plan")
+ assert(plan.contains("file_path"),
+ s"expected EXPLAIN to mention the requested file_path field, got:\n$plan")
+
+ // Without metadata reference, EXPLAIN should not mention MetadataColumns.
+ val df2 = spark.read.parquet(tableDir.getAbsolutePath).select("id")
+ val plan2 = df2.queryExecution.explainString(
+ org.apache.spark.sql.execution.ExplainMode.fromString("simple"))
+ assert(!plan2.contains("MetadataColumns"),
+ s"unexpected MetadataColumns in plan without metadata reference:\n$plan2")
+ }
+ }
+ }
+
+ test("SPARK-56335: _metadata.file_block_start is 0 and length equals file size for small file") {
+ withV2Source("parquet") {
+ withTempDir { dir =>
+ val tableDir = writeSingleFile(dir, "parquet")
+ val file = dataFile(tableDir, "parquet")
+ val row = spark.read.parquet(tableDir.getAbsolutePath)
+ .selectExpr("_metadata.file_block_start", "_metadata.file_block_length")
+ .first()
+ // Small files are read in a single split.
+ assert(row.getLong(0) == 0L)
+ assert(row.getLong(1) == file.length())
+ }
+ }
+ }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileStreamV2ReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileStreamV2ReadSuite.scala
new file mode 100644
index 0000000000000..e202e660f65fa
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileStreamV2ReadSuite.scala
@@ -0,0 +1,278 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import java.io.File
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.execution.streaming.runtime.StreamingQueryWrapper
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.streaming.StreamTest
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{LongType, StringType, StructType}
+
+class FileStreamV2ReadSuite extends StreamTest with SharedSparkSession {
+
+ // Clear the V1 source list so file streaming reads use the V2 path.
+ override protected def sparkConf: SparkConf =
+ super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "")
+
+ // Writes data files directly into srcDir (not subdirectories),
+ // because streaming file sources list files at the root level.
+ private def writeParquetFilesToDir(
+ df: DataFrame,
+ srcDir: File,
+ tmpDir: File): Unit = {
+ val tmpOutput = new File(tmpDir, s"tmp_${System.nanoTime()}")
+ df.write.parquet(tmpOutput.getPath)
+ srcDir.mkdirs()
+ tmpOutput.listFiles().filter(_.getName.endsWith(".parquet"))
+ .foreach { f =>
+ val dest = new File(srcDir, f.getName)
+ f.renameTo(dest)
+ }
+ }
+
+ private def writeJsonFilesToDir(
+ df: DataFrame,
+ srcDir: File,
+ tmpDir: File): Unit = {
+ val tmpOutput = new File(tmpDir, s"tmp_${System.nanoTime()}")
+ df.write.json(tmpOutput.getPath)
+ srcDir.mkdirs()
+ tmpOutput.listFiles()
+ .filter(f => f.getName.endsWith(".json"))
+ .foreach { f =>
+ val dest = new File(srcDir, f.getName)
+ f.renameTo(dest)
+ }
+ }
+
+ test("SPARK-56232: basic streaming read from parquet path") {
+ withTempDir { srcDir =>
+ withTempDir { tmpDir =>
+ writeParquetFilesToDir(spark.range(10).toDF(), srcDir, tmpDir)
+
+ val df = spark.readStream
+ .schema(spark.range(0).schema)
+ .parquet(srcDir.getPath)
+
+ val query = df.writeStream
+ .format("memory")
+ .queryName("v2_stream_test")
+ .start()
+
+ try {
+ query.processAllAvailable()
+ checkAnswer(
+ spark.table("v2_stream_test"),
+ spark.range(10).toDF())
+ } finally {
+ query.stop()
+ }
+ }
+ }
+ }
+
+ test("SPARK-56232: discovers new files across batches") {
+ withTempDir { srcDir =>
+ withTempDir { tmpDir =>
+ writeParquetFilesToDir(
+ spark.range(10).toDF(), srcDir, tmpDir)
+
+ val df = spark.readStream
+ .schema(spark.range(0).schema)
+ .parquet(srcDir.getPath)
+
+ val query = df.writeStream
+ .format("memory")
+ .queryName("v2_discovery_test")
+ .start()
+
+ try {
+ query.processAllAvailable()
+ assert(spark.table("v2_discovery_test").count() == 10)
+
+ // Add more files
+ writeParquetFilesToDir(
+ spark.range(10, 20).toDF(), srcDir, tmpDir)
+ query.processAllAvailable()
+ assert(
+ spark.table("v2_discovery_test").count() == 20)
+ } finally {
+ query.stop()
+ }
+ }
+ }
+ }
+
+ test("SPARK-56232: maxFilesPerTrigger limits files per batch") {
+ withTempDir { srcDir =>
+ withTempDir { tmpDir =>
+ // Write 5 separate parquet files
+ (0 until 5).foreach { i =>
+ writeParquetFilesToDir(
+ spark.range(i * 10, (i + 1) * 10).coalesce(1).toDF(),
+ srcDir, tmpDir)
+ }
+
+ val df = spark.readStream
+ .schema(spark.range(0).schema)
+ .option("maxFilesPerTrigger", "2")
+ .parquet(srcDir.getPath)
+
+ val query = df.writeStream
+ .format("memory")
+ .queryName("v2_rate_test")
+ .start()
+
+ try {
+ query.processAllAvailable()
+ assert(spark.table("v2_rate_test").count() == 50)
+ } finally {
+ query.stop()
+ }
+ }
+ }
+ }
+
+ test("SPARK-56232: checkpoint recovery resumes from last offset") {
+ withTempDir { srcDir =>
+ withTempDir { tmpDir =>
+ withTempDir { checkpointDir =>
+ withTempDir { outputDir =>
+ writeParquetFilesToDir(
+ spark.range(10).toDF(), srcDir, tmpDir)
+
+ val schema = spark.range(0).schema
+
+ // First run
+ val df1 = spark.readStream
+ .schema(schema)
+ .parquet(srcDir.getPath)
+
+ val q1 = df1.writeStream
+ .format("parquet")
+ .option(
+ "checkpointLocation", checkpointDir.getPath)
+ .option("path", outputDir.getPath)
+ .start()
+ q1.processAllAvailable()
+ q1.stop()
+
+ val firstCount =
+ spark.read.parquet(outputDir.getPath).count()
+ assert(firstCount == 10,
+ s"Expected 10 rows after first run, got $firstCount")
+
+ // Add more files
+ writeParquetFilesToDir(
+ spark.range(10, 20).toDF(), srcDir, tmpDir)
+
+ // Second run - should NOT reprocess batch1
+ val df2 = spark.readStream
+ .schema(schema)
+ .parquet(srcDir.getPath)
+
+ val q2 = df2.writeStream
+ .format("parquet")
+ .option(
+ "checkpointLocation", checkpointDir.getPath)
+ .option("path", outputDir.getPath)
+ .start()
+ q2.processAllAvailable()
+ q2.stop()
+
+ // Total should be 20 (10 from first + 10 new)
+ val totalCount =
+ spark.read.parquet(outputDir.getPath).count()
+ assert(totalCount == 20,
+ s"Expected 20 total rows, got $totalCount")
+ }
+ }
+ }
+ }
+ }
+
+ test("SPARK-56232: streaming uses V2 path (MicroBatchScanExec)") {
+ withTempDir { srcDir =>
+ withTempDir { tmpDir =>
+ writeParquetFilesToDir(
+ spark.range(10).toDF(), srcDir, tmpDir)
+
+ val df = spark.readStream
+ .schema(spark.range(0).schema)
+ .parquet(srcDir.getPath)
+
+ val query = df.writeStream
+ .format("memory")
+ .queryName("v2_path_test")
+ .start()
+
+ try {
+ query.processAllAvailable()
+
+ val lastExec = query
+ .asInstanceOf[StreamingQueryWrapper]
+ .streamingQuery.lastExecution
+ assert(lastExec != null,
+ "Expected at least one batch to execute")
+ val hasV2Scan = lastExec.executedPlan.collect {
+ case _: MicroBatchScanExec => true
+ }.nonEmpty
+ assert(hasV2Scan,
+ "Expected MicroBatchScanExec (V2) in plan, " +
+ s"got: ${lastExec.executedPlan.treeString}")
+ } finally {
+ query.stop()
+ }
+ }
+ }
+ }
+
+ test("SPARK-56232: streaming read works with JSON format") {
+ withTempDir { srcDir =>
+ withTempDir { tmpDir =>
+ val data = spark.range(10)
+ .selectExpr("id", "cast(id as string) as name")
+ writeJsonFilesToDir(data, srcDir, tmpDir)
+
+ val schema = new StructType()
+ .add("id", LongType)
+ .add("name", StringType)
+
+ val df = spark.readStream
+ .schema(schema)
+ .json(srcDir.getPath)
+
+ val query = df.writeStream
+ .format("memory")
+ .queryName("v2_json_test")
+ .start()
+
+ try {
+ query.processAllAvailable()
+ assert(spark.table("v2_json_test").count() == 10)
+ } finally {
+ query.stop()
+ }
+ }
+ }
+ }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileStreamV2WriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileStreamV2WriteSuite.scala
new file mode 100644
index 0000000000000..eade1bc1d202c
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileStreamV2WriteSuite.scala
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import java.io.File
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.streaming.StreamTest
+import org.apache.spark.sql.test.SharedSparkSession
+
+class FileStreamV2WriteSuite extends StreamTest with SharedSparkSession {
+
+ // Clear the V1 source list so file streaming writes use the V2 path.
+ override protected def sparkConf: SparkConf =
+ super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "")
+
+ private def writeParquetFilesToDir(
+ df: DataFrame,
+ srcDir: File,
+ tmpDir: File): Unit = {
+ val tmpOutput = new File(tmpDir, s"tmp_${System.nanoTime()}")
+ df.write.parquet(tmpOutput.getPath)
+ srcDir.mkdirs()
+ tmpOutput.listFiles().filter(_.getName.endsWith(".parquet"))
+ .foreach { f =>
+ val dest = new File(srcDir, f.getName)
+ f.renameTo(dest)
+ }
+ }
+
+ test("SPARK-56233: basic streaming write to parquet") {
+ withTempDir { outputDir =>
+ withTempDir { checkpointDir =>
+ import testImplicits._
+ val input = MemoryStream[Int]
+ val df = input.toDF()
+
+ val query = df.writeStream
+ .format("parquet")
+ .option("checkpointLocation", checkpointDir.getPath)
+ .option("path", outputDir.getPath)
+ .start()
+
+ try {
+ input.addData(1, 2, 3)
+ query.processAllAvailable()
+
+ checkAnswer(
+ spark.read.parquet(outputDir.getPath),
+ Seq(1, 2, 3).toDF())
+
+ // Verify _spark_metadata exists
+ val metadataDir = new File(outputDir, "_spark_metadata")
+ assert(metadataDir.exists(),
+ "Expected _spark_metadata directory for streaming write")
+ } finally {
+ query.stop()
+ }
+ }
+ }
+ }
+
+ test("SPARK-56233: multiple batches accumulate correctly") {
+ withTempDir { outputDir =>
+ withTempDir { checkpointDir =>
+ import testImplicits._
+ val input = MemoryStream[Int]
+ val df = input.toDF()
+
+ val query = df.writeStream
+ .format("parquet")
+ .option("checkpointLocation", checkpointDir.getPath)
+ .option("path", outputDir.getPath)
+ .start()
+
+ try {
+ input.addData(1, 2, 3)
+ query.processAllAvailable()
+
+ input.addData(4, 5, 6)
+ query.processAllAvailable()
+
+ input.addData(7, 8, 9)
+ query.processAllAvailable()
+
+ checkAnswer(
+ spark.read.parquet(outputDir.getPath),
+ (1 to 9).map(i => Tuple1(i)).toDF())
+ } finally {
+ query.stop()
+ }
+ }
+ }
+ }
+
+ test("SPARK-56233: checkpoint recovery after restart") {
+ withTempDir { srcDir =>
+ withTempDir { tmpDir =>
+ withTempDir { outputDir =>
+ withTempDir { checkpointDir =>
+ val schema = spark.range(0).schema
+
+ // Write initial source files
+ writeParquetFilesToDir(
+ spark.range(10).toDF(), srcDir, tmpDir)
+
+ // First run
+ val df1 = spark.readStream
+ .schema(schema)
+ .parquet(srcDir.getPath)
+
+ val q1 = df1.writeStream
+ .format("parquet")
+ .option("checkpointLocation", checkpointDir.getPath)
+ .option("path", outputDir.getPath)
+ .start()
+ q1.processAllAvailable()
+ q1.stop()
+
+ val firstCount =
+ spark.read.parquet(outputDir.getPath).count()
+ assert(firstCount == 10,
+ s"Expected 10 rows after first run, got $firstCount")
+
+ // Add more source files
+ writeParquetFilesToDir(
+ spark.range(10, 20).toDF(), srcDir, tmpDir)
+
+ // Second run with same checkpoint - should NOT reprocess batch 1
+ val df2 = spark.readStream
+ .schema(schema)
+ .parquet(srcDir.getPath)
+
+ val q2 = df2.writeStream
+ .format("parquet")
+ .option("checkpointLocation", checkpointDir.getPath)
+ .option("path", outputDir.getPath)
+ .start()
+ q2.processAllAvailable()
+ q2.stop()
+
+ // Total should be 20 (10 from first + 10 new)
+ val totalCount =
+ spark.read.parquet(outputDir.getPath).count()
+ assert(totalCount == 20,
+ s"Expected 20 total rows, got $totalCount")
+ }
+ }
+ }
+ }
+ }
+
+ test("SPARK-56233: streaming write works with JSON format") {
+ withTempDir { outputDir =>
+ withTempDir { checkpointDir =>
+ import testImplicits._
+ val input = MemoryStream[Int]
+ val df = input.toDF()
+
+ val query = df.writeStream
+ .format("json")
+ .option("checkpointLocation", checkpointDir.getPath)
+ .option("path", outputDir.getPath)
+ .start()
+
+ try {
+ input.addData(10, 20, 30)
+ query.processAllAvailable()
+
+ checkAnswer(
+ spark.read.json(outputDir.getPath),
+ Seq(10, 20, 30).toDF())
+ } finally {
+ query.stop()
+ }
+ }
+ }
+ }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2BucketedReadSuite.scala
new file mode 100644
index 0000000000000..cbb756d6abcb5
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2BucketedReadSuite.scala
@@ -0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning
+import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+import org.apache.spark.sql.execution.datasources.FilePartition
+import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * End-to-end tests for bucket pruning and bucket join via the V2 file read path
+ * (BatchScanExec / FileScan). All tests disable AQE and clear USE_V1_SOURCE_LIST
+ * so that tables are resolved through the V2 catalog path.
+ */
+class V2BucketedReadSuite extends QueryTest
+ with SharedSparkSession
+ with AdaptiveSparkPlanHelper {
+
+ private def collectBatchScans(plan: SparkPlan): Seq[BatchScanExec] = {
+ collectWithSubqueries(plan) { case b: BatchScanExec => b }
+ }
+
+ // Must be called inside a withSQLConf block that clears USE_V1_SOURCE_LIST
+ // so the catalog resolves the table as a V2 FileTable.
+ private def withBucketedTable(
+ tableName: String,
+ numBuckets: Int,
+ bucketCol: String,
+ sortCol: Option[String] = None)(f: => Unit): Unit = {
+ withTable(tableName) {
+ val writer = spark.range(100)
+ .selectExpr("id", "id % 10 as key", "cast(id as string) as value")
+ .write
+ .bucketBy(numBuckets, bucketCol)
+ val sorted = sortCol.map(c => writer.sortBy(c)).getOrElse(writer)
+ sorted.saveAsTable(tableName)
+ f
+ }
+ }
+
+ test("SPARK-56231: bucket pruning filters files by bucket ID") {
+ withSQLConf(
+ SQLConf.USE_V1_SOURCE_LIST.key -> "",
+ SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false",
+ SQLConf.BUCKETING_ENABLED.key -> "true",
+ SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") {
+
+ withBucketedTable("t1", numBuckets = 8, bucketCol = "key") {
+ val df = spark.table("t1").filter("key = 3")
+ val plan = df.queryExecution.executedPlan
+ val batchScans = collectBatchScans(plan)
+ assert(batchScans.nonEmpty, "Expected at least one BatchScanExec in plan")
+
+ val fileScan = batchScans.head.scan.asInstanceOf[FileScan]
+ assert(fileScan.bucketedScan, "Expected bucketedScan = true")
+
+ val partitions = fileScan.planInputPartitions()
+ val nonEmpty = partitions.count {
+ case fp: FilePartition => fp.files.nonEmpty
+ case _ => true
+ }
+ // Only 1 bucket out of 8 should have files for key = 3
+ assert(nonEmpty <= 1,
+ s"Expected at most 1 non-empty partition for key = 3, but got $nonEmpty")
+
+ checkAnswer(df, spark.range(100)
+ .selectExpr("id", "id % 10 as key", "cast(id as string) as value")
+ .filter("key = 3"))
+ }
+ }
+ }
+
+ test("SPARK-56231: bucket pruning with IN filter") {
+ withSQLConf(
+ SQLConf.USE_V1_SOURCE_LIST.key -> "",
+ SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false",
+ SQLConf.BUCKETING_ENABLED.key -> "true",
+ SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") {
+
+ withBucketedTable("t1", numBuckets = 8, bucketCol = "key") {
+ val df = spark.table("t1").filter("key IN (1, 3)")
+ val plan = df.queryExecution.executedPlan
+ val batchScans = collectBatchScans(plan)
+ assert(batchScans.nonEmpty, "Expected at least one BatchScanExec in plan")
+
+ val fileScan = batchScans.head.scan.asInstanceOf[FileScan]
+ assert(fileScan.bucketedScan, "Expected bucketedScan = true")
+
+ val partitions = fileScan.planInputPartitions()
+ val nonEmpty = partitions.count {
+ case fp: FilePartition => fp.files.nonEmpty
+ case _ => true
+ }
+ // At most 2 buckets should be non-empty (one for key=1, one for key=3)
+ assert(nonEmpty <= 2,
+ s"Expected at most 2 non-empty partitions for key IN (1, 3), but got $nonEmpty")
+
+ checkAnswer(df, spark.range(100)
+ .selectExpr("id", "id % 10 as key", "cast(id as string) as value")
+ .filter("key IN (1, 3)"))
+ }
+ }
+ }
+
+ test("SPARK-56231: bucketed join avoids shuffle") {
+ withSQLConf(
+ SQLConf.USE_V1_SOURCE_LIST.key -> "",
+ SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false",
+ SQLConf.BUCKETING_ENABLED.key -> "true",
+ SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false",
+ SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
+
+ withBucketedTable("t1", numBuckets = 8, bucketCol = "key") {
+ withBucketedTable("t2", numBuckets = 8, bucketCol = "key") {
+ val df = spark.table("t1").join(spark.table("t2"), "key")
+ val plan = df.queryExecution.executedPlan
+
+ val shuffles = collectWithSubqueries(plan) {
+ case s: ShuffleExchangeExec => s
+ }
+ assert(shuffles.isEmpty,
+ s"Expected no shuffles but found ${shuffles.size}: ${shuffles.mkString(", ")}")
+
+ val batchScans = collectBatchScans(plan)
+ assert(batchScans.size >= 2,
+ s"Expected at least 2 BatchScanExec nodes but found ${batchScans.size}")
+ batchScans.foreach { scan =>
+ assert(scan.outputPartitioning.isInstanceOf[HashPartitioning],
+ s"Expected HashPartitioning but got " +
+ s"${scan.outputPartitioning.getClass.getSimpleName}")
+ }
+
+ val t1Data = spark.range(100)
+ .selectExpr("id", "id % 10 as key", "cast(id as string) as value")
+ val t2Data = spark.range(100)
+ .selectExpr("id", "id % 10 as key", "cast(id as string) as value")
+ assert(df.count() == t1Data.join(t2Data, "key").count())
+ }
+ }
+ }
+ }
+
+ test("SPARK-56231: disable unnecessary bucketed scan for simple select") {
+ withSQLConf(
+ SQLConf.USE_V1_SOURCE_LIST.key -> "",
+ SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false",
+ SQLConf.BUCKETING_ENABLED.key -> "true",
+ SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "true") {
+
+ withBucketedTable("t1", numBuckets = 8, bucketCol = "key") {
+ val df = spark.table("t1")
+ val plan = df.queryExecution.executedPlan
+ val batchScans = collectBatchScans(plan)
+ assert(batchScans.nonEmpty, "Expected at least one BatchScanExec in plan")
+
+ val fileScan = batchScans.head.scan.asInstanceOf[FileScan]
+ assert(!fileScan.bucketedScan,
+ "Expected bucketedScan = false for simple SELECT " +
+ "(DisableUnnecessaryBucketedScan should disable it)")
+
+ checkAnswer(df, spark.range(100)
+ .selectExpr("id", "id % 10 as key", "cast(id as string) as value"))
+ }
+ }
+ }
+
+ test("SPARK-56231: coalesce buckets in join") {
+ withSQLConf(
+ SQLConf.USE_V1_SOURCE_LIST.key -> "",
+ SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false",
+ SQLConf.BUCKETING_ENABLED.key -> "true",
+ SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false",
+ SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
+ SQLConf.PREFER_SORTMERGEJOIN.key -> "true",
+ SQLConf.COALESCE_BUCKETS_IN_JOIN_ENABLED.key -> "true") {
+
+ // Create 8-bucket and 4-bucket tables on the same join key.
+ // CoalesceBucketsInJoin should coalesce the 8-bucket side down to 4
+ // so that both sides use 4 partitions and no shuffle is needed.
+ withBucketedTable("t1", numBuckets = 8, bucketCol = "key") {
+ withBucketedTable("t2", numBuckets = 4, bucketCol = "key") {
+ val t1 = spark.table("t1")
+ val t2 = spark.table("t2")
+ val df = t1.join(t2, t1("key") === t2("key"))
+ val plan = df.queryExecution.executedPlan
+
+ val batchScans = collectBatchScans(plan)
+ assert(batchScans.size >= 2,
+ s"Expected at least 2 BatchScanExec nodes but found ${batchScans.size}")
+ batchScans.foreach { b =>
+ val fs = b.scan.asInstanceOf[FileScan]
+ assert(fs.bucketSpec.isDefined,
+ s"Expected bucketSpec to be defined. bucketedScan=${fs.bucketedScan}")
+ }
+
+ val coalescedScans = batchScans.filter { b =>
+ b.scan.isInstanceOf[FileScan] &&
+ b.scan.asInstanceOf[FileScan].optionalNumCoalescedBuckets.isDefined
+ }
+ assert(coalescedScans.nonEmpty,
+ "Expected CoalesceBucketsInJoin to coalesce the 8-bucket scan. " +
+ s"Plan:\n${df.queryExecution.sparkPlan}")
+ assert(coalescedScans.head.scan
+ .asInstanceOf[FileScan].optionalNumCoalescedBuckets.get == 4)
+
+ val shuffles = collectWithSubqueries(plan) {
+ case s: ShuffleExchangeExec => s
+ }
+ assert(shuffles.isEmpty,
+ s"Expected no shuffles with bucket coalescing but found ${shuffles.size}")
+ assert(df.count() > 0)
+ }
+ }
+ }
+ }
+
+ test("SPARK-56231: bucketing disabled by config") {
+ withSQLConf(
+ SQLConf.USE_V1_SOURCE_LIST.key -> "",
+ SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false",
+ SQLConf.BUCKETING_ENABLED.key -> "false") {
+
+ withBucketedTable("t1", numBuckets = 8, bucketCol = "key") {
+ val df = spark.table("t1")
+ val plan = df.queryExecution.executedPlan
+ val batchScans = collectBatchScans(plan)
+ assert(batchScans.nonEmpty, "Expected at least one BatchScanExec in plan")
+
+ val fileScan = batchScans.head.scan.asInstanceOf[FileScan]
+ assert(!fileScan.bucketedScan,
+ "Expected bucketedScan = false when bucketing is disabled by config")
+
+ checkAnswer(df, spark.range(100)
+ .selectExpr("id", "id % 10 as key", "cast(id as string) as value"))
+ }
+ }
+ }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetMetadataRowIndexV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetMetadataRowIndexV2Suite.scala
new file mode 100644
index 0000000000000..6cdbf3da2ff0b
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetMetadataRowIndexV2Suite.scala
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2.parquet
+
+import java.io.File
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * End-to-end tests for `_metadata.row_index` on V2 Parquet (SPARK-56371). Verifies that the
+ * generated row_index sub-field is exposed via `metadataColumns()`, populated correctly per
+ * row by the Parquet reader, and round-trips through the V2 metadata wrapper for both
+ * vectorized and row-based reads.
+ */
+class ParquetMetadataRowIndexV2Suite extends QueryTest with SharedSparkSession {
+
+ import testImplicits._
+
+ private def withV2Parquet(body: => Unit): Unit = {
+ val v1List = SQLConf.get.getConf(SQLConf.USE_V1_SOURCE_LIST)
+ val newV1List = v1List.split(",").filter(_.nonEmpty)
+ .filterNot(_.equalsIgnoreCase("parquet")).mkString(",")
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> newV1List) {
+ body
+ }
+ }
+
+ private def withVectorized(enabled: Boolean)(body: => Unit): Unit = {
+ withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> enabled.toString) {
+ body
+ }
+ }
+
+ test("SPARK-56371: _metadata.row_index per-row values (vectorized)") {
+ withV2Parquet {
+ withVectorized(enabled = true) {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "rowidx").getAbsolutePath
+ (1 to 5).toDF("id").coalesce(1).write.parquet(tablePath)
+
+ val rows = spark.read.parquet(tablePath)
+ .selectExpr("id", "_metadata.row_index")
+ .orderBy("id")
+ .collect()
+ assert(rows.length == 5)
+ rows.zipWithIndex.foreach { case (row, expectedIdx) =>
+ assert(row.getInt(0) == expectedIdx + 1)
+ assert(row.getLong(1) == expectedIdx)
+ }
+ }
+ }
+ }
+ }
+
+ test("SPARK-56371: _metadata.row_index per-row values (row-based)") {
+ withV2Parquet {
+ withVectorized(enabled = false) {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "rowidx_rb").getAbsolutePath
+ (1 to 5).toDF("id").coalesce(1).write.parquet(tablePath)
+
+ val rows = spark.read.parquet(tablePath)
+ .selectExpr("id", "_metadata.row_index")
+ .orderBy("id")
+ .collect()
+ assert(rows.length == 5)
+ rows.zipWithIndex.foreach { case (row, expectedIdx) =>
+ assert(row.getInt(0) == expectedIdx + 1)
+ assert(row.getLong(1) == expectedIdx)
+ }
+ }
+ }
+ }
+ }
+
+ test("SPARK-56371: row_index resets per file across multiple files") {
+ withV2Parquet {
+ withVectorized(enabled = true) {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "multi").getAbsolutePath
+ (1 to 3).toDF("id").coalesce(1).write.parquet(tablePath + "/f1")
+ (10 to 12).toDF("id").coalesce(1).write.parquet(tablePath + "/f2")
+
+ val df = spark.read.parquet(tablePath + "/f1", tablePath + "/f2")
+ .selectExpr("id", "_metadata.file_name", "_metadata.row_index")
+ val rows = df.collect()
+ assert(rows.length == 6)
+ val byFile = rows.groupBy(_.getString(1))
+ assert(byFile.size == 2)
+ byFile.values.foreach { fileRows =>
+ val sortedIndices = fileRows.map(_.getLong(2)).sorted
+ assert(sortedIndices.toSeq == Seq(0L, 1L, 2L),
+ s"row_index per file should be 0,1,2; got $sortedIndices")
+ }
+ }
+ }
+ }
+ }
+
+ test("SPARK-56371: combined constant + generated metadata fields (row-based)") {
+ // Same shape as the vectorized variant - exercises the row-based projection path
+ // (UnsafeProjection over BoundReferences + CreateNamedStruct).
+ withV2Parquet {
+ withVectorized(enabled = false) {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "combined_rb").getAbsolutePath
+ (1 to 3).toDF("id").coalesce(1).write.parquet(tablePath)
+
+ val rows = spark.read.parquet(tablePath)
+ .selectExpr(
+ "id",
+ "_metadata.file_name",
+ "_metadata.file_modification_time",
+ "_metadata.row_index")
+ .orderBy("id")
+ .collect()
+ assert(rows.length == 3)
+ rows.foreach { r =>
+ // file_modification_time must be returned as a Timestamp, not a Long.
+ // This guards against a `Literal.apply(longValue)` regression that would
+ // produce a `LongType` literal in the metadata struct instead of `TimestampType`.
+ assert(r.getAs[java.sql.Timestamp](2) != null)
+ }
+ assert(rows.map(_.getLong(3)).toSeq == Seq(0L, 1L, 2L))
+ }
+ }
+ }
+ }
+
+ test("SPARK-56371: combined constant + generated metadata fields (vectorized)") {
+ withV2Parquet {
+ withVectorized(enabled = true) {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "combined").getAbsolutePath
+ (1 to 3).toDF("id").coalesce(1).write.parquet(tablePath)
+
+ val rows = spark.read.parquet(tablePath)
+ .selectExpr(
+ "id",
+ "_metadata.file_path",
+ "_metadata.file_name",
+ "_metadata.file_size",
+ "_metadata.row_index")
+ .orderBy("id")
+ .collect()
+ assert(rows.length == 3)
+ val firstPath = rows.head.getString(1)
+ rows.foreach { r =>
+ // file_path should be identical for all rows (single file).
+ assert(r.getString(1) == firstPath)
+ // file_size should be positive.
+ assert(r.getLong(3) > 0)
+ }
+ val rowIndices = rows.map(_.getLong(4)).toSeq
+ assert(rowIndices == Seq(0L, 1L, 2L))
+ }
+ }
+ }
+ }
+
+ test("SPARK-56371: filter on _metadata.row_index") {
+ withV2Parquet {
+ withVectorized(enabled = true) {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "filtered").getAbsolutePath
+ (1 to 10).toDF("id").coalesce(1).write.parquet(tablePath)
+
+ val rows = spark.read.parquet(tablePath)
+ .where("_metadata.row_index < 3")
+ .selectExpr("id", "_metadata.row_index")
+ .orderBy("id")
+ .collect()
+ assert(rows.length == 3)
+ assert(rows.map(_.getLong(1)).toSeq == Seq(0L, 1L, 2L))
+ }
+ }
+ }
+ }
+
+ test("SPARK-56371: row_index only (no data columns) projection") {
+ withV2Parquet {
+ withVectorized(enabled = true) {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "metaonly").getAbsolutePath
+ (1 to 4).toDF("id").coalesce(1).write.parquet(tablePath)
+
+ val rows = spark.read.parquet(tablePath)
+ .selectExpr("_metadata.row_index")
+ .collect()
+ .map(_.getLong(0))
+ .sorted
+ assert(rows.toSeq == Seq(0L, 1L, 2L, 3L))
+ }
+ }
+ }
+ }
+
+ test("SPARK-56371: row_index with partitioned table (vectorized)") {
+ withV2Parquet {
+ withVectorized(enabled = true) {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "partitioned").getAbsolutePath
+ Seq((1, "a"), (2, "a"), (3, "b"), (4, "b")).toDF("id", "p")
+ .write.partitionBy("p").parquet(tablePath)
+
+ val df = spark.read.parquet(tablePath)
+ .selectExpr("id", "p", "_metadata.row_index")
+ val rows = df.collect()
+ assert(rows.length == 4)
+ // Each partition has its own file; row_index should reset per file.
+ val byPartition = rows.groupBy(_.getString(1))
+ byPartition.values.foreach { partRows =>
+ val indices = partRows.map(_.getLong(2)).sorted
+ assert(indices.head == 0L,
+ s"row_index should start at 0 per partition file, got $indices")
+ }
+ }
+ }
+ }
+ }
+
+ test("SPARK-56371: EXPLAIN shows row_index in MetadataColumns") {
+ withV2Parquet {
+ withTempDir { dir =>
+ val tablePath = new File(dir, "explain").getAbsolutePath
+ (1 to 3).toDF("id").coalesce(1).write.parquet(tablePath)
+ val df = spark.read.parquet(tablePath).selectExpr("_metadata.row_index")
+ val plan = df.queryExecution.explainString(
+ org.apache.spark.sql.execution.ExplainMode.fromString("simple"))
+ assert(plan.contains("MetadataColumns"))
+ assert(plan.contains("row_index"))
+ // The internal column name `_tmp_metadata_row_index` is wrapper-internal and
+ // must NOT leak into the user-facing plan output.
+ assert(!plan.contains("_tmp_metadata_row_index"),
+ s"plan should not expose the internal row_index column name:\n$plan")
+ }
+ }
+ }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
index 83e6772d69dc6..b906563c68dcc 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
@@ -32,7 +32,7 @@ import org.apache.hadoop.util.Progressable
import org.scalatest.PrivateMethodTester
import org.scalatest.time.SpanSugar._
-import org.apache.spark.SparkUnsupportedOperationException
+import org.apache.spark.{SparkConf, SparkUnsupportedOperationException}
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.paths.SparkPath.{fromUrlString => sp}
import org.apache.spark.sql._
@@ -235,6 +235,11 @@ class FileStreamSourceSuite extends FileStreamSourceTest {
import testImplicits._
+ // Force V1 file source path for these tests, which specifically test FileStreamSource (V1).
+ // V2 streaming reads are tested in FileStreamV2ReadSuite.
+ override protected def sparkConf: SparkConf =
+ super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "csv,json,orc,text,parquet,avro")
+
override val streamingTimeout = 80.seconds
private def createFileStreamSourceAndGetSchema(
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
index 9f5566407e386..1ebe63bdbcfed 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
@@ -125,7 +125,6 @@ class HiveSessionStateBuilder(
new ResolveDataSource(session) +:
new FindDataSourceTable(session) +:
new ResolveSQLOnFile(session) +:
- new FallBackFileSourceV2(session) +:
ResolveEncodersInScalaAgg +:
new ResolveSessionCatalog(catalogManager) +:
ResolveWriteToStream +: