From 77e7b898f3baae73f7d022a5d906a2788cca6b29 Mon Sep 17 00:00:00 2001
From: wilmerdooley
Date: Fri, 19 Jun 2026 01:18:46 +0000
Subject: [PATCH] SPARK-56573: Widen the default tablesample seed to reduce collisions

Signed-off-by: wilmerdooley
---
 .../apache/spark/sql/execution/basicPhysicalOperators.scala | 4 ++--
 .../execution/datasources/v2/V2ScanRelationPushDown.scala   | 5 +----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
index 88c74ab7adc41..136d53d7fa6b3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
 import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 import org.apache.spark.sql.types.{LongType, StructType}
 import org.apache.spark.sql.vectorized.ColumnarBatch
-import org.apache.spark.util.ThreadUtils
+import org.apache.spark.util.{ThreadUtils, Utils}
 import org.apache.spark.util.random.{BernoulliCellSampler, PoissonSampler}
 
 /** Physical plan for Project. */
@@ -482,7 +482,7 @@ case class SampleExec(
     seed: Option[Long],
     child: SparkPlan) extends UnaryExecNode with CodegenSupport {
 
-  val resolvedSeed: Long = seed.getOrElse((math.random() * 1000).toLong)
+  val resolvedSeed: Long = seed.getOrElse(Utils.random.nextLong())
 
   override def output: Seq[Attribute] = child.output
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala
index 2b291bf3a4db1..1c574f5fde049 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala
@@ -897,10 +897,7 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
           sample.lowerBound,
           sample.upperBound,
           sample.withReplacement,
-          // TODO(SPARK-56573): The * 1000 limits the seed to only 1000 distinct values.
-          // Kept here for consistency with SampleExec.resolvedSeed; will be fixed
-          // across all call sites in SPARK-56573.
-          sample.seed.getOrElse((math.random() * 1000).toLong),
+          sample.seed.getOrElse(Utils.random.nextLong()),
           sampleMethod = sample.sampleMethod)
         val pushed = PushDownUtils.pushTableSample(sHolder.builder, tableSample)
         if (pushed) {