From 580f287e4168d936f82a8e2c73687e90d6903e95 Mon Sep 17 00:00:00 2001
From: Tian Gao
Date: Thu, 18 Jun 2026 13:09:45 -0700
Subject: [PATCH] [SPARK-57540][SQL] Instantiate Woodstox `WstxOutputFactory` directly in `StaxXmlGenerator`

Construct the shaded `WstxOutputFactory` directly instead of resolving a
StAX implementation via `XMLOutputFactory.newInstance()`, so the factory
always matches the shaded `WstxOutputProperties` keys it is configured with.

Co-authored-by: Isaac
---
 .../spark/sql/catalyst/xml/StaxXmlGenerator.scala | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
index 2a08a2e4d4b58..6e381a2974c79 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
@@ -19,11 +19,11 @@ package org.apache.spark.sql.catalyst.xml
 import java.io.Writer
 import java.sql.Timestamp
 import java.util.Base64
-import javax.xml.stream.XMLOutputFactory
 
 import scala.collection.Map
 
 import org.apache.hadoop.shaded.com.ctc.wstx.api.WstxOutputProperties
+import org.apache.hadoop.shaded.com.ctc.wstx.stax.WstxOutputFactory
 
 import org.apache.spark.SparkIllegalArgumentException
 import org.apache.spark.sql.catalyst.InternalRow
@@ -72,7 +72,13 @@ class StaxXmlGenerator(
   private val binaryFormatter = ToStringBase.getBinaryFormatter
 
   private val gen = {
-    val factory = XMLOutputFactory.newInstance()
+    // Instantiate the Woodstox factory directly from the shaded Hadoop classes instead of
+    // using XMLOutputFactory.newInstance(). The latter resolves an implementation via the
+    // service-loader mechanism, which could pick up a different (unshaded) StAX provider on the
+    // classpath. Such a provider would not understand the shaded WstxOutputProperties keys set
+    // below and would throw IllegalArgumentException. Constructing the shaded factory directly
+    // guarantees the properties and the implementation always match.
+    val factory = new WstxOutputFactory()
     // to_xml disables structure validation to allow multiple root tags
     factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE, validateStructure)
     factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_NAMES, options.validateName)