Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
<apache.httpclient.version>4.5.14</apache.httpclient.version>
<apache.httpcomponents.version>4.4.16</apache.httpcomponents.version>
<auto.publish.central>true</auto.publish.central>
<avro.version>1.11.4</avro.version>
<avro.version>1.12.0</avro.version>
<bouncycastle.version>1.79</bouncycastle.version>
<codehaus.version>1.9.13</codehaus.version>
<commonscodec.version>1.18.0</commonscodec.version>
Expand All @@ -51,13 +51,13 @@
<commonsio.version>2.17.0</commonsio.version>
<commonslang3.version>3.18.0</commonslang3.version>
<commonstext.version>1.13.0</commonstext.version>
<fasterxml.version>2.18.1</fasterxml.version>
<fasterxml.version>2.19.2</fasterxml.version>
<google.httpclient.version>1.45.0</google.httpclient.version>
<grpc.version>1.67.1</grpc.version>
<gson.version>2.11.0</gson.version>
<guava.version>33.3.1-jre</guava.version>
<hadoop.version>3.4.2</hadoop.version>
<iceberg.version>1.6.1</iceberg.version>
<iceberg.version>1.10.0</iceberg.version>
<jacoco.skip.instrument>true</jacoco.skip.instrument>
<jacoco.version>0.8.13</jacoco.version>
<license.processing.dependencyJarsDir>${project.build.directory}/dependency-jars</license.processing.dependencyJarsDir>
Expand All @@ -70,13 +70,13 @@
<netty.version>4.1.124.Final</netty.version>
<nimbusds.version>10.0.2</nimbusds.version>
<objenesis.version>3.1</objenesis.version>
<parquet.version>1.14.4</parquet.version>
<parquet.version>1.16.0</parquet.version>
<powermock.version>2.0.9</powermock.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<protobuf.version>4.27.5</protobuf.version>
<shadeBase>net.snowflake.ingest.internal</shadeBase>
<slf4j.version>1.7.36</slf4j.version>
<snappy.version>1.1.10.5</snappy.version>
<slf4j.version>2.0.17</slf4j.version>
<snappy.version>1.1.10.7</snappy.version>
<snowjdbc.version>3.25.1</snowjdbc.version>
<threetenbp.version>1.7.0</threetenbp.version>
<version.plugin.buildhelper>3.6.1</version.plugin.buildhelper>
Expand Down Expand Up @@ -421,6 +421,12 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
<version>${parquet.version}</version>
<exclusions>
<exclusion>
<groupId>org.locationtech.jts</groupId>
<artifactId>jts-core</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
Expand Down Expand Up @@ -1306,6 +1312,7 @@
<includedLicense>BSD 2-Clause License</includedLicense>
<includedLicense>3-Clause BSD License</includedLicense>
<includedLicense>The MIT License</includedLicense>
<includedLicense>MIT</includedLicense>
<includedLicense>EDL 1.0</includedLicense>
<includedLicense>The Go License</includedLicense>
<includedLicense>Bouncy Castle Licence</includedLicense>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@
import com.google.common.collect.Lists;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import javax.annotation.Nonnull;
import org.apache.iceberg.parquet.TypeToMessageType;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.JsonUtil;
import org.apache.parquet.schema.LogicalTypeAnnotation;

/**
* This class is used to Iceberg data type (include primitive types and nested types) serialization
* and deserialization.
* This class handles serialization and deserialization of Iceberg data types (including primitive
* types, variant, and nested types).
*
* <p>This code is modified from
* GlobalServices/modules/data-lake/datalake-api/src/main/java/com/snowflake/metadata/iceberg
Expand Down Expand Up @@ -72,6 +74,8 @@ public static org.apache.parquet.schema.Type parseIcebergDataTypeStringToParquet
if (icebergType.isPrimitiveType()) {
parquetType =
typeToMessageType.primitive(icebergType.asPrimitiveType(), repetition, id, name);
} else if (icebergType.isVariantType()) {
parquetType = typeToMessageType.variant(repetition, id, name);
} else {
switch (icebergType.typeId()) {
case LIST:
Expand Down Expand Up @@ -118,6 +122,10 @@ public static Type deserializeIcebergType(String icebergDataType) {
*/
public static Type getTypeFromJson(@Nonnull JsonNode jsonNode) {
if (jsonNode.isTextual()) {
if (jsonNode.asText().toLowerCase(Locale.ROOT).equals(Types.VariantType.get().toString())) {
return Types.VariantType.get();
}

return Types.fromPrimitiveString(jsonNode.asText());
} else if (jsonNode.isObject()) {
if (!jsonNode.has(TYPE)) {
Expand Down Expand Up @@ -252,7 +260,22 @@ private static org.apache.parquet.schema.Type replaceWithOriginalFieldName(
.id(parquetType.getId().intValue())
.length(parquetType.asPrimitiveType().getTypeLength())
.named(fieldName);
} else if (parquetType.getLogicalTypeAnnotation()
instanceof LogicalTypeAnnotation.VariantLogicalTypeAnnotation) {
/* rename field name */
org.apache.parquet.schema.Types.GroupBuilder<org.apache.parquet.schema.GroupType> builder =
org.apache.parquet.schema.Types.buildGroup(parquetType.getRepetition());

for (org.apache.parquet.schema.Type type : parquetType.asGroupType().getFields()) {
builder.addField(type);
}

return builder
.as(parquetType.getLogicalTypeAnnotation())
.id(parquetType.getId().intValue())
.named(fieldName);
}

org.apache.parquet.schema.Types.GroupBuilder<org.apache.parquet.schema.GroupType> builder =
org.apache.parquet.schema.Types.buildGroup(parquetType.getRepetition());
for (org.apache.parquet.schema.Type parquetFieldType : parquetType.asGroupType().getFields()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ public void setup() {
generateFieldId(),
Types.IntegerType.get(),
Types.StringType.get())));
dataTypesToTest.add(new DataTypeInfo("\"variant\"", Types.VariantType.get()));
}

/** Helper function to generate a unique fieldId for nested types */
Expand Down
Loading