diff --git a/benchmarks/java/src/main/java/org/apache/fory/benchmark/SchemaEvolutionSuite.java b/benchmarks/java/src/main/java/org/apache/fory/benchmark/SchemaEvolutionSuite.java new file mode 100644 index 0000000000..6b24d77344 --- /dev/null +++ b/benchmarks/java/src/main/java/org/apache/fory/benchmark/SchemaEvolutionSuite.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.benchmark; + +import java.util.Arrays; +import org.apache.fory.format.annotation.ForyVersion; +import org.apache.fory.format.encoder.Encoders; +import org.apache.fory.format.encoder.RowEncoder; +import org.apache.fory.logging.Logger; +import org.apache.fory.logging.LoggerFactory; +import org.openjdk.jmh.Main; +import org.openjdk.jmh.annotations.Benchmark; + +/** + * Row-codec schema-evolution throughput and allocation. Pair with the JMH gc profiler ({@code -prof + * gc}) to read {@code gc.alloc.rate.norm} (bytes per op). 
Two comparisons matter: {@code + * currentDecode} vs {@code olderDecode} shows that decoding an older payload through a projection + * codec allocates no more than decoding the current schema, because each projection holds its + * historical schema's row layout (no per-decode rebuild); and the {@code *NoEvolution} benchmarks + * vs their evolution-on counterparts show the steady-state cost of enabling {@code + * withSchemaEvolution()} when reading and writing current-version data. + */ +public class SchemaEvolutionSuite { + private static final Logger LOG = LoggerFactory.getLogger(SchemaEvolutionSuite.class); + + public static class PersonV1 { + String name; + int age; + } + + public static class PersonV2 { + String name; + int age; + + @ForyVersion(since = 2) + String email; + } + + // Evolution-enabled codecs for the current (V2) schema; the V1 codec only produces a payload + // whose hash routes the V2 reader onto its projection path. Both standard and compact formats + // are measured: compact is where a per-projection cached row layout matters, so olderDecode vs + // currentDecode there is the parity check. + private static final RowEncoder v1Codec = + Encoders.buildBeanCodec(PersonV1.class).withSchemaEvolution().build().get(); + private static final RowEncoder v2Codec = + Encoders.buildBeanCodec(PersonV2.class).withSchemaEvolution().build().get(); + private static final RowEncoder v1CompactCodec = + Encoders.buildBeanCodec(PersonV1.class).compactEncoding().withSchemaEvolution().build().get(); + private static final RowEncoder v2CompactCodec = + Encoders.buildBeanCodec(PersonV2.class).compactEncoding().withSchemaEvolution().build().get(); + + // Evolution-disabled codecs for the same current (V2) schema. 
Comparing the *NoEvolution + // benchmarks against their evolution-on counterparts isolates the steady-state cost of the + // withSchemaEvolution() flag on the common path (reading and writing current-version data): the + // 8-byte hash slot the evolution wire format adds, plus the hash compare on decode. + private static final RowEncoder v2PlainCodec = + Encoders.buildBeanCodec(PersonV2.class).build().get(); + private static final RowEncoder v2PlainCompactCodec = + Encoders.buildBeanCodec(PersonV2.class).compactEncoding().build().get(); + + private static final PersonV2 person = newPerson(); + private static final byte[] currentBytes = v2Codec.encode(person); + private static final byte[] olderBytes = v1Codec.encode(newPersonV1()); + private static final byte[] currentCompactBytes = v2CompactCodec.encode(person); + private static final byte[] olderCompactBytes = v1CompactCodec.encode(newPersonV1()); + private static final byte[] plainBytes = v2PlainCodec.encode(person); + private static final byte[] plainCompactBytes = v2PlainCompactCodec.encode(person); + + private static PersonV2 newPerson() { + PersonV2 p = new PersonV2(); + p.name = "Ada Lovelace"; + p.age = 36; + p.email = "ada@example.com"; + return p; + } + + private static PersonV1 newPersonV1() { + PersonV1 p = new PersonV1(); + p.name = "Ada Lovelace"; + p.age = 36; + return p; + } + + @Benchmark + public Object encode() { + return v2Codec.encode(person); + } + + @Benchmark + public Object currentDecode() { + return v2Codec.decode(currentBytes); + } + + @Benchmark + public Object olderDecode() { + return v2Codec.decode(olderBytes); + } + + @Benchmark + public Object compactEncode() { + return v2CompactCodec.encode(person); + } + + @Benchmark + public Object compactCurrentDecode() { + return v2CompactCodec.decode(currentCompactBytes); + } + + @Benchmark + public Object compactOlderDecode() { + return v2CompactCodec.decode(olderCompactBytes); + } + + // Evolution-off baselines for the current path. 
Pair each with its evolution-on counterpart + // (encode/currentDecode and the compact variants) to read the flag's overhead. + @Benchmark + public Object encodeNoEvolution() { + return v2PlainCodec.encode(person); + } + + @Benchmark + public Object currentDecodeNoEvolution() { + return v2PlainCodec.decode(plainBytes); + } + + @Benchmark + public Object compactEncodeNoEvolution() { + return v2PlainCompactCodec.encode(person); + } + + @Benchmark + public Object compactCurrentDecodeNoEvolution() { + return v2PlainCompactCodec.decode(plainCompactBytes); + } + + public static void main(String[] args) throws Exception { + if (args.length == 0) { + String commandLine = + "org.apache.fory.*SchemaEvolutionSuite.* -f 3 -wi 3 -i 3 -t 1 -w 2s -r 2s -prof gc -rf csv"; + args = commandLine.split(" "); + } + LOG.info("command line: {}", Arrays.toString(args)); + Main.main(args); + } +} diff --git a/docs/guide/java/row-format.md b/docs/guide/java/row-format.md index 477f9ec136..07c029b6a8 100644 --- a/docs/guide/java/row-format.md +++ b/docs/guide/java/row-format.md @@ -187,6 +187,97 @@ std::string str = bar10->get_string(0); | Memory usage | Full object graph in memory | Only accessed fields | | Suitable for | Small objects, full access | Large objects, selective access | +## Schema evolution + +Enable `.withSchemaEvolution()` on a row, array, or map codec builder to read payloads written +by older versions of the same bean. Writing always uses the current version; reading detects +the payload's version from a strict hash at the head of the payload. Java only. + +Annotate fields added after v1 with `@ForyVersion(since = N)`: + +```java +@Data +public class Person { + String name; + int age; + + @ForyVersion(since = 2) + String email; +} +``` + +A v1 payload (with `name` and `age` only) decodes to a `Person` whose `email` is `null`. +Primitive fields added later default to `0`, `0.0`, or `false`. 
Unannotated fields are treated +as present from the first version, so a class can adopt versioning by annotating only the fields +added after v1. + +Remove a field by deleting the Java member and declaring it on a nested history interface as a +method with a `@ForyVersion(until = N)`. The method's return type carries any parameterized +type information from the original field. + +```java +@Data +@ForySchema(removedFields = Person.History.class) +public class Person { + String name; + + @ForyVersion(since = 2) + String email; + + interface History { + @ForyVersion(until = 3) + int age(); + + @ForyVersion(until = 5) + List tags(); + } +} +``` + +The history method name matches the original live descriptor name: the field name for Lombok +`@Data` or records (`age`, `tags`), or the full accessor name for JavaBeans-style classes and +interfaces (`getAge`). + +### Wire format and limitations + +Producers and consumers must agree on the `withSchemaEvolution()` flag — they are not +wire-compatible otherwise. Row payloads always carry an 8-byte hash slot; under evolution its +value is the strict hash (which includes field name and nullability), so a flag-mismatched +peer fails loudly with `ClassNotCompatibleException`. Arrays and maps of bean elements prepend +an 8-byte strict-hash prefix under evolution and no prefix otherwise; an evolution-on consumer +reading evolution-off bytes also fails with `ClassNotCompatibleException`, but the reverse +direction (evolution-off consumer, evolution-on bytes) is undefined. + +To adopt the flag on an existing deployment, enable `withSchemaEvolution()` on both sides in a +release that changes no schema, then start evolving schemas only once every peer is on the +evolution-enabled build. Turning the flag on and changing a schema in the same release strands +any peer that has not yet upgraded. + +Cross-language consumers (Python, C++) cannot read evolution-enabled payloads. 
+ +A reader selects the matching layout from the 8-byte strict hash on the payload. The hash includes +field names and nullability and is checked for collisions across a bean's own versions when the +codec is built, but it is still a 64-bit value: a payload whose hash coincides with one of the +reader's historical layouts is decoded against that layout. This is the same hash-based dispatch +the row format has always used, so feeding a codec bytes it was not built for has undefined results +whether or not evolution is enabled. Only hand a codec payloads produced for the same bean. + +Map keys do not carry a per-payload hash; a versioned bean used as a map key is read with the +current schema only, not dispatched to a projection codec. + +Nested evolution works to arbitrary depth and places no restriction on shape: a versioned bean +may contain versioned beans that themselves contain versioned beans, the same versioned bean +class may back more than one field, and fields typed as a non-evolving bean, a list, or a map are +unrestricted. Each nesting level is routed to the correct historical layout. + +When a versioned bean contains other versioned beans, the reader generates one projection codec +class per combination of versions across the composition. The count grows as the product of the +version counts of the distinct nested versioned bean classes, not the number of fields, so +reusing a class across several fields adds no combinations. If the product across distinct classes +becomes a concern, drop entries from each bean's `History` interface once you no longer need to +read payloads from that range. Retiring a history entry is purely a read-side decision; the writer +always uses the current schema. 
+ ## Related Topics - [Xlang Serialization](xlang-serialization.md) - xlang mode diff --git a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java index ab6adc373b..42bfe99242 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java @@ -969,7 +969,11 @@ private static boolean isSupported(TypeRef typeRef, TypeResolutionContext ctx return false; } Tuple2, TypeRef> mapKeyValueType = getMapKeyValueType(typeRef); - return isSupported(mapKeyValueType.f0) && isSupported(mapKeyValueType.f1); + // Thread ctx through both key and value, matching the iterable branch above. The context-less + // isSupported overload resets synthesizeInterfaces to false, which would reject an interface + // bean used as a map key or value even though the same type is supported as a direct field or + // list element. + return isSupported(mapKeyValueType.f0, ctx) && isSupported(mapKeyValueType.f1, ctx); } else if (cls.isEnum()) { return true; } else { diff --git a/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForySchema.java b/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForySchema.java new file mode 100644 index 0000000000..97fa61aa65 --- /dev/null +++ b/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForySchema.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Class-level row-codec schema metadata used when the codec builder enables schema evolution. + * + *

<p>Live fields without a {@link ForyVersion} annotation are treated as present from the first + * version, so a class can adopt versioning by annotating only the fields added later. + * + *

<p>{@link #removedFields()} points at a class (conventionally a nested {@code interface}) whose + * accessor methods describe fields that have been removed from this bean but still appear on the + * wire in older payloads. Each method's return type is the original Java type of the removed field; + * each method must carry a {@link ForyVersion} annotation with {@code until} set, since removed + * fields have a known end-of-life version. + * + *

<p>Example: + * + *

<pre>{@code
+ * @Data
+ * @ForySchema(removedFields = MyBean.History.class)
+ * public class MyBean {
+ *   private String name;
+ *
+ *   interface History {
+ *     @ForyVersion(until = 3)
+ *     List<String> tags();
+ *
+ *     @ForyVersion(since = 2, until = 5)
+ *     Map<String, Integer> counters();
+ *   }
+ * }
+ * }</pre>
+ */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface ForySchema { + /** + * A class whose accessor methods describe historically-present-but-now-removed fields. Default + * {@code void.class} means there are no removed fields. The class is never instantiated; the + * codec reads its method signatures and annotations. + */ + Class removedFields() default void.class; +} diff --git a/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForyVersion.java b/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForyVersion.java new file mode 100644 index 0000000000..b1c86c1944 --- /dev/null +++ b/java/fory-format/src/main/java/org/apache/fory/format/annotation/ForyVersion.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Declares the version window in which a row-codec field is logically present. 
The window is + * inclusive on the left and exclusive on the right, so {@code since=2, until=5} means versions 2, + * 3, and 4. + * + *

<p>Only effective when the codec builder is configured with {@code withSchemaEvolution()}; + * otherwise the annotation is ignored and the field is treated as always present. + * + *

May be placed on a field, an accessor method, or a record component. Record components are + * covered by {@code FIELD} and {@code METHOD} rather than {@code ElementType.RECORD_COMPONENT}: the + * compiler propagates a record-component annotation to the backing field and the accessor method + * (the targets it declares), and the codec reads the annotation from those elements. {@code + * RECORD_COMPONENT} is a JDK 16 enum constant and would break this Java 11 module at runtime, so it + * is intentionally omitted. + */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) +public @interface ForyVersion { + /** First version (inclusive) that contains this field. Defaults to the class base version. */ + int since() default 1; + + /** First version (exclusive) that no longer contains this field. */ + int until() default Integer.MAX_VALUE; +} diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayCodecBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayCodecBuilder.java index 6e6c6d3645..5643d108a6 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayCodecBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayCodecBuilder.java @@ -22,18 +22,23 @@ import static org.apache.fory.type.TypeUtils.getRawType; import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.MethodType; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.Set; import java.util.function.Function; import java.util.function.Supplier; +import org.apache.fory.Fory; +import org.apache.fory.collection.LongMap; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; +import org.apache.fory.format.type.CustomTypeEncoderRegistry; import org.apache.fory.format.type.DataTypes; import org.apache.fory.format.type.Field; +import 
org.apache.fory.format.type.SchemaHistory; import org.apache.fory.format.type.TypeInference; import org.apache.fory.reflect.TypeRef; +import org.apache.fory.type.TypeResolutionContext; import org.apache.fory.type.TypeUtils; import org.apache.fory.util.ExceptionUtils; @@ -63,17 +68,110 @@ public ArrayEncoder get() { Function> buildWithWriter() { loadArrayInnerCodecs(); - final Function generatedEncoderFactory = + if (!schemaEvolution || evolutionBean() == null) { + final Function generatedEncoderFactory = + generatedEncoderFactory(); + return new Function>() { + @Override + public ArrayEncoder apply(final BinaryArrayWriter writer) { + return new BinaryArrayEncoder<>( + writer, generatedEncoderFactory.apply(writer), sizeEmbedded); + } + }; + } + return buildVersionedWithWriter(); + } + + /** + * Bean this array evolves on, reachable through the element type. A directly-typed bean + * (versioned or not) takes the evolution path so the strict-hash prefix is always present and an + * evolution-on consumer can detect a flag-mismatched producer cleanly; a versioned bean nested + * inside a list/map/array element is found by descending the wrapper. Null when the element + * carries no bean. + * + *

The resolution context matches the row-format type inference, which synthesizes + * interface-typed bean fields; without it a class with interface members would not be recognized + * as a bean even though the row codec can encode it. + */ + private Class evolutionBean() { + return SchemaHistory.evolutionBean( + TypeUtils.getElementType(collectionType), + new TypeResolutionContext(CustomTypeEncoderRegistry.customTypeHandler(), true)); + } + + private Function> buildVersionedWithWriter() { + Class elementClass = evolutionBean(); + SchemaHistory history = buildSchemaHistory(elementClass); + SchemaHistory.VersionedSchema current = history.current(); + + // Make sure the current-version row codec class is generated. + Encoders.loadOrGenRowCodecClass(elementClass, codecFormat); + // Generate per-combination row codec classes and per-combination array codec classes. The + // suffix encodes the outer version plus each chosen inner-bean version so that distinct + // cross-product entries do not collide on a single generated class. + Map projectionFactories = new HashMap<>(); + for (SchemaHistory.VersionedSchema vs : history.versions()) { + if (vs == current) { + continue; + } + String suffix = ProjectionRouting.projectionSuffix(vs); + Map, String> nestedSuffixes = ProjectionRouting.nestedSuffixesFor(vs, codecFormat); + Encoders.loadOrGenProjectionRowCodecClass( + elementClass, codecFormat, vs.schema(), vs.liveFieldNames(), suffix, nestedSuffixes); + Class arrayClass = + Encoders.loadOrGenProjectionArrayCodecClass( + collectionType, TypeRef.of(elementClass), codecFormat, suffix); + MethodHandle ctor = Encoders.constructorHandleFor(arrayClass, GeneratedArrayEncoder.class); + // The array's "elementField" is a ListType whose valueField is the element. Project that + // value onto this historical version so the projection codec produces a BinaryArray with the + // right element width. 
The bean sits directly at the value or inside a list/map/array element + // wrapper, which projectThroughWrapper preserves around the historical struct. + Field histValueField = + SchemaHistory.projectThroughWrapper( + DataTypes.arrayElementField(elementField), + TypeUtils.getElementType(collectionType), + vs); + Field histListField = DataTypes.arrayField(elementField.name(), histValueField); + projectionFactories.put(vs.strictHash(), new ProjectionArrayFactory(histListField, ctor)); + } + final Function currentFactory = generatedEncoderFactory(); + long currentHash = current.strictHash(); return new Function>() { @Override public ArrayEncoder apply(final BinaryArrayWriter writer) { + LongMap proj = + new LongMap<>(projectionFactories.size()); + for (Map.Entry entry : projectionFactories.entrySet()) { + proj.put(entry.getKey(), entry.getValue().instantiate(fory)); + } return new BinaryArrayEncoder<>( - writer, generatedEncoderFactory.apply(writer), sizeEmbedded); + writer, currentFactory.apply(writer), sizeEmbedded, currentHash, proj); } }; } + private final class ProjectionArrayFactory { + private final Field elementField; + private final MethodHandle ctor; + + ProjectionArrayFactory(Field elementField, MethodHandle ctor) { + this.elementField = elementField; + this.ctor = ctor; + } + + BinaryArrayEncoder.ProjectionArrayCodec instantiate(Fory fory) { + try { + BinaryArrayWriter projWriter = codecFormat.newArrayWriter(elementField); + Object[] references = {elementField, projWriter, fory}; + GeneratedArrayEncoder codec = (GeneratedArrayEncoder) ctor.invokeExact(references); + return new BinaryArrayEncoder.ProjectionArrayCodec(projWriter, codec); + } catch (Throwable e) { + throw ExceptionUtils.throwException(e); + } + } + } + private void loadArrayInnerCodecs() { final Set> set = new HashSet<>(); Encoders.findBeanToken(collectionType, set); @@ -90,30 +188,15 @@ Function generatedEncoderFactory() { final TypeRef elementType = 
TypeUtils.getElementType(collectionType); final Class arrayCodecClass = Encoders.loadOrGenArrayCodecClass(collectionType, elementType, codecFormat); - - final MethodHandle constructorHandle; - try { - final var constructor = - arrayCodecClass.asSubclass(GeneratedArrayEncoder.class).getConstructor(Object[].class); - constructorHandle = - MethodHandles.lookup() - .unreflectConstructor(constructor) - .asType(MethodType.methodType(GeneratedArrayEncoder.class, Object[].class)); - } catch (final NoSuchMethodException | IllegalAccessException e) { - throw new EncoderException( - "Failed to construct array codec for " - + collectionType - + " with element class " - + elementType, - e); - } + final MethodHandle constructorHandle = + Encoders.constructorHandleFor(arrayCodecClass, GeneratedArrayEncoder.class); return new Function() { @Override public GeneratedArrayEncoder apply(final BinaryArrayWriter writer) { final Object[] references = {writer.getField(), writer, fory}; try { return (GeneratedArrayEncoder) constructorHandle.invokeExact(references); - } catch (final Throwable t) { + } catch (Throwable t) { throw ExceptionUtils.throwException(t); } } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayEncoderBuilder.java index c24611cd82..4184edf8f5 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ArrayEncoderBuilder.java @@ -54,7 +54,17 @@ public ArrayEncoderBuilder(Class arrayCls, Class beanClass) { } public ArrayEncoderBuilder(TypeRef clsType, TypeRef beanType) { + this(clsType, beanType, null); + } + + /** + * Construct an array codec builder that embeds row codec class references for its element bean + * with the supplied suffix. Used by schema-evolution code to point per-version array codecs at + * per-version row codecs. 
+ */ + ArrayEncoderBuilder(TypeRef clsType, TypeRef beanType, String rowCodecSuffix) { super(new CodegenContext(), beanType); + this.rowCodecSuffixForBeans = rowCodecSuffix; arrayToken = clsType; ctx.reserveName(ROOT_ARRAY_WRITER_NAME); ctx.reserveName(ROOT_ARRAY_NAME); @@ -83,7 +93,9 @@ public ArrayEncoderBuilder(TypeRef clsType, TypeRef beanType) { @Override public String genCode() { ctx.setPackage(CodeGenerator.getPackage(beanClass)); - String className = codecClassName(beanClass, TypeInference.inferTypeName(arrayToken)); + String className = + codecClassName(beanClass, TypeInference.inferTypeName(arrayToken)) + + (rowCodecSuffixForBeans == null ? "" : rowCodecSuffixForBeans); ctx.setClassName(className); // don't addImport(arrayClass), because user class may name collide. // janino don't support generics, so GeneratedCodec has no generics @@ -123,8 +135,8 @@ public String genCode() { long startTime = System.nanoTime(); String code = ctx.genCode(); - long durationMs = (System.nanoTime() - startTime) / 1000_000; - LOG.info("Generate array codec for class {} take {} us", beanClass, durationMs); + long durationUs = (System.nanoTime() - startTime) / 1000; + LOG.info("Generate array codec for class {} take {} us", beanClass, durationUs); return code; } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java index a46d8585f0..c836f5f250 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java @@ -96,12 +96,25 @@ public abstract class BaseBinaryEncoderBuilder extends CodecBuilder { protected static TypeRef binaryArrayTypeToken = TypeRef.of(BinaryArray.class); protected final Map, Reference> arrayWriterMap = new HashMap<>(); - protected final Map, Reference> beanEncoderMap = new 
HashMap<>(); + + // Keyed by beanCodecKey(typeRef) rather than the raw typeRef so a single builder can hold two + // codecs for one bean class when position matters: a map decodes its key bean at the current + // schema while its value bean may project to a historical one, and the two would otherwise + // collide on the same type key. + protected final Map beanEncoderMap = new HashMap<>(); + + /** + * When non-null, nested bean codec class references generated by this builder will be suffixed + * with this string. Used by schema-evolution code paths to direct generated array/map codecs to + * the projection variant of an element bean's row codec. + */ + protected String rowCodecSuffixForBeans; + // We need to call beanEncoder's rowWriter.reset() before write a corresponding nested bean every // time. // Outermost beanEncoder's rowWriter.reset() should be called outside generated code before // writer an outermost bean every time. - protected final Map, Reference> rowWriterMap = new HashMap<>(); + protected final Map rowWriterMap = new HashMap<>(); protected final CustomTypeHandler customTypeHandler = CustomTypeEncoderRegistry.customTypeHandler(); protected final TypeResolutionContext typeCtx; @@ -482,34 +495,10 @@ protected Expression serializeForBean( Field fieldIfKnown, TypeRef typeRef, Expression structField) { - Class rawType = getRawType(typeRef); - Reference rowWriter; - Reference beanEncoder = beanEncoderMap.get(typeRef); - if (beanEncoder == null) { - // janino generics don't add cast, so this `<${type}>` is only for generated code readability - Expression schema = createSchemaFromStructField(structField); - String rowWriterName = - ctx.newName(StringUtils.uncapitalize(rawType.getSimpleName() + "RowWriter")); - NewInstance newRowWriter = new NewInstance(rowWriterType(), schema, writer); - ctx.addField(ctx.type(rowWriterType()), rowWriterName, newRowWriter); - - Preconditions.checkArgument(!codecClassName(rawType).contains(".")); - String encoderName = 
ctx.newName(StringUtils.uncapitalize(codecClassName(rawType))); - String encoderClass = codecQualifiedClassName(rawType); - TypeRef codecTypeRef = TypeRef.of(GeneratedRowEncoder.class); - NewInstance newEncoder = - new NewInstance( - codecTypeRef, - encoderClass, - ExpressionUtils.newObjectArray(schema, newRowWriter, foryRef)); - ctx.addField(encoderClass, encoderName, newEncoder); - - rowWriter = new Reference(rowWriterName, rowWriterType()); - rowWriterMap.put(typeRef, rowWriter); - beanEncoder = new Reference(encoderName, codecTypeRef); - beanEncoderMap.put(typeRef, beanEncoder); - } - rowWriter = rowWriterMap.get(typeRef); + registerBeanCodec(writer, typeRef, structField); + Object codecKey = beanCodecKey(typeRef); + Reference rowWriter = rowWriterMap.get(codecKey); + Reference beanEncoder = beanEncoderMap.get(codecKey); Expression expression = serializeForNotNullBean(ordinal, writer, inputObject, fieldIfKnown, rowWriter, beanEncoder); @@ -518,6 +507,58 @@ protected Expression serializeForBean( new Expression.IsNull(inputObject), new Invoke(writer, "setNullAt", ordinal), expression); } + /** + * Idempotently add the nested-bean row writer and row encoder as fields on the generated codec + * class and register them in {@link #beanEncoderMap} and {@link #rowWriterMap}. Used both by + * {@link #serializeForBean} and by decode-only projection codegen, where the encode pass is + * skipped but the decode pass still needs the bean encoder reference. 
+ */ + protected void registerBeanCodec(Expression writer, TypeRef typeRef, Expression structField) { + Object codecKey = beanCodecKey(typeRef); + if (beanEncoderMap.containsKey(codecKey)) { + return; + } + Class rawType = getRawType(typeRef); + Expression schema = createSchemaFromStructField(structField); + String rowWriterName = + ctx.newName(StringUtils.uncapitalize(rawType.getSimpleName() + "RowWriter")); + NewInstance newRowWriter = new NewInstance(rowWriterType(), schema, writer); + ctx.addField(ctx.type(rowWriterType()), rowWriterName, newRowWriter); + + Preconditions.checkArgument(!codecClassName(rawType).contains(".")); + String encoderName = ctx.newName(StringUtils.uncapitalize(codecClassName(rawType))); + String encoderClass = codecQualifiedClassName(rawType) + nestedBeanSuffix(typeRef); + TypeRef codecTypeRef = TypeRef.of(GeneratedRowEncoder.class); + NewInstance newEncoder = + new NewInstance( + codecTypeRef, + encoderClass, + ExpressionUtils.newObjectArray(schema, newRowWriter, foryRef)); + ctx.addField(encoderClass, encoderName, newEncoder); + + rowWriterMap.put(codecKey, new Reference(rowWriterName, rowWriterType())); + beanEncoderMap.put(codecKey, new Reference(encoderName, codecTypeRef)); + } + + /** + * Registration key for a nested bean's row writer and codec in {@link #beanEncoderMap} and {@link + * #rowWriterMap}. Defaults to the type itself, so each bean class maps to a single codec. + * Subclasses where one class can appear in two positions that need different codecs (such as a + * map key versus its value) override this to keep those registrations distinct. + */ + protected Object beanCodecKey(TypeRef typeRef) { + return typeRef; + } + + /** + * Suffix to append to a nested bean's codec class name when emitting a reference. Defaults to the + * single uniform suffix (or empty); subclasses with per-type version routing can override to + * return a per-typeRef suffix from a map. 
+ */ + protected String nestedBeanSuffix(TypeRef typeRef) { + return rowCodecSuffixForBeans == null ? "" : rowCodecSuffixForBeans; + } + protected Expression createSchemaFromStructField(Expression structField) { return new StaticInvoke( DataTypes.class, "schemaFromStructField", "schema", SCHEMA_TYPE, false, structField); @@ -681,9 +722,19 @@ protected Expression deserializeFor( * . */ protected Expression deserializeForBean(Expression row, TypeRef typeRef) { - Reference beanEncoder = beanEncoderMap.get(typeRef); + // beanCodecKey(typeRef) must resolve to the same position (key vs value) here as it did when + // registerBeanCodec() ran. A miss means this decode is reached outside the position scope that + // registered the codec -- e.g. a nested key bean built outside MapEncoderBuilder.keyScoped / + // KeyPositionScope -- so the lookup falls back to the wrong key. Fail loud rather than + // mis-route. + Reference beanEncoder = beanEncoderMap.get(beanCodecKey(typeRef)); if (beanEncoder == null) { - throw new IllegalStateException("beanEncoder should have be added in serializeForBean()"); + throw new IllegalStateException( + "No bean codec registered for " + + typeRef + + " under key " + + beanCodecKey(typeRef) + + "; registerBeanCodec() must run in the same key/value position as this decode"); } Invoke beanObj = new Invoke(beanEncoder, "fromRow", TypeUtils.OBJECT_TYPE, false, row); return new Cast(beanObj, typeRef, "bean"); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseCodecBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseCodecBuilder.java index 81f78ca247..045012e9da 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseCodecBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseCodecBuilder.java @@ -19,17 +19,33 @@ package org.apache.fory.format.encoder; +import java.util.function.UnaryOperator; import org.apache.fory.Fory; import 
org.apache.fory.format.row.binary.CompactBinaryRow; import org.apache.fory.format.row.binary.writer.CompactBinaryRowWriter; import org.apache.fory.format.type.Schema; +import org.apache.fory.format.type.SchemaHistory; +import org.apache.fory.logging.Logger; +import org.apache.fory.logging.LoggerFactory; public class BaseCodecBuilder> { + private static final Logger LOG = LoggerFactory.getLogger(BaseCodecBuilder.class); + + /** + * Number of historical schemas for one bean above which {@link #buildSchemaHistory} logs a + * warning. Each distinct schema becomes one generated projection codec class (compiled and loaded + * at build time), and the count grows as the product of the per-class version counts across + * nested versioned beans. The JVM handles far more classes than this; the threshold flags a + * likely misconfigured version history, since no hand-written history reaches it by accident. + */ + private static final int PROJECTION_COUNT_WARN_THRESHOLD = 256; + protected Schema schema; protected int initialBufferSize = 16; protected boolean sizeEmbedded = true; protected Fory fory; protected Encoding codecFormat = DefaultCodecFormat.INSTANCE; + protected boolean schemaEvolution = false; BaseCodecBuilder(final Schema schema) { this.schema = schema; @@ -58,6 +74,22 @@ public B withSizeEmbedded(final boolean sizeEmbedded) { return castThis(); } + /** + * Enable schema evolution. The codec accepts payloads written by older versions of the same bean, + * using the {@link org.apache.fory.format.annotation.ForyVersion} and {@link + * org.apache.fory.format.annotation.ForySchema} annotations to reconstruct historical schemas. + * Writing always uses the current version. + * + *

For array and map codecs, this changes the wire format by adding an 8-byte strict-hash + * prefix to the payload, so producers and consumers must agree on the flag. Row payloads already + * carry an 8-byte hash slot; under schema evolution that slot is computed with a stricter hash + * that also distinguishes field names and nullability. + */ + public B withSchemaEvolution() { + this.schemaEvolution = true; + return castThis(); + } + /** * Configure compact encoding, which is more space efficient than the default encoding, but is not * yet stable. See {@link CompactBinaryRow} for details. @@ -68,6 +100,31 @@ public B compactEncoding() { return castThis(); } + /** + * Build the schema history for {@code targetClass} under the active codec format. The compact + * format sorts schema fields, so historical schemas must be sorted the same way for their strict + * hashes and layouts to match what the writer produces; the default format passes schemas through + * unchanged. + */ + protected SchemaHistory buildSchemaHistory(final Class targetClass) { + UnaryOperator schemaTransform = + codecFormat == CompactCodecFormat.INSTANCE + ? CompactBinaryRowWriter::sortSchema + : UnaryOperator.identity(); + SchemaHistory history = SchemaHistory.build(targetClass, schemaTransform); + int projectionCount = history.versions().size(); + if (projectionCount > PROJECTION_COUNT_WARN_THRESHOLD) { + LOG.warn( + "Schema evolution for {} resolved {} historical schemas, each generating a projection " + + "codec class. 
This count grows as the product of per-class version counts across " + + "nested versioned beans; retire @ForyVersion history ranges you no longer read to " + + "reduce it.", + targetClass.getName(), + projectionCount); + } + return history; + } + @SuppressWarnings("unchecked") protected B castThis() { return (B) this; diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryArrayEncoder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryArrayEncoder.java index d1b2b9184f..1ef965b3e2 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryArrayEncoder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryArrayEncoder.java @@ -19,9 +19,12 @@ package org.apache.fory.format.encoder; +import org.apache.fory.collection.LongMap; +import org.apache.fory.exception.ClassNotCompatibleException; import org.apache.fory.format.row.binary.BinaryArray; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; import org.apache.fory.format.type.Field; +import org.apache.fory.memory.LittleEndian; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.memory.MemoryUtils; @@ -30,13 +33,47 @@ class BinaryArrayEncoder implements ArrayEncoder { private final GeneratedArrayEncoder codec; private final boolean sizeEmbedded; + /** + * Strict hash of the element bean's current schema; written before the array payload when {@code + * schemaEvolution} is on. + */ + private final long currentHash; + + /** Per-version projection codecs and their element fields. {@code null} disables versioning. */ + private final LongMap projections; + + /** + * A projection variant of the array codec along with the writer used to materialize an array + * instance of the right physical type (standard vs. compact) for the historical element field. 
+ */ + static final class ProjectionArrayCodec { + final BinaryArrayWriter writer; + final GeneratedArrayEncoder codec; + + ProjectionArrayCodec(BinaryArrayWriter writer, GeneratedArrayEncoder codec) { + this.writer = writer; + this.codec = codec; + } + } + BinaryArrayEncoder( final BinaryArrayWriter writer, final GeneratedArrayEncoder codec, final boolean sizeEmbedded) { + this(writer, codec, sizeEmbedded, 0L, null); + } + + BinaryArrayEncoder( + final BinaryArrayWriter writer, + final GeneratedArrayEncoder codec, + final boolean sizeEmbedded, + final long currentHash, + final LongMap projections) { this.writer = writer; this.codec = codec; this.sizeEmbedded = sizeEmbedded; + this.currentHash = currentHash; + this.projections = projections; } @Override @@ -62,22 +99,61 @@ public T decode(final MemoryBuffer buffer) { @Override public T decode(final byte[] bytes) { - // byte[] overloads ignore sizeEmbedded: encode writes no size prefix, decode uses bytes.length. + // byte[] overloads ignore sizeEmbedded: encode writes no length prefix (under schema evolution + // an 8-byte hash leads the body, but that is data, not framing), so decode takes the size from + // bytes.length. 
return decode(MemoryUtils.wrap(bytes), bytes.length); } + @SuppressWarnings("unchecked") T decode(final MemoryBuffer buffer, final int size) { - final BinaryArray array = writer.newArray(); + if (projections == null) { + final BinaryArray array = writer.newArray(); + final int readerIndex = buffer.readerIndex(); + array.pointTo(buffer, readerIndex, size); + buffer.readerIndex(readerIndex + size); + return fromArray(array); + } + if (size < 8) { + throw new ClassNotCompatibleException( + "Array payload too small for an 8-byte schema hash under schema evolution: size=" + size); + } + final long peerHash = buffer.readInt64(); + final int bodySize = size - 8; + if (peerHash == currentHash) { + final BinaryArray array = writer.newArray(); + final int readerIndex = buffer.readerIndex(); + array.pointTo(buffer, readerIndex, bodySize); + buffer.readerIndex(readerIndex + bodySize); + return fromArray(array); + } + ProjectionArrayCodec projection = projections.get(peerHash); + if (projection == null) { + throw new ClassNotCompatibleException( + String.format( + "Array element schema is not consistent. self/peer hash are %s/%s.", + currentHash, peerHash)); + } + BinaryArray array = projection.writer.newArray(); final int readerIndex = buffer.readerIndex(); - array.pointTo(buffer, readerIndex, size); - buffer.readerIndex(readerIndex + size); - return fromArray(array); + array.pointTo(buffer, readerIndex, bodySize); + buffer.readerIndex(readerIndex + bodySize); + return (T) projection.codec.fromArray(array); } @Override public byte[] encode(final T obj) { final BinaryArray array = toArray(obj); - return writer.getBuffer().getBytes(0, array.getSizeInBytes()); + if (projections == null) { + return writer.getBuffer().getBytes(0, array.getSizeInBytes()); + } + // Build the result with a single allocation: the result byte[]. The hash header is poked + // in via LittleEndian (no buffer wrapper) and the body is copied in via System.arraycopy. 
+ final int n = array.getSizeInBytes(); + final byte[] result = new byte[8 + n]; + LittleEndian.putInt64(result, 0, currentHash); + writer.getBuffer().get(0, result, 8, n); + return result; } @Override @@ -87,6 +163,9 @@ public int encode(final MemoryBuffer buffer, final T obj) { if (sizeEmbedded) { buffer.writeInt32(-1); } + if (projections != null) { + buffer.writeInt64(currentHash); + } try { writer.setBuffer(buffer); toArray(obj); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryMapEncoder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryMapEncoder.java index 90ba96dc5e..cd846b3860 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryMapEncoder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryMapEncoder.java @@ -19,10 +19,13 @@ package org.apache.fory.format.encoder; +import org.apache.fory.collection.LongMap; +import org.apache.fory.exception.ClassNotCompatibleException; import org.apache.fory.format.row.binary.BinaryArray; import org.apache.fory.format.row.binary.BinaryMap; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; import org.apache.fory.format.type.Field; +import org.apache.fory.memory.LittleEndian; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.memory.MemoryUtils; @@ -33,6 +36,24 @@ class BinaryMapEncoder implements MapEncoder { private final BinaryArrayWriter keyWriter; private final GeneratedMapEncoder codec; private final boolean sizeEmbedded; + private final long currentHash; + private final LongMap projections; + + /** + * Per-version projection codec; the {@code Encoding} and historical {@code mapField} together + * materialize an empty map shaped for the historical layout (standard vs. compact). 
+ */ + static final class ProjectionMapCodec { + final Encoding format; + final Field mapField; + final GeneratedMapEncoder codec; + + ProjectionMapCodec(Encoding format, Field mapField, GeneratedMapEncoder codec) { + this.format = format; + this.mapField = mapField; + this.codec = codec; + } + } BinaryMapEncoder( final Encoding format, @@ -41,12 +62,26 @@ class BinaryMapEncoder implements MapEncoder { final BinaryArrayWriter keyWriter, final GeneratedMapEncoder codec, final boolean sizeEmbedded) { + this(format, mapField, valWriter, keyWriter, codec, sizeEmbedded, 0L, null); + } + + BinaryMapEncoder( + final Encoding format, + final Field mapField, + final BinaryArrayWriter valWriter, + final BinaryArrayWriter keyWriter, + final GeneratedMapEncoder codec, + final boolean sizeEmbedded, + final long currentHash, + final LongMap projections) { this.format = format; this.mapField = mapField; this.valWriter = valWriter; this.keyWriter = keyWriter; this.codec = codec; this.sizeEmbedded = sizeEmbedded; + this.currentHash = currentHash; + this.projections = projections; } @Override @@ -75,24 +110,63 @@ public M decode(final MemoryBuffer buffer) { return decode(buffer, sizeEmbedded ? 
buffer.readInt32() : buffer.remaining()); } + @SuppressWarnings("unchecked") M decode(final MemoryBuffer buffer, final int size) { - final BinaryMap map = format.newMap(mapField); - final int readerIndex = buffer.readerIndex(); - map.pointTo(buffer, readerIndex, size); - buffer.readerIndex(readerIndex + size); - return fromMap(map); + if (projections == null) { + final BinaryMap map = format.newMap(mapField); + final int readerIndex = buffer.readerIndex(); + map.pointTo(buffer, readerIndex, size); + buffer.readerIndex(readerIndex + size); + return fromMap(map); + } + if (size < 8) { + throw new ClassNotCompatibleException( + "Map payload too small for an 8-byte schema hash under schema evolution: size=" + size); + } + long peerHash = buffer.readInt64(); + int bodySize = size - 8; + if (peerHash == currentHash) { + final BinaryMap map = format.newMap(mapField); + int readerIndex = buffer.readerIndex(); + map.pointTo(buffer, readerIndex, bodySize); + buffer.readerIndex(readerIndex + bodySize); + return fromMap(map); + } + ProjectionMapCodec projection = projections.get(peerHash); + if (projection == null) { + throw new ClassNotCompatibleException( + String.format( + "Map bean schema is not consistent. self/peer hash are %s/%s.", + currentHash, peerHash)); + } + BinaryMap map = projection.format.newMap(projection.mapField); + int readerIndex = buffer.readerIndex(); + map.pointTo(buffer, readerIndex, bodySize); + buffer.readerIndex(readerIndex + bodySize); + return (M) projection.codec.fromMap(map); } @Override public M decode(final byte[] bytes) { - // byte[] overloads ignore sizeEmbedded: encode writes no size prefix, decode uses bytes.length. + // byte[] overloads ignore sizeEmbedded: encode writes no length prefix (under schema evolution + // an 8-byte hash leads the body, but that is data, not framing), so decode takes the size from + // bytes.length. 
return decode(MemoryUtils.wrap(bytes), bytes.length); } @Override public byte[] encode(final M obj) { final BinaryMap map = toMap(obj); - return map.getBuf().getBytes(map.getBaseOffset(), map.getSizeInBytes()); + if (projections == null) { + return map.getBuf().getBytes(map.getBaseOffset(), map.getSizeInBytes()); + } + // Build the result with a single allocation: the result byte[]. The hash header is poked + // in via LittleEndian (no buffer wrapper) and the body is copied in via System.arraycopy. + final int n = map.getSizeInBytes(); + final byte[] result = new byte[8 + n]; + LittleEndian.putInt64(result, 0, currentHash); + map.getBuf().get(map.getBaseOffset(), result, 8, n); + return result; } @Override @@ -102,6 +176,9 @@ public int encode(final MemoryBuffer buffer, final M obj) { if (sizeEmbedded) { buffer.writeInt32(-1); } + if (projections != null) { + buffer.writeInt64(currentHash); + } try { keyWriter.setBuffer(buffer); valWriter.setBuffer(buffer); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryRowEncoder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryRowEncoder.java index 7cafa0ab2c..3118b49ad0 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryRowEncoder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BinaryRowEncoder.java @@ -19,6 +19,7 @@ package org.apache.fory.format.encoder; +import org.apache.fory.collection.LongMap; import org.apache.fory.exception.ClassNotCompatibleException; import org.apache.fory.format.row.binary.BinaryRow; import org.apache.fory.format.row.binary.writer.BaseBinaryRowWriter; @@ -33,18 +34,50 @@ class BinaryRowEncoder implements RowEncoder { private final BaseBinaryRowWriter writer; private final boolean sizeEmbedded; private final long schemaHash; + + /** + * Hash → (historical schema, projection codec) for older versions. 
{@code null} when schema + * evolution is disabled; in that case a hash mismatch is a hard error. + */ + private final LongMap projections; + private final MemoryBuffer buffer = MemoryUtils.buffer(16); + /** + * A historical schema, the projection codec that reads it, and a row factory with that schema's + * layout precomputed so projection decodes match the current-schema path's per-call cost. + */ + static final class ProjectionCodec { + final RowFactory rowFactory; + final GeneratedRowEncoder codec; + + ProjectionCodec(RowFactory rowFactory, GeneratedRowEncoder codec) { + this.rowFactory = rowFactory; + this.codec = codec; + } + } + BinaryRowEncoder( final Schema schema, final GeneratedRowEncoder codec, final BaseBinaryRowWriter writer, final boolean sizeEmbedded) { + this(schema, codec, writer, sizeEmbedded, DataTypes.computeSchemaHash(schema), null); + } + + BinaryRowEncoder( + final Schema schema, + final GeneratedRowEncoder codec, + final BaseBinaryRowWriter writer, + final boolean sizeEmbedded, + final long schemaHash, + final LongMap projections) { this.schema = schema; this.codec = codec; this.writer = writer; this.sizeEmbedded = sizeEmbedded; - this.schemaHash = DataTypes.computeSchemaHash(schema); + this.schemaHash = schemaHash; + this.projections = projections; } @Override @@ -68,26 +101,45 @@ public T decode(final MemoryBuffer buffer) { return decode(buffer, sizeEmbedded ? buffer.readInt32() : buffer.remaining()); } + @SuppressWarnings("unchecked") T decode(final MemoryBuffer buffer, final int size) { - final long peerSchemaHash = buffer.readInt64(); - if (peerSchemaHash != schemaHash) { + if (size < 8) { throw new ClassNotCompatibleException( - String.format( - "Schema is not consistent, encoder schema is %s. " - + "self/peer schema hash are %s/%s. 
" - + "Please check writer schema.", - schema, schemaHash, peerSchemaHash)); + "Row payload too small for an 8-byte schema hash: size=" + size); } + final long peerSchemaHash = buffer.readInt64(); + // The 8-byte hash has just been consumed; the row body occupies the remaining bytes. final int rowSize = size - 8; - final BinaryRow row = writer.newRow(); - row.pointTo(buffer, buffer.readerIndex(), rowSize); - buffer.increaseReaderIndex(rowSize); - return fromRow(row); + if (peerSchemaHash == schemaHash) { + // Hot path: writer.newRow() reuses the writer's cached row layout for the current schema. + final BinaryRow row = writer.newRow(); + row.pointTo(buffer, buffer.readerIndex(), rowSize); + buffer.increaseReaderIndex(rowSize); + return fromRow(row); + } + if (projections != null) { + ProjectionCodec projection = projections.get(peerSchemaHash); + if (projection != null) { + // The writer is bound to the current schema, so the historical row comes from the + // projection's own factory, which carries that schema's precomputed layout. + final BinaryRow row = projection.rowFactory.newRow(); + row.pointTo(buffer, buffer.readerIndex(), rowSize); + buffer.increaseReaderIndex(rowSize); + return (T) projection.codec.fromRow(row); + } + } + throw new ClassNotCompatibleException( + String.format( + "Schema is not consistent, encoder schema is %s. " + + "self/peer schema hash are %s/%s. " + + "Please check writer schema.", + schema, schemaHash, peerSchemaHash)); } @Override public T decode(final byte[] bytes) { - // byte[] overloads ignore sizeEmbedded: encode writes no size prefix, decode uses bytes.length. + // byte[] overloads ignore sizeEmbedded: encode writes no length prefix (the schema-hash prefix + // is part of the body, not framing), so decode takes the size from bytes.length. 
return decode(MemoryUtils.wrap(bytes), bytes.length); } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactArrayEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactArrayEncoderBuilder.java index 65f8508e35..b6a659c00e 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactArrayEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactArrayEncoderBuilder.java @@ -33,6 +33,11 @@ public CompactArrayEncoderBuilder(final TypeRef clsType, final TypeRef bea super(clsType, beanType); } + CompactArrayEncoderBuilder( + final TypeRef clsType, final TypeRef beanType, final String rowCodecSuffix) { + super(clsType, beanType, rowCodecSuffix); + } + @Override protected Invoke beanWriterReset( final Expression writer, final Reference rowWriter, final Expression ordinal) { diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactCodecFormat.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactCodecFormat.java index c92be822b4..f6f1ab8a0c 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactCodecFormat.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactCodecFormat.java @@ -21,10 +21,12 @@ import java.util.Collection; import java.util.Map; +import java.util.Set; import org.apache.fory.format.row.binary.BinaryArray; import org.apache.fory.format.row.binary.BinaryMap; import org.apache.fory.format.row.binary.CompactBinaryArray; import org.apache.fory.format.row.binary.CompactBinaryMap; +import org.apache.fory.format.row.binary.CompactRowLayout; import org.apache.fory.format.row.binary.writer.BaseBinaryRowWriter; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; import org.apache.fory.format.row.binary.writer.CompactBinaryArrayWriter; @@ -62,18 +64,53 @@ public RowEncoderBuilder newRowEncoder(final TypeRef beanType) { return new 
CompactRowEncoderBuilder(beanType); } + @Override + public RowEncoderBuilder newProjectionRowEncoder( + final TypeRef beanType, + final Schema historicalSchema, + final Set liveNames, + final String classSuffix, + final Map, String> nestedSuffixes) { + return new CompactRowEncoderBuilder( + beanType, historicalSchema, liveNames, classSuffix, nestedSuffixes); + } + @Override public ArrayEncoderBuilder newArrayEncoder( final TypeRef> collectionType, final TypeRef elementType) { return new CompactArrayEncoderBuilder(collectionType, elementType); } + @Override + public ArrayEncoderBuilder newProjectionArrayEncoder( + final TypeRef> collectionType, + final TypeRef elementType, + final String rowCodecSuffix) { + return new CompactArrayEncoderBuilder(collectionType, elementType, rowCodecSuffix); + } + @Override public MapEncoderBuilder newMapEncoder( final TypeRef> mapType, final TypeRef beanToken) { return new CompactMapEncoderBuilder(mapType, beanToken); } + @Override + public MapEncoderBuilder newProjectionMapEncoder( + final TypeRef> mapType, + final TypeRef beanToken, + final String rowCodecSuffix) { + return new CompactMapEncoderBuilder(mapType, beanToken, rowCodecSuffix); + } + + @Override + public RowFactory newRowFactory(final Schema schema) { + // Compute the compact layout once; every newRow() call reuses it (same model as the writer + // and the nested-slot read path). 
+ final CompactRowLayout layout = new CompactRowLayout(schema); + return layout::newRow; + } + @Override public BinaryArray newArray(final Field field) { return new CompactBinaryArray(field); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactMapEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactMapEncoderBuilder.java index be3d206d59..7a55f54881 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactMapEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactMapEncoderBuilder.java @@ -36,6 +36,11 @@ public CompactMapEncoderBuilder(final TypeRef clsType, final TypeRef beanT super(clsType, beanType); } + CompactMapEncoderBuilder( + final TypeRef clsType, final TypeRef beanType, final String rowCodecSuffix) { + super(clsType, beanType, rowCodecSuffix); + } + @Override protected Invoke beanWriterReset( final Expression writer, final Reference rowWriter, final Expression ordinal) { diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactRowEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactRowEncoderBuilder.java index 79ccc53391..b9d0012a4f 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactRowEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/CompactRowEncoderBuilder.java @@ -19,6 +19,8 @@ package org.apache.fory.format.encoder; +import java.util.Map; +import java.util.Set; import org.apache.fory.codegen.Expression; import org.apache.fory.codegen.Expression.Invoke; import org.apache.fory.codegen.Expression.ListExpression; @@ -41,6 +43,15 @@ public CompactRowEncoderBuilder(final TypeRef beanType) { super(beanType); } + CompactRowEncoderBuilder( + final TypeRef beanType, + final Schema historicalSchema, + final Set liveNames, + final String classSuffix, + final Map, String> nestedSuffixes) { + 
super(beanType, historicalSchema, liveNames, classSuffix, nestedSuffixes); + } + @Override protected Schema inferSchema(final TypeRef beanType) { return CompactBinaryRowWriter.sortSchema(super.inferSchema(beanType)); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/DefaultCodecFormat.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/DefaultCodecFormat.java index 8ee0f3a8f1..a72c4d7b7a 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/DefaultCodecFormat.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/DefaultCodecFormat.java @@ -21,8 +21,10 @@ import java.util.Collection; import java.util.Map; +import java.util.Set; import org.apache.fory.format.row.binary.BinaryArray; import org.apache.fory.format.row.binary.BinaryMap; +import org.apache.fory.format.row.binary.BinaryRow; import org.apache.fory.format.row.binary.writer.BaseBinaryRowWriter; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; import org.apache.fory.format.row.binary.writer.BinaryRowWriter; @@ -59,18 +61,50 @@ public RowEncoderBuilder newRowEncoder(final TypeRef beanClass) { return new RowEncoderBuilder(beanClass); } + @Override + public RowEncoderBuilder newProjectionRowEncoder( + final TypeRef beanType, + final Schema historicalSchema, + final Set liveNames, + final String classSuffix, + final Map, String> nestedSuffixes) { + return new RowEncoderBuilder( + beanType, historicalSchema, liveNames, classSuffix, nestedSuffixes); + } + @Override public ArrayEncoderBuilder newArrayEncoder( final TypeRef> collectionType, final TypeRef elementType) { return new ArrayEncoderBuilder(collectionType, elementType); } + @Override + public ArrayEncoderBuilder newProjectionArrayEncoder( + final TypeRef> collectionType, + final TypeRef elementType, + final String rowCodecSuffix) { + return new ArrayEncoderBuilder(collectionType, elementType, rowCodecSuffix); + } + @Override public MapEncoderBuilder 
newMapEncoder( final TypeRef> mapType, final TypeRef beanToken) { return new MapEncoderBuilder(mapType, beanToken); } + @Override + public MapEncoderBuilder newProjectionMapEncoder( + final TypeRef> mapType, + final TypeRef beanToken, + final String rowCodecSuffix) { + return new MapEncoderBuilder(mapType, beanToken, rowCodecSuffix); + } + + @Override + public RowFactory newRowFactory(final Schema schema) { + return () -> new BinaryRow(schema); + } + @Override public BinaryArray newArray(final Field field) { return new BinaryArray(field); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java index 4a8c45021e..3c251f538a 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoders.java @@ -22,6 +22,10 @@ import static org.apache.fory.type.TypeUtils.OBJECT_TYPE; import static org.apache.fory.type.TypeUtils.getRawType; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; +import java.lang.reflect.Constructor; import java.util.Collection; import java.util.HashSet; import java.util.LinkedHashSet; @@ -35,6 +39,7 @@ import org.apache.fory.format.row.binary.writer.BinaryRowWriter; import org.apache.fory.format.type.CustomTypeEncoderRegistry; import org.apache.fory.format.type.CustomTypeRegistration; +import org.apache.fory.format.type.Schema; import org.apache.fory.format.type.TypeInference; import org.apache.fory.logging.Logger; import org.apache.fory.logging.LoggerFactory; @@ -316,6 +321,31 @@ static Class loadOrGenRowCodecClass(Class beanClass, Encoding codecFactory return loadCls(compileUnits); } + /** + * Compile and load a projection codec class for one historical version of {@code beanClass}. 
The + * current-version codec class is loaded separately by {@link #loadOrGenRowCodecClass}; this is + * used by schema-evolution code paths to materialize a decoder for each older version. The {@code + * nestedSuffixes} map directs codegen to the projection codec class to embed for each nested + * versioned bean type. + */ + static Class loadOrGenProjectionRowCodecClass( + Class beanClass, + Encoding codecFactory, + Schema historicalSchema, + Set liveNames, + String classSuffix, + Map, String> nestedSuffixes) { + final RowEncoderBuilder codecBuilder = + codecFactory.newProjectionRowEncoder( + TypeRef.of(beanClass), historicalSchema, liveNames, classSuffix, nestedSuffixes); + CompileUnit compileUnit = + new CompileUnit( + CodeGenerator.getPackage(beanClass), + codecBuilder.codecClassName(beanClass) + classSuffix, + codecBuilder::genCode); + return loadCls(compileUnit); + } + static Class loadOrGenArrayCodecClass( TypeRef> arrayCls, TypeRef elementType, Encoding codecFactory) { LOG.info("Create ArrayCodec for classes {}", elementType); @@ -333,6 +363,23 @@ static Class loadOrGenArrayCodecClass( return loadCls(compileUnit); } + static Class loadOrGenProjectionArrayCodecClass( + TypeRef> arrayCls, + TypeRef elementType, + Encoding codecFactory, + String rowCodecSuffix) { + Class cls = getRawType(elementType); + String prefix = TypeInference.inferTypeName(arrayCls); + ArrayEncoderBuilder codecBuilder = + codecFactory.newProjectionArrayEncoder(arrayCls, elementType, rowCodecSuffix); + CompileUnit compileUnit = + new CompileUnit( + CodeGenerator.getPackage(cls), + codecBuilder.codecClassName(cls, prefix) + rowCodecSuffix, + codecBuilder::genCode); + return loadCls(compileUnit); + } + static Class loadOrGenMapCodecClass( TypeRef> mapCls, TypeRef keyToken, @@ -366,6 +413,23 @@ static Class loadOrGenMapCodecClass( return loadCls(compileUnit); } + static Class loadOrGenProjectionMapCodecClass( + TypeRef> mapCls, + TypeRef beanToken, + Encoding codecFactory, + String 
rowCodecSuffix) { + Class cls = getRawType(beanToken); + String prefix = TypeInference.inferTypeName(mapCls); + MapEncoderBuilder codecBuilder = + codecFactory.newProjectionMapEncoder(mapCls, beanToken, rowCodecSuffix); + CompileUnit compileUnit = + new CompileUnit( + CodeGenerator.getPackage(cls), + codecBuilder.codecClassName(cls, prefix) + rowCodecSuffix, + codecBuilder::genCode); + return loadCls(compileUnit); + } + private static Class loadCls(CompileUnit... compileUnit) { CodeGenerator codeGenerator = CodeGenerator.getSharedCodeGenerator(Thread.currentThread().getContextClassLoader()); @@ -377,4 +441,21 @@ private static Class loadCls(CompileUnit... compileUnit) { throw new IllegalStateException("Impossible because we just compiled class", e); } } + + /** + * Build a {@link MethodHandle} bound to {@code generatedClass}'s {@code (Object[])} constructor, + * adapted so it returns {@code generatedType}. All generated row/array/map codec classes share + * this constructor shape; this helper centralises the reflection and exception wrapping. 
+ */ + static MethodHandle constructorHandleFor(Class generatedClass, Class generatedType) { + try { + Constructor constructor = + generatedClass.asSubclass(generatedType).getConstructor(Object[].class); + return MethodHandles.lookup() + .unreflectConstructor(constructor) + .asType(MethodType.methodType(generatedType, Object[].class)); + } catch (NoSuchMethodException | IllegalAccessException e) { + throw new EncoderException("Failed to resolve constructor for " + generatedClass, e); + } + } } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoding.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoding.java index c28b4d3b19..dee4dc9b81 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoding.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/Encoding.java @@ -21,6 +21,7 @@ import java.util.Collection; import java.util.Map; +import java.util.Set; import org.apache.fory.format.row.binary.BinaryArray; import org.apache.fory.format.row.binary.BinaryMap; import org.apache.fory.format.row.binary.writer.BaseBinaryRowWriter; @@ -41,11 +42,50 @@ interface Encoding { RowEncoderBuilder newRowEncoder(TypeRef beanType); + /** + * Construct a projection codec builder for an older version of {@code beanType}, reading the + * supplied historical schema and producing instances of the current bean class. The {@code + * nestedSuffixes} map directs codegen to embed a specific projection codec class for each + * nested-bean type (used when a nested versioned bean was on the wire at an older version). An + * empty map means all nested beans use their current-version codecs. 
+ */ + RowEncoderBuilder newProjectionRowEncoder( + TypeRef beanType, + Schema historicalSchema, + Set liveNames, + String classSuffix, + Map, String> nestedSuffixes); + ArrayEncoderBuilder newArrayEncoder( TypeRef> collectionType, TypeRef elementType); + /** + * Construct an array encoder builder whose generated code references the row codec class for the + * element bean with the supplied suffix. Used by schema-evolution paths to generate one array + * codec per historical version of the element bean. + */ + ArrayEncoderBuilder newProjectionArrayEncoder( + TypeRef> collectionType, + TypeRef elementType, + String rowCodecSuffix); + MapEncoderBuilder newMapEncoder(TypeRef> mapType, TypeRef beanToken); + /** + * Construct a map encoder builder whose generated code references the bean row codec class with + * the supplied suffix. Used by schema-evolution paths to generate one map codec per historical + * version of the bean. + */ + MapEncoderBuilder newProjectionMapEncoder( + TypeRef> mapType, TypeRef beanToken, String rowCodecSuffix); + + /** + * Build a {@link RowFactory} for {@code schema}, precomputing any schema-derived layout once. + * Used by the schema-evolution decode path to allocate rows for a historical schema without + * re-deriving the layout on every decode. 
+ */ + RowFactory newRowFactory(Schema schema); + BinaryArray newArray(Field field); BinaryMap newMap(Field field); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapCodecBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapCodecBuilder.java index 44ad87e6de..a6d1f2ec2e 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapCodecBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapCodecBuilder.java @@ -20,16 +20,20 @@ package org.apache.fory.format.encoder; import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.MethodType; +import java.util.HashMap; import java.util.Map; import java.util.function.BiFunction; import java.util.function.Supplier; +import org.apache.fory.Fory; +import org.apache.fory.collection.LongMap; import org.apache.fory.format.row.binary.writer.BinaryArrayWriter; +import org.apache.fory.format.type.CustomTypeEncoderRegistry; import org.apache.fory.format.type.DataTypes; import org.apache.fory.format.type.Field; +import org.apache.fory.format.type.SchemaHistory; import org.apache.fory.format.type.TypeInference; import org.apache.fory.reflect.TypeRef; +import org.apache.fory.type.TypeResolutionContext; import org.apache.fory.type.TypeUtils; import org.apache.fory.util.ExceptionUtils; @@ -55,23 +59,116 @@ public class MapCodecBuilder> extends BaseCodecBuilder> build() { loadMapInnerCodecs(); - final var mapEncoderFactory = generatedMapEncoder(); + if (!schemaEvolution || evolutionBean() == null) { + final var mapEncoderFactory = generatedMapEncoder(); + return new Supplier>() { + @Override + public MapEncoder get() { + final BinaryArrayWriter keyWriter = codecFormat.newArrayWriter(keyField); + final BinaryArrayWriter valWriter = + codecFormat.newArrayWriter(valField, keyWriter.getBuffer()); + final var codec = mapEncoderFactory.apply(keyWriter, valWriter); + return new 
BufferResettingMapEncoder<>( + initialBufferSize, + keyWriter, + valWriter, + new BinaryMapEncoder( + codecFormat, field, valWriter, keyWriter, codec, sizeEmbedded)); + } + }; + } + return buildVersioned(); + } + + /** + * Bean this map evolves on, reachable through the value type. A directly-typed bean (versioned or + * not) takes the evolution path so the strict-hash prefix is always present and an evolution-on + * consumer can detect a flag-mismatched producer cleanly; a versioned bean nested inside a + * list/map/array value is found by descending the wrapper. Null when the value carries no bean. + */ + private Class evolutionBean() { + return SchemaHistory.evolutionBean( + valType, new TypeResolutionContext(CustomTypeEncoderRegistry.customTypeHandler(), true)); + } + + private Supplier> buildVersioned() { + Class valClass = evolutionBean(); + SchemaHistory history = buildSchemaHistory(valClass); + SchemaHistory.VersionedSchema current = history.current(); + + Encoders.loadOrGenRowCodecClass(valClass, codecFormat); + // Generate per-combination row codec classes and per-combination map codec classes. The + // suffix encodes the outer version plus each chosen inner-bean version so that distinct + // cross-product entries do not collide on a single generated class. + Map projectionFactories = new HashMap<>(); + for (SchemaHistory.VersionedSchema vs : history.versions()) { + if (vs == current) { + continue; + } + String suffix = ProjectionRouting.projectionSuffix(vs); + Map, String> nestedSuffixes = ProjectionRouting.nestedSuffixesFor(vs, codecFormat); + Encoders.loadOrGenProjectionRowCodecClass( + valClass, codecFormat, vs.schema(), vs.liveFieldNames(), suffix, nestedSuffixes); + Class mapClass = + Encoders.loadOrGenProjectionMapCodecClass( + mapType, TypeRef.of(valClass), codecFormat, suffix); + MethodHandle ctor = Encoders.constructorHandleFor(mapClass, GeneratedMapEncoder.class); + // Rebuild the map field with the value projected onto this historical version. 
The key stays + // at the current schema; the bean sits directly at the value or inside a list/map value + // wrapper, which projectThroughWrapper preserves around the historical struct. + Field histVal = + SchemaHistory.projectThroughWrapper(DataTypes.itemFieldForMap(field), valType, vs); + Field histMapField = + DataTypes.mapField(field.name(), DataTypes.keyFieldForMap(field), histVal); + projectionFactories.put(vs.strictHash(), new ProjectionMapFactory(histMapField, ctor)); + } + final var currentFactory = generatedMapEncoder(); + long currentHash = current.strictHash(); return new Supplier>() { @Override public MapEncoder get() { - final BinaryArrayWriter keyWriter = codecFormat.newArrayWriter(keyField); - final BinaryArrayWriter valWriter = - codecFormat.newArrayWriter(valField, keyWriter.getBuffer()); - final var codec = mapEncoderFactory.apply(keyWriter, valWriter); + BinaryArrayWriter keyWriter = codecFormat.newArrayWriter(keyField); + BinaryArrayWriter valWriter = codecFormat.newArrayWriter(valField, keyWriter.getBuffer()); + var codec = currentFactory.apply(keyWriter, valWriter); + LongMap proj = + new LongMap<>(projectionFactories.size()); + for (Map.Entry entry : projectionFactories.entrySet()) { + proj.put(entry.getKey(), entry.getValue().instantiate(codecFormat, fory)); + } return new BufferResettingMapEncoder<>( initialBufferSize, keyWriter, valWriter, - new BinaryMapEncoder(codecFormat, field, valWriter, keyWriter, codec, sizeEmbedded)); + new BinaryMapEncoder( + codecFormat, field, valWriter, keyWriter, codec, sizeEmbedded, currentHash, proj)); } }; } + private final class ProjectionMapFactory { + private final Field histMapField; + private final MethodHandle ctor; + + ProjectionMapFactory(Field histMapField, MethodHandle ctor) { + this.histMapField = histMapField; + this.ctor = ctor; + } + + BinaryMapEncoder.ProjectionMapCodec instantiate(Encoding format, Fory fory) { + try { + Field histKeyField = DataTypes.keyArrayFieldForMap(histMapField); + 
Field histValField = DataTypes.itemArrayFieldForMap(histMapField); + BinaryArrayWriter projKey = format.newArrayWriter(histKeyField); + BinaryArrayWriter projVal = format.newArrayWriter(histValField, projKey.getBuffer()); + Object[] references = {histKeyField, histValField, projKey, projVal, fory, histMapField}; + GeneratedMapEncoder codec = (GeneratedMapEncoder) ctor.invokeExact(references); + return new BinaryMapEncoder.ProjectionMapCodec(format, histMapField, codec); + } catch (Throwable e) { + throw ExceptionUtils.throwException(e); + } + } + } + private void loadMapInnerCodecs() { Encoders.loadMapCodecs(keyType, codecFormat); Encoders.loadMapCodecs(valType, codecFormat); @@ -81,17 +178,8 @@ BiFunction generatedM final Class arrayCodecClass = Encoders.loadOrGenMapCodecClass(mapType, keyType, valType, codecFormat); - final MethodHandle constructorHandle; - try { - final var constructor = - arrayCodecClass.asSubclass(GeneratedMapEncoder.class).getConstructor(Object[].class); - constructorHandle = - MethodHandles.lookup() - .unreflectConstructor(constructor) - .asType(MethodType.methodType(GeneratedMapEncoder.class, Object[].class)); - } catch (final NoSuchMethodException | IllegalAccessException e) { - throw new EncoderException("Failed to construct array codec for " + mapType, e); - } + final MethodHandle constructorHandle = + Encoders.constructorHandleFor(arrayCodecClass, GeneratedMapEncoder.class); return new BiFunction() { @Override public GeneratedMapEncoder apply( @@ -99,7 +187,7 @@ public GeneratedMapEncoder apply( final Object[] references = {keyField, valField, keyWriter, valWriter, fory, field}; try { return (GeneratedMapEncoder) constructorHandle.invokeExact(references); - } catch (final Throwable t) { + } catch (Throwable t) { throw ExceptionUtils.throwException(t); } } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapEncoderBuilder.java 
index fa84944188..d52df21b17 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/MapEncoderBuilder.java @@ -24,10 +24,13 @@ import static org.apache.fory.type.TypeUtils.getRawType; import java.util.Map; +import java.util.function.Supplier; import org.apache.fory.Fory; +import org.apache.fory.codegen.Code; import org.apache.fory.codegen.CodeGenerator; import org.apache.fory.codegen.CodegenContext; import org.apache.fory.codegen.Expression; +import org.apache.fory.codegen.Expression.AbstractExpression; import org.apache.fory.codegen.ExpressionUtils; import org.apache.fory.format.row.binary.BinaryArray; import org.apache.fory.format.row.binary.BinaryMap; @@ -53,12 +56,24 @@ public class MapEncoderBuilder extends BaseBinaryEncoderBuilder { private final TypeRef mapToken; + // True while the key-array subtree generates. Map keys are always read at the current schema + // (they carry no per-payload version hash), so in a projection codec the key bean must resolve to + // its current, unsuffixed codec rather than the value's historical projection. Nested bean codecs + // register both at expression construction and during genCode, so keyScoped sets the flag + // around the build and KeyPositionScope re-sets it for genCode. The value path is untouched.
+ private boolean inKeyPosition; + public MapEncoderBuilder(Class mapCls, Class keyClass) { this(TypeRef.of(mapCls), TypeRef.of(keyClass)); } public MapEncoderBuilder(TypeRef clsType, TypeRef beanType) { + this(clsType, beanType, null); + } + + MapEncoderBuilder(TypeRef clsType, TypeRef beanType, String rowCodecSuffix) { super(new CodegenContext(), beanType); + this.rowCodecSuffixForBeans = rowCodecSuffix; mapToken = clsType; ctx.reserveName(ROOT_KEY_WRITER_NAME); ctx.reserveName(ROOT_VALUE_WRITER_NAME); @@ -72,7 +87,9 @@ public MapEncoderBuilder(TypeRef clsType, TypeRef beanType) { @Override public String genCode() { ctx.setPackage(CodeGenerator.getPackage(beanClass)); - String className = codecClassName(beanClass, TypeInference.inferTypeName(mapToken)); + String className = + codecClassName(beanClass, TypeInference.inferTypeName(mapToken)) + + (rowCodecSuffixForBeans == null ? "" : rowCodecSuffixForBeans); ctx.setClassName(className); // don't addImport(arrayClass), because user class may name collide. 
// janino don't support generics, so GeneratedCodec has no generics @@ -137,8 +154,8 @@ public String genCode() { long startTime = System.nanoTime(); String code = ctx.genCode(); - long durationMs = (System.nanoTime() - startTime) / 1000_000; - LOG.info("Generate map codec for class {} take {} us", beanClass, durationMs); + long durationUs = (System.nanoTime() - startTime) / 1000; + LOG.info("Generate map codec for class {} take {} us", beanClass, durationUs); return code; } @@ -178,7 +195,9 @@ public Expression buildEncodeExpression() { expressions.add( new Expression.Invoke(keyArrayWriter, "writeDirectly", Expression.Literal.ofInt(-1))); Expression keySerializationExpr = - serializeForArrayByWriter(keySet, keyArrayWriter, keySetType, null, keyFieldExpr); + keyScoped( + () -> + serializeForArrayByWriter(keySet, keyArrayWriter, keySetType, null, keyFieldExpr)); Expression.Invoke keyArray = new Expression.Invoke(keyArrayWriter, "toArray", TypeRef.of(BinaryArray.class)); expressions.add(map); @@ -239,9 +258,9 @@ private Expression directlyDeserializeMap( Expression keyJavaArray; Expression valueJavaArray; if (TypeUtils.ITERABLE_TYPE.isSupertypeOf(keysType)) { - keyJavaArray = deserializeForCollection(keyArrayRef, keysType); + keyJavaArray = keyScoped(() -> deserializeForCollection(keyArrayRef, keysType)); } else { - keyJavaArray = deserializeForArray(keyArrayRef, keysType); + keyJavaArray = keyScoped(() -> deserializeForArray(keyArrayRef, keysType)); } if (TypeUtils.ITERABLE_TYPE.isSupertypeOf(valuesType)) { valueJavaArray = deserializeForCollection(valArrayRef, valuesType); @@ -258,4 +277,89 @@ private Expression directlyDeserializeMap( ExpressionUtils.notNull(key), new Expression.Invoke(map, "put", key, value))); return new Expression.ListExpression(map, put); } + + /** + * In the key position the bean is always decoded with its current schema, so drop any projection + * suffix. The value position keeps the inherited behavior. 
+ */ + @Override + protected String nestedBeanSuffix(TypeRef typeRef) { + return inKeyPosition ? "" : super.nestedBeanSuffix(typeRef); + } + + /** + * Register the key bean's codec under a distinct key so it does not collide with a same-class + * value bean that projects to a historical schema. Both would otherwise share one {@code + * beanEncoderMap} entry and the first-registered (suffixed) codec would wrongly decode the key. + */ + @Override + protected Object beanCodecKey(TypeRef typeRef) { + return inKeyPosition ? new KeyCodecKey(typeRef) : typeRef; + } + + /** Distinguishes a key-position bean codec registration from the value-position one. */ + private static final class KeyCodecKey { + private final TypeRef typeRef; + + KeyCodecKey(TypeRef typeRef) { + this.typeRef = typeRef; + } + + @Override + public boolean equals(Object o) { + return o instanceof KeyCodecKey && typeRef.equals(((KeyCodecKey) o).typeRef); + } + + @Override + public int hashCode() { + return typeRef.hashCode() * 31 + 1; + } + } + + /** + * Build a key-array subtree with {@link #inKeyPosition} set. Nested bean codecs register both at + * expression construction (the encode {@code ForEach} builds its body eagerly) and during genCode + * (the decode lazy-array body), so the scope has to cover both: the flag is set around the build + * here, and {@link KeyPositionScope} re-sets it around the subtree's genCode. + */ + private Expression keyScoped(Supplier build) { + boolean prev = inKeyPosition; + inKeyPosition = true; + try { + return new KeyPositionScope(build.get()); + } finally { + inKeyPosition = prev; + } + } + + /** Re-sets {@link #inKeyPosition} around the key subtree's genCode; see {@link #keyScoped}. 
*/ + private final class KeyPositionScope extends AbstractExpression { + private final Expression key; + + KeyPositionScope(Expression key) { + super(key); + this.key = key; + } + + @Override + public TypeRef type() { + return key.type(); + } + + @Override + public boolean nullable() { + return key.nullable(); + } + + @Override + public Code.ExprCode doGenCode(CodegenContext ctx) { + boolean prev = inKeyPosition; + inKeyPosition = true; + try { + return key.genCode(ctx); + } finally { + inKeyPosition = prev; + } + } + } } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/ProjectionRouting.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ProjectionRouting.java new file mode 100644 index 0000000000..6ab6059d2c --- /dev/null +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/ProjectionRouting.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.encoder; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.fory.format.type.SchemaHistory; + +/** + * Suffix routing shared by row/array/map projection codec generation. 
Each cross-product entry gets + * a class-name suffix that uniquely identifies its full nested combination, and the per-nested-bean + * suffix map directs codegen to embed the right inner projection class for each nested-bean type at + * this combination's versions. + */ +final class ProjectionRouting { + private ProjectionRouting() {} + + /** + * Build a class-name suffix that uniquely identifies {@code vs} across the whole cross-product, + * at any nesting depth. The suffix encodes the outer version and, for each nested bean, that + * inner's simple name, version, and its full strict hash in hex. The strict hash is computed + * over the inner's fully substituted (deep) schema and is collision-checked at build time, so two + * distinct inner subtrees that share a class and version number still produce different suffixes. + * Sorted by class name for determinism across JVM invocations. + */ + static String projectionSuffix(SchemaHistory.VersionedSchema vs) { + StringBuilder sb = new StringBuilder("_V").append(vs.version()); + if (!vs.nestedBeanSchemas().isEmpty()) { + List, SchemaHistory.VersionedSchema>> entries = + new ArrayList<>(vs.nestedBeanSchemas().entrySet()); + entries.sort((a, b) -> a.getKey().getName().compareTo(b.getKey().getName())); + for (Map.Entry, SchemaHistory.VersionedSchema> e : entries) { + SchemaHistory.VersionedSchema inner = e.getValue(); + sb.append("_") + .append(e.getKey().getSimpleName()) + .append(inner.version()) + .append("h") + .append(Long.toHexString(inner.strictHash())); + } + } + return sb.toString(); + } + + /** + * Per-nested-bean-type suffix map for codegen, recursively materializing every inner projection + * class implied by {@code vs}. Empty string means the inner bean uses its current-version codec + * class. The chosen inner entry is taken directly from {@code vs}, so this resolves the correct + * combination to arbitrary depth without re-deriving it from a version number.
+ */ + static Map, String> nestedSuffixesFor( + SchemaHistory.VersionedSchema vs, Encoding codecFormat) { + Map, String> out = new HashMap<>(); + for (Map.Entry, SchemaHistory.VersionedSchema> e : vs.nestedBeanSchemas().entrySet()) { + Class innerClass = e.getKey(); + SchemaHistory.VersionedSchema innerVs = e.getValue(); + if (innerVs.isCurrent()) { + out.put(innerClass, ""); + } else { + String innerSuffix = projectionSuffix(innerVs); + out.put(innerClass, innerSuffix); + // Eagerly generate the inner's projection class so the outer's `new InnerCodec` + // resolves at class load. Recurses through the inner's own nested combination. + Encoders.loadOrGenProjectionRowCodecClass( + innerClass, + codecFormat, + innerVs.schema(), + innerVs.liveFieldNames(), + innerSuffix, + nestedSuffixesFor(innerVs, codecFormat)); + } + } + return out; + } +} diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowCodecBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowCodecBuilder.java index eeb624d706..d94c885d2c 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowCodecBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowCodecBuilder.java @@ -20,11 +20,15 @@ package org.apache.fory.format.encoder; import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.MethodType; +import java.util.HashMap; +import java.util.Map; import java.util.function.Function; import java.util.function.Supplier; +import org.apache.fory.Fory; +import org.apache.fory.collection.LongMap; import org.apache.fory.format.row.binary.writer.BaseBinaryRowWriter; +import org.apache.fory.format.type.Schema; +import org.apache.fory.format.type.SchemaHistory; import org.apache.fory.format.type.TypeInference; import org.apache.fory.util.ExceptionUtils; @@ -45,51 +49,143 @@ public class RowCodecBuilder extends BaseCodecBuilder> { * virtual thread. 
*/ public Supplier> build() { - final Function> rowEncoderFactory = buildForWriter(); + final RowEncoderFactory factory = buildEncoderFactory(); return new Supplier>() { @Override public RowEncoder get() { - final BaseBinaryRowWriter writer = codecFormat.newWriter(schema); - return new BufferResettingRowEncoder( - initialBufferSize, writer, rowEncoderFactory.apply(writer)); + final BaseBinaryRowWriter writer = codecFormat.newWriter(factory.schema); + return new BufferResettingRowEncoder(initialBufferSize, writer, factory.apply(writer)); } }; } Function> buildForWriter() { + return buildEncoderFactory(); + } + + /** + * Resolve the schema and the per-writer encoder factory together. The evolution path rotates the + * schema to the history-derived current version; returning it alongside the factory keeps that + * resolution out of the mutable builder state, so a reused builder or a direct {@link + * #buildForWriter()} caller is unaffected. + */ + private RowEncoderFactory buildEncoderFactory() { + return schemaEvolution ? 
evolvingBuildForWriter() : defaultBuildForWriter(); + } + + private RowEncoderFactory defaultBuildForWriter() { + final Schema currentSchema = schema; final Function rowEncoderFactory = - rowEncoderFactory(); - return new Function>() { + rowEncoderFactory(currentSchema); + return new RowEncoderFactory(currentSchema) { @Override public RowEncoder apply(final BaseBinaryRowWriter writer) { return new BinaryRowEncoder( - schema, rowEncoderFactory.apply(writer), writer, sizeEmbedded); + currentSchema, rowEncoderFactory.apply(writer), writer, sizeEmbedded); } }; } - Function rowEncoderFactory() { - final Class rowCodecClass = Encoders.loadOrGenRowCodecClass(beanClass, codecFormat); - MethodHandle constructorHandle; - try { - final var constructor = - rowCodecClass.asSubclass(GeneratedRowEncoder.class).getConstructor(Object[].class); - constructorHandle = - MethodHandles.lookup() - .unreflectConstructor(constructor) - .asType(MethodType.methodType(GeneratedRowEncoder.class, Object[].class)); - } catch (final NoSuchMethodException | IllegalAccessException e) { - throw new EncoderException("Failed to construct codec for " + beanClass, e); + private RowEncoderFactory evolvingBuildForWriter() { + SchemaHistory history = buildSchemaHistory(beanClass); + SchemaHistory.VersionedSchema currentVersion = history.current(); + // The history-derived schema is what writers, generated codec, and decode dispatch must agree + // on. It travels back to build() through the returned factory rather than the mutable schema + // field, so building does not rotate builder state that a later build()/buildForWriter() reads. + final Schema currentSchema = currentVersion.schema(); + + final Function currentFactory = + rowEncoderFactory(currentSchema); + // Projection codecs for each non-current combination of (outer-version, inner-versions). 
+ // The suffix encodes the combination so different cross-product entries get distinct + // generated classes; the nested-bean version map directs the projection codec to embed + // the right inner projection class for each nested-bean type. + final Map projectionFactories = new HashMap<>(); + for (SchemaHistory.VersionedSchema vs : history.versions()) { + if (vs == currentVersion) { + continue; + } + String suffix = ProjectionRouting.projectionSuffix(vs); + Map, String> nestedSuffixes = ProjectionRouting.nestedSuffixesFor(vs, codecFormat); + Class projectionClass = + Encoders.loadOrGenProjectionRowCodecClass( + beanClass, codecFormat, vs.schema(), vs.liveFieldNames(), suffix, nestedSuffixes); + MethodHandle ctor = Encoders.constructorHandleFor(projectionClass, GeneratedRowEncoder.class); + RowFactory rowFactory = codecFormat.newRowFactory(vs.schema()); + projectionFactories.put( + vs.strictHash(), new ProjectionCodecFactory(vs.schema(), ctor, rowFactory)); + } + + final long currentHash = currentVersion.strictHash(); + return new RowEncoderFactory(currentSchema) { + @Override + public RowEncoder apply(final BaseBinaryRowWriter writer) { + LongMap projections = + new LongMap<>(projectionFactories.size()); + for (Map.Entry entry : projectionFactories.entrySet()) { + projections.put(entry.getKey(), entry.getValue().instantiate(writer, fory)); + } + return new BinaryRowEncoder( + currentSchema, + currentFactory.apply(writer), + writer, + sizeEmbedded, + currentHash, + projections); + } + }; + } + + /** + * A per-writer encoder factory that also carries the schema the writer must be created with. The + * schema travels with the factory instead of through the mutable builder, so {@link #build()} can + * create the writer without reading builder state that the evolution path would otherwise rotate. 
+ */ + abstract static class RowEncoderFactory + implements Function> { + final Schema schema; + + RowEncoderFactory(final Schema schema) { + this.schema = schema; + } + } + + private static final class ProjectionCodecFactory { + private final Schema historicalSchema; + private final MethodHandle ctor; + // The RowFactory depends only on the historical schema and codec format, both fixed at build + // time, so build it once here rather than per encoder instance. Only the generated codec, which + // binds the per-instance writer, is rebuilt in instantiate(). + private final RowFactory rowFactory; + + ProjectionCodecFactory(Schema historicalSchema, MethodHandle ctor, RowFactory rowFactory) { + this.historicalSchema = historicalSchema; + this.ctor = ctor; + this.rowFactory = rowFactory; + } + + BinaryRowEncoder.ProjectionCodec instantiate(BaseBinaryRowWriter writer, Fory fory) { + try { + Object[] references = {historicalSchema, writer, fory}; + GeneratedRowEncoder codec = (GeneratedRowEncoder) ctor.invokeExact(references); + return new BinaryRowEncoder.ProjectionCodec(rowFactory, codec); + } catch (Throwable e) { + throw ExceptionUtils.throwException(e); + } } + } + + Function rowEncoderFactory(final Schema codecSchema) { + final Class rowCodecClass = Encoders.loadOrGenRowCodecClass(beanClass, codecFormat); + final MethodHandle constructorHandle = + Encoders.constructorHandleFor(rowCodecClass, GeneratedRowEncoder.class); return new Function() { @Override public GeneratedRowEncoder apply(final BaseBinaryRowWriter writer) { try { - final Object[] references = {schema, writer, fory}; + final Object[] references = {codecSchema, writer, fory}; return (GeneratedRowEncoder) constructorHandle.invokeExact(references); - } catch (final ReflectiveOperationException e) { - throw new EncoderException("Failed to construct codec for " + beanClass, e); - } catch (final Throwable e) { + } catch (Throwable e) { throw ExceptionUtils.throwException(e); } } diff --git 
a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java index ea7dc25ece..69c39a4e44 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowEncoderBuilder.java @@ -26,11 +26,13 @@ import java.lang.reflect.Method; import java.lang.reflect.Modifier; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.SortedMap; import org.apache.fory.Fory; import org.apache.fory.builder.CodecBuilder; @@ -77,15 +79,48 @@ class RowEncoderBuilder extends BaseBinaryEncoderBuilder { private final CodegenContext generatedBeanImpl; private final String generatedBeanImplName; + /** + * When non-null, this builder produces a decode-only projection codec: schema fields whose name + * is in {@code projectionLiveNames} are assigned to the bean as usual; others are decoded for + * offset arithmetic only and discarded. {@code toRow} on a projection codec throws. + */ + private final Set projectionLiveNames; + + private final String projectionClassSuffix; + private final Map, String> nestedSuffixes; + public RowEncoderBuilder(Class beanClass) { this(TypeRef.of(beanClass)); } public RowEncoderBuilder(TypeRef beanType) { + this(beanType, null, null, null, Collections.emptyMap()); + } + + /** + * Construct a decode-only projection builder for an older version of {@code beanType}. The + * supplied {@code historicalSchema} is used as the layout to decode; only fields whose name is in + * {@code liveNames} are written into the resulting bean. {@code classSuffix} distinguishes this + * codec from the current-version codec and from other historical projections. 
{@code + * nestedSuffixes} routes each nested-bean type to a specific projection codec class (used when an + * inner versioned bean was on the wire at an older version). + */ + RowEncoderBuilder( + TypeRef beanType, + Schema historicalSchema, + Set liveNames, + String classSuffix, + Map, String> nestedSuffixes) { super(new CodegenContext(), beanType); Preconditions.checkArgument(beanClass.isInterface() || TypeUtils.isBean(beanType, typeCtx)); - className = codecClassName(beanClass); - this.schema = inferSchema(beanType); + this.projectionLiveNames = liveNames; + this.projectionClassSuffix = classSuffix; + this.nestedSuffixes = nestedSuffixes == null ? Collections.emptyMap() : nestedSuffixes; + className = + projectionClassSuffix == null + ? codecClassName(beanClass) + : codecClassName(beanClass) + projectionClassSuffix; + this.schema = historicalSchema != null ? historicalSchema : inferSchema(beanType); this.descriptorsMap = Descriptor.getDescriptorsMap(beanClass); ctx.reserveName(ROOT_ROW_WRITER_NAME); ctx.reserveName(SCHEMA_NAME); @@ -105,7 +140,13 @@ public RowEncoderBuilder(TypeRef beanType) { ctx.addImports(Row.class, ArrayData.class, MapData.class); ctx.addImports(BinaryRow.class, BinaryArray.class, BinaryMap.class); if (beanClass.isInterface()) { - generatedBeanImplName = beanClass.getSimpleName() + "GeneratedImpl"; + // Append the projection suffix so each historical version of an interface bean gets its + // own impl class; the impl classes are inner classes of the codec and would collide on + // the simple name otherwise. + generatedBeanImplName = + beanClass.getSimpleName() + + "GeneratedImpl" + + (projectionClassSuffix == null ? 
"" : projectionClassSuffix); generatedBeanImpl = buildImplClass(); } else { generatedBeanImplName = null; @@ -117,6 +158,12 @@ protected Schema inferSchema(TypeRef beanType) { return TypeInference.inferSchema(getRawType(beanType)); } + @Override + protected String nestedBeanSuffix(TypeRef typeRef) { + String s = nestedSuffixes.get(getRawType(typeRef)); + return s != null ? s : super.nestedBeanSuffix(typeRef); + } + @Override protected String codecSuffix() { return "RowCodec"; @@ -177,8 +224,8 @@ public String genCode() { + generatedBeanImpl.genCode() + code.substring(insertPoint); } - long durationMs = (System.nanoTime() - startTime) / 1000; - LOG.info("Generate codec for class {} take {} us", beanClass, durationMs); + long durationUs = (System.nanoTime() - startTime) / 1000; + LOG.info("Generate codec for class {} take {} us", beanClass, durationUs); return code; } @@ -203,8 +250,14 @@ public Expression buildEncodeExpression() { // schema field's name must correspond to descriptor's name. for (int i = 0; i < numFields; i++) { Field field = schema.field(i); + if (projectionLiveNames != null && !projectionLiveNames.contains(field.name())) { + // Removed wire field — no Java accessor to read from, so we cannot emit encode + // code. The projection codec's encode body is unreachable anyway because + // BinaryRowEncoder never dispatches a projection codec on write. + continue; + } Descriptor d = getDescriptorByFieldName(field.name()); - Preconditions.checkNotNull(d); + Preconditions.checkNotNull(d, "missing descriptor for schema field " + field.name()); TypeRef fieldType = d.getTypeRef(); Expression fieldValue = getFieldValue(bean, d); Literal ordinal = Literal.ofInt(i); @@ -215,6 +268,12 @@ public Expression buildEncodeExpression() { serializeFor(ordinal, fieldValue, writer, fieldType, field, foryField, new HashSet<>()); expressions.add(fieldExpr); } + if (projectionLiveNames != null) { + // Decode-only: never run the writer logic. 
The expressions above were generated only for + // their side effects on the codegen context (registering nested-bean encoder fields). + return new Expression.Block( + "throw new UnsupportedOperationException(\"projection codec is decode-only\");\n"); + } expressions.add( new Expression.Return( new Expression.Invoke(writer, "getRow", TypeRef.of(BinaryRow.class)))); @@ -237,19 +296,27 @@ public Expression buildDecodeExpression() { bean = new Expression.Reference("new " + generatedBeanImplName + "(row)"); } else { int numFields = schema.numFields(); - List fieldNames = new ArrayList<>(numFields); - Expression[] values = new Expression[numFields]; - Descriptor[] descriptors = new Descriptor[numFields]; - // schema field's name must correspond to descriptor's name. + // Build, in schema order, the per-slot bean-side info for live fields only. Discarded + // slots are part of the row layout but have no Java target; we skip emitting any code + // for them because BinaryRow's offset arithmetic is keyed on slot index, not on prior + // reads. 
+ List liveFieldDescriptorNames = new ArrayList<>(); + List liveDescriptors = new ArrayList<>(); + List liveValues = new ArrayList<>(); for (int i = 0; i < numFields; i++) { Literal ordinal = Literal.ofInt(i); - Descriptor d = getDescriptorByFieldName(schema.field(i).name()); - fieldNames.add(d.getName()); - descriptors[i] = d; + String wireName = schema.field(i).name(); + if (projectionLiveNames != null && !projectionLiveNames.contains(wireName)) { + continue; + } + Descriptor d = getDescriptorByFieldName(wireName); + Preconditions.checkNotNull(d, "missing descriptor for wire field " + wireName); TypeRef fieldType = d.getTypeRef(); Expression.Variable value = new Expression.Variable("value_" + d.getName(), nullValue(fieldType)); - values[i] = value; + liveFieldDescriptorNames.add(d.getName()); + liveDescriptors.add(d); + liveValues.add(value); expressions.add(value); Expression.Invoke isNullAt = new Expression.Invoke( @@ -267,17 +334,12 @@ public Expression buildDecodeExpression() { expressions.add(decode); } if (RecordUtils.isRecord(beanClass)) { - int[] map = RecordUtils.buildRecordComponentMapping(beanClass, fieldNames); - Expression[] args = new Expression[numFields]; - for (int i = 0; i < numFields; i++) { - args[i] = values[map[i]]; - } - bean = new Expression.NewInstance(beanType, beanType.getRawType().getName(), args); + bean = buildRecordInstance(liveFieldDescriptorNames, liveValues); } else { bean = newBean(); expressions.add(bean); - for (int i = 0; i < values.length; i++) { - expressions.add(setFieldValue(bean, descriptors[i], values[i])); + for (int i = 0; i < liveDescriptors.size(); i++) { + expressions.add(setFieldValue(bean, liveDescriptors.get(i), liveValues.get(i))); } } } @@ -290,6 +352,30 @@ public Expression buildDecodeExpression() { return expressions; } + /** + * Build a record instance, supplying defaults for components not contributed by the wire. 
The + * non-projection path always supplies every component; the projection path may supply a subset. + */ + private Expression buildRecordInstance( + List liveDescriptorNames, List liveValues) { + Map byName = new HashMap<>(liveDescriptorNames.size() * 2); + for (int i = 0; i < liveDescriptorNames.size(); i++) { + byName.put(liveDescriptorNames.get(i), liveValues.get(i)); + } + java.lang.reflect.RecordComponent[] components = beanClass.getRecordComponents(); + Expression[] args = new Expression[components.length]; + for (int i = 0; i < components.length; i++) { + String compName = components[i].getName(); + Expression value = byName.get(compName); + if (value == null) { + TypeRef compType = TypeRef.of(components[i].getGenericType()); + value = nullValue(compType); + } + args[i] = value; + } + return new Expression.NewInstance(beanType, beanType.getRawType().getName(), args); + } + private static Expression nullValue(TypeRef fieldType) { Class rawType = fieldType.getRawType(); if (TypeUtils.isOptionalType(rawType)) { @@ -303,7 +389,11 @@ private void addDecoderMethods() { int numFields = schema.numFields(); for (int i = 0; i < numFields; i++) { Literal ordinal = Literal.ofInt(i); - Descriptor d = getDescriptorByFieldName(schema.field(i).name()); + String wireName = schema.field(i).name(); + if (projectionLiveNames != null && !projectionLiveNames.contains(wireName)) { + continue; + } + Descriptor d = getDescriptorByFieldName(wireName); TypeRef fieldType = d.getTypeRef(); Class rawFieldType = fieldType.getRawType(); TypeRef columnAccessType = fieldType; @@ -355,7 +445,14 @@ private CodegenContext buildImplClass() { int numFields = schema.numFields(); for (int i = 0; i < numFields; i++) { Literal ordinal = Literal.ofInt(i); - Descriptor d = getDescriptorByFieldName(schema.field(i).name()); + String wireName = schema.field(i).name(); + if (projectionLiveNames != null && !projectionLiveNames.contains(wireName)) { + // Removed wire field — no Java member to back this 
slot. The other interface methods + // can still be served lazily from the row; the row's offset arithmetic does not need + // us to read this slot. + continue; + } + Descriptor d = getDescriptorByFieldName(wireName); TypeRef fieldType = d.getTypeRef(); Class rawFieldType = fieldType.getRawType(); @@ -407,6 +504,7 @@ private CodegenContext buildImplClass() { // Note: adding constructor captures init code, so must happen after all fields are collected implClass.addConstructor("this.row = row;", BinaryRow.class, "row"); + final boolean projecting = projectionLiveNames != null; methodsNeedingImpl.forEach( (methodName, signatures) -> signatures.forEach( @@ -419,16 +517,54 @@ private CodegenContext buildImplClass() { params[i * 2] = methodType.parameterType(i); params[i * 2 + 1] = "unused" + i; } - implClass.addMethod( - methodName, - "throw new UnsupportedOperationException();", - methodType.returnType(), - params); + String body; + if (projecting && isAccessorOfAbsentField(methodName, methodType)) { + body = + "return " + + defaultValueExpression(methodType.returnType(), implClass) + + ";"; + } else { + body = "throw new UnsupportedOperationException();"; + } + implClass.addMethod(methodName, body, methodType.returnType(), params); })); return implClass; } + /** + * True when {@code methodName(returnType)} on the current bean class names a property whose field + * is not in the historical schema this projection codec is generating. Such a method gets a + * default-value body instead of {@code throw} so the interface proxy can serve callers that don't + * know the field is missing in this version. + */ + private boolean isAccessorOfAbsentField(String methodName, MethodType methodType) { + // An accessor takes no arguments; the live-member pass above only removes the no-arg signature. + // A parameterized method sharing a name and return type with a descriptor is not that field's + // accessor, so it must still throw rather than be silenced into a default value. 
+ if (methodType.parameterCount() != 0) { + return false; + } + Descriptor d = descriptorsMap.get(methodName); + if (d == null) { + return false; + } + if (d.getTypeRef().getRawType() != methodType.returnType()) { + return false; + } + // The main loop above emits getters for every wire field that is also a live Java member. + // Anything left in methodsNeedingImpl that matches a descriptor by name and type must + // correspond to a Java member whose wire field is not in this version. + return true; + } + + private static String defaultValueExpression(Class returnType, CodegenContext ctx) { + if (TypeUtils.isOptionalType(returnType)) { + return ctx.type(returnType) + ".empty()"; + } + return TypeUtils.defaultValue(returnType); + } + private Descriptor getDescriptorByFieldName(String fieldName) { String name = StringUtils.lowerUnderscoreToLowerCamelCase(fieldName); return descriptorsMap.get(name); diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowFactory.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowFactory.java new file mode 100644 index 0000000000..4e8e94f0cc --- /dev/null +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/RowFactory.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.encoder; + +import org.apache.fory.format.row.binary.BinaryRow; + +/** + * Allocates fresh {@link BinaryRow} instances for a fixed schema. Obtained once per schema from + * {@link Encoding#newRowFactory}. The compact format captures its schema-derived layout (offsets, + * widths, nullability) in the factory so every {@link #newRow} call reuses it; the default format + * builds a {@link BinaryRow} directly per call, matching {@code BinaryRowWriter#newRow}. Either way + * the schema-evolution decode path holds one factory per historical schema, giving it the same + * per-decode cost as the current-schema path that reads through the writer's cached layout. + */ +@FunctionalInterface +interface RowFactory { + BinaryRow newRow(); +} diff --git a/java/fory-format/src/main/java/org/apache/fory/format/type/SchemaHistory.java b/java/fory-format/src/main/java/org/apache/fory/format/type/SchemaHistory.java new file mode 100644 index 0000000000..fa107300a5 --- /dev/null +++ b/java/fory-format/src/main/java/org/apache/fory/format/type/SchemaHistory.java @@ -0,0 +1,692 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.type; + +import java.lang.reflect.AnnotatedElement; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.function.UnaryOperator; +import org.apache.fory.annotation.Internal; +import org.apache.fory.collection.Tuple2; +import org.apache.fory.format.annotation.ForySchema; +import org.apache.fory.format.annotation.ForyVersion; +import org.apache.fory.reflect.TypeRef; +import org.apache.fory.type.Descriptor; +import org.apache.fory.type.TypeResolutionContext; +import org.apache.fory.type.TypeUtils; +import org.apache.fory.util.StringUtils; + +/** + * Resolves the version history of a row-codec bean. Each entry exposes the schema as it appeared at + * a particular version, along with a strict hash that uniquely identifies the historical layout. + * Only used when {@code withSchemaEvolution()} is configured on the codec builder. + * + *

The hash mixes field names and nullability in addition to types, so that two schemas that + * differ only in field order or naming are distinguishable. This is intentionally a different hash + * from {@link DataTypes#computeSchemaHash} and is used only by versioning code paths. + */ +@Internal +public final class SchemaHistory { + + /** Implicit version of a live field that carries no {@link ForyVersion}. */ + private static final int FIRST_VERSION = 1; + + /** One entry in a {@link SchemaHistory}. */ + public static final class VersionedSchema { + private final int version; + private final Schema schema; + private final long strictHash; + private final boolean current; + private final Set liveFieldNames; + private final Map, VersionedSchema> nestedBeanSchemas; + + VersionedSchema( + int version, + Schema schema, + long strictHash, + boolean current, + Set liveFieldNames, + Map, VersionedSchema> nestedBeanSchemas) { + this.version = version; + this.schema = schema; + this.strictHash = strictHash; + this.current = current; + this.liveFieldNames = liveFieldNames; + this.nestedBeanSchemas = nestedBeanSchemas; + } + + public int version() { + return version; + } + + public Schema schema() { + return schema; + } + + public long strictHash() { + return strictHash; + } + + /** + * True when this entry is its bean's current (writer-side) schema. Routing uses this to decide + * whether a nested-bean slot embeds the current-version codec class (no suffix) or a historical + * projection class. + */ + public boolean isCurrent() { + return current; + } + + /** + * Names of fields in this version that still have a Java member on the current bean class. + * Other fields are read-and-discarded during projection. + */ + public Set liveFieldNames() { + return liveFieldNames; + } + + /** + * For each nested versioned bean class referenced by this schema, the exact inner entry chosen + * for this combination. Empty when the schema has no nested versioned beans. 
Each value carries + * its own {@code strictHash} and {@code nestedBeanSchemas}, so routing can identify and recurse + * into the inner subtree to arbitrary depth without re-deriving it from a version number. + * + *

Keyed by class, not by field. A writer writes one definition of a given bean class, so + * every field of that class in a single payload is at the same version; the enumeration carries + * one entry per class, and a class may back more than one field. + */ + public Map, VersionedSchema> nestedBeanSchemas() { + return nestedBeanSchemas; + } + } + + private final List versions; + private final VersionedSchema current; + + private SchemaHistory(List versions, VersionedSchema current) { + this.versions = versions; + this.current = current; + } + + public VersionedSchema current() { + return current; + } + + /** All known versions, ordered by version number ascending. */ + public List versions() { + return versions; + } + + /** + * Build a history from the bean's annotations. The schema for each version is transformed by + * {@code schemaTransform} after filtering; pass an identity for standard format, or {@code + * CompactBinaryRowWriter::sortSchema} for compact format. + */ + public static SchemaHistory build(Class beanClass, UnaryOperator schemaTransform) { + ForySchema schemaAnn = beanClass.getAnnotation(ForySchema.class); + Class removedFieldsClass = schemaAnn == null ? void.class : schemaAnn.removedFields(); + + List all = collectLiveFields(beanClass); + if (removedFieldsClass != void.class) { + all.addAll(collectRemovedFields(removedFieldsClass)); + } + + // Recursively expand any nested versioned bean field's own history. A versioned bean can be the + // field type directly, or the element of a list, or the value of a map; we locate it at any of + // those sites so the outer's enumeration can cross-product over the inner's versions. The inner + // schema substitutes back into the same site at materialization time. + // + // This recursion needs no cycle guard. 
TypeInference.inferField calls ctx.checkNoCycle on every + // bean it descends into, and RowCodecBuilder runs inferSchema in its constructor before build() + // reaches here, so a self-referential bean is already rejected. Recursion depth is bounded by + // the acyclic nesting of distinct versioned bean types. + for (FieldEntry fe : all) { + Class nested = findVersionedBean(fe.typeRef); + if (nested != null) { + fe.nestedBeanClass = nested; + fe.innerHistory = build(nested, schemaTransform); + } + } + + // Materialize a schema at every version V where the field set changes — both "since" and + // "until" boundaries qualify, because either adds or removes a field from the active set. + TreeSet schemaVersions = new TreeSet<>(); + schemaVersions.add(FIRST_VERSION); + for (FieldEntry fe : all) { + schemaVersions.add(fe.since); + if (fe.until != Integer.MAX_VALUE) { + schemaVersions.add(fe.until); + } + } + + validateNoNameCollision(all); + + // Sort by Java member name so the per-version schema matches the order + // TypeInference.inferSchema produces (which iterates Descriptor.getDescriptors, alphabetical + // by Java member name). Removed fields synthesize a Java name from their wire name. + all.sort((a, b) -> a.javaName.compareTo(b.javaName)); + // A field with finite [since, until) can leave two boundaries with identical field sets + // (e.g. v1 and v4 both lack a field that lived in [v2, v4)). Collapse boundaries that + // produce the same field set into one VersionedSchema, since they round-trip identically. + // A real strict-hash collision — two distinct field sets producing the same hash — is + // caught by comparing canonical signatures on insertion. 
+ int latestVersion = schemaVersions.last(); + Map bySignature = new LinkedHashMap<>(); + Map hashToSignature = new HashMap<>(); + String currentSignature = null; + for (int v : schemaVersions) { + List activeEntries = new ArrayList<>(); + for (FieldEntry fe : all) { + if (fe.since <= v && v < fe.until) { + activeEntries.add(fe); + } + } + // Cross-product over each nested versioned bean *class*, not each field. A writer always + // writes one definition of a given bean class, so every field of that class in a single + // payload is at the same version; the off-diagonal combinations (the same class at two + // versions in one record) are unreachable on the wire. Enumerating one dimension per class + // keeps the count a product over distinct nested classes rather than over fields, and lets + // a class appear in more than one field. If no entries have nested histories, this yields a + // single combination. + // + // The class count generated downstream is the product of the per-class version counts. If + // that growth becomes a concern, drop entries from each bean's History interface once you + // no longer need to read payloads from that range — that removes the corresponding + // VersionedSchema from this enumeration. Retiring history entries is purely a read-side + // concern; the writer always uses the current schema. + LinkedHashMap, List> innerChoices = new LinkedHashMap<>(); + for (FieldEntry fe : activeEntries) { + if (fe.innerHistory != null) { + innerChoices.putIfAbsent(fe.nestedBeanClass, fe.innerHistory.versions()); + } + } + for (Map, VersionedSchema> combination : cartesian(innerChoices)) { + List fields = new ArrayList<>(activeEntries.size()); + Set liveNames = new HashSet<>(); + for (FieldEntry fe : activeEntries) { + Field current = TypeInference.inferNamedField(fe.name, fe.typeRef); + VersionedSchema innerVs = + fe.nestedBeanClass == null ? 
null : combination.get(fe.nestedBeanClass); + // Substitute the chosen inner version's struct into the bean's site (direct field, + // list element, or map value), keeping the collection wrapper intact. + Field field = + innerVs == null + ? current + : substituteNestedStruct( + current, fe.typeRef, new DataTypes.StructType(innerVs.schema().fields())); + fields.add(field); + if (fe.live) { + liveNames.add(fe.name); + } + } + Schema schema = schemaTransform.apply(new Schema(fields)); + long hash = computeStrictSchemaHash(schema); + String signature = schemaSignature(schema); + String previousSig = hashToSignature.putIfAbsent(hash, signature); + if (previousSig != null && !previousSig.equals(signature)) { + throw new IllegalStateException( + "Strict hash collision for bean " + + beanClass.getName() + + " at version " + + v + + ": two distinct historical schemas hashed to the same value. Please file an " + + "issue with the bean definition."); + } + // This combination represents the writer-side configuration at outer version v only when + // every chosen inner is itself that inner's current schema. The bean's own current schema + // is the writer-side configuration at the latest version. + boolean innerAllCurrent = + combination.entrySet().stream().allMatch(e -> e.getValue().isCurrent()); + boolean isCurrent = v == latestVersion && innerAllCurrent; + VersionedSchema vs = + new VersionedSchema( + v, + schema, + hash, + isCurrent, + Collections.unmodifiableSet(liveNames), + Collections.unmodifiableMap(new HashMap<>(combination))); + // Prefer the all-current combination on collapse so the stored VS's nestedBeanSchemas + // map reflects the writer-side state at this outer version. This guards a contract on + // current().nestedBeanSchemas() in case two combinations ever canonicalize to the same + // signature; today's inner-bySignature collapse means inner.versions() has no wire-equal + // duplicates, but the guard preserves the invariant for future callers. 
+ if (innerAllCurrent) { + bySignature.put(signature, vs); + } else { + bySignature.putIfAbsent(signature, vs); + } + if (isCurrent) { + currentSignature = signature; + } + } + } + // The all-current combination at the latest version is always one of the cartesian entries, + // so currentSignature is always set and present here. + VersionedSchema current = bySignature.get(currentSignature); + if (current == null) { + throw new IllegalStateException("No current schema resolved for bean " + beanClass.getName()); + } + return new SchemaHistory( + Collections.unmodifiableList(new ArrayList<>(bySignature.values())), current); + } + + /** Cartesian product over (nested bean class, list-of-inner-VersionedSchema). */ + private static List, VersionedSchema>> cartesian( + LinkedHashMap, List> choices) { + List, VersionedSchema>> out = new ArrayList<>(); + out.add(new HashMap<>()); + for (Map.Entry, List> choice : choices.entrySet()) { + Class cls = choice.getKey(); + List options = choice.getValue(); + List, VersionedSchema>> next = new ArrayList<>(out.size() * options.size()); + for (Map, VersionedSchema> prefix : out) { + for (VersionedSchema opt : options) { + Map, VersionedSchema> extended = new HashMap<>(prefix); + extended.put(cls, opt); + next.add(extended); + } + } + out = next; + } + return out; + } + + /** + * Bean a top-level array/map codec evolves on, reachable through {@code elementType} (the array + * element or map value). Descends list/map/array wrappers and returns the bean at the leaf, + * matching the way {@link #findVersionedBean} descends. The bean need not be versioned: an + * unversioned bean must still take the evolution path so the strict-hash prefix is always present + * and the producer and consumer stay wire-compatible. Returns null when no bean is reachable and + * the codec needs no projection. Map keys are not inspected; they are always read at the current + * schema. 
+ */ + public static Class evolutionBean(TypeRef elementType, TypeResolutionContext typeCtx) { + Class raw = TypeUtils.getRawType(elementType); + if (raw == null) { + return null; + } + if (raw.isArray() || TypeUtils.isCollection(raw)) { + return evolutionBean(elementTypeRef(elementType, raw), typeCtx); + } + if (TypeUtils.isMap(raw)) { + return evolutionBean(TypeUtils.getMapKeyValueType(elementType).f1, typeCtx); + } + return TypeUtils.isBean(TypeRef.of(raw), typeCtx) ? raw : null; + } + + /** + * Project {@code currentField} (an array element or map value field at the bean's current schema) + * onto {@code historical}, swapping the bean's struct while keeping any list/map/array wrapper. + * For a directly-typed bean this is just the historical struct; for {@code List} or {@code + * Map} the wrapper is preserved around the historical struct. + */ + public static Field projectThroughWrapper( + Field currentField, TypeRef elementType, VersionedSchema historical) { + return substituteNestedStruct( + currentField, elementType, new DataTypes.StructType(historical.schema().fields())); + } + + /** + * Find the versioned bean reachable from a field type: the field type itself, a list/array + * element, or a map value. Returns null when no versioned bean is present. Map keys are not + * inspected: they carry no per-payload hash on the wire and are always read with the current + * schema, so enumerating key versions would only generate projection codecs that decode never + * dispatches to. + */ + private static Class findVersionedBean(TypeRef typeRef) { + Class raw = TypeUtils.getRawType(typeRef); + if (raw == null) { + return null; + } + if (raw.isArray() || TypeUtils.isCollection(raw)) { + return findVersionedBean(elementTypeRef(typeRef, raw)); + } + if (TypeUtils.isMap(raw)) { + Tuple2, TypeRef> kv = TypeUtils.getMapKeyValueType(typeRef); + return findVersionedBean(kv.f1); + } + return isBeanWithVersioning(raw) ? 
raw : null; + } + + /** + * Replace the nested bean's struct in {@code current} (the field at the bean's current schema) + * with {@code historical}, keeping any list/map wrapper. The bean sits at the field, the + * list/array element, or the map value, matching {@link #findVersionedBean}. + */ + private static Field substituteNestedStruct( + Field current, TypeRef typeRef, DataTypes.StructType historical) { + Class raw = TypeUtils.getRawType(typeRef); + if (raw != null && (raw.isArray() || TypeUtils.isCollection(raw))) { + Field element = + substituteNestedStruct( + DataTypes.arrayElementField(current), elementTypeRef(typeRef, raw), historical); + return DataTypes.arrayField(current.name(), element); + } + if (raw != null && TypeUtils.isMap(raw)) { + Tuple2, TypeRef> kv = TypeUtils.getMapKeyValueType(typeRef); + Field keyField = DataTypes.keyFieldForMap(current); + Field itemField = + substituteNestedStruct(DataTypes.itemFieldForMap(current), kv.f1, historical); + return DataTypes.mapField(current.name(), keyField, itemField); + } + return DataTypes.field(current.name(), historical, current.nullable()); + } + + /** + * Element type of a list/array field, derived the same way {@link TypeInference} does: arrays use + * the component type, iterables use the element type. + */ + private static TypeRef elementTypeRef(TypeRef typeRef, Class raw) { + return raw.isArray() ? typeRef.getComponentType() : TypeUtils.getElementType(typeRef); + } + + /** True if the class is a row-codec bean and carries any schema-evolution annotations. */ + private static boolean isBeanWithVersioning(Class cls) { + if (cls.isAnnotationPresent(ForySchema.class)) { + return true; + } + // Only introspect classes the row format actually treats as beans. 
TypeInference.inferField + // routes collection/map/array/enum field types away from Descriptor.getDescriptors, so a + // collection subclass that shadows a field name across its hierarchy round-trips fine even + // though getDescriptors would reject it. Gating on isBean keeps this probe consistent with + // inferField; getDescriptors then only throws for a class that genuinely cannot be a bean, + // which fails identically on the real encode/decode path. Use the same synthesize-interfaces + // context as inferField and the top-level array/map entry point (evolutionBean), so an + // interface bean nested as a field type, list element, or map value is discovered as a bean + // rather than rejected; otherwise its older versions are never enumerated and an older payload + // decodes at the interface's current layout. + TypeResolutionContext typeCtx = + new TypeResolutionContext(CustomTypeEncoderRegistry.customTypeHandler(), true); + if (!TypeUtils.isBean(cls, typeCtx)) { + return false; + } + for (Descriptor d : Descriptor.getDescriptors(cls)) { + if (lookupForyVersion(d) != null) { + return true; + } + } + return false; + } + + /** + * Canonical textual signature of a schema, used to distinguish a real strict-hash collision (two + * genuinely different schemas with the same hash) from the benign case where two version + * boundaries produce the same field set. + */ + private static String schemaSignature(Schema schema) { + StringBuilder sb = new StringBuilder(64); + for (Field field : schema.fields()) { + sb.append(field.name()) + .append(':') + .append(field.type()) + .append(field.nullable() ? "?" 
: "!") + .append(';'); + } + return sb.toString(); + } + + private static List collectRemovedFields(Class historyClass) { + List descriptors = Descriptor.getDescriptors(historyClass); + List out = new ArrayList<>(descriptors.size()); + for (Descriptor d : descriptors) { + ForyVersion ann = lookupForyVersion(d); + if (ann == null) { + throw new IllegalStateException( + "Removed-field declaration " + + historyClass.getName() + + "." + + d.getName() + + " requires a @ForyVersion(until = ...) annotation"); + } + if (ann.until() == Integer.MAX_VALUE) { + throw new IllegalStateException( + "Removed-field declaration " + + historyClass.getName() + + "." + + d.getName() + + " must specify @ForyVersion.until (no upper bound makes no sense for a field " + + "that has been removed)"); + } + if (ann.since() < FIRST_VERSION) { + throw new IllegalStateException( + "Invalid @ForyVersion on " + + historyClass.getName() + + "." + + d.getName() + + ": since (" + + ann.since() + + ") must be >= " + + FIRST_VERSION + + " (the first schema version). A since below that adds a version no writer can " + + "emit."); + } + if (ann.since() >= ann.until()) { + throw new IllegalStateException( + "Invalid @ForyVersion on " + + historyClass.getName() + + "." + + d.getName() + + ": since (" + + ann.since() + + ") must be strictly less than until (" + + ann.until() + + ")"); + } + // The history method's name must mirror the live field/method name. Wire names are + // derived the same way the live path derives them: descriptor name -> lower_underscore. + // For Lombok @Data or record-style beans the descriptor name is the field name + // ("tags"); for interface beans or JavaBean-style classes it is the method name + // ("getTags"). The user writes the history method to match. 
+ String wireName = StringUtils.lowerCamelToLowerUnderscore(d.getName()); + out.add( + new FieldEntry( + wireName, d.getName(), d.getTypeRef(), ann.since(), ann.until(), /*live*/ false)); + } + return out; + } + + private static List collectLiveFields(Class beanClass) { + List descriptors = Descriptor.getDescriptors(beanClass); + List out = new ArrayList<>(descriptors.size()); + for (Descriptor d : descriptors) { + ForyVersion ann = lookupForyVersion(d); + int since = ann == null ? FIRST_VERSION : ann.since(); + int until = ann == null ? Integer.MAX_VALUE : ann.until(); + if (since < FIRST_VERSION) { + throw new IllegalStateException( + "Invalid @ForyVersion on " + + beanClass.getName() + + "." + + d.getName() + + ": since (" + + since + + ") must be >= " + + FIRST_VERSION + + " (the first schema version). A since below that adds a version no writer can " + + "emit."); + } + // A live field still exists as a Java member, so it has no end-of-life version. A finite + // until would silently drop it from the current schema (until extends the version set, so + // latestVersion >= until excludes the field), and the writer would stop serializing a field + // the bean still has. Removals are declared on the history class via + // @ForySchema.removedFields. + if (until != Integer.MAX_VALUE) { + throw new IllegalStateException( + "Invalid @ForyVersion on " + + beanClass.getName() + + "." + + d.getName() + + ": a live field must not set until (" + + until + + "). Declare removed fields on the @ForySchema.removedFields history class instead."); + } + // No since/until ordering check here: a live field always has until == MAX_VALUE (enforced + // above), so the ordering check lives only on the removed-field path in collectRemovedFields. 
+ String wireName = StringUtils.lowerCamelToLowerUnderscore(d.getName()); + out.add(new FieldEntry(wireName, d.getName(), d.getTypeRef(), since, until, /*live*/ true)); + } + return out; + } + + private static ForyVersion lookupForyVersion(Descriptor d) { + ForyVersion ann = readAnnotation(d.getField()); + if (ann != null) { + return ann; + } + return readAnnotation(d.getReadMethod()); + } + + private static ForyVersion readAnnotation(AnnotatedElement element) { + return element == null ? null : element.getAnnotation(ForyVersion.class); + } + + private static void validateNoNameCollision(List entries) { + // For each pair with the same name, their [since, until) windows must not overlap. + Map> byName = new HashMap<>(); + for (FieldEntry fe : entries) { + byName.computeIfAbsent(fe.name, k -> new ArrayList<>()).add(fe); + } + for (Map.Entry> e : byName.entrySet()) { + List group = e.getValue(); + if (group.size() < 2) { + continue; + } + group.sort((a, b) -> Integer.compare(a.since, b.since)); + for (int i = 1; i < group.size(); i++) { + FieldEntry prev = group.get(i - 1); + FieldEntry curr = group.get(i); + if (curr.since < prev.until) { + throw new IllegalStateException( + "Field name '" + + e.getKey() + + "' is declared with overlapping version windows [" + + prev.since + + "," + + prev.until + + ") and [" + + curr.since + + "," + + curr.until + + "); each version must have one definition per name. Adjust the @ForyVersion " + + "annotations on the live field or in the removed-fields class to make the " + + "windows disjoint."); + } + } + } + } + + /** + * Strict schema hash, used only by versioning code paths. Distinguishes schemas that differ in + * field name or nullability, unlike {@link DataTypes#computeSchemaHash}. 
+ */ + private static long computeStrictSchemaHash(Schema schema) { + long hash = 1469598103934665603L; // FNV offset basis + Set seen = new HashSet<>(); + for (Field field : schema.fields()) { + if (!seen.add(field.name())) { + throw new IllegalStateException("Duplicate field name in schema: " + field.name()); + } + hash = hashField(hash, field); + } + return hash; + } + + private static long hashField(long hash, Field field) { + hash = mix(hash, field.name()); + DataType type = field.type(); + // The type's name() carries its identity including any inline width (e.g. + // fixedSizeBinary(N)), which is enough for every type except DecimalType, whose + // precision and scale are stored separately. Mix those in explicitly so two decimals of + // different shape don't collide. + hash = mix(hash, type.name()); + if (type instanceof DataTypes.DecimalType) { + hash = mix(hash, ((DataTypes.DecimalType) type).precision()); + hash = mix(hash, ((DataTypes.DecimalType) type).scale()); + } + hash = mix(hash, field.nullable() ? 1 : 0); + if (type instanceof DataTypes.ListType) { + hash = hashField(hash, DataTypes.arrayElementField(field)); + } else if (type instanceof DataTypes.MapType) { + hash = hashField(hash, DataTypes.keyFieldForMap(field)); + hash = hashField(hash, DataTypes.itemFieldForMap(field)); + } else if (type instanceof DataTypes.StructType) { + for (Field child : type.fields()) { + hash = hashField(hash, child); + } + } + return hash; + } + + private static long mix(long hash, long value) { + hash ^= value; + hash *= 1099511628211L; // FNV prime + return hash; + } + + private static long mix(long hash, String value) { + for (int i = 0; i < value.length(); i++) { + hash = mix(hash, value.charAt(i)); + } + return mix(hash, 0); + } + + private static final class FieldEntry { + final String name; + + /** + * Java member name used for canonical ordering. 
Matches {@link Descriptor#getName} so live + * fields and removed fields (declared on the history class) sort into the same order as {@link + * TypeInference#inferSchema} produces. + */ + final String javaName; + + final TypeRef typeRef; + final int since; + final int until; + final boolean live; + + /** + * The versioned bean reachable from this field (the field type, a list element, or a map + * value), or null when none. Keys the outer cross-product so every field backed by the same + * bean class shares one version dimension. + */ + Class nestedBeanClass; + + /** SchemaHistory of {@link #nestedBeanClass}, when this field references a versioned bean. */ + SchemaHistory innerHistory; + + FieldEntry( + String name, String javaName, TypeRef typeRef, int since, int until, boolean live) { + this.name = name; + this.javaName = javaName; + this.typeRef = typeRef; + this.since = since; + this.until = until; + this.live = live; + } + } +} diff --git a/java/fory-format/src/main/java/org/apache/fory/format/type/TypeInference.java b/java/fory-format/src/main/java/org/apache/fory/format/type/TypeInference.java index 4617f04faa..656d0203a7 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/type/TypeInference.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/type/TypeInference.java @@ -249,6 +249,16 @@ private static Field inferField(String name, TypeRef typeRef, TypeResolutionC } } + /** + * Infer a single named field from its Java type, used by schema-evolution code paths that need to + * reconstruct historical fields by name and type without going through a Java member. 
+ */ + static Field inferNamedField(String name, TypeRef typeRef) { + TypeResolutionContext ctx = + new TypeResolutionContext(CustomTypeEncoderRegistry.customTypeHandler(), true); + return inferField(name, typeRef, ctx); + } + public static String inferTypeName(TypeRef token) { StringBuilder sb = new StringBuilder(); if (TypeUtils.ITERABLE_TYPE.isSupertypeOf(token)) { diff --git a/java/fory-format/src/main/java11/module-info.java b/java/fory-format/src/main/java11/module-info.java index 0f6064b8e9..82f6432366 100644 --- a/java/fory-format/src/main/java11/module-info.java +++ b/java/fory-format/src/main/java11/module-info.java @@ -24,6 +24,7 @@ requires static transitive org.apache.arrow.memory.core; requires static transitive org.apache.arrow.vector; + exports org.apache.fory.format.annotation; exports org.apache.fory.format.encoder; exports org.apache.fory.format.row; exports org.apache.fory.format.row.binary; diff --git a/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java b/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java index 448ea689d3..ca5960e6e3 100644 --- a/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java +++ b/java/fory-format/src/test/java/org/apache/fory/format/encoder/ImplementInterfaceTest.java @@ -20,7 +20,9 @@ package org.apache.fory.format.encoder; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalDouble; import java.util.OptionalInt; @@ -329,6 +331,41 @@ public void testListTooLazy() { Assert.assertEquals(deserializedBean.f1().get(0).f1(), 42); } + public interface MapOuter { + Map f1(); + } + + static class MapOuterImpl implements MapOuter { + private final Map f1; + + MapOuterImpl(final Map f1) { + this.f1 = f1; + } + + @Override + public Map f1() { + return f1; + } + } + + /** + * Interface bean as a map value. 
Type inference reaches the map value type through {@code + * isSupported}, which must recognize the interface as a synthesizable bean the same way it does + * for a direct field or list element. + */ + @Test + public void testMapValueInterface() { + final Map map = new HashMap<>(); + map.put("k", new ListInnerImpl(42)); + final MapOuter bean1 = new MapOuterImpl(map); + final RowEncoder encoder = Encoders.bean(MapOuter.class); + final BinaryRow row = encoder.toRow(bean1); + final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); + row.pointTo(buffer, 0, buffer.size()); + final MapOuter deserializedBean = encoder.fromRow(row); + Assert.assertEquals(deserializedBean.f1().get("k").f1(), 42); + } + public interface Value extends Comparable { int v(); diff --git a/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionStressTest.java b/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionStressTest.java new file mode 100644 index 0000000000..975e9eb24f --- /dev/null +++ b/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionStressTest.java @@ -0,0 +1,1317 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.fory.format.encoder; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import lombok.Data; +import org.apache.fory.exception.ClassNotCompatibleException; +import org.apache.fory.format.annotation.ForySchema; +import org.apache.fory.format.annotation.ForyVersion; +import org.apache.fory.format.type.SchemaHistory; +import org.apache.fory.reflect.TypeRef; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * Stress tests for row-codec schema evolution. Each test probes a specific edge case; the names say + * what is being stressed. Tests that surfaced real bugs are kept with a note pointing at the fix; + * tests kept for coverage are short. + */ +public class SchemaEvolutionStressTest { + + // --------------------------------------------------------------------------- + // Long version chain: a field added at each version 1..5, plus a removal at v3. + // Verifies projection codecs are built and dispatched for every historical version. + // --------------------------------------------------------------------------- + + @Data + public static class ChainV1 { + private int a; // since 1 + } + + @Data + public static class ChainV2 { + private int a; + + @ForyVersion(since = 2) + private String b; + } + + @Data + public static class ChainV3 { + private int a; + + @ForyVersion(since = 2) + private String b; + + @ForyVersion(since = 3) + private long c; + } + + @Data + public static class ChainV4 { + private int a; + + @ForyVersion(since = 2) + private String b; + + @ForyVersion(since = 3) + private long c; + + @ForyVersion(since = 4) + private double d; + } + + /** + * v5 also removes the v1 'a' field starting at v5. The reader must therefore know about three + * different historical schemas: v1, v2-3, and v4 (since 'a' is removed and a new field 'e' shows + * up in v5; 'a' removal makes v5 differ from v4). 
+ */ + @Data + @ForySchema(removedFields = ChainV5.History.class) + public static class ChainV5 { + @ForyVersion(since = 2) + private String b; + + @ForyVersion(since = 3) + private long c; + + @ForyVersion(since = 4) + private double d; + + @ForyVersion(since = 5) + private boolean e; + + interface History { + @ForyVersion(until = 5) + int a(); + } + } + + @Test + public void longChainAllVersionsReadable() { + RowEncoder w1 = evolvingCodec(ChainV1.class); + RowEncoder w2 = evolvingCodec(ChainV2.class); + RowEncoder w3 = evolvingCodec(ChainV3.class); + RowEncoder w4 = evolvingCodec(ChainV4.class); + RowEncoder reader = evolvingCodec(ChainV5.class); + + ChainV1 v1 = new ChainV1(); + v1.setA(11); + ChainV2 v2 = new ChainV2(); + v2.setA(21); + v2.setB("two"); + ChainV3 v3 = new ChainV3(); + v3.setA(31); + v3.setB("three"); + v3.setC(333L); + ChainV4 v4 = new ChainV4(); + v4.setA(41); + v4.setB("four"); + v4.setC(444L); + v4.setD(4.4); + + ChainV5 out1 = reader.decode(w1.encode(v1)); + Assert.assertNull(out1.getB()); + Assert.assertEquals(out1.getC(), 0L); + Assert.assertEquals(out1.getD(), 0.0); + Assert.assertFalse(out1.isE()); + + ChainV5 out2 = reader.decode(w2.encode(v2)); + Assert.assertEquals(out2.getB(), "two"); + Assert.assertEquals(out2.getC(), 0L); + + ChainV5 out3 = reader.decode(w3.encode(v3)); + Assert.assertEquals(out3.getC(), 333L); + Assert.assertEquals(out3.getD(), 0.0); + + ChainV5 out4 = reader.decode(w4.encode(v4)); + Assert.assertEquals(out4.getB(), "four"); + Assert.assertEquals(out4.getC(), 444L); + Assert.assertEquals(out4.getD(), 4.4); + Assert.assertFalse(out4.isE()); + } + + // --------------------------------------------------------------------------- + // Compact format with alignment shuffle: v1 has only longs; v2 adds a byte. + // Compact sorts fields by alignment width so the v1 and v2 schemas have + // different physical orders, even though their logical field sets differ by + // only the added byte. 
+ // --------------------------------------------------------------------------- + + @Data + public static class AlignV1 { + private long x; + private long y; + } + + @Data + public static class AlignV2 { + private long x; + private long y; + + @ForyVersion(since = 2) + private byte flag; + } + + @Test + public void compactAlignmentReshuffleAcrossVersions() { + RowEncoder writer = + Encoders.buildBeanCodec(AlignV1.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + RowEncoder reader = + Encoders.buildBeanCodec(AlignV2.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + AlignV1 in = new AlignV1(); + in.setX(11); + in.setY(22); + AlignV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getX(), 11); + Assert.assertEquals(out.getY(), 22); + Assert.assertEquals(out.getFlag(), (byte) 0); // primitive default + } + + // --------------------------------------------------------------------------- + // Boxed vs primitive default for an absent field. + // --------------------------------------------------------------------------- + + @Data + public static class DefaultsV1 { + private String name; + } + + @Data + public static class DefaultsV2 { + private String name; + + @ForyVersion(since = 2) + private int primitiveCount; // default 0 + + @ForyVersion(since = 2) + private Integer boxedCount; // default null + } + + @Test + public void primitiveAndBoxedDefaults() { + RowEncoder writer = evolvingCodec(DefaultsV1.class); + RowEncoder reader = evolvingCodec(DefaultsV2.class); + DefaultsV1 in = new DefaultsV1(); + in.setName("n"); + DefaultsV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getName(), "n"); + Assert.assertEquals(out.getPrimitiveCount(), 0); + Assert.assertNull(out.getBoxedCount()); + } + + // --------------------------------------------------------------------------- + // Disjoint-window false collision (regression). 
A field whose [since, until) + // window starts above the base version and ends below infinity leaves the + // pre-since and post-until boundaries with identical field sets. SchemaHistory + // must collapse those into one entry rather than flagging a false collision. + // --------------------------------------------------------------------------- + + @Data + @ForySchema(removedFields = GappedWindow.History.class) + public static class GappedWindow { + private String name; + + interface History { + @ForyVersion(since = 2, until = 4) + int oldField(); + } + } + + @Test + public void disjointWindowDoesNotFalseCollide() { + // Build alone is the assertion: the bug was an IllegalStateException at build time. + RowEncoder codec = evolvingCodec(GappedWindow.class); + GappedWindow in = new GappedWindow(); + in.setName("hi"); + Assert.assertEquals(codec.decode(codec.encode(in)).getName(), "hi"); + } + + // --------------------------------------------------------------------------- + // Removed field whose original type was a nested struct. The projection + // codec must skip the slot without trying to read or decode it. 
+ // --------------------------------------------------------------------------- + + @Data + public static class StructRefV1 { + private String id; + private DefaultsV1 detail; // removed at v2 + } + + @Data + @ForySchema(removedFields = StructRefV2.History.class) + public static class StructRefV2 { + private String id; + + interface History { + @ForyVersion(until = 2) + DefaultsV1 detail(); + } + } + + @Test + public void removedNestedStructField() { + RowEncoder writer = evolvingCodec(StructRefV1.class); + RowEncoder reader = evolvingCodec(StructRefV2.class); + StructRefV1 in = new StructRefV1(); + in.setId("x"); + DefaultsV1 d = new DefaultsV1(); + d.setName("inner"); + in.setDetail(d); + StructRefV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getId(), "x"); + } + + // --------------------------------------------------------------------------- + // Removed collection-typed field. The history interface preserves the full + // parameterized type, so List and Map round-trip + // through the projection without losing element-type information. 
+ // --------------------------------------------------------------------------- + + @Data + public static class CollectionsV1 { + private String id; + private List tags; // removed at v2 + private Map counters; // removed at v2 + } + + @Data + @ForySchema(removedFields = CollectionsV2.History.class) + public static class CollectionsV2 { + private String id; + + interface History { + @ForyVersion(until = 2) + List tags(); + + @ForyVersion(until = 2) + Map counters(); + } + } + + @Test + public void removedParameterizedCollectionFields() { + RowEncoder writer = evolvingCodec(CollectionsV1.class); + RowEncoder reader = evolvingCodec(CollectionsV2.class); + CollectionsV1 in = new CollectionsV1(); + in.setId("c"); + in.setTags(Arrays.asList("alpha", "beta")); + Map counters = new HashMap<>(); + counters.put("k1", 1L); + counters.put("k2", 2L); + in.setCounters(counters); + CollectionsV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getId(), "c"); + } + + // --------------------------------------------------------------------------- + // Same wire-name retyped across versions: 'tag' was int [1,3), then String [3,inf). + // --------------------------------------------------------------------------- + + @Data + public static class RetypeV1 { + private int tag; // present in v1, v2 + } + + @Data + @ForySchema(removedFields = RetypeV3.History.class) + public static class RetypeV3 { + @ForyVersion(since = 3) + private String tag; + + interface History { + @ForyVersion(until = 3) + int tag(); + } + } + + @Test + public void retypedSameNameAcrossVersions() { + RowEncoder writer = evolvingCodec(RetypeV1.class); + RowEncoder reader = evolvingCodec(RetypeV3.class); + RetypeV1 in = new RetypeV1(); + in.setTag(7); + RetypeV3 out = reader.decode(writer.encode(in)); + // The 'tag' on the wire was int and is dropped during projection; the v3 String 'tag' has + // no source in this payload so defaults to null. 
+ Assert.assertNull(out.getTag()); + } + + // --------------------------------------------------------------------------- + // Wide schema (more than 64 fields) crossing the null-bitmap word boundary. + // --------------------------------------------------------------------------- + + @Data + public static class WideV1 { + private int f00, f01, f02, f03, f04, f05, f06, f07, f08, f09; + private int f10, f11, f12, f13, f14, f15, f16, f17, f18, f19; + private int f20, f21, f22, f23, f24, f25, f26, f27, f28, f29; + private int f30, f31, f32, f33, f34, f35, f36, f37, f38, f39; + private int f40, f41, f42, f43, f44, f45, f46, f47, f48, f49; + private int f50, f51, f52, f53, f54, f55, f56, f57, f58, f59; + private int f60, f61, f62, f63, f64, f65, f66, f67; + } + + @Data + public static class WideV2 { + private int f00, f01, f02, f03, f04, f05, f06, f07, f08, f09; + private int f10, f11, f12, f13, f14, f15, f16, f17, f18, f19; + private int f20, f21, f22, f23, f24, f25, f26, f27, f28, f29; + private int f30, f31, f32, f33, f34, f35, f36, f37, f38, f39; + private int f40, f41, f42, f43, f44, f45, f46, f47, f48, f49; + private int f50, f51, f52, f53, f54, f55, f56, f57, f58, f59; + private int f60, f61, f62, f63, f64, f65, f66, f67; + + @ForyVersion(since = 2) + private String extra; + } + + @Test + public void wideSchemaAcrossBitmapWord() { + RowEncoder writer = evolvingCodec(WideV1.class); + RowEncoder reader = evolvingCodec(WideV2.class); + WideV1 in = new WideV1(); + in.setF00(100); + in.setF63(163); + in.setF67(167); // past the first 64-bit bitmap word + WideV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getF00(), 100); + Assert.assertEquals(out.getF63(), 163); + Assert.assertEquals(out.getF67(), 167); + Assert.assertNull(out.getExtra()); + } + + // --------------------------------------------------------------------------- + // Many elements through a single projection codec: 100 elements written by the + // same older version must all decode 
correctly via the same projection codec, + // with each element's data preserved and no carry-over of state across slots. + // --------------------------------------------------------------------------- + + @Test + public void arrayManyElementsThroughOneProjection() { + ArrayEncoder> writer = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> reader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + List in = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + ChainV2 e = new ChainV2(); + e.setA(i); + e.setB("elem-" + i); + in.add(e); + } + List out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 100); + for (int i = 0; i < 100; i++) { + Assert.assertEquals(out.get(i).getB(), "elem-" + i); + Assert.assertEquals(out.get(i).getC(), 0L); + Assert.assertFalse(out.get(i).isE()); + } + } + + // --------------------------------------------------------------------------- + // Sanity: two readers for the same (class, history) co-exist without + // interfering. The two readers share the cached generated codec class (by + // design of the codec cache), so the test exercises whether + // BinaryRowEncoder's per-instance projection map and current-codec instance + // are correctly per-reader rather than accidentally shared. 
+ // --------------------------------------------------------------------------- + + @Test + public void twoIndependentReadersForSameClass() { + RowEncoder writer = evolvingCodec(DefaultsV1.class); + RowEncoder r1 = evolvingCodec(DefaultsV2.class); + RowEncoder r2 = evolvingCodec(DefaultsV2.class); + DefaultsV1 in1 = new DefaultsV1(); + in1.setName("first"); + DefaultsV1 in2 = new DefaultsV1(); + in2.setName("second"); + byte[] b1 = writer.encode(in1); + byte[] b2 = writer.encode(in2); + Assert.assertEquals(r1.decode(b1).getName(), "first"); + Assert.assertEquals(r2.decode(b2).getName(), "second"); + Assert.assertEquals(r1.decode(b2).getName(), "second"); + Assert.assertEquals(r2.decode(b1).getName(), "first"); + } + + // --------------------------------------------------------------------------- + // Schema-history misconfiguration: overlapping windows for the same name + // must fail builder construction, not at first bad payload. + // --------------------------------------------------------------------------- + + @Data + @ForySchema(removedFields = OverlapMisconfig.History.class) + public static class OverlapMisconfig { + // Live field 'x' since 1 (default) collides with the removed window [1, 5). + private int x; + + interface History { + @ForyVersion(since = 1, until = 5) + int x(); + } + } + + @Test(expectedExceptions = IllegalStateException.class) + public void overlappingWindowFailsAtBuild() { + evolvingCodec(OverlapMisconfig.class); + } + + // --------------------------------------------------------------------------- + // A removed-field history declaration must carry a well-formed @ForyVersion. + // Each misconfiguration fails at build with a message that names the offending + // declaration, so the user can fix the annotation rather than chase a decode error. 
+ // --------------------------------------------------------------------------- + + @Data + @ForySchema(removedFields = MissingAnnotation.History.class) + public static class MissingAnnotation { + private int x; + + interface History { + // No @ForyVersion: a removed field has no [since, until) window without it. + String legacy(); + } + } + + @Data + @ForySchema(removedFields = MissingUntil.History.class) + public static class MissingUntil { + private int x; + + interface History { + @ForyVersion(since = 2) + String legacy(); + } + } + + @Data + @ForySchema(removedFields = EmptyWindow.History.class) + public static class EmptyWindow { + private int x; + + interface History { + @ForyVersion(since = 5, until = 5) + String legacy(); + } + } + + @Test + public void removedFieldWithoutForyVersionFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows( + IllegalStateException.class, + () -> + Encoders.buildBeanCodec(MissingAnnotation.class) + .withSchemaEvolution() + .build() + .get()); + Assert.assertTrue(e.getMessage().contains("requires a @ForyVersion"), e.getMessage()); + } + + @Test + public void removedFieldWithoutUntilFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows(IllegalStateException.class, () -> evolvingCodec(MissingUntil.class)); + Assert.assertTrue(e.getMessage().contains("must specify @ForyVersion.until"), e.getMessage()); + } + + @Test + public void removedFieldEmptyWindowFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows(IllegalStateException.class, () -> evolvingCodec(EmptyWindow.class)); + Assert.assertTrue(e.getMessage().contains("must be strictly less than until"), e.getMessage()); + } + + /** A still-present field carrying a finite until; removals belong on the history class. 
*/ + @Data + public static class LiveFieldWithUntil { + private int x; + + @ForyVersion(until = 3) + private String stillHere; + } + + @Test + public void liveFieldWithUntilFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows( + IllegalStateException.class, () -> evolvingCodec(LiveFieldWithUntil.class)); + Assert.assertTrue(e.getMessage().contains("live field must not set until"), e.getMessage()); + } + + /** A since below the first version adds a schema version no writer can emit. */ + @Data + public static class LiveFieldSinceBelowFirst { + private int x; + + @ForyVersion(since = 0) + private String added; + } + + @Test + public void liveFieldSinceBelowFirstFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows( + IllegalStateException.class, () -> evolvingCodec(LiveFieldSinceBelowFirst.class)); + Assert.assertTrue(e.getMessage().contains("must be >= 1"), e.getMessage()); + } + + @Data + @ForySchema(removedFields = RemovedFieldSinceBelowFirst.History.class) + public static class RemovedFieldSinceBelowFirst { + private int x; + + interface History { + @ForyVersion(since = 0, until = 3) + String legacy(); + } + } + + @Test + public void removedFieldSinceBelowFirstFailsAtBuild() { + IllegalStateException e = + Assert.expectThrows( + IllegalStateException.class, () -> evolvingCodec(RemovedFieldSinceBelowFirst.class)); + Assert.assertTrue(e.getMessage().contains("must be >= 1"), e.getMessage()); + } + + // --------------------------------------------------------------------------- + // A field whose type is a Collection subclass that shadows a field name across + // its own hierarchy. The row format encodes it through the iterable branch and + // never introspects it as a bean, so it round-trips fine. 
SchemaHistory must + // apply the same iterable/map/bean classification before introspecting a nested + // field type; otherwise it calls Descriptor.getDescriptors on the shadowed + // collection class and fails the whole history build on a bean that works. + // --------------------------------------------------------------------------- + + public static class TaggedListBase extends ArrayList { + protected String marker; + } + + // Shadows TaggedListBase.marker, which makes Descriptor.getDescriptors reject + // this class even though the codec treats it purely as a List. + public static class TaggedList extends TaggedListBase { + protected String marker; + } + + @Data + public static class ShadowedCollectionV2 { + private TaggedList labels; + + @ForyVersion(since = 2) + private String tag; + } + + @Test + public void versionedBeanWithShadowedCollectionFieldBuilds() { + RowEncoder codec = evolvingCodec(ShadowedCollectionV2.class); + ShadowedCollectionV2 in = new ShadowedCollectionV2(); + TaggedList labels = new TaggedList<>(); + labels.add("a"); + labels.add("b"); + in.setLabels(labels); + in.setTag("t"); + ShadowedCollectionV2 out = codec.decode(codec.encode(in)); + Assert.assertEquals(out.getLabels(), Arrays.asList("a", "b")); + Assert.assertEquals(out.getTag(), "t"); + } + + // --------------------------------------------------------------------------- + // Roundtrip a List field nested inside a versioned outer record. + // Verifies the projection codec generated for the outer correctly handles + // an inline list of plain beans whose layout is fixed. 
+ // --------------------------------------------------------------------------- + + @Data + public static class NestedListV1 { + private List items; + } + + @Data + public static class NestedListV2 { + private List items; + + @ForyVersion(since = 2) + private String tag; + } + + // --------------------------------------------------------------------------- + // Evolution flag asymmetry: same class, one side opt-in, the other not. + // Documented as wire-incompatible. Verify the failure mode is a clear + // ClassNotCompatibleException, not silent garbage. + // --------------------------------------------------------------------------- + + @Test + public void evolutionFlagAsymmetryFailsLoud() { + RowEncoder withFlag = evolvingCodec(DefaultsV1.class); + RowEncoder noFlag = Encoders.buildBeanCodec(DefaultsV1.class).build().get(); + DefaultsV1 in = new DefaultsV1(); + in.setName("hi"); + byte[] withFlagBytes = withFlag.encode(in); + try { + noFlag.decode(withFlagBytes); + Assert.fail("expected ClassNotCompatibleException"); + } catch (ClassNotCompatibleException expected) { + // ok + } + byte[] noFlagBytes = noFlag.encode(in); + try { + withFlag.decode(noFlagBytes); + Assert.fail("expected ClassNotCompatibleException"); + } catch (ClassNotCompatibleException expected) { + // ok + } + } + + @Test + public void evolutionFlagAsymmetryFailsLoud_array() { + ArrayEncoder> withFlag = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> noFlag = + Encoders.buildArrayCodec(new TypeRef>() {}).build().get(); + DefaultsV1 v = new DefaultsV1(); + v.setName("hi"); + List in = Arrays.asList(v); + // Evolution-on consumer reading evolution-off bytes: the absent strict-hash prefix is read + // out of the array header and produces a hash mismatch. 
+ byte[] noFlagBytes = noFlag.encode(in); + try { + withFlag.decode(noFlagBytes); + Assert.fail("expected ClassNotCompatibleException"); + } catch (ClassNotCompatibleException expected) { + // ok + } + // Evolution-off consumer reading evolution-on bytes: the 8-byte hash prefix bleeds into the + // array header. We cannot guarantee a clean failure mode without a wire-format-level flag, + // but we at least require the decode to throw rather than silently return a plausible-looking + // array. Documented as wire-incompatible in the user guide; mismatched producers/consumers + // must use the same flag. + byte[] withFlagBytes = withFlag.encode(in); + try { + List out = noFlag.decode(withFlagBytes); + // If decode returned, sanity-check it didn't silently produce a "correct" result. The + // array length and the recovered string must not both look right. + boolean lengthLooksRight = out != null && out.size() == in.size(); + boolean stringLooksRight = + lengthLooksRight && !out.isEmpty() && "hi".equals(out.get(0).getName()); + Assert.assertFalse( + lengthLooksRight && stringLooksRight, + "evolution-off decoder silently accepted evolution-on bytes as a valid array"); + } catch (RuntimeException | AssertionError expected) { + // ok — undefined behavior, but a thrown exception is a tolerable failure mode. + } + } + + @Test + public void evolutionFlagAsymmetryFailsLoud_map() { + MapEncoder> withFlag = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> noFlag = + Encoders.buildMapCodec(new TypeRef>() {}).build().get(); + DefaultsV1 v = new DefaultsV1(); + v.setName("hi"); + LinkedHashMap in = new LinkedHashMap<>(); + in.put("k", v); + // Evolution-on consumer reading evolution-off bytes: clean hash mismatch. 
+ byte[] noFlagBytes = noFlag.encode(in); + try { + withFlag.decode(noFlagBytes); + Assert.fail("expected ClassNotCompatibleException"); + } catch (ClassNotCompatibleException expected) { + // ok + } + // Reverse direction: see the array test above for the rationale. Require a thrown exception + // or a value that is observably wrong. + byte[] withFlagBytes = withFlag.encode(in); + try { + Map out = noFlag.decode(withFlagBytes); + boolean sizeLooksRight = out != null && out.size() == in.size(); + boolean valueLooksRight = + sizeLooksRight + && out.containsKey("k") + && out.get("k") != null + && "hi".equals(out.get("k").getName()); + Assert.assertFalse( + sizeLooksRight && valueLooksRight, + "evolution-off decoder silently accepted evolution-on bytes as a valid map"); + } catch (RuntimeException | AssertionError expected) { + // ok — undefined behavior, but a thrown exception is a tolerable failure mode. + } + } + + // --------------------------------------------------------------------------- + // Map with a versioned bean as the KEY (rare; documented as not dispatched). + // Verify the codec at least builds and the current-version round-trip works, + // confirming the documented behavior doesn't crash. + // --------------------------------------------------------------------------- + + @Test + public void mapWithVersionedKey() { + MapEncoder> codec = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + DefaultsV2 k = new DefaultsV2(); + k.setName("k"); + k.setPrimitiveCount(1); + k.setBoxedCount(2); + Map in = new HashMap<>(); + in.put(k, "v"); + Map out = codec.decode(codec.encode(in)); + Assert.assertEquals(out.size(), 1); + DefaultsV2 outKey = out.keySet().iterator().next(); + Assert.assertEquals(outKey.getName(), "k"); + Assert.assertEquals(outKey.getPrimitiveCount(), 1); + Assert.assertEquals(outKey.getBoxedCount(), Integer.valueOf(2)); + } + + // A top-level map whose value evolves while the key stays a struct bean. 
The value projects from + // an older version; the key (same shape on both sides) must round-trip unchanged. The map codec + // only applies the value's projection suffix to the value position (MapEncoderBuilder scopes + // nestedBeanSuffix to inValuePosition), so the key bean is always decoded at its current schema. + @Test + public void mapStructKeyValueEvolution() { + MapEncoder> writer = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + DefaultsV2 key = new DefaultsV2(); + key.setName("k"); + key.setPrimitiveCount(7); + key.setBoxedCount(8); + DefaultsV1 val = new DefaultsV1(); + val.setName("val"); + Map in = new HashMap<>(); + in.put(key, val); + Map out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 1); + Map.Entry entry = out.entrySet().iterator().next(); + Assert.assertEquals(entry.getKey().getName(), "k"); + Assert.assertEquals(entry.getKey().getPrimitiveCount(), 7); + Assert.assertEquals(entry.getKey().getBoxedCount(), Integer.valueOf(8)); + Assert.assertEquals(entry.getValue().getName(), "val"); + Assert.assertEquals(entry.getValue().getPrimitiveCount(), 0); + Assert.assertNull(entry.getValue().getBoxedCount()); + } + + // A row field typed as Map. findVersionedBean must not treat the map + // key + // as a version dimension: keys carry no per-payload hash and are read with the current schema, so + // enumerating key versions would only generate projection codecs decode never dispatches to. The + // outer bean still evolves on its own fields; the keyed map round-trips with the key at current. 
+ @Data + public static class KeyMapHolderV1 { + private Map byKey; + } + + @Data + public static class KeyMapHolderV2 { + private Map byKey; + + @ForyVersion(since = 2) + private String note; + } + + @Test + public void versionedBeanAsMapKeyInRowField() { + RowEncoder writer = evolvingCodec(KeyMapHolderV1.class); + RowEncoder reader = evolvingCodec(KeyMapHolderV2.class); + DefaultsV2 key = new DefaultsV2(); + key.setName("k"); + key.setPrimitiveCount(7); + key.setBoxedCount(8); + KeyMapHolderV1 in = new KeyMapHolderV1(); + in.setByKey(new HashMap<>()); + in.getByKey().put(key, "v"); + KeyMapHolderV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getByKey().size(), 1); + DefaultsV2 outKey = out.getByKey().keySet().iterator().next(); + Assert.assertEquals(outKey.getName(), "k"); + Assert.assertEquals(outKey.getPrimitiveCount(), 7); + Assert.assertEquals(out.getByKey().get(outKey), "v"); + Assert.assertNull(out.getNote()); // note added at v2; v1 payload defaults it + } + + // --------------------------------------------------------------------------- + // Removed nullable struct that was null on the wire: the v1 writer leaves + // the slot's null bit set; the v2 reader skips the slot during projection. 
+ // --------------------------------------------------------------------------- + + @Data + public static class NullableStructV1 { + private String id; + private DefaultsV1 detail; // nullable, removed at v2 + } + + @Data + @ForySchema(removedFields = NullableStructV2.History.class) + public static class NullableStructV2 { + private String id; + + interface History { + @ForyVersion(until = 2) + DefaultsV1 detail(); + } + } + + @Test + public void removedNullableStructWasNullOnWire() { + RowEncoder writer = evolvingCodec(NullableStructV1.class); + RowEncoder reader = evolvingCodec(NullableStructV2.class); + NullableStructV1 in = new NullableStructV1(); + in.setId("only-id"); + // detail intentionally left null + NullableStructV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getId(), "only-id"); + } + + // --------------------------------------------------------------------------- + // Builder method ordering: compactEncoding() before vs after withSchemaEvolution() + // must produce equivalent codecs. 
+ // --------------------------------------------------------------------------- + + @Test + public void builderMethodOrderingIsCommutative() { + RowEncoder w = + Encoders.buildBeanCodec(DefaultsV1.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + RowEncoder rOrderA = + Encoders.buildBeanCodec(DefaultsV2.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + RowEncoder rOrderB = + Encoders.buildBeanCodec(DefaultsV2.class) + .withSchemaEvolution() + .compactEncoding() + .build() + .get(); + DefaultsV1 in = new DefaultsV1(); + in.setName("commute"); + byte[] bytes = w.encode(in); + Assert.assertEquals(rOrderA.decode(bytes).getName(), "commute"); + Assert.assertEquals(rOrderB.decode(bytes).getName(), "commute"); + } + + @Test + public void nestedListSurvivesOuterProjection() { + RowEncoder writer = evolvingCodec(NestedListV1.class); + RowEncoder reader = evolvingCodec(NestedListV2.class); + DefaultsV1 a = new DefaultsV1(); + a.setName("a"); + DefaultsV1 b = new DefaultsV1(); + b.setName("b"); + NestedListV1 in = new NestedListV1(); + in.setItems(Arrays.asList(a, b)); + NestedListV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getItems().size(), 2); + Assert.assertEquals(out.getItems().get(0).getName(), "a"); + Assert.assertEquals(out.getItems().get(1).getName(), "b"); + Assert.assertNull(out.getTag()); + } + + // --------------------------------------------------------------------------- + // Nested versioned bean: a parent bean with a struct field whose own type is + // versioned independently. The wire layout for the inner struct is inline in + // the parent's bytes with no per-inner hash. The reader, dispatching on the + // parent's strict hash, needs to choose an inner schema consistent with what + // the writer used. + // --------------------------------------------------------------------------- + + /** Stand-in for "older code that wrote the inner struct without field x". 
*/ + @Data + public static class NestedInnerWriter { + private String name; + } + + /** Stand-in for "older code that wrote the outer containing NestedInnerWriter". */ + @Data + public static class NestedOuterWriter { + private long id; + private NestedInnerWriter inner; + } + + /** Newer inner with an added field at v2. */ + @Data + public static class NestedInnerV2 { + private String name; + + @ForyVersion(since = 2) + private String addedField; + } + + /** Newer outer that still has just (id, inner) but its inner type evolved. */ + @Data + public static class NestedOuterV2 { + private long id; + private NestedInnerV2 inner; + } + + @Test + public void nestedInnerEvolution_readerInnerNewerThanWriter() { + // Writer uses the "older shape" inner. Both writer and reader are evolution-on so they + // agree on strict-hash framing. + RowEncoder writer = evolvingCodec(NestedOuterWriter.class); + RowEncoder reader = evolvingCodec(NestedOuterV2.class); + + NestedOuterWriter in = new NestedOuterWriter(); + in.setId(42); + NestedInnerWriter inn = new NestedInnerWriter(); + inn.setName("hello"); + in.setInner(inn); + + byte[] bytes = writer.encode(in); + NestedOuterV2 out = reader.decode(bytes); + Assert.assertEquals(out.getId(), 42); + Assert.assertNotNull(out.getInner()); + Assert.assertEquals(out.getInner().getName(), "hello"); + Assert.assertNull(out.getInner().getAddedField()); + } + + // --------------------------------------------------------------------------- + // Outer + inner versioned independently. The cross-product enumeration must + // generate a projection codec for each (outer-version, inner-version) pair + // that isn't the current combination. + // --------------------------------------------------------------------------- + + /** Outer with its own added field at v2; inner stays at v1. 
*/ + @Data + public static class CrossOuterV2_InnerV1 { + private long id; + private NestedInnerWriter inner; + + @ForyVersion(since = 2) + private String label; + } + + /** Outer v2 reader with inner evolved to v2. Both dimensions evolve independently. */ + @Data + public static class CrossOuterV2_InnerV2 { + private long id; + private NestedInnerV2 inner; + + @ForyVersion(since = 2) + private String label; + } + + @Test + public void crossOuterAndInnerEvolution() { + // Writer writes outer V1 + inner V1 (no label, no addedField). + RowEncoder writer = evolvingCodec(NestedOuterWriter.class); + RowEncoder reader = evolvingCodec(CrossOuterV2_InnerV2.class); + + NestedOuterWriter in = new NestedOuterWriter(); + in.setId(100); + NestedInnerWriter inn = new NestedInnerWriter(); + inn.setName("legacy-inner"); + in.setInner(inn); + + byte[] bytes = writer.encode(in); + CrossOuterV2_InnerV2 out = reader.decode(bytes); + Assert.assertEquals(out.getId(), 100); + Assert.assertEquals(out.getInner().getName(), "legacy-inner"); + Assert.assertNull(out.getInner().getAddedField()); + Assert.assertNull(out.getLabel()); + } + + /** + * Contract: {@code SchemaHistory.current().nestedBeanSchemas()} must report each nested bean at + * its current entry. Two cross-product combinations canonicalizing to the same signature is rare + * today (the inner's own bySignature collapses wire-equal schemas before the outer sees them) but + * the contract is documented and future callers may rely on it. 
+ */ + @Test + public void schemaHistoryCurrentReflectsCurrentInnerVersions() { + SchemaHistory history = + SchemaHistory.build( + CrossOuterV2_InnerV2.class, java.util.function.UnaryOperator.identity()); + SchemaHistory.VersionedSchema current = history.current(); + Assert.assertTrue(current.isCurrent(), "history.current() must be marked current"); + for (Map.Entry, SchemaHistory.VersionedSchema> e : + current.nestedBeanSchemas().entrySet()) { + SchemaHistory innerHistory = + SchemaHistory.build(e.getKey(), java.util.function.UnaryOperator.identity()); + Assert.assertTrue( + e.getValue().isCurrent(), + "current().nestedBeanSchemas() must report inner " + e.getKey() + " at its current"); + Assert.assertEquals( + e.getValue().version(), + innerHistory.current().version(), + "inner current version mismatch for " + e.getKey()); + } + } + + // --------------------------------------------------------------------------- + // Cross-product enumeration must route inner-bean versions through array and + // map projection codecs, not just through the row codec. The reader's outer + // type has N outer versions x M inner versions; multiple cross-product entries + // share an outer version number, so the per-class suffix must encode the + // inner version to keep them from colliding on the codegen cache. 
+ // --------------------------------------------------------------------------- + + @Test + public void crossOuterAndInnerEvolution_array() { + ArrayEncoder> writer = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> reader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + + List in = new ArrayList<>(); + for (int i = 0; i < 3; i++) { + NestedOuterWriter e = new NestedOuterWriter(); + e.setId(i); + NestedInnerWriter inn = new NestedInnerWriter(); + inn.setName("legacy-" + i); + e.setInner(inn); + in.add(e); + } + + List out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 3); + for (int i = 0; i < 3; i++) { + Assert.assertEquals(out.get(i).getId(), i); + Assert.assertEquals(out.get(i).getInner().getName(), "legacy-" + i); + Assert.assertNull(out.get(i).getInner().getAddedField()); + Assert.assertNull(out.get(i).getLabel()); + } + } + + @Test + public void crossOuterAndInnerEvolution_map() { + MapEncoder> writer = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> reader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + + LinkedHashMap in = new LinkedHashMap<>(); + for (int i = 0; i < 3; i++) { + NestedOuterWriter e = new NestedOuterWriter(); + e.setId(i); + NestedInnerWriter inn = new NestedInnerWriter(); + inn.setName("legacy-" + i); + e.setInner(inn); + in.put("k" + i, e); + } + + Map out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.size(), 3); + for (int i = 0; i < 3; i++) { + CrossOuterV2_InnerV2 v = out.get("k" + i); + Assert.assertNotNull(v, "missing key k" + i); + Assert.assertEquals(v.getId(), i); + Assert.assertEquals(v.getInner().getName(), "legacy-" + i); + Assert.assertNull(v.getInner().getAddedField()); + Assert.assertNull(v.getLabel()); + } + } + + // 
--------------------------------------------------------------------------- + // Under evolution, array/map payloads carry an 8-byte schema-hash prefix. A + // payload too small to hold that prefix is malformed and must fail loudly + // rather than feed a negative size into pointTo. + // --------------------------------------------------------------------------- + + @Test + public void arrayPayloadBelowHashPrefixFailsLoudly() { + ArrayEncoder> codec = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + Assert.expectThrows(ClassNotCompatibleException.class, () -> codec.decode(new byte[3])); + } + + @Test + public void mapPayloadBelowHashPrefixFailsLoudly() { + MapEncoder> codec = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + Assert.expectThrows(ClassNotCompatibleException.class, () -> codec.decode(new byte[3])); + } + + // --------------------------------------------------------------------------- + // Three-level nesting: L1 -> L2 -> L3, each independently versioned. Because + // L2's own history cross-products over L3's versions, L2's history holds two + // entries that share a version number but differ in their L3 layout. Routing + // must pick the L2 entry whose L3 matches the writer, not the first one with a + // matching version number. Identifies the inner combination by strict hash, so + // it resolves the correct subtree to arbitrary depth. 
+ // --------------------------------------------------------------------------- + + @Data + public static class L3Writer { + private String name; + } + + @Data + public static class L2Writer { + private long tag; + private L3Writer leaf; + } + + @Data + public static class L1Writer { + private long id; + private L2Writer mid; + } + + @Data + public static class L3V2 { + private String name; + + @ForyVersion(since = 2) + private String note; + } + + @Data + public static class L2V2 { + private long tag; + private L3V2 leaf; + + @ForyVersion(since = 2) + private String midLabel; + } + + @Data + public static class L1V2 { + private long id; + private L2V2 mid; + + @ForyVersion(since = 2) + private String outerLabel; + } + + @Test + public void threeLevelNestedEvolution() { + RowEncoder writer = evolvingCodec(L1Writer.class); + RowEncoder reader = evolvingCodec(L1V2.class); + + L1Writer in = new L1Writer(); + in.setId(7); + L2Writer mid = new L2Writer(); + mid.setTag(11); + L3Writer leaf = new L3Writer(); + leaf.setName("deep"); + mid.setLeaf(leaf); + in.setMid(mid); + + L1V2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getId(), 7); + Assert.assertNull(out.getOuterLabel()); + Assert.assertEquals(out.getMid().getTag(), 11); + Assert.assertNull(out.getMid().getMidLabel()); + Assert.assertEquals(out.getMid().getLeaf().getName(), "deep"); + Assert.assertNull(out.getMid().getLeaf().getNote()); + } + + // --------------------------------------------------------------------------- + // The same versioned bean class in two fields. A writer writes one definition + // of that class, so both fields are always at the same version on the wire; + // the enumeration carries one version dimension per class, not per field, so a + // class may back more than one slot. 
+ // --------------------------------------------------------------------------- + + @Data + public static class TwoLeafWriter { + private L3Writer first; + private L3Writer second; + } + + @Data + public static class TwoLeafV2 { + private L3V2 first; + private L3V2 second; + } + + @Test + public void sameClassInTwoFields() { + RowEncoder writer = evolvingCodec(TwoLeafWriter.class); + RowEncoder reader = evolvingCodec(TwoLeafV2.class); + + TwoLeafWriter in = new TwoLeafWriter(); + L3Writer a = new L3Writer(); + a.setName("alpha"); + L3Writer b = new L3Writer(); + b.setName("beta"); + in.setFirst(a); + in.setSecond(b); + + TwoLeafV2 out = reader.decode(writer.encode(in)); + Assert.assertEquals(out.getFirst().getName(), "alpha"); + Assert.assertNull(out.getFirst().getNote()); + Assert.assertEquals(out.getSecond().getName(), "beta"); + Assert.assertNull(out.getSecond().getNote()); + } + + private static RowEncoder evolvingCodec(Class beanClass) { + return Encoders.buildBeanCodec(beanClass).withSchemaEvolution().build().get(); + } +} diff --git a/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionTest.java b/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionTest.java new file mode 100644 index 0000000000..c77b5fa518 --- /dev/null +++ b/java/fory-format/src/test/java/org/apache/fory/format/encoder/SchemaEvolutionTest.java @@ -0,0 +1,820 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.format.encoder; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import lombok.Data; +import org.apache.fory.format.annotation.ForySchema; +import org.apache.fory.format.annotation.ForyVersion; +import org.apache.fory.memory.MemoryBuffer; +import org.apache.fory.reflect.TypeRef; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class SchemaEvolutionTest { + + /** Original v1 bean: just a name and an age. */ + @Data + public static class PersonV1 { + private String name; + private int age; + } + + /** + * v2: added an email. The codec built against this class must still be able to read v1 payloads + * (email will default to null). + */ + @Data + public static class PersonV2 { + private String name; + private int age; + + @ForyVersion(since = 2) + private String email; + } + + /** + * v3: same as v2 with the age field removed. The codec built against this class must read v1 + * payloads (with age) and v2 payloads (with age + email). + */ + @Data + @ForySchema(removedFields = PersonV3.History.class) + public static class PersonV3 { + private String name; + + @ForyVersion(since = 2) + private String email; + + interface History { + @ForyVersion(until = 3) + int age(); + } + } + + /** Round-trip at the current version: writing PersonV2, reading PersonV2 with evolution on. 
*/ + @Test + public void currentVersionRoundTrip() { + RowEncoder codec = evolvingCodec(PersonV2.class); + PersonV2 in = new PersonV2(); + in.setName("alice"); + in.setAge(30); + in.setEmail("alice@example.com"); + byte[] bytes = codec.encode(in); + PersonV2 out = codec.decode(bytes); + Assert.assertEquals(out.getName(), "alice"); + Assert.assertEquals(out.getAge(), 30); + Assert.assertEquals(out.getEmail(), "alice@example.com"); + } + + /** + * The crux: a payload produced by PersonV1 (literally a different Java class with the v1-shaped + * schema) decoded by PersonV2's evolution-enabled codec. We use PersonV1 as a stand-in for "what + * older code wrote." Both classes are encoded with schema evolution on so they share the + * strict-hash format; PersonV1's history is a single entry, and PersonV2's history contains both + * v1 (without email) and v2 (with email) entries that match PersonV1's single entry by hash. + */ + @Test + public void olderPayloadReadByNewerCodec() { + RowEncoder oldWriter = evolvingCodec(PersonV1.class); + RowEncoder newReader = evolvingCodec(PersonV2.class); + + PersonV1 in = new PersonV1(); + in.setName("alice"); + in.setAge(30); + byte[] bytes = oldWriter.encode(in); + + PersonV2 out = newReader.decode(bytes); + Assert.assertEquals(out.getName(), "alice"); + Assert.assertEquals(out.getAge(), 30); + Assert.assertNull(out.getEmail()); + } + + // --- Compact row format --- + + @Test + public void compactRowOlderPayloadReadByNewerCodec() { + RowEncoder oldWriter = + Encoders.buildBeanCodec(PersonV1.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + RowEncoder newReader = + Encoders.buildBeanCodec(PersonV2.class) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + PersonV1 in = new PersonV1(); + in.setName("bob"); + in.setAge(42); + byte[] bytes = oldWriter.encode(in); + PersonV2 out = newReader.decode(bytes); + Assert.assertEquals(out.getName(), "bob"); + Assert.assertEquals(out.getAge(), 42); + 
Assert.assertNull(out.getEmail()); + } + + /** + * The byte[] overloads use bytes.length for the body size; the MemoryBuffer overloads write and + * read an embedded int32 size prefix ahead of the 8-byte hash. That framing is a distinct code + * path, so exercise a projection hit (older payload, newer reader) through it. Two records are + * written into one buffer and read back in order to confirm the reader advances past each + * record's embedded size. + */ + @Test + public void streamingOlderPayloadReadByNewerCodec() { + RowEncoder oldWriter = evolvingCodec(PersonV1.class); + RowEncoder newReader = evolvingCodec(PersonV2.class); + + PersonV1 alice = new PersonV1(); + alice.setName("alice"); + alice.setAge(30); + PersonV1 bob = new PersonV1(); + bob.setName("bob"); + bob.setAge(42); + + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(32); + oldWriter.encode(buffer, alice); + oldWriter.encode(buffer, bob); + + PersonV2 outAlice = newReader.decode(buffer); + PersonV2 outBob = newReader.decode(buffer); + Assert.assertEquals(outAlice.getName(), "alice"); + Assert.assertEquals(outAlice.getAge(), 30); + Assert.assertNull(outAlice.getEmail()); + Assert.assertEquals(outBob.getName(), "bob"); + Assert.assertEquals(outBob.getAge(), 42); + Assert.assertNull(outBob.getEmail()); + } + + // --- Array of versioned beans --- + + @Test + public void arrayStandardOlderPayloadReadByNewerCodec() { + ArrayEncoder> oldWriter = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> newReader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + PersonV1 a = new PersonV1(); + a.setName("alice"); + a.setAge(30); + PersonV1 b = new PersonV1(); + b.setName("bob"); + b.setAge(42); + byte[] bytes = oldWriter.encode(Arrays.asList(a, b)); + List out = newReader.decode(bytes); + Assert.assertEquals(out.size(), 2); + Assert.assertEquals(out.get(0).getName(), "alice"); + 
Assert.assertEquals(out.get(0).getAge(), 30); + Assert.assertNull(out.get(0).getEmail()); + Assert.assertEquals(out.get(1).getName(), "bob"); + } + + @Test + public void arrayCompactOlderPayloadReadByNewerCodec() { + ArrayEncoder> oldWriter = + Encoders.buildArrayCodec(new TypeRef>() {}) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> newReader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + PersonV1 p = new PersonV1(); + p.setName("carol"); + p.setAge(25); + byte[] bytes = oldWriter.encode(Arrays.asList(p)); + List out = newReader.decode(bytes); + Assert.assertEquals(out.size(), 1); + Assert.assertEquals(out.get(0).getName(), "carol"); + Assert.assertEquals(out.get(0).getAge(), 25); + Assert.assertNull(out.get(0).getEmail()); + } + + // --- Map with versioned bean values --- + + @Test + public void mapStandardOlderPayloadReadByNewerCodec() { + MapEncoder> oldWriter = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder> newReader = + Encoders.buildMapCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + Map in = new HashMap<>(); + PersonV1 p = new PersonV1(); + p.setName("dave"); + p.setAge(40); + in.put("k1", p); + byte[] bytes = oldWriter.encode(in); + Map out = newReader.decode(bytes); + Assert.assertEquals(out.size(), 1); + Assert.assertEquals(out.get("k1").getName(), "dave"); + Assert.assertEquals(out.get("k1").getAge(), 40); + Assert.assertNull(out.get("k1").getEmail()); + } + + @Test + public void mapCompactOlderPayloadReadByNewerCodec() { + MapEncoder> oldWriter = + Encoders.buildMapCodec(new TypeRef>() {}) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + MapEncoder> newReader = + Encoders.buildMapCodec(new TypeRef>() {}) + .compactEncoding() + .withSchemaEvolution() + .build() + .get(); + Map in = new HashMap<>(); + PersonV1 p = new PersonV1(); + p.setName("eve"); + 
p.setAge(28); + in.put("k1", p); + byte[] bytes = oldWriter.encode(in); + Map out = newReader.decode(bytes); + Assert.assertEquals(out.get("k1").getName(), "eve"); + Assert.assertEquals(out.get("k1").getAge(), 28); + Assert.assertNull(out.get("k1").getEmail()); + } + + // --- Interface-typed beans --- + // + // The wire field name is derived from each interface's accessor method name (via + // lowerCamelToLowerUnderscore), so two interfaces that share the same accessor names produce + // the same wire layout. Use accessor-style getters consistently across versions. + + /** v1 interface: just name and age. */ + public interface PersonIfaceV1 { + String getName(); + + int getAge(); + } + + /** v2 interface: adds email. Same accessor naming so the wire field names match. */ + public interface PersonIfaceV2 { + String getName(); + + int getAge(); + + @ForyVersion(since = 2) + String getEmail(); + } + + @Test + public void interfaceOlderPayloadReadByNewerCodec() { + RowEncoder oldWriter = evolvingCodec(PersonIfaceV1.class); + RowEncoder newReader = evolvingCodec(PersonIfaceV2.class); + PersonIfaceV1 in = + new PersonIfaceV1() { + public String getName() { + return "alice"; + } + + public int getAge() { + return 30; + } + }; + byte[] bytes = oldWriter.encode(in); + PersonIfaceV2 out = newReader.decode(bytes); + Assert.assertEquals(out.getName(), "alice"); + Assert.assertEquals(out.getAge(), 30); + // email was added in v2; v1 payload has none. The interface proxy returns the default. + Assert.assertNull(out.getEmail()); + } + + /** + * v3 interface: name and email; age removed (only present in v1 and v2). The history interface + * declares the removed field's original signature; its method name follows the same JavaBeans + * accessor convention as the live interface, so {@code getAge()} maps to wire name {@code age}. 
+ */ + @ForySchema(removedFields = PersonIfaceV3.History.class) + public interface PersonIfaceV3 { + String getName(); + + @ForyVersion(since = 2) + String getEmail(); + + interface History { + @ForyVersion(until = 3) + int getAge(); + } + } + + @Test + public void interfaceRemovedFieldReadByNewerCodec() { + RowEncoder v2Writer = evolvingCodec(PersonIfaceV2.class); + RowEncoder v3Reader = evolvingCodec(PersonIfaceV3.class); + PersonIfaceV2 in = + new PersonIfaceV2() { + public String getName() { + return "alice"; + } + + public int getAge() { + return 30; + } + + public String getEmail() { + return "alice@example.com"; + } + }; + byte[] bytes = v2Writer.encode(in); + PersonIfaceV3 out = v3Reader.decode(bytes); + Assert.assertEquals(out.getName(), "alice"); + Assert.assertEquals(out.getEmail(), "alice@example.com"); + } + + /** + * v1 writer interface: just a name. Used to produce a payload that the reader below projects to + * its v1 schema, where {@code score} is absent. + */ + public interface ScoredV1 { + String getName(); + } + + /** + * Current reader interface. {@code getScore()} is a live {@code since=2} accessor, so when a v1 + * payload is projected it is absent and gets a default-value body. {@code getScore(int)} is a + * parameterized overload sharing that name and return type. It is not an accessor — accessors + * take no arguments — so the projection proxy must throw for it rather than silence it into a + * default. Without the {@code parameterCount() != 0} guard in {@code isAccessorOfAbsentField}, it + * would match the absent {@code score} descriptor by name and return type and return {@code 0}. 
+ */ + public interface ScoredV2 { + String getName(); + + @ForyVersion(since = 2) + int getScore(); + + int getScore(int seed); + } + + @Test + public void projectionNonAccessorOverloadStillThrows() { + RowEncoder v1Writer = evolvingCodec(ScoredV1.class); + RowEncoder reader = evolvingCodec(ScoredV2.class); + ScoredV1 in = () -> "alice"; + ScoredV2 out = reader.decode(v1Writer.encode(in)); + Assert.assertEquals(out.getName(), "alice"); + // score was added in v2; the v1 payload has none, so the no-arg accessor defaults to 0. + Assert.assertEquals(out.getScore(), 0); + try { + out.getScore(7); + Assert.fail( + "parameterized getScore is not an accessor and must not be silenced to a default"); + } catch (UnsupportedOperationException expected) { + // The projection proxy does not implement non-accessor methods. + } + } + + /** Removed-field test: v3 codec reads v2 payload, dropping the no-longer-present 'age'. */ + @Test + public void removedFieldReadByNewerCodec() { + RowEncoder v2Writer = evolvingCodec(PersonV2.class); + RowEncoder v3Reader = evolvingCodec(PersonV3.class); + + PersonV2 in = new PersonV2(); + in.setName("alice"); + in.setAge(30); + in.setEmail("alice@example.com"); + byte[] bytes = v2Writer.encode(in); + + PersonV3 out = v3Reader.decode(bytes); + Assert.assertEquals(out.getName(), "alice"); + Assert.assertEquals(out.getEmail(), "alice@example.com"); + } + + // --------------------------------------------------------------------------- + // Compositional test + // + // Outer mutable bean evolves v1 -> v2 (adds displayName, removes legacyName). + // The bean carries diverse nested data shapes that themselves do not evolve: + // a concrete struct, an interface-typed struct (lazy proxy), an inline list + // of structs, and an inline map. The test exercises one + // dispatch boundary (the outer codec, or the outer list codec) and verifies + // that the projected outer correctly carries every nested shape through. 
+ // --------------------------------------------------------------------------- + + @Data + public static class Profile { + private String bio; + private int rating; + } + + /** Address is interface-typed; the row codec generates a lazy proxy for reads. */ + public interface Address { + String getStreet(); + + String getCity(); + } + + @Data + public static class Item { + private String name; + private long quantity; + } + + @Data + public static class OuterV1 { + private long id; + private String legacyName; + private Profile profile; + private Address address; + private List items; + private Map properties; + } + + /** + * OuterV2 adds {@code displayName} at version 2 and removes {@code legacyName} at version 2. + * Everything else carries forward unchanged. The compositional test writes an OuterV1 and reads + * as OuterV2. + */ + @Data + @ForySchema(removedFields = OuterV2.History.class) + public static class OuterV2 { + private long id; + + @ForyVersion(since = 2) + private String displayName; + + private Profile profile; + private Address address; + private List items; + private Map properties; + + interface History { + @ForyVersion(until = 2) + String legacyName(); + } + } + + private static OuterV1 sampleV1() { + OuterV1 in = new OuterV1(); + in.setId(7); + in.setLegacyName("retired"); + Profile p = new Profile(); + p.setBio("hello"); + p.setRating(5); + in.setProfile(p); + in.setAddress( + new Address() { + public String getStreet() { + return "1 Main"; + } + + public String getCity() { + return "Springfield"; + } + }); + Item a = new Item(); + a.setName("a"); + a.setQuantity(1); + Item b = new Item(); + b.setName("b"); + b.setQuantity(2); + in.setItems(Arrays.asList(a, b)); + Map props = new HashMap<>(); + props.put("k1", a); + props.put("k2", b); + in.setProperties(props); + return in; + } + + private static void assertProjectedToV2(OuterV2 out) { + Assert.assertEquals(out.getId(), 7); + Assert.assertNull(out.getDisplayName()); // added in v2, absent in 
v1 wire + Assert.assertEquals(out.getProfile().getBio(), "hello"); + Assert.assertEquals(out.getProfile().getRating(), 5); + Assert.assertEquals(out.getAddress().getStreet(), "1 Main"); + Assert.assertEquals(out.getAddress().getCity(), "Springfield"); + Assert.assertEquals(out.getItems().size(), 2); + Assert.assertEquals(out.getItems().get(0).getName(), "a"); + Assert.assertEquals(out.getItems().get(1).getQuantity(), 2); + Assert.assertEquals(out.getProperties().get("k1").getName(), "a"); + Assert.assertEquals(out.getProperties().get("k2").getQuantity(), 2); + } + + @Test + public void compositionalRowEvolution() { + RowEncoder writer = evolvingCodec(OuterV1.class); + RowEncoder reader = evolvingCodec(OuterV2.class); + byte[] bytes = writer.encode(sampleV1()); + assertProjectedToV2(reader.decode(bytes)); + } + + @Test + public void compositionalArrayEvolution() { + ArrayEncoder> writer = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder> reader = + Encoders.buildArrayCodec(new TypeRef>() {}) + .withSchemaEvolution() + .build() + .get(); + byte[] bytes = writer.encode(Arrays.asList(sampleV1(), sampleV1())); + List out = reader.decode(bytes); + Assert.assertEquals(out.size(), 2); + assertProjectedToV2(out.get(0)); + assertProjectedToV2(out.get(1)); + } + + // --------------------------------------------------------------------------- + // A versioned bean nested inside a collection field of the outer bean. The + // outer's SchemaHistory must look through the list/map wrapper to discover the + // inner bean and enumerate its versions, so an older payload (inner at v1) is + // projected into the newer reader (inner at v2). Without that, the outer has no + // projection matching the older inner layout and decode throws. 
+ // --------------------------------------------------------------------------- + + @Data + public static class TagV1 { + private String key; + } + + @Data + public static class TagV2 { + private String key; + + @ForyVersion(since = 2) + private long weight; + } + + @Data + public static class CatalogV1 { + private String id; + private List tags; + private Map labels; + } + + @Data + public static class CatalogV2 { + private String id; + private List tags; + private Map labels; + } + + private static CatalogV1 sampleCatalog() { + CatalogV1 in = new CatalogV1(); + in.setId("c1"); + TagV1 a = new TagV1(); + a.setKey("alpha"); + TagV1 b = new TagV1(); + b.setKey("beta"); + in.setTags(Arrays.asList(a, b)); + Map labels = new HashMap<>(); + labels.put("k1", a); + in.setLabels(labels); + return in; + } + + @Test + public void evolvingBeanInCollectionField() { + RowEncoder writer = evolvingCodec(CatalogV1.class); + RowEncoder reader = evolvingCodec(CatalogV2.class); + CatalogV2 out = reader.decode(writer.encode(sampleCatalog())); + Assert.assertEquals(out.getId(), "c1"); + Assert.assertEquals(out.getTags().size(), 2); + Assert.assertEquals(out.getTags().get(0).getKey(), "alpha"); + Assert.assertEquals(out.getTags().get(1).getKey(), "beta"); + // weight was added at v2; the v1 payload has no source for it. + Assert.assertEquals(out.getTags().get(0).getWeight(), 0L); + Assert.assertEquals(out.getLabels().get("k1").getKey(), "alpha"); + } + + // --------------------------------------------------------------------------- + // A versioned *interface* bean nested inside an evolving outer bean. Interface + // beans are valid versioned row beans at the top level (see PersonIfaceV1/V2), + // so they must also be discovered when nested as a field type, a list element, + // or a map value. 
SchemaHistory.findVersionedBean has to recognize an interface + // the same way the top-level container path does (synthesizing the interface as + // a bean); otherwise the outer's cross-product never enumerates the inner's + // older versions, an older inner payload has no matching projection, and decode + // fails with a schema-hash mismatch (ClassNotCompatibleException). + // --------------------------------------------------------------------------- + + /** v1 interface bean: a single key accessor. */ + public interface SlugV1 { + String getKey(); + } + + /** v2 interface bean: adds a weight at version 2. Same accessor naming as v1. */ + public interface SlugV2 { + String getKey(); + + @ForyVersion(since = 2) + long getWeight(); + } + + @Data + public static class BoxV1 { + private String id; + private SlugV1 slug; + private List slugs; + private Map labels; + } + + @Data + public static class BoxV2 { + private String id; + private SlugV2 slug; + private List slugs; + private Map labels; + } + + private static SlugV1 slugV1(String key) { + return () -> key; + } + + @Test + public void evolvingInterfaceBeanNestedInOuterBean() { + RowEncoder writer = evolvingCodec(BoxV1.class); + RowEncoder reader = evolvingCodec(BoxV2.class); + + BoxV1 in = new BoxV1(); + in.setId("b1"); + in.setSlug(slugV1("direct")); + in.setSlugs(Arrays.asList(slugV1("alpha"), slugV1("beta"))); + Map labels = new HashMap<>(); + labels.put("k1", slugV1("gamma")); + in.setLabels(labels); + + BoxV2 out = reader.decode(writer.encode(in)); + + Assert.assertEquals(out.getId(), "b1"); + Assert.assertEquals(out.getSlug().getKey(), "direct"); + Assert.assertEquals(out.getSlugs().size(), 2); + Assert.assertEquals(out.getSlugs().get(0).getKey(), "alpha"); + Assert.assertEquals(out.getSlugs().get(1).getKey(), "beta"); + Assert.assertEquals(out.getLabels().get("k1").getKey(), "gamma"); + // weight was added at v2; the v1 payload has no source, so it defaults. 
+ Assert.assertEquals(out.getSlug().getWeight(), 0L); + Assert.assertEquals(out.getSlugs().get(0).getWeight(), 0L); + Assert.assertEquals(out.getLabels().get("k1").getWeight(), 0L); + } + + // --- Versioned bean nested inside a top-level container's element/value --- + // + // A top-level array or map whose element/value is itself a collection of a versioned bean + // (List, Map<.., Person>) must still evolve. The versioned bean is reachable through + // the container element/value the same way SchemaHistory.findVersionedBean descends, so an + // older payload must decode under the newer codec rather than being read at a stale layout. + + @Test + public void mapOfListValueOlderPayloadReadByNewerCodec() { + MapEncoder>> oldWriter = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder>> newReader = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + Map> in = new HashMap<>(); + PersonV1 p = new PersonV1(); + p.setName("dave"); + p.setAge(40); + in.put("k1", Arrays.asList(p)); + byte[] bytes = oldWriter.encode(in); + Map> out = newReader.decode(bytes); + Assert.assertEquals(out.size(), 1); + PersonV2 read = out.get("k1").get(0); + Assert.assertEquals(read.getName(), "dave"); + Assert.assertEquals(read.getAge(), 40); + Assert.assertNull(read.getEmail()); + } + + @Test + public void arrayOfListElementOlderPayloadReadByNewerCodec() { + ArrayEncoder>> oldWriter = + Encoders.buildArrayCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + ArrayEncoder>> newReader = + Encoders.buildArrayCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + PersonV1 p = new PersonV1(); + p.setName("dave"); + p.setAge(40); + byte[] bytes = oldWriter.encode(Arrays.asList(Arrays.asList(p))); + List> out = newReader.decode(bytes); + Assert.assertEquals(out.size(), 1); + PersonV2 read = out.get(0).get(0); + Assert.assertEquals(read.getName(), "dave"); + 
Assert.assertEquals(read.getAge(), 40); + Assert.assertNull(read.getEmail()); + } + + /** Map value is itself a map of the versioned bean, exercising the map-wrapper projection. */ + @Test + public void mapOfMapValueOlderPayloadReadByNewerCodec() { + MapEncoder>> oldWriter = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + MapEncoder>> newReader = + Encoders.buildMapCodec(new TypeRef>>() {}) + .withSchemaEvolution() + .build() + .get(); + PersonV1 p = new PersonV1(); + p.setName("dave"); + p.setAge(40); + Map inner = new HashMap<>(); + inner.put("inner", p); + Map> in = new HashMap<>(); + in.put("k1", inner); + Map> out = newReader.decode(oldWriter.encode(in)); + PersonV2 read = out.get("k1").get("inner"); + Assert.assertEquals(read.getName(), "dave"); + Assert.assertEquals(read.getAge(), 40); + Assert.assertNull(read.getEmail()); + } + + // --------------------------------------------------------------------------- + // Added reference-typed fields. Every other added-field test defaults a scalar + // (String/int/...); defaulting an added struct or collection slot is a distinct + // projection path. v2 adds a nested struct and a list of structs that are absent + // from the v1 wire, so both must read back as null. 
+ // --------------------------------------------------------------------------- + + @Data + public static class HolderV1 { + private long id; + } + + @Data + public static class HolderV2 { + private long id; + + @ForyVersion(since = 2) + private Profile profile; + + @ForyVersion(since = 2) + private List items; + } + + @Test + public void addedReferenceFieldsDefaultToNull() { + RowEncoder writer = evolvingCodec(HolderV1.class); + RowEncoder reader = evolvingCodec(HolderV2.class); + + HolderV1 in = new HolderV1(); + in.setId(7); + HolderV2 out = reader.decode(writer.encode(in)); + + Assert.assertEquals(out.getId(), 7); + Assert.assertNull(out.getProfile()); + Assert.assertNull(out.getItems()); + } + + private static RowEncoder evolvingCodec(Class beanClass) { + return Encoders.buildBeanCodec(beanClass).withSchemaEvolution().build().get(); + } +} diff --git a/java/fory-latest-jdk-tests/src/test/java/org/apache/fory/integration_tests/RecordRowTest.java b/java/fory-latest-jdk-tests/src/test/java/org/apache/fory/integration_tests/RecordRowTest.java index 99c61c64ce..49d3674208 100644 --- a/java/fory-latest-jdk-tests/src/test/java/org/apache/fory/integration_tests/RecordRowTest.java +++ b/java/fory-latest-jdk-tests/src/test/java/org/apache/fory/integration_tests/RecordRowTest.java @@ -21,6 +21,8 @@ import java.time.Instant; import java.time.LocalDate; +import org.apache.fory.format.annotation.ForySchema; +import org.apache.fory.format.annotation.ForyVersion; import org.apache.fory.format.encoder.Encoders; import org.apache.fory.format.encoder.RowEncoder; import org.apache.fory.format.row.binary.BinaryRow; @@ -86,4 +88,82 @@ public void testRecordNestedInterface() { final TestRecordNestedInterface deserializedBean = encoder.fromRow(row); Assert.assertEquals(deserializedBean.f1().f1(), bean.f1().f1()); } + + // --------------------------------------------------------------------------- + // Records with schema evolution. 
@ForyVersion on a record component propagates + // to the backing field and the accessor (its FIELD/METHOD targets), where the + // codec reads it, so a newer reader record can pick up older payloads and + // default components added later. The history interface still works because the + // bean is a record: live component names match the wire field names (record + // short-style naming). + // --------------------------------------------------------------------------- + + public record PersonV1(String name, int age) {} + + @ForySchema(removedFields = PersonV2.History.class) + public record PersonV2(String name, @ForyVersion(since = 2) String email) { + interface History { + @ForyVersion(until = 2) + int age(); + } + } + + @Test + public void recordSchemaEvolution_readsOlderPayloads() { + RowEncoder writer = + Encoders.buildBeanCodec(PersonV1.class).withSchemaEvolution().build().get(); + RowEncoder reader = + Encoders.buildBeanCodec(PersonV2.class).withSchemaEvolution().build().get(); + PersonV2 out = reader.decode(writer.encode(new PersonV1("Luna", 7))); + Assert.assertEquals(out.name(), "Luna"); + Assert.assertNull(out.email()); + } + + @Test + public void recordSchemaEvolution_currentRoundTrip() { + RowEncoder codec = + Encoders.buildBeanCodec(PersonV2.class).withSchemaEvolution().build().get(); + PersonV2 in = new PersonV2("Mars", "mars@example.com"); + Assert.assertEquals(codec.decode(codec.encode(in)), in); + } + + /** Record with a primitive added at v2: an older payload must produce the primitive default. 
*/ + public record CounterV1(String name) {} + + public record CounterV2(String name, @ForyVersion(since = 2) int count) {} + + @Test + public void recordSchemaEvolution_primitiveDefault() { + RowEncoder writer = + Encoders.buildBeanCodec(CounterV1.class).withSchemaEvolution().build().get(); + RowEncoder reader = + Encoders.buildBeanCodec(CounterV2.class).withSchemaEvolution().build().get(); + CounterV2 out = reader.decode(writer.encode(new CounterV1("Luna"))); + Assert.assertEquals(out.name(), "Luna"); + Assert.assertEquals(out.count(), 0); + } + + // A record component whose own type is a versioned record. The inner struct is + // inline in the outer's bytes with no per-inner hash, so the reader must pick an + // inner schema consistent with the outer's strict hash. This drives the nested + // cross-product enumeration with record-component field naming. + public record InnerV1(String name) {} + + public record InnerV2(String name, @ForyVersion(since = 2) String tag) {} + + public record OuterInnerV1(long id, InnerV1 inner) {} + + public record OuterInnerV2(long id, InnerV2 inner) {} + + @Test + public void recordSchemaEvolution_nestedRecordInnerNewerThanWriter() { + RowEncoder writer = + Encoders.buildBeanCodec(OuterInnerV1.class).withSchemaEvolution().build().get(); + RowEncoder reader = + Encoders.buildBeanCodec(OuterInnerV2.class).withSchemaEvolution().build().get(); + OuterInnerV2 out = reader.decode(writer.encode(new OuterInnerV1(42, new InnerV1("hello")))); + Assert.assertEquals(out.id(), 42); + Assert.assertEquals(out.inner().name(), "hello"); + Assert.assertNull(out.inner().tag()); + } }