Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
fa37f00
feat(format): schema evolution for the Java row codec
May 28, 2026
1595c0a
perf(format): one allocation per encode in evolution-enabled array/ma…
May 28, 2026
e4db3ac
test(format): add row-format allocation probe
May 28, 2026
594fd80
feat(format): dispatch nested versioned beans by recursive strict hash
May 28, 2026
1d6d526
fix(format): route inner-bean version through array/map projection co…
May 28, 2026
94e5033
test(format): cover producer/consumer flag asymmetry on array and map…
May 28, 2026
7fbfe09
fix(format): prefer all-current combination when SchemaHistory signat…
May 28, 2026
bf6225c
test(format): cover @ForyVersion on record components
May 28, 2026
a50a454
docs(format): clarify wire format and cross-product growth
May 28, 2026
286a844
docs(format): tighten row-format schema-evolution prose
May 28, 2026
d573042
fix(format): harden evolution decode and address review feedback
Jun 26, 2026
5935ae8
test(format): add JMH schema-evolution benchmark suite
Jun 26, 2026
ecb433c
style(format): apply spotless to row schema-evolution sources
Jun 26, 2026
d75320b
fix(format): route nested versioned beans by schema identity, enumera…
Jun 26, 2026
d1dfc4a
fix(format): harden row decode and trim allocations on the projection…
Jun 26, 2026
da1ddb0
fix(format): log array/map codec generation time in the unit it reports
Jun 26, 2026
dd913ab
perf(format): benchmark schema-evolution flag overhead on the current…
Jun 26, 2026
aba4338
fix(format): gate version-history bean probe behind isBean
Jun 26, 2026
bda509e
fix(format): keep inferField overloads contiguous to satisfy checkstyle
Jun 26, 2026
2b78362
fix(format): enumerate versioned beans nested inside collection fields
Jun 26, 2026
c7faca0
fix(format): drop RECORD_COMPONENT from @ForyVersion target for Java …
Jun 26, 2026
496951d
fix(format): reject finite @ForyVersion(until) on a live field
Jun 26, 2026
2c79f65
fix(format): stop enumerating map keys in versioned-bean cross-product
Jun 26, 2026
56a87f1
refactor(format): carry resolved schema in factory instead of mutatin…
Jun 26, 2026
c241f59
perf(format): precompute projection RowFactory once at build time
Jun 26, 2026
90d54fd
test(format): add disabled reproducer for map struct-key value-evolut…
Jun 26, 2026
67d919f
fix(format): decode map struct keys at current schema during value pr…
Jun 26, 2026
4b6f992
fix(format): evolve top-level array/map whose element/value wraps a v…
Jun 26, 2026
8f0f9c7
refactor(format): drop unreachable live-field since/until check
Jun 26, 2026
318943f
docs(format): note strict-hash dispatch is a 64-bit collision boundary
Jun 26, 2026
9113abb
test(format): cover added reference and collection field defaults on …
Jun 26, 2026
46f257e
fix(format): support interface beans as map values and discover them …
Jun 27, 2026
14484ad
docs(format): point bean-codec missing-registration error at register…
Jun 27, 2026
12a54fc
fix(format): throw for non-accessor methods colliding with absent pro…
Jun 27, 2026
02ed848
fix(format): warn on large projection cross-product and clarify decod…
Jun 27, 2026
b0c5ae6
fix(format): reject @ForyVersion(since) below the first schema version
Jun 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.fory.benchmark;

import java.util.Arrays;
import org.apache.fory.format.annotation.ForyVersion;
import org.apache.fory.format.encoder.Encoders;
import org.apache.fory.format.encoder.RowEncoder;
import org.apache.fory.logging.Logger;
import org.apache.fory.logging.LoggerFactory;
import org.openjdk.jmh.Main;
import org.openjdk.jmh.annotations.Benchmark;

/**
 * JMH suite for the throughput and allocation behaviour of the row codec under schema evolution.
 * Run it with the JMH gc profiler ({@code -prof gc}) and read {@code gc.alloc.rate.norm} (bytes
 * per op).
 *
 * <p>Two comparisons are of interest:
 *
 * <ul>
 *   <li>{@code currentDecode} against {@code olderDecode}: decoding an older payload through a
 *       projection codec is expected to allocate no more than decoding the current schema,
 *       because each projection holds its historical schema's row layout (no per-decode rebuild).
 *   <li>Every {@code *NoEvolution} benchmark against its evolution-on counterpart: the
 *       steady-state cost of enabling {@code withSchemaEvolution()} when reading and writing
 *       current-version data.
 * </ul>
 */
public class SchemaEvolutionSuite {
  private static final Logger LOG = LoggerFactory.getLogger(SchemaEvolutionSuite.class);

  /** JMH arguments applied when {@link #main(String[])} is launched without any. */
  private static final String DEFAULT_COMMAND_LINE =
      "org.apache.fory.*SchemaEvolutionSuite.* -f 3 -wi 3 -i 3 -t 1 -w 2s -r 2s -prof gc -rf csv";

  /** First-version shape of the benchmark bean: {@code name} and {@code age} only. */
  public static class PersonV1 {
    String name;
    int age;
  }

  /** Current shape of the benchmark bean: the V1 fields plus {@code email}, added in version 2. */
  public static class PersonV2 {
    String name;
    int age;

    @ForyVersion(since = 2)
    String email;
  }

  // Codecs with evolution switched on. V2 is the current schema under test; the V1 codecs exist
  // only to produce payloads whose hash sends the V2 reader down its projection path. Standard and
  // compact formats are both covered: compact is where a per-projection cached row layout matters,
  // so the olderDecode vs currentDecode pair there is the parity check.
  private static final RowEncoder<PersonV1> V1_CODEC =
      Encoders.buildBeanCodec(PersonV1.class).withSchemaEvolution().build().get();
  private static final RowEncoder<PersonV2> V2_CODEC =
      Encoders.buildBeanCodec(PersonV2.class).withSchemaEvolution().build().get();
  private static final RowEncoder<PersonV1> V1_COMPACT_CODEC =
      Encoders.buildBeanCodec(PersonV1.class).compactEncoding().withSchemaEvolution().build().get();
  private static final RowEncoder<PersonV2> V2_COMPACT_CODEC =
      Encoders.buildBeanCodec(PersonV2.class).compactEncoding().withSchemaEvolution().build().get();

  // Codecs for the same current (V2) schema with evolution switched off. Setting the *NoEvolution
  // benchmarks beside their evolution-on counterparts isolates what the withSchemaEvolution() flag
  // costs on the common path (reading and writing current-version data): the 8-byte hash slot the
  // evolution wire format adds, plus the hash compare on decode.
  private static final RowEncoder<PersonV2> V2_PLAIN_CODEC =
      Encoders.buildBeanCodec(PersonV2.class).build().get();
  private static final RowEncoder<PersonV2> V2_PLAIN_COMPACT_CODEC =
      Encoders.buildBeanCodec(PersonV2.class).compactEncoding().build().get();

  // Fixtures. These are declared after the codecs on purpose: static initialisers run in textual
  // order and every payload below is produced by one of the codecs above.
  private static final PersonV2 PERSON = sampleV2();
  private static final byte[] CURRENT_BYTES = V2_CODEC.encode(PERSON);
  private static final byte[] OLDER_BYTES = V1_CODEC.encode(sampleV1());
  private static final byte[] CURRENT_COMPACT_BYTES = V2_COMPACT_CODEC.encode(PERSON);
  private static final byte[] OLDER_COMPACT_BYTES = V1_COMPACT_CODEC.encode(sampleV1());
  private static final byte[] PLAIN_BYTES = V2_PLAIN_CODEC.encode(PERSON);
  private static final byte[] PLAIN_COMPACT_BYTES = V2_PLAIN_COMPACT_CODEC.encode(PERSON);

  /** Builds the current-version sample bean with every field populated. */
  private static PersonV2 sampleV2() {
    PersonV2 sample = new PersonV2();
    sample.name = "Ada Lovelace";
    sample.age = 36;
    sample.email = "ada@example.com";
    return sample;
  }

  /** Builds the first-version sample bean; it carries the same name and age as the V2 sample. */
  private static PersonV1 sampleV1() {
    PersonV1 sample = new PersonV1();
    sample.name = "Ada Lovelace";
    sample.age = 36;
    return sample;
  }

  // ---- Evolution enabled, standard format ----

  @Benchmark
  public Object encode() {
    return V2_CODEC.encode(PERSON);
  }

  @Benchmark
  public Object currentDecode() {
    return V2_CODEC.decode(CURRENT_BYTES);
  }

  @Benchmark
  public Object olderDecode() {
    return V2_CODEC.decode(OLDER_BYTES);
  }

  // ---- Evolution enabled, compact format ----

  @Benchmark
  public Object compactEncode() {
    return V2_COMPACT_CODEC.encode(PERSON);
  }

  @Benchmark
  public Object compactCurrentDecode() {
    return V2_COMPACT_CODEC.decode(CURRENT_COMPACT_BYTES);
  }

  @Benchmark
  public Object compactOlderDecode() {
    return V2_COMPACT_CODEC.decode(OLDER_COMPACT_BYTES);
  }

  // ---- Evolution disabled: baselines for the current path. Read each one next to its
  // evolution-on counterpart (encode/currentDecode and the compact variants) to get the flag's
  // overhead. ----

  @Benchmark
  public Object encodeNoEvolution() {
    return V2_PLAIN_CODEC.encode(PERSON);
  }

  @Benchmark
  public Object currentDecodeNoEvolution() {
    return V2_PLAIN_CODEC.decode(PLAIN_BYTES);
  }

  @Benchmark
  public Object compactEncodeNoEvolution() {
    return V2_PLAIN_COMPACT_CODEC.encode(PERSON);
  }

  @Benchmark
  public Object compactCurrentDecodeNoEvolution() {
    return V2_PLAIN_COMPACT_CODEC.decode(PLAIN_COMPACT_BYTES);
  }

  /**
   * Starts JMH. When no arguments are given, the default command line for this suite is used;
   * otherwise the supplied arguments are passed to JMH unchanged.
   *
   * @param args JMH command-line arguments, possibly empty
   * @throws Exception propagated from the JMH entry point
   */
  public static void main(String[] args) throws Exception {
    String[] jmhArgs = args.length > 0 ? args : DEFAULT_COMMAND_LINE.split(" ");
    LOG.info("command line: {}", Arrays.toString(jmhArgs));
    Main.main(jmhArgs);
  }
}
91 changes: 91 additions & 0 deletions docs/guide/java/row-format.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,97 @@ std::string str = bar10->get_string(0);
| Memory usage | Full object graph in memory | Only accessed fields |
| Suitable for | Small objects, full access | Large objects, selective access |

## Schema evolution

Enable `.withSchemaEvolution()` on a row, array, or map codec builder to read payloads written
by older versions of the same bean. Writing always uses the current version; reading detects
the payload's version from a strict hash at the head of the payload. Java only.

Annotate fields added after v1 with `@ForyVersion(since = N)`:

```java
@Data
public class Person {
String name;
int age;

@ForyVersion(since = 2)
String email;
}
```

A v1 payload (with `name` and `age` only) decodes to a `Person` whose `email` is `null`.
Primitive fields added later default to `0`, `0.0`, or `false`. Unannotated fields are treated
as present from the first version, so a class can adopt versioning by annotating only the fields
added after v1.

Remove a field by deleting the Java member and declaring it on a nested history interface as a
method annotated with `@ForyVersion(until = N)`, then reference that interface from the bean
with `@ForySchema(removedFields = ...)`. The method's return type carries any parameterized
type information from the original field.

```java
@Data
@ForySchema(removedFields = Person.History.class)
public class Person {
String name;

@ForyVersion(since = 2)
String email;

interface History {
@ForyVersion(until = 3)
int age();

@ForyVersion(until = 5)
List<String> tags();
}
}
```

The history method name matches the original live descriptor name: the field name for Lombok
`@Data` or records (`age`, `tags`), or the full accessor name for JavaBeans-style classes and
interfaces (`getAge`).

### Wire format and limitations

Producers and consumers must agree on the `withSchemaEvolution()` flag — they are not
wire-compatible otherwise. Row payloads always carry an 8-byte hash slot; under evolution its
value is the strict hash (which includes field name and nullability), so a flag-mismatched
peer fails loudly with `ClassNotCompatibleException`. Arrays and maps of bean elements prepend
an 8-byte strict-hash prefix under evolution and no prefix otherwise; an evolution-on consumer
reading evolution-off bytes also fails with `ClassNotCompatibleException`, but the reverse
direction (evolution-off consumer, evolution-on bytes) is undefined.

To adopt the flag on an existing deployment, enable `withSchemaEvolution()` on both sides in a
release that changes no schema, then start evolving schemas only once every peer is on the
evolution-enabled build. Turning the flag on and changing a schema in the same release strands
any peer that has not yet upgraded.

Cross-language consumers (Python, C++) cannot read evolution-enabled payloads.

A reader selects the matching layout from the 8-byte strict hash on the payload. The hash includes
field names and nullability and is checked for collisions across a bean's own versions when the
codec is built, but it is still a 64-bit value: a payload whose hash coincides with one of the
reader's historical layouts is decoded against that layout. This is the same hash-based dispatch
the row format has always used, so feeding a codec bytes it was not built for has undefined results
whether or not evolution is enabled. Only hand a codec payloads produced for the same bean.

Map keys do not carry a per-payload hash; a versioned bean used as a map key is read with the
current schema only, not dispatched to a projection codec.

Nested evolution works to arbitrary depth and places no restriction on shape: a versioned bean
may contain versioned beans that themselves contain versioned beans, the same versioned bean
class may back more than one field, and fields typed as a non-evolving bean, a list, or a map are
unrestricted. Each nesting level is routed to the correct historical layout.

When a versioned bean contains other versioned beans, the reader generates one projection codec
class per combination of versions across the composition. The count grows as the product of the
version counts of the distinct nested versioned bean classes, not the number of fields, so
reusing a class across several fields adds no combinations. If the product across distinct classes
becomes a concern, drop entries from each bean's `History` interface once you no longer need to
read payloads from that range. Retiring a history entry is purely a read-side decision; the writer
always uses the current schema.

## Related Topics

- [Xlang Serialization](xlang-serialization.md) - xlang mode
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -969,7 +969,11 @@ private static boolean isSupported(TypeRef<?> typeRef, TypeResolutionContext ctx
return false;
}
Tuple2<TypeRef<?>, TypeRef<?>> mapKeyValueType = getMapKeyValueType(typeRef);
return isSupported(mapKeyValueType.f0) && isSupported(mapKeyValueType.f1);
// Thread ctx through both key and value, matching the iterable branch above. The context-less
// isSupported overload resets synthesizeInterfaces to false, which would reject an interface
// bean used as a map key or value even though the same type is supported as a direct field or
// list element.
return isSupported(mapKeyValueType.f0, ctx) && isSupported(mapKeyValueType.f1, ctx);
} else if (cls.isEnum()) {
return true;
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.fory.format.annotation;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * Type-level metadata that the row codec consults when its builder has schema evolution enabled.
 *
 * <p>A live field that carries no {@link ForyVersion} annotation counts as present since the first
 * version. A bean can therefore adopt versioning by annotating just the fields it gained later.
 *
 * <p>Fields that have been removed from the bean, yet still show up on the wire in older payloads,
 * are described through {@link #removedFields()}. It names a class, by convention a nested {@code
 * interface}, with one accessor method per removed field. A method's return type is the original
 * Java type of the field it stands for, and the method must be annotated with {@link ForyVersion}
 * with {@code until} set, because a removed field has a known end-of-life version.
 *
 * <p>Usage:
 *
 * <pre>{@code
 * @Data
 * @ForySchema(removedFields = MyBean.History.class)
 * public class MyBean {
 *   private String name;
 *
 *   interface History {
 *     @ForyVersion(until = 3)
 *     List<String> tags();
 *
 *     @ForyVersion(since = 2, until = 5)
 *     Map<String, Long> counters();
 *   }
 * }
 * }</pre>
 */
@Target({ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
public @interface ForySchema {
  /**
   * The class whose accessor methods describe fields that used to exist on the bean and have since
   * been removed. The default, {@code void.class}, means the bean has no removed fields. This
   * class is never instantiated; the codec only reads its method signatures and annotations.
   *
   * @return the history class, or {@code void.class} when no field has been removed
   */
  Class<?> removedFields() default void.class;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.fory.format.annotation;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * States the range of schema versions in which a row-codec field logically exists. The range is
 * half-open: {@code since} is included and {@code until} is excluded, so {@code since=2, until=5}
 * covers versions 2, 3, and 4.
 *
 * <p>The annotation takes effect only when the codec builder is configured with {@code
 * withSchemaEvolution()}. Without that, it is ignored and the field is treated as always present.
 *
 * <p>It may be placed on a field, an accessor method, or a record component. Record components
 * are handled through the {@code FIELD} and {@code METHOD} targets rather than {@code
 * ElementType.RECORD_COMPONENT}: the compiler propagates a record-component annotation to the
 * backing field and the accessor method (the targets it declares), and the codec reads the
 * annotation from those elements. {@code RECORD_COMPONENT} is a JDK 16 enum constant and would
 * break this Java 11 module at runtime, so it is deliberately left out.
 */
@Target({ElementType.FIELD, ElementType.METHOD})
@Retention(RetentionPolicy.RUNTIME)
public @interface ForyVersion {
  /**
   * The first version that contains this field (inclusive).
   *
   * @return the first version containing the field; defaults to the class base version
   */
  int since() default 1;

  /**
   * The first version that no longer contains this field (exclusive upper bound).
   *
   * @return the first version without the field; defaults to {@link Integer#MAX_VALUE}
   */
  int until() default Integer.MAX_VALUE;
}
Loading
Loading