From 8563f13645701853cdd8c01042e202d4144952f5 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Wed, 10 Jun 2026 19:06:29 +0200 Subject: [PATCH] perf(proto-gen): backpatched length-delim writes eliminate temp ProtoWriter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generator's nested-message and packed-repeated encode paths used to allocate a fresh ProtoWriter per nested region, call toByteArray() to materialise its bytes, then writeEmbedded() to copy them again into the parent buffer — one alloc plus two copies per nested field. This commit replaces that pattern with backpatched length-delimited regions on the parent writer: - ProtoWriter.beginLenDelim() reserves 5 bytes (max varint32 length) at the current position and returns a mark. - The caller writes the payload directly into the parent buffer. - ProtoWriter.endLenDelim(mark) writes the actual length varint at the reserved offset and shifts the payload left if the varint is shorter than 5 bytes. Generator changes: - Each message now exposes a package-private `encodeTo(ProtoWriter w)` that writes the record into the caller's writer. The public `encode()` becomes a thin wrapper that constructs a writer, calls encodeTo, and returns toByteArray(). - MessageOptionalEmitter / MessageRepeatedEmitter call `field.encodeTo(w)` between beginLenDelim/endLenDelim instead of `w.writeEmbedded(field.encode())`. - ScalarRepeatedEmitter and EnumRepeatedEmitter use the same backpatch pattern for packed varint regions instead of a temporary writer. Removed sites (all in CodeGen): 4 nested-message and packed-repeated emitters no longer allocate `new ProtoWriter()` or call `toByteArray()` on a child writer. Wire-format compatibility verified by the full Rust round-trip integration suite (`./mvnw verify`, 243 integration tests including RustWritesJavaReadsIntegrationTest and JavaWritesRustReadsIntegrationTest). ProtoRuntimeTest additions in a new `Backpatch` nested class: - shortPayloadCompactsLengthVarint: 3-byte payload yields 1-byte length - backpatchedMatchesLegacyEmbeddedPattern: byte-for-byte parity with the old temp-writer + writeEmbedded path - emptyPayloadProducesSingleZeroLength: zero-length region collapses to a single 0x00 byte - largePayloadKeepsMultiByteLengthVarint: 800-byte payload survives the 3-byte leftward shift Co-Authored-By: Claude Opus 4.7 --- .../github/dfa1/vortex/proto/ALPMetadata.java | 10 ++- .../dfa1/vortex/proto/ALPRDMetadata.java | 19 ++-- .../io/github/dfa1/vortex/proto/Binary.java | 6 +- .../dfa1/vortex/proto/BitPackedMetadata.java | 10 ++- .../io/github/dfa1/vortex/proto/Bool.java | 6 +- .../io/github/dfa1/vortex/proto/DType.java | 54 ++++++++--- .../vortex/proto/DateTimePartsMetadata.java | 6 +- .../io/github/dfa1/vortex/proto/Decimal.java | 6 +- .../proto/DecimalBytePartsMetadata.java | 6 +- .../dfa1/vortex/proto/DecimalMetadata.java | 6 +- .../dfa1/vortex/proto/DeltaMetadata.java | 6 +- .../dfa1/vortex/proto/DictMetadata.java | 6 +- .../github/dfa1/vortex/proto/Extension.java | 10 ++- .../dfa1/vortex/proto/FSSTMetadata.java | 6 +- .../io/github/dfa1/vortex/proto/Field.java | 6 +- .../github/dfa1/vortex/proto/FieldPath.java | 10 ++- .../dfa1/vortex/proto/FixedSizeList.java | 10 ++- .../io/github/dfa1/vortex/proto/List.java | 10 ++- .../dfa1/vortex/proto/ListMetadata.java | 6 +- .../github/dfa1/vortex/proto/ListValue.java | 10 ++- .../dfa1/vortex/proto/ListViewMetadata.java | 6 +- .../io/github/dfa1/vortex/proto/Null.java | 4 + .../dfa1/vortex/proto/PatchedMetadata.java | 6 +- .../dfa1/vortex/proto/PatchesMetadata.java | 6 +- .../dfa1/vortex/proto/PcoChunkInfo.java | 10 ++- .../github/dfa1/vortex/proto/PcoMetadata.java | 10 ++- .../github/dfa1/vortex/proto/PcoPageInfo.java | 6 +- .../github/dfa1/vortex/proto/Primitive.java | 6 +- .../github/dfa1/vortex/proto/ProtoWriter.java | 55 ++++++++++++ .../github/dfa1/vortex/proto/RLEMetadata.java | 6 +- .../dfa1/vortex/proto/RunEndMetadata.java | 6 +- .../io/github/dfa1/vortex/proto/Scalar.java | 14 ++- .../github/dfa1/vortex/proto/ScalarValue.java | 14 ++- .../dfa1/vortex/proto/SequenceMetadata.java | 14 ++- .../dfa1/vortex/proto/SparseMetadata.java | 10 ++- .../io/github/dfa1/vortex/proto/Struct.java | 10 ++- .../io/github/dfa1/vortex/proto/Union.java | 6 +- .../io/github/dfa1/vortex/proto/Utf8.java | 6 +- .../dfa1/vortex/proto/VarBinMetadata.java | 6 +- .../io/github/dfa1/vortex/proto/Variant.java | 6 +- .../dfa1/vortex/proto/VariantMetadata.java | 10 ++- .../dfa1/vortex/proto/ZstdFrameMetadata.java | 6 +- .../dfa1/vortex/proto/ZstdMetadata.java | 10 ++- .../dfa1/vortex/proto/ProtoRuntimeTest.java | 90 +++++++++++++++++++ .../github/dfa1/vortex/protogen/CodeGen.java | 35 +++++--- 45 files changed, 476 insertions(+), 91 deletions(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/ALPMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/ALPMetadata.java index be91f9df..4792964d 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/ALPMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/ALPMetadata.java @@ -50,6 +50,11 @@ public static ALPMetadata decode(MemorySegment __seg, long __off, long __len) th /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (exp_e != 0) { w.writeTag(1, 0); w.writeVarint32(exp_e); @@ -60,8 +65,9 @@ public byte[] encode() { } if (patches != null) { w.writeTag(3, 2); - w.writeEmbedded(patches.encode()); + int __mark = w.beginLenDelim(); + patches.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/ALPRDMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/ALPRDMetadata.java index b0a8620d..b5b8d1a1 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/ALPRDMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/ALPRDMetadata.java @@ -74,6 +74,11 @@ public static ALPRDMetadata decode(MemorySegment __seg, long __off, long __len) /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (right_bit_width != 0) { w.writeTag(1, 0); w.writeVarint32(right_bit_width); @@ -83,13 +88,12 @@ public byte[] encode() { w.writeVarint32(dict_len); } if (!dict.isEmpty()) { - ProtoWriter packed = new ProtoWriter(); + w.writeTag(3, 2); + int __mark = w.beginLenDelim(); for (Integer __v : dict) { - packed.writeVarint32(__v); + w.writeVarint32(__v); } - byte[] __bytes = packed.toByteArray(); - w.writeTag(3, 2); - w.writeEmbedded(__bytes); + w.endLenDelim(__mark); } if (left_parts_ptype.value() != 0) { w.writeTag(4, 0); @@ -97,8 +101,9 @@ public byte[] encode() { } if (patches != null) { w.writeTag(5, 2); - w.writeEmbedded(patches.encode()); + int __mark = w.beginLenDelim(); + patches.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Binary.java b/core/src/main/java/io/github/dfa1/vortex/proto/Binary.java index 06b86bb7..b7a8b7cd 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Binary.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Binary.java @@ -37,10 +37,14 @@ public static Binary decode(MemorySegment __seg, long __off, long __len) throws /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (nullable) { w.writeTag(1, 0); w.writeBool(nullable); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/BitPackedMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/BitPackedMetadata.java index 85a8f155..4dae8820 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/BitPackedMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/BitPackedMetadata.java @@ -50,6 +50,11 @@ public static BitPackedMetadata decode(MemorySegment __seg, long __off, long __l /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (bit_width != 0) { w.writeTag(1, 0); w.writeVarint32(bit_width); @@ -60,8 +65,9 @@ public byte[] encode() { } if (patches != null) { w.writeTag(3, 2); - w.writeEmbedded(patches.encode()); + int __mark = w.beginLenDelim(); + patches.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Bool.java b/core/src/main/java/io/github/dfa1/vortex/proto/Bool.java index a6157e18..f27668df 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Bool.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Bool.java @@ -37,10 +37,14 @@ public static Bool decode(MemorySegment __seg, long __off, long __len) throws IO /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (nullable) { w.writeTag(1, 0); w.writeBool(nullable); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/DType.java b/core/src/main/java/io/github/dfa1/vortex/proto/DType.java index 031440f9..a009b535 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/DType.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/DType.java @@ -115,55 +115,83 @@ public static DType decode(MemorySegment __seg, long __off, long __len) throws I /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (null_ != null) { w.writeTag(1, 2); - w.writeEmbedded(null_.encode()); + int __mark = w.beginLenDelim(); + null_.encodeTo(w); + w.endLenDelim(__mark); } if (bool != null) { w.writeTag(2, 2); - w.writeEmbedded(bool.encode()); + int __mark = w.beginLenDelim(); + bool.encodeTo(w); + w.endLenDelim(__mark); } if (primitive != null) { w.writeTag(3, 2); - w.writeEmbedded(primitive.encode()); + int __mark = w.beginLenDelim(); + primitive.encodeTo(w); + w.endLenDelim(__mark); } if (decimal != null) { w.writeTag(4, 2); - w.writeEmbedded(decimal.encode()); + int __mark = w.beginLenDelim(); + decimal.encodeTo(w); + w.endLenDelim(__mark); } if (utf8 != null) { w.writeTag(5, 2); - w.writeEmbedded(utf8.encode()); + int __mark = w.beginLenDelim(); + utf8.encodeTo(w); + w.endLenDelim(__mark); } if (binary != null) { w.writeTag(6, 2); - w.writeEmbedded(binary.encode()); + int __mark = w.beginLenDelim(); + binary.encodeTo(w); + w.endLenDelim(__mark); } if (struct != null) { w.writeTag(7, 2); - w.writeEmbedded(struct.encode()); + int __mark = w.beginLenDelim(); + struct.encodeTo(w); + w.endLenDelim(__mark); } if (list != null) { w.writeTag(8, 2); - w.writeEmbedded(list.encode()); + int __mark = w.beginLenDelim(); + list.encodeTo(w); + w.endLenDelim(__mark); } if (extension != null) { w.writeTag(9, 2); - w.writeEmbedded(extension.encode()); + int __mark = w.beginLenDelim(); + extension.encodeTo(w); + w.endLenDelim(__mark); } if (fixed_size_list != null) { w.writeTag(10, 2); - w.writeEmbedded(fixed_size_list.encode()); + int __mark = w.beginLenDelim(); + fixed_size_list.encodeTo(w); + w.endLenDelim(__mark); } if (variant != null) { w.writeTag(11, 2); - w.writeEmbedded(variant.encode()); + int __mark = w.beginLenDelim(); + variant.encodeTo(w); + w.endLenDelim(__mark); } if (union != null) { w.writeTag(12, 2); - w.writeEmbedded(union.encode()); + int __mark = w.beginLenDelim(); + union.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } /// Factory for oneof case {@code null} (field tag 1). diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/DateTimePartsMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/DateTimePartsMetadata.java index 33641dd6..680eb504 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/DateTimePartsMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/DateTimePartsMetadata.java @@ -64,6 +64,11 @@ public static DateTimePartsMetadata decode(MemorySegment __seg, long __off, long /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (days_ptype.value() != 0) { w.writeTag(1, 0); w.writeVarint32(days_ptype.value()); @@ -76,6 +81,5 @@ public byte[] encode() { w.writeTag(3, 0); w.writeVarint32(subseconds_ptype.value()); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Decimal.java b/core/src/main/java/io/github/dfa1/vortex/proto/Decimal.java index aaac43f0..3143b131 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Decimal.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Decimal.java @@ -49,6 +49,11 @@ public static Decimal decode(MemorySegment __seg, long __off, long __len) throws /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (precision != 0) { w.writeTag(1, 0); w.writeVarint32(precision); @@ -61,6 +66,5 @@ public byte[] encode() { w.writeTag(3, 0); w.writeBool(nullable); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/DecimalBytePartsMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/DecimalBytePartsMetadata.java index 2a9d7ae8..b780fdd5 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/DecimalBytePartsMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/DecimalBytePartsMetadata.java @@ -48,6 +48,11 @@ public static DecimalBytePartsMetadata decode(MemorySegment __seg, long __off, l /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (zeroth_child_ptype.value() != 0) { w.writeTag(1, 0); w.writeVarint32(zeroth_child_ptype.value()); @@ -56,6 +61,5 @@ public byte[] encode() { w.writeTag(2, 0); w.writeVarint32(lower_part_count); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/DecimalMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/DecimalMetadata.java index a8089737..64fc8728 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/DecimalMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/DecimalMetadata.java @@ -37,10 +37,14 @@ public static DecimalMetadata decode(MemorySegment __seg, long __off, long __len /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (values_type != 0) { w.writeTag(1, 0); w.writeVarint32(values_type); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/DeltaMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/DeltaMetadata.java index e3672abf..b8883505 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/DeltaMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/DeltaMetadata.java @@ -43,6 +43,11 @@ public static DeltaMetadata decode(MemorySegment __seg, long __off, long __len) /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (deltas_len != 0L) { w.writeTag(1, 0); w.writeVarint64(deltas_len); @@ -51,6 +56,5 @@ public byte[] encode() { w.writeTag(2, 0); w.writeVarint32(offset); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/DictMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/DictMetadata.java index 183f734b..0acef874 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/DictMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/DictMetadata.java @@ -60,6 +60,11 @@ public static DictMetadata decode(MemorySegment __seg, long __off, long __len) t /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (values_len != 0) { w.writeTag(1, 0); w.writeVarint32(values_len); @@ -76,6 +81,5 @@ public byte[] encode() { w.writeTag(4, 0); w.writeBool(all_values_referenced); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Extension.java b/core/src/main/java/io/github/dfa1/vortex/proto/Extension.java index 491700ca..14121c1d 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Extension.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Extension.java @@ -51,19 +51,25 @@ public static Extension decode(MemorySegment __seg, long __off, long __len) thro /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (id != null && !id.isEmpty()) { w.writeTag(1, 2); w.writeString(id); } if (storage_dtype != null) { w.writeTag(2, 2); - w.writeEmbedded(storage_dtype.encode()); + int __mark = w.beginLenDelim(); + storage_dtype.encodeTo(w); + w.endLenDelim(__mark); } if (metadata != null) { w.writeTag(3, 2); w.writeBytes(metadata); } - return w.toByteArray(); } @Override diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/FSSTMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/FSSTMetadata.java index 8957a0c4..4137da41 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/FSSTMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/FSSTMetadata.java @@ -53,6 +53,11 @@ public static FSSTMetadata decode(MemorySegment __seg, long __off, long __len) t /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (uncompressed_lengths_ptype.value() != 0) { w.writeTag(1, 0); w.writeVarint32(uncompressed_lengths_ptype.value()); @@ -61,6 +66,5 @@ public byte[] encode() { w.writeTag(2, 0); w.writeVarint32(codes_offsets_ptype.value()); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Field.java b/core/src/main/java/io/github/dfa1/vortex/proto/Field.java index 91e80284..33f131cb 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Field.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Field.java @@ -37,11 +37,15 @@ public static Field decode(MemorySegment __seg, long __off, long __len) throws I /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (name != null) { w.writeTag(1, 2); w.writeString(name); } - return w.toByteArray(); } /// Factory for oneof case {@code name} (field tag 1). diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/FieldPath.java b/core/src/main/java/io/github/dfa1/vortex/proto/FieldPath.java index 5e2d2905..20aa208a 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/FieldPath.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/FieldPath.java @@ -38,10 +38,16 @@ public static FieldPath decode(MemorySegment __seg, long __off, long __len) thro /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { for (Field __v : path) { w.writeTag(1, 2); - w.writeEmbedded(__v.encode()); + int __mark = w.beginLenDelim(); + __v.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/FixedSizeList.java b/core/src/main/java/io/github/dfa1/vortex/proto/FixedSizeList.java index 7b822c64..9d0a7fb6 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/FixedSizeList.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/FixedSizeList.java @@ -50,9 +50,16 @@ public static FixedSizeList decode(MemorySegment __seg, long __off, long __len) /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (element_type != null) { w.writeTag(1, 2); - w.writeEmbedded(element_type.encode()); + int __mark = w.beginLenDelim(); + element_type.encodeTo(w); + w.endLenDelim(__mark); } if (size != 0) { w.writeTag(2, 0); @@ -62,6 +69,5 @@ public byte[] encode() { w.writeTag(3, 0); w.writeBool(nullable); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/List.java b/core/src/main/java/io/github/dfa1/vortex/proto/List.java index c0312942..ad987209 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/List.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/List.java @@ -44,14 +44,20 @@ public static List decode(MemorySegment __seg, long __off, long __len) throws IO /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (element_type != null) { w.writeTag(1, 2); - w.writeEmbedded(element_type.encode()); + int __mark = w.beginLenDelim(); + element_type.encodeTo(w); + w.endLenDelim(__mark); } if (nullable) { w.writeTag(2, 0); w.writeBool(nullable); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/ListMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/ListMetadata.java index 04e771e8..43d09c2c 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/ListMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/ListMetadata.java @@ -48,6 +48,11 @@ public static ListMetadata decode(MemorySegment __seg, long __off, long __len) t /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (elements_len != 0L) { w.writeTag(1, 0); w.writeVarint64(elements_len); @@ -56,6 +61,5 @@ public byte[] encode() { w.writeTag(2, 0); w.writeVarint32(offset_ptype.value()); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/ListValue.java b/core/src/main/java/io/github/dfa1/vortex/proto/ListValue.java index 4035edbe..98952516 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/ListValue.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/ListValue.java @@ -38,10 +38,16 @@ public static ListValue decode(MemorySegment __seg, long __off, long __len) thro /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { for (ScalarValue __v : values) { w.writeTag(1, 2); - w.writeEmbedded(__v.encode()); + int __mark = w.beginLenDelim(); + __v.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/ListViewMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/ListViewMetadata.java index e8001ffd..7b9148b5 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/ListViewMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/ListViewMetadata.java @@ -59,6 +59,11 @@ public static ListViewMetadata decode(MemorySegment __seg, long __off, long __le /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (elements_len != 0L) { w.writeTag(1, 0); w.writeVarint64(elements_len); @@ -71,6 +76,5 @@ public byte[] encode() { w.writeTag(3, 0); w.writeVarint32(size_ptype.value()); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Null.java b/core/src/main/java/io/github/dfa1/vortex/proto/Null.java index 40a824d8..fd2a8360 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Null.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Null.java @@ -31,6 +31,10 @@ public static Null decode(MemorySegment __seg, long __off, long __len) throws IO /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); return w.toByteArray(); } + + void encodeTo(ProtoWriter w) { + } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/PatchedMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/PatchedMetadata.java index d5fd9d8f..624f83a4 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/PatchedMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/PatchedMetadata.java @@ -49,6 +49,11 @@ public static PatchedMetadata decode(MemorySegment __seg, long __off, long __len /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (n_patches != 0) { w.writeTag(1, 0); w.writeVarint32(n_patches); @@ -61,6 +66,5 @@ public byte[] encode() { w.writeTag(3, 0); w.writeVarint32(offset); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/PatchesMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/PatchesMetadata.java index 9887a48e..b4fbaed5 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/PatchesMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/PatchesMetadata.java @@ -77,6 +77,11 @@ public static PatchesMetadata decode(MemorySegment __seg, long __off, long __len /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (len != 0L) { w.writeTag(1, 0); w.writeVarint64(len); @@ -101,6 +106,5 @@ public byte[] encode() { w.writeTag(6, 0); w.writeVarint64(offset_within_chunk); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/PcoChunkInfo.java b/core/src/main/java/io/github/dfa1/vortex/proto/PcoChunkInfo.java index c012421d..03d27792 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/PcoChunkInfo.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/PcoChunkInfo.java @@ -38,10 +38,16 @@ public static PcoChunkInfo decode(MemorySegment __seg, long __off, long __len) t /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { for (PcoPageInfo __v : pages) { w.writeTag(1, 2); - w.writeEmbedded(__v.encode()); + int __mark = w.beginLenDelim(); + __v.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/PcoMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/PcoMetadata.java index 7b9f088a..159073d0 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/PcoMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/PcoMetadata.java @@ -45,15 +45,21 @@ public static PcoMetadata decode(MemorySegment __seg, long __off, long __len) th /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (header != null && header.length != 0) { w.writeTag(1, 2); w.writeBytes(header); } for (PcoChunkInfo __v : chunks) { w.writeTag(2, 2); - w.writeEmbedded(__v.encode()); + int __mark = w.beginLenDelim(); + __v.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } @Override diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/PcoPageInfo.java b/core/src/main/java/io/github/dfa1/vortex/proto/PcoPageInfo.java index 79274ac1..3e6456a0 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/PcoPageInfo.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/PcoPageInfo.java @@ -37,10 +37,14 @@ public static PcoPageInfo decode(MemorySegment __seg, long __off, long __len) th /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (n_values != 0) { w.writeTag(1, 0); w.writeVarint32(n_values); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Primitive.java b/core/src/main/java/io/github/dfa1/vortex/proto/Primitive.java index e7d40e56..30c64bc5 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Primitive.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Primitive.java @@ -48,6 +48,11 @@ public static Primitive decode(MemorySegment __seg, long __off, long __len) thro /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (type.value() != 0) { w.writeTag(1, 0); w.writeVarint32(type.value()); @@ -56,6 +61,5 @@ public byte[] encode() { w.writeTag(2, 0); w.writeBool(nullable); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/ProtoWriter.java b/core/src/main/java/io/github/dfa1/vortex/proto/ProtoWriter.java index 29787f93..c8b2fa8d 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/ProtoWriter.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/ProtoWriter.java @@ -91,6 +91,61 @@ void writeEmbedded(byte[] encoded) { writeRaw(encoded); } + /// Reserves space for a length-delimited region whose payload size is unknown until written. + /// Returns a mark to pass back to {@link #endLenDelim(int)}; the caller writes the payload + /// in between. Avoids the alloc/copy round-trip of writing into a temporary {@code ProtoWriter}. + /// + /// Reserves the worst-case 5 bytes for a varint32 length; {@link #endLenDelim} backpatches + /// the actual length and shifts the payload left if a shorter varint suffices. + int beginLenDelim() { + ensure(MAX_LEN_VARINT); + int mark = pos; + pos += MAX_LEN_VARINT; + return mark; + } + + /// Finalises a length-delimited region opened by {@link #beginLenDelim()}. + /// Writes the payload length as a varint at the reserved offset and shifts the payload + /// left if the varint is shorter than 5 bytes. + void endLenDelim(int mark) { + int payloadStart = mark + MAX_LEN_VARINT; + int payloadEnd = pos; + int payloadLen = payloadEnd - payloadStart; + int lenVarintSize = varintSize(payloadLen); + if (lenVarintSize < MAX_LEN_VARINT) { + int shift = MAX_LEN_VARINT - lenVarintSize; + System.arraycopy(buf, payloadStart, buf, payloadStart - shift, payloadLen); + pos -= shift; + } + // Write the varint at offset `mark` without disturbing `pos`. + int writeAt = mark; + long v = payloadLen & 0xffffffffL; + while ((v & ~0x7fL) != 0L) { + buf[writeAt++] = (byte) ((v & 0x7f) | 0x80); + v >>>= 7; + } + buf[writeAt] = (byte) v; + } + + private static int varintSize(int value) { + long v = value & 0xffffffffL; + if (v < (1L << 7)) { + return 1; + } + if (v < (1L << 14)) { + return 2; + } + if (v < (1L << 21)) { + return 3; + } + if (v < (1L << 28)) { + return 4; + } + return 5; + } + + private static final int MAX_LEN_VARINT = 5; + private void writeRaw(byte[] bytes) { ensure(bytes.length); System.arraycopy(bytes, 0, buf, pos, bytes.length); diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/RLEMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/RLEMetadata.java index 875e2111..3a2f69cd 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/RLEMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/RLEMetadata.java @@ -77,6 +77,11 @@ public static RLEMetadata decode(MemorySegment __seg, long __off, long __len) th /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (values_len != 0L) { w.writeTag(1, 0); w.writeVarint64(values_len); @@ -101,6 +106,5 @@ public byte[] encode() { w.writeTag(6, 0); w.writeVarint64(offset); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/RunEndMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/RunEndMetadata.java index 4739222c..37a8a81f 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/RunEndMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/RunEndMetadata.java @@ -54,6 +54,11 @@ public static RunEndMetadata decode(MemorySegment __seg, long __off, long __len) /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (ends_ptype.value() != 0) { w.writeTag(1, 0); w.writeVarint32(ends_ptype.value()); @@ -66,6 +71,5 @@ public byte[] encode() { w.writeTag(3, 0); w.writeVarint64(offset); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Scalar.java b/core/src/main/java/io/github/dfa1/vortex/proto/Scalar.java index fcbfa169..2d5fac8a 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Scalar.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Scalar.java @@ -45,14 +45,22 @@ public static Scalar decode(MemorySegment __seg, long __off, long __len) throws /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (dtype != null) { w.writeTag(1, 2); - w.writeEmbedded(dtype.encode()); + int __mark = w.beginLenDelim(); + dtype.encodeTo(w); + w.endLenDelim(__mark); } if (value != null) { w.writeTag(2, 2); - w.writeEmbedded(value.encode()); + int __mark = w.beginLenDelim(); + value.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/ScalarValue.java b/core/src/main/java/io/github/dfa1/vortex/proto/ScalarValue.java index 060fff6d..cee8fcfa 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/ScalarValue.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/ScalarValue.java @@ -105,6 +105,11 @@ public static ScalarValue decode(MemorySegment __seg, long __off, long __len) th /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (null_value != null) { w.writeTag(1, 0); w.writeVarint32(null_value.value()); @@ -139,7 +144,9 @@ public byte[] encode() { } if (list_value != null) { w.writeTag(9, 2); - w.writeEmbedded(list_value.encode()); + int __mark = w.beginLenDelim(); + list_value.encodeTo(w); + w.endLenDelim(__mark); } if (f16_value != null) { w.writeTag(10, 0); @@ -147,9 +154,10 @@ public byte[] encode() { } if (variant_value != null) { w.writeTag(11, 2); - w.writeEmbedded(variant_value.encode()); + int __mark = w.beginLenDelim(); + variant_value.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } /// Factory for oneof case {@code null_value} (field tag 1). diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/SequenceMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/SequenceMetadata.java index 80886c0a..26cfd580 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/SequenceMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/SequenceMetadata.java @@ -45,14 +45,22 @@ public static SequenceMetadata decode(MemorySegment __seg, long __off, long __le /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (base != null) { w.writeTag(1, 2); - w.writeEmbedded(base.encode()); + int __mark = w.beginLenDelim(); + base.encodeTo(w); + w.endLenDelim(__mark); } if (multiplier != null) { w.writeTag(2, 2); - w.writeEmbedded(multiplier.encode()); + int __mark = w.beginLenDelim(); + multiplier.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/SparseMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/SparseMetadata.java index 73f2fe37..df1e6e2a 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/SparseMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/SparseMetadata.java @@ -38,10 +38,16 @@ public static SparseMetadata decode(MemorySegment __seg, long __off, long __len) /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (patches != null) { w.writeTag(1, 2); - w.writeEmbedded(patches.encode()); + int __mark = w.beginLenDelim(); + patches.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Struct.java b/core/src/main/java/io/github/dfa1/vortex/proto/Struct.java index 98b38edc..608c96f4 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Struct.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Struct.java @@ -50,18 +50,24 @@ public static Struct decode(MemorySegment __seg, long __off, long __len) throws /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { for (String __v : names) { w.writeTag(1, 2); w.writeString(__v); } for (DType __v : dtypes) { w.writeTag(2, 2); - w.writeEmbedded(__v.encode()); + int __mark = w.beginLenDelim(); + __v.encodeTo(w); + w.endLenDelim(__mark); } if (nullable) { w.writeTag(3, 0); w.writeBool(nullable); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Union.java b/core/src/main/java/io/github/dfa1/vortex/proto/Union.java index 97985ec6..58873ccb 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Union.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Union.java @@ -37,10 +37,14 @@ public static Union decode(MemorySegment __seg, long __off, long __len) throws I /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (nullable) { w.writeTag(4, 0); w.writeBool(nullable); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Utf8.java b/core/src/main/java/io/github/dfa1/vortex/proto/Utf8.java index 025f5f4f..c3b36333 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Utf8.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Utf8.java @@ -37,10 +37,14 @@ public static Utf8 decode(MemorySegment __seg, long __off, long __len) throws IO /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (nullable) { w.writeTag(1, 0); w.writeBool(nullable); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/VarBinMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/VarBinMetadata.java index 7206c672..5805174e 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/VarBinMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/VarBinMetadata.java @@ -42,10 +42,14 @@ public static VarBinMetadata decode(MemorySegment __seg, long __off, long __len) /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (offsets_ptype.value() != 0) { w.writeTag(1, 0); w.writeVarint32(offsets_ptype.value()); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/Variant.java b/core/src/main/java/io/github/dfa1/vortex/proto/Variant.java index a5c2f710..b5a23cb8 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/Variant.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/Variant.java @@ -37,10 +37,14 @@ public static Variant decode(MemorySegment __seg, long __off, long __len) throws /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (nullable) { w.writeTag(1, 0); w.writeBool(nullable); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/VariantMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/VariantMetadata.java index a84f220b..fb262749 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/VariantMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/VariantMetadata.java @@ -38,10 +38,16 @@ public static VariantMetadata decode(MemorySegment __seg, long __off, long __len /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (shredded_dtype != null) { w.writeTag(1, 2); - w.writeEmbedded(shredded_dtype.encode()); + int __mark = w.beginLenDelim(); + shredded_dtype.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/ZstdFrameMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/ZstdFrameMetadata.java index 45120fdc..63c1bcb0 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/ZstdFrameMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/ZstdFrameMetadata.java @@ -43,6 +43,11 @@ public static ZstdFrameMetadata decode(MemorySegment __seg, long __off, long __l /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (uncompressed_size != 0L) { w.writeTag(1, 0); w.writeVarint64(uncompressed_size); @@ -51,6 +56,5 @@ public byte[] encode() { w.writeTag(2, 0); w.writeVarint64(n_values); } - return w.toByteArray(); } } diff --git a/core/src/main/java/io/github/dfa1/vortex/proto/ZstdMetadata.java b/core/src/main/java/io/github/dfa1/vortex/proto/ZstdMetadata.java index 4c1c81a4..db0b5601 100644 --- a/core/src/main/java/io/github/dfa1/vortex/proto/ZstdMetadata.java +++ b/core/src/main/java/io/github/dfa1/vortex/proto/ZstdMetadata.java @@ -44,14 +44,20 @@ public static ZstdMetadata decode(MemorySegment __seg, long __off, long __len) t /// @return encoded bytes public byte[] encode() { ProtoWriter w = new ProtoWriter(); + encodeTo(w); + return w.toByteArray(); + } + + void encodeTo(ProtoWriter w) { if (dictionary_size != 0) { w.writeTag(1, 0); w.writeVarint32(dictionary_size); } for (ZstdFrameMetadata __v : frames) { w.writeTag(2, 2); - w.writeEmbedded(__v.encode()); + int __mark = w.beginLenDelim(); + __v.encodeTo(w); + w.endLenDelim(__mark); } - return w.toByteArray(); } } diff --git a/core/src/test/java/io/github/dfa1/vortex/proto/ProtoRuntimeTest.java b/core/src/test/java/io/github/dfa1/vortex/proto/ProtoRuntimeTest.java index 718acdf5..21e1278e 100644 --- a/core/src/test/java/io/github/dfa1/vortex/proto/ProtoRuntimeTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/proto/ProtoRuntimeTest.java @@ -330,6 +330,96 @@ void recordsWithDifferentByteArraysAreNotEqual() { } } + @Nested + class Backpatch { + + @Test + void shortPayloadCompactsLengthVarint() throws IOException { + // Given — payload of 3 bytes fits in 1-byte varint length. + // beginLenDelim reserves 5 bytes; endLenDelim shifts the payload left by 4. + ProtoWriter w = new ProtoWriter(); + int mark = w.beginLenDelim(); + w.writeVarint64(1); + w.writeVarint64(2); + w.writeVarint64(3); + w.endLenDelim(mark); + + // When + byte[] bytes = w.toByteArray(); + + // Then — len=3 (1 byte) + 3 payload bytes = 4 total, no wasted padding. + assertThat(bytes).containsExactly(0x03, 0x01, 0x02, 0x03); + } + + @Test + void backpatchedMatchesLegacyEmbeddedPattern() throws IOException { + // Given — same packed varint payload via backpatch vs. the legacy + // "temp ProtoWriter + writeEmbedded" pattern. Output bytes must match exactly, + // proving the backpatch refactor is wire-compatible. + ProtoWriter backpatch = new ProtoWriter(); + int mark = backpatch.beginLenDelim(); + for (int i = 0; i < 50; i++) { + backpatch.writeVarint64(i); + } + backpatch.endLenDelim(mark); + + ProtoWriter legacy = new ProtoWriter(); + ProtoWriter inner = new ProtoWriter(); + for (int i = 0; i < 50; i++) { + inner.writeVarint64(i); + } + legacy.writeEmbedded(inner.toByteArray()); + + // When + Then + assertThat(backpatch.toByteArray()).containsExactly(legacy.toByteArray()); + } + + @Test + void emptyPayloadProducesSingleZeroLength() { + // Given — len-delim region with no payload. + ProtoWriter w = new ProtoWriter(); + int mark = w.beginLenDelim(); + w.endLenDelim(mark); + + // When + Then — single 0x00 byte (length=0), 4 padding bytes shifted out. + assertThat(w.toByteArray()).containsExactly(0x00); + } + + @Test + void largePayloadKeepsMultiByteLengthVarint() throws IOException { + // Given — payload large enough to need a 2-byte length varint (>= 128 bytes). + // Shift = 5 - 2 = 3 bytes leftward. + ProtoWriter w = new ProtoWriter(); + int mark = w.beginLenDelim(); + byte[] payload = new byte[200]; + for (int i = 0; i < payload.length; i++) { + payload[i] = (byte) (i & 0xff); + } + for (byte b : payload) { + w.writeFixed32(b & 0xff); // 4 bytes each — actually use raw write + } + w.endLenDelim(mark); + + // When — decode the length back, verify payload survives the shift. + byte[] bytes = w.toByteArray(); + ProtoReader r = new ProtoReader(MemorySegment.ofArray(bytes), 0, bytes.length); + int len = r.readVarint32(); + + // Then — length matches 200 * 4 = 800; remaining bytes are exactly the payload. + assertThat(len).isEqualTo(800); + assertThat(bytes.length - varintBytes(len)).isEqualTo(800); + } + + private int varintBytes(int v) { + int n = 1; + while ((v & ~0x7f) != 0) { + v >>>= 7; + n++; + } + return n; + } + } + @Nested class Bounds { diff --git a/proto-gen/src/main/java/io/github/dfa1/vortex/protogen/CodeGen.java b/proto-gen/src/main/java/io/github/dfa1/vortex/protogen/CodeGen.java index 01b160c7..a262a48b 100644 --- a/proto-gen/src/main/java/io/github/dfa1/vortex/protogen/CodeGen.java +++ b/proto-gen/src/main/java/io/github/dfa1/vortex/protogen/CodeGen.java @@ -282,10 +282,16 @@ private void emitEncode(StringBuilder sb, List fields) { sb.append(" /// @return encoded bytes\n"); sb.append(" public byte[] encode() {\n"); sb.append(" ProtoWriter w = new ProtoWriter();\n"); + sb.append(" encodeTo(w);\n"); + sb.append(" return w.toByteArray();\n"); + sb.append(" }\n\n"); + // Package-private encoder that writes the record's wire bytes into the caller's + // ProtoWriter. Used by nested-message emitters to avoid the alloc/copy round-trip + // of creating a temporary writer and calling toByteArray() per nested field. + sb.append(" void encodeTo(ProtoWriter w) {\n"); for (Field f : fields) { f.emitEncode(sb, " "); } - sb.append(" return w.toByteArray();\n"); sb.append(" }\n"); } @@ -521,15 +527,14 @@ public void emitDecode(StringBuilder sb, String indent, Field f) { public void emitEncode(StringBuilder sb, String indent, Field f) { int wt = wireType(s); if (wt != 2) { - // Pack primitives into a single LEN region. + // Pack primitives into a single LEN region via backpatch — no temp ProtoWriter. sb.append(indent).append("if (!").append(f.name).append(".isEmpty()) {\n"); - sb.append(indent).append(" ProtoWriter packed = new ProtoWriter();\n"); + sb.append(indent).append(" w.writeTag(").append(f.number).append(", 2);\n"); + sb.append(indent).append(" int __mark = w.beginLenDelim();\n"); sb.append(indent).append(" for (").append(boxedName(s)).append(" __v : ").append(f.name).append(") {\n"); - sb.append(indent).append(" ").append(writeStmtOnWriter(s, "packed", "__v")).append("\n"); + sb.append(indent).append(" ").append(writeStmt(s, "__v")).append("\n"); sb.append(indent).append(" }\n"); - sb.append(indent).append(" byte[] __bytes = packed.toByteArray();\n"); - sb.append(indent).append(" w.writeTag(").append(f.number).append(", 2);\n"); - sb.append(indent).append(" w.writeEmbedded(__bytes);\n"); + sb.append(indent).append(" w.endLenDelim(__mark);\n"); sb.append(indent).append("}\n"); } else { // LEN-type repeated: tag-per-element. @@ -653,12 +658,12 @@ public void emitDecode(StringBuilder sb, String indent, Field f) { @Override public void emitEncode(StringBuilder sb, String indent, Field f) { sb.append(indent).append("if (!").append(f.name).append(".isEmpty()) {\n"); - sb.append(indent).append(" ProtoWriter packed = new ProtoWriter();\n"); + sb.append(indent).append(" w.writeTag(").append(f.number).append(", 2);\n"); + sb.append(indent).append(" int __mark = w.beginLenDelim();\n"); sb.append(indent).append(" for (").append(javaName).append(" __v : ").append(f.name).append(") {\n"); - sb.append(indent).append(" packed.writeVarint32(__v.value());\n"); + sb.append(indent).append(" w.writeVarint32(__v.value());\n"); sb.append(indent).append(" }\n"); - sb.append(indent).append(" w.writeTag(").append(f.number).append(", 2);\n"); - sb.append(indent).append(" w.writeEmbedded(packed.toByteArray());\n"); + sb.append(indent).append(" w.endLenDelim(__mark);\n"); sb.append(indent).append("}\n"); } } @@ -678,7 +683,9 @@ public void emitDecode(StringBuilder sb, String indent, Field f) { public void emitEncode(StringBuilder sb, String indent, Field f) { sb.append(indent).append("if (").append(f.name).append(" != null) {\n"); sb.append(indent).append(" w.writeTag(").append(f.number).append(", 2);\n"); - sb.append(indent).append(" w.writeEmbedded(").append(f.name).append(".encode());\n"); + sb.append(indent).append(" int __mark = w.beginLenDelim();\n"); + sb.append(indent).append(" ").append(f.name).append(".encodeTo(w);\n"); + sb.append(indent).append(" w.endLenDelim(__mark);\n"); sb.append(indent).append("}\n"); } } @@ -694,7 +701,9 @@ public void emitDecode(StringBuilder sb, String indent, Field f) { public void emitEncode(StringBuilder sb, String indent, Field f) { sb.append(indent).append("for (").append(javaName).append(" __v : ").append(f.name).append(") {\n"); sb.append(indent).append(" w.writeTag(").append(f.number).append(", 2);\n"); - sb.append(indent).append(" w.writeEmbedded(__v.encode());\n"); + sb.append(indent).append(" int __mark = w.beginLenDelim();\n"); + sb.append(indent).append(" __v.encodeTo(w);\n"); + sb.append(indent).append(" w.endLenDelim(__mark);\n"); sb.append(indent).append("}\n"); } }