Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions include/openzl/codecs/zl_partition.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <stdint.h>

#include "openzl/zl_errors.h"
#include "openzl/zl_graphs.h"
#include "openzl/zl_nodes.h"

#if defined(__cplusplus)
Expand Down Expand Up @@ -74,6 +75,18 @@ ZL_Compressor_buildPartitionNode(
const uint64_t* partitionSizes,
size_t numPartitions);

/// Parameter ID of the "optimal mode" setting, which takes a ZL_TernaryParam
/// value.
/// NOTE(review): presumably a parameter of ZL_GRAPH_PARTITION_BITPACK, judging
/// by the name -- confirm.
/// - ZL_TernaryParam_auto: use the default behavior.
/// - ZL_TernaryParam_enable: enable optimal mode (slower, for slightly better
///   compression).
/// - ZL_TernaryParam_disable: disable optimal mode (much faster, but slightly
///   worse compression).
#define ZL_GRAPH_PARTITION_BITPACK_OPTIMAL_PID 0

/// Graph that computes partition boundaries for 16-bit numeric data,
/// routing bucket IDs to ZL_GRAPH_BITPACK and offsets to ZL_GRAPH_STORE.
#define ZL_GRAPH_PARTITION_BITPACK \
ZL_MAKE_GRAPH_ID(ZL_StandardGraphID_partition_bitpack)

#if defined(__cplusplus)
}
#endif
Expand Down
12 changes: 12 additions & 0 deletions include/openzl/zl_decompress.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,18 @@ typedef enum {
*/
ZL_DParam_checkContentChecksum = 3,

/**
* @brief Enable codec fusion during decompression.
*
* Codec fusion combines multiple adjacent codec nodes into a single
* optimized decoder. Setting this to ZL_TernaryParam_disable causes each
* codec in the graph to be decoded individually, which can be useful for
* debugging or testing codec correctness without fusion.
*
* Valid values use the ZL_TernaryParam format; the default is enabled.
*/
ZL_DParam_enableCodecFusion = 4,

} ZL_DParam;

/**
Expand Down
2 changes: 2 additions & 0 deletions include/openzl/zl_graphs.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ typedef enum {

ZL_StandardGraphID_lz4,

ZL_StandardGraphID_partition_bitpack,

ZL_StandardGraphID_public_end // last id, used to detect end of public
// range
} ZL_StandardGraphID;
Expand Down
68 changes: 44 additions & 24 deletions src/openzl/codecs/bitpack/decode_bitpack_binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,33 @@
#include "openzl/zl_data.h"
#include "openzl/zl_dtransform.h"

/// Decode a 1- or 2-byte bitpack codec header into @p parsed.
///
/// Byte 0 carries log2(element width in bytes) in its two high bits and
/// (nbBits - 1) in its six low bits. When a second byte is present, its value
/// is subtracted from the largest element count that fits in @p packedSize
/// bytes to obtain the final element count.
///
/// Fails with header_unknown when the header is empty or longer than 2 bytes,
/// and with corruption when nbBits exceeds the element width in bits or when
/// the second byte exceeds the maximum element count.
ZL_Report ZL_BitpackHeader_parse(
        ZL_BitpackHeader* parsed,
        const void* headerData,
        size_t headerSize,
        size_t packedSize)
{
    ZL_RESULT_DECLARE_SCOPE_REPORT(NULL);
    ZL_ERR_IF_LE(headerSize, 0, header_unknown, "Empty bitpack header");
    ZL_ERR_IF_GT(headerSize, 2, header_unknown, "Bitpack header too large");

    const uint8_t* const bytes = (const uint8_t*)headerData;
    const uint8_t layout       = bytes[0];

    // Top two bits: log2 of the element width. Low six bits: nbBits - 1.
    parsed->eltWidth = (size_t)1 << ((layout >> 6) & 0x3);
    parsed->nbBits   = (size_t)(layout & 0x3F) + 1;
    ZL_ERR_IF_GT(parsed->nbBits, parsed->eltWidth * 8, corruption);

    // The optional second byte is the amount to remove from the capacity.
    const size_t surplus  = (headerSize > 1) ? bytes[1] : 0;
    const size_t capacity = (packedSize * 8) / parsed->nbBits;
    ZL_ERR_IF_GT(surplus, capacity, corruption, "bitpack header corrupt");
    parsed->numElts = capacity - surplus;

    return ZL_returnSuccess();
}

static ZL_Report
DI_bitpack_typed(ZL_Decoder* dictx, const ZL_Input* ins[], ZL_Type type)
{
Expand All @@ -21,39 +48,32 @@ DI_bitpack_typed(ZL_Decoder* dictx, const ZL_Input* ins[], ZL_Type type)
size_t srcSize = ZL_Input_numElts(in);

ZL_RBuffer const headerBuffer = ZL_Decoder_getCodecHeader(dictx);
ZL_ERR_IF_GT(headerBuffer.size, 2, header_unknown);
ZL_ERR_IF_LE(headerBuffer.size, 0, header_unknown);
uint8_t const header = *(uint8_t const*)headerBuffer.start;
bool const hasExtraSpace = headerBuffer.size > 1;
size_t const dstEltWidth = (size_t)1 << ((header >> 6) & 0x3);
int const nbBits = 1 + (header & 0x3F);

ZL_ERR_IF_GT((size_t)nbBits, dstEltWidth * 8, internalBuffer_tooSmall);
ZL_ERR_IF_LE(nbBits, 0, header_unknown);
ZL_BitpackHeader bpHeader;
ZL_ERR_IF_ERR(ZL_BitpackHeader_parse(
&bpHeader, headerBuffer.start, headerBuffer.size, srcSize));
if (type == ZL_Type_serial) {
ZL_ERR_IF_NE(dstEltWidth, 1, header_unknown, "Serialized has width 1!");
ZL_ERR_IF_NE(
bpHeader.eltWidth,
1,
header_unknown,
"Serialized has width 1!");
}

size_t dstNbElts;
if (hasExtraSpace) {
size_t const maxNbElts = (srcSize * 8) / (size_t)nbBits;
uint8_t const nbExtraElts = ((uint8_t const*)headerBuffer.start)[1];
ZL_ERR_IF_GT(
nbExtraElts, maxNbElts, corruption, "bitpack header corrupt");
dstNbElts = maxNbElts - nbExtraElts;
} else {
dstNbElts = (srcSize * 8) / (size_t)nbBits;
}
ZL_Output* const out =
ZL_Decoder_create1OutStream(dictx, dstNbElts, dstEltWidth);
ZL_Output* const out = ZL_Decoder_create1OutStream(
dictx, bpHeader.numElts, bpHeader.eltWidth);
ZL_ERR_IF_NULL(out, allocation);

size_t const srcConsumed = ZS_bitpackDecode(
ZL_Output_ptr(out), dstNbElts, dstEltWidth, src, srcSize, nbBits);
ZL_Output_ptr(out),
bpHeader.numElts,
bpHeader.eltWidth,
src,
srcSize,
(int)bpHeader.nbBits);
ZL_ERR_IF_NE(
srcConsumed, srcSize, corruption, "entire source not consumed");

ZL_ERR_IF_ERR(ZL_Output_commit(out, dstNbElts));
ZL_ERR_IF_ERR(ZL_Output_commit(out, bpHeader.numElts));

// Return the number of output streams.
return ZL_returnValue(1);
Expand Down
19 changes: 19 additions & 0 deletions src/openzl/codecs/bitpack/decode_bitpack_binding.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,25 @@

ZL_BEGIN_C_DECLS

/// Parsed bitpack header fields, as filled in by ZL_BitpackHeader_parse().
typedef struct {
/// Output element width in bytes (1, 2, 4, or 8).
size_t eltWidth; // Output element width in bytes (1, 2, 4, or 8)
/// Number of bits per packed element (1..64); never more than eltWidth * 8
/// after a successful parse.
size_t nbBits; // Number of bits per packed element (1..64)
/// Number of output elements: the maximum count that fits in the packed
/// stream minus the optional "extra elements" header byte.
size_t numElts; // Number of output elements
} ZL_BitpackHeader;

/// Parse a bitpack header and compute the number of output elements.
///
/// @param[out] parsed Receives the parsed header fields. On a corruption
///                    error some fields may already have been written.
/// @param headerData Pointer to the raw header bytes (1 or 2 bytes).
/// @param headerSize Size of the header in bytes.
/// @param packedSize Size of the packed data stream in bytes.
/// @returns Success, or an error: header_unknown if @p headerSize is 0 or
///          greater than 2; corruption if the encoded bit width exceeds the
///          element width in bits, or if the second header byte exceeds the
///          maximum number of elements that fit in @p packedSize bytes.
ZL_Report ZL_BitpackHeader_parse(
ZL_BitpackHeader* parsed,
const void* headerData,
size_t headerSize,
size_t packedSize);

/* new methods, based on typedTransform */
ZL_Report DI_bitpack_numeric(ZL_Decoder* dictx, const ZL_Input* in[]);
ZL_Report DI_bitpack_serialized(ZL_Decoder* dictx, const ZL_Input* in[]);
Expand Down
18 changes: 18 additions & 0 deletions src/openzl/codecs/decoder_registry.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "openzl/codecs/parse_int/decode_parse_int_binding.h"
#include "openzl/codecs/parse_int/graph_parse_int.h"
#include "openzl/codecs/partition/decode_partition_binding.h"
#include "openzl/codecs/partition/decode_partition_bitpack_fusion.h"
#include "openzl/codecs/prefix/decode_prefix_binding.h"
#include "openzl/codecs/quantize/decode_quantize_binding.h"
#include "openzl/codecs/range_pack/decode_range_pack_binding.h"
Expand Down Expand Up @@ -169,4 +170,21 @@ const StandardDTransform SDecoders_array[ZL_StandardTransformID_end] = {
REGISTER_DEPRECATED_TTRANSFORM_G(ZL_StandardTransformID_huffman_deprecated, 3, 14, DI_HUFFMAN, PIPE_GRAPH),
REGISTER_DEPRECATED_TTRANSFORM_G(ZL_StandardTransformID_huffman_fixed_deprecated, 3, 14, DI_HUFFMAN_FIXED, FIXED_ENTROPY_GRAPH),
};

// Table of built-in decoder fusions, one designated entry per fusion ID.
const ZL_DecoderFusionDesc ZL_DecoderFusion_array[ZL_DecoderFusionID_end] = {
// Partition + bitpack: a partition parent codec with a single bitpack_int
// child, handled by ZL_partitionBitpackFusedDecode.
[ZL_DecoderFusionID_partitionBitpack] = {
.pattern = {
.parentCodec = ZL_StandardTransformID_partition,
.numChildren = 1,
.children = (const ZL_DecoderFusionChild[]){
{
.codec = ZL_StandardTransformID_bitpack_int,
.numRegens = 1,
// NOTE(review): presumably the index of the parent input that
// this child regenerates -- confirm against decoder_fusion.h.
.parentIndices = (const uint32_t[]){ 0 },
}
},
},
.fusionFn = ZL_partitionBitpackFusedDecode,
},
};
// clang-format on
14 changes: 13 additions & 1 deletion src/openzl/codecs/decoder_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
#ifndef ZSTRONG_TRANSFORMS_DECODER_REGISTRY_H
#define ZSTRONG_TRANSFORMS_DECODER_REGISTRY_H

#include "openzl/common/wire_format.h" // ZL_StandardTransformID_end
#include "openzl/common/wire_format.h" // ZL_StandardTransformID_end
#include "openzl/decompress/decoder_fusion.h"
#include "openzl/decompress/dtransforms.h" // DTransform, DTrDesc
#include "openzl/shared/portability.h"

Expand All @@ -17,6 +18,17 @@ typedef struct {

extern const StandardDTransform SDecoders_array[ZL_StandardTransformID_end];

/// IDs for the built-in decoder fusions. Each ID is used as an index into
/// ZL_DecoderFusion_array, so new fusions should be added before
/// ZL_DecoderFusionID_end.
typedef enum {
/// Fusion whose descriptor is registered under this ID in
/// ZL_DecoderFusion_array.
ZL_DecoderFusionID_partitionBitpack,
/// Number of built-in fusions; sizes ZL_DecoderFusion_array. Not a fusion.
ZL_DecoderFusionID_end,
} ZL_DecoderFusionID;

/// The built-in decoder fusion descriptors, indexed by ZL_DecoderFusionID.
/// The array has exactly ZL_DecoderFusionID_end entries.
extern const ZL_DecoderFusionDesc
ZL_DecoderFusion_array[ZL_DecoderFusionID_end];

ZL_END_C_DECLS

#endif
103 changes: 103 additions & 0 deletions src/openzl/codecs/partition/common_partition.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

#include "openzl/codecs/partition/common_partition.h"

#include "openzl/codecs/common/bitstream/ff_bitstream.h"
#include "openzl/codecs/zl_partition.h"
#include "openzl/shared/bits.h"
#include "openzl/shared/overflow.h"
#include "openzl/shared/utils.h"
#include "openzl/shared/varint.h"

bool ZL_PartitionParams_validate(const ZL_PartitionParams* params)
{
Expand Down Expand Up @@ -156,3 +158,104 @@ void ZL_PartitionParams_computeBasesU64(
bases[i] = bases[i - 1] + params->partitionSizes[i - 1];
}
}

/// Scan all partition sizes in @p params and report the biggest one.
/// Yields 0 when params->numPartitions is 0.
uint64_t ZL_PartitionParams_getLargestPartitionSize(
        const ZL_PartitionParams* params)
{
    uint64_t largest = 0;
    size_t idx       = 0;
    while (idx < params->numPartitions) {
        const uint64_t candidate = params->partitionSizes[idx];
        if (candidate > largest) {
            largest = candidate;
        }
        ++idx;
    }
    return largest;
}

/// Compute how many low-order zero bits are common to the start value and to
/// every partition size of @p params.
///
/// @p params must pass ZL_PartitionParams_validate() (asserted), and the
/// result is asserted to be below 64.
size_t ZL_PartitionParams_getNumTrailingZeros(const ZL_PartitionParams* params)
{
    ZL_ASSERT(ZL_PartitionParams_validate(params));

    // A start value of zero places no constraint, so seed with the full
    // 64-bit width and let the partition sizes narrow it down.
    int sharedZeros = 64;
    if (params->startValue != 0) {
        sharedZeros = ZL_ctz64(params->startValue);
    }

    for (size_t idx = 0; idx < params->numPartitions; ++idx) {
        const int sizeZeros = ZL_ctz64(params->partitionSizes[idx]);
        if (sizeZeros < sharedZeros) {
            sharedZeros = sizeZeros;
        }
    }

    ZL_ASSERT_LT(sharedZeros, 64);
    return (size_t)sharedZeros;
}

/// Parse a partition codec header into @p params and @p width.
///
/// The first header byte is a flags byte; its two low bits give log2 of the
/// element width. Depending on the flags, the rest of the header is either
/// empty (preset), or an optional varint start value followed by the
/// partition sizes, stored either as a bitstream of log2 sizes (power-of-2
/// mode) or as a sequence of varints.
///
/// For presets @p partitionSizesBuffer is not touched and *params is a copy of
/// the preset's parameters. Otherwise params->partitionSizes is set to
/// @p partitionSizesBuffer, which receives at most ZL_PARTITION_MAX_PARTITIONS
/// entries.
///
/// Returns a corruption error for an empty header, an unknown preset, a
/// malformed power-of-2 bitstream, too many partitions, or parameters that
/// fail ZL_PartitionParams_validate().
ZL_Report ZL_PartitionParams_parseHeader(
ZL_PartitionParams* params,
size_t* width,
const uint8_t* header,
size_t headerSize,
uint64_t* partitionSizesBuffer)
{
ZL_RESULT_DECLARE_SCOPE_REPORT(NULL);

ZL_ERR_IF_LT(headerSize, 1, corruption, "Empty header");
const uint8_t* hdr = header;
const uint8_t* const end = hdr + headerSize;
// First byte is the flags byte; hdr now points at the payload.
const uint8_t flags = *hdr++;

// Bits [1:0] of the flags hold log2(element width in bytes).
*width = 1u << (flags & 0x3);

if (flags & ZL_PARTITION_HEADER_IS_PRESET_BIT) {
// Preset mode: the preset ID is stored in the flag bits above bit 2, and
// the parameters are copied from the preset table. No further header
// bytes are read on this path.
const ZL_PartitionParamsPreset preset =
(ZL_PartitionParamsPreset)(flags >> 3);
ZL_PartitionParams const* const presetParams =
ZL_PartitionParams_getPreset(preset);
ZL_ERR_IF_NULL(presetParams, corruption);
*params = *presetParams;
return ZL_returnSuccess();
}

// Start value: implicit zero when flagged, otherwise a varint.
if (flags & ZL_PARTITION_HEADER_IS_FIRST_VALUE_ZERO_BIT) {
params->startValue = 0;
} else {
ZL_TRY_SET(uint64_t, params->startValue, ZL_varintDecode(&hdr, end));
}

if (flags & ZL_PARTITION_HEADER_IS_POW2_BIT) {
// Power-of-2 mode: each partition size is stored as its log2, using a
// fixed field width of 3..6 bits selected by the two high flag bits.
const size_t numBits = (size_t)(flags >> 6) + 3;
ZL_ERR_IF_EQ(hdr, end, corruption, "Missing partition sizes");
// The last byte must be non-zero: its highest set bit is used to work out
// how many bits of the final byte are not payload.
// NOTE(review): assumes ZL_highbit32 returns the zero-based index of the
// highest set bit, so that bit and everything above it are excluded --
// confirm.
ZL_ERR_IF_EQ(
end[-1], 0, corruption, "Corrupted partition sizes bitstream");
const size_t unusedBits = 8 - (size_t)ZL_highbit32(end[-1]);
const size_t totalBits = 8 * (size_t)(end - hdr) - unusedBits;
ZL_ERR_IF_NE(
totalBits % numBits,
0,
corruption,
"bitstream size not multiple of numBits");
params->numPartitions = totalBits / numBits;

// Bound the count before writing into partitionSizesBuffer.
ZL_ERR_IF_GT(
params->numPartitions, ZL_PARTITION_MAX_PARTITIONS, corruption);

ZS_BitDStreamFF bitstream =
ZS_BitDStreamFF_init(hdr, (size_t)(end - hdr));
for (size_t i = 0; i < params->numPartitions; ++i) {
// log2Size is at most 6 bits wide (<= 63), so the shift stays in range.
uint64_t const log2Size = ZS_BitDStreamFF_read(&bitstream, numBits);
partitionSizesBuffer[i] = 1ULL << log2Size;
ZS_BitDStreamFF_reload(&bitstream);
}

ZL_ERR_IF_ERR(ZS_BitDStreamFF_finish(&bitstream));
params->partitionSizes = partitionSizesBuffer;
} else {
// General mode: partition sizes are varints filling the rest of the
// header.
params->numPartitions = 0;
while (hdr < end) {
// Check capacity before each write into partitionSizesBuffer.
ZL_ERR_IF_GE(
params->numPartitions,
ZL_PARTITION_MAX_PARTITIONS,
corruption);
// NOTE(review): the destination expression post-increments
// numPartitions; this relies on ZL_TRY_SET evaluating it exactly once --
// confirm against the macro definition.
ZL_TRY_SET(
uint64_t,
partitionSizesBuffer[params->numPartitions++],
ZL_varintDecode(&hdr, end));
}
params->partitionSizes = partitionSizesBuffer;
}
// Reject parameter sets that parsed cleanly but are not valid.
ZL_ERR_IF_NOT(ZL_PartitionParams_validate(params), corruption);
return ZL_returnSuccess();
}
38 changes: 38 additions & 0 deletions src/openzl/codecs/partition/common_partition.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,26 @@ void ZL_PartitionParams_computeBasesU64(
const ZL_PartitionParams* params,
uint64_t* bases);

/// @returns The largest partition size.
uint64_t ZL_PartitionParams_getLargestPartitionSize(
const ZL_PartitionParams* params);

/// @returns The number of trailing zeros shared by the start value and all the
/// partition sizes.
/// @note This is useful for encoding to reduce the size of the
/// offset->partition LUT. Offsets can be right shifted by this amount, because
/// partition boundaries can only happen at multiples of 2^NumTrailingZeros.
size_t ZL_PartitionParams_getNumTrailingZeros(const ZL_PartitionParams* params);

/// Scratch allocation callback paired with a caller-supplied context pointer.
/// NOTE(review): the failure convention (e.g. NULL on failure) and the
/// lifetime of returned memory are not specified here -- confirm with the
/// users of this type.
typedef struct {
/// Caller-defined context; its type matches the first argument of alloc.
void* opaque;
/// Allocation callback taking the context pointer and a size.
void* (*alloc)(void* opaque, size_t size);
} ZL_PartitionScratchAlloc;

/// The maximum partition size where encode & decode can unroll the loop 4 times
/// when reading and writing the offset bits.
#define ZL_PARTITION_MAX_PARTITION_SIZE_FOR_UNROLL4 (1u << 14)

/// Header flag bits for the partition codec header byte.
/// Bits [1:0]: log2(element width in bytes).

Expand All @@ -55,6 +75,24 @@ void ZL_PartitionParams_computeBasesU64(
/// Bit 5: all partition sizes are powers of 2.
#define ZL_PARTITION_HEADER_IS_POW2_BIT 0x20

/// Parse partition parameters from a codec header buffer.
/// For presets, @p partitionSizesBuffer is unused and @p params receives a
/// copy of the preset's parameters. For non-presets, @p partitionSizesBuffer
/// must have at least ZL_PARTITION_MAX_PARTITIONS entries, and
/// params->partitionSizes will point into it.
/// @param[out] params Parsed partition parameters.
/// @param[out] width Output element width in bytes (1, 2, 4, or 8).
/// @param[in] header Pointer to the codec header bytes.
/// @param[in] headerSize Size of the codec header in bytes.
/// @param[out] partitionSizesBuffer Scratch buffer for non-preset partition
/// sizes (at least ZL_PARTITION_MAX_PARTITIONS entries).
/// @returns Success, or a corruption error when the header is empty, names an
/// unknown preset, carries a malformed power-of-2 size bitstream, describes
/// more than ZL_PARTITION_MAX_PARTITIONS partitions, or yields parameters that
/// fail ZL_PartitionParams_validate(). On error, @p params and @p width may
/// have been partially written.
ZL_Report ZL_PartitionParams_parseHeader(
ZL_PartitionParams* params,
size_t* width,
const uint8_t* header,
size_t headerSize,
uint64_t* partitionSizesBuffer);

ZL_END_C_DECLS

#endif
Loading
Loading