Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,27 @@ zs_library(
],
)

# Library target for the dictionary sources: compiles every .c file under
# src/openzl/dict/ and exposes the .h files there as private headers.
# Depends on (and re-exports) the ":common" target.
zs_library(
name = "dict",
srcs = glob([
"src/openzl/dict/**/*.c",
]),
headers = private_headers(glob([
"src/openzl/dict/**/*.h",
])),
header_namespace = "",
exported_deps = [
":common",
],
)

zs_library(
name = "zstronglib",
exported_deps = [
":common",
":compress",
":decompress",
":dict",
],
exported_external_deps = [
"zstd",
Expand Down
4 changes: 4 additions & 0 deletions include/openzl/detail/zl_errors_detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ extern "C" {
#define ZL_ErrorCode_srcSize_tooLarge__desc_str "Source size too large"
#define ZL_ErrorCode_integerOverflow__desc_str "Integer overflow"
#define ZL_ErrorCode_invalidName__desc_str "Invalid name of graph component"
#define ZL_ErrorCode_dict_corruption__desc_str \
"Dictionary corruption: raw blob does not match expected wire format"
#define ZL_ErrorCode_dict_materialization__desc_str \
"Dictionary materialization failed"

/**********************
* ZL_StaticErrorInfo *
Expand Down
3 changes: 3 additions & 0 deletions include/openzl/zl_errors_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ typedef enum {
ZL_ErrorCode_formatVersion_unsupported = 60,
ZL_ErrorCode_formatVersion_notSet = 61,
ZL_ErrorCode_node_versionMismatch = 62,
/* dictionary errors */
ZL_ErrorCode_dict_corruption = 65,
ZL_ErrorCode_dict_materialization = 66,
/* internal errors */
ZL_ErrorCode_allocation = 70,
ZL_ErrorCode_internalBuffer_tooSmall = 71,
Expand Down
4 changes: 4 additions & 0 deletions src/openzl/common/errors.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ const char* ZL_ErrorCode_toString(ZL_ErrorCode code)
return ZL_ErrorCode_srcSize_tooLarge__desc_str;
case ZL_ErrorCode_integerOverflow:
return ZL_ErrorCode_integerOverflow__desc_str;
case ZL_ErrorCode_dict_corruption:
return ZL_ErrorCode_dict_corruption__desc_str;
case ZL_ErrorCode_dict_materialization:
return ZL_ErrorCode_dict_materialization__desc_str;
case ZL_ErrorCode_maxCode:
default:
ZL_ASSERT_FAIL("Invalid error code!: %d", (int)code);
Expand Down
4 changes: 4 additions & 0 deletions src/openzl/common/limits.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ size_t ZL_transformOutStreamsLimit(unsigned formatVersion);
/// Size limit for the variable sized comment field
#define ZL_MAX_HEADER_COMMENT_SIZE_LIMIT 10000

/// Registering more than this number of dictionaries on a single
/// compressor/dctx will fail.
#define ZL_DICTIONARY_LIMIT 1000

////////////////////////////////////////
// Compressor Serialization Limits
////////////////////////////////////////
Expand Down
41 changes: 41 additions & 0 deletions src/openzl/dict/dict.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.

#include "openzl/dict/dict.h"

#include <string.h>

#include "openzl/common/errors_internal.h"

const uint32_t ZL_DICT_MAGIC = 0x5A4C4449; // "ZLDI"

/**
 * Validates the framing of a serialized dictionary blob and returns a
 * non-owning view of it.
 *
 * Layout read by this function (each field is read in host byte order):
 *
 *   [uint32 magic][uint32 codecId][uint32 dictSize][dictSize content bytes]
 *
 * @param opctx    Operation context used for error reporting. Must not be
 *                 NULL (asserted).
 * @param dictBlob Start of the serialized dictionary. No alignment is
 *                 required: header fields are copied out with memcpy().
 * @param blobSize Total size of @p dictBlob in bytes.
 *
 * @return On success, a ZL_ParsedDict whose rawDictContent points INTO
 *         @p dictBlob (just past the 12-byte header), so the blob must
 *         outlive the returned value. On failure:
 *         - dict_materialization if @p dictBlob is NULL;
 *         - dict_corruption if the blob is shorter than the header, the magic
 *           does not match ZL_DICT_MAGIC, or @p blobSize is not exactly
 *           header size + dictSize.
 */
ZL_RESULT_OF(ZL_ParsedDict)
ZL_Dict_parse(
        ZL_OperationContext* opctx,
        const void* dictBlob,
        const size_t blobSize)
{
    ZL_ASSERT_NN(opctx);
    ZL_RESULT_DECLARE_SCOPE(ZL_ParsedDict, opctx);
    ZL_ERR_IF_NULL(dictBlob, dict_materialization, "dictBlob cannot be NULL");

    // magic + codecId + dictSize
    const size_t headerSize = 3 * sizeof(uint32_t);
    ZL_ERR_IF_LT(
            blobSize,
            headerSize,
            dict_corruption,
            "dict blob must contain metadata fields");

    const uint8_t* const bytes = (const uint8_t*)dictBlob;
    ZL_ERR_IF_NE(
            memcmp(bytes, &ZL_DICT_MAGIC, sizeof(ZL_DICT_MAGIC)),
            0,
            dict_corruption,
            "invalid dict magic");

    // The blob is an arbitrary byte buffer: copy the fields out instead of
    // dereferencing a casted uint32_t*, which could be misaligned.
    uint32_t codecId;
    uint32_t dictSize;
    memcpy(&codecId, bytes + sizeof(uint32_t), sizeof(codecId));
    memcpy(&dictSize, bytes + 2 * sizeof(uint32_t), sizeof(dictSize));

    // blobSize >= headerSize was checked above, so the subtraction cannot
    // wrap (unlike dictSize + headerSize when size_t is 32 bits wide).
    ZL_ERR_IF_NE(
            blobSize - headerSize,
            (size_t)dictSize,
            dict_corruption,
            "Dict blob size mismatch");

    ZL_ParsedDict ret;
    ret.codecId        = codecId;
    ret.dictSize       = dictSize;
    ret.rawDictContent = bytes + headerSize;
    ret.hash = ZL_SHA256_compute(ret.rawDictContent, ret.dictSize);
    return ZL_WRAP_VALUE(ret);
}
60 changes: 60 additions & 0 deletions src/openzl/dict/dict.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.

#ifndef OPENZL_DICT_DICT_H
#define OPENZL_DICT_DICT_H

#include "openzl/common/allocation.h"
#include "openzl/dict/sha256.h"
#include "openzl/zl_errors.h"
#include "openzl/zl_opaque_types.h" // ZL_NodeID

#if defined(__cplusplus)
extern "C" {
#endif

// ============================================================================
// ZL_Dict - Single Dictionary Structure
// ============================================================================

// Intermediate structure useful for routing the dict to the correct node's
// materializer. Does NOT own any pointers: rawDictContent points into the
// blob that was parsed. Ensure the raw dict blob outlives this struct.
typedef struct {
ZL_IDType codecId; // codec ID read from the blob header
ZL_SHA256 hash; // hash computed over the dictSize bytes at rawDictContent
uint32_t dictSize; // size in bytes of the content at rawDictContent
const void* rawDictContent; // NON-owning pointer
} ZL_ParsedDict;
ZL_RESULT_DECLARE_TYPE(ZL_ParsedDict);

/**
* @brief Single dictionary structure
*
* NOTE: ZL_Dict is the MATERIALIZED dictionary object, not the serialized
* wire representation. It is created from raw dictionary bytes via
* ZL_Dict_create() or via context-specific materialization functions.
*
* NOTE(review): ZL_Dict_create() is not declared in this header; confirm
* where it lives (or whether this reference is stale).
*/
typedef struct {
ZL_IDType codecId; // Codec responsible for materializing/dematerializing
ZL_SHA256 hash; // Precomputed SHA-256 hash of the dictionary content
// (presumably the raw bytes the dict was built from;
// TODO confirm)
void (*dematerializeFn)(void* dictObj); // Callback that receives dictObj;
// presumably releases it (TODO
// confirm against the codecs)
void* dictObj; // Materialized data (caller-allocated or
// arena-allocated)
} ZL_Dict;

/**
* Does basic validation and generates an intermediate representation of the
* dict blob. Used by the ZL_Compressor and ZL_DCtx, which are expected to then
* call the proper node materializer to generate the full ZL_Dict structure.
*
* @param opctx    Operation context used for error reporting; must not be NULL.
* @param dictBlob Serialized dictionary: a header of three uint32_t fields
*                 (magic, codec ID, content size) followed by the content.
* @param blobSize Size of @p dictBlob in bytes; must equal the header size
*                 plus the content size stored in the header.
*
* @return The parsed view on success. Its rawDictContent points into
*         @p dictBlob, so the blob must outlive the result. Returns a
*         dict_materialization error if @p dictBlob is NULL and a
*         dict_corruption error if the blob is too small, has the wrong magic,
*         or its size does not match the header.
*/
ZL_RESULT_OF(ZL_ParsedDict)
ZL_Dict_parse(
ZL_OperationContext* opctx,
const void* dictBlob,
const size_t blobSize);

#if defined(__cplusplus)
} // extern "C"
#endif

#endif // OPENZL_DICT_DICT_H
82 changes: 82 additions & 0 deletions src/openzl/dict/dictbundle.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.

#include "openzl/dict/dictbundle.h"

#include <stdint.h>
#include <string.h>

#include "openzl/common/allocation.h"

const uint32_t ZL_DICTBUNDLE_COMP_MAGIC =
0x5A4C4243; // "ZLBC" (compression-only)
const uint32_t ZL_DICTBUNDLE_BIDI_MAGIC = 0x5A4C4249; // "ZLBI" (bidirectional)

// ============================================================================
// ZL_DictBundle Implementation
// ============================================================================

/**
 * Parses a serialized dictionary bundle and builds a ZL_DictBundle in
 * @p arena.
 *
 * Layout read by this function (integers are read in host byte order):
 *
 *   [uint32 magic][ZL_SHA256 bundleId][uint32 numDicts][numDicts * ZL_SHA256]
 *
 * The whole blob is validated BEFORE anything is allocated, so a rejected
 * blob leaves no allocation behind in the arena.
 *
 * @param opctx    Operation context used for error reporting.
 * @param arena    Arena that backs the bundle and its hash array. Must not be
 *                 NULL (asserted).
 * @param dictBlob Start of the serialized bundle. Must not be NULL (asserted).
 * @param blobSize Size of @p dictBlob in bytes.
 *
 * @return The new bundle on success; release it with DictBundle_free() using
 *         the same arena. On failure:
 *         - dict_materialization if the blob is smaller than the header;
 *         - dict_corruption if the magic is unknown or @p blobSize does not
 *           match the dictionary count stored in the header;
 *         - allocation if the arena cannot provide the memory.
 */
ZL_RESULT_OF(ZL_DictBundlePtr)
DictBundle_create(
        ZL_OperationContext* opctx,
        Arena* arena,
        const void* dictBlob,
        const size_t blobSize)
{
    ZL_RESULT_DECLARE_SCOPE(ZL_DictBundlePtr, opctx);
    ZL_ASSERT_NN(arena);
    ZL_ASSERT_NN(dictBlob);

    // magic + bundleId + numDicts
    const size_t headerSize = sizeof(uint32_t) * 2 + sizeof(ZL_SHA256);
    ZL_ERR_IF_LT(
            blobSize,
            headerSize,
            dict_materialization,
            "DictBundle blob too small for header");

    const char* ptr = (const char*)dictBlob;

    uint32_t magic;
    memcpy(&magic, ptr, sizeof(magic));
    ptr += sizeof(magic);
    if (magic != ZL_DICTBUNDLE_COMP_MAGIC
        && magic != ZL_DICTBUNDLE_BIDI_MAGIC) {
        ZL_ERR(dict_corruption, "Invalid dict bundle magic");
    }
    const bool compressionOnly = (magic == ZL_DICTBUNDLE_COMP_MAGIC);

    const char* const bundleIdSrc = ptr;
    ptr += sizeof(ZL_SHA256);

    // The count is a 32-bit wire field. Read it into a 32-bit temporary:
    // copying 4 bytes straight into the size_t struct member would leave its
    // remaining bytes uninitialized.
    uint32_t numDicts;
    memcpy(&numDicts, ptr, sizeof(numDicts));
    ptr += sizeof(numDicts);

    // Compare in 64 bits so that numDicts * sizeof(ZL_SHA256) cannot wrap
    // when size_t is 32 bits wide. blobSize >= headerSize was checked above.
    const size_t hashesSize = blobSize - headerSize;
    ZL_ERR_IF_NE(
            (uint64_t)hashesSize,
            (uint64_t)numDicts * sizeof(ZL_SHA256),
            dict_corruption,
            "DictBundle blob size mismatch");

    ZL_DictBundle* bundle = ALLOC_Arena_malloc(arena, sizeof(*bundle));
    ZL_ERR_IF_NULL(bundle, allocation, "Failed to allocate ZL_DictBundle");

    ZL_SHA256* dicts = ALLOC_Arena_malloc(arena, hashesSize);
    if (dicts == NULL) {
        // Do not abandon the half-built bundle.
        ALLOC_Arena_free(arena, bundle);
        ZL_ERR(allocation, "Failed to allocate ZL_DictBundle dicts");
    }
    memcpy(dicts, ptr, hashesSize);

    memcpy(&bundle->bundleId, bundleIdSrc, sizeof(ZL_SHA256));
    bundle->compressionOnly = compressionOnly;
    bundle->numDicts        = numDicts;
    bundle->dicts           = dicts;

    return ZL_WRAP_VALUE(bundle);
}

/**
 * Hands a bundle's hash array and then the bundle itself back to @p arena via
 * ALLOC_Arena_free(). Does nothing when @p bundle is NULL.
 */
void DictBundle_free(Arena* arena, ZL_DictBundle* bundle)
{
    if (bundle != NULL) {
        // The array first: it is reached through the bundle.
        ALLOC_Arena_free(arena, bundle->dicts);
        ALLOC_Arena_free(arena, bundle);
    }
}
45 changes: 45 additions & 0 deletions src/openzl/dict/dictbundle.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.

#ifndef OPENZL_DICT_DICTBUNDLE_H
#define OPENZL_DICT_DICTBUNDLE_H

#include <stddef.h> // size_t

#include "openzl/common/allocation.h"
#include "openzl/common/errors_internal.h"
#include "openzl/dict/sha256.h"

#if defined(__cplusplus)
extern "C" {
#endif

// In-memory form of a parsed dictionary bundle, as built by
// DictBundle_create().
typedef struct {
ZL_SHA256 bundleId; // identifier copied from the bundle blob header
bool compressionOnly; // true when the blob carried the compression-only
// magic, false for the bidirectional magic
size_t numDicts; // number of entries in dicts
ZL_SHA256* dicts; // array of numDicts hashes copied out of the blob
} ZL_DictBundle;

// Pointer alias so a bundle pointer can be carried in a ZL_RESULT_OF().
typedef ZL_DictBundle* ZL_DictBundlePtr;
ZL_RESULT_DECLARE_TYPE(ZL_DictBundlePtr);

// Accessors for a bundle's dictionary count and hash array.
// NOTE(review): no definition of these two functions appears next to
// DictBundle_create()/DictBundle_free() in dictbundle.c; confirm they are
// implemented somewhere. Also note that ZL_DictBundle_dictHashes() hands out a
// non-const pointer from a const bundle; confirm that is intended.
size_t ZL_DictBundle_numDicts(const ZL_DictBundle* bundle);
ZL_SHA256* ZL_DictBundle_dictHashes(const ZL_DictBundle* bundle);

/**
* Parse and allocate a bundle from the raw blob. Does safety checks.
*
* @param opctx    Operation context used for error reporting.
* @param arena    Arena the bundle and its hash array are allocated from;
*                 must not be NULL.
* @param dictBlob Serialized bundle: magic, bundle ID, dictionary count, then
*                 one ZL_SHA256 per dictionary. Must not be NULL.
* @param blobSize Size of @p dictBlob in bytes.
*
* @return The allocated bundle on success (release with DictBundle_free()),
*         or an error if the blob is too small, has an unknown magic, has a
*         size that does not match its dictionary count, or allocation fails.
*/
ZL_RESULT_OF(ZL_DictBundlePtr)
DictBundle_create(
ZL_OperationContext* opctx,
Arena* arena,
const void* dictBlob,
const size_t blobSize);

/**
* Frees the bundle's hash array and then the bundle itself through @p arena.
* A NULL @p bundle is ignored.
*/
void DictBundle_free(Arena* arena, ZL_DictBundle* bundle);

#if defined(__cplusplus)
} // extern "C"
#endif

#endif // OPENZL_DICT_DICTBUNDLE_H
Loading
Loading