From 735f4db79a4a794ac24ba37a88b24078843ce8a5 Mon Sep 17 00:00:00 2001 From: Harry Mander Date: Thu, 19 Jun 2025 16:12:56 +1200 Subject: [PATCH 1/3] Raise TypeError when type annotated with Field that doesn't support it E.g. Annotated[float, dcs.NativeIntField(...)] --- dataclasses_struct/dataclass.py | 4 +++- test/test_fields.py | 10 +++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/dataclasses_struct/dataclass.py b/dataclasses_struct/dataclass.py index fa5b426..3456859 100644 --- a/dataclasses_struct/dataclass.py +++ b/dataclasses_struct/dataclass.py @@ -436,7 +436,9 @@ def _resolve_field( if opt_field is None: raise TypeError(f"type not supported: {annotation}") field = opt_field - elif isinstance(annotation_arg, Field): + elif isinstance(annotation_arg, Field) and issubclass( + type_, annotation_arg.field_type + ): field = annotation_arg elif get_origin(type_) is list: item_annotations = get_args(type_) diff --git a/test/test_fields.py b/test/test_fields.py index 7568b4e..2410b21 100644 --- a/test/test_fields.py +++ b/test/test_fields.py @@ -237,7 +237,7 @@ class _: @parametrize_all_sizes_and_byteorders() -def test_invalid_annotated_fails(byteorder, size) -> None: +def test_type_annotated_with_invalid_type_fails(byteorder, size) -> None: with raises_invalid_field_annotation(): @dcs.dataclass_struct(byteorder=byteorder, size=size) @@ -245,6 +245,14 @@ class _: x: Annotated[int, dcs.I64] +def test_type_annotated_with_unsupported_field_type_fails() -> None: + with raises_invalid_field_annotation(): + + @dcs.dataclass_struct() + class _: + x: Annotated[float, dcs.NativeIntField("i", "int")] + + @parametrize_all_sizes_and_byteorders() def test_bytes_with_too_many_annotations_fails(byteorder, size) -> None: with pytest.raises(TypeError, match=r"^too many annotations: 12$"): From 7d93957a3de58944d383468f8d6d432a2ed5ab5b Mon Sep 17 00:00:00 2001 From: Harry Mander Date: Thu, 19 Jun 2025 16:29:28 +1200 Subject: [PATCH 2/3] Add support for 
length-prefixed bytes arrays ('Pascal strings') --- dataclasses_struct/__init__.py | 2 + dataclasses_struct/types.py | 45 ++++++++++++++++++++++ test/test_fields.py | 67 ++++++++++++++++++++++++++++++++- test/test_pack_unpack.py | 68 ++++++++++++++++++++++++++++++++++ test/test_validation.py | 21 +++++++++++ 5 files changed, 202 insertions(+), 1 deletion(-) diff --git a/dataclasses_struct/__init__.py b/dataclasses_struct/__init__.py index 3fdb1c4..b1ef547 100644 --- a/dataclasses_struct/__init__.py +++ b/dataclasses_struct/__init__.py @@ -36,6 +36,7 @@ Bool, Char, Int, + LengthPrefixed, Long, LongLong, PadAfter, @@ -73,6 +74,7 @@ "FloatingPointField", "Int", "IntField", + "LengthPrefixed", "Long", "LongLong", "NativeIntField", diff --git a/dataclasses_struct/types.py b/dataclasses_struct/types.py index fc41e69..d251df2 100644 --- a/dataclasses_struct/types.py +++ b/dataclasses_struct/types.py @@ -99,6 +99,51 @@ Supported in both size modes.""" +class LengthPrefixed(field.Field[bytes]): + """ + Length-prefixed byte array, also known as a 'Pascal string'. + + Packed to a fixed-length array of bytes, where the first byte is the length + of the data. Data shorter than the maximum size is padded with zero bytes. + + Must be used to annotate a `bytes` field with `typing.Annotated`: + + ```python + import dataclasses_struct as dcs + + @dcs.dataclass_struct() + class Example: + fixed_length: Annotated[bytes, dcs.LengthPrefixed(10)] + ``` + + Args: + size: The maximum size of the string including the length byte. Must be + between 2 and 256 inclusive. The maximum array length that can be + stored without truncation is `size - 1`. + + Raises: + ValueError: If `size` is outside the valid range. 
+ """ + + field_type = bytes + + def __init__(self, size: int): + if not (isinstance(size, int) and 2 <= size <= 256): + raise ValueError("size must be an int between 2 and 256") + self.size = size + + def format(self) -> str: + return f"{self.size}p" + + def __repr__(self) -> str: + return f"{type(self).__name__}({self.size})" + + def validate_default(self, val: bytes) -> None: + if len(val) > self.size - 1: + msg = f"bytes cannot be longer than {self.size - 1} bytes" + raise ValueError(msg) + + class _Padding: before: bool diff --git a/test/test_fields.py b/test/test_fields.py index 2410b21..e178f55 100644 --- a/test/test_fields.py +++ b/test/test_fields.py @@ -1,5 +1,6 @@ import dataclasses import itertools +import re from contextlib import contextmanager from re import escape from typing import Annotated @@ -253,15 +254,79 @@ class _: x: Annotated[float, dcs.NativeIntField("i", "int")] +@contextmanager +def raises_too_many_annotations_error(extra: object): + extra = re.escape(str(extra)) + with pytest.raises(TypeError, match=rf"^too many annotations: {extra}$"): + yield + + @parametrize_all_sizes_and_byteorders() def test_bytes_with_too_many_annotations_fails(byteorder, size) -> None: - with pytest.raises(TypeError, match=r"^too many annotations: 12$"): + with raises_too_many_annotations_error(12): @dcs.dataclass_struct(byteorder=byteorder, size=size) class _: x: Annotated[bytes, 1, 12] +def test_length_prefixed_bytes_format() -> None: + @dcs.dataclass_struct() + class T: + x: Annotated[bytes, dcs.LengthPrefixed(256)] + + assert T.__dataclass_struct__.format[1:] == "256p" + + +@parametrize_all_sizes_and_byteorders() +def test_length_prefixed_bytes_has_same_size_as_length( + byteorder, size +) -> None: + @dcs.dataclass_struct(size=size, byteorder=byteorder) + class T: + x: Annotated[bytes, dcs.LengthPrefixed(256)] + + assert dcs.get_struct_size(T) == 256 + + +@pytest.mark.parametrize("size", (1, 257, "100", 100.0)) +def 
test_length_prefixed_bytes_invalid_size_fails(size: int): + with pytest.raises( + ValueError, + match=r"^size must be an int between 2 and 256$", + ): + + @dcs.dataclass_struct() + class _: + x: Annotated[bytes, dcs.LengthPrefixed(size)] + + +def test_length_prefixed_bytes_fails_when_annotating_non_bytes_type() -> None: + with raises_invalid_field_annotation(): + + @dcs.dataclass_struct() + class _: + x: Annotated[int, dcs.LengthPrefixed(100)] + + +def test_bytes_annotated_with_integer_and_length_prefixed_bytes_fails() -> ( + None +): + with raises_too_many_annotations_error(100): + + @dcs.dataclass_struct() + class _: + x: Annotated[int, dcs.LengthPrefixed(100), 100] + + +def test_bytes_annotated_with_multiple_length_prefixed_bytess_fails() -> None: + with raises_too_many_annotations_error("LengthPrefixed(100)"): + + @dcs.dataclass_struct() + class _: + x: Annotated[int, dcs.LengthPrefixed(100), dcs.LengthPrefixed(100)] + + def parametrize_all_size_and_byteorder_combinations() -> pytest.MarkDecorator: """ All combinations of size and byteorder, including invalid combinations. 
diff --git a/test/test_pack_unpack.py b/test/test_pack_unpack.py index c0ffa5c..455a681 100644 --- a/test/test_pack_unpack.py +++ b/test/test_pack_unpack.py @@ -136,6 +136,74 @@ class T: assert unpacked.x == b"123\0\0" +@parametrize_all_sizes_and_byteorders() +def test_packed_length_prefixed_bytes_shorter_than_size_is_zero_padded( + size, byteorder +) -> None: + @dcs.dataclass_struct(size=size, byteorder=byteorder) + class T: + x: Annotated[bytes, dcs.LengthPrefixed(5)] + + packed = T(b"123").pack() + assert packed == b"\x03123\x00" + + +@parametrize_all_sizes_and_byteorders() +def test_packed_length_prefixed_bytes_greater_than_size_is_truncated( + size, byteorder +) -> None: + @dcs.dataclass_struct(size=size, byteorder=byteorder) + class T: + x: Annotated[bytes, dcs.LengthPrefixed(5)] + + packed = T(b"1234").pack() + assert packed == b"\x041234" + + +@parametrize_all_sizes_and_byteorders() +def test_pack_unpack_empty_length_prefixed_bytes(size, byteorder) -> None: + @dcs.dataclass_struct(size=size, byteorder=byteorder) + class T: + x: Annotated[bytes, dcs.LengthPrefixed(5)] + + packed = T(b"").pack() + assert T.from_packed(packed) == T(b"") + + +@parametrize_all_sizes_and_byteorders() +def test_pack_unpack_length_prefixed_bytes_shorter_than_size( + size, byteorder +) -> None: + @dcs.dataclass_struct(size=size, byteorder=byteorder) + class T: + x: Annotated[bytes, dcs.LengthPrefixed(5)] + + packed = T(b"123").pack() + assert T.from_packed(packed) == T(b"123") + + +@parametrize_all_sizes_and_byteorders() +def test_pack_unpack_length_prefixed_bytes_exact_size(size, byteorder) -> None: + @dcs.dataclass_struct(size=size, byteorder=byteorder) + class T: + x: Annotated[bytes, dcs.LengthPrefixed(5)] + + packed = T(b"1234").pack() + assert T.from_packed(packed) == T(b"1234") + + +@parametrize_all_sizes_and_byteorders() +def test_pack_unpack_length_prefixed_bytes_longer_than_size( + size, byteorder +) -> None: + @dcs.dataclass_struct(size=size, byteorder=byteorder) + class 
T: + x: Annotated[bytes, dcs.LengthPrefixed(5)] + + packed = T(b"12345").pack() + assert T.from_packed(packed) == T(b"1234") + + @parametrize_all_sizes_and_byteorders() def test_pack_unpack_nested(size, byteorder) -> None: @dcs.dataclass_struct(size=size, byteorder=byteorder) diff --git a/test/test_validation.py b/test/test_validation.py index ad8dc5c..5e0ef45 100644 --- a/test/test_validation.py +++ b/test/test_validation.py @@ -234,6 +234,27 @@ class _: x: Annotated[bytes, 8] = b"123456789" +@pytest.mark.parametrize("default", (b"", b"123", b"1234")) +def test_length_prefixed_bytes_default(default: bytes) -> None: + @dcs.dataclass_struct() + class T: + x: Annotated[bytes, dcs.LengthPrefixed(5)] = default + + t = T() + assert t.x == default + + +def test_length_prefixed_bytes_default_too_long_fails() -> None: + with pytest.raises( + ValueError, + match=r"^bytes cannot be longer than 4 bytes$", + ): + + @dcs.dataclass_struct() + class _: + x: Annotated[bytes, dcs.LengthPrefixed(5)] = b"12345" + + @parametrize_all_sizes_and_byteorders() @parametrize_fields(float_fields, "float_field") @pytest.mark.parametrize("default", (10, 10.12)) From dd2982d35b9061e613de320963424bb7aeb6cb61 Mon Sep 17 00:00:00 2001 From: Harry Mander Date: Thu, 19 Jun 2025 17:16:21 +1200 Subject: [PATCH 3/3] docs: document length-prefixed bytes arrays and null-terminated strings --- docs/guide.md | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 2 ++ 2 files changed, 67 insertions(+) diff --git a/docs/guide.md b/docs/guide.md index fc7ab15..0c5d8c1 100644 --- a/docs/guide.md +++ b/docs/guide.md @@ -269,6 +269,8 @@ class Chars: ### Bytes arrays +#### Fixed-length + Fixed-length byte arrays can be represented in both size modes by annotating a field with `typing.Annotated` and a positive length. The field's unpacked Python representation will be a `bytes` object zero-padded or truncated to the @@ -287,6 +289,69 @@ class FixedLength: FixedLength(fixed=b'Hello, wor') ``` +!!! 
tip "Tip: null-terminated strings" + + Fixed-length `bytes` arrays are truncated to the exact length specified in + the `Annotated` argument. If you require `bytes` arrays to always be + null-terminated (e.g. for passing to a C API), add a [`PadAfter` + annotation](#manual-padding) to the field: + + ```python + @dcs.dataclass_struct() + class FixedLengthNullTerminated: + # Equivalent to `unsigned char[11]` in C + fixed: Annotated[bytes, 10, dcs.PadAfter(1)] + ``` + + ```python + >>> FixedLengthNullTerminated(b"0123456789A").pack() + b'0123456789\x00' + ``` + +#### Length-prefixed + +One issue with fixed-length `bytes` arrays is that data shorter than the length +will be zero-padded when unpacking to the Python type: + +```python +>>> packed = FixedLength(b'Hello').pack() +>>> packed +b'Hello\x00\x00\x00\x00\x00' +>>> FixedLength.from_packed(packed) +FixedLength(fixed=b'Hello\x00\x00\x00\x00\x00') +``` + +An alternative is to use *length-prefixed arrays*, also known as [*Pascal +strings*](https://en.wikipedia.org/wiki/Pascal_string). These store the length +of the array in the first byte, meaning that the available length without +truncation is 255. To use length-prefixed arrays, annotate a `bytes` field with +[`LengthPrefixed`][dataclasses_struct.LengthPrefixed]: + +```python +from typing import Annotated + +@dcs.dataclass_struct() +class PascalStrings: + s: Annotated[bytes, dcs.LengthPrefixed(10)] # (1)! +``` + +1. The length passed to `LengthPrefixed` must be between 2 and 256 inclusive. + +```python +>>> packed = PascalStrings(b"Hello").pack() +>>> packed +b'\x05Hello\x00\x00\x00\x00' +>>> PascalStrings.from_packed(packed) +PascalStrings(s=b'Hello') +``` + +!!! note + + The size passed to [`LengthPrefixed`][dataclasses_struct.LengthPrefixed] is + the size of the packed representation of the field *including the size + byte*, so the maximum length the array can be without truncation is one less + than the size. 
+ ### Fixed-length arrays Fixed-length arrays can be represented by annotating a `list` field with diff --git a/mkdocs.yml b/mkdocs.yml index ec9b557..b24a933 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -40,6 +40,8 @@ markdown_extensions: - pymdownx.inlinehilite - pymdownx.snippets - pymdownx.superfences + - pymdownx.details - pymdownx.magiclink + - admonition watch: - dataclasses_struct