Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dataclasses_struct/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
Bool,
Char,
Int,
LengthPrefixed,
Long,
LongLong,
PadAfter,
Expand Down Expand Up @@ -73,6 +74,7 @@
"FloatingPointField",
"Int",
"IntField",
"LengthPrefixed",
"Long",
"LongLong",
"NativeIntField",
Expand Down
4 changes: 3 additions & 1 deletion dataclasses_struct/dataclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,9 @@ def _resolve_field(
if opt_field is None:
raise TypeError(f"type not supported: {annotation}")
field = opt_field
elif isinstance(annotation_arg, Field):
elif isinstance(annotation_arg, Field) and issubclass(
type_, annotation_arg.field_type
):
field = annotation_arg
elif get_origin(type_) is list:
item_annotations = get_args(type_)
Expand Down
45 changes: 45 additions & 0 deletions dataclasses_struct/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,51 @@
Supported in both size modes."""


class LengthPrefixed(field.Field[bytes]):
    """
    Length-prefixed byte array, also known as a 'Pascal string'.

    Packed to a fixed-length array of bytes, where the first byte is the length
    of the data. Data shorter than the maximum size is padded with zero bytes.

    Must be used to annotate a `bytes` field with `typing.Annotated`:

    ```python
    from typing import Annotated

    import dataclasses_struct as dcs

    @dcs.dataclass_struct()
    class Example:
        fixed_length: Annotated[bytes, dcs.LengthPrefixed(10)]
    ```

    Args:
        size: The maximum size of the string including the length byte. Must be
            between 2 and 256 inclusive. The maximum array length that can be
            stored without truncation is `size - 1`.

    Raises:
        ValueError: If `size` is not an `int` or is outside the valid range.
    """

    field_type = bytes

    def __init__(self, size: int):
        # Non-int values are rejected before any range comparison happens.
        if not isinstance(size, int) or size < 2 or size > 256:
            raise ValueError("size must be an int between 2 and 256")
        self.size = size

    def format(self) -> str:
        """Return the format code for this field: the size followed by `p`."""
        return "{}p".format(self.size)

    def __repr__(self) -> str:
        return "{}({})".format(type(self).__name__, self.size)

    def validate_default(self, val: bytes) -> None:
        """
        Check that a default value fits in the field without truncation.

        Raises:
            ValueError: If `val` is longer than `size - 1` bytes.
        """
        # One byte of the packed size is taken by the length prefix.
        max_len = self.size - 1
        if len(val) <= max_len:
            return
        raise ValueError(f"bytes cannot be longer than {max_len} bytes")


class _Padding:
before: bool

Expand Down
65 changes: 65 additions & 0 deletions docs/guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ class Chars:

### Bytes arrays

#### Fixed-length

Fixed-length byte arrays can be represented in both size modes by annotating a
field with `typing.Annotated` and a positive length. The field's unpacked Python
representation will be a `bytes` object zero-padded or truncated to the
Expand All @@ -287,6 +289,69 @@ class FixedLength:
FixedLength(fixed=b'Hello, wor')
```

!!! tip "Tip: null-terminated strings"

Fixed-length `bytes` arrays are truncated to the exact length specified in
the `Annotated` argument. If you require `bytes` arrays to always be
null-terminated (e.g. for passing to a C API), add a [`PadAfter`
annotation](#manual-padding) to the field:

```python
@dcs.dataclass_struct()
class FixedLengthNullTerminated:
# Equivalent to `unsigned char[11]` in C
fixed: Annotated[bytes, 10, dcs.PadAfter(1)]
```

```python
>>> FixedLengthNullTerminated(b"0123456789A").pack()
b'0123456789\x00'
```

#### Length-prefixed

One issue with fixed-length `bytes` arrays is that data shorter than the length
will be zero-padded when unpacking to the Python type:

```python
>>> packed = FixedLength(b'Hello').pack()
>>> packed
b'Hello\x00\x00\x00\x00\x00'
>>> FixedLength.from_packed(packed)
FixedLength(fixed=b'Hello\x00\x00\x00\x00\x00')
```

An alternative is to use *length-prefixed arrays*, also known as [*Pascal
strings*](https://en.wikipedia.org/wiki/Pascal_string). These store the length
of the array in the first byte, meaning that the maximum length that can be
stored without truncation is 255 bytes. To use length-prefixed arrays, annotate
a `bytes` field with [`LengthPrefixed`][dataclasses_struct.LengthPrefixed]:

```python
from typing import Annotated

@dcs.dataclass_struct()
class PascalStrings:
s: Annotated[bytes, dcs.LengthPrefixed(10)] # (1)!
```

1. The size passed to `LengthPrefixed` must be between 2 and 256 inclusive.

```python
>>> packed = PascalStrings(b"Hello").pack()
>>> packed
b'\x05Hello\x00\x00\x00\x00'
>>> PascalStrings.from_packed(packed)
PascalStrings(s=b'Hello')
```

!!! note

The size passed to [`LengthPrefixed`][dataclasses_struct.LengthPrefixed] is
the size of the packed representation of the field *including the size
byte*, so the maximum length the array can be without truncation is one less
than the size.

### Fixed-length arrays

Fixed-length arrays can be represented by annotating a `list` field with
Expand Down
2 changes: 2 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ markdown_extensions:
- pymdownx.inlinehilite
- pymdownx.snippets
- pymdownx.superfences
- pymdownx.details
- pymdownx.magiclink
- admonition
watch:
- dataclasses_struct
77 changes: 75 additions & 2 deletions test/test_fields.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import dataclasses
import itertools
import re
from contextlib import contextmanager
from re import escape
from typing import Annotated
Expand Down Expand Up @@ -237,23 +238,95 @@ class _:


@parametrize_all_sizes_and_byteorders()
def test_invalid_annotated_fails(byteorder, size) -> None:
def test_type_annotated_with_invalid_type_fails(byteorder, size) -> None:
with raises_invalid_field_annotation():

@dcs.dataclass_struct(byteorder=byteorder, size=size)
class _:
x: Annotated[int, dcs.I64]


# Declaring a `float` field annotated with a `NativeIntField` instance is
# expected to be rejected while the class is being decorated.
# NOTE(review): the exact exception/message is whatever
# `raises_invalid_field_annotation()` asserts; that helper is defined outside
# this view.
def test_type_annotated_with_unsupported_field_type_fails() -> None:
with raises_invalid_field_annotation():

# The decorator runs inside the context manager so the error it raises is
# the one being checked.
@dcs.dataclass_struct()
class _:
x: Annotated[float, dcs.NativeIntField("i", "int")]


@contextmanager
def raises_too_many_annotations_error(extra: object):
    """
    Expect a `TypeError` reporting `extra` as a surplus annotation.

    `extra` is converted with `str()` and regex-escaped, and the whole error
    message must match `too many annotations: <extra>` exactly.
    """
    escaped = re.escape(str(extra))
    expected_message = rf"^too many annotations: {escaped}$"
    with pytest.raises(TypeError, match=expected_message):
        yield


@parametrize_all_sizes_and_byteorders()
def test_bytes_with_too_many_annotations_fails(byteorder, size) -> None:
with pytest.raises(TypeError, match=r"^too many annotations: 12$"):
with raises_too_many_annotations_error(12):

@dcs.dataclass_struct(byteorder=byteorder, size=size)
class _:
x: Annotated[bytes, 1, 12]


def test_length_prefixed_bytes_format() -> None:
    """A `LengthPrefixed(256)` field yields `256p` in the struct format."""

    @dcs.dataclass_struct()
    class Sample:
        x: Annotated[bytes, dcs.LengthPrefixed(256)]

    # The first character of the format string is skipped; presumably it is
    # the byte-order/size-mode prefix (confirm against the library).
    field_format = Sample.__dataclass_struct__.format[1:]
    assert field_format == "256p"


@parametrize_all_sizes_and_byteorders()
def test_length_prefixed_bytes_has_same_size_as_length(
    byteorder, size
) -> None:
    """The struct size equals the size passed to `LengthPrefixed`."""

    @dcs.dataclass_struct(byteorder=byteorder, size=size)
    class Sample:
        x: Annotated[bytes, dcs.LengthPrefixed(256)]

    struct_size = dcs.get_struct_size(Sample)
    assert struct_size == 256


@pytest.mark.parametrize("size", (1, 257, "100", 100.0))
def test_length_prefixed_bytes_invalid_size_fails(size: int):
    """Out-of-range and non-int sizes are rejected with a `ValueError`."""
    expected_message = r"^size must be an int between 2 and 256$"
    with pytest.raises(ValueError, match=expected_message):

        @dcs.dataclass_struct()
        class _:
            x: Annotated[bytes, dcs.LengthPrefixed(size)]


# `LengthPrefixed` declares `field_type = bytes`, so using it to annotate an
# `int` field is expected to be rejected while the class is being decorated.
# NOTE(review): the exact exception/message is whatever
# `raises_invalid_field_annotation()` asserts; that helper is defined outside
# this view.
def test_length_prefixed_bytes_fails_when_annotating_non_bytes_type() -> None:
with raises_invalid_field_annotation():

@dcs.dataclass_struct()
class _:
x: Annotated[int, dcs.LengthPrefixed(100)]


def test_bytes_annotated_with_integer_and_length_prefixed_bytes_fails() -> (
    None
):
    """
    A `bytes` field may not carry both `LengthPrefixed` and an integer length.

    The annotated type is `bytes` (as the test name states) so that
    `LengthPrefixed` is a valid annotation for the field and the only thing
    wrong with the declaration is the surplus `100`.
    """
    with raises_too_many_annotations_error(100):

        @dcs.dataclass_struct()
        class _:
            x: Annotated[bytes, dcs.LengthPrefixed(100), 100]


def test_bytes_annotated_with_multiple_length_prefixed_bytess_fails() -> None:
    """
    A `bytes` field may not carry two `LengthPrefixed` annotations.

    The annotated type is `bytes` (as the test name states) so that the first
    `LengthPrefixed` is valid for the field and the only thing wrong with the
    declaration is the second, surplus one.
    """
    with raises_too_many_annotations_error("LengthPrefixed(100)"):

        @dcs.dataclass_struct()
        class _:
            x: Annotated[
                bytes, dcs.LengthPrefixed(100), dcs.LengthPrefixed(100)
            ]


def parametrize_all_size_and_byteorder_combinations() -> pytest.MarkDecorator:
"""
All combinations of size and byteorder, including invalid combinations.
Expand Down
68 changes: 68 additions & 0 deletions test/test_pack_unpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,74 @@ class T:
assert unpacked.x == b"123\0\0"


@parametrize_all_sizes_and_byteorders()
def test_packed_length_prefixed_bytes_shorter_than_size_is_zero_padded(
    size, byteorder
) -> None:
    """Data shorter than the field is packed with trailing zero bytes."""

    @dcs.dataclass_struct(byteorder=byteorder, size=size)
    class Sample:
        x: Annotated[bytes, dcs.LengthPrefixed(5)]

    # Length byte (3), the three data bytes, then a single zero pad byte.
    expected = b"\x03123\x00"
    assert Sample(b"123").pack() == expected


@parametrize_all_sizes_and_byteorders()
def test_packed_length_prefixed_bytes_greater_than_size_is_truncated(
    size, byteorder
) -> None:
    """Data longer than `size - 1` bytes is truncated when packed."""

    @dcs.dataclass_struct(size=size, byteorder=byteorder)
    class T:
        x: Annotated[bytes, dcs.LengthPrefixed(5)]

    # Five data bytes do not fit in a 5-byte field (one byte is the length
    # prefix), so only the leading four are stored and the prefix records 4.
    packed = T(b"12345").pack()
    assert packed == b"\x041234"


@parametrize_all_sizes_and_byteorders()
def test_pack_unpack_empty_length_prefixed_bytes(size, byteorder) -> None:
    """An empty value survives a pack/unpack round trip unchanged."""

    @dcs.dataclass_struct(byteorder=byteorder, size=size)
    class Sample:
        x: Annotated[bytes, dcs.LengthPrefixed(5)]

    original = Sample(b"")
    restored = Sample.from_packed(original.pack())
    assert restored == Sample(b"")


@parametrize_all_sizes_and_byteorders()
def test_pack_unpack_length_prefixed_bytes_shorter_than_size(
    size, byteorder
) -> None:
    """A value shorter than the field round-trips without added padding."""

    @dcs.dataclass_struct(byteorder=byteorder, size=size)
    class Sample:
        x: Annotated[bytes, dcs.LengthPrefixed(5)]

    original = Sample(b"123")
    restored = Sample.from_packed(original.pack())
    assert restored == Sample(b"123")


@parametrize_all_sizes_and_byteorders()
def test_pack_unpack_length_prefixed_bytes_exact_size(size, byteorder) -> None:
    """A value of exactly `size - 1` bytes round-trips unchanged."""

    @dcs.dataclass_struct(byteorder=byteorder, size=size)
    class Sample:
        x: Annotated[bytes, dcs.LengthPrefixed(5)]

    original = Sample(b"1234")
    restored = Sample.from_packed(original.pack())
    assert restored == Sample(b"1234")


@parametrize_all_sizes_and_byteorders()
def test_pack_unpack_length_prefixed_bytes_longer_than_size(
    size, byteorder
) -> None:
    """A value longer than `size - 1` bytes comes back truncated."""

    @dcs.dataclass_struct(byteorder=byteorder, size=size)
    class Sample:
        x: Annotated[bytes, dcs.LengthPrefixed(5)]

    oversized = Sample(b"12345")
    restored = Sample.from_packed(oversized.pack())
    # Only the first four bytes fit next to the length prefix.
    assert restored == Sample(b"1234")


@parametrize_all_sizes_and_byteorders()
def test_pack_unpack_nested(size, byteorder) -> None:
@dcs.dataclass_struct(size=size, byteorder=byteorder)
Expand Down
21 changes: 21 additions & 0 deletions test/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,27 @@ class _:
x: Annotated[bytes, 8] = b"123456789"


@pytest.mark.parametrize("default", (b"", b"123", b"1234"))
def test_length_prefixed_bytes_default(default: bytes) -> None:
    """Defaults of at most `size - 1` bytes are accepted and used as-is."""

    @dcs.dataclass_struct()
    class Sample:
        x: Annotated[bytes, dcs.LengthPrefixed(5)] = default

    instance = Sample()
    assert instance.x == default


def test_length_prefixed_bytes_default_too_long_fails() -> None:
    """A default longer than `size - 1` bytes is rejected at class creation."""
    expected_message = r"^bytes cannot be longer than 4 bytes$"
    with pytest.raises(ValueError, match=expected_message):

        @dcs.dataclass_struct()
        class _:
            x: Annotated[bytes, dcs.LengthPrefixed(5)] = b"12345"


@parametrize_all_sizes_and_byteorders()
@parametrize_fields(float_fields, "float_field")
@pytest.mark.parametrize("default", (10, 10.12))
Expand Down