From 23f08523b63d8baafe2e4b3b38bfa98a68e896ce Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Tue, 24 Feb 2026 20:59:43 -0800 Subject: [PATCH] feat(codegen): add Arrow schema output target Add `--format arrow` to the CLI, producing pyarrow Schemas from Overture feature models. The renderer walks ModelSpecs top-down: primitives map to Arrow scalars via TypeRegistry, MODEL-kind fields expand recursively into nested structs, enums and literals fall back to string, and discriminated unions merge member structs into a single flattened struct in which fields missing from any variant become nullable and conflicting numeric types are promoted to a common wider type. Dict fields emit map types. List-wrapped fields emit list types. Field descriptions are embedded as Arrow field metadata, and the schema version and model entry point are embedded as schema-level metadata, so downstream consumers can inspect documentation and provenance without the source models. pyarrow is an optional dependency (`arrow` extra) to avoid adding weight for markdown-only users. --- Makefile | 2 +- .../overture-schema-codegen/pyproject.toml | 3 + .../overture/schema/codegen/arrow_renderer.py | 273 ++++++++++ .../src/overture/schema/codegen/cli.py | 62 ++- .../overture/schema/codegen/type_registry.py | 50 +- .../tests/test_arrow_renderer.py | 470 ++++++++++++++++++ .../overture-schema-codegen/tests/test_cli.py | 58 +++ uv.lock | 64 +++ 8 files changed, 957 insertions(+), 25 deletions(-) create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/arrow_renderer.py create mode 100644 packages/overture-schema-codegen/tests/test_arrow_renderer.py diff --git a/Makefile b/Makefile index 0ae1d38e6..ca9741b82 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ default: test-all install: uv-sync uv-sync: - @uv sync --all-packages 2> /dev/null + @uv sync --all-packages --all-extras 2> /dev/null check: test doctest @uv run ruff check -q packages/ diff --git a/packages/overture-schema-codegen/pyproject.toml b/packages/overture-schema-codegen/pyproject.toml index de42c5fb9..314339317 100644 ---
a/packages/overture-schema-codegen/pyproject.toml +++ b/packages/overture-schema-codegen/pyproject.toml @@ -18,6 +18,9 @@ name = "overture-schema-codegen" [project.scripts] overture-codegen = "overture.schema.codegen.cli:main" +[project.optional-dependencies] +arrow = ["pyarrow>=14.0"] + [tool.uv.sources] overture-schema-core = { workspace = true } overture-schema-system = { workspace = true } diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/arrow_renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/arrow_renderer.py new file mode 100644 index 000000000..33a1abbe2 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/arrow_renderer.py @@ -0,0 +1,273 @@ +"""Arrow schema renderer for Pydantic models.""" + +from __future__ import annotations + +import logging +from collections.abc import Sequence +from types import MappingProxyType +from typing import NamedTuple + +import pyarrow as pa # type: ignore[import-untyped] + +from .model_extraction import extract_model +from .specs import FieldSpec, ModelSpec, UnionSpec +from .type_analyzer import TypeInfo, TypeKind +from .type_registry import PRIMITIVE_TYPES, get_type_mapping + +log = logging.getLogger(__name__) + +__all__ = [ + "field_spec_to_arrow", + "merge_model_variants", + "model_spec_to_arrow_schema", + "type_info_to_arrow", + "union_spec_to_arrow_schema", +] + + +def _build_arrow_factories() -> MappingProxyType[str, pa.DataType]: + """Build Arrow type lookup from the type registry's arrow mappings. + + Collects every non-None arrow name from PRIMITIVE_TYPES and resolves + it via getattr(pa, name)() at import time. Fails fast if a registry + entry names a nonexistent pyarrow factory. 
+ """ + factories: dict[str, pa.DataType] = {} + for mapping in PRIMITIVE_TYPES.values(): + name = mapping.arrow + if name is None or name in factories: + continue + factory = getattr(pa, name, None) + if factory is None: + raise AttributeError(f"pyarrow has no factory {name!r}") + factories[name] = factory() + return MappingProxyType(factories) + + +_ARROW_FACTORIES = _build_arrow_factories() + +_DEFAULT_ARROW_TYPE = _ARROW_FACTORIES["utf8"] + +# Types needing construction beyond a simple getattr(pa, name)() factory call. +_CUSTOM_ARROW_TYPES: MappingProxyType[str, pa.DataType] = MappingProxyType( + { + "BBox": pa.struct( + [ + pa.field("xmin", pa.float64()), + pa.field("ymin", pa.float64()), + pa.field("xmax", pa.float64()), + pa.field("ymax", pa.float64()), + ] + ), + } +) + + +def _model_to_struct( + model_class: type, + ancestors: frozenset[type], +) -> pa.DataType: + """Convert a BaseModel subclass to a pa.StructType.""" + if model_class in ancestors: + log.warning("Cycle detected at %s, substituting utf8", model_class.__name__) + return pa.utf8() + + spec = extract_model(model_class) + child_ancestors = ancestors | {model_class} + fields = [field_spec_to_arrow(f, child_ancestors) for f in spec.fields] + return pa.struct(fields) + + +def type_info_to_arrow( + type_info: TypeInfo, + ancestors: frozenset[type] = frozenset(), +) -> pa.DataType: + """Convert a TypeInfo to a PyArrow DataType.""" + if type_info.is_dict: + if type_info.dict_key_type is None or type_info.dict_value_type is None: + raise ValueError( + f"Dict TypeInfo missing key or value type: {type_info.base_type}" + ) + key_type = type_info_to_arrow(type_info.dict_key_type, ancestors) + value_type = type_info_to_arrow(type_info.dict_value_type, ancestors) + return pa.map_(key_type, value_type) + + if type_info.kind == TypeKind.UNION: + if not type_info.union_members: + raise ValueError(f"Union TypeInfo has no members: {type_info.base_type}") + arrow_type = merge_model_variants(type_info.union_members) + elif
type_info.kind == TypeKind.MODEL: + if type_info.source_type is None: + raise ValueError( + f"MODEL TypeInfo missing source_type: {type_info.base_type}" + ) + arrow_type = _model_to_struct(type_info.source_type, ancestors) + elif type_info.kind in (TypeKind.ENUM, TypeKind.LITERAL): + arrow_type = pa.utf8() + elif type_info.base_type in _CUSTOM_ARROW_TYPES: + arrow_type = _CUSTOM_ARROW_TYPES[type_info.base_type] + else: + mapping = get_type_mapping(type_info.base_type) + if mapping and mapping.arrow: + arrow_type = _ARROW_FACTORIES[mapping.arrow] + else: + log.warning( + "Unknown Arrow type for %r, falling back to utf8", + type_info.base_type, + ) + arrow_type = _DEFAULT_ARROW_TYPE + + if type_info.is_list: + return pa.list_(arrow_type) + + return arrow_type + + +def field_spec_to_arrow( + field_spec: FieldSpec, + ancestors: frozenset[type] = frozenset(), +) -> pa.Field: + """Convert a FieldSpec to a PyArrow Field.""" + arrow_type = type_info_to_arrow(field_spec.type_info, ancestors) + nullable = field_spec.type_info.is_optional + metadata = ( + {b"description": field_spec.description.encode()} + if field_spec.description is not None + else None + ) + return pa.field(field_spec.name, arrow_type, nullable=nullable, metadata=metadata) + + +def _build_schema_metadata( + version: str | None, + entry_point: str | None, +) -> dict[bytes, bytes] | None: + """Build schema-level metadata dict, or None if empty.""" + metadata: dict[bytes, bytes] = {} + if version: + metadata[b"overture-schema.version"] = version.encode() + if entry_point is not None: + metadata[b"model"] = entry_point.encode() + return metadata or None + + +def model_spec_to_arrow_schema( + model_spec: ModelSpec, + *, + version: str | None = None, +) -> pa.Schema: + """Convert a ModelSpec to a PyArrow Schema.""" + fields = [field_spec_to_arrow(f) for f in model_spec.fields] + return pa.schema( + fields, metadata=_build_schema_metadata(version, model_spec.entry_point) + ) + + +def union_spec_to_arrow_schema( + 
union_spec: UnionSpec, + *, + version: str | None = None, +) -> pa.Schema: + """Convert a UnionSpec to a PyArrow Schema by merging member variants.""" + merged_struct = merge_model_variants(union_spec.members) + return pa.schema( + list(merged_struct), + metadata=_build_schema_metadata(version, union_spec.entry_point), + ) + + +class _NumericRank(NamedTuple): + width: int + is_float: bool + + +_NUMERIC_RANKS: MappingProxyType[pa.DataType, _NumericRank] = MappingProxyType( + { + pa.int8(): _NumericRank(8, False), + pa.int16(): _NumericRank(16, False), + pa.int32(): _NumericRank(32, False), + pa.int64(): _NumericRank(64, False), + pa.uint8(): _NumericRank(8, False), + pa.uint16(): _NumericRank(16, False), + pa.uint32(): _NumericRank(32, False), + pa.uint64(): _NumericRank(64, False), + pa.float32(): _NumericRank(32, True), + pa.float64(): _NumericRank(64, True), + } +) + +# When mixing signed and unsigned at the same width, promote to the next +# wider signed type. Width 64 maps to int64 -- lossy for large uint64 +# values, but Arrow has no int128 and this matches Spark/Parquet behavior. +_WIDER_SIGNED: MappingProxyType[int, pa.DataType] = MappingProxyType( + { + 8: pa.int16(), + 16: pa.int32(), + 32: pa.int64(), + 64: pa.int64(), + } +) + + +def _promote_arrow_types(a: pa.DataType, b: pa.DataType) -> pa.DataType: + """Promote two Arrow types to a common wider type. + + Rules: + - Same type returns unchanged. + - Mixing int and float promotes to float64. + - Mixing signed and unsigned promotes to the next wider signed int. + - Otherwise the wider type wins. + + Raises ValueError for non-numeric type conflicts (struct, list, binary) + where no promotion path exists. 
+ """ + if a == b: + return a + + rank_a = _NUMERIC_RANKS.get(a) + rank_b = _NUMERIC_RANKS.get(b) + if rank_a is None or rank_b is None: + raise ValueError(f"Cannot promote non-numeric Arrow types {a} and {b}") + + if rank_a.is_float != rank_b.is_float: + return pa.float64() + + if pa.types.is_unsigned_integer(a) != pa.types.is_unsigned_integer(b): + return _WIDER_SIGNED[max(rank_a.width, rank_b.width)] + + return a if rank_a.width >= rank_b.width else b + + +def merge_model_variants(model_classes: Sequence[type]) -> pa.StructType: + """Merge multiple BaseModel variants into a single Arrow struct. + + Fields present in all variants keep their promoted type. + Fields absent from some variants become nullable. + """ + variant_fields: list[dict[str, pa.Field]] = [] + for cls in model_classes: + spec = extract_model(cls) + fields_dict = {f.name: field_spec_to_arrow(f) for f in spec.fields} + variant_fields.append(fields_dict) + + all_names = dict.fromkeys( + name for fields_dict in variant_fields for name in fields_dict + ) + + merged: list[pa.Field] = [] + for name in all_names: + present_in = [fd for fd in variant_fields if name in fd] + absent_from_some = len(present_in) < len(variant_fields) + + first_field = present_in[0][name] + result_type = first_field.type + for fd in present_in[1:]: + result_type = _promote_arrow_types(result_type, fd[name].type) + + nullable = absent_from_some or any(fd[name].nullable for fd in present_in) + metadata = next( + (fd[name].metadata for fd in present_in if fd[name].metadata), None + ) + merged.append(pa.field(name, result_type, nullable=nullable, metadata=metadata)) + + return pa.struct(merged) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py index ed0a15486..57bd0dc59 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py @@ -1,5 
+1,6 @@ """CLI entrypoint for schema code generation.""" +import importlib.metadata import json import logging from pathlib import Path, PurePosixPath @@ -8,6 +9,7 @@ from overture.schema.core.discovery import discover_models +from .case_conversion import to_snake_case from .markdown_pipeline import generate_markdown_pages from .model_extraction import extract_model from .module_layout import ( @@ -15,9 +17,12 @@ compute_schema_root, entry_point_class, entry_point_module, + output_dir_for_entry_point, ) from .specs import ( FeatureSpec, + ModelSpec, + UnionSpec, is_model_class, is_union_alias, ) @@ -27,7 +32,7 @@ __all__ = ["cli"] -_OUTPUT_FORMATS = ("markdown",) +_OUTPUT_FORMATS = ("markdown", "arrow") _FEATURE_FRONTMATTER = "---\nsidebar_position: 1\n---\n\n" @@ -73,7 +78,7 @@ def list_models() -> None: "output_format", required=True, type=click.Choice(_OUTPUT_FORMATS), - help="Output format", + help="Output format: markdown or arrow.", ) @click.option( "--theme", @@ -120,7 +125,12 @@ def generate( ) ) - _generate_markdown(feature_specs, schema_root, output_dir) + if output_format == "markdown": + _generate_markdown(feature_specs, schema_root, output_dir) + elif output_format == "arrow": + _generate_arrow(feature_specs, schema_root, _schema_version(), output_dir) + else: + raise click.UsageError(f"Unknown format: {output_format!r}") def _generate_markdown( @@ -186,6 +196,52 @@ def _write_category_files( file_path.write_text(json.dumps(category, indent=2) + "\n") +def _schema_version() -> str | None: + """Resolve the installed overture-schema version, or None if unavailable.""" + try: + return importlib.metadata.version("overture-schema") + except importlib.metadata.PackageNotFoundError: + return None + + +def _generate_arrow( + feature_specs: list[FeatureSpec], + schema_root: str, + version: str | None, + output_dir: Path | None, +) -> None: + """Generate Arrow schema output (text or Parquet metadata files).""" + try: + import pyarrow.parquet as pq # type: 
ignore[import-untyped] # noqa: PLC0415 + except ModuleNotFoundError as e: + raise click.UsageError( + "pyarrow is required for arrow output: " + "pip install overture-schema-codegen[arrow]" + ) from e + + from .arrow_renderer import ( # noqa: PLC0415 + model_spec_to_arrow_schema, + union_spec_to_arrow_schema, + ) + + for spec in feature_specs: + if isinstance(spec, ModelSpec): + schema = model_spec_to_arrow_schema(spec, version=version) + elif isinstance(spec, UnionSpec): + schema = union_spec_to_arrow_schema(spec, version=version) + else: + continue + slug = to_snake_case(spec.name) + rel_dir = output_dir_for_entry_point(spec.entry_point, schema_root) + if output_dir: + file_path = output_dir / rel_dir / f"{slug}.parquet" + file_path.parent.mkdir(parents=True, exist_ok=True) + pq.write_metadata(schema, str(file_path)) + else: + content = f"# {spec.name}\n{schema.to_string()}" + _write_output(content, None, rel_dir / f"{slug}.arrow") + + def main() -> None: """Run the CLI entry point.""" cli() diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/type_registry.py b/packages/overture-schema-codegen/src/overture/schema/codegen/type_registry.py index 505657866..dc2a9498b 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/type_registry.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/type_registry.py @@ -18,35 +18,41 @@ class TypeMapping: """Maps a type to its representation in different targets.""" markdown: str + arrow: str | None = None - def for_target(self, target: str) -> str: - """Get the type representation for a named target.""" - if target != "markdown": - raise ValueError(f"Unknown target {target!r}, expected 'markdown'") - return self.markdown + def for_target(self, target: str) -> str | None: + """Get the type representation for a named target. + + Returns None for targets where this type has no mapping. 
+ """ + if target == "markdown": + return self.markdown + if target == "arrow": + return self.arrow + raise ValueError(f"Unknown target {target!r}") PRIMITIVE_TYPES: dict[str, TypeMapping] = { # Signed integers - "int8": TypeMapping(markdown="int8"), - "int16": TypeMapping(markdown="int16"), - "int32": TypeMapping(markdown="int32"), - "int64": TypeMapping(markdown="int64"), + "int8": TypeMapping(markdown="int8", arrow="int8"), + "int16": TypeMapping(markdown="int16", arrow="int16"), + "int32": TypeMapping(markdown="int32", arrow="int32"), + "int64": TypeMapping(markdown="int64", arrow="int64"), # Unsigned integers - "uint8": TypeMapping(markdown="uint8"), - "uint16": TypeMapping(markdown="uint16"), - "uint32": TypeMapping(markdown="uint32"), + "uint8": TypeMapping(markdown="uint8", arrow="uint8"), + "uint16": TypeMapping(markdown="uint16", arrow="uint16"), + "uint32": TypeMapping(markdown="uint32", arrow="uint32"), # Floating point - "float32": TypeMapping(markdown="float32"), - "float64": TypeMapping(markdown="float64"), + "float32": TypeMapping(markdown="float32", arrow="float32"), + "float64": TypeMapping(markdown="float64", arrow="float64"), # Basic types - "str": TypeMapping(markdown="string"), - "bool": TypeMapping(markdown="boolean"), + "str": TypeMapping(markdown="string", arrow="utf8"), + "bool": TypeMapping(markdown="boolean", arrow="bool_"), # Python builtins (aliases to their portable equivalents) - "int": TypeMapping(markdown="int64"), - "float": TypeMapping(markdown="float64"), + "int": TypeMapping(markdown="int64", arrow="int64"), + "float": TypeMapping(markdown="float64", arrow="float64"), # Geometry types - "Geometry": TypeMapping(markdown="geometry"), + "Geometry": TypeMapping(markdown="geometry", arrow="binary"), "BBox": TypeMapping(markdown="bbox"), } @@ -93,7 +99,7 @@ def resolve_type_name(type_info: TypeInfo, target: str) -> str: type_info : TypeInfo The analyzed type information. target : str - The output target ("markdown"). 
+ The output target ("markdown" or "arrow"). Returns ------- @@ -104,7 +110,9 @@ def resolve_type_name(type_info: TypeInfo, target: str) -> str: if mapping is None and type_info.source_type is not None: mapping = get_type_mapping(type_info.source_type.__name__) if mapping is not None: - return mapping.for_target(target) + result = mapping.for_target(target) + if result is not None: + return result # Semantic NewType wrapping an unregistered type (e.g., Sources wrapping # SourceItem): use the underlying class name rather than the NewType alias. diff --git a/packages/overture-schema-codegen/tests/test_arrow_renderer.py b/packages/overture-schema-codegen/tests/test_arrow_renderer.py new file mode 100644 index 000000000..b1fca94e0 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_arrow_renderer.py @@ -0,0 +1,470 @@ +"""Tests for Arrow schema renderer.""" + +from enum import Enum as StdEnum +from typing import Annotated, Literal + +import pyarrow as pa # type: ignore[import-untyped] +import pytest +from overture.schema.codegen.arrow_renderer import ( + field_spec_to_arrow, + merge_model_variants, + model_spec_to_arrow_schema, + type_info_to_arrow, + union_spec_to_arrow_schema, +) +from overture.schema.codegen.model_extraction import extract_model +from overture.schema.codegen.specs import ( + ModelSpec, + UnionSpec, + filter_model_classes, + is_union_alias, +) +from overture.schema.codegen.type_analyzer import UnsupportedUnionError +from overture.schema.codegen.union_extraction import extract_union +from overture.schema.system.primitive import ( + BBox, + Geometry, + float32, + float64, + int8, + int16, + int32, + int64, + uint8, +) +from pydantic import BaseModel, Field + + +def _arrow_type_for(annotation: object) -> pa.DataType: + """Build a single-field model with the given annotation, return its Arrow type.""" + model = type("_M", (BaseModel,), {"__annotations__": {"f": annotation}}) + spec = extract_model(model) + return 
type_info_to_arrow(spec.fields[0].type_info) + + +def _arrow_field_for( + annotation: object, + *, + default: object = ..., + description: str | None = None, +) -> pa.Field: + """Build a single-field model with the given annotation, return its Arrow field.""" + attrs: dict[str, object] = {"__annotations__": {"f": annotation}} + if description is not None or default is not ...: + attrs["f"] = Field(default=default, description=description) + model = type("_M", (BaseModel,), attrs) + spec = extract_model(model) + return field_spec_to_arrow(spec.fields[0]) + + +class _UnionBase(BaseModel): + type: str + shared: str + + +class _VariantA(_UnionBase): + type: Literal["a"] = "a" + a_only: int32 + + +class _VariantB(_UnionBase): + type: Literal["b"] = "b" + b_only: float64 + + +_UNION_ANNOTATION = Annotated[_VariantA | _VariantB, Field(discriminator="type")] + + +@pytest.fixture +def union_spec() -> UnionSpec: + return extract_union("TestUnion", _UNION_ANNOTATION) + + +class TestTypeInfoToArrowPrimitives: + """Primitive scalar types map to Arrow types.""" + + @pytest.mark.parametrize( + ("annotation", "expected"), + [ + (str, pa.utf8()), + (bool, pa.bool_()), + (int8, pa.int8()), + (int16, pa.int16()), + (int32, pa.int32()), + (int64, pa.int64()), + (float32, pa.float32()), + (float64, pa.float64()), + (Geometry, pa.binary()), + ], + ids=lambda x: getattr(x, "__name__", str(x)), + ) + def test_primitive_mapping(self, annotation: object, expected: pa.DataType) -> None: + assert _arrow_type_for(annotation) == expected + + def test_bbox_maps_to_struct(self) -> None: + result = _arrow_type_for(BBox) + assert isinstance(result, pa.StructType) + assert result.num_fields == 4 + for name in ("xmin", "ymin", "xmax", "ymax"): + idx = result.get_field_index(name) + assert idx >= 0, f"missing field {name}" + assert result.field(idx).type == pa.float64() + + +class TestTypeInfoToArrowFallbacks: + """Enums and Literals fall back to utf8.""" + + def test_enum_maps_to_utf8(self) -> None: 
+ class Color(str, StdEnum): + RED = "red" + BLUE = "blue" + + assert _arrow_type_for(Color) == pa.utf8() + + def test_literal_maps_to_utf8(self) -> None: + assert _arrow_type_for(Literal["building"]) == pa.utf8() + + +class TestTypeInfoToArrowLists: + """List types wrap element types with pa.list_().""" + + @pytest.mark.parametrize( + ("annotation", "expected"), + [ + (list[str], pa.list_(pa.utf8())), + (list[int32], pa.list_(pa.int32())), + ], + ids=["str", "int32"], + ) + def test_list_mapping(self, annotation: object, expected: pa.DataType) -> None: + assert _arrow_type_for(annotation) == expected + + +class TestTypeInfoToArrowDicts: + """Dict types map to Arrow map types.""" + + @pytest.mark.parametrize( + ("annotation", "expected"), + [ + (dict[str, str], pa.map_(pa.utf8(), pa.utf8())), + (dict[str, int32], pa.map_(pa.utf8(), pa.int32())), + ], + ids=["str_str", "str_int32"], + ) + def test_dict_mapping(self, annotation: object, expected: pa.DataType) -> None: + assert _arrow_type_for(annotation) == expected + + def test_optional_dict_nullable(self) -> None: + result = _arrow_field_for(dict[str, str] | None, default=None) + assert result.nullable is True + assert result.type == pa.map_(pa.utf8(), pa.utf8()) + + +class TestTypeInfoToArrowUnions: + """Inline union fields produce merged structs.""" + + def test_union_field_becomes_struct(self) -> None: + result = _arrow_type_for(_UNION_ANNOTATION) + assert isinstance(result, pa.StructType) + assert result.get_field_index("type") >= 0 + assert result.get_field_index("shared") >= 0 + assert result.get_field_index("a_only") >= 0 + assert result.get_field_index("b_only") >= 0 + + def test_list_of_union_becomes_list_of_struct(self) -> None: + result = _arrow_type_for(list[_UNION_ANNOTATION]) + assert isinstance(result, pa.ListType) + assert isinstance(result.value_type, pa.StructType) + + +class TestFieldSpecToArrow: + """FieldSpec converts to pa.Field with nullability.""" + + def 
test_required_field_not_nullable(self) -> None: + result = _arrow_field_for(str) + assert result == pa.field("f", pa.utf8(), nullable=False) + + def test_optional_field_nullable(self) -> None: + result = _arrow_field_for(str | None, default=None) + assert result == pa.field("f", pa.utf8(), nullable=True) + + def test_field_name_from_alias(self) -> None: + class M(BaseModel): + class_: str | None = Field(default=None, alias="class") + + spec = extract_model(M) + result = field_spec_to_arrow(spec.fields[0]) + assert result.name == "class" + + +class TestModelSpecToArrowSchema: + """ModelSpec converts to pa.Schema.""" + + def test_simple_model(self) -> None: + class M(BaseModel): + id: str + count: int32 + label: str | None = None + + spec = extract_model(M) + result = model_spec_to_arrow_schema(spec) + assert isinstance(result, pa.Schema) + assert result.field("id").type == pa.utf8() + assert result.field("id").nullable is False + assert result.field("count").type == pa.int32() + assert result.field("count").nullable is False + assert result.field("label").type == pa.utf8() + assert result.field("label").nullable is True + + def test_schema_field_count(self) -> None: + class M(BaseModel): + a: str + b: int32 + + spec = extract_model(M) + result = model_spec_to_arrow_schema(spec) + assert len(result) == 2 + + def test_schema_metadata_with_version_and_model(self) -> None: + class M(BaseModel): + id: str + + spec = extract_model(M) + spec.entry_point = "overture.schema.buildings:Building" + result = model_spec_to_arrow_schema(spec, version="1.2.3") + assert result.metadata == { + b"overture-schema.version": b"1.2.3", + b"model": b"overture.schema.buildings:Building", + } + + def test_schema_metadata_version_only(self) -> None: + class M(BaseModel): + id: str + + spec = extract_model(M) + result = model_spec_to_arrow_schema(spec, version="1.0.0") + assert result.metadata == {b"overture-schema.version": b"1.0.0"} + + def test_schema_metadata_model_only(self) -> None: + 
class M(BaseModel): + id: str + + spec = extract_model(M) + spec.entry_point = "overture.schema.places:Place" + result = model_spec_to_arrow_schema(spec) + assert result.metadata == {b"model": b"overture.schema.places:Place"} + + def test_schema_metadata_absent_by_default(self) -> None: + class M(BaseModel): + id: str + + spec = extract_model(M) + result = model_spec_to_arrow_schema(spec) + assert result.metadata is None + + +class TestArrowNestedModels: + """MODEL-kind fields expand to Arrow struct types.""" + + def test_nested_model_becomes_struct(self) -> None: + class Inner(BaseModel): + x: int32 + y: int32 + + class Outer(BaseModel): + point: Inner + + spec = extract_model(Outer) + result = type_info_to_arrow(spec.fields[0].type_info) + assert isinstance(result, pa.StructType) + assert result.get_field_index("x") >= 0 + assert result.field("x").type == pa.int32() + + def test_optional_nested_model(self) -> None: + class Inner(BaseModel): + val: str + + class Outer(BaseModel): + nested: Inner | None = None + + spec = extract_model(Outer) + result = field_spec_to_arrow(spec.fields[0]) + assert result.nullable is True + assert isinstance(result.type, pa.StructType) + + def test_list_of_models(self) -> None: + class Item(BaseModel): + name: str + + class Container(BaseModel): + items: list[Item] + + spec = extract_model(Container) + result = type_info_to_arrow(spec.fields[0].type_info) + assert isinstance(result, pa.ListType) + assert isinstance(result.value_type, pa.StructType) + + +class TestArrowUnionMerging: + """Discriminated union variants merge into a single struct.""" + + def test_merges_shared_and_variant_fields(self) -> None: + class A(BaseModel): + type: Literal["a"] = "a" + shared: str + a_only: int32 + + class B(BaseModel): + type: Literal["b"] = "b" + shared: str + b_only: float64 + + result = merge_model_variants([A, B]) + assert isinstance(result, pa.StructType) + + assert result.field("type").type == pa.utf8() + assert result.field("shared").type 
== pa.utf8() + + a_idx = result.get_field_index("a_only") + b_idx = result.get_field_index("b_only") + assert a_idx >= 0 + assert b_idx >= 0 + assert result.field(a_idx).nullable is True + assert result.field(b_idx).nullable is True + + @pytest.mark.parametrize( + ("type_a", "type_b", "expected"), + [ + (int32, int64, pa.int64()), + (int64, uint8, pa.int64()), + (int32, float64, pa.float64()), + ], + ids=["wider_int", "signed_unsigned", "int_float"], + ) + def test_type_promotion( + self, type_a: type, type_b: type, expected: pa.DataType + ) -> None: + A = type("A", (BaseModel,), {"__annotations__": {"val": type_a}}) + B = type("B", (BaseModel,), {"__annotations__": {"val": type_b}}) + result = merge_model_variants([A, B]) + assert result.field("val").type == expected + + +class TestFieldMetadata: + """Field descriptions embed as Arrow field metadata.""" + + def test_description_becomes_metadata(self) -> None: + result = _arrow_field_for(str, description="The display name") + assert result.metadata == {b"description": b"The display name"} + + def test_no_description_means_no_metadata(self) -> None: + result = _arrow_field_for(str) + assert result.metadata is None + + def test_nested_struct_fields_carry_metadata(self) -> None: + class Inner(BaseModel): + val: str = Field(description="Inner value") + + class Outer(BaseModel): + nested: Inner + + spec = extract_model(Outer) + result = field_spec_to_arrow(spec.fields[0]) + inner_field = result.type.field("val") + assert inner_field.metadata == {b"description": b"Inner value"} + + def test_description_in_schema_fields(self) -> None: + class M(BaseModel): + id: str = Field(description="Unique identifier") + count: int32 + + spec = extract_model(M) + schema = model_spec_to_arrow_schema(spec) + assert schema.field("id").metadata == {b"description": b"Unique identifier"} + assert schema.field("count").metadata is None + + def test_merge_preserves_first_metadata(self) -> None: + class A(BaseModel): + name: str = 
Field(description="The name") + + class B(BaseModel): + name: str + + result = merge_model_variants([A, B]) + assert result.field("name").metadata == {b"description": b"The name"} + + +class TestUnionSpecToArrowSchema: + """UnionSpec converts to pa.Schema by merging member variants.""" + + def test_merges_members_into_schema(self, union_spec: UnionSpec) -> None: + result = union_spec_to_arrow_schema(union_spec) + + assert isinstance(result, pa.Schema) + assert result.field("type").type == pa.utf8() + assert result.field("shared").type == pa.utf8() + assert result.field("a_only").nullable is True + assert result.field("b_only").nullable is True + + def test_schema_metadata(self, union_spec: UnionSpec) -> None: + union_spec.entry_point = "overture.schema.test:TestUnion" + result = union_spec_to_arrow_schema(union_spec, version="2.0.0") + + assert result.metadata == { + b"overture-schema.version": b"2.0.0", + b"model": b"overture.schema.test:TestUnion", + } + + +class TestArrowRealModels: + """Integration tests with real Overture models.""" + + def test_building_schema(self, building_spec: ModelSpec) -> None: + result = model_spec_to_arrow_schema(building_spec) + assert isinstance(result, pa.Schema) + + assert result.field("id").nullable is False + assert result.field("id").type == pa.utf8() + assert result.field("geometry").type == pa.binary() + assert result.field("geometry").nullable is False + + assert result.field("height").nullable is True + + bbox_type = result.field("bbox").type + assert isinstance(bbox_type, pa.StructType) + assert bbox_type.field("xmin").type == pa.float64() + + def test_building_schema_metadata(self, building_spec: ModelSpec) -> None: + building_spec.entry_point = "overture.schema.buildings:Building" + result = model_spec_to_arrow_schema(building_spec, version="1.0.0") + assert result.metadata == { + b"overture-schema.version": b"1.0.0", + b"model": b"overture.schema.buildings:Building", + } + + def test_all_models_no_crash(self, 
all_discovered_models: dict) -> None: + """All discovered models convert to Arrow schemas without errors.""" + for model_class in filter_model_classes(all_discovered_models): + try: + spec = extract_model(model_class) + except UnsupportedUnionError: + continue + result = model_spec_to_arrow_schema(spec) + assert isinstance(result, pa.Schema) + assert len(result) > 0 + + def test_all_union_aliases_no_crash(self, all_discovered_models: dict) -> None: + """All discovered union aliases convert to Arrow schemas without errors.""" + from overture.schema.codegen.module_layout import entry_point_class + + for key, entry in all_discovered_models.items(): + if not is_union_alias(entry): + continue + spec = extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + ) + result = union_spec_to_arrow_schema(spec) + assert isinstance(result, pa.Schema) + assert len(result) > 0 diff --git a/packages/overture-schema-codegen/tests/test_cli.py b/packages/overture-schema-codegen/tests/test_cli.py index a85ceb52a..bf8f4192b 100644 --- a/packages/overture-schema-codegen/tests/test_cli.py +++ b/packages/overture-schema-codegen/tests/test_cli.py @@ -356,6 +356,64 @@ def spy(feature_specs: list, schema_root: str, output_dir: object) -> None: ) +class TestArrowCLI: + """CLI generates Arrow output.""" + + def test_arrow_format_stdout(self, cli_runner: CliRunner) -> None: + """generate --format arrow should output Arrow schema to stdout.""" + result = cli_runner.invoke( + cli, ["generate", "--format", "arrow", "--theme", "buildings"] + ) + assert result.exit_code == 0 + # Arrow schema .to_string() output contains field names + assert "id:" in result.output + assert "geometry:" in result.output + + def test_arrow_format_output_dir( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """generate --format arrow --output-dir should write .parquet files.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "arrow", + "--theme", + 
"buildings", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + parquet_files = list(tmp_path.rglob("*.parquet")) + assert len(parquet_files) > 0 + + def test_arrow_parquet_has_schema( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Parquet files should contain valid Arrow schema.""" + import pyarrow.parquet as pq # type: ignore[import-untyped] # noqa: PLC0415 + + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "arrow", + "--theme", + "buildings", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + parquet_files = list(tmp_path.rglob("*.parquet")) + schema = pq.read_schema(parquet_files[0]) + assert len(schema) > 0 + assert "id" in schema.names + + class TestCliHelp: """Tests for CLI help.""" diff --git a/uv.lock b/uv.lock index 5deec1907..fb86148e7 100644 --- a/uv.lock +++ b/uv.lock @@ -791,14 +791,21 @@ dependencies = [ { name = "tomli", marker = "python_full_version < '3.11'" }, ] +[package.optional-dependencies] +arrow = [ + { name = "pyarrow" }, +] + [package.metadata] requires-dist = [ { name = "click", specifier = ">=8.0" }, { name = "jinja2", specifier = ">=3.0" }, { name = "overture-schema-core", editable = "packages/overture-schema-core" }, { name = "overture-schema-system", editable = "packages/overture-schema-system" }, + { name = "pyarrow", marker = "extra == 'arrow'", specifier = ">=14.0" }, { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0" }, ] +provides-extras = ["arrow"] [[package]] name = "overture-schema-core" @@ -981,6 +988,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567, upload-time = "2018-02-15T19:01:27.172Z" }, ] +[[package]] +name = "pyarrow" +version = "23.0.1" +source = { registry = "https://pypi.org/simple" } 
+sdist = { url = "https://files.pythonhosted.org/packages/88/22/134986a4cc224d593c1afde5494d18ff629393d74cc2eddb176669f234a4/pyarrow-23.0.1.tar.gz", hash = "sha256:b8c5873e33440b2bc2f4a79d2b47017a89c5a24116c055625e6f2ee50523f019", size = 1167336, upload-time = "2026-02-16T10:14:12.39Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/a8/24e5dc6855f50a62936ceb004e6e9645e4219a8065f304145d7fb8a79d5d/pyarrow-23.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:3fab8f82571844eb3c460f90a75583801d14ca0cc32b1acc8c361650e006fd56", size = 34307390, upload-time = "2026-02-16T10:08:08.654Z" }, + { url = "https://files.pythonhosted.org/packages/bc/8e/4be5617b4aaae0287f621ad31c6036e5f63118cfca0dc57d42121ff49b51/pyarrow-23.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:3f91c038b95f71ddfc865f11d5876c42f343b4495535bd262c7b321b0b94507c", size = 35853761, upload-time = "2026-02-16T10:08:17.811Z" }, + { url = "https://files.pythonhosted.org/packages/2e/08/3e56a18819462210432ae37d10f5c8eed3828be1d6c751b6e6a2e93c286a/pyarrow-23.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d0744403adabef53c985a7f8a082b502a368510c40d184df349a0a8754533258", size = 44493116, upload-time = "2026-02-16T10:08:25.792Z" }, + { url = "https://files.pythonhosted.org/packages/f8/82/c40b68001dbec8a3faa4c08cd8c200798ac732d2854537c5449dc859f55a/pyarrow-23.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:c33b5bf406284fd0bba436ed6f6c3ebe8e311722b441d89397c54f871c6863a2", size = 47564532, upload-time = "2026-02-16T10:08:34.27Z" }, + { url = "https://files.pythonhosted.org/packages/20/bc/73f611989116b6f53347581b02177f9f620efdf3cd3f405d0e83cdf53a83/pyarrow-23.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ddf743e82f69dcd6dbbcb63628895d7161e04e56794ef80550ac6f3315eeb1d5", size = 48183685, upload-time = "2026-02-16T10:08:42.889Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/cc/6c6b3ecdae2a8c3aced99956187e8302fc954cc2cca2a37cf2111dad16ce/pyarrow-23.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e052a211c5ac9848ae15d5ec875ed0943c0221e2fcfe69eee80b604b4e703222", size = 50605582, upload-time = "2026-02-16T10:08:51.641Z" }, + { url = "https://files.pythonhosted.org/packages/8d/94/d359e708672878d7638a04a0448edf7c707f9e5606cee11e15aaa5c7535a/pyarrow-23.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:5abde149bb3ce524782d838eb67ac095cd3fd6090eba051130589793f1a7f76d", size = 27521148, upload-time = "2026-02-16T10:08:58.077Z" }, + { url = "https://files.pythonhosted.org/packages/b0/41/8e6b6ef7e225d4ceead8459427a52afdc23379768f54dd3566014d7618c1/pyarrow-23.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:6f0147ee9e0386f519c952cc670eb4a8b05caa594eeffe01af0e25f699e4e9bb", size = 34302230, upload-time = "2026-02-16T10:09:03.859Z" }, + { url = "https://files.pythonhosted.org/packages/bf/4a/1472c00392f521fea03ae93408bf445cc7bfa1ab81683faf9bc188e36629/pyarrow-23.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:0ae6e17c828455b6265d590100c295193f93cc5675eb0af59e49dbd00d2de350", size = 35850050, upload-time = "2026-02-16T10:09:11.877Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b2/bd1f2f05ded56af7f54d702c8364c9c43cd6abb91b0e9933f3d77b4f4132/pyarrow-23.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:fed7020203e9ef273360b9e45be52a2a47d3103caf156a30ace5247ffb51bdbd", size = 44491918, upload-time = "2026-02-16T10:09:18.144Z" }, + { url = "https://files.pythonhosted.org/packages/0b/62/96459ef5b67957eac38a90f541d1c28833d1b367f014a482cb63f3b7cd2d/pyarrow-23.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:26d50dee49d741ac0e82185033488d28d35be4d763ae6f321f97d1140eb7a0e9", size = 47562811, upload-time = "2026-02-16T10:09:25.792Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/94/1170e235add1f5f45a954e26cd0e906e7e74e23392dcb560de471f7366ec/pyarrow-23.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3c30143b17161310f151f4a2bcfe41b5ff744238c1039338779424e38579d701", size = 48183766, upload-time = "2026-02-16T10:09:34.645Z" }, + { url = "https://files.pythonhosted.org/packages/0e/2d/39a42af4570377b99774cdb47f63ee6c7da7616bd55b3d5001aa18edfe4f/pyarrow-23.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db2190fa79c80a23fdd29fef4b8992893f024ae7c17d2f5f4db7171fa30c2c78", size = 50607669, upload-time = "2026-02-16T10:09:44.153Z" }, + { url = "https://files.pythonhosted.org/packages/00/ca/db94101c187f3df742133ac837e93b1f269ebdac49427f8310ee40b6a58f/pyarrow-23.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:f00f993a8179e0e1c9713bcc0baf6d6c01326a406a9c23495ec1ba9c9ebf2919", size = 27527698, upload-time = "2026-02-16T10:09:50.263Z" }, + { url = "https://files.pythonhosted.org/packages/9a/4b/4166bb5abbfe6f750fc60ad337c43ecf61340fa52ab386da6e8dbf9e63c4/pyarrow-23.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f4b0dbfa124c0bb161f8b5ebb40f1a680b70279aa0c9901d44a2b5a20806039f", size = 34214575, upload-time = "2026-02-16T10:09:56.225Z" }, + { url = "https://files.pythonhosted.org/packages/e1/da/3f941e3734ac8088ea588b53e860baeddac8323ea40ce22e3d0baa865cc9/pyarrow-23.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:7707d2b6673f7de054e2e83d59f9e805939038eebe1763fe811ee8fa5c0cd1a7", size = 35832540, upload-time = "2026-02-16T10:10:03.428Z" }, + { url = "https://files.pythonhosted.org/packages/88/7c/3d841c366620e906d54430817531b877ba646310296df42ef697308c2705/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:86ff03fb9f1a320266e0de855dee4b17da6794c595d207f89bba40d16b5c78b9", size = 44470940, upload-time = "2026-02-16T10:10:10.704Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/a5/da83046273d990f256cb79796a190bbf7ec999269705ddc609403f8c6b06/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:813d99f31275919c383aab17f0f455a04f5a429c261cc411b1e9a8f5e4aaaa05", size = 47586063, upload-time = "2026-02-16T10:10:17.95Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/b7d2ebcff47a514f47f9da1e74b7949138c58cfeb108cdd4ee62f43f0cf3/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bf5842f960cddd2ef757d486041d57c96483efc295a8c4a0e20e704cbbf39c67", size = 48173045, upload-time = "2026-02-16T10:10:25.363Z" }, + { url = "https://files.pythonhosted.org/packages/43/b2/b40961262213beaba6acfc88698eb773dfce32ecdf34d19291db94c2bd73/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:564baf97c858ecc03ec01a41062e8f4698abc3e6e2acd79c01c2e97880a19730", size = 50621741, upload-time = "2026-02-16T10:10:33.477Z" }, + { url = "https://files.pythonhosted.org/packages/f6/70/1fdda42d65b28b078e93d75d371b2185a61da89dda4def8ba6ba41ebdeb4/pyarrow-23.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:07deae7783782ac7250989a7b2ecde9b3c343a643f82e8a4df03d93b633006f0", size = 27620678, upload-time = "2026-02-16T10:10:39.31Z" }, + { url = "https://files.pythonhosted.org/packages/47/10/2cbe4c6f0fb83d2de37249567373d64327a5e4d8db72f486db42875b08f6/pyarrow-23.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6b8fda694640b00e8af3c824f99f789e836720aa8c9379fb435d4c4953a756b8", size = 34210066, upload-time = "2026-02-16T10:10:45.487Z" }, + { url = "https://files.pythonhosted.org/packages/cb/4f/679fa7e84dadbaca7a65f7cdba8d6c83febbd93ca12fa4adf40ba3b6362b/pyarrow-23.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:8ff51b1addc469b9444b7c6f3548e19dc931b172ab234e995a60aea9f6e6025f", size = 35825526, upload-time = "2026-02-16T10:10:52.266Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/63/d2747d930882c9d661e9398eefc54f15696547b8983aaaf11d4a2e8b5426/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:71c5be5cbf1e1cb6169d2a0980850bccb558ddc9b747b6206435313c47c37677", size = 44473279, upload-time = "2026-02-16T10:11:01.557Z" }, + { url = "https://files.pythonhosted.org/packages/b3/93/10a48b5e238de6d562a411af6467e71e7aedbc9b87f8d3a35f1560ae30fb/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9b6f4f17b43bc39d56fec96e53fe89d94bac3eb134137964371b45352d40d0c2", size = 47585798, upload-time = "2026-02-16T10:11:09.401Z" }, + { url = "https://files.pythonhosted.org/packages/5c/20/476943001c54ef078dbf9542280e22741219a184a0632862bca4feccd666/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fc13fc6c403d1337acab46a2c4346ca6c9dec5780c3c697cf8abfd5e19b6b37", size = 48179446, upload-time = "2026-02-16T10:11:17.781Z" }, + { url = "https://files.pythonhosted.org/packages/4b/b6/5dd0c47b335fcd8edba9bfab78ad961bd0fd55ebe53468cc393f45e0be60/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5c16ed4f53247fa3ffb12a14d236de4213a4415d127fe9cebed33d51671113e2", size = 50623972, upload-time = "2026-02-16T10:11:26.185Z" }, + { url = "https://files.pythonhosted.org/packages/d5/09/a532297c9591a727d67760e2e756b83905dd89adb365a7f6e9c72578bcc1/pyarrow-23.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:cecfb12ef629cf6be0b1887f9f86463b0dd3dc3195ae6224e74006be4736035a", size = 27540749, upload-time = "2026-02-16T10:12:23.297Z" }, + { url = "https://files.pythonhosted.org/packages/a5/8e/38749c4b1303e6ae76b3c80618f84861ae0c55dd3c2273842ea6f8258233/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:29f7f7419a0e30264ea261fdc0e5fe63ce5a6095003db2945d7cd78df391a7e1", size = 34471544, upload-time = "2026-02-16T10:11:32.535Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/73/f237b2bc8c669212f842bcfd842b04fc8d936bfc9d471630569132dc920d/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:33d648dc25b51fd8055c19e4261e813dfc4d2427f068bcecc8b53d01b81b0500", size = 35949911, upload-time = "2026-02-16T10:11:39.813Z" }, + { url = "https://files.pythonhosted.org/packages/0c/86/b912195eee0903b5611bf596833def7d146ab2d301afeb4b722c57ffc966/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd395abf8f91c673dd3589cadc8cc1ee4e8674fa61b2e923c8dd215d9c7d1f41", size = 44520337, upload-time = "2026-02-16T10:11:47.764Z" }, + { url = "https://files.pythonhosted.org/packages/69/c2/f2a717fb824f62d0be952ea724b4f6f9372a17eed6f704b5c9526f12f2f1/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:00be9576d970c31defb5c32eb72ef585bf600ef6d0a82d5eccaae96639cf9d07", size = 47548944, upload-time = "2026-02-16T10:11:56.607Z" }, + { url = "https://files.pythonhosted.org/packages/84/a7/90007d476b9f0dc308e3bc57b832d004f848fd6c0da601375d20d92d1519/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c2139549494445609f35a5cda4eb94e2c9e4d704ce60a095b342f82460c73a83", size = 48236269, upload-time = "2026-02-16T10:12:04.47Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3f/b16fab3e77709856eb6ac328ce35f57a6d4a18462c7ca5186ef31b45e0e0/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7044b442f184d84e2351e5084600f0d7343d6117aabcbc1ac78eb1ae11eb4125", size = 50604794, upload-time = "2026-02-16T10:12:11.797Z" }, + { url = "https://files.pythonhosted.org/packages/e9/a1/22df0620a9fac31d68397a75465c344e83c3dfe521f7612aea33e27ab6c0/pyarrow-23.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a35581e856a2fafa12f3f54fce4331862b1cfb0bef5758347a858a4aa9d6bae8", size = 27660642, upload-time = "2026-02-16T10:12:17.746Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/1b/6da9a89583ce7b23ac611f183ae4843cd3a6cf54f079549b0e8c14031e73/pyarrow-23.0.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:5df1161da23636a70838099d4aaa65142777185cc0cdba4037a18cee7d8db9ca", size = 34238755, upload-time = "2026-02-16T10:12:32.819Z" }, + { url = "https://files.pythonhosted.org/packages/ae/b5/d58a241fbe324dbaeb8df07be6af8752c846192d78d2272e551098f74e88/pyarrow-23.0.1-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:fa8e51cb04b9f8c9c5ace6bab63af9a1f88d35c0d6cbf53e8c17c098552285e1", size = 35847826, upload-time = "2026-02-16T10:12:38.949Z" }, + { url = "https://files.pythonhosted.org/packages/54/a5/8cbc83f04aba433ca7b331b38f39e000efd9f0c7ce47128670e737542996/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b95a3994f015be13c63148fef8832e8a23938128c185ee951c98908a696e0eb", size = 44536859, upload-time = "2026-02-16T10:12:45.467Z" }, + { url = "https://files.pythonhosted.org/packages/36/2e/c0f017c405fcdc252dbccafbe05e36b0d0eb1ea9a958f081e01c6972927f/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4982d71350b1a6e5cfe1af742c53dfb759b11ce14141870d05d9e540d13bc5d1", size = 47614443, upload-time = "2026-02-16T10:12:55.525Z" }, + { url = "https://files.pythonhosted.org/packages/af/6b/2314a78057912f5627afa13ba43809d9d653e6630859618b0fd81a4e0759/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c250248f1fe266db627921c89b47b7c06fee0489ad95b04d50353537d74d6886", size = 48232991, upload-time = "2026-02-16T10:13:04.729Z" }, + { url = "https://files.pythonhosted.org/packages/40/f2/1bcb1d3be3460832ef3370d621142216e15a2c7c62602a4ea19ec240dd64/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5f4763b83c11c16e5f4c15601ba6dfa849e20723b46aa2617cb4bffe8768479f", size = 50645077, upload-time = "2026-02-16T10:13:14.147Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/3f/b1da7b61cd66566a4d4c8383d376c606d1c34a906c3f1cb35c479f59d1aa/pyarrow-23.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:3a4c85ef66c134161987c17b147d6bffdca4566f9a4c1d81a0a01cdf08414ea5", size = 28234271, upload-time = "2026-02-16T10:14:09.397Z" }, + { url = "https://files.pythonhosted.org/packages/b5/78/07f67434e910a0f7323269be7bfbf58699bd0c1d080b18a1ab49ba943fe8/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:17cd28e906c18af486a499422740298c52d7c6795344ea5002a7720b4eadf16d", size = 34488692, upload-time = "2026-02-16T10:13:21.541Z" }, + { url = "https://files.pythonhosted.org/packages/50/76/34cf7ae93ece1f740a04910d9f7e80ba166b9b4ab9596a953e9e62b90fe1/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:76e823d0e86b4fb5e1cf4a58d293036e678b5a4b03539be933d3b31f9406859f", size = 35964383, upload-time = "2026-02-16T10:13:28.63Z" }, + { url = "https://files.pythonhosted.org/packages/46/90/459b827238936d4244214be7c684e1b366a63f8c78c380807ae25ed92199/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a62e1899e3078bf65943078b3ad2a6ddcacf2373bc06379aac61b1e548a75814", size = 44538119, upload-time = "2026-02-16T10:13:35.506Z" }, + { url = "https://files.pythonhosted.org/packages/28/a1/93a71ae5881e99d1f9de1d4554a87be37da11cd6b152239fb5bd924fdc64/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:df088e8f640c9fae3b1f495b3c64755c4e719091caf250f3a74d095ddf3c836d", size = 47571199, upload-time = "2026-02-16T10:13:42.504Z" }, + { url = "https://files.pythonhosted.org/packages/88/a3/d2c462d4ef313521eaf2eff04d204ac60775263f1fb08c374b543f79f610/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:46718a220d64677c93bc243af1d44b55998255427588e400677d7192671845c7", size = 48259435, upload-time = "2026-02-16T10:13:49.226Z" }, + { url = 
"https://files.pythonhosted.org/packages/cc/f1/11a544b8c3d38a759eb3fbb022039117fd633e9a7b19e4841cc3da091915/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a09f3876e87f48bc2f13583ab551f0379e5dfb83210391e68ace404181a20690", size = 50629149, upload-time = "2026-02-16T10:13:57.238Z" }, + { url = "https://files.pythonhosted.org/packages/50/f2/c0e76a0b451ffdf0cf788932e182758eb7558953f4f27f1aff8e2518b653/pyarrow-23.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:527e8d899f14bd15b740cd5a54ad56b7f98044955373a17179d5956ddb93d9ce", size = 28365807, upload-time = "2026-02-16T10:14:03.892Z" }, +] + [[package]] name = "pydantic" version = "2.12.5"