Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions builders/server/core/runtime/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,17 @@ def validate(data: dict, schema: dict[str, SchemaType]) -> None:
)


def validate_rows(data_list: object, schema: dict[str, SchemaType]) -> None:
    """Validate builder output as a list of row dicts against the schema.

    The container and each row are shape-checked before any schema check, so
    malformed builder output surfaces as a ValidationError rather than an
    unrelated error raised while iterating or indexing.

    Args:
        data_list: The value returned by a builder. Typed as ``object``
            because it is untrusted until the checks below have passed.
        schema: Mapping of declared key name to its expected SchemaType.

    Raises:
        ValidationError: If ``data_list`` is not a list, if any row is not a
            dict, or if ``validate`` rejects a row.
    """
    # Check the container first: a non-list iterable (e.g. a dict or str)
    # would otherwise be iterated as if its elements were rows.
    if not isinstance(data_list, list):
        raise ValidationError(
            f"Builder output expected a list of rows, got '{type(data_list).__name__}'"
        )

    for index, data in enumerate(data_list):
        # Report the offending row's position so the builder author can find it.
        if not isinstance(data, dict):
            raise ValidationError(
                f"Builder output row {index} expected a dict, "
                f"got '{type(data).__name__}'"
            )
        validate(data, schema)
Comment on lines +26 to 39
14 changes: 14 additions & 0 deletions builders/server/tests/core/runtime/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,20 @@ def test_validate_rows_empty_list() -> None:
validate_rows([], {"ticker": SchemaType.STR})


@pytest.mark.parametrize("data", [None, {}])
def test_validate_rows_rejects_non_list_output(data: object) -> None:
    """Builder output must be a list of row dictionaries."""
    # Both None and an empty dict are non-list values; each must be rejected
    # with the container-level message.
    with pytest.raises(ValidationError, match="expected a list of rows"):
        validate_rows(data, {"ticker": SchemaType.STR})
Comment on lines +85 to +89


@pytest.mark.parametrize("row", [None, "bad", 123])
def test_validate_rows_rejects_non_dict_rows(row: object) -> None:
    """Each builder output row must be a dictionary."""
    # The bad row is the only element, so the reported index must be 0.
    with pytest.raises(ValidationError, match="row 0 expected a dict"):
        validate_rows([row], {"ticker": SchemaType.STR})


def test_validate_rows_invalid_item_raises() -> None:
"""Invalid item in the list raises ValidationError."""
with pytest.raises(ValidationError, match="Missing key 'price'"):
Expand Down
1 change: 1 addition & 0 deletions dev-docs/SPEC-backend.md
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ Builders are stateless Python scripts. To each dataset there is a builder script

The `[schema]` section in `config.toml` is used for runtime validation:
- After a builder returns its output list, the builder server validates each dict in the list against the schema before inserting into the DB.
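- Builder output must be a list of row dictionaries; non-list output fails validation before row-level schema checks, and any row that is not a dict fails validation (reporting its index) before that row's schema check.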
- Validation checks that all declared keys are present and that values match the declared types.
- Validation correctness is the priority over performance.
- The builder script for dataset `(dataset_name, dataset_version)` is under `builders/scripts/dataset_name/dataset_version/builder.py`. The config is stored under `builders/scripts/dataset_name/dataset_version/config.toml`.
Expand Down