diff --git a/builders/server/core/runtime/validator.py b/builders/server/core/runtime/validator.py index b59ce5d..6837f87 100644 --- a/builders/server/core/runtime/validator.py +++ b/builders/server/core/runtime/validator.py @@ -23,7 +23,17 @@ def validate(data: dict, schema: dict[str, SchemaType]) -> None: ) -def validate_rows(data_list: list[dict], schema: dict[str, SchemaType]) -> None: +def validate_rows(data_list: object, schema: dict[str, SchemaType]) -> None: """Validate each dict in a list against the declared schema.""" - for data in data_list: + if not isinstance(data_list, list): + raise ValidationError( + f"Builder output expected a list of rows, got '{type(data_list).__name__}'" + ) + + for index, data in enumerate(data_list): + if not isinstance(data, dict): + raise ValidationError( + f"Builder output row {index} expected a dict, " + f"got '{type(data).__name__}'" + ) validate(data, schema) diff --git a/builders/server/tests/core/runtime/test_validator.py b/builders/server/tests/core/runtime/test_validator.py index d8ee66c..c85fae8 100644 --- a/builders/server/tests/core/runtime/test_validator.py +++ b/builders/server/tests/core/runtime/test_validator.py @@ -82,6 +82,20 @@ def test_validate_rows_empty_list() -> None: validate_rows([], {"ticker": SchemaType.STR}) +@pytest.mark.parametrize("data", [None, {}]) +def test_validate_rows_rejects_non_list_output(data: object) -> None: + """Builder output must be a list of row dictionaries.""" + with pytest.raises(ValidationError, match="expected a list of rows"): + validate_rows(data, {"ticker": SchemaType.STR}) + + +@pytest.mark.parametrize("row", [None, "bad", 123]) +def test_validate_rows_rejects_non_dict_rows(row: object) -> None: + """Each builder output row must be a dictionary.""" + with pytest.raises(ValidationError, match="row 0 expected a dict"): + validate_rows([row], {"ticker": SchemaType.STR}) + + def test_validate_rows_invalid_item_raises() -> None: """Invalid item in the list raises 
ValidationError.""" with pytest.raises(ValidationError, match="Missing key 'price'"): diff --git a/dev-docs/SPEC-backend.md b/dev-docs/SPEC-backend.md index f9afdb0..eda4d82 100644 --- a/dev-docs/SPEC-backend.md +++ b/dev-docs/SPEC-backend.md @@ -421,6 +421,7 @@ Builders are stateless Python scripts. To each dataset there is a builder script The `[schema]` section in `config.toml` is used for runtime validation: - After a builder returns its output list, the builder server validates each dict in the list against the schema before inserting into the DB. +- Builder output must be a list of row dictionaries: non-list output fails validation before any row-level schema checks, and a row that is not a dictionary fails validation before that row's schema check. - Validation checks that all declared keys are present and that values match the declared types. - Validation correctness is the priority over performance. - The builder script for dataset `(dataset_name, dataset_version)` is under `builders/scripts/dataset_name/dataset_version/builder.py`. The config is stored under `builders/scripts/dataset_name/dataset_version/config.toml`.