diff --git a/builders/server/core/runtime/validator.py b/builders/server/core/runtime/validator.py index b59ce5d..cb1fc8f 100644 --- a/builders/server/core/runtime/validator.py +++ b/builders/server/core/runtime/validator.py @@ -8,9 +8,13 @@ class ValidationError(Exception): def validate(data: dict, schema: dict[str, SchemaType]) -> None: """Validate that data matches the declared schema. - Checks that all declared keys are present and values match declared types. - Raises ValidationError on failure. + Checks that all keys in data are in the schema, all schema keys are present in data, + and values match declared types. Raises ValidationError on failure. """ + for key in data: + if key not in schema: + raise ValidationError(f"Unexpected key '{key}' in builder output") + for key, schema_type in schema.items(): if key not in data: raise ValidationError(f"Missing key '{key}' in builder output") diff --git a/builders/server/tests/core/runtime/test_validator.py b/builders/server/tests/core/runtime/test_validator.py index d8ee66c..6b772f3 100644 --- a/builders/server/tests/core/runtime/test_validator.py +++ b/builders/server/tests/core/runtime/test_validator.py @@ -11,14 +11,10 @@ def test_valid_data_passes() -> None: ) -def test_empty_schema_passes_anything() -> None: - """Empty schema means no constraints.""" - validate({"anything": 123, "goes": "here"}, {}) - - -def test_extra_keys_allowed() -> None: - """Data with extra keys beyond schema passes.""" - validate({"ticker": "AAPL", "extra": 999}, {"ticker": SchemaType.STR}) +def test_extra_keys_raises() -> None: + """Data with extra keys beyond schema fails.""" + with pytest.raises(ValidationError, match="Unexpected key 'extra'"): + validate({"ticker": "AAPL", "extra": 999}, {"ticker": SchemaType.STR}) def test_missing_key_raises() -> None: diff --git a/builders/server/tests/core/service/test_worker.py b/builders/server/tests/core/service/test_worker.py index 5b986a7..122e2b2 100644 --- a/builders/server/tests/core/service/test_worker.py +++ b/builders/server/tests/core/service/test_worker.py @@ -82,7 +82,9 @@ def test_missing_timestamps_built_and_inserted( @patch("core.service.worker.registry") def test_builder_failure_no_partial_insert(mock_registry, mock_db, mock_runner) -> None: """If builder fails on timestamp 3 of 5, no rows are inserted.""" - mock_registry.get_config.return_value = _cfg(name="ds") + mock_registry.get_config.return_value = _cfg( + name="ds", schema={"val": SchemaType.INT} + ) mock_db.get_existing_timestamps.return_value = [] # all missing call_count = 0 @@ -112,7 +114,9 @@ def fail_on_third(*args, **kwargs): @patch("core.service.worker.registry") def test_cancelled_event_stops_early(mock_registry, mock_db, mock_runner) -> None: """When cancelled is set, worker stops before building remaining timestamps.""" - mock_registry.get_config.return_value = _cfg(name="ds") + mock_registry.get_config.return_value = _cfg( + name="ds", schema={"val": SchemaType.INT} + ) mock_db.get_existing_timestamps.return_value = [] mock_runner.run_builder.return_value = [{"val": 1}] @@ -146,6 +150,7 @@ def test_lookback_dep_uses_get_rows_range(mock_registry, mock_db, mock_runner) - """Dependency with lookback fetches data via get_rows_range.""" mock_registry.get_config.return_value = _cfg( name="ds", + schema={"val": SchemaType.INT}, dependencies={ "dep": DependencyInfo(version=V010, lookback_subtract=timedelta(days=4)), }, @@ -174,6 +179,7 @@ def test_no_lookback_dep_uses_get_rows_timestamps( """Dependency without lookback fetches data via get_rows_timestamps.""" mock_registry.get_config.return_value = _cfg( name="ds", + schema={"val": SchemaType.INT}, dependencies={ "dep": DependencyInfo(version=V010), }, diff --git a/builders/server/tests/integration/test_errors.py b/builders/server/tests/integration/test_errors.py index 83d38fd..b0d15d6 100644 --- a/builders/server/tests/integration/test_errors.py +++ b/builders/server/tests/integration/test_errors.py @@ -140,3 +140,34 @@ def build(dependencies, timestamp: datetime) -> list[dict]: ) assert resp.status_code == 500 assert _row_count(db_conn, name) == 0 + + +def test_schema_unexpected_key(client, db_conn, write_temp_builder): + """builder returns key not in the schema -> 500, 0 rows.""" + name, version = write_temp_builder( + "unexpected-key", + "0.1.0", + """\ +name = "unexpected-key" +version = "0.1.0" +builder = "builder.py" +calendar = "everyday" +granularity = "1d" +start-date = "2020-01-01" + +[schema] +value = "int" +""", + """\ +from datetime import datetime + +def build(dependencies, timestamp: datetime) -> list[dict]: + return [{"value": 1, "unexpected_key": "value"}] +""", + ) + resp = client.post( + f"/api/v1/build/{name}/{version}", + params={"start": "2024-01-02", "end": "2024-01-02"}, + ) + assert resp.status_code == 500 + assert _row_count(db_conn, name) == 0