From d731859d304499547f873515096caedad08105f6 Mon Sep 17 00:00:00 2001 From: Scr4tch587 Date: Mon, 22 Jun 2026 19:03:34 -0400 Subject: [PATCH] feat: add dry-run flag to build_dataset Co-Authored-By: Claude Opus 4.8 --- builders/server/core/service/builder.py | 13 +++++-- .../server/tests/core/service/test_builder.py | 36 ++++++++++++++++--- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/builders/server/core/service/builder.py b/builders/server/core/service/builder.py index 4ab4a1a..69e0b6b 100644 --- a/builders/server/core/service/builder.py +++ b/builders/server/core/service/builder.py @@ -6,6 +6,7 @@ import core.db.datasets from core.runtime import registry from core.service.orchestrator import run_build +from core.service.store import MemoryStore, PostgresStore from core.service.timestamps import NoValidTimestampsError, generate_timestamps from core.utils.semver import SemVer @@ -35,9 +36,17 @@ def build_dataset( dataset_version: SemVer, start: datetime, end: datetime, -) -> None: + *, + dry_run: bool = False, +) -> dict[datetime, list[dict]] | None: """Public entrypoint for building a dataset and its dependencies.""" - run_build(dataset_name, dataset_version, start, end) + if not dry_run: + run_build(dataset_name, dataset_version, start, end, store=PostgresStore()) + return None + + store = MemoryStore() + run_build(dataset_name, dataset_version, start, end, store=store) + return store.get_rows_range(dataset_name, dataset_version, start, end) def get_data( diff --git a/builders/server/tests/core/service/test_builder.py b/builders/server/tests/core/service/test_builder.py index e955808..2d986b9 100644 --- a/builders/server/tests/core/service/test_builder.py +++ b/builders/server/tests/core/service/test_builder.py @@ -129,12 +129,38 @@ def test_generate_timestamps_start_on_closed_day_no_valid_range_returns_empty() @patch("core.service.builder.run_build") def test_build_dataset_delegates_to_orchestrator(mock_run_build: MagicMock) -> None: - """build_dataset delegates to run_build with the same args.""" - build_dataset("ds", V010, datetime(2024, 1, 1), datetime(2024, 1, 5)) + """build_dataset delegates to run_build with a PostgresStore for real builds.""" + from core.service.store import PostgresStore - mock_run_build.assert_called_once_with( - "ds", V010, datetime(2024, 1, 1), datetime(2024, 1, 5) - ) + result = build_dataset("ds", V010, datetime(2024, 1, 1), datetime(2024, 1, 5)) + + assert result is None + mock_run_build.assert_called_once() + args, kwargs = mock_run_build.call_args + assert args == ("ds", V010, datetime(2024, 1, 1), datetime(2024, 1, 5)) + assert isinstance(kwargs["store"], PostgresStore) + + +@patch("core.service.builder.run_build") +def test_build_dataset_dry_run_uses_memory_store_and_returns_rows( + mock_run_build: MagicMock, +) -> None: + """dry_run build uses a MemoryStore and returns the produced rows.""" + from core.service.store import MemoryStore + + ts = datetime(2024, 1, 1) + + # simulate the worker writing into the injected store during the build + def fake_run_build(name, version, start, end, store): + store.insert_rows(name, version, [(ts, [{"v": 1}])]) + + mock_run_build.side_effect = fake_run_build + + result = build_dataset("ds", V010, ts, ts, dry_run=True) + + assert result == {ts: [{"v": 1}]} + store = mock_run_build.call_args.kwargs["store"] + assert isinstance(store, MemoryStore) @patch("core.service.builder.run_build")