diff --git a/CHANGELOG.md b/CHANGELOG.md index 63c3d93..70dd9e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ All notable changes to vouch are documented here. Format follows ## [Unreleased] +### Fixed +- `sync_apply` now loads the sync source exactly once and passes the same `_SyncSource` instance into `sync_check`, closing a TOCTOU window where a bundle replaced on disk between the two `_load_source` calls could cause the validation and write phases to operate on different snapshots. Also eliminates redundant directory walks (KB sources) and triple tarball opens (bundle sources). Fixes #217. + ### Fixed - `vouch serve` now fails fast with a clear `vouch init` hint when no `.vouch/` KB is present, instead of starting a server that immediately misbehaves (#95). diff --git a/src/vouch/sync.py b/src/vouch/sync.py index 350fc87..de9abdc 100644 --- a/src/vouch/sync.py +++ b/src/vouch/sync.py @@ -222,7 +222,16 @@ def _validation_issues_for_source( def sync_check(kb_dir: Path, source_path: Path) -> SyncCheckResult: """Compare another KB or bundle with ``kb_dir`` without writing.""" - src = _load_source(source_path) + return _sync_check_with_src(kb_dir, _load_source(source_path)) + + +def _sync_check_with_src(kb_dir: Path, src: _SyncSource) -> SyncCheckResult: + """Core sync check logic operating on an already-loaded _SyncSource. + + Separated from the public `sync_check` so `sync_apply` can pass its + already-loaded source directly without reloading, closing the TOCTOU + window described in #217. + """ issues = _validation_issues_for_source(src, kb_dir=kb_dir) new_files: list[str] = [] identical: list[str] = [] @@ -295,7 +304,7 @@ def sync_apply( raise ValueError(f"on_conflict must be fail|skip|propose, got {on_conflict}") src = _load_source(source_path) - check = sync_check(kb_dir, source_path) + check = _sync_check_with_src(kb_dir, src) if check.issues: raise RuntimeError(f"refusing to sync: {check.issues[0]}") if on_conflict == "fail" and check.conflicts: diff --git a/tests/test_sync.py b/tests/test_sync.py index 8670683..beac804 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -205,3 +205,83 @@ def test_sync_rejects_source_content_address_mismatch(tmp_path: Path) -> None: with pytest.raises(RuntimeError, match="content-address mismatch"): sync.sync_apply(dest.kb_dir, incoming.root) assert not (dest.kb_dir / "sources" / dir_id / "content").exists() + + +# --- TOCTOU / double-load fix (#217) ------------------------------------- +# +# sync_apply must load the source exactly once and pass the same +# _SyncSource instance to sync_check so the validated snapshot and the +# write loop are guaranteed to operate on the same data. Before the fix, +# sync_apply called _load_source() independently and then sync_check +# called it again internally, opening a TOCTOU window for bundles and +# causing redundant directory walks for KB sources. + + +def test_sync_apply_loads_source_exactly_once_for_kb( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """sync_apply must not reload the KB directory source after sync_check.""" + incoming = _store(tmp_path / "incoming") + _claim(incoming, "c1", "alpha") + dest = _store(tmp_path / "dest") + + load_calls: list[Path] = [] + original_load = sync._load_source + + def counting_load(path: Path) -> sync._SyncSource: + load_calls.append(path) + return original_load(path) + + monkeypatch.setattr(sync, "_load_source", counting_load) + + sync.sync_apply(dest.kb_dir, incoming.root, actor="tester") + + assert len(load_calls) == 1, ( + f"_load_source called {len(load_calls)} times; expected 1. " + "sync_apply must pass its already-loaded _SyncSource into sync_check." + ) + assert dest.get_claim("c1").text == "alpha" + + +def test_sync_apply_loads_bundle_source_exactly_once( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """For bundle sources the tarball must be opened exactly once by + _load_source during sync_apply — not once per _load_source call.""" + incoming = _store(tmp_path / "incoming") + _claim(incoming, "c1", "beta") + bundle_path = tmp_path / "incoming.tar.gz" + bundle.export(incoming.kb_dir, dest=bundle_path) + dest = _store(tmp_path / "dest") + + load_calls: list[Path] = [] + original_load = sync._load_source + + def counting_load(path: Path) -> sync._SyncSource: + load_calls.append(path) + return original_load(path) + + monkeypatch.setattr(sync, "_load_source", counting_load) + + result = sync.sync_apply(dest.kb_dir, bundle_path, actor="tester") + + assert len(load_calls) == 1, ( + f"_load_source called {len(load_calls)} times for bundle; expected 1." + ) + assert "claims/c1.yaml" in result["written"] + assert dest.get_claim("c1").text == "beta" + + +def test_sync_check_with_src_uses_preloaded_source(tmp_path: Path) -> None: + """_sync_check_with_src must use the supplied _SyncSource directly + without calling _load_source again.""" + incoming = _store(tmp_path / "incoming") + _claim(incoming, "c1", "gamma") + dest = _store(tmp_path / "dest") + + preloaded = sync._load_source(incoming.root) + report = sync._sync_check_with_src(dest.kb_dir, preloaded) + + assert report.ok + assert "claims/c1.yaml" in report.new_files +