Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ All notable changes to vouch are documented here. Format follows

## [Unreleased]

### Fixed
- `sync_apply` now loads the sync source exactly once and passes the same `_SyncSource` instance into `sync_check`, closing a TOCTOU window where a bundle replaced on disk between the two `_load_source` calls could cause the validation and write phases to operate on different snapshots. Also eliminates redundant directory walks (KB sources) and triple tarball opens (bundle sources). Fixes #217.

### Fixed
- `vouch serve` now fails fast with a clear `vouch init` hint when no `.vouch/` KB is present, instead of starting a server that immediately misbehaves (#95).

Expand Down
13 changes: 11 additions & 2 deletions src/vouch/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,16 @@ def _validation_issues_for_source(

def sync_check(kb_dir: Path, source_path: Path) -> SyncCheckResult:
"""Compare another KB or bundle with ``kb_dir`` without writing."""
src = _load_source(source_path)
return _sync_check_with_src(kb_dir, _load_source(source_path))


def _sync_check_with_src(kb_dir: Path, src: _SyncSource) -> SyncCheckResult:
"""Core sync check logic operating on an already-loaded _SyncSource.

Separated from the public `sync_check` so `sync_apply` can pass its
already-loaded source directly without reloading, closing the TOCTOU
window described in #217.
"""
issues = _validation_issues_for_source(src, kb_dir=kb_dir)
new_files: list[str] = []
identical: list[str] = []
Expand Down Expand Up @@ -295,7 +304,7 @@ def sync_apply(
raise ValueError(f"on_conflict must be fail|skip|propose, got {on_conflict}")

src = _load_source(source_path)
check = sync_check(kb_dir, source_path)
check = _sync_check_with_src(kb_dir, src)
if check.issues:
raise RuntimeError(f"refusing to sync: {check.issues[0]}")
if on_conflict == "fail" and check.conflicts:
Expand Down
80 changes: 80 additions & 0 deletions tests/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,83 @@ def test_sync_rejects_source_content_address_mismatch(tmp_path: Path) -> None:
with pytest.raises(RuntimeError, match="content-address mismatch"):
sync.sync_apply(dest.kb_dir, incoming.root)
assert not (dest.kb_dir / "sources" / dir_id / "content").exists()


# --- TOCTOU / double-load fix (#217) -------------------------------------
#
# sync_apply must load the source exactly once and pass the same
# _SyncSource instance to sync_check so the validated snapshot and the
# write loop are guaranteed to operate on the same data. Before the fix,
# sync_apply called _load_source() independently and then sync_check
# called it again internally, opening a TOCTOU window for bundles and
# causing redundant directory walks for KB sources.


def test_sync_apply_loads_source_exactly_once_for_kb(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
"""sync_apply must not reload the KB directory source after sync_check."""
incoming = _store(tmp_path / "incoming")
_claim(incoming, "c1", "alpha")
dest = _store(tmp_path / "dest")

load_calls: list[Path] = []
original_load = sync._load_source

def counting_load(path: Path) -> sync._SyncSource:
load_calls.append(path)
return original_load(path)

monkeypatch.setattr(sync, "_load_source", counting_load)

sync.sync_apply(dest.kb_dir, incoming.root, actor="tester")

assert len(load_calls) == 1, (
f"_load_source called {len(load_calls)} times; expected 1. "
"sync_apply must pass its already-loaded _SyncSource into sync_check."
)
assert dest.get_claim("c1").text == "alpha"


def test_sync_apply_loads_bundle_source_exactly_once(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
"""For bundle sources the tarball must be opened exactly once by
_load_source during sync_apply — not once per _load_source call."""
incoming = _store(tmp_path / "incoming")
_claim(incoming, "c1", "beta")
bundle_path = tmp_path / "incoming.tar.gz"
bundle.export(incoming.kb_dir, dest=bundle_path)
dest = _store(tmp_path / "dest")

load_calls: list[Path] = []
original_load = sync._load_source

def counting_load(path: Path) -> sync._SyncSource:
load_calls.append(path)
return original_load(path)

monkeypatch.setattr(sync, "_load_source", counting_load)

result = sync.sync_apply(dest.kb_dir, bundle_path, actor="tester")

assert len(load_calls) == 1, (
f"_load_source called {len(load_calls)} times for bundle; expected 1."
)
assert "claims/c1.yaml" in result["written"]
assert dest.get_claim("c1").text == "beta"


def test_sync_check_with_src_uses_preloaded_source(tmp_path: Path) -> None:
"""_sync_check_with_src must use the supplied _SyncSource directly
without calling _load_source again."""
incoming = _store(tmp_path / "incoming")
_claim(incoming, "c1", "gamma")
dest = _store(tmp_path / "dest")

preloaded = sync._load_source(incoming.root)
report = sync._sync_check_with_src(dest.kb_dir, preloaded)

assert report.ok
assert "claims/c1.yaml" in report.new_files

Loading