From 2543c9dd852b06f4f39bf15ba5ce59aac437bac3 Mon Sep 17 00:00:00 2001 From: Juan Sugg Date: Fri, 27 Mar 2026 12:56:17 -0300 Subject: [PATCH] test: harden suite contracts and runtime coverage --- .github/workflows/ci.yml | 2 + .local/issues.md | 118 ++++ CONTRIBUTING.md | 16 +- docs/ci/hardware-validation.md | 9 +- .../test_pytest_suite_bootstrap.py | 125 +++++ .../backends/test_faster_whisper_adapter.py | 525 ++++++++++++++++++ tests/suites/unit/utils/test_common_utils.py | 27 + 7 files changed, 817 insertions(+), 5 deletions(-) create mode 100644 .local/issues.md create mode 100644 tests/suites/integration/architecture/test_pytest_suite_bootstrap.py create mode 100644 tests/suites/unit/transcription/backends/test_faster_whisper_adapter.py create mode 100644 tests/suites/unit/utils/test_common_utils.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3f0a4f34..318542fb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,6 +4,8 @@ on: push: branches: [main] pull_request: + schedule: + - cron: "0 6 * * 1" concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/.local/issues.md b/.local/issues.md new file mode 100644 index 00000000..e2281850 --- /dev/null +++ b/.local/issues.md @@ -0,0 +1,118 @@ +# Audit Hardening Plan + +Last updated: 2026-03-27 +Branch: `audit-hardening-20260327` +Base: `main` at `95c7b4d7ca94993bcb0109de57fe81cedc8d711c` + +## Scope + +This document turns the validated audit findings into an executable implementation plan for the current repository state. + +Out of scope for this task: +- PyPI/package-name distribution changes. +- Release-gate restructuring for GPU/full-dataset publication blocking. +- Architecture Decision Record authoring beyond incidental doc refreshes. + +## Current Source Facts + +- The repository already uses `tests/suites/{unit,integration,smoke}` with path-derived structural markers in `tests/conftest.py`. +- `CONTRIBUTING.md` still references pre-relocation root test paths for the API boundary lane. +- `.github/workflows/ci.yml` has no scheduled trigger. +- `ser/transcript/backends/faster_whisper.py` still owns real runtime and I/O branches that need stronger unit coverage. +- `ser/utils/common_utils.py` remains a tiny but uncovered utility dependency used by timeline rendering. + +## Objectives + +### 1. Weekly dependency-regression CI + +Add a weekly scheduled run to `.github/workflows/ci.yml` so upstream dependency or resolver regressions surface even without active pull requests. + +Acceptance criteria: +- `ci.yml` includes a weekly `schedule` trigger. +- Existing `changes` classification keeps scheduled runs on the full pipeline. +- Contributor docs mention the scheduled lane accurately. + +### 2. Proper pytest marker governance + +Strengthen test-marker discipline without relying on blanket marker edits. The repository already derives structural markers (`unit`, `integration`, `smoke`, `topology_contract`) from suite paths; the missing piece is contract enforcement around that policy and around non-structural marker ownership. + +Implementation direction: +- Add architecture/contract tests inspired by `strongclaw/tests` to keep `tests/conftest.py` lean and structural. +- Assert that special-purpose markers such as `process_isolation` remain explicitly owned by the modules that require them. +- Keep structural suite markers path-derived, not hand-copied into every module. + +Acceptance criteria: +- New contract tests fail if root bootstrap starts assigning non-structural markers. +- New contract tests fail if special-purpose markers move out of explicit module ownership. +- Test layout remains under `tests/suites/...` with domain-oriented placement. + +### 3. Coverage uplift on runtime hotspots + +Increase confidence in the least-tested real runtime code by adding focused unit tests for `FasterWhisperAdapter` and `display_elapsed_time`. + +Implementation direction: +- Add a dedicated backend-focused test module under `tests/suites/unit/transcription/backends/`. +- Cover `setup_required`, `prepare_assets`, `load_model`, `transcribe`, and `_is_module_available` branches using fake modules and fake model objects. +- Add utility tests for `display_elapsed_time` under `tests/suites/unit/utils/`. + +Acceptance criteria: +- New tests exercise successful and failure/edge branches for the faster-whisper adapter. +- Utility formatting behavior is covered for both long and short output styles. +- Overall branch coverage stays above the configured threshold with additional headroom. + +### 4. Documentation refresh + +Update contributor-facing docs to match the current suite tree, marker model, and CI behavior. + +Acceptance criteria: +- `CONTRIBUTING.md` references `tests/suites/...` paths, not removed flat-root paths. +- CI topology text reflects scheduled CI and current suite/contract responsibilities. +- Any changed workflow/test semantics are documented where contributors would look first. + +## Execution Checklist + +- [x] Add weekly scheduled CI trigger and any supporting doc updates. +- [x] Add pytest bootstrap/marker contract tests under `tests/suites/integration/architecture/`. +- [x] Add dedicated faster-whisper adapter tests under `tests/suites/unit/transcription/backends/`. +- [x] Add `common_utils` coverage tests under `tests/suites/unit/utils/`. +- [x] Update `CONTRIBUTING.md` and nearby CI docs affected by the implementation. +- [x] Run focused validation while iterating. +- [x] Run full repo validation and compare outcomes against this document. +- [ ] Commit, push, open PR, merge once green, monitor workflows/branch freshness, then clean up worktree and local branches. + +## Validation Plan + +Focused checks during implementation: +- `uv run --extra dev pytest -q tests/suites/integration/architecture` +- `uv run --extra dev pytest -q tests/suites/unit/transcription/backends tests/suites/unit/utils/test_common_utils.py` +- `uv run --extra dev pytest --cov=ser.transcript.backends.faster_whisper --cov=ser.utils.common_utils --cov-branch --cov-report term-missing tests/suites/unit/transcription/backends tests/suites/unit/utils/test_common_utils.py` + +Final required checks: +- `make lint` +- `make type` +- `make import-lint` +- `make test-cov` + +## Validation Results + +Focused validation completed: +- `uv run --extra dev pytest -q tests/suites/integration/architecture/test_pytest_suite_bootstrap.py tests/suites/unit/transcription/backends/test_faster_whisper_adapter.py tests/suites/unit/utils/test_common_utils.py` -> `24 passed` +- `uv run --extra dev pytest --cov=ser.transcript.backends.faster_whisper --cov=ser.utils.common_utils --cov-branch --cov-report term-missing tests/suites/unit/transcription/backends/test_faster_whisper_adapter.py tests/suites/unit/utils/test_common_utils.py` -> targeted coverage `80.81%`; `ser/transcript/backends/faster_whisper.py` reached `80.00%` + +Final validation completed: +- `make lint` -> pass +- `make type` -> pass (`mypy` clean, `pyright` 0 errors / 0 warnings / 0 informations) +- `make import-lint` -> pass +- `make test-cov` -> pass with `986 passed` and total branch coverage `80.08%` + +Outcome versus objectives: +- Weekly scheduled CI trigger added. +- Marker governance now has contract coverage around root bootstrap responsibilities and explicit special-marker ownership. +- Faster-whisper hotspot coverage materially improved and `common_utils` now has direct tests. +- Contributor docs now match the suite tree and current CI behavior. + +## Notes + +- Work only from the dedicated worktree at `/Users/juanpedrosugg/dev/github/ser-audit-hardening-20260327`. +- Keep branch/commit/PR naming neutral and project-scoped. +- If upstream `origin/main` changes during the task, re-check freshness before pushing and again before merging. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 97aeae2a..887c476e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -58,11 +58,16 @@ Run the boundary lane whenever your change touches `ser/api.py`, `ser/_internal/ ```bash make import-lint -uv run pytest -q tests/test_import_lint_policy.py tests/test_api_import_boundary.py tests/test_api.py tests/test_cli.py +uv run pytest -q \ + tests/suites/integration/architecture/test_import_lint_policy.py \ + tests/suites/integration/architecture/test_api_import_boundary.py \ + tests/suites/integration/api/test_api.py \ + tests/suites/integration/cli/test_cli.py ``` ## CI Topology Default CI is defined in `.github/workflows/ci.yml`. +It runs on pull requests, pushes to `main`, and a weekly scheduled sweep every Monday at `06:00 UTC`. Quality and validation lanes: - `changes`: classifies pull requests so docs-only PRs can skip heavy jobs while `push` to `main` still runs the full pipeline. @@ -72,8 +77,15 @@ Quality and validation lanes: - `contract-gates`: deterministic contract lane on Python 3.12 (API boundary import-lint gate + transcription benchmark contract test). - `build`: package build + metadata/wheel smoke checks. +## Test Suite Layout +- `tests/suites/unit`: narrow owner or helper behavior. +- `tests/suites/integration`: multi-module orchestration, boundary, or workflow coverage. +- `tests/suites/smoke`: cheap user-path workflow checks. +- Structural markers (`unit`, `integration`, `smoke`, `topology_contract`) come from suite placement in `tests/conftest.py`. +- Non-structural markers must stay explicitly declared by the modules that need them. + ## Hardware Validation -Hardware-specific workflows are manual (`workflow_dispatch`): +Hardware-specific workflows are manual by default (`workflow_dispatch`) and are also reusable by release orchestration through `workflow_call`: - [docs/ci/hardware-validation.md](docs/ci/hardware-validation.md) Policy note: diff --git a/docs/ci/hardware-validation.md b/docs/ci/hardware-validation.md index 5982b2ac..789f595e 100644 --- a/docs/ci/hardware-validation.md +++ b/docs/ci/hardware-validation.md @@ -1,11 +1,13 @@ # Hardware Validation Workflows -Manual hardware validation is intentionally separated from default CI. +Hardware validation is intentionally separated from default CI. +These workflows support manual execution via `workflow_dispatch` and release orchestration via +`workflow_call`. ## MPS (GitHub-hosted) - Workflow: `.github/workflows/macos15-mps-validation.yml` -- Trigger: `workflow_dispatch` +- Triggers: `workflow_dispatch`, `workflow_call` - Runner: `macos-15` - Requirement: an Apple Silicon macOS 15 runner with MPS available. - The workflow fails fast if `torch.backends.mps.is_available()` or @@ -28,7 +30,7 @@ gh workflow run .github/workflows/macos15-mps-validation.yml \ ## CUDA and XPU (Self-hosted) - Workflow: `.github/workflows/linux-selfhosted-gpu-validation.yml` -- Trigger: `workflow_dispatch` +- Triggers: `workflow_dispatch`, `workflow_call` - Runner: self-hosted Linux runners selected via JSON label inputs. - Default CUDA labels: `["self-hosted","linux","x64","cuda"]` - Default XPU labels: `["self-hosted","linux","x64","xpu"]` @@ -75,4 +77,5 @@ gh workflow run .github/workflows/linux-selfhosted-gpu-validation.yml \ - Project support policy still includes local macOS13 validation targets: - `darwin-x86_64-macos13-python3.12` (full profile support) - `darwin-x86_64-macos13-python3.13` (partial, fast profile only) +- Default CI still does not call these workflows automatically. - Keep hardware lanes manual unless runtime/cost constraints change. diff --git a/tests/suites/integration/architecture/test_pytest_suite_bootstrap.py b/tests/suites/integration/architecture/test_pytest_suite_bootstrap.py new file mode 100644 index 00000000..dadef855 --- /dev/null +++ b/tests/suites/integration/architecture/test_pytest_suite_bootstrap.py @@ -0,0 +1,125 @@ +"""Contracts for pytest suite bootstrap responsibilities and marker ownership.""" + +from __future__ import annotations + +import ast +from pathlib import Path + +import pytest + +pytestmark = pytest.mark.topology_contract + +_ROOT_CONFTEST = Path("tests/conftest.py") +_ALLOWED_ROOT_MARKERS = frozenset( + { + "integration", + "smoke", + "topology_contract", + "unit", + "usefixtures", + } +) +_IGNORED_MODULE_MARKERS = frozenset( + { + "filterwarnings", + "integration", + "smoke", + "topology_contract", + "unit", + "usefixtures", + } +) +_EXPECTED_SPECIAL_MARKERS = { + "tests/suites/integration/test_process_isolation.py": {"process_isolation"}, +} + + +def _extract_marker_names(node: ast.expr) -> set[str]: + """Extract marker names from a supported `pytestmark` expression.""" + if isinstance(node, ast.Call): + return _extract_marker_names(node.func) + if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Attribute): + if isinstance(node.value.value, ast.Name) and node.value.value.id == "pytest": + if node.value.attr == "mark": + return {node.attr} + if isinstance(node, ast.List | ast.Tuple): + markers: set[str] = set() + for element in node.elts: + markers.update(_extract_marker_names(element)) + return markers + raise AssertionError(f"Unsupported pytest marker expression: {ast.dump(node)}") + + +def _module_marker_names(path: Path) -> set[str]: + """Return module-level pytest markers declared by one test module.""" + tree = ast.parse(path.read_text(encoding="utf-8"), filename=path.as_posix()) + for node in tree.body: + if not isinstance(node, ast.Assign): + continue + if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): + continue + if node.targets[0].id != "pytestmark": + continue + return _extract_marker_names(node.value) + return set() + + +def _root_assigned_marker_names(path: Path) -> set[str]: + """Return pytest marker names added dynamically by the root conftest.""" + tree = ast.parse(path.read_text(encoding="utf-8"), filename=path.as_posix()) + markers: set[str] = set() + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + if not isinstance(node.func, ast.Attribute) or node.func.attr != "add_marker": + continue + if len(node.args) != 1: + raise AssertionError("Root conftest marker injection must pass exactly one marker.") + markers.update(_extract_marker_names(node.args[0])) + return markers + + +def test_root_conftest_stays_lean(repo_root: Path) -> None: + """Root suite bootstrap should stay small and structural.""" + lines = (repo_root / _ROOT_CONFTEST).read_text(encoding="utf-8").splitlines() + assert len(lines) <= 80, f"tests/conftest.py grew to {len(lines)} lines (max 80)." + + +def test_root_conftest_registers_fixture_plugin_only(repo_root: Path) -> None: + """Root bootstrap should expose shared fixtures via pytest_plugins only.""" + tree = ast.parse((repo_root / _ROOT_CONFTEST).read_text(encoding="utf-8")) + plugin_values: tuple[str, ...] | None = None + for node in tree.body: + if not isinstance(node, ast.Assign): + continue + if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): + continue + if node.targets[0].id != "pytest_plugins": + continue + if not isinstance(node.value, ast.Tuple): + raise AssertionError("pytest_plugins must stay a tuple of plugin module strings.") + plugin_values = tuple( + element.value + for element in node.value.elts + if isinstance(element, ast.Constant) and isinstance(element.value, str) + ) + break + + assert plugin_values == ("tests.fixtures.settings",) + + +def test_root_conftest_only_assigns_structural_markers(repo_root: Path) -> None: + """Dynamic root marker injection should stay limited to suite semantics.""" + assigned = _root_assigned_marker_names(repo_root / _ROOT_CONFTEST) + assert assigned == _ALLOWED_ROOT_MARKERS + + +def test_special_markers_are_explicitly_owned_by_modules(repo_root: Path) -> None: + """Special-purpose pytest markers should stay declared by the owning module.""" + discovered: dict[str, set[str]] = {} + for module_path in sorted((repo_root / "tests" / "suites").rglob("test_*.py")): + marker_names = _module_marker_names(module_path) - _IGNORED_MODULE_MARKERS + if marker_names: + discovered[module_path.relative_to(repo_root).as_posix()] = marker_names + + assert discovered == _EXPECTED_SPECIAL_MARKERS diff --git a/tests/suites/unit/transcription/backends/test_faster_whisper_adapter.py b/tests/suites/unit/transcription/backends/test_faster_whisper_adapter.py new file mode 100644 index 00000000..fac87736 --- /dev/null +++ b/tests/suites/unit/transcription/backends/test_faster_whisper_adapter.py @@ -0,0 +1,525 @@ +"""Focused unit coverage for faster-whisper adapter runtime branches.""" + +from __future__ import annotations + +import sys +from dataclasses import dataclass +from pathlib import Path +from types import ModuleType, SimpleNamespace +from typing import cast + +import pytest + +from ser.config import AppConfig +from ser.domain import TranscriptWord +from ser.transcript.backends.base import BackendRuntimeRequest +from ser.transcript.backends.faster_whisper import FasterWhisperAdapter + +pytestmark = pytest.mark.unit + + +def _runtime_request( + *, + model_name: str = "distil-large-v3", + device_spec: str = "cpu", + device_type: str = "cpu", + precision_candidates: tuple[str, ...] = ("float32",), + use_demucs: bool = False, + use_vad: bool = True, +) -> BackendRuntimeRequest: + """Build one runtime request for adapter-focused tests.""" + return BackendRuntimeRequest( + model_name=model_name, + use_demucs=use_demucs, + use_vad=use_vad, + device_spec=device_spec, + device_type=device_type, + precision_candidates=precision_candidates, + ) + + +def _settings(download_root: Path) -> AppConfig: + """Build a minimal settings stub for download-root access.""" + return cast( + AppConfig, + SimpleNamespace(models=SimpleNamespace(whisper_download_root=download_root)), + ) + + +@dataclass(slots=True) +class _FspathValue: + """Simple os.PathLike stub for cache-probe responses.""" + + path: str + + def __fspath__(self) -> str: + return self.path + + +@dataclass(slots=True) +class _WordStub: + """Transcript word stub mirroring faster-whisper word attributes.""" + + word: str + start: float | None + end: float | None + + +@dataclass(slots=True) +class _SegmentStub: + """Transcript segment stub with `words` payload.""" + + words: object + + +def test_setup_required_returns_false_for_empty_model_name(tmp_path: Path) -> None: + """Blank model names should skip remote cache probing entirely.""" + adapter = FasterWhisperAdapter() + + assert ( + adapter.setup_required( + runtime_request=_runtime_request(model_name=" "), + settings=_settings(tmp_path), + ) + is False + ) + + +def test_setup_required_returns_false_for_existing_directory(tmp_path: Path) -> None: + """Local model directories should not request additional setup.""" + model_dir = tmp_path / "local-model" + model_dir.mkdir() + + adapter = FasterWhisperAdapter() + + assert ( + adapter.setup_required( + runtime_request=_runtime_request(model_name=str(model_dir)), + settings=_settings(tmp_path), + ) + is False + ) + + +def test_setup_required_returns_true_when_local_only_probe_raises( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + """A failed local-only cache probe should request asset preparation.""" + + def _download_model(_model_name: str, *, local_files_only: bool, cache_dir: str) -> str: + assert local_files_only is True + assert cache_dir == str(tmp_path) + raise RuntimeError("cache miss") + + monkeypatch.setattr( + "ser.transcript.backends.faster_whisper.importlib.import_module", + lambda name: ( + SimpleNamespace(download_model=_download_model) + if name == "faster_whisper.utils" + else (_ for _ in ()).throw(ModuleNotFoundError(name)) + ), + ) + + adapter = FasterWhisperAdapter() + + assert adapter.setup_required( + runtime_request=_runtime_request(), + settings=_settings(tmp_path), + ) + + +def test_setup_required_accepts_string_and_pathlike_directory_probes( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + """Existing cache directories reported as string or PathLike should be accepted.""" + cached_dir = tmp_path / "cached-model" + cached_dir.mkdir() + adapter = FasterWhisperAdapter() + + def _download_from_string( + _model_name: str, + *, + local_files_only: bool, + cache_dir: str, + ) -> str: + assert local_files_only is True + assert cache_dir == str(tmp_path) + return str(cached_dir) + + monkeypatch.setattr( + "ser.transcript.backends.faster_whisper.importlib.import_module", + lambda name: ( + SimpleNamespace(download_model=_download_from_string) + if name == "faster_whisper.utils" + else (_ for _ in ()).throw(ModuleNotFoundError(name)) + ), + ) + assert ( + adapter.setup_required( + runtime_request=_runtime_request(), + settings=_settings(tmp_path), + ) + is False + ) + + def _download_from_pathlike( + _model_name: str, + *, + local_files_only: bool, + cache_dir: str, + ) -> _FspathValue: + assert local_files_only is True + assert cache_dir == str(tmp_path) + return _FspathValue(str(cached_dir)) + + monkeypatch.setattr( + "ser.transcript.backends.faster_whisper.importlib.import_module", + lambda name: ( + SimpleNamespace(download_model=_download_from_pathlike) + if name == "faster_whisper.utils" + else (_ for _ in ()).throw(ModuleNotFoundError(name)) + ), + ) + assert ( + adapter.setup_required( + runtime_request=_runtime_request(), + settings=_settings(tmp_path), + ) + is False + ) + + +def test_prepare_assets_downloads_missing_model_into_cache( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + """Asset preparation should download into the configured whisper cache root.""" + captured: dict[str, object] = {} + + def _download_model(model_name: str, *, local_files_only: bool, cache_dir: str) -> str: + captured["model_name"] = model_name + captured["local_files_only"] = local_files_only + captured["cache_dir"] = cache_dir + return cache_dir + + monkeypatch.setattr( + "ser.transcript.backends.faster_whisper.importlib.import_module", + lambda name: ( + SimpleNamespace(download_model=_download_model) + if name == "faster_whisper.utils" + else (_ for _ in ()).throw(ModuleNotFoundError(name)) + ), + ) + + adapter = FasterWhisperAdapter() + adapter.prepare_assets( + runtime_request=_runtime_request(), + settings=_settings(tmp_path), + ) + + assert captured == { + "model_name": "distil-large-v3", + "local_files_only": False, + "cache_dir": str(tmp_path), + } + assert tmp_path.is_dir() + + +def test_prepare_assets_ignores_missing_dependency( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + """Missing faster-whisper utilities should become a no-op.""" + monkeypatch.setattr( + "ser.transcript.backends.faster_whisper.importlib.import_module", + lambda name: (_ for _ in ()).throw(ModuleNotFoundError(name)), + ) + + adapter = FasterWhisperAdapter() + adapter.prepare_assets( + runtime_request=_runtime_request(), + settings=_settings(tmp_path), + ) + + assert not any(tmp_path.iterdir()) + + +def test_load_model_raises_clear_error_when_dependency_is_missing( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + """Model loading should surface actionable dependency remediation.""" + monkeypatch.setattr( + "ser.transcript.backends.faster_whisper.importlib.import_module", + lambda name: (_ for _ in ()).throw(ModuleNotFoundError(name)), + ) + + adapter = FasterWhisperAdapter() + + with pytest.raises(RuntimeError, match="Missing faster-whisper dependencies"): + adapter.load_model( + runtime_request=_runtime_request(), + settings=_settings(tmp_path), + ) + + +def test_load_model_requires_whisper_model_symbol( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + """The adapter should reject packages that do not expose WhisperModel.""" + monkeypatch.setattr( + "ser.transcript.backends.faster_whisper.importlib.import_module", + lambda name: ( + SimpleNamespace() + if name == "faster_whisper" + else (_ for _ in ()).throw(ModuleNotFoundError(name)) + ), + ) + + adapter = FasterWhisperAdapter() + + with pytest.raises(RuntimeError, match="does not expose WhisperModel"): + adapter.load_model( + runtime_request=_runtime_request(), + settings=_settings(tmp_path), + ) + + +def test_load_model_uses_cpu_int8_for_mps_requests( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + """MPS requests should load faster-whisper on CPU with int8 compute.""" + captured: dict[str, object] = {} + + class _FakeWhisperModel: + def __init__( + self, + model_name: str, + *, + device: str, + compute_type: str, + download_root: str, + ) -> None: + captured["model_name"] = model_name + captured["device"] = device + captured["compute_type"] = compute_type + captured["download_root"] = download_root + + monkeypatch.setattr( + "ser.transcript.backends.faster_whisper.importlib.import_module", + lambda name: ( + SimpleNamespace(WhisperModel=_FakeWhisperModel) + if name == "faster_whisper" + else (_ for _ in ()).throw(ModuleNotFoundError(name)) + ), + ) + + adapter = FasterWhisperAdapter() + model = adapter.load_model( + runtime_request=_runtime_request( + device_spec="mps", + device_type="mps", + precision_candidates=("float16", "float32"), + ), + settings=_settings(tmp_path), + ) + + assert isinstance(model, _FakeWhisperModel) + assert captured == { + "model_name": "distil-large-v3", + "device": "cpu", + "compute_type": "int8", + "download_root": str(tmp_path), + } + + +def test_load_model_uses_cuda_float16_when_requested( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + """CUDA requests should preserve the resolved device and first precision candidate.""" + captured: dict[str, object] = {} + + class _FakeWhisperModel: + def __init__( + self, + model_name: str, + *, + device: str, + compute_type: str, + download_root: str, + ) -> None: + captured["model_name"] = model_name + captured["device"] = device + captured["compute_type"] = compute_type + captured["download_root"] = download_root + + monkeypatch.setattr( + "ser.transcript.backends.faster_whisper.importlib.import_module", + lambda name: ( + SimpleNamespace(WhisperModel=_FakeWhisperModel) + if name == "faster_whisper" + else (_ for _ in ()).throw(ModuleNotFoundError(name)) + ), + ) + + adapter = FasterWhisperAdapter() + adapter.load_model( + runtime_request=_runtime_request( + device_spec="cuda:0", + device_type="cuda", + precision_candidates=("float16", "float32"), + ), + settings=_settings(tmp_path), + ) + + assert captured["device"] == "cuda:0" + assert captured["compute_type"] == "float16" + + +def test_transcribe_requires_callable_model_entrypoint(tmp_path: Path) -> None: + """Loaded model objects must expose a callable transcribe method.""" + adapter = FasterWhisperAdapter() + + with pytest.raises(RuntimeError, match="does not expose a callable transcribe"): + adapter.transcribe( + model=object(), + runtime_request=_runtime_request(), + file_path="sample.wav", + language="en", + settings=_settings(tmp_path), + ) + + +def test_transcribe_wraps_backend_errors( + tmp_path: Path, +) -> None: + """Adapter should normalize backend transcription failures.""" + + class _FailingModel: + def transcribe(self, **_kwargs: object) -> tuple[object, object]: + raise ValueError("boom") + + adapter = FasterWhisperAdapter() + + with pytest.raises(RuntimeError, match="Failed to transcribe audio"): + adapter.transcribe( + model=_FailingModel(), + runtime_request=_runtime_request(), + file_path="sample.wav", + language="en", + settings=_settings(tmp_path), + ) + + +def test_transcribe_rejects_invalid_result_envelope(tmp_path: Path) -> None: + """Unexpected backend result shapes should fail fast.""" + + class _InvalidModel: + def transcribe(self, **_kwargs: object) -> object: + return ["not", "a", "tuple"] + + adapter = FasterWhisperAdapter() + + with pytest.raises(RuntimeError, match="Unexpected result envelope"): + adapter.transcribe( + model=_InvalidModel(), + runtime_request=_runtime_request(), + file_path="sample.wav", + language="en", + settings=_settings(tmp_path), + ) + + +def test_transcribe_rejects_non_iterable_segments(tmp_path: Path) -> None: + """Segments payloads must be iterable for transcript formatting.""" + + class _InvalidModel: + def transcribe(self, **_kwargs: object) -> tuple[object, object]: + return (1, {"language": "en"}) + + adapter = FasterWhisperAdapter() + + with pytest.raises(RuntimeError, match="Unexpected segment stream type"): + adapter.transcribe( + model=_InvalidModel(), + runtime_request=_runtime_request(), + file_path="sample.wav", + language="en", + settings=_settings(tmp_path), + ) + + +def test_transcribe_formats_word_timestamps_and_logs_demucs_warning( + caplog: pytest.LogCaptureFixture, + tmp_path: Path, +) -> None: + """Transcription should format valid words and skip incomplete segment entries.""" + + class _FakeModel: + def transcribe(self, **kwargs: object) -> tuple[object, object]: + assert kwargs == { + "audio": "sample.wav", + "language": "en", + "word_timestamps": True, + "vad_filter": True, + "beam_size": 5, + } + return ( + [ + _SegmentStub( + words=[ + _WordStub(word="hello", start=0.1, end=0.4), + _WordStub(word="skip-missing-start", start=None, end=0.5), + _WordStub(word="skip-missing-end", start=0.5, end=None), + ] + ), + _SegmentStub(words="not-a-sequence"), + _SegmentStub(words=(_WordStub(word="world", start=0.5, end=0.8),)), + ], + {"language": "en"}, + ) + + caplog.set_level("WARNING") + adapter = FasterWhisperAdapter() + + transcript = adapter.transcribe( + model=_FakeModel(), + runtime_request=_runtime_request(use_demucs=True), + file_path="sample.wav", + language="en", + settings=_settings(tmp_path), + ) + + assert transcript == [ + TranscriptWord(word="hello", start_seconds=0.1, end_seconds=0.4), + TranscriptWord(word="world", start_seconds=0.5, end_seconds=0.8), + ] + assert "demucs flag is ignored" in caplog.text + + +def test_is_module_available_handles_sys_modules_and_find_spec_errors( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Availability probing should tolerate loaded modules and invalid specs.""" + adapter = FasterWhisperAdapter() + module_name = "synthetic_faster_whisper_dependency" + original = sys.modules.get(module_name) + sys.modules[module_name] = ModuleType(module_name) + try: + assert adapter._is_module_available(module_name) is True + finally: + if original is None: + sys.modules.pop(module_name, None) + else: + sys.modules[module_name] = original + + monkeypatch.setattr( + "ser.transcript.backends.faster_whisper.importlib.util.find_spec", + lambda name: (_ for _ in ()).throw(ValueError(name)), + ) + assert adapter._is_module_available("missing_dependency") is False diff --git a/tests/suites/unit/utils/test_common_utils.py b/tests/suites/unit/utils/test_common_utils.py new file mode 100644 index 00000000..33d8c3f9 --- /dev/null +++ b/tests/suites/unit/utils/test_common_utils.py @@ -0,0 +1,27 @@ +"""Unit coverage for shared elapsed-time formatting helpers.""" + +from __future__ import annotations + +import pytest + +from ser.utils.common_utils import display_elapsed_time + +pytestmark = pytest.mark.unit + + +@pytest.mark.parametrize( + ("elapsed_time", "output_format", "expected"), + [ + (12.3456, "long", "12.35 seconds"), + (65.0, "long", "1 min 5 seconds"), + (12.3456, "short", "12.35s"), + (65.0, "short", "1m5s"), + ], +) +def test_display_elapsed_time_formats_short_and_long_variants( + elapsed_time: float, + output_format: str, + expected: str, +) -> None: + """Elapsed time formatting should stay stable across supported styles.""" + assert display_elapsed_time(elapsed_time, _format=output_format) == expected