From 143f2f1af4e459e946e64a45e73859d15b7a2e3a Mon Sep 17 00:00:00 2001 From: Juan Sugg Date: Fri, 27 Mar 2026 07:27:06 -0300 Subject: [PATCH 1/2] fix: resolve warning noise docs drift and coverage gaps --- .local/resolve-open-risks.md | 206 ++++++++++++++ README.md | 2 +- docs/adr/README.md | 17 ++ docs/architecture.md | 2 +- docs/codebase-architecture.md | 10 +- .../transcription/in_process_orchestration.py | 6 +- ser/runtime/benchmarks.py | 11 +- ser/runtime/quality_gate_cli.py | 16 +- ser/utils/dsp.py | 125 ++++++--- .../docs/test_architecture_docs.py | 45 ++++ .../config/test_runtime_environment.py | 32 +++ tests/suites/unit/runtime/test_benchmarks.py | 93 +++++++ .../test_in_process_orchestration.py | 252 ++++++++++++++++++ tests/suites/unit/utils/test_dsp.py | 152 +++++++++++ 14 files changed, 901 insertions(+), 68 deletions(-) create mode 100644 .local/resolve-open-risks.md create mode 100644 docs/adr/README.md create mode 100644 tests/suites/integration/docs/test_architecture_docs.py create mode 100644 tests/suites/unit/internal/config/test_runtime_environment.py create mode 100644 tests/suites/unit/runtime/test_benchmarks.py create mode 100644 tests/suites/unit/transcription/test_in_process_orchestration.py create mode 100644 tests/suites/unit/utils/test_dsp.py diff --git a/.local/resolve-open-risks.md b/.local/resolve-open-risks.md new file mode 100644 index 00000000..3a76efbf --- /dev/null +++ b/.local/resolve-open-risks.md @@ -0,0 +1,206 @@ +# Resolve Open Risks + +Last updated: 2026-03-27 +Scope: `ser` repository +Status: active working document + +## Objective + +Close the highest-value open risks identified in the March 27, 2026 codebase assessment while preserving the current public contracts, architectural seams, and release posture. + +This document is intentionally operational. It records: + +1. the concrete open risks +2. why they matter +3. the implementation plan to close them +4. the acceptance criteria that define "done" +5. the validation required before merge + +## Current Risk Register + +### Risk 1: non-actionable `librosa` warnings leak into normal inference output + +- Status: closed in this change +- Severity: medium +- User impact: production CLI output is noisy and looks unreliable even when inference succeeds +- Evidence: + - live CLI smoke run emitted raw `librosa` `UserWarning` lines during normal fast-profile inference + - `ser/runtime/quality_gate_cli.py` already suppresses the exact warning families, but the regular feature-extraction path does not + - `ser/utils/dsp.py` directly calls `librosa.stft`, `librosa.feature.mfcc`, `librosa.feature.chroma_stft`, and `librosa.feature.melspectrogram` +- Root cause: + - warning suppression policy is duplicated and only applied in the quality-gate CLI path + - handcrafted feature extraction does not use a scoped warning policy around the known `librosa` warning families +- Target state: + - known non-actionable `librosa` warnings are suppressed in normal handcrafted feature extraction + - suppression remains scoped and specific so unexpected warnings still surface + - quality-gate CLI uses the same shared policy instead of duplicating the regexes +- Implementation plan: + 1. Extract shared warning filter constants and one reusable filter application helper in `ser/utils/dsp.py`. + 2. Add a scoped context manager in `ser/utils/dsp.py` so normal inference suppresses only the targeted warning families during feature extraction. + 3. Reuse the same helper in `ser/runtime/quality_gate_cli.py` so the warning policy has one source of truth. + 4. Add unit tests under `tests/suites/unit/utils/` covering: + - suppression of the known `n_fft` warning + - suppression of the empty-frequency-set tuning warning + - propagation of unrelated warnings +- Acceptance criteria: + - normal feature extraction does not emit the known `librosa` warning families + - unrelated warnings are still observable + - quality-gate CLI no longer hardcodes duplicate warning filter definitions +- Resolution summary: + - shared feature-extraction warning filters now live in `ser/utils/dsp.py` + - normal DSP extraction applies the warning policy with scoped suppression + - `ser/runtime/quality_gate_cli.py` now reuses the shared helper + - live CLI smoke validation no longer emits the raw `librosa` warnings + +### Risk 2: architecture and README docs have drifted from the current tree + +- Status: closed in this change +- Severity: medium +- User impact: maintainers and contributors follow broken or stale references +- Evidence: + - `README.md` links to `docs/adr`, but `docs/adr` is absent + - `docs/architecture.md` links to `docs/adr/`, but the directory is absent + - `docs/codebase-architecture.md` still reports March 12, 2026 counts that no longer match the current tree +- Root cause: + - documentation was not updated after subsequent architecture and test-suite growth + - broken references are not guarded by tests +- Target state: + - contributor-facing architecture docs only reference files and directories that exist + - codebase snapshot counts and dates reflect the current repository state + - at least one test protects the repaired docs contract +- Implementation plan: + 1. Repair README and architecture index links so they point to existing, maintained docs. + 2. Refresh snapshot date and counts in `docs/codebase-architecture.md`. + 3. Add a real `docs/adr/README.md` index so architecture-decision references are no longer broken. + 4. Add an integration-style docs contract test under `tests/suites/integration/docs/` that verifies the expected documentation targets exist. +- Acceptance criteria: + - no broken architecture/doc links remain in the touched surfaces + - architecture snapshot counts reflect the current tree + - docs contract test passes +- Resolution summary: + - README and architecture index now point at a real ADR index file + - `docs/adr/README.md` now exists as the stable architecture-decision index + - `docs/codebase-architecture.md` snapshot date and counts are refreshed + - docs contract tests were added under `tests/suites/integration/docs/` + +### Risk 3: branch coverage passes, but margin is too narrow + +- Status: materially improved in this change +- Severity: medium +- User impact: small unrelated changes can start failing CI because the coverage budget is sitting near the floor +- Evidence: + - `make test-cov` currently passes at 78.48% against a fail-under of 78.00% + - several low-effort modules still have weak or zero coverage: + - `ser/runtime/benchmarks.py` + - `ser/_internal/config/runtime_environment.py` + - `ser/_internal/transcription/in_process_orchestration.py` +- Root cause: + - the suite is broad but some owner/helper modules remain unexercised + - easy deterministic paths have not yet been promoted into the organized `tests/suites` structure +- Target state: + - coverage margin is widened by adding deterministic tests for undercovered owner/helper modules + - new tests live in organized suite directories +- Implementation plan: + 1. Add unit tests under `tests/suites/unit/runtime/` for `ser/runtime/benchmarks.py`. + 2. Add unit tests under `tests/suites/unit/internal/config/` for `ser/_internal/config/runtime_environment.py`. + 3. Add unit tests under `tests/suites/unit/transcription/` for `ser/_internal/transcription/in_process_orchestration.py`. + 4. Keep assertions focused on real contracts: success paths, failure paths, lifecycle cleanup, and emitted phase hooks. +- Acceptance criteria: + - new deterministic tests cover the targeted modules + - total branch coverage clears the configured threshold with materially better headroom than before this change +- Resolution summary: + - added organized unit coverage for: + - `ser/runtime/benchmarks.py` + - `ser/_internal/config/runtime_environment.py` + - `ser/_internal/transcription/in_process_orchestration.py` + - `ser/utils/dsp.py` + - total coverage improved from `78.48%` to `79.03%` + - deterministic new tests live under `tests/suites/unit/` and `tests/suites/integration/` + +### Risk 4: large orchestration hotspots still concentrate maintenance risk + +- Status: planned +- Severity: medium +- User impact: slower and riskier future changes in runtime/transcription/data hotspots +- Evidence: + - large remaining files include: + - `ser/transcript/backends/stable_whisper.py` + - `ser/_internal/runtime/accurate_public_boundary.py` + - `ser/_internal/runtime/medium_public_boundary.py` + - `ser/data/dataset_prepare.py` + - `ser/runtime/profile_quality_gate.py` +- Root cause: + - repeated extraction work has improved architecture, but some owner modules still bundle multiple orchestration concerns +- Target state: + - further refactors are staged, boundary-safe, and guided by tests rather than broad rewrites +- Implementation plan: + 1. Avoid expanding hotspot scope in this change. + 2. Use the warning-policy consolidation in Risk 1 as the pattern: extract reusable policy/mechanics into small units without widening public APIs. + 3. Prioritize future extractions in this order: + - `ser/runtime/profile_quality_gate.py`: isolate CLI parsing, report serialization, and evaluation wiring + - `ser/data/dataset_prepare.py`: isolate provider/source provenance validation from manifest orchestration + - `ser/transcript/backends/stable_whisper.py`: isolate import/runtime noise policy, transcribe-call assembly, and retry classification seams + 4. Require each future extraction to land with direct unit coverage against the new owner/helper seam. +- Acceptance criteria: + - this change does not worsen hotspot concentration + - this document gives the next change packets a concrete extraction order and constraints + +## Validation Results + +- `make import-lint`: passed +- `uv run --extra dev ruff check ser tests`: passed +- `uv run --extra dev pyright --pythonversion 3.12 ser tests`: passed +- `make test-cov`: passed with `79.03%` total coverage and `939` passing tests +- `uv build`: passed +- `uv run --with twine twine check dist/*`: passed +- `uv run ser --file sample.wav --profile fast --no-transcript --preflight warn`: passed without the previously leaked raw `librosa` warning output + +## Change Outcome + +- Risks 1 and 2 are closed by this change. +- Risk 3 is improved and now has more CI headroom. +- Risk 4 remains a planned staged refactor track; this change reduced one small maintenance pain point by consolidating duplicated feature-warning policy. + +## Execution Plan For This Change + +### Phase 1: warning policy consolidation + +- Introduce shared handcrafted feature warning filter helpers in `ser/utils/dsp.py`. +- Apply scoped suppression inside DSP feature extraction. +- Reuse the shared helper from `ser/runtime/quality_gate_cli.py`. + +### Phase 2: targeted tests and coverage widening + +- Add new unit tests under: + - `tests/suites/unit/utils/` + - `tests/suites/unit/runtime/` + - `tests/suites/unit/internal/config/` + - `tests/suites/unit/transcription/` +- Keep tests deterministic and dependency-light. + +### Phase 3: documentation repair + +- Refresh stale counts and dates. +- Remove broken doc references by either fixing them or creating the referenced artifact. +- Add one docs contract test under `tests/suites/integration/docs/`. + +### Phase 4: release validation + +- Run targeted unit and integration suites for touched areas. +- Run repository quality gates: + - `make import-lint` + - `uv run --extra dev ruff check ser tests` + - `uv run --extra dev pyright --pythonversion 3.12 ser tests` + - `make test-cov` + - `uv build` + - `uv run --with twine twine check dist/*` +- Run one end-to-end CLI smoke command and verify the known warning noise no longer appears. + +## Definition Of Done + +- Risk 1 is closed in code and tests. +- Risk 2 is closed in docs and tests. +- Risk 3 is materially improved with organized new test coverage. +- Risk 4 has a concrete staged plan and is not worsened by the implementation. +- CI-critical local gates pass from the worktree. +- Documentation reflects the current repository state. diff --git a/README.md b/README.md index 66ab71a1..2eac948d 100644 --- a/README.md +++ b/README.md @@ -112,5 +112,5 @@ uv run pytest -q tests/test_import_lint_policy.py tests/test_api_import_boundary - Contributor guide: [CONTRIBUTING.md](https://github.com/jsugg/ser/blob/main/CONTRIBUTING.md) - Compatibility details: [docs/compatibility-matrix.md](https://github.com/jsugg/ser/blob/main/docs/compatibility-matrix.md) - Hardware validation workflows: [docs/ci/hardware-validation.md](https://github.com/jsugg/ser/blob/main/docs/ci/hardware-validation.md) -- Architecture decisions: [docs/adr](https://github.com/jsugg/ser/tree/main/docs/adr) +- Architecture decisions index: [docs/adr/README.md](https://github.com/jsugg/ser/blob/main/docs/adr/README.md) - License: [LICENSE](https://github.com/jsugg/ser/blob/main/LICENSE) diff --git a/docs/adr/README.md b/docs/adr/README.md new file mode 100644 index 00000000..e5d212fa --- /dev/null +++ b/docs/adr/README.md @@ -0,0 +1,17 @@ +# Architecture Decisions + +This directory is the stable index point for architecture-decision records in `ser`. + +## Current state + +The repository currently keeps its architecture guidance in these maintained documents: + +- [`../architecture.md`](../architecture.md): entry point for architecture references +- [`../codebase-architecture.md`](../codebase-architecture.md): narrative codebase analysis +- [`../subsystem-dependency-map.md`](../subsystem-dependency-map.md): subsystem dependency directions and soft-boundary policy +- [`../refactor-hotspot-checks.md`](../refactor-hotspot-checks.md): hotspot inventory for careful refactors +- [`../architecture-refactor-roadmap.md`](../architecture-refactor-roadmap.md): staged refactor priorities + +## How this directory should be used + +When a future change materially alters architectural direction, add a numbered ADR Markdown file here and link it from this index. Until then, the documents above are the authoritative architecture references for contributors. diff --git a/docs/architecture.md b/docs/architecture.md index bdcb9414..5909d8e2 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -8,7 +8,7 @@ Use this page as the canonical starting point for SER architecture and change-pl - Text architecture diagram: [`docs/architecture-diagram.md`](architecture-diagram.md) - Subsystem dependency map: [`docs/subsystem-dependency-map.md`](subsystem-dependency-map.md) - Refactor hotspot inventory: [`docs/refactor-hotspot-checks.md`](refactor-hotspot-checks.md) -- Architecture decisions: [`docs/adr/`](adr/) +- Architecture decisions index: [`docs/adr/README.md`](adr/README.md) - Compatibility matrix: [`docs/compatibility-matrix.md`](compatibility-matrix.md) - Hardware validation policy: [`docs/ci/hardware-validation.md`](ci/hardware-validation.md) diff --git a/docs/codebase-architecture.md b/docs/codebase-architecture.md index 7f6d6e16..0c616e13 100644 --- a/docs/codebase-architecture.md +++ b/docs/codebase-architecture.md @@ -8,12 +8,12 @@ formal artifacts in [`docs/architecture-diagram.md`](architecture-diagram.md), ## Scope and current state -These counts are a current working-tree snapshot taken on March 12, 2026. +These counts are a current working-tree snapshot taken on March 27, 2026. -- Source modules under `ser/`: `226` -- Test modules under `tests/`: `136` -- Public modules outside `_internal/`: `166` -- Internal owner/helper modules under `_internal/`: `60` +- Source modules under `ser/`: `228` +- Test modules under `tests/`: `145` +- Public modules outside `_internal/`: `167` +- Internal owner/helper modules under `_internal/`: `61` - Public modules importing `_internal` directly: `24` This is a modular monolith with explicit subsystem seams. It is not a textbook diff --git a/ser/_internal/transcription/in_process_orchestration.py b/ser/_internal/transcription/in_process_orchestration.py index 901a9d4d..0ce75b9f 100644 --- a/ser/_internal/transcription/in_process_orchestration.py +++ b/ser/_internal/transcription/in_process_orchestration.py @@ -4,7 +4,7 @@ import logging from collections.abc import Callable -from typing import Protocol, TypeVar, cast +from typing import TYPE_CHECKING, Protocol, TypeVar, cast from ser.config import AppConfig from ser.profiles import TranscriptionBackendId @@ -13,7 +13,9 @@ PHASE_TRANSCRIPTION_MODEL_LOAD, PHASE_TRANSCRIPTION_SETUP, ) -from ser.transcript.backends import BackendRuntimeRequest + +if TYPE_CHECKING: + from ser.transcript.backends import BackendRuntimeRequest class _SetupLoadAdapter(Protocol): diff --git a/ser/runtime/benchmarks.py b/ser/runtime/benchmarks.py index 7bbb7f1c..9873ed4d 100644 --- a/ser/runtime/benchmarks.py +++ b/ser/runtime/benchmarks.py @@ -8,11 +8,16 @@ import time from pathlib import Path -from ser.models.emotion_model import predict_emotions - type BenchmarkSummary = dict[str, float | int] +def _predict_emotions(audio_path: str) -> object: + """Resolves the public predictor lazily to keep benchmark startup lightweight.""" + from ser.models.emotion_model import predict_emotions + + return predict_emotions(audio_path) + + def benchmark_predict(audio_path: str, runs: int) -> BenchmarkSummary: """Measures repeated prediction latency for one input audio file. @@ -32,7 +37,7 @@ def benchmark_predict(audio_path: str, runs: int) -> BenchmarkSummary: samples: list[float] = [] for _ in range(runs): start_time = time.perf_counter() - _ = predict_emotions(audio_path) + _ = _predict_emotions(audio_path) samples.append(time.perf_counter() - start_time) ordered_samples = sorted(samples) diff --git a/ser/runtime/quality_gate_cli.py b/ser/runtime/quality_gate_cli.py index 47fe3ff7..e6c05ad0 100644 --- a/ser/runtime/quality_gate_cli.py +++ b/ser/runtime/quality_gate_cli.py @@ -4,10 +4,11 @@ import argparse import logging -import warnings from collections.abc import Sequence from dataclasses import dataclass +from ser.utils.dsp import configure_feature_extraction_warning_filters + @dataclass(frozen=True, slots=True) class QualityGateCliDefaults: @@ -29,18 +30,7 @@ class QualityGateCliDefaults: def configure_cli_noise_controls() -> None: """Suppresses non-actionable warning/log noise for long gate executions.""" - warnings.filterwarnings( - "ignore", - message=r"n_fft=\d+ is too large for input signal of length=.*", - category=UserWarning, - module=r"librosa\.core\.spectrum", - ) - warnings.filterwarnings( - "ignore", - message=r"Trying to estimate tuning from empty frequency set\.", - category=UserWarning, - module=r"librosa\.core\.pitch", - ) + configure_feature_extraction_warning_filters() logging.getLogger("ser.models.emotion_model").setLevel(logging.WARNING) logging.getLogger("ser.features.feature_extractor").setLevel(logging.ERROR) logging.getLogger("ser.runtime.medium_inference").setLevel(logging.WARNING) diff --git a/ser/utils/dsp.py b/ser/utils/dsp.py index b667779c..c5fd9546 100644 --- a/ser/utils/dsp.py +++ b/ser/utils/dsp.py @@ -3,6 +3,9 @@ from __future__ import annotations import logging +import warnings +from collections.abc import Iterator +from contextlib import contextmanager import librosa import numpy as np @@ -14,6 +17,22 @@ logger: logging.Logger = get_logger(__name__) type FeatureVector = NDArray[np.float64] +type _WarningFilterDefinition = tuple[str, type[Warning], str] + +_SHORT_SIGNAL_WARNING_MESSAGE_REGEX = r"n_fft=\d+ is too large for input signal of length=.*" +_EMPTY_TUNING_WARNING_MESSAGE_REGEX = r"Trying to estimate tuning from empty frequency set\." +_FEATURE_EXTRACTION_WARNING_FILTERS: tuple[_WarningFilterDefinition, ...] = ( + ( + _SHORT_SIGNAL_WARNING_MESSAGE_REGEX, + UserWarning, + r"librosa\.core\.spectrum", + ), + ( + _EMPTY_TUNING_WARNING_MESSAGE_REGEX, + UserWarning, + r"librosa\.core\.pitch", + ), +) def _pad_audio_for_fft( @@ -26,6 +45,25 @@ def _pad_audio_for_fft( return np.pad(audio, (0, pad_width), mode="constant") +def configure_feature_extraction_warning_filters() -> None: + """Applies global warning filters for known non-actionable `librosa` noise.""" + for message_regex, category, module_regex in _FEATURE_EXTRACTION_WARNING_FILTERS: + warnings.filterwarnings( + "ignore", + message=message_regex, + category=category, + module=module_regex, + ) + + +@contextmanager +def _scoped_feature_extraction_warning_filters() -> Iterator[None]: + """Suppresses known non-actionable `librosa` warnings for one extraction scope.""" + with warnings.catch_warnings(): + configure_feature_extraction_warning_filters() + yield + + def extract_feature_from_signal( audio: NDArray[np.float32], sample_rate: int, @@ -56,53 +94,54 @@ def extract_feature_from_signal( if not bool(np.all(np.isfinite(prepared_audio))): raise ValueError("Audio buffer is not finite everywhere.") n_fft: int = min(prepared_audio.size, 2048) - stft_magnitude: NDArray[np.float32] = np.abs(librosa.stft(prepared_audio, n_fft=n_fft)) - stft_power_db: NDArray[np.float32] = librosa.power_to_db( - np.square(stft_magnitude), - ref=np.max, - ) - feature_parts: list[NDArray[np.float64]] = [] try: - if active_feature_flags.mfcc: - mfccs: NDArray[np.float64] = np.mean( - librosa.feature.mfcc(y=prepared_audio, sr=sample_rate, n_mfcc=40, n_fft=n_fft), - axis=1, + with _scoped_feature_extraction_warning_filters(): + stft_magnitude: NDArray[np.float32] = np.abs(librosa.stft(prepared_audio, n_fft=n_fft)) + stft_power_db: NDArray[np.float32] = librosa.power_to_db( + np.square(stft_magnitude), + ref=np.max, ) - feature_parts.append(np.asarray(mfccs, dtype=np.float64)) - if active_feature_flags.chroma: - chroma: NDArray[np.float64] = np.mean( - librosa.feature.chroma_stft(S=stft_magnitude, sr=sample_rate, n_fft=n_fft), - axis=1, - ) - feature_parts.append(np.asarray(chroma, dtype=np.float64)) - - if active_feature_flags.mel: - mel: NDArray[np.float64] = np.mean( - librosa.feature.melspectrogram(y=prepared_audio, sr=sample_rate, n_fft=n_fft), - axis=1, - ) - feature_parts.append(np.asarray(mel, dtype=np.float64)) - - if active_feature_flags.contrast: - spectral_contrast: NDArray[np.float64] = np.mean( - librosa.feature.spectral_contrast( - S=stft_power_db, - sr=sample_rate, - n_fft=n_fft, - ), - axis=1, - ) - feature_parts.append(np.asarray(spectral_contrast, dtype=np.float64)) - - if active_feature_flags.tonnetz: - harmonic: NDArray[np.float32] = librosa.effects.harmonic(prepared_audio) - tonnetz: NDArray[np.float64] = np.mean( - librosa.feature.tonnetz(y=harmonic, sr=sample_rate), - axis=1, - ) - feature_parts.append(np.asarray(tonnetz, dtype=np.float64)) + if active_feature_flags.mfcc: + mfccs: NDArray[np.float64] = np.mean( + librosa.feature.mfcc(y=prepared_audio, sr=sample_rate, n_mfcc=40, n_fft=n_fft), + axis=1, + ) + feature_parts.append(np.asarray(mfccs, dtype=np.float64)) + + if active_feature_flags.chroma: + chroma: NDArray[np.float64] = np.mean( + librosa.feature.chroma_stft(S=stft_magnitude, sr=sample_rate, n_fft=n_fft), + axis=1, + ) + feature_parts.append(np.asarray(chroma, dtype=np.float64)) + + if active_feature_flags.mel: + mel: NDArray[np.float64] = np.mean( + librosa.feature.melspectrogram(y=prepared_audio, sr=sample_rate, n_fft=n_fft), + axis=1, + ) + feature_parts.append(np.asarray(mel, dtype=np.float64)) + + if active_feature_flags.contrast: + spectral_contrast: NDArray[np.float64] = np.mean( + librosa.feature.spectral_contrast( + S=stft_power_db, + sr=sample_rate, + n_fft=n_fft, + ), + axis=1, + ) + feature_parts.append(np.asarray(spectral_contrast, dtype=np.float64)) + + if active_feature_flags.tonnetz: + harmonic: NDArray[np.float32] = librosa.effects.harmonic(prepared_audio) + tonnetz: NDArray[np.float64] = np.mean( + librosa.feature.tonnetz(y=harmonic, sr=sample_rate), + axis=1, + ) + feature_parts.append(np.asarray(tonnetz, dtype=np.float64)) except Exception as err: logger.warning("Error extracting features from signal: %s", err) raise diff --git a/tests/suites/integration/docs/test_architecture_docs.py b/tests/suites/integration/docs/test_architecture_docs.py new file mode 100644 index 00000000..4c998b8f --- /dev/null +++ b/tests/suites/integration/docs/test_architecture_docs.py @@ -0,0 +1,45 @@ +"""Integration-style contract tests for contributor-facing architecture docs.""" + +from __future__ import annotations + +import re +from pathlib import Path + +import pytest + +pytestmark = pytest.mark.integration + +_README_DOC_LINK_PATTERN = re.compile( + r"https://github\.com/jsugg/ser/(?:blob|tree)/main/(docs/[A-Za-z0-9_./-]+)" +) +_ARCHITECTURE_RELATIVE_LINK_PATTERN = re.compile(r"\(([^)]+)\)") + + +def _repo_root() -> Path: + """Returns the repository root for docs contract tests.""" + return Path(__file__).resolve().parents[4] + + +def test_readme_architecture_links_resolve_to_existing_docs() -> None: + """README architecture links should point at docs artifacts that exist in-tree.""" + root = _repo_root() + readme_text = (root / "README.md").read_text(encoding="utf-8") + resolved_targets = { + root / relative_path for relative_path in _README_DOC_LINK_PATTERN.findall(readme_text) + } + + assert resolved_targets + assert all(target.is_file() for target in resolved_targets) + + +def test_architecture_index_links_resolve_to_existing_docs() -> None: + """Architecture index should only reference local docs artifacts that exist.""" + root = _repo_root() + architecture_text = (root / "docs" / "architecture.md").read_text(encoding="utf-8") + resolved_targets = { + (root / "docs" / relative_path).resolve() + for relative_path in _ARCHITECTURE_RELATIVE_LINK_PATTERN.findall(architecture_text) + } + + assert root / "docs" / "adr" / "README.md" in resolved_targets + assert all(target.is_file() for target in resolved_targets) diff --git a/tests/suites/unit/internal/config/test_runtime_environment.py b/tests/suites/unit/internal/config/test_runtime_environment.py new file mode 100644 index 00000000..eb526ce1 --- /dev/null +++ b/tests/suites/unit/internal/config/test_runtime_environment.py @@ -0,0 +1,32 @@ +"""Unit tests for runtime environment synchronization helpers.""" + +from __future__ import annotations + +import pytest + +from ser._internal.config.runtime_environment import sync_torch_runtime_environment + +pytestmark = pytest.mark.unit + + +@pytest.mark.parametrize( + ("enable_mps_fallback", "expected"), + [(True, "1"), (False, "0")], +) +def test_sync_torch_runtime_environment_sets_explicit_flag_value( + enable_mps_fallback: bool, + expected: str, +) -> None: + """Torch runtime environment helper should encode the boolean explicitly.""" + environ: dict[str, str] = {"UNCHANGED": "value"} + + sync_torch_runtime_environment( + enable_mps_fallback=enable_mps_fallback, + environ=environ, + pytorch_enable_mps_fallback_env="PYTORCH_ENABLE_MPS_FALLBACK", + ) + + assert environ == { + "UNCHANGED": "value", + "PYTORCH_ENABLE_MPS_FALLBACK": expected, + } diff --git a/tests/suites/unit/runtime/test_benchmarks.py b/tests/suites/unit/runtime/test_benchmarks.py new file mode 100644 index 00000000..ef0b9fdb --- /dev/null +++ b/tests/suites/unit/runtime/test_benchmarks.py @@ -0,0 +1,93 @@ +"""Unit tests for local runtime benchmark helpers.""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + +import pytest + +from ser.runtime import benchmarks + +pytestmark = pytest.mark.unit + + +def test_benchmark_predict_rejects_invalid_run_count() -> None: + """Benchmark helper should validate run count before timing work.""" + with pytest.raises(ValueError, match="greater than or equal to 1"): + benchmarks.benchmark_predict(audio_path="sample.wav", runs=0) + + +def test_benchmark_predict_returns_deterministic_summary( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Benchmark helper should compute summary statistics from timing samples.""" + perf_samples = iter([0.0, 1.0, 1.0, 3.0, 3.0, 6.0]) + monkeypatch.setattr(benchmarks, "_predict_emotions", lambda _audio_path: ["ok"]) + monkeypatch.setattr(benchmarks.time, "perf_counter", lambda: next(perf_samples)) + + summary = benchmarks.benchmark_predict(audio_path="sample.wav", runs=3) + + assert summary == { + "runs": 3, + "mean_seconds": 2.0, + "median_seconds": 2.0, + "p95_seconds": 3.0, + "min_seconds": 1.0, + "max_seconds": 3.0, + } + + +def test_parse_args_reads_cli_flags(monkeypatch: pytest.MonkeyPatch) -> None: + """Argument parser should expose required file and optional output controls.""" + monkeypatch.setattr( + sys, + "argv", + ["ser-benchmark", "--file", "sample.wav", "--runs", "7", "--out", "result.json"], + ) + + args = benchmarks._parse_args() + + assert args == argparse.Namespace(file="sample.wav", runs=7, out="result.json") + + +def test_main_prints_json_when_no_output_file( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """CLI wrapper should print benchmark JSON when no output path is requested.""" + payload = {"runs": 2, "mean_seconds": 1.5} + monkeypatch.setattr( + benchmarks, + "_parse_args", + lambda: argparse.Namespace(file="sample.wav", runs=2, out=None), + ) + monkeypatch.setattr(benchmarks, "benchmark_predict", lambda **_kwargs: payload) + + benchmarks.main() + + assert capsys.readouterr().out == json.dumps(payload, indent=2, sort_keys=True) + "\n" + + +def test_main_writes_json_output_file( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + """CLI wrapper should write JSON output when an explicit file path is provided.""" + output_path = tmp_path / "benchmarks" / "summary.json" + payload = {"runs": 4, "mean_seconds": 2.25} + monkeypatch.setattr( + benchmarks, + "_parse_args", + lambda: argparse.Namespace(file="sample.wav", runs=4, out=str(output_path)), + ) + monkeypatch.setattr(benchmarks, "benchmark_predict", lambda **_kwargs: payload) + + benchmarks.main() + + expected_text = json.dumps(payload, indent=2, sort_keys=True) + "\n" + assert output_path.read_text(encoding="utf-8") == expected_text + assert capsys.readouterr().out == expected_text diff --git a/tests/suites/unit/transcription/test_in_process_orchestration.py b/tests/suites/unit/transcription/test_in_process_orchestration.py new file mode 100644 index 00000000..f090b179 --- /dev/null +++ b/tests/suites/unit/transcription/test_in_process_orchestration.py @@ -0,0 +1,252 @@ +"""Unit tests for in-process transcription orchestration helpers.""" + +from __future__ import annotations + +import logging +from types import SimpleNamespace +from typing import cast + +import pytest + +from ser._internal.transcription import in_process_orchestration as ipo +from ser.config import AppConfig +from ser.transcript.backends.base import BackendRuntimeRequest + +pytestmark = pytest.mark.unit + + +class _Profile: + """Minimal transcription profile stub.""" + + backend_id = "faster_whisper" + + +class _Adapter: + """Deterministic adapter stub used by orchestration tests.""" + + def __init__(self) -> None: + self.calls: list[tuple[str, object, object]] = [] + self.model = object() + + def setup_required( + self, + *, + runtime_request: object, + settings: object, + ) -> bool: + self.calls.append(("setup_required", runtime_request, settings)) + return True + + def prepare_assets( + self, + *, + runtime_request: object, + settings: object, + ) -> None: + self.calls.append(("prepare_assets", runtime_request, settings)) + + def load_model( + self, + *, + runtime_request: object, + settings: object, + ) -> object: + self.calls.append(("load_model", runtime_request, settings)) + return self.model + + +def test_transcription_setup_required_uses_compatibility_checked_runtime_request() -> None: + """Setup helper should run compatibility checks before asking the adapter.""" + settings = cast(AppConfig, SimpleNamespace()) + active_profile = _Profile() + runtime_request = BackendRuntimeRequest( + model_name="tiny", + use_demucs=False, + use_vad=False, + ) + adapter = _Adapter() + compatibility_calls: list[tuple[object, object, object]] = [] + + resolved = ipo.transcription_setup_required( + active_profile=active_profile, + settings=settings, + runtime_request_resolver=lambda profile, resolved_settings: runtime_request, + compatibility_checker=lambda *, active_profile, settings, runtime_request: compatibility_calls.append( + (active_profile, settings, runtime_request) + ), + adapter_resolver=lambda backend_id: adapter, + ) + + assert resolved is True + assert compatibility_calls == [(active_profile, settings, runtime_request)] + assert adapter.calls == [("setup_required", runtime_request, settings)] + + +def test_prepare_transcription_assets_delegates_to_adapter() -> None: + """Preparation helper should reuse the same runtime request and adapter seam.""" + settings = cast(AppConfig, SimpleNamespace()) + active_profile = _Profile() + runtime_request = BackendRuntimeRequest( + model_name="tiny", + use_demucs=False, + use_vad=False, + ) + adapter = _Adapter() + + ipo.prepare_transcription_assets( + active_profile=active_profile, + settings=settings, + runtime_request_resolver=lambda profile, resolved_settings: runtime_request, + compatibility_checker=lambda **_kwargs: None, + adapter_resolver=lambda backend_id: adapter, + ) + + assert adapter.calls == [("prepare_assets", runtime_request, settings)] + + +def test_load_whisper_model_returns_adapter_model_after_logging_runtime_resolution() -> None: + """Model loader should resolve settings, runtime request, and adapter model in order.""" + settings = cast(AppConfig, SimpleNamespace()) + active_profile = _Profile() + runtime_request = BackendRuntimeRequest( + model_name="tiny", + use_demucs=False, + use_vad=False, + device_spec="cpu", + precision_candidates=("fp32",), + memory_tier="low", + ) + adapter = _Adapter() + + resolved = ipo.load_whisper_model( + profile=None, + settings_resolver=lambda: settings, + profile_resolver=lambda profile: active_profile, + runtime_request_resolver=lambda profile, resolved_settings: runtime_request, + compatibility_checker=lambda **_kwargs: None, + adapter_resolver=lambda backend_id: adapter, + logger=logging.getLogger("ser.tests.in_process_orchestration"), + error_factory=RuntimeError, + ) + + assert resolved is adapter.model + assert adapter.calls == [("load_model", runtime_request, settings)] + + +def test_load_whisper_model_wraps_runtime_failures_with_domain_error() -> None: + """Model loader should translate unexpected failures into the transcription domain error.""" + active_profile = _Profile() + runtime_request = BackendRuntimeRequest( + model_name="tiny", + use_demucs=False, + use_vad=False, + device_spec="cpu", + precision_candidates=("fp32",), + memory_tier="low", + ) + settings = cast(AppConfig, SimpleNamespace()) + + with pytest.raises(RuntimeError, match="Failed to load transcription model") as exc_info: + ipo.load_whisper_model( + profile=None, + settings_resolver=lambda: settings, + profile_resolver=lambda profile: active_profile, + runtime_request_resolver=lambda profile, resolved_settings: runtime_request, + compatibility_checker=lambda **_kwargs: (_ for _ in ()).throw(ValueError("boom")), + adapter_resolver=lambda backend_id: _Adapter(), + logger=logging.getLogger("ser.tests.in_process_orchestration"), + error_factory=RuntimeError, + ) + + assert isinstance(exc_info.value.__cause__, ValueError) + + +def test_extract_transcript_in_process_runs_setup_and_releases_model_on_success() -> None: + """In-process transcription should complete all phases and always release model memory.""" + phase_events: list[tuple[str, str]] = [] + release_calls: list[object | None] = [] + settings = cast(AppConfig, SimpleNamespace()) + active_profile = _Profile() + model = object() + + resolved = ipo.extract_transcript_in_process( + file_path="sample.wav", + language="en", + profile=active_profile, + settings_resolver=lambda: settings, + setup_required_checker=lambda *, active_profile, settings: True, + prepare_assets_runner=lambda *, active_profile, settings: phase_events.append( + ("prepare_assets", "setup") + ), + load_model_fn=lambda profile: model, + transcribe_with_profile_fn=lambda model, language, file_path, active_profile: ["word"], + release_memory_fn=lambda *, model: release_calls.append(model), + phase_started_fn=lambda logger, *, phase_name: phase_events.append(("start", phase_name)) + or 1.0, + phase_completed_fn=lambda logger, *, phase_name, started_at: phase_events.append( + ("complete", phase_name) + ) + or None, + phase_failed_fn=lambda logger, *, phase_name, started_at: phase_events.append( + ("failed", phase_name) + ) + or None, + logger=logging.getLogger("ser.tests.in_process_orchestration"), + ) + + assert resolved == ["word"] + assert phase_events == [ + ("start", "transcription_setup"), + ("prepare_assets", "setup"), + ("complete", "transcription_setup"), + ("start", "transcription_model_load"), + ("complete", "transcription_model_load"), + ("start", "transcription"), + ("complete", "transcription"), + ] + assert release_calls == [model] + + +def test_extract_transcript_in_process_reports_transcription_failure_and_releases_model() -> None: + """Transcription failures should mark the phase as failed and still release model memory.""" + phase_events: list[tuple[str, str]] = [] + release_calls: list[object | None] = [] + active_profile = _Profile() + model = object() + settings = cast(AppConfig, SimpleNamespace()) + + with pytest.raises(ValueError, match="transcribe failed"): + ipo.extract_transcript_in_process( + file_path="sample.wav", + language="en", + profile=active_profile, + settings_resolver=lambda: settings, + setup_required_checker=lambda *, active_profile, settings: False, + prepare_assets_runner=lambda *, active_profile, settings: None, + load_model_fn=lambda profile: model, + transcribe_with_profile_fn=lambda model, language, file_path, active_profile: ( + (_ for _ in ()).throw(ValueError("transcribe failed")) + ), + release_memory_fn=lambda *, model: release_calls.append(model), + phase_started_fn=lambda logger, *, phase_name: phase_events.append( + ("start", phase_name) + ) + or 1.0, + phase_completed_fn=lambda logger, *, phase_name, started_at: phase_events.append( + ("complete", phase_name) + ) + or None, + phase_failed_fn=lambda logger, *, phase_name, started_at: phase_events.append( + ("failed", phase_name) + ) + or None, + logger=logging.getLogger("ser.tests.in_process_orchestration"), + ) + + assert phase_events == [ + ("start", "transcription_model_load"), + ("complete", "transcription_model_load"), + ("start", "transcription"), + ("failed", "transcription"), + ] + assert release_calls == [model] diff --git a/tests/suites/unit/utils/test_dsp.py b/tests/suites/unit/utils/test_dsp.py new file mode 100644 index 00000000..f49da999 --- /dev/null +++ b/tests/suites/unit/utils/test_dsp.py @@ -0,0 +1,152 @@ +"""Unit tests for DSP-level handcrafted feature extraction helpers.""" + +from __future__ import annotations + +import warnings + +import numpy as np +import pytest + +from ser.config import FeatureFlags +from ser.utils import dsp + +pytestmark = [ + pytest.mark.unit, + pytest.mark.filterwarnings( + r"ignore:path is deprecated\. Use files\(\) instead\..*:DeprecationWarning" + ), +] + + +def _patch_common_feature_dependencies(monkeypatch: pytest.MonkeyPatch) -> None: + """Installs deterministic librosa stubs shared by DSP tests.""" + monkeypatch.setattr( + dsp.librosa, + "power_to_db", + lambda array, ref=None: np.asarray(array, dtype=np.float32), + ) + monkeypatch.setattr(dsp.librosa.effects, "harmonic", lambda audio: audio) + monkeypatch.setattr( + dsp.librosa.feature, + "tonnetz", + lambda **_kwargs: np.ones((6, 2), dtype=np.float32), + ) + monkeypatch.setattr( + dsp.librosa.feature, + "spectral_contrast", + lambda **_kwargs: np.ones((7, 2), dtype=np.float32), + ) + monkeypatch.setattr( + dsp.librosa.feature, + "melspectrogram", + lambda **_kwargs: np.ones((128, 2), dtype=np.float32), + ) + monkeypatch.setattr( + dsp.librosa.feature, + "mfcc", + lambda **_kwargs: np.ones((40, 2), dtype=np.float32), + ) + + +def test_configure_feature_extraction_warning_filters_ignores_known_short_signal_warning() -> None: + """Global helper should ignore the known non-actionable short-signal warning.""" + with warnings.catch_warnings(record=True) as captured: + warnings.simplefilter("always") + dsp.configure_feature_extraction_warning_filters() + warnings.warn_explicit( + "n_fft=512 is too large for input signal of length=93", + category=UserWarning, + filename="spectrum.py", + lineno=1, + module="librosa.core.spectrum", + ) + + assert captured == [] + + +def test_extract_feature_from_signal_suppresses_known_librosa_warnings( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Scoped extraction should suppress the known librosa warning families.""" + _patch_common_feature_dependencies(monkeypatch) + + def _warn_stft(_audio: np.ndarray, *, n_fft: int) -> np.ndarray: + warnings.warn_explicit( + "n_fft=512 is too large for input signal of length=93", + category=UserWarning, + filename="spectrum.py", + lineno=1, + module="librosa.core.spectrum", + ) + return np.ones((n_fft // 2 + 1, 2), dtype=np.float32) + + def _warn_chroma_stft(**_kwargs: object) -> np.ndarray: + warnings.warn_explicit( + "Trying to estimate tuning from empty frequency set.", + category=UserWarning, + filename="pitch.py", + lineno=1, + module="librosa.core.pitch", + ) + return np.ones((12, 2), dtype=np.float32) + + monkeypatch.setattr(dsp.librosa, "stft", _warn_stft) + monkeypatch.setattr(dsp.librosa.feature, "chroma_stft", _warn_chroma_stft) + + with warnings.catch_warnings(record=True) as captured: + warnings.simplefilter("always") + feature_vector = dsp.extract_feature_from_signal( + np.ones(256, dtype=np.float32), + sample_rate=16_000, + feature_flags=FeatureFlags( + mfcc=False, + chroma=True, + mel=False, + contrast=False, + tonnetz=False, + ), + ) + + assert feature_vector.shape == (12,) + assert captured == [] + + +def test_extract_feature_from_signal_preserves_unexpected_warnings( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Unexpected warnings should remain visible to callers.""" + _patch_common_feature_dependencies(monkeypatch) + monkeypatch.setattr( + dsp.librosa, + "stft", + lambda _audio, *, n_fft: np.ones((n_fft // 2 + 1, 2), dtype=np.float32), + ) + + def _unexpected_chroma(**_kwargs: object) -> np.ndarray: + warnings.warn_explicit( + "unexpected feature warning", + category=UserWarning, + filename="pitch.py", + lineno=1, + module="librosa.core.pitch", + ) + return np.ones((12, 2), dtype=np.float32) + + monkeypatch.setattr(dsp.librosa.feature, "chroma_stft", _unexpected_chroma) + + with warnings.catch_warnings(record=True) as captured: + warnings.simplefilter("always") + feature_vector = dsp.extract_feature_from_signal( + np.ones(256, dtype=np.float32), + sample_rate=16_000, + feature_flags=FeatureFlags( + mfcc=False, + chroma=True, + mel=False, + contrast=False, + tonnetz=False, + ), + ) + + assert feature_vector.shape == (12,) + assert [str(item.message) for item in captured] == ["unexpected feature warning"] From 89f1c120d803de30be495fe1637764c58567432e Mon Sep 17 00:00:00 2001 From: Juan Sugg Date: Fri, 27 Mar 2026 07:30:11 -0300 Subject: [PATCH 2/2] test: tighten typed transcription phase stubs --- .../test_in_process_orchestration.py | 72 +++++++++++++------ 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/tests/suites/unit/transcription/test_in_process_orchestration.py b/tests/suites/unit/transcription/test_in_process_orchestration.py index f090b179..b1022f37 100644 --- a/tests/suites/unit/transcription/test_in_process_orchestration.py +++ b/tests/suites/unit/transcription/test_in_process_orchestration.py @@ -169,6 +169,28 @@ def test_extract_transcript_in_process_runs_setup_and_releases_model_on_success( active_profile = _Profile() model = object() + def _phase_started(_logger: logging.Logger, *, phase_name: str) -> float: + phase_events.append(("start", phase_name)) + return 1.0 + + def _phase_completed( + _logger: logging.Logger, + *, + phase_name: str, + started_at: float, + ) -> None: + del started_at + phase_events.append(("complete", phase_name)) + + def _phase_failed( + _logger: logging.Logger, + *, + phase_name: str, + started_at: float, + ) -> None: + del started_at + phase_events.append(("failed", phase_name)) + resolved = ipo.extract_transcript_in_process( file_path="sample.wav", language="en", @@ -181,16 +203,9 @@ def test_extract_transcript_in_process_runs_setup_and_releases_model_on_success( load_model_fn=lambda profile: model, transcribe_with_profile_fn=lambda model, language, file_path, active_profile: ["word"], release_memory_fn=lambda *, model: release_calls.append(model), - phase_started_fn=lambda logger, *, phase_name: phase_events.append(("start", phase_name)) - or 1.0, - phase_completed_fn=lambda logger, *, phase_name, started_at: phase_events.append( - ("complete", phase_name) - ) - or None, - phase_failed_fn=lambda logger, *, phase_name, started_at: phase_events.append( - ("failed", phase_name) - ) - or None, + phase_started_fn=_phase_started, + phase_completed_fn=_phase_completed, + phase_failed_fn=_phase_failed, logger=logging.getLogger("ser.tests.in_process_orchestration"), ) @@ -215,6 +230,28 @@ def test_extract_transcript_in_process_reports_transcription_failure_and_release model = object() settings = cast(AppConfig, SimpleNamespace()) + def _phase_started(_logger: logging.Logger, *, phase_name: str) -> float: + phase_events.append(("start", phase_name)) + return 1.0 + + def _phase_completed( + _logger: logging.Logger, + *, + phase_name: str, + started_at: float, + ) -> None: + del started_at + phase_events.append(("complete", phase_name)) + + def _phase_failed( + _logger: logging.Logger, + *, + phase_name: str, + started_at: float, + ) -> None: + del started_at + phase_events.append(("failed", phase_name)) + with pytest.raises(ValueError, match="transcribe failed"): ipo.extract_transcript_in_process( file_path="sample.wav", @@ -228,18 +265,9 @@ def test_extract_transcript_in_process_reports_transcription_failure_and_release (_ for _ in ()).throw(ValueError("transcribe failed")) ), release_memory_fn=lambda *, model: release_calls.append(model), - phase_started_fn=lambda logger, *, phase_name: phase_events.append( - ("start", phase_name) - ) - or 1.0, - phase_completed_fn=lambda logger, *, phase_name, started_at: phase_events.append( - ("complete", phase_name) - ) - or None, - phase_failed_fn=lambda logger, *, phase_name, started_at: phase_events.append( - ("failed", phase_name) - ) - or None, + phase_started_fn=_phase_started, + phase_completed_fn=_phase_completed, + phase_failed_fn=_phase_failed, logger=logging.getLogger("ser.tests.in_process_orchestration"), )