From 9ccec02c44bae3fd679a32c6e7b93988381d91a2 Mon Sep 17 00:00:00 2001 From: frapercan Date: Fri, 8 May 2026 17:34:46 +0200 Subject: [PATCH] =?UTF-8?q?refactor(cafa-eval):=20T-CONTEXTS=20partial=20?= =?UTF-8?q?=E2=80=94=20CafaEvalRunContext?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces ``CafaEvalRunContext`` frozen dataclass in ``_run_cafa_eval_driver.py`` to bundle the 16 per-call inputs that the per-setting NK/LK/PK loop consumes (artifact paths, reranker bundles, delta cohort, scoring snapshot). ``evaluate_all_settings`` signature collapses 18 args → 3 (``session``, ``ctx``, ``emit``). The orchestrator builds the context inline in ``RunCafaEvaluationOperation.execute``; otherwise the loop body reads ``ctx.`` instead of locals. Sizes: - _run_cafa_eval_driver.py: +13 LOC net (+28 for the dataclass and its import, -15 from the collapsed ``evaluate_all_settings`` signature) - run_cafa_evaluation.py: +1 LOC net (-2 at the call site, +3 for the multi-line import) - Smell baseline: 79 -> 78 (`evaluate_all_settings` retired from the params>6 list, no new entries added; params>6 24 → 23) Combined with PR #73 (in flight, KnnEnrichmentContext) the params>6 ratchet should land at ~22 once both merge. Local-first checks all green (ruff + flake8 + pytest 1162 + check_smells). 
--- .smell-baseline.json | 19 ++--- .../core/operations/_run_cafa_eval_driver.py | 79 +++++++++++-------- protea/core/operations/run_cafa_evaluation.py | 13 +-- 3 files changed, 58 insertions(+), 53 deletions(-) diff --git a/.smell-baseline.json b/.smell-baseline.json index a91a42c..bc693a1 100644 --- a/.smell-baseline.json +++ b/.smell-baseline.json @@ -173,7 +173,7 @@ "kind": "method", "path": "protea/core/operations/_run_cafa_eval_driver.py", "name": "_run_cafaeval_for_setting", - "line": 85, + "line": 113, "metric": 73, "threshold": 60 }, @@ -407,8 +407,8 @@ "kind": "method", "path": "protea/core/operations/run_cafa_evaluation.py", "name": "RunCafaEvaluationOperation.execute", - "line": 171, - "metric": 265, + "line": 174, + "metric": 263, "threshold": 60 }, { @@ -596,7 +596,7 @@ "kind": "params", "path": "protea/core/operations/_run_cafa_eval_driver.py", "name": "_write_setting_predictions", - "line": 32, + "line": 60, "metric": 9, "threshold": 6 }, @@ -605,19 +605,10 @@ "kind": "params", "path": "protea/core/operations/_run_cafa_eval_driver.py", "name": "_run_cafaeval_for_setting", - "line": 85, + "line": 113, "metric": 9, "threshold": 6 }, - { - "key": "params::protea/core/operations/_run_cafa_eval_driver.py::evaluate_all_settings", - "kind": "params", - "path": "protea/core/operations/_run_cafa_eval_driver.py", - "name": "evaluate_all_settings", - "line": 160, - "metric": 18, - "threshold": 6 - }, { "key": "params::protea/core/operations/compute_embeddings.py::_embed_t5", "kind": "params", diff --git a/protea/core/operations/_run_cafa_eval_driver.py b/protea/core/operations/_run_cafa_eval_driver.py index e2258ce..cc771a0 100644 --- a/protea/core/operations/_run_cafa_eval_driver.py +++ b/protea/core/operations/_run_cafa_eval_driver.py @@ -18,6 +18,7 @@ import os import signal import uuid +from dataclasses import dataclass from pathlib import Path from typing import Any @@ -29,6 +30,33 @@ from protea.infrastructure.orm.models.embedding.scoring_config import 
ScoringConfig +@dataclass(frozen=True) +class CafaEvalRunContext: + """Bundle of per-run inputs consumed by :func:`evaluate_all_settings`. + + Groups the 16 per-call inputs (artifact paths, reranker bundles, + delta cohort, scoring snapshot) so the entry-point signature stays + under flake8-bugbear's parameter ceiling. + """ + + pred_set_id: uuid.UUID + delta_proteins: set[str] + max_distance: float | None + artifacts_root: Path + has_rerankers: bool + reranker_models: dict[str, dict[str, dict[str, Any]]] + scoring_config_snapshot: ScoringConfig | None + data: EvaluationData + obo_path: str + nk_path: str + lk_path: str + pk_path: str + pk_known_path: str + ia_path: str | None + toi_path: str + shared_pred_dir: str + + def _write_setting_predictions( session: Session, *, @@ -160,60 +188,45 @@ def _run_cafaeval_for_setting( def evaluate_all_settings( session: Session, *, - pred_set_id: uuid.UUID, - delta_proteins: set[str], - max_distance: float | None, - artifacts_root: Path, - has_rerankers: bool, - reranker_models: dict[str, dict[str, dict[str, Any]]], - scoring_config_snapshot: ScoringConfig | None, - data: EvaluationData, - obo_path: str, - nk_path: str, - lk_path: str, - pk_path: str, - pk_known_path: str, - ia_path: str | None, - toi_path: str, - shared_pred_dir: str, + ctx: CafaEvalRunContext, emit: EmitFn, ) -> dict[str, dict[str, Any]]: """Drive the per-setting NK / LK / PK cafaeval loop. Writes per-setting predictions when a reranker applies (otherwise - reuses ``shared_pred_dir``), invokes cafaeval, parses results, + reuses ``ctx.shared_pred_dir``), invokes cafaeval, parses results, persists the full cafaeval artifact tree, and emits per-setting audit events. Returns ``{setting → namespace metrics dict}``. 
""" results: dict[str, dict[str, Any]] = {} for setting, gt_file, known_file in [ - ("NK", nk_path, None), - ("LK", lk_path, None), - ("PK", pk_path, pk_known_path), + ("NK", ctx.nk_path, None), + ("LK", ctx.lk_path, None), + ("PK", ctx.pk_path, ctx.pk_known_path), ]: - if has_rerankers: + if ctx.has_rerankers: pred_dir = _write_setting_predictions( session, setting=setting, - pred_set_id=pred_set_id, - delta_proteins=delta_proteins, - max_distance=max_distance, - artifacts_root=artifacts_root, - reranker_models=reranker_models, - scoring_config_snapshot=scoring_config_snapshot, - data=data, + pred_set_id=ctx.pred_set_id, + delta_proteins=ctx.delta_proteins, + max_distance=ctx.max_distance, + artifacts_root=ctx.artifacts_root, + reranker_models=ctx.reranker_models, + scoring_config_snapshot=ctx.scoring_config_snapshot, + data=ctx.data, ) else: - pred_dir = shared_pred_dir + pred_dir = ctx.shared_pred_dir results[setting] = _run_cafaeval_for_setting( setting=setting, - obo_path=obo_path, + obo_path=ctx.obo_path, pred_dir=pred_dir, gt_file=gt_file, - ia_path=ia_path, + ia_path=ctx.ia_path, known_file=known_file, - toi_path=toi_path, - artifacts_root=artifacts_root, + toi_path=ctx.toi_path, + artifacts_root=ctx.artifacts_root, emit=emit, ) return results diff --git a/protea/core/operations/run_cafa_evaluation.py b/protea/core/operations/run_cafa_evaluation.py index da9dd77..ae3ff52 100644 --- a/protea/core/operations/run_cafa_evaluation.py +++ b/protea/core/operations/run_cafa_evaluation.py @@ -13,7 +13,10 @@ from protea.core.evaluation import load_evaluation_data_for_set from protea.core.operations import _run_cafa_artifacts as _artifacts from protea.core.operations import _run_cafa_data_helpers as _data -from protea.core.operations._run_cafa_eval_driver import evaluate_all_settings +from protea.core.operations._run_cafa_eval_driver import ( + CafaEvalRunContext, + evaluate_all_settings, +) # Re-exports for backwards compatibility with existing imports. 
# Helpers live in ``_run_cafa_helpers`` so this file can stay close @@ -342,9 +345,7 @@ def execute( # Run evaluator for each setting. Body lives in # ``_run_cafa_eval_driver.evaluate_all_settings``. - shared_pred_dir = os.path.join(str(artifacts_root), "predictions") - results = evaluate_all_settings( - session, + run_ctx = CafaEvalRunContext( pred_set_id=pred_set_id, delta_proteins=delta_proteins, max_distance=p.max_distance, @@ -360,9 +361,9 @@ def execute( pk_known_path=pk_known_path, ia_path=ia_path, toi_path=toi_path, - shared_pred_dir=shared_pred_dir, - emit=emit, + shared_pred_dir=os.path.join(str(artifacts_root), "predictions"), ) + results = evaluate_all_settings(session, ctx=run_ctx, emit=emit) # ── 2b. Upload all staged artifacts to the artifact store ──────── for path in sorted(artifacts_root.rglob("*")):