From 9ccec02c44bae3fd679a32c6e7b93988381d91a2 Mon Sep 17 00:00:00 2001 From: frapercan Date: Fri, 8 May 2026 17:34:46 +0200 Subject: [PATCH] =?UTF-8?q?refactor(cafa-eval):=20T-CONTEXTS=20partial=20?= =?UTF-8?q?=E2=80=94=20CafaEvalRunContext?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces ``CafaEvalRunContext`` frozen dataclass in ``_run_cafa_eval_driver.py`` to bundle the 16 per-call inputs that the per-setting NK/LK/PK loop consumes (artifact paths, reranker bundles, delta cohort, scoring snapshot). ``evaluate_all_settings`` signature collapses 18 args → 3 (``session``, ``ctx``, ``emit``). The orchestrator builds the context inline in ``RunCafaEvaluationOperation.execute``; otherwise the loop body reads ``ctx.`` instead of locals. Sizes: - _run_cafa_eval_driver.py: +13 LOC net (+28 for the dataclass and its import, -15 from the collapsed ``evaluate_all_settings`` signature) - run_cafa_evaluation.py: +1 LOC net (-2 at the call site, +3 for the multi-line import) - Smell baseline: 79 -> 78 (`evaluate_all_settings` retired from the params>6 list, no new entries added; params>6 24 → 23) Combined with PR #73 (in flight, KnnEnrichmentContext) the params>6 ratchet should land at ~22 once both merge. Local-first checks all green (ruff + flake8 + pytest 1162 + check_smells). 
--- .smell-baseline.json | 19 ++--- .../core/operations/_run_cafa_eval_driver.py | 79 +++++++++++-------- protea/core/operations/run_cafa_evaluation.py | 13 +-- 3 files changed, 58 insertions(+), 53 deletions(-) diff --git a/.smell-baseline.json b/.smell-baseline.json index a91a42c..bc693a1 100644 --- a/.smell-baseline.json +++ b/.smell-baseline.json @@ -173,7 +173,7 @@ "kind": "method", "path": "protea/core/operations/_run_cafa_eval_driver.py", "name": "_run_cafaeval_for_setting", - "line": 85, + "line": 113, "metric": 73, "threshold": 60 }, @@ -407,8 +407,8 @@ "kind": "method", "path": "protea/core/operations/run_cafa_evaluation.py", "name": "RunCafaEvaluationOperation.execute", - "line": 171, - "metric": 265, + "line": 174, + "metric": 263, "threshold": 60 }, { @@ -596,7 +596,7 @@ "kind": "params", "path": "protea/core/operations/_run_cafa_eval_driver.py", "name": "_write_setting_predictions", - "line": 32, + "line": 60, "metric": 9, "threshold": 6 }, @@ -605,19 +605,10 @@ "kind": "params", "path": "protea/core/operations/_run_cafa_eval_driver.py", "name": "_run_cafaeval_for_setting", - "line": 85, + "line": 113, "metric": 9, "threshold": 6 }, - { - "key": "params::protea/core/operations/_run_cafa_eval_driver.py::evaluate_all_settings", - "kind": "params", - "path": "protea/core/operations/_run_cafa_eval_driver.py", - "name": "evaluate_all_settings", - "line": 160, - "metric": 18, - "threshold": 6 - }, { "key": "params::protea/core/operations/compute_embeddings.py::_embed_t5", "kind": "params", diff --git a/protea/core/operations/_run_cafa_eval_driver.py b/protea/core/operations/_run_cafa_eval_driver.py index e2258ce..cc771a0 100644 --- a/protea/core/operations/_run_cafa_eval_driver.py +++ b/protea/core/operations/_run_cafa_eval_driver.py @@ -18,6 +18,7 @@ import os import signal import uuid +from dataclasses import dataclass from pathlib import Path from typing import Any @@ -29,6 +30,33 @@ from protea.infrastructure.orm.models.embedding.scoring_config import 
ScoringConfig +@dataclass(frozen=True) +class CafaEvalRunContext: + """Bundle of per-run inputs consumed by :func:`evaluate_all_settings`. + + Groups the 16 per-call inputs (artifact paths, reranker bundles, + delta cohort, scoring snapshot) so the entry-point signature stays + under flake8-bugbear's parameter ceiling. + """ + + pred_set_id: uuid.UUID + delta_proteins: set[str] + max_distance: float | None + artifacts_root: Path + has_rerankers: bool + reranker_models: dict[str, dict[str, dict[str, Any]]] + scoring_config_snapshot: ScoringConfig | None + data: EvaluationData + obo_path: str + nk_path: str + lk_path: str + pk_path: str + pk_known_path: str + ia_path: str | None + toi_path: str + shared_pred_dir: str + + def _write_setting_predictions( session: Session, *, @@ -160,60 +188,45 @@ def _run_cafaeval_for_setting( def evaluate_all_settings( session: Session, *, - pred_set_id: uuid.UUID, - delta_proteins: set[str], - max_distance: float | None, - artifacts_root: Path, - has_rerankers: bool, - reranker_models: dict[str, dict[str, dict[str, Any]]], - scoring_config_snapshot: ScoringConfig | None, - data: EvaluationData, - obo_path: str, - nk_path: str, - lk_path: str, - pk_path: str, - pk_known_path: str, - ia_path: str | None, - toi_path: str, - shared_pred_dir: str, + ctx: CafaEvalRunContext, emit: EmitFn, ) -> dict[str, dict[str, Any]]: """Drive the per-setting NK / LK / PK cafaeval loop. Writes per-setting predictions when a reranker applies (otherwise - reuses ``shared_pred_dir``), invokes cafaeval, parses results, + reuses ``ctx.shared_pred_dir``), invokes cafaeval, parses results, persists the full cafaeval artifact tree, and emits per-setting audit events. Returns ``{setting → namespace metrics dict}``. 
""" results: dict[str, dict[str, Any]] = {} for setting, gt_file, known_file in [ - ("NK", nk_path, None), - ("LK", lk_path, None), - ("PK", pk_path, pk_known_path), + ("NK", ctx.nk_path, None), + ("LK", ctx.lk_path, None), + ("PK", ctx.pk_path, ctx.pk_known_path), ]: - if has_rerankers: + if ctx.has_rerankers: pred_dir = _write_setting_predictions( session, setting=setting, - pred_set_id=pred_set_id, - delta_proteins=delta_proteins, - max_distance=max_distance, - artifacts_root=artifacts_root, - reranker_models=reranker_models, - scoring_config_snapshot=scoring_config_snapshot, - data=data, + pred_set_id=ctx.pred_set_id, + delta_proteins=ctx.delta_proteins, + max_distance=ctx.max_distance, + artifacts_root=ctx.artifacts_root, + reranker_models=ctx.reranker_models, + scoring_config_snapshot=ctx.scoring_config_snapshot, + data=ctx.data, ) else: - pred_dir = shared_pred_dir + pred_dir = ctx.shared_pred_dir results[setting] = _run_cafaeval_for_setting( setting=setting, - obo_path=obo_path, + obo_path=ctx.obo_path, pred_dir=pred_dir, gt_file=gt_file, - ia_path=ia_path, + ia_path=ctx.ia_path, known_file=known_file, - toi_path=toi_path, - artifacts_root=artifacts_root, + toi_path=ctx.toi_path, + artifacts_root=ctx.artifacts_root, emit=emit, ) return results diff --git a/protea/core/operations/run_cafa_evaluation.py b/protea/core/operations/run_cafa_evaluation.py index da9dd77..ae3ff52 100644 --- a/protea/core/operations/run_cafa_evaluation.py +++ b/protea/core/operations/run_cafa_evaluation.py @@ -13,7 +13,10 @@ from protea.core.evaluation import load_evaluation_data_for_set from protea.core.operations import _run_cafa_artifacts as _artifacts from protea.core.operations import _run_cafa_data_helpers as _data -from protea.core.operations._run_cafa_eval_driver import evaluate_all_settings +from protea.core.operations._run_cafa_eval_driver import ( + CafaEvalRunContext, + evaluate_all_settings, +) # Re-exports for backwards compatibility with existing imports. 
# Helpers live in ``_run_cafa_helpers`` so this file can stay close @@ -342,9 +345,7 @@ def execute( # Run evaluator for each setting. Body lives in # ``_run_cafa_eval_driver.evaluate_all_settings``. - shared_pred_dir = os.path.join(str(artifacts_root), "predictions") - results = evaluate_all_settings( - session, + run_ctx = CafaEvalRunContext( pred_set_id=pred_set_id, delta_proteins=delta_proteins, max_distance=p.max_distance, @@ -360,9 +361,9 @@ def execute( pk_known_path=pk_known_path, ia_path=ia_path, toi_path=toi_path, - shared_pred_dir=shared_pred_dir, - emit=emit, + shared_pred_dir=os.path.join(str(artifacts_root), "predictions"), ) + results = evaluate_all_settings(session, ctx=run_ctx, emit=emit) # ── 2b. Upload all staged artifacts to the artifact store ──────── for path in sorted(artifacts_root.rglob("*")):