Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 5 additions & 14 deletions .smell-baseline.json
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@
"kind": "method",
"path": "protea/core/operations/_run_cafa_eval_driver.py",
"name": "_run_cafaeval_for_setting",
"line": 85,
"line": 113,
"metric": 73,
"threshold": 60
},
Expand Down Expand Up @@ -407,8 +407,8 @@
"kind": "method",
"path": "protea/core/operations/run_cafa_evaluation.py",
"name": "RunCafaEvaluationOperation.execute",
"line": 171,
"metric": 265,
"line": 174,
"metric": 263,
"threshold": 60
},
{
Expand Down Expand Up @@ -596,7 +596,7 @@
"kind": "params",
"path": "protea/core/operations/_run_cafa_eval_driver.py",
"name": "_write_setting_predictions",
"line": 32,
"line": 60,
"metric": 9,
"threshold": 6
},
Expand All @@ -605,19 +605,10 @@
"kind": "params",
"path": "protea/core/operations/_run_cafa_eval_driver.py",
"name": "_run_cafaeval_for_setting",
"line": 85,
"line": 113,
"metric": 9,
"threshold": 6
},
{
"key": "params::protea/core/operations/_run_cafa_eval_driver.py::evaluate_all_settings",
"kind": "params",
"path": "protea/core/operations/_run_cafa_eval_driver.py",
"name": "evaluate_all_settings",
"line": 160,
"metric": 18,
"threshold": 6
},
{
"key": "params::protea/core/operations/compute_embeddings.py::_embed_t5",
"kind": "params",
Expand Down
79 changes: 46 additions & 33 deletions protea/core/operations/_run_cafa_eval_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import os
import signal
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Any

Expand All @@ -29,6 +30,33 @@
from protea.infrastructure.orm.models.embedding.scoring_config import ScoringConfig


@dataclass(frozen=True)
class CafaEvalRunContext:
    """Bundle of per-run inputs consumed by :func:`evaluate_all_settings`.

    Groups the 16 per-call inputs (artifact paths, reranker bundles,
    delta cohort, scoring snapshot) so the entry-point signature stays
    under flake8-bugbear's parameter ceiling.

    Frozen so a single instance can be shared safely across the
    NK / LK / PK setting loop without risk of mutation between settings.
    """

    # Identifier of the prediction set being evaluated.
    pred_set_id: uuid.UUID
    # Protein accessions in the delta cohort for this run.
    delta_proteins: set[str]
    # Optional distance cutoff; None means no cutoff is applied.
    max_distance: float | None
    # Root directory under which all cafaeval artifacts are staged.
    artifacts_root: Path
    # When True, per-setting predictions are (re)written via a reranker;
    # when False, ``shared_pred_dir`` is reused for every setting.
    has_rerankers: bool
    # Nested reranker model bundles — presumably keyed by namespace/setting;
    # TODO(review): confirm key structure against _write_setting_predictions.
    reranker_models: dict[str, dict[str, dict[str, Any]]]
    # Snapshot of the scoring configuration at run time, if any.
    scoring_config_snapshot: ScoringConfig | None
    # Loaded evaluation data for the prediction set.
    data: EvaluationData
    # Path to the ontology file (``.obo``) passed to cafaeval.
    obo_path: str
    # Ground-truth file for the NK (no-knowledge) setting.
    nk_path: str
    # Ground-truth file for the LK (limited-knowledge) setting.
    lk_path: str
    # Ground-truth file for the PK (partial-knowledge) setting.
    pk_path: str
    # Known-annotations file used only by the PK setting.
    pk_known_path: str
    # Optional information-accretion weights file; None disables weighting.
    ia_path: str | None
    # Targets-of-interest file passed through to cafaeval.
    toi_path: str
    # Prediction directory shared by all settings when no reranker applies.
    shared_pred_dir: str


def _write_setting_predictions(
session: Session,
*,
Expand Down Expand Up @@ -160,60 +188,45 @@ def _run_cafaeval_for_setting(
def evaluate_all_settings(
    session: Session,
    *,
    ctx: CafaEvalRunContext,
    emit: EmitFn,
) -> dict[str, dict[str, Any]]:
    """Drive the per-setting NK / LK / PK cafaeval loop.

    Writes per-setting predictions when a reranker applies (otherwise
    reuses ``ctx.shared_pred_dir``), invokes cafaeval, parses results,
    persists the full cafaeval artifact tree, and emits per-setting
    audit events. Returns ``{setting → namespace metrics dict}``.

    Parameters
    ----------
    session:
        Open database session forwarded to the prediction writer.
    ctx:
        Immutable bundle of per-run inputs (paths, reranker bundles,
        delta cohort, scoring snapshot).
    emit:
        Audit-event callback forwarded to the per-setting evaluator.
    """
    results: dict[str, dict[str, Any]] = {}
    # PK is the only setting that carries a known-annotations file.
    for setting, gt_file, known_file in [
        ("NK", ctx.nk_path, None),
        ("LK", ctx.lk_path, None),
        ("PK", ctx.pk_path, ctx.pk_known_path),
    ]:
        if ctx.has_rerankers:
            # Rerankers produce setting-specific predictions, so each
            # setting gets its own freshly written prediction directory.
            pred_dir = _write_setting_predictions(
                session,
                setting=setting,
                pred_set_id=ctx.pred_set_id,
                delta_proteins=ctx.delta_proteins,
                max_distance=ctx.max_distance,
                artifacts_root=ctx.artifacts_root,
                reranker_models=ctx.reranker_models,
                scoring_config_snapshot=ctx.scoring_config_snapshot,
                data=ctx.data,
            )
        else:
            # No reranker: identical predictions apply to every setting.
            pred_dir = ctx.shared_pred_dir
        results[setting] = _run_cafaeval_for_setting(
            setting=setting,
            obo_path=ctx.obo_path,
            pred_dir=pred_dir,
            gt_file=gt_file,
            ia_path=ctx.ia_path,
            known_file=known_file,
            toi_path=ctx.toi_path,
            artifacts_root=ctx.artifacts_root,
            emit=emit,
        )
    return results
13 changes: 7 additions & 6 deletions protea/core/operations/run_cafa_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
from protea.core.evaluation import load_evaluation_data_for_set
from protea.core.operations import _run_cafa_artifacts as _artifacts
from protea.core.operations import _run_cafa_data_helpers as _data
from protea.core.operations._run_cafa_eval_driver import evaluate_all_settings
from protea.core.operations._run_cafa_eval_driver import (
CafaEvalRunContext,
evaluate_all_settings,
)

# Re-exports for backwards compatibility with existing imports.
# Helpers live in ``_run_cafa_helpers`` so this file can stay close
Expand Down Expand Up @@ -342,9 +345,7 @@ def execute(

# Run evaluator for each setting. Body lives in
# ``_run_cafa_eval_driver.evaluate_all_settings``.
shared_pred_dir = os.path.join(str(artifacts_root), "predictions")
results = evaluate_all_settings(
session,
run_ctx = CafaEvalRunContext(
pred_set_id=pred_set_id,
delta_proteins=delta_proteins,
max_distance=p.max_distance,
Expand All @@ -360,9 +361,9 @@ def execute(
pk_known_path=pk_known_path,
ia_path=ia_path,
toi_path=toi_path,
shared_pred_dir=shared_pred_dir,
emit=emit,
shared_pred_dir=os.path.join(str(artifacts_root), "predictions"),
)
results = evaluate_all_settings(session, ctx=run_ctx, emit=emit)

# ── 2b. Upload all staged artifacts to the artifact store ────────
for path in sorted(artifacts_root.rglob("*")):
Expand Down
Loading