diff --git a/pyproject.toml b/pyproject.toml index 0aea578..bc8f111 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,13 +42,23 @@ target-version = "py312" [tool.ruff.lint] # E/F: pycodestyle + pyflakes I: isort UP: pyupgrade B: flake8-bugbear # SIM: flake8-simplify LOG: flake8-logging RET: flake8-return PTH: flake8-use-pathlib -# BLE + TRY (exception hygiene) are added in Wave 2 alongside the bare-except cleanup. -select = ["E", "F", "I", "UP", "B", "SIM", "LOG", "RET", "PTH"] +# BLE: no-bare-except TRY: tryceratops (exception hygiene) +select = ["E", "F", "I", "UP", "B", "SIM", "LOG", "RET", "PTH", "BLE", "TRY"] ignore = [ "E501", # line-too-long (handled by formatter preference) "RET504", # unnecessary assignment before return (noisy) + "TRY003", # long messages in raise — our domain errors benefit from prose + "TRY301", # abstract raise into own block — stylistic, rarely actionable + "TRY300", # consider-else-block after try — stylistic, not buggy + "TRY004", # prefer TypeError for isinstance failures — our LLM-parse + # results are genuine *value* errors (malformed payload), not + # programmer type errors. ValueError is the right semantic. ] +# Tests may use broad excepts to assert "some error occurred" behavior. +[tool.ruff.lint.extend-per-file-ignores] +"tests/*.py" = ["BLE001"] + # Seed data modules are bulk content, not hand-maintained logic. [tool.ruff.lint.per-file-ignores] "skillforge/seeds/batch*.py" = ["E402", "E741", "F401", "F811", "SIM", "B"] diff --git a/skillforge/agents/_json.py b/skillforge/agents/_json.py new file mode 100644 index 0000000..417cc92 --- /dev/null +++ b/skillforge/agents/_json.py @@ -0,0 +1,99 @@ +"""Robust JSON-array extraction from LLM prose. + +Shared by agents that ask an LLM for a list of structured items and must +tolerate responses wrapped in prose, code fences, nested backticks, or +string values that contain ``[``/``]``. See ``docs/clean-code.md`` §1 +(reuse over duplication). +""" + +from __future__ import annotations + +import json +import re + +from skillforge.errors import ParseError + +_FENCE_RE = re.compile(r"```(?:json)?\s*\n?(.*)\n?```", re.DOTALL) + + +def extract_json_array(text: str) -> list[dict]: + """Return the outermost JSON array embedded in ``text``. + + Handles three response shapes: + 1. Raw JSON array — the entire response is a ``[...]``. + 2. Fenced block — response wrapped in ```` ```json ... ``` ```` fences. + Matched greedily so nested fences in string values don't split. + 3. Array embedded in prose — extracted via bracket-depth scanning + that respects JSON string literal state (``[``/``]`` inside + string values don't perturb the depth counter). + + Raises: + ParseError: no balanced JSON array could be located. 
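+
+    Example (illustrative, prose-embedded case):
+        >>> extract_json_array('Here you go: [{"id": 1}, {"id": 2}] and done')
+        [{'id': 1}, {'id': 2}]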
+ """ + candidate = text.strip() + + if candidate.startswith("[") and candidate.endswith("]"): + try: + parsed = json.loads(candidate) + except json.JSONDecodeError: + pass + else: + if isinstance(parsed, list): + return parsed + + fence_match = _FENCE_RE.search(text) + if fence_match: + fenced = fence_match.group(1).strip() + try: + parsed = json.loads(fenced) + except json.JSONDecodeError: + text_to_scan = fenced + else: + if isinstance(parsed, list): + return parsed + text_to_scan = fenced + else: + text_to_scan = text + + array_src = _scan_outermost_array(text_to_scan) + if array_src is not None: + try: + parsed = json.loads(array_src) + except json.JSONDecodeError: + pass + else: + if isinstance(parsed, list): + return parsed + + raise ParseError("no valid JSON array found in response text") + + +def _scan_outermost_array(text: str) -> str | None: + """Return the outermost balanced ``[...]`` substring, or ``None``.""" + start = text.find("[") + if start == -1: + return None + + depth = 0 + in_string = False + escape = False + for i in range(start, len(text)): + ch = text[i] + if escape: + escape = False + continue + if ch == "\\": + escape = True + continue + if ch == '"': + in_string = not in_string + continue + if in_string: + continue + if ch == "[": + depth += 1 + elif ch == "]": + depth -= 1 + if depth == 0: + return text[start : i + 1] + return None diff --git a/skillforge/agents/breeder.py b/skillforge/agents/breeder.py index 13c673d..e2d0aac 100644 --- a/skillforge/agents/breeder.py +++ b/skillforge/agents/breeder.py @@ -20,6 +20,7 @@ from __future__ import annotations import json +import logging import re from datetime import UTC, datetime @@ -35,6 +36,8 @@ ) from skillforge.models import Generation, SkillGenome +logger = logging.getLogger("skillforge.agents.breeder") + # --------------------------------------------------------------------------- # Slot allocation # --------------------------------------------------------------------------- @@ -150,9 +153,9 @@ async def breed( breeding_instructions=diagnostic_instructions, ) next_gen.extend(diagnostic_children[: slots["diagnostic"]]) - except Exception as exc: # noqa: BLE001 - # Fall through — wildcard slots below absorb the shortfall - print(f"breeder: diagnostic mutation failed: {exc}") + except Exception: # noqa: BLE001 — subagent boundary: one slot failure must not kill the whole breed + # Fall through — wildcard slots below absorb the shortfall. 
+ logger.exception("breeder.diagnostic_failed") # --- Reflective crossover: combine 2-3 Pareto-optimal parents --- pareto_parents = [s for s in ranked if s.is_pareto_optimal][:3] @@ -171,8 +174,8 @@ async def breed( breeding_instructions=crossover_instructions, ) next_gen.extend(crossover_children[: slots["crossover"]]) - except Exception as exc: # noqa: BLE001 - print(f"breeder: crossover failed: {exc}") + except Exception: # noqa: BLE001 — subagent boundary: one slot failure must not kill the whole breed + logger.exception("breeder.crossover_failed") # --- Wildcard: fresh Skills via spawn_gen0 --- if slots["wildcards"] > 0: @@ -182,14 +185,14 @@ async def breed( pop_size=slots["wildcards"], ) # Mark wildcards as mutations on the next generation - next_gen_num = (generation.number + 1) + next_gen_num = generation.number + 1 for w in wildcards: w.generation = next_gen_num w.mutations = ["wildcard"] w.mutation_rationale = "Wildcard slot: fresh spawn to prevent convergence" next_gen.extend(wildcards) - except Exception as exc: # noqa: BLE001 - print(f"breeder: wildcard spawn failed: {exc}") + except Exception: # noqa: BLE001 — subagent boundary: one slot failure must not kill the whole breed + logger.exception("breeder.wildcard_spawn_failed") # --- Trim or pad to exactly target_pop_size --- next_gen = next_gen[:target_pop_size] @@ -413,8 +416,12 @@ async def _extract_lessons(context: str, learning_log: list[str]) -> list[str]: max_tokens=500, messages=[{"role": "user", "content": prompt}], ) - except Exception as exc: # noqa: BLE001 - return [f"(lesson extraction failed: {exc})"] + except Exception: + # Degrade gracefully — a breeder that blocks on LLM hiccups would + # stall the whole run. The SDK has many concrete error types across + # versions; catching at the boundary keeps the engine moving. + logger.exception("breeder.lesson_extraction_failed") + return ["(lesson extraction failed)"] match = re.search(r"\[.*\]", text, re.DOTALL) if not match: @@ -452,8 +459,10 @@ async def _extract_breeding_report( max_tokens=800, messages=[{"role": "user", "content": prompt}], ) - except Exception as exc: # noqa: BLE001 - return f"(breeding report failed: {exc})" + except Exception: + # Degrade gracefully — see _extract_lessons for rationale. + logger.exception("breeder.report_extraction_failed") + return "(breeding report failed)" async def _extract_consolidated( @@ -486,8 +495,10 @@ async def _extract_consolidated( max_tokens=1200, messages=[{"role": "user", "content": prompt}], ) - except Exception as exc: # noqa: BLE001 - return ([f"(consolidated extraction failed: {exc})"], "") + except Exception: + # Degrade gracefully — see _extract_lessons for rationale. 
+ logger.exception("breeder.consolidated_extraction_failed") + return (["(consolidated extraction failed)"], "") match = re.search(r"\{.*\}", text, re.DOTALL) if not match: @@ -526,8 +537,8 @@ def publish_findings_to_bible( findings_dir = BIBLE_DIR / "findings" try: findings_dir.mkdir(parents=True, exist_ok=True) - except OSError as exc: - print(f"bible: failed to create findings dir: {exc}") + except OSError: + logger.exception("bible.findings_dir_mkdir_failed") return # Determine the next finding number by scanning existing files @@ -556,8 +567,8 @@ def publish_findings_to_bible( ) try: (findings_dir / filename).write_text(content) - except OSError as exc: - print(f"bible: failed to write finding {filename}: {exc}") + except OSError: + logger.exception("bible.finding_write_failed", extra={"filename": filename}) continue next_num += 1 @@ -570,8 +581,8 @@ def publish_findings_to_bible( existing = "# Evolution Log\n\n*Chronological log of all SkillForge evolution runs.*\n\n" entry_line = f"- **{timestamp}** — run `{run_id[:8]}` gen {generation}: {len(new_entries)} new finding(s)\n" log_path.write_text(existing + entry_line) - except OSError as exc: - print(f"bible: failed to update evolution log: {exc}") + except OSError: + logger.exception("bible.evolution_log_write_failed") def _slugify(text: str) -> str: diff --git a/skillforge/agents/challenge_designer.py b/skillforge/agents/challenge_designer.py index 5f46e56..45c3819 100644 --- a/skillforge/agents/challenge_designer.py +++ b/skillforge/agents/challenge_designer.py @@ -13,13 +13,13 @@ from __future__ import annotations -import json -import re import uuid from anthropic import AsyncAnthropic +from skillforge.agents._json import extract_json_array from skillforge.config import ANTHROPIC_API_KEY, model_for +from skillforge.errors import ParseError from skillforge.models import Challenge # JSON schema description embedded in prompts @@ -35,90 +35,6 @@ ]""" -def _extract_json_array(text: str) -> list[dict]: - """Extract a JSON array from text. - - Robust against: - 1. Raw JSON array (ideal case) - 2. ``` json ... ``` fences with nested backticks in string values - 3. JSON embedded in prose with `[`/`]` characters in string literals - - Raises: - ValueError: if no valid JSON array can be extracted. - """ - candidate = text.strip() - - # 1. Try the whole text as JSON - if candidate.startswith("[") and candidate.endswith("]"): - try: - result = json.loads(candidate) - if isinstance(result, list): - return result - except json.JSONDecodeError: - pass - - # 2. Strip outer ```json ... ``` fence greedily - fence_match = re.search(r"```(?:json)?\s*\n?(.*)\n?```", text, re.DOTALL) - if fence_match: - fenced = fence_match.group(1).strip() - try: - result = json.loads(fenced) - if isinstance(result, list): - return result - except json.JSONDecodeError: - text_to_scan = fenced - else: - text_to_scan = fenced - else: - text_to_scan = text - - # 3. Bracket-depth scan respecting string literal state - array = _scan_outermost_array(text_to_scan) - if array is not None: - try: - result = json.loads(array) - if isinstance(result, list): - return result - except json.JSONDecodeError: - pass - - raise ValueError("No valid JSON array found in response text") - - -def _scan_outermost_array(text: str) -> str | None: - """Find the outermost JSON array via bracket-depth scanning that - respects string literal state. Returns substring including ``[`` and - ``]``, or ``None`` if no balanced array found. 
- """ - start = text.find("[") - if start == -1: - return None - - depth = 0 - in_string = False - escape = False - for i in range(start, len(text)): - ch = text[i] - if escape: - escape = False - continue - if ch == "\\": - escape = True - continue - if ch == '"': - in_string = not in_string - continue - if in_string: - continue - if ch == "[": - depth += 1 - elif ch == "]": - depth -= 1 - if depth == 0: - return text[start : i + 1] - return None - - _FILE_CONVENTION = """\ ## File convention (STRICT — follow exactly) @@ -261,14 +177,14 @@ async def design_challenges(specialization: str, n: int = 3) -> list[Challenge]: text = await _generate(_build_system_prompt(specialization, n)) try: - raw = _extract_json_array(text) - except ValueError: + raw = extract_json_array(text) + except (ValueError, ParseError): # Attempt 2 — retry with more explicit prompt text = await _generate(_build_retry_prompt(specialization, n)) try: - raw = _extract_json_array(text) - except ValueError as err: - raise ValueError( + raw = extract_json_array(text) + except (ValueError, ParseError) as err: + raise ParseError( "challenge designer failed to produce valid JSON after 2 attempts" ) from err @@ -331,10 +247,10 @@ async def design_variant_challenge( prompt = _build_variant_system_prompt(specialization, dimension) text = await _generate(prompt) try: - raw = _extract_json_array(text) - except ValueError: + raw = extract_json_array(text) + except (ValueError, ParseError): text = await _generate(_build_retry_prompt(specialization, n=1)) - raw = _extract_json_array(text) + raw = extract_json_array(text) challenges = _parse_challenges(raw) if len(challenges) != 1: diff --git a/skillforge/agents/competitor_sdk.py b/skillforge/agents/competitor_sdk.py index b48b9f1..c672c66 100644 --- a/skillforge/agents/competitor_sdk.py +++ b/skillforge/agents/competitor_sdk.py @@ -19,7 +19,7 @@ from __future__ import annotations import asyncio -import sys +import logging from pathlib import Path from claude_agent_sdk import ClaudeAgentOptions, query @@ -28,6 +28,8 @@ from skillforge.engine.sandbox import collect_written_files from skillforge.models import Challenge, CompetitionResult, SkillGenome +logger = logging.getLogger("skillforge.agents.competitor_sdk") + def _message_to_dict(msg) -> dict: """Convert an SDK message to a JSON-safe dict for trace storage.""" @@ -123,8 +125,8 @@ async def run_competitor( trace=trace, judge_reasoning="timeout after 300s", ) - except Exception as exc: - print(f"sdk error in run_competitor: {exc}", file=sys.stderr) + except Exception as exc: # noqa: BLE001 — SDK boundary: degrade gracefully, never crash the run + logger.exception("sdk error in run_competitor") output_files = collect_written_files(sandbox_path / "output") return CompetitionResult( skill_id=skill.id, diff --git a/skillforge/agents/spawner.py b/skillforge/agents/spawner.py index 4921459..c25c036 100644 --- a/skillforge/agents/spawner.py +++ b/skillforge/agents/spawner.py @@ -15,14 +15,14 @@ from __future__ import annotations -import json -import re import uuid from anthropic import AsyncAnthropic +from skillforge.agents._json import extract_json_array from skillforge.config import ANTHROPIC_API_KEY, BIBLE_DIR, GOLDEN_TEMPLATE_DIR, model_for from skillforge.engine.sandbox import validate_skill_structure +from skillforge.errors import ParseError from skillforge.models import SkillGenome # JSON schema for spawner responses @@ -69,98 +69,6 @@ def _read_bible_patterns() -> str: return "\n\n---\n\n".join(parts) -def _extract_json_array(text: 
str) -> list[dict]: - """Extract a JSON array from text. - - Handles three cases robustly: - 1. Whole response is a raw JSON array - 2. Response is wrapped in ``` json ... ``` fences (greedy match of - the outermost fence — SKILL.md content can contain nested fences - that a non-greedy match would trip over) - 3. JSON array embedded in prose, extracted via bracket-depth scanning - that respects string literal state (handles `[` and `]` inside - string values like Python list comp examples) - - Raises: - ValueError: if no valid JSON array can be extracted. - """ - candidate = text.strip() - - # 1. Try the whole text as JSON (ideal case) - if candidate.startswith("[") and candidate.endswith("]"): - try: - result = json.loads(candidate) - if isinstance(result, list): - return result - except json.JSONDecodeError: - pass - - # 2. Strip outer ```json ... ``` fence greedily (matches LAST ```). - # Non-greedy would stop at the first nested ``` inside string values. - fence_match = re.search(r"```(?:json)?\s*\n?(.*)\n?```", text, re.DOTALL) - if fence_match: - fenced = fence_match.group(1).strip() - try: - result = json.loads(fenced) - if isinstance(result, list): - return result - except json.JSONDecodeError: - # Fall through to bracket scanning on the fenced content - text_to_scan = fenced - else: - text_to_scan = fenced - else: - text_to_scan = text - - # 3. Bracket-depth scan that respects JSON string literal state - array = _scan_outermost_array(text_to_scan) - if array is not None: - try: - result = json.loads(array) - if isinstance(result, list): - return result - except json.JSONDecodeError: - pass - - raise ValueError("No valid JSON array found in response text") - - -def _scan_outermost_array(text: str) -> str | None: - """Find the outermost JSON array substring via bracket-depth scanning. - - Properly tracks string literal state so brackets inside string values - don't throw off the depth counter. Returns the substring (including the - outer ``[`` and ``]``), or ``None`` if no balanced array is found. - """ - start = text.find("[") - if start == -1: - return None - - depth = 0 - in_string = False - escape = False - for i in range(start, len(text)): - ch = text[i] - if escape: - escape = False - continue - if ch == "\\": - escape = True - continue - if ch == '"': - in_string = not in_string - continue - if in_string: - continue - if ch == "[": - depth += 1 - elif ch == "]": - depth -= 1 - if depth == 0: - return text[start : i + 1] - return None - - def _extract_response_text(response) -> str: """Extract text from an Anthropic Messages API response. @@ -388,11 +296,11 @@ async def spawn_gen0(specialization: str, pop_size: int) -> list[SkillGenome]: _save_debug_response("spawn_gen0_attempt1", text) try: - raw = _extract_json_array(text) + raw = extract_json_array(text) genomes = _parse_genomes(raw, generation=0) valid_genomes, invalid = _validate_genomes(genomes) first_attempt_failed = False - except ValueError: + except (ValueError, ParseError): # JSON parse failure — treat as if everything was invalid so the # retry path runs. genomes = [] @@ -421,8 +329,8 @@ async def spawn_gen0(specialization: str, pop_size: int) -> list[SkillGenome]: _save_debug_response("spawn_gen0_attempt2", text) try: - raw2 = _extract_json_array(text) - except ValueError as exc: + raw2 = extract_json_array(text) + except (ValueError, ParseError) as exc: raise ValueError( f"spawner failed to produce valid JSON on retry: {exc}. " f"See /tmp/sf-spawn_gen0_attempt2.txt for the raw response." 
@@ -473,8 +381,8 @@ async def breed_next_gen( text = await _generate(system_prompt) try: - raw = _extract_json_array(text) - except ValueError as exc: + raw = extract_json_array(text) + except (ValueError, ParseError) as exc: raise ValueError( f"spawner breed_next_gen failed to produce valid JSON: {exc}" ) from exc @@ -506,8 +414,8 @@ async def breed_next_gen( text = await _generate(repair_prompt) try: - raw2 = _extract_json_array(text) - except ValueError as exc: + raw2 = extract_json_array(text) + except (ValueError, ParseError) as exc: raise ValueError( f"spawner breed_next_gen failed to produce valid JSON on retry: {exc}" ) from exc @@ -627,8 +535,8 @@ async def spawn_from_parent( text = await _generate(system_prompt) try: - raw = _extract_json_array(text) - except ValueError: + raw = extract_json_array(text) + except (ValueError, ParseError): # If the LLM refused or produced garbage, fall back to elite-only # (graceful degradation — evolution can still proceed with just the parent) return [elite] @@ -773,8 +681,8 @@ async def spawn_variant_gen0( _save_debug_response(f"spawn_variant_gen0_{dimension.get('name', 'unknown')}", text) try: - raw = _extract_json_array(text) - except ValueError: + raw = extract_json_array(text) + except (ValueError, ParseError): # One retry with a stricter formatting reminder retry_prompt = ( system_prompt @@ -783,7 +691,7 @@ async def spawn_variant_gen0( "markdown fences." ) text = await _generate(retry_prompt) - raw = _extract_json_array(text) + raw = extract_json_array(text) genomes = _parse_genomes(raw, generation=0) valid_genomes, invalid = _validate_genomes(genomes) diff --git a/skillforge/api/debug.py b/skillforge/api/debug.py index fea8b2a..a0543eb 100644 --- a/skillforge/api/debug.py +++ b/skillforge/api/debug.py @@ -467,17 +467,14 @@ async def debug_status(token: str = "") -> dict: if token != ADMIN_TOKEN: raise HTTPException(status_code=403, detail="invalid admin token") - from skillforge.api.routes import _active_runs from skillforge.db.queries import list_leaked_skills, list_runs + from skillforge.engine.run_registry import registry # Active runs - active = [] - for run_id, task in _active_runs.items(): - active.append({ - "run_id": run_id, - "done": task.done(), - "cancelled": task.cancelled(), - }) + active = [ + {"run_id": run_id, "done": task.done(), "cancelled": task.cancelled()} + for run_id, task in registry.iter_tasks() + ] # Recent failed runs all_runs = await list_runs(limit=20) diff --git a/skillforge/api/journal.py b/skillforge/api/journal.py index fae426c..b5f8af5 100644 --- a/skillforge/api/journal.py +++ b/skillforge/api/journal.py @@ -63,7 +63,9 @@ async def list_journal_entries() -> list[dict]: for path in sorted(JOURNAL_DIR.glob("*.md"), reverse=True): try: entries.append(_parse_entry(path)) - except Exception: + except (OSError, UnicodeDecodeError, ValueError): + # Skip malformed entries — the journal index should never + # crash because a single file is unreadable. 
continue return entries diff --git a/skillforge/api/llms.py b/skillforge/api/llms.py index 970dda6..39d10c0 100644 --- a/skillforge/api/llms.py +++ b/skillforge/api/llms.py @@ -12,6 +12,7 @@ from __future__ import annotations import json +import logging from pathlib import Path from fastapi import APIRouter, HTTPException @@ -20,6 +21,8 @@ from skillforge.config import ROOT_DIR from skillforge.db.queries import _connect +logger = logging.getLogger("skillforge.api.llms") + router = APIRouter(tags=["llms"]) SITE_URL = "https://skld.run" @@ -87,7 +90,7 @@ async def sitemap_xml() -> Response: for (run_id,) in await cursor.fetchall(): urls.append(f"{SITE_URL}/runs/{run_id}") urls.append(f"{SITE_URL}/runs/{run_id}.md") - except Exception: + except Exception: # noqa: BLE001 — llms.txt endpoint must serve partial content, never 500 pass body_lines = ['', @@ -283,7 +286,7 @@ async def bench_md() -> PlainTextResponse: try: data = json.loads(family_json.read_text(encoding="utf-8")) summary = data.get("description") or data.get("summary") or "" - except Exception: + except Exception: # noqa: BLE001 — llms.txt endpoint must serve partial content, never 500 pass lines.append(f"- [{slug}]({SITE_URL}/bench/{slug}.md)" + (f" — {summary}" if summary else "")) @@ -305,7 +308,7 @@ async def bench_family_md(slug: str) -> PlainTextResponse: try: data = json.loads(family_json.read_text(encoding="utf-8")) body += "\n\n## family.json\n\n```json\n" + json.dumps(data, indent=2) + "\n```\n" - except Exception: + except Exception: # noqa: BLE001 — llms.txt endpoint must serve partial content, never 500 pass return _md(body) @@ -330,7 +333,7 @@ async def registry_md() -> PlainTextResponse: "FROM evolution_runs ORDER BY created_at DESC LIMIT 200" ) rows = await cursor.fetchall() - except Exception: + except Exception: # noqa: BLE001 — llms.txt endpoint must serve partial content, never 500 rows = [] if not rows: lines.append("_No runs found._") @@ -348,7 +351,7 @@ async def run_md(run_id: str) -> PlainTextResponse: try: from skillforge.db.queries import get_run run = await get_run(run_id) - except Exception: + except Exception: # noqa: BLE001 — llms.txt endpoint must serve partial content, never 500 run = None if run is None: raise HTTPException(status_code=404, detail=f"run not found: {run_id}") @@ -371,7 +374,7 @@ async def run_md(run_id: str) -> PlainTextResponse: try: fit = max(best.pareto_objectives.values()) lines.append(f"- **Fitness:** {fit:.4f}") - except Exception: + except Exception: # noqa: BLE001 — llms.txt endpoint must serve partial content, never 500 pass lines.append("- **Pareto objectives:**") for k, v in sorted(best.pareto_objectives.items()): @@ -393,6 +396,6 @@ def _first_heading(path: Path) -> str | None: return s.lstrip("#").strip() if s.startswith("# "): return s.lstrip("#").strip() - except Exception: + except Exception: # noqa: BLE001 — llms.txt endpoint must serve partial content, never 500 return None return None diff --git a/skillforge/api/routes.py b/skillforge/api/routes.py index 7c7e2b2..386d89a 100644 --- a/skillforge/api/routes.py +++ b/skillforge/api/routes.py @@ -24,17 +24,15 @@ from skillforge.config import invite_code_valid from skillforge.db.database import init_db from skillforge.db.queries import get_lineage, get_run, list_runs, save_run -from skillforge.engine.evolution import PENDING_PARENTS, run_evolution +from skillforge.engine.evolution import run_evolution from skillforge.engine.export import export_agent_sdk_config, export_skill_md, export_skill_zip +from 
skillforge.engine.run_registry import registry from skillforge.models import EvolutionRun, SkillGenome logger = logging.getLogger("skillforge.api") router = APIRouter(prefix="/api") -# Module-level registry: run_id -> background task -_active_runs: dict[str, asyncio.Task] = {} - async def _classify_run_via_taxonomist( run: EvolutionRun, requested_mode: str | None @@ -193,13 +191,13 @@ async def start_evolution(req: EvolveRequest) -> EvolveResponse: # Spawn background task — store reference so it isn't GC'd task = asyncio.create_task(run_evolution(run)) - _active_runs[run.id] = task + registry.set_task(run.id, task) logger.info("run=%s started: spec=%s pop=%d gens=%d", run.id[:8], run.specialization[:60], run.population_size, run.num_generations) # Cleanup callback removes the task from the registry when it finishes def _cleanup(t: asyncio.Task) -> None: - _active_runs.pop(run.id, None) + registry.clear_task(run.id) exc = t.exception() if not t.cancelled() else None if exc: logger.error("run=%s task failed: %s", run.id[:8], exc) @@ -239,9 +237,10 @@ async def start_evolution_from_parent(req: EvolveFromParentRequest) -> EvolveRes - ``upload``: ``parent_id`` is an upload_id from POST /api/uploads/skill. Resolved via the in-memory upload cache. - The parent is stashed in the ``PENDING_PARENTS`` registry keyed by the new - run's id. The evolution engine picks it up at gen 0 spawn time and routes - through ``spawner.spawn_from_parent()`` instead of ``spawn_gen0()``. + The parent is stashed in the ``RunRegistry`` (see ``engine/run_registry.py``) + keyed by the new run's id. The evolution engine picks it up at gen 0 spawn + time and routes through ``spawner.spawn_from_parent()`` instead of + ``spawn_gen0()``. """ if not invite_code_valid(req.invite_code): raise HTTPException( @@ -316,17 +315,17 @@ async def start_evolution_from_parent(req: EvolveFromParentRequest) -> EvolveRes await save_run(run) # Stash the parent so the engine's gen-0 spawn picks it up - PENDING_PARENTS[run.id] = parent + registry.stash_parent(run.id, parent) # Clear the upload cache so we don't leak memory if req.parent_source == "upload": clear_upload(req.parent_id) task = asyncio.create_task(run_evolution(run)) - _active_runs[run.id] = task + registry.set_task(run.id, task) def _cleanup(t: asyncio.Task) -> None: - _active_runs.pop(run.id, None) + registry.clear_task(run.id) task.add_done_callback(_cleanup) @@ -337,12 +336,12 @@ def _cleanup(t: asyncio.Task) -> None: async def cancel_run(run_id: str) -> dict: """Cancel an in-progress evolution run. - Finds the backing asyncio task in ``_active_runs``, cancels it, and - marks the run status as ``cancelled`` in the DB. The engine catches + Finds the backing asyncio task in the ``RunRegistry``, cancels it, + and marks the run status as ``cancelled`` in the DB. The engine catches ``CancelledError`` in its main loop, emits a ``run_cancelled`` event, and persists the partial state before exiting. 
""" - task = _active_runs.get(run_id) + task = registry.get_task(run_id) if task is None or task.done(): # Maybe already done, maybe never existed — either way nothing to cancel raise HTTPException( diff --git a/skillforge/api/spec_assistant.py b/skillforge/api/spec_assistant.py index 70464e3..ed7a08d 100644 --- a/skillforge/api/spec_assistant.py +++ b/skillforge/api/spec_assistant.py @@ -379,8 +379,8 @@ async def generate_skill(req: GenerateSkillRequest) -> GenerateSkillResponse: traits=[], ) logger.info("auto-saved generated package as candidate seed: %s", name) - except Exception as e: - logger.warning("failed to auto-save candidate seed: %s", e) + except Exception: # noqa: BLE001 — auto-save is a bonus; never fail the response over it + logger.exception("failed to auto-save candidate seed") return GenerateSkillResponse( name=name, diff --git a/skillforge/api/uploads.py b/skillforge/api/uploads.py index 57e5bdf..53179e6 100644 --- a/skillforge/api/uploads.py +++ b/skillforge/api/uploads.py @@ -101,7 +101,7 @@ def _sniff_skill_md(zf: zipfile.ZipFile) -> tuple[str, dict[str, str]]: continue # silently skip disallowed extensions try: content = zf.read(info.filename).decode("utf-8", errors="replace") - except Exception: + except (OSError, zipfile.BadZipFile, RuntimeError): continue if rel == "SKILL.md": skill_md_body = content @@ -170,15 +170,16 @@ async def upload_skill(file: UploadFile = File(...)) -> dict: # noqa: B008 (Fa # Extract frontmatter for the preview response frontmatter_preview: dict = {} if skill_md.startswith("---"): - try: - import yaml + import yaml + try: _, fm_block, _ = skill_md.split("---", 2) parsed = yaml.safe_load(fm_block) or {} - if isinstance(parsed, dict): - frontmatter_preview = parsed - except Exception: - pass + except (ValueError, yaml.YAMLError): + # Malformed frontmatter — preview stays empty, upload still succeeds. 
+ parsed = None + if isinstance(parsed, dict): + frontmatter_preview = parsed _UPLOADS[upload_id] = _UploadRecord(skill=genome, filename=filename) diff --git a/skillforge/api/websocket.py b/skillforge/api/websocket.py index 2e9f2c7..a3d68da 100644 --- a/skillforge/api/websocket.py +++ b/skillforge/api/websocket.py @@ -47,7 +47,7 @@ async def evolution_events(websocket: WebSocket, run_id: str) -> None: except WebSocketDisconnect: logger.info("ws run=%s client disconnected", run_id[:8]) return - except Exception as exc: - logger.error("ws run=%s error: %s", run_id[:8], exc) + except Exception: # noqa: BLE001 — WS handler must always close cleanly, never propagate + logger.exception("ws run=%s error", run_id[:8]) with contextlib.suppress(Exception): await websocket.close(code=1011) diff --git a/skillforge/db/benchmark_seed_loader.py b/skillforge/db/benchmark_seed_loader.py index b97fd3b..bce11e5 100644 --- a/skillforge/db/benchmark_seed_loader.py +++ b/skillforge/db/benchmark_seed_loader.py @@ -59,8 +59,8 @@ async def load_benchmark_results() -> dict: try: records = json.loads(SEED_PATH.read_text(encoding="utf-8")) - except Exception as exc: # noqa: BLE001 - never block boot on a bad seed - logger.exception("Failed to parse benchmark seed: %s", exc) + except (OSError, json.JSONDecodeError): + logger.exception("Failed to parse benchmark seed") return {"loaded": 0, "skipped_reason": "parse-error"} if not isinstance(records, list) or not records: diff --git a/skillforge/engine/events.py b/skillforge/engine/events.py index a138d0f..9effdad 100644 --- a/skillforge/engine/events.py +++ b/skillforge/engine/events.py @@ -55,8 +55,8 @@ async def _persist_event(run_id: str, event_type: str, payload: dict[str, Any]) (run_id, event_type, _json.dumps(payload, default=str), payload.get("timestamp", "")), ) await conn.commit() - except Exception: - pass # best-effort, never block the engine + except Exception: # noqa: BLE001 — event persistence is best-effort; must never block the engine + logger.debug("event persistence failed (swallowed)", exc_info=True) async def emit(run_id: str, event: str, **kwargs: Any) -> None: diff --git a/skillforge/engine/evolution.py b/skillforge/engine/evolution.py index d63546b..fb255b6 100644 --- a/skillforge/engine/evolution.py +++ b/skillforge/engine/evolution.py @@ -36,15 +36,12 @@ from skillforge.db.database import init_db from skillforge.db.queries import save_run from skillforge.engine.events import emit +from skillforge.engine.run_registry import registry from skillforge.engine.sandbox import cleanup_sandbox, create_sandbox -from skillforge.models import EvolutionRun, Generation, SkillGenome +from skillforge.models import EvolutionRun, Generation logger = logging.getLogger("skillforge.engine") -# Module-level registry: run_id -> parent SkillGenome when the run was started -# via fork-and-evolve (seed or upload). Looked up at gen 0 spawn time. -PENDING_PARENTS: dict[str, SkillGenome] = {} - # --- Budget tracking --------------------------------------------------------- # MVP: estimate cost from trace length. Each SDK turn is ~$0.02 for Sonnet 4.6 # (very rough). 
Real tracking would read token counts from message metadata; @@ -231,8 +228,8 @@ async def run_evolution(run: EvolutionRun) -> EvolutionRun: try: run = await run_variant_evolution(run) - except Exception as exc: # noqa: BLE001 - logger.exception("run=%s atomic evolution failed: %s", run.id[:8], exc) + except Exception as exc: # noqa: BLE001 — top-level run boundary must catch all + logger.exception("run=%s atomic evolution failed", run.id[:8]) run.status = "failed" run.failure_reason = f"atomic evolution failed: {exc}" run.completed_at = datetime.now(UTC) @@ -288,8 +285,8 @@ async def _build_report() -> None: "managed_environment_ready", environment_id=env_id, ) - except Exception as exc: # noqa: BLE001 - logger.error("run=%s managed environment creation failed: %s", run.id[:8], exc) + except Exception as exc: # noqa: BLE001 — managed-env boundary: any SDK failure must be captured + logger.exception("run=%s managed environment creation failed", run.id[:8]) run.status = "failed" run.failure_reason = f"managed environment creation failed: {exc}" await emit(run.id, "run_failed", reason="env_create_failed") @@ -319,7 +316,7 @@ async def _build_report() -> None: # --- Spawn or breed --------------------------------------- if gen_num == 0: - seed_parent = PENDING_PARENTS.pop(run.id, None) + seed_parent = registry.take_parent(run.id) if seed_parent is not None: skills = await spawn_from_parent(seed_parent, run.population_size) else: @@ -478,8 +475,8 @@ async def _build_report() -> None: source_skill_id=best.id, ) logger.info("run=%s auto-saved best skill as candidate seed", run.id[:8]) - except Exception as e: - logger.warning("run=%s failed to save candidate seed: %s", run.id[:8], e) + except Exception: # noqa: BLE001 — auto-save is a bonus; never fail the run over it + logger.exception("run=%s failed to save candidate seed", run.id[:8]) await emit( run.id, @@ -558,10 +555,8 @@ async def _persist(run: EvolutionRun) -> None: """ try: await save_run(run) - except Exception as exc: # noqa: BLE001 - # DB persistence failures are non-fatal during a run — the Progress - # Tracker event stream is the primary truth; DB is a durable backup. 
- logger.error("run=%s DB persistence failed: %s", run.id[:8], exc) + except Exception: # noqa: BLE001 — DB is a durable backup; event stream is the primary truth + logger.exception("run=%s DB persistence failed", run.id[:8]) def dump_run_json(run: EvolutionRun) -> Path | None: @@ -582,7 +577,7 @@ def dump_run_json(run: EvolutionRun) -> Path | None: RUN_DUMPS_DIR.mkdir(parents=True, exist_ok=True) path = RUN_DUMPS_DIR / f"{run.id}.json" path.write_text(json.dumps(run.to_dict(), indent=2, default=str)) - return path - except Exception as exc: # noqa: BLE001 - logger.error("run=%s JSON dump failed: %s", run.id[:8], exc) + except (OSError, TypeError): + logger.exception("run=%s JSON dump failed", run.id[:8]) return None + return path diff --git a/skillforge/engine/report.py b/skillforge/engine/report.py index 3a17c36..5a93392 100644 --- a/skillforge/engine/report.py +++ b/skillforge/engine/report.py @@ -206,15 +206,15 @@ async def _build_atomic_genomes_section( (run.id,), ) as cur: rows = await cur.fetchall() - except Exception as exc: # pragma: no cover - defensive - logger.warning("report: failed to fetch atomic genomes: %s", exc) + except Exception: # noqa: BLE001 — defensive: report rendering must not fail on a bad row + logger.exception("report: failed to fetch atomic genomes") return [] out: list[dict[str, Any]] = [] for row in rows: try: g = _row_to_genome(row) - except Exception: + except Exception: # noqa: BLE001 — skip corrupt rows; never crash the whole report continue out.append( { @@ -250,8 +250,8 @@ async def _build_variant_evolutions_section( """ try: evolutions = await get_variant_evolutions_for_run(run.id) - except Exception as exc: # pragma: no cover - defensive - logger.warning("report: failed to fetch variant evolutions: %s", exc) + except Exception: # noqa: BLE001 — defensive: report rendering must not fail on a bad row + logger.exception("report: failed to fetch variant evolutions") return [] out: list[dict[str, Any]] = [] @@ -296,8 +296,8 @@ async def _build_assembly_report(run: EvolutionRun) -> dict[str, Any] | None: try: variants = await get_variants_for_family(family_id) active_variants = [v.to_dict() for v in variants if v.is_active] - except Exception as exc: # pragma: no cover - defensive - logger.warning("report: failed to fetch active variants: %s", exc) + except Exception: # noqa: BLE001 — defensive: report rendering degrades gracefully + logger.exception("report: failed to fetch active variants") return { "family_id": family.id, @@ -474,8 +474,8 @@ async def generate_run_report( """ try: run = await get_run(run_id) - except Exception as exc: # pragma: no cover - defensive - logger.error("report: failed to load run %s: %s", run_id, exc) + except Exception: # noqa: BLE001 — report generation must degrade, never crash + logger.exception("report: failed to load run %s", run_id) return None if run is None: @@ -510,8 +510,8 @@ async def generate_run_report( ), "generated_at": datetime.now(UTC).isoformat(), } - except Exception as exc: # noqa: BLE001 - logger.exception("report: failed to build report for %s: %s", run_id, exc) + except Exception: # noqa: BLE001 — report generation must degrade, never crash + logger.exception("report: failed to build report for %s", run_id) return None target_dir = reports_dir if reports_dir is not None else REPORTS_DIR @@ -535,8 +535,8 @@ async def generate_run_report( logger.info( "report: generated %s (%d bytes)", run_id, size_bytes ) - except Exception as exc: # noqa: BLE001 - logger.exception("report: failed to write report for 
%s: %s", run_id, exc) + except (OSError, TypeError): + logger.exception("report: failed to write report for %s", run_id) return None return report diff --git a/skillforge/engine/run_registry.py b/skillforge/engine/run_registry.py new file mode 100644 index 0000000..dadb042 --- /dev/null +++ b/skillforge/engine/run_registry.py @@ -0,0 +1,71 @@ +"""In-process registry of active evolution runs. + +Centralizes two pieces of runtime state the FastAPI process tracks for +every currently-running evolution: + +- ``pending_parents`` — a ``SkillGenome`` stashed at run-start when the + run was forked from a seed or an uploaded skill. Gen 0 spawn pops this + to seed the initial population instead of spawning fresh. + +- ``tasks`` — the backing ``asyncio.Task`` for each evolution coroutine, + so ``DELETE /api/runs/{id}`` can cancel in-flight work. + +Before this module existed, the same state lived as two mutable module +globals split across ``engine/evolution.py`` and ``api/routes.py``. That +made the state invisible to tests, impossible to reset, and fragile +under concurrent requests. A single registry with explicit accessors +replaces both. See ``docs/clean-code.md`` §5. +""" + +from __future__ import annotations + +import asyncio +from collections.abc import Iterable +from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from skillforge.models import SkillGenome + + +@dataclass(slots=True) +class RunRegistry: + """Holds pending-parent and active-task state for evolution runs.""" + + _pending_parents: dict[str, SkillGenome] = field(default_factory=dict) + _tasks: dict[str, asyncio.Task] = field(default_factory=dict) + + # --- pending parents (fork-and-evolve) --------------------------------- + + def stash_parent(self, run_id: str, parent: SkillGenome) -> None: + """Record the parent genome a run should fork from.""" + self._pending_parents[run_id] = parent + + def take_parent(self, run_id: str) -> SkillGenome | None: + """Pop the parent genome for ``run_id`` (one-shot).""" + return self._pending_parents.pop(run_id, None) + + # --- active evolution tasks -------------------------------------------- + + def set_task(self, run_id: str, task: asyncio.Task) -> None: + self._tasks[run_id] = task + + def clear_task(self, run_id: str) -> None: + self._tasks.pop(run_id, None) + + def get_task(self, run_id: str) -> asyncio.Task | None: + return self._tasks.get(run_id) + + def active_count(self) -> int: + return len(self._tasks) + + def iter_tasks(self) -> Iterable[tuple[str, asyncio.Task]]: + # Snapshot via list() so callers can cancel tasks without mutating + # the dict during iteration. + return list(self._tasks.items()) + + +# Module-level singleton. Tests that need isolation can construct their +# own ``RunRegistry()`` and inject it explicitly; production code uses +# this shared instance. 
+registry = RunRegistry() diff --git a/skillforge/engine/variant_evolution.py b/skillforge/engine/variant_evolution.py index d90f283..4972e41 100644 --- a/skillforge/engine/variant_evolution.py +++ b/skillforge/engine/variant_evolution.py @@ -533,8 +533,8 @@ async def run_variant_evolution(run: EvolutionRun) -> EvolutionRun: ) logger.info("run=%s managed environment ready: %s", run.id[:8], env_id) await emit(run.id, "managed_environment_ready", environment_id=env_id) - except Exception as exc: # noqa: BLE001 - logger.error("run=%s managed environment creation failed: %s", run.id[:8], exc) + except Exception as exc: # noqa: BLE001 — managed-env boundary: any SDK failure must be captured + logger.exception("run=%s managed environment creation failed", run.id[:8]) run.status = "failed" run.failure_reason = f"managed environment creation failed: {exc}" await save_run(run) @@ -572,12 +572,11 @@ async def run_variant_evolution(run: EvolutionRun) -> EvolutionRun: foundation_winner=foundation_winner, env_id=env_id, ) - except Exception as exc: # noqa: BLE001 + except Exception as exc: # noqa: BLE001 — one bad dimension must not crash the whole atomic run logger.exception( - "run=%s dimension %s mini-evolution failed: %s", + "run=%s dimension %s mini-evolution failed", run.id[:8], vevo.dimension, - exc, ) vevo.status = "failed" await save_variant_evolution(vevo) diff --git a/skillforge/errors.py b/skillforge/errors.py new file mode 100644 index 0000000..d343dc3 --- /dev/null +++ b/skillforge/errors.py @@ -0,0 +1,90 @@ +"""Typed exception hierarchy for the skillforge package. + +Every domain error inherits from ``SkldError`` so boundary handlers can +catch the whole family with one ``except`` clause while inner code can +still raise (and catch) precise types. See ``docs/clean-code.md`` §4. +""" + +from __future__ import annotations + + +class SkldError(Exception): + """Base class for every skillforge domain error.""" + + +# --- Agent pipeline --------------------------------------------------------- + + +class AgentError(SkldError): + """A failure inside an agent role (spawner, breeder, etc.).""" + + +class SpawnError(AgentError): + """Spawner could not produce a valid initial variant population.""" + + +class BreedError(AgentError): + """Breeder could not evolve a next generation.""" + + +class JudgeError(AgentError): + """A judging layer (L1-L5) failed to produce a verdict.""" + + +class EngineerError(AgentError): + """Engineer failed to assemble or refine a composite skill.""" + + +class TaxonomistError(AgentError): + """Taxonomist failed to classify a specialization.""" + + +class ChallengeDesignError(AgentError): + """Challenge designer could not produce a valid challenge.""" + + +# --- External integrations -------------------------------------------------- + + +class AgentSDKError(SkldError): + """The Anthropic / claude-agent SDK returned or raised unexpectedly. + + Distinct from ``AgentError`` — ``AgentSDKError`` is a *transport* + failure (bad creds, rate limit, network), ``AgentError`` is a + *semantic* failure (agent produced garbage output). + """ + + +class ManagedEnvironmentError(SkldError): + """A hosted Managed-Agents environment could not be created, reused, or torn down.""" + + +# --- Data + parsing --------------------------------------------------------- + + +class ParseError(SkldError, ValueError): + """Could not parse a structured response (JSON, YAML, frontmatter, etc.). 
+ + Inherits from ``ValueError`` so ``except ValueError`` continues to match + parse failures in legacy call sites; new code should catch ``ParseError`` + to distinguish parse failures from other value errors. + """ + + +class ValidationError(SkldError, ValueError): + """A parsed value violated a domain invariant.""" + + +# --- Persistence ------------------------------------------------------------ + + +class DBError(SkldError): + """Database operation failed in an unexpected way. + + Catching this is always optional — most call sites should let it + propagate so FastAPI's error middleware returns 500 with context. + """ + + +class StorageError(SkldError): + """Filesystem / blob-storage operation failed (export, upload, etc.).""" diff --git a/skillforge/main.py b/skillforge/main.py index f05afbd..f5bce61 100644 --- a/skillforge/main.py +++ b/skillforge/main.py @@ -88,14 +88,14 @@ async def lifespan(app: FastAPI): await load_seeds() try: await load_mock_runs() - except Exception as exc: # pragma: no cover - fail-soft on seed run load - logger.exception("Seed run loader failed: %s", exc) + except Exception: # noqa: BLE001 — boot-time: never block startup on seed load + logger.exception("Seed run loader failed") try: diag = await load_benchmark_results() if diag.get("loaded"): logger.info("Benchmark seed loaded: %d rows", diag["loaded"]) - except Exception as exc: # pragma: no cover - fail-soft on benchmark seed load - logger.exception("Benchmark seed loader failed: %s", exc) + except Exception: # noqa: BLE001 — boot-time: never block startup on seed load + logger.exception("Benchmark seed loader failed") try: taxonomy_diag = await load_taxonomy() logger.info( @@ -104,8 +104,8 @@ async def lifespan(app: FastAPI): taxonomy_diag.get("families_created", 0), taxonomy_diag.get("families_reused", 0), ) - except Exception as exc: # pragma: no cover - boot-time resiliency - logger.exception("Taxonomy bootstrap failed: %s", exc) + except Exception: # noqa: BLE001 — boot-time: never block startup on bootstrap + logger.exception("Taxonomy bootstrap failed") zombie_count = await mark_zombie_runs() if zombie_count: logger.warning("Marked %d zombie run(s) as failed on startup", zombie_count) @@ -139,11 +139,11 @@ async def lifespan(app: FastAPI): @app.get("/api/health") async def health() -> dict: """Backend health check with active run count.""" - from skillforge.api.routes import _active_runs + from skillforge.engine.run_registry import registry return { "status": "ok", "service": "skillforge", - "active_runs": len(_active_runs), + "active_runs": registry.active_count(), } @@ -395,8 +395,8 @@ def _inject_noscript_content(raw_html: str, full_path: str) -> str: elif base == "bench": content_lines.append("
Controlled evaluation benchmark across 7 Elixir skill families with 6-layer composite scoring (L0 string match 10%, compilation 15%, AST quality 15%, behavioral tests 40%, template quality 10%, brevity 10%). Families: phoenix-liveview, ecto-query-writer, ecto-sandbox-test, ecto-schema-changeset, oban-worker, pattern-match-refactor, security-linter.
") - except Exception: - pass + except Exception: # noqa: BLE001 — SEO augmentation must never break page render + logger.debug("meta_injection: failed to enrich SEO tags", exc_info=True) if not content_lines: return raw_html diff --git a/skillforge/seeds/mock_run_loader.py b/skillforge/seeds/mock_run_loader.py index 4d07954..8cdf05a 100644 --- a/skillforge/seeds/mock_run_loader.py +++ b/skillforge/seeds/mock_run_loader.py @@ -501,5 +501,5 @@ async def load_mock_runs() -> None: for path in files: try: await _load_one(path) - except Exception as e: # noqa: BLE001 - fail soft, never break boot - logger.exception("seed_run_loader: failed to load %s: %s", path.name, e) + except Exception: # noqa: BLE001 - fail soft, never break boot + logger.exception("seed_run_loader: failed to load %s", path.name) diff --git a/tests/test_seeds.py b/tests/test_seeds.py index ad62d15..151eb5e 100644 --- a/tests/test_seeds.py +++ b/tests/test_seeds.py @@ -28,7 +28,7 @@ from fastapi.testclient import TestClient from skillforge.db import seed_loader -from skillforge.engine.evolution import PENDING_PARENTS +from skillforge.engine.run_registry import registry as _run_registry from skillforge.main import app from skillforge.models import EvolutionRun, Generation, SkillGenome @@ -44,10 +44,10 @@ def client() -> TestClient: @pytest.fixture(autouse=True) def _clear_pending_parents(): - """PENDING_PARENTS is module-level state — wipe between tests.""" - PENDING_PARENTS.clear() + """RunRegistry is process-level state — wipe between tests.""" + _run_registry._pending_parents.clear() yield - PENDING_PARENTS.clear() + _run_registry._pending_parents.clear() def _make_seed_skill(skill_id: str = "seed-foo", description: str = "Use when foo. NOT for bar.") -> SkillGenome: @@ -431,8 +431,11 @@ def test_evolve_from_parent_registry_happy_path(client): assert "run_id" in payload assert payload["ws_url"] == f"/ws/evolve/{payload['run_id']}" # Parent was stashed for the engine to pick up - assert payload["run_id"] in PENDING_PARENTS - assert PENDING_PARENTS[payload["run_id"]].id == "seed-registry-1" + parent = _run_registry.take_parent(payload["run_id"]) + assert parent is not None + assert parent.id == "seed-registry-1" + # Restore so the autouse fixture's post-yield clear() stays idempotent + _run_registry.stash_parent(payload["run_id"], parent) # --------------------------------------------------------------------------- diff --git a/tests/test_uploads.py b/tests/test_uploads.py index a1bfab2..9e41673 100644 --- a/tests/test_uploads.py +++ b/tests/test_uploads.py @@ -33,7 +33,7 @@ from fastapi.testclient import TestClient from skillforge.api import uploads as uploads_module -from skillforge.engine.evolution import PENDING_PARENTS +from skillforge.engine.run_registry import registry as _run_registry from skillforge.main import app # --------------------------------------------------------------------------- @@ -48,12 +48,12 @@ def client() -> TestClient: @pytest.fixture(autouse=True) def _wipe_upload_state(): - """Per-test cleanup: in-memory upload cache + PENDING_PARENTS registry.""" + """Per-test cleanup: in-memory upload cache + RunRegistry pending parents.""" uploads_module._UPLOADS.clear() - PENDING_PARENTS.clear() + _run_registry._pending_parents.clear() yield uploads_module._UPLOADS.clear() - PENDING_PARENTS.clear() + _run_registry._pending_parents.clear() # A valid SKILL.md that passes validate_skill_structure: @@ -519,8 +519,9 @@ def test_upload_then_fork_round_trip(client): assert fork_resp.status_code == 200, 
fork_resp.text payload = fork_resp.json() new_run_id = payload["run_id"] - assert new_run_id in PENDING_PARENTS - parent = PENDING_PARENTS[new_run_id] + parent = _run_registry.take_parent(new_run_id) + assert parent is not None assert parent.skill_md_content == VALID_SKILL_MD + _run_registry.stash_parent(new_run_id, parent) # The upload should be cleared from the cache after the fork starts assert uploads_module.get_upload(upload_id) is None