ty13r · ty13r · Apr 19, 2026 · Apr 19, 2026
diff --git a/pyproject.toml b/pyproject.toml
@@ -42,13 +42,23 @@ target-version = "py312"
 [tool.ruff.lint]
 # E/F: pycodestyle + pyflakes  I: isort  UP: pyupgrade  B: flake8-bugbear
 # SIM: flake8-simplify  LOG: flake8-logging  RET: flake8-return  PTH: flake8-use-pathlib
-# BLE + TRY (exception hygiene) are added in Wave 2 alongside the bare-except cleanup.
-select = ["E", "F", "I", "UP", "B", "SIM", "LOG", "RET", "PTH"]
+# BLE: flake8-blind-except  TRY: tryceratops (exception hygiene)
+select = ["E", "F", "I", "UP", "B", "SIM", "LOG", "RET", "PTH", "BLE", "TRY"]
 ignore = [
     "E501",   # line-too-long (handled by formatter preference)
     "RET504", # unnecessary assignment before return (noisy)
+    "TRY003", # long messages in raise — our domain errors benefit from prose
+    "TRY301", # abstract raise into own block — stylistic, rarely actionable
+    "TRY300", # consider-else-block after try — stylistic, not buggy
+    "TRY004", # prefer TypeError for isinstance failures — our LLM-parse
+              # results are genuine *value* errors (malformed payload), not
+              # programmer type errors. ValueError is the right semantic.
 ]
 
+# Tests may use broad excepts to assert "some error occurred" behavior.
+[tool.ruff.lint.extend-per-file-ignores]
+"tests/*.py" = ["BLE001"]
+
 # Seed data modules are bulk content, not hand-maintained logic.
 [tool.ruff.lint.per-file-ignores]
 "skillforge/seeds/batch*.py" = ["E402", "E741", "F401", "F811", "SIM", "B"]

diff --git a/skillforge/agents/_json.py b/skillforge/agents/_json.py
@@ -0,0 +1,99 @@
+"""Robust JSON-array extraction from LLM prose.
+
+Shared by agents that ask an LLM for a list of structured items and must
+tolerate responses wrapped in prose, code fences, nested backticks, or
+string values that contain ``[``/``]``. See ``docs/clean-code.md`` §1
+(reuse over duplication).
+"""
+
+from __future__ import annotations
+
+import json
+import re
+
+from skillforge.errors import ParseError
+
+_FENCE_RE = re.compile(r"```(?:json)?\s*\n?(.*)\n?```", re.DOTALL)  # greedy on purpose: captures from the first fence to the last
+
+
+def extract_json_array(text: str) -> list[dict]:
+    """Return the outermost JSON array embedded in ``text``.
+
+    Handles three response shapes:
+      1. Raw JSON array — the entire response is a ``[...]``.
+      2. Fenced block — response wrapped in ```` ```json ... ``` ```` fences.
+         Matched greedily so nested fences in string values don't split.
+      3. Array embedded in prose — extracted via bracket-depth scanning
+         that respects JSON string literal state; bracketed prose such as
+         ``[note]`` ahead of the payload is skipped, not treated as fatal.
+
+    Raises:
+        ParseError: no balanced JSON array could be located.
+    """
+    candidate = text.strip()
+
+    if candidate.startswith("[") and candidate.endswith("]"):
+        try:
+            parsed = json.loads(candidate)
+        except json.JSONDecodeError:
+            pass
+        else:
+            if isinstance(parsed, list):
+                return parsed
+
+    # Prefer the fenced payload when one exists; otherwise scan the raw text.
+    fence_match = _FENCE_RE.search(text)
+    text_to_scan = fence_match.group(1).strip() if fence_match else text
+    if fence_match:
+        try:
+            parsed = json.loads(text_to_scan)
+        except json.JSONDecodeError:
+            pass
+        else:
+            if isinstance(parsed, list):
+                return parsed
+
+    # Walk balanced ``[...]`` spans left to right. A span that is balanced
+    # but not valid JSON is prose, so resume right after it. An unbalanced
+    # span means truncated output: stop instead of returning a fragment.
+    while (array_src := _scan_outermost_array(text_to_scan)) is not None:
+        try:
+            parsed = json.loads(array_src)
+        except json.JSONDecodeError:
+            parsed = None
+        if isinstance(parsed, list):
+            return parsed
+        text_to_scan = text_to_scan[text_to_scan.find("[") + len(array_src) :]
+
+    raise ParseError("no valid JSON array found in response text")
+
+
+def _scan_outermost_array(text: str) -> str | None:
+    """Return the balanced ``[...]`` span that opens at the first ``[`` in ``text``, or ``None``."""
+    start = text.find("[")
+    if start == -1:
+        return None  # no opening bracket anywhere in the text
+
+    depth = 0  # nesting level of brackets seen outside string literals
+    in_string = False  # toggled by every unescaped double quote
+    escape = False  # True when the previous character was an unescaped backslash
+    for i in range(start, len(text)):
+        ch = text[i]
+        if escape:
+            escape = False  # this character is escaped: consume it without inspecting it
+            continue
+        if ch == "\\":
+            escape = True
+            continue
+        if ch == '"':
+            in_string = not in_string
+            continue
+        if in_string:
+            continue  # brackets inside a string literal must not change depth
+        if ch == "[":
+            depth += 1
+        elif ch == "]":
+            depth -= 1
+            if depth == 0:
+                return text[start : i + 1]  # span includes both the opening and closing bracket
+    return None  # text ended before the bracket opened at ``start`` was closed
diff --git a/skillforge/agents/breeder.py b/skillforge/agents/breeder.py
@@ -20,6 +20,7 @@
 from __future__ import annotations
 
 import json
+import logging
 import re
 from datetime import UTC, datetime
 
@@ -35,6 +36,8 @@
 )
 from skillforge.models import Generation, SkillGenome
 
+logger = logging.getLogger("skillforge.agents.breeder")
+
 # ---------------------------------------------------------------------------
 # Slot allocation
 # ---------------------------------------------------------------------------
@@ -150,9 +153,9 @@ async def breed(
                 breeding_instructions=diagnostic_instructions,
             )
             next_gen.extend(diagnostic_children[: slots["diagnostic"]])
-        except Exception as exc:  # noqa: BLE001
-            # Fall through — wildcard slots below absorb the shortfall
-            print(f"breeder: diagnostic mutation failed: {exc}")
+        except Exception:  # noqa: BLE001 — subagent boundary: one slot failure must not kill the whole breed
+            # Fall through — wildcard slots below absorb the shortfall.
+            logger.exception("breeder.diagnostic_failed")
 
     # --- Reflective crossover: combine 2-3 Pareto-optimal parents ---
     pareto_parents = [s for s in ranked if s.is_pareto_optimal][:3]
@@ -171,8 +174,8 @@ async def breed(
                 breeding_instructions=crossover_instructions,
             )
             next_gen.extend(crossover_children[: slots["crossover"]])
-        except Exception as exc:  # noqa: BLE001
-            print(f"breeder: crossover failed: {exc}")
+        except Exception:  # noqa: BLE001 — subagent boundary: one slot failure must not kill the whole breed
+            logger.exception("breeder.crossover_failed")
 
     # --- Wildcard: fresh Skills via spawn_gen0 ---
     if slots["wildcards"] > 0:
@@ -182,14 +185,14 @@ async def breed(
                 pop_size=slots["wildcards"],
             )
             # Mark wildcards as mutations on the next generation
-            next_gen_num = (generation.number + 1)
+            next_gen_num = generation.number + 1
             for w in wildcards:
                 w.generation = next_gen_num
                 w.mutations = ["wildcard"]
                 w.mutation_rationale = "Wildcard slot: fresh spawn to prevent convergence"
             next_gen.extend(wildcards)
-        except Exception as exc:  # noqa: BLE001
-            print(f"breeder: wildcard spawn failed: {exc}")
+        except Exception:  # noqa: BLE001 — subagent boundary: one slot failure must not kill the whole breed
+            logger.exception("breeder.wildcard_spawn_failed")
 
     # --- Trim or pad to exactly target_pop_size ---
     next_gen = next_gen[:target_pop_size]
@@ -413,8 +416,12 @@ async def _extract_lessons(context: str, learning_log: list[str]) -> list[str]:
             max_tokens=500,
             messages=[{"role": "user", "content": prompt}],
         )
-    except Exception as exc:  # noqa: BLE001
-        return [f"(lesson extraction failed: {exc})"]
+    except Exception:
+        # Degrade gracefully — a breeder that blocks on LLM hiccups would
+        # stall the whole run. The SDK has many concrete error types across
+        # versions; catching at the boundary keeps the engine moving.
+        logger.exception("breeder.lesson_extraction_failed")
+        return ["(lesson extraction failed)"]
 
     match = re.search(r"\[.*\]", text, re.DOTALL)
     if not match:
@@ -452,8 +459,10 @@ async def _extract_breeding_report(
             max_tokens=800,
             messages=[{"role": "user", "content": prompt}],
         )
-    except Exception as exc:  # noqa: BLE001
-        return f"(breeding report failed: {exc})"
+    except Exception:
+        # Degrade gracefully — see _extract_lessons for rationale.
+        logger.exception("breeder.report_extraction_failed")
+        return "(breeding report failed)"
 
 
 async def _extract_consolidated(
@@ -486,8 +495,10 @@ async def _extract_consolidated(
             max_tokens=1200,
             messages=[{"role": "user", "content": prompt}],
         )
-    except Exception as exc:  # noqa: BLE001
-        return ([f"(consolidated extraction failed: {exc})"], "")
+    except Exception:
+        # Degrade gracefully — see _extract_lessons for rationale.
+        logger.exception("breeder.consolidated_extraction_failed")
+        return (["(consolidated extraction failed)"], "")
 
     match = re.search(r"\{.*\}", text, re.DOTALL)
     if not match:
@@ -526,8 +537,8 @@ def publish_findings_to_bible(
     findings_dir = BIBLE_DIR / "findings"
     try:
         findings_dir.mkdir(parents=True, exist_ok=True)
-    except OSError as exc:
-        print(f"bible: failed to create findings dir: {exc}")
+    except OSError:
+        logger.exception("bible.findings_dir_mkdir_failed")
         return
 
     # Determine the next finding number by scanning existing files
@@ -556,8 +567,8 @@ def publish_findings_to_bible(
         )
         try:
             (findings_dir / filename).write_text(content)
-        except OSError as exc:
-            print(f"bible: failed to write finding {filename}: {exc}")
+        except OSError:  # best-effort write; "filename" is a reserved LogRecord attribute, so use a distinct extra key
+            logger.exception("bible.finding_write_failed", extra={"finding_filename": filename})
             continue
         next_num += 1
 
@@ -570,8 +581,8 @@ def publish_findings_to_bible(
             existing = "# Evolution Log\n\n*Chronological log of all SkillForge evolution runs.*\n\n"
         entry_line = f"- **{timestamp}** — run `{run_id[:8]}` gen {generation}: {len(new_entries)} new finding(s)\n"
         log_path.write_text(existing + entry_line)
-    except OSError as exc:
-        print(f"bible: failed to update evolution log: {exc}")
+    except OSError:
+        logger.exception("bible.evolution_log_write_failed")
 
 
 def _slugify(text: str) -> str:

diff --git a/skillforge/agents/challenge_designer.py b/skillforge/agents/challenge_designer.py
@@ -13,13 +13,13 @@
 
 from __future__ import annotations
 
-import json
-import re
 import uuid
 
 from anthropic import AsyncAnthropic
 
+from skillforge.agents._json import extract_json_array
 from skillforge.config import ANTHROPIC_API_KEY, model_for
+from skillforge.errors import ParseError
 from skillforge.models import Challenge
 
 # JSON schema description embedded in prompts
@@ -35,90 +35,6 @@
 ]"""
 
 
-def _extract_json_array(text: str) -> list[dict]:
-    """Extract a JSON array from text.
-
-    Robust against:
-      1. Raw JSON array (ideal case)
-      2. ``` json ... ``` fences with nested backticks in string values
-      3. JSON embedded in prose with `[`/`]` characters in string literals
-
-    Raises:
-        ValueError: if no valid JSON array can be extracted.
-    """
-    candidate = text.strip()
-
-    # 1. Try the whole text as JSON
-    if candidate.startswith("[") and candidate.endswith("]"):
-        try:
-            result = json.loads(candidate)
-            if isinstance(result, list):
-                return result
-        except json.JSONDecodeError:
-            pass
-
-    # 2. Strip outer ```json ... ``` fence greedily
-    fence_match = re.search(r"```(?:json)?\s*\n?(.*)\n?```", text, re.DOTALL)
-    if fence_match:
-        fenced = fence_match.group(1).strip()
-        try:
-            result = json.loads(fenced)
-            if isinstance(result, list):
-                return result
-        except json.JSONDecodeError:
-            text_to_scan = fenced
-        else:
-            text_to_scan = fenced
-    else:
-        text_to_scan = text
-
-    # 3. Bracket-depth scan respecting string literal state
-    array = _scan_outermost_array(text_to_scan)
-    if array is not None:
-        try:
-            result = json.loads(array)
-            if isinstance(result, list):
-                return result
-        except json.JSONDecodeError:
-            pass
-
-    raise ValueError("No valid JSON array found in response text")
-
-
-def _scan_outermost_array(text: str) -> str | None:
-    """Find the outermost JSON array via bracket-depth scanning that
-    respects string literal state. Returns substring including ``[`` and
-    ``]``, or ``None`` if no balanced array found.
-    """
-    start = text.find("[")
-    if start == -1:
-        return None
-
-    depth = 0
-    in_string = False
-    escape = False
-    for i in range(start, len(text)):
-        ch = text[i]
-        if escape:
-            escape = False
-            continue
-        if ch == "\\":
-            escape = True
-            continue
-        if ch == '"':
-            in_string = not in_string
-            continue
-        if in_string:
-            continue
-        if ch == "[":
-            depth += 1
-        elif ch == "]":
-            depth -= 1
-            if depth == 0:
-                return text[start : i + 1]
-    return None
-
-
 _FILE_CONVENTION = """\
 ## File convention (STRICT — follow exactly)
 
@@ -261,14 +177,14 @@ async def design_challenges(specialization: str, n: int = 3) -> list[Challenge]:
     text = await _generate(_build_system_prompt(specialization, n))
 
     try:
-        raw = _extract_json_array(text)
-    except ValueError:
+        raw = extract_json_array(text)
+    except (ValueError, ParseError):
         # Attempt 2 — retry with more explicit prompt
         text = await _generate(_build_retry_prompt(specialization, n))
         try:
-            raw = _extract_json_array(text)
-        except ValueError as err:
-            raise ValueError(
+            raw = extract_json_array(text)
+        except (ValueError, ParseError) as err:
+            raise ParseError(
                 "challenge designer failed to produce valid JSON after 2 attempts"
             ) from err
 
@@ -331,10 +247,10 @@ async def design_variant_challenge(
     prompt = _build_variant_system_prompt(specialization, dimension)
     text = await _generate(prompt)
     try:
-        raw = _extract_json_array(text)
-    except ValueError:
+        raw = extract_json_array(text)
+    except (ValueError, ParseError):
         text = await _generate(_build_retry_prompt(specialization, n=1))
-        raw = _extract_json_array(text)
+        raw = extract_json_array(text)
 
     challenges = _parse_challenges(raw)
     if len(challenges) != 1: