629 changes: 0 additions & 629 deletions skillforge/agents/breeder.py

This file was deleted.

69 changes: 69 additions & 0 deletions skillforge/agents/breeder/__init__.py
@@ -0,0 +1,69 @@
"""Breeder — reflective mutation, multi-parent crossover, learning log, bible publishing.

Inspired by GEPA's Actionable Side Information: mutations are diagnostic,
not random. The Breeder reads execution traces and trait attribution
from the judging pipeline, identifies root causes of failures, and
proposes targeted fixes.

Responsibilities:
- Elitism: top N Skills survive unchanged
- Reflective crossover: combine traits from 2-3 parents guided by attribution
- Diagnostic mutation: fix specific causes surfaced by trait attribution
- Joint component mutation: frontmatter + body + scripts mutate together
- Wildcard: 1+ slots per generation for fresh Skills
- Learning log maintenance: append new lessons each generation
- Bible publishing: extract generalizable findings to ``bible/findings/``

Slot allocation scales with ``target_pop_size`` (never hardcoded; see
``_ranking.compute_slots`` for the formula).

Submodule layout:

_ranking.py compute_slots + rank_skills + _aggregate_fitness (pure)
_prompts.py _build_diagnostic_instructions + _build_crossover_instructions
+ _build_breeding_context (pure string-templating)
_reports.py _extract_lessons_and_report + _extract_lessons
+ _extract_breeding_report + _extract_consolidated
(LLM-calling; degrades gracefully on SDK errors)
main.py breed() + _carry_elite (top-level orchestrator)
bible.py publish_findings_to_bible (disk I/O, fire-and-forget)
"""

from __future__ import annotations

# Re-expose imports the old breeder.py module aliased so test patches
# targeting ``skillforge.agents.breeder.breed_next_gen`` and
# ``skillforge.agents.breeder.BIBLE_DIR`` continue to resolve.
from skillforge.agents.breeder._ranking import (
_aggregate_fitness,
compute_slots,
rank_skills,
)
from skillforge.agents.breeder._reports import (
_extract_breeding_report,
_extract_consolidated,
_extract_lessons,
_extract_lessons_and_report,
)
from skillforge.agents.breeder.bible import publish_findings_to_bible
from skillforge.agents.breeder.main import _carry_elite, breed
from skillforge.agents.spawner import breed_next_gen, spawn_gen0
from skillforge.config import BIBLE_DIR

__all__ = [
"breed",
"compute_slots",
"rank_skills",
"publish_findings_to_bible",
# Re-exports for test-patch stability.
"breed_next_gen",
"spawn_gen0",
"BIBLE_DIR",
# Private helpers re-exported for test access.
"_aggregate_fitness",
"_carry_elite",
"_extract_lessons_and_report",
"_extract_lessons",
"_extract_breeding_report",
"_extract_consolidated",
]
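The re-export block above exists only so test patches written against the old flat breeder.py keep resolving. A minimal sketch of such a patch, assuming the skillforge package is importable (the test name and body below are hypothetical and are not part of this PR):

from unittest import mock

def test_breed_with_stubbed_spawner():
    # Patching the old flat-module path still resolves because the new
    # breeder package re-exposes breed_next_gen in its __init__.py.
    with mock.patch("skillforge.agents.breeder.breed_next_gen") as stub:
        stub.return_value = []
        # ... exercise code that routes through skillforge.agents.breeder ...
        assert stub.call_count == 0  # placeholder assertion for the sketch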
89 changes: 89 additions & 0 deletions skillforge/agents/breeder/_prompts.py
@@ -0,0 +1,89 @@
"""Breeding-instruction prompt builders.

Pure string-templating functions — no LLM calls, no I/O. The actual
breeding happens in ``main.breed()`` which feeds these prompts to the
Spawner.
"""

from __future__ import annotations

from skillforge.agents.breeder._ranking import _aggregate_fitness
from skillforge.models import SkillGenome


def _build_diagnostic_instructions(
low_scorers: list[SkillGenome],
learning_log: list[str],
n_children: int,
) -> str:
"""Build breeding instructions for diagnostic mutation of low scorers."""
if not low_scorers or n_children == 0:
return ""

diagnoses = []
for skill in low_scorers:
worst_traits = sorted(
skill.trait_attribution.items(),
key=lambda kv: kv[1],
)[:3]
trait_notes = "\n".join(
f" - {t}: contribution {c:.2f} — {skill.trait_diagnostics.get(t, 'no diagnosis')}"
for t, c in worst_traits
)
diagnoses.append(
f" Skill {skill.id[:8]}:\n"
f" aggregate fitness: {_aggregate_fitness(skill):.2f}\n"
f" worst traits:\n{trait_notes}"
)

log_section = "\n".join(f" - {entry}" for entry in learning_log[-10:])

return (
f"Produce exactly {n_children} child Skill(s) by DIAGNOSTIC MUTATION of the "
"low-scoring parent(s) below. For each child, identify the root cause of "
"the parent's low fitness (from the trait diagnostics), and make a TARGETED "
"fix — rewrite or remove the underperforming instructions, tighten vague "
"phrasing, add concrete examples for ignored rules, or rescope the trait.\n\n"
"Do NOT make random changes. Every mutation must cite a specific parent "
"trait and explain (in mutation_rationale) how the child addresses it.\n\n"
f"Low-scoring parents:\n{chr(10).join(diagnoses)}\n\n"
f"Recent lessons (learning log):\n{log_section or ' (none yet)'}"
)


def _build_crossover_instructions(
parents: list[SkillGenome],
learning_log: list[str],
n_children: int,
) -> str:
"""Build instructions for reflective crossover across 2-3 parents."""
if not parents or n_children == 0:
return ""

parent_notes = []
for p in parents:
best_traits = sorted(
p.trait_attribution.items(),
key=lambda kv: kv[1],
reverse=True,
)[:3]
trait_summary = ", ".join(f"{t}:{c:+.2f}" for t, c in best_traits) or "(no attribution)"
parent_notes.append(
f" Parent {p.id[:8]} (fitness {_aggregate_fitness(p):.2f}): "
f"best traits → {trait_summary}"
)

log_section = "\n".join(f" - {entry}" for entry in learning_log[-10:])

return (
f"Produce exactly {n_children} child Skill(s) by REFLECTIVE CROSSOVER of the "
f"Pareto-optimal parents below. Combine the HIGH-CONTRIBUTING traits from "
"each parent into each child, preserving the causal mechanism that made "
"each trait successful (not just the surface phrasing).\n\n"
"Crossover is NOT concatenation. For each child, explain (in mutation_rationale) "
"which traits from which parents were combined and WHY those particular "
"traits work together.\n\n"
f"Pareto-optimal parents:\n{chr(10).join(parent_notes)}\n\n"
f"Recent lessons (learning log):\n{log_section or ' (none yet)'}"
)
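To make the shape of these prompts concrete, here is an illustrative call to _build_diagnostic_instructions (not part of this PR's diff). The stand-in object only mimics the attributes the builders actually read — id, trait_attribution, trait_diagnostics, and pareto_objectives; the real model is skillforge.models.SkillGenome, whose constructor is not shown in this change.

from dataclasses import dataclass, field

from skillforge.agents.breeder._prompts import _build_diagnostic_instructions


@dataclass
class _StubGenome:
    # Duck-typed stand-in for SkillGenome, for illustration only.
    id: str
    trait_attribution: dict = field(default_factory=dict)
    trait_diagnostics: dict = field(default_factory=dict)
    pareto_objectives: dict = field(default_factory=dict)


parent = _StubGenome(
    id="a1b2c3d4e5f6",
    trait_attribution={"tone": -0.4, "examples": 0.1},
    trait_diagnostics={"tone": "instructions too vague to act on"},
    pareto_objectives={"accuracy": 0.3, "brevity": 0.6},
)
print(_build_diagnostic_instructions([parent], ["keep rules concrete"], n_children=2))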

79 changes: 79 additions & 0 deletions skillforge/agents/breeder/_ranking.py
@@ -0,0 +1,79 @@
"""Pure ranking helpers — slot allocation + fitness aggregation + sorting.

No I/O, no LLM calls. Used by the main ``breed()`` orchestrator and by
``_build_breeding_context`` when it needs to format a ranked list.
"""

from __future__ import annotations

from skillforge.models import Generation, SkillGenome


def compute_slots(target_pop_size: int) -> dict[str, int]:
"""Allocate breeding slots as a function of ``target_pop_size``.

Formula (from PLAN.md §Step 6e Breeder):

elitism = max(1, target_pop_size // 5 * 2) ~40% floor 1
wildcards = max(1, target_pop_size // 10) ~10% floor 1
remainder = target_pop_size - elitism - wildcards
diagnostic = remainder // 2
crossover = remainder - diagnostic

Worked examples:
pop_size=3 → elitism=1, wildcards=1, diagnostic=0, crossover=1 (sum 3)
pop_size=5 → elitism=2, wildcards=1, diagnostic=1, crossover=1 (sum 5)
pop_size=10 → elitism=4, wildcards=1, diagnostic=2, crossover=3 (sum 10)
"""
if target_pop_size < 1:
raise ValueError(f"target_pop_size must be >=1, got {target_pop_size}")

elitism = max(1, (target_pop_size // 5) * 2)
wildcards = max(1, target_pop_size // 10)

# Ensure elitism + wildcards doesn't exceed target (pathological tiny sizes)
if elitism + wildcards > target_pop_size:
elitism = max(1, target_pop_size - 1)
wildcards = max(0, target_pop_size - elitism)

remainder = target_pop_size - elitism - wildcards
diagnostic = remainder // 2
crossover = remainder - diagnostic

slots = {
"elitism": elitism,
"wildcards": wildcards,
"diagnostic": diagnostic,
"crossover": crossover,
}
assert sum(slots.values()) == target_pop_size, (
f"slot sum {sum(slots.values())} != target {target_pop_size}: {slots}"
)
return slots


# ---------------------------------------------------------------------------
# Ranking
# ---------------------------------------------------------------------------


def _aggregate_fitness(skill: SkillGenome) -> float:
"""Scalar aggregate of Pareto objectives for ranking (charts/selection).

The Pareto front is the real answer; this scalar is a summary for
ordering within the front (and for ranking Skills OFF the front).
"""
if not skill.pareto_objectives:
return 0.0
return sum(skill.pareto_objectives.values()) / len(skill.pareto_objectives)


def rank_skills(generation: Generation) -> list[SkillGenome]:
"""Return generation.skills sorted by (is_pareto_optimal desc, fitness desc)."""
return sorted(
generation.skills,
key=lambda s: (s.is_pareto_optimal, _aggregate_fitness(s)),
reverse=True,
)
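As a quick sanity check (not part of this PR's diff), the worked examples in the compute_slots docstring can be reproduced directly; the only assumption is that the skillforge package is importable:

from skillforge.agents.breeder._ranking import compute_slots

for n in (3, 5, 10):
    slots = compute_slots(n)
    assert sum(slots.values()) == n  # same invariant asserted inside compute_slots
    print(n, slots)
# 3  -> {'elitism': 1, 'wildcards': 1, 'diagnostic': 0, 'crossover': 1}
# 5  -> {'elitism': 2, 'wildcards': 1, 'diagnostic': 1, 'crossover': 1}
# 10 -> {'elitism': 4, 'wildcards': 1, 'diagnostic': 2, 'crossover': 3}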

