629 changes: 0 additions & 629 deletions skillforge/agents/breeder.py

This file was deleted.

69 changes: 69 additions & 0 deletions skillforge/agents/breeder/__init__.py
@@ -0,0 +1,69 @@
"""Breeder — reflective mutation, multi-parent crossover, learning log, bible publishing.

Inspired by GEPA's Actionable Side Information: mutations are diagnostic,
not random. The Breeder reads execution traces and trait attribution
from the judging pipeline, identifies root causes of failures, and
proposes targeted fixes.

Responsibilities:
- Elitism: top N Skills survive unchanged
- Reflective crossover: combine traits from 2-3 parents guided by attribution
- Diagnostic mutation: fix specific causes surfaced by trait attribution
- Joint component mutation: frontmatter + body + scripts mutate together
- Wildcard: 1+ slots per generation for fresh Skills
- Learning log maintenance: append new lessons each generation
- Bible publishing: extract generalizable findings to ``bible/findings/``

Slot allocation scales with ``target_pop_size`` (never hardcoded; see
``_ranking.compute_slots`` for the formula).

Submodule layout:

_ranking.py compute_slots + rank_skills + _aggregate_fitness (pure)
_prompts.py _build_diagnostic_instructions + _build_crossover_instructions
+ _build_breeding_context (pure string-templating)
_reports.py _extract_lessons_and_report + _extract_lessons
+ _extract_breeding_report + _extract_consolidated
(LLM-calling; degrades gracefully on SDK errors)
main.py breed() + _carry_elite (top-level orchestrator)
bible.py publish_findings_to_bible (disk I/O, fire-and-forget)
"""

from __future__ import annotations

# Re-expose imports the old breeder.py module aliased so test patches
# targeting ``skillforge.agents.breeder.breed_next_gen`` and
# ``skillforge.agents.breeder.BIBLE_DIR`` continue to resolve.
from skillforge.agents.breeder._ranking import (
_aggregate_fitness,
compute_slots,
rank_skills,
)
from skillforge.agents.breeder._reports import (
_extract_breeding_report,
_extract_consolidated,
_extract_lessons,
_extract_lessons_and_report,
)
from skillforge.agents.breeder.bible import publish_findings_to_bible
from skillforge.agents.breeder.main import _carry_elite, breed
from skillforge.agents.spawner import breed_next_gen, spawn_gen0
from skillforge.config import BIBLE_DIR

__all__ = [
"breed",
"compute_slots",
"rank_skills",
"publish_findings_to_bible",
# Re-exports for test-patch stability.
"breed_next_gen",
"spawn_gen0",
"BIBLE_DIR",
# Private helpers re-exported for test access.
"_aggregate_fitness",
"_carry_elite",
"_extract_lessons_and_report",
"_extract_lessons",
"_extract_breeding_report",
"_extract_consolidated",
]
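The re-export block above exists only so test patches written against the old flat breeder.py keep resolving. A minimal sketch of such a patch, assuming the skillforge package is importable (the test name and body below are hypothetical and are not part of this PR):

from unittest import mock

def test_breed_with_stubbed_spawner():
    # Patching the old flat-module path still resolves because the new
    # breeder package re-exposes breed_next_gen in its __init__.py.
    with mock.patch("skillforge.agents.breeder.breed_next_gen") as stub:
        stub.return_value = []
        # ... exercise code that routes through skillforge.agents.breeder ...
        assert stub.call_count == 0  # placeholder assertion for the sketch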
89 changes: 89 additions & 0 deletions skillforge/agents/breeder/_prompts.py
@@ -0,0 +1,89 @@
"""Breeding-instruction prompt builders.

Pure string-templating functions — no LLM calls, no I/O. The actual
breeding happens in ``main.breed()`` which feeds these prompts to the
Spawner.
"""

from __future__ import annotations

from skillforge.agents.breeder._ranking import _aggregate_fitness
from skillforge.models import SkillGenome


def _build_diagnostic_instructions(
low_scorers: list[SkillGenome],
learning_log: list[str],
n_children: int,
) -> str:
"""Build breeding instructions for diagnostic mutation of low scorers."""
if not low_scorers or n_children == 0:
return ""

diagnoses = []
for skill in low_scorers:
worst_traits = sorted(
skill.trait_attribution.items(),
key=lambda kv: kv[1],
)[:3]
trait_notes = "\n".join(
f" - {t}: contribution {c:.2f} — {skill.trait_diagnostics.get(t, 'no diagnosis')}"
for t, c in worst_traits
)
diagnoses.append(
f" Skill {skill.id[:8]}:\n"
f" aggregate fitness: {_aggregate_fitness(skill):.2f}\n"
f" worst traits:\n{trait_notes}"
)

log_section = "\n".join(f" - {entry}" for entry in learning_log[-10:])

return (
f"Produce exactly {n_children} child Skill(s) by DIAGNOSTIC MUTATION of the "
"low-scoring parent(s) below. For each child, identify the root cause of "
"the parent's low fitness (from the trait diagnostics), and make a TARGETED "
"fix — rewrite or remove the underperforming instructions, tighten vague "
"phrasing, add concrete examples for ignored rules, or rescope the trait.\n\n"
"Do NOT make random changes. Every mutation must cite a specific parent "
"trait and explain (in mutation_rationale) how the child addresses it.\n\n"
f"Low-scoring parents:\n{chr(10).join(diagnoses)}\n\n"
f"Recent lessons (learning log):\n{log_section or ' (none yet)'}"
)


def _build_crossover_instructions(
parents: list[SkillGenome],
learning_log: list[str],
n_children: int,
) -> str:
"""Build instructions for reflective crossover across 2-3 parents."""
if not parents or n_children == 0:
return ""

parent_notes = []
for p in parents:
best_traits = sorted(
p.trait_attribution.items(),
key=lambda kv: kv[1],
reverse=True,
)[:3]
trait_summary = ", ".join(f"{t}:{c:+.2f}" for t, c in best_traits) or "(no attribution)"
parent_notes.append(
f" Parent {p.id[:8]} (fitness {_aggregate_fitness(p):.2f}): "
f"best traits → {trait_summary}"
)

log_section = "\n".join(f" - {entry}" for entry in learning_log[-10:])

return (
f"Produce exactly {n_children} child Skill(s) by REFLECTIVE CROSSOVER of the "
f"Pareto-optimal parents below. Combine the HIGH-CONTRIBUTING traits from "
"each parent into each child, preserving the causal mechanism that made "
"each trait successful (not just the surface phrasing).\n\n"
"Crossover is NOT concatenation. For each child, explain (in mutation_rationale) "
"which traits from which parents were combined and WHY those particular "
"traits work together.\n\n"
f"Pareto-optimal parents:\n{chr(10).join(parent_notes)}\n\n"
f"Recent lessons (learning log):\n{log_section or ' (none yet)'}"
)
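To make the shape of these prompts concrete, here is an illustrative call to _build_diagnostic_instructions (not part of this PR's diff). The stand-in object only mimics the attributes the builders actually read — id, trait_attribution, trait_diagnostics, and pareto_objectives; the real model is skillforge.models.SkillGenome, whose constructor is not shown in this change.

from dataclasses import dataclass, field

from skillforge.agents.breeder._prompts import _build_diagnostic_instructions


@dataclass
class _StubGenome:
    # Duck-typed stand-in for SkillGenome, for illustration only.
    id: str
    trait_attribution: dict = field(default_factory=dict)
    trait_diagnostics: dict = field(default_factory=dict)
    pareto_objectives: dict = field(default_factory=dict)


parent = _StubGenome(
    id="a1b2c3d4e5f6",
    trait_attribution={"tone": -0.4, "examples": 0.1},
    trait_diagnostics={"tone": "instructions too vague to act on"},
    pareto_objectives={"accuracy": 0.3, "brevity": 0.6},
)
print(_build_diagnostic_instructions([parent], ["keep rules concrete"], n_children=2))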

79 changes: 79 additions & 0 deletions skillforge/agents/breeder/_ranking.py
@@ -0,0 +1,79 @@
"""Pure ranking helpers — slot allocation + fitness aggregation + sorting.

No I/O, no LLM calls. Used by the main ``breed()`` orchestrator and by
``_build_breeding_context`` when it needs to format a ranked list.
"""

from __future__ import annotations

from skillforge.models import Generation, SkillGenome


def compute_slots(target_pop_size: int) -> dict[str, int]:
"""Allocate breeding slots as a function of ``target_pop_size``.

Formula (from PLAN.md §Step 6e Breeder):

elitism = max(1, target_pop_size // 5 * 2) ~40% floor 1
wildcards = max(1, target_pop_size // 10) ~10% floor 1
remainder = target_pop_size - elitism - wildcards
diagnostic = remainder // 2
crossover = remainder - diagnostic

Worked examples:
pop_size=3 → elitism=1, wildcards=1, diagnostic=0, crossover=1 (sum 3)
pop_size=5 → elitism=2, wildcards=1, diagnostic=1, crossover=1 (sum 5)
pop_size=10 → elitism=4, wildcards=1, diagnostic=2, crossover=3 (sum 10)
"""
if target_pop_size < 1:
raise ValueError(f"target_pop_size must be >=1, got {target_pop_size}")

elitism = max(1, (target_pop_size // 5) * 2)
wildcards = max(1, target_pop_size // 10)

# Ensure elitism + wildcards doesn't exceed target (pathological tiny sizes)
if elitism + wildcards > target_pop_size:
elitism = max(1, target_pop_size - 1)
wildcards = max(0, target_pop_size - elitism)

remainder = target_pop_size - elitism - wildcards
diagnostic = remainder // 2
crossover = remainder - diagnostic

slots = {
"elitism": elitism,
"wildcards": wildcards,
"diagnostic": diagnostic,
"crossover": crossover,
}
assert sum(slots.values()) == target_pop_size, (
f"slot sum {sum(slots.values())} != target {target_pop_size}: {slots}"
)
return slots


# ---------------------------------------------------------------------------
# Ranking
# ---------------------------------------------------------------------------


def _aggregate_fitness(skill: SkillGenome) -> float:
"""Scalar aggregate of Pareto objectives for ranking (charts/selection).

The Pareto front is the real answer; this scalar is a summary for
ordering within the front (and for ranking Skills OFF the front).
"""
if not skill.pareto_objectives:
return 0.0
return sum(skill.pareto_objectives.values()) / len(skill.pareto_objectives)


def rank_skills(generation: Generation) -> list[SkillGenome]:
"""Return generation.skills sorted by (is_pareto_optimal desc, fitness desc)."""
return sorted(
generation.skills,
key=lambda s: (s.is_pareto_optimal, _aggregate_fitness(s)),
reverse=True,
)
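As a quick sanity check (not part of this PR's diff), the worked examples in the compute_slots docstring can be reproduced directly; the only assumption is that the skillforge package is importable:

from skillforge.agents.breeder._ranking import compute_slots

for n in (3, 5, 10):
    slots = compute_slots(n)
    assert sum(slots.values()) == n  # same invariant asserted inside compute_slots
    print(n, slots)
# 3  -> {'elitism': 1, 'wildcards': 1, 'diagnostic': 0, 'crossover': 1}
# 5  -> {'elitism': 2, 'wildcards': 1, 'diagnostic': 1, 'crossover': 1}
# 10 -> {'elitism': 4, 'wildcards': 1, 'diagnostic': 2, 'crossover': 3}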

