Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,23 @@ target-version = "py312"
[tool.ruff.lint]
# E/F: pycodestyle + pyflakes I: isort UP: pyupgrade B: flake8-bugbear
# SIM: flake8-simplify LOG: flake8-logging RET: flake8-return PTH: flake8-use-pathlib
# BLE + TRY (exception hygiene) are added in Wave 2 alongside the bare-except cleanup.
select = ["E", "F", "I", "UP", "B", "SIM", "LOG", "RET", "PTH"]
# BLE: flake8-blind-except TRY: tryceratops (exception hygiene)
select = ["E", "F", "I", "UP", "B", "SIM", "LOG", "RET", "PTH", "BLE", "TRY"]
ignore = [
"E501", # line-too-long (handled by formatter preference)
"RET504", # unnecessary assignment before return (noisy)
"TRY003", # long messages in raise — our domain errors benefit from prose
"TRY301", # abstract raise into own block — stylistic, rarely actionable
"TRY300", # consider-else-block after try — stylistic, not buggy
"TRY004", # prefer TypeError for isinstance failures — our LLM-parse
# results are genuine *value* errors (malformed payload), not
# programmer type errors. ValueError is the right semantic.
]

# Tests may use broad excepts to assert "some error occurred" behavior.
[tool.ruff.lint.extend-per-file-ignores]
"tests/*.py" = ["BLE001"]

# Seed data modules are bulk content, not hand-maintained logic.
[tool.ruff.lint.per-file-ignores]
"skillforge/seeds/batch*.py" = ["E402", "E741", "F401", "F811", "SIM", "B"]
Expand Down
99 changes: 99 additions & 0 deletions skillforge/agents/_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Robust JSON-array extraction from LLM prose.

Shared by agents that ask an LLM for a list of structured items and must
tolerate responses wrapped in prose, code fences, nested backticks, or
string values that contain ``[``/``]``. See ``docs/clean-code.md`` §1
(reuse over duplication).
"""

from __future__ import annotations

import json
import re

from skillforge.errors import ParseError

_FENCE_RE = re.compile(r"```(?:json)?\s*\n?(.*)\n?```", re.DOTALL)


def extract_json_array(text: str) -> list[dict]:
"""Return the outermost JSON array embedded in ``text``.

Handles three response shapes:
1. Raw JSON array — the entire response is a ``[...]``.
2. Fenced block — response wrapped in ```` ```json ... ``` ```` fences.
Matched greedily so nested fences in string values don't split.
3. Array embedded in prose — extracted via bracket-depth scanning
that respects JSON string literal state (``[``/``]`` inside
string values don't perturb the depth counter).

Raises:
ParseError: no balanced JSON array could be located.
"""
candidate = text.strip()

if candidate.startswith("[") and candidate.endswith("]"):
try:
parsed = json.loads(candidate)
except json.JSONDecodeError:
pass
else:
if isinstance(parsed, list):
return parsed

fence_match = _FENCE_RE.search(text)
if fence_match:
fenced = fence_match.group(1).strip()
try:
parsed = json.loads(fenced)
except json.JSONDecodeError:
text_to_scan = fenced
else:
if isinstance(parsed, list):
return parsed
text_to_scan = fenced
else:
text_to_scan = text

array_src = _scan_outermost_array(text_to_scan)
if array_src is not None:
try:
parsed = json.loads(array_src)
except json.JSONDecodeError:
pass
else:
if isinstance(parsed, list):
return parsed

raise ParseError("no valid JSON array found in response text")


def _scan_outermost_array(text: str) -> str | None:
"""Return the outermost balanced ``[...]`` substring, or ``None``."""
start = text.find("[")
if start == -1:
return None

depth = 0
in_string = False
escape = False
for i in range(start, len(text)):
ch = text[i]
if escape:
escape = False
continue
if ch == "\\":
escape = True
continue
if ch == '"':
in_string = not in_string
continue
if in_string:
continue
if ch == "[":
depth += 1
elif ch == "]":
depth -= 1
if depth == 0:
return text[start : i + 1]
return None
51 changes: 31 additions & 20 deletions skillforge/agents/breeder.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from __future__ import annotations

import json
import logging
import re
from datetime import UTC, datetime

Expand All @@ -35,6 +36,8 @@
)
from skillforge.models import Generation, SkillGenome

logger = logging.getLogger("skillforge.agents.breeder")

# ---------------------------------------------------------------------------
# Slot allocation
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -150,9 +153,9 @@ async def breed(
breeding_instructions=diagnostic_instructions,
)
next_gen.extend(diagnostic_children[: slots["diagnostic"]])
except Exception as exc: # noqa: BLE001
# Fall through — wildcard slots below absorb the shortfall
print(f"breeder: diagnostic mutation failed: {exc}")
except Exception: # noqa: BLE001 — subagent boundary: one slot failure must not kill the whole breed
# Fall through — wildcard slots below absorb the shortfall.
logger.exception("breeder.diagnostic_failed")

# --- Reflective crossover: combine 2-3 Pareto-optimal parents ---
pareto_parents = [s for s in ranked if s.is_pareto_optimal][:3]
Expand All @@ -171,8 +174,8 @@ async def breed(
breeding_instructions=crossover_instructions,
)
next_gen.extend(crossover_children[: slots["crossover"]])
except Exception as exc: # noqa: BLE001
print(f"breeder: crossover failed: {exc}")
except Exception: # noqa: BLE001 — subagent boundary: one slot failure must not kill the whole breed
logger.exception("breeder.crossover_failed")

# --- Wildcard: fresh Skills via spawn_gen0 ---
if slots["wildcards"] > 0:
Expand All @@ -182,14 +185,14 @@ async def breed(
pop_size=slots["wildcards"],
)
# Mark wildcards as mutations on the next generation
next_gen_num = (generation.number + 1)
next_gen_num = generation.number + 1
for w in wildcards:
w.generation = next_gen_num
w.mutations = ["wildcard"]
w.mutation_rationale = "Wildcard slot: fresh spawn to prevent convergence"
next_gen.extend(wildcards)
except Exception as exc: # noqa: BLE001
print(f"breeder: wildcard spawn failed: {exc}")
except Exception: # noqa: BLE001 — subagent boundary: one slot failure must not kill the whole breed
logger.exception("breeder.wildcard_spawn_failed")

# --- Trim or pad to exactly target_pop_size ---
next_gen = next_gen[:target_pop_size]
Expand Down Expand Up @@ -413,8 +416,12 @@ async def _extract_lessons(context: str, learning_log: list[str]) -> list[str]:
max_tokens=500,
messages=[{"role": "user", "content": prompt}],
)
except Exception as exc: # noqa: BLE001
return [f"(lesson extraction failed: {exc})"]
except Exception:
# Degrade gracefully — a breeder that blocks on LLM hiccups would
# stall the whole run. The SDK has many concrete error types across
# versions; catching at the boundary keeps the engine moving.
logger.exception("breeder.lesson_extraction_failed")
return ["(lesson extraction failed)"]

match = re.search(r"\[.*\]", text, re.DOTALL)
if not match:
Expand Down Expand Up @@ -452,8 +459,10 @@ async def _extract_breeding_report(
max_tokens=800,
messages=[{"role": "user", "content": prompt}],
)
except Exception as exc: # noqa: BLE001
return f"(breeding report failed: {exc})"
except Exception:
# Degrade gracefully — see _extract_lessons for rationale.
logger.exception("breeder.report_extraction_failed")
return "(breeding report failed)"


async def _extract_consolidated(
Expand Down Expand Up @@ -486,8 +495,10 @@ async def _extract_consolidated(
max_tokens=1200,
messages=[{"role": "user", "content": prompt}],
)
except Exception as exc: # noqa: BLE001
return ([f"(consolidated extraction failed: {exc})"], "")
except Exception:
# Degrade gracefully — see _extract_lessons for rationale.
logger.exception("breeder.consolidated_extraction_failed")
return (["(consolidated extraction failed)"], "")

match = re.search(r"\{.*\}", text, re.DOTALL)
if not match:
Expand Down Expand Up @@ -526,8 +537,8 @@ def publish_findings_to_bible(
findings_dir = BIBLE_DIR / "findings"
try:
findings_dir.mkdir(parents=True, exist_ok=True)
except OSError as exc:
print(f"bible: failed to create findings dir: {exc}")
except OSError:
logger.exception("bible.findings_dir_mkdir_failed")
return

# Determine the next finding number by scanning existing files
Expand Down Expand Up @@ -556,8 +567,8 @@ def publish_findings_to_bible(
)
try:
(findings_dir / filename).write_text(content)
except OSError as exc:
print(f"bible: failed to write finding {filename}: {exc}")
except OSError:
logger.exception("bible.finding_write_failed", extra={"filename": filename})
continue
next_num += 1

Expand All @@ -570,8 +581,8 @@ def publish_findings_to_bible(
existing = "# Evolution Log\n\n*Chronological log of all SkillForge evolution runs.*\n\n"
entry_line = f"- **{timestamp}** — run `{run_id[:8]}` gen {generation}: {len(new_entries)} new finding(s)\n"
log_path.write_text(existing + entry_line)
except OSError as exc:
print(f"bible: failed to update evolution log: {exc}")
except OSError:
logger.exception("bible.evolution_log_write_failed")


def _slugify(text: str) -> str:
Expand Down
104 changes: 10 additions & 94 deletions skillforge/agents/challenge_designer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@

from __future__ import annotations

import json
import re
import uuid

from anthropic import AsyncAnthropic

from skillforge.agents._json import extract_json_array
from skillforge.config import ANTHROPIC_API_KEY, model_for
from skillforge.errors import ParseError
from skillforge.models import Challenge

# JSON schema description embedded in prompts
Expand All @@ -35,90 +35,6 @@
]"""


def _extract_json_array(text: str) -> list[dict]:
"""Extract a JSON array from text.

Robust against:
1. Raw JSON array (ideal case)
2. ``` json ... ``` fences with nested backticks in string values
3. JSON embedded in prose with `[`/`]` characters in string literals

Raises:
ValueError: if no valid JSON array can be extracted.
"""
candidate = text.strip()

# 1. Try the whole text as JSON
if candidate.startswith("[") and candidate.endswith("]"):
try:
result = json.loads(candidate)
if isinstance(result, list):
return result
except json.JSONDecodeError:
pass

# 2. Strip outer ```json ... ``` fence greedily
fence_match = re.search(r"```(?:json)?\s*\n?(.*)\n?```", text, re.DOTALL)
if fence_match:
fenced = fence_match.group(1).strip()
try:
result = json.loads(fenced)
if isinstance(result, list):
return result
except json.JSONDecodeError:
text_to_scan = fenced
else:
text_to_scan = fenced
else:
text_to_scan = text

# 3. Bracket-depth scan respecting string literal state
array = _scan_outermost_array(text_to_scan)
if array is not None:
try:
result = json.loads(array)
if isinstance(result, list):
return result
except json.JSONDecodeError:
pass

raise ValueError("No valid JSON array found in response text")


def _scan_outermost_array(text: str) -> str | None:
"""Find the outermost JSON array via bracket-depth scanning that
respects string literal state. Returns substring including ``[`` and
``]``, or ``None`` if no balanced array found.
"""
start = text.find("[")
if start == -1:
return None

depth = 0
in_string = False
escape = False
for i in range(start, len(text)):
ch = text[i]
if escape:
escape = False
continue
if ch == "\\":
escape = True
continue
if ch == '"':
in_string = not in_string
continue
if in_string:
continue
if ch == "[":
depth += 1
elif ch == "]":
depth -= 1
if depth == 0:
return text[start : i + 1]
return None


_FILE_CONVENTION = """\
## File convention (STRICT — follow exactly)

Expand Down Expand Up @@ -261,14 +177,14 @@ async def design_challenges(specialization: str, n: int = 3) -> list[Challenge]:
text = await _generate(_build_system_prompt(specialization, n))

try:
raw = _extract_json_array(text)
except ValueError:
raw = extract_json_array(text)
except (ValueError, ParseError):
# Attempt 2 — retry with more explicit prompt
text = await _generate(_build_retry_prompt(specialization, n))
try:
raw = _extract_json_array(text)
except ValueError as err:
raise ValueError(
raw = extract_json_array(text)
except (ValueError, ParseError) as err:
raise ParseError(
"challenge designer failed to produce valid JSON after 2 attempts"
) from err

Expand Down Expand Up @@ -331,10 +247,10 @@ async def design_variant_challenge(
prompt = _build_variant_system_prompt(specialization, dimension)
text = await _generate(prompt)
try:
raw = _extract_json_array(text)
except ValueError:
raw = extract_json_array(text)
except (ValueError, ParseError):
text = await _generate(_build_retry_prompt(specialization, n=1))
raw = _extract_json_array(text)
raw = extract_json_array(text)

challenges = _parse_challenges(raw)
if len(challenges) != 1:
Expand Down
Loading
Loading