From ba93506f3e870f2f9d4d9a1ba22e1193b3a80858 Mon Sep 17 00:00:00 2001 From: mrveiss Date: Fri, 3 Apr 2026 17:37:49 +0300 Subject: [PATCH] feat(autoresearch): quality-diversity archive replacing top-K in PromptOptimizer (#3222) Co-Authored-By: Claude Sonnet 4.6 --- .../services/autoresearch/prompt_optimizer.py | 2 +- .../autoresearch/prompt_optimizer_test.py | 23 ++++++------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/autobot-backend/services/autoresearch/prompt_optimizer.py b/autobot-backend/services/autoresearch/prompt_optimizer.py index 5a89d813a..54faccc28 100644 --- a/autobot-backend/services/autoresearch/prompt_optimizer.py +++ b/autobot-backend/services/autoresearch/prompt_optimizer.py @@ -31,7 +31,7 @@ from .archive import Archive from .config import AutoResearchConfig from .models import VariantArchiveEntry -from .scorers import PromptScorer, ScorerResult +from .scorers import PromptScorer logger = logging.getLogger(__name__) diff --git a/autobot-backend/services/autoresearch/prompt_optimizer_test.py b/autobot-backend/services/autoresearch/prompt_optimizer_test.py index c093501e0..110b92469 100644 --- a/autobot-backend/services/autoresearch/prompt_optimizer_test.py +++ b/autobot-backend/services/autoresearch/prompt_optimizer_test.py @@ -15,18 +15,17 @@ from services.autoresearch.models import VariantArchiveEntry from services.autoresearch.prompt_optimizer import ( OptimizationSession, - OptimizationStatus, PromptOptimizer, PromptOptTarget, PromptVariant, ) from services.autoresearch.scorers import ScorerResult + # --------------------------------------------------------------------------- # Helper factory # --------------------------------------------------------------------------- - def _make_variant(vid: str, score: float, round_number: int = 1) -> PromptVariant: return PromptVariant( id=vid, @@ -58,7 +57,6 @@ def _make_entry( # PromptVariant # --------------------------------------------------------------------------- - class TestPromptVariantModel: def test_to_dict(self): variant = PromptVariant( @@ -86,7 +84,6 @@ def test_from_dict_round_trip(self): # OptimizationSession # --------------------------------------------------------------------------- - class TestOptimizationSession: def test_to_dict(self): target = PromptOptTarget( @@ -107,7 +104,6 @@ def test_to_dict(self): # Archive unit tests # --------------------------------------------------------------------------- - class TestArchive: def test_add_retains_all_entries(self): archive = Archive() @@ -187,7 +183,6 @@ def test_serialisation_round_trip(self): # PromptOptimizer integration (archive-aware) # --------------------------------------------------------------------------- - class TestPromptOptimizerLoop: @pytest.fixture def mock_llm(self): @@ -202,15 +197,9 @@ def mock_scorer(self): scorer = AsyncMock() scorer.name = "test_scorer" scorer.score.side_effect = [ - ScorerResult( - score=0.3, raw_score=3, metadata={}, scorer_name="test_scorer" - ), - ScorerResult( - score=0.8, raw_score=8, metadata={}, scorer_name="test_scorer" - ), - ScorerResult( - score=0.5, raw_score=5, metadata={}, scorer_name="test_scorer" - ), + ScorerResult(score=0.3, raw_score=3, metadata={}, scorer_name="test_scorer"), + ScorerResult(score=0.8, raw_score=8, metadata={}, scorer_name="test_scorer"), + ScorerResult(score=0.5, raw_score=5, metadata={}, scorer_name="test_scorer"), ] return scorer @@ -416,7 +405,9 @@ async def benchmark_fn(prompt: str) -> str: assert session.rounds_completed == 0 @pytest.mark.asyncio - async def test_scorer_failure_marks_variant_invalid_in_archive(self, mock_llm): + async def test_scorer_failure_marks_variant_invalid_in_archive( + self, mock_llm + ): """Variants whose scorer raises must have valid_parent=False in archive.""" failing_scorer = AsyncMock() failing_scorer.score.side_effect = RuntimeError("scorer exploded")