From d658444860f35171a486887e0fd1b757cdc533f9 Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 14 May 2026 15:13:24 -0700 Subject: [PATCH 1/3] Add backend support for sim detail feature --- backend/app/features/assistant/__init__.py | 1 + backend/app/features/assistant/api.py | 79 +++++ backend/app/features/assistant/schemas.py | 52 +++ backend/app/features/assistant/service.py | 309 ++++++++++++++++++ backend/app/main.py | 2 + backend/tests/features/assistant/__init__.py | 1 + backend/tests/features/assistant/test_api.py | 132 ++++++++ .../tests/features/assistant/test_service.py | 233 +++++++++++++ 8 files changed, 809 insertions(+) create mode 100644 backend/app/features/assistant/__init__.py create mode 100644 backend/app/features/assistant/api.py create mode 100644 backend/app/features/assistant/schemas.py create mode 100644 backend/app/features/assistant/service.py create mode 100644 backend/tests/features/assistant/__init__.py create mode 100644 backend/tests/features/assistant/test_api.py create mode 100644 backend/tests/features/assistant/test_service.py diff --git a/backend/app/features/assistant/__init__.py b/backend/app/features/assistant/__init__.py new file mode 100644 index 00000000..bfe4f61f --- /dev/null +++ b/backend/app/features/assistant/__init__.py @@ -0,0 +1 @@ +"""Assistant feature package.""" diff --git a/backend/app/features/assistant/api.py b/backend/app/features/assistant/api.py new file mode 100644 index 00000000..1dc2cfef --- /dev/null +++ b/backend/app/features/assistant/api.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +from time import perf_counter +from uuid import UUID, uuid4 + +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy.orm import Session, joinedload, selectinload + +from app.common.dependencies import get_database_session +from app.core.logger import _setup_custom_logger +from app.features.assistant.schemas import SimulationSummaryResponse +from app.features.assistant.service import 
build_simulation_summary +from app.features.simulation.models import Simulation +from app.features.user.manager import current_active_user +from app.features.user.models import User + +router = APIRouter(prefix="/simulations", tags=["Simulation Assistant"]) +logger = _setup_custom_logger(__name__) + + +@router.post( + "/{sim_id}/summary", + response_model=SimulationSummaryResponse, + responses={ + 200: {"description": "Deterministic summary generated successfully."}, + 401: {"description": "Unauthorized."}, + 404: {"description": "Simulation not found."}, + }, +) +def summarize_simulation( + sim_id: UUID, + db: Session = Depends(get_database_session), + user: User = Depends(current_active_user), +) -> SimulationSummaryResponse: + """Generate a deterministic read-only summary for one simulation.""" + + start = perf_counter() + trace_id = uuid4() + + simulation = ( + db.query(Simulation) + .options( + joinedload(Simulation.case), + joinedload(Simulation.machine), + selectinload(Simulation.artifacts), + selectinload(Simulation.links), + ) + .filter(Simulation.id == sim_id) + .one_or_none() + ) + + if simulation is None: + duration_ms = (perf_counter() - start) * 1000 + logger.info( + "simulation_summary trace_id=%s simulation_id=%s user_id=%s success=false " + "status=not_found latency_ms=%.2f citation_count=0 caveat_count=0", + trace_id, + sim_id, + user.id, + duration_ms, + ) + raise HTTPException(status_code=404, detail="Simulation not found") + + summary = build_simulation_summary(simulation) + summary = summary.model_copy(update={"trace_id": trace_id}) + + duration_ms = (perf_counter() - start) * 1000 + logger.info( + "simulation_summary trace_id=%s simulation_id=%s user_id=%s success=true " + "latency_ms=%.2f citation_count=%d caveat_count=%d", + trace_id, + simulation.id, + user.id, + duration_ms, + len(summary.citations), + len(summary.caveats), + ) + + return summary diff --git a/backend/app/features/assistant/schemas.py 
b/backend/app/features/assistant/schemas.py new file mode 100644 index 00000000..410f83a5 --- /dev/null +++ b/backend/app/features/assistant/schemas.py @@ -0,0 +1,52 @@ +from typing import Literal +from uuid import UUID + +from pydantic import Field + +from app.common.schemas.base import CamelOutBaseModel + + +class SummaryCitationOut(CamelOutBaseModel): + """Metadata citation for a deterministic simulation summary.""" + + source_type: Literal[ + "simulation_field", + "case_field", + "machine_field", + "artifact", + "external_link", + ] = Field(..., description="Kind of SimBoard record referenced by the summary.") + path: str = Field( + ..., + description="Stable field path or related-record selector used by the summary.", + ) + label: str = Field(..., description="Human-readable label for the cited source.") + + +class SimulationSummaryResponse(CamelOutBaseModel): + """Structured response returned by the deterministic summary endpoint.""" + + answer: str = Field( + ..., description="Deterministic summary prose for the simulation." 
+ ) + citations: list[SummaryCitationOut] = Field( + default_factory=list, + description="Metadata citations backing claims in the answer.", + ) + assumptions: list[str] = Field( + default_factory=list, + description="Explicit assumptions used by the formatter.", + ) + caveats: list[str] = Field( + default_factory=list, + description="Missing-data or weak-signal warnings for the summary.", + ) + limitations: list[str] = Field( + default_factory=list, + description="Known limits of this deterministic summary implementation.", + ) + suggested_followups: list[str] = Field( + default_factory=list, + description="Non-agentic follow-up checks derived from available metadata.", + ) + trace_id: UUID = Field(..., description="Trace ID for request review and logs.") diff --git a/backend/app/features/assistant/service.py b/backend/app/features/assistant/service.py new file mode 100644 index 00000000..65efc288 --- /dev/null +++ b/backend/app/features/assistant/service.py @@ -0,0 +1,309 @@ +from __future__ import annotations + +from collections import OrderedDict +from datetime import datetime +from typing import Literal + +from app.features.assistant.schemas import SimulationSummaryResponse, SummaryCitationOut +from app.features.simulation.enums import ArtifactKind, ExternalLinkKind +from app.features.simulation.models import Simulation + +CitationSource = Literal[ + "simulation_field", + "case_field", + "machine_field", + "artifact", + "external_link", +] + +LIMITATIONS = [ + "This v1 summary uses only metadata already stored in SimBoard. It does not use retrieval, diagnostics interpretation, or LLM reasoning." 
+] + + +class SummaryDraft: + """Mutable collector used while assembling deterministic summary output.""" + + def __init__(self) -> None: + self.sentences: list[str] = [] + self.caveats: list[str] = [] + self.followups: list[str] = [] + self.citations: OrderedDict[tuple[str, str], SummaryCitationOut] = OrderedDict() + + def add_citation(self, source_type: CitationSource, path: str, label: str) -> None: + self.citations[(source_type, path)] = SummaryCitationOut( + source_type=source_type, + path=path, + label=label, + ) + + +def _enum_value(value: object) -> str: + return str(getattr(value, "value", value)) + + +def _format_timestamp(value: datetime | None) -> str | None: + if value is None: + return None + return value.date().isoformat() + + +def _add_identity_and_status(simulation: Simulation, draft: SummaryDraft) -> None: + case = simulation.case + machine = simulation.machine + is_reference = case.reference_simulation_id == simulation.id + change_count = ( + len(simulation.run_config_deltas) if simulation.run_config_deltas else 0 + ) + + draft.add_citation("simulation_field", "simulation.execution_id", "Execution ID") + draft.add_citation("case_field", "case.name", "Case name") + draft.sentences.append( + f"Simulation {simulation.execution_id} belongs to case {case.name}." + ) + + type_bits = [_enum_value(simulation.simulation_type)] + if is_reference: + type_bits.append("reference") + draft.add_citation( + "case_field", + "case.reference_simulation_id", + "Reference simulation", + ) + else: + type_bits.append("non-reference") + if simulation.run_config_deltas: + draft.sentences.append( + f"It is a non-reference run with {change_count} recorded " + "configuration change(s) versus the case reference simulation." + ) + draft.add_citation( + "simulation_field", + "simulation.run_config_deltas", + "Configuration deltas", + ) + else: + draft.sentences.append( + "It is a non-reference run, but SimBoard does not currently record " + "any configuration deltas for it." 
+ ) + draft.caveats.append( + "This non-reference simulation has no recorded configuration deltas " + "in SimBoard metadata." + ) + + if machine is not None and machine.name: + draft.sentences.append( + f"It is recorded as a {' '.join(type_bits)} simulation on machine " + f"{machine.name} with status {_enum_value(simulation.status)}." + ) + draft.add_citation("machine_field", "machine.name", "Machine name") + else: + draft.sentences.append( + f"It is recorded as a {' '.join(type_bits)} simulation with status " + f"{_enum_value(simulation.status)}." + ) + draft.caveats.append("Machine information is not recorded for this simulation.") + + draft.add_citation( + "simulation_field", "simulation.simulation_type", "Simulation type" + ) + draft.add_citation("simulation_field", "simulation.status", "Simulation status") + + +def _add_configuration(simulation: Simulation, draft: SummaryDraft) -> None: + draft.sentences.append( + f"It uses compset {simulation.compset} ({simulation.compset_alias}) on grid " + f"{simulation.grid_name} at {simulation.grid_resolution} resolution with " + f"{simulation.initialization_type} initialization." 
+ ) + draft.add_citation("simulation_field", "simulation.compset", "Compset") + draft.add_citation("simulation_field", "simulation.compset_alias", "Compset alias") + draft.add_citation("simulation_field", "simulation.grid_name", "Grid name") + draft.add_citation( + "simulation_field", "simulation.grid_resolution", "Grid resolution" + ) + draft.add_citation( + "simulation_field", + "simulation.initialization_type", + "Initialization type", + ) + + +def _add_version_metadata(simulation: Simulation, draft: SummaryDraft) -> None: + version_bits: list[str] = [] + if simulation.git_tag: + version_bits.append(f"tag {simulation.git_tag}") + draft.add_citation("simulation_field", "simulation.git_tag", "Git tag") + if simulation.git_branch: + version_bits.append(f"branch {simulation.git_branch}") + draft.add_citation("simulation_field", "simulation.git_branch", "Git branch") + if simulation.git_commit_hash: + version_bits.append(f"commit {simulation.git_commit_hash}") + draft.add_citation( + "simulation_field", + "simulation.git_commit_hash", + "Git commit hash", + ) + + if version_bits: + draft.sentences.append( + "Recorded version metadata includes " + ", ".join(version_bits) + "." + ) + else: + draft.caveats.append("Version metadata is not recorded for this simulation.") + + +def _add_timeline_metadata(simulation: Simulation, draft: SummaryDraft) -> None: + start_date = _format_timestamp(simulation.simulation_start_date) + end_date = _format_timestamp(simulation.simulation_end_date) + + if start_date and end_date: + draft.sentences.append( + f"The recorded simulation period runs from {start_date} to {end_date}." 
+ ) + draft.add_citation( + "simulation_field", + "simulation.simulation_start_date", + "Simulation start date", + ) + draft.add_citation( + "simulation_field", + "simulation.simulation_end_date", + "Simulation end date", + ) + return + + if start_date: + draft.sentences.append( + f"The recorded simulation period starts on {start_date}, and no end " + "date is stored in SimBoard metadata." + ) + draft.add_citation( + "simulation_field", + "simulation.simulation_start_date", + "Simulation start date", + ) + draft.caveats.append( + "Simulation end date is not recorded in SimBoard metadata." + ) + return + + draft.caveats.append("Simulation start date is not recorded in SimBoard metadata.") + + +def _add_optional_metadata(simulation: Simulation, draft: SummaryDraft) -> None: + if simulation.campaign: + draft.sentences.append( + f"Campaign metadata identifies this run as {simulation.campaign}." + ) + draft.add_citation("simulation_field", "simulation.campaign", "Campaign") + else: + draft.caveats.append("Campaign metadata is not recorded for this simulation.") + + if simulation.experiment_type: + draft.sentences.append( + f"Experiment type metadata records {simulation.experiment_type}." + ) + draft.add_citation( + "simulation_field", + "simulation.experiment_type", + "Experiment type", + ) + else: + draft.caveats.append( + "Experiment type metadata is not recorded for this simulation." 
+ ) + + if simulation.description: + draft.sentences.append( + f"Recorded description: {simulation.description.strip()}" + ) + draft.add_citation("simulation_field", "simulation.description", "Description") + if simulation.key_features: + draft.sentences.append(f"Key features: {simulation.key_features.strip()}") + draft.add_citation( + "simulation_field", "simulation.key_features", "Key features" + ) + if simulation.known_issues: + draft.sentences.append(f"Known issues: {simulation.known_issues.strip()}") + draft.add_citation( + "simulation_field", + "simulation.known_issues", + "Known issues", + ) + if simulation.notes_markdown: + draft.sentences.append("Additional notes are recorded for this simulation.") + draft.add_citation("simulation_field", "simulation.notes_markdown", "Notes") + + +def _add_diagnostics_and_followups(simulation: Simulation, draft: SummaryDraft) -> None: + diagnostic_links = [ + link for link in simulation.links if link.kind == ExternalLinkKind.DIAGNOSTIC + ] + if diagnostic_links: + draft.sentences.append( + f"SimBoard records {len(diagnostic_links)} diagnostic link(s) for this " + "run, but this v1 summary does not interpret diagnostic outputs." + ) + draft.add_citation( + "external_link", + "links[kind=diagnostic]", + "Diagnostic links", + ) + draft.followups.append( + "Open the recorded diagnostic links to review supporting context for this run." + ) + else: + draft.caveats.append( + "No diagnostic links are recorded for this simulation in SimBoard." + ) + + if simulation.run_config_deltas: + draft.followups.append( + "Compare this run against the case reference simulation to review the " + "recorded configuration deltas." + ) + + if simulation.known_issues: + draft.followups.append( + "Review the recorded known issues before using this simulation as a baseline." 
+ ) + + output_artifacts = [ + artifact + for artifact in simulation.artifacts + if artifact.kind == ArtifactKind.OUTPUT + ] + if output_artifacts: + draft.add_citation("artifact", "artifacts[kind=output]", "Output artifacts") + draft.followups.append( + "Open the recorded output artifacts if you need run outputs beyond the metadata summary." + ) + + if not draft.followups: + draft.followups.append( + "Review the simulation detail page metadata for additional provenance and run context." + ) + + +def build_simulation_summary(simulation: Simulation) -> SimulationSummaryResponse: + """Build a deterministic summary from authoritative SimBoard metadata.""" + + draft = SummaryDraft() + _add_identity_and_status(simulation, draft) + _add_configuration(simulation, draft) + _add_version_metadata(simulation, draft) + _add_timeline_metadata(simulation, draft) + _add_optional_metadata(simulation, draft) + _add_diagnostics_and_followups(simulation, draft) + + return SimulationSummaryResponse( + answer=" ".join(draft.sentences), + citations=list(draft.citations.values()), + assumptions=[], + caveats=draft.caveats, + limitations=LIMITATIONS, + suggested_followups=draft.followups, + trace_id="00000000-0000-0000-0000-000000000000", + ) diff --git a/backend/app/main.py b/backend/app/main.py index e6c9ea41..afcc1057 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -7,6 +7,7 @@ from app.core.config import settings from app.core.exceptions import register_exception_handlers from app.core.logger import _setup_root_logger +from app.features.assistant.api import router as assistant_router from app.features.ingestion.api import router as ingestion_router from app.features.machine.api import router as machine_router from app.features.pace.api import router as pace_router @@ -35,6 +36,7 @@ def create_app() -> FastAPI: # Register routers. 
app.include_router(simulation_router, prefix=API_BASE) + app.include_router(assistant_router, prefix=API_BASE) app.include_router(case_router, prefix=API_BASE) app.include_router(machine_router, prefix=API_BASE) app.include_router(pace_router, prefix=API_BASE) diff --git a/backend/tests/features/assistant/__init__.py b/backend/tests/features/assistant/__init__.py new file mode 100644 index 00000000..85ac57ab --- /dev/null +++ b/backend/tests/features/assistant/__init__.py @@ -0,0 +1 @@ +"""Tests for assistant feature.""" diff --git a/backend/tests/features/assistant/test_api.py b/backend/tests/features/assistant/test_api.py new file mode 100644 index 00000000..2e865073 --- /dev/null +++ b/backend/tests/features/assistant/test_api.py @@ -0,0 +1,132 @@ +from uuid import UUID, uuid4 + +import pytest +from sqlalchemy.orm import Session + +from app.api.version import API_BASE +from app.features.ingestion.enums import IngestionSourceType, IngestionStatus +from app.features.ingestion.models import Ingestion +from app.features.machine.models import Machine +from app.features.simulation.models import Case, Simulation +from app.features.user.manager import current_active_user +from app.features.user.models import User, UserRole +from app.main import app + + +@pytest.fixture +def authenticated_client(client, normal_user_sync): + def fake_current_user(): + return User( + id=normal_user_sync["id"], + email=normal_user_sync["email"], + is_active=True, + is_verified=True, + role=UserRole.USER, + ) + + app.dependency_overrides[current_active_user] = fake_current_user + return client + + +def _create_case(db: Session, name: str = "assistant_api_case") -> Case: + case = Case(name=name) + db.add(case) + db.flush() + return case + + +def _create_simulation( + db: Session, + normal_user_sync: dict[str, UUID | str], + admin_user_sync: dict[str, UUID | str], + *, + execution_id: str = "assistant-api-exec-1", +) -> Simulation: + machine = db.query(Machine).first() + assert machine is not 
None + + case = _create_case(db) + ingestion = Ingestion( + source_type=IngestionSourceType.BROWSER_UPLOAD, + source_reference=execution_id, + machine_id=machine.id, + triggered_by=normal_user_sync["id"], + status=IngestionStatus.SUCCESS, + created_count=1, + duplicate_count=0, + error_count=0, + ) + db.add(ingestion) + db.flush() + + simulation = Simulation( + case_id=case.id, + execution_id=execution_id, + compset="AQUAPLANET", + compset_alias="QPC4", + grid_name="f19_f19", + grid_resolution="1.9x2.5", + simulation_type="experimental", + status="completed", + initialization_type="startup", + machine_id=machine.id, + simulation_start_date="2023-01-01T00:00:00Z", + git_tag="v2.0.0", + created_by=normal_user_sync["id"], + last_updated_by=admin_user_sync["id"], + ingestion_id=ingestion.id, + ) + db.add(simulation) + db.flush() + case.reference_simulation_id = simulation.id + db.commit() + db.refresh(simulation) + return simulation + + +class TestSummarizeSimulationEndpoint: + def test_authenticated_request_returns_summary_contract( + self, authenticated_client, db: Session, normal_user_sync, admin_user_sync + ) -> None: + simulation = _create_simulation(db, normal_user_sync, admin_user_sync) + + response = authenticated_client.post( + f"{API_BASE}/simulations/{simulation.id}/summary" + ) + + assert response.status_code == 200 + data = response.json() + assert ( + "Simulation assistant-api-exec-1 belongs to case assistant_api_case." + in data["answer"] + ) + assert isinstance(data["citations"], list) + assert data["assumptions"] == [] + assert isinstance(data["caveats"], list) + assert data["limitations"] == [ + "This v1 summary uses only metadata already stored in SimBoard. It does not use retrieval, diagnostics interpretation, or LLM reasoning." 
+ ] + assert isinstance(data["suggestedFollowups"], list) + assert UUID(data["traceId"]) + assert {citation["path"] for citation in data["citations"]} >= { + "simulation.execution_id", + "case.name", + } + + def test_unauthenticated_request_returns_401( + self, client, db: Session, normal_user_sync, admin_user_sync + ) -> None: + simulation = _create_simulation(db, normal_user_sync, admin_user_sync) + + response = client.post(f"{API_BASE}/simulations/{simulation.id}/summary") + + assert response.status_code == 401 + assert response.json() == {"detail": "Not authenticated"} + + def test_unknown_simulation_returns_404(self, authenticated_client) -> None: + response = authenticated_client.post( + f"{API_BASE}/simulations/{uuid4()}/summary" + ) + + assert response.status_code == 404 + assert response.json() == {"detail": "Simulation not found"} diff --git a/backend/tests/features/assistant/test_service.py b/backend/tests/features/assistant/test_service.py new file mode 100644 index 00000000..0a92e957 --- /dev/null +++ b/backend/tests/features/assistant/test_service.py @@ -0,0 +1,233 @@ +from uuid import UUID + +from sqlalchemy.orm import Session + +from app.features.assistant.service import build_simulation_summary +from app.features.ingestion.enums import IngestionSourceType, IngestionStatus +from app.features.ingestion.models import Ingestion +from app.features.machine.models import Machine +from app.features.simulation.enums import ArtifactKind, ExternalLinkKind +from app.features.simulation.models import Artifact, Case, ExternalLink, Simulation + + +def _create_case(db: Session, name: str = "assistant_case") -> Case: + case = Case(name=name) + db.add(case) + db.flush() + return case + + +def _create_simulation( + db: Session, + normal_user_sync: dict[str, UUID | str], + admin_user_sync: dict[str, UUID | str], + *, + case_name: str = "assistant_case", + execution_id: str = "assistant-exec-1", + is_reference: bool = True, + with_diagnostics: bool = True, + 
with_optional_metadata: bool = True, +) -> Simulation: + machine = db.query(Machine).first() + assert machine is not None + + case = _create_case(db, case_name) + + ingestion = Ingestion( + source_type=IngestionSourceType.BROWSER_UPLOAD, + source_reference=execution_id, + machine_id=machine.id, + triggered_by=normal_user_sync["id"], + status=IngestionStatus.SUCCESS, + created_count=1, + duplicate_count=0, + error_count=0, + ) + db.add(ingestion) + db.flush() + + simulation = Simulation( + case_id=case.id, + execution_id=execution_id, + description="Control simulation for deterministic summary." + if with_optional_metadata + else None, + compset="AQUAPLANET", + compset_alias="QPC4", + grid_name="f19_f19", + grid_resolution="1.9x2.5", + simulation_type="experimental", + status="completed", + campaign="historical" if with_optional_metadata else None, + experiment_type="historical" if with_optional_metadata else None, + initialization_type="startup", + machine_id=machine.id, + simulation_start_date="2023-01-01T00:00:00Z", + simulation_end_date="2023-12-31T00:00:00Z" if with_optional_metadata else None, + compiler="gcc", + key_features="High-resolution control setup." + if with_optional_metadata + else None, + known_issues="Sea-ice diagnostics pending QA." + if with_optional_metadata + else None, + notes_markdown="Reviewed by domain team." 
if with_optional_metadata else None, + git_branch="main" if with_optional_metadata else None, + git_tag="v1.2.3" if with_optional_metadata else None, + git_commit_hash="abc123def456" if with_optional_metadata else None, + created_by=normal_user_sync["id"], + last_updated_by=admin_user_sync["id"], + ingestion_id=ingestion.id, + run_config_deltas=( + None + if is_reference + else {"compiler": {"reference": "gcc-11", "current": "gcc-12"}} + ), + ) + db.add(simulation) + db.flush() + + if is_reference: + case.reference_simulation_id = simulation.id + else: + reference = Simulation( + case_id=case.id, + execution_id=f"{execution_id}-ref", + compset="AQUAPLANET", + compset_alias="QPC4", + grid_name="f19_f19", + grid_resolution="1.9x2.5", + simulation_type="experimental", + status="completed", + initialization_type="startup", + machine_id=machine.id, + simulation_start_date="2023-01-01T00:00:00Z", + created_by=normal_user_sync["id"], + last_updated_by=admin_user_sync["id"], + ingestion_id=ingestion.id, + ) + db.add(reference) + db.flush() + case.reference_simulation_id = reference.id + + if with_diagnostics: + db.add( + ExternalLink( + simulation_id=simulation.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/diag", + label="Diagnostics Dashboard", + ) + ) + + db.add( + Artifact( + simulation_id=simulation.id, + kind=ArtifactKind.OUTPUT, + uri="/archive/output.nc", + label="Primary output", + ) + ) + + db.commit() + db.refresh(simulation) + return simulation + + +class TestBuildSimulationSummary: + def test_complete_metadata_produces_stable_summary_and_citations( + self, db: Session, normal_user_sync, admin_user_sync + ) -> None: + simulation = _create_simulation( + db, + normal_user_sync, + admin_user_sync, + execution_id="assistant-complete", + ) + + summary = build_simulation_summary(simulation) + + assert ( + "Simulation assistant-complete belongs to case assistant_case." 
+ in summary.answer + ) + assert ( + "Recorded version metadata includes tag v1.2.3, branch main, commit abc123def456." + in summary.answer + ) + assert "SimBoard records 1 diagnostic link(s) for this run" in summary.answer + assert {citation.path for citation in summary.citations} >= { + "simulation.execution_id", + "case.name", + "simulation.git_tag", + "links[kind=diagnostic]", + } + + def test_missing_optional_metadata_yields_caveats_not_fabrication( + self, db: Session, normal_user_sync, admin_user_sync + ) -> None: + simulation = _create_simulation( + db, + normal_user_sync, + admin_user_sync, + execution_id="assistant-missing", + with_diagnostics=False, + with_optional_metadata=False, + ) + + summary = build_simulation_summary(simulation) + + assert "Recorded description:" not in summary.answer + assert ( + "Version metadata is not recorded for this simulation." in summary.caveats + ) + assert ( + "Campaign metadata is not recorded for this simulation." in summary.caveats + ) + assert ( + "No diagnostic links are recorded for this simulation in SimBoard." 
+ in summary.caveats + ) + + def test_non_reference_simulation_mentions_change_count( + self, db: Session, normal_user_sync, admin_user_sync + ) -> None: + simulation = _create_simulation( + db, + normal_user_sync, + admin_user_sync, + execution_id="assistant-nonref", + is_reference=False, + ) + + summary = build_simulation_summary(simulation) + + assert ( + "non-reference run with 1 recorded configuration change(s)" + in summary.answer + ) + assert "simulation.run_config_deltas" in { + citation.path for citation in summary.citations + } + + def test_absent_diagnostics_adds_limitation_not_interpretation( + self, db: Session, normal_user_sync, admin_user_sync + ) -> None: + simulation = _create_simulation( + db, + normal_user_sync, + admin_user_sync, + execution_id="assistant-nodiag", + with_diagnostics=False, + ) + + summary = build_simulation_summary(simulation) + + assert "interpret diagnostic outputs" not in summary.answer + assert ( + "No diagnostic links are recorded for this simulation in SimBoard." + in summary.caveats + ) + assert summary.limitations == [ + "This v1 summary uses only metadata already stored in SimBoard. It does not use retrieval, diagnostics interpretation, or LLM reasoning." 
+ ] From f314f9cb53fc99b95b5374e73f43aa98d53a6d6b Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 14 May 2026 15:13:37 -0700 Subject: [PATCH 2/3] Add frontend support for sim detail feature --- .../simulations/SimulationDetailsPage.tsx | 12 ++ frontend/src/features/simulations/api/api.ts | 15 +- .../components/SimulationDetailsView.tsx | 163 +++++++++++++++++- .../simulations/hooks/useSimulationSummary.ts | 43 +++++ frontend/src/types/simulation.ts | 25 +++ 5 files changed, 255 insertions(+), 3 deletions(-) create mode 100644 frontend/src/features/simulations/hooks/useSimulationSummary.ts diff --git a/frontend/src/features/simulations/SimulationDetailsPage.tsx b/frontend/src/features/simulations/SimulationDetailsPage.tsx index af101dba..d7178a20 100644 --- a/frontend/src/features/simulations/SimulationDetailsPage.tsx +++ b/frontend/src/features/simulations/SimulationDetailsPage.tsx @@ -1,14 +1,18 @@ import { useEffect, useState } from 'react'; import { useLocation, useParams } from 'react-router-dom'; +import { useAuth } from '@/auth/hooks/useAuth'; import { resolvePaceExecution } from '@/features/simulations/api/api'; import { SimulationDetailsView } from '@/features/simulations/components/SimulationDetailsView'; import { useSimulation } from '@/features/simulations/hooks/useSimulation'; +import { useSimulationSummary } from '@/features/simulations/hooks/useSimulationSummary'; export const SimulationDetailsPage = () => { const { id } = useParams<{ id: string }>(); const location = useLocation(); const { data: simulation, loading, error } = useSimulation(id ?? ''); + const { isAuthenticated, loading: authLoading, loginWithGithub } = useAuth(); + const summary = useSimulationSummary(id ?? 
''); const [paceExperimentId, setPaceExperimentId] = useState(null); const [isResolvingPace, setIsResolvingPace] = useState(false); const [paceResolutionAttempted, setPaceResolutionAttempted] = useState(false); @@ -114,6 +118,14 @@ export const SimulationDetailsPage = () => { paceLink={paceLink} isResolvingPace={isResolvingPace} showPaceFallbackInfo={paceResolutionAttempted && !paceExperimentId} + summary={summary.data} + summaryLoading={summary.loading} + summaryError={summary.error} + summaryRequested={summary.requested} + onGenerateSummary={summary.generate} + canGenerateSummary={isAuthenticated} + isCheckingAuth={authLoading} + onLoginForSummary={loginWithGithub} /> ); }; diff --git a/frontend/src/features/simulations/api/api.ts b/frontend/src/features/simulations/api/api.ts index 4239c6ea..d212ee4e 100644 --- a/frontend/src/features/simulations/api/api.ts +++ b/frontend/src/features/simulations/api/api.ts @@ -1,5 +1,10 @@ import { api } from '@/api/api'; -import type { CaseOut, SimulationCreate, SimulationOut } from '@/types'; +import type { + CaseOut, + SimulationCreate, + SimulationOut, + SimulationSummaryResponseOut, +} from '@/types'; export const SIMULATIONS_URL = '/simulations'; export const CASES_URL = '/cases'; @@ -32,6 +37,14 @@ export const getSimulationById = async (id: string): Promise => { return res.data; }; +export const generateSimulationSummary = async ( + id: string, +): Promise => { + const res = await api.post(`${SIMULATIONS_URL}/${id}/summary`); + + return res.data; +}; + export const resolvePaceExecution = async (executionId: string): Promise => { const res = await api.get(`${PACE_URL}/resolve`, { headers: { 'Cache-Control': 'no-cache' }, diff --git a/frontend/src/features/simulations/components/SimulationDetailsView.tsx b/frontend/src/features/simulations/components/SimulationDetailsView.tsx index 99d503be..dfcb7c37 100644 --- a/frontend/src/features/simulations/components/SimulationDetailsView.tsx +++ 
b/frontend/src/features/simulations/components/SimulationDetailsView.tsx @@ -1,4 +1,4 @@ -import { ArrowLeft, ChevronDown, CircleHelp } from 'lucide-react'; +import { ArrowLeft, ChevronDown, CircleHelp, Sparkles } from 'lucide-react'; import { useState } from 'react'; import { Link } from 'react-router-dom'; @@ -17,7 +17,7 @@ import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/comp import { SimulationPathCard } from '@/features/simulations/components/SimulationPathCard'; import { SimulationTypeBadge } from '@/features/simulations/components/SimulationTypeBadge'; import { cn } from '@/lib/utils'; -import type { SimulationOut } from '@/types'; +import type { SimulationOut, SimulationSummaryResponseOut } from '@/types'; import { getArtifactsByKind } from '@/types/artifact'; import { formatDate, getSimulationDuration } from '@/utils/utils'; @@ -33,6 +33,14 @@ interface SimulationDetailsViewProps { } | null; isResolvingPace?: boolean; showPaceFallbackInfo?: boolean; + summary?: SimulationSummaryResponseOut | null; + summaryLoading?: boolean; + summaryError?: string | null; + summaryRequested?: boolean; + onGenerateSummary?: () => void | Promise; + canGenerateSummary?: boolean; + isCheckingAuth?: boolean; + onLoginForSummary?: () => void; } // -------------------- Small UI helpers -------------------- @@ -99,6 +107,14 @@ export const SimulationDetailsView = ({ paceLink = null, isResolvingPace = false, showPaceFallbackInfo = false, + summary = null, + summaryLoading = false, + summaryError = null, + summaryRequested = false, + onGenerateSummary, + canGenerateSummary = false, + isCheckingAuth = false, + onLoginForSummary, }: SimulationDetailsViewProps) => { const [activeTab, setActiveTab] = useState('summary'); const [isAdvancedMetadataOpen, setIsAdvancedMetadataOpen] = useState(false); @@ -209,6 +225,149 @@ export const SimulationDetailsView = ({ {/* SUMMARY TAB */} + + +
+
+ + + AI Summary + +

+ Generate a read-only summary for this simulation using the metadata already + recorded in SimBoard. +

+
+ +
+
+ + {!summaryRequested && !summary && !summaryError && ( +
+ {canGenerateSummary + ? 'Generate a summary to review key metadata, caveats, citations, limitations, and suggested follow-up questions for this run.' + : 'Log in with GitHub to generate a read-only AI summary for this simulation.'} +
+ )} + + {summaryError && ( +
+ Error generating AI summary: {summaryError} +
+ )} + + {summaryLoading && !summary && ( +
+
+ + Building deterministic summary from SimBoard metadata... +
+
+ )} + + {summary && ( +
+
+ + +
+ + {summary.caveats.length > 0 && ( +
+ +
    + {summary.caveats.map((caveat) => ( +
  • + {caveat} +
  • + ))} +
+
+ )} + + {summary.limitations.length > 0 && ( +
+ +
    + {summary.limitations.map((limitation) => ( +
  • + {limitation} +
  • + ))} +
+
+ )} + + {summary.citations.length > 0 && ( +
+ +
+
+ {summary.citations.map((citation) => ( +
+
{citation.label}
+ + {citation.path} + +
+ ))} +
+
+
+ )} + + {summary.suggestedFollowups.length > 0 && ( +
+ +
    + {summary.suggestedFollowups.map((followup) => ( +
  • + {followup} +
  • + ))} +
+
+ )} +
+ )} +
+
+
diff --git a/frontend/src/features/simulations/hooks/useSimulationSummary.ts b/frontend/src/features/simulations/hooks/useSimulationSummary.ts new file mode 100644 index 00000000..5fdc8a32 --- /dev/null +++ b/frontend/src/features/simulations/hooks/useSimulationSummary.ts @@ -0,0 +1,43 @@ +import axios from 'axios'; +import { useEffect, useState } from 'react'; + +import { generateSimulationSummary } from '@/features/simulations/api/api'; +import type { SimulationSummaryResponseOut } from '@/types'; + +export const useSimulationSummary = (simulationId: string) => { + const [data, setData] = useState(null); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + const [requested, setRequested] = useState(false); + + useEffect(() => { + setData(null); + setLoading(false); + setError(null); + setRequested(false); + }, [simulationId]); + + const generate = async () => { + if (!simulationId) return; + + setRequested(true); + setLoading(true); + setError(null); + + try { + const result = await generateSimulationSummary(simulationId); + setData(result); + } catch (e) { + setData(null); + if (axios.isAxiosError(e) && (e.response?.status === 401 || e.response?.status === 403)) { + setError('Log in to generate an AI summary for this simulation.'); + } else { + setError(e instanceof Error ? 
e.message : 'Failed to generate AI summary.'); + } + } finally { + setLoading(false); + } + }; + + return { data, loading, error, requested, generate }; +}; diff --git a/frontend/src/types/simulation.ts b/frontend/src/types/simulation.ts index 17623c87..3eaa8369 100644 --- a/frontend/src/types/simulation.ts +++ b/frontend/src/types/simulation.ts @@ -9,6 +9,29 @@ export interface SimulationUserPreview { full_name?: string | null; } +export type SummaryCitationSourceType = + | 'simulation_field' + | 'case_field' + | 'machine_field' + | 'artifact' + | 'external_link'; + +export interface SimulationSummaryCitationOut { + sourceType: SummaryCitationSourceType; + path: string; + label: string; +} + +export interface SimulationSummaryResponseOut { + answer: string; + citations: SimulationSummaryCitationOut[]; + assumptions: string[]; + caveats: string[]; + limitations: string[]; + suggestedFollowups: string[]; + traceId: string; +} + /** * API response model for a Case with nested simulation summaries. 
*/ @@ -129,6 +152,8 @@ export interface SimulationOut extends SimulationCreate { // Relationships // ~~~~~~~~~~~~~~ + artifacts: ArtifactOut[]; + links: ExternalLinkOut[]; machine: Machine; // Computed fields From a8def467cfb1e64937e9654ea1b4b6ed7bf91814 Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 14 May 2026 15:46:45 -0700 Subject: [PATCH 3/3] Add frontend support for sim detail feature --- docs/52-ai-summary/phase-2-plan.md | 184 +++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 docs/52-ai-summary/phase-2-plan.md diff --git a/docs/52-ai-summary/phase-2-plan.md b/docs/52-ai-summary/phase-2-plan.md new file mode 100644 index 00000000..338d1dfc --- /dev/null +++ b/docs/52-ai-summary/phase-2-plan.md @@ -0,0 +1,184 @@ +# Plan: Phase 2 LLM Integration for Issue #52 Using Pydantic AI + +## Summary + +Add backend-managed LLM summary generation to the existing simulation summary flow while preserving the current endpoint and detail-page summary panel. Use **Pydantic AI** as the adapter/orchestration layer, support **OpenAI and Anthropic** behind a **backend config switch**, and keep the current deterministic summary generator as the mandatory fallback whenever LLM generation is disabled, misconfigured, invalid, or fails. + +## Key Changes + +### 1. Refactor the assistant backend into explicit phases + +Restructure the existing assistant feature into five responsibilities: + +- **Citation path registry**: define a `VALID_CITATION_PATHS: frozenset[str]` (or a structured registry keyed by `source_type`) in the assistant feature that enumerates every citation path the snapshot builder can produce. This is the single source of truth for citation validation. Update the registry whenever new metadata fields are added to the snapshot. +- **Metadata snapshot builder**: derive one canonical, structured simulation context object as a **dedicated Pydantic model** (`SimulationSnapshot`) from `Simulation` ORM data. 
The snapshot model must use explicit field assignment — never serialize the ORM model directly — and must **exclude PII** (`created_by`, `last_updated_by`, user emails/roles). Only fields present in the citation path registry are included. If the snapshot exceeds a configurable size budget (character or token limit), truncate or sample artifacts/links and add a caveat noting the truncation. +- **Deterministic renderer**: keep the current metadata-only summary builder as the non-LLM fallback path. +- **Pydantic AI summary generator**: define a typed output model matching the public summary response and use Pydantic AI to request structured output from the configured provider. +- **Summary orchestrator**: decide whether to use LLM generation, validate the result against the citation path registry, and fall back to deterministic output when needed. + +This keeps metadata extraction centralized, prevents divergence between deterministic and LLM outputs, and ensures no PII is transmitted to external providers. + +**Likely file mapping** (files under `backend/app/features/assistant/`; new unless marked existing): + +| Responsibility | File | +| ----------------------------- | ----------------------- | +| Citation path registry | `registry.py` | +| Metadata snapshot builder | `snapshot.py` | +| Deterministic renderer | `service.py` (existing) | +| Pydantic AI summary generator | `llm_generator.py` | +| Summary orchestrator | `orchestrator.py` | + +### 2. 
Use Pydantic AI with a config-switched provider model + +Add assistant LLM settings in `backend/app/core/config.py` and corresponding example env templates: + +- `assistant_llm_enabled` +- `assistant_llm_provider` with values `openai` or `anthropic` +- `assistant_openai_api_key` (declare as `pydantic.SecretStr` to prevent leaking in repr/logs) +- `assistant_openai_model` +- `assistant_anthropic_api_key` (declare as `pydantic.SecretStr`) +- `assistant_anthropic_model` +- `assistant_llm_timeout_seconds` +- `assistant_snapshot_max_chars` (optional, default sensible limit e.g. 12000) + +Use **one active provider at runtime** selected by config. Do not add per-request provider selection, provider fan-out, or UI provider controls. + +Use Pydantic AI to: + +- select the model implementation from config +- enforce typed structured output +- keep provider-specific code minimal and localized + +If Pydantic AI still requires provider SDK dependencies for the selected models, add only the necessary packages to `backend/pyproject.toml`. + +### 3. Migrate endpoint to async and extend the response minimally + +Migrate the endpoint handler from `def summarize_simulation` to `async def summarize_simulation` and use Pydantic AI's async interface for provider calls. The deterministic fallback path has no blocking I/O, so the migration is safe. This prevents long-running LLM network calls from exhausting the sync threadpool under concurrent requests. + +**Async DB session strategy**: The current endpoint injects a sync `Session` via `get_database_session` and calls `db.query()`. Inside an `async def` handler these calls would block the event loop. Migrate the endpoint's DB queries to use the existing async session infrastructure (`get_async_session` from `backend/app/core/database_async.py`, async SQLAlchemy). The repo already has `AsyncSessionLocal`, `get_async_session`, and corresponding async test fixtures (`async_db`, `async_client`), so no new infrastructure is needed. 
The `current_active_user` dependency already uses `AsyncSession` internally. + +Preserve the existing endpoint path: + +- `POST /simulations/{sim_id}/summary` + +Keep the existing response fields and add generation metadata with **deterministic nullability rules**: + +- `generation_mode`: `"llm"` or `"deterministic"` — always present, never null. +- `generation_provider`: `"openai"`, `"anthropic"`, or `null` — **always `null` when `generation_mode` is `"deterministic"`** (including fallback). Provider identity on fallback is captured only in server logs, not in the response. +- `generation_model`: configured model name or `null` — same nullability rule as `generation_provider`. + +Existing fields remain: + +- `answer` +- `citations` +- `assumptions` +- `caveats` +- `limitations` +- `suggested_followups` +- `trace_id` + +Frontend types should add the three new fields as **optional** (`generationMode?: ...`) so the frontend is backward-compatible during rolling deploys. Rendering logic should default to `"deterministic"` when the fields are absent. The current simulation details UX should stay as a read-only summary panel rather than changing interaction patterns. + +### 4. Ground and validate LLM output before returning it + +The LLM must not return freeform prose directly to the API response. Require it to produce schema-valid structured output through Pydantic AI, then validate: + +- citations reference only metadata paths present in the canonical snapshot +- citation source types remain within the existing allowed enum values +- required sections are present and non-empty where expected +- diagnostics are not interpreted beyond metadata unless the source data already contains that interpretation + +**Prompt template location**: The system prompt is a module-level constant (`SUMMARY_SYSTEM_PROMPT`) in the LLM generator module (`llm_generator.py`). This keeps it version-controlled, grep-able, and testable without adding file-loading complexity. 
+ +Prompting should instruct the model to: + +- summarize only the provided metadata snapshot +- avoid scientific conclusions not present in source metadata +- use caveats when metadata is missing +- keep citations metadata-grounded only +- avoid retrieval, external knowledge, and hidden assumptions + +If validation fails, parsing fails, provider calls error, or configuration is incomplete: + +- log the failure reason (correlated with the existing `trace_id` on the same structured log line) +- generate the deterministic summary instead +- return `generation_mode="deterministic"` with `generation_provider=null` and `generation_model=null` +- include a standardized caveat string indicating fallback was used (e.g. "This summary was generated using the deterministic fallback because the LLM path was unavailable.") to keep frontend rendering consistent + +### 5. Preserve the current frontend interaction model + +Reuse the existing simulation details summary panel and button. Do not add chat UI, freeform prompts, compare integration, or multi-turn state. + +Frontend changes should be limited to: + +- accepting the new generation metadata fields +- showing whether the summary came from the LLM path or deterministic fallback +- optionally surfacing provider/model in a small secondary disclosure +- preserving existing auth, loading, error, and retry behavior + +The panel remains contextual to a single simulation and read-only. + +### 6. Extend logging and observability + +Keep the current per-request assistant logging and append new fields to the **same structured log line** that already carries `trace_id`: + +- `generation_mode` +- `generation_provider` +- `generation_model` +- `fallback_reason` when deterministic fallback is used +- `llm_latency_ms` separate from total `latency_ms` + +All new log fields share the existing `trace_id` key for correlation. 
+ +If Pydantic AI or the underlying provider exposes token/usage metadata cheaply, log it opportunistically, but do not add it to the public API in this phase. + +## Test Plan + +### Backend + +Add or update assistant tests to cover: + +- LLM disabled returns deterministic output with `generation_mode="deterministic"`, `generation_provider=null`, `generation_model=null` +- config switch selects OpenAI path; mock returns valid structured output +- config switch selects Anthropic path; mock returns valid structured output +- provider misconfiguration falls back deterministically with standardized caveat +- provider exception falls back deterministically with standardized caveat +- schema-invalid LLM output falls back deterministically +- citations referencing paths not in `VALID_CITATION_PATHS` are rejected and fall back deterministically +- successful LLM output returns structured response with `generation_mode="llm"` +- auth and `404` behavior remain unchanged +- `async def` endpoint returns correctly for both LLM and deterministic paths +- snapshot builder excludes PII fields (`created_by`, `last_updated_by`, user emails/roles) +- snapshot truncation fires when the serialized snapshot size exceeds `assistant_snapshot_max_chars`; artifacts/links are truncated and a caveat is added +- API key config fields are `SecretStr` and do not appear in `repr()` or log output +- response contract is stable: all three generation metadata fields are always present with correct nullability +- fallback log line includes `trace_id`, `fallback_reason`, `generation_mode` + +Mock provider/model calls in tests. Do not depend on live network access or real API keys. + +**Note on test client**: The existing sync `TestClient` fixture handles `async def` endpoints correctly (Starlette runs an internal event loop). However, once the endpoint migrates from `get_database_session` (sync) to `get_async_session` (async), the existing `db` fixture override no longer targets the right dependency. 
Existing assistant API tests must migrate to the `async_client` / `async_db` fixtures from `conftest.py` and override `get_async_session` instead. The repo already has these async test fixtures, so no new infrastructure is needed. + +### Frontend + +Update existing frontend typing/render coverage to verify: + +- summary response type accepts generation metadata as optional fields +- summary panel renders LLM vs deterministic badge correctly +- fallback disclosure (standardized caveat) does not break existing summary rendering +- response with missing generation metadata fields (rolling deploy compat) defaults gracefully +- existing login-required behavior remains unchanged + +### Commands + +- `make backend-test` +- `make frontend-lint` +- `pnpm --dir frontend run type-check` +- `make pre-commit-run` + +## Assumptions + +- Phase 2 uses **Pydantic AI** as the thin LLM orchestration layer. +- Support for OpenAI and Anthropic means **one provider selected by backend config**, not per-request choice and not automatic provider failover. +- Deterministic fallback remains mandatory and automatic. +- No retrieval, RAG, curated document indexing, compare workflows, persistence, chat UI, or frontend interaction redesign is included in this phase. +- Public API shape should remain stable aside from the minimal generation metadata additions above.