diff --git a/.claude/agents/devops-engineer-md.md b/.claude/agents/devops-engineer-md.md index 44104163b..fe1ec173f 100644 --- a/.claude/agents/devops-engineer-md.md +++ b/.claude/agents/devops-engineer-md.md @@ -15,7 +15,7 @@ You are a Senior DevOps Engineer specializing in the AutoBot AutoBot enterprise **🚫 REMOTE HOST DEVELOPMENT RULES:** - **NEVER edit configurations directly on remote hosts** (172.16.168.21-25) -- **ALL infrastructure changes MUST be made locally** in `/home/kali/Desktop/AutoBot/` +- **ALL infrastructure changes MUST be made locally** in `/opt/autobot` - **NEVER use SSH to modify configs** on production VMs - **Infrastructure as Code principle** - All configurations in version control - **Use Ansible playbooks** for remote deployments and configuration diff --git a/.claude/agents/documentation-engineer-md.md b/.claude/agents/documentation-engineer-md.md index 42f9ea09e..81a2800ed 100644 --- a/.claude/agents/documentation-engineer-md.md +++ b/.claude/agents/documentation-engineer-md.md @@ -217,7 +217,7 @@ def generate_api_reference(module_name: str): [Code example removed for token optimization] ```bash # Edit locally first -vim /home/kali/Desktop/AutoBot/autobot-vue/src/components/MyComponent.vue +vim autobot-vue/src/components/MyComponent.vue # Then sync to VM1 (172.16.168.21) ./scripts/utilities/sync-frontend.sh components/MyComponent.vue @@ -227,7 +227,7 @@ vim /home/kali/Desktop/AutoBot/autobot-vue/src/components/MyComponent.vue [Code example removed for token optimization] ```bash # Edit locally first -vim /home/kali/Desktop/AutoBot/backend/api/chat.py +vim backend/api/chat.py # Then sync to VM4 (172.16.168.24) ./scripts/utilities/sync-to-vm.sh ai-stack backend/api/ /home/autobot/backend/api/ @@ -237,7 +237,7 @@ ansible-playbook -i ansible/inventory ansible/playbooks/deploy-backend.yml [Code example removed for token optimization] ```bash # Edit locally first -vim /home/kali/Desktop/AutoBot/config/redis.conf +vim config/redis.conf # Then 
deploy via Ansible ansible-playbook -i ansible/inventory ansible/playbooks/update-redis-config.yml @@ -245,7 +245,7 @@ ansible-playbook -i ansible/inventory ansible/playbooks/update-redis-config.yml [Code example removed for token optimization] ```bash # Edit locally first -vim /home/kali/Desktop/AutoBot/docker-compose.yml +vim docker-compose.yml # Then deploy via Ansible ansible-playbook -i ansible/inventory ansible/playbooks/deploy-infrastructure.yml @@ -264,15 +264,15 @@ ssh autobot@172.16.168.24 "docker-compose up -d" [Code example removed for token optimization] ```bash # RIGHT - Local edit + sync -vim /home/kali/Desktop/AutoBot/app.py +vim app.py ./scripts/utilities/sync-to-vm.sh ai-stack app.py /home/autobot/app.py # RIGHT - Local config + Ansible -vim /home/kali/Desktop/AutoBot/config/redis.conf +vim config/redis.conf ansible-playbook ansible/playbooks/update-redis.yml # RIGHT - Local Docker + deployment -vim /home/kali/Desktop/AutoBot/docker-compose.yml +vim docker-compose.yml ansible-playbook ansible/playbooks/deploy-containers.yml ``` diff --git a/.claude/agents/frontend-designer.md b/.claude/agents/frontend-designer.md index f40bad9a8..a29267c18 100644 --- a/.claude/agents/frontend-designer.md +++ b/.claude/agents/frontend-designer.md @@ -123,7 +123,7 @@ If the user provides images or mockups: [Code example removed for token optimization] ```bash # Edit locally first -vim /home/kali/Desktop/AutoBot/autobot-vue/src/components/MyComponent.vue +vim autobot-vue/src/components/MyComponent.vue # Then sync to VM1 (172.16.168.21) ./scripts/utilities/sync-frontend.sh components/MyComponent.vue @@ -133,7 +133,7 @@ vim /home/kali/Desktop/AutoBot/autobot-vue/src/components/MyComponent.vue [Code example removed for token optimization] ```bash # Edit locally first -vim /home/kali/Desktop/AutoBot/backend/api/chat.py +vim backend/api/chat.py # Then sync to VM4 (172.16.168.24) ./scripts/utilities/sync-to-vm.sh ai-stack backend/api/ /home/autobot/backend/api/ @@ -143,7 
+143,7 @@ ansible-playbook -i ansible/inventory ansible/playbooks/deploy-backend.yml [Code example removed for token optimization] ```bash # Edit locally first -vim /home/kali/Desktop/AutoBot/config/redis.conf +vim config/redis.conf # Then deploy via Ansible ansible-playbook -i ansible/inventory ansible/playbooks/update-redis-config.yml @@ -151,7 +151,7 @@ ansible-playbook -i ansible/inventory ansible/playbooks/update-redis-config.yml [Code example removed for token optimization] ```bash # Edit locally first -vim /home/kali/Desktop/AutoBot/docker-compose.yml +vim docker-compose.yml # Then deploy via Ansible ansible-playbook -i ansible/inventory ansible/playbooks/deploy-infrastructure.yml @@ -170,15 +170,15 @@ ssh autobot@172.16.168.24 "docker-compose up -d" [Code example removed for token optimization] ```bash # RIGHT - Local edit + sync -vim /home/kali/Desktop/AutoBot/app.py +vim app.py ./scripts/utilities/sync-to-vm.sh ai-stack app.py /home/autobot/app.py # RIGHT - Local config + Ansible -vim /home/kali/Desktop/AutoBot/config/redis.conf +vim config/redis.conf ansible-playbook ansible/playbooks/update-redis.yml # RIGHT - Local Docker + deployment -vim /home/kali/Desktop/AutoBot/docker-compose.yml +vim docker-compose.yml ansible-playbook ansible/playbooks/deploy-containers.yml ``` diff --git a/.claude/agents/frontend-engineer-agent.md b/.claude/agents/frontend-engineer-agent.md index ff1f997f4..0d25afb10 100644 --- a/.claude/agents/frontend-engineer-agent.md +++ b/.claude/agents/frontend-engineer-agent.md @@ -23,7 +23,7 @@ You are a Senior Frontend Engineer specializing in the AutoBot Vue 3 application **🚫 REMOTE HOST DEVELOPMENT RULES:** - **NEVER edit code directly on remote hosts** (172.16.168.21-25) -- **ALL edits MUST be made locally** in `/home/kali/Desktop/AutoBot/` +- **ALL edits MUST be made locally** in `/opt/autobot` - **NEVER use SSH to modify files** on remote VMs - **Configuration changes MUST be local** then synced via scripts - **Use 
`./sync-frontend.sh`** for production builds diff --git a/.claude/docs/MANDATORY_LOCAL_EDIT_POLICY.md b/.claude/docs/MANDATORY_LOCAL_EDIT_POLICY.md index 445d4e98f..ac8673647 100644 --- a/.claude/docs/MANDATORY_LOCAL_EDIT_POLICY.md +++ b/.claude/docs/MANDATORY_LOCAL_EDIT_POLICY.md @@ -14,7 +14,7 @@ ``` LOCAL EDIT → TEST → SYNC → DEPLOY → VERIFY ↓ ↓ ↓ ↓ ↓ - /home/kali local rsync restart health + $HOME local rsync restart health /Desktop/ tests or service check AutoBot/ ansible ``` @@ -29,7 +29,7 @@ LOCAL EDIT → TEST → SYNC → DEPLOY → VERIFY | VM4 | 172.16.168.24 | AI Stack | | VM5 | 172.16.168.25 | Browser | -**Local Base:** `/home/kali/Desktop/AutoBot/` — ALL edits here. NO EXCEPTIONS. +**Local Base:** `` — ALL edits here. NO EXCEPTIONS. ## 🔄 Approved Sync Methods @@ -37,7 +37,7 @@ LOCAL EDIT → TEST → SYNC → DEPLOY → VERIFY ```bash rsync -avz --delete \ -e "ssh -i ~/.ssh/autobot_key" \ - /home/kali/Desktop/AutoBot/backend/ \ + backend/ \ autobot@172.16.168.21:/opt/autobot/backend/ ``` @@ -62,7 +62,7 @@ ssh autobot@172.16.168.23 "redis-cli CONFIG SET maxmemory 2gb" ### ✅ CORRECT ```bash # Edit locally -vim /home/kali/Desktop/AutoBot/config.yaml +vim config.yaml # Sync to remote rsync -avz config.yaml autobot@172.16.168.21:/opt/autobot/ @@ -73,7 +73,7 @@ ansible-playbook playbooks/update-redis-config.yml ## 📋 Pre-Remote Checklist -- [ ] Edit made in `/home/kali/Desktop/AutoBot/`? +- [ ] Edit made in `/opt/autobot`? - [ ] Local change tested? - [ ] Sync script/playbook ready? - [ ] SSH keys configured? diff --git a/.claude/skills/commit/SKILL.md b/.claude/skills/commit/SKILL.md index 4b54afe33..c8954cffb 100644 --- a/.claude/skills/commit/SKILL.md +++ b/.claude/skills/commit/SKILL.md @@ -23,7 +23,7 @@ If on `main` or `Dev_new_gui` directly: **STOP** and ask user. 
For every `.py` file that will be committed: ```bash -BLACK=/home/kali/.cache/pre-commit/repoefsi1klb/py_env-python3/bin/black +BLACK=$HOME/.cache/pre-commit/repoefsi1klb/py_env-python3/bin/black for FILE in $(git diff --staged --name-only | grep '\.py$'); do $BLACK --line-length=88 "$FILE" diff --git a/.claude/skills/parallel/SKILL.md b/.claude/skills/parallel/SKILL.md index 319ce55ff..30d490cd4 100644 --- a/.claude/skills/parallel/SKILL.md +++ b/.claude/skills/parallel/SKILL.md @@ -192,16 +192,16 @@ Task( subagent_type="senior-backend-engineer", description="Implement issue #123", prompt=""" - WORKTREE ABSOLUTE PATH: /home/kali/Desktop/AutoBot/.worktrees/issue-123 + WORKTREE ABSOLUTE PATH: .worktrees/issue-123 MANDATORY PATH DISCIPLINE — verify before ANY file operation: 1. Run `pwd` — must match the absolute path above 2. If it doesn't, cd to the absolute path first 3. Use absolute paths for ALL Read/Edit/Write/Bash tool calls - 4. NEVER write files outside /home/kali/Desktop/AutoBot/.worktrees/issue-123 + 4. NEVER write files outside .worktrees/issue-123 Implement GitHub issue #123: - 1. cd /home/kali/Desktop/AutoBot/.worktrees/issue-123 && pwd + 1. cd .worktrees/issue-123 && pwd 2. git checkout -b fix/issue-123 3. Implement the full issue 4. 
Run tests and verify diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..8197c691f --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,28 @@ +## Summary + + + +Closes # + +## Changes + + + +- + +## Test plan + + + +- [ ] + +## Changelog fragment + + + +- [ ] Added `changelog/unreleased/{issue}-{slug}.md` — or N/A (internal change) diff --git a/.github/workflows/autoresearch-image.yml b/.github/workflows/autoresearch-image.yml new file mode 100644 index 000000000..7b77e6966 --- /dev/null +++ b/.github/workflows/autoresearch-image.yml @@ -0,0 +1,38 @@ +name: Build AutoResearch Docker Image + +on: + push: + paths: + - 'autobot-backend/services/autoresearch/Dockerfile' + - 'autobot-backend/services/autoresearch/run_experiment.py' + - 'autobot-backend/services/autoresearch/requirements*.txt' + branches: + - Dev_new_gui + - main + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - uses: actions/checkout@v4 + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v5 + with: + context: autobot-backend/services/autoresearch + file: autobot-backend/services/autoresearch/Dockerfile + push: true + tags: | + ghcr.io/mrveiss/autobot-autoresearch:${{ github.sha }} + ${{ github.ref == 'refs/heads/main' && 'ghcr.io/mrveiss/autobot-autoresearch:latest' || '' }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index db8332a08..9fe00ce0c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,6 +2,11 @@ # Automatically creates releases on merge to main using git-cliff # Analyzes conventional commits to determine version bump and generate changelog # +# Changelog strategy (A+B): +# A — git-cliff writes a per-version file: 
changelog/{version}.md +# B — changelog/unreleased/*.md fragments are compiled into that file first +# (human-written highlights precede the git-cliff commit log) +# # Issue: #1296 name: Release @@ -51,9 +56,9 @@ jobs: echo "version=$NEXT_VERSION" >> $GITHUB_OUTPUT fi - - name: Generate release notes + - name: Generate release notes (git-cliff) if: steps.check.outputs.release_needed == 'true' - id: changelog + id: cliff_notes uses: orhun/git-cliff-action@v4 with: config: cliff.toml @@ -62,7 +67,17 @@ jobs: OUTPUT: RELEASE_NOTES.md GITHUB_REPO: ${{ github.repository }} - - name: Update CHANGELOG.md + - name: Compile changelog fragments + write per-version file (A+B) + if: steps.check.outputs.release_needed == 'true' + run: | + VERSION="${{ steps.check.outputs.version }}" + python3 scripts/compile_changelog.py \ + --version "${VERSION}" \ + --cliff-notes RELEASE_NOTES.md + # Use the compiled per-version file as the GitHub Release body + cp "changelog/${VERSION}.md" RELEASE_NOTES.md + + - name: Update CHANGELOG.md (full history index) if: steps.check.outputs.release_needed == 'true' uses: orhun/git-cliff-action@v4 with: @@ -72,15 +87,15 @@ jobs: OUTPUT: CHANGELOG.md GITHUB_REPO: ${{ github.repository }} - - name: Commit changelog and tag + - name: Commit changelog files and tag if: steps.check.outputs.release_needed == 'true' run: | VERSION="${{ steps.check.outputs.version }}" git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git add CHANGELOG.md + git add CHANGELOG.md "changelog/${VERSION}.md" changelog/ git diff --staged --quiet && echo "No changelog changes" || \ - git commit -m "chore(release): update changelog for ${VERSION}" + git commit -m "chore(release): changelog and fragments for ${VERSION}" git tag -a "${VERSION}" -m "Release ${VERSION}" git push origin main --follow-tags diff --git a/.gitignore b/.gitignore index 4824508e8..fe4f8a6c0 100644 --- a/.gitignore +++ b/.gitignore @@ -444,9 +444,11 
@@ certs/.redis-admin-credentials slm-server/.env dev_creds_backup docs/.obsidian/app.json +docs/.obsidian/workspace.json docs/.obsidian/appearance.json docs/.obsidian/core-plugins.json .obsidian/app.json +.obsidian/workspace.json .obsidian/appearance.json .obsidian/core-plugins.json diff --git a/.mcp/package-lock.json b/.mcp/package-lock.json index e142a27d6..6b0a3f30c 100644 --- a/.mcp/package-lock.json +++ b/.mcp/package-lock.json @@ -797,9 +797,9 @@ } }, "node_modules/path-to-regexp": { - "version": "8.3.0", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.3.0.tgz", - "integrity": "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==", + "version": "8.4.2", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.4.2.tgz", + "integrity": "sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA==", "license": "MIT", "funding": { "type": "opencollective", diff --git a/CHANGELOG.md b/CHANGELOG.md index b0e160b6f..3d438693c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -170,7 +170,7 @@ All notable changes to this project will be documented in this file. - *(skills)* Add scoring weight constants, strengthen tag test, add stub tags -- *(analytics)* Replace hardcoded /home/kali paths, fix PermissionError 500 (#1178) +- *(analytics)* Replace hardcoded $HOME paths, fix PermissionError 500 (#1178) - *(deploy)* Create backend symlink via Ansible, remove dev path (#1168) @@ -562,7 +562,7 @@ All notable changes to this project will be documented in this file. 
- *(ansible)* Correct PYTHONPATH and EnvironmentFile paths in service templates (#941) -- Replace hardcoded /home/kali/Desktop/AutoBot in 20 runtime files (#832) (#836) ([#836](https://github.com/mrveiss/AutoBot-AI/pull/836)) +- Replace hardcoded $AUTOBOT_PROJECT_ROOT in 20 runtime files (#832) (#836) ([#836](https://github.com/mrveiss/AutoBot-AI/pull/836)) - *(infra)* Remove false frontend/slm-frontend conflict - nginx virtual hosts (#926) @@ -874,7 +874,7 @@ All notable changes to this project will be documented in this file. - *(slm)* Fix 10 broken SLM frontend features (#834) -- Replace hardcoded /home/kali/Desktop/AutoBot with env-var lookup (#832) +- Replace hardcoded $AUTOBOT_PROJECT_ROOT with env-var lookup (#832) - *(slm)* Git_tracker DB fallback for rsync deployments (#829) diff --git a/autobot-backend/agents/base_agent.py b/autobot-backend/agents/base_agent.py index 6d9a9f0af..0c490551c 100644 --- a/autobot-backend/agents/base_agent.py +++ b/autobot-backend/agents/base_agent.py @@ -370,7 +370,10 @@ async def _handle_communication_request( return StandardMessage( header=MessageHeader(message_type=MessageType.ERROR), payload=MessagePayload( - content={"error": "Communication request failed", "error_type": type(e).__name__} + content={ + "error": "Communication request failed", + "error_type": type(e).__name__, + } ), ) diff --git a/autobot-backend/agents/enhanced_system_commands_agent.py b/autobot-backend/agents/enhanced_system_commands_agent.py index b21d8f29b..37ddd8905 100644 --- a/autobot-backend/agents/enhanced_system_commands_agent.py +++ b/autobot-backend/agents/enhanced_system_commands_agent.py @@ -172,10 +172,11 @@ def _build_command_messages( self, request: str, context: Optional[Dict[str, Any]] ) -> List[Dict[str, str]]: """Build messages for command generation (Issue #398: extracted).""" - messages = [{"role": "system", "content": self._get_system_commands_prompt()}] + system_prompt = self._get_system_commands_prompt() if context: context_str = 
self._build_context_string(context) - messages.append({"role": "system", "content": f"Context: {context_str}"}) + system_prompt = f"{system_prompt}\n\nContext: {context_str}" + messages = [{"role": "system", "content": system_prompt}] messages.append({"role": "user", "content": request}) return messages diff --git a/autobot-backend/agents/knowledge_retrieval_agent.py b/autobot-backend/agents/knowledge_retrieval_agent.py index b60e5dce6..9402644d5 100644 --- a/autobot-backend/agents/knowledge_retrieval_agent.py +++ b/autobot-backend/agents/knowledge_retrieval_agent.py @@ -236,7 +236,11 @@ async def find_similar_documents( except Exception as e: logger.error("Similar document search error: %s", e) - return {"status": "error", "documents": [], "error": "Document search failed"} + return { + "status": "error", + "documents": [], + "error": "Document search failed", + } async def quick_fact_lookup( self, fact_query: str, max_docs: int = 3 diff --git a/autobot-backend/agents/librarian_assistant.py b/autobot-backend/agents/librarian_assistant.py index 30186aabb..cdd9ef78f 100644 --- a/autobot-backend/agents/librarian_assistant.py +++ b/autobot-backend/agents/librarian_assistant.py @@ -353,7 +353,9 @@ async def assess_content_quality( return self._parse_assessment_response(response, content_data) except Exception as e: logger.error("Error assessing content quality: %s", e) - return self._build_fallback_assessment(content_data, "Content quality assessment failed") + return self._build_fallback_assessment( + content_data, "Content quality assessment failed" + ) async def store_in_knowledge_base( self, content_data: Dict[str, Any], assessment: Dict[str, Any] diff --git a/autobot-backend/agents/npu_code_search_agent.py b/autobot-backend/agents/npu_code_search_agent.py index 8bc5d496c..34d1c30f6 100644 --- a/autobot-backend/agents/npu_code_search_agent.py +++ b/autobot-backend/agents/npu_code_search_agent.py @@ -411,7 +411,11 @@ async def index_codebase( except Exception as e: 
self.logger.error("Codebase indexing failed: %s", e) - return {"status": "error", "error": "Codebase indexing failed", "indexed_files": indexed_files} + return { + "status": "error", + "error": "Codebase indexing failed", + "indexed_files": indexed_files, + } def _build_file_index_data( self, relative_path: str, language: str, content: str, elements: Dict[str, List] diff --git a/autobot-backend/agents/system_knowledge_manager.py b/autobot-backend/agents/system_knowledge_manager.py index 53dc1955f..53b24b3a8 100644 --- a/autobot-backend/agents/system_knowledge_manager.py +++ b/autobot-backend/agents/system_knowledge_manager.py @@ -874,4 +874,8 @@ def get_knowledge_categories(self) -> Dict[str, Any]: except Exception as e: logger.error("Failed to get knowledge categories: %s", e) - return {"success": False, "error": "Failed to retrieve knowledge categories", "categories": {}} + return { + "success": False, + "error": "Failed to retrieve knowledge categories", + "categories": {}, + } diff --git a/autobot-backend/api/__init__.py b/autobot-backend/api/__init__.py index 873d89d2d..69dc18a3f 100644 --- a/autobot-backend/api/__init__.py +++ b/autobot-backend/api/__init__.py @@ -9,7 +9,8 @@ "knowledge", "llm", "sandbox", - "base_terminal", + # Issue #567: base_terminal archived — endpoints migrated to terminal.py + # Issue #3332: base_terminal removed from public API surface "websockets", "enhanced_search", # New NPU-accelerated search API "analytics", # Enhanced backend analytics API diff --git a/autobot-backend/api/agent_config.py b/autobot-backend/api/agent_config.py index 4fc6c3bbd..298aee57b 100644 --- a/autobot-backend/api/agent_config.py +++ b/autobot-backend/api/agent_config.py @@ -109,6 +109,20 @@ async def _get_available_models() -> list: return [] +async def _get_available_providers() -> list: + """Return names of LLM providers that are currently reachable.""" + try: + from services.provider_health import ProviderHealthManager + + results = await 
ProviderHealthManager.check_all_providers( + timeout=3.0, use_cache=True + ) + return [name for name, result in results.items() if result.available] + except Exception as e: + logger.warning("Could not check provider availability: %s", e) + return [] + + class AgentConfig(BaseModel): """Agent configuration model""" @@ -835,7 +849,7 @@ async def get_agent_config( "config_source": config_source, "configuration_options": { "available_models": await _get_available_models(), - "available_providers": ["ollama", "openai", "anthropic"], + "available_providers": await _get_available_providers(), "configurable_settings": ["model", "provider", "enabled", "priority"], }, "health_check": { diff --git a/autobot-backend/api/analytics_architecture.py b/autobot-backend/api/analytics_architecture.py index 58066bfa9..d1593b91d 100644 --- a/autobot-backend/api/analytics_architecture.py +++ b/autobot-backend/api/analytics_architecture.py @@ -1411,10 +1411,19 @@ async def health_check( """ Check the health of the architecture analyzer. + Deprecated: Use /api/system/health for system-wide health checks. + This per-module endpoint will be removed in a future release. (#3333) + Issue #744: Requires admin authentication. """ + logger.warning( + "Deprecated health endpoint called: /api/architecture/health — " + "use /api/system/health instead (#3333)" + ) return { "status": "healthy", "available_patterns": len(PatternType), "templates_loaded": len(PATTERN_TEMPLATES), + "deprecated": True, + "use_instead": "/api/system/health", } diff --git a/autobot-backend/api/analytics_cfg.py b/autobot-backend/api/analytics_cfg.py index feb4a3791..cc6264a29 100644 --- a/autobot-backend/api/analytics_cfg.py +++ b/autobot-backend/api/analytics_cfg.py @@ -1452,13 +1452,22 @@ async def cfg_health( """ Health check for CFG analyzer. + Deprecated: Use /api/system/health for system-wide health checks. + This per-module endpoint will be removed in a future release. 
(#3333) + Issue #744: Requires admin authentication. """ + logger.warning( + "Deprecated health endpoint called: /api/cfg-analytics/health — " + "use /api/system/health instead (#3333)" + ) return JSONResponse( status_code=200, content={ "status": "healthy", "service": "cfg_analyzer", + "deprecated": True, + "use_instead": "/api/system/health", "capabilities": [ "cfg_construction", "unreachable_code_detection", diff --git a/autobot-backend/api/analytics_code_generation.py b/autobot-backend/api/analytics_code_generation.py index b950bc631..e74e286fa 100644 --- a/autobot-backend/api/analytics_code_generation.py +++ b/autobot-backend/api/analytics_code_generation.py @@ -27,7 +27,7 @@ from enum import Enum from typing import Any, Dict, List, Optional, Tuple -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Query from pydantic import BaseModel, Field from auth_middleware import check_admin_permission @@ -47,6 +47,21 @@ router = APIRouter(tags=["code-generation", "analytics"]) logger = logging.getLogger(__name__) + +async def _resolve_source_or_404(source_id: Optional[str]) -> None: + """Raise HTTP 404 if source_id is provided but not found (Issue #3436). + + Uses a lazy import of resolve_source_root to avoid loading the full + codebase_analytics package at module import time. 
+ """ + if source_id is None: + return + from api.codebase_analytics.endpoints.shared import resolve_source_root + + source_root = await resolve_source_root(source_id) + if source_root is None: + raise HTTPException(status_code=404, detail=f"Source '{source_id}' not found") + # Issue #380: Pre-compiled regex patterns for code analysis and extraction _FUNC_DEF_RE = re.compile(r"def\s+(\w+)") # Extract function name _PYTHON_CODE_BLOCK_RE = re.compile(r"```python\n(.*?)```", re.DOTALL) @@ -948,13 +963,22 @@ def get_code_generation_engine() -> CodeGenerationEngine: @router.get("/health") async def get_health(admin_check: bool = Depends(check_admin_permission)): - """Get code generation service health status + """Get code generation service health status. + + Deprecated: Use /api/system/health for system-wide health checks. + This per-module endpoint will be removed in a future release. (#3333) Issue #744: Requires admin authentication. """ + logger.warning( + "Deprecated health endpoint called: /api/code-generation/health — " + "use /api/system/health instead (#3333)" + ) return { "status": "healthy", "service": "code_generation", + "deprecated": True, + "use_instead": "/api/system/health", "features": [ "code_generation", "refactoring", @@ -1072,14 +1096,19 @@ async def rollback_code( @router.get("/stats") -async def get_stats(admin_check: bool = Depends(check_admin_permission)): +async def get_stats( + admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), +): """ Get code generation statistics. Returns usage statistics for generation and refactoring. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. 
""" + await _resolve_source_or_404(source_id) engine = get_code_generation_engine() return await engine.get_stats() diff --git a/autobot-backend/api/analytics_code_generation_test.py b/autobot-backend/api/analytics_code_generation_test.py new file mode 100644 index 000000000..83e60246a --- /dev/null +++ b/autobot-backend/api/analytics_code_generation_test.py @@ -0,0 +1,124 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Unit tests for analytics_code_generation.py source_id scoping (Issue #3436) + +Tests the following functionality: +- _extract_language_stats helper function +- _get_refactoring_description helper function +- _resolve_source_or_404 guard logic (mocked via sys.modules) +""" + +import sys +import types +from pathlib import Path + +import pytest +from unittest.mock import patch + + +def _make_shared_mock(return_path=None): + """Build a fake api.codebase_analytics.endpoints.shared module.""" + async def fake_resolve(source_id): + if source_id is None: + return None + return return_path + + mod = types.ModuleType("api.codebase_analytics.endpoints.shared") + mod.resolve_source_root = fake_resolve + return mod + + +class TestExtractLanguageStats: + """Tests for _extract_language_stats utility function.""" + + def test_empty_dict_returns_empty(self): + """Empty stats should return empty dict.""" + from api.analytics_code_generation import _extract_language_stats + + result = _extract_language_stats({}) + assert result == {} + + def test_excludes_reserved_keys(self): + """Keys in EXCLUDED_LANGUAGE_KEYS should be excluded.""" + from api.analytics_code_generation import _extract_language_stats + + stats = { + "total": 100, + "success": 50, + "tokens": 999, + } + result = _extract_language_stats(stats) + assert result == {} + + def test_extracts_language_with_colon_format(self): + """Keys in 'prefix:lang:suffix' format should produce language entries.""" + from api.analytics_code_generation import 
_extract_language_stats + + stats = { + "gen:python:count": 10, + "gen:typescript:count": 5, + } + result = _extract_language_stats(stats) + assert "python" in result + assert "typescript" in result + + +class TestGetRefactoringDescription: + """Tests for _get_refactoring_description helper.""" + + def test_known_type_returns_non_empty_description(self): + """Each defined RefactoringType should have a description.""" + from api.analytics_code_generation import ( + _get_refactoring_description, + RefactoringType, + ) + + for rt in RefactoringType: + desc = _get_refactoring_description(rt) + assert isinstance(desc, str) + assert len(desc) > 0 + + def test_general_type_returns_fallback(self): + """GENERAL type should return a reasonable description.""" + from api.analytics_code_generation import ( + _get_refactoring_description, + RefactoringType, + ) + + desc = _get_refactoring_description(RefactoringType.GENERAL) + assert "general" in desc.lower() or "quality" in desc.lower() + + +class TestSourceIdGuardLogic: + """Tests for _resolve_source_or_404 guard (mocked via sys.modules injection).""" + + @pytest.mark.asyncio + async def test_none_source_id_does_not_raise(self): + """_resolve_source_or_404 with None should return without raising.""" + from api.analytics_code_generation import _resolve_source_or_404 + + await _resolve_source_or_404(None) + + @pytest.mark.asyncio + async def test_unknown_source_id_raises_404(self): + """_resolve_source_or_404 with unknown source_id should raise HTTP 404.""" + from fastapi import HTTPException + + fake_mod = _make_shared_mock(return_path=None) + with patch.dict(sys.modules, {"api.codebase_analytics.endpoints.shared": fake_mod}): + from api.analytics_code_generation import _resolve_source_or_404 + + with pytest.raises(HTTPException) as exc_info: + await _resolve_source_or_404("unknown-gen-id") + assert exc_info.value.status_code == 404 + + @pytest.mark.asyncio + async def test_valid_source_id_does_not_raise(self): + 
"""_resolve_source_or_404 with valid source_id should return without raising.""" + fake_mod = _make_shared_mock(return_path=Path("/repos/gen-project")) + with patch.dict(sys.modules, {"api.codebase_analytics.endpoints.shared": fake_mod}): + from api.analytics_code_generation import _resolve_source_or_404 + + await _resolve_source_or_404("gen-project-id") diff --git a/autobot-backend/api/analytics_code_review.py b/autobot-backend/api/analytics_code_review.py index e997b1e44..eb592594c 100644 --- a/autobot-backend/api/analytics_code_review.py +++ b/autobot-backend/api/analytics_code_review.py @@ -33,6 +33,22 @@ router = APIRouter(tags=["code-review", "analytics"]) # Prefix set in router_registry + +async def _resolve_source_or_404(source_id: Optional[str]) -> None: + """Raise HTTP 404 if source_id is provided but not found (Issue #3436). + + Uses a lazy import of resolve_source_root to avoid loading the full + codebase_analytics package at module import time. + """ + if source_id is None: + return + from api.codebase_analytics.endpoints.shared import resolve_source_root + + source_root = await resolve_source_root(source_id) + if source_root is None: + raise HTTPException(status_code=404, detail=f"Source '{source_id}' not found") + + # Performance optimization: O(1) lookup for reviewable file extensions (Issue #326) REVIEWABLE_EXTENSIONS = {".py", ".vue", ".ts", ".js"} @@ -456,14 +472,17 @@ async def analyze_diff( commit_range: Optional[str] = Query( None, description="Git commit range (e.g., HEAD~1..HEAD)" ), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ) -> dict[str, Any]: """ Analyze git diff and generate review comments. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope analysis to a project. Returns review findings with severity and suggestions. 
""" + await _resolve_source_or_404(source_id) diff_content = await get_git_diff(commit_range) if not diff_content: @@ -581,14 +600,17 @@ async def get_review_history( admin_check: bool = Depends(check_admin_permission), limit: int = Query(20, ge=1, le=100), since: Optional[str] = Query(None, description="ISO date string"), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ) -> dict[str, Any]: """ Get review history. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. Returns past reviews for trend analysis. """ + await _resolve_source_or_404(source_id) # Issue #543: Return no-data response instead of demo data return _no_data_response( "No review history available. Reviews will be stored here once you run code reviews." @@ -599,14 +621,17 @@ async def get_review_history( async def get_review_metrics( admin_check: bool = Depends(check_admin_permission), period: str = Query("30d", pattern="^(7d|30d|90d)$"), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ) -> dict[str, Any]: """ Get review metrics over time. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. Returns aggregated statistics for trend analysis. """ + await _resolve_source_or_404(source_id) # Issue #543: Return no-data response instead of demo data return _no_data_response( "No review metrics available. Metrics will accumulate as you run code reviews." @@ -662,14 +687,17 @@ async def submit_feedback( @router.get("/summary") async def get_review_summary( admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ) -> dict[str, Any]: """ Get overall review system summary. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. 
Returns dashboard-level metrics. """ + await _resolve_source_or_404(source_id) # Issue #543: Return no-data response instead of demo data return _no_data_response( "No review summary available. Summary statistics will be generated after running code reviews." diff --git a/autobot-backend/api/analytics_code_review_test.py b/autobot-backend/api/analytics_code_review_test.py new file mode 100644 index 000000000..793165f9b --- /dev/null +++ b/autobot-backend/api/analytics_code_review_test.py @@ -0,0 +1,180 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Unit tests for analytics_code_review.py source_id scoping (Issue #3436) + +Tests the following functionality: +- parse_diff helper function +- calculate_review_score helper function +- _no_data_response helper function +- _resolve_source_or_404 guard logic (mocked via sys.modules) +""" + +import sys +import types +import pytest +from pathlib import Path +from unittest.mock import patch + + +def _make_shared_mock(return_path=None): + """Build a fake api.codebase_analytics.endpoints.shared module.""" + async def fake_resolve(source_id): + if source_id is None: + return None + return return_path + + mod = types.ModuleType("api.codebase_analytics.endpoints.shared") + mod.resolve_source_root = fake_resolve + return mod + + +class TestParseDiff: + """Tests for parse_diff utility function.""" + + def test_empty_diff_returns_empty_list(self): + """Empty diff string should return empty list.""" + from api.analytics_code_review import parse_diff + + result = parse_diff("") + assert result == [] + + def test_single_file_diff_parsed(self): + """A single-file diff should produce one entry.""" + from api.analytics_code_review import parse_diff + + diff = ( + "diff --git a/foo.py b/foo.py\n" + "@@ -1,2 +1,3 @@\n" + " existing_line\n" + "+new_line\n" + "-removed_line\n" + ) + result = parse_diff(diff) + assert len(result) == 1 + assert result[0]["path"] == "foo.py" + assert 
result[0]["additions"] == 1 + assert result[0]["deletions"] == 1 + + def test_multiple_files_in_diff(self): + """Multiple file headers in a diff should produce multiple entries.""" + from api.analytics_code_review import parse_diff + + diff = ( + "diff --git a/foo.py b/foo.py\n" + "@@ -1 +1 @@\n" + "+line\n" + "diff --git a/bar.py b/bar.py\n" + "@@ -1 +1 @@\n" + "+another_line\n" + ) + result = parse_diff(diff) + assert len(result) == 2 + + +class TestCalculateReviewScore: + """Tests for calculate_review_score utility function.""" + + def test_no_comments_returns_100(self): + """No review comments should give a perfect score.""" + from api.analytics_code_review import calculate_review_score + + assert calculate_review_score([]) == 100.0 + + def test_critical_comment_reduces_score(self): + """A critical comment should reduce the score significantly.""" + from api.analytics_code_review import ( + calculate_review_score, + ReviewComment, + ReviewSeverity, + ReviewCategory, + ) + + comment = ReviewComment( + id="SEC001-1", + file_path="test.py", + line_number=1, + severity=ReviewSeverity.CRITICAL, + category=ReviewCategory.SECURITY, + message="Critical issue", + ) + score = calculate_review_score([comment]) + assert score < 100.0 + assert score >= 0.0 + + def test_score_clamped_to_zero(self): + """Score should not go below 0.""" + from api.analytics_code_review import ( + calculate_review_score, + ReviewComment, + ReviewSeverity, + ReviewCategory, + ) + + comments = [ + ReviewComment( + id=f"SEC001-{i}", + file_path="test.py", + line_number=i, + severity=ReviewSeverity.CRITICAL, + category=ReviewCategory.SECURITY, + message="Critical issue", + ) + for i in range(20) + ] + score = calculate_review_score(comments) + assert score == 0.0 + + +class TestNoDataResponse: + """Tests for _no_data_response helper.""" + + def test_default_message(self): + """Should include no_data status and message key.""" + from api.analytics_code_review import _no_data_response + + result = 
_no_data_response() + assert result["status"] == "no_data" + assert "message" in result + assert "comments" in result + + def test_custom_message(self): + """Should accept a custom message.""" + from api.analytics_code_review import _no_data_response + + result = _no_data_response("Custom message") + assert result["message"] == "Custom message" + + +class TestSourceIdGuardLogic: + """Tests for _resolve_source_or_404 guard (mocked via sys.modules injection).""" + + @pytest.mark.asyncio + async def test_none_source_id_does_not_raise(self): + """_resolve_source_or_404 with None should return without raising.""" + from api.analytics_code_review import _resolve_source_or_404 + + await _resolve_source_or_404(None) + + @pytest.mark.asyncio + async def test_unknown_source_id_raises_404(self): + """_resolve_source_or_404 with unknown source_id should raise HTTP 404.""" + from fastapi import HTTPException + + fake_mod = _make_shared_mock(return_path=None) + with patch.dict(sys.modules, {"api.codebase_analytics.endpoints.shared": fake_mod}): + from api.analytics_code_review import _resolve_source_or_404 + + with pytest.raises(HTTPException) as exc_info: + await _resolve_source_or_404("nonexistent-id") + assert exc_info.value.status_code == 404 + + @pytest.mark.asyncio + async def test_valid_source_id_does_not_raise(self): + """_resolve_source_or_404 with valid source_id should return without raising.""" + fake_mod = _make_shared_mock(return_path=Path("/repos/review-project")) + with patch.dict(sys.modules, {"api.codebase_analytics.endpoints.shared": fake_mod}): + from api.analytics_code_review import _resolve_source_or_404 + + await _resolve_source_or_404("valid-id") diff --git a/autobot-backend/api/analytics_controller.py b/autobot-backend/api/analytics_controller.py index b21908a2b..14bee32d6 100644 --- a/autobot-backend/api/analytics_controller.py +++ b/autobot-backend/api/analytics_controller.py @@ -64,7 +64,9 @@ re.compile(r"^\d+$"), # Alphanumeric slugs that look 
generated: starts with alpha/digit, contains # both letters and digits, length ≥ 8. Avoids collapsing short word slugs. - re.compile(r"^(?=[a-z0-9_-]{0,200}[a-z])(?=[a-z0-9_-]{0,200}\d)[a-z0-9_-]{8,}$", re.I), + re.compile( + r"^(?=[a-z0-9_-]{0,200}[a-z])(?=[a-z0-9_-]{0,200}\d)[a-z0-9_-]{8,}$", re.I + ), ] diff --git a/autobot-backend/api/analytics_dfa.py b/autobot-backend/api/analytics_dfa.py index 03affa228..18e526d68 100644 --- a/autobot-backend/api/analytics_dfa.py +++ b/autobot-backend/api/analytics_dfa.py @@ -1338,11 +1338,20 @@ async def health_check(admin_check: bool = Depends(check_admin_permission)): """ Health check endpoint. + Deprecated: Use /api/system/health for system-wide health checks. + This per-module endpoint will be removed in a future release. (#3333) + Issue #744: Requires admin authentication. """ + logger.warning( + "Deprecated health endpoint called: /api/dfa-analytics/health — " + "use /api/system/health instead (#3333)" + ) return { "status": "healthy", "service": "data-flow-analysis", + "deprecated": True, + "use_instead": "/api/system/health", "features": [ "variable_tracking", "def_use_chains", diff --git a/autobot-backend/api/analytics_evolution.py b/autobot-backend/api/analytics_evolution.py index 8bc84aa6c..5615c00ad 100644 --- a/autobot-backend/api/analytics_evolution.py +++ b/autobot-backend/api/analytics_evolution.py @@ -33,6 +33,22 @@ tags=["code-evolution", "analytics"] ) # Prefix set in router_registry +async def _resolve_source_or_404(source_id: Optional[str]) -> None: + """Raise HTTP 404 if source_id is provided but not found (Issue #3436). + + Uses a lazy import of resolve_source_root to avoid loading the full + codebase_analytics package at module import time. 
+ """ + if source_id is None: + return + from api.codebase_analytics.endpoints.shared import resolve_source_root + from fastapi import HTTPException + + source_root = await resolve_source_root(source_id) + if source_root is None: + raise HTTPException(status_code=404, detail=f"Source '{source_id}' not found") + + # Performance optimization: O(1) lookup for aggregation granularities (Issue #326) AGGREGATION_GRANULARITIES = {"weekly", "monthly"} @@ -337,10 +353,14 @@ async def get_evolution_timeline( description="Comma-separated metrics", ), admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ): """Get code evolution timeline (Issue #398: refactored). - Issue #744: Requires admin authentication.""" + Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. + """ + await _resolve_source_or_404(source_id) redis_client = get_evolution_redis() requested_metrics = metrics.split(",") @@ -396,6 +416,7 @@ async def get_pattern_evolution( start_date: Optional[str] = Query(None, description="Start date (ISO format)"), end_date: Optional[str] = Query(None, description="End date (ISO format)"), admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ): """ Get pattern evolution data (Issue #315: depth 6→3). @@ -403,7 +424,9 @@ async def get_pattern_evolution( Tracks adoption/removal of patterns like god_class, long_method, etc. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. 
""" + await _resolve_source_or_404(source_id) redis_client = get_evolution_redis() if not redis_client: @@ -551,10 +574,14 @@ def _build_trends_success_response( async def get_quality_trends( days: int = Query(30, description="Number of days to analyze", ge=1, le=365), admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ): """Get quality trend analysis (Issue #398: refactored). - Issue #744: Requires admin authentication.""" + Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. + """ + await _resolve_source_or_404(source_id) redis_client = get_evolution_redis() if not redis_client: @@ -825,6 +852,7 @@ async def export_evolution_data( @router.get("/summary") async def get_evolution_summary( admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ): """ Get a summary of code evolution including key statistics. @@ -832,7 +860,9 @@ async def get_evolution_summary( Provides overview for dashboard display. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. 
""" + await _resolve_source_or_404(source_id) redis_client = get_evolution_redis() summary = { diff --git a/autobot-backend/api/analytics_evolution_test.py b/autobot-backend/api/analytics_evolution_test.py new file mode 100644 index 000000000..a9b678c0d --- /dev/null +++ b/autobot-backend/api/analytics_evolution_test.py @@ -0,0 +1,182 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Unit tests for analytics_evolution.py source_id scoping (Issue #3436) + +Tests the following functionality: +- _decode_redis_value helper function +- _parse_date_range helper function +- _no_data_response helper function +- _calculate_metric_trend helper function +- _resolve_source_or_404 guard logic (mocked via sys.modules) +""" + +import sys +import types +from datetime import datetime, timedelta +from pathlib import Path + +import pytest +from unittest.mock import patch + + +def _make_shared_mock(return_path=None): + """Build a fake api.codebase_analytics.endpoints.shared module.""" + async def fake_resolve(source_id): + if source_id is None: + return None + return return_path + + mod = types.ModuleType("api.codebase_analytics.endpoints.shared") + mod.resolve_source_root = fake_resolve + return mod + + +class TestDecodeRedisValue: + """Tests for _decode_redis_value utility function.""" + + def test_decodes_bytes(self): + """Bytes value should be decoded to string.""" + from api.analytics_evolution import _decode_redis_value + + assert _decode_redis_value(b"hello") == "hello" + + def test_passes_through_string(self): + """String value should pass through unchanged.""" + from api.analytics_evolution import _decode_redis_value + + assert _decode_redis_value("world") == "world" + + +class TestParseDateRange: + """Tests for _parse_date_range helper function.""" + + def test_none_dates_produce_sensible_defaults(self): + """None start/end should produce a ~30-day window ending now.""" + from api.analytics_evolution import 
_parse_date_range + + start_ts, end_ts = _parse_date_range(None, None) + now = datetime.now().timestamp() + + assert abs(end_ts - now) < 5 + expected_start = (datetime.now() - timedelta(days=30)).timestamp() + assert abs(start_ts - expected_start) < 5 + + def test_explicit_dates_are_parsed(self): + """ISO date strings should be parsed to timestamps.""" + from api.analytics_evolution import _parse_date_range + + start = "2025-01-01" + end = "2025-01-31" + start_ts, end_ts = _parse_date_range(start, end) + + expected_start = datetime.fromisoformat(start).timestamp() + expected_end = datetime.fromisoformat(end).timestamp() + assert start_ts == expected_start + assert end_ts == expected_end + + +class TestNoDataResponse: + """Tests for _no_data_response in analytics_evolution.""" + + def test_default_response_structure(self): + """Should include status, message, timeline, patterns, trends keys.""" + from api.analytics_evolution import _no_data_response + + result = _no_data_response() + assert result["status"] == "no_data" + assert "message" in result + assert "timeline" in result + assert "patterns" in result + assert "trends" in result + + def test_custom_message(self): + """Should accept custom message.""" + from api.analytics_evolution import _no_data_response + + result = _no_data_response("Custom evolution message") + assert result["message"] == "Custom evolution message" + + +class TestCalculateMetricTrend: + """Tests for _calculate_metric_trend helper.""" + + def test_returns_none_when_insufficient_data(self): + """Less than 2 data points should return None.""" + from api.analytics_evolution import _calculate_metric_trend + + snapshots = [{"overall_score": 80}] + result = _calculate_metric_trend(snapshots, "overall_score") + assert result is None + + def test_calculates_trend_for_improving_metric(self): + """Positive change should show improving direction.""" + from api.analytics_evolution import _calculate_metric_trend + + snapshots = [ + {"overall_score": 
70}, + {"overall_score": 80}, + ] + result = _calculate_metric_trend(snapshots, "overall_score") + assert result is not None + assert result["direction"] == "improving" + assert result["change"] == 10.0 + assert result["data_points"] == 2 + + def test_calculates_trend_for_declining_metric(self): + """Negative change should show declining direction.""" + from api.analytics_evolution import _calculate_metric_trend + + snapshots = [ + {"overall_score": 90}, + {"overall_score": 70}, + ] + result = _calculate_metric_trend(snapshots, "overall_score") + assert result is not None + assert result["direction"] == "declining" + + def test_stable_metric_when_no_change(self): + """Zero change should show stable direction.""" + from api.analytics_evolution import _calculate_metric_trend + + snapshots = [ + {"overall_score": 80}, + {"overall_score": 80}, + ] + result = _calculate_metric_trend(snapshots, "overall_score") + assert result is not None + assert result["direction"] == "stable" + + +class TestSourceIdGuardLogic: + """Tests for _resolve_source_or_404 guard (mocked via sys.modules injection).""" + + @pytest.mark.asyncio + async def test_none_source_id_does_not_raise(self): + """_resolve_source_or_404 with None should return without raising.""" + from api.analytics_evolution import _resolve_source_or_404 + + await _resolve_source_or_404(None) + + @pytest.mark.asyncio + async def test_unknown_source_id_raises_404(self): + """_resolve_source_or_404 with unknown source_id should raise HTTP 404.""" + from fastapi import HTTPException + + fake_mod = _make_shared_mock(return_path=None) + with patch.dict(sys.modules, {"api.codebase_analytics.endpoints.shared": fake_mod}): + from api.analytics_evolution import _resolve_source_or_404 + + with pytest.raises(HTTPException) as exc_info: + await _resolve_source_or_404("nonexistent-id") + assert exc_info.value.status_code == 404 + + @pytest.mark.asyncio + async def test_valid_source_id_does_not_raise(self): + """_resolve_source_or_404 
with valid source_id should return without raising.""" + fake_mod = _make_shared_mock(return_path=Path("/repos/evolution-project")) + with patch.dict(sys.modules, {"api.codebase_analytics.endpoints.shared": fake_mod}): + from api.analytics_evolution import _resolve_source_or_404 + + await _resolve_source_or_404("valid-id") diff --git a/autobot-backend/api/analytics_llm_patterns.py b/autobot-backend/api/analytics_llm_patterns.py index 06169266f..ea0b8f86c 100644 --- a/autobot-backend/api/analytics_llm_patterns.py +++ b/autobot-backend/api/analytics_llm_patterns.py @@ -1023,10 +1023,20 @@ def get_pattern_analyzer() -> LLMPatternAnalyzer: @router.get("/health") async def get_health(): - """Get LLM pattern analyzer health status""" + """Get LLM pattern analyzer health status. + + Deprecated: Use /api/system/health for system-wide health checks. + This per-module endpoint will be removed in a future release. (#3333) + """ + logger.warning( + "Deprecated health endpoint called: /api/llm-patterns/health — " + "use /api/system/health instead (#3333)" + ) return { "status": "healthy", "service": "llm_pattern_analyzer", + "deprecated": True, + "use_instead": "/api/system/health", "features": [ "prompt_analysis", "usage_tracking", diff --git a/autobot-backend/api/analytics_maintenance.py b/autobot-backend/api/analytics_maintenance.py index 48a32f9d1..40b7245f4 100644 --- a/autobot-backend/api/analytics_maintenance.py +++ b/autobot-backend/api/analytics_maintenance.py @@ -30,7 +30,8 @@ ) logger = logging.getLogger(__name__) -router = APIRouter(prefix="/advanced", tags=["analytics", "advanced"]) +# Issue #3355: prefix moved to router registry (analytics_routers.py) +router = APIRouter(tags=["analytics", "advanced"]) # ============================================================================ diff --git a/autobot-backend/api/analytics_quality.py b/autobot-backend/api/analytics_quality.py index 18225ec4f..822521616 100644 --- a/autobot-backend/api/analytics_quality.py +++ 
b/autobot-backend/api/analytics_quality.py @@ -26,6 +26,22 @@ router = APIRouter(tags=["code-quality", "analytics"]) # Prefix set in router_registry +async def _resolve_source_or_404(source_id: Optional[str]) -> None: + """Raise HTTP 404 if source_id is provided but not found (Issue #3436). + + Uses a lazy import of resolve_source_root to avoid loading the full + codebase_analytics package at module import time. + """ + if source_id is None: + return + from api.codebase_analytics.endpoints.shared import resolve_source_root + from fastapi import HTTPException + + source_root = await resolve_source_root(source_id) + if source_root is None: + raise HTTPException(status_code=404, detail=f"Source '{source_id}' not found") + + # ============================================================================ # Models # ============================================================================ @@ -922,6 +938,7 @@ async def broadcast(self, message: dict): @router.get("/health-score") async def get_health_score( admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ) -> dict[str, Any]: """ Get current codebase health score with breakdown. @@ -929,7 +946,9 @@ async def get_health_score( Returns overall health score, grade, and recommendations. Issue #543: Returns no_data status when no analysis data available. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. 
""" + await _resolve_source_or_404(source_id) data = await get_quality_data_from_storage() # Issue #543: Handle no data case @@ -957,6 +976,7 @@ async def get_health_score( async def get_quality_metrics( category: Optional[MetricCategory] = Query(None, description="Filter by category"), admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ) -> dict[str, Any]: """ Get all quality metrics or filter by category. @@ -964,7 +984,9 @@ async def get_quality_metrics( Returns detailed metrics with grades and trends. Issue #543: Returns no_data status when no analysis data available. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. """ + await _resolve_source_or_404(source_id) data = await get_quality_data_from_storage() # Issue #543: Handle no data case @@ -1013,6 +1035,7 @@ async def get_pattern_distribution( severity: Optional[str] = Query(None, description="Filter by severity"), limit: int = Query(20, ge=1, le=100), admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ) -> dict[str, Any]: """ Get distribution of code patterns detected in the codebase. @@ -1020,7 +1043,9 @@ async def get_pattern_distribution( Returns pattern types with counts, percentages, and severity. Issue #543: Returns no_data status when no analysis data available. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. 
""" + await _resolve_source_or_404(source_id) data = await get_quality_data_from_storage() # Issue #543: Handle no data case @@ -1061,6 +1086,7 @@ async def get_pattern_distribution( async def get_complexity_metrics( top_n: int = Query(10, ge=1, le=50, description="Number of hotspots to return"), admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ) -> dict[str, Any]: """ Get code complexity analysis with hotspots. @@ -1068,7 +1094,9 @@ async def get_complexity_metrics( Returns cyclomatic and cognitive complexity metrics. Issue #543: Returns no_data status when no analysis data available. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. """ + await _resolve_source_or_404(source_id) data = await get_quality_data_from_storage() # Issue #543: Handle no data case @@ -1175,6 +1203,7 @@ async def get_quality_trends( period: str = Query("30d", pattern="^(7d|14d|30d|90d)$"), metric: Optional[str] = Query(None, description="Specific metric to trend"), admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ) -> dict[str, Any]: """ Get quality score trends over time. @@ -1182,7 +1211,9 @@ async def get_quality_trends( Returns historical data for trend analysis. Issue #543: Returns no_data status when no analysis data available. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. 
""" + await _resolve_source_or_404(source_id) data = await get_quality_data_from_storage() if data is None: @@ -1204,6 +1235,7 @@ async def get_quality_trends( @router.get("/snapshot") async def get_quality_snapshot( admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ) -> dict[str, Any]: """ Get complete quality snapshot for the current state. @@ -1211,7 +1243,9 @@ async def get_quality_snapshot( Returns all metrics, patterns, and statistics in one response. Issue #543: Returns no_data status when no analysis data available. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. """ + await _resolve_source_or_404(source_id) data = await get_quality_data_from_storage() # Issue #543: Handle no data case @@ -1272,6 +1306,7 @@ async def drill_down_category( severity: Optional[str] = Query(None), limit: int = Query(50, ge=1, le=200), admin_check: bool = Depends(check_admin_permission), + source_id: Optional[str] = Query(None, description="Project source ID to scope analysis"), ) -> dict[str, Any]: """ Drill down into a specific quality category. @@ -1280,7 +1315,9 @@ async def drill_down_category( Issue #543: Now queries real ChromaDB data instead of demo data. Issue #665: Refactored using helper functions for clarity. Issue #744: Requires admin authentication. + Issue #3436: Accepts optional source_id to scope results to a project. 
""" + await _resolve_source_or_404(source_id) problems, stats = await _get_problems_from_chromadb() if not problems: diff --git a/autobot-backend/api/analytics_quality_test.py b/autobot-backend/api/analytics_quality_test.py new file mode 100644 index 000000000..fbe43761e --- /dev/null +++ b/autobot-backend/api/analytics_quality_test.py @@ -0,0 +1,184 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Unit tests for analytics_quality.py source_id scoping (Issue #3436) + +Tests the following functionality: +- get_grade helper function +- calculate_health_score helper function +- _no_data_response helper function +- _resolve_source_or_404 guard logic (mocked via sys.modules) +""" + +import sys +import types +import pytest +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + + +def _make_shared_mock(return_path=None): + """Build a fake api.codebase_analytics.endpoints.shared module.""" + async def fake_resolve(source_id): + if source_id is None: + return None + return return_path + + mod = types.ModuleType("api.codebase_analytics.endpoints.shared") + mod.resolve_source_root = fake_resolve + return mod + + +class TestGetGrade: + """Tests for get_grade utility function.""" + + def test_a_grade_for_high_score(self): + """Score >= 90 should yield grade A.""" + from api.analytics_quality import get_grade, QualityGrade + + assert get_grade(95.0) == QualityGrade.A + + def test_b_grade_for_mid_score(self): + """Score >= 80 should yield grade B.""" + from api.analytics_quality import get_grade, QualityGrade + + assert get_grade(85.0) == QualityGrade.B + + def test_c_grade(self): + """Score >= 70 should yield grade C.""" + from api.analytics_quality import get_grade, QualityGrade + + assert get_grade(75.0) == QualityGrade.C + + def test_d_grade(self): + """Score >= 60 should yield grade D.""" + from api.analytics_quality import get_grade, QualityGrade + + assert get_grade(65.0) == QualityGrade.D + + 
def test_f_grade_for_low_score(self): + """Score < 60 should yield grade F.""" + from api.analytics_quality import get_grade, QualityGrade + + assert get_grade(50.0) == QualityGrade.F + + def test_boundary_score_90(self): + """Exactly 90 should yield A.""" + from api.analytics_quality import get_grade, QualityGrade + + assert get_grade(90.0) == QualityGrade.A + + def test_boundary_score_just_below_90(self): + """89.9 should yield B not A.""" + from api.analytics_quality import get_grade, QualityGrade + + assert get_grade(89.9) == QualityGrade.B + + +class TestCalculateHealthScore: + """Tests for calculate_health_score helper.""" + + def test_returns_health_score_object(self): + """Should return a HealthScore with overall, grade, and breakdown.""" + from api.analytics_quality import calculate_health_score, HealthScore + + metrics = { + "maintainability": 80.0, + "reliability": 75.0, + "security": 90.0, + "performance": 70.0, + "testability": 65.0, + "documentation": 60.0, + } + result = calculate_health_score(metrics) + + assert isinstance(result, HealthScore) + assert 0 <= result.overall <= 100 + assert result.grade is not None + assert isinstance(result.breakdown, dict) + assert isinstance(result.recommendations, list) + + def test_recommendations_for_low_scores(self): + """Low scores should generate recommendation strings.""" + from api.analytics_quality import calculate_health_score + + metrics = { + "maintainability": 50.0, + "reliability": 55.0, + "security": 45.0, + "performance": 85.0, + "testability": 90.0, + "documentation": 80.0, + } + result = calculate_health_score(metrics) + + assert len(result.recommendations) > 0 + rec_text = " ".join(result.recommendations) + assert any( + cat in rec_text.lower() + for cat in ["maintainability", "reliability", "security"] + ) + + def test_max_5_recommendations(self): + """At most 5 recommendations are returned.""" + from api.analytics_quality import calculate_health_score + + metrics = {k: 40.0 for k in 
["maintainability", "reliability", "security", + "performance", "testability", "documentation"]} + result = calculate_health_score(metrics) + assert len(result.recommendations) <= 5 + + +class TestNoDataResponse: + """Tests for _no_data_response helper.""" + + def test_default_message(self): + """Should return a dict with status=no_data and default message.""" + from api.analytics_quality import _no_data_response + + result = _no_data_response() + assert result["status"] == "no_data" + assert "message" in result + + def test_custom_message(self): + """Should return the custom message when provided.""" + from api.analytics_quality import _no_data_response + + result = _no_data_response("Custom error message") + assert result["message"] == "Custom error message" + + +class TestSourceIdGuardLogic: + """Tests for _resolve_source_or_404 guard (mocked via sys.modules injection).""" + + @pytest.mark.asyncio + async def test_none_source_id_does_not_raise(self): + """_resolve_source_or_404 with None should return without raising.""" + from api.analytics_quality import _resolve_source_or_404 + + # Should not raise even without the codebase_analytics package loaded + await _resolve_source_or_404(None) + + @pytest.mark.asyncio + async def test_unknown_source_id_raises_404(self): + """_resolve_source_or_404 with unknown source_id should raise HTTP 404.""" + from fastapi import HTTPException + + fake_mod = _make_shared_mock(return_path=None) + with patch.dict(sys.modules, {"api.codebase_analytics.endpoints.shared": fake_mod}): + from api.analytics_quality import _resolve_source_or_404 + + with pytest.raises(HTTPException) as exc_info: + await _resolve_source_or_404("nonexistent-id") + assert exc_info.value.status_code == 404 + + @pytest.mark.asyncio + async def test_valid_source_id_does_not_raise(self): + """_resolve_source_or_404 with valid source_id should return without raising.""" + fake_mod = _make_shared_mock(return_path=Path("/repos/myproject")) + with 
patch.dict(sys.modules, {"api.codebase_analytics.endpoints.shared": fake_mod}): + from api.analytics_quality import _resolve_source_or_404 + + # Should not raise + await _resolve_source_or_404("valid-id") diff --git a/autobot-backend/api/analytics_reporting.py b/autobot-backend/api/analytics_reporting.py index ad2f9575e..69fcbb6d7 100644 --- a/autobot-backend/api/analytics_reporting.py +++ b/autobot-backend/api/analytics_reporting.py @@ -24,7 +24,8 @@ from autobot_shared.http_client import get_http_client logger = logging.getLogger(__name__) -router = APIRouter(prefix="/unified", tags=["unified-analytics"]) +# Issue #3355: prefix moved to router registry (analytics_routers.py) +router = APIRouter(tags=["unified-analytics"]) async def fetch_quality_health() -> Dict[str, Any]: diff --git a/autobot-backend/api/base_terminal.py b/autobot-backend/api/base_terminal.py deleted file mode 100644 index 0fbd3479c..000000000 --- a/autobot-backend/api/base_terminal.py +++ /dev/null @@ -1,293 +0,0 @@ -""" -Base Terminal WebSocket Handler -Provides common terminal functionality for websocket handlers including PTY management -Updated to use improved TerminalWebSocketManager for race condition fixes -""" - -import asyncio -import logging -import os -import subprocess # nosec B404 - required for PTY terminal operations -import threading -from abc import ABC, abstractmethod -from typing import Optional - -from utils.terminal_websocket_manager import TerminalWebSocketAdapter - -logger = logging.getLogger(__name__) - - -class BaseTerminalWebSocket(ABC): - """Base class for terminal WebSocket handlers with improved race condition handling""" - - def __init__(self): - # Use new terminal manager for race condition fixes - self.terminal_adapter = TerminalWebSocketAdapter(self, self.terminal_type) - - # Legacy compatibility properties - self.websocket = None - self.pty_fd: Optional[int] = None - self.process: Optional[subprocess.Popen] = None - self.reader_thread: 
Optional[threading.Thread] = None - self.active = False - self.current_dir = os.environ.get("AUTOBOT_BASE_DIR", "/opt/autobot") - self.env = os.environ.copy() - - @abstractmethod - async def send_message(self, message: dict): - """Send message to WebSocket client""" - - @property - @abstractmethod - def terminal_type(self) -> str: - """Get terminal type for logging""" - - async def start_pty_shell(self): - """Start a PTY shell process using improved terminal manager""" - try: - self.active = True - await self.terminal_adapter.start_session(self.websocket) - - # Update legacy properties for compatibility - self.pty_fd = self.terminal_adapter.manager.pty_fd - self.process = self.terminal_adapter.manager.process - - logger.info(f"PTY shell started for {self.terminal_type}") - - except Exception as e: - self.active = False - logger.error(f"Failed to start PTY shell: {e}") - await self.send_message( - { - "type": "error", - "message": "Failed to start {self.terminal_type}", - } - ) - - def _initialize_output_sender(self): - """Initialize async output sender task. Issue #620. - - Sets up the output queue and schedules the async sender coroutine. - """ - import queue - - # Create output queue for thread-safe message passing - self.output_queue = queue.Queue() - - # Cancel existing sender task if any - if hasattr(self, "_output_sender_task"): - self._output_sender_task.cancel() - - # Schedule async output sender - try: - loop = asyncio.get_running_loop() - self._output_sender_task = asyncio.run_coroutine_threadsafe( - self._async_output_sender(), loop - ) - except RuntimeError: - # No loop available - will handle synchronously - self._output_sender_task = None - - def _process_and_queue_pty_data(self, data: bytes) -> bool: - """Process PTY data and queue message for delivery. Issue #620. 
- - Args: - data: Raw bytes from PTY - - Returns: - True if processing succeeded, False if PTY closed - """ - import queue - import time - - if not data: - return False - - try: - output = data.decode("utf-8", errors="replace") - processed_output = self.process_output(output) - - message = { - "type": "output", - "content": processed_output, - "timestamp": time.time(), - } - - try: - self.output_queue.put_nowait(message) - except queue.Full: - # Queue is full, drop oldest message to prevent blocking - try: - self.output_queue.get_nowait() - self.output_queue.put_nowait(message) - except queue.Empty: - pass - - except Exception as e: - logger.error(f"Error processing PTY data: {e}") - - return True - - def _signal_output_sender_stop(self): - """Signal async output sender to stop. Issue #620.""" - import queue - - if hasattr(self, "output_queue"): - try: - self.output_queue.put_nowait({"type": "stop"}) - except queue.Full: - pass - - def _read_pty_output(self): - """Read output from PTY in separate thread with queue-based delivery.""" - import select - - self._initialize_output_sender() - - try: - while self.active and self.pty_fd: - try: - ready, _, _ = select.select([self.pty_fd], [], [], 0.1) - - if ready: - data = os.read(self.pty_fd, 1024) - if not self._process_and_queue_pty_data(data): - break - except OSError: - break - except Exception as e: - logger.error(f"Error reading PTY: {e}") - break - except Exception as e: - logger.error(f"PTY reader thread error: {e}") - finally: - self._signal_output_sender_stop() - - async def _async_output_sender(self): - """Async task to send queued output messages to WebSocket""" - import queue - - if not hasattr(self, "output_queue"): - return - - try: - while self.active: - try: - # Wait for message with timeout - message = await asyncio.wait_for( - asyncio.get_running_loop().run_in_executor( - None, self.output_queue.get, True, 0.1 - ), - timeout=0.2, - ) - - if message.get("type") == "stop": - break - - # Send message if 
WebSocket is active - if self.websocket and self.active: - await self.send_message(message) - - except asyncio.TimeoutError: - continue - except queue.Empty: - continue - except Exception as e: - logger.error(f"Error in async output sender: {e}") - - except Exception as e: - logger.error(f"Async output sender error: {e}") - - def process_output(self, output: str) -> str: - """Process PTY output before sending - override in subclasses""" - return output - - async def send_input(self, text: str): - """Send input to PTY shell using improved terminal manager""" - if self.active: - try: - await self.terminal_adapter.send_input(text) - except Exception as e: - logger.error(f"Error writing to PTY: {e}") - - async def process_input(self, text: str) -> str: - """Process input before sending to PTY - override in subclasses""" - return text - - async def cleanup(self): - """Clean up PTY resources - consolidated implementation""" - logger.info(f"Cleaning up {self.terminal_type} session") - - self.active = False - - # Cancel async output sender task - if hasattr(self, "_output_sender_task") and self._output_sender_task: - try: - self._output_sender_task.cancel() - # Wait for task to complete cancellation - try: - await asyncio.wait_for( - asyncio.wrap_future(self._output_sender_task), timeout=1.0 - ) - except (asyncio.TimeoutError, asyncio.CancelledError): - pass - except Exception as e: - logger.warning(f"Error cancelling output sender task: {e}") - - # Clear output queue - if hasattr(self, "output_queue"): - try: - # Drain the queue - while not self.output_queue.empty(): - try: - self.output_queue.get_nowait() - except Exception: - break - except Exception as e: - logger.warning(f"Error clearing output queue: {e}") - - # Use new terminal manager for cleanup - try: - await self.terminal_adapter.stop_session() - - # Clear legacy properties - self.pty_fd = None - self.process = None - self.reader_thread = None - self.websocket = None - - except Exception as e: - 
logger.error(f"Error during improved cleanup: {e}") - - logger.info(f"{self.terminal_type} cleanup completed") - - async def execute_command(self, command: str) -> bool: - """Send command to PTY shell with common handling""" - if not command.strip(): - return False - - logger.info(f"{self.terminal_type} executing: {command}") - - try: - # Send command to PTY shell - await self.send_input(command + "\n") - return True - - except Exception as e: - logger.error(f"{self.terminal_type} command error: {e}") - await self.send_message( - {"type": "error", "message": "{self.terminal_type} error"} - ) - return False - - async def validate_command(self, command: str) -> bool: - """Validate command before execution (override in subclasses)""" - return True - - def get_terminal_stats(self) -> dict: - """Get terminal session statistics""" - try: - return self.terminal_adapter.get_stats() - except Exception as e: - logger.error(f"Error getting terminal stats: {e}") - return {"error": "Internal server error"} diff --git a/autobot-backend/api/chat_knowledge.py b/autobot-backend/api/chat_knowledge.py index 46bbceba2..df61345f6 100644 --- a/autobot-backend/api/chat_knowledge.py +++ b/autobot-backend/api/chat_knowledge.py @@ -3,8 +3,35 @@ # Copyright (c) 2025 mrveiss # Author: mrveiss """ -Chat Knowledge Management API -Handles chat-specific file associations, knowledge context, and compilation +Chat Knowledge API — session-scoped knowledge lifecycle management. + +Responsibility (issue #3336): + This module owns all knowledge operations that are **scoped to a chat + session**. It is mounted at ``/api/chat-knowledge/*``. + +Scope: + - Creating and updating per-session knowledge contexts (topic, keywords, + user ownership). + - Associating or uploading files to a specific chat session. + - Adding *temporary* knowledge facts that live only for the duration of + a session. + - Presenting pending-decision facts to the user and applying + add-to-KB / keep-temporary / delete decisions. 
+ - Compiling an entire chat conversation into a permanent KB entry. + - Session-fact preservation before conversation deletion (issue #547). + +What does NOT belong here: + - General KB document management (ingestion, tagging, categories) → + api/knowledge.py (mounted at ``/api/knowledge_base/*``) + - LLM-mediated librarian queries → api/kb_librarian.py + +Overlap note (issue #3336): + The ``POST /search`` endpoint in this module delegates to + ``KnowledgeBase.search()`` *and* additionally searches in-memory + temporary facts for the requesting session. It is NOT a duplicate of + ``POST /api/knowledge_base/search``: it adds session-scoped temporary + results that the global endpoint cannot see. Keep both; they serve + different consumers. """ import asyncio diff --git a/autobot-backend/api/codebase_analytics/endpoints/sources.py b/autobot-backend/api/codebase_analytics/endpoints/sources.py index 18d2602a7..05eb59772 100644 --- a/autobot-backend/api/codebase_analytics/endpoints/sources.py +++ b/autobot-backend/api/codebase_analytics/endpoints/sources.py @@ -94,7 +94,9 @@ async def _run_git_clone(url: str, dest: str, branch: str) -> str: stderr=asyncio.subprocess.PIPE, ) try: - _, stderr = await asyncio.wait_for(proc.communicate(), timeout=_GIT_TIMEOUT_SECONDS) + _, stderr = await asyncio.wait_for( + proc.communicate(), timeout=_GIT_TIMEOUT_SECONDS + ) except asyncio.TimeoutError: proc.kill() await proc.wait() @@ -119,7 +121,9 @@ async def _run_git_pull(clone_path: str) -> str: stderr=asyncio.subprocess.PIPE, ) try: - _, stderr = await asyncio.wait_for(proc.communicate(), timeout=_GIT_TIMEOUT_SECONDS) + _, stderr = await asyncio.wait_for( + proc.communicate(), timeout=_GIT_TIMEOUT_SECONDS + ) except asyncio.TimeoutError: proc.kill() await proc.wait() diff --git a/autobot-backend/api/conversation_export.py b/autobot-backend/api/conversation_export.py index 73a8b5596..53c64e62a 100644 --- a/autobot-backend/api/conversation_export.py +++ 
b/autobot-backend/api/conversation_export.py @@ -30,8 +30,8 @@ export_conversation_markdown, import_conversation, ) -from utils.chat_utils import get_chat_history_manager, validate_chat_session_id from utils.chat_exceptions import get_exceptions_lazy +from utils.chat_utils import get_chat_history_manager, validate_chat_session_id logger = logging.getLogger(__name__) diff --git a/autobot-backend/api/intelligent_agent.py b/autobot-backend/api/intelligent_agent.py index aeadff280..ab228048a 100644 --- a/autobot-backend/api/intelligent_agent.py +++ b/autobot-backend/api/intelligent_agent.py @@ -360,8 +360,6 @@ async def websocket_stream(websocket: WebSocket): except Exception as e: logger.error("WebSocket error: %s", e) try: - await websocket.send_json( - {"type": "error", "content": "WebSocket error"} - ) + await websocket.send_json({"type": "error", "content": "WebSocket error"}) except Exception as conn_error: logger.debug("Connection error: %s", conn_error) # Connection closed diff --git a/autobot-backend/api/kb_librarian.py b/autobot-backend/api/kb_librarian.py index a89b754bb..c5fff2a51 100644 --- a/autobot-backend/api/kb_librarian.py +++ b/autobot-backend/api/kb_librarian.py @@ -1,15 +1,44 @@ # AutoBot - AI-Powered Automation Platform # Copyright (c) 2025 mrveiss # Author: mrveiss -"""KB Librarian API endpoints.""" +""" +KB Librarian API — LLM-mediated librarian agent interface. + +Mounted at ``/api/kb-librarian`` (issue #3402). + +Endpoints: + - ``POST /query`` — Process a natural-language query through the + KBLibrarianAgent: intent detection, similarity + search, and optional LLM auto-summarisation. + Accepts per-request overrides for max_results, + similarity_threshold, and auto_summarize. + - ``GET /status`` — Return the runtime configuration of the + librarian agent singleton (enabled flag, + threshold, max results, summarise, KB active). 
+ - ``PUT /configure`` — Update librarian agent runtime parameters: + enabled flag, similarity_threshold (0.0–1.0), + max_results (>=1), and auto_summarize. + +What does NOT belong here: + - Raw KB document CRUD → api/knowledge.py (``/api/knowledge_base/*``) + - Chat-session knowledge lifecycle → api/chat_knowledge.py + (``/api/chat-knowledge/*``) + +Overlap note (issue #3336): + ``POST /query`` overlaps in *outcome* with ``POST /api/knowledge_base/search`` + but routes through a stateful agent singleton with per-request parameter + overrides and LLM summarisation. It is a higher-level abstraction, not a + duplicate. +""" import logging from typing import List, Optional -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, Depends, HTTPException from pydantic import BaseModel from agents.kb_librarian_agent import get_kb_librarian +from auth_middleware import get_current_user from autobot_shared.error_boundaries import ErrorCategory, with_error_handling from type_defs.common import Metadata @@ -43,14 +72,22 @@ class KBQueryResponse(BaseModel): error_code_prefix="KB_LIBRARIAN", ) @router.post("/query", response_model=KBQueryResponse) -async def query_knowledge_base(kb_query: KBQuery): +async def query_knowledge_base( + kb_query: KBQuery, + current_user: dict = Depends(get_current_user), +): """Query the knowledge base using the KB Librarian Agent. + Routes the query through the KBLibrarianAgent singleton: intent detection, + similarity search, and optional LLM auto-summarisation. Per-request + overrides for max_results, similarity_threshold, and auto_summarize are + applied temporarily and restored after the call. + Args: - kb_query: The query parameters + kb_query: The query parameters including optional per-request overrides. Returns: - KBQueryResponse with search results + KBQueryResponse with search results and optional summary. 
""" try: kb_librarian = get_kb_librarian() @@ -92,11 +129,14 @@ async def query_knowledge_base(kb_query: KBQuery): error_code_prefix="KB_LIBRARIAN", ) @router.get("/status") -async def get_kb_librarian_status(): - """Get the status of the KB Librarian Agent. +async def get_kb_librarian_status( + current_user: dict = Depends(get_current_user), +): + """Return the runtime configuration of the KB Librarian Agent singleton. Returns: - Status information about the KB Librarian + Dict containing enabled, similarity_threshold, max_results, + auto_summarize, and knowledge_base_active. """ try: kb_librarian = get_kb_librarian() @@ -125,17 +165,20 @@ async def configure_kb_librarian( similarity_threshold: Optional[float] = None, max_results: Optional[int] = None, auto_summarize: Optional[bool] = None, + current_user: dict = Depends(get_current_user), ): - """Configure the KB Librarian Agent settings. + """Update KB Librarian Agent runtime parameters. + + All parameters are optional; only supplied values are changed. Args: - enabled: Whether the KB Librarian is enabled - similarity_threshold: Minimum similarity score (0.0-1.0) - max_results: Maximum number of results to return - auto_summarize: Whether to automatically summarize findings + enabled: Whether the KB Librarian is enabled. + similarity_threshold: Minimum similarity score (0.0–1.0). + max_results: Maximum number of results to return (>=1). + auto_summarize: Whether to automatically summarise findings. Returns: - Updated configuration + Dict with confirmation message and updated configuration values. 
""" try: kb_librarian = get_kb_librarian() diff --git a/autobot-backend/api/knowledge.py b/autobot-backend/api/knowledge.py index 1ab20e0cf..c9333a79c 100644 --- a/autobot-backend/api/knowledge.py +++ b/autobot-backend/api/knowledge.py @@ -1,7 +1,34 @@ # AutoBot - AI-Powered Automation Platform # Copyright (c) 2025 mrveiss # Author: mrveiss -"""Knowledge Base API endpoints for content management and search with RAG integration.""" +""" +Knowledge Base API — primary document management and general search. + +Responsibility (issue #3336): + This module is the **canonical owner** of all knowledge-base document + lifecycle operations. It is mounted at ``/api/knowledge_base/*``. + +Scope: + - Ingesting content: text facts, URLs, file uploads, man-pages, + machine profiles, and AutoBot documentation. + - General-purpose semantic search (vector + keyword) across the + entire knowledge base. + - Category, tag, collection, and metadata management. + - Import/export job status and statistics. + - Admin health and clear-all operations. + +What does NOT belong here: + - Chat-session-scoped operations (temporary facts, file associations, + session compilation, session-fact preservation) → api/chat_knowledge.py + - LLM-mediated librarian queries (intent detection, auto-summarise, + per-request parameter overrides) → api/kb_librarian.py + +Related modules: + - ``api/chat_knowledge.py`` — chat-session knowledge lifecycle + - ``api/kb_librarian.py`` — librarian agent (unregistered, internal use) + - ``api/knowledge_search.py`` — search sub-router (included here) + - ``api/knowledge_tags.py``, ``api/knowledge_categories.py``, etc. 
+""" import asyncio import json diff --git a/autobot-backend/api/long_running_operations.py b/autobot-backend/api/long_running_operations.py index 2490f83b7..d0282c37c 100644 --- a/autobot-backend/api/long_running_operations.py +++ b/autobot-backend/api/long_running_operations.py @@ -184,9 +184,13 @@ async def start_codebase_indexing( from pathlib import Path try: - safe_codebase_path = Path(validate_path(request.codebase_path, must_exist=True)) + safe_codebase_path = Path( + validate_path(request.codebase_path, must_exist=True) + ) except (ValueError, PermissionError): - raise HTTPException(status_code=400, detail="Invalid or inaccessible codebase path") + raise HTTPException( + status_code=400, detail="Invalid or inaccessible codebase path" + ) estimated_files = 0 try: @@ -259,7 +263,9 @@ async def start_comprehensive_testing( try: safe_test_path = Path(validate_path(request.test_path, must_exist=True)) except (ValueError, PermissionError): - raise HTTPException(status_code=400, detail="Invalid or inaccessible test path") + raise HTTPException( + status_code=400, detail="Invalid or inaccessible test path" + ) estimated_tests = 0 try: diff --git a/autobot-backend/api/mesh_brain.py b/autobot-backend/api/mesh_brain.py deleted file mode 100644 index 0a456b710..000000000 --- a/autobot-backend/api/mesh_brain.py +++ /dev/null @@ -1,50 +0,0 @@ -# AutoBot - AI-Powered Automation Platform -# Copyright (c) 2025 mrveiss -# Author: mrveiss -"""Mesh Brain health and status API endpoints for Neural Mesh RAG (#1994, #2120).""" - -import logging - -from fastapi import APIRouter - -logger = logging.getLogger(__name__) - -router = APIRouter(prefix="/api/mesh/brain", tags=["mesh-brain"]) - -# Module-level scheduler reference set during app startup via set_scheduler(). 
-_scheduler = None - - -def set_scheduler(scheduler) -> None: - """Register the MeshBrainScheduler instance used by this router.""" - global _scheduler - _scheduler = scheduler - logger.info("MeshBrainScheduler registered with API router") - - -@router.get("/status") -async def get_mesh_brain_status() -> dict: - """Return the full job-by-job status of the Mesh Brain scheduler.""" - if _scheduler is None: - return {"running": False, "jobs": {}, "message": "Mesh Brain not initialized"} - return _scheduler.get_status() - - -@router.get("/health") -async def get_mesh_brain_health() -> dict: - """Return a concise health summary — healthy when no jobs have last_result='failed'.""" - if _scheduler is None: - return {"healthy": False, "reason": "not_initialized"} - return _build_health_response(_scheduler.get_status()) - - -def _build_health_response(status: dict) -> dict: - """Derive a health dict from a scheduler status snapshot.""" - failed_jobs = [ - name for name, job in status["jobs"].items() if job["last_result"] == "failed" - ] - return { - "healthy": len(failed_jobs) == 0, - "running": status["running"], - "failed_jobs": failed_jobs, - } diff --git a/autobot-backend/api/monitoring_compat.py b/autobot-backend/api/monitoring_compat.py deleted file mode 100644 index 35b3588e8..000000000 --- a/autobot-backend/api/monitoring_compat.py +++ /dev/null @@ -1,442 +0,0 @@ -# AutoBot - AI-Powered Automation Platform -# Copyright (c) 2025 mrveiss -# Author: mrveiss -""" -Monitoring Compatibility Layer - REST API for Prometheus Metrics -Phase 4: Grafana Integration (Issue #347) - -Provides backwards-compatible REST endpoints that query Prometheus -for metrics data. These endpoints are DEPRECATED and will be removed -in a future version. Use Grafana dashboards for visualization. - -Issue #379: Optimized sequential awaits with asyncio.gather for concurrent queries. 
-""" - -import asyncio -import logging -import warnings -from datetime import datetime, timedelta -from typing import Any, Dict, List, Optional - -import aiohttp -from fastapi import APIRouter, Query - -from autobot_shared.http_client import get_http_client -from autobot_shared.ssot_config import get_config - -logger = logging.getLogger(__name__) - -router = APIRouter(prefix="/metrics", tags=["metrics-compat"]) - -# Prometheus server configuration - use SSOT config -_ssot = get_config() -PROMETHEUS_URL = f"http://{_ssot.vm.main}:{_ssot.port.prometheus}" - -# Deprecation warning message -DEPRECATION_MSG = ( - "This endpoint is deprecated. " - f"Use Grafana dashboards at http://{_ssot.vm.redis}:{_ssot.port.grafana} instead. " - "This endpoint will be removed in v3.0." -) - -# Issue #380: Module-level tuple for monitored service names -_MONITORED_SERVICES = ("backend", "redis", "ollama", "npu-worker", "frontend") - - -async def query_prometheus_instant(query: str) -> Optional[float]: - """ - Execute an instant PromQL query and return the value. 
- - Args: - query: PromQL query string - - Returns: - Float value or None if no data - """ - try: - # Use singleton HTTP client (Issue #65 P1: 60-80% overhead reduction) - http_client = get_http_client() - params = {"query": query} - async with await http_client.get( - f"{PROMETHEUS_URL}/api/v1/query", - params=params, - timeout=aiohttp.ClientTimeout(total=10), - ) as response: - if response.status != 200: - logger.warning("Prometheus query failed: %s", response.status) - return None - - data = await response.json() - if data.get("status") != "success": - return None - - results = data.get("data", {}).get("result", []) - if not results: - return None - - # Return the first result value - return float(results[0]["value"][1]) - - except aiohttp.ClientError as e: - logger.error("Prometheus connection error: %s", e) - return None - except (KeyError, IndexError, ValueError) as e: - logger.error("Error parsing Prometheus response: %s", e) - return None - - -async def query_prometheus_range( - query: str, start: datetime, end: datetime, step: str = "15s" -) -> List[Dict[str, Any]]: - """ - Execute a range PromQL query and return time series data. 
- - Args: - query: PromQL query string - start: Start time - end: End time - step: Query resolution step - - Returns: - List of data points with timestamp and value - """ - try: - # Use singleton HTTP client (Issue #65 P1: 60-80% overhead reduction) - http_client = get_http_client() - params = { - "query": query, - "start": start.isoformat() + "Z", - "end": end.isoformat() + "Z", - "step": step, - } - async with await http_client.get( - f"{PROMETHEUS_URL}/api/v1/query_range", - params=params, - timeout=aiohttp.ClientTimeout(total=30), - ) as response: - if response.status != 200: - logger.warning("Prometheus range query failed: %s", response.status) - return [] - - data = await response.json() - if data.get("status") != "success": - return [] - - results = data.get("data", {}).get("result", []) - if not results: - return [] - - # Format data points - points = [] - for result in results: - metric = result.get("metric", {}) - values = result.get("values", []) - - for timestamp, value in values: - points.append( - { - "timestamp": datetime.fromtimestamp(timestamp).isoformat(), - "value": float(value), - "labels": metric, - } - ) - - return points - - except aiohttp.ClientError as e: - logger.error("Prometheus connection error: %s", e) - return [] - except (KeyError, ValueError) as e: - logger.error("Error parsing Prometheus response: %s", e) - return [] - - -@router.get("/system/current") -async def get_system_metrics_current(): - """ - DEPRECATED: Get current system metrics. - Use Grafana dashboard 'autobot-system' instead. 
- """ - warnings.warn(DEPRECATION_MSG, DeprecationWarning, stacklevel=2) - logger.warning( - "Deprecated endpoint called: /metrics/system/current - %s", DEPRECATION_MSG - ) - - # Issue #379: Concurrent queries with asyncio.gather - cpu, memory, disk, load_1m = await asyncio.gather( - query_prometheus_instant("autobot_cpu_usage_percent"), - query_prometheus_instant("autobot_memory_usage_percent"), - query_prometheus_instant('autobot_disk_usage_percent{mount_point="/"}'), - query_prometheus_instant("autobot_load_average_1m"), - ) - - return { - "success": True, - "deprecated": True, - "deprecation_message": DEPRECATION_MSG, - "system_metrics": { - "cpu_percent": cpu, - "memory_percent": memory, - "disk_percent": disk, - "load_average_1m": load_1m, - }, - "timestamp": datetime.now().isoformat(), - } - - -@router.get("/system/history") -async def get_system_metrics_history( - duration: str = Query("1h", description="Time duration (e.g., 1h, 6h, 1d)"), - step: str = Query("15s", description="Data point interval"), -): - """ - DEPRECATED: Get historical system metrics. - Use Grafana dashboard 'autobot-system' instead. 
- """ - warnings.warn(DEPRECATION_MSG, DeprecationWarning, stacklevel=2) - logger.warning( - "Deprecated endpoint called: /metrics/system/history - %s", DEPRECATION_MSG - ) - - # Parse duration - duration_map = { - "15m": timedelta(minutes=15), - "1h": timedelta(hours=1), - "6h": timedelta(hours=6), - "1d": timedelta(days=1), - "7d": timedelta(days=7), - } - - delta = duration_map.get(duration, timedelta(hours=1)) - end = datetime.utcnow() - start = end - delta - - # Issue #379: Concurrent queries with asyncio.gather - cpu_history, memory_history = await asyncio.gather( - query_prometheus_range("autobot_cpu_usage_percent", start, end, step), - query_prometheus_range("autobot_memory_usage_percent", start, end, step), - ) - - return { - "success": True, - "deprecated": True, - "deprecation_message": DEPRECATION_MSG, - "cpu_history": cpu_history, - "memory_history": memory_history, - "time_range": {"start": start.isoformat(), "end": end.isoformat()}, - } - - -@router.get("/workflow/summary") -async def get_workflow_summary(): - """ - DEPRECATED: Get workflow execution summary. - Use Grafana dashboard 'autobot-workflow' instead. 
- """ - warnings.warn(DEPRECATION_MSG, DeprecationWarning, stacklevel=2) - logger.warning( - "Deprecated endpoint called: /metrics/workflow/summary - %s", DEPRECATION_MSG - ) - - # Issue #379: Concurrent queries with asyncio.gather - total, completed, failed, active = await asyncio.gather( - query_prometheus_instant("sum(autobot_workflow_executions_total)"), - query_prometheus_instant( - 'sum(autobot_workflow_executions_total{status="completed"})' - ), - query_prometheus_instant( - 'sum(autobot_workflow_executions_total{status="failed"})' - ), - query_prometheus_instant("autobot_active_workflows"), - ) - - # Calculate success rate - success_rate = None - if total and total > 0: - success_rate = (completed or 0) / total * 100 - - return { - "success": True, - "deprecated": True, - "deprecation_message": DEPRECATION_MSG, - "workflow_summary": { - "total_executions": total, - "completed": completed, - "failed": failed, - "active": active, - "success_rate_percent": success_rate, - }, - "timestamp": datetime.now().isoformat(), - } - - -@router.get("/errors/recent") -async def get_recent_errors( - limit: int = Query(20, description="Number of recent errors to return"), -): - """ - DEPRECATED: Get recent error metrics. - Use Grafana dashboard 'autobot-errors' instead. 
- """ - warnings.warn(DEPRECATION_MSG, DeprecationWarning, stacklevel=2) - logger.warning( - "Deprecated endpoint called: /metrics/errors/recent - %s", DEPRECATION_MSG - ) - - # Issue #379: Concurrent queries with asyncio.gather - total_errors, error_rate_1m, error_rate_5m = await asyncio.gather( - query_prometheus_instant("sum(autobot_errors_total)"), - query_prometheus_instant("rate(autobot_errors_total[1m])"), - query_prometheus_instant("rate(autobot_errors_total[5m])"), - ) - - # Get errors by category - end = datetime.utcnow() - start = end - timedelta(hours=1) - errors_by_category = await query_prometheus_range( - "autobot_errors_total", start, end, "5m" - ) - - return { - "success": True, - "deprecated": True, - "deprecation_message": DEPRECATION_MSG, - "error_metrics": { - "total_errors": total_errors, - "error_rate_1m": error_rate_1m, - "error_rate_5m": error_rate_5m, - }, - "errors_by_category": errors_by_category[:limit], - "timestamp": datetime.now().isoformat(), - } - - -@router.get("/claude-api/status") -async def get_claude_api_status(): - """ - DEPRECATED: Get Claude API status. - Use Grafana dashboard 'autobot-claude-api' instead. 
- """ - warnings.warn(DEPRECATION_MSG, DeprecationWarning, stacklevel=2) - logger.warning( - f"Deprecated endpoint called: /metrics/claude-api/status - {DEPRECATION_MSG}" - ) - - # Issue #379: Concurrent queries with asyncio.gather - rate_limit, request_rate, p95_latency, failure_rate = await asyncio.gather( - query_prometheus_instant("autobot_claude_api_rate_limit_remaining"), - query_prometheus_instant("rate(autobot_claude_api_requests_total[5m]) * 60"), - query_prometheus_instant( - "histogram_quantile(0.95, rate(autobot_claude_api_response_time_seconds_bucket[5m]))" - ), - query_prometheus_instant( - 'rate(autobot_claude_api_requests_total{success="false"}[5m]) / rate(autobot_claude_api_requests_total[5m])' - ), - ) - - return { - "success": True, - "deprecated": True, - "deprecation_message": DEPRECATION_MSG, - "claude_api_status": { - "rate_limit_remaining": rate_limit, - "requests_per_minute": request_rate, - "p95_latency_seconds": p95_latency, - "failure_rate": failure_rate, - }, - "timestamp": datetime.now().isoformat(), - } - - -@router.get("/services/health") -async def get_services_health(): - """ - DEPRECATED: Get service health status. - Use Grafana dashboard 'autobot-overview' instead. 
- """ - warnings.warn(DEPRECATION_MSG, DeprecationWarning, stacklevel=2) - logger.warning( - "Deprecated endpoint called: /metrics/services/health - %s", DEPRECATION_MSG - ) - - # Query service status for each service (Issue #380: use module-level constant) - health_data = {} - - for service in _MONITORED_SERVICES: - service_status = await query_prometheus_instant( - f'autobot_service_status{{service_name="{service}",status="online"}}' - ) - response_time = await query_prometheus_instant( - f'autobot_service_response_time_seconds{{service_name="{service}"}}' - ) - health_score = await query_prometheus_instant( - f'autobot_service_health_score{{service_name="{service}"}}' - ) - - health_data[service] = { - "online": bool(service_status and service_status == 1), - "response_time_seconds": response_time, - "health_score": health_score, - } - - return { - "success": True, - "deprecated": True, - "deprecation_message": DEPRECATION_MSG, - "services": health_data, - "timestamp": datetime.now().isoformat(), - } - - -@router.get("/github/status") -async def get_github_status(): - """ - DEPRECATED: Get GitHub API status. - Use Grafana dashboard 'autobot-github' instead. 
- """ - warnings.warn(DEPRECATION_MSG, DeprecationWarning, stacklevel=2) - logger.warning( - "Deprecated endpoint called: /metrics/github/status - %s", DEPRECATION_MSG - ) - - # Issue #664: Parallelize independent Prometheus queries - rate_limit, total_ops, p95_latency = await asyncio.gather( - query_prometheus_instant("autobot_github_api_rate_limit_remaining"), - query_prometheus_instant("sum(autobot_github_api_operations_total)"), - query_prometheus_instant( - "histogram_quantile(0.95, rate(autobot_github_api_duration_seconds_bucket[5m]))" - ), - ) - - return { - "success": True, - "deprecated": True, - "deprecation_message": DEPRECATION_MSG, - "github_status": { - "rate_limit_remaining": rate_limit, - "total_operations": total_ops, - "p95_latency_seconds": p95_latency, - }, - "timestamp": datetime.now().isoformat(), - } - - -@router.get("/health") -async def get_monitoring_compat_health(): - """ - Health check for monitoring compatibility layer. - """ - # Check Prometheus connectivity - try: - cpu = await query_prometheus_instant("autobot_cpu_usage_percent") - prometheus_healthy = cpu is not None - except Exception: - prometheus_healthy = False - - return { - "status": "healthy" if prometheus_healthy else "degraded", - "prometheus_connected": prometheus_healthy, - "prometheus_url": PROMETHEUS_URL, - "deprecation_notice": DEPRECATION_MSG, - "timestamp": datetime.now().isoformat(), - } diff --git a/autobot-backend/api/natural_language_search.py b/autobot-backend/api/natural_language_search.py index bbad71aab..488098db9 100644 --- a/autobot-backend/api/natural_language_search.py +++ b/autobot-backend/api/natural_language_search.py @@ -42,7 +42,8 @@ {} ) # Populated after enum defined -router = APIRouter(prefix="/nl-search", tags=["natural-language-search", "code-search"]) +# Issue #3355: prefix moved to router registry (feature_routers.py) +router = APIRouter(tags=["natural-language-search", "code-search"]) # 
============================================================================= @@ -1293,10 +1294,20 @@ async def list_supported_domains(): @router.get("/health") async def health_check(): - """Health check endpoint.""" + """Health check endpoint. + + Deprecated: Use /api/system/health for system-wide health checks. + This per-module endpoint will be removed in a future release. (#3333) + """ + logger.warning( + "Deprecated health endpoint called: /api/health (natural_language_search) — " + "use /api/system/health instead (#3333)" + ) return { "status": "healthy", "service": "natural-language-search", + "deprecated": True, + "use_instead": "/api/system/health", "features": [ "query_parsing", "intent_classification", diff --git a/autobot-backend/api/phases.py b/autobot-backend/api/phases.py new file mode 100644 index 000000000..fcc5b9f14 --- /dev/null +++ b/autobot-backend/api/phases.py @@ -0,0 +1,109 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Phases API endpoints + +Provides the /api/phases/* endpoints consumed by PhaseProgressionIndicator +and related frontend components. + +Issue #3331: The existing api/phase_management.py depended on +scripts.phase_validation_system which does not exist, causing an ImportError +at startup and returning 404 for all /api/phases/* paths. This module +provides the required endpoints backed by the project_state_manager which +is already in use throughout the backend. 
+""" + +import logging +from datetime import datetime +from typing import List + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from project_state_manager import get_project_state_manager + +logger = logging.getLogger(__name__) + +router = APIRouter() + + +# --------------------------------------------------------------------------- +# Response models +# --------------------------------------------------------------------------- + + +class PhaseEntry(BaseModel): + id: str + name: str + completion: float + is_active: bool + is_completed: bool + + +class PhasesStatusResponse(BaseModel): + status: str + service: str + phases: List[PhaseEntry] + timestamp: str + + +class ValidationRunResponse(BaseModel): + status: str + message: str + timestamp: str + + +# --------------------------------------------------------------------------- +# Endpoints +# --------------------------------------------------------------------------- + + +@router.get("/status", response_model=PhasesStatusResponse) +async def get_phases_status() -> PhasesStatusResponse: + """Return phase completion status for all development phases.""" + try: + manager = get_project_state_manager() + raw = manager.get_project_status(use_cache=True) + + phases: List[PhaseEntry] = [] + for phase_id, phase_data in raw.get("phases", {}).items(): + phases.append( + PhaseEntry( + id=phase_id, + name=phase_data.get("name", phase_id), + completion=phase_data.get("completion", 0.0), + is_active=phase_data.get("is_active", False), + is_completed=phase_data.get("is_completed", False), + ) + ) + + return PhasesStatusResponse( + status="ok", + service="phase_management", + phases=phases, + timestamp=datetime.utcnow().isoformat() + "Z", + ) + except Exception as exc: + logger.error("Error getting phases status: %s", exc) + raise HTTPException(status_code=500, detail="Failed to retrieve phases status") + + +@router.post("/validation/run", response_model=ValidationRunResponse) +async def 
run_phases_validation() -> ValidationRunResponse: + """Queue a phase validation run. + + Triggers a full project phase validation pass via the project state + manager and returns immediately with an acknowledgement. + """ + try: + manager = get_project_state_manager() + manager.validate_all_phases() + return ValidationRunResponse( + status="ok", + message="Validation queued", + timestamp=datetime.utcnow().isoformat() + "Z", + ) + except Exception as exc: + logger.error("Error queuing phase validation: %s", exc) + raise HTTPException(status_code=500, detail="Failed to queue phase validation") diff --git a/autobot-backend/api/project.py b/autobot-backend/api/project.py new file mode 100644 index 000000000..9c947bff7 --- /dev/null +++ b/autobot-backend/api/project.py @@ -0,0 +1,115 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Project API endpoints + +Provides the /api/project/* endpoints consumed by PhaseProgressionIndicator +and related frontend components. + +Issue #3331: These endpoints were missing, causing 404 errors in the UI. +The existing api/project_state.py router used an internal prefix of /project +combined with a registration prefix of /project-state, yielding incorrect +URLs. This module exposes the correct /api/project/* paths. 
+""" + +import logging +from datetime import datetime +from typing import Dict, Optional + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from project_state_manager import get_project_state_manager + +logger = logging.getLogger(__name__) + +router = APIRouter() + + +# --------------------------------------------------------------------------- +# Response models +# --------------------------------------------------------------------------- + + +class PhaseStatusItem(BaseModel): + name: str + completion: float + is_active: bool + is_completed: bool + capabilities: int + implemented_capabilities: int + + +class ProjectStatusResponse(BaseModel): + current_phase: str + total_phases: int + completed_phases: int + active_phases: int + overall_completion: float + next_suggested_phase: Optional[str] + phases: Dict[str, PhaseStatusItem] + + +class ProjectReportResponse(BaseModel): + status: str + overall_completion: float + current_phase: str + total_phases: int + completed_phases: int + generated_at: str + + +# --------------------------------------------------------------------------- +# Endpoints +# --------------------------------------------------------------------------- + + +@router.get("/status", response_model=ProjectStatusResponse) +async def get_project_status(detailed: bool = False) -> ProjectStatusResponse: + """Return current project development phase status. + + Args: + detailed: When True, bypass cache and run full validation checks. 
+ """ + try: + manager = get_project_state_manager() + raw = manager.get_project_status(use_cache=not detailed) + + phases: Dict[str, PhaseStatusItem] = {} + for phase_id, phase_data in raw.get("phases", {}).items(): + phases[phase_id] = PhaseStatusItem(**phase_data) + + next_phase = raw.get("next_suggested_phase") + return ProjectStatusResponse( + current_phase=raw.get("current_phase", "unknown"), + total_phases=raw.get("total_phases", 0), + completed_phases=raw.get("completed_phases", 0), + active_phases=raw.get("active_phases", 0), + overall_completion=raw.get("overall_completion", 0.0), + next_suggested_phase=str(next_phase) if next_phase else None, + phases=phases, + ) + except Exception as exc: + logger.error("Error getting project status: %s", exc) + raise HTTPException(status_code=500, detail="Failed to retrieve project status") + + +@router.get("/report", response_model=ProjectReportResponse) +async def get_project_report() -> ProjectReportResponse: + """Return a summary report of project completion and phase state.""" + try: + manager = get_project_state_manager() + raw = manager.get_project_status(use_cache=True) + + return ProjectReportResponse( + status="ok", + overall_completion=raw.get("overall_completion", 0.0), + current_phase=raw.get("current_phase", "unknown"), + total_phases=raw.get("total_phases", 0), + completed_phases=raw.get("completed_phases", 0), + generated_at=datetime.utcnow().isoformat() + "Z", + ) + except Exception as exc: + logger.error("Error generating project report: %s", exc) + raise HTTPException(status_code=500, detail="Failed to generate project report") diff --git a/autobot-backend/api/secure_terminal_websocket.py b/autobot-backend/api/secure_terminal_websocket.py deleted file mode 100644 index 9dfce42e8..000000000 --- a/autobot-backend/api/secure_terminal_websocket.py +++ /dev/null @@ -1,258 +0,0 @@ -""" -Secure Terminal WebSocket Handler with Command Auditing -Provides PTY terminal with enhanced security logging and 
optional sandboxing -""" - -import json -import logging -import time -from typing import Dict, Optional - -from fastapi import WebSocket, WebSocketDisconnect - -from .base_terminal import BaseTerminalWebSocket - -logger = logging.getLogger(__name__) - - -class SecureTerminalSession(BaseTerminalWebSocket): - """Secure terminal session with command auditing and optional sandboxing""" - - def __init__(self, session_id: str, security_layer=None, user_role: str = "user"): - super().__init__() - self.session_id = session_id - self.security_layer = security_layer - self.user_role = user_role - - # Security settings - self.audit_commands = True - self.log_all_output = False - - # Command buffer for audit logging - self.command_buffer = "" - self.last_command = "" - - @property - def terminal_type(self) -> str: - """Get terminal type for logging""" - return "Secure terminal" - - async def connect(self, websocket: WebSocket): - """Connect WebSocket to this session and start PTY shell""" - await websocket.accept() - self.websocket = websocket - self.active = True - - # Start PTY shell process - await self.start_pty_shell() - - # Send connection confirmation with security status - security_status = "enabled" if self.security_layer else "disabled" - await self.send_message( - { - "type": "connection", - "status": "connected", - "message": f"Secure terminal connected (security: {security_status})", - "session_id": self.session_id, - "working_dir": self.current_dir, - "user_role": self.user_role, - "security": { - "audit_enabled": self.audit_commands, - "logging_enabled": self.log_all_output, - }, - } - ) - - # PTY shell startup now handled by base class - - def process_output(self, output: str) -> str: - """Process PTY output with security logging""" - # Log output if enabled - if self.log_all_output and self.security_layer: - self.security_layer.audit_log( - action="terminal_output", - user=f"terminal_{self.session_id}", - outcome="logged", - details={"output": output, 
"user_role": self.user_role}, - ) - return output - - async def process_input(self, text: str) -> str: - """Process input with security auditing""" - # Track command input for auditing - if "\n" in text or "\r" in text: - # Command completed - command = self.command_buffer + text.replace("\n", "").replace("\r", "") - if command.strip() and self.audit_commands: - await self.audit_command(command.strip()) - self.command_buffer = "" - else: - self.command_buffer += text - - return text - - async def audit_command(self, command: str): - """Audit a command execution for security logging""" - if not self.security_layer: - return - - self.last_command = command - - try: - # Log command attempt - self.security_layer.audit_log( - action="terminal_command", - user=f"terminal_{self.session_id}", - outcome="executed", - details={ - "command": command, - "user_role": self.user_role, - "session_id": self.session_id, - "timestamp": time.time(), - }, - ) - - # Check for risky commands (optional enhancement) - risky_patterns = [ - "rm -r", - "sudo rm", - "dd if=", - "mkfs", - "fdisk", - "chmod 777", - "chown -R", - "> /dev/", - ] - - if any(pattern in command.lower() for pattern in risky_patterns): - self.security_layer.audit_log( - action="risky_command_detected", - user=f"terminal_{self.session_id}", - outcome="warning", - details={ - "command": command, - "risk_level": "high", - "user_role": self.user_role, - }, - ) - - except Exception as e: - logger.error(f"Security audit error: {e}") - - async def send_message(self, message: dict): - """Send message to WebSocket client with standardized format""" - if self.websocket and self.active: - try: - # Ensure standardized format - standardized_message = message.copy() - - # Convert "data" field to "content" for consistency - if ( - "data" in standardized_message - and "content" not in standardized_message - ): - standardized_message["content"] = standardized_message.pop("data") - - # Add metadata if not present - if "metadata" not in 
standardized_message: - standardized_message["metadata"] = { - "session_id": self.session_id, - "timestamp": time.time(), - "terminal_type": "secure", - "user_role": self.user_role, - } - - await self.websocket.send_text(json.dumps(standardized_message)) - except Exception as e: - logger.error(f"WebSocket send error: {e}") - - async def disconnect(self): - """Disconnect session and clean up PTY with security logging""" - # Log session end before cleanup - if self.security_layer: - self.security_layer.audit_log( - action="terminal_session_ended", - user=f"terminal_{self.session_id}", - outcome="disconnected", - details={ - "session_id": self.session_id, - "user_role": self.user_role, - "last_command": self.last_command, - }, - ) - - # Use base class cleanup method - await self.cleanup() - - logger.info(f"Secure terminal session {self.session_id} disconnected") - - -# Global session manager -_secure_sessions: Dict[str, SecureTerminalSession] = {} - - -async def handle_secure_terminal_websocket( - websocket: WebSocket, session_id: str, security_layer=None -): - """Handle secure terminal WebSocket connection""" - try: - # Get user role from query parameters - user_role = websocket.query_params.get("role", "user") - - # Create or get session - if session_id not in _secure_sessions: - session = SecureTerminalSession( - session_id=session_id, - security_layer=security_layer, - user_role=user_role, - ) - _secure_sessions[session_id] = session - else: - session = _secure_sessions[session_id] - - # Connect to session - await session.connect(websocket) - - # Message processing loop - while True: - try: - data = await websocket.receive_text() - message = json.loads(data) - - message_type = message.get("type", "") - - if message_type == "input": - # Send input to PTY - support multiple input field formats - text = message.get( - "content", message.get("text", message.get("data", "")) - ) - if text: - await session.send_input(text) - - elif message_type == "ping": - # Respond to 
ping - await session.send_message({"type": "pong"}) - - elif message_type == "resize": - # Handle terminal resize (could be implemented) - logger.info(f"Terminal resize request: {message}") - - except WebSocketDisconnect: - break - except json.JSONDecodeError: - logger.error("Invalid JSON received") - break - except Exception as e: - logger.error(f"Error in secure terminal loop: {e}") - break - - finally: - # Clean up session - if session_id in _secure_sessions: - await _secure_sessions[session_id].disconnect() - del _secure_sessions[session_id] - - -def get_secure_session(session_id: str) -> Optional[SecureTerminalSession]: - """Get a secure terminal session by ID""" - return _secure_sessions.get(session_id) diff --git a/autobot-backend/api/secure_terminal_websocket_test.py b/autobot-backend/api/secure_terminal_websocket_test.py deleted file mode 100644 index 7254d68bb..000000000 --- a/autobot-backend/api/secure_terminal_websocket_test.py +++ /dev/null @@ -1,524 +0,0 @@ -""" -Unit tests for Secure Terminal WebSocket functionality -Tests WebSocket terminal with security auditing and command monitoring -""" - -import asyncio -import json -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -# Import the modules to test -from api.secure_terminal_websocket import ( - SecureTerminalSession, - get_secure_session, - handle_secure_terminal_websocket, -) - - -class TestSecureTerminalSession: - """Test SecureTerminalSession functionality""" - - def setup_method(self): - """Set up test fixtures""" - self.mock_security_layer = MagicMock() - self.session_id = "test_session_123" - self.user_role = "developer" - - self.session = SecureTerminalSession( - session_id=self.session_id, - security_layer=self.mock_security_layer, - user_role=self.user_role, - ) - - def test_session_initialization(self): - """Test SecureTerminalSession initializes correctly""" - assert self.session.session_id == self.session_id - assert self.session.security_layer == 
self.mock_security_layer - assert self.session.user_role == self.user_role - assert self.session.websocket is None - assert self.session.active is False - assert self.session.audit_commands is True - assert self.session.command_buffer == "" - assert self.session.last_command == "" - - @pytest.mark.asyncio - async def test_connect_websocket(self): - """Test WebSocket connection""" - mock_websocket = AsyncMock() - - with patch.object(self.session, "start_pty_shell") as mock_start_pty: - with patch.object(self.session, "send_message") as mock_send: - await self.session.connect(mock_websocket) - - assert self.session.websocket == mock_websocket - assert self.session.active is True - mock_websocket.accept.assert_called_once() - mock_start_pty.assert_called_once() - mock_send.assert_called_once() - - # Check connection message - call_args = mock_send.call_args[0][0] - assert call_args["type"] == "connection" - assert call_args["status"] == "connected" - assert call_args["session_id"] == self.session_id - assert call_args["user_role"] == self.user_role - - @pytest.mark.asyncio - async def test_start_pty_shell_success(self): - """Test PTY shell startup success""" - with patch("pty.openpty") as mock_openpty: - with patch("subprocess.Popen") as mock_popen: - with patch("os.close") as mock_close: - with patch("threading.Thread") as mock_thread: - mock_openpty.return_value = (10, 11) # master_fd, slave_fd - mock_process = MagicMock() - mock_popen.return_value = mock_process - - await self.session.start_pty_shell() - - assert self.session.pty_fd == 10 - assert self.session.process == mock_process - mock_close.assert_called_once_with(11) # slave_fd closed - mock_thread.assert_called_once() - mock_thread.return_value.start.assert_called_once() - - @pytest.mark.asyncio - async def test_start_pty_shell_failure(self): - """Test PTY shell startup failure""" - with patch("pty.openpty", side_effect=OSError("PTY failed")): - with patch.object(self.session, "send_message") as mock_send: 
- await self.session.start_pty_shell() - - assert self.session.pty_fd is None - assert self.session.process is None - mock_send.assert_called_once() - - # Check error message - call_args = mock_send.call_args[0][0] - assert call_args["type"] == "error" - assert "Failed to start secure terminal" in call_args["message"] - - def test_read_pty_output_thread(self): - """Test PTY output reading in thread""" - self.session.pty_fd = 10 - self.session.active = True - self.session.websocket = AsyncMock() - - # Mock select and os.read - with patch("select.select") as mock_select: - with patch("os.read") as mock_read: - with patch("asyncio.run_coroutine_threadsafe") as mock_run_coro: - with patch("asyncio.get_event_loop"): - # First call returns data, second call returns empty - # (to break loop) - mock_select.side_effect = [([10], [], []), ([], [], [])] - mock_read.side_effect = [b"test output", b""] - - self.session._read_pty_output() - - assert mock_run_coro.called - # Check that send_message was scheduled - # Can't easily test the exact message here due to - # coroutine wrapping - - @pytest.mark.asyncio - async def test_send_input_command_completion(self): - """Test sending input with command completion""" - self.session.pty_fd = 10 - self.session.active = True - self.session.command_buffer = "ls -l" - - with patch("os.write") as mock_write: - with patch.object(self.session, "audit_command") as mock_audit: - await self.session.send_input("a\n") - - mock_write.assert_called_once_with(10, b"ls -la\n") - mock_audit.assert_called_once_with("ls -la") - assert self.session.command_buffer == "" - - @pytest.mark.asyncio - async def test_send_input_partial_command(self): - """Test sending partial command input""" - self.session.pty_fd = 10 - self.session.active = True - - with patch("os.write") as mock_write: - with patch.object(self.session, "audit_command") as mock_audit: - await self.session.send_input("ls ") - await self.session.send_input("-l") - - assert 
self.session.command_buffer == "ls -l" - assert mock_audit.call_count == 0 # No command completed yet - assert mock_write.call_count == 2 - - @pytest.mark.asyncio - async def test_send_input_write_error(self): - """Test send input with write error""" - self.session.pty_fd = 10 - self.session.active = True - - with patch("os.write", side_effect=OSError("Write failed")): - # Should not raise exception - await self.session.send_input("test") - - @pytest.mark.asyncio - async def test_audit_command_without_security_layer(self): - """Test command auditing without security layer""" - self.session.security_layer = None - - # Should not raise exception - await self.session.audit_command("ls -la") - - @pytest.mark.asyncio - async def test_audit_command_with_security_layer(self): - """Test command auditing with security layer""" - await self.session.audit_command("ls -la") - - # Check that audit_log was called - self.mock_security_layer.audit_log.assert_called() - call_args = self.mock_security_layer.audit_log.call_args - - assert call_args[1]["action"] == "terminal_command" - assert call_args[1]["user"] == f"terminal_{self.session_id}" - assert call_args[1]["outcome"] == "executed" - assert call_args[1]["details"]["command"] == "ls -la" - assert call_args[1]["details"]["user_role"] == self.user_role - - @pytest.mark.asyncio - async def test_audit_command_with_risk_assessment(self): - """Test command auditing with risk assessment""" - # Mock command executor with risk assessment - mock_executor = MagicMock() - mock_executor.assess_command_risk.return_value = ("high", ["High risk command"]) - self.mock_security_layer.command_executor = mock_executor - - with patch.object(self.session, "send_message") as mock_send: - await self.session.audit_command("rm -rf /tmp/test") - - # Should log risk assessment - assert ( - self.mock_security_layer.audit_log.call_count == 2 - ) # Command + risk assessment - - # Should send security warning - mock_send.assert_called_once() - call_args = 
mock_send.call_args[0][0] - assert call_args["type"] == "security_warning" - assert "high risk" in call_args["message"] - - @pytest.mark.asyncio - async def test_audit_command_safe_command(self): - """Test auditing of safe command""" - mock_executor = MagicMock() - mock_executor.assess_command_risk.return_value = ("safe", ["Safe command"]) - self.mock_security_layer.command_executor = mock_executor - - with patch.object(self.session, "send_message") as mock_send: - await self.session.audit_command("echo hello") - - # Should only log command, not risk assessment - assert self.mock_security_layer.audit_log.call_count == 1 - - # Should not send security warning - mock_send.assert_not_called() - - @pytest.mark.asyncio - async def test_audit_command_error_handling(self): - """Test command auditing error handling""" - self.mock_security_layer.audit_log.side_effect = Exception("Audit failed") - - # Should not raise exception - await self.session.audit_command("test command") - - @pytest.mark.asyncio - async def test_execute_command(self): - """Test command execution""" - with patch.object(self.session, "send_input") as mock_send_input: - result = await self.session.execute_command("ls -la") - - assert result is True - mock_send_input.assert_called_once_with("ls -la\n") - - @pytest.mark.asyncio - async def test_execute_command_empty(self): - """Test execution of empty command""" - result = await self.session.execute_command("") - assert result is False - - result = await self.session.execute_command(" ") - assert result is False - - @pytest.mark.asyncio - async def test_execute_command_error(self): - """Test command execution with error""" - with patch.object( - self.session, "send_input", side_effect=Exception("Send failed") - ): - with patch.object(self.session, "send_message") as mock_send: - result = await self.session.execute_command("test") - - assert result is False - mock_send.assert_called_once() - - # Check error message - call_args = mock_send.call_args[0][0] - 
assert call_args["type"] == "error" - assert "Secure terminal error" in call_args["message"] - - @pytest.mark.asyncio - async def test_send_message_success(self): - """Test successful message sending""" - self.session.websocket = AsyncMock() - self.session.active = True - - message = {"type": "test", "data": "test message"} - await self.session.send_message(message) - - self.session.websocket.send_text.assert_called_once_with(json.dumps(message)) - - @pytest.mark.asyncio - async def test_send_message_error(self): - """Test message sending with error""" - self.session.websocket = AsyncMock() - self.session.websocket.send_text.side_effect = Exception("Send failed") - self.session.active = True - - # Should not raise exception - await self.session.send_message({"type": "test"}) - - @pytest.mark.asyncio - async def test_send_message_inactive_session(self): - """Test message sending to inactive session""" - self.session.websocket = AsyncMock() - self.session.active = False - - await self.session.send_message({"type": "test"}) - - # Should not try to send when inactive - self.session.websocket.send_text.assert_not_called() - - def test_disconnect(self): - """Test session disconnection""" - # Set up session state - self.session.active = True - self.session.pty_fd = 10 - self.session.process = MagicMock() - self.session.last_command = "test command" - - with patch("os.close") as mock_close: - self.session.disconnect() - - assert self.session.active is False - assert self.session.pty_fd is None - mock_close.assert_called_once_with(10) - self.session.process.terminate.assert_called_once() - - # Check audit log for session end - self.mock_security_layer.audit_log.assert_called() - call_args = self.mock_security_layer.audit_log.call_args - assert call_args[1]["action"] == "terminal_session_ended" - assert call_args[1]["details"]["last_command"] == "test command" - - def test_disconnect_process_kill(self): - """Test session disconnection with process kill""" - 
self.session.active = True - self.session.process = MagicMock() - self.session.process.wait.side_effect = Exception( - "Timeout" - ) # Simulate TimeoutExpired - - with patch.object(self.session.process, "kill"): - self.session.disconnect() - - self.session.process.terminate.assert_called_once() - # Can't easily test kill call due to exception handling - - -class TestSecureTerminalWebSocketHandler: - """Test WebSocket handler functionality""" - - @pytest.mark.asyncio - async def test_handle_secure_terminal_websocket_success(self): - """Test successful WebSocket handling""" - mock_websocket = AsyncMock() - mock_websocket.query_params = {"role": "developer"} - - session_id = "test_session" - mock_security_layer = MagicMock() - - # Mock WebSocket messages - messages = [ - json.dumps({"type": "input", "data": "ls\n"}), - json.dumps({"type": "command", "command": "pwd"}), - json.dumps({"type": "ping"}), - ] - - # Set up receive_text to return messages then disconnect - mock_websocket.receive_text.side_effect = messages + [asyncio.TimeoutError()] - - with patch( - "api.secure_terminal_websocket._secure_sessions", {} - ) as mock_sessions: - # Should complete without raising exception - await handle_secure_terminal_websocket( - mock_websocket, session_id, mock_security_layer - ) - - # Session should have been created and cleaned up - assert session_id not in mock_sessions - - @pytest.mark.asyncio - async def test_handle_secure_terminal_websocket_existing_session(self): - """Test WebSocket handling with existing session""" - mock_websocket = AsyncMock() - mock_websocket.query_params = {"role": "admin"} - - session_id = "existing_session" - mock_security_layer = MagicMock() - - # Create existing session - existing_session = MagicMock() - - mock_websocket.receive_text.side_effect = [asyncio.TimeoutError()] - - with patch( - "api.secure_terminal_websocket._secure_sessions", - {session_id: existing_session}, - ): - await handle_secure_terminal_websocket( - mock_websocket, 
session_id, mock_security_layer - ) - - # Should update existing session - assert existing_session.security_layer == mock_security_layer - assert existing_session.user_role == "admin" - - @pytest.mark.asyncio - async def test_handle_secure_terminal_websocket_invalid_json(self): - """Test WebSocket handling with invalid JSON""" - mock_websocket = AsyncMock() - mock_websocket.query_params = {} - - session_id = "test_session" - - # Send invalid JSON - mock_websocket.receive_text.side_effect = [ - "invalid json", - asyncio.TimeoutError(), - ] - - with patch("api.secure_terminal_websocket._secure_sessions", {}): - # Should handle gracefully - await handle_secure_terminal_websocket(mock_websocket, session_id, None) - - @pytest.mark.asyncio - async def test_handle_secure_terminal_websocket_disconnect(self): - """Test WebSocket handling with disconnect""" - from fastapi import WebSocketDisconnect - - mock_websocket = AsyncMock() - mock_websocket.query_params = {} - mock_websocket.receive_text.side_effect = WebSocketDisconnect() - - session_id = "test_session" - - with patch("api.secure_terminal_websocket._secure_sessions", {}): - # Should handle disconnect gracefully - await handle_secure_terminal_websocket(mock_websocket, session_id, None) - - @pytest.mark.asyncio - async def test_handle_secure_terminal_websocket_message_types(self): - """Test different WebSocket message types""" - mock_websocket = AsyncMock() - mock_websocket.query_params = {} - - session_id = "test_session" - mock_session = AsyncMock() - - messages = [ - json.dumps({"type": "input", "data": "test input"}), - json.dumps({"type": "command", "command": "test command"}), - json.dumps({"type": "resize", "cols": 80, "rows": 24}), - json.dumps({"type": "ping"}), - json.dumps({"type": "unknown_type"}), - ] - - mock_websocket.receive_text.side_effect = messages + [asyncio.TimeoutError()] - - with patch( - "api.secure_terminal_websocket.SecureTerminalSession" - ) as MockSession: - MockSession.return_value = 
mock_session - - await handle_secure_terminal_websocket(mock_websocket, session_id, None) - - # Check that appropriate methods were called - mock_session.send_input.assert_called_once_with("test input") - mock_session.execute_command.assert_called_once_with("test command") - mock_session.send_message.assert_called_once_with({"type": "pong"}) - - -class TestSecureTerminalUtilities: - """Test utility functions""" - - def test_get_secure_session_exists(self): - """Test getting existing secure session""" - session_id = "test_session" - mock_session = MagicMock() - - with patch( - "api.secure_terminal_websocket._secure_sessions", - {session_id: mock_session}, - ): - result = get_secure_session(session_id) - assert result == mock_session - - def test_get_secure_session_not_exists(self): - """Test getting non-existent secure session""" - session_id = "nonexistent_session" - - with patch("api.secure_terminal_websocket._secure_sessions", {}): - result = get_secure_session(session_id) - assert result is None - - -# Integration tests -class TestSecureTerminalIntegration: - """Integration tests for secure terminal functionality""" - - @pytest.mark.asyncio - async def test_full_terminal_session_lifecycle(self): - """Test complete terminal session lifecycle""" - mock_security_layer = MagicMock() - session_id = "integration_test" - - # Create session - session = SecureTerminalSession(session_id, mock_security_layer, "developer") - - # Mock WebSocket - mock_websocket = AsyncMock() - - # Test connection - with patch.object(session, "start_pty_shell"): - await session.connect(mock_websocket) - assert session.active is True - - # Test command execution - with patch.object(session, "send_input") as mock_send: - await session.execute_command("echo 'integration test'") - mock_send.assert_called_once() - - # Test auditing - await session.audit_command("ls -la") - mock_security_layer.audit_log.assert_called() - - # Test disconnection - session.disconnect() - assert session.active is False 
- - -# Run tests -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/autobot-backend/api/services.py b/autobot-backend/api/services.py index d1ad92503..8438976c0 100644 --- a/autobot-backend/api/services.py +++ b/autobot-backend/api/services.py @@ -227,11 +227,23 @@ async def get_services(admin_check: bool = Depends(check_admin_permission)): ) @router.get("/health") async def get_health(admin_check: bool = Depends(check_admin_permission)): - """Simple health check endpoint + """Simple health check endpoint. + + Deprecated: Use /api/system/health for system-wide health checks. + This per-module endpoint will be removed in a future release. (#3333) Issue #744: Requires admin authentication. """ - return {"status": "healthy", "timestamp": datetime.now()} + logger.warning( + "Deprecated health endpoint called: /api/services/health — " + "use /api/system/health instead (#3333)" + ) + return { + "status": "healthy", + "timestamp": datetime.now(), + "deprecated": True, + "use_instead": "/api/system/health", + } @with_error_handling( diff --git a/autobot-backend/api/settings.py b/autobot-backend/api/settings.py index be68c1680..ad8d6d289 100644 --- a/autobot-backend/api/settings.py +++ b/autobot-backend/api/settings.py @@ -81,7 +81,16 @@ async def get_settings(): ) @router.get("/settings") async def get_settings_explicit(): - """Get application settings - explicit /settings endpoint for frontend compatibility""" + """Get application settings. + + Issue #3334: Deprecated duplicate — use GET /api/settings/ instead. + This endpoint remains for backward compatibility but will be removed in a + future release. + """ + logger.warning( + "Deprecated endpoint called: GET /api/settings/settings. " + "Use GET /api/settings/ instead. 
(#3334)" + ) try: return ConfigService.get_full_config() except Exception as e: @@ -133,7 +142,16 @@ async def save_settings_explicit( settings_data: dict, session: AsyncSession = Depends(get_db_session), ): - """Save application settings — frontend compat (#1747: audit trail).""" + """Save application settings. + + Issue #3334: Deprecated duplicate — use POST /api/settings/ instead. + This endpoint remains for backward compatibility but will be removed in a + future release. + """ + logger.warning( + "Deprecated endpoint called: POST /api/settings/settings. " + "Use POST /api/settings/ instead. (#3334)" + ) try: if not settings_data: logger.warning("Received empty settings data, skipping save") diff --git a/autobot-backend/api/simple_terminal_websocket.py b/autobot-backend/api/simple_terminal_websocket.py deleted file mode 100644 index e20db7a48..000000000 --- a/autobot-backend/api/simple_terminal_websocket.py +++ /dev/null @@ -1,238 +0,0 @@ -""" -Simplified Terminal WebSocket Handler for AutoBot -A working alternative to the complex PTY-based system with enhanced security -""" - -import json -import logging -from typing import Dict - -from fastapi import WebSocket, WebSocketDisconnect - -from .base_terminal import BaseTerminalWebSocket - - -def _get_workflow_manager(): - """Deferred import to avoid circular dependency (#1210).""" - try: - from services.workflow_automation.routes import get_workflow_manager - - return get_workflow_manager() - except ImportError: - return None - - -logger = logging.getLogger(__name__) - - -class SimpleTerminalSession(BaseTerminalWebSocket): - """Full-featured terminal session with PTY support for sudo commands""" - - def __init__(self, session_id: str): - super().__init__() - self.session_id = session_id - - @property - def terminal_type(self) -> str: - """Get terminal type for logging""" - return "Terminal" - - async def connect(self, websocket: WebSocket): - """Connect WebSocket to this session and start PTY shell""" - await 
websocket.accept() - self.websocket = websocket - self.active = True - - # Start PTY shell process - await self.start_pty_shell() - - # Send connection confirmation - await self.send_message( - { - "type": "connection", - "status": "connected", - "message": "Full terminal connected with sudo support", - "session_id": self.session_id, - "working_dir": self.current_dir, - } - ) - - logger.info(f"Full terminal session {self.session_id} connected") - - # PTY shell startup now handled by base class - - async def send_message(self, data: dict): - """Send message to WebSocket with standardized format""" - if self.websocket: - try: - # Ensure standardized format - convert legacy field names - standardized_data = data.copy() - - # Convert "data" field to "content" for consistency - if "data" in standardized_data and "content" not in standardized_data: - standardized_data["content"] = standardized_data.pop("data") - - # Add metadata if not present - if "metadata" not in standardized_data: - import time - - standardized_data["metadata"] = { - "session_id": self.session_id, - "timestamp": time.time(), - "terminal_type": "simple", - } - - await self.websocket.send_text(json.dumps(standardized_data)) - except Exception as e: - logger.error(f"Error sending message: {e}") - - # Input handling now in base class - - async def handle_workflow_control(self, data: Dict): - """Handle workflow automation control messages""" - workflow_manager = _get_workflow_manager() - if not workflow_manager: - await self.send_message( - {"type": "error", "message": "Workflow automation not available"} - ) - return - - try: - action = data.get("action", "") - workflow_id = data.get("workflow_id", "") - - logger.info(f"Handling workflow control: {action} for {workflow_id}") - - if action == "pause": - # Pause any running automation - await self.send_message( - { - "type": "workflow_paused", - "message": "🛑 Automation paused by user request", - } - ) - - elif action == "resume": - # Resume automation - 
await self.send_message( - {"type": "workflow_resumed", "message": "▶️ Automation resumed"} - ) - - elif action == "approve_step": - step_id = data.get("step_id", "") - await self.send_message( - { - "type": "workflow_step_approved", - "message": f"✅ Step {step_id} approved", - "step_id": step_id, - } - ) - - elif action == "cancel": - await self.send_message( - { - "type": "workflow_cancelled", - "message": "❌ Workflow cancelled by user", - } - ) - - else: - await self.send_message( - {"type": "error", "message": f"Unknown workflow action: {action}"} - ) - - except Exception as e: - logger.error(f"Workflow control error: {e}") - await self.send_message( - {"type": "error", "message": "Workflow message error"} - ) - - async def disconnect(self): - """Disconnect session and clean up PTY""" - # Use base class cleanup method - await self.cleanup() - - self.websocket = None - logger.info(f"Full terminal session {self.session_id} disconnected") - - -class SimpleTerminalHandler: - """Handler for simple terminal WebSocket connections""" - - def __init__(self): - self.sessions: Dict[str, SimpleTerminalSession] = {} - - async def handle_websocket(self, websocket: WebSocket, session_id: str): - """Handle simple terminal WebSocket connection""" - try: - # Create new session - session = SimpleTerminalSession(session_id) - self.sessions[session_id] = session - - # Connect to session - await session.connect(websocket) - - # Message processing loop - while True: - try: - data = await websocket.receive_text() - message = json.loads(data) - - message_type = message.get("type", "") - - if message_type == "input": - # Send input to PTY shell - support both legacy and new formats - text = message.get( - "content", message.get("text", message.get("data", "")) - ) - if text: - await session.send_input(text) - - elif message_type == "workflow_control": - # Handle workflow automation controls - await session.handle_workflow_control(message) - - elif message_type == "ping": - # Respond to 
ping for connection health - await session.send_message({"type": "pong"}) - - except WebSocketDisconnect: - break - except json.JSONDecodeError: - logger.error("Invalid JSON received") - break - except Exception as e: - logger.error(f"Error handling message: {e}") - break - - finally: - await session.disconnect() - if session_id in self.sessions: - del self.sessions[session_id] - - def get_active_sessions(self) -> list: - """Get list of active session IDs""" - return list(self.sessions.keys()) - - def get_session(self, session_id: str) -> SimpleTerminalSession: - """Get session by ID""" - return self.sessions.get(session_id) - - -# Global handler instance -simple_terminal_handler = SimpleTerminalHandler() - - -async def handle_simple_terminal_websocket(websocket: WebSocket, session_id: str): - """Handle simple terminal WebSocket connection""" - await simple_terminal_handler.handle_websocket(websocket, session_id) - - -def get_simple_terminal_sessions(): - """Get all active simple terminal sessions""" - return simple_terminal_handler.get_active_sessions() - - -def get_simple_terminal_session(session_id: str): - """Get a simple terminal session by ID""" - return simple_terminal_handler.get_session(session_id) diff --git a/autobot-backend/api/slm/deployments.py b/autobot-backend/api/slm/deployments.py new file mode 100644 index 000000000..b9681434b --- /dev/null +++ b/autobot-backend/api/slm/deployments.py @@ -0,0 +1,279 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +SLM Deployments API + +Exposes HTTP endpoints so autobot-backend callers can trigger and query Docker +container deployments through the SLM Ansible playbook runner without talking +directly to the SLM backend. 
+ +Endpoints +--------- +POST /slm/deployments/docker Trigger a Docker deployment +POST /slm/deployments Create a generic multi-role deployment +GET /slm/deployments List active deployments +GET /slm/deployments/{id} Get a deployment by ID +POST /slm/deployments/{id}/execute Execute a queued deployment +POST /slm/deployments/{id}/cancel Cancel a deployment +POST /slm/deployments/{id}/rollback Rollback a deployment + +Related to Issue #3407. +""" + +from __future__ import annotations + +import logging +from typing import Any, Optional + +from fastapi import APIRouter, Depends, HTTPException, Query, status + +from models.infrastructure import ( + DeploymentActionResponse, + DeploymentCreateRequest, + DeploymentStrategy, + DockerDeploymentRequest, + DockerDeploymentStatus, +) +from services.slm.deployment_orchestrator import ( + DeploymentContext, + DeploymentOrchestrator, + DeploymentStatus, + SLMDeploymentOrchestrator, + get_orchestrator, +) +from services.slm_client import get_slm_client + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/slm/deployments", tags=["slm-deployments"]) + +_VALID_STRATEGIES = {s.value for s in DeploymentStrategy} + + +# --------------------------------------------------------------------------- +# Dependency +# --------------------------------------------------------------------------- + + +def _require_orchestrator() -> DeploymentOrchestrator: + """Return the global orchestrator or raise 503 if not initialised.""" + orch = get_orchestrator() + if orch is None: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Deployment orchestrator not initialised", + ) + return orch + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _context_to_dict(ctx: DeploymentContext) -> dict: + """Serialise a DeploymentContext to a response dict.""" + return { + "deployment_id": 
ctx.deployment_id, + "role_name": ctx.role_name, + "target_nodes": ctx.target_nodes, + "strategy": ( + ctx.strategy.value if hasattr(ctx.strategy, "value") else ctx.strategy + ), + "playbook_path": ctx.playbook_path, + "status": ctx.status.value if hasattr(ctx.status, "value") else ctx.status, + "steps": [ + { + "step_type": ( + s.step_type.value if hasattr(s.step_type, "value") else s.step_type + ), + "node_id": s.node_id, + "node_name": s.node_name, + "description": s.description, + "started_at": s.started_at.isoformat() if s.started_at else None, + "completed_at": s.completed_at.isoformat() if s.completed_at else None, + "success": s.success, + "error": s.error, + } + for s in ctx.steps + ], + "started_at": ctx.started_at.isoformat() if ctx.started_at else None, + "completed_at": ctx.completed_at.isoformat() if ctx.completed_at else None, + "error": ctx.error, + } + + +# --------------------------------------------------------------------------- +# Docker-specific route (issue requirement) +# --------------------------------------------------------------------------- + + +@router.post( + "/docker", + response_model=DockerDeploymentStatus, + status_code=status.HTTP_202_ACCEPTED, + summary="Trigger a Docker container deployment via SLM", +) +async def deploy_docker(request: DockerDeploymentRequest) -> DockerDeploymentStatus: + """ + Trigger deployment of one or more Docker containers on the target node. + + The SLM runs the configured Ansible playbook (default: + deploy-hybrid-docker.yml) and returns a deployment record. 
+ """ + client = get_slm_client() + if client is None: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="SLM client not initialised", + ) + slm_orch = SLMDeploymentOrchestrator(client) + result = await slm_orch.deploy_docker(request) + logger.info( + "Docker deployment triggered: %s on node %s", + result.deployment_id, + result.node_id, + ) + return result + + +# --------------------------------------------------------------------------- +# Generic multi-role deployment routes +# --------------------------------------------------------------------------- + + +@router.post( + "", + status_code=status.HTTP_201_CREATED, + summary="Create a generic multi-role deployment", +) +async def create_deployment( + body: DeploymentCreateRequest, + orch: DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Create and queue a new multi-role, multi-node deployment.""" + if body.strategy.value not in _VALID_STRATEGIES: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid strategy: {body.strategy}", + ) + ctx = await orch.create_deployment( + role_name=body.role_name, + target_nodes=body.target_nodes, + strategy=body.strategy, + playbook_path=body.playbook_path, + ) + return _context_to_dict(ctx) + + +@router.get("", summary="List active deployments") +async def list_deployments( + status_filter: Optional[str] = Query(None), + orch: DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Return active deployments, optionally filtered by status string.""" + deployments = orch.active_deployments + if status_filter: + deployments = [ + d + for d in deployments + if (d.status.value if hasattr(d.status, "value") else d.status) + == status_filter + ] + return { + "deployments": [_context_to_dict(d) for d in deployments], + "total": len(deployments), + } + + +@router.get("/{deployment_id}", summary="Get a deployment by ID") +async def get_deployment( + deployment_id: str, + orch: 
DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Return a single deployment context by its ID.""" + ctx = orch.get_deployment(deployment_id) + if ctx is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Deployment not found", + ) + return _context_to_dict(ctx) + + +@router.post("/{deployment_id}/execute", summary="Execute a queued deployment") +async def execute_deployment( + deployment_id: str, + orch: DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Start execution of a deployment that is in QUEUED state.""" + ctx = orch.get_deployment(deployment_id) + if ctx is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Deployment not found", + ) + if ctx.status != DeploymentStatus.QUEUED: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Deployment is not queued (current status: {ctx.status})", + ) + ok = await orch.execute_deployment(deployment_id) + if not ok: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Could not execute deployment {deployment_id!r}", + ) + logger.info("Deployment execution started: %s", deployment_id) + return DeploymentActionResponse( + deployment_id=deployment_id, action="execute", success=True + ) + + +@router.post("/{deployment_id}/cancel", summary="Cancel a deployment") +async def cancel_deployment( + deployment_id: str, + orch: DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Cancel a queued or running deployment.""" + ctx = orch.get_deployment(deployment_id) + cancelled = await orch.cancel_deployment(deployment_id) + if not cancelled: + if ctx is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Deployment not found", + ) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Deployment cannot be cancelled in its current state", + ) + logger.info("Deployment cancelled: %s", deployment_id) + return 
DeploymentActionResponse( + deployment_id=deployment_id, action="cancel", success=True + ) + + +@router.post("/{deployment_id}/rollback", summary="Rollback a deployment") +async def rollback_deployment( + deployment_id: str, + orch: DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Trigger rollback for a deployment.""" + ctx = orch.get_deployment(deployment_id) + if ctx is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Deployment not found", + ) + success = await orch.trigger_rollback(deployment_id) + if not success: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No deployed nodes to roll back", + ) + logger.info("Rollback triggered for deployment %s", deployment_id) + return DeploymentActionResponse( + deployment_id=deployment_id, action="rollback", success=True + ) diff --git a/autobot-backend/api/slm/deployments_api_test.py b/autobot-backend/api/slm/deployments_api_test.py index fa9e06b0d..b9d932e74 100644 --- a/autobot-backend/api/slm/deployments_api_test.py +++ b/autobot-backend/api/slm/deployments_api_test.py @@ -395,3 +395,169 @@ def test_response_timestamps_formatted( assert response.status_code == 200 data = response.json() assert data["started_at"] == "2025-01-15T10:30:00" + + +# ============================================================================= +# SLMDeploymentOrchestrator integration tests (real orchestrator + test-double +# SLM HTTP client — not MagicMock wrapping the whole orchestrator) +# ============================================================================= + + +class FakeSLMClient: + """ + Test-double for the SLM HTTP client. + + Returns deterministic canned responses so tests exercise the real + SLMDeploymentOrchestrator translation logic without hitting a live SLM. 
+ """ + + def __init__(self, deployment_id: str = "slm-deploy-001", node_id: str = "node-99"): + self._deployment_id = deployment_id + self._node_id = node_id + + async def create_deployment(self, payload: dict) -> dict: + return { + "deployment_id": self._deployment_id, + "node_id": payload.get("node_id", self._node_id), + "status": "running", + "started_at": None, + "completed_at": None, + "error": None, + } + + async def get_deployment(self, deployment_id: str) -> dict: + return { + "deployment_id": deployment_id, + "node_id": self._node_id, + "status": "completed", + "started_at": None, + "completed_at": None, + "error": None, + } + + async def list_deployments(self, node_id=None) -> dict: + return { + "deployments": [ + { + "deployment_id": self._deployment_id, + "node_id": self._node_id, + "status": "completed", + "started_at": None, + "completed_at": None, + "error": None, + } + ] + } + + +class TestSLMDeploymentOrchestratorIntegration: + """Integration tests for SLMDeploymentOrchestrator with a test-double SLM client.""" + + @pytest.fixture + def fake_client(self): + return FakeSLMClient() + + @pytest.fixture + def slm_orch(self, fake_client): + from services.slm.deployment_orchestrator import SLMDeploymentOrchestrator + + return SLMDeploymentOrchestrator(slm_client=fake_client) + + @pytest.mark.asyncio + async def test_deploy_docker_calls_slm_and_maps_response(self, slm_orch): + """deploy_docker translates the request and returns a DockerDeploymentStatus.""" + from models.infrastructure import DockerContainerSpec, DockerDeploymentRequest + + request = DockerDeploymentRequest( + node_id="node-99", + containers=[ + DockerContainerSpec( + name="my-app", + image="my-org/my-app", + tag="1.2.3", + ) + ], + ) + result = await slm_orch.deploy_docker(request) + + assert result.deployment_id == "slm-deploy-001" + assert result.node_id == "node-99" + assert result.status == "running" + + @pytest.mark.asyncio + async def 
test_deploy_docker_builds_extra_vars_with_ports(self, fake_client): + """build_extra_vars correctly serialises port mappings.""" + from models.infrastructure import ( + DockerContainerSpec, + DockerDeploymentRequest, + PortMapping, + ) + from services.slm.deployment_orchestrator import SLMDeploymentOrchestrator + + captured: dict = {} + + async def capturing_create(payload): + captured.update(payload) + return { + "deployment_id": "x", + "node_id": "n", + "status": "queued", + "started_at": None, + "completed_at": None, + "error": None, + } + + fake_client.create_deployment = capturing_create + orch = SLMDeploymentOrchestrator(slm_client=fake_client) + + request = DockerDeploymentRequest( + node_id="node-1", + containers=[ + DockerContainerSpec( + name="svc", + image="acme/svc", + tag="latest", + ports=[PortMapping(host_port=8080, container_port=80)], + environment={"ENV": "prod"}, + ) + ], + ) + await orch.deploy_docker(request) + + containers = captured["extra_data"]["extra_vars"]["docker_containers"] + assert len(containers) == 1 + assert containers[0]["ports"] == ["8080:80/tcp"] + assert containers[0]["environment"] == {"ENV": "prod"} + + @pytest.mark.asyncio + async def test_get_deployment_returns_status(self, slm_orch): + """get_deployment fetches and maps a deployment by ID.""" + result = await slm_orch.get_deployment("slm-deploy-001") + + assert result.deployment_id == "slm-deploy-001" + assert result.status == "completed" + + @pytest.mark.asyncio + async def test_list_deployments_returns_list(self, slm_orch): + """list_deployments returns a non-empty list from the SLM.""" + results = await slm_orch.list_deployments() + + assert len(results) == 1 + assert results[0].deployment_id == "slm-deploy-001" + + @pytest.mark.asyncio + async def test_list_deployments_node_filter_forwarded(self, fake_client): + """node_id filter is forwarded to the SLM client.""" + from services.slm.deployment_orchestrator import SLMDeploymentOrchestrator + + received_kwargs: dict = 
{} + + async def spy_list(node_id=None): + received_kwargs["node_id"] = node_id + return {"deployments": []} + + fake_client.list_deployments = spy_list + orch = SLMDeploymentOrchestrator(slm_client=fake_client) + await orch.list_deployments(node_id="node-42") + + assert received_kwargs["node_id"] == "node-42" diff --git a/autobot-backend/api/terminal.py b/autobot-backend/api/terminal.py index 4292ef120..009afe9de 100644 --- a/autobot-backend/api/terminal.py +++ b/autobot-backend/api/terminal.py @@ -1003,17 +1003,19 @@ async def terminal_info( "endpoints": { "sessions": "/api/terminal/sessions", "websocket_primary": "/api/terminal/ws/{session_id}", - "websocket_simple": "/api/terminal/ws/simple/{session_id}", - "websocket_secure": "/api/terminal/ws/secure/{session_id}", + # Issue #3332: /ws/simple and /ws/secure are compat aliases — use /ws/{session_id} + "websocket_simple": "/api/terminal/ws/simple/{session_id} (compat alias)", + "websocket_secure": "/api/terminal/ws/secure/{session_id} (compat alias)", # Issue #729: SSH to infrastructure hosts moved to slm-server "websocket_ssh": "/api/terminal/ws/ssh/{host_id} (deprecated - use SLM)", }, "security_levels": [level.value for level in SecurityLevel], - "consolidated_from": [ - "terminal.py", - "simple_terminal_websocket.py", - "secure_terminal_websocket.py", - "base_terminal.py", + # Issue #3332: simple_terminal_websocket.py, secure_terminal_websocket.py, + # and base_terminal.py are deprecated — logic consolidated here. + "deprecated_modules": [ + "api/simple_terminal_websocket.py", + "api/secure_terminal_websocket.py", + "api/base_terminal.py", ], # Issue #729: Layer separation notice "notice": "SSH connections to infrastructure hosts have been moved to slm-server. 
" diff --git a/autobot-backend/api/terminal_websocket.py b/autobot-backend/api/terminal_websocket.py index acc0e54bd..5b8a574a0 100644 --- a/autobot-backend/api/terminal_websocket.py +++ b/autobot-backend/api/terminal_websocket.py @@ -124,9 +124,7 @@ async def handle_terminal_session(self, websocket: WebSocket, chat_id: str): await self._send_error(websocket, "Invalid JSON") except Exception as e: logger.error(f"Error handling WebSocket message: {e}") - await self._send_error( - websocket, "Error processing message" - ) + await self._send_error(websocket, "Error processing message") except Exception as e: logger.error(f"WebSocket session error: {e}") diff --git a/autobot-backend/chat_workflow/graph.py b/autobot-backend/chat_workflow/graph.py index d0318d537..b8100437f 100644 --- a/autobot-backend/chat_workflow/graph.py +++ b/autobot-backend/chat_workflow/graph.py @@ -207,8 +207,39 @@ async def prepare_llm(state: ChatState, config: RunnableConfig) -> dict: } +def _inject_mid_conversation_warning(hint: str, initial_prompt: str) -> str: + """Append a corrective hint to the prompt string for mid-conversation injection. + + Issue #3260 — Anthropic provider constraint: the Anthropic API only permits + a system message at the *start* of a conversation. Any additional + ``SystemMessage`` inserted after the first human turn raises a validation + error from ``langchain_anthropic._format_messages()``. + + Rule: ALL mid-conversation corrective content (loop warnings, guardrail + feedback, RLM refinement hints, etc.) MUST be injected by appending to + ``initial_prompt`` (prompt-string injection) or wrapped in a + ``HumanMessage``. Never construct a standalone ``SystemMessage`` and + insert it after the conversation has started. + + Args: + hint: The corrective text to inject (e.g. a loop-detection warning or + a self-reflection refinement note). + initial_prompt: The current value of the initial prompt string that + will be forwarded to the LLM on the next iteration. 
+ + Returns: + A new prompt string with ``hint`` appended in a clearly labelled block. + + Example:: + + >>> _inject_mid_conversation_warning("Avoid repeating tool calls.", "Answer the question.") + 'Answer the question.\\n\\n[Guidance: Avoid repeating tool calls.]' + """ + return f"{initial_prompt}\n\n[Guidance: {hint}]" + + def _build_llm_iteration_context(state: ChatState): - """Helper for generate_response. Ref: #1088, #1373. + """Helper for generate_response. Ref: #1088, #1373, #3260. Reconstructs an LLMIterationContext from the current graph state so that generate_response can delegate to the manager's continuation loop method. @@ -216,19 +247,21 @@ def _build_llm_iteration_context(state: ChatState): When an RLM refinement hint is present (set by reflect_on_response), it is appended to the initial prompt so the LLM focuses on the identified deficiency in the next pass. + + Note (Issue #3260): Corrective/warning content is always merged into + ``initial_prompt`` via ``_inject_mid_conversation_warning``, never via a + ``SystemMessage``. See that helper's docstring for the full rationale. """ from .models import LLMIterationContext initial_prompt = state["llm_params"].get("initial_prompt") or "" - # Inject RLM refinement hint when looping back (#1373) + # Inject RLM refinement hint when looping back (#1373). + # Must use _inject_mid_conversation_warning — not SystemMessage — to satisfy + # Anthropic's requirement that SystemMessage only appear as the first message. 
hint = state.get("rlm_refinement_hint", "") if hint: - initial_prompt = ( - f"{initial_prompt}\n\n" - f"[Self-reflection feedback — please improve your answer: " - f"{hint}]" - ) + initial_prompt = _inject_mid_conversation_warning(hint, initial_prompt) return LLMIterationContext( ollama_endpoint=state["llm_params"]["ollama_endpoint"], diff --git a/autobot-backend/chat_workflow/graph_inject_warning_test.py b/autobot-backend/chat_workflow/graph_inject_warning_test.py new file mode 100644 index 000000000..10d3d11a1 --- /dev/null +++ b/autobot-backend/chat_workflow/graph_inject_warning_test.py @@ -0,0 +1,240 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Unit tests for _inject_mid_conversation_warning helper (Issue #3260). + +Verifies that: +1. The helper appends the hint to the prompt string with the expected format. +2. An empty prompt is handled correctly. +3. The helper returns a plain string, NOT a SystemMessage object, so it is + safe for all LLM providers including Anthropic (which rejects SystemMessage + after the first human turn). +4. A mock Anthropic _format_messages validator shows that a HumanMessage is + accepted while a mid-conversation SystemMessage raises ValueError — + confirming why the helper's prompt-string approach is correct. + +This file is self-contained: all runtime dependencies that are absent from the +dev Python environment (langchain_core, langgraph, xxhash, redis) are stubbed +at module level before graph.py is loaded. The test therefore runs with only +Python stdlib and pytest installed. +""" + +import importlib.util +import sys +import types +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +# --------------------------------------------------------------------------- +# Minimal message stubs — replicate only what the tests rely on. +# langchain_core may not be installed in the dev environment. 
+# --------------------------------------------------------------------------- + + +class _BaseMessage: + """Minimal LangChain BaseMessage stub.""" + + def __init__(self, content: str) -> None: + self.content = content + + +class _HumanMessage(_BaseMessage): + """Stub for langchain_core.messages.HumanMessage.""" + + +class _SystemMessage(_BaseMessage): + """Stub for langchain_core.messages.SystemMessage.""" + + +# --------------------------------------------------------------------------- +# Stub all missing runtime packages so graph.py can be loaded in isolation. +# --------------------------------------------------------------------------- + +_LANGCHAIN_CORE_MESSAGES = types.ModuleType("langchain_core.messages") +_LANGCHAIN_CORE_MESSAGES.HumanMessage = _HumanMessage # type: ignore[attr-defined] +_LANGCHAIN_CORE_MESSAGES.SystemMessage = _SystemMessage # type: ignore[attr-defined] +_LANGCHAIN_CORE_MESSAGES.AIMessage = MagicMock # type: ignore[attr-defined] +_LANGCHAIN_CORE_MESSAGES.BaseMessage = _BaseMessage # type: ignore[attr-defined] + +_LANGCHAIN_CORE = types.ModuleType("langchain_core") +_LANGCHAIN_CORE.messages = _LANGCHAIN_CORE_MESSAGES # type: ignore[attr-defined] + +_LANGCHAIN_CORE_RUNNABLES = types.ModuleType("langchain_core.runnables") +_LANGCHAIN_CORE_RUNNABLES.RunnableConfig = MagicMock # type: ignore[attr-defined] + +_STUBS: dict = { + "langchain_core": _LANGCHAIN_CORE, + "langchain_core.messages": _LANGCHAIN_CORE_MESSAGES, + "langchain_core.runnables": _LANGCHAIN_CORE_RUNNABLES, + "xxhash": types.ModuleType("xxhash"), + "redis": types.ModuleType("redis"), + "redis.asyncio": types.ModuleType("redis.asyncio"), + "langgraph": types.ModuleType("langgraph"), + "langgraph.checkpoint": types.ModuleType("langgraph.checkpoint"), + "langgraph.checkpoint.redis": types.ModuleType("langgraph.checkpoint.redis"), + "langgraph.checkpoint.redis.aio": types.ModuleType( + "langgraph.checkpoint.redis.aio" + ), + "langgraph.graph": types.ModuleType("langgraph.graph"), + 
"langgraph.types": types.ModuleType("langgraph.types"), + "typing_extensions": types.ModuleType("typing_extensions"), +} + +for _mod_name, _stub in _STUBS.items(): + if _mod_name not in sys.modules: + sys.modules[_mod_name] = _stub + +# Provide symbols that graph.py imports at module level. +for _attr in ("END", "START", "StateGraph"): + if not hasattr(sys.modules["langgraph.graph"], _attr): + setattr(sys.modules["langgraph.graph"], _attr, MagicMock()) +if not hasattr(sys.modules["langgraph.types"], "interrupt"): + sys.modules["langgraph.types"].interrupt = MagicMock() # type: ignore[attr-defined] +if not hasattr(sys.modules["langgraph.checkpoint.redis.aio"], "AsyncRedisSaver"): + sys.modules["langgraph.checkpoint.redis.aio"].AsyncRedisSaver = MagicMock() # type: ignore[attr-defined] + +# typing_extensions.TypedDict — graph.py uses it for ChatState. +import typing + +if not hasattr(sys.modules["typing_extensions"], "TypedDict"): + sys.modules["typing_extensions"].TypedDict = typing.TypedDict # type: ignore[attr-defined] + +# --------------------------------------------------------------------------- +# Load graph.py as an isolated module (bypassing chat_workflow/__init__.py). +# This avoids the full manager/dependency_container/llm_interface_pkg chain. +# --------------------------------------------------------------------------- + +_GRAPH_PATH = Path(__file__).parent / "graph.py" +_spec = importlib.util.spec_from_file_location("_graph_isolated", _GRAPH_PATH) +assert _spec is not None and _spec.loader is not None +_graph_module = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(_graph_module) # type: ignore[union-attr] + +_inject_mid_conversation_warning = _graph_module._inject_mid_conversation_warning + +# Use our stub message classes for the Anthropic constraint tests. 
+HumanMessage = _HumanMessage +SystemMessage = _SystemMessage + + +class TestInjectMidConversationWarning: + """Tests for _inject_mid_conversation_warning.""" + + def test_appends_hint_to_prompt(self): + """Helper appends the hint in a [Guidance: ...] block.""" + result = _inject_mid_conversation_warning( + "Avoid repeating tool calls.", "Answer the question." + ) + assert ( + result == "Answer the question.\n\n[Guidance: Avoid repeating tool calls.]" + ) + + def test_empty_prompt(self): + """Helper works when initial_prompt is empty.""" + result = _inject_mid_conversation_warning("Warning text.", "") + assert result == "\n\n[Guidance: Warning text.]" + + def test_returns_string_not_message_object(self): + """Result must be a plain string, not a LangChain message object. + + The Anthropic API rejects SystemMessage objects that appear after the + first human turn. Returning a plain string ensures the content is + folded into the prompt and forwarded as part of the HumanMessage on + the next iteration — never as a standalone SystemMessage. + """ + hint = "Loop detected: change your approach." + prompt = "Tell me about Python." + result = _inject_mid_conversation_warning(hint, prompt) + assert isinstance(result, str), ( + "_inject_mid_conversation_warning must return a str, not a " + "LangChain message object (see Issue #3260)" + ) + assert "[Guidance:" in result + + def test_multi_line_prompt_preserved(self): + """Multi-line prompts are not truncated.""" + prompt = "Line one.\nLine two.\nLine three." 
+ result = _inject_mid_conversation_warning("Be concise.", prompt) + assert result.startswith(prompt) + assert result.endswith("[Guidance: Be concise.]") + + def test_hint_label_is_guidance(self): + """The injected label is exactly '[Guidance: ...]' for consistency.""" + result = _inject_mid_conversation_warning("some hint", "base") + assert "[Guidance: some hint]" in result + + +class TestAnthropicSystemMessageConstraint: + """Demonstrate the Anthropic mid-conversation SystemMessage restriction. + + These tests use a mock that simulates langchain_anthropic's validation + behaviour: a SystemMessage after the first human turn is rejected, while a + HumanMessage (or prompt-string injection) is accepted. This documents + *why* _inject_mid_conversation_warning exists and what it prevents. + """ + + def _mock_format_messages(self, messages): + """Simulate the Anthropic _format_messages validation rule. + + Anthropic only allows a SystemMessage as the *first* message. Any + SystemMessage appearing after a HumanMessage raises ValueError, mirroring + ``langchain_anthropic.chat_models.ChatAnthropic._format_messages()``. + """ + seen_human = False + for msg in messages: + if isinstance(msg, HumanMessage): + seen_human = True + if isinstance(msg, SystemMessage) and seen_human: + raise ValueError( + "Anthropic does not support system messages after the first " + "human turn. Use a HumanMessage or append to the prompt string." 
+ ) + return messages + + def test_mid_conversation_system_message_rejected(self): + """A SystemMessage after a HumanMessage raises ValueError (Anthropic rule).""" + messages = [ + SystemMessage(content="You are a helpful assistant."), + HumanMessage(content="Hello"), + SystemMessage( + content="[Warning: loop detected]" + ), # mid-conversation — WRONG + ] + with pytest.raises( + ValueError, match="Anthropic does not support system messages" + ): + self._mock_format_messages(messages) + + def test_human_message_injection_accepted(self): + """A HumanMessage injected mid-conversation is accepted.""" + messages = [ + SystemMessage(content="You are a helpful assistant."), + HumanMessage(content="Hello"), + HumanMessage(content="[System Notice]: Loop detected — change approach."), + ] + # Must not raise + result = self._mock_format_messages(messages) + assert len(result) == 3 + + def test_prompt_string_injection_accepted(self): + """Prompt-string injection (_inject_mid_conversation_warning approach) is accepted. + + The helper merges the warning into the prompt text, so the next LLM + call carries the guidance inside a HumanMessage rather than adding a + separate SystemMessage. + """ + base_prompt = "Tell me about Python." + warning = "Loop detected: change your approach." 
+ enriched_prompt = _inject_mid_conversation_warning(warning, base_prompt) + + messages = [ + SystemMessage(content="You are a helpful assistant."), + HumanMessage(content=enriched_prompt), # guidance is part of the human turn + ] + # Must not raise + result = self._mock_format_messages(messages) + assert len(result) == 2 + assert "[Guidance:" in result[1].content diff --git a/autobot-backend/chat_workflow/llm_handler.py b/autobot-backend/chat_workflow/llm_handler.py index 0739e2a7f..7287029a8 100644 --- a/autobot-backend/chat_workflow/llm_handler.py +++ b/autobot-backend/chat_workflow/llm_handler.py @@ -17,6 +17,9 @@ from autobot_shared.http_client import get_http_client from constants.model_constants import ModelConstants from dependencies import global_config_manager +from extensions.base import HookContext +from extensions.hooks import HookPoint +from extensions.manager import get_extension_manager from prompt_manager import get_language_instruction, get_prompt, resolve_language from .models import WorkflowSession @@ -27,6 +30,73 @@ _VALID_URL_SCHEMES = ("http://", "https://") +async def _emit_system_prompt_ready(system_prompt: str, session: Any) -> str: + """Emit ON_SYSTEM_PROMPT_READY to registered extensions and return result. + + Issue #3405: Fires after _get_system_prompt() so extensions can inspect or + rewrite the system prompt before it enters prompt assembly. If no extension + is registered for this hook the function is a no-op and the original prompt + is returned unchanged. + + Args: + system_prompt: The assembled system prompt string. + session: WorkflowSession instance (passed as data["session"]). + + Returns: + Possibly modified system prompt string. 
+ """ + ctx = HookContext( + session_id=getattr(session, "session_id", ""), + data={"system_prompt": system_prompt, "session": session}, + ) + result = await get_extension_manager().invoke_with_transform( + HookPoint.SYSTEM_PROMPT_READY, ctx, "system_prompt" + ) + if isinstance(result, str) and result != system_prompt: + logger.debug( + "[#3405] SYSTEM_PROMPT_READY modified system prompt (%d -> %d chars)", + len(system_prompt), + len(result), + ) + return result + return system_prompt + + +async def _emit_full_prompt_ready( + prompt: str, llm_params: Dict[str, Any], context: Dict[str, Any] +) -> str: + """Emit ON_FULL_PROMPT_READY to registered extensions and return result. + + Issue #3405: Fires after _build_full_prompt() so extensions can append + dynamic content (e.g. infrastructure telemetry hints) before the prompt + is sent to the LLM. If no extension is registered for this hook the + function is a no-op and the original prompt is returned unchanged. + + Args: + prompt: The fully assembled prompt string. + llm_params: Dict containing model/endpoint selection. + context: Arbitrary request-level context dict. + + Returns: + Possibly modified full prompt string. 
+ """ + ctx = HookContext( + session_id=context.get("session_id", ""), + data={"prompt": prompt, "llm_params": llm_params, "context": context}, + ) + result = await get_extension_manager().invoke_with_transform( + HookPoint.FULL_PROMPT_READY, ctx, "prompt" + ) + if isinstance(result, str) and result != prompt: + logger.debug( + "[#3405] FULL_PROMPT_READY modified full prompt (%d -> %d chars)", + len(prompt), + len(result), + ) + return result + return prompt + + class LLMHandlerMixin: """Mixin for LLM interaction handling.""" @@ -310,6 +380,7 @@ async def _prepare_llm_request_params( else: ollama_endpoint = self._get_ollama_endpoint_for_model(selected_model) system_prompt = self._get_system_prompt(language=language) + system_prompt = await _emit_system_prompt_ready(system_prompt, session) conversation_context = self._build_conversation_context(session) # Knowledge retrieval for RAG @@ -324,6 +395,11 @@ async def _prepare_llm_request_params( full_prompt = self._build_full_prompt( system_prompt, knowledge_context, conversation_context, message ) + full_prompt = await _emit_full_prompt_ready( + full_prompt, + {"endpoint": ollama_endpoint, "model": selected_model}, + {"session_id": session.session_id, "message": message}, + ) logger.info( "[ChatWorkflowManager] Making Ollama request to: %s", ollama_endpoint diff --git a/autobot-backend/chat_workflow/prompt_hooks_test.py b/autobot-backend/chat_workflow/prompt_hooks_test.py new file mode 100644 index 000000000..f745dd2c2 --- /dev/null +++ b/autobot-backend/chat_workflow/prompt_hooks_test.py @@ -0,0 +1,195 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Unit tests for Issue #3405 — ON_SYSTEM_PROMPT_READY and ON_FULL_PROMPT_READY +plugin hooks in the chat pipeline. + +Tests verify: +1. New HookPoint enum members exist +2. ON_SYSTEM_PROMPT_READY fires with correct args and return value replaces prompt +3. 
ON_FULL_PROMPT_READY fires with correct args and return value replaces prompt +4. No-op when no extensions are registered for a hook +5. Extension errors do not crash the pipeline +""" + +from typing import Optional +from unittest.mock import AsyncMock, patch + +import pytest + +from extensions.base import Extension, HookContext +from extensions.hooks import HookPoint +from extensions.manager import ExtensionManager, reset_extension_manager +from chat_workflow.llm_handler import _emit_system_prompt_ready, _emit_full_prompt_ready + + +class _SystemPromptWatcher(Extension): + """Extension that records args and returns a modified system prompt.""" + + name = "test_system_prompt_watcher" + + def __init__(self, return_value: Optional[str] = None) -> None: + self._return_value = return_value + self.captured_system_prompt: Optional[str] = None + + async def on_system_prompt_ready(self, ctx: HookContext) -> Optional[str]: + self.captured_system_prompt = ctx.get("system_prompt") + return self._return_value + + +class _FullPromptWatcher(Extension): + """Extension that records args and returns a modified full prompt.""" + + name = "test_full_prompt_watcher" + + def __init__(self, return_value: Optional[str] = None) -> None: + self._return_value = return_value + self.captured_prompt: Optional[str] = None + self.captured_llm_params: Optional[dict] = None + self.captured_context: Optional[dict] = None + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + self.captured_prompt = ctx.get("prompt") + self.captured_llm_params = ctx.get("llm_params") + self.captured_context = ctx.get("context") + return self._return_value + + +class _ErrorExtension(Extension): + """Extension that always raises an exception.""" + + name = "test_error_extension" + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + raise RuntimeError("simulated extension failure") + + +class _FakeSession: + session_id = "sess-test-001" + metadata: dict = {} + + 
+@pytest.fixture(autouse=True) +def reset_manager(): + """Ensure global ExtensionManager is reset between tests.""" + reset_extension_manager() + yield + reset_extension_manager() + + +class TestNewHookPoints: + """Verify the new HookPoint members are present.""" + + def test_on_system_prompt_ready_exists(self): + assert HookPoint.SYSTEM_PROMPT_READY is not None + + def test_on_full_prompt_ready_exists(self): + assert HookPoint.FULL_PROMPT_READY is not None + + def test_total_hook_count_increased(self): + # Original 22 hooks + 2 new ones = 24 + assert len(HookPoint) == 24 + + +class TestEmitSystemPromptReady: + """Tests for _emit_system_prompt_ready helper.""" + + @pytest.mark.asyncio + async def test_noop_when_no_extension_registered(self): + """Returns original prompt unchanged when no extension is registered.""" + original = "You are AutoBot." + result = await _emit_system_prompt_ready(original, _FakeSession()) + assert result == original + + @pytest.mark.asyncio + async def test_extension_receives_correct_args(self): + """Extension receives system_prompt and session via HookContext.""" + watcher = _SystemPromptWatcher(return_value=None) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + original = "You are AutoBot." + session = _FakeSession() + await _emit_system_prompt_ready(original, session) + + assert watcher.captured_system_prompt == original + + @pytest.mark.asyncio + async def test_return_value_replaces_prompt(self): + """A non-None str returned by extension replaces the system prompt.""" + modified = "You are AutoBot [modified by extension]." 
+ watcher = _SystemPromptWatcher(return_value=modified) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + result = await _emit_system_prompt_ready("You are AutoBot.", _FakeSession()) + assert result == modified + + @pytest.mark.asyncio + async def test_none_return_keeps_original(self): + """Returning None from extension keeps the original prompt.""" + watcher = _SystemPromptWatcher(return_value=None) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + original = "You are AutoBot." + result = await _emit_system_prompt_ready(original, _FakeSession()) + assert result == original + + +class TestEmitFullPromptReady: + """Tests for _emit_full_prompt_ready helper.""" + + @pytest.mark.asyncio + async def test_noop_when_no_extension_registered(self): + """Returns original prompt unchanged when no extension is registered.""" + original = "System prompt\n\nUser: hello\n\nAssistant:" + result = await _emit_full_prompt_ready(original, {}, {}) + assert result == original + + @pytest.mark.asyncio + async def test_extension_receives_correct_args(self): + """Extension receives prompt, llm_params and context via HookContext.""" + watcher = _FullPromptWatcher(return_value=None) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + original = "System prompt\n\nUser: hello\n\nAssistant:" + llm_params = {"model": "llama3", "endpoint": "http://localhost:11434/api/generate"} + context = {"session_id": "sess-abc", "message": "hello"} + + await _emit_full_prompt_ready(original, llm_params, context) + + assert watcher.captured_prompt == original + assert watcher.captured_llm_params == llm_params + assert watcher.captured_context == context + + @pytest.mark.asyncio + async def test_return_value_replaces_prompt(self): + """A non-None str returned by extension replaces the full prompt.""" + modified = "System prompt\n\nUser: 
hello\n\nAssistant:\n\n[hint: be concise]" + watcher = _FullPromptWatcher(return_value=modified) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + result = await _emit_full_prompt_ready( + "System prompt\n\nUser: hello\n\nAssistant:", {}, {} + ) + assert result == modified + + @pytest.mark.asyncio + async def test_extension_error_does_not_crash_pipeline(self): + """An exception inside an extension is swallowed; original prompt is returned.""" + from extensions.manager import get_extension_manager + + get_extension_manager().register(_ErrorExtension()) + + original = "System prompt\n\nUser: hello\n\nAssistant:" + result = await _emit_full_prompt_ready(original, {}, {}) + assert result == original diff --git a/autobot-backend/chat_workflow/tool_handler.py b/autobot-backend/chat_workflow/tool_handler.py index 260d041d3..59cd1f640 100644 --- a/autobot-backend/chat_workflow/tool_handler.py +++ b/autobot-backend/chat_workflow/tool_handler.py @@ -1318,7 +1318,11 @@ async def _handle_browser_tool( except Exception as e: logger.error("[Issue #1368] Browser tool '%s' failed: %s", tool_name, e) execution_results.append( - {"tool": tool_name, "status": "error", "error": "Browser tool execution failed"} + { + "tool": tool_name, + "status": "error", + "error": "Browser tool execution failed", + } ) yield WorkflowMessage( type="error", diff --git a/autobot-backend/code_analysis/auto-tools/logging_standardizer.py b/autobot-backend/code_analysis/auto-tools/logging_standardizer.py index 9cbd92d0e..67d67cd27 100644 --- a/autobot-backend/code_analysis/auto-tools/logging_standardizer.py +++ b/autobot-backend/code_analysis/auto-tools/logging_standardizer.py @@ -438,7 +438,10 @@ def process_file(self, file_path: Path) -> bool: except Exception as e: self.report["errors"].append( - {"file": str(file_path.relative_to(self.project_root)), "error": "File standardization failed"} + { + "file": 
str(file_path.relative_to(self.project_root)), + "error": "File standardization failed", + } ) return False diff --git a/autobot-backend/code_analysis/auto-tools/performance_optimizer.py b/autobot-backend/code_analysis/auto-tools/performance_optimizer.py index 93949ef8d..61cd159cd 100644 --- a/autobot-backend/code_analysis/auto-tools/performance_optimizer.py +++ b/autobot-backend/code_analysis/auto-tools/performance_optimizer.py @@ -339,7 +339,10 @@ def process_file(self, file_path: Path) -> bool: except Exception as e: self.report["errors"].append( - {"file": str(file_path.relative_to(self.project_root)), "error": "File optimization failed"} + { + "file": str(file_path.relative_to(self.project_root)), + "error": "File optimization failed", + } ) return False diff --git a/autobot-backend/code_analysis/auto-tools/playwright_sanitizer.py b/autobot-backend/code_analysis/auto-tools/playwright_sanitizer.py index ca16a56a4..95baa38f3 100644 --- a/autobot-backend/code_analysis/auto-tools/playwright_sanitizer.py +++ b/autobot-backend/code_analysis/auto-tools/playwright_sanitizer.py @@ -373,7 +373,11 @@ def fix_playwright_report(self, file_path: str) -> Dict[str, Any]: except Exception as e: logger.error("Error processing %s: %s", file_path, e) - return {"file": file_path, "status": "error", "error": "File processing failed"} + return { + "file": file_path, + "status": "error", + "error": "File processing failed", + } def _compute_report_stats(self, results: List[Dict[str, Any]]) -> Dict[str, int]: """ diff --git a/autobot-backend/code_analysis/auto-tools/results/vue_improvement_report.md b/autobot-backend/code_analysis/auto-tools/results/vue_improvement_report.md index 7dfbf872f..0a64af085 100644 --- a/autobot-backend/code_analysis/auto-tools/results/vue_improvement_report.md +++ b/autobot-backend/code_analysis/auto-tools/results/vue_improvement_report.md @@ -29,27 +29,27 @@ Generated on: 2025-08-12 ### v-for Key Improvements (6 fixes) 
-**/home/kali/Desktop/AutoBot/autobot-vue/src/components/ChatInterface.vue**: +**autobot-vue/src/components/ChatInterface.vue**: - Line 125: Changed `:key="index"` to `:key="message.id || message.timestamp || `msg-${index}`"` - Line 137: Changed `:key="index"` to `:key="file.name || file.id || `file-${index}`"` -**/home/kali/Desktop/AutoBot/autobot-vue/src/components/HistoryView.vue**: +**autobot-vue/src/components/HistoryView.vue**: - Line 11: Changed `:key="index"` to `:key="entry.id || `history-${entry.date}`"` -**/home/kali/Desktop/AutoBot/autobot-vue/src/components/KnowledgeManager.vue**: +**autobot-vue/src/components/KnowledgeManager.vue**: - Line 32: Changed `:key="index"` to `:key="result.id || `result-${index}`"` - Line 457: Changed `:key="index"` to `:key="link.url || link.href || `link-${index}`"` -**/home/kali/Desktop/AutoBot/autobot-vue/src/components/FileBrowser.vue**: +**autobot-vue/src/components/FileBrowser.vue**: - Line 70: Changed `:key="index"` to `:key="file.name || file.id || `file-${index}`"` ### Event Listener Cleanup (0 fixes) ## Files Modified -- /home/kali/Desktop/AutoBot/autobot-vue/src/components/HistoryView.vue -- /home/kali/Desktop/AutoBot/autobot-vue/src/components/FileBrowser.vue -- /home/kali/Desktop/AutoBot/autobot-vue/src/components/KnowledgeManager.vue -- /home/kali/Desktop/AutoBot/autobot-vue/src/components/ChatInterface.vue +- autobot-vue/src/components/HistoryView.vue +- autobot-vue/src/components/FileBrowser.vue +- autobot-vue/src/components/KnowledgeManager.vue +- autobot-vue/src/components/ChatInterface.vue ## Recommendations for Further Improvement diff --git a/autobot-backend/code_analysis/auto-tools/results/vue_quality_report.md b/autobot-backend/code_analysis/auto-tools/results/vue_quality_report.md index b84d07b9a..7987b1cae 100644 --- a/autobot-backend/code_analysis/auto-tools/results/vue_quality_report.md +++ b/autobot-backend/code_analysis/auto-tools/results/vue_quality_report.md @@ -24,7 +24,7 @@ The Vue-specific 
fix agent has successfully analyzed and fixed critical Vue.js i **Files Fixed**: -#### `/home/kali/Desktop/AutoBot/autobot-vue/src/components/ChatInterface.vue` +#### `autobot-vue/src/components/ChatInterface.vue` ```diff -
Dict[str, Any]: except Exception as e: logger.error("Error processing file %sfile_path : %se ") - return {"file": file_path, "status": "error", "error": "File processing failed"} + return { + "file": file_path, + "status": "error", + "error": "File processing failed", + } def scan_directory(self, directory: str) -> List[str]: """Scan directory for HTML files to fix.""" diff --git a/autobot-backend/code_analysis/auto-tools/security_sanitizer.py b/autobot-backend/code_analysis/auto-tools/security_sanitizer.py index ffe4f7bbb..7eadeba61 100644 --- a/autobot-backend/code_analysis/auto-tools/security_sanitizer.py +++ b/autobot-backend/code_analysis/auto-tools/security_sanitizer.py @@ -373,7 +373,11 @@ def fix_file(self, file_path: str) -> Dict[str, Any]: except Exception as e: logger.error("Error processing file %sfile_path : %se ") - return {"file": file_path, "status": "error", "error": "File processing failed"} + return { + "file": file_path, + "status": "error", + "error": "File processing failed", + } def scan_directory(self, directory: str) -> List[str]: """Scan directory for HTML files to fix.""" diff --git a/autobot-backend/code_intelligence/pattern_analysis/analyzer.py b/autobot-backend/code_intelligence/pattern_analysis/analyzer.py index 71e5dead4..d6c4f2dcb 100644 --- a/autobot-backend/code_intelligence/pattern_analysis/analyzer.py +++ b/autobot-backend/code_intelligence/pattern_analysis/analyzer.py @@ -594,7 +594,11 @@ async def _run_clone_detection(self, directory: str) -> Dict[str, Any]: except Exception as e: logger.error("Clone detection failed: %s", e) - return {"type": "clone_detection", "patterns": [], "error": "Clone detection failed"} + return { + "type": "clone_detection", + "patterns": [], + "error": "Clone detection failed", + } async def _run_regex_detection(self, directory: str) -> Dict[str, Any]: """Run regex optimization detection. 
@@ -620,7 +624,11 @@ async def _run_regex_detection(self, directory: str) -> Dict[str, Any]: except Exception as e: logger.error("Regex detection failed: %s", e) - return {"type": "regex_detection", "patterns": [], "error": "Regex detection failed"} + return { + "type": "regex_detection", + "patterns": [], + "error": "Regex detection failed", + } async def _run_complexity_analysis(self, directory: str) -> Dict[str, Any]: """Run complexity analysis. diff --git a/autobot-backend/conversation.py b/autobot-backend/conversation.py index 94be10bf5..bd9869872 100644 --- a/autobot-backend/conversation.py +++ b/autobot-backend/conversation.py @@ -744,7 +744,9 @@ async def _conduct_research(self, user_message: str) -> Dict[str, Any]: except Exception as e: logger.error("Research failed: %s", e) - self._add_system_message("External research failed", "debug", {"error": True}) + self._add_system_message( + "External research failed", "debug", {"error": True} + ) return {"success": False, "error": "External research failed"} def _generate_search_queries(self, user_message: str) -> List[str]: diff --git a/autobot-backend/dependency_container.py b/autobot-backend/dependency_container.py index 1797ef0e2..14fa6b94c 100644 --- a/autobot-backend/dependency_container.py +++ b/autobot-backend/dependency_container.py @@ -339,7 +339,10 @@ async def health_check_all_services(self) -> Dict[str, Dict[str, Any]]: } except Exception as e: logger.error("Health check failed for service %s: %s", service_name, e) - results[service_name] = {"status": "unhealthy", "error": "Health check failed"} + results[service_name] = { + "status": "unhealthy", + "error": "Health check failed", + } return results diff --git a/autobot-backend/diagnostics.py b/autobot-backend/diagnostics.py index bbf02e439..9d815f77c 100644 --- a/autobot-backend/diagnostics.py +++ b/autobot-backend/diagnostics.py @@ -96,7 +96,10 @@ def _get_system_info(self) -> Dict[str, Any]: } except Exception as e: logger.error("Error gathering 
system info: %s", e) - return {"error": "Failed to gather system info", "timestamp": datetime.now().isoformat()} + return { + "error": "Failed to gather system info", + "timestamp": datetime.now().isoformat(), + } def _get_gpu_info(self) -> Dict[str, Any]: """Get GPU information for performance monitoring""" @@ -450,7 +453,12 @@ def _generate_performance_recommendations(self) -> List[Dict[str, str]]: except Exception as e: logger.error("Error generating recommendations: %s", e) - return [{"category": "error", "recommendation": "Error generating recommendations"}] + return [ + { + "category": "error", + "recommendation": "Error generating recommendations", + } + ] def cleanup_and_optimize_memory(self): """Force memory cleanup and optimization""" diff --git a/autobot-backend/elevation_wrapper.py b/autobot-backend/elevation_wrapper.py index 8694edcb4..a87781a5b 100644 --- a/autobot-backend/elevation_wrapper.py +++ b/autobot-backend/elevation_wrapper.py @@ -166,7 +166,11 @@ async def _execute_normal(self, command: str) -> Dict: except Exception as e: logger.error("Normal execution failed: %s", e) - return {"success": False, "error": "Command execution failed", "return_code": -1} + return { + "success": False, + "error": "Command execution failed", + "return_code": -1, + } async def _execute_elevated(self, command: str, session_token: str) -> Dict: """Execute command with elevation using session token""" @@ -186,7 +190,11 @@ async def _execute_elevated(self, command: str, session_token: str) -> Dict: except Exception as e: logger.error("Elevated execution failed: %s", e) - return {"success": False, "error": "Elevated command execution failed", "return_code": -1} + return { + "success": False, + "error": "Elevated command execution failed", + "return_code": -1, + } def clear_session(self): """Clear the current elevation session""" diff --git a/autobot-backend/extensions/base.py b/autobot-backend/extensions/base.py index d363a8c06..639a09409 100644 --- 
a/autobot-backend/extensions/base.py +++ b/autobot-backend/extensions/base.py @@ -488,6 +488,40 @@ async def on_approval_received(self, ctx: HookContext) -> Optional[None]: None (logging only) """ + # ========== Prompt Pipeline Hooks (Issue #3405) ========== + + async def on_system_prompt_ready(self, ctx: HookContext) -> Optional[str]: + """ + Called after the system prompt is built. + + Receives the assembled system prompt in ctx.data["system_prompt"] and + ctx.data["session"] for session metadata. Return a non-None str to + replace the system prompt; return None to leave it unchanged. + + Args: + ctx: Hook context with data["system_prompt"] and data["session"] + + Returns: + Modified system prompt str or None to keep unchanged + """ + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + """ + Called after the full prompt (system + knowledge + conversation) is built. + + Receives the full prompt in ctx.data["prompt"], LLM parameters in + ctx.data["llm_params"], and request context in ctx.data["context"]. + Return a non-None str to replace the full prompt; return None to keep it + unchanged. 
+ + Args: + ctx: Hook context with data["prompt"], data["llm_params"], + data["context"] + + Returns: + Modified full prompt str or None to keep unchanged + """ + # ========== Utility Methods ========== def __repr__(self) -> str: diff --git a/autobot-backend/extensions/extension_hooks_test.py b/autobot-backend/extensions/extension_hooks_test.py index 70d40528c..40c1159ce 100644 --- a/autobot-backend/extensions/extension_hooks_test.py +++ b/autobot-backend/extensions/extension_hooks_test.py @@ -30,8 +30,8 @@ class TestHookPoint: """Test HookPoint enum definitions.""" def test_hook_count(self): - """Should have exactly 22 hook points.""" - assert len(HookPoint) == 22 + """Should have exactly 24 hook points (22 original + 2 added in #3405).""" + assert len(HookPoint) == 24 def test_message_preparation_hooks(self): """Should have message preparation hooks.""" @@ -82,6 +82,11 @@ def test_approval_hooks(self): assert HookPoint.APPROVAL_REQUIRED is not None assert HookPoint.APPROVAL_RECEIVED is not None + def test_prompt_pipeline_hooks(self): + """Should have prompt pipeline hooks added in Issue #3405.""" + assert HookPoint.SYSTEM_PROMPT_READY is not None + assert HookPoint.FULL_PROMPT_READY is not None + def test_hook_metadata_exists(self): """Every hook should have metadata.""" for hook in HookPoint: diff --git a/autobot-backend/extensions/hooks.py b/autobot-backend/extensions/hooks.py index 348095d1c..696a15f9d 100644 --- a/autobot-backend/extensions/hooks.py +++ b/autobot-backend/extensions/hooks.py @@ -76,6 +76,10 @@ class HookPoint(Enum): APPROVAL_REQUIRED = auto() # Method: on_approval_required APPROVAL_RECEIVED = auto() # Method: on_approval_received + # Prompt pipeline — Issue #3405 + SYSTEM_PROMPT_READY = auto() # Method: on_system_prompt_ready + FULL_PROMPT_READY = auto() # Method: on_full_prompt_ready + # Hook metadata for documentation and validation HOOK_METADATA = { @@ -189,6 +193,16 @@ class HookPoint(Enum): "can_modify": [], "return_type": "None (logging 
only)", }, + HookPoint.SYSTEM_PROMPT_READY: { + "description": "Called after the system prompt is built; return a str to replace it", + "can_modify": ["system_prompt"], + "return_type": "Modified system prompt str or None", + }, + HookPoint.FULL_PROMPT_READY: { + "description": "Called after the full prompt is assembled; return a str to replace it", + "can_modify": ["prompt"], + "return_type": "Modified full prompt str or None", + }, } diff --git a/autobot-backend/initialization/lifespan.py b/autobot-backend/initialization/lifespan.py index 858feaa6f..a3e547555 100644 --- a/autobot-backend/initialization/lifespan.py +++ b/autobot-backend/initialization/lifespan.py @@ -645,13 +645,14 @@ async def _launch_workflow(workflow_id: str, payload: dict) -> None: """Launcher callback invoked by TriggerService when a trigger fires.""" from services.workflow_automation import get_workflow_manager - logger.info("Trigger fired for workflow %s with payload keys=%s", - workflow_id, list(payload.keys()) if payload else []) + logger.info( + "Trigger fired for workflow %s with payload keys=%s", + workflow_id, + list(payload.keys()) if payload else [], + ) mgr = get_workflow_manager() if mgr: - await mgr.start_workflow_execution( - workflow_id, trigger_payload=payload - ) + await mgr.start_workflow_execution(workflow_id, trigger_payload=payload) await trigger_service.start(launcher=_launch_workflow) app.state.trigger_service = trigger_service @@ -801,6 +802,11 @@ async def _init_slm_client(): await init_slm_client(slm_url, slm_token) logger.info("✅ [ 89%] SLM Client: Connected to SLM server at %s", slm_url) + from services.slm.deployment_orchestrator import init_orchestrator + from services.slm_client import get_slm_client as _get_slm_client + + init_orchestrator(_get_slm_client()) + logger.info("✅ [ 89%] SLM Client: DeploymentOrchestrator initialised") except Exception as slm_error: logger.warning( "SLM client initialization failed (continuing without): %s", slm_error diff --git 
a/autobot-backend/initialization/router_registry/analytics_routers.py b/autobot-backend/initialization/router_registry/analytics_routers.py index 0b3906877..185aa7ead 100644 --- a/autobot-backend/initialization/router_registry/analytics_routers.py +++ b/autobot-backend/initialization/router_registry/analytics_routers.py @@ -30,9 +30,10 @@ "codebase_analytics", ), # Issue #708: renamed from analytics_unified + # Issue #3355: moved prefix from APIRouter() into registry (was "") ( "api.analytics_reporting", - "", + "/unified", ["analytics-reporting", "analytics"], "analytics_reporting", ), @@ -145,9 +146,10 @@ "analytics_continuous_learning", ), # Advanced analytics - Issue #59, Issue #708: renamed from analytics_advanced + # Issue #3355: moved prefix from APIRouter() into registry (was "") ( "api.analytics_maintenance", - "", + "/advanced", ["analytics-maintenance", "analytics", "bi"], "analytics_maintenance", ), diff --git a/autobot-backend/initialization/router_registry/feature_routers.py b/autobot-backend/initialization/router_registry/feature_routers.py index 21038cc83..8dcafa523 100644 --- a/autobot-backend/initialization/router_registry/feature_routers.py +++ b/autobot-backend/initialization/router_registry/feature_routers.py @@ -105,7 +105,11 @@ "state_tracking", ), ("api.project_state", "/project-state", ["project-state"], "project_state"), - ("api.phase_management", "/phases", ["phases"], "phase_management"), + # Issue #3331: api.phase_management removed — its /phases prefix conflicted and its + # scripts.phase_validation_system import never existed; replaced by api.phases below. 
+ # Issue #3331: /api/project/* and /api/phases/* endpoints for PhaseProgressionIndicator + ("api.project", "/project", ["project"], "project"), + ("api.phases", "/phases", ["phases"], "phases"), # Services and infrastructure ("api.services", "/services", ["services"], "services"), ("api.elevation", "/elevation", ["elevation"], "elevation"), @@ -157,9 +161,10 @@ ["merge-conflicts", "code-intelligence", "git"], "merge_conflict_resolution", ), + # Issue #3355: moved prefix from APIRouter() into registry (was "") ( "api.natural_language_search", - "", + "/nl-search", ["natural-language-search", "code-search"], "natural_language_search", ), @@ -286,6 +291,13 @@ ["chat-knowledge"], "chat_knowledge", ), + # Issue #3402: KB Librarian agent HTTP surface — registered + ( + "api.kb_librarian", + "/kb-librarian", + ["kb-librarian", "knowledge"], + "kb_librarian", + ), # NPU and Redis ("api.npu_workers", "", ["npu-workers"], "npu_workers"), ("api.redis_service", "/redis-service", ["redis-service"], "redis_service"), @@ -430,6 +442,13 @@ ["conversation-export"], "conversation_export", ), + # Issue #3407: SLM Docker deployment bridge + ( + "api.slm.deployments", + "", + ["slm-deployments"], + "slm_deployments", + ), ] diff --git a/autobot-backend/judges/__init__.py b/autobot-backend/judges/__init__.py index ccc5fb7b0..8d9a3c89f 100644 --- a/autobot-backend/judges/__init__.py +++ b/autobot-backend/judges/__init__.py @@ -129,7 +129,9 @@ async def make_judgment( except Exception as e: logger.error("Error in %s judgment: %s", self.judge_type, e) - return await self._create_error_judgment(subject, "Judgment evaluation failed") + return await self._create_error_judgment( + subject, "Judgment evaluation failed" + ) async def _finalize_judgment_result( self, judgment_result: JudgmentResult, start_time: datetime diff --git a/autobot-backend/knowledge/chat_knowledge_system.e2e_test.py b/autobot-backend/knowledge/chat_knowledge_system.e2e_test.py index 12b1b3abd..f85ffcbab 100644 --- 
a/autobot-backend/knowledge/chat_knowledge_system.e2e_test.py +++ b/autobot-backend/knowledge/chat_knowledge_system.e2e_test.py @@ -68,7 +68,7 @@ async def test_chat_context_creation(self): } async with self.session.post( - f"{BASE_URL}/api/chat_knowledge/context/create", json=context_data + f"{BASE_URL}/api/chat-knowledge/context/create", json=context_data ) as response: if response.status == 200: data = await response.json() @@ -108,7 +108,7 @@ def test_function(): data.add_field("association_type", "upload") async with self.session.post( - f"{BASE_URL}/api/chat_knowledge/files/upload/{self.test_chat_id}", + f"{BASE_URL}/api/chat-knowledge/files/upload/{self.test_chat_id}", data=data, ) as response: if response.status == 200: @@ -151,7 +151,7 @@ async def test_temporary_knowledge_addition(self): } async with self.session.post( - f"{BASE_URL}/api/chat_knowledge/knowledge/add_temporary", + f"{BASE_URL}/api/chat-knowledge/knowledge/add_temporary", json=knowledge_data, ) as response: if response.status == 200: @@ -177,7 +177,7 @@ async def test_knowledge_retrieval_and_decisions(self): # Get pending knowledge items async with self.session.get( - f"{BASE_URL}/api/chat_knowledge/knowledge/pending/{self.test_chat_id}" + f"{BASE_URL}/api/chat-knowledge/knowledge/pending/{self.test_chat_id}" ) as response: if response.status == 200: data = await response.json() @@ -204,7 +204,7 @@ async def test_knowledge_retrieval_and_decisions(self): } async with self.session.post( - f"{BASE_URL}/api/chat_knowledge/knowledge/decide", + f"{BASE_URL}/api/chat-knowledge/knowledge/decide", json=decision_data, ) as decision_response: if decision_response.status == 200: @@ -253,7 +253,7 @@ async def test_chat_search(self): } async with self.session.post( - f"{BASE_URL}/api/chat_knowledge/search", json=search_data + f"{BASE_URL}/api/chat-knowledge/search", json=search_data ) as response: if response.status == 200: data = await response.json() @@ -316,7 +316,7 @@ async def 
test_chat_compilation(self): } async with self.session.post( - f"{BASE_URL}/api/chat_knowledge/compile", json=compile_data + f"{BASE_URL}/api/chat-knowledge/compile", json=compile_data ) as response: if response.status == 200: data = await response.json() @@ -339,7 +339,7 @@ async def test_context_retrieval(self): logger.info("📋 Test 7: Context Retrieval") async with self.session.get( - f"{BASE_URL}/api/chat_knowledge/context/{self.test_chat_id}" + f"{BASE_URL}/api/chat-knowledge/context/{self.test_chat_id}" ) as response: if response.status == 200: data = await response.json() diff --git a/autobot-backend/knowledge/facts.py b/autobot-backend/knowledge/facts.py index f982ab3f3..f57d52d95 100644 --- a/autobot-backend/knowledge/facts.py +++ b/autobot-backend/knowledge/facts.py @@ -1300,7 +1300,9 @@ async def _delete_single_fact_for_session( except Exception as e: logger.error("Failed to delete fact %s: %s", fact_id, e) - result["errors"].append({"fact_id": fact_id, "error": "Fact deletion failed"}) + result["errors"].append( + {"fact_id": fact_id, "error": "Fact deletion failed"} + ) async def _process_session_facts_deletion( self, @@ -1376,7 +1378,9 @@ async def delete_facts_by_session( except Exception as e: logger.error("Failed to delete facts for session %s: %s", session_id, e) - result["errors"].append({"session_id": session_id, "error": "Session facts deletion failed"}) + result["errors"].append( + {"session_id": session_id, "error": "Session facts deletion failed"} + ) return result async def _cleanup_session_tracking( diff --git a/autobot-backend/knowledge/metadata.py b/autobot-backend/knowledge/metadata.py index 47d634471..4e951e584 100644 --- a/autobot-backend/knowledge/metadata.py +++ b/autobot-backend/knowledge/metadata.py @@ -242,7 +242,11 @@ async def list_metadata_templates(self, category: str = None) -> Dict[str, Any]: except Exception as e: logger.error("Failed to list metadata templates: %s", e) - return {"status": "error", "message": "Metadata 
operation failed", "templates": []} + return { + "status": "error", + "message": "Metadata operation failed", + "templates": [], + } async def _update_template_category_links( self, template_id: str, old_categories: set, new_categories: set @@ -393,7 +397,11 @@ async def validate_metadata( except Exception as e: logger.error("Failed to validate metadata: %s", e) - return {"valid": False, "errors": ["Metadata validation failed"], "warnings": []} + return { + "valid": False, + "errors": ["Metadata validation failed"], + "warnings": [], + } def _validate_field_type(self, field_name: str, value: Any, field_type: str) -> str: """Validate a field value against its expected type. Returns error or None.""" @@ -469,7 +477,11 @@ async def apply_template_defaults( except Exception as e: logger.error("Failed to apply template defaults: %s", e) - return {"status": "error", "message": "Metadata operation failed", "metadata": metadata} + return { + "status": "error", + "message": "Metadata operation failed", + "metadata": metadata, + } def _match_metadata_value( self, field_value: Any, value: Any, operator: str @@ -548,7 +560,11 @@ async def search_by_metadata( except Exception as e: logger.error("Failed to search by metadata: %s", e) - return {"status": "error", "message": "Metadata operation failed", "fact_ids": []} + return { + "status": "error", + "message": "Metadata operation failed", + "fact_ids": [], + } def ensure_initialized(self): """Ensure the knowledge base is initialized. 
Implemented in composed class.""" diff --git a/autobot-backend/knowledge/search.py b/autobot-backend/knowledge/search.py index 84d96dd15..6053e7296 100644 --- a/autobot-backend/knowledge/search.py +++ b/autobot-backend/knowledge/search.py @@ -462,7 +462,12 @@ async def enhanced_search( ) except Exception as e: logger.error("Enhanced search failed: %s", e) - return {"success": False, "results": [], "total_count": 0, "error": "Search failed"} + return { + "success": False, + "results": [], + "total_count": 0, + "error": "Search failed", + } def _preprocess_query(self, query: str) -> str: """Preprocess search query for better results. Issue #78: Query preprocessing.""" @@ -775,7 +780,12 @@ async def enhanced_search_v2_ctx( import traceback logger.error(traceback.format_exc()) - return {"success": False, "results": [], "total_count": 0, "error": "Search failed"} + return { + "success": False, + "results": [], + "total_count": 0, + "error": "Search failed", + } def _expand_query_terms(self, query: str, enable_expansion: bool) -> List[str]: """Expand query with synonyms and related terms.""" diff --git a/autobot-backend/knowledge/stats.py b/autobot-backend/knowledge/stats.py index 00a473354..01b607463 100644 --- a/autobot-backend/knowledge/stats.py +++ b/autobot-backend/knowledge/stats.py @@ -506,7 +506,11 @@ async def get_detailed_stats(self) -> Dict[str, Any]: except Exception as e: logger.error("Error generating detailed stats: %s", e) - return {**basic_stats, "detailed_stats": False, "error": "Failed to generate detailed stats"} + return { + **basic_stats, + "detailed_stats": False, + "error": "Failed to generate detailed stats", + } async def _calc_all_quality_dimensions( self, facts: List[Dict[str, Any]] diff --git a/autobot-backend/knowledge/suggestions.py b/autobot-backend/knowledge/suggestions.py index cb6da9142..6eec7de44 100644 --- a/autobot-backend/knowledge/suggestions.py +++ b/autobot-backend/knowledge/suggestions.py @@ -107,7 +107,11 @@ async def 
suggest_tags( except Exception as e: logger.error("Failed to suggest tags: %s", e) - return {"success": False, "suggestions": [], "error": "Failed to suggest tags"} + return { + "success": False, + "suggestions": [], + "error": "Failed to suggest tags", + } async def suggest_categories( self, @@ -146,7 +150,11 @@ async def suggest_categories( except Exception as e: logger.error("Failed to suggest categories: %s", e) - return {"success": False, "suggestions": [], "error": "Failed to suggest categories"} + return { + "success": False, + "suggestions": [], + "error": "Failed to suggest categories", + } def _empty_suggestion_response(self, error: str = None) -> Dict[str, Any]: """Build empty suggestion response (Issue #398: extracted).""" @@ -300,7 +308,11 @@ async def auto_apply_suggestions( except Exception as e: logger.error("Failed to auto-apply suggestions: %s", e) - return {"success": False, "fact_id": fact_id, "error": "Failed to apply suggestions"} + return { + "success": False, + "fact_id": fact_id, + "error": "Failed to apply suggestions", + } async def _find_similar_documents( self, content: str, limit: int diff --git a/autobot-backend/knowledge/versioning.py b/autobot-backend/knowledge/versioning.py index 64afd0062..40e0c47da 100644 --- a/autobot-backend/knowledge/versioning.py +++ b/autobot-backend/knowledge/versioning.py @@ -208,7 +208,11 @@ async def list_versions(self, fact_id: str, limit: int = 10) -> Dict[str, Any]: except Exception as e: logger.error("Failed to list versions for %s: %s", fact_id, e) - return {"status": "error", "message": "Versioning operation failed", "versions": []} + return { + "status": "error", + "message": "Versioning operation failed", + "versions": [], + } async def _apply_version_to_fact( self, fact_id: str, target_version: Dict[str, Any] diff --git a/autobot-backend/knowledge_factory.py b/autobot-backend/knowledge_factory.py index 0b3265cd5..6321371a5 100644 --- a/autobot-backend/knowledge_factory.py +++ 
b/autobot-backend/knowledge_factory.py @@ -144,7 +144,8 @@ async def get_or_create_knowledge_base(app: FastAPI, force_refresh: bool = False if elapsed < KB_RETRY_COOLDOWN_SECONDS: remaining = int(KB_RETRY_COOLDOWN_SECONDS - elapsed) logger.debug( - "KB init cooldown active — skipping retry (%ds remaining)", remaining + "KB init cooldown active — skipping retry (%ds remaining)", + remaining, ) return None diff --git a/autobot-backend/llm_interface_pkg/adapters/__init__.py b/autobot-backend/llm_interface_pkg/adapters/__init__.py index c42589ce4..e30e6fb3c 100644 --- a/autobot-backend/llm_interface_pkg/adapters/__init__.py +++ b/autobot-backend/llm_interface_pkg/adapters/__init__.py @@ -10,7 +10,6 @@ from .ai_stack_adapter import AIStackAdapter from .anthropic_adapter import AnthropicAdapter -from .layer_inference_adapter import LayerInferenceAdapter from .base import ( AdapterBase, AdapterConfig, @@ -19,6 +18,7 @@ EnvironmentTestResult, SessionCodec, ) +from .layer_inference_adapter import LayerInferenceAdapter from .ollama_adapter import OllamaAdapter from .openai_adapter import OpenAIAdapter from .process_adapter import ProcessAdapter diff --git a/autobot-backend/llm_interface_pkg/adapters/layer_inference_adapter.py b/autobot-backend/llm_interface_pkg/adapters/layer_inference_adapter.py index 5dce7f6bd..ec30fa1e8 100644 --- a/autobot-backend/llm_interface_pkg/adapters/layer_inference_adapter.py +++ b/autobot-backend/llm_interface_pkg/adapters/layer_inference_adapter.py @@ -102,7 +102,10 @@ async def execute( try: result = await asyncio.to_thread( - self._pipeline.execute, prompt, prepared, max_new_tokens=max_tokens, + self._pipeline.execute, + prompt, + prepared, + max_new_tokens=max_tokens, ) except Exception: logger.exception("LayerInference generation failed") diff --git a/autobot-backend/llm_interface_pkg/interface.py b/autobot-backend/llm_interface_pkg/interface.py index 1136baded..7fd81d265 100644 --- a/autobot-backend/llm_interface_pkg/interface.py +++ 
b/autobot-backend/llm_interface_pkg/interface.py @@ -1273,15 +1273,11 @@ async def _handle_local_request(self, request: LLMRequest) -> LLMResponse: """Handle local requests via handler.""" return await self._local_handler.chat_completion(request) - async def _handle_layer_inference_request( - self, request: LLMRequest - ) -> LLMResponse: + async def _handle_layer_inference_request(self, request: LLMRequest) -> LLMResponse: """Handle layer-by-layer inference requests via adapter (#3104).""" adapter = self._adapter_registry.get("layer_inference") if not adapter: - raise ValueError( - "LayerInferenceAdapter not registered in adapter registry" - ) + raise ValueError("LayerInferenceAdapter not registered in adapter registry") return await adapter.execute(request) # Utility methods diff --git a/autobot-backend/llm_interface_pkg/optimization/__init__.py b/autobot-backend/llm_interface_pkg/optimization/__init__.py index ed066d70e..27cc19d3a 100644 --- a/autobot-backend/llm_interface_pkg/optimization/__init__.py +++ b/autobot-backend/llm_interface_pkg/optimization/__init__.py @@ -63,7 +63,6 @@ LayerInferenceEngine, LayerInferenceStats, ) -from .model_inspector import ModelInfo, clear_cache, inspect_model from .meta_eviction import ( EvictionStats, MetaDeviceEvictionManager, @@ -71,6 +70,7 @@ evict_layer_to_meta, get_gpu_memory_allocated, ) +from .model_inspector import ModelInfo, clear_cache, inspect_model from .pipeline import LayerInferencePipeline, PipelineConfig, PreparedPipeline from .profiler import INFERENCE_STAGES, LayeredProfiler from .prompt_compressor import CompressionConfig, CompressionResult, PromptCompressor diff --git a/autobot-backend/llm_interface_pkg/optimization/hf_quantizer.py b/autobot-backend/llm_interface_pkg/optimization/hf_quantizer.py index 185904d68..edd85450d 100644 --- a/autobot-backend/llm_interface_pkg/optimization/hf_quantizer.py +++ b/autobot-backend/llm_interface_pkg/optimization/hf_quantizer.py @@ -378,7 +378,9 @@ def 
_preprocess_bitsandbytes(self) -> Dict[str, Any]: if self._config.extra_kwargs.get("load_in_8bit"): bnb_kwargs = {"load_in_8bit": True} elif self._config.extra_kwargs.get("load_in_4bit") is not None: - bnb_kwargs = {"load_in_4bit": bool(self._config.extra_kwargs["load_in_4bit"])} + bnb_kwargs = { + "load_in_4bit": bool(self._config.extra_kwargs["load_in_4bit"]) + } bnb_config = transformers.BitsAndBytesConfig(**bnb_kwargs) return { "quantization_config": bnb_config, diff --git a/autobot-backend/llm_interface_pkg/optimization/model_inspector.py b/autobot-backend/llm_interface_pkg/optimization/model_inspector.py index 00789fb46..9b4c93f98 100644 --- a/autobot-backend/llm_interface_pkg/optimization/model_inspector.py +++ b/autobot-backend/llm_interface_pkg/optimization/model_inspector.py @@ -130,7 +130,9 @@ def _cache_put(model_name: str, info: ModelInfo) -> None: # --------------------------------------------------------------------------- -def _extract_from_config(cfg: Any, param_count_override: Optional[int] = None) -> ModelInfo: +def _extract_from_config( + cfg: Any, param_count_override: Optional[int] = None +) -> ModelInfo: """ Build a ModelInfo from a transformers PretrainedConfig object. @@ -154,9 +156,7 @@ def _extract_from_config(cfg: Any, param_count_override: Optional[int] = None) - or 0 ) num_attention_heads = ( - getattr(cfg, "num_attention_heads", None) - or getattr(cfg, "n_head", None) - or 0 + getattr(cfg, "num_attention_heads", None) or getattr(cfg, "n_head", None) or 0 ) vocab_size = getattr(cfg, "vocab_size", 0) or 0 @@ -193,7 +193,9 @@ def _estimate_param_count(num_layers: int, hidden_size: int, vocab_size: int) -> return embedding_params + num_layers * per_layer_params -def _count_params_via_skeleton(cfg: Any, transformers: Any, accelerate: Any) -> Optional[int]: +def _count_params_via_skeleton( + cfg: Any, transformers: Any, accelerate: Any +) -> Optional[int]: """ Instantiate an empty-weight model skeleton and return its exact param count. 
@@ -265,7 +267,9 @@ def _inspect_via_config(model_name: str) -> Optional[ModelInfo]: transformers = _import_transformers() accelerate = _import_accelerate() except ImportError as exc: - logger.warning("model_inspector: dependency missing for %s — %s", model_name, exc) + logger.warning( + "model_inspector: dependency missing for %s — %s", model_name, exc + ) return None try: diff --git a/autobot-backend/llm_interface_pkg/optimization/model_inspector_test.py b/autobot-backend/llm_interface_pkg/optimization/model_inspector_test.py index 19fe97c3b..1b36b93bc 100644 --- a/autobot-backend/llm_interface_pkg/optimization/model_inspector_test.py +++ b/autobot-backend/llm_interface_pkg/optimization/model_inspector_test.py @@ -27,7 +27,6 @@ inspect_model, ) - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -174,9 +173,17 @@ def test_fallback_attributes_n_layer(self): def test_zero_config_returns_zero_params(self): """All-zero config produces zero param_count.""" cfg = MagicMock() - for attr in ("num_hidden_layers", "num_layers", "n_layer", - "hidden_size", "d_model", "n_embd", - "num_attention_heads", "n_head", "vocab_size"): + for attr in ( + "num_hidden_layers", + "num_layers", + "n_layer", + "hidden_size", + "d_model", + "n_embd", + "num_attention_heads", + "n_head", + "vocab_size", + ): setattr(cfg, attr, 0) info = _extract_from_config(cfg) assert info.param_count == 0 @@ -226,7 +233,9 @@ def test_returns_none_on_exception(self): """Returns None when from_config raises, without propagating the error.""" cfg = _make_config() transformers = MagicMock(name="transformers") - transformers.AutoModelForCausalLM.from_config.side_effect = RuntimeError("unsupported arch") + transformers.AutoModelForCausalLM.from_config.side_effect = RuntimeError( + "unsupported arch" + ) accelerate = _make_accelerate_mock() result = _count_params_via_skeleton(cfg, transformers, 
accelerate) @@ -328,8 +337,12 @@ def test_returns_none_when_accelerate_missing(self): def test_returns_model_info_with_skeleton_param_count(self): """inspect_model uses skeleton param count when available.""" - cfg = _make_config(num_hidden_layers=32, hidden_size=4096, - num_attention_heads=32, vocab_size=32000) + cfg = _make_config( + num_hidden_layers=32, + hidden_size=4096, + num_attention_heads=32, + vocab_size=32000, + ) mock_transformers = _make_transformers_mock(param_count=7_241_732_096) mock_transformers.AutoConfig = MagicMock() mock_transformers.AutoConfig.from_pretrained.return_value = cfg @@ -358,7 +371,9 @@ def test_falls_back_to_formula_when_skeleton_fails(self): mock_transformers = MagicMock(name="transformers") mock_transformers.AutoConfig = MagicMock() mock_transformers.AutoConfig.from_pretrained.return_value = cfg - mock_transformers.AutoModelForCausalLM.from_config.side_effect = RuntimeError("no arch") + mock_transformers.AutoModelForCausalLM.from_config.side_effect = RuntimeError( + "no arch" + ) mock_accelerate = _make_accelerate_mock() with ( @@ -405,7 +420,9 @@ def test_caches_result_on_success(self): def test_returns_none_on_config_fetch_failure(self): """inspect_model returns None when AutoConfig.from_pretrained raises.""" mock_transformers = MagicMock(name="transformers") - mock_transformers.AutoConfig.from_pretrained.side_effect = OSError("hub unavailable") + mock_transformers.AutoConfig.from_pretrained.side_effect = OSError( + "hub unavailable" + ) mock_accelerate = _make_accelerate_mock() with ( diff --git a/autobot-backend/llm_interface_pkg/optimization/pipeline.py b/autobot-backend/llm_interface_pkg/optimization/pipeline.py index f17001db9..d787b54b8 100644 --- a/autobot-backend/llm_interface_pkg/optimization/pipeline.py +++ b/autobot-backend/llm_interface_pkg/optimization/pipeline.py @@ -25,8 +25,11 @@ from dataclasses import dataclass, field from typing import Any, Dict, Optional -from .attention_backend import 
AttentionBackendSelector, ModelConfig as AttentionModelConfig -from .hf_quantizer import HfQuantizerWrapper, QuantizerConfig, QuantizationType +from .attention_backend import ( + AttentionBackendSelector, +) +from .attention_backend import ModelConfig as AttentionModelConfig +from .hf_quantizer import HfQuantizerWrapper, QuantizationType, QuantizerConfig from .kv_cache import KVCacheConfig, KVCacheManager, LayerKVCache from .layer_inference import LayerInferenceConfig, LayerInferenceEngine from .meta_eviction import MetaDeviceEvictionManager, clean_memory @@ -224,7 +227,10 @@ def _log_prepare_result(self, prepared: PreparedPipeline, elapsed: float) -> Non kv_layers = prepared.kv_cache.config.num_layers if prepared.kv_cache else 0 logger.info( "Pipeline.prepare: done in %.3fs quant=%s attn=%s kv_layers=%d", - elapsed, quant_type, prepared.attention_backend, kv_layers, + elapsed, + quant_type, + prepared.attention_backend, + kv_layers, ) def _try_build_quantizer(self, model_cfg): diff --git a/autobot-backend/llm_interface_pkg/optimization/router.py b/autobot-backend/llm_interface_pkg/optimization/router.py index e0915ef92..8aa96072a 100644 --- a/autobot-backend/llm_interface_pkg/optimization/router.py +++ b/autobot-backend/llm_interface_pkg/optimization/router.py @@ -266,7 +266,7 @@ def get_quantization_kwargs( if not self.config.quantization_enabled: return {} - from .hf_quantizer import HfQuantizerWrapper, QuantizerConfig, QuantizationType + from .hf_quantizer import HfQuantizerWrapper, QuantizationType, QuantizerConfig quant_type_str = self.config.quantization_type type_map = { diff --git a/autobot-backend/llm_multi_provider.py b/autobot-backend/llm_multi_provider.py index 1c84626a9..63eb1e4a0 100644 --- a/autobot-backend/llm_multi_provider.py +++ b/autobot-backend/llm_multi_provider.py @@ -781,7 +781,9 @@ async def health_check(self) -> Dict[str, Any]: health["overall_healthy"] = False except Exception as e: - logger.error("Health check failed for provider %s: 
%s", provider_type.value, e) + logger.error( + "Health check failed for provider %s: %s", provider_type.value, e + ) health["providers"][provider_type.value] = { "available": False, "enabled": provider.config.enabled, diff --git a/autobot-backend/llm_providers/custom_openai_provider.py b/autobot-backend/llm_providers/custom_openai_provider.py index eae5efc9f..3be75b88d 100644 --- a/autobot-backend/llm_providers/custom_openai_provider.py +++ b/autobot-backend/llm_providers/custom_openai_provider.py @@ -69,9 +69,7 @@ def __init__( def _resolve_base_url(self) -> str: """Resolve the endpoint base URL from settings or environment.""" - url = self._get_setting("base_url") or os.getenv( - "CUSTOM_OPENAI_BASE_URL", "" - ) + url = self._get_setting("base_url") or os.getenv("CUSTOM_OPENAI_BASE_URL", "") if not url: raise ValueError( "Custom OpenAI base_url not configured. " @@ -82,9 +80,7 @@ def _resolve_base_url(self) -> str: def _resolve_api_key(self) -> str: """Resolve the API key (many local servers accept any non-empty string).""" return ( - self._get_setting("api_key") - or os.getenv("CUSTOM_OPENAI_API_KEY") - or "none" + self._get_setting("api_key") or os.getenv("CUSTOM_OPENAI_API_KEY") or "none" ) def _ensure_client(self): diff --git a/autobot-backend/llm_providers/huggingface_provider.py b/autobot-backend/llm_providers/huggingface_provider.py index a0c9659e5..376d2e6f0 100644 --- a/autobot-backend/llm_providers/huggingface_provider.py +++ b/autobot-backend/llm_providers/huggingface_provider.py @@ -174,6 +174,7 @@ async def stream_completion(self, request: LLMRequest) -> AsyncIterator[str]: if chunk_json == "[DONE]": break import json + chunk = json.loads(chunk_json) delta = chunk["choices"][0].get("delta", {}).get("content") if delta: diff --git a/autobot-backend/llm_providers/ollama_provider.py b/autobot-backend/llm_providers/ollama_provider.py index dec444a2e..02a919926 100644 --- a/autobot-backend/llm_providers/ollama_provider.py +++ 
b/autobot-backend/llm_providers/ollama_provider.py @@ -52,10 +52,7 @@ def _resolve_base_url(self) -> str: """Resolve the Ollama base URL from settings, SSOT config, or env.""" if self._base_url: return self._base_url - self._base_url = ( - self._get_setting("base_url") - or get_ollama_url() - ) + self._base_url = self._get_setting("base_url") or get_ollama_url() return self._base_url def _ensure_delegate(self): @@ -68,9 +65,7 @@ def _ensure_delegate(self): if self._delegate is not None: return self._delegate from llm_interface_pkg.models import LLMSettings - from llm_interface_pkg.providers.ollama import ( - OllamaProvider as _OllamaProvider, - ) + from llm_interface_pkg.providers.ollama import OllamaProvider as _OllamaProvider from llm_interface_pkg.streaming import StreamingManager settings = LLMSettings() @@ -98,6 +93,7 @@ async def chat_completion(self, request: LLMRequest) -> LLMResponse: self._total_errors += 1 logger.error("OllamaProvider delegation error: %s", exc) import time + return LLMResponse( content="", model=request.model_name or "", diff --git a/autobot-backend/llm_providers/openai_provider.py b/autobot-backend/llm_providers/openai_provider.py index e7d51b276..67d6c80b3 100644 --- a/autobot-backend/llm_providers/openai_provider.py +++ b/autobot-backend/llm_providers/openai_provider.py @@ -65,6 +65,7 @@ def _resolve_api_key(self) -> Optional[str]: if not key: try: from config import ConfigManager + key = ConfigManager().get_api_key("openai") except Exception: pass diff --git a/autobot-backend/llm_providers/provider_registry.py b/autobot-backend/llm_providers/provider_registry.py index f60a5e22e..d850244da 100644 --- a/autobot-backend/llm_providers/provider_registry.py +++ b/autobot-backend/llm_providers/provider_registry.py @@ -96,7 +96,9 @@ def set_fallback_chain(self, chain: List[str]) -> None: # Per-conversation overrides # ------------------------------------------------------------------ - def set_conversation_provider(self, conversation_id: 
str, provider_name: str) -> None: + def set_conversation_provider( + self, conversation_id: str, provider_name: str + ) -> None: """Pin a specific provider for a given conversation.""" self._conversation_overrides[conversation_id] = provider_name logger.debug( @@ -265,8 +267,10 @@ def _populate_default_providers(registry: ProviderRegistry) -> None: # Ollama (local) — always registered, highest priority try: ssot = get_ssot_config() - ollama_url = ssot.ollama_url if ssot else os.getenv( - "AUTOBOT_OLLAMA_ENDPOINT", "http://127.0.0.1:11434" + ollama_url = ( + ssot.ollama_url + if ssot + else os.getenv("AUTOBOT_OLLAMA_ENDPOINT", "http://127.0.0.1:11434") ) from llm_providers.ollama_provider import OllamaProvider diff --git a/autobot-backend/middleware/audit_middleware.py b/autobot-backend/middleware/audit_middleware.py index 9f36d692a..b22eaad66 100644 --- a/autobot-backend/middleware/audit_middleware.py +++ b/autobot-backend/middleware/audit_middleware.py @@ -72,12 +72,13 @@ def set_main_event_loop(loop: asyncio.AbstractEventLoop) -> None: # Issue #380: Module-level frozensets for audit checks _MODIFYING_HTTP_METHODS = frozenset({"POST", "PUT", "DELETE", "PATCH"}) +# Issue #3334: /api/config/ replaced by /api/settings/ — updated sensitive prefix list _SENSITIVE_PATH_PREFIXES = ( "/api/auth/", "/api/security/", "/api/elevation/", "/api/files/", - "/api/config/", + "/api/settings/", ) @@ -99,8 +100,10 @@ def set_main_event_loop(loop: asyncio.AbstractEventLoop) -> None: "/api/chat/sessions": "session.create", "/api/agent-terminal/sessions": "session.create", "/api/terminal/sessions": "session.create", - # Configuration - "/api/config": "config.update", + # Configuration — Issue #3334: consolidated under /api/settings/ + "/api/settings/": "config.update", + "/api/settings/config": "config.update", + "/api/settings/backend": "config.update", } diff --git a/autobot-backend/models/infrastructure.py b/autobot-backend/models/infrastructure.py new file mode 100644 index 
000000000..680a7623f --- /dev/null +++ b/autobot-backend/models/infrastructure.py @@ -0,0 +1,83 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Infrastructure Pydantic models for Docker deployment orchestration. + +These models represent the request/response shapes used when autobot-backend +orchestrates Docker container deployments via the SLM Ansible playbook runner. + +Related to Issue #3407. +""" + +from __future__ import annotations + +import enum +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, Field + + +class DeploymentStrategy(str, enum.Enum): + """Deployment rollout strategy.""" + + SEQUENTIAL = "sequential" + PARALLEL = "parallel" + CANARY = "canary" + + +class PortMapping(BaseModel): + """A single host-to-container port mapping.""" + + host_port: int + container_port: int + protocol: str = "tcp" + + +class DockerContainerSpec(BaseModel): + """Specification for a single Docker container to deploy.""" + + name: str + image: str + tag: str = "latest" + ports: list[PortMapping] = Field(default_factory=list) + environment: dict[str, str] = Field(default_factory=dict) + restart_policy: str = "unless-stopped" + + +class DockerDeploymentRequest(BaseModel): + """Request body for triggering a Docker deployment via the SLM.""" + + node_id: str + containers: list[DockerContainerSpec] + playbook: str = "deploy-hybrid-docker.yml" + + +class DockerDeploymentStatus(BaseModel): + """Status of a Docker deployment returned by the SLM.""" + + deployment_id: str + node_id: str + status: str + started_at: Optional[datetime] = None + completed_at: Optional[datetime] = None + error: Optional[str] = None + + +class DeploymentCreateRequest(BaseModel): + """Generic deployment create request (multi-role, non-Docker path).""" + + role_name: str + target_nodes: list[str] + strategy: DeploymentStrategy = DeploymentStrategy.SEQUENTIAL + playbook_path: Optional[str] = None + + +class 
DeploymentActionResponse(BaseModel): + """Response for execute / cancel / rollback actions.""" + + deployment_id: str + action: str + success: bool + message: Optional[str] = None diff --git a/autobot-backend/npu_integration.py b/autobot-backend/npu_integration.py index 0929090d0..cbfb41373 100644 --- a/autobot-backend/npu_integration.py +++ b/autobot-backend/npu_integration.py @@ -322,7 +322,11 @@ async def offload_heavy_processing( except Exception as e: logger.error("Heavy processing offload failed: %s", e) - return {"success": False, "error": "Processing offload failed", "fallback": True} + return { + "success": False, + "error": "Processing offload failed", + "fallback": True, + } async def close(self): """No-op: HTTP client is managed by singleton HTTPClientManager""" diff --git a/autobot-backend/npu_semantic_search.py b/autobot-backend/npu_semantic_search.py index b12290a36..bd84a5921 100644 --- a/autobot-backend/npu_semantic_search.py +++ b/autobot-backend/npu_semantic_search.py @@ -860,7 +860,11 @@ async def _benchmark_single_device( except Exception as e: logger.error("❌ Benchmark failed for %s: %s", device.value, e) device_results.append( - {"query": query, "iteration": iteration, "error": "Benchmark failed"} + { + "query": query, + "iteration": iteration, + "error": "Benchmark failed", + } ) return device_results @@ -1344,7 +1348,10 @@ async def get_code_collection_stats(self) -> Dict[str, Any]: } except Exception as e: logger.error("Failed to get code collection stats: %s", e) - return {"available": False, "error": "Failed to retrieve code collection stats"} + return { + "available": False, + "error": "Failed to retrieve code collection stats", + } def _search_single_modality( self, diff --git a/autobot-backend/orchestration/dag_executor.py b/autobot-backend/orchestration/dag_executor.py index 50d90c277..e25241747 100644 --- a/autobot-backend/orchestration/dag_executor.py +++ b/autobot-backend/orchestration/dag_executor.py @@ -23,6 +23,9 @@ import 
asyncio import logging +import os +import ssl +import time from dataclasses import dataclass, field from enum import Enum from typing import Any, Callable, Coroutine, Dict, List, Optional, Set @@ -40,6 +43,7 @@ class NodeType(str, Enum): STEP = "step" CONDITION = "condition" PARALLEL = "parallel" + DISTRIBUTED_SHELL = "distributed_shell" # Issue #3406: fleet fan-out @classmethod def _missing_(cls, value: object) -> "NodeType": @@ -464,6 +468,16 @@ async def _execute_node( if node.node_type == NodeType.CONDITION: return await self._execute_condition_node(node, dag, ctx) + # Issue #3406: distributed_shell has its own fan-out executor + if node.node_type == NodeType.DISTRIBUTED_SHELL: + try: + result = await execute_distributed_shell(node, ctx) + except Exception as exc: + logger.error("distributed_shell node %s raised: %s", node.node_id, exc) + result = {"success": False, "error": str(exc), "node_id": node.node_id} + ctx.step_results[node.node_id] = result + return [e.target for e in dag.successors(node.node_id)] + # Regular (STEP / PARALLEL / unknown-as-STEP) node try: result = await self._execute_step(node, ctx) @@ -553,6 +567,161 @@ def _get_next_node_ids( return [e.target for e in dag.successors(node.node_id)] +# --------------------------------------------------------------------------- +# Issue #3406: Distributed shell fan-out +# --------------------------------------------------------------------------- + + +def _build_slm_ssl_context() -> ssl.SSLContext: + """Create SSL context for SLM HTTP calls (mirrors slm_client pattern).""" + ctx = ssl.create_default_context() + if os.environ.get("AUTOBOT_SKIP_TLS_VERIFY", "").lower() == "true": + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + return ctx + + +async def _execute_on_node( + slm_url: str, + auth_token: str, + node_id: str, + script: str, + language: str, + timeout: int, +) -> Dict[str, Any]: + """POST /nodes/{node_id}/execute on the SLM backend. 
+ + Returns a per-node result dict compatible with DAGExecutionContext. + """ + import aiohttp # lazy import — not available in all environments + + url = f"{slm_url}/api/nodes/{node_id}/execute" + payload = {"command": script, "language": language, "timeout": timeout} + headers = {"Authorization": f"Bearer {auth_token}"} + ssl_ctx = _build_slm_ssl_context() + connector = aiohttp.TCPConnector(ssl=ssl_ctx) + + try: + async with aiohttp.ClientSession( + headers=headers, connector=connector + ) as session: + async with session.post( + url, json=payload, timeout=aiohttp.ClientTimeout(total=timeout + 30) + ) as resp: + if resp.status == 200: + data = await resp.json() + return { + "node_id": node_id, + "exit_code": data.get("exit_code", -1), + "stdout": data.get("stdout", ""), + "stderr": data.get("stderr", ""), + "duration_ms": data.get("duration_ms", 0), + "success": data.get("exit_code", -1) == 0, + } + body = await resp.text() + return { + "node_id": node_id, + "exit_code": -1, + "stdout": "", + "stderr": f"HTTP {resp.status}: {body[:500]}", + "duration_ms": 0, + "success": False, + } + except Exception as exc: + logger.error("distributed_shell: node %s raised %s", node_id, exc) + return { + "node_id": node_id, + "exit_code": -1, + "stdout": "", + "stderr": str(exc), + "duration_ms": 0, + "success": False, + } + + +async def execute_distributed_shell(node: DAGNode, ctx: DAGExecutionContext) -> Dict[str, Any]: + """Fan out *node.data* shell script to all listed fleet nodes in parallel. + + Expected ``node.data`` schema:: + + { + "nodes": ["node-id-1", "node-id-2", ...], + "script": "echo hello", + "language": "bash", # optional, default "bash" + "timeout": 300, # optional, default 300 + } + + Returns a result dict whose ``success`` is True only when all nodes + return exit_code 0. Per-node details are in ``node_results``. 
+ """ + slm_url = os.environ.get("SLM_URL", "").rstrip("/") + auth_token = os.environ.get("SLM_AUTH_TOKEN", "") + if not slm_url: + return { + "success": False, + "error": "SLM_URL not configured for distributed_shell", + "node_results": [], + "node_id": node.node_id, + } + + target_nodes: List[str] = node.data.get("nodes", []) + script: str = node.data.get("script", "") + language: str = node.data.get("language", "bash") + timeout: int = int(node.data.get("timeout", 300)) + + if not target_nodes: + return { + "success": False, + "error": "distributed_shell: 'nodes' list is empty", + "node_results": [], + "node_id": node.node_id, + } + if not script: + return { + "success": False, + "error": "distributed_shell: 'script' is required", + "node_results": [], + "node_id": node.node_id, + } + + logger.info( + "distributed_shell %s: fanning out to %d node(s): %s", + node.node_id, + len(target_nodes), + target_nodes, + ) + t0 = time.monotonic() + + results: List[Dict[str, Any]] = await asyncio.gather( + *( + _execute_on_node(slm_url, auth_token, nid, script, language, timeout) + for nid in target_nodes + ) + ) + + total_ms = int((time.monotonic() - t0) * 1000) + all_ok = all(r.get("success", False) for r in results) + failed = [r["node_id"] for r in results if not r.get("success", False)] + + if not all_ok: + logger.warning( + "distributed_shell %s: %d/%d node(s) failed: %s", + node.node_id, + len(failed), + len(target_nodes), + failed, + ) + + return { + "success": all_ok, + "node_id": node.node_id, + "node_results": results, + "total_duration_ms": total_ms, + "failed_nodes": failed, + "error": None if all_ok else f"Nodes failed: {failed}", + } + + # --------------------------------------------------------------------------- # Convenience helpers used by WorkflowExecutor integration # --------------------------------------------------------------------------- diff --git a/autobot-backend/orchestration/execution_modes_test.py 
b/autobot-backend/orchestration/execution_modes_test.py index 2487e6bbb..98e6dc218 100644 --- a/autobot-backend/orchestration/execution_modes_test.py +++ b/autobot-backend/orchestration/execution_modes_test.py @@ -502,7 +502,9 @@ def test_dag_path_fires_send_workflow_notification(self) -> None: cfg = {"workflow_id": "wf_dag_notif", "channels": {}} mock_notify = unittest.mock.AsyncMock() - with unittest.mock.patch.object(executor, "_send_workflow_notification", mock_notify): + with unittest.mock.patch.object( + executor, "_send_workflow_notification", mock_notify + ): asyncio.get_event_loop().run_until_complete( executor.execute_coordinated_workflow( "wf_dag_notif", diff --git a/autobot-backend/orchestration/workflow_executor.py b/autobot-backend/orchestration/workflow_executor.py index c73792c7e..4c4ccb7c3 100644 --- a/autobot-backend/orchestration/workflow_executor.py +++ b/autobot-backend/orchestration/workflow_executor.py @@ -247,15 +247,16 @@ async def _send_workflow_notification( ) async def _send_step_failure_notification( - self, workflow_id: str, step_id: str, error: str, + self, + workflow_id: str, + step_id: str, + error: str, execution_context: Optional[Dict[str, Any]] = None, ) -> None: """Fire a STEP_FAILED notification (#3101, #3168).""" from services.notification_service import NotificationEvent - config = self._resolve_notification_config( - workflow_id, execution_context or {} - ) + config = self._resolve_notification_config(workflow_id, execution_context or {}) if config is None: return @@ -266,9 +267,7 @@ async def _send_step_failure_notification( "step_name": step_id, "error": error, } - await svc.send( - NotificationEvent.STEP_FAILED, workflow_id, payload, config - ) + await svc.send(NotificationEvent.STEP_FAILED, workflow_id, payload, config) except Exception: logger.warning( "Failed to send step-failure notification for %s/%s", @@ -649,7 +648,10 @@ async def execute_coordinated_workflow( workflow_id, ) return await self._execute_dag_workflow( 
- workflow_id, steps, effective_edges, context, + workflow_id, + steps, + effective_edges, + context, notification_config=notification_config, ) @@ -724,9 +726,7 @@ async def execute_coordinated_workflow( shared_memory.clear() # Issue #3101: fire notification on terminal status. - await self._send_workflow_notification( - workflow_id, execution_context - ) + await self._send_workflow_notification(workflow_id, execution_context) return execution_context @@ -735,9 +735,7 @@ async def execute_coordinated_workflow( execution_context["status"] = "failed" execution_context["error"] = str(e) # Issue #3101: fire failure notification. - await self._send_workflow_notification( - workflow_id, execution_context - ) + await self._send_workflow_notification(workflow_id, execution_context) return execution_context def _apply_checkpoints( diff --git a/autobot-backend/project_state_manager.py b/autobot-backend/project_state_manager.py index edb5422fc..21a771f2c 100644 --- a/autobot-backend/project_state_manager.py +++ b/autobot-backend/project_state_manager.py @@ -316,7 +316,7 @@ def _get_phase5_capabilities(self) -> List[PhaseCapability]: "chat_knowledge", "Context-aware chat system", "api_endpoint", - get_service_url("backend", "/api/chat_knowledge/health"), + get_service_url("backend", "/api/chat-knowledge/health"), ), ] diff --git a/autobot-backend/requirements.txt b/autobot-backend/requirements.txt index 361098d0c..0512b119b 100644 --- a/autobot-backend/requirements.txt +++ b/autobot-backend/requirements.txt @@ -19,7 +19,7 @@ alembic>=1.18.4 # AI/ML openai>=2.30.0 -anthropic>=0.86.0 +anthropic>=0.87.0 chromadb>=1.5.5 sentence-transformers>=5.3.0 torch>=2.11.0 # Issue #904: ML model training @@ -55,7 +55,7 @@ spacy>=3.8.13 # Issue #2025: NLP entity extraction scikit-learn>=1.8.0 # Issue #2027: RAPTOR clustering # Media processing pipelines (Issue #932) -aiohttp>=3.13.3 # Async HTTP client - SECURITY UPDATE (7 CVE fixes) +aiohttp>=3.13.4 # Async HTTP client - SECURITY UPDATE 
beautifulsoup4>=4.14.3 # HTML parsing for link pipeline Pillow>=12.1.1 # Image processing - SECURITY UPDATE (OOB write fix) # Optional (install separately for full media support): diff --git a/autobot-backend/research_browser_manager.py b/autobot-backend/research_browser_manager.py index c13068d7a..58f6de2fb 100644 --- a/autobot-backend/research_browser_manager.py +++ b/autobot-backend/research_browser_manager.py @@ -335,7 +335,9 @@ async def extract_content(self) -> Dict[str, Any]: } except Exception as e: - logger.error("Content extraction failed for session %s: %s", self.session_id, e) + logger.error( + "Content extraction failed for session %s: %s", self.session_id, e + ) return {"success": False, "error": "Content extraction failed"} async def save_mhtml(self) -> Optional[str]: diff --git a/autobot-backend/resources/prompts/chat/api_documentation.md b/autobot-backend/resources/prompts/chat/api_documentation.md index ca99a5350..4efdc05a6 100644 --- a/autobot-backend/resources/prompts/chat/api_documentation.md +++ b/autobot-backend/resources/prompts/chat/api_documentation.md @@ -8,13 +8,13 @@ 1. Execute commands to find the authoritative documentation: ``` - find /home/kali/Desktop/AutoBot/docs/api -name "*.md" | head -20 + find docs/api -name "*.md" | head -20 ``` 2. Read the relevant files: ``` - cat /home/kali/Desktop/AutoBot/docs/api/COMPREHENSIVE_API_DOCUMENTATION.md - cat /home/kali/Desktop/AutoBot/docs/api/API_ENDPOINT_MAPPING.md - cat /home/kali/Desktop/AutoBot/docs/developer/AUTOBOT_REFERENCE.md + cat docs/api/COMPREHENSIVE_API_DOCUMENTATION.md + cat docs/api/API_ENDPOINT_MAPPING.md + cat docs/developer/AUTOBOT_REFERENCE.md ``` 3. Base your answer on the **actual file contents**, not the static guidance below. 4. If file contents contradict this document, **trust the files**. 
@@ -300,15 +300,15 @@ curl -X POST http://172.16.168.20:8001/api/v1/knowledge/upload \ ### Documentation References **Complete API Reference:** -- `/home/kali/Desktop/AutoBot/docs/api/COMPREHENSIVE_API_DOCUMENTATION.md` +- `docs/api/COMPREHENSIVE_API_DOCUMENTATION.md` - 518+ endpoints documented - Request/response examples - Error handling details - Integration guides **Related Documentation:** -- Architecture: `/home/kali/Desktop/AutoBot/docs/architecture/PHASE_5_DISTRIBUTED_ARCHITECTURE.md` -- Troubleshooting: `/home/kali/Desktop/AutoBot/docs/troubleshooting/COMPREHENSIVE_TROUBLESHOOTING_GUIDE.md` +- Architecture: `docs/architecture/DISTRIBUTED_ARCHITECTURE.md` +- Troubleshooting: `docs/troubleshooting/COMPREHENSIVE_TROUBLESHOOTING_GUIDE.md` ### API Design Principles diff --git a/autobot-backend/resources/prompts/chat/architecture_explanation.md b/autobot-backend/resources/prompts/chat/architecture_explanation.md index 3b9270350..d89fdabc3 100644 --- a/autobot-backend/resources/prompts/chat/architecture_explanation.md +++ b/autobot-backend/resources/prompts/chat/architecture_explanation.md @@ -8,13 +8,13 @@ 1. Execute commands to find the authoritative documentation: ``` - find /home/kali/Desktop/AutoBot/docs/architecture -name "*.md" | head -20 + find docs/architecture -name "*.md" | head -20 ``` 2. Read the relevant files: ``` - cat /home/kali/Desktop/AutoBot/docs/architecture/PHASE_5_DISTRIBUTED_ARCHITECTURE.md - cat /home/kali/Desktop/AutoBot/docs/architecture/DISTRIBUTED_6VM_ARCHITECTURE.md - cat /home/kali/Desktop/AutoBot/docs/developer/AUTOBOT_REFERENCE.md + cat docs/architecture/DISTRIBUTED_ARCHITECTURE.md + cat docs/architecture/DISTRIBUTED_6VM_ARCHITECTURE.md + cat docs/developer/AUTOBOT_REFERENCE.md ``` 3. Base your answer on the **actual file contents**, not the static guidance below. 4. If file contents contradict this document, **trust the files**. @@ -177,9 +177,9 @@ You are explaining AutoBot's distributed VM architecture and technical design. 
F ### Documentation References Always reference these for detailed information: -- **Architecture Doc**: `/home/kali/Desktop/AutoBot/docs/architecture/PHASE_5_DISTRIBUTED_ARCHITECTURE.md` -- **API Documentation**: `/home/kali/Desktop/AutoBot/docs/api/COMPREHENSIVE_API_DOCUMENTATION.md` -- **Developer Setup**: `/home/kali/Desktop/AutoBot/docs/developer/PHASE_5_DEVELOPER_SETUP.md` +- **Architecture Doc**: `docs/architecture/DISTRIBUTED_ARCHITECTURE.md` +- **API Documentation**: `docs/api/COMPREHENSIVE_API_DOCUMENTATION.md` +- **Developer Setup**: `docs/developer/DEVELOPER_SETUP.md` ## Response Style diff --git a/autobot-backend/resources/prompts/chat/installation_help.md b/autobot-backend/resources/prompts/chat/installation_help.md index e5c39e783..f240475a2 100644 --- a/autobot-backend/resources/prompts/chat/installation_help.md +++ b/autobot-backend/resources/prompts/chat/installation_help.md @@ -8,13 +8,13 @@ 1. Execute commands to find the authoritative documentation: ``` - find /home/kali/Desktop/AutoBot/docs/developer -name "*.md" | head -20 + find docs/developer -name "*.md" | head -20 ``` 2. Read the relevant files: ``` - cat /home/kali/Desktop/AutoBot/docs/developer/PHASE_5_DEVELOPER_SETUP.md - cat /home/kali/Desktop/AutoBot/docs/developer/INFRASTRUCTURE_DEPLOYMENT.md - cat /home/kali/Desktop/AutoBot/docs/developer/AUTOBOT_REFERENCE.md + cat docs/developer/DEVELOPER_SETUP.md + cat docs/developer/INFRASTRUCTURE_DEPLOYMENT.md + cat docs/developer/AUTOBOT_REFERENCE.md ``` 3. Base your answer on the **actual file contents**, not the static guidance below. 4. If file contents contradict this document, **trust the files**. 
@@ -107,9 +107,9 @@ Explain the 5-VM distributed architecture clearly: ### Key Documentation References Always reference these documents: -- **Setup Guide**: `/home/kali/Desktop/AutoBot/docs/developer/PHASE_5_DEVELOPER_SETUP.md` -- **Architecture**: `/home/kali/Desktop/AutoBot/docs/architecture/PHASE_5_DISTRIBUTED_ARCHITECTURE.md` -- **Troubleshooting**: `/home/kali/Desktop/AutoBot/docs/troubleshooting/COMPREHENSIVE_TROUBLESHOOTING_GUIDE.md` +- **Setup Guide**: `docs/developer/DEVELOPER_SETUP.md` +- **Architecture**: `docs/architecture/DISTRIBUTED_ARCHITECTURE.md` +- **Troubleshooting**: `docs/troubleshooting/COMPREHENSIVE_TROUBLESHOOTING_GUIDE.md` ### Installation Guidance Patterns diff --git a/autobot-backend/resources/prompts/chat/system_prompt.md b/autobot-backend/resources/prompts/chat/system_prompt.md index 94e2395f5..87748ff53 100644 --- a/autobot-backend/resources/prompts/chat/system_prompt.md +++ b/autobot-backend/resources/prompts/chat/system_prompt.md @@ -207,7 +207,7 @@ User: "Check disk space on frontend VM" User: "Find all Python files in backend directory" ``` -Find Python files in backend +Find Python files in backend ``` **Important Notes:** @@ -448,7 +448,7 @@ Would you like me to walk you through the complete setup process, or do you have **When User Seems Lost or Confused:** - Detect confusion patterns: "I don't know", "not sure", "confused", "stuck" - Offer step-by-step guidance with clear numbered steps -- Provide relevant documentation links from `/home/kali/Desktop/AutoBot/docs/` +- Provide relevant documentation links from `docs/` - Ask clarifying questions: "Are you trying to [specific task]?" 
- Break down complex topics into smaller chunks @@ -460,7 +460,7 @@ Would you like me to walk you through the complete setup process, or do you have **Installation/Setup Requests:** - ALWAYS direct to `sudo ./install.sh` then the Setup Wizard -- Reference: `/home/kali/Desktop/AutoBot/docs/developer/PHASE_5_DEVELOPER_SETUP.md` +- Reference: `docs/developer/DEVELOPER_SETUP.md` - Explain 5-VM distributed architecture: Main(20), Frontend(21), NPU(22), Redis(23), AI-Stack(24), Browser(25) - Provide concrete examples with actual file paths - Mention 25-minute complete setup time @@ -468,7 +468,7 @@ Would you like me to walk you through the complete setup process, or do you have **Architecture Questions:** - Reference distributed VM infrastructure clearly - Explain service separation rationale -- Point to architecture documentation: `docs/architecture/PHASE_5_DISTRIBUTED_ARCHITECTURE.md` +- Point to architecture documentation: `docs/architecture/DISTRIBUTED_ARCHITECTURE.md` - Use specific IP addresses: 172.16.168.20-25 - Clarify single frontend server rule (only VM1) @@ -545,7 +545,7 @@ Assistant: "AutoBot uses 5 VMs in its distributed architecture: This distributed setup provides better resource management and scalability. Each VM has a specific role to optimize performance. -For more details about the architecture, check: `docs/architecture/PHASE_5_DISTRIBUTED_ARCHITECTURE.md` +For more details about the architecture, check: `docs/architecture/DISTRIBUTED_ARCHITECTURE.md` Do you need help setting up these VMs, or do you have questions about a specific component?" ``` diff --git a/autobot-backend/resources/prompts/chat/system_prompt_simple.md b/autobot-backend/resources/prompts/chat/system_prompt_simple.md index 7ece53d49..4168fada3 100644 --- a/autobot-backend/resources/prompts/chat/system_prompt_simple.md +++ b/autobot-backend/resources/prompts/chat/system_prompt_simple.md @@ -19,9 +19,9 @@ You are AutoBot, a helpful AI assistant. 
You can have normal conversations AND e - ONLY answer from knowledge base context provided in this prompt OR from actual command output - If no knowledge context is provided, **DO NOT answer from memory** — instead execute a command to locate and read the actual documentation: ``` - find /home/kali/Desktop/AutoBot/docs/developer -name "*.md" | head -20 + find docs/developer -name "*.md" | head -20 ``` - Then read the relevant file (e.g. `cat /home/kali/Desktop/AutoBot/docs/developer/PHASE_5_DEVELOPER_SETUP.md`) and base your answer on its actual content. + Then read the relevant file (e.g. `cat docs/developer/DEVELOPER_SETUP.md`) and base your answer on its actual content. - **NEVER answer AutoBot setup, install, deployment, or configuration questions from memory.** The LLM's internal knowledge about AutoBot installation is outdated and unreliable — always ground answers in the actual docs or command output. - If documentation is not accessible or no relevant docs are found, say so explicitly and do NOT fabricate steps. @@ -149,7 +149,7 @@ User: "what processes are using the most CPU?" User: "find all Python files in backend" ``` -Find Python files +Find Python files ``` User: "go to github.com and take a screenshot" diff --git a/autobot-backend/resources/prompts/chat/troubleshooting.md b/autobot-backend/resources/prompts/chat/troubleshooting.md index 9ef7c3868..7898c8c1d 100644 --- a/autobot-backend/resources/prompts/chat/troubleshooting.md +++ b/autobot-backend/resources/prompts/chat/troubleshooting.md @@ -8,13 +8,13 @@ 1. Execute commands to find the authoritative documentation: ``` - find /home/kali/Desktop/AutoBot/docs/troubleshooting -name "*.md" | head -20 + find docs/troubleshooting -name "*.md" | head -20 ``` 2. 
Read the relevant files: ``` - cat /home/kali/Desktop/AutoBot/docs/troubleshooting/COMPREHENSIVE_TROUBLESHOOTING_GUIDE.md - cat /home/kali/Desktop/AutoBot/docs/system-state.md - cat /home/kali/Desktop/AutoBot/docs/developer/AUTOBOT_REFERENCE.md + cat docs/troubleshooting/COMPREHENSIVE_TROUBLESHOOTING_GUIDE.md + cat docs/system-state.md + cat docs/developer/AUTOBOT_REFERENCE.md ``` 3. Base your answer on the **actual file contents**, not the static guidance below. 4. If file contents contradict this document, **trust the files**. @@ -58,7 +58,7 @@ You are helping diagnose and resolve AutoBot issues. Focus on systematic debuggi *Symptom*: API calls failing or returning 500 errors *Diagnosis Steps*: -1. Check backend logs: `/home/kali/Desktop/AutoBot/logs/backend.log` +1. Check backend logs: `logs/backend.log` 2. Verify backend health: `curl http://172.16.168.20:8001/api/health` 3. Test Redis connection: `redis-cli -h 172.16.168.23 ping` 4. Check Ollama status: `curl http://172.16.168.24:11434/api/tags` @@ -136,8 +136,8 @@ You are helping diagnose and resolve AutoBot issues. 
Focus on systematic debuggi ### Log File Locations **Main Machine:** -- Backend: `/home/kali/Desktop/AutoBot/logs/backend.log` -- Setup: `/home/kali/Desktop/AutoBot/logs/setup.log` +- Backend: `logs/backend.log` +- Setup: `logs/setup.log` - Docker: `docker logs ` **Frontend VM (172.16.168.21):** @@ -172,7 +172,7 @@ curl http://172.16.168.21:5173 **View Logs:** ```bash # Backend logs -tail -f /home/kali/Desktop/AutoBot/logs/backend.log +tail -f logs/backend.log # Docker logs docker logs -f @@ -196,9 +196,9 @@ nc -zv 172.16.168.23 6379 ### Documentation References Always reference comprehensive guides: -- **Troubleshooting Guide**: `/home/kali/Desktop/AutoBot/docs/troubleshooting/COMPREHENSIVE_TROUBLESHOOTING_GUIDE.md` -- **System State**: `/home/kali/Desktop/AutoBot/docs/system-state.md` -- **API Docs**: `/home/kali/Desktop/AutoBot/docs/api/COMPREHENSIVE_API_DOCUMENTATION.md` +- **Troubleshooting Guide**: `docs/troubleshooting/COMPREHENSIVE_TROUBLESHOOTING_GUIDE.md` +- **System State**: `docs/system-state.md` +- **API Docs**: `docs/api/COMPREHENSIVE_API_DOCUMENTATION.md` ### Escalation Criteria diff --git a/autobot-backend/security/domain_security_test.py b/autobot-backend/security/domain_security_test.py index 832beec3f..f23c539c9 100644 --- a/autobot-backend/security/domain_security_test.py +++ b/autobot-backend/security/domain_security_test.py @@ -176,7 +176,9 @@ def setup_method(self): ) def test_matches_with_content_between(self): - assert self.mgr._check_blacklist("pre-anything-fix.example.com")["blocked"] is True + assert ( + self.mgr._check_blacklist("pre-anything-fix.example.com")["blocked"] is True + ) def test_matches_when_wildcard_is_empty(self): assert self.mgr._check_blacklist("prefix.example.com")["blocked"] is True @@ -299,7 +301,9 @@ def setup_method(self): cfg = DomainSecurityConfig.__new__(DomainSecurityConfig) cfg.config_path = "" cfg.config = cfg._get_default_config() - with patch("security.domain_security.get_http_client", 
return_value=MagicMock()): + with patch( + "security.domain_security.get_http_client", return_value=MagicMock() + ): mgr = DomainSecurityManager.__new__(DomainSecurityManager) mgr.config = cfg mgr.domain_cache = {} diff --git a/autobot-backend/security/security_integration_test.py b/autobot-backend/security/security_integration_test.py index 02230d09b..bb81b5517 100644 --- a/autobot-backend/security/security_integration_test.py +++ b/autobot-backend/security/security_integration_test.py @@ -238,67 +238,6 @@ def test_command_approval_workflow_api(self): assert "approved" in data["message"] -class TestTerminalSecurityIntegration: - """Integration tests for secure terminal functionality""" - - @pytest.mark.asyncio - async def test_secure_terminal_session_integration(self): - """Test secure terminal session with security layer integration""" - from api.secure_terminal_websocket import SecureTerminalSession - - # Create mock security layer - mock_security = MagicMock() - - # Create terminal session - session = SecureTerminalSession( - session_id="integration_test", - security_layer=mock_security, - user_role="developer", - ) - - # Test command auditing integration - await session.audit_command("ls -la") - - # Verify security layer was called for auditing - mock_security.audit_log.assert_called() - call_args = mock_security.audit_log.call_args - assert call_args[1]["action"] == "terminal_command" - assert call_args[1]["details"]["command"] == "ls -la" - - @pytest.mark.asyncio - async def test_terminal_risk_assessment_integration(self): - """Test terminal risk assessment integration""" - from api.secure_terminal_websocket import SecureTerminalSession - - # Mock security layer with command executor - mock_security = MagicMock() - mock_executor = MagicMock() - mock_executor.assess_command_risk.return_value = ("high", ["High risk command"]) - mock_security.command_executor = mock_executor - - # Create terminal session - session = SecureTerminalSession( - 
session_id="risk_test", security_layer=mock_security, user_role="user" - ) - - # Mock WebSocket for warning messages - session.websocket = AsyncMock() - session.active = True - - # Test high-risk command that matches risky patterns - await session.audit_command("rm -rf /tmp/test") - - # Should log both command execution and risky command detection - assert mock_security.audit_log.call_count == 2 - - # Verify the audit calls - audit_calls = mock_security.audit_log.call_args_list - assert audit_calls[0][1]["action"] == "terminal_command" - assert audit_calls[1][1]["action"] == "risky_command_detected" - assert audit_calls[1][1]["outcome"] == "warning" - assert audit_calls[1][1]["details"]["risk_level"] == "high" - - class TestDockerSandboxIntegration: """Integration tests for Docker sandbox functionality""" diff --git a/autobot-backend/security/threat_intelligence.py b/autobot-backend/security/threat_intelligence.py index cfcad82d9..33579818e 100644 --- a/autobot-backend/security/threat_intelligence.py +++ b/autobot-backend/security/threat_intelligence.py @@ -286,7 +286,11 @@ async def _submit_url_for_analysis(self, url: str) -> Dict[str, Any]: } except Exception as e: logger.error("Failed to submit URL to VirusTotal: %s", e) - return {"success": False, "error": "VirusTotal submission failed", "score": None} + return { + "success": False, + "error": "VirusTotal submission failed", + "score": None, + } def _parse_url_response(self, data: Dict[str, Any]) -> Dict[str, Any]: """Parse VirusTotal URL analysis response.""" @@ -318,7 +322,11 @@ def _parse_url_response(self, data: Dict[str, Any]) -> Dict[str, Any]: } except Exception as e: logger.error("Failed to parse VirusTotal response: %s", e) - return {"success": False, "error": "Failed to parse VirusTotal response", "score": None} + return { + "success": False, + "error": "Failed to parse VirusTotal response", + "score": None, + } class URLVoidClient: @@ -418,7 +426,11 @@ async def check_domain(self, url: str) -> 
Dict[str, Any]: return {"success": False, "error": "Request timeout", "score": None} except Exception as e: logger.error("URLVoid API error: %s", e) - return {"success": False, "error": "URLVoid API request failed", "score": None} + return { + "success": False, + "error": "URLVoid API request failed", + "score": None, + } def _check_xml_error(self, root: Any) -> Optional[Dict[str, Any]]: """Check for error response in URLVoid XML. @@ -524,7 +536,11 @@ def _parse_response(self, content: str, domain: str) -> Dict[str, Any]: except Exception as e: # defusedxml raises various exceptions for malformed/malicious XML logger.error("Failed to parse URLVoid XML response: %s", e) - return {"success": False, "error": "Failed to parse URLVoid response", "score": None} + return { + "success": False, + "error": "Failed to parse URLVoid response", + "score": None, + } class ThreatIntelligenceService: diff --git a/autobot-backend/services/access_control_metrics.py b/autobot-backend/services/access_control_metrics.py index 4da83ebe1..1758a8f6e 100644 --- a/autobot-backend/services/access_control_metrics.py +++ b/autobot-backend/services/access_control_metrics.py @@ -273,7 +273,10 @@ async def get_statistics( except Exception as e: logger.error("Failed to get statistics: %s", e) - return {"error": "Failed to retrieve access control statistics", "total_violations": 0} + return { + "error": "Failed to retrieve access control statistics", + "total_violations": 0, + } def _parse_json_violation_data( self, @@ -404,7 +407,10 @@ async def get_endpoint_statistics(self, endpoint: str, days: int = 7) -> Metadat except Exception as e: logger.error("Failed to get endpoint statistics: %s", e) - return {"endpoint": endpoint, "error": "Failed to retrieve endpoint statistics"} + return { + "endpoint": endpoint, + "error": "Failed to retrieve endpoint statistics", + } async def get_user_statistics(self, username: str, days: int = 7) -> Metadata: """ diff --git 
a/autobot-backend/services/agent_terminal/STRUCTURE.md b/autobot-backend/services/agent_terminal/STRUCTURE.md index 65a087dae..99f238240 100644 --- a/autobot-backend/services/agent_terminal/STRUCTURE.md +++ b/autobot-backend/services/agent_terminal/STRUCTURE.md @@ -106,10 +106,10 @@ from backend.services.agent_terminal import ... ``` Updated files: -- `/home/kali/Desktop/AutoBot/backend/api/agent_terminal.py` -- `/home/kali/Desktop/AutoBot/backend/api/websockets.py` -- `/home/kali/Desktop/AutoBot/src/chat_workflow_manager.py` -- `/home/kali/Desktop/AutoBot/src/tools/terminal_tool.py` +- `backend/api/agent_terminal.py` +- `backend/api/websockets.py` +- `src/chat_workflow_manager.py` +- `src/tools/terminal_tool.py` Note: `AgentRole` imports changed to: ```python diff --git a/autobot-backend/services/agent_terminal/service.py b/autobot-backend/services/agent_terminal/service.py index 15301f799..63ecf583f 100644 --- a/autobot-backend/services/agent_terminal/service.py +++ b/autobot-backend/services/agent_terminal/service.py @@ -817,7 +817,11 @@ async def execute_command( return await self._execute_auto_approved_command(session, command, risk) except Exception as e: logger.error("Command execution error: %s", e) - return {"status": "error", "error": "Command execution failed", "command": command} + return { + "status": "error", + "error": "Command execution failed", + "command": command, + } async def _save_command_to_chat( self, diff --git a/autobot-backend/services/autoresearch/Dockerfile b/autobot-backend/services/autoresearch/Dockerfile new file mode 100644 index 000000000..d68139c3f --- /dev/null +++ b/autobot-backend/services/autoresearch/Dockerfile @@ -0,0 +1,23 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# +# Slim experiment runner image for AutoResearch Docker isolation. +# Built and pushed to ghcr.io/mrveiss/autobot-autoresearch by CI. +# No AutoBot backend code is included — experiment deps only. 
+ +FROM python:3.12-slim + +WORKDIR /experiment + +# Install minimal runtime dependencies for experiment scripts. +# numpy and torch-cpu cover the typical nanoGPT training baseline. +RUN pip install --no-cache-dir numpy torch --extra-index-url https://download.pytorch.org/whl/cpu + +# Copy the experiment entrypoint. The train script itself is provided at +# runtime via a read-only volume mount at /experiment. +COPY run_experiment.py /experiment/run_experiment.py + +# Output directory must exist so the writable volume mount succeeds. +RUN mkdir -p /output + +ENTRYPOINT ["python", "run_experiment.py"] diff --git a/autobot-backend/services/autoresearch/__init__.py b/autobot-backend/services/autoresearch/__init__.py index 6d2d3ca14..25a8a9315 100644 --- a/autobot-backend/services/autoresearch/__init__.py +++ b/autobot-backend/services/autoresearch/__init__.py @@ -11,22 +11,7 @@ Issue #2599: AutoBot-orchestrated loop + web search (M2). """ -from .knowledge_synthesizer import ExperimentInsight, KnowledgeSynthesizer -from .prompt_optimizer import ( - BenchmarkFn, - OptimizationSession, - OptimizationStatus, - PromptOptimizer, - PromptOptTarget, - PromptVariant, -) -from .scorers import ( - HumanReviewScorer, - LLMJudgeScorer, - PromptScorer, - ScorerResult, - ValBpbScorer, -) +from .archive import Archive from .auto_research_agent import ( ApprovalGate, AutoResearchAgent, @@ -37,12 +22,16 @@ SessionStatus, ) from .config import AutoResearchConfig +from .knowledge_synthesizer import ExperimentInsight, KnowledgeSynthesizer +from .meta_agent import MetaAgent, MetaPatch +from .meta_eval_harness import MetaEvalHarness, MetaEvalResult from .models import ( Experiment, ExperimentResult, ExperimentState, ExperimentStats, HyperParams, + VariantArchiveEntry, ) from .osint_engine import ( CorrelatedSignal, @@ -57,8 +46,23 @@ build_default_engine, ) from .parser import ExperimentOutputParser +from .prompt_optimizer import ( + BenchmarkFn, + OptimizationSession, + OptimizationStatus, 
+ PromptOptimizer, + PromptOptTarget, + PromptVariant, +) from .routes import router from .runner import ExperimentRunner +from .scorers import ( + HumanReviewScorer, + LLMJudgeScorer, + PromptScorer, + ScorerResult, + ValBpbScorer, +) from .store import ExperimentStore __all__ = [ @@ -98,6 +102,13 @@ "ExperimentInsight", # Routes "router", + # Archive + Meta-agent (Issue #3222, #3224) + "Archive", + "VariantArchiveEntry", + "MetaAgent", + "MetaPatch", + "MetaEvalHarness", + "MetaEvalResult", # OSINT Engine (Issue #1949) "OSINTSource", "OSINTEngine", diff --git a/autobot-backend/services/autoresearch/archive.py b/autobot-backend/services/autoresearch/archive.py new file mode 100644 index 000000000..ddac2c80c --- /dev/null +++ b/autobot-backend/services/autoresearch/archive.py @@ -0,0 +1,153 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Quality-Diversity Archive for PromptOptimizer. + +Issue #3222: Replaces the greedy top-K filter so all variants are retained +and parent selection uses random-weighted sampling (weight = score). +""" + +from __future__ import annotations + +import json +import logging +import random +from typing import List, Optional + +from .models import VariantArchiveEntry + +logger = logging.getLogger(__name__) + + +class Archive: + """Stores all VariantArchiveEntry objects across optimization generations. + + All variants are retained regardless of score. Parent selection is + random-weighted so high-scoring variants are more likely to be chosen + but low-scoring ones are never completely excluded. 
+ """ + + def __init__(self, max_size: Optional[int] = None) -> None: + self._entries: List[VariantArchiveEntry] = [] + self._max_size = max_size + + # ------------------------------------------------------------------ + # Mutation helpers + # ------------------------------------------------------------------ + + def add(self, entry: VariantArchiveEntry) -> None: + """Add a new entry, optionally pruning if max_size is set.""" + self._entries.append(entry) + if self._max_size and len(self._entries) > self._max_size: + self._prune(self._max_size) + + def mark_invalid(self, variant_id: str) -> None: + """Exclude *variant_id* from future parent selection.""" + for entry in self._entries: + if entry.variant_id == variant_id: + entry.valid_parent = False + return + + def _prune(self, max_size: int) -> None: + """Remove lowest-scoring entries until len <= max_size.""" + self._entries.sort(key=lambda e: e.score, reverse=True) + self._entries = self._entries[:max_size] + + # ------------------------------------------------------------------ + # Selection + # ------------------------------------------------------------------ + + def select_parent( + self, strategy: str = "random_weighted" + ) -> Optional[VariantArchiveEntry]: + """Return a parent entry using *strategy*. + + Supported strategies + -------------------- + random_weighted + Weight each valid entry by its score. Falls back to uniform + random when all scores are zero. 
+ """ + candidates = self.valid_parents + if not candidates: + logger.warning("Archive.select_parent: no valid parents available") + return None + + if strategy != "random_weighted": + logger.warning( + "Archive.select_parent: unknown strategy %r, using random_weighted", + strategy, + ) + + return self._weighted_random(candidates) + + def _weighted_random( + self, candidates: List[VariantArchiveEntry] + ) -> VariantArchiveEntry: + """Weighted-random selection; uniform fallback when all weights are 0.""" + weights = [max(e.score, 0.0) for e in candidates] + total = sum(weights) + if total == 0.0: + return random.choice(candidates) + return random.choices(candidates, weights=weights, k=1)[0] + + # ------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------ + + @property + def valid_parents(self) -> List[VariantArchiveEntry]: + """All entries eligible for selection as a mutation parent.""" + return [e for e in self._entries if e.valid_parent] + + @property + def best(self) -> Optional[VariantArchiveEntry]: + """Highest-scoring entry across the entire archive.""" + if not self._entries: + return None + return max(self._entries, key=lambda e: e.score) + + @property + def size(self) -> int: + return len(self._entries) + + # ------------------------------------------------------------------ + # Serialisation (for Redis persistence) + # ------------------------------------------------------------------ + + def to_dict(self) -> dict: + return { + "max_size": self._max_size, + "entries": [e.to_dict() for e in self._entries], + } + + @classmethod + def from_dict(cls, data: dict, variant_cls: type) -> "Archive": + """Reconstruct an Archive from its serialised form. + + Args: + data: dict produced by :meth:`to_dict`. + variant_cls: the PromptVariant class used to reconstruct entries. 
+ """ + archive = cls(max_size=data.get("max_size")) + for raw in data.get("entries", []): + variant = variant_cls.from_dict(raw["variant"]) + entry = VariantArchiveEntry( + variant_id=raw["variant_id"], + variant=variant, + score=raw["score"], + parent_id=raw.get("parent_id"), + generation=raw["generation"], + valid_parent=raw.get("valid_parent", True), + created_at=raw.get("created_at", 0.0), + ) + archive._entries.append(entry) + return archive + + def to_json(self) -> str: + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, raw: str, variant_cls: type) -> "Archive": + return cls.from_dict(json.loads(raw), variant_cls) diff --git a/autobot-backend/services/autoresearch/config.py b/autobot-backend/services/autoresearch/config.py index 7b1292f9d..c22efd916 100644 --- a/autobot-backend/services/autoresearch/config.py +++ b/autobot-backend/services/autoresearch/config.py @@ -60,6 +60,66 @@ class AutoResearchConfig: max_concurrent_experiments: int = 1 python_executable: Optional[str] = None + # Staged evaluation (cheap-first gating) + staged_eval_fraction: float = field( + default_factory=lambda: float( + os.getenv("AUTOBOT_AUTORESEARCH_STAGED_EVAL_FRACTION", "0.3") + ) + ) + staged_eval_threshold: float = field( + default_factory=lambda: float( + os.getenv("AUTOBOT_AUTORESEARCH_STAGED_EVAL_THRESHOLD", "0.5") + ) + ) + + # Docker isolation (issue #3223) + # Set AUTOBOT_AUTORESEARCH_DOCKER_ENABLED=true via Ansible/env to activate. 
+ docker_enabled: bool = field( + default_factory=lambda: os.getenv( + "AUTOBOT_AUTORESEARCH_DOCKER_ENABLED", "false" + ).lower() + == "true" + ) + docker_image: str = field( + default_factory=lambda: os.getenv( + "AUTOBOT_AUTORESEARCH_DOCKER_IMAGE", + "ghcr.io/mrveiss/autobot-autoresearch:latest", + ) + ) + docker_memory_limit: str = field( + default_factory=lambda: os.getenv("AUTOBOT_AUTORESEARCH_DOCKER_MEMORY", "4g") + ) + docker_cpu_limit: float = field( + default_factory=lambda: float( + os.getenv("AUTOBOT_AUTORESEARCH_DOCKER_CPUS", "2.0") + ) + ) + docker_timeout: int = field( + default_factory=lambda: int( + os.getenv("AUTOBOT_AUTORESEARCH_DOCKER_TIMEOUT", "300") + ) + ) + + # Meta-agent settings (issue #3224) + meta_agent_max_module_lines: int = field( + default_factory=lambda: int( + os.getenv("AUTOBOT_META_AGENT_MAX_MODULE_LINES", "500") + ) + ) + meta_agent_llm_model: str = field( + default_factory=lambda: os.getenv( + "AUTOBOT_META_AGENT_LLM_MODEL", "claude-sonnet-4-6" + ) + ) + meta_agent_test_timeout: int = field( + default_factory=lambda: int(os.getenv("AUTOBOT_META_AGENT_TEST_TIMEOUT", "60")) + ) + meta_agent_approval_threshold: float = field( + default_factory=lambda: float( + os.getenv("AUTOBOT_META_AGENT_APPROVAL_THRESHOLD", "0.1") + ) + ) + # Data directory for experiment outputs data_dir: Path = field( default_factory=lambda: Path( diff --git a/autobot-backend/services/autoresearch/knowledge_synthesizer.py b/autobot-backend/services/autoresearch/knowledge_synthesizer.py index f96133b09..e9041c7ef 100644 --- a/autobot-backend/services/autoresearch/knowledge_synthesizer.py +++ b/autobot-backend/services/autoresearch/knowledge_synthesizer.py @@ -122,7 +122,9 @@ async def synthesize_session(self, session_id: str) -> List[ExperimentInsight]: ) raw_insights = json.loads(response.content) except json.JSONDecodeError as exc: - logger.warning("KnowledgeSynthesizer: failed to parse LLM response: %s", exc) + logger.warning( + "KnowledgeSynthesizer: failed 
to parse LLM response: %s", exc + ) return [] except Exception as exc: logger.exception("KnowledgeSynthesizer: LLM call failed: %s", exc) diff --git a/autobot-backend/services/autoresearch/knowledge_synthesizer_test.py b/autobot-backend/services/autoresearch/knowledge_synthesizer_test.py index a6a39b640..f4c880cd4 100644 --- a/autobot-backend/services/autoresearch/knowledge_synthesizer_test.py +++ b/autobot-backend/services/autoresearch/knowledge_synthesizer_test.py @@ -6,9 +6,9 @@ from __future__ import annotations import json +from unittest.mock import AsyncMock, MagicMock, patch import pytest -from unittest.mock import AsyncMock, MagicMock, patch from services.autoresearch.knowledge_synthesizer import ( ExperimentInsight, @@ -66,14 +66,16 @@ def mock_store(self): def mock_llm(self): llm = AsyncMock() mock_response = MagicMock() - mock_response.content = json.dumps([ - { - "statement": "Warmup steps >= 300 improve convergence", - "confidence": 0.8, - "supporting_experiments": ["e2"], - "related_hyperparams": ["warmup_steps"], - } - ]) + mock_response.content = json.dumps( + [ + { + "statement": "Warmup steps >= 300 improve convergence", + "confidence": 0.8, + "supporting_experiments": ["e2"], + "related_hyperparams": ["warmup_steps"], + } + ] + ) llm.chat.return_value = mock_response return llm @@ -106,12 +108,16 @@ async def test_query_insights(self, synthesizer, mock_chromadb): mock_chromadb.query.return_value = { "ids": [["i1"]], "documents": [["Warmup steps >= 300 improve convergence"]], - "metadatas": [[{ - "confidence": 0.8, - "supporting_experiments": "e2", - "related_hyperparams": "warmup_steps", - "session_id": "s1", - }]], + "metadatas": [ + [ + { + "confidence": 0.8, + "supporting_experiments": "e2", + "related_hyperparams": "warmup_steps", + "session_id": "s1", + } + ] + ], } results = await synthesizer.query_insights("warmup", limit=5) assert len(results) == 1 diff --git a/autobot-backend/services/autoresearch/meta_agent.py 
b/autobot-backend/services/autoresearch/meta_agent.py new file mode 100644 index 000000000..d388e24c7 --- /dev/null +++ b/autobot-backend/services/autoresearch/meta_agent.py @@ -0,0 +1,237 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +AutoResearch Meta-Agent + +Issue #3224: Self-referential agent that generates code-level improvements +to a target Python module, guided by prior-generation eval results stored +in the quality-diversity archive. + +Architecture: + MetaAgent.generate_patch() + │ + ├─ _validate_target() ─► path safety checks + ├─ _build_prompt() ─► module content + archive context + ├─ _call_llm() ─► full modified module content from LLM + └─ MetaPatch ─► original + modified content + rationale + +Safety constraints: + - target_module_path must be an absolute path + - target must have a .py extension and must not be a test file + - module line count must be within meta_agent_max_module_lines + - LLM output is treated as candidate code only — MetaEvalHarness + validates and gates via tests + ApprovalGate before any live apply +""" + +from __future__ import annotations + +import logging +import time +import uuid +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional + +from .config import AutoResearchConfig + +logger = logging.getLogger(__name__) + + +@dataclass +class MetaPatch: + """Proposed code improvement produced by the MetaAgent.""" + + patch_id: str = field(default_factory=lambda: str(uuid.uuid4())) + target_path: str = "" # absolute path to the target module + original_content: str = "" + modified_content: str = "" + rationale: str = "" # LLM summary of what was changed and why + generation: int = 0 + parent_id: Optional[str] = None + created_at: float = field(default_factory=time.time) + + @property + def has_changes(self) -> bool: + """True when the LLM produced a meaningful modification.""" + return self.original_content.strip() 
!= self.modified_content.strip() + + def to_dict(self) -> Dict[str, Any]: + return { + "patch_id": self.patch_id, + "target_path": self.target_path, + "original_content": self.original_content, + "modified_content": self.modified_content, + "has_changes": self.has_changes, + "rationale": self.rationale, + "generation": self.generation, + "parent_id": self.parent_id, + "created_at": self.created_at, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "MetaPatch": + """Reconstruct a MetaPatch from its serialised form (for Archive replay).""" + return cls( + patch_id=data["patch_id"], + target_path=data.get("target_path", ""), + original_content=data.get("original_content", ""), + modified_content=data.get("modified_content", ""), + rationale=data.get("rationale", ""), + generation=data.get("generation", 0), + parent_id=data.get("parent_id"), + created_at=data.get("created_at", 0.0), + ) + + +class MetaAgent: + """Generates code-level improvement patches for a target module. + + The agent reads the current module source, optionally reads prior + generation eval context from the archive, and asks an LLM to produce + an improved version of the file. The result is a MetaPatch that can + be validated and gated by MetaEvalHarness before any live application. + """ + + _SYSTEM_PROMPT = ( + "You are an expert software engineer specialising in improving Python code.\n" + "You will receive a Python module and optional context about prior versions.\n\n" + "Your task: return an improved version of the module.\n\n" + "Rules:\n" + "1. Return ONLY the complete modified Python file — no markdown fences, " + "no explanations outside the file.\n" + "2. Preserve all public function/class signatures exactly.\n" + "3. Make minimal, focused changes — do not rewrite unnecessarily.\n" + "4. You may add helper functions but must not remove existing public ones.\n" + "5. 
Begin the file with a one-line comment: " "# RATIONALE: <one-sentence summary of changes>\n" ) + + def __init__( + self, + config: Optional[AutoResearchConfig] = None, + llm_service: Any = None, + ) -> None: + self.config = config or AutoResearchConfig() + self._llm = llm_service + + async def generate_patch( + self, + target_module_path: Path, + eval_context: List[Dict[str, Any]], + generation: int, + parent_id: Optional[str] = None, + ) -> MetaPatch: + """Generate a code improvement patch for *target_module_path*. + + Args: + target_module_path: Absolute path to the Python module to improve. + eval_context: List of prior-generation result dicts from the archive + (score, rationale, etc.) for context. + generation: Current generation index. + parent_id: Archive entry ID of the parent generation, or None. + + Returns: + A MetaPatch with original and proposed modified content. + """ + self._validate_target(target_module_path) + original_content = target_module_path.read_text(encoding="utf-8") + self._validate_size(original_content, target_module_path) + + prompt = self._build_prompt(original_content, eval_context) + logger.info( + "MetaAgent: generating patch for %s (gen=%d)", + target_module_path.name, + generation, + ) + modified_content = await self._call_llm(prompt) + rationale = self._extract_rationale(modified_content) + + patch = MetaPatch( + target_path=str(target_module_path), + original_content=original_content, + modified_content=modified_content, + rationale=rationale, + generation=generation, + parent_id=parent_id, + ) + if not patch.has_changes: + logger.info( + "MetaAgent: LLM produced no changes for %s", target_module_path.name + ) + else: + logger.info( + "MetaAgent: patch %s has changes (gen=%d, parent=%s)", + patch.patch_id, + generation, + parent_id, + ) + return patch + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + def _validate_target(self, path: Path) ->
None: + """Raise ValueError for unsafe or disallowed targets.""" + if not path.is_absolute(): + raise ValueError(f"target_module_path must be absolute, got: {path}") + if path.suffix != ".py": + raise ValueError( + f"target_module_path must be a .py file, got: {path.suffix}" + ) + stem = path.stem.lower() + if stem.startswith("test_") or stem.endswith("_test"): + raise ValueError(f"meta-agent must not target test files: {path.name}") + if not path.exists(): + raise FileNotFoundError(f"Target module not found: {path}") + + def _validate_size(self, content: str, path: Path) -> None: + """Raise ValueError if the module exceeds the configured line limit.""" + line_count = content.count("\n") + limit = self.config.meta_agent_max_module_lines + if line_count > limit: + raise ValueError( + f"{path.name} has {line_count} lines, exceeds limit of {limit}" + ) + + def _build_prompt( + self, original_content: str, eval_context: List[Dict[str, Any]] + ) -> str: + """Compose the user prompt from module content and archive context.""" + parts = [ + "Here is the Python module to improve:\n\n```python\n", + original_content, + "\n```\n", + ] + if eval_context: + parts.append("\nContext from prior generations (best → worst score):\n") + for entry in eval_context[:5]: # cap at 5 entries + score = entry.get("score", "?") + rationale = entry.get("rationale", "no rationale") + parts.append(f"- score={score}: {rationale}\n") + parts.append("\nReturn the improved module now.") + return "".join(parts) + + async def _call_llm(self, prompt: str) -> str: + """Call the LLM with the improvement prompt and return the response.""" + if self._llm is None: + raise RuntimeError("MetaAgent: no LLM service configured") + response = await self._llm.chat( + messages=[ + {"role": "system", "content": self._SYSTEM_PROMPT}, + {"role": "user", "content": prompt}, + ], + temperature=0.7, + max_tokens=4000, + model=self.config.meta_agent_llm_model, + ) + return response.content.strip() + + @staticmethod + def 
_extract_rationale(modified_content: str) -> str: + """Pull the rationale comment from the first line of the LLM output.""" + first_line = modified_content.splitlines()[0] if modified_content else "" + if first_line.startswith("# RATIONALE:"): + return first_line[len("# RATIONALE:") :].strip() + return "no rationale provided" diff --git a/autobot-backend/services/autoresearch/meta_agent_test.py b/autobot-backend/services/autoresearch/meta_agent_test.py new file mode 100644 index 000000000..bd483547f --- /dev/null +++ b/autobot-backend/services/autoresearch/meta_agent_test.py @@ -0,0 +1,264 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Unit tests for MetaAgent (issue #3224). +""" + +from __future__ import annotations + +import textwrap +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from .config import AutoResearchConfig +from .meta_agent import MetaAgent, MetaPatch + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_agent(llm_response: str = "") -> tuple[MetaAgent, MagicMock]: + """Return (agent, mock_llm) with the LLM returning *llm_response*.""" + llm = MagicMock() + llm.chat = AsyncMock(return_value=MagicMock(content=llm_response)) + agent = MetaAgent(config=AutoResearchConfig(), llm_service=llm) + return agent, llm + + +def _write_module(path: Path, content: str) -> None: + path.write_text(textwrap.dedent(content), encoding="utf-8") + + +# --------------------------------------------------------------------------- +# MetaPatch +# --------------------------------------------------------------------------- + + +def test_metapatch_has_changes_true(): + patch = MetaPatch(original_content="a = 1\n", modified_content="a = 2\n") + assert patch.has_changes is True + + +def test_metapatch_has_changes_false_whitespace(): + patch = 
MetaPatch(original_content="a = 1\n", modified_content="a = 1") + assert patch.has_changes is False + + +def test_metapatch_to_dict_keys(): + patch = MetaPatch(patch_id="abc", target_path="/tmp/foo.py", generation=3) + d = patch.to_dict() + assert d["patch_id"] == "abc" + assert d["target_path"] == "/tmp/foo.py" + assert d["generation"] == 3 + assert "has_changes" in d + + +def test_metapatch_from_dict_roundtrip(): + original = MetaPatch( + patch_id="roundtrip-id", + target_path="/tmp/foo.py", + original_content="x = 1\n", + modified_content="x = 2\n", + rationale="test", + generation=3, + parent_id="parent-abc", + ) + restored = MetaPatch.from_dict(original.to_dict()) + assert restored.patch_id == original.patch_id + assert restored.original_content == original.original_content + assert restored.modified_content == original.modified_content + assert restored.rationale == original.rationale + assert restored.generation == original.generation + assert restored.parent_id == original.parent_id + + +# --------------------------------------------------------------------------- +# _validate_target +# --------------------------------------------------------------------------- + + +def test_validate_target_rejects_relative(tmp_path): + agent, _ = _make_agent() + with pytest.raises(ValueError, match="absolute"): + agent._validate_target(Path("relative/path.py")) + + +def test_validate_target_rejects_non_py(tmp_path): + agent, _ = _make_agent() + f = tmp_path / "module.txt" + f.touch() + with pytest.raises(ValueError, match=".py"): + agent._validate_target(f) + + +def test_validate_target_rejects_test_prefix_file(tmp_path): + agent, _ = _make_agent() + f = tmp_path / "test_module.py" + f.touch() + with pytest.raises(ValueError, match="test files"): + agent._validate_target(f) + + +def test_validate_target_rejects_test_suffix_file(tmp_path): + agent, _ = _make_agent() + f = tmp_path / "module_test.py" + f.touch() + with pytest.raises(ValueError, match="test files"): + 
agent._validate_target(f) + + +def test_validate_target_accepts_protest_py(tmp_path): + """'protest.py' contains 'test' as substring but is NOT a test file.""" + agent, _ = _make_agent() + f = tmp_path / "protest.py" + f.touch() + agent._validate_target(f) # should not raise + + +def test_validate_target_rejects_missing(tmp_path): + agent, _ = _make_agent() + f = tmp_path / "missing.py" + with pytest.raises(FileNotFoundError): + agent._validate_target(f) + + +def test_validate_target_accepts_valid(tmp_path): + agent, _ = _make_agent() + f = tmp_path / "module.py" + f.touch() + agent._validate_target(f) # should not raise + + +# --------------------------------------------------------------------------- +# _validate_size +# --------------------------------------------------------------------------- + + +def test_validate_size_ok(tmp_path): + agent, _ = _make_agent() + content = "\n".join(["x = 1"] * 10) + agent._validate_size(content, tmp_path / "mod.py") # well within limit + + +def test_validate_size_exceeded(tmp_path): + config = AutoResearchConfig() + config.meta_agent_max_module_lines = 5 + agent = MetaAgent(config=config) + content = "\n".join(["x = 1"] * 10) + with pytest.raises(ValueError, match="exceeds limit"): + agent._validate_size(content, tmp_path / "mod.py") + + +# --------------------------------------------------------------------------- +# _build_prompt +# --------------------------------------------------------------------------- + + +def test_build_prompt_no_context(): + agent, _ = _make_agent() + prompt = agent._build_prompt("def foo(): pass\n", []) + assert "def foo(): pass" in prompt + assert "Return the improved module now." 
in prompt + + +def test_build_prompt_with_eval_context(): + agent, _ = _make_agent() + context = [{"score": 0.9, "rationale": "faster loop"}] + prompt = agent._build_prompt("x = 1\n", context) + assert "score=0.9" in prompt + assert "faster loop" in prompt + + +def test_build_prompt_caps_context_at_five(): + agent, _ = _make_agent() + context = [{"score": float(i), "rationale": f"r{i}"} for i in range(10)] + prompt = agent._build_prompt("x = 1\n", context) + # Only first 5 entries should appear + assert "r4" in prompt + assert "r5" not in prompt + + +# --------------------------------------------------------------------------- +# _extract_rationale +# --------------------------------------------------------------------------- + + +def test_extract_rationale_present(): + content = "# RATIONALE: optimised inner loop\n\ndef foo(): pass\n" + assert MetaAgent._extract_rationale(content) == "optimised inner loop" + + +def test_extract_rationale_missing(): + content = "def foo(): pass\n" + assert MetaAgent._extract_rationale(content) == "no rationale provided" + + +def test_extract_rationale_empty(): + assert MetaAgent._extract_rationale("") == "no rationale provided" + + +# --------------------------------------------------------------------------- +# generate_patch (integration of private helpers via public API) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_generate_patch_returns_metapatch(tmp_path): + modified = "# RATIONALE: removed dead code\ndef foo(): return 42\n" + agent, llm = _make_agent(llm_response=modified) + + target = tmp_path / "module.py" + _write_module(target, "def foo(): return 1\n") + + patch = await agent.generate_patch( + target_module_path=target, + eval_context=[], + generation=1, + ) + + assert isinstance(patch, MetaPatch) + assert patch.has_changes is True + assert patch.rationale == "removed dead code" + assert patch.generation == 1 + assert patch.target_path == 
str(target) + llm.chat.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_generate_patch_no_changes_logs(tmp_path, caplog): + content = "def foo(): return 1\n" + agent, _ = _make_agent(llm_response=content) + + target = tmp_path / "module.py" + _write_module(target, content) + + import logging + + with caplog.at_level(logging.INFO): + patch = await agent.generate_patch( + target_module_path=target, + eval_context=[], + generation=0, + ) + + assert not patch.has_changes + assert "no changes" in caplog.text + + +@pytest.mark.asyncio +async def test_generate_patch_no_llm_raises(tmp_path): + agent = MetaAgent() # no llm_service + + target = tmp_path / "module.py" + target.write_text("x = 1\n", encoding="utf-8") + + with pytest.raises(RuntimeError, match="no LLM service"): + await agent.generate_patch( + target_module_path=target, + eval_context=[], + generation=0, + ) diff --git a/autobot-backend/services/autoresearch/meta_eval_harness.py b/autobot-backend/services/autoresearch/meta_eval_harness.py new file mode 100644 index 000000000..9f266504c --- /dev/null +++ b/autobot-backend/services/autoresearch/meta_eval_harness.py @@ -0,0 +1,367 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +AutoResearch MetaEvalHarness + +Issue #3224: Validates MetaAgent patches by applying them to a temporary copy +of the target module, running pytest, scoring the result, and gating live +application through the ApprovalGate. 
+ +Architecture: + MetaEvalHarness.evaluate_patch(patch, archive) + │ + ├─ _apply_to_tempfile() ─► write modified content to a temp .py file + ├─ _run_tests() ─► pytest subprocess → (passed, total) + ├─ _compute_score() ─► test_pass_rate float 0..1 + ├─ _add_to_archive() ─► VariantArchiveEntry with score + parent + ├─ _check_approval() ─► ApprovalGate consulted when improvement > threshold + └─ MetaEvalResult ─► score, passed, decision, applied + +Safety: + - Patch is NEVER applied to the live file without explicit approval + - Tests run against the temporary file, not the live module + - Docker execution honours config.docker_enabled +""" + +from __future__ import annotations + +import asyncio +import logging +import os +import re +import shutil +import time +import uuid +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional + +from .archive import Archive +from .auto_research_agent import ApprovalGate +from .config import AutoResearchConfig +from .meta_agent import MetaPatch +from .models import VariantArchiveEntry + +logger = logging.getLogger(__name__) + + +@dataclass +class MetaEvalResult: + """Outcome of evaluating a single MetaPatch.""" + + patch_id: str = "" + score: float = 0.0 # fraction of tests passed (0..1) + tests_passed: int = 0 + tests_total: int = 0 + test_output: str = "" # raw pytest output for diagnostics + decision: str = "skipped" # "approved" | "rejected" | "skipped" | "timeout" + applied: bool = False # True only when patch is written to live file + error: Optional[str] = None + evaluated_at: float = field(default_factory=time.time) + + @property + def succeeded(self) -> bool: + """True when tests ran and at least one passed.""" + return self.tests_total > 0 and self.tests_passed > 0 + + def to_dict(self) -> Dict[str, Any]: + return { + "patch_id": self.patch_id, + "score": self.score, + "tests_passed": self.tests_passed, + "tests_total": self.tests_total, + "decision": 
self.decision, + "applied": self.applied, + "error": self.error, + "evaluated_at": self.evaluated_at, + } + + +class MetaEvalHarness: + """Validates a MetaPatch and optionally applies it to the live module. + + Workflow + -------- + 1. Write the modified content to a sibling temp file ``<stem>_meta_<uuid8>.py``. + 2. Run pytest targeting ``tests/`` adjacent to the module (or supplied path). + 3. Compute score = passed / total. + 4. Add a ``VariantArchiveEntry`` to the provided Archive. + 5. Consult ApprovalGate when the score exceeds the configured threshold. + 6. If approved, overwrite the live file with the modified content. + """ + + def __init__( + self, + config: Optional[AutoResearchConfig] = None, + approval_gate: Optional[ApprovalGate] = None, + ) -> None: + self.config = config or AutoResearchConfig() + self._gate = approval_gate or ApprovalGate(self.config) + + async def evaluate_patch( + self, + patch: MetaPatch, + archive: Archive, + session_id: str = "", + test_paths: Optional[List[str]] = None, + ) -> MetaEvalResult: + """Evaluate *patch* and return a MetaEvalResult. + + Args: + patch: The MetaPatch produced by MetaAgent. + archive: Archive to record the evaluated entry. + session_id: Autoresearch session ID used by ApprovalGate keys. + test_paths: Explicit list of test file/dir paths. When None the + harness discovers them automatically (see _find_tests). + + Returns: + A MetaEvalResult describing the outcome.
+ """ + result = MetaEvalResult(patch_id=patch.patch_id) + + if not patch.has_changes: + logger.info( + "MetaEvalHarness: patch %s has no changes — skipping", patch.patch_id + ) + result.decision = "skipped" + self._add_to_archive(archive, patch, result) + return result + + tmp_path: Optional[Path] = None + try: + tmp_path = self._write_temp_module(patch) + passed, total, output = await self._run_tests( + tmp_path, test_paths or self._find_tests(Path(patch.target_path)) + ) + result.tests_passed = passed + result.tests_total = total + result.test_output = output + result.score = self._compute_score(passed, total) + except Exception as exc: + logger.exception( + "MetaEvalHarness: test run failed for patch %s", patch.patch_id + ) + result.error = str(exc) + result.decision = "rejected" + self._add_to_archive(archive, patch, result) + return result + finally: + if tmp_path and tmp_path.exists(): + tmp_path.unlink(missing_ok=True) + + self._add_to_archive(archive, patch, result) + + if not result.succeeded: + result.decision = "rejected" + logger.info( + "MetaEvalHarness: patch %s rejected (0/%d tests passed)", + patch.patch_id, + total, + ) + return result + + # Consult ApprovalGate if improvement is significant. + # If the gate is required but no session_id is provided we reject + # rather than auto-approve — never silently apply to live code. 
+ needs_approval = self._gate.check_approval_needed( + result.score, self.config.meta_agent_approval_threshold + ) + if needs_approval: + if not session_id: + logger.warning( + "MetaEvalHarness: approval required for patch %s but " + "no session_id provided — rejecting", + patch.patch_id, + ) + result.decision = "rejected" + else: + result.decision = await self._request_and_wait( + session_id, patch, result + ) + else: + result.decision = "approved" + + if result.decision == "approved": + self._apply_patch(patch) + result.applied = True + logger.info( + "MetaEvalHarness: patch %s applied to %s (score=%.3f)", + patch.patch_id, + patch.target_path, + result.score, + ) + else: + logger.info( + "MetaEvalHarness: patch %s not applied (decision=%s)", + patch.patch_id, + result.decision, + ) + + return result + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + def _write_temp_module(self, patch: MetaPatch) -> Path: + """Write modified content to a sibling temp file and return its path.""" + target = Path(patch.target_path) + suffix = f"_meta_{uuid.uuid4().hex[:8]}.py" + tmp_path = target.with_name(target.stem + suffix) + tmp_path.write_text(patch.modified_content, encoding="utf-8") + return tmp_path + + @staticmethod + def _find_tests(target: Path) -> List[str]: + """Return test file paths adjacent to *target* module. + + Looks for ``tests/`` directory or ``*_test.py`` files next to the + target's parent package. 
+ """ + parent = target.parent + candidates = [ + parent / "tests", + parent.parent / "tests", + ] + for test_dir in candidates: + if test_dir.is_dir(): + return [str(test_dir)] + # Fallback: test files adjacent to the module + test_files = list(parent.glob("*_test.py")) + return [str(f) for f in test_files] if test_files else [str(parent)] + + async def _run_tests( + self, tmp_module: Path, test_paths: List[str] + ) -> tuple[int, int, str]: + """Run pytest and return (passed, total, output). + + The temporary module is exposed to pytest via the ``PYTHONPATH`` + environment variable so imports resolve correctly without modifying + the live package. + """ + env = os.environ.copy() + # Prepend the directory containing the temp module so pytest can import it + extra_path = str(tmp_module.parent) + existing = env.get("PYTHONPATH", "") + env["PYTHONPATH"] = f"{extra_path}:{existing}" if existing else extra_path + + cmd = [ + self.config.python_bin, + "-m", + "pytest", + "--tb=short", + "-q", + *test_paths, + ] + try: + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + env=env, + ) + stdout, _ = await asyncio.wait_for( + process.communicate(), + timeout=self.config.meta_agent_test_timeout, + ) + except asyncio.TimeoutError: + try: + process.kill() + await process.wait() + except Exception: + pass + raise RuntimeError( + f"Test run timed out after {self.config.meta_agent_test_timeout}s" + ) + + output = stdout.decode("utf-8", errors="replace") if stdout else "" + passed, total = self._parse_pytest_summary(output) + return passed, total, output + + @staticmethod + def _parse_pytest_summary(output: str) -> tuple[int, int]: + """Extract (passed, total) counts from pytest's short summary line. 
+ + Handles patterns like: + ``5 passed, 1 failed in 0.42s`` + ``3 passed in 0.10s`` + ``2 failed in 0.05s`` + """ + passed = 0 + failed = 0 + for line in reversed(output.splitlines()): + m_passed = re.search(r"(\d+) passed", line) + m_failed = re.search(r"(\d+) failed", line) + m_error = re.search(r"(\d+) error", line) + if m_passed or m_failed or m_error: + passed = int(m_passed.group(1)) if m_passed else 0 + failed = int(m_failed.group(1)) if m_failed else 0 + failed += int(m_error.group(1)) if m_error else 0 + return passed, passed + failed + return 0, 0 + + @staticmethod + def _compute_score(passed: int, total: int) -> float: + """Return pass-rate (0..1), or 0.0 when no tests were collected.""" + if total == 0: + return 0.0 + return passed / total + + def _add_to_archive( + self, archive: Archive, patch: MetaPatch, result: MetaEvalResult + ) -> None: + """Record the evaluation result as a VariantArchiveEntry.""" + entry = VariantArchiveEntry( + variant_id=patch.patch_id, + variant=patch, # MetaPatch is stored as the "variant" + score=result.score, + parent_id=patch.parent_id, + generation=patch.generation, + valid_parent=result.succeeded, + ) + archive.add(entry) + + async def _request_and_wait( + self, session_id: str, patch: MetaPatch, result: MetaEvalResult + ) -> str: + """Request ApprovalGate decision and wait up to test_timeout seconds.""" + details = { + "patch_id": patch.patch_id, + "target_path": patch.target_path, + "rationale": patch.rationale, + "score": result.score, + "tests_passed": result.tests_passed, + "tests_total": result.tests_total, + } + try: + await self._gate.request_approval( + session_id=session_id, + experiment_id=patch.patch_id, + details=details, + ) + return await self._gate.wait_for_approval( + session_id=session_id, + experiment_id=patch.patch_id, + timeout=float(self.config.meta_agent_test_timeout), + ) + except Exception: + logger.exception( + "MetaEvalHarness: ApprovalGate error for patch %s", patch.patch_id + ) + return 
"timeout" + + @staticmethod + def _apply_patch(patch: MetaPatch) -> None: + """Overwrite the live module with the modified content. + + A per-patch backup is written before overwriting so each applied + generation is independently recoverable without touching git. + Backup name: ``..meta_bak`` — unique per patch. + """ + target = Path(patch.target_path) + prefix = patch.patch_id[:8] + backup = target.with_name(f"{target.stem}.{prefix}.meta_bak") + shutil.copy2(target, backup) + target.write_text(patch.modified_content, encoding="utf-8") + logger.info("MetaEvalHarness: live file updated, backup at %s", backup) diff --git a/autobot-backend/services/autoresearch/meta_eval_harness_test.py b/autobot-backend/services/autoresearch/meta_eval_harness_test.py new file mode 100644 index 000000000..e4e2a3785 --- /dev/null +++ b/autobot-backend/services/autoresearch/meta_eval_harness_test.py @@ -0,0 +1,414 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Unit tests for MetaEvalHarness (issue #3224). 
+""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock +from unittest.mock import patch as mock_patch + +import pytest + +from .archive import Archive +from .config import AutoResearchConfig +from .meta_agent import MetaPatch +from .meta_eval_harness import MetaEvalHarness, MetaEvalResult + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_harness(approval_gate=None) -> MetaEvalHarness: + config = AutoResearchConfig() + config.meta_agent_test_timeout = 10 + config.meta_agent_approval_threshold = 0.1 + return MetaEvalHarness(config=config, approval_gate=approval_gate) + + +def _make_patch( + original: str = "def foo(): return 1\n", + modified: str = "def foo(): return 2\n", + patch_id: str = "test-patch-1", +) -> MetaPatch: + return MetaPatch( + patch_id=patch_id, + target_path="/tmp/module.py", + original_content=original, + modified_content=modified, + rationale="test change", + generation=1, + ) + + +# --------------------------------------------------------------------------- +# MetaEvalResult +# --------------------------------------------------------------------------- + + +def test_result_succeeded_true(): + r = MetaEvalResult(tests_passed=3, tests_total=4) + assert r.succeeded is True + + +def test_result_succeeded_false_zero_total(): + r = MetaEvalResult(tests_passed=0, tests_total=0) + assert r.succeeded is False + + +def test_result_succeeded_false_zero_passed(): + r = MetaEvalResult(tests_passed=0, tests_total=5) + assert r.succeeded is False + + +def test_result_to_dict(): + r = MetaEvalResult(patch_id="abc", score=0.8, decision="approved", applied=True) + d = r.to_dict() + assert d["patch_id"] == "abc" + assert d["score"] == 0.8 + assert d["decision"] == "approved" + assert d["applied"] is True + + +# --------------------------------------------------------------------------- +# 
_parse_pytest_summary +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "output,expected_passed,expected_total", + [ + ("5 passed, 2 failed in 0.5s", 5, 7), + ("3 passed in 0.1s", 3, 3), + ("2 failed in 0.05s", 0, 2), + ("1 passed, 1 failed, 1 error in 1.0s", 1, 3), + ("no output", 0, 0), + ("", 0, 0), + ], +) +def test_parse_pytest_summary(output, expected_passed, expected_total): + passed, total = MetaEvalHarness._parse_pytest_summary(output) + assert passed == expected_passed + assert total == expected_total + + +# --------------------------------------------------------------------------- +# _compute_score +# --------------------------------------------------------------------------- + + +def test_compute_score_all_pass(): + assert MetaEvalHarness._compute_score(5, 5) == 1.0 + + +def test_compute_score_partial(): + assert MetaEvalHarness._compute_score(3, 4) == pytest.approx(0.75) + + +def test_compute_score_zero_total(): + assert MetaEvalHarness._compute_score(0, 0) == 0.0 + + +# --------------------------------------------------------------------------- +# evaluate_patch — no changes +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_evaluate_patch_no_changes_skips(): + harness = _make_harness() + archive = Archive() + patch = MetaPatch( + patch_id="same", + target_path="/tmp/module.py", + original_content="x = 1\n", + modified_content="x = 1", # stripped equal + ) + result = await harness.evaluate_patch(patch, archive) + assert result.decision == "skipped" + assert result.applied is False + assert archive.size == 1 # still recorded + + +# --------------------------------------------------------------------------- +# evaluate_patch — tests fail +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_evaluate_patch_tests_fail_rejected(tmp_path): + harness = 
_make_harness() + archive = Archive() + patch = _make_patch() + + with ( + mock_patch.object( + harness, "_write_temp_module", return_value=tmp_path / "tmp.py" + ), + mock_patch.object( + harness, + "_run_tests", + new=AsyncMock(return_value=(0, 3, "3 failed in 0.1s")), + ), + ): + # write a placeholder so unlink won't fail + (tmp_path / "tmp.py").touch() + result = await harness.evaluate_patch(patch, archive) + + assert result.decision == "rejected" + assert result.applied is False + assert result.score == 0.0 + + +# --------------------------------------------------------------------------- +# evaluate_patch — tests pass, approval not needed +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_evaluate_patch_approved_no_gate(tmp_path): + harness = _make_harness() + # Override threshold so no gate needed for score < threshold + harness.config.meta_agent_approval_threshold = 2.0 # impossible to trigger + + archive = Archive() + patch = _make_patch() + + applied_path = tmp_path / "module.py" + applied_path.write_text(patch.original_content, encoding="utf-8") + patch = MetaPatch( + patch_id="p1", + target_path=str(applied_path), + original_content=patch.original_content, + modified_content=patch.modified_content, + ) + + with ( + mock_patch.object( + harness, "_write_temp_module", return_value=tmp_path / "tmp.py" + ), + mock_patch.object( + harness, + "_run_tests", + new=AsyncMock(return_value=(5, 5, "5 passed in 0.2s")), + ), + ): + (tmp_path / "tmp.py").touch() + result = await harness.evaluate_patch(patch, archive) + + assert result.decision == "approved" + assert result.applied is True + assert applied_path.read_text(encoding="utf-8") == patch.modified_content + + +# --------------------------------------------------------------------------- +# evaluate_patch — approval gate consulted +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async 
def test_evaluate_patch_gate_approved(tmp_path): + gate = MagicMock() + gate.check_approval_needed = MagicMock(return_value=True) + gate.request_approval = AsyncMock() + gate.wait_for_approval = AsyncMock(return_value="approved") + + harness = _make_harness(approval_gate=gate) + + applied_path = tmp_path / "module.py" + applied_path.write_text("def foo(): return 1\n", encoding="utf-8") + + patch = MetaPatch( + patch_id="gate-patch", + target_path=str(applied_path), + original_content="def foo(): return 1\n", + modified_content="def foo(): return 2\n", + ) + archive = Archive() + + with ( + mock_patch.object( + harness, "_write_temp_module", return_value=tmp_path / "tmp.py" + ), + mock_patch.object( + harness, + "_run_tests", + new=AsyncMock(return_value=(4, 4, "4 passed in 0.1s")), + ), + ): + (tmp_path / "tmp.py").touch() + result = await harness.evaluate_patch(patch, archive, session_id="sess-1") + + assert result.decision == "approved" + assert result.applied is True + gate.request_approval.assert_awaited_once() + gate.wait_for_approval.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_evaluate_patch_gate_rejected(tmp_path): + gate = MagicMock() + gate.check_approval_needed = MagicMock(return_value=True) + gate.request_approval = AsyncMock() + gate.wait_for_approval = AsyncMock(return_value="rejected") + + harness = _make_harness(approval_gate=gate) + + applied_path = tmp_path / "module.py" + applied_path.write_text("def foo(): return 1\n", encoding="utf-8") + + patch = MetaPatch( + patch_id="rejected-patch", + target_path=str(applied_path), + original_content="def foo(): return 1\n", + modified_content="def foo(): return 2\n", + ) + archive = Archive() + + with ( + mock_patch.object( + harness, "_write_temp_module", return_value=tmp_path / "tmp.py" + ), + mock_patch.object( + harness, + "_run_tests", + new=AsyncMock(return_value=(4, 4, "4 passed in 0.1s")), + ), + ): + (tmp_path / "tmp.py").touch() + result = await harness.evaluate_patch(patch, 
archive, session_id="sess-1") + + assert result.decision == "rejected" + assert result.applied is False + # Live file unchanged + assert applied_path.read_text(encoding="utf-8") == "def foo(): return 1\n" + + +# --------------------------------------------------------------------------- +# Archive integration +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_evaluate_patch_adds_to_archive(tmp_path): + harness = _make_harness() + harness.config.meta_agent_approval_threshold = 2.0 # no gate + + applied_path = tmp_path / "module.py" + applied_path.write_text("def foo(): return 1\n", encoding="utf-8") + + patch = MetaPatch( + patch_id="arch-patch", + target_path=str(applied_path), + original_content="def foo(): return 1\n", + modified_content="def foo(): return 99\n", + generation=2, + ) + archive = Archive() + + with ( + mock_patch.object( + harness, "_write_temp_module", return_value=tmp_path / "tmp.py" + ), + mock_patch.object( + harness, + "_run_tests", + new=AsyncMock(return_value=(3, 4, "3 passed, 1 failed in 0.1s")), + ), + ): + (tmp_path / "tmp.py").touch() + result = await harness.evaluate_patch(patch, archive) + + assert archive.size == 1 + entry = archive.best + assert entry.variant_id == "arch-patch" + assert entry.score == pytest.approx(0.75) + assert entry.generation == 2 + + +# --------------------------------------------------------------------------- +# Gate bypass safety +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_evaluate_patch_rejects_when_gate_required_no_session(tmp_path): + """When approval is required but session_id is empty, reject (not approve).""" + gate = MagicMock() + gate.check_approval_needed = MagicMock(return_value=True) # gate always required + gate.request_approval = AsyncMock() + + harness = _make_harness(approval_gate=gate) + archive = Archive() + + applied_path = tmp_path / "module.py" 
+ applied_path.write_text("def foo(): return 1\n", encoding="utf-8") + + patch = MetaPatch( + patch_id="bypass-patch", + target_path=str(applied_path), + original_content="def foo(): return 1\n", + modified_content="def foo(): return 2\n", + ) + + with ( + mock_patch.object( + harness, "_write_temp_module", return_value=tmp_path / "tmp.py" + ), + mock_patch.object( + harness, + "_run_tests", + new=AsyncMock(return_value=(5, 5, "5 passed in 0.1s")), + ), + ): + (tmp_path / "tmp.py").touch() + result = await harness.evaluate_patch(patch, archive, session_id="") + + assert result.decision == "rejected" + assert result.applied is False + gate.request_approval.assert_not_awaited() + # Live file must be unchanged + assert applied_path.read_text(encoding="utf-8") == "def foo(): return 1\n" + + +# --------------------------------------------------------------------------- +# Backup filename uniqueness +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_apply_patch_backup_includes_patch_id(tmp_path): + """Each applied patch must create a unique backup file.""" + harness = _make_harness() + harness.config.meta_agent_approval_threshold = 2.0 # no gate + + applied_path = tmp_path / "module.py" + applied_path.write_text("def foo(): return 1\n", encoding="utf-8") + + patch = MetaPatch( + patch_id="abcdef12-0000-0000-0000-000000000000", + target_path=str(applied_path), + original_content="def foo(): return 1\n", + modified_content="def foo(): return 2\n", + ) + archive = Archive() + + with ( + mock_patch.object( + harness, "_write_temp_module", return_value=tmp_path / "tmp.py" + ), + mock_patch.object( + harness, + "_run_tests", + new=AsyncMock(return_value=(3, 3, "3 passed in 0.1s")), + ), + ): + (tmp_path / "tmp.py").touch() + result = await harness.evaluate_patch(patch, archive) + + assert result.applied is True + backup = tmp_path / "module.abcdef12.meta_bak" + assert backup.exists(), f"expected backup at 
{backup}" diff --git a/autobot-backend/services/autoresearch/models.py b/autobot-backend/services/autoresearch/models.py index f1190fb3c..4e7db53b1 100644 --- a/autobot-backend/services/autoresearch/models.py +++ b/autobot-backend/services/autoresearch/models.py @@ -15,6 +15,38 @@ from dataclasses import dataclass, field from typing import Any, Dict, List, Optional +# Forward reference — PromptVariant is defined in prompt_optimizer to avoid +# circular imports; callers import VariantArchiveEntry directly from models. +# The type annotation below uses a string literal for the forward ref. + + +@dataclass +class VariantArchiveEntry: + """A single entry in the quality-diversity archive. + + Issue #3222: Replaces the greedy top-K filter so that every evaluated + variant is retained and eligible for weighted-random parent selection. + """ + + variant_id: str + variant: Any # PromptVariant — typed as Any to avoid circular import + score: float + parent_id: Optional[str] + generation: int + valid_parent: bool = True + created_at: float = field(default_factory=time.time) + + def to_dict(self) -> Dict[str, Any]: + return { + "variant_id": self.variant_id, + "variant": self.variant.to_dict(), + "score": self.score, + "parent_id": self.parent_id, + "generation": self.generation, + "valid_parent": self.valid_parent, + "created_at": self.created_at, + } + class ExperimentState(str, enum.Enum): """Lifecycle states for an experiment run.""" diff --git a/autobot-backend/services/autoresearch/prompt_optimizer.py b/autobot-backend/services/autoresearch/prompt_optimizer.py index c92b4ca43..54faccc28 100644 --- a/autobot-backend/services/autoresearch/prompt_optimizer.py +++ b/autobot-backend/services/autoresearch/prompt_optimizer.py @@ -28,7 +28,10 @@ from enum import Enum from typing import Any, Callable, Coroutine, Dict, List, Optional -from .scorers import PromptScorer, ScorerResult +from .archive import Archive +from .config import AutoResearchConfig +from .models import 
VariantArchiveEntry +from .scorers import PromptScorer logger = logging.getLogger(__name__) @@ -75,6 +78,18 @@ def to_dict(self) -> Dict[str, Any]: "created_at": self.created_at, } + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "PromptVariant": + return cls( + id=data.get("id", ""), + prompt_text=data.get("prompt_text", ""), + output=data.get("output", ""), + scores=data.get("scores", {}), + final_score=data.get("final_score", 0.0), + round_number=data.get("round_number", 0), + created_at=data.get("created_at", 0.0), + ) + @dataclass class OptimizationSession: @@ -91,18 +106,23 @@ class OptimizationSession: started_at: Optional[float] = None completed_at: Optional[float] = None error_message: Optional[str] = None + # Issue #3222: quality-diversity archive (not serialised inline — persisted + # separately under autoresearch:archive:{session_id}) + archive: Optional["Archive"] = field(default=None, repr=False) def to_dict(self) -> Dict[str, Any]: return { "id": self.id, - "target": { - "agent_name": self.target.agent_name, - "scorer_chain": self.target.scorer_chain, - "mutation_count": self.target.mutation_count, - "top_k": self.target.top_k, - } - if self.target - else None, + "target": ( + { + "agent_name": self.target.agent_name, + "scorer_chain": self.target.scorer_chain, + "mutation_count": self.target.mutation_count, + "top_k": self.target.top_k, + } + if self.target + else None + ), "status": self.status.value, "rounds_completed": self.rounds_completed, "max_rounds": self.max_rounds, @@ -138,9 +158,11 @@ def __init__( self, scorers: Dict[str, PromptScorer], llm_service: Any, + config: Optional[AutoResearchConfig] = None, ) -> None: self._scorers = scorers self._llm = llm_service + self._config = config or AutoResearchConfig() self._cancel_event = asyncio.Event() self._current_session: Optional[OptimizationSession] = None self._redis = None @@ -164,11 +186,15 @@ async def optimize( # Capture pre-cancel state before starting (caller may have called 
cancel()) pre_cancelled = self._cancel_event.is_set() + archive_max_size = getattr(target, "archive_max_size", target.top_k * 10) + archive = Archive(max_size=archive_max_size) + session = OptimizationSession( target=target, status=OptimizationStatus.RUNNING, max_rounds=max_rounds, started_at=time.time(), + archive=archive, ) self._current_session = session @@ -176,6 +202,7 @@ async def optimize( self._cancel_event.clear() current_best_prompt = target.current_prompt + parent_id: Optional[str] = None try: for round_num in range(1, max_rounds + 1): @@ -190,7 +217,7 @@ async def optimize( target.agent_name, ) - round_variants = await self._run_round( + round_variants, failed_ids = await self._run_round( current_best_prompt=current_best_prompt, target=target, benchmark_fn=benchmark_fn, @@ -199,19 +226,18 @@ async def optimize( ) if round_variants: - best_in_round = max(round_variants, key=lambda v: v.final_score) - if best_in_round.final_score > session.baseline_score: - session.best_variant = best_in_round - session.baseline_score = best_in_round.final_score - current_best_prompt = best_in_round.prompt_text - logger.info( - "PromptOptimizer: new best variant %s (score=%.3f)", - best_in_round.id, - best_in_round.final_score, - ) + current_best_prompt, parent_id = self._update_archive( + archive=archive, + round_variants=round_variants, + failed_ids=failed_ids, + parent_id=parent_id, + round_num=round_num, + session=session, + ) session.rounds_completed = round_num await self._save_session(session) + await self._save_archive(session.id, archive) if session.status == OptimizationStatus.RUNNING: session.status = OptimizationStatus.COMPLETED @@ -226,6 +252,51 @@ async def optimize( return session + def _update_archive( + self, + archive: Archive, + round_variants: List[PromptVariant], + failed_ids: set, + parent_id: Optional[str], + round_num: int, + session: OptimizationSession, + ) -> tuple: + """Add round variants to archive, mark failures, select next parent. 
+ + Returns (new_best_prompt, new_parent_id). + """ + for v in round_variants: + archive.add( + VariantArchiveEntry( + variant_id=v.id, + variant=v, + score=v.final_score, + parent_id=parent_id, + generation=round_num, + valid_parent=v.id not in failed_ids, + ) + ) + + best_in_round = max(round_variants, key=lambda v: v.final_score) + if best_in_round.final_score > session.baseline_score: + session.best_variant = best_in_round + session.baseline_score = best_in_round.final_score + logger.info( + "PromptOptimizer: new best variant %s (score=%.3f)", + best_in_round.id, + best_in_round.final_score, + ) + + chosen = archive.select_parent() + if chosen is not None: + logger.debug( + "PromptOptimizer: selected parent %s (score=%.3f)", + chosen.variant_id, + chosen.score, + ) + return chosen.variant.prompt_text, chosen.variant_id + return best_in_round.prompt_text, best_in_round.id + async def _run_round( self, current_best_prompt: str, @@ -233,8 +304,13 @@ async def _run_round( benchmark_fn: BenchmarkFn, round_number: int, session: OptimizationSession, - ) -> List[PromptVariant]: - """Execute a single mutation -> benchmark -> score round.""" + ) -> tuple: + """Execute a single mutation -> benchmark -> score round. + + Returns (variants, failed_ids) where failed_ids is the set of variant + IDs that raised a scorer exception. Caller marks those invalid after + adding all entries to the archive. + """ # 1. Mutate prompt_texts = await self._mutate_prompt( current_best_prompt, target.mutation_count @@ -251,9 +327,35 @@ async def _run_round( ) variants.append(variant) - # 3. Score through the chain - candidates = variants - for scorer_name in target.scorer_chain: + # 3. 
Score through the chain with staged gating; collect failed IDs + failed_ids = await self._score_through_chain( + variants=variants, + target=target, + session=session, + ) + + session.all_variants.extend(variants) + return variants, failed_ids + + async def _score_through_chain( + self, + variants: List[PromptVariant], + target: PromptOptTarget, + session: OptimizationSession, + ) -> set: + """Run staged scoring chain with threshold gating between tiers. + + Tier-1 uses subset_fraction for cheap evaluation. Variants that do + not clear staged_eval_threshold are finalized at their current score + and excluded from subsequent (more expensive) tiers. + + Returns the set of variant IDs that raised a scorer exception. + """ + candidates = list(variants) + threshold = self._config.staged_eval_threshold + failed_ids: set = set() + + for tier_idx, scorer_name in enumerate(target.scorer_chain): scorer = self._scorers.get(scorer_name) if scorer is None: logger.warning( @@ -261,27 +363,70 @@ async def _run_round( ) continue - for variant in candidates: + subset_frac = self._config.staged_eval_fraction if tier_idx == 0 else None + candidates, tier_failed = await self._score_tier( + scorer=scorer, + scorer_name=scorer_name, + variants=candidates, + session=session, + subset_fraction=subset_frac, + ) + failed_ids.update(tier_failed) + + # Gate: drop variants below threshold before next tier + passed = [v for v in candidates if v.final_score >= threshold] + gated_out = len(candidates) - len(passed) + if gated_out: + logger.info( + "PromptOptimizer: staged gate after %r — %d variant(s) below " + "threshold %.2f (kept %d)", + scorer_name, + gated_out, + threshold, + len(passed), + ) + candidates = passed + + if not candidates: + logger.info( + "PromptOptimizer: no candidates passed gate after %r", scorer_name + ) + break + + return failed_ids + + async def _score_tier( + self, + scorer: PromptScorer, + scorer_name: str, + variants: List[PromptVariant], + session: 
OptimizationSession, + subset_fraction: Optional[float], + ) -> tuple: + """Score all variants with one scorer and update final_score. + + Returns (variants, failed_ids) where failed_ids contains IDs of + variants that raised a scorer exception. + """ + failed_ids: set = set() + for variant in variants: + try: result = await scorer.score( variant.output, - { - "session_id": session.id, - "variant_id": variant.id, - }, + {"session_id": session.id, "variant_id": variant.id}, + subset_fraction=subset_fraction, ) variant.scores[scorer_name] = result.score - # Final score = average across all scorers so far - variant.final_score = ( - sum(variant.scores.values()) / len(variant.scores) + variant.final_score = sum(variant.scores.values()) / len(variant.scores) + except Exception as exc: + logger.warning( + "PromptOptimizer: scorer %r failed for variant %s: %s", + scorer_name, + variant.id, + exc, ) - - # Keep top-K for next scorer - candidates = sorted( - candidates, key=lambda v: v.final_score, reverse=True - )[: target.top_k] - - session.all_variants.extend(variants) - return candidates + failed_ids.add(variant.id) + return variants, failed_ids async def _mutate_prompt(self, base_prompt: str, n: int) -> List[str]: """Generate N prompt variants using LLM.""" @@ -327,3 +472,31 @@ async def _save_session(self, session: OptimizationSession) -> None: await redis.set(key, json.dumps(session.to_dict()), ex=86400 * 7) except Exception: logger.exception("Failed to save optimization session %s", session.id) + + async def _save_archive(self, session_id: str, archive: "Archive") -> None: + """Persist quality-diversity archive to Redis. 
+ + Key: autoresearch:archive:{session_id} (Issue #3222) + """ + try: + redis = await self._get_redis() + key = f"autoresearch:archive:{session_id}" + await redis.set(key, archive.to_json(), ex=86400 * 7) + except Exception: + logger.exception("Failed to save archive for session %s", session_id) + + async def load_archive(self, session_id: str) -> Optional["Archive"]: + """Restore a previously persisted archive from Redis.""" + try: + redis = await self._get_redis() + key = f"autoresearch:archive:{session_id}" + raw = await redis.get(key) + if raw is None: + return None + return Archive.from_json( + raw if isinstance(raw, str) else raw.decode("utf-8"), + PromptVariant, + ) + except Exception: + logger.exception("Failed to load archive for session %s", session_id) + return None diff --git a/autobot-backend/services/autoresearch/prompt_optimizer_test.py b/autobot-backend/services/autoresearch/prompt_optimizer_test.py index fd652e00f..39ac55eac 100644 --- a/autobot-backend/services/autoresearch/prompt_optimizer_test.py +++ b/autobot-backend/services/autoresearch/prompt_optimizer_test.py @@ -1,18 +1,20 @@ # AutoBot - AI-Powered Automation Platform # Copyright (c) 2025 mrveiss # Author: mrveiss -"""Tests for prompt optimizer — Issue #2600.""" +"""Tests for prompt optimizer and quality-diversity archive — Issue #2600, #3222.""" from __future__ import annotations import json +from unittest.mock import AsyncMock, MagicMock import pytest -from unittest.mock import AsyncMock, MagicMock +from services.autoresearch.archive import Archive +from services.autoresearch.config import AutoResearchConfig +from services.autoresearch.models import VariantArchiveEntry from services.autoresearch.prompt_optimizer import ( OptimizationSession, - OptimizationStatus, PromptOptimizer, PromptOptTarget, PromptVariant, @@ -20,6 +22,41 @@ from services.autoresearch.scorers import ScorerResult +# --------------------------------------------------------------------------- +# Helper factory +# 
--------------------------------------------------------------------------- + +def _make_variant(vid: str, score: float, round_number: int = 1) -> PromptVariant: + return PromptVariant( + id=vid, + prompt_text=f"prompt_{vid}", + output=f"output_{vid}", + scores={"s": score}, + final_score=score, + round_number=round_number, + ) + + +def _make_entry( + vid: str, + score: float, + valid_parent: bool = True, + generation: int = 1, +) -> VariantArchiveEntry: + return VariantArchiveEntry( + variant_id=vid, + variant=_make_variant(vid, score), + score=score, + parent_id=None, + generation=generation, + valid_parent=valid_parent, + ) + + +# --------------------------------------------------------------------------- +# PromptVariant +# --------------------------------------------------------------------------- + class TestPromptVariantModel: def test_to_dict(self): variant = PromptVariant( @@ -35,6 +72,17 @@ def test_to_dict(self): assert d["scores"] == {"llm_judge": 0.8} assert d["final_score"] == 0.8 + def test_from_dict_round_trip(self): + v = _make_variant("v2", 0.5) + restored = PromptVariant.from_dict(v.to_dict()) + assert restored.id == "v2" + assert restored.final_score == 0.5 + assert restored.prompt_text == "prompt_v2" + + +# --------------------------------------------------------------------------- +# OptimizationSession +# --------------------------------------------------------------------------- class TestOptimizationSession: def test_to_dict(self): @@ -52,11 +100,93 @@ def test_to_dict(self): assert d["rounds_completed"] == 0 +# --------------------------------------------------------------------------- +# Archive unit tests +# --------------------------------------------------------------------------- + +class TestArchive: + def test_add_retains_all_entries(self): + archive = Archive() + for i in range(5): + archive.add(_make_entry(f"v{i}", score=float(i) * 0.1)) + assert archive.size == 5 + + def test_best_returns_highest_score(self): + archive = 
Archive() + archive.add(_make_entry("low", score=0.1)) + archive.add(_make_entry("high", score=0.9)) + archive.add(_make_entry("mid", score=0.5)) + assert archive.best is not None + assert archive.best.variant_id == "high" + + def test_valid_parents_excludes_invalid(self): + archive = Archive() + archive.add(_make_entry("good", score=0.8, valid_parent=True)) + archive.add(_make_entry("bad", score=0.2, valid_parent=False)) + parents = archive.valid_parents + assert len(parents) == 1 + assert parents[0].variant_id == "good" + + def test_mark_invalid_excludes_entry(self): + archive = Archive() + archive.add(_make_entry("a", score=0.7)) + archive.add(_make_entry("b", score=0.3)) + archive.mark_invalid("a") + parents = archive.valid_parents + assert all(p.variant_id != "a" for p in parents) + + def test_select_parent_returns_valid_entry(self): + archive = Archive() + archive.add(_make_entry("x", score=0.6)) + archive.add(_make_entry("y", score=0.0, valid_parent=False)) + result = archive.select_parent() + assert result is not None + assert result.variant_id == "x" + + def test_select_parent_none_when_all_invalid(self): + archive = Archive() + archive.add(_make_entry("z", score=0.5, valid_parent=False)) + assert archive.select_parent() is None + + def test_select_parent_uniform_when_all_scores_zero(self): + archive = Archive() + for i in range(10): + archive.add(_make_entry(f"v{i}", score=0.0)) + # Should not raise; should return one of the entries + result = archive.select_parent() + assert result is not None + + def test_prune_caps_size(self): + archive = Archive(max_size=3) + for i in range(5): + archive.add(_make_entry(f"v{i}", score=float(i) * 0.1)) + assert archive.size == 3 + # Only the top-3 scoring entries should remain + ids = {e.variant_id for e in archive.valid_parents} + assert "v4" in ids # score 0.4 — top 3 + + def test_serialisation_round_trip(self): + archive = Archive(max_size=10) + archive.add(_make_entry("a", score=0.7)) + archive.add(_make_entry("b", 
score=0.3, valid_parent=False)) + serialised = archive.to_json() + restored = Archive.from_json(serialised, PromptVariant) + assert restored.size == 2 + assert restored.best is not None + assert restored.best.variant_id == "a" + invalid = [e for e in restored._entries if not e.valid_parent] + assert len(invalid) == 1 + assert invalid[0].variant_id == "b" + + +# --------------------------------------------------------------------------- +# PromptOptimizer integration (archive-aware) +# --------------------------------------------------------------------------- + class TestPromptOptimizerLoop: @pytest.fixture def mock_llm(self): llm = AsyncMock() - # Return 3 variants as JSON array mock_response = MagicMock() mock_response.content = json.dumps(["variant A", "variant B", "variant C"]) llm.chat.return_value = mock_response @@ -103,6 +233,159 @@ async def benchmark_fn(prompt: str) -> str: assert session.best_variant.final_score == 0.8 assert len(session.all_variants) == 3 + @pytest.mark.asyncio + async def test_archive_populated_after_round(self, optimizer, mock_scorer): + """Archive must retain all variants, not just top-K.""" + target = PromptOptTarget( + agent_name="test", + current_prompt="base", + scorer_chain=["test_scorer"], + mutation_count=3, + top_k=1, # old top-K = 1; archive must still hold all 3 + ) + + async def benchmark_fn(prompt: str) -> str: + return f"output for: {prompt}" + + session = await optimizer.optimize(target, benchmark_fn, max_rounds=1) + assert session.archive is not None + assert session.archive.size == 3 # all variants retained + + @pytest.mark.asyncio + async def test_subset_fraction_passed_to_first_scorer(self, mock_llm, mock_scorer): + """First scorer in chain receives staged_eval_fraction; subsequent get None.""" + cheap_scorer = AsyncMock() + cheap_scorer.name = "cheap" + cheap_scorer.score.return_value = ScorerResult( + score=0.9, raw_score=9, metadata={}, scorer_name="cheap" + ) + + expensive_scorer = AsyncMock() + 
expensive_scorer.name = "expensive" + expensive_scorer.score.return_value = ScorerResult( + score=0.85, raw_score=8, metadata={}, scorer_name="expensive" + ) + + cfg = AutoResearchConfig() + cfg.staged_eval_fraction = 0.25 + cfg.staged_eval_threshold = 0.0 # pass all so both scorers run + + opt = PromptOptimizer( + scorers={"cheap": cheap_scorer, "expensive": expensive_scorer}, + llm_service=mock_llm, + config=cfg, + ) + opt._redis = AsyncMock() + + target = PromptOptTarget( + agent_name="test", + current_prompt="base", + scorer_chain=["cheap", "expensive"], + mutation_count=1, + top_k=1, + ) + + async def benchmark_fn(prompt: str) -> str: + return "output" + + await opt.optimize(target, benchmark_fn, max_rounds=1) + + # cheap scorer must have been called with subset_fraction=0.25 + cheap_scorer.score.assert_awaited() + call_kwargs = cheap_scorer.score.call_args + assert call_kwargs.kwargs.get("subset_fraction") == 0.25 + + # expensive scorer must have been called with subset_fraction=None + expensive_scorer.score.assert_awaited() + exp_kwargs = expensive_scorer.score.call_args + assert exp_kwargs.kwargs.get("subset_fraction") is None + + @pytest.mark.asyncio + async def test_staged_gate_blocks_low_scoring_variants(self, mock_llm): + """Variants below staged_eval_threshold do not reach tier-2 scorer.""" + cheap_scorer = AsyncMock() + cheap_scorer.name = "cheap" + # All 3 variants score below threshold + cheap_scorer.score.return_value = ScorerResult( + score=0.2, raw_score=2, metadata={}, scorer_name="cheap" + ) + + expensive_scorer = AsyncMock() + expensive_scorer.name = "expensive" + + cfg = AutoResearchConfig() + cfg.staged_eval_fraction = 0.3 + cfg.staged_eval_threshold = 0.5 # 0.2 < 0.5 => all blocked + + opt = PromptOptimizer( + scorers={"cheap": cheap_scorer, "expensive": expensive_scorer}, + llm_service=mock_llm, + config=cfg, + ) + opt._redis = AsyncMock() + + target = PromptOptTarget( + agent_name="test", + current_prompt="base", + scorer_chain=["cheap", 
"expensive"], + mutation_count=3, + top_k=3, + ) + + async def benchmark_fn(prompt: str) -> str: + return "output" + + session = await opt.optimize(target, benchmark_fn, max_rounds=1) + + # Cheap scorer ran for all 3 variants + assert cheap_scorer.score.await_count == 3 + # Expensive scorer never ran + expensive_scorer.score.assert_not_awaited() + assert session.status.value == "completed" + + @pytest.mark.asyncio + async def test_staged_gate_passes_high_scoring_variants(self, mock_llm): + """Variants above threshold advance to tier-2.""" + cheap_scorer = AsyncMock() + cheap_scorer.name = "cheap" + cheap_scorer.score.return_value = ScorerResult( + score=0.8, raw_score=8, metadata={}, scorer_name="cheap" + ) + + expensive_scorer = AsyncMock() + expensive_scorer.name = "expensive" + expensive_scorer.score.return_value = ScorerResult( + score=0.9, raw_score=9, metadata={}, scorer_name="expensive" + ) + + cfg = AutoResearchConfig() + cfg.staged_eval_fraction = 0.3 + cfg.staged_eval_threshold = 0.5 # 0.8 >= 0.5 => all pass + + opt = PromptOptimizer( + scorers={"cheap": cheap_scorer, "expensive": expensive_scorer}, + llm_service=mock_llm, + config=cfg, + ) + opt._redis = AsyncMock() + + target = PromptOptTarget( + agent_name="test", + current_prompt="base", + scorer_chain=["cheap", "expensive"], + mutation_count=3, + top_k=3, + ) + + async def benchmark_fn(prompt: str) -> str: + return "output" + + session = await opt.optimize(target, benchmark_fn, max_rounds=1) + + assert cheap_scorer.score.await_count == 3 + assert expensive_scorer.score.await_count == 3 + assert session.status.value == "completed" + @pytest.mark.asyncio async def test_optimize_cancel(self, optimizer): target = PromptOptTarget( @@ -120,3 +403,39 @@ async def benchmark_fn(prompt: str) -> str: session = await optimizer.optimize(target, benchmark_fn, max_rounds=5) assert session.status.value == "cancelled" assert session.rounds_completed == 0 + + @pytest.mark.asyncio + async def 
test_scorer_failure_marks_variant_invalid_in_archive( + self, mock_llm + ): + """Variants whose scorer raises must have valid_parent=False in archive.""" + failing_scorer = AsyncMock() + failing_scorer.score.side_effect = RuntimeError("scorer exploded") + opt = PromptOptimizer( + scorers={"fail_scorer": failing_scorer}, + llm_service=mock_llm, + ) + opt._redis = AsyncMock() + + target = PromptOptTarget( + agent_name="test", + current_prompt="base", + scorer_chain=["fail_scorer"], + mutation_count=3, + top_k=1, + ) + + async def benchmark_fn(prompt: str) -> str: + return "output" + + session = await opt.optimize(target, benchmark_fn, max_rounds=1) + + assert session.archive is not None + invalid = [e for e in session.archive._entries if not e.valid_parent] + assert len(invalid) == 3 # all variants failed scoring + + @pytest.mark.asyncio + async def test_load_archive_returns_none_when_missing(self, optimizer): + optimizer._redis.get.return_value = None + result = await optimizer.load_archive("nonexistent-session-id") + assert result is None diff --git a/autobot-backend/services/autoresearch/routes.py b/autobot-backend/services/autoresearch/routes.py index f3dd198f3..92b781cb2 100644 --- a/autobot-backend/services/autoresearch/routes.py +++ b/autobot-backend/services/autoresearch/routes.py @@ -361,7 +361,9 @@ async def submit_variant_score( # Validate key components to prevent Redis key injection if not _UUID_PATTERN.match(session_id) or not _UUID_PATTERN.match(variant_id): - raise HTTPException(status_code=400, detail="Invalid session_id or variant_id format") + raise HTTPException( + status_code=400, detail="Invalid session_id or variant_id format" + ) redis = get_redis_client(async_client=True, database="main") key = f"autoresearch:prompt_review:{session_id}:{variant_id}" @@ -395,13 +397,13 @@ async def list_pending_approvals( key_str = key if isinstance(key, str) else key.decode("utf-8") parts = key_str.split(":") if len(parts) >= 5: - status_key = ( - 
f"autoresearch:approval:status:{parts[3]}:{parts[4]}" - ) + status_key = f"autoresearch:approval:status:{parts[3]}:{parts[4]}" status = await redis.get(status_key) status_str = ( - status.decode("utf-8") if isinstance(status, bytes) else status - ) if status else "unknown" + (status.decode("utf-8") if isinstance(status, bytes) else status) + if status + else "unknown" + ) if status_str == "pending": data["status"] = "pending" approvals.append(data) diff --git a/autobot-backend/services/autoresearch/run_experiment.py b/autobot-backend/services/autoresearch/run_experiment.py new file mode 100644 index 000000000..e8beaac93 --- /dev/null +++ b/autobot-backend/services/autoresearch/run_experiment.py @@ -0,0 +1,87 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Experiment entrypoint executed inside the Docker container. + +The host mounts: + /experiment (read-only) — contains train.py and this script + /output (writable) — results JSON written here + +Environment variables (set by ExperimentRunner via --env flags): + AUTOBOT_EXP_MAX_STEPS, AUTOBOT_EXP_LEARNING_RATE, AUTOBOT_EXP_BATCH_SIZE, + AUTOBOT_EXP_BLOCK_SIZE, AUTOBOT_EXP_N_LAYER, AUTOBOT_EXP_N_HEAD, + AUTOBOT_EXP_N_EMBD, AUTOBOT_EXP_DROPOUT, AUTOBOT_EXP_WARMUP_STEPS, + AUTOBOT_EXP_WEIGHT_DECAY, plus any AUTOBOT_EXP_EXTRA_* entries. 
+""" + +from __future__ import annotations + +import json +import logging +import os +import subprocess +import sys + +logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") +logger = logging.getLogger(__name__) + +_ENV_PREFIX = "AUTOBOT_EXP_" +_EXTRA_PREFIX = "AUTOBOT_EXP_EXTRA_" + +_PARAM_MAP = { + "MAX_STEPS": "max_steps", + "LEARNING_RATE": "learning_rate", + "BATCH_SIZE": "batch_size", + "BLOCK_SIZE": "block_size", + "N_LAYER": "n_layer", + "N_HEAD": "n_head", + "N_EMBD": "n_embd", + "DROPOUT": "dropout", + "WARMUP_STEPS": "warmup_steps", + "WEIGHT_DECAY": "weight_decay", +} + + +def _build_train_args() -> list[str]: + """Translate AUTOBOT_EXP_* env vars into train.py CLI flags.""" + args: list[str] = [] + for env_key, flag in _PARAM_MAP.items(): + value = os.environ.get(f"{_ENV_PREFIX}{env_key}") + if value is not None: + args.append(f"--{flag}={value}") + for key, value in os.environ.items(): + if key.startswith(_EXTRA_PREFIX): + flag = key[len(_EXTRA_PREFIX) :].lower() + args.append(f"--{flag}={value}") + return args + + +def main() -> int: + """Run train.py and write a results JSON to /output/result.json.""" + train_script = "/experiment/train.py" + cmd = [sys.executable, train_script] + _build_train_args() + logger.info("Running: %s", " ".join(cmd)) + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + encoding="utf-8", + ) + + output_path = "/output/result.json" + payload = { + "returncode": result.returncode, + "stdout": result.stdout, + "stderr": result.stderr, + } + with open(output_path, "w", encoding="utf-8") as fh: + json.dump(payload, fh) + + logger.info("Result written to %s (rc=%d)", output_path, result.returncode) + return result.returncode + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/autobot-backend/services/autoresearch/runner.py b/autobot-backend/services/autoresearch/runner.py index d2857f069..89c20759d 100644 --- a/autobot-backend/services/autoresearch/runner.py +++ 
b/autobot-backend/services/autoresearch/runner.py @@ -11,9 +11,12 @@ from __future__ import annotations import asyncio +import json import logging import re +import tempfile import time +from pathlib import Path from typing import Optional from .config import AutoResearchConfig @@ -43,8 +46,22 @@ ) +_ENV_PARAM_MAP = { + "max_steps": "AUTOBOT_EXP_MAX_STEPS", + "learning_rate": "AUTOBOT_EXP_LEARNING_RATE", + "batch_size": "AUTOBOT_EXP_BATCH_SIZE", + "block_size": "AUTOBOT_EXP_BLOCK_SIZE", + "n_layer": "AUTOBOT_EXP_N_LAYER", + "n_head": "AUTOBOT_EXP_N_HEAD", + "n_embd": "AUTOBOT_EXP_N_EMBD", + "dropout": "AUTOBOT_EXP_DROPOUT", + "warmup_steps": "AUTOBOT_EXP_WARMUP_STEPS", + "weight_decay": "AUTOBOT_EXP_WEIGHT_DECAY", +} + + class ExperimentRunner: - """Run autoresearch experiments as isolated subprocesses.""" + """Run autoresearch experiments as isolated subprocesses or Docker containers.""" def __init__( self, @@ -58,6 +75,7 @@ def __init__( self._running: bool = False self._lock = asyncio.Lock() self._current_process: Optional[asyncio.subprocess.Process] = None + self._current_container_name: Optional[str] = None async def run_experiment(self, experiment: Experiment) -> Experiment: """Execute a single experiment and persist results. 
@@ -113,10 +131,16 @@ async def run_experiment(self, experiment: Experiment) -> Experiment: return experiment async def _execute_training(self, experiment: Experiment) -> ExperimentResult: - """Spawn training subprocess and capture output.""" + """Spawn training subprocess or Docker container and capture output.""" + if self.config.docker_enabled: + return await self._execute_in_docker(experiment) + return await self._execute_subprocess(experiment) + + async def _execute_subprocess(self, experiment: Experiment) -> ExperimentResult: + """Execute training as a bare subprocess (original behaviour).""" cmd = self._build_command(experiment) logger.info( - "Starting training for experiment %s: %s", + "Starting training for experiment %s (subprocess): %s", experiment.id, " ".join(cmd), ) @@ -160,6 +184,148 @@ async def _execute_training(self, experiment: Experiment) -> ExperimentResult: return self.parser.parse(output, wall_time=wall_time) + async def _execute_in_docker(self, experiment: Experiment) -> ExperimentResult: + """Execute training inside an isolated Docker container.""" + self._current_container_name = f"autobot_exp_{experiment.id}" + with tempfile.TemporaryDirectory(prefix="autobot_exp_") as output_dir: + cmd = self._build_docker_command(experiment, Path(output_dir)) + logger.info( + "Starting training for experiment %s (docker): %s", + experiment.id, + " ".join(cmd), + ) + start = time.monotonic() + try: + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + ) + self._current_process = process + + stdout, _ = await asyncio.wait_for( + process.communicate(), + timeout=self.config.docker_timeout, + ) + except asyncio.TimeoutError: + wall_time = time.monotonic() - start + return await self._handle_docker_timeout(wall_time) + + wall_time = time.monotonic() - start + container_log = stdout.decode("utf-8", errors="replace") if stdout else "" + + if process.returncode != 0: + return 
ExperimentResult( + error_message=( + f"Docker container exited with code {process.returncode}" + ), + raw_output=container_log, + wall_time_seconds=wall_time, + ) + + return self._parse_docker_output(Path(output_dir), wall_time) + + def _build_docker_command( + self, experiment: Experiment, output_dir: Path + ) -> list[str]: + """Build the docker run command for a containerised experiment.""" + hp = experiment.hyperparams + self._validate_extra_params(hp.extra) + self._validate_mount_path(Path(self.config.autoresearch_dir)) + env_flags = self._build_docker_env_flags(hp) + container_name = self._current_container_name or f"autobot_exp_{experiment.id}" + + cmd = [ + "docker", + "run", + "--rm", + "--name", + container_name, + "--network", + "none", + "--memory", + self.config.docker_memory_limit, + "--cpus", + str(self.config.docker_cpu_limit), + "-v", + f"{self.config.autoresearch_dir}:/experiment:ro", + "-v", + f"{output_dir}:/output", + ] + cmd.extend(env_flags) + cmd.append(self.config.docker_image) + return cmd + + @staticmethod + def _validate_mount_path(path: Path) -> None: + """Reject obviously unsafe mount paths (root or non-absolute).""" + resolved = path.resolve() + if not resolved.is_absolute() or resolved == Path("/"): + raise ValueError(f"autoresearch_dir is unsafe to mount: {path}") + + @staticmethod + def _build_docker_env_flags(hp: object) -> list[str]: + """Return --env flags mapping hyperparams to AUTOBOT_EXP_* variables.""" + flags: list[str] = [] + for attr, env_key in _ENV_PARAM_MAP.items(): + value = getattr(hp, attr) + flags.extend(["--env", f"{env_key}={value}"]) + for extra_key, extra_val in hp.extra.items(): + env_key = f"AUTOBOT_EXP_EXTRA_{extra_key.upper()}" + flags.extend(["--env", f"{env_key}={extra_val}"]) + return flags + + async def _handle_docker_timeout(self, wall_time: float) -> ExperimentResult: + """Kill the container after a timeout and return a timeout result.""" + if self._current_process and self._current_process.returncode 
is None: + if self._current_container_name: + try: + kill_proc = await asyncio.create_subprocess_exec( + "docker", "kill", self._current_container_name + ) + await kill_proc.wait() + except Exception: + logger.exception( + "Failed to docker kill container %s after timeout", + self._current_container_name, + ) + self._current_process.kill() + await self._current_process.wait() + + return ExperimentResult( + error_message=( + f"Docker experiment timed out after {self.config.docker_timeout}s" + ), + wall_time_seconds=wall_time, + ) + + def _parse_docker_output( + self, output_dir: Path, wall_time: float + ) -> ExperimentResult: + """Read result.json from the container output mount and parse it.""" + result_path = output_dir / "result.json" + if not result_path.exists(): + return ExperimentResult( + error_message="Container produced no result.json", + wall_time_seconds=wall_time, + ) + + with result_path.open(encoding="utf-8") as fh: + payload = json.load(fh) + + stdout_text = payload.get("stdout", "") + returncode = payload.get("returncode", -1) + + if returncode != 0: + stderr_text = payload.get("stderr", "") + return ExperimentResult( + error_message=f"Training exited with code {returncode}", + raw_output=stdout_text + stderr_text, + wall_time_seconds=wall_time, + ) + + return self.parser.parse(stdout_text, wall_time=wall_time) + def _build_command(self, experiment: Experiment) -> list[str]: """Build the subprocess command for a training run.""" hp = experiment.hyperparams diff --git a/autobot-backend/services/autoresearch/runner_test.py b/autobot-backend/services/autoresearch/runner_test.py index 12ee81ddd..4e7d0f307 100644 --- a/autobot-backend/services/autoresearch/runner_test.py +++ b/autobot-backend/services/autoresearch/runner_test.py @@ -626,3 +626,394 @@ def test_reflects_internal_flag(self): runner = _make_runner() runner._running = True assert runner.is_running is True + + +# --------------------------------------------------------------------------- +# 
Docker isolation tests (issue #3223) +# --------------------------------------------------------------------------- + + +def _make_docker_config(**overrides) -> AutoResearchConfig: + """Build a docker-enabled test config.""" + defaults = { + "default_training_timeout": 10, + "improvement_threshold": 0.01, + "docker_enabled": True, + "docker_image": "ghcr.io/mrveiss/autobot-autoresearch:test", + "docker_memory_limit": "2g", + "docker_cpu_limit": 1.0, + "docker_timeout": 30, + } + defaults.update(overrides) + return AutoResearchConfig(**defaults) + + +class TestDockerCommand: + """Tests for ExperimentRunner._build_docker_command.""" + + def test_docker_run_flags_present(self): + config = _make_docker_config() + runner = _make_runner(config=config) + exp = _make_experiment() + + import tempfile + from pathlib import Path + + with tempfile.TemporaryDirectory() as tmp: + cmd = runner._build_docker_command(exp, Path(tmp)) + + assert cmd[0] == "docker" + assert "run" in cmd + assert "--rm" in cmd + assert "--network" in cmd + assert "none" in cmd + + def test_memory_and_cpu_flags(self): + config = _make_docker_config(docker_memory_limit="3g", docker_cpu_limit=1.5) + runner = _make_runner(config=config) + exp = _make_experiment() + + import tempfile + from pathlib import Path + + with tempfile.TemporaryDirectory() as tmp: + cmd = runner._build_docker_command(exp, Path(tmp)) + + assert "--memory" in cmd + mem_idx = cmd.index("--memory") + assert cmd[mem_idx + 1] == "3g" + + assert "--cpus" in cmd + cpu_idx = cmd.index("--cpus") + assert cmd[cpu_idx + 1] == "1.5" + + def test_image_at_end_of_command(self): + config = _make_docker_config() + runner = _make_runner(config=config) + exp = _make_experiment() + + import tempfile + from pathlib import Path + + with tempfile.TemporaryDirectory() as tmp: + cmd = runner._build_docker_command(exp, Path(tmp)) + + assert cmd[-1] == config.docker_image + + def test_env_flags_contain_hyperparams(self): + config = _make_docker_config() + 
runner = _make_runner(config=config) + hp = HyperParams(max_steps=1000, learning_rate=0.001) + exp = _make_experiment(hyperparams=hp) + + import tempfile + from pathlib import Path + + with tempfile.TemporaryDirectory() as tmp: + cmd = runner._build_docker_command(exp, Path(tmp)) + + env_values = [cmd[i + 1] for i, v in enumerate(cmd) if v == "--env"] + assert any("AUTOBOT_EXP_MAX_STEPS=1000" in v for v in env_values) + assert any("AUTOBOT_EXP_LEARNING_RATE=0.001" in v for v in env_values) + + def test_extra_params_in_env_flags(self): + config = _make_docker_config() + runner = _make_runner(config=config) + hp = HyperParams(extra={"seed": 42}) + exp = _make_experiment(hyperparams=hp) + + import tempfile + from pathlib import Path + + with tempfile.TemporaryDirectory() as tmp: + cmd = runner._build_docker_command(exp, Path(tmp)) + + env_values = [cmd[i + 1] for i, v in enumerate(cmd) if v == "--env"] + assert any("AUTOBOT_EXP_EXTRA_SEED=42" in v for v in env_values) + + def test_container_name_flag_present(self): + config = _make_docker_config() + runner = _make_runner(config=config) + exp = _make_experiment() + runner._current_container_name = f"autobot_exp_{exp.id}" + + import tempfile + from pathlib import Path + + with tempfile.TemporaryDirectory() as tmp: + cmd = runner._build_docker_command(exp, Path(tmp)) + + assert "--name" in cmd + name_idx = cmd.index("--name") + assert cmd[name_idx + 1] == f"autobot_exp_{exp.id}" + + def test_unsafe_mount_path_raises(self): + from pathlib import Path + + runner = _make_runner() + with pytest.raises(ValueError, match="unsafe"): + runner._validate_mount_path(Path("/")) + + def test_volume_mounts_present(self): + config = _make_docker_config() + runner = _make_runner(config=config) + exp = _make_experiment() + + import tempfile + from pathlib import Path + + with tempfile.TemporaryDirectory() as tmp: + cmd = runner._build_docker_command(exp, Path(tmp)) + + volume_args = [cmd[i + 1] for i, v in enumerate(cmd) if v == "-v"] + 
assert any("/experiment:ro" in v for v in volume_args) + assert any("/output" in v for v in volume_args) + + +class TestExecuteInDocker: + """Tests for ExperimentRunner._execute_in_docker with mocked subprocess.""" + + @pytest.mark.asyncio + async def test_successful_docker_run_reads_output(self): + import json + import os + import tempfile + + config = _make_docker_config() + expected_result = ExperimentResult(val_bpb=4.8, steps_completed=1000) + parser = _make_parser(result=expected_result) + runner = _make_runner(config=config, parser=parser) + exp = _make_experiment() + + output_payload = json.dumps( + {"returncode": 0, "stdout": "step 1000 val_bpb 4.8\n", "stderr": ""} + ).encode("utf-8") + + mock_process = AsyncMock() + mock_process.communicate = AsyncMock(return_value=(output_payload, None)) + mock_process.returncode = 0 + + # Capture the output directory created by _execute_in_docker so we + # can pre-populate result.json before the method reads it. + original_tmp = tempfile.TemporaryDirectory + + class _CapturingTmpDir: + """Context manager that writes result.json on __enter__.""" + + def __init__(self, *args, **kwargs): + self._real = original_tmp(*args, **kwargs) + + def __enter__(self): + path = self._real.__enter__() + result_file = os.path.join(path, "result.json") + with open(result_file, "w", encoding="utf-8") as fh: + json.dump( + { + "returncode": 0, + "stdout": "step 1000 val_bpb 4.8\n", + "stderr": "", + }, + fh, + ) + return path + + def __exit__(self, *args): + return self._real.__exit__(*args) + + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + with patch( + "services.autoresearch.runner.tempfile.TemporaryDirectory", + _CapturingTmpDir, + ): + result = await runner._execute_in_docker(exp) + + assert result.val_bpb == 4.8 + parser.parse.assert_called_once() + + @pytest.mark.asyncio + async def test_docker_uses_create_subprocess_exec(self): + """docker_enabled=True must call asyncio.create_subprocess_exec with 
'docker'.""" + import json + import os + import tempfile + + config = _make_docker_config() + runner = _make_runner(config=config) + exp = _make_experiment() + + original_tmp = tempfile.TemporaryDirectory + + class _PrePopTmpDir: + def __init__(self, *args, **kwargs): + self._real = original_tmp(*args, **kwargs) + + def __enter__(self): + path = self._real.__enter__() + with open( + os.path.join(path, "result.json"), "w", encoding="utf-8" + ) as fh: + json.dump({"returncode": 0, "stdout": "", "stderr": ""}, fh) + return path + + def __exit__(self, *args): + return self._real.__exit__(*args) + + mock_process = AsyncMock() + mock_process.communicate = AsyncMock(return_value=(b"", None)) + mock_process.returncode = 0 + + with patch( + "asyncio.create_subprocess_exec", return_value=mock_process + ) as mock_exec: + with patch( + "services.autoresearch.runner.tempfile.TemporaryDirectory", + _PrePopTmpDir, + ): + await runner._execute_in_docker(exp) + + first_call_args = mock_exec.call_args_list[0][0] + assert first_call_args[0] == "docker" + assert "run" in first_call_args + + @pytest.mark.asyncio + async def test_docker_timeout_returns_error_result(self): + config = _make_docker_config(docker_timeout=1) + runner = _make_runner(config=config) + exp = _make_experiment() + + mock_process = AsyncMock() + mock_process.communicate = AsyncMock(side_effect=asyncio.TimeoutError()) + mock_process.returncode = None + mock_process.kill = MagicMock() + mock_process.wait = AsyncMock() + + # docker kill subprocess mock + mock_kill_process = AsyncMock() + mock_kill_process.returncode = 0 + mock_kill_process.wait = AsyncMock() + + import tempfile + + original_tmp = tempfile.TemporaryDirectory + + class _EmptyTmpDir: + def __init__(self, *args, **kwargs): + self._real = original_tmp(*args, **kwargs) + + def __enter__(self): + return self._real.__enter__() + + def __exit__(self, *args): + return self._real.__exit__(*args) + + call_count = 0 + kill_args_captured = [] + + async def 
_side_effect(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == 1: + return mock_process + kill_args_captured.extend(args) + return mock_kill_process + + with patch("asyncio.create_subprocess_exec", side_effect=_side_effect): + with patch( + "services.autoresearch.runner.tempfile.TemporaryDirectory", + _EmptyTmpDir, + ): + result = await runner._execute_in_docker(exp) + + assert not result.success + assert "timed out" in result.error_message + # docker kill must target container name, not host PID + assert "docker" in kill_args_captured + assert "kill" in kill_args_captured + container_name = kill_args_captured[kill_args_captured.index("kill") + 1] + assert container_name.startswith("autobot_exp_") + + @pytest.mark.asyncio + async def test_docker_nonzero_exit_returns_error(self): + import json + import os + import tempfile + + config = _make_docker_config() + runner = _make_runner(config=config) + exp = _make_experiment() + + original_tmp = tempfile.TemporaryDirectory + + class _PrePopTmpDir: + def __init__(self, *args, **kwargs): + self._real = original_tmp(*args, **kwargs) + + def __enter__(self): + path = self._real.__enter__() + with open( + os.path.join(path, "result.json"), "w", encoding="utf-8" + ) as fh: + json.dump( + {"returncode": 1, "stdout": "err\n", "stderr": "crash"}, + fh, + ) + return path + + def __exit__(self, *args): + return self._real.__exit__(*args) + + mock_process = AsyncMock() + mock_process.communicate = AsyncMock(return_value=(b"", None)) + mock_process.returncode = 0 + + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + with patch( + "services.autoresearch.runner.tempfile.TemporaryDirectory", + _PrePopTmpDir, + ): + result = await runner._execute_in_docker(exp) + + assert not result.success + assert "exited with code 1" in result.error_message + + @pytest.mark.asyncio + async def test_missing_result_json_returns_error(self): + config = _make_docker_config() + runner = 
_make_runner(config=config) + exp = _make_experiment() + + mock_process = AsyncMock() + mock_process.communicate = AsyncMock(return_value=(b"", None)) + mock_process.returncode = 0 + + with patch("asyncio.create_subprocess_exec", return_value=mock_process): + result = await runner._execute_in_docker(exp) + + assert not result.success + assert "result.json" in result.error_message + + +class TestDockerFallback: + """Verify docker_enabled=False uses subprocess, not Docker.""" + + @pytest.mark.asyncio + async def test_subprocess_path_when_docker_disabled(self): + config = _make_config(docker_enabled=False) + expected_result = ExperimentResult(val_bpb=5.5, steps_completed=5000) + parser = _make_parser(result=expected_result) + runner = _make_runner(config=config, parser=parser) + exp = _make_experiment() + + mock_process = AsyncMock() + mock_process.communicate = AsyncMock(return_value=(b"step 5000", None)) + mock_process.returncode = 0 + + with patch( + "asyncio.create_subprocess_exec", return_value=mock_process + ) as mock_exec: + result = await runner._execute_training(exp) + + # Must not call docker + first_call_args = mock_exec.call_args_list[0][0] + assert first_call_args[0] != "docker" + assert result.val_bpb == 5.5 diff --git a/autobot-backend/services/autoresearch/scorers.py b/autobot-backend/services/autoresearch/scorers.py index 25f608d64..6314aa6ed 100644 --- a/autobot-backend/services/autoresearch/scorers.py +++ b/autobot-backend/services/autoresearch/scorers.py @@ -18,7 +18,7 @@ import time from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union logger = logging.getLogger(__name__) @@ -56,12 +56,21 @@ def name(self) -> str: """Unique scorer identifier.""" @abstractmethod - async def score(self, prompt_output: str, context: Dict[str, Any]) -> ScorerResult: + async def score( + self, + prompt_output: str, + context: Dict[str, Any], + subset_fraction: 
Optional[float] = None, + ) -> ScorerResult: """Score a prompt variant's output. Args: prompt_output: The text produced by running the prompt variant. context: Scorer-specific context (hyperparams, criteria, etc.). + subset_fraction: If provided (0 < value <= 1), evaluate on this + fraction of benchmark samples instead of the full set. + Scorers that do not support subset evaluation ignore this + parameter. Pass None (default) for full evaluation. Returns: ScorerResult with normalized score. @@ -85,7 +94,9 @@ def __init__( baseline_val_bpb: float, ) -> None: if baseline_val_bpb <= 0: - raise ValueError(f"baseline_val_bpb must be positive, got {baseline_val_bpb}") + raise ValueError( + f"baseline_val_bpb must be positive, got {baseline_val_bpb}" + ) self._runner = runner self._baseline = baseline_val_bpb @@ -93,7 +104,19 @@ def __init__( def name(self) -> str: return "val_bpb" - async def score(self, prompt_output: str, context: Dict[str, Any]) -> ScorerResult: + async def score( + self, + prompt_output: str, + context: Dict[str, Any], + subset_fraction: Optional[float] = None, + ) -> ScorerResult: + # subset_fraction is informational for this scorer; full experiment + # is always required to get a valid val_bpb reading. 
+ if subset_fraction is not None: + logger.debug( + "ValBpbScorer: subset_fraction=%s ignored — full experiment required", + subset_fraction, + ) hp_data = context.get("hyperparams", {}) hp = HyperParams.from_dict(hp_data) if hp_data else HyperParams() @@ -127,7 +150,9 @@ async def score(self, prompt_output: str, context: Dict[str, Any]) -> ScorerResu # Normalize: improvement as fraction of baseline, clamped 0-1 improvement = self._baseline - val_bpb - normalized = max(0.0, improvement / self._baseline) if self._baseline > 0 else 0.0 + normalized = ( + max(0.0, improvement / self._baseline) if self._baseline > 0 else 0.0 + ) return ScorerResult( score=normalized, @@ -146,7 +171,7 @@ async def score(self, prompt_output: str, context: Dict[str, Any]) -> ScorerResu _JUDGE_SYSTEM_PROMPT = ( "You are a prompt quality evaluator. Rate the following output on a scale " "of 0-10 based on these criteria: {criteria}.\n\n" - "Respond with JSON: {{\"rating\": <0-10>, \"reasoning\": \"\"}}" + 'Respond with JSON: {{"rating": <0-10>, "reasoning": ""}}' ) @@ -169,7 +194,14 @@ def __init__( def name(self) -> str: return "llm_judge" - async def score(self, prompt_output: str, context: Dict[str, Any]) -> ScorerResult: + async def score( + self, + prompt_output: str, + context: Dict[str, Any], + subset_fraction: Optional[float] = None, + ) -> ScorerResult: + # subset_fraction: LLMJudgeScorer evaluates a single output text so + # sub-sampling is not applicable; parameter accepted for interface compat. 
criteria_str = ", ".join(self._criteria) system_msg = _JUDGE_SYSTEM_PROMPT.format(criteria=criteria_str) @@ -260,7 +292,14 @@ def _validate_key_component(value: str, name: str) -> str: ) return value - async def score(self, prompt_output: str, context: Dict[str, Any]) -> ScorerResult: + async def score( + self, + prompt_output: str, + context: Dict[str, Any], + subset_fraction: Optional[float] = None, + ) -> ScorerResult: + # subset_fraction: HumanReviewScorer queues the variant for a human; + # sub-sampling does not apply — parameter accepted for interface compat. session_id = self._validate_key_component( context.get("session_id", "unknown"), "session_id" ) @@ -276,11 +315,13 @@ async def score(self, prompt_output: str, context: Dict[str, Any]) -> ScorerResu ) await redis.set( pending_key, - json.dumps({ - "prompt_output": prompt_output[:5000], - "session_id": session_id, - "variant_id": variant_id, - }), + json.dumps( + { + "prompt_output": prompt_output[:5000], + "session_id": session_id, + "variant_id": variant_id, + } + ), ex=self._TTL_SECONDS, ) diff --git a/autobot-backend/services/autoresearch/scorers_test.py b/autobot-backend/services/autoresearch/scorers_test.py index 2c3203f75..21dce2f24 100644 --- a/autobot-backend/services/autoresearch/scorers_test.py +++ b/autobot-backend/services/autoresearch/scorers_test.py @@ -6,9 +6,9 @@ from __future__ import annotations import json +from unittest.mock import AsyncMock, MagicMock import pytest -from unittest.mock import AsyncMock, MagicMock from services.autoresearch.models import Experiment, ExperimentResult, ExperimentState from services.autoresearch.scorers import ( @@ -133,6 +133,48 @@ async def test_score_handles_llm_failure(self, scorer, mock_llm): assert "error" in result.metadata +class TestSubsetFractionPassthrough: + """Verify subset_fraction=None is a no-op for all concrete scorers.""" + + @pytest.mark.asyncio + async def test_llm_judge_accepts_subset_fraction_none(self): + llm = AsyncMock() + 
mock_response = MagicMock() + mock_response.content = '{"rating": 7, "reasoning": "ok"}' + llm.chat.return_value = mock_response + + scorer = LLMJudgeScorer(llm_service=llm, criteria=["quality"]) + result = await scorer.score("output text", {}, subset_fraction=None) + assert result.score == 0.7 + + @pytest.mark.asyncio + async def test_llm_judge_accepts_subset_fraction_value(self): + llm = AsyncMock() + mock_response = MagicMock() + mock_response.content = '{"rating": 6, "reasoning": "ok"}' + llm.chat.return_value = mock_response + + scorer = LLMJudgeScorer(llm_service=llm, criteria=["quality"]) + # subset_fraction is accepted and ignored for LLMJudgeScorer + result = await scorer.score("output text", {}, subset_fraction=0.3) + assert result.score == 0.6 + + @pytest.mark.asyncio + async def test_val_bpb_accepts_subset_fraction(self): + runner = AsyncMock() + experiment = MagicMock() + experiment.result = MagicMock() + experiment.result.val_bpb = 4.0 + experiment.state = MagicMock() + experiment.state.value = "kept" + runner.run_experiment.return_value = experiment + + scorer = ValBpbScorer(runner=runner, baseline_val_bpb=5.0) + result = await scorer.score("hypothesis", {}, subset_fraction=0.3) + # full experiment still runs; subset_fraction is logged and ignored + assert result.score > 0.0 + + class TestHumanReviewScorer: @pytest.fixture def mock_redis(self): diff --git a/autobot-backend/services/autoresearch/store.py b/autobot-backend/services/autoresearch/store.py index 47ce6f51c..e53997100 100644 --- a/autobot-backend/services/autoresearch/store.py +++ b/autobot-backend/services/autoresearch/store.py @@ -136,7 +136,9 @@ def _build_document(self, experiment: Experiment) -> str: ] # Include hyperparams for richer search hp_dict = experiment.hyperparams.to_dict() - parts.append(f"Hyperparams: {', '.join(f'{k}={v}' for k, v in hp_dict.items())}") + parts.append( + f"Hyperparams: {', '.join(f'{k}={v}' for k, v in hp_dict.items())}" + ) if experiment.result: 
parts.append(f"val_bpb: {experiment.result.val_bpb}") diff --git a/autobot-backend/services/captcha_human_loop.py b/autobot-backend/services/captcha_human_loop.py index 251c41f89..edbe2b15c 100644 --- a/autobot-backend/services/captcha_human_loop.py +++ b/autobot-backend/services/captcha_human_loop.py @@ -464,7 +464,9 @@ async def request_human_intervention( except Exception as e: logger.error("Error requesting CAPTCHA intervention: %s", e) - return self._build_error_result(captcha_id, url, start_time, "CAPTCHA intervention request failed") + return self._build_error_result( + captcha_id, url, start_time, "CAPTCHA intervention request failed" + ) finally: self._cleanup_captcha_tracking(captcha_id) diff --git a/autobot-backend/services/conversation_export.py b/autobot-backend/services/conversation_export.py index 3a2dbd1b9..9b175ee1f 100644 --- a/autobot-backend/services/conversation_export.py +++ b/autobot-backend/services/conversation_export.py @@ -62,9 +62,7 @@ def _render_session_metadata_markdown( # --------------------------------------------------------------------------- -def _build_json_envelope( - session_id: str, chat_data: Dict[str, Any] -) -> Dict[str, Any]: +def _build_json_envelope(session_id: str, chat_data: Dict[str, Any]) -> Dict[str, Any]: """Wrap raw session data in the versioned AutoBot JSON export envelope.""" return { "format": AUTOBOT_EXPORT_FORMAT, @@ -150,9 +148,7 @@ async def export_all_conversations_json(chat_history_manager) -> Optional[str]: session_id = session_info.get("chatId") or session_info.get("id", "") if not session_id: continue - chat_data = await _load_full_session_data( - chat_history_manager, session_id - ) + chat_data = await _load_full_session_data(chat_history_manager, session_id) if chat_data is not None: envelopes.append(_build_json_envelope(session_id, chat_data)) archive = _build_bulk_envelope(envelopes) diff --git a/autobot-backend/services/fact_extraction_service.py 
b/autobot-backend/services/fact_extraction_service.py index e051bf05c..1bdd20a88 100644 --- a/autobot-backend/services/fact_extraction_service.py +++ b/autobot-backend/services/fact_extraction_service.py @@ -339,7 +339,9 @@ async def extract_facts_from_chunks( except Exception as e: logger.error("Error processing chunks for fact extraction: %s", e) - return self._build_chunks_error_response("Chunk fact extraction failed", len(chunks)) + return self._build_chunks_error_response( + "Chunk fact extraction failed", len(chunks) + ) async def _deduplicate_facts(self, facts: List[AtomicFact]) -> List[AtomicFact]: """ diff --git a/autobot-backend/services/feature_flags.py b/autobot-backend/services/feature_flags.py index a7c9bb81f..a7c00f5ed 100644 --- a/autobot-backend/services/feature_flags.py +++ b/autobot-backend/services/feature_flags.py @@ -324,7 +324,10 @@ async def get_rollout_statistics(self) -> Metadata: except Exception as e: logger.error("Failed to get rollout statistics: %s", e) - return {"error": "Failed to retrieve rollout statistics", "current_mode": "unknown"} + return { + "error": "Failed to retrieve rollout statistics", + "current_mode": "unknown", + } async def clear_all_flags(self) -> bool: """ diff --git a/autobot-backend/services/knowledge/doc_indexer.py b/autobot-backend/services/knowledge/doc_indexer.py index b33ed4d2b..5bf43163a 100644 --- a/autobot-backend/services/knowledge/doc_indexer.py +++ b/autobot-backend/services/knowledge/doc_indexer.py @@ -36,11 +36,11 @@ "CLAUDE.md", "docs/system-state.md", "docs/api/COMPREHENSIVE_API_DOCUMENTATION.md", - "docs/architecture/PHASE_5_DISTRIBUTED_ARCHITECTURE.md", + "docs/architecture/DISTRIBUTED_ARCHITECTURE.md", "docs/architecture/README.md", "docs/architecture/data-flows.md", "docs/architecture/redis-schema.md", - "docs/developer/PHASE_5_DEVELOPER_SETUP.md", + "docs/developer/DEVELOPER_SETUP.md", "docs/troubleshooting/COMPREHENSIVE_TROUBLESHOOTING_GUIDE.md", "docs/features/MULTIMODAL_AI_INTEGRATION.md", 
"docs/GLOSSARY.md", @@ -68,7 +68,6 @@ TIER_3_DIRS_EXTRA = [ "docs/user-guide", "docs/developer", - "docs/plans", ] EXCLUDE_PATTERNS = [ @@ -80,6 +79,7 @@ r".*/archives/.*", r".*/reports/finished/.*", r".*/changelog/.*", + r".*/_index\.md$", ] # ============================================================================ @@ -205,6 +205,44 @@ def _extract_tags(content: str, file_path: str) -> List[str]: return list(tags)[:15] +def _parse_frontmatter(content: str) -> Tuple[str, List[str], List[str]]: + """Strip Obsidian YAML frontmatter and return (body, tags, aliases). + + Recognises a leading ``---`` block. Returns the content after the closing + ``---`` together with any ``tags`` and ``aliases`` values found inside it. + If no frontmatter is present the original content is returned unchanged + with empty tag and alias lists. + """ + if not content.startswith("---"): + return content, [], [] + + end = content.find("\n---", 3) + if end == -1: + return content, [], [] + + fm_block = content[3:end] + body = content[end + 4 :].lstrip("\n") + + fm_tags: List[str] = [] + fm_aliases: List[str] = [] + + # Parse simple YAML list values for 'tags' and 'aliases' keys. 
+ current_key: Optional[str] = None + for line in fm_block.splitlines(): + key_match = re.match(r"^(\w+)\s*:", line) + if key_match: + current_key = key_match.group(1) + inline = line[key_match.end() :].strip() + if inline and current_key in ("tags", "aliases"): + target = fm_tags if current_key == "tags" else fm_aliases + target.append(inline.lstrip("- ").strip()) + elif line.strip().startswith("-") and current_key in ("tags", "aliases"): + target = fm_tags if current_key == "tags" else fm_aliases + target.append(line.strip().lstrip("- ").strip()) + + return body, fm_tags, fm_aliases + + def _estimate_tokens(text: str) -> int: """Estimate token count (~4 chars per token).""" return len(text) // 4 @@ -693,11 +731,13 @@ async def _index_file_chunks( """ import asyncio - chunks = _chunk_markdown(content, file_str) + body, fm_tags, fm_aliases = _parse_frontmatter(content) + chunks = _chunk_markdown(body, file_str) if not chunks: return 0, 0 - file_tags = _extract_tags(content, file_str) + file_tags = _extract_tags(body, file_str) + file_tags = list(dict.fromkeys(fm_tags + fm_aliases + file_tags))[:20] indexed = 0 for i, chunk in enumerate(chunks): ok = await asyncio.to_thread( @@ -773,12 +813,14 @@ async def _index_single_file_content( result.skipped += 1 return - chunks = _chunk_markdown(content, file_path) + body, fm_tags, fm_aliases = _parse_frontmatter(content) + chunks = _chunk_markdown(body, file_path) if not chunks: result.skipped += 1 return - file_tags = _extract_tags(content, file_path) + file_tags = _extract_tags(body, file_path) + file_tags = list(dict.fromkeys(fm_tags + fm_aliases + file_tags))[:20] indexed = 0 for i, chunk in enumerate(chunks): ok = await asyncio.to_thread( diff --git a/autobot-backend/services/llm_cost_tracker.py b/autobot-backend/services/llm_cost_tracker.py index 4238e8c56..fccbc02b5 100644 --- a/autobot-backend/services/llm_cost_tracker.py +++ b/autobot-backend/services/llm_cost_tracker.py @@ -858,7 +858,10 @@ async def 
get_cost_by_session(self, session_id: str) -> Dict[str, Any]: except Exception as e: logger.error("Failed to get session cost: %s", e) - return {"session_id": session_id, "error": "Failed to retrieve session cost"} + return { + "session_id": session_id, + "error": "Failed to retrieve session cost", + } async def get_cost_trends(self, days: int = 30) -> Dict[str, Any]: """ diff --git a/autobot-backend/services/llm_service.py b/autobot-backend/services/llm_service.py index 46930d849..12e5ecd67 100644 --- a/autobot-backend/services/llm_service.py +++ b/autobot-backend/services/llm_service.py @@ -81,7 +81,9 @@ def _apply_task_defaults( """ defaults = _TASK_TYPE_DEFAULTS.get(llm_type.value, _TASK_TYPE_DEFAULTS["general"]) resolved_temp = temperature if temperature is not None else defaults["temperature"] - resolved_tokens = max_tokens if max_tokens is not None else defaults.get("max_tokens") + resolved_tokens = ( + max_tokens if max_tokens is not None else defaults.get("max_tokens") + ) return resolved_temp, resolved_tokens @@ -182,9 +184,7 @@ async def chat( if provider is None: self._error_count += 1 logger.error("No available provider for chat request") - return _build_error_response( - request, "No available LLM provider", "none" - ) + return _build_error_response(request, "No available LLM provider", "none") response = await provider.chat_completion(request) if response.error: @@ -275,9 +275,7 @@ async def list_models( import asyncio as _asyncio target_names = ( - [provider_name] - if provider_name - else list(self._registry._providers.keys()) + [provider_name] if provider_name else list(self._registry._providers.keys()) ) results: Dict[str, List[str]] = {} for name in target_names: diff --git a/autobot-backend/services/mesh_brain/scheduler_test.py b/autobot-backend/services/mesh_brain/scheduler_test.py index d0d437f55..323085872 100644 --- a/autobot-backend/services/mesh_brain/scheduler_test.py +++ b/autobot-backend/services/mesh_brain/scheduler_test.py @@ -1,7 
+1,7 @@ # AutoBot - AI-Powered Automation Platform # Copyright (c) 2025 mrveiss # Author: mrveiss -"""Unit tests for MeshBrainScheduler and mesh_brain API endpoints (#1994, #2120).""" +"""Unit tests for MeshBrainScheduler (#1994, #2120).""" import asyncio from unittest.mock import AsyncMock, MagicMock, patch @@ -225,84 +225,6 @@ async def test_stop_cancels_tasks(self): assert scheduler._running is False -# ============================================================================= -# API endpoint helpers (mesh_brain.py) -# ============================================================================= - - -class TestMeshBrainApiHelpers: - def test_health_endpoint_returns_healthy_when_no_failed_jobs(self): - """_build_health_response returns healthy=True when no jobs have failed.""" - from api.mesh_brain import _build_health_response - - status = { - "running": True, - "jobs": { - "edge_sync": {"last_result": "success"}, - "node_promoter": {"last_result": None}, - }, - } - result = _build_health_response(status) - assert result["healthy"] is True - assert result["failed_jobs"] == [] - assert result["running"] is True - - def test_health_endpoint_returns_unhealthy_when_job_failed(self): - """_build_health_response returns healthy=False and lists the failed job.""" - from api.mesh_brain import _build_health_response - - status = { - "running": True, - "jobs": { - "edge_sync": {"last_result": "failed"}, - "node_promoter": {"last_result": "success"}, - }, - } - result = _build_health_response(status) - assert result["healthy"] is False - assert "edge_sync" in result["failed_jobs"] - - def test_set_scheduler_registers_instance(self): - """set_scheduler() stores the scheduler so get_status endpoint can use it.""" - import api.mesh_brain as mb - - original = mb._scheduler - try: - mock_scheduler = MagicMock() - mb.set_scheduler(mock_scheduler) - assert mb._scheduler is mock_scheduler - finally: - mb._scheduler = original - - @pytest.mark.asyncio - async def 
test_status_endpoint_returns_not_initialized_message(self): - """get_mesh_brain_status returns a safe dict when _scheduler is None.""" - import api.mesh_brain as mb - - original = mb._scheduler - try: - mb._scheduler = None - result = await mb.get_mesh_brain_status() - assert result["running"] is False - assert "message" in result - finally: - mb._scheduler = original - - @pytest.mark.asyncio - async def test_health_endpoint_returns_not_initialized(self): - """get_mesh_brain_health returns healthy=False with reason when _scheduler is None.""" - import api.mesh_brain as mb - - original = mb._scheduler - try: - mb._scheduler = None - result = await mb.get_mesh_brain_health() - assert result["healthy"] is False - assert result["reason"] == "not_initialized" - finally: - mb._scheduler = original - - # ============================================================================= # Internal helpers # ============================================================================= diff --git a/autobot-backend/services/notification_service.py b/autobot-backend/services/notification_service.py index c42b6cdea..9aa478ef7 100644 --- a/autobot-backend/services/notification_service.py +++ b/autobot-backend/services/notification_service.py @@ -70,6 +70,7 @@ class NotificationEvent(str, Enum): WORKFLOW_FAILED = "workflow_failed" STEP_FAILED = "step_failed" APPROVAL_NEEDED = "approval_needed" + SERVICE_FAILED = "service_failed" # --------------------------------------------------------------------------- @@ -119,6 +120,9 @@ class NotificationConfig: NotificationEvent.APPROVAL_NEEDED: ( "Workflow '$workflow_id' is waiting for approval at step '$step_name'." ), + NotificationEvent.SERVICE_FAILED: ( + "Service '$service' on '$hostname' transitioned $prev_state -> $new_state. 
$error_context" + ), } diff --git a/autobot-backend/services/playwright_service.py b/autobot-backend/services/playwright_service.py index f7aee1643..a42f20dbb 100644 --- a/autobot-backend/services/playwright_service.py +++ b/autobot-backend/services/playwright_service.py @@ -198,7 +198,12 @@ async def search_web( } except Exception as e: logger.error("Web search error: %s", e) - return {"success": False, "error": "Web search failed", "query": query, "results": []} + return { + "success": False, + "error": "Web search failed", + "query": query, + "results": [], + } async def test_frontend( self, frontend_url: str = ServiceURLs.FRONTEND_LOCAL @@ -317,7 +322,12 @@ async def send_test_message( } except Exception as e: logger.error("Test message error: %s", e) - return {"success": False, "error": "Test message failed", "message": message, "steps": []} + return { + "success": False, + "error": "Test message failed", + "message": message, + "steps": [], + } async def capture_screenshot( self, url: str, full_page: bool = True, wait_timeout: int = 5000 diff --git a/autobot-backend/services/redis_service_manager.py b/autobot-backend/services/redis_service_manager.py index 8cd8f566d..faaf0d5ca 100644 --- a/autobot-backend/services/redis_service_manager.py +++ b/autobot-backend/services/redis_service_manager.py @@ -364,9 +364,13 @@ async def start_service(self, user_id: str = "system") -> ServiceOperationResult logger.error("Start service failed: %s", e) await self._record_error() self._audit_log( - "redis_service_start_failed", {"error": type(e).__name__}, user_id=user_id + "redis_service_start_failed", + {"error": type(e).__name__}, + user_id=user_id, + ) + return self._operation_failure_result( + "start", "Service start failed", duration ) - return self._operation_failure_result("start", "Service start failed", duration) async def stop_service(self, user_id: str = "system") -> ServiceOperationResult: """ @@ -428,9 +432,13 @@ async def stop_service(self, user_id: str = 
"system") -> ServiceOperationResult: logger.error("Stop service failed: %s", e) await self._record_error() self._audit_log( - "redis_service_stop_failed", {"error": type(e).__name__}, user_id=user_id + "redis_service_stop_failed", + {"error": type(e).__name__}, + user_id=user_id, + ) + return self._operation_failure_result( + "stop", "Service stop failed", duration ) - return self._operation_failure_result("stop", "Service stop failed", duration) async def restart_service(self, user_id: str = "system") -> ServiceOperationResult: """ @@ -483,9 +491,13 @@ async def restart_service(self, user_id: str = "system") -> ServiceOperationResu logger.error("Restart service failed: %s", e) await self._record_error() self._audit_log( - "redis_service_restart_failed", {"error": type(e).__name__}, user_id=user_id + "redis_service_restart_failed", + {"error": type(e).__name__}, + user_id=user_id, + ) + return self._operation_failure_result( + "restart", "Service restart failed", duration ) - return self._operation_failure_result("restart", "Service restart failed", duration) async def get_service_status(self, use_cache: bool = True) -> ServiceStatus: """ diff --git a/autobot-backend/services/semantic_analyzer.py b/autobot-backend/services/semantic_analyzer.py index 793c289ba..8cb3ba7d2 100644 --- a/autobot-backend/services/semantic_analyzer.py +++ b/autobot-backend/services/semantic_analyzer.py @@ -54,7 +54,9 @@ class SemanticAnalyzer: "docstring": re.compile(r'"""[\s\S]{0,5000}Args:[\s\S]{0,5000}Returns:'), }, "numpy": { - "docstring": re.compile(r'"""[\s\S]{0,5000}Parameters[\s\S]{0,5000}----------'), + "docstring": re.compile( + r'"""[\s\S]{0,5000}Parameters[\s\S]{0,5000}----------' + ), }, } diff --git a/autobot-backend/services/slm/__init__.py b/autobot-backend/services/slm/__init__.py new file mode 100644 index 000000000..1587cbc3f --- /dev/null +++ b/autobot-backend/services/slm/__init__.py @@ -0,0 +1,4 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 
mrveiss +# Author: mrveiss +"""SLM service package for autobot-backend.""" diff --git a/autobot-backend/services/slm/deployment_orchestrator.py b/autobot-backend/services/slm/deployment_orchestrator.py new file mode 100644 index 000000000..3bf07f62d --- /dev/null +++ b/autobot-backend/services/slm/deployment_orchestrator.py @@ -0,0 +1,311 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +SLM Deployment Orchestrator + +Bridges autobot-backend to the SLM deployment API so that Docker containers +can be deployed via Ansible playbooks without callers needing to know the SLM +request shape. Also exposes a richer in-process DeploymentOrchestrator for +multi-role, multi-node rollouts that tracks steps locally before forwarding +the underlying playbook call to the SLM. + +Related to Issue #3407. +""" + +from __future__ import annotations + +import enum +import logging +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Optional + +from models.infrastructure import ( + DeploymentStrategy, + DockerContainerSpec, + DockerDeploymentRequest, + DockerDeploymentStatus, +) + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Enums +# --------------------------------------------------------------------------- + + +class DeploymentStatus(str, enum.Enum): + """Lifecycle states for an in-process deployment.""" + + QUEUED = "queued" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + CANCELLED = "cancelled" + ROLLED_BACK = "rolled_back" + + +class DeploymentStepType(str, enum.Enum): + """Types of steps within a deployment.""" + + DRAIN = "drain" + DEPLOY = "deploy" + HEALTH_CHECK = "health_check" + ROLLBACK = "rollback" + + +# --------------------------------------------------------------------------- +# Data classes +# 
--------------------------------------------------------------------------- + + +@dataclass +class DeploymentStep: + """A single step within a deployment context.""" + + step_type: DeploymentStepType + node_id: str + node_name: str + description: str + started_at: Optional[datetime] = None + completed_at: Optional[datetime] = None + success: Optional[bool] = None + error: Optional[str] = None + + +@dataclass +class DeploymentContext: + """Tracks a multi-node, multi-step deployment in memory.""" + + deployment_id: str + strategy: DeploymentStrategy + role_name: str + target_nodes: list[str] + playbook_path: Optional[str] = None + status: DeploymentStatus = DeploymentStatus.QUEUED + steps: list[DeploymentStep] = field(default_factory=list) + started_at: Optional[datetime] = None + completed_at: Optional[datetime] = None + error: Optional[str] = None + + +# --------------------------------------------------------------------------- +# SLMDeploymentOrchestrator — thin SLM HTTP bridge for Docker deployments +# --------------------------------------------------------------------------- + + +class SLMDeploymentOrchestrator: + """ + Calls the SLM backend to trigger and query Docker deployments. + + This class handles the translation between autobot-backend's + DockerDeploymentRequest model and the SLM's POST /deployments payload. + It does not maintain in-process state; all state lives in the SLM. 
+ """ + + def __init__(self, slm_client: Any) -> None: + self._client = slm_client + + def _build_extra_vars(self, containers: list[DockerContainerSpec]) -> dict: + """Build Ansible extra_vars dict from container specs.""" + return { + "docker_containers": [ + { + "name": c.name, + "image": f"{c.image}:{c.tag}", + "ports": [ + f"{p.host_port}:{p.container_port}/{p.protocol}" + for p in c.ports + ], + "environment": c.environment, + "restart_policy": c.restart_policy, + } + for c in containers + ] + } + + async def deploy_docker( + self, request: DockerDeploymentRequest + ) -> DockerDeploymentStatus: + """ + Trigger a Docker deployment on the target node via the SLM. + + Translates DockerDeploymentRequest into the SLM POST /deployments body + and returns a DockerDeploymentStatus built from the SLM response. + """ + extra_vars = self._build_extra_vars(request.containers) + payload = { + "node_id": request.node_id, + "roles": ["docker"], + "extra_data": { + "playbook": request.playbook, + "extra_vars": extra_vars, + }, + } + logger.info("Triggering Docker deployment on node %s via SLM", request.node_id) + response = await self._client.create_deployment(payload) + return self._map_response(response) + + async def get_deployment(self, deployment_id: str) -> DockerDeploymentStatus: + """Fetch the status of a single deployment from the SLM.""" + response = await self._client.get_deployment(deployment_id) + return self._map_response(response) + + async def list_deployments( + self, node_id: Optional[str] = None + ) -> list[DockerDeploymentStatus]: + """List deployments, optionally filtered by node_id.""" + response = await self._client.list_deployments(node_id=node_id) + deployments = response.get("deployments", []) + return [self._map_response(d) for d in deployments] + + def _map_response(self, data: dict) -> DockerDeploymentStatus: + """Map a raw SLM response dict to a DockerDeploymentStatus.""" + return DockerDeploymentStatus( + deployment_id=data.get("deployment_id", 
""), + node_id=data.get("node_id", ""), + status=data.get("status", "unknown"), + started_at=data.get("started_at"), + completed_at=data.get("completed_at"), + error=data.get("error"), + ) + + +# --------------------------------------------------------------------------- +# DeploymentOrchestrator — in-process multi-step orchestrator +# --------------------------------------------------------------------------- + + +class DeploymentOrchestrator: + """ + In-process orchestrator that tracks multi-role, multi-node deployments. + + Maintains an active_deployments list so that the API layer can query and + act on in-flight deployments without a database round-trip. Actual + playbook execution is delegated to the SLM via the slm_client. + """ + + def __init__(self, slm_client: Any) -> None: + self._client = slm_client + self.active_deployments: list[DeploymentContext] = [] + + async def create_deployment( + self, + role_name: str, + target_nodes: list[str], + strategy: DeploymentStrategy = DeploymentStrategy.SEQUENTIAL, + playbook_path: Optional[str] = None, + ) -> DeploymentContext: + """Create and queue a new deployment context.""" + ctx = DeploymentContext( + deployment_id=str(uuid.uuid4()), + strategy=strategy, + role_name=role_name, + target_nodes=target_nodes, + playbook_path=playbook_path, + status=DeploymentStatus.QUEUED, + ) + self.active_deployments.append(ctx) + logger.info( + "Deployment queued: %s for role=%s nodes=%s", + ctx.deployment_id, + role_name, + target_nodes, + ) + return ctx + + def get_deployment(self, deployment_id: str) -> Optional[DeploymentContext]: + """Return the DeploymentContext for the given id, or None.""" + for ctx in self.active_deployments: + if ctx.deployment_id == deployment_id: + return ctx + return None + + async def execute_deployment(self, deployment_id: str) -> bool: + """Execute a QUEUED deployment by forwarding each node to the SLM. + + Transitions the context through RUNNING → COMPLETED/FAILED. 
+ Returns False if the deployment is not found or not QUEUED. + """ + ctx = self.get_deployment(deployment_id) + if ctx is None or ctx.status != DeploymentStatus.QUEUED: + return False + ctx.status = DeploymentStatus.RUNNING + try: + for node_id in ctx.target_nodes: + extra: dict = {} + if ctx.playbook_path: + extra["playbook"] = ctx.playbook_path + await self._client.create_deployment( + node_id=node_id, roles=[ctx.role_name], extra_data=extra + ) + ctx.status = DeploymentStatus.COMPLETED + logger.info("Deployment completed: %s", deployment_id) + except Exception as exc: + ctx.status = DeploymentStatus.FAILED + logger.error("Deployment %s failed: %s", deployment_id, exc) + return True + + async def cancel_deployment(self, deployment_id: str) -> bool: + """ + Cancel a deployment. + + Returns True if the deployment was found and cancelled; False otherwise. + """ + ctx = self.get_deployment(deployment_id) + if ctx is None: + return False + if ctx.status not in (DeploymentStatus.QUEUED, DeploymentStatus.RUNNING): + return False + ctx.status = DeploymentStatus.CANCELLED + logger.info("Deployment cancelled: %s", deployment_id) + return True + + async def trigger_rollback(self, deployment_id: str) -> bool: + """ + Trigger a rollback for the given deployment. + + Returns True if a rollback step was queued; False if there is nothing + to roll back (e.g. no nodes have been deployed yet). 
+ """ + ctx = self.get_deployment(deployment_id) + if ctx is None: + return False + deployed_nodes = [s.node_id for s in ctx.steps if s.success] + if not deployed_nodes: + return False + for node_id in deployed_nodes: + ctx.steps.append( + DeploymentStep( + step_type=DeploymentStepType.ROLLBACK, + node_id=node_id, + node_name=node_id, + description=f"Rolling back {node_id}", + ) + ) + ctx.status = DeploymentStatus.ROLLED_BACK + logger.info("Rollback triggered for deployment %s", deployment_id) + return True + + +# --------------------------------------------------------------------------- +# Module-level singleton +# --------------------------------------------------------------------------- + +_orchestrator: Optional[DeploymentOrchestrator] = None + + +def get_orchestrator() -> Optional[DeploymentOrchestrator]: + """Return the module-level DeploymentOrchestrator singleton, or None.""" + return _orchestrator + + +def init_orchestrator(slm_client: Any) -> DeploymentOrchestrator: + """Initialize the module-level orchestrator singleton.""" + global _orchestrator + _orchestrator = DeploymentOrchestrator(slm_client=slm_client) + logger.info("DeploymentOrchestrator initialised") + return _orchestrator diff --git a/autobot-backend/services/temporal_invalidation_service.py b/autobot-backend/services/temporal_invalidation_service.py index 43af20077..52a99b9b0 100644 --- a/autobot-backend/services/temporal_invalidation_service.py +++ b/autobot-backend/services/temporal_invalidation_service.py @@ -258,7 +258,10 @@ async def initialize_rules(self) -> Dict[str, Any]: except Exception as e: logger.error("Error initializing invalidation rules: %s", e) - return {"status": "error", "message": "Temporal invalidation operation failed"} + return { + "status": "error", + "message": "Temporal invalidation operation failed", + } async def _load_invalidation_rules(self) -> Dict[str, InvalidationRule]: """Load invalidation rules from Redis.""" @@ -638,7 +641,9 @@ async def 
run_invalidation_sweep( except Exception as e: logger.error("Error in invalidation sweep: %s", e) processing_time = (datetime.now() - start_time).total_seconds() - return self._build_sweep_error_response("Invalidation sweep failed", processing_time) + return self._build_sweep_error_response( + "Invalidation sweep failed", processing_time + ) def _prepare_fact_for_invalidation( self, pipe, fact: AtomicFact, reasons: Dict[str, Dict[str, Any]] @@ -869,7 +874,10 @@ async def invalidate_contradictory_facts( except Exception as e: logger.error("Error in contradiction invalidation: %s", e) - return {"status": "error", "message": "Temporal invalidation operation failed"} + return { + "status": "error", + "message": "Temporal invalidation operation failed", + } def _aggregate_history_statistics( self, recent_history: List[str] @@ -969,7 +977,10 @@ async def add_invalidation_rule(self, rule: InvalidationRule) -> Dict[str, Any]: except Exception as e: logger.error("Error adding invalidation rule: %s", e) - return {"status": "error", "message": "Temporal invalidation operation failed"} + return { + "status": "error", + "message": "Temporal invalidation operation failed", + } async def remove_invalidation_rule(self, rule_id: str) -> Dict[str, Any]: """Remove an invalidation rule.""" @@ -985,7 +996,10 @@ async def remove_invalidation_rule(self, rule_id: str) -> Dict[str, Any]: except Exception as e: logger.error("Error removing invalidation rule: %s", e) - return {"status": "error", "message": "Temporal invalidation operation failed"} + return { + "status": "error", + "message": "Temporal invalidation operation failed", + } async def schedule_periodic_invalidation(self): """Schedule periodic invalidation sweeps.""" diff --git a/autobot-backend/services/terminal_secrets_service.py b/autobot-backend/services/terminal_secrets_service.py index 11a90ccae..601a830a6 100644 --- a/autobot-backend/services/terminal_secrets_service.py +++ 
b/autobot-backend/services/terminal_secrets_service.py @@ -164,7 +164,6 @@ async def _prepare_session_keys( logger.error("Failed to prepare key '%s': %s", key_data["name"], e) result["errors"].append(f"Failed to prepare key '{key_data['name']}'") - async def setup_ssh_keys( self, session_id: str, diff --git a/autobot-backend/services/user_behavior_analytics.py b/autobot-backend/services/user_behavior_analytics.py index fbf7ff53a..b0599d39c 100644 --- a/autobot-backend/services/user_behavior_analytics.py +++ b/autobot-backend/services/user_behavior_analytics.py @@ -271,7 +271,11 @@ async def get_user_journey(self, session_id: str) -> dict: except Exception as e: logger.error("Failed to get user journey: %s", e) - return {"session_id": session_id, "error": "Failed to retrieve user journey", "steps": []} + return { + "session_id": session_id, + "error": "Failed to retrieve user journey", + "steps": [], + } async def get_daily_stats(self, days: int = 30) -> dict: """ diff --git a/autobot-backend/services/workflow_automation/executor.py b/autobot-backend/services/workflow_automation/executor.py index 45a76091b..ddfa753c8 100644 --- a/autobot-backend/services/workflow_automation/executor.py +++ b/autobot-backend/services/workflow_automation/executor.py @@ -402,9 +402,13 @@ async def _send_step_confirmation_request( # Issue #3101: Notify configured channels that approval is needed. if step.requires_confirmation: - await self._notify(workflow, "approval_needed", { - "step_name": step.step_id, - }) + await self._notify( + workflow, + "approval_needed", + { + "step_name": step.step_id, + }, + ) def _check_step_dependencies( self, workflow: ActiveWorkflow, step: WorkflowStep @@ -531,10 +535,14 @@ async def _handle_step_execution_failure( ) # Issue #3101: Notify configured channels on step failure. 
- await self._notify(workflow, "step_failed", { - "step_name": step_id, - "error": str(error), - }) + await self._notify( + workflow, + "step_failed", + { + "step_name": step_id, + "error": str(error), + }, + ) async def approve_and_execute_step( self, @@ -776,10 +784,14 @@ async def _complete_workflow( ) # Issue #3101: Notify configured channels on workflow completion. - await self._notify(workflow, "workflow_completed", { - "status": "completed", - "total_steps": len(workflow.steps), - }) + await self._notify( + workflow, + "workflow_completed", + { + "status": "completed", + "total_steps": len(workflow.steps), + }, + ) # Issue #1367: Archive to completed history if self.on_workflow_finished: @@ -839,9 +851,13 @@ async def cancel_workflow( ) # Issue #3101: Notify configured channels on workflow cancellation/failure. - await self._notify(workflow, "workflow_failed", { - "error": "cancelled", - }) + await self._notify( + workflow, + "workflow_failed", + { + "error": "cancelled", + }, + ) # Issue #1367: Archive to completed history if self.on_workflow_finished: diff --git a/autobot-backend/services/workflow_automation/models.py b/autobot-backend/services/workflow_automation/models.py index 37926fe09..007140dad 100644 --- a/autobot-backend/services/workflow_automation/models.py +++ b/autobot-backend/services/workflow_automation/models.py @@ -311,11 +311,26 @@ class PlanPresentationRequest(BaseModel): _EMAIL_RE = __import__("re").compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$") _PRIVATE_PREFIXES = ( - "https://10.", "https://172.16.", "https://172.17.", "https://172.18.", - "https://172.19.", "https://172.20.", "https://172.21.", "https://172.22.", - "https://172.23.", "https://172.24.", "https://172.25.", "https://172.26.", - "https://172.27.", "https://172.28.", "https://172.29.", "https://172.30.", - "https://172.31.", "https://192.168.", "https://127.", "https://169.254.", + "https://10.", + "https://172.16.", + "https://172.17.", + "https://172.18.", + "https://172.19.", + 
"https://172.20.", + "https://172.21.", + "https://172.22.", + "https://172.23.", + "https://172.24.", + "https://172.25.", + "https://172.26.", + "https://172.27.", + "https://172.28.", + "https://172.29.", + "https://172.30.", + "https://172.31.", + "https://192.168.", + "https://127.", + "https://169.254.", "https://localhost", ) diff --git a/autobot-backend/services/workflow_automation/persistence.py b/autobot-backend/services/workflow_automation/persistence.py index 8e8722163..091b72d66 100644 --- a/autobot-backend/services/workflow_automation/persistence.py +++ b/autobot-backend/services/workflow_automation/persistence.py @@ -44,7 +44,9 @@ async def save_notification_config( key = _notif_config_key(workflow_id) if config is None: await redis.delete(key) - logger.debug("Deleted notification config from Redis (workflow=%s)", workflow_id) + logger.debug( + "Deleted notification config from Redis (workflow=%s)", workflow_id + ) return payload = json.dumps(asdict(config), ensure_ascii=False) await redis.set(key, payload, ex=_NOTIF_CONFIG_TTL) diff --git a/autobot-backend/startup_validator.py b/autobot-backend/startup_validator.py index be3c68167..07c50249e 100644 --- a/autobot-backend/startup_validator.py +++ b/autobot-backend/startup_validator.py @@ -185,7 +185,9 @@ def _validate_autobot_modules(self): ) except Exception as e: # Module imported but failed to initialize - logger.error("AutoBot module initialization failed: %s: %s", module_name, e) + logger.error( + "AutoBot module initialization failed: %s: %s", module_name, e + ) self.result.add_error( f"AutoBot module initialization failed: {module_name}", {"error": type(e).__name__}, @@ -202,10 +204,13 @@ def _validate_optional_modules(self): except ImportError as e: logger.debug("Optional module not available: %s: %s", module_name, e) self.result.add_warning( - f"Optional module not available: {module_name}", {"error": type(e).__name__} + f"Optional module not available: {module_name}", + {"error": 
type(e).__name__}, ) except Exception as e: - logger.debug("Optional module initialization failed: %s: %s", module_name, e) + logger.debug( + "Optional module initialization failed: %s: %s", module_name, e + ) self.result.add_warning( f"Optional module initialization failed: {module_name}", {"error": type(e).__name__}, @@ -253,7 +258,9 @@ async def validate_single_service(service_name: str, validator_func): logger.debug("✅ Service connectivity: %s", service_name) return service_name, None except Exception as e: - logger.error("Service connectivity check failed for %s: %s", service_name, e) + logger.error( + "Service connectivity check failed for %s: %s", service_name, e + ) return service_name, "Service connectivity check failed" results = await asyncio.gather( diff --git a/autobot-backend/takeover_manager.e2e_test.py b/autobot-backend/takeover_manager.e2e_test.py index eebebdea9..4d3f74d42 100644 --- a/autobot-backend/takeover_manager.e2e_test.py +++ b/autobot-backend/takeover_manager.e2e_test.py @@ -14,7 +14,7 @@ # Import system components try: - from api.simple_terminal_websocket import SimpleTerminalSession + from api.terminal_handlers import ConsolidatedTerminalWebSocket from api.workflow_automation import ( AutomationMode, WorkflowAutomationManager, @@ -67,7 +67,11 @@ async def run_all_tests(self): except Exception as e: logger.error(f"❌ {test_method.__name__} FAILED: {e}") self.test_results.append( - {"test": test_method.__name__, "status": "FAILED", "error": "Test execution failed"} + { + "test": test_method.__name__, + "status": "FAILED", + "error": "Test execution failed", + } ) await self.print_test_summary() @@ -247,8 +251,11 @@ async def test_emergency_kill(self): return # Create mock terminal session - terminal_session = SimpleTerminalSession(self.test_session_id) - terminal_session.websocket = AsyncMock() + mock_websocket = AsyncMock() + terminal_session = ConsolidatedTerminalWebSocket( + websocket=mock_websocket, + session_id=self.test_session_id, + 
) terminal_session.active = True # Simulate running processes @@ -257,14 +264,14 @@ async def test_emergency_kill(self): {"pid": 1002, "command": "background_process &", "startTime": time.time()}, ] - # Test emergency kill message handling + # Test emergency kill message handling via public dispatch method kill_data = { - "type": "automation_control", + "type": "workflow_control", "action": "emergency_kill", "session_id": self.test_session_id, } - await terminal_session.handle_workflow_control(kill_data) + await terminal_session.handle_message(kill_data) # Verify emergency kill message was sent terminal_session.websocket.send_text.assert_called() @@ -425,34 +432,32 @@ async def test_websocket_communication(self): return # Create mock terminal session - terminal_session = SimpleTerminalSession(self.test_session_id) - terminal_session.websocket = AsyncMock() + mock_websocket = AsyncMock() + terminal_session = ConsolidatedTerminalWebSocket( + websocket=mock_websocket, + session_id=self.test_session_id, + ) terminal_session.active = True - # Test workflow control messages + # Test workflow control messages via the public dispatch method test_messages = [ { - "type": "automation_control", + "type": "workflow_control", "action": "pause", "session_id": self.test_session_id, }, { - "type": "automation_control", + "type": "workflow_control", "action": "resume", "session_id": self.test_session_id, }, { - "type": "workflow_message", - "subtype": "start_workflow", - "workflow": {"name": "Test Workflow", "steps": []}, + "type": "ping", }, ] for message in test_messages: - if message["type"] == "automation_control": - await terminal_session.handle_workflow_control(message) - elif message["type"] == "workflow_message": - await terminal_session.handle_workflow_message(message) + await terminal_session.handle_message(message) # Verify WebSocket send_text was called for each message assert terminal_session.websocket.send_text.call_count >= len( @@ -530,14 +535,17 @@ async def 
test_error_handling(self): except Exception as e: logger.info(f"✅ Properly handled error: {e}") - # Test malformed messages - terminal_session = SimpleTerminalSession(self.test_session_id) - terminal_session.websocket = AsyncMock() + # Test malformed messages via public dispatch method + mock_websocket = AsyncMock() + terminal_session = ConsolidatedTerminalWebSocket( + websocket=mock_websocket, + session_id=self.test_session_id, + ) malformed_data = {"type": "invalid_type", "data": "malformed"} try: - await terminal_session.handle_workflow_control(malformed_data) + await terminal_session.handle_message(malformed_data) except Exception as e: logger.info(f"✅ Properly handled malformed message: {e}") diff --git a/autobot-backend/takeover_manager.py b/autobot-backend/takeover_manager.py index 54d66e018..0de6eae7e 100644 --- a/autobot-backend/takeover_manager.py +++ b/autobot-backend/takeover_manager.py @@ -597,7 +597,10 @@ async def _capture_system_snapshot(self) -> Dict[str, Any]: except Exception as e: logger.error("Failed to capture system snapshot: %s", e) - return {"error": "Failed to capture system snapshot", "timestamp": datetime.now().isoformat()} + return { + "error": "Failed to capture system snapshot", + "timestamp": datetime.now().isoformat(), + } async def _execute_trigger_handlers(self, request: TakeoverRequest): """Execute registered handlers for takeover triggers""" diff --git a/autobot-backend/tests/test_autoresearch_m3.py b/autobot-backend/tests/test_autoresearch_m3.py index 664cd96e9..ecf588993 100644 --- a/autobot-backend/tests/test_autoresearch_m3.py +++ b/autobot-backend/tests/test_autoresearch_m3.py @@ -6,9 +6,9 @@ from __future__ import annotations import json +from unittest.mock import AsyncMock, MagicMock import pytest -from unittest.mock import AsyncMock, MagicMock from services.autoresearch.knowledge_synthesizer import KnowledgeSynthesizer from services.autoresearch.models import ( diff --git 
a/autobot-backend/tests/utils/test_cache_response_json_response.py b/autobot-backend/tests/utils/test_cache_response_json_response.py new file mode 100644 index 000000000..945baa818 --- /dev/null +++ b/autobot-backend/tests/utils/test_cache_response_json_response.py @@ -0,0 +1,236 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Tests for LLM response caching via cache_response decorator (Issue #3273). + +Verifies that: +- JSONResponse objects are correctly serialised into Redis-storable dicts. +- Cache hits reconstruct a valid JSONResponse with the original body/status. +- Error-status JSONResponse objects (4xx/5xx) are not cached. +- Plain-dict responses continue to be cached as before. +- _record_cache_hit / _record_cache_miss helpers do not raise when Prometheus + is unavailable. +""" + +import json +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from fastapi.responses import JSONResponse + +from utils.advanced_cache_manager import ( + _JSON_RESPONSE_ENVELOPE, + SimpleCacheManager, + _deserialise_cached_entry, + _record_cache_hit, + _record_cache_miss, + _serialise_response, +) + +# --------------------------------------------------------------------------- +# _serialise_response +# --------------------------------------------------------------------------- + + +class TestSerialiseResponse: + def test_json_response_200_is_serialised(self): + resp = JSONResponse(content={"model": "llama3"}, status_code=200) + result = _serialise_response(resp) + assert result is not None + assert result[_JSON_RESPONSE_ENVELOPE] is True + assert result["status_code"] == 200 + assert json.loads(result["body"]) == {"model": "llama3"} + + def test_json_response_4xx_returns_none(self): + resp = JSONResponse(content={"detail": "not found"}, status_code=404) + assert _serialise_response(resp) is None + + def test_json_response_5xx_returns_none(self): + resp = JSONResponse(content={"error": "oops"}, 
status_code=500) + assert _serialise_response(resp) is None + + def test_dict_response_is_passed_through(self): + data = {"models": ["llama3"], "total_count": 1} + result = _serialise_response(data) + assert result == data + + def test_dict_with_error_key_returns_none(self): + assert _serialise_response({"error": "bad"}) is None + + def test_dict_with_status_error_returns_none(self): + assert _serialise_response({"status": "error"}) is None + + def test_empty_dict_returns_none(self): + assert _serialise_response({}) is None + + def test_non_dict_non_response_returns_none(self): + assert _serialise_response("plain string") is None + assert _serialise_response(42) is None + assert _serialise_response(None) is None + + +# --------------------------------------------------------------------------- +# _deserialise_cached_entry +# --------------------------------------------------------------------------- + + +class TestDeserialiseResponse: + def test_envelope_dict_reconstructs_json_response(self): + envelope = { + _JSON_RESPONSE_ENVELOPE: True, + "status_code": 200, + "body": json.dumps({"status": "connected", "model": "llama3"}), + } + result = _deserialise_cached_entry(envelope) + assert isinstance(result, JSONResponse) + assert result.status_code == 200 + body = json.loads(result.body.decode("utf-8")) + assert body == {"status": "connected", "model": "llama3"} + + def test_plain_dict_returned_unchanged(self): + data = {"models": ["llama3"], "total_count": 1} + assert _deserialise_cached_entry(data) == data + + def test_malformed_envelope_body_returns_none(self): + bad_envelope = { + _JSON_RESPONSE_ENVELOPE: True, + "status_code": 200, + "body": "not-valid-json{{", + } + result = _deserialise_cached_entry(bad_envelope) + assert result is None + + +# --------------------------------------------------------------------------- +# SimpleCacheManager._is_cacheable_response +# --------------------------------------------------------------------------- + + +class 
TestIsCacheableResponse: + def test_json_response_2xx_is_cacheable(self): + resp = JSONResponse(content={"ok": True}, status_code=200) + assert SimpleCacheManager._is_cacheable_response(resp) is True + + def test_json_response_4xx_is_not_cacheable(self): + resp = JSONResponse(content={"detail": "err"}, status_code=400) + assert SimpleCacheManager._is_cacheable_response(resp) is False + + def test_dict_ok_is_cacheable(self): + assert SimpleCacheManager._is_cacheable_response({"key": "val"}) is True + + def test_dict_error_is_not_cacheable(self): + assert SimpleCacheManager._is_cacheable_response({"error": "x"}) is False + + def test_string_is_not_cacheable(self): + assert SimpleCacheManager._is_cacheable_response("text") is False + + +# --------------------------------------------------------------------------- +# cache_response decorator — integration with SimpleCacheManager +# --------------------------------------------------------------------------- + + +class TestCacheResponseDecorator: + @pytest.mark.asyncio + async def test_cache_miss_then_hit_for_json_response(self): + """Second call returns cached JSONResponse without calling the function.""" + manager = SimpleCacheManager() + + stored: dict = {} + + async def fake_get(key): + return stored.get(key) + + async def fake_set(key, value, ttl=None): + stored[key] = value + + manager.get = fake_get + manager.set = fake_set + + call_count = 0 + + @manager.cache_response(cache_key="test_endpoint", ttl=60) + async def endpoint(): + nonlocal call_count + call_count += 1 + return JSONResponse(content={"model": "llama3"}, status_code=200) + + first = await endpoint() + assert call_count == 1 + assert isinstance(first, JSONResponse) + assert json.loads(first.body) == {"model": "llama3"} + + second = await endpoint() + # Function body must NOT be called again + assert call_count == 1 + assert isinstance(second, JSONResponse) + assert json.loads(second.body) == {"model": "llama3"} + + @pytest.mark.asyncio + async def 
test_error_response_not_cached(self): + """JSONResponse with 4xx status must never be stored.""" + manager = SimpleCacheManager() + set_called = False + + async def fake_get(key): + return None + + async def fake_set(key, value, ttl=None): + nonlocal set_called + set_called = True + + manager.get = fake_get + manager.set = fake_set + + @manager.cache_response(cache_key="test_error", ttl=60) + async def error_endpoint(): + return JSONResponse(content={"detail": "not found"}, status_code=404) + + result = await error_endpoint() + assert result.status_code == 404 + assert set_called is False + + +# --------------------------------------------------------------------------- +# Prometheus helpers — must be silent when metrics manager is unavailable +# --------------------------------------------------------------------------- + + +class TestPrometheusHelpers: + def test_record_cache_hit_does_not_raise_when_import_fails(self): + with patch( + "monitoring.prometheus_metrics.get_metrics_manager", + side_effect=ImportError("no prometheus"), + ): + # Should silently pass because _record_cache_hit catches all exceptions + _record_cache_hit("some_key") + + def test_record_cache_miss_does_not_raise_when_import_fails(self): + with patch( + "monitoring.prometheus_metrics.get_metrics_manager", + side_effect=ImportError("no prometheus"), + ): + _record_cache_miss("some_key") + + def test_record_cache_hit_calls_metrics_manager(self): + mock_mgr = MagicMock() + with patch( + "monitoring.prometheus_metrics.get_metrics_manager", + return_value=mock_mgr, + ): + _record_cache_hit("llm_models") + mock_mgr.record_llm_response_cache_hit.assert_called_once_with( + endpoint="llm_models" + ) + + def test_record_cache_miss_calls_metrics_manager(self): + mock_mgr = MagicMock() + with patch( + "monitoring.prometheus_metrics.get_metrics_manager", + return_value=mock_mgr, + ): + _record_cache_miss("llm_models") + mock_mgr.record_llm_response_cache_miss.assert_called_once_with( + 
endpoint="llm_models" + ) diff --git a/autobot-backend/tools/terminal_tool.py b/autobot-backend/tools/terminal_tool.py index 3b6d381cf..7aa20f5b0 100644 --- a/autobot-backend/tools/terminal_tool.py +++ b/autobot-backend/tools/terminal_tool.py @@ -210,7 +210,11 @@ async def execute_command( return self._format_execution_result(result, command, description) except Exception as e: logger.error("Error executing command: %s", e, exc_info=True) - return {"status": "error", "error": "Command execution failed", "command": command} + return { + "status": "error", + "error": "Command execution failed", + "command": command, + } async def get_session_info(self, conversation_id: str) -> Dict[str, Any]: """ diff --git a/autobot-backend/utils/advanced_cache_manager.py b/autobot-backend/utils/advanced_cache_manager.py index 201725e2f..8b63c2291 100644 --- a/autobot-backend/utils/advanced_cache_manager.py +++ b/autobot-backend/utils/advanced_cache_manager.py @@ -53,6 +53,85 @@ def _extract_request_from_call(args: tuple, kwargs: dict) -> Any: return None +_JSON_RESPONSE_ENVELOPE = "__json_response__" + + +def _serialise_response(result: Any) -> Optional[Any]: + """ + Convert a response value into a JSON-serialisable form for Redis storage. + + Issue #3273: JSONResponse bodies were never cached because json.dumps + cannot handle Starlette Response objects. We wrap them in an envelope dict + so they survive the round-trip through Redis. + + Returns None when the response must not be cached (error responses, etc.). 
+ """ + from starlette.responses import JSONResponse as StarletteJSONResponse + + if isinstance(result, StarletteJSONResponse): + if result.status_code >= 400: + return None + try: + body_str = result.body.decode("utf-8") + return { + _JSON_RESPONSE_ENVELOPE: True, + "status_code": result.status_code, + "body": body_str, + } + except Exception as exc: + logger.warning("Could not serialise JSONResponse for caching: %s", exc) + return None + + if isinstance(result, dict): + if result.get("error") or result.get("status") == "error" or not result: + return None + return result + + return None + + +def _deserialise_cached_entry(cached_entry: Any) -> Any: + """ + Reconstruct the original response from a cached entry. + + Issue #3273: Reverse of _serialise_response — rebuild JSONResponse when + the envelope marker is present, otherwise return the dict as-is. + """ + from fastapi.responses import JSONResponse + + if isinstance(cached_entry, dict) and cached_entry.get(_JSON_RESPONSE_ENVELOPE): + try: + return JSONResponse( + content=json.loads(cached_entry["body"]), + status_code=cached_entry.get("status_code", 200), + ) + except Exception as exc: + logger.warning("Could not deserialise cached JSONResponse: %s", exc) + return None + + return cached_entry + + +def _record_cache_hit(key: str) -> None: + """Increment Prometheus LLM response cache hit counter (Issue #3273).""" + try: + from monitoring.prometheus_metrics import get_metrics_manager + + get_metrics_manager().record_llm_response_cache_hit(endpoint=key) + except Exception as e: + logger.warning("Could not record cache hit metric: %s", e) + + +def _record_cache_miss(key: str) -> None: + """Increment Prometheus LLM response cache miss counter (Issue #3273).""" + try: + from monitoring.prometheus_metrics import get_metrics_manager + + get_metrics_manager().record_llm_response_cache_miss(endpoint=key) + except Exception as e: + logger.warning("Could not record cache miss metric: %s", e) + + def _generate_cache_key( 
explicit_key: Optional[str], request: Any, @@ -999,8 +1078,10 @@ def cache_response(self, cache_key: str = None, ttl: int = None): Compatible with original CacheManager.cache_response(). Supports FastAPI Request objects for automatic key generation. + Supports JSONResponse objects (Issue #3273: re-enable LLM response caching). Issue #620: Refactored to use extracted helper functions. + Issue #3273: Extended to serialise/deserialise JSONResponse bodies. """ actual_ttl = ttl or self.default_ttl @@ -1016,21 +1097,34 @@ async def wrapper(*args, **kwargs): # Try to get from cache first try: - cached_result = await self.get(key) - if cached_result is not None: + cached_entry = await self.get(key) + if cached_entry is not None: logger.debug("Cache HIT: %s - serving from cache", key) - return cached_result + _record_cache_hit(key) + # Issue #3352: _deserialise_cached_entry returns None on + # corrupt/malformed entries; treat that as a cache miss + # and fall through to execute the real function. 
+ result = _deserialise_cached_entry(cached_entry) + if result is not None: + return result + logger.warning( + "Cache entry for key %s could not be deserialised; " + "treating as cache miss", + key, + ) except Exception as e: logger.error("Cache retrieval error for key %s: %s", key, e) # Execute function and cache result logger.debug("Cache MISS: %s - executing function", key) + _record_cache_miss(key) result = await func(*args, **kwargs) - # Cache successful responses - if self._is_cacheable_response(result): + # Cache successful responses (dict or JSONResponse) + serialisable = _serialise_response(result) + if serialisable is not None: try: - await self.set(key, result, actual_ttl) + await self.set(key, serialisable, actual_ttl) logger.debug("Cache SET: %s - cached for %ds", key, actual_ttl) except Exception as e: logger.error("Cache storage error for key %s: %s", key, e) @@ -1043,7 +1137,16 @@ async def wrapper(*args, **kwargs): @staticmethod def _is_cacheable_response(result: Any) -> bool: - """Check if a response should be cached""" + """Check if a response should be cached. + + Issue #3273: Extended to accept JSONResponse objects in addition to dicts. 
+ """ + from starlette.responses import JSONResponse as StarletteJSONResponse + + if isinstance(result, StarletteJSONResponse): + # Only cache non-error status codes + return result.status_code < 400 + if not isinstance(result, dict): return False @@ -1107,16 +1210,25 @@ async def wrapper(*args, **kwargs): try: cached_result = await cache_manager.get(key) if cached_result is not None: - return cached_result + # Issue #3351: deserialise envelope back to JSONResponse if needed + deserialised = _deserialise_cached_entry(cached_result) + if deserialised is not None: + return deserialised except Exception as e: logger.error("Cache retrieval error for key %s: %s", key, e) # Execute and cache result = await func(*args, **kwargs) - if cache_manager._is_cacheable_response(result): + # Issue #3351: Use _serialise_response so that JSONResponse objects + # are wrapped in an envelope before storage. Storing the raw + # Starlette object causes json.dumps to raise TypeError and the + # entry is silently dropped. _serialise_response returns None for + # responses that must not be cached (errors, empty dicts, etc.). + serialisable = _serialise_response(result) + if serialisable is not None: try: - await cache_manager.set(key, result, ttl) + await cache_manager.set(key, serialisable, ttl) except Exception as e: logger.error("Cache storage error for key %s: %s", key, e) diff --git a/autobot-backend/utils/async_chromadb_client.py b/autobot-backend/utils/async_chromadb_client.py index b21c26aec..49540ea2f 100644 --- a/autobot-backend/utils/async_chromadb_client.py +++ b/autobot-backend/utils/async_chromadb_client.py @@ -44,7 +44,7 @@ # Issue #3094: Use SSOT config port so the default (8100) matches Ansible deployment. # Host remains os.getenv-based: empty string = use local PersistentClient (dev mode). 
_CHROMADB_HOST = os.getenv("AUTOBOT_CHROMADB_HOST", "") -_CHROMADB_PORT = _ssot_config.ports.chromadb +_CHROMADB_PORT = _ssot_config.port.chromadb logger = logging.getLogger(__name__) diff --git a/autobot-backend/utils/chromadb_client.py b/autobot-backend/utils/chromadb_client.py index 241f1fbfe..5f3136ea9 100644 --- a/autobot-backend/utils/chromadb_client.py +++ b/autobot-backend/utils/chromadb_client.py @@ -45,7 +45,7 @@ # Issue #3094: Use SSOT config port so the default (8100) matches Ansible deployment. # Host remains os.getenv-based: empty string = use local PersistentClient (dev mode). _CHROMADB_HOST = os.getenv("AUTOBOT_CHROMADB_HOST", "") -_CHROMADB_PORT = _ssot_config.ports.chromadb +_CHROMADB_PORT = _ssot_config.port.chromadb # Module exports __all__ = [ diff --git a/autobot-backend/utils/gpu_acceleration_optimizer.py b/autobot-backend/utils/gpu_acceleration_optimizer.py index d4b60f89e..6fb3e1781 100644 --- a/autobot-backend/utils/gpu_acceleration_optimizer.py +++ b/autobot-backend/utils/gpu_acceleration_optimizer.py @@ -224,7 +224,9 @@ async def optimize_for_multimodal_workload(self) -> GPUOptimizationResult: except Exception as e: self.logger.error("Multi-modal optimization failed: %s", e) - return GPUOptimizationResult.create_failed("multimodal_workload", "Multi-modal optimization failed") + return GPUOptimizationResult.create_failed( + "multimodal_workload", "Multi-modal optimization failed" + ) async def _collect_performance_baseline(self) -> Dict[str, float]: """Collect current performance metrics as baseline.""" diff --git a/autobot-backend/workflow_templates/service_health_monitor.yaml b/autobot-backend/workflow_templates/service_health_monitor.yaml new file mode 100644 index 000000000..6a7f5c1f2 --- /dev/null +++ b/autobot-backend/workflow_templates/service_health_monitor.yaml @@ -0,0 +1,79 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +# +# Workflow Template: Service Health Monitor (#3404) +# +# 
Triggered by Redis pub/sub events emitted by HealthCollector when a +# systemd service changes state. Use this template to create an +# automated workflow that notifies operators the moment a service fails +# or recovers. + +metadata: + id: service_health_monitor + name: "Service Health Monitor" + description: > + Listens for systemd service state-change events published by the SLM + HealthCollector and dispatches a SERVICE_FAILED notification through + the AutoBot notification pipeline. + version: "1.0.0" + category: monitoring + tags: + - systemd + - health + - alerting + +trigger: + type: REDIS_PUBSUB + # Glob pattern — matches any service on any managed node. + # To restrict to a specific service, replace * with the service name, + # e.g. autobot:services:autobot-backend:state_change + channel: "autobot:services:*:state_change" + # Only fire when the service has entered a failure-like state. + # Remove this filter block to react to all transitions (including recovery). + filter: + field: new_state + operator: in + values: + - failed + - crash-loop + +steps: + - id: notify_service_failure + name: "Send service-failure notification" + type: notification + event: service_failed + # Map pub/sub payload fields to notification template variables. + # The SERVICE_FAILED default template uses: service, hostname, + # prev_state, new_state, error_context. + payload_mapping: + service: "{{ trigger.payload.service }}" + hostname: "{{ trigger.payload.hostname }}" + prev_state: "{{ trigger.payload.prev_state }}" + new_state: "{{ trigger.payload.new_state }}" + error_context: "{{ trigger.payload.error_context }}" + # Channel routing — configure at least one channel. 
+ channels: + - in_app + # Uncomment and fill in to enable additional channels: + # email_recipients: + # - ops@example.com + # slack_webhook_url: "{{ env.SLACK_OPS_WEBHOOK }}" + # webhook_url: "{{ env.PAGERDUTY_EVENTS_URL }}" + + - id: log_state_change + name: "Log state change to knowledge base" + type: knowledge_write + depends_on: + - notify_service_failure + payload: + title: "Service state change: {{ trigger.payload.service }}" + body: > + Host {{ trigger.payload.hostname }} reported service + {{ trigger.payload.service }} transitioned from + {{ trigger.payload.prev_state }} to {{ trigger.payload.new_state }}. + Error context: {{ trigger.payload.error_context }} + tags: + - service-health + - "host:{{ trigger.payload.hostname }}" + - "service:{{ trigger.payload.service }}" diff --git a/autobot-frontend/CONVERSATION_FILE_MANAGER_IMPLEMENTATION.md b/autobot-frontend/CONVERSATION_FILE_MANAGER_IMPLEMENTATION.md index 658310971..95cb0d528 100644 --- a/autobot-frontend/CONVERSATION_FILE_MANAGER_IMPLEMENTATION.md +++ b/autobot-frontend/CONVERSATION_FILE_MANAGER_IMPLEMENTATION.md @@ -9,7 +9,7 @@ All conversation-specific file manager UI components have been successfully impl ## 📦 Components Implemented ### 1. **useConversationFiles.ts** (Composable) -**Location:** `/home/kali/Desktop/AutoBot/autobot-vue/src/composables/useConversationFiles.ts` +**Location:** `autobot-vue/src/composables/useConversationFiles.ts` **Features:** - Reactive state management for conversation files @@ -41,7 +41,7 @@ All conversation-specific file manager UI components have been successfully impl --- ### 2. **ChatFilePanel.vue** (Component) -**Location:** `/home/kali/Desktop/AutoBot/autobot-vue/src/components/chat/ChatFilePanel.vue` +**Location:** `autobot-vue/src/components/chat/ChatFilePanel.vue` **Features:** - Right sidebar panel (280px width) @@ -71,7 +71,7 @@ Already integrated in `ChatInterface.vue`: --- ### 3. 
**DeleteConversationDialog.vue** (Component) -**Location:** `/home/kali/Desktop/AutoBot/autobot-vue/src/components/chat/DeleteConversationDialog.vue` +**Location:** `autobot-vue/src/components/chat/DeleteConversationDialog.vue` **Features:** - Modal dialog with backdrop overlay @@ -116,7 +116,7 @@ Integrated in `ChatSidebar.vue`: ### Updated Files: #### 1. **ChatRepository.ts** -**Location:** `/home/kali/Desktop/AutoBot/autobot-vue/src/models/repositories/ChatRepository.ts` +**Location:** `autobot-vue/src/models/repositories/ChatRepository.ts` **Enhancement:** ```typescript @@ -135,7 +135,7 @@ async deleteChat( --- #### 2. **ChatController.ts** -**Location:** `/home/kali/Desktop/AutoBot/autobot-vue/src/models/controllers/ChatController.ts` +**Location:** `autobot-vue/src/models/controllers/ChatController.ts` **Enhancement:** ```typescript @@ -154,7 +154,7 @@ async deleteChatSession( --- #### 3. **ChatSidebar.vue** -**Location:** `/home/kali/Desktop/AutoBot/autobot-vue/src/components/chat/ChatSidebar.vue` +**Location:** `autobot-vue/src/components/chat/ChatSidebar.vue` **Enhancements:** - Imported `DeleteConversationDialog` component diff --git a/autobot-frontend/STUB_REMEDIATION_PROGRESS.md b/autobot-frontend/STUB_REMEDIATION_PROGRESS.md index 8474217bc..8729d7fe1 100644 --- a/autobot-frontend/STUB_REMEDIATION_PROGRESS.md +++ b/autobot-frontend/STUB_REMEDIATION_PROGRESS.md @@ -59,7 +59,7 @@ const uploadFile = async (upload: any, file: File): Promise => { #### Backend API Used - **Endpoint**: `POST /api/conversation-files/conversation/{session_id}/upload` -- **Location**: `/home/kali/Desktop/AutoBot/backend/api/conversation_files.py:274` +- **Location**: `backend/api/conversation_files.py:274` - **Status**: ✅ Already exists (implemented) #### Key Improvements @@ -143,7 +143,7 @@ const reloadSystem = async () => { #### Backend API Used - **Endpoint**: `POST /api/system/reload_config` -- **Location**: `/home/kali/Desktop/AutoBot/backend/api/system.py:226` +- 
**Location**: `backend/api/system.py:226` - **Status**: ✅ Already exists (implemented) - **Functionality**: Reloads configuration and clears caches @@ -249,12 +249,12 @@ const refreshMemoryStatus = async () => { 1. **GPU Detection**: - **Endpoint**: `GET /api/monitoring/hardware/gpu` - - **Location**: `/home/kali/Desktop/AutoBot/backend/api/monitoring.py:538` + - **Location**: `backend/api/monitoring.py:538` - **Returns**: Real GPU metrics (utilization, memory, temperature, name) 2. **System Memory**: - **Endpoint**: `GET /api/system/metrics` - - **Location**: `/home/kali/Desktop/AutoBot/backend/api/system.py:587` + - **Location**: `backend/api/system.py:587` - **Returns**: Real memory metrics using `psutil` (total, available, used, percent) #### Key Improvements diff --git a/autobot-frontend/XTERM_UPGRADE_IMPLEMENTATION.md b/autobot-frontend/XTERM_UPGRADE_IMPLEMENTATION.md index d545009cc..f1dacb039 100644 --- a/autobot-frontend/XTERM_UPGRADE_IMPLEMENTATION.md +++ b/autobot-frontend/XTERM_UPGRADE_IMPLEMENTATION.md @@ -188,7 +188,7 @@ import ToolsTerminal from '@/components/ToolsTerminal.vue' ## Deployment Instructions ### 1. Verify Files Created -All files have been created locally in `/home/kali/Desktop/AutoBot/autobot-vue/`: +All files have been created locally in `autobot-vue/`: - ✅ `src/components/terminal/BaseXTerminal.vue` - ✅ `src/components/terminal/HostSelector.vue` - ✅ `src/components/ChatTerminal.vue` @@ -199,7 +199,7 @@ All files have been created locally in `/home/kali/Desktop/AutoBot/autobot-vue/` ### 2. 
Sync to Frontend VM ```bash -cd /home/kali/Desktop/AutoBot +cd /opt/autobot # Sync new terminal components ./scripts/utilities/sync-frontend.sh src/components/terminal/ diff --git a/autobot-frontend/package-lock.json b/autobot-frontend/package-lock.json index 80e4c3600..0f2be0daa 100644 --- a/autobot-frontend/package-lock.json +++ b/autobot-frontend/package-lock.json @@ -18,6 +18,7 @@ "@xterm/addon-fit": "^0.11.0", "@xterm/addon-web-links": "^0.12.0", "@xterm/xterm": "^6.0.0", + "3d-force-graph": "^1.79.1", "apexcharts": "^5.10.4", "cytoscape": "^3.33.1", "cytoscape-fcose": "^2.2.0", @@ -26,6 +27,8 @@ "onnxruntime-web": "^1.24.3", "pinia": "^3.0.4", "pinia-plugin-persistedstate": "^4.7.1", + "three": "^0.183.2", + "three-spritetext": "^1.10.0", "vue": "^3.5.31", "vue-i18n": "^11.3.0", "vue-router": "^5.0.4", @@ -3154,6 +3157,12 @@ "dev": true, "license": "MIT" }, + "node_modules/@tweenjs/tween.js": { + "version": "25.0.0", + "resolved": "https://registry.npmjs.org/@tweenjs/tween.js/-/tween.js-25.0.0.tgz", + "integrity": "sha512-XKLA6syeBUaPzx4j3qwMqzzq+V4uo72BnlbOjmuljLrRqdsd3qnzvZZoxvMHZ23ndsRS4aufU6JOZYpCbU6T1A==", + "license": "MIT" + }, "node_modules/@tybys/wasm-util": { "version": "0.10.1", "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.1.tgz", @@ -4442,6 +4451,22 @@ "addons/*" ] }, + "node_modules/3d-force-graph": { + "version": "1.79.1", + "resolved": "https://registry.npmjs.org/3d-force-graph/-/3d-force-graph-1.79.1.tgz", + "integrity": "sha512-iscIVt4jWjJ11KEEswgOIOWk8Ew4EFKHRyERJXJ0ouycqzHCtWwb9E5imnxS5rYF1f1IESkFNAfB+h3EkU0Irw==", + "license": "MIT", + "dependencies": { + "accessor-fn": "1", + "kapsule": "^1.16", + "three": ">=0.118 <1", + "three-forcegraph": "1", + "three-render-objects": "^1.35" + }, + "engines": { + "node": ">=12" + } + }, "node_modules/abbrev": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-2.0.0.tgz", @@ -4452,6 +4477,15 @@ "node": "^14.17.0 || ^16.13.0 || >=18.0.0" } }, + 
"node_modules/accessor-fn": { + "version": "1.5.3", + "resolved": "https://registry.npmjs.org/accessor-fn/-/accessor-fn-1.5.3.tgz", + "integrity": "sha512-rkAofCwe/FvYFUlMB0v0gWmhqtfAtV1IUkdPbfhTUyYniu5LrC0A0UJkTH0Jv3S8SvwkmfuAlY+mQIJATdocMA==", + "license": "MIT", + "engines": { + "node": ">=12" + } + }, "node_modules/acorn": { "version": "8.16.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", @@ -5567,6 +5601,165 @@ "cytoscape": "^3.2.0" } }, + "node_modules/d3-array": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz", + "integrity": "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==", + "license": "ISC", + "dependencies": { + "internmap": "1 - 2" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-binarytree": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/d3-binarytree/-/d3-binarytree-1.0.2.tgz", + "integrity": "sha512-cElUNH+sHu95L04m92pG73t2MEJXKu+GeKUN1TJkFsu93E5W8E9Sc3kHEGJKgenGvj19m6upSn2EunvMgMD2Yw==", + "license": "MIT" + }, + "node_modules/d3-color": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz", + "integrity": "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-dispatch": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-dispatch/-/d3-dispatch-3.0.1.tgz", + "integrity": "sha512-rzUyPU/S7rwUflMyLc1ETDeBj0NRuHKKAcvukozwhshr6g6c5d8zh4c2gQjY2bZ0dXeGLWc1PF174P2tVvKhfg==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-force-3d": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/d3-force-3d/-/d3-force-3d-3.0.6.tgz", + "integrity": "sha512-4tsKHUPLOVkyfEffZo1v6sFHvGFwAIIjt/W8IThbp08DYAsXZck+2pSHEG5W1+gQgEvFLdZkYvmJAbRM2EzMnA==", + "license": "MIT", + "dependencies": { + 
"d3-binarytree": "1", + "d3-dispatch": "1 - 3", + "d3-octree": "1", + "d3-quadtree": "1 - 3", + "d3-timer": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-format": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz", + "integrity": "sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-interpolate": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz", + "integrity": "sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==", + "license": "ISC", + "dependencies": { + "d3-color": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-octree": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/d3-octree/-/d3-octree-1.1.0.tgz", + "integrity": "sha512-F8gPlqpP+HwRPMO/8uOu5wjH110+6q4cgJvgJT6vlpy3BEaDIKlTZrgHKZSp/i1InRpVfh4puY/kvL6MxK930A==", + "license": "MIT" + }, + "node_modules/d3-quadtree": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-quadtree/-/d3-quadtree-3.0.1.tgz", + "integrity": "sha512-04xDrxQTDTCFwP5H6hRhsRcb9xxv2RzkcsygFzmkSIOJy3PeRJP7sNk3VRIbKXcog561P9oU0/rVH6vDROAgUw==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-scale": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz", + "integrity": "sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==", + "license": "ISC", + "dependencies": { + "d3-array": "2.10.0 - 3", + "d3-format": "1 - 3", + "d3-interpolate": "1.2.0 - 3", + "d3-time": "2.1.1 - 3", + "d3-time-format": "2 - 4" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-scale-chromatic": { + "version": "3.1.0", + "resolved": 
"https://registry.npmjs.org/d3-scale-chromatic/-/d3-scale-chromatic-3.1.0.tgz", + "integrity": "sha512-A3s5PWiZ9YCXFye1o246KoscMWqf8BsD9eRiJ3He7C9OBaxKhAd5TFCdEx/7VbKtxxTsu//1mMJFrEt572cEyQ==", + "license": "ISC", + "dependencies": { + "d3-color": "1 - 3", + "d3-interpolate": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-selection": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", + "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-time": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz", + "integrity": "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==", + "license": "ISC", + "dependencies": { + "d3-array": "2 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-time-format": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz", + "integrity": "sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==", + "license": "ISC", + "dependencies": { + "d3-time": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-timer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz", + "integrity": "sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, "node_modules/dashdash": { "version": "1.14.1", "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", @@ -5580,6 +5773,18 @@ "node": ">=0.10" } }, + "node_modules/data-bind-mapper": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/data-bind-mapper/-/data-bind-mapper-1.0.3.tgz", + "integrity": 
"sha512-QmU3lyEnbENQPo0M1F9BMu4s6cqNNp8iJA+b/HP2sSb7pf3dxwF3+EP1eO69rwBfH9kFJ1apmzrtogAmVt2/Xw==", + "license": "MIT", + "dependencies": { + "accessor-fn": "1" + }, + "engines": { + "node": ">=12" + } + }, "node_modules/data-urls": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-7.0.0.tgz", @@ -6704,6 +6909,20 @@ "dev": true, "license": "ISC" }, + "node_modules/float-tooltip": { + "version": "1.7.5", + "resolved": "https://registry.npmjs.org/float-tooltip/-/float-tooltip-1.7.5.tgz", + "integrity": "sha512-/kXzuDnnBqyyWyhDMH7+PfP8J/oXiAavGzcRxASOMRHFuReDtofizLLJsf7nnDLAfEaMW4pVWaXrAjtnglpEkg==", + "license": "MIT", + "dependencies": { + "d3-selection": "2 - 3", + "kapsule": "^1.16", + "preact": "10" + }, + "engines": { + "node": ">=12" + } + }, "node_modules/follow-redirects": { "version": "1.15.11", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", @@ -7268,6 +7487,15 @@ "node": ">= 0.4" } }, + "node_modules/internmap": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/internmap/-/internmap-2.0.3.tgz", + "integrity": "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, "node_modules/is-arguments": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/is-arguments/-/is-arguments-1.2.0.tgz", @@ -8087,6 +8315,18 @@ "verror": "1.10.0" } }, + "node_modules/kapsule": { + "version": "1.16.3", + "resolved": "https://registry.npmjs.org/kapsule/-/kapsule-1.16.3.tgz", + "integrity": "sha512-4+5mNNf4vZDSwPhKprKwz3330iisPrb08JyMgbsdFrimBCKNHecua/WBwvVg3n7vwx0C1ARjfhwIpbrbd9n5wg==", + "license": "MIT", + "dependencies": { + "lodash-es": "4" + }, + "engines": { + "node": ">=12" + } + }, "node_modules/keyv": { "version": "4.5.4", "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", @@ -8475,12 +8715,18 @@ } }, "node_modules/lodash": { - "version": "4.17.23", - 
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz", - "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==", + "version": "4.18.1", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.18.1.tgz", + "integrity": "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==", "dev": true, "license": "MIT" }, + "node_modules/lodash-es": { + "version": "4.18.1", + "resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.18.1.tgz", + "integrity": "sha512-J8xewKD/Gk22OZbhpOVSwcs60zhd95ESDwezOFuA3/099925PdHJ7OFHNTGtajL3AlZkykD32HykiMo+BIBI8A==", + "license": "MIT" + }, "node_modules/lodash.once": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz", @@ -8990,6 +9236,44 @@ "dev": true, "license": "MIT" }, + "node_modules/ngraph.events": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/ngraph.events/-/ngraph.events-1.4.0.tgz", + "integrity": "sha512-NeDGI4DSyjBNBRtA86222JoYietsmCXbs8CEB0dZ51Xeh4lhVl1y3wpWLumczvnha8sFQIW4E0vvVWwgmX2mGw==", + "license": "BSD-3-Clause" + }, + "node_modules/ngraph.forcelayout": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/ngraph.forcelayout/-/ngraph.forcelayout-3.3.1.tgz", + "integrity": "sha512-MKBuEh1wujyQHFTW57y5vd/uuEOK0XfXYxm3lC7kktjJLRdt/KEKEknyOlc6tjXflqBKEuYBBcu7Ax5VY+S6aw==", + "license": "BSD-3-Clause", + "dependencies": { + "ngraph.events": "^1.0.0", + "ngraph.merge": "^1.0.0", + "ngraph.random": "^1.0.0" + } + }, + "node_modules/ngraph.graph": { + "version": "20.1.2", + "resolved": "https://registry.npmjs.org/ngraph.graph/-/ngraph.graph-20.1.2.tgz", + "integrity": "sha512-W/G3GBR3Y5UxMLHTUCPP9v+pbtpzwuAEIqP5oZV+9IwgxAIEZwh+Foc60iPc1idlnK7Zxu0p3puxAyNmDvBd0Q==", + "license": "BSD-3-Clause", + "dependencies": { + "ngraph.events": "^1.4.0" + } + }, + "node_modules/ngraph.merge": { + "version": "1.0.0", + "resolved": 
"https://registry.npmjs.org/ngraph.merge/-/ngraph.merge-1.0.0.tgz", + "integrity": "sha512-5J8YjGITUJeapsomtTALYsw7rFveYkM+lBj3QiYZ79EymQcuri65Nw3knQtFxQBU1r5iOaVRXrSwMENUPK62Vg==", + "license": "MIT" + }, + "node_modules/ngraph.random": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/ngraph.random/-/ngraph.random-1.2.0.tgz", + "integrity": "sha512-4EUeAGbB2HWX9njd6bP6tciN6ByJfoaAvmVL9QTaZSeXrW46eNGA9GajiXiPBbvFqxUWFkEbyo6x5qsACUuVfA==", + "license": "BSD-3-Clause" + }, "node_modules/node-releases": { "version": "2.0.27", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.27.tgz", @@ -9650,6 +9934,18 @@ "node": ">=18" } }, + "node_modules/polished": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/polished/-/polished-4.3.1.tgz", + "integrity": "sha512-OBatVyC/N7SCW/FaDHrSd+vn0o5cS855TOmYi4OkdWUMSJCET/xip//ch8xGUvtr3i44X9LVyWwQlRMTN3pwSA==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.17.8" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/possible-typed-array-names": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz", @@ -9702,6 +9998,16 @@ "node": ">=4" } }, + "node_modules/preact": { + "version": "10.29.1", + "resolved": "https://registry.npmjs.org/preact/-/preact-10.29.1.tgz", + "integrity": "sha512-gQCLc/vWroE8lIpleXtdJhTFDogTdZG9AjMUpVkDf2iTCNwYNWA+u16dL41TqUDJO4gm2IgrcMv3uTpjd4Pwmg==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/preact" + } + }, "node_modules/prelude-ls": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", @@ -10742,6 +11048,67 @@ "url": "https://opencollective.com/webpack" } }, + "node_modules/three": { + "version": "0.183.2", + "resolved": "https://registry.npmjs.org/three/-/three-0.183.2.tgz", + "integrity": 
"sha512-di3BsL2FEQ1PA7Hcvn4fyJOlxRRgFYBpMTcyOgkwJIaDOdJMebEFPA+t98EvjuljDx4hNulAGwF6KIjtwI5jgQ==", + "license": "MIT" + }, + "node_modules/three-forcegraph": { + "version": "1.43.1", + "resolved": "https://registry.npmjs.org/three-forcegraph/-/three-forcegraph-1.43.1.tgz", + "integrity": "sha512-lQnYPLvR31gb91mF5xHhU0jPHJgBPw9QB23R6poCk8Tgvz8sQtq7wTxwClcPdfKCBbHXsb7FSqK06Osiu1kQ5A==", + "license": "MIT", + "dependencies": { + "accessor-fn": "1", + "d3-array": "1 - 3", + "d3-force-3d": "2 - 3", + "d3-scale": "1 - 4", + "d3-scale-chromatic": "1 - 3", + "data-bind-mapper": "1", + "kapsule": "^1.16", + "ngraph.forcelayout": "3", + "ngraph.graph": "20", + "tinycolor2": "1" + }, + "engines": { + "node": ">=12" + }, + "peerDependencies": { + "three": ">=0.118.3" + } + }, + "node_modules/three-render-objects": { + "version": "1.40.5", + "resolved": "https://registry.npmjs.org/three-render-objects/-/three-render-objects-1.40.5.tgz", + "integrity": "sha512-iA+rYdal0tkond37YeXIvEMAxUFGxw1wU6+ce/GsuiOUKL+8zaxFXY7PTVft0F+Km50mbmtKQ24b2FdwSG3p3A==", + "license": "MIT", + "dependencies": { + "@tweenjs/tween.js": "18 - 25", + "accessor-fn": "1", + "float-tooltip": "^1.7", + "kapsule": "^1.16", + "polished": "4" + }, + "engines": { + "node": ">=12" + }, + "peerDependencies": { + "three": ">=0.168" + } + }, + "node_modules/three-spritetext": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/three-spritetext/-/three-spritetext-1.10.0.tgz", + "integrity": "sha512-t08iP1FCU1lQh8T5MmCpdijKgas8GDHJE0LqMGBuVu3xqMMpFnEZhTlih7FlxLPQizHIGoumUSpfOlY1GO/Tgg==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "peerDependencies": { + "three": ">=0.86.0" + } + }, "node_modules/throttleit": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/throttleit/-/throttleit-1.0.1.tgz", @@ -10766,6 +11133,12 @@ "dev": true, "license": "MIT" }, + "node_modules/tinycolor2": { + "version": "1.6.0", + "resolved": 
"https://registry.npmjs.org/tinycolor2/-/tinycolor2-1.6.0.tgz", + "integrity": "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw==", + "license": "MIT" + }, "node_modules/tinyexec": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/tinyexec/-/tinyexec-1.0.4.tgz", diff --git a/autobot-frontend/package.json b/autobot-frontend/package.json index 1cb914ad9..8dcbdbf12 100644 --- a/autobot-frontend/package.json +++ b/autobot-frontend/package.json @@ -45,6 +45,7 @@ "@xterm/addon-fit": "^0.11.0", "@xterm/addon-web-links": "^0.12.0", "@xterm/xterm": "^6.0.0", + "3d-force-graph": "^1.79.1", "apexcharts": "^5.10.4", "cytoscape": "^3.33.1", "cytoscape-fcose": "^2.2.0", @@ -53,6 +54,8 @@ "onnxruntime-web": "^1.24.3", "pinia": "^3.0.4", "pinia-plugin-persistedstate": "^4.7.1", + "three": "^0.183.2", + "three-spritetext": "^1.10.0", "vue": "^3.5.31", "vue-i18n": "^11.3.0", "vue-router": "^5.0.4", diff --git a/autobot-frontend/src/App.vue b/autobot-frontend/src/App.vue index 632ef40df..b1dce692b 100644 --- a/autobot-frontend/src/App.vue +++ b/autobot-frontend/src/App.vue @@ -15,7 +15,7 @@
- + {{ t('agent.settings.unsavedChanges') }}
- + \ No newline at end of file diff --git a/autobot-frontend/src/components/analytics/CodeGenerationDashboard.vue b/autobot-frontend/src/components/analytics/CodeGenerationDashboard.vue index d79226958..f59071556 100644 --- a/autobot-frontend/src/components/analytics/CodeGenerationDashboard.vue +++ b/autobot-frontend/src/components/analytics/CodeGenerationDashboard.vue @@ -4,6 +4,8 @@

{{ $t('analytics.codeGeneration.title') }}

{{ $t('analytics.codeGeneration.subtitle') }}

+ +

{{ sourceId }}

@@ -326,12 +328,25 @@ + + diff --git a/autobot-frontend/src/components/ui/SystemStatusNotification.vue b/autobot-frontend/src/components/ui/SystemStatusNotification.vue index 220a9b1bb..c0a118288 100644 --- a/autobot-frontend/src/components/ui/SystemStatusNotification.vue +++ b/autobot-frontend/src/components/ui/SystemStatusNotification.vue @@ -13,7 +13,7 @@ >
diff --git a/autobot-frontend/src/components/workflow/PhaseProgressionIndicator.vue b/autobot-frontend/src/components/workflow/PhaseProgressionIndicator.vue new file mode 100644 index 000000000..9e3cddbba --- /dev/null +++ b/autobot-frontend/src/components/workflow/PhaseProgressionIndicator.vue @@ -0,0 +1,630 @@ + + + + + + \ No newline at end of file diff --git a/autobot-frontend/src/components/workflow/WorkflowLiveDashboard.vue b/autobot-frontend/src/components/workflow/WorkflowLiveDashboard.vue index 44aeaa8ae..88af5e92a 100644 --- a/autobot-frontend/src/components/workflow/WorkflowLiveDashboard.vue +++ b/autobot-frontend/src/components/workflow/WorkflowLiveDashboard.vue @@ -155,6 +155,7 @@