From 0dba8915b57b4b2265e315eb10be5a9c6fb82733 Mon Sep 17 00:00:00 2001 From: mrveiss Date: Mon, 30 Mar 2026 16:50:43 +0300 Subject: [PATCH 1/3] feat(slm): add deployed-vs-source drift detection to code-sync (#2834) - New services/drift_checker.py: SHA-256 checksum comparison between code_source and deployed directories, skipping .pyc/__pycache__/venv/.git - New GET /code-sync/drift endpoint returns FileDriftReport with per-file drift status (modified | source_only | deployed_only) - Added DriftedFile and FileDriftReport Pydantic schemas to models/schemas.py - useCodeSync composable: fetchDrift() method + FileDriftReport/DriftedFile types - CodeSyncView: "File Drift Check" card with expandable drifted-files table Co-Authored-By: Claude Sonnet 4.6 --- autobot-slm-backend/api/code_sync.py | 46 ++++ autobot-slm-backend/models/schemas.py | 20 ++ autobot-slm-backend/services/drift_checker.py | 211 ++++++++++++++++++ .../src/composables/useCodeSync.ts | 52 +++++ .../src/views/CodeSyncView.vue | 138 ++++++++++++ 5 files changed, 467 insertions(+) create mode 100644 autobot-slm-backend/services/drift_checker.py diff --git a/autobot-slm-backend/api/code_sync.py b/autobot-slm-backend/api/code_sync.py index da860a91c..f74c8a8e4 100644 --- a/autobot-slm-backend/api/code_sync.py +++ b/autobot-slm-backend/api/code_sync.py @@ -34,6 +34,7 @@ CodeSyncStatusResponse, CodeVersionNotification, CodeVersionNotificationResponse, + FileDriftReport, FleetSyncJobStatus, FleetSyncNodeStatus, FleetSyncRequest, @@ -50,6 +51,11 @@ from services.auth import get_current_user from services.code_distributor import get_code_distributor from services.database import get_db +from services.drift_checker import ( + build_drift_report, + get_default_deployed_dir, + get_default_source_dir, +) from services.fleet_sync_guard import assert_no_running_sync, fleet_sync_lock from services.git_tracker import DEFAULT_BRANCH, DEFAULT_REPO_PATH, get_git_tracker from services.playbook_executor import get_playbook_executor @@ -395,6 +401,46 @@ async def get_sync_status( ) +@router.get("/drift", response_model=FileDriftReport) +async def get_file_drift( + _: Annotated[dict, Depends(get_current_user)], + component: str = "autobot-slm-backend", +) -> FileDriftReport: + """ + Compare file checksums between code_source and the deployed directory (Issue #2834). + + Detects files that have drifted due to manual patches or incomplete Ansible deploys. + Only Python, config, and script files are compared; .pyc, __pycache__, venv, and + .git directories are always excluded. + + Query params: + component: Sub-directory to compare (default: autobot-slm-backend). + + Returns a FileDriftReport with a list of drifted files and their checksums. + """ + source_dir = get_default_source_dir(component) + deployed_dir = get_default_deployed_dir(component) + + logger.info( + "drift check: comparing source=%s deployed=%s", source_dir, deployed_dir + ) + + report = await asyncio.get_running_loop().run_in_executor( + None, + build_drift_report, + source_dir, + deployed_dir, + ) + + logger.info( + "drift check: %d drifted files out of %d compared", + len(report["drifted_files"]), + report["total_compared"], + ) + + return FileDriftReport(**report) + + @router.post("/refresh", response_model=CodeSyncRefreshResponse) async def refresh_version( db: Annotated[AsyncSession, Depends(get_db)], diff --git a/autobot-slm-backend/models/schemas.py b/autobot-slm-backend/models/schemas.py index f7d31a061..c58b50a53 100644 --- a/autobot-slm-backend/models/schemas.py +++ b/autobot-slm-backend/models/schemas.py @@ -1557,6 +1557,26 @@ class CodeSyncRefreshResponse(BaseModel): has_update: bool = False +class DriftedFile(BaseModel): + """A file whose checksum differs between code_source and deployed (Issue #2834).""" + + path: str + source_checksum: Optional[str] = None + deployed_checksum: Optional[str] = None + status: str # "modified" | "source_only" | "deployed_only" + + +class FileDriftReport(BaseModel): + """Result of comparing code_source vs deployed file checksums (Issue #2834).""" + + source_dir: str + deployed_dir: str + drifted_files: list[DriftedFile] + total_compared: int + drift_detected: bool + checked_at: str + + class PendingNodeResponse(BaseModel): """Node that needs code update.""" diff --git a/autobot-slm-backend/services/drift_checker.py b/autobot-slm-backend/services/drift_checker.py new file mode 100644 index 000000000..b1ae56dbc --- /dev/null +++ b/autobot-slm-backend/services/drift_checker.py @@ -0,0 +1,211 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Drift Checker Service (Issue #2834). + +Compares file checksums between the code_source directory and the deployed +directory to detect files that have been manually patched or missed by Ansible. +""" + +import hashlib +import logging +import os +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, List, Tuple + +logger = logging.getLogger(__name__) + +# File extensions that are meaningful to compare. +_INCLUDE_EXTENSIONS = {".py", ".cfg", ".ini", ".toml", ".yaml", ".yml", ".sh", ".txt"} + +# Directory names to skip entirely during traversal. +_SKIP_DIRS = { + "__pycache__", + ".git", + "venv", + ".venv", + "node_modules", + ".mypy_cache", + ".ruff_cache", + "dist", + "build", +} + + +def _file_checksum(path: Path, block_size: int = 65536) -> str: + """Return the SHA-256 hex digest of a file. + + Reads in blocks to avoid loading large files into memory at once. + + Args: + path: Absolute path to the file. + block_size: Read chunk size in bytes. + + Returns: + Lowercase hex SHA-256 digest string. + """ + h = hashlib.sha256() + with open(path, "rb") as fh: + while chunk := fh.read(block_size): + h.update(chunk) + return h.hexdigest() + + +def _collect_checksums(root: Path) -> Dict[str, str]: + """Walk *root* and return a mapping of relative-path → SHA-256 checksum. + + Only files with extensions in ``_INCLUDE_EXTENSIONS`` are included. + Directories in ``_SKIP_DIRS`` are pruned from the walk. + + Args: + root: Directory to scan. + + Returns: + Dict mapping POSIX-style relative path strings to hex digest strings. + """ + checksums: Dict[str, str] = {} + for dirpath, dirnames, filenames in os.walk(root): + # Prune skip dirs in-place so os.walk does not descend into them. + dirnames[:] = [d for d in dirnames if d not in _SKIP_DIRS] + + for filename in filenames: + filepath = Path(dirpath) / filename + if filepath.suffix not in _INCLUDE_EXTENSIONS: + continue + try: + rel = filepath.relative_to(root).as_posix() + checksums[rel] = _file_checksum(filepath) + except (OSError, IOError) as exc: + logger.warning("drift_checker: cannot read %s: %s", filepath, exc) + + return checksums + + +def compute_drift( + source_dir: str, + deployed_dir: str, +) -> Tuple[List[dict], int]: + """Compare file checksums between *source_dir* and *deployed_dir*. + + Returns a tuple of (drifted_file_dicts, total_compared_count). + + Each drifted file dict has keys: + path – POSIX relative path + source_checksum – SHA-256 of the source file (None if absent) + deployed_checksum – SHA-256 of the deployed file (None if absent) + status – "modified" | "source_only" | "deployed_only" + + Files that exist in both directories with identical checksums are not + included in the returned list. + + Args: + source_dir: Absolute path to the authoritative code source directory. + deployed_dir: Absolute path to the currently deployed directory. + + Returns: + Tuple of (list_of_drift_dicts, total_files_compared). + """ + src_path = Path(source_dir) + dep_path = Path(deployed_dir) + + if not src_path.is_dir(): + logger.warning("drift_checker: source_dir does not exist: %s", source_dir) + return [], 0 + + if not dep_path.is_dir(): + logger.warning("drift_checker: deployed_dir does not exist: %s", deployed_dir) + return [], 0 + + src_checksums = _collect_checksums(src_path) + dep_checksums = _collect_checksums(dep_path) + + all_paths = set(src_checksums) | set(dep_checksums) + total_compared = len(all_paths) + drifted: List[dict] = [] + + for rel_path in sorted(all_paths): + src_cs = src_checksums.get(rel_path) + dep_cs = dep_checksums.get(rel_path) + + if src_cs == dep_cs: + # Both present and identical — no drift. + continue + + if src_cs is None: + status = "deployed_only" + elif dep_cs is None: + status = "source_only" + else: + status = "modified" + + drifted.append( + { + "path": rel_path, + "source_checksum": src_cs, + "deployed_checksum": dep_cs, + "status": status, + } + ) + + return drifted, total_compared + + +def build_drift_report( + source_dir: str, + deployed_dir: str, +) -> dict: + """Build the full drift report dict for the API response (Issue #2834). + + Args: + source_dir: Path to code_source directory. + deployed_dir: Path to deployed component directory. + + Returns: + Dict matching the ``FileDriftReport`` schema. + """ + drifted, total = compute_drift(source_dir, deployed_dir) + + return { + "source_dir": source_dir, + "deployed_dir": deployed_dir, + "drifted_files": drifted, + "total_compared": total, + "drift_detected": len(drifted) > 0, + "checked_at": datetime.now(timezone.utc).isoformat(), + } + + +def get_default_deployed_dir(component: str = "autobot-slm-backend") -> str: + """Return the expected deployed path for *component* under /opt/autobot. + + Reads ``SLM_DEPLOYED_ROOT`` from the environment so the path is + configurable without hardcoding. + + Args: + component: Sub-directory name under the deployed root. + + Returns: + Absolute path string for the deployed component directory. + """ + deployed_root = os.environ.get("SLM_DEPLOYED_ROOT", "/opt/autobot") + return str(Path(deployed_root) / component) + + +def get_default_source_dir(component: str = "autobot-slm-backend") -> str: + """Return the code_source sub-directory for *component*. + + Reads ``SLM_REPO_PATH`` from the environment (same var used by git_tracker). + Falls back to the repository root when the component sub-directory does not + exist yet (e.g. monorepo roots that serve as the authoritative source). + + Args: + component: Sub-directory name inside the code_source repository. + + Returns: + Absolute path string for the source directory to compare against. + """ + source_root = os.environ.get("SLM_REPO_PATH", "/opt/autobot/code_source") + candidate = Path(source_root) / component + return str(candidate) if candidate.is_dir() else source_root diff --git a/autobot-slm-frontend/src/composables/useCodeSync.ts b/autobot-slm-frontend/src/composables/useCodeSync.ts index b52b9fa89..b68192845 100644 --- a/autobot-slm-frontend/src/composables/useCodeSync.ts +++ b/autobot-slm-frontend/src/composables/useCodeSync.ts @@ -147,6 +147,23 @@ export interface ScheduleRunResponse { job_id: string | null } +// Issue #2834: Drift detection types +export interface DriftedFile { + path: string + source_checksum: string | null + deployed_checksum: string | null + status: 'modified' | 'source_only' | 'deployed_only' +} + +export interface FileDriftReport { + source_dir: string + deployed_dir: string + drifted_files: DriftedFile[] + total_compared: number + drift_detected: boolean + checked_at: string +} + // Re-export role types for consumers (Issue #779) export type { Role, SyncResult } @@ -185,6 +202,7 @@ export function useCodeSync() { const loading = ref(false) const error = ref(null) const lastRefresh = ref(null) + const driftReport = ref(null) // Issue #2834 // ============================================================================= // Computed Properties @@ -601,6 +619,36 @@ export function useCodeSync() { } } + // ============================================================================= + // Drift Detection (Issue #2834) + // ============================================================================= + + /** + * Fetch a file-level drift report comparing code_source vs deployed files. + * + * @param component - Sub-directory to compare (default: autobot-slm-backend). + */ + async function fetchDrift(component = 'autobot-slm-backend'): Promise { + loading.value = true + error.value = null + + try { + const response = await client.get('/code-sync/drift', { + params: { component }, + }) + driftReport.value = response.data + return response.data + } catch (e) { + error.value = e instanceof Error ? e.message : 'Failed to fetch drift report' + if (axios.isAxiosError(e) && e.response?.data?.detail) { + error.value = e.response.data.detail + } + return null + } finally { + loading.value = false + } + } + // ============================================================================= // Return Public API // ============================================================================= @@ -613,6 +661,7 @@ export function useCodeSync() { loading: readonly(loading), error: readonly(error), lastRefresh: readonly(lastRefresh), + driftReport: readonly(driftReport), // Issue #2834 // Computed hasOutdatedNodes, @@ -650,5 +699,8 @@ export function useCodeSync() { fetchRoles: rolesComposable.fetchRoles, syncRole: rolesComposable.syncRole, pullFromSource: rolesComposable.pullFromSource, + + // Drift detection (Issue #2834) + fetchDrift, } } diff --git a/autobot-slm-frontend/src/views/CodeSyncView.vue b/autobot-slm-frontend/src/views/CodeSyncView.vue index 07fd7ba5e..60fe0e0af 100644 --- a/autobot-slm-frontend/src/views/CodeSyncView.vue +++ b/autobot-slm-frontend/src/views/CodeSyncView.vue @@ -18,6 +18,7 @@ import { type SyncOptions, type UpdateSchedule, type ScheduleCreateRequest, + type FileDriftReport, } from '@/composables/useCodeSync' import { createLogger } from '@/utils/debugUtils' import { formatDateTime } from '@/composables/useTimezone' @@ -61,6 +62,11 @@ const codeSourceComposable = useCodeSource() const codeSourceData = codeSourceComposable.codeSource const showCodeSourceModal = ref(false) +// Drift detection state (Issue #2834) +const driftReport = ref(null) +const isDriftLoading = ref(false) +const showDriftDetails = ref(false) + // ============================================================================= // Computed Properties // ============================================================================= @@ -348,6 +354,19 @@ function describeCron(expression: string): string { } // ============================================================================= +// Drift Detection (Issue #2834) +// ============================================================================= + +async function handleCheckDrift(): Promise { + isDriftLoading.value = true + const result = await codeSync.fetchDrift() + if (result) { + driftReport.value = result + showDriftDetails.value = true + } + isDriftLoading.value = false +} + // ============================================================================= // Lifecycle // ============================================================================= @@ -542,6 +561,125 @@ onUnmounted(() => { + +
+
+
+

File Drift Check

+

+ Compare checksums between code_source and deployed files to detect manual patches. +

+
+ +
+ + +
+
+ + + + + {{ driftReport.drifted_files.length }} drifted file{{ driftReport.drifted_files.length !== 1 ? 's' : '' }} detected + + + + + + No drift detected ({{ driftReport.total_compared }} files compared) + + + Checked {{ formatDate(driftReport.checked_at) }} + +
+ +
+ Source: {{ driftReport.source_dir }} +  →  + Deployed: {{ driftReport.deployed_dir }} +
+ + + + + +
+ + + + + + + + + + + + + + + + + +
StatusFileSource SHA-256Deployed SHA-256
+ + {{ file.status === 'modified' ? 'Modified' : file.status === 'source_only' ? 'Source only' : 'Deployed only' }} + + {{ file.path }} + {{ file.source_checksum ? file.source_checksum.substring(0, 16) + '...' : '—' }} + + {{ file.deployed_checksum ? file.deployed_checksum.substring(0, 16) + '...' : '—' }} +
+
+
+ + +
+ Click "Check Drift" to compare file checksums between the code source and deployed directories. +
+
+
Date: Sat, 4 Apr 2026 00:38:19 +0300 Subject: [PATCH 2/3] fix(slm): add component allowlist to prevent path traversal in /drift endpoint (#3427) - Add ALLOWED_COMPONENTS frozenset in drift_checker.py - Validate component param against allowlist in get_file_drift(); raise HTTP 400 on mismatch Co-Authored-By: Claude Sonnet 4.6 --- autobot-slm-backend/api/code_sync.py | 8 ++++++++ autobot-slm-backend/services/drift_checker.py | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/autobot-slm-backend/api/code_sync.py b/autobot-slm-backend/api/code_sync.py index f74c8a8e4..0f5bb7063 100644 --- a/autobot-slm-backend/api/code_sync.py +++ b/autobot-slm-backend/api/code_sync.py @@ -52,6 +52,7 @@ from services.code_distributor import get_code_distributor from services.database import get_db from services.drift_checker import ( + ALLOWED_COMPONENTS, build_drift_report, get_default_deployed_dir, get_default_source_dir, @@ -415,9 +416,16 @@ async def get_file_drift( Query params: component: Sub-directory to compare (default: autobot-slm-backend). + Must be one of the allowed components (Issue #3427). Returns a FileDriftReport with a list of drifted files and their checksums. """ + if component not in ALLOWED_COMPONENTS: + raise HTTPException( + status_code=400, + detail=f"Invalid component '{component}'. Must be one of: {sorted(ALLOWED_COMPONENTS)}", + ) + source_dir = get_default_source_dir(component) deployed_dir = get_default_deployed_dir(component) diff --git a/autobot-slm-backend/services/drift_checker.py b/autobot-slm-backend/services/drift_checker.py index b1ae56dbc..f295e973d 100644 --- a/autobot-slm-backend/services/drift_checker.py +++ b/autobot-slm-backend/services/drift_checker.py @@ -20,6 +20,17 @@ # File extensions that are meaningful to compare. _INCLUDE_EXTENSIONS = {".py", ".cfg", ".ini", ".toml", ".yaml", ".yml", ".sh", ".txt"} +# Permitted component names for the /drift endpoint (Issue #3427). +# Only these sub-directories may be requested to prevent path traversal. +ALLOWED_COMPONENTS = frozenset( + { + "autobot-slm-backend", + "autobot-slm-frontend", + "autobot-backend", + "autobot-frontend", + } +) + # Directory names to skip entirely during traversal. _SKIP_DIRS = { "__pycache__", From 325a48b271975a75b62ba140be8b5193f58fcd1f Mon Sep 17 00:00:00 2001 From: mrveiss Date: Sat, 4 Apr 2026 00:42:06 +0300 Subject: [PATCH 3/3] fix(slm): address code review findings for drift detection (#2834) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use Literal["modified","source_only","deployed_only"] in DriftedFile.status - Raise ValueError (surfaced as HTTP 500) in get_default_source_dir when component dir is missing, rather than silently falling back to monorepo root - Wrap get_file_drift source_dir resolution in try/except ValueError → HTTP 500 - Add try/finally to handleCheckDrift in CodeSyncView so isDriftLoading resets on error Co-Authored-By: Claude Sonnet 4.6 --- autobot-slm-backend/api/code_sync.py | 5 ++++- autobot-slm-backend/models/schemas.py | 4 ++-- autobot-slm-backend/services/drift_checker.py | 6 +++++- autobot-slm-frontend/src/views/CodeSyncView.vue | 13 ++++++++----- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/autobot-slm-backend/api/code_sync.py b/autobot-slm-backend/api/code_sync.py index 0f5bb7063..7ec279451 100644 --- a/autobot-slm-backend/api/code_sync.py +++ b/autobot-slm-backend/api/code_sync.py @@ -426,7 +426,10 @@ async def get_file_drift( detail=f"Invalid component '{component}'. Must be one of: {sorted(ALLOWED_COMPONENTS)}", ) - source_dir = get_default_source_dir(component) + try: + source_dir = get_default_source_dir(component) + except ValueError as exc: + raise HTTPException(status_code=500, detail=str(exc)) from exc deployed_dir = get_default_deployed_dir(component) logger.info( diff --git a/autobot-slm-backend/models/schemas.py b/autobot-slm-backend/models/schemas.py index c58b50a53..a3b167814 100644 --- a/autobot-slm-backend/models/schemas.py +++ b/autobot-slm-backend/models/schemas.py @@ -9,7 +9,7 @@ from datetime import datetime from enum import Enum -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Literal, Optional from pydantic import BaseModel, Field, field_validator, model_validator @@ -1563,7 +1563,7 @@ class DriftedFile(BaseModel): path: str source_checksum: Optional[str] = None deployed_checksum: Optional[str] = None - status: str # "modified" | "source_only" | "deployed_only" + status: Literal["modified", "source_only", "deployed_only"] class FileDriftReport(BaseModel): diff --git a/autobot-slm-backend/services/drift_checker.py b/autobot-slm-backend/services/drift_checker.py index f295e973d..859320920 100644 --- a/autobot-slm-backend/services/drift_checker.py +++ b/autobot-slm-backend/services/drift_checker.py @@ -219,4 +219,8 @@ def get_default_source_dir(component: str = "autobot-slm-backend") -> str: """ source_root = os.environ.get("SLM_REPO_PATH", "/opt/autobot/code_source") candidate = Path(source_root) / component - return str(candidate) if candidate.is_dir() else source_root + if not candidate.is_dir(): + raise ValueError( + f"drift_checker: source component directory does not exist: {candidate}" + ) + return str(candidate) diff --git a/autobot-slm-frontend/src/views/CodeSyncView.vue b/autobot-slm-frontend/src/views/CodeSyncView.vue index 60fe0e0af..46b858881 100644 --- a/autobot-slm-frontend/src/views/CodeSyncView.vue +++ b/autobot-slm-frontend/src/views/CodeSyncView.vue @@ -359,12 +359,15 @@ function describeCron(expression: string): string { async function handleCheckDrift(): Promise { isDriftLoading.value = true - const result = await codeSync.fetchDrift() - if (result) { - driftReport.value = result - showDriftDetails.value = true + try { + const result = await codeSync.fetchDrift() + if (result) { + driftReport.value = result + showDriftDetails.value = true + } + } finally { + isDriftLoading.value = false } - isDriftLoading.value = false } // =============================================================================