Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions autobot-slm-backend/api/code_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
CodeSyncStatusResponse,
CodeVersionNotification,
CodeVersionNotificationResponse,
FileDriftReport,
FleetSyncJobStatus,
FleetSyncNodeStatus,
FleetSyncRequest,
Expand All @@ -50,6 +51,12 @@
from services.auth import get_current_user
from services.code_distributor import get_code_distributor
from services.database import get_db
from services.drift_checker import (
ALLOWED_COMPONENTS,
build_drift_report,
get_default_deployed_dir,
get_default_source_dir,
)
from services.fleet_sync_guard import assert_no_running_sync, fleet_sync_lock
from services.git_tracker import DEFAULT_BRANCH, DEFAULT_REPO_PATH, get_git_tracker
from services.playbook_executor import get_playbook_executor
Expand Down Expand Up @@ -395,6 +402,56 @@ async def get_sync_status(
)


@router.get("/drift", response_model=FileDriftReport)
async def get_file_drift(
    _: Annotated[dict, Depends(get_current_user)],
    component: str = "autobot-slm-backend",
) -> FileDriftReport:
    """
    Compare file checksums between code_source and the deployed directory (Issue #2834).

    Detects files that have drifted because of manual patches or incomplete
    Ansible deploys. Only Python, config, and script files are compared;
    .pyc, __pycache__, venv, and .git directories are always excluded.

    Query params:
        component: Sub-directory to compare (default: autobot-slm-backend).
            Must be one of the allowed components (Issue #3427).

    Returns:
        A FileDriftReport with the list of drifted files and their checksums.
    """
    # Allow-list check first — rejects anything that could be used for
    # path traversal (Issue #3427).
    if component not in ALLOWED_COMPONENTS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid component '{component}'. Must be one of: {sorted(ALLOWED_COMPONENTS)}",
        )

    # A missing source component directory is a server-side configuration
    # problem, so it surfaces as a 500 rather than a client error.
    try:
        source_dir = get_default_source_dir(component)
    except ValueError as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    deployed_dir = get_default_deployed_dir(component)

    logger.info(
        "drift check: comparing source=%s deployed=%s", source_dir, deployed_dir
    )

    # Checksumming walks the filesystem and hashes every file; run it on the
    # default thread pool so the event loop stays responsive.
    loop = asyncio.get_running_loop()
    raw_report = await loop.run_in_executor(
        None,
        build_drift_report,
        source_dir,
        deployed_dir,
    )

    logger.info(
        "drift check: %d drifted files out of %d compared",
        len(raw_report["drifted_files"]),
        raw_report["total_compared"],
    )

    return FileDriftReport(**raw_report)


@router.post("/refresh", response_model=CodeSyncRefreshResponse)
async def refresh_version(
db: Annotated[AsyncSession, Depends(get_db)],
Expand Down
22 changes: 21 additions & 1 deletion autobot-slm-backend/models/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field, field_validator, model_validator

Expand Down Expand Up @@ -1557,6 +1557,26 @@ class CodeSyncRefreshResponse(BaseModel):
has_update: bool = False


class DriftedFile(BaseModel):
    """A file whose checksum differs between code_source and deployed (Issue #2834)."""

    # POSIX-style path relative to the compared component directory.
    path: str
    # SHA-256 hex digest of the code_source copy; None when the file exists
    # only in the deployed tree.
    source_checksum: Optional[str] = None
    # SHA-256 hex digest of the deployed copy; None when the file exists
    # only in code_source.
    deployed_checksum: Optional[str] = None
    # "modified": present in both trees with differing checksums;
    # "source_only" / "deployed_only": present in exactly one tree.
    status: Literal["modified", "source_only", "deployed_only"]


class FileDriftReport(BaseModel):
    """Result of comparing code_source vs deployed file checksums (Issue #2834)."""

    # Absolute path of the authoritative code_source directory compared.
    source_dir: str
    # Absolute path of the deployed component directory compared.
    deployed_dir: str
    # Files whose checksums differ or that exist in only one tree.
    drifted_files: List[DriftedFile]
    # Total number of distinct files examined across both trees.
    total_compared: int
    # True when drifted_files is non-empty.
    drift_detected: bool
    # UTC ISO-8601 timestamp of when the comparison ran.
    checked_at: str


class PendingNodeResponse(BaseModel):
"""Node that needs code update."""

Expand Down
226 changes: 226 additions & 0 deletions autobot-slm-backend/services/drift_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
# AutoBot - AI-Powered Automation Platform
# Copyright (c) 2025 mrveiss
# Author: mrveiss
"""
Drift Checker Service (Issue #2834).

Compares file checksums between the code_source directory and the deployed
directory to detect files that have been manually patched or missed by Ansible.
"""

import hashlib
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Tuple

# Module-level logger, named after this module per project convention.
logger = logging.getLogger(__name__)

# File extensions that are meaningful to compare (code, config, scripts).
# Anything else — compiled artifacts, binaries, data — is ignored.
_INCLUDE_EXTENSIONS = {".py", ".cfg", ".ini", ".toml", ".yaml", ".yml", ".sh", ".txt"}

# Permitted component names for the /drift endpoint (Issue #3427).
# Only these sub-directories may be requested to prevent path traversal.
ALLOWED_COMPONENTS = frozenset(
    {
        "autobot-slm-backend",
        "autobot-slm-frontend",
        "autobot-backend",
        "autobot-frontend",
    }
)

# Directory names to skip entirely during traversal (caches, VCS metadata,
# virtual environments, and build output are never deployed as-is).
_SKIP_DIRS = {
    "__pycache__",
    ".git",
    "venv",
    ".venv",
    "node_modules",
    ".mypy_cache",
    ".ruff_cache",
    "dist",
    "build",
}


def _file_checksum(path: Path, block_size: int = 65536) -> str:
"""Return the SHA-256 hex digest of a file.

Reads in blocks to avoid loading large files into memory at once.

Args:
path: Absolute path to the file.
block_size: Read chunk size in bytes.

Returns:
Lowercase hex SHA-256 digest string.
"""
h = hashlib.sha256()
with open(path, "rb") as fh:
while chunk := fh.read(block_size):
h.update(chunk)
return h.hexdigest()


def _collect_checksums(root: Path) -> Dict[str, str]:
    """Walk *root* and return a mapping of relative-path → SHA-256 checksum.

    Only files with extensions in ``_INCLUDE_EXTENSIONS`` are included.
    Directories in ``_SKIP_DIRS`` are pruned from the walk.

    Args:
        root: Directory to scan.

    Returns:
        Dict mapping POSIX-style relative path strings to hex digest strings.
    """
    checksums: Dict[str, str] = {}
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune skip dirs in-place so os.walk does not descend into them.
        dirnames[:] = [d for d in dirnames if d not in _SKIP_DIRS]

        for filename in filenames:
            filepath = Path(dirpath) / filename
            if filepath.suffix not in _INCLUDE_EXTENSIONS:
                continue
            try:
                rel = filepath.relative_to(root).as_posix()
                checksums[rel] = _file_checksum(filepath)
            # IOError has been an alias of OSError since Python 3.3, so
            # catching OSError alone covers both.  Unreadable files
            # (permissions, races with an in-flight deploy) are skipped
            # rather than failing the whole scan.
            except OSError as exc:
                logger.warning("drift_checker: cannot read %s: %s", filepath, exc)

    return checksums


def compute_drift(
    source_dir: str,
    deployed_dir: str,
) -> Tuple[List[dict], int]:
    """Compare file checksums between *source_dir* and *deployed_dir*.

    Files present in both directories with identical checksums are omitted
    from the result; everything else is reported with one of three statuses:

        "modified"      – present in both trees, checksums differ
        "source_only"   – present only under *source_dir*
        "deployed_only" – present only under *deployed_dir*

    Each returned dict carries ``path`` (POSIX relative path),
    ``source_checksum``, ``deployed_checksum`` (either may be None), and
    ``status``.

    Args:
        source_dir: Absolute path to the authoritative code source directory.
        deployed_dir: Absolute path to the currently deployed directory.

    Returns:
        Tuple of (list_of_drift_dicts, total_files_compared).  Both elements
        are empty/zero when either directory is missing.
    """
    src_root = Path(source_dir)
    dep_root = Path(deployed_dir)

    # A missing directory yields an empty report instead of raising — the
    # caller logs and surfaces "nothing compared".
    if not src_root.is_dir():
        logger.warning("drift_checker: source_dir does not exist: %s", source_dir)
        return [], 0
    if not dep_root.is_dir():
        logger.warning("drift_checker: deployed_dir does not exist: %s", deployed_dir)
        return [], 0

    source_map = _collect_checksums(src_root)
    deployed_map = _collect_checksums(dep_root)

    # Union of relative paths seen in either tree; sorted for stable output.
    union = set(source_map) | set(deployed_map)
    drift_entries: List[dict] = []

    for rel in sorted(union):
        src_digest = source_map.get(rel)
        dep_digest = deployed_map.get(rel)

        if src_digest == dep_digest:
            # Present in both trees with matching content — no drift.
            continue

        if src_digest is None:
            state = "deployed_only"
        elif dep_digest is None:
            state = "source_only"
        else:
            state = "modified"

        drift_entries.append(
            {
                "path": rel,
                "source_checksum": src_digest,
                "deployed_checksum": dep_digest,
                "status": state,
            }
        )

    return drift_entries, len(union)


def build_drift_report(
    source_dir: str,
    deployed_dir: str,
) -> dict:
    """Build the full drift report dict for the API response (Issue #2834).

    Args:
        source_dir: Path to code_source directory.
        deployed_dir: Path to deployed component directory.

    Returns:
        Dict matching the ``FileDriftReport`` schema.
    """
    drifted_files, total_compared = compute_drift(source_dir, deployed_dir)

    # Timestamp the comparison in UTC so reports from different nodes are
    # directly comparable.
    checked_at = datetime.now(timezone.utc).isoformat()

    return {
        "source_dir": source_dir,
        "deployed_dir": deployed_dir,
        "drifted_files": drifted_files,
        "total_compared": total_compared,
        "drift_detected": bool(drifted_files),
        "checked_at": checked_at,
    }


def get_default_deployed_dir(component: str = "autobot-slm-backend") -> str:
    """Return the expected deployed path for *component* under /opt/autobot.

    The deployed root is taken from the ``SLM_DEPLOYED_ROOT`` environment
    variable so it can be relocated without code changes.

    Args:
        component: Sub-directory name under the deployed root.

    Returns:
        Absolute path string for the deployed component directory.
    """
    root = Path(os.environ.get("SLM_DEPLOYED_ROOT", "/opt/autobot"))
    return str(root / component)


def get_default_source_dir(component: str = "autobot-slm-backend") -> str:
    """Return the code_source sub-directory for *component*.

    Reads ``SLM_REPO_PATH`` from the environment (same var used by git_tracker).

    Args:
        component: Sub-directory name inside the code_source repository.

    Returns:
        Absolute path string for the source directory to compare against.

    Raises:
        ValueError: If the component sub-directory does not exist under the
            code_source root.  There is no fallback to the repository root;
            callers (the /drift endpoint) map this to an HTTP 500.
    """
    source_root = os.environ.get("SLM_REPO_PATH", "/opt/autobot/code_source")
    candidate = Path(source_root) / component
    if not candidate.is_dir():
        raise ValueError(
            f"drift_checker: source component directory does not exist: {candidate}"
        )
    return str(candidate)
Loading
Loading