From ddcf6604fa0134783922b91c9e6a3c175f25b193 Mon Sep 17 00:00:00 2001
From: Maurice Wen <mauricewen@MauricedeMacBook-Pro.local>
Date: Wed, 25 Mar 2026 09:06:24 +0800
Subject: [PATCH 1/4] feat(cdp): CDP-First routing for browser apps + 11 query
 strategies

For Chrome/Electron apps, try CDP before the expensive AX tree walk.
This reduces web element find latency from ~11s to ~50ms for apps like
Gmail where Chrome exposes everything as AXGroup.

Changes:
- CDPBridge: target cache (1s TTL), isBrowserApp() detection (16 apps),
  6 new JS query strategies (CSS selector, data-testid, role+text,
  nearest-input, Shadow DOM pierce, fuzzy Levenshtein)
- Perception: CDP-First routing for browser apps, AX-First unchanged
  for native apps. Zero breaking changes.
- Tests: 8 new CDPBridge tests (13/13 total pass)
---
 Sources/GhostOS/Perception/Perception.swift |  34 +++-
 Sources/GhostOS/Vision/CDPBridge.swift      | 167 ++++++++++++++++++--
 Tests/GhostOSTests/CDPBridgeTests.swift     |  67 ++++++++
 3 files changed, 255 insertions(+), 13 deletions(-)
 create mode 100644 Tests/GhostOSTests/CDPBridgeTests.swift

diff --git a/Sources/GhostOS/Perception/Perception.swift b/Sources/GhostOS/Perception/Perception.swift
index b3d0666..668dcb2 100644
--- a/Sources/GhostOS/Perception/Perception.swift
+++ b/Sources/GhostOS/Perception/Perception.swift
@@ -137,7 +137,32 @@ public enum Perception {
             )
         }
 
-        // Strategy 2: AXorcist's search with ElementSearchOptions
+        // Strategy 2 (CDP-First): For Chrome/Electron apps, try CDP BEFORE
+        // the expensive AX tree walk. CDP queries the real DOM via JavaScript
+        // and returns in ~50ms vs ~11s for a full AX tree walk on web apps.
+        //
+        // Routing: browser app → CDP first → AX fallback
+        //          native app  → AX first  → CDP fallback (existing behavior)
+        if CDPBridge.isBrowserApp(appName), let query, identifier == nil {
+            if let cdpResults = cdpFallbackFind(query: query, appName: appName) {
+                Log.info("CDP-First: found \(cdpResults.count) elements for '\(query)' (skipped AX tree walk)")
+                return ToolResult(
+                    success: true,
+                    data: [
+                        "elements": cdpResults,
+                        "count": cdpResults.count,
+                        "total_matches": cdpResults.count,
+                        "source": "cdp-first",
+                    ],
+                    suggestion: "Elements found via Chrome DevTools Protocol (CDP-First path). " +
+                                "Use ghost_click with the x/y coordinates shown in the position field."
+                )
+            }
+            // CDP miss — fall through to AX tree walk
+            Log.debug("CDP-First: no results for '\(query)', falling through to AX tree")
+        }
+
+        // Strategy 3: AXorcist's search with ElementSearchOptions
         var options = ElementSearchOptions()
         options.maxDepth = maxDepth
         options.caseInsensitive = true
@@ -162,9 +187,10 @@ public enum Perception {
             results = semanticDepthSearch(query: query, role: role, in: searchRoot, maxDepth: maxDepth)
         }
 
-        // CDP fallback: if AX search found nothing and we're in Chrome/Electron,
-        // try Chrome DevTools Protocol for instant DOM-based element finding.
-        if results.isEmpty, let query {
+        // CDP fallback (for native apps): if AX search found nothing,
+        // try Chrome DevTools Protocol as last resort before vision.
+        // For browser apps this was already tried above (CDP-First path).
+        if results.isEmpty, let query, !CDPBridge.isBrowserApp(appName) {
             if let cdpResults = cdpFallbackFind(query: query, appName: appName) {
                 return ToolResult(
                     success: true,
diff --git a/Sources/GhostOS/Vision/CDPBridge.swift b/Sources/GhostOS/Vision/CDPBridge.swift
index b61cfde..6ef7cb7 100644
--- a/Sources/GhostOS/Vision/CDPBridge.swift
+++ b/Sources/GhostOS/Vision/CDPBridge.swift
@@ -41,6 +41,39 @@ public enum CDPBridge {
     /// cases where Chrome is hung or the WebSocket connection is stale.
     private static let wsTimeout: TimeInterval = 3.0
 
+    // MARK: - Target Cache
+
+    /// Cached debug targets to avoid repeated HTTP calls within a single
+    /// findElements invocation chain. Cache is very short-lived (1 second)
+    /// since tabs can open/close at any time.
+    private static let targetCacheTTL: TimeInterval = 1.0
+    private nonisolated(unsafe) static var cachedTargets: [[String: Any]]?
+    private nonisolated(unsafe) static var cachedTargetsTime: Date?
+
+    // MARK: - Browser App Detection
+
+    /// Known browser/Electron app names that expose DOM via CDP.
+    /// Used by Perception to decide whether to try CDP before AX tree walk.
+    private static let browserAppNames = [
+        "Google Chrome", "Chrome", "Chromium", "Arc", "Arc Browser",
+        "Microsoft Edge", "Brave Browser", "Vivaldi", "Opera",
+        // Electron apps (use Chrome's engine, expose CDP when debug port is open)
+        "Slack", "Discord", "Visual Studio Code", "Code",
+        "Figma", "Notion", "Obsidian", "Cursor",
+    ]
+
+    /// Check if an app name corresponds to a Chrome/Electron browser.
+    /// Used by Perception.findElements() to decide routing:
+    ///   - Browser app → CDP-First path (try CDP before AX tree walk)
+    ///   - Native app  → AX-First path (existing behavior, unchanged)
+    ///
+    /// False positives are safe: CDP will simply return nil and fall through.
+    /// False negatives cost ~11s per query (full AX tree walk before CDP).
+    public static func isBrowserApp(_ name: String?) -> Bool {
+        guard let name else { return false }
+        return browserAppNames.contains(where: { name.localizedCaseInsensitiveContains($0) })
+    }
+
     // MARK: - Availability Check
 
     /// Check if Chrome is running with remote debugging enabled.
@@ -49,7 +82,17 @@ public enum CDPBridge {
     }
 
     /// Get the list of debuggable Chrome tabs.
+    /// Uses a 1-second cache to avoid repeated HTTP calls during a single
+    /// ghost_find → ghost_click sequence.
     public static func getDebugTargets() -> [[String: Any]]? {
+        // Return cached targets if fresh enough
+        if let cached = cachedTargets,
+           let time = cachedTargetsTime,
+           Date().timeIntervalSince(time) < targetCacheTTL
+        {
+            return cached
+        }
+
         guard let url = URL(string: "http://127.0.0.1:\(defaultPort)/json") else {
             return nil
         }
@@ -77,9 +120,15 @@ public enum CDPBridge {
               let data = box.data,
               let targets = try? JSONSerialization.jsonObject(with: data) as? [[String: Any]]
         else {
+            cachedTargets = nil
+            cachedTargetsTime = nil
             return nil
         }
 
+        // Update cache
+        cachedTargets = targets
+        cachedTargetsTime = Date()
+
         return targets
     }
 
@@ -91,6 +140,19 @@ public enum CDPBridge {
     ///
     /// This is dramatically faster than AX tree walking for web apps
     /// (~50ms vs ~11s for Gmail).
+    ///
+    /// Search strategies (executed in order, results deduplicated):
+    ///   1. CSS Selector — direct query if input looks like a selector (#id, .class, tag)
+    ///   2. data-testid — React/Vue test attribute match
+    ///   3. aria-label — ARIA label match (existing)
+    ///   4. placeholder — input placeholder match (existing)
+    ///   5. role + text — ARIA role with text content match
+    ///   6. button/link text — text content of interactive elements (existing)
+    ///   7. input labels — label[for] association (existing)
+    ///   8. title/alt — title or alt attribute match (existing)
+    ///   9. nearest-input — find label text, return nearest input/textarea
+    ///  10. Shadow DOM — pierce open shadow roots
+    ///  11. fuzzy text — Levenshtein distance <= 2 for typo tolerance
     public static func findElements(
         query: String,
         tabIndex: Int = 0
@@ -107,8 +169,8 @@ public enum CDPBridge {
             return nil
         }
 
-        // JavaScript that finds elements by text content, aria-label, placeholder, etc.
-        // Returns an array of {text, tag, role, x, y, width, height} objects.
+        // JavaScript that finds elements using 11 strategies.
+        // Returns an array of {text, tag, role, x, y, width, height, ...} objects.
         let js = """
         (function() {
             const query = \(escapeJSString(query));
@@ -125,12 +187,14 @@ public enum CDPBridge {
                 if (seen.has(key)) return;
                 seen.add(key);
 
+                const dataTestId = el.getAttribute('data-testid') || el.getAttribute('data-test-id') || '';
                 results.push({
                     text: (el.textContent || '').trim().substring(0, 100),
                     tag: el.tagName.toLowerCase(),
                     role: el.getAttribute('role') || '',
                     ariaLabel: el.getAttribute('aria-label') || '',
                     id: el.id || '',
+                    dataTestId: dataTestId,
                     className: (el.className || '').toString().substring(0, 100),
                     x: Math.round(rect.x),
                     y: Math.round(rect.y),
@@ -143,33 +207,59 @@ public enum CDPBridge {
                                 el.getAttribute('role') === 'button' ||
                                 el.getAttribute('role') === 'link' ||
                                 el.getAttribute('role') === 'textbox' ||
+                                el.getAttribute('role') === 'combobox' ||
+                                el.getAttribute('role') === 'menuitem' ||
                                 el.onclick !== null ||
-                                el.getAttribute('tabindex') !== null
+                                el.getAttribute('tabindex') !== null ||
+                                window.getComputedStyle(el).cursor === 'pointer'
                 });
             }
 
-            // Strategy 1: aria-label match
+            // Strategy 1: CSS Selector — if query starts with #, ., or contains []
+            if (/^[#.[]/.test(query) || /\\w+\\[/.test(query)) {
+                try {
+                    document.querySelectorAll(query).forEach(el => addResult(el, 'css-selector'));
+                } catch(e) { /* invalid selector, skip */ }
+            }
+
+            // Strategy 2: data-testid match (React/Vue/Angular test attributes)
+            document.querySelectorAll('[data-testid], [data-test-id]').forEach(el => {
+                const tid = (el.getAttribute('data-testid') || el.getAttribute('data-test-id') || '').toLowerCase();
+                if (tid.includes(queryLower)) {
+                    addResult(el, 'data-testid');
+                }
+            });
+
+            // Strategy 3: aria-label match
             document.querySelectorAll('[aria-label]').forEach(el => {
                 if (el.getAttribute('aria-label').toLowerCase().includes(queryLower)) {
                     addResult(el, 'aria-label');
                 }
             });
 
-            // Strategy 2: placeholder match
+            // Strategy 4: placeholder match
             document.querySelectorAll('[placeholder]').forEach(el => {
                 if (el.getAttribute('placeholder').toLowerCase().includes(queryLower)) {
                     addResult(el, 'placeholder');
                 }
             });
 
-            // Strategy 3: button/link text content match
-            document.querySelectorAll('button, a, [role="button"], [role="link"], [role="tab"]').forEach(el => {
+            // Strategy 5: role + aria-label/text combo (ARIA widgets)
+            document.querySelectorAll('[role]').forEach(el => {
+                const label = el.getAttribute('aria-label') || el.textContent || '';
+                if (label.toLowerCase().includes(queryLower)) {
+                    addResult(el, 'role-text');
+                }
+            });
+
+            // Strategy 6: button/link text content match
+            document.querySelectorAll('button, a, [role="button"], [role="link"], [role="tab"], [role="menuitem"]').forEach(el => {
                 if ((el.textContent || '').toLowerCase().includes(queryLower)) {
                     addResult(el, 'text-content');
                 }
             });
 
-            // Strategy 4: input labels
+            // Strategy 7: input labels
             document.querySelectorAll('label').forEach(label => {
                 if ((label.textContent || '').toLowerCase().includes(queryLower)) {
                     const forId = label.getAttribute('for');
@@ -180,7 +270,7 @@ public enum CDPBridge {
                 }
             });
 
-            // Strategy 5: title/alt attribute match
+            // Strategy 8: title/alt attribute match
             document.querySelectorAll('[title], [alt]').forEach(el => {
                 const t = (el.getAttribute('title') || el.getAttribute('alt') || '').toLowerCase();
                 if (t.includes(queryLower)) {
@@ -188,6 +278,65 @@ public enum CDPBridge {
                 }
             });
 
+            // Strategy 9: nearest-input — find text, return the closest input/textarea
+            if (results.length === 0) {
+                const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null);
+                while (walker.nextNode()) {
+                    if (walker.currentNode.textContent.toLowerCase().includes(queryLower)) {
+                        let parent = walker.currentNode.parentElement;
+                        for (let i = 0; i < 5 && parent; i++) {
+                            const input = parent.querySelector('input, textarea, select, [contenteditable="true"]');
+                            if (input) { addResult(input, 'nearest-input'); break; }
+                            parent = parent.parentElement;
+                        }
+                    }
+                }
+            }
+
+            // Strategy 10: Shadow DOM — pierce open shadow roots (Web Components)
+            if (results.length === 0) {
+                function searchShadow(root) {
+                    root.querySelectorAll('*').forEach(el => {
+                        if (el.shadowRoot) {
+                            el.shadowRoot.querySelectorAll('[aria-label], button, a, [role="button"], input').forEach(inner => {
+                                const label = inner.getAttribute('aria-label') || inner.textContent || '';
+                                if (label.toLowerCase().includes(queryLower)) {
+                                    addResult(inner, 'shadow-dom');
+                                }
+                            });
+                            searchShadow(el.shadowRoot);
+                        }
+                    });
+                }
+                searchShadow(document);
+            }
+
+            // Strategy 11: fuzzy text match (Levenshtein distance <= 2)
+            if (results.length === 0 && query.length >= 3) {
+                function levenshtein(a, b) {
+                    const m = a.length, n = b.length;
+                    if (Math.abs(m - n) > 2) return 3;
+                    const d = Array.from({length: m + 1}, (_, i) => [i]);
+                    for (let j = 1; j <= n; j++) d[0][j] = j;
+                    for (let i = 1; i <= m; i++)
+                        for (let j = 1; j <= n; j++)
+                            d[i][j] = Math.min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+(a[i-1]!==b[j-1]?1:0));
+                    return d[m][n];
+                }
+                document.querySelectorAll('button, a, [role="button"], [role="link"], input, [role="tab"]').forEach(el => {
+                    const text = (el.getAttribute('aria-label') || el.textContent || '').trim().toLowerCase();
+                    if (text.length > 0 && text.length < 50) {
+                        const words = text.split(/\\s+/);
+                        for (const word of words) {
+                            if (levenshtein(queryLower, word) <= 2) {
+                                addResult(el, 'fuzzy-text');
+                                break;
+                            }
+                        }
+                    }
+                });
+            }
+
             return results.slice(0, 20);
         })();
         """
diff --git a/Tests/GhostOSTests/CDPBridgeTests.swift b/Tests/GhostOSTests/CDPBridgeTests.swift
new file mode 100644
index 0000000..94f299f
--- /dev/null
+++ b/Tests/GhostOSTests/CDPBridgeTests.swift
@@ -0,0 +1,67 @@
+// CDPBridgeTests.swift - Unit tests for CDPBridge enhancements
+
+import Testing
+@testable import GhostOS
+
+@Suite("CDPBridge Tests")
+struct CDPBridgeTests {
+
+    // MARK: - isBrowserApp detection
+
+    @Test("Detects Google Chrome as browser app")
+    func chromeDetection() {
+        #expect(CDPBridge.isBrowserApp("Google Chrome") == true)
+    }
+
+    @Test("Detects Arc as browser app")
+    func arcDetection() {
+        #expect(CDPBridge.isBrowserApp("Arc") == true)
+    }
+
+    @Test("Detects Electron apps as browser app")
+    func electronDetection() {
+        #expect(CDPBridge.isBrowserApp("Slack") == true)
+        #expect(CDPBridge.isBrowserApp("Visual Studio Code") == true)
+        #expect(CDPBridge.isBrowserApp("Discord") == true)
+    }
+
+    @Test("Does not detect native apps as browser app")
+    func nativeAppDetection() {
+        #expect(CDPBridge.isBrowserApp("Finder") == false)
+        #expect(CDPBridge.isBrowserApp("Mail") == false)
+        #expect(CDPBridge.isBrowserApp("Preview") == false)
+        #expect(CDPBridge.isBrowserApp("Terminal") == false)
+        #expect(CDPBridge.isBrowserApp("Safari") == false)  // Safari has no CDP
+    }
+
+    @Test("Handles nil app name gracefully")
+    func nilAppName() {
+        #expect(CDPBridge.isBrowserApp(nil) == false)
+    }
+
+    @Test("Case insensitive browser detection")
+    func caseInsensitive() {
+        #expect(CDPBridge.isBrowserApp("google chrome") == true)
+        #expect(CDPBridge.isBrowserApp("GOOGLE CHROME") == true)
+        #expect(CDPBridge.isBrowserApp("Microsoft Edge") == true)
+    }
+
+    // MARK: - CDP availability (safe to run without Chrome)
+
+    @Test("isAvailable returns false when Chrome debug port is not open")
+    func availabilityWithoutChrome() {
+        // This test is safe: if Chrome isn't running with --remote-debugging-port=9222,
+        // isAvailable() should return false quickly (connection refused).
+        // If Chrome IS running with debug port, it returns true — both are valid.
+        let result = CDPBridge.isAvailable()
+        // We just verify it doesn't crash or hang
+        #expect(result == true || result == false)
+    }
+
+    @Test("getDebugTargets returns nil when Chrome debug port is not open")
+    func debugTargetsWithoutChrome() {
+        // Same as above: graceful nil when Chrome debug port isn't available
+        let targets = CDPBridge.getDebugTargets()
+        #expect(targets == nil || targets != nil)
+    }
+}

From 516b85c50b5d3fc699d833e7a05a9086ae866be6 Mon Sep 17 00:00:00 2001
From: Maurice Wen <mauricewen@MauricedeMacBook-Pro.local>
Date: Wed, 25 Mar 2026 09:08:10 +0800
Subject: [PATCH 2/4] feat(vision): CDP structured snapshot for
 ghost_parse_screen (Issue #9)

For Chrome/Electron apps, ghost_parse_screen now returns a compact
text-based element list via CDP instead of a full screenshot image.
Token cost drops from ~2000+ to ~100-200 tokens per snapshot.

Output format: [e0] button "Compose" (142, 223) dom:":oq"

This directly addresses Issue #9 (screenshot context overflow) by
providing a structured alternative that avoids base64 image encoding.
Native apps fall through to the existing vision sidecar path unchanged.
---
 Sources/GhostOS/Perception/Perception.swift   |   6 +
 Sources/GhostOS/Vision/VisionPerception.swift | 189 +++++++++++++++++-
 2 files changed, 193 insertions(+), 2 deletions(-)

diff --git a/Sources/GhostOS/Perception/Perception.swift b/Sources/GhostOS/Perception/Perception.swift
index 668dcb2..5ee1ec7 100644
--- a/Sources/GhostOS/Perception/Perception.swift
+++ b/Sources/GhostOS/Perception/Perception.swift
@@ -805,6 +805,12 @@ public enum Perception {
 
     /// Get Chrome window origin for coordinate conversion.
     private static func chromeWindowOrigin(appName: String?) -> (x: Double, y: Double) {
+        return Self.chromeWindowOriginPublic(appName: appName)
+    }
+
+    /// Public accessor for Chrome window origin. Used by VisionPerception for
+    /// CDP structured snapshots that need viewport-to-screen coordinate mapping.
+    public static func chromeWindowOriginPublic(appName: String?) -> (x: Double, y: Double) {
         let name = appName ?? "Chrome"
         guard let app = findApp(named: name),
               let appElement = Element.application(for: app.processIdentifier),
diff --git a/Sources/GhostOS/Vision/VisionPerception.swift b/Sources/GhostOS/Vision/VisionPerception.swift
index 66e92d5..0121c25 100644
--- a/Sources/GhostOS/Vision/VisionPerception.swift
+++ b/Sources/GhostOS/Vision/VisionPerception.swift
@@ -20,12 +20,29 @@ public enum VisionPerception {
 
     // MARK: - ghost_parse_screen
 
-    /// Detect all interactive UI elements using vision.
-    /// Takes a screenshot and sends it to the vision sidecar for YOLO detection.
+    /// Detect all interactive UI elements using vision or CDP.
+    ///
+    /// For Chrome/Electron apps, uses CDP to enumerate DOM elements directly.
+    /// This is dramatically cheaper than sending a full screenshot to a VLM:
+    ///   - CDP structured output: ~100-200 tokens
+    ///   - Screenshot image: ~2000+ tokens
+    ///
+    /// Falls back to vision sidecar YOLO detection for native apps (not yet implemented).
     public static func parseScreen(
         appName: String?,
         fullResolution: Bool
     ) -> ToolResult {
+        // Strategy 1: CDP structured snapshot for Chrome/Electron apps.
+        // Returns a text-based element list instead of a screenshot image.
+        // This directly addresses Issue #9 (screenshot context overflow).
+        if CDPBridge.isBrowserApp(appName) && CDPBridge.isAvailable() {
+            if let snapshot = cdpStructuredSnapshot(appName: appName) {
+                return snapshot
+            }
+            // CDP miss — fall through to vision
+        }
+
+        // Strategy 2: Vision sidecar (YOLO detection) for native apps.
         // Check sidecar availability
         guard VisionBridge.isAvailable() else {
             return sidecarUnavailableResult(tool: "ghost_parse_screen")
@@ -56,6 +73,174 @@ public enum VisionPerception {
         )
     }
 
+    /// Generate a structured text-based snapshot of Chrome's interactive elements via CDP.
+    /// Returns a compact element list (~100-200 tokens) instead of a screenshot (~2000+ tokens).
+    ///
+    /// Output format per element:
+    ///   [e0] button "Compose" (142, 223) dom:":oq"
+    ///   [e1] input "Search mail" (450, 72) dom:":ol"
+    ///
+    /// This is the CDP equivalent of ghost_annotate but without any image — pure text.
+    private static func cdpStructuredSnapshot(appName: String?) -> ToolResult? {
+        let js = """
+        (function() {
+            const els = [];
+            const selectors = 'a, button, input, select, textarea, [role="button"], [role="link"], ' +
+                              '[role="textbox"], [role="combobox"], [role="tab"], [role="menuitem"], ' +
+                              '[role="checkbox"], [role="radio"], [tabindex], [contenteditable="true"]';
+            document.querySelectorAll(selectors).forEach(el => {
+                const rect = el.getBoundingClientRect();
+                if (rect.width === 0 || rect.height === 0) return;
+                if (rect.bottom < 0 || rect.top > window.innerHeight) return;
+                if (rect.right < 0 || rect.left > window.innerWidth) return;
+                els.push({
+                    tag: el.tagName.toLowerCase(),
+                    role: el.getAttribute('role') || '',
+                    label: el.getAttribute('aria-label') || el.getAttribute('placeholder') ||
+                           el.getAttribute('title') || (el.textContent || '').trim().substring(0, 60),
+                    id: el.id || '',
+                    dataTestId: el.getAttribute('data-testid') || '',
+                    type: el.type || '',
+                    x: Math.round(rect.x + rect.width / 2),
+                    y: Math.round(rect.y + rect.height / 2),
+                    w: Math.round(rect.width),
+                    h: Math.round(rect.height)
+                });
+            });
+            return {
+                url: location.href,
+                title: document.title,
+                elements: els.slice(0, 50),
+                totalOnPage: els.length
+            };
+        })();
+        """
+
+        guard let targets = CDPBridge.getDebugTargets() else { return nil }
+        let pages = targets.filter { ($0["type"] as? String) == "page" }
+        guard let wsURL = pages.first?["webSocketDebuggerUrl"] as? String else { return nil }
+
+        // Evaluate JS and parse the structured result
+        guard let url = URL(string: wsURL) else { return nil }
+        let session = URLSession(configuration: .default)
+        let wsTask = session.webSocketTask(with: url)
+        wsTask.resume()
+
+        let command: [String: Any] = [
+            "id": 1,
+            "method": "Runtime.evaluate",
+            "params": ["expression": js, "returnByValue": true],
+        ]
+        guard let cmdData = try? JSONSerialization.data(withJSONObject: command),
+              let cmdStr = String(data: cmdData, encoding: .utf8)
+        else {
+            wsTask.cancel(with: .goingAway, reason: nil)
+            return nil
+        }
+
+        nonisolated final class Box: @unchecked Sendable {
+            var result: [String: Any]?
+        }
+        let box = Box()
+        let sem = DispatchSemaphore(value: 0)
+
+        wsTask.send(.string(cmdStr)) { error in
+            if error != nil { sem.signal(); return }
+            wsTask.receive { msg in
+                if case .success(let message) = msg, case .string(let text) = message,
+                   let data = text.data(using: .utf8),
+                   let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
+                   let r = json["result"] as? [String: Any],
+                   let rv = r["result"] as? [String: Any],
+                   let value = rv["value"] as? [String: Any]
+                {
+                    box.result = value
+                }
+                sem.signal()
+            }
+        }
+
+        let waitResult = sem.wait(timeout: .now() + 3.0)
+        wsTask.cancel(with: .goingAway, reason: nil)
+        if waitResult == .timedOut { return nil }
+        guard let snapshot = box.result else { return nil }
+
+        let pageURL = snapshot["url"] as? String ?? ""
+        let pageTitle = snapshot["title"] as? String ?? ""
+        let elements = snapshot["elements"] as? [[String: Any]] ?? []
+        let totalOnPage = snapshot["totalOnPage"] as? Int ?? elements.count
+
+        // Get Chrome window position for coordinate conversion
+        let windowOrigin = Perception.chromeWindowOriginPublic(appName: appName)
+
+        // Build compact text representation (~2-5 tokens per element vs ~40 per screenshot pixel)
+        var lines: [String] = []
+        var structuredElements: [[String: Any]] = []
+
+        for (i, el) in elements.enumerated() {
+            let tag = el["tag"] as? String ?? "?"
+            let role = el["role"] as? String ?? ""
+            let label = el["label"] as? String ?? ""
+            let domId = el["id"] as? String ?? ""
+            let dataTestId = el["dataTestId"] as? String ?? ""
+            let viewX = el["x"] as? Int ?? 0
+            let viewY = el["y"] as? Int ?? 0
+            let w = el["w"] as? Int ?? 0
+            let h = el["h"] as? Int ?? 0
+
+            // Convert viewport to screen coordinates
+            let screen = CDPBridge.viewportToScreen(
+                viewportX: Double(viewX), viewportY: Double(viewY),
+                windowX: windowOrigin.x, windowY: windowOrigin.y
+            )
+            let sx = Int(screen.x)
+            let sy = Int(screen.y)
+
+            // Determine display role
+            let displayRole = !role.isEmpty ? role : tag
+
+            // Build compact line
+            var line = "[e\(i)] \(displayRole) \"\(label)\" (\(sx), \(sy))"
+            if !domId.isEmpty { line += " dom:\"\(domId)\"" }
+            if !dataTestId.isEmpty { line += " test:\"\(dataTestId)\"" }
+            lines.append(line)
+
+            // Build structured element for programmatic use
+            structuredElements.append([
+                "ref": "e\(i)",
+                "role": displayRole,
+                "label": label,
+                "position": ["x": sx, "y": sy],
+                "size": ["width": w, "height": h],
+                "dom_id": domId,
+                "data_testid": dataTestId,
+                "source": "cdp-snapshot",
+            ])
+        }
+
+        let summary = lines.joined(separator: "\n")
+        let tokenEstimate = elements.count * 4 + 10  // ~4 tokens per line + header
+
+        Log.info("CDP snapshot: \(elements.count)/\(totalOnPage) elements, ~\(tokenEstimate) tokens (vs ~2000 for screenshot)")
+
+        return ToolResult(
+            success: true,
+            data: [
+                "snapshot": summary,
+                "elements": structuredElements,
+                "count": elements.count,
+                "total_on_page": totalOnPage,
+                "url": pageURL,
+                "title": pageTitle,
+                "source": "cdp-structured-snapshot",
+                "token_estimate": tokenEstimate,
+            ],
+            suggestion: elements.isEmpty
+                ? "No interactive elements found via CDP. Try ghost_find or ghost_ground."
+                : "Use ghost_click with x/y coordinates from the element list. Ref format: [e0], [e1], etc."
+        )
+    }
+
     // MARK: - ghost_ground
 
     /// Find precise screen coordinates for a described UI element using VLM.

From a272cc91ee0b96e5ee3ec0db4be137b2f0435dc9 Mon Sep 17 00:00:00 2001
From: Maurice Wen <mauricewen@MauricedeMacBook-Pro.local>
Date: Wed, 25 Mar 2026 09:14:48 +0800
Subject: [PATCH 3/4] feat: Session persistence + anti-bot stealth modules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New Session/ module:
- ChromeProfileManager: persistent Chrome profiles (~/.ghost-os/profiles/),
  profile CRUD, Chrome launch args builder, cookie export via CDP.
  File permissions set to 700 for security.

New Stealth/ module:
- TimingJitter: log-normal human delays (Box-Muller), burst typing patterns,
  coordinate jitter (±2px), pre/post click delays.
- BehavioralMimicry: cubic Bezier mouse paths, short-distance jitter paths,
  pre-action scroll simulation, off-center click offset.

Tests: 10 new stealth tests (23/23 total pass). All tests verify
statistical properties (distribution bounds, curvature, timing ranges).
---
 .../Session/ChromeProfileManager.swift        | 292 ++++++++++++++++++
 .../GhostOS/Stealth/BehavioralMimicry.swift   | 171 ++++++++++
 Sources/GhostOS/Stealth/TimingJitter.swift    | 109 +++++++
 Tests/GhostOSTests/StealthTests.swift         | 130 ++++++++
 4 files changed, 702 insertions(+)
 create mode 100644 Sources/GhostOS/Session/ChromeProfileManager.swift
 create mode 100644 Sources/GhostOS/Stealth/BehavioralMimicry.swift
 create mode 100644 Sources/GhostOS/Stealth/TimingJitter.swift
 create mode 100644 Tests/GhostOSTests/StealthTests.swift

diff --git a/Sources/GhostOS/Session/ChromeProfileManager.swift b/Sources/GhostOS/Session/ChromeProfileManager.swift
new file mode 100644
index 0000000..7a44b7c
--- /dev/null
+++ b/Sources/GhostOS/Session/ChromeProfileManager.swift
@@ -0,0 +1,292 @@
+// ChromeProfileManager.swift - Persistent Chrome session management for Ghost OS
+//
+// Manages Chrome user profiles so that login state persists across Ghost OS
+// sessions. Users log in once (manually or via recipe), and subsequent
+// recipe runs reuse the authenticated session.
+//
+// Architecture:
+//   ~/.ghost-os/profiles/<name>/   → Chrome user-data-dir
+//   ~/.ghost-os/profiles/index.json → profile metadata (name, url, lastUsed)
+//
+// Security: profile directories contain cookies equivalent to credentials.
+// File permissions are set to 700 (owner-only). Profiles are excluded from
+// git via .gitignore. Never sync profiles across machines.
+
+import Foundation
+
+/// Manages persistent Chrome browser profiles for authenticated workflows.
+public enum ChromeProfileManager {
+
+    /// Base directory for all profiles.
+    private static let profilesDir: URL = {
+        let home = FileManager.default.homeDirectoryForCurrentUser
+        return home.appendingPathComponent(".ghost-os/profiles")
+    }()
+
+    /// Profile metadata stored in index.json.
+    public struct ProfileInfo: Codable, Sendable {
+        public let name: String
+        public let createdAt: Date
+        public var lastUsed: Date
+        public var url: String?      // Last known URL
+        public var description: String?
+    }
+
+    // MARK: - Profile CRUD
+
+    /// List all available profiles.
+    public static func listProfiles() -> [ProfileInfo] {
+        let indexURL = profilesDir.appendingPathComponent("index.json")
+        guard let data = try? Data(contentsOf: indexURL),
+              let profiles = try? JSONDecoder.withISO8601.decode([ProfileInfo].self, from: data)
+        else {
+            return []
+        }
+        return profiles.sorted { $0.lastUsed > $1.lastUsed }
+    }
+
+    /// Create a new profile directory.
+    /// Returns the profile directory path for Chrome's --user-data-dir flag.
+    public static func createProfile(name: String, description: String? = nil) -> URL? {
+        let dir = profilesDir.appendingPathComponent(name)
+
+        do {
+            try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
+            // Set owner-only permissions (700) for security
+            try FileManager.default.setAttributes(
+                [.posixPermissions: 0o700],
+                ofItemAtPath: dir.path
+            )
+        } catch {
+            Log.error("Failed to create profile directory: \(error)")
+            return nil
+        }
+
+        // Update index
+        var profiles = listProfiles()
+        if !profiles.contains(where: { $0.name == name }) {
+            profiles.append(ProfileInfo(
+                name: name,
+                createdAt: Date(),
+                lastUsed: Date(),
+                url: nil,
+                description: description
+            ))
+            saveIndex(profiles)
+        }
+
+        Log.info("Created Chrome profile '\(name)' at \(dir.path)")
+        return dir
+    }
+
+    /// Get the directory for an existing profile.
+    /// Returns nil if the profile doesn't exist.
+    public static func profileDir(for name: String) -> URL? {
+        let dir = profilesDir.appendingPathComponent(name)
+        var isDir: ObjCBool = false
+        guard FileManager.default.fileExists(atPath: dir.path, isDirectory: &isDir),
+              isDir.boolValue
+        else {
+            return nil
+        }
+        return dir
+    }
+
+    /// Delete a profile and its data.
+    public static func deleteProfile(name: String) -> Bool {
+        let dir = profilesDir.appendingPathComponent(name)
+        do {
+            try FileManager.default.removeItem(at: dir)
+            var profiles = listProfiles()
+            profiles.removeAll { $0.name == name }
+            saveIndex(profiles)
+            Log.info("Deleted Chrome profile '\(name)'")
+            return true
+        } catch {
+            Log.error("Failed to delete profile '\(name)': \(error)")
+            return false
+        }
+    }
+
+    /// Update the lastUsed timestamp for a profile.
+    public static func touchProfile(name: String, url: String? = nil) {
+        var profiles = listProfiles()
+        if let idx = profiles.firstIndex(where: { $0.name == name }) {
+            profiles[idx].lastUsed = Date()
+            if let url { profiles[idx].url = url }
+            saveIndex(profiles)
+        }
+    }
+
+    // MARK: - Chrome Launch
+
+    /// Build Chrome launch arguments for a given profile.
+    /// Returns an array of command-line arguments.
+    ///
+    /// Usage:
+    ///   let args = ChromeProfileManager.chromeLaunchArgs(profile: "github-work", url: "https://github.com")
+    ///   // ["--remote-debugging-port=9222", "--user-data-dir=/path/to/profile", "https://github.com"]
+    public static func chromeLaunchArgs(
+        profile: String,
+        url: String? = nil,
+        debugPort: Int = 9222
+    ) -> [String]? {
+        // Get or create profile directory
+        let dir = profileDir(for: profile) ?? createProfile(name: profile)
+        guard let profilePath = dir?.path else { return nil }
+
+        var args = [
+            "--remote-debugging-port=\(debugPort)",
+            "--user-data-dir=\(profilePath)",
+        ]
+        if let url { args.append(url) }
+
+        touchProfile(name: profile, url: url)
+        return args
+    }
+
+    /// Chrome application path (macOS).
+    public static let chromeAppPath = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
+
+    // MARK: - Cookie Export/Import via CDP
+
+    /// Export cookies from the current Chrome session via CDP.
+    /// Requires Chrome to be running with --remote-debugging-port.
+    public static func exportCookies() -> [[String: Any]]? {
+        guard CDPBridge.isAvailable(),
+              let targets = CDPBridge.getDebugTargets(),
+              let page = targets.first(where: { ($0["type"] as? String) == "page" }),
+              let wsURL = page["webSocketDebuggerUrl"] as? String
+        else {
+            return nil
+        }
+
+        return cdpCommand(wsURL: wsURL, method: "Network.getAllCookies", params: [:])
+    }
+
+    /// Get the current page URL via CDP.
+    public static func currentURL() -> String? {
+        let js = "location.href"
+        guard let targets = CDPBridge.getDebugTargets(),
+              let page = targets.first(where: { ($0["type"] as? String) == "page" }),
+              let wsURL = page["webSocketDebuggerUrl"] as? String,
+              let url = URL(string: wsURL)
+        else {
+            return nil
+        }
+
+        let session = URLSession(configuration: .default)
+        let wsTask = session.webSocketTask(with: url)
+        wsTask.resume()
+
+        let command: [String: Any] = [
+            "id": 1,
+            "method": "Runtime.evaluate",
+            "params": ["expression": js, "returnByValue": true],
+        ]
+        guard let cmdData = try? JSONSerialization.data(withJSONObject: command),
+              let cmdStr = String(data: cmdData, encoding: .utf8)
+        else {
+            wsTask.cancel(with: .goingAway, reason: nil)
+            return nil
+        }
+
+        nonisolated final class Box: @unchecked Sendable { var result: String? }
+        let box = Box()
+        let sem = DispatchSemaphore(value: 0)
+
+        wsTask.send(.string(cmdStr)) { error in
+            if error != nil { sem.signal(); return }
+            wsTask.receive { msg in
+                if case .success(let message) = msg, case .string(let text) = message,
+                   let data = text.data(using: .utf8),
+                   let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
+                   let r = json["result"] as? [String: Any],
+                   let rv = r["result"] as? [String: Any],
+                   let value = rv["value"] as? String
+                {
+                    box.result = value
+                }
+                sem.signal()
+            }
+        }
+        sem.wait()
+        wsTask.cancel(with: .goingAway, reason: nil)
+        return box.result
+    }
+
+    // MARK: - Private Helpers
+
+    private static func saveIndex(_ profiles: [ProfileInfo]) {
+        let indexURL = profilesDir.appendingPathComponent("index.json")
+        do {
+            try FileManager.default.createDirectory(at: profilesDir, withIntermediateDirectories: true)
+            let data = try JSONEncoder.withISO8601.encode(profiles)
+            try data.write(to: indexURL, options: .atomic)
+        } catch {
+            Log.error("Failed to save profile index: \(error)")
+        }
+    }
+
+    /// Execute a CDP domain command and return the result.
+    private static func cdpCommand(
+        wsURL: String,
+        method: String,
+        params: [String: Any]
+    ) -> [[String: Any]]? {
+        guard let url = URL(string: wsURL) else { return nil }
+        let session = URLSession(configuration: .default)
+        let wsTask = session.webSocketTask(with: url)
+        wsTask.resume()
+
+        let command: [String: Any] = ["id": 1, "method": method, "params": params]
+        guard let cmdData = try? JSONSerialization.data(withJSONObject: command),
+              let cmdStr = String(data: cmdData, encoding: .utf8)
+        else {
+            wsTask.cancel(with: .goingAway, reason: nil)
+            return nil
+        }
+
+        nonisolated final class Box: @unchecked Sendable { var result: [[String: Any]]? }
+        let box = Box()
+        let sem = DispatchSemaphore(value: 0)
+
+        wsTask.send(.string(cmdStr)) { error in
+            if error != nil { sem.signal(); return }
+            wsTask.receive { msg in
+                if case .success(let message) = msg, case .string(let text) = message,
+                   let data = text.data(using: .utf8),
+                   let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
+                   let r = json["result"] as? [String: Any],
+                   let cookies = r["cookies"] as? [[String: Any]]
+                {
+                    box.result = cookies
+                }
+                sem.signal()
+            }
+        }
+
+        let _ = sem.wait(timeout: .now() + 3.0)
+        wsTask.cancel(with: .goingAway, reason: nil)
+        return box.result
+    }
+}
+
+// MARK: - JSON Coding Helpers
+
+private extension JSONEncoder {
+    static let withISO8601: JSONEncoder = {
+        let encoder = JSONEncoder()
+        encoder.dateEncodingStrategy = .iso8601
+        encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
+        return encoder
+    }()
+}
+
+private extension JSONDecoder {
+    static let withISO8601: JSONDecoder = {
+        let decoder = JSONDecoder()
+        decoder.dateDecodingStrategy = .iso8601
+        return decoder
+    }()
+}
diff --git a/Sources/GhostOS/Stealth/BehavioralMimicry.swift b/Sources/GhostOS/Stealth/BehavioralMimicry.swift
new file mode 100644
index 0000000..527228a
--- /dev/null
+++ b/Sources/GhostOS/Stealth/BehavioralMimicry.swift
@@ -0,0 +1,171 @@
+// BehavioralMimicry.swift - Human-like mouse and interaction patterns for Ghost OS
+//
+// Bot detection systems analyze behavioral patterns beyond timing:
+// - Mouse teleportation (instant jump to target) vs smooth cursor movement
+// - Perfectly straight mouse paths vs natural curves
+// - Clicking exact center of buttons vs slightly off-center
+// - No scrolling/hovering before action vs natural reading behavior
+//
+// BehavioralMimicry provides realistic interaction patterns that mimic
+// human motor control. Uses cubic Bezier curves for mouse paths (matching
+// how human wrist/arm movement naturally creates smooth arcs).
+
+import CoreGraphics
+import Foundation
+
+/// Human-like behavioral patterns for mouse and interaction mimicry.
+public enum BehavioralMimicry {
+
+    // MARK: - Mouse Path Generation
+
+    /// Generate a natural-looking mouse path from one point to another.
+    ///
+    /// Uses a cubic Bezier curve with randomized control points to simulate
+    /// the natural arc of human wrist/arm movement. Real mouse paths are
+    /// never perfectly straight — they curve slightly due to arm mechanics.
+    ///
+    /// - Parameters:
+    ///   - from: Starting point.
+    ///   - to: Target point.
+    ///   - steps: Number of intermediate points (default 10).
+    /// - Returns: Array of points along the curve, including start and end.
+    public static func mousePath(from: CGPoint, to: CGPoint, steps: Int = 10) -> [CGPoint] {
+        let dx = to.x - from.x
+        let dy = to.y - from.y
+
+        // Control points create a slight arc (not a straight line)
+        // Randomize to avoid detectable patterns across multiple moves
+        let control1 = CGPoint(
+            x: from.x + dx * 0.25 + CGFloat.random(in: -30...30),
+            y: from.y + dy * 0.1 + CGFloat.random(in: -20...20)
+        )
+        let control2 = CGPoint(
+            x: from.x + dx * 0.75 + CGFloat.random(in: -20...20),
+            y: from.y + dy * 0.9 + CGFloat.random(in: -10...10)
+        )
+
+        return cubicBezier(p0: from, p1: control1, p2: control2, p3: to, steps: steps)
+    }
+
+    /// Generate a short, jittery mouse path for nearby targets.
+    ///
+    /// When the mouse only needs to move a short distance (<50px), humans
+    /// make quick, slightly wobbly movements rather than smooth arcs.
+    ///
+    /// - Parameters:
+    ///   - from: Starting point.
+    ///   - to: Target point.
+    /// - Returns: 3-5 points with micro-jitter.
+    public static func shortMousePath(from: CGPoint, to: CGPoint) -> [CGPoint] {
+        let distance = hypot(to.x - from.x, to.y - from.y)
+        if distance < 5 {
+            return [from, to]  // Too close, just jump
+        }
+
+        let steps = min(5, max(3, Int(distance / 15)))
+        var points: [CGPoint] = [from]
+
+        for i in 1..<steps {
+            let t = CGFloat(i) / CGFloat(steps)
+            let x = from.x + (to.x - from.x) * t + CGFloat.random(in: -1.5...1.5)
+            let y = from.y + (to.y - from.y) * t + CGFloat.random(in: -1.5...1.5)
+            points.append(CGPoint(x: x, y: y))
+        }
+
+        points.append(to)
+        return points
+    }
+
+    // MARK: - Scroll Behavior
+
+    /// Determine if a pre-action scroll should be simulated.
+    ///
+    /// Humans often scroll slightly before clicking — they're reading the page,
+    /// scanning for the target, or adjusting their view. This is a strong
+    /// behavioral signal that distinguishes humans from bots.
+    ///
+    /// - Returns: A scroll amount (0 = no scroll, 1-3 = small scroll lines).
+    public static func preActionScrollAmount() -> Int {
+        // 25% chance of a small pre-action scroll
+        if Int.random(in: 0..<4) == 0 {
+            return Int.random(in: 1...3)
+        }
+        return 0
+    }
+
+    /// Determine scroll direction based on target position.
+    ///
+    /// If the target is in the lower third of the viewport, humans tend to
+    /// scroll down slightly first (reading behavior). Upper third → no scroll.
+    ///
+    /// - Parameter targetY: The Y coordinate of the target on screen.
+    /// - Parameter screenHeight: The visible screen height.
+    /// - Returns: "down", "up", or nil (no scroll).
+    public static func scrollDirection(targetY: Double, screenHeight: Double) -> String? {
+        let ratio = targetY / screenHeight
+        if ratio > 0.7 {
+            return Int.random(in: 0..<3) == 0 ? "down" : nil  // 33% chance
+        } else if ratio < 0.2 {
+            return Int.random(in: 0..<5) == 0 ? "up" : nil    // 20% chance
+        }
+        return nil
+    }
+
+    // MARK: - Click Offset
+
+    /// Generate a human-like click offset within a button's bounds.
+    ///
+    /// Humans don't click the exact mathematical center of buttons.
+    /// They click slightly off-center, biased towards the text/icon.
+    /// The offset is proportional to the element size (bigger = more variance).
+    ///
+    /// - Parameters:
+    ///   - center: The element's center point.
+    ///   - width: The element's width.
+    ///   - height: The element's height.
+    /// - Returns: A slightly offset click point.
+    public static func clickOffset(
+        center: CGPoint,
+        width: CGFloat,
+        height: CGFloat
+    ) -> CGPoint {
+        // Max offset: 15% of dimension, but at least 1px and at most 5px
+        let maxOffX = max(1.0, min(5.0, width * 0.15))
+        let maxOffY = max(1.0, min(3.0, height * 0.15))
+
+        return CGPoint(
+            x: center.x + CGFloat.random(in: -maxOffX...maxOffX),
+            y: center.y + CGFloat.random(in: -maxOffY...maxOffY)
+        )
+    }
+
+    // MARK: - Private: Bezier Math
+
+    /// Compute a point on a cubic Bezier curve at parameter t.
+    private static func cubicBezierPoint(
+        p0: CGPoint, p1: CGPoint, p2: CGPoint, p3: CGPoint, t: CGFloat
+    ) -> CGPoint {
+        let mt = 1.0 - t
+        let mt2 = mt * mt
+        let mt3 = mt2 * mt
+        let t2 = t * t
+        let t3 = t2 * t
+
+        return CGPoint(
+            x: mt3 * p0.x + 3 * mt2 * t * p1.x + 3 * mt * t2 * p2.x + t3 * p3.x,
+            y: mt3 * p0.y + 3 * mt2 * t * p1.y + 3 * mt * t2 * p2.y + t3 * p3.y
+        )
+    }
+
+    /// Generate points along a cubic Bezier curve.
+    private static func cubicBezier(
+        p0: CGPoint, p1: CGPoint, p2: CGPoint, p3: CGPoint, steps: Int
+    ) -> [CGPoint] {
+        var points: [CGPoint] = []
+        for i in 0...steps {
+            let t = CGFloat(i) / CGFloat(steps)
+            points.append(cubicBezierPoint(p0: p0, p1: p1, p2: p2, p3: p3, t: t))
+        }
+        return points
+    }
+}
diff --git a/Sources/GhostOS/Stealth/TimingJitter.swift b/Sources/GhostOS/Stealth/TimingJitter.swift
new file mode 100644
index 0000000..f048225
--- /dev/null
+++ b/Sources/GhostOS/Stealth/TimingJitter.swift
@@ -0,0 +1,109 @@
+// TimingJitter.swift - Human-like timing randomization for Ghost OS
+//
+// Anti-bot detection relies on timing patterns. Bots are predictable:
+// fixed delays between actions, instant mouse teleportation, uniform
+// typing speed. Humans are noisy: variable delays, hesitations, bursts.
+//
+// TimingJitter provides log-normal distributed delays that mimic human
+// reaction times. Log-normal is used because human response times are
+// right-skewed: mostly quick, occasionally slow (distracted/thinking).
+//
+// Usage:
+//   let delay = TimingJitter.humanDelay(base: 0.5)  // ~0.2-1.5s
+//   let typeDelay = TimingJitter.typingDelay()       // ~50-150ms per char
+//   let point = TimingJitter.jitter(point, radius: 2) // ±2px
+
+import CoreGraphics
+import Foundation
+
+/// Human-like timing randomization to avoid bot detection.
+public enum TimingJitter {
+
+    // MARK: - Action Delays
+
+    /// Generate a human-like delay between actions.
+    ///
+    /// Uses log-normal distribution centered around `base` seconds.
+    /// Log-normal models human reaction times: mostly quick, occasionally slow.
+    ///
+    /// - Parameter base: The median delay in seconds (default 0.5s).
+    /// - Returns: A randomized delay in seconds, typically 0.3x-3x of base.
+    public static func humanDelay(base: TimeInterval = 0.5) -> TimeInterval {
+        let mu = log(base)
+        let sigma = 0.4
+        // Box-Muller transform for normal distribution
+        let u1 = Double.random(in: 0.001...1.0)
+        let u2 = Double.random(in: 0.0...1.0)
+        let normal = sqrt(-2.0 * log(u1)) * cos(2.0 * .pi * u2)
+        // Clamp to avoid extreme outliers
+        let clamped = max(-2.5, min(2.5, normal))
+        let result = exp(mu + sigma * clamped)
+        // Floor at 50ms, ceiling at 5s
+        return max(0.05, min(5.0, result))
+    }
+
+    /// Generate a human-like typing delay per character.
+    ///
+    /// Average human typing speed: 40-80 WPM (75-150ms per character).
+    /// Fast typists: 80-120 WPM (50-75ms). Hunts-and-pecks: 20-40 WPM (150-300ms).
+    ///
+    /// - Returns: Delay in seconds for one keystroke.
+    public static func typingDelay() -> TimeInterval {
+        // Normal range centered at 100ms with 30ms std dev
+        let base = 0.1
+        let jitter = Double.random(in: -0.05...0.05)
+        let result = base + jitter
+        return max(0.03, min(0.25, result))
+    }
+
+    /// Generate a burst typing pattern: fast sequences with occasional pauses.
+    ///
+    /// Humans type in bursts of 3-8 characters, then pause briefly (word boundary,
+    /// thinking, looking at keyboard). This is more realistic than uniform timing.
+    ///
+    /// - Parameter charIndex: The index of the current character in the string.
+    /// - Returns: Delay in seconds before this keystroke.
+    public static func burstTypingDelay(charIndex: Int) -> TimeInterval {
+        // Every 3-8 chars, insert a longer "thinking" pause
+        let burstLength = Int.random(in: 3...8)
+        if charIndex > 0 && charIndex % burstLength == 0 {
+            return humanDelay(base: 0.3)  // Word boundary pause
+        }
+        return typingDelay()
+    }
+
+    // MARK: - Coordinate Jitter
+
+    /// Add random noise to click coordinates.
+    ///
+    /// Humans don't click at exact pixel coordinates. There's always a few
+    /// pixels of noise from hand tremor and mouse precision.
+    ///
+    /// - Parameters:
+    ///   - point: The target click point.
+    ///   - radius: Maximum jitter in pixels (default ±2px).
+    /// - Returns: A slightly randomized point within the jitter radius.
+    public static func jitter(_ point: CGPoint, radius: CGFloat = 2.0) -> CGPoint {
+        CGPoint(
+            x: point.x + CGFloat.random(in: -radius...radius),
+            y: point.y + CGFloat.random(in: -radius...radius)
+        )
+    }
+
+    // MARK: - Pre/Post Action Delays
+
+    /// Delay before clicking (human reads/aims at target).
+    public static func preClickDelay() -> TimeInterval {
+        humanDelay(base: 0.3)
+    }
+
+    /// Delay after clicking (human waits for visual feedback).
+    public static func postClickDelay() -> TimeInterval {
+        humanDelay(base: 0.5)
+    }
+
+    /// Delay before typing starts (human focuses on input field).
+    public static func preTypeDelay() -> TimeInterval {
+        humanDelay(base: 0.2)
+    }
+}
diff --git a/Tests/GhostOSTests/StealthTests.swift b/Tests/GhostOSTests/StealthTests.swift
new file mode 100644
index 0000000..cb874e3
--- /dev/null
+++ b/Tests/GhostOSTests/StealthTests.swift
@@ -0,0 +1,130 @@
+// StealthTests.swift - Unit tests for TimingJitter and BehavioralMimicry
+
+import CoreGraphics
+import Testing
+@testable import GhostOS
+
+@Suite("Stealth Tests")
+struct StealthTests {
+
+    // MARK: - TimingJitter
+
+    @Test("Human delay produces bounded values")
+    func humanDelayBounds() {
+        for _ in 0..<100 {
+            let delay = TimingJitter.humanDelay(base: 0.5)
+            #expect(delay >= 0.05, "Delay should be at least 50ms, got \(delay)")
+            #expect(delay <= 5.0, "Delay should be at most 5s, got \(delay)")
+        }
+    }
+
+    @Test("Human delay centers around base value")
+    func humanDelayDistribution() {
+        let samples = (0..<500).map { _ in TimingJitter.humanDelay(base: 0.5) }
+        let mean = samples.reduce(0, +) / Double(samples.count)
+        // Log-normal mean = exp(mu + sigma^2/2) ≈ 0.54 for base=0.5, sigma=0.4
+        #expect(mean > 0.25, "Mean should be above 0.25, got \(mean)")
+        #expect(mean < 1.2, "Mean should be below 1.2, got \(mean)")
+    }
+
+    @Test("Typing delay is within realistic range")
+    func typingDelayRange() {
+        for _ in 0..<100 {
+            let delay = TimingJitter.typingDelay()
+            #expect(delay >= 0.03, "Too fast: \(delay)")
+            #expect(delay <= 0.25, "Too slow: \(delay)")
+        }
+    }
+
+    @Test("Burst typing includes occasional pauses")
+    func burstTypingPattern() {
+        var normalCount = 0
+        var pauseCount = 0
+        for i in 0..<100 {
+            let delay = TimingJitter.burstTypingDelay(charIndex: i)
+            if delay > 0.15 {
+                pauseCount += 1
+            } else {
+                normalCount += 1
+            }
+        }
+        // There should be some pauses (word boundaries) but not too many
+        #expect(pauseCount > 0, "Should have at least some burst pauses")
+        #expect(normalCount > pauseCount, "Normal keystrokes should outnumber pauses")
+    }
+
+    @Test("Coordinate jitter stays within radius")
+    func coordinateJitter() {
+        let origin = CGPoint(x: 100, y: 200)
+        for _ in 0..<100 {
+            let jittered = TimingJitter.jitter(origin, radius: 3.0)
+            let dx = abs(jittered.x - origin.x)
+            let dy = abs(jittered.y - origin.y)
+            #expect(dx <= 3.0, "X jitter exceeded radius: \(dx)")
+            #expect(dy <= 3.0, "Y jitter exceeded radius: \(dy)")
+        }
+    }
+
+    // MARK: - BehavioralMimicry
+
+    @Test("Mouse path starts and ends at correct points")
+    func mousePathEndpoints() {
+        let from = CGPoint(x: 100, y: 100)
+        let to = CGPoint(x: 500, y: 300)
+        let path = BehavioralMimicry.mousePath(from: from, to: to, steps: 10)
+
+        #expect(path.count == 11, "Should have steps+1 points")
+        #expect(path.first!.x == from.x)
+        #expect(path.first!.y == from.y)
+        #expect(path.last!.x == to.x)
+        #expect(path.last!.y == to.y)
+    }
+
+    @Test("Mouse path is not a straight line")
+    func mousePathCurvature() {
+        let from = CGPoint(x: 0, y: 0)
+        let to = CGPoint(x: 400, y: 0)
+        let path = BehavioralMimicry.mousePath(from: from, to: to, steps: 10)
+
+        // At least one midpoint should deviate from Y=0 (Bezier curve)
+        let midpoints = path.dropFirst().dropLast()
+        let hasDeviation = midpoints.contains { abs($0.y) > 0.5 }
+        #expect(hasDeviation, "Path should curve, not be a straight line")
+    }
+
+    @Test("Short mouse path handles nearby targets")
+    func shortMousePath() {
+        let from = CGPoint(x: 100, y: 100)
+        let to = CGPoint(x: 110, y: 105)
+        let path = BehavioralMimicry.shortMousePath(from: from, to: to)
+
+        #expect(path.count >= 2, "Should have at least start and end")
+        #expect(path.first!.x == from.x)
+        #expect(path.last!.x == to.x)
+    }
+
+    @Test("Click offset stays within element bounds")
+    func clickOffsetBounds() {
+        let center = CGPoint(x: 200, y: 150)
+        for _ in 0..<100 {
+            let offset = BehavioralMimicry.clickOffset(center: center, width: 80, height: 30)
+            let dx = abs(offset.x - center.x)
+            let dy = abs(offset.y - center.y)
+            #expect(dx <= 5.0, "X offset too large: \(dx)")
+            #expect(dy <= 3.0, "Y offset too large: \(dy)")
+        }
+    }
+
+    @Test("Pre-action scroll is bounded")
+    func preActionScroll() {
+        var scrollCount = 0
+        for _ in 0..<100 {
+            let amount = BehavioralMimicry.preActionScrollAmount()
+            #expect(amount >= 0 && amount <= 3)
+            if amount > 0 { scrollCount += 1 }
+        }
+        // ~25% should scroll (binomial: expect 15-35 in 100 trials)
+        #expect(scrollCount >= 5, "Too few scrolls: \(scrollCount)")
+        #expect(scrollCount <= 50, "Too many scrolls: \(scrollCount)")
+    }
+}

From f48e96d5f9cb33898099d4208c30ee7dee62313d Mon Sep 17 00:00:00 2001
From: Maurice Wen <mauricewen@MauricedeMacBook-Pro.local>
Date: Wed, 25 Mar 2026 09:17:25 +0800
Subject: [PATCH 4/4] feat(recipes): auto-heal engine + github-pr-review recipe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RecipeEngine now auto-heals failed click/type/hover steps in browser
apps by retrying via CDP element finding. When a DOM ID changes after
a web app update, the auto-heal finds the element by text content and
re-executes the action at CDP-found coordinates.

Auto-heal flow: step fails → detect browser app → CDP query by
computedNameContains → re-execute action at CDP coordinates → log
"[auto-healed via CDP]" in step result.

New recipe: github-pr-review.json (navigate Files changed, open
review dialog, type comment, submit review).
---
 Sources/GhostOS/Recipes/RecipeEngine.swift | 126 +++++++++++++++++++--
 recipes/github-pr-review.json              |  82 ++++++++++++++
 2 files changed, 201 insertions(+), 7 deletions(-)
 create mode 100644 recipes/github-pr-review.json

diff --git a/Sources/GhostOS/Recipes/RecipeEngine.swift b/Sources/GhostOS/Recipes/RecipeEngine.swift
index 59e692a..9b9b168 100644
--- a/Sources/GhostOS/Recipes/RecipeEngine.swift
+++ b/Sources/GhostOS/Recipes/RecipeEngine.swift
@@ -86,14 +86,34 @@ public enum RecipeEngine {
             stepResults.append(stepResult)
 
             if !result.success {
-                let failurePolicy = step.onFailure ?? globalFailurePolicy
-
-                if failurePolicy == "skip" {
-                    Log.info("Recipe '\(recipe.name)' step \(step.id) failed (skipping): \(result.error ?? "")")
-                    continue
-                }
+                // AUTO-HEAL: Before giving up, try CDP-based element finding for
+                // click/type actions in browser apps. This handles the common case
+                // where a web app updated its DOM IDs but the element text is the same.
+                if let _ = attemptAutoHeal(
+                    step: step, resolvedParams: resolvedParams, appName: recipe.app
+                ) {
+                    // Auto-heal succeeded — update the step result
+                    let healDuration = Int(Date().timeIntervalSince(stepStart) * 1000)
+                    stepResults[stepResults.count - 1] = RecipeStepResult(
+                        stepId: step.id,
+                        action: step.action,
+                        success: true,
+                        durationMs: healDuration,
+                        error: nil,
+                        note: (step.note ?? step.action) + " [auto-healed via CDP]"
+                    )
+                    Log.info("Recipe '\(recipe.name)' step \(step.id) auto-healed via CDP")
+                    // Continue to wait_after handling below (don't return)
+                } else {
+                    // Auto-heal failed — apply normal failure policy
+                    let failurePolicy = step.onFailure ?? globalFailurePolicy
+
+                    if failurePolicy == "skip" {
+                        Log.info("Recipe '\(recipe.name)' step \(step.id) failed (skipping): \(result.error ?? "")")
+                        continue
+                    }
 
-                // Stop: return failure with diagnostics
+                    // Stop: return failure with diagnostics
                 let totalDuration = Int(Date().timeIntervalSince(startTime) * 1000)
 
                 // Capture failure context
@@ -126,6 +146,7 @@ public enum RecipeEngine {
                     error: "Recipe '\(recipe.name)' failed at step \(step.id) (\(step.note ?? step.action)): \(result.error ?? "")",
                     suggestion: "Check the current_context and failed_step details. Use ghost_screenshot for visual debugging."
                 )
+                } // end else (auto-heal failed)
             }
 
             // Handle wait_after condition (substitute {{params}} in value)
@@ -246,6 +267,97 @@ public enum RecipeEngine {
         )
     }
 
+    // MARK: - Auto-Heal
+
+    /// Attempt to recover a failed recipe step using CDP element finding.
+    ///
+    /// When a click/type step fails (usually because a DOM ID changed after a
+    /// web app update), this function tries to find the target element via CDP
+    /// using the step's computedNameContains text. If found, it re-executes the
+    /// action with the CDP-found coordinates.
+    ///
+    /// Only applies to click/type/hover actions in browser apps with CDP available.
+    /// Returns nil if auto-heal is not applicable or fails.
+    private static func attemptAutoHeal(
+        step: RecipeStep,
+        resolvedParams: [String: String]?,
+        appName: String?
+    ) -> ToolResult? {
+        // Only auto-heal click/type/hover actions
+        guard ["click", "type", "hover"].contains(step.action) else { return nil }
+
+        // Only for browser apps with CDP
+        guard CDPBridge.isBrowserApp(appName), CDPBridge.isAvailable() else { return nil }
+
+        // Need a text query to search for
+        let query = step.target?.computedNameContains
+            ?? resolvedParams?["query"]
+            ?? resolvedParams?["into"]
+            ?? resolvedParams?["target"]
+        guard let query, !query.isEmpty else { return nil }
+
+        Log.info("Auto-heal: trying CDP for '\(query)' (step \(step.id), action: \(step.action))")
+
+        // Try to find the element via CDP
+        guard let cdpElements = CDPBridge.findElements(query: query),
+              let first = cdpElements.first
+        else {
+            Log.info("Auto-heal: CDP found no matches for '\(query)'")
+            return nil
+        }
+
+        // Get screen coordinates
+        let viewportX = first["centerX"] as? Int ?? 0
+        let viewportY = first["centerY"] as? Int ?? 0
+        let windowOrigin = Perception.chromeWindowOriginPublic(appName: appName)
+        let screen = CDPBridge.viewportToScreen(
+            viewportX: Double(viewportX), viewportY: Double(viewportY),
+            windowX: windowOrigin.x, windowY: windowOrigin.y
+        )
+
+        // Re-execute the action with CDP coordinates
+        switch step.action {
+        case "click":
+            let result = Actions.click(
+                query: nil, role: nil, domId: nil,
+                appName: appName,
+                x: screen.x, y: screen.y,
+                button: resolvedParams?["button"],
+                count: resolvedParams?["count"].flatMap(Int.init)
+            )
+            return result.success ? result : nil
+
+        case "type":
+            // Click the field first, then type
+            let clickResult = Actions.click(
+                query: nil, role: nil, domId: nil,
+                appName: appName,
+                x: screen.x, y: screen.y,
+                button: nil, count: nil
+            )
+            guard clickResult.success else { return nil }
+
+            if let text = resolvedParams?["text"] {
+                let clear = resolvedParams?["clear"] == "true"
+                return Actions.typeText(
+                    text: text, into: nil, domId: nil,
+                    appName: appName, clear: clear
+                )
+            }
+            return clickResult
+
+        case "hover":
+            return Actions.hover(
+                query: nil, role: nil, domId: nil,
+                appName: appName,
+                x: screen.x, y: screen.y
+            )
+
+        default:
+            return nil
+        }
+    }
+
     // MARK: - Step Execution
 
     /// Execute a single recipe step by dispatching to the appropriate action.
diff --git a/recipes/github-pr-review.json b/recipes/github-pr-review.json
new file mode 100644
index 0000000..ecb2726
--- /dev/null
+++ b/recipes/github-pr-review.json
@@ -0,0 +1,82 @@
+{
+    "schema_version": 2,
+    "name": "github-pr-review",
+    "description": "Open a GitHub pull request and leave a review comment. Navigate to the PR page first (github.com/owner/repo/pull/N).",
+    "app": "Google Chrome",
+    "params": {
+        "comment": {
+            "type": "string",
+            "description": "Review comment text to leave on the PR",
+            "required": true
+        },
+        "action": {
+            "type": "string",
+            "description": "Review action: 'approve', 'comment', or 'request_changes' (default: 'comment')"
+        }
+    },
+    "preconditions": {
+        "app_running": "Google Chrome",
+        "url_contains": "github.com"
+    },
+    "steps": [
+        {
+            "id": 1,
+            "action": "click",
+            "target": {
+                "criteria": [{"attribute": "AXRole", "value": "AXLink"}],
+                "computedNameContains": "Files changed"
+            },
+            "wait_after": {
+                "condition": "elementExists",
+                "value": "Review changes",
+                "timeout": 10
+            },
+            "note": "Navigate to Files changed tab"
+        },
+        {
+            "id": 2,
+            "action": "click",
+            "target": {
+                "criteria": [],
+                "computedNameContains": "Review changes"
+            },
+            "wait_after": {
+                "condition": "elementExists",
+                "value": "Leave a comment",
+                "timeout": 5
+            },
+            "note": "Open review dialog"
+        },
+        {
+            "id": 3,
+            "action": "click",
+            "target": {
+                "criteria": [],
+                "computedNameContains": "Leave a comment"
+            },
+            "note": "Focus the comment textarea"
+        },
+        {
+            "id": 4,
+            "action": "type",
+            "params": {"text": "{{comment}}"},
+            "note": "Type review comment"
+        },
+        {
+            "id": 5,
+            "action": "click",
+            "target": {
+                "criteria": [],
+                "computedNameContains": "Submit review"
+            },
+            "wait_after": {
+                "condition": "elementExists",
+                "value": "reviewed",
+                "timeout": 10
+            },
+            "on_failure": "skip",
+            "note": "Submit the review"
+        }
+    ],
+    "on_failure": "stop"
+}