Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 36 additions & 4 deletions Sources/GhostOS/Perception/Perception.swift
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,32 @@ public enum Perception {
)
}

// Strategy 2: AXorcist's search with ElementSearchOptions
// Strategy 2 (CDP-First): For Chrome/Electron apps, try CDP BEFORE
// the expensive AX tree walk. CDP queries the real DOM via JavaScript
// and returns in ~50ms vs ~11s for a full AX tree walk on web apps.
//
// Routing: browser app → CDP first → AX fallback
// native app → AX first → CDP fallback (existing behavior)
if CDPBridge.isBrowserApp(appName), let query, identifier == nil {
if let cdpResults = cdpFallbackFind(query: query, appName: appName) {
Log.info("CDP-First: found \(cdpResults.count) elements for '\(query)' (skipped AX tree walk)")
return ToolResult(
success: true,
data: [
"elements": cdpResults,
"count": cdpResults.count,
"total_matches": cdpResults.count,
"source": "cdp-first",
],
suggestion: "Elements found via Chrome DevTools Protocol (CDP-First path). " +
"Use ghost_click with the x/y coordinates shown in the position field."
)
}
// CDP miss — fall through to AX tree walk
Log.debug("CDP-First: no results for '\(query)', falling through to AX tree")
}

// Strategy 3: AXorcist's search with ElementSearchOptions
var options = ElementSearchOptions()
options.maxDepth = maxDepth
options.caseInsensitive = true
Expand All @@ -162,9 +187,10 @@ public enum Perception {
results = semanticDepthSearch(query: query, role: role, in: searchRoot, maxDepth: maxDepth)
}

// CDP fallback: if AX search found nothing and we're in Chrome/Electron,
// try Chrome DevTools Protocol for instant DOM-based element finding.
if results.isEmpty, let query {
// CDP fallback (for native apps): if AX search found nothing,
// try Chrome DevTools Protocol as last resort before vision.
// For browser apps this was already tried above (CDP-First path).
if results.isEmpty, let query, !CDPBridge.isBrowserApp(appName) {
if let cdpResults = cdpFallbackFind(query: query, appName: appName) {
return ToolResult(
success: true,
Expand Down Expand Up @@ -779,6 +805,12 @@ public enum Perception {

/// Chrome window origin, used for coordinate conversion.
///
/// Private convenience entry point that forwards directly to
/// `chromeWindowOriginPublic(appName:)` and returns its result unchanged.
///
/// - Parameter appName: Optional application name, passed through as-is.
/// - Returns: The `(x, y)` origin reported by `chromeWindowOriginPublic(appName:)`.
private static func chromeWindowOrigin(appName: String?) -> (x: Double, y: Double) {
    chromeWindowOriginPublic(appName: appName)
}

/// Public accessor for Chrome window origin. Used by VisionPerception for
/// CDP structured snapshots that need viewport-to-screen coordinate mapping.
public static func chromeWindowOriginPublic(appName: String?) -> (x: Double, y: Double) {
let name = appName ?? "Chrome"
guard let app = findApp(named: name),
let appElement = Element.application(for: app.processIdentifier),
Expand Down
126 changes: 119 additions & 7 deletions Sources/GhostOS/Recipes/RecipeEngine.swift
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,34 @@ public enum RecipeEngine {
stepResults.append(stepResult)

if !result.success {
let failurePolicy = step.onFailure ?? globalFailurePolicy

if failurePolicy == "skip" {
Log.info("Recipe '\(recipe.name)' step \(step.id) failed (skipping): \(result.error ?? "")")
continue
}
// AUTO-HEAL: Before giving up, try CDP-based element finding for
// click/type actions in browser apps. This handles the common case
// where a web app updated its DOM IDs but the element text is the same.
if let _ = attemptAutoHeal(
step: step, resolvedParams: resolvedParams, appName: recipe.app
) {
// Auto-heal succeeded — update the step result
let healDuration = Int(Date().timeIntervalSince(stepStart) * 1000)
stepResults[stepResults.count - 1] = RecipeStepResult(
stepId: step.id,
action: step.action,
success: true,
durationMs: healDuration,
error: nil,
note: (step.note ?? step.action) + " [auto-healed via CDP]"
)
Log.info("Recipe '\(recipe.name)' step \(step.id) auto-healed via CDP")
// Continue to wait_after handling below (don't return)
} else {
// Auto-heal failed — apply normal failure policy
let failurePolicy = step.onFailure ?? globalFailurePolicy

if failurePolicy == "skip" {
Log.info("Recipe '\(recipe.name)' step \(step.id) failed (skipping): \(result.error ?? "")")
continue
}

// Stop: return failure with diagnostics
// Stop: return failure with diagnostics
let totalDuration = Int(Date().timeIntervalSince(startTime) * 1000)

// Capture failure context
Expand Down Expand Up @@ -126,6 +146,7 @@ public enum RecipeEngine {
error: "Recipe '\(recipe.name)' failed at step \(step.id) (\(step.note ?? step.action)): \(result.error ?? "")",
suggestion: "Check the current_context and failed_step details. Use ghost_screenshot for visual debugging."
)
} // end else (auto-heal failed)
}

// Handle wait_after condition (substitute {{params}} in value)
Expand Down Expand Up @@ -246,6 +267,97 @@ public enum RecipeEngine {
)
}

// MARK: - Auto-Heal

/// Attempt to recover a failed recipe step using CDP element finding.
///
/// When a click/type/hover step fails, this function looks the target element
/// up via `CDPBridge.findElements(query:)`, using the step's
/// `computedNameContains` text (falling back to the `query`, `into`, or
/// `target` resolved params, in that order). If a match is found, the action
/// is re-executed at the match's screen coordinates.
///
/// A non-nil return value always represents a *successful* re-execution:
/// a failed click, type, or hover yields `nil`, so callers can treat
/// "non-nil" as "healed".
///
/// - Parameters:
///   - step: The recipe step that failed.
///   - resolvedParams: The step's parameters after substitution, if any.
///   - appName: The app the recipe targets; must be a browser app per
///     `CDPBridge.isBrowserApp`.
/// - Returns: The successful `ToolResult` of the re-executed action, or `nil`
///   if auto-heal is not applicable, no element was found, the match carries
///   no usable coordinates, or the re-executed action failed.
private static func attemptAutoHeal(
    step: RecipeStep,
    resolvedParams: [String: String]?,
    appName: String?
) -> ToolResult? {
    // Only auto-heal click/type/hover actions
    guard ["click", "type", "hover"].contains(step.action) else { return nil }

    // Only for browser apps with CDP
    guard CDPBridge.isBrowserApp(appName), CDPBridge.isAvailable() else { return nil }

    // Need a text query to search for
    let query = step.target?.computedNameContains
        ?? resolvedParams?["query"]
        ?? resolvedParams?["into"]
        ?? resolvedParams?["target"]
    guard let query, !query.isEmpty else { return nil }

    Log.info("Auto-heal: trying CDP for '\(query)' (step \(step.id), action: \(step.action))")

    // Try to find the element via CDP
    guard let cdpElements = CDPBridge.findElements(query: query),
          let first = cdpElements.first
    else {
        Log.info("Auto-heal: CDP found no matches for '\(query)'")
        return nil
    }

    // Read a viewport coordinate from the match, accepting either an integer
    // or a floating-point value. Returns nil when the key is absent or not
    // numeric, so we never fall back to a bogus (0, 0) position.
    func coordinate(_ key: String) -> Double? {
        if let value = first[key] as? Double { return value }
        if let value = first[key] as? Int { return Double(value) }
        return nil
    }

    guard let viewportX = coordinate("centerX"),
          let viewportY = coordinate("centerY")
    else {
        Log.info("Auto-heal: CDP match for '\(query)' has no usable coordinates")
        return nil
    }

    // Get screen coordinates
    let windowOrigin = Perception.chromeWindowOriginPublic(appName: appName)
    let screen = CDPBridge.viewportToScreen(
        viewportX: viewportX, viewportY: viewportY,
        windowX: windowOrigin.x, windowY: windowOrigin.y
    )

    // Re-execute the action with CDP coordinates. Every branch returns nil
    // on failure so that a non-nil result always means the heal worked.
    switch step.action {
    case "click":
        let result = Actions.click(
            query: nil, role: nil, domId: nil,
            appName: appName,
            x: screen.x, y: screen.y,
            button: resolvedParams?["button"],
            count: resolvedParams?["count"].flatMap(Int.init)
        )
        return result.success ? result : nil

    case "type":
        // Click the field first, then type
        let clickResult = Actions.click(
            query: nil, role: nil, domId: nil,
            appName: appName,
            x: screen.x, y: screen.y,
            button: nil, count: nil
        )
        guard clickResult.success else { return nil }

        // No text to type: focusing the field is all we can do.
        guard let text = resolvedParams?["text"] else { return clickResult }

        let clear = resolvedParams?["clear"] == "true"
        let typeResult = Actions.typeText(
            text: text, into: nil, domId: nil,
            appName: appName, clear: clear
        )
        return typeResult.success ? typeResult : nil

    case "hover":
        let hoverResult = Actions.hover(
            query: nil, role: nil, domId: nil,
            appName: appName,
            x: screen.x, y: screen.y
        )
        return hoverResult.success ? hoverResult : nil

    default:
        return nil
    }
}

// MARK: - Step Execution

/// Execute a single recipe step by dispatching to the appropriate action.
Expand Down
Loading