From 84b3a6088bf6074cdc046c24eb9108dfa16ec02d Mon Sep 17 00:00:00 2001 From: Allen Date: Fri, 13 Mar 2026 09:47:16 +0800 Subject: [PATCH 1/2] =?UTF-8?q?Linkedin=20=E8=AF=84=E8=AE=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Sources/GhostOS/Recipes/RecipeEngine.swift | 74 +++++++++++++++++++++- recipes/linkedin-submit-comment.json | 28 ++++++++ 2 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 recipes/linkedin-submit-comment.json diff --git a/Sources/GhostOS/Recipes/RecipeEngine.swift b/Sources/GhostOS/Recipes/RecipeEngine.swift index 59e692a..d587fd5 100644 --- a/Sources/GhostOS/Recipes/RecipeEngine.swift +++ b/Sources/GhostOS/Recipes/RecipeEngine.swift @@ -273,6 +273,42 @@ public enum RecipeEngine { count: params["count"].flatMap(Int.init) ) + case "vision_click": + guard let description = params["description"] ?? params["query"] ?? params["target"] else { + return ToolResult( + success: false, + error: "Step \(step.id): 'vision_click' action requires 'description' param" + ) + } + + let cropBox = parseCropBox(params["crop_box"], appName: stepApp) + let grounded = VisionPerception.groundElement( + description: description, + appName: stepApp, + cropBox: cropBox + ) + guard grounded.success, + let x = grounded.data?["x"] as? Double, + let y = grounded.data?["y"] as? Double + else { + return ToolResult( + success: false, + error: grounded.error ?? "Step \(step.id): vision grounding failed", + suggestion: grounded.suggestion + ) + } + + return Actions.click( + query: nil, + role: nil, + domId: nil, + appName: stepApp, + x: x, + y: y, + button: params["button"], + count: params["count"].flatMap(Int.init) + ) + case "type": guard let text = params["text"] else { return ToolResult(success: false, error: "Step \(step.id): 'type' action requires 'text' param") @@ -374,11 +410,47 @@ public enum RecipeEngine { return ToolResult( success: false, error: "Unknown recipe action: '\(step.action)'", - suggestion: "Valid actions: click, type, press, hotkey, focus, scroll, hover, long_press, drag, wait" + suggestion: "Valid actions: click, vision_click, type, press, hotkey, focus, scroll, hover, long_press, drag, wait" ) } } + private static func parseCropBox(_ rawValue: String?, appName: String?) -> [Double]? { + guard let rawValue else { return nil } + + let numbers = rawValue + .split(separator: ",") + .compactMap { Double($0.trimmingCharacters(in: .whitespaces)) } + + guard numbers.count == 4 else { return nil } + + // Treat normalized values (0...1) as ratios of the focused window frame. + let isNormalized = numbers.allSatisfy { $0 >= 0 && $0 <= 1 } + guard isNormalized else { return numbers } + + guard let appName, + let app = Perception.findApp(named: appName), + let appElement = Element.application(for: app.processIdentifier), + let window = appElement.focusedWindow(), + let pos = window.position(), + let size = window.size() + else { + return nil + } + + let originX = Double(pos.x) + let originY = Double(pos.y) + let width = Double(size.width) + let height = Double(size.height) + + return [ + originX + numbers[0] * width, + originY + numbers[1] * height, + originX + numbers[2] * width, + originY + numbers[3] * height, + ] + } + // MARK: - Wait After /// Handle a step's wait_after condition. diff --git a/recipes/linkedin-submit-comment.json b/recipes/linkedin-submit-comment.json new file mode 100644 index 0000000..fce92d0 --- /dev/null +++ b/recipes/linkedin-submit-comment.json @@ -0,0 +1,28 @@ +{ + "schema_version": 2, + "name": "linkedin-submit-comment", + "description": "Submit an already-open LinkedIn comment draft in Chrome by visually locating the blue 评论 button inside the active comment composer.", + "app": "Google Chrome", + "preconditions": { + "app_running": "Google Chrome", + "url_contains": "linkedin.com" + }, + "steps": [ + { + "id": 1, + "action": "vision_click", + "on_failure": "skip", + "note": "Click the blue 评论 submit button in the open LinkedIn comment composer", + "params": { + "description": "blue 评论 submit button at the bottom right of the open LinkedIn comment box", + "crop_box": "0.68,0.38,0.98,0.86" + }, + "wait_after": { + "condition": "elementGone", + "value": "内容创建文本编辑器", + "timeout": 2 + } + } + ], + "on_failure": "stop" +} From f4a8ed9e2e43e13ef1638cee292f85fb6b7249eb Mon Sep 17 00:00:00 2001 From: Allen Date: Fri, 13 Mar 2026 09:52:24 +0800 Subject: [PATCH 2/2] Fix learning recorder review issues --- Sources/GhostOS/Learning/EventHandlers.swift | 5 +- .../GhostOS/Learning/LearningRecorder.swift | 63 ++++++++++++++----- 2 files changed, 53 insertions(+), 15 deletions(-) diff --git a/Sources/GhostOS/Learning/EventHandlers.swift b/Sources/GhostOS/Learning/EventHandlers.swift index 57e5984..c89ef5f 100644 --- a/Sources/GhostOS/Learning/EventHandlers.swift +++ b/Sources/GhostOS/Learning/EventHandlers.swift @@ -34,10 +34,13 @@ nonisolated enum EventHandlers { if flags.contains(.maskShift) { mods.append("shift") } if flags.contains(.maskAlternate) { mods.append("option") } if flags.contains(.maskControl) { mods.append("control") } + let hasShortcutModifier = flags.contains(.maskCommand) + || flags.contains(.maskAlternate) + || flags.contains(.maskControl) let chars = keyChars(from: event) - if let chars, mods.isEmpty { + if let chars, !hasShortcutModifier { // Coalesce character keys into pending buffer recorder.withLock { session in guard session != nil else { return } diff --git a/Sources/GhostOS/Learning/LearningRecorder.swift b/Sources/GhostOS/Learning/LearningRecorder.swift index ed76e80..b2112c7 100644 --- a/Sources/GhostOS/Learning/LearningRecorder.swift +++ b/Sources/GhostOS/Learning/LearningRecorder.swift @@ -23,6 +23,7 @@ nonisolated public final class LearningRecorder: @unchecked Sendable { private var eventTap: CFMachPort? private var learningRunLoop: CFRunLoop? private var learningThread: Thread? + private var startupCancelled = false // Keystroke coalescing -- only access within withLock or flushPending* (caller holds lock) internal var pendingKeystrokes: String = "" @@ -64,27 +65,37 @@ nonisolated public final class LearningRecorder: @unchecked Sendable { if session != nil { os_unfair_lock_unlock(&lock); return .alreadyRecording } session = LearningSession(taskDescription: taskDescription) lastRecordedAppName = "" + startupCancelled = false os_unfair_lock_unlock(&lock) - let thread = Thread { [weak self] in self?.runLearningThread() } + let startupSignal = DispatchSemaphore(value: 0) + let thread = Thread { [weak self] in self?.runLearningThread(startupSignal: startupSignal) } thread.name = "ghost-learning" thread.qualityOfService = .userInteractive learningThread = thread thread.start() - // Busy-wait up to 500ms for tap creation - for _ in 0..<50 { - Thread.sleep(forTimeInterval: 0.01) - os_unfair_lock_lock(&lock) - let ready = eventTap != nil - os_unfair_lock_unlock(&lock) - if ready { learningLog("INFO", "Learning: recording started"); return nil } - } + let signaled = startupSignal.wait(timeout: .now() + .milliseconds(500)) == .success os_unfair_lock_lock(&lock) - let failed = eventTap == nil - if failed { session = nil } + let failed = !signaled || eventTap == nil + if failed { + session = nil + startupCancelled = true + } os_unfair_lock_unlock(&lock) + + if failed { + for _ in 0..<50 { + if learningThread?.isFinished == true { break } + Thread.sleep(forTimeInterval: 0.01) + } + if learningThread?.isFinished == true { + learningThread = nil + learningRunLoop = nil + } + } + if !failed { learningLog("INFO", "Learning: recording started") } return failed ? .inputMonitoringNotGranted : nil } @@ -97,6 +108,7 @@ nonisolated public final class LearningRecorder: @unchecked Sendable { let actions = cur.actions let result = cur session = nil + startupCancelled = false os_unfair_lock_unlock(&lock) if let rl = learningRunLoop { CFRunLoopStop(rl) } @@ -120,7 +132,15 @@ nonisolated public final class LearningRecorder: @unchecked Sendable { // MARK: - Background Thread - private func runLearningThread() { + private func runLearningThread(startupSignal: DispatchSemaphore) { + os_unfair_lock_lock(&lock) + let cancelledBeforeStart = startupCancelled || session == nil + os_unfair_lock_unlock(&lock) + if cancelledBeforeStart { + startupSignal.signal() + return + } + var mask: CGEventMask = 0 for t: CGEventType in [.keyDown, .leftMouseDown, .leftMouseUp, .rightMouseDown, .rightMouseUp, .scrollWheel] { mask |= (1 << t.rawValue) @@ -131,13 +151,25 @@ nonisolated public final class LearningRecorder: @unchecked Sendable { eventsOfInterest: mask, callback: learningEventCallback, userInfo: userInfo ) else { learningLog("ERROR", "Learning: CGEvent tap creation failed (Input Monitoring not granted?)") - os_unfair_lock_lock(&lock); session = nil; os_unfair_lock_unlock(&lock) + os_unfair_lock_lock(&lock) + session = nil + startupCancelled = true + os_unfair_lock_unlock(&lock) + startupSignal.signal() return } let source = CFMachPortCreateRunLoopSource(nil, tap, 0) let rl = CFRunLoopGetCurrent()! - os_unfair_lock_lock(&lock); eventTap = tap; learningRunLoop = rl; os_unfair_lock_unlock(&lock) + os_unfair_lock_lock(&lock) + let cancelledAfterCreate = startupCancelled || session == nil + if !cancelledAfterCreate { + eventTap = tap + learningRunLoop = rl + } + os_unfair_lock_unlock(&lock) + startupSignal.signal() + if cancelledAfterCreate { return } CFRunLoopAddSource(rl, source, .commonModes) CGEvent.tapEnable(tap: tap, enable: true) @@ -160,6 +192,9 @@ nonisolated public final class LearningRecorder: @unchecked Sendable { CFRunLoopRemoveSource(rl, source, .commonModes) os_unfair_lock_lock(&lock) eventTap = nil + learningRunLoop = nil + learningThread = nil + startupCancelled = false invalidateTimer(&keystrokeFlushTimer) invalidateTimer(&scrollFlushTimer) invalidateTimer(&maxDurationTimer)