diff --git a/Sources/ScreenTextKit/Capture/DaemonRunner.swift b/Sources/ScreenTextKit/Capture/DaemonRunner.swift index 3be0ee0..a27fa01 100644 --- a/Sources/ScreenTextKit/Capture/DaemonRunner.swift +++ b/Sources/ScreenTextKit/Capture/DaemonRunner.swift @@ -27,6 +27,10 @@ public final class DaemonRunner { } public func run() -> Never { + // FIX: Register with WindowServer for proper screen capture + let app = NSApplication.shared + app.setActivationPolicy(.accessory) + observer = NSWorkspace.shared.notificationCenter.addObserver( forName: NSWorkspace.didActivateApplicationNotification, object: nil, @@ -55,7 +59,7 @@ public final class DaemonRunner { capture(trigger: .manual) logger.info("Daemon started") - RunLoop.main.run() + app.run() fatalError("Run loop exited unexpectedly") } diff --git a/Sources/ScreenTextKit/Capture/FrameBufferStore.swift b/Sources/ScreenTextKit/Capture/FrameBufferStore.swift index f9a6250..b025581 100644 --- a/Sources/ScreenTextKit/Capture/FrameBufferStore.swift +++ b/Sources/ScreenTextKit/Capture/FrameBufferStore.swift @@ -14,8 +14,8 @@ public final class FrameBufferStore { paths: ScreenTextPaths, retentionSeconds: Int, maxFrames: Int, - maxDimension: Int = 1280, - jpegQuality: Double = 0.45 + maxDimension: Int = 2560, + jpegQuality: Double = 0.85 ) { self.paths = paths self.retentionSeconds = retentionSeconds diff --git a/Sources/ScreenTextKit/Capture/NativeTextExtractor.swift b/Sources/ScreenTextKit/Capture/NativeTextExtractor.swift index cc64ee3..462b9c6 100644 --- a/Sources/ScreenTextKit/Capture/NativeTextExtractor.swift +++ b/Sources/ScreenTextKit/Capture/NativeTextExtractor.swift @@ -27,19 +27,25 @@ public final class NativeTextExtractor: TextExtractor { public func extract() throws -> ExtractedText? 
{ let metadata = metadataProvider.currentMetadata() - if !forceOCR { - if let accessibilityText = accessibilityExtractor.extractText(), - accessibilityText.count >= minimumAccessibilityChars { - return ExtractedText(text: accessibilityText, source: .accessibility, metadata: metadata) - } - } + let accessibilityText = accessibilityExtractor.extractText() + let hasGoodAccessibility = (accessibilityText?.count ?? 0) >= minimumAccessibilityChars - guard ocrEnabled else { - return nil + // Always run OCR too (if enabled) and keep the longer result + var ocrText: String? = nil + if ocrEnabled { + ocrText = try ocrExtractor.extractText() } - if let ocrText = try ocrExtractor.extractText(), !ocrText.isEmpty { - return ExtractedText(text: ocrText, source: .ocr, metadata: metadata) + let accLen = accessibilityText?.count ?? 0 + let ocrLen = ocrText?.count ?? 0 + + // Prefer the longer extraction; accessibility wins only when it also meets the minimum-chars threshold, otherwise fall back to OCR even if shorter + if ocrLen > accLen && ocrLen > 0 { + return ExtractedText(text: ocrText!, source: .ocr, metadata: metadata) + } else if accLen > 0 && hasGoodAccessibility { + return ExtractedText(text: accessibilityText!, source: .accessibility, metadata: metadata) + } else if ocrLen > 0 { + return ExtractedText(text: ocrText!, source: .ocr, metadata: metadata) } return nil diff --git a/Sources/ScreenTextKit/Capture/OCRTextExtractor.swift b/Sources/ScreenTextKit/Capture/OCRTextExtractor.swift index 3d763e6..25fa5b6 100644 --- a/Sources/ScreenTextKit/Capture/OCRTextExtractor.swift +++ b/Sources/ScreenTextKit/Capture/OCRTextExtractor.swift @@ -1,4 +1,5 @@ import CoreGraphics +import CoreImage import Foundation import ImageIO import Vision @@ -7,7 +8,7 @@ public final class OCRTextExtractor { private let minimumTextHeight: Float private let recognitionLevel: VNRequestTextRecognitionLevel - public init(minimumTextHeight: Float = 0.005, recognitionLevel: VNRequestTextRecognitionLevel = .accurate) { + public init(minimumTextHeight: Float = 0.002, recognitionLevel: VNRequestTextRecognitionLevel = 
.accurate) { self.minimumTextHeight = minimumTextHeight self.recognitionLevel = recognitionLevel } @@ -17,7 +18,24 @@ public final class OCRTextExtractor { return nil } - return try extractText(from: image) + // Run OCR on both original and inverted image, keep the one with more text. + // Dark UIs (WhatsApp, Slack, etc.) yield much more text when colors are inverted. + let originalText = try extractText(from: image) + let invertedText: String? + if let inverted = invertColors(image) { + invertedText = try extractText(from: inverted) + } else { + invertedText = nil + } + + let orig = originalText ?? "" + let inv = invertedText ?? "" + + if orig.isEmpty && inv.isEmpty { + return nil + } + + return inv.count > orig.count ? inv : orig } public func extractText(fromImageURL imageURL: URL) throws -> String? { @@ -54,4 +72,15 @@ public final class OCRTextExtractor { return lines.joined(separator: "\n") } + + /// Invert image colors using CoreImage — turns dark UIs light for better OCR + private func invertColors(_ image: CGImage) -> CGImage? { + let ciImage = CIImage(cgImage: image) + guard let filter = CIFilter(name: "CIColorInvert") else { return nil } + filter.setValue(ciImage, forKey: kCIInputImageKey) + guard let output = filter.outputImage else { return nil } + + let context = CIContext(options: [.useSoftwareRenderer: false]) + return context.createCGImage(output, from: output.extent) + } } diff --git a/assets/AppIcon.png b/assets/AppIcon.png new file mode 100644 index 0000000..7a16638 Binary files /dev/null and b/assets/AppIcon.png differ