Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Sources/ScreenTextKit/Capture/DaemonRunner.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ public final class DaemonRunner {
}

public func run() -> Never {
// FIX: Register with WindowServer for proper screen capture
let app = NSApplication.shared
app.setActivationPolicy(.accessory)

observer = NSWorkspace.shared.notificationCenter.addObserver(
forName: NSWorkspace.didActivateApplicationNotification,
object: nil,
Expand Down Expand Up @@ -55,7 +59,7 @@ public final class DaemonRunner {
capture(trigger: .manual)
logger.info("Daemon started")

RunLoop.main.run()
app.run()
fatalError("Run loop exited unexpectedly")
}

Expand Down
4 changes: 2 additions & 2 deletions Sources/ScreenTextKit/Capture/FrameBufferStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ public final class FrameBufferStore {
paths: ScreenTextPaths,
retentionSeconds: Int,
maxFrames: Int,
maxDimension: Int = 1280,
jpegQuality: Double = 0.45
maxDimension: Int = 2560,
jpegQuality: Double = 0.85
) {
self.paths = paths
self.retentionSeconds = retentionSeconds
Expand Down
26 changes: 16 additions & 10 deletions Sources/ScreenTextKit/Capture/NativeTextExtractor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,25 @@ public final class NativeTextExtractor: TextExtractor {
public func extract() throws -> ExtractedText? {
let metadata = metadataProvider.currentMetadata()

if !forceOCR {
if let accessibilityText = accessibilityExtractor.extractText(),
accessibilityText.count >= minimumAccessibilityChars {
return ExtractedText(text: accessibilityText, source: .accessibility, metadata: metadata)
}
}
let accessibilityText = accessibilityExtractor.extractText()
let hasGoodAccessibility = (accessibilityText?.count ?? 0) >= minimumAccessibilityChars

guard ocrEnabled else {
return nil
// Always run OCR too (if enabled); the result to return is chosen below
var ocrText: String? = nil
if ocrEnabled {
ocrText = try ocrExtractor.extractText()
}

if let ocrText = try ocrExtractor.extractText(), !ocrText.isEmpty {
return ExtractedText(text: ocrText, source: .ocr, metadata: metadata)
let accLen = accessibilityText?.count ?? 0
let ocrLen = ocrText?.count ?? 0

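// Prefer OCR when it is strictly longer; otherwise use accessibility text if it meets
// the minimum length, and fall back to any non-empty OCR text after that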
if ocrLen > accLen && ocrLen > 0 {
return ExtractedText(text: ocrText!, source: .ocr, metadata: metadata)
} else if accLen > 0 && hasGoodAccessibility {
return ExtractedText(text: accessibilityText!, source: .accessibility, metadata: metadata)
} else if ocrLen > 0 {
return ExtractedText(text: ocrText!, source: .ocr, metadata: metadata)
}

return nil
Expand Down
33 changes: 31 additions & 2 deletions Sources/ScreenTextKit/Capture/OCRTextExtractor.swift
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import CoreGraphics
import CoreImage
import Foundation
import ImageIO
import Vision
Expand All @@ -7,7 +8,7 @@ public final class OCRTextExtractor {
private let minimumTextHeight: Float
private let recognitionLevel: VNRequestTextRecognitionLevel

public init(minimumTextHeight: Float = 0.005, recognitionLevel: VNRequestTextRecognitionLevel = .accurate) {
public init(minimumTextHeight: Float = 0.002, recognitionLevel: VNRequestTextRecognitionLevel = .accurate) {
self.minimumTextHeight = minimumTextHeight
self.recognitionLevel = recognitionLevel
}
Expand All @@ -17,7 +18,24 @@ public final class OCRTextExtractor {
return nil
}

return try extractText(from: image)
// Run OCR on both original and inverted image, keep the one with more text.
// Dark UIs (WhatsApp, Slack, etc.) yield much more text when colors are inverted.
let originalText = try extractText(from: image)
let invertedText: String?
if let inverted = invertColors(image) {
invertedText = try extractText(from: inverted)
} else {
invertedText = nil
}

let orig = originalText ?? ""
let inv = invertedText ?? ""

if orig.isEmpty && inv.isEmpty {
return nil
}

return inv.count > orig.count ? inv : orig
}

public func extractText(fromImageURL imageURL: URL) throws -> String? {
Expand Down Expand Up @@ -54,4 +72,15 @@ public final class OCRTextExtractor {

return lines.joined(separator: "\n")
}

/// Holds the Core Image context shared by every `invertColors(_:)` call.
///
/// Building a `CIContext` is costly, so it is created once, on first use, and reused
/// instead of being rebuilt for each image.
private enum InvertRenderer {
    static let context = CIContext(options: [.useSoftwareRenderer: false])
}

/// Inverts image colors using Core Image — turns dark UIs light for better OCR.
///
/// - Parameter image: The image to invert.
/// - Returns: The color-inverted image, or `nil` if the `CIColorInvert` filter is
///   unavailable, yields no output, or the result cannot be rendered.
private func invertColors(_ image: CGImage) -> CGImage? {
    guard let filter = CIFilter(name: "CIColorInvert") else { return nil }
    filter.setValue(CIImage(cgImage: image), forKey: kCIInputImageKey)
    guard let output = filter.outputImage else { return nil }

    // Render through the shared context; only the filter and images are per-call.
    return InvertRenderer.context.createCGImage(output, from: output.extent)
}
}
Binary file added assets/AppIcon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.