Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions Cotabby.xcodeproj/project.pbxproj

Large diffs are not rendered by default.

33 changes: 29 additions & 4 deletions Cotabby/Services/Focus/AXTextGeometryResolver.swift
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,21 @@ struct AXTextGeometryResolver {
/// Throttle window for the Branch 2.5 static-text-run walk, matching the deep-walk interval:
/// short enough that caret geometry trails fast typing by at most one window, long enough to
/// keep a ~300-node AX walk off every poll tick in Gmail-class hosts.
private static let staticRunWalkThrottleInterval: TimeInterval = 0.1

/// Finds the best caret anchor available, preferring bounds-for-range and falling back to element frame.
/// `cocoaAnchorFrame` is the element's AXFrame already converted to Cocoa coordinates — it serves
/// as the ground-truth reference for detecting whether text-range rects need pixel-to-point scaling.
func resolveCaretRect(
for element: AXUIElement,
selection: NSRange,
supportsBoundsForRange: Bool,
supportsFrame: Bool,
cocoaAnchorFrame: CGRect?,
textValue: String? = nil,
textSelection: NSRange? = nil
textSelection: NSRange? = nil,
staticRunThrottle: StaticTextRunWalkThrottle? = nil,
focusChangeSequence: UInt64 = 0
) -> CaretGeometryResult? {
let selectionInTextValue = textSelection ?? selection

Expand Down Expand Up @@ -141,7 +148,9 @@ struct AXTextGeometryResolver {
if let result = resolveCaretFromChildTextRuns(
element: element,
parentSelection: selectionInTextValue,
parentText: parentText
parentText: parentText,
staticRunThrottle: staticRunThrottle,
focusChangeSequence: focusChangeSequence
) {
return result
}
Expand Down Expand Up @@ -211,14 +220,30 @@ struct AXTextGeometryResolver {
private func resolveCaretFromChildTextRuns(
element: AXUIElement,
parentSelection: NSRange,
parentText: String
parentText: String,
staticRunThrottle: StaticTextRunWalkThrottle? = nil,
focusChangeSequence: UInt64 = 0
) -> CaretGeometryResult? {
let parentTextLength = (parentText as NSString).length
guard parentSelection.location <= parentTextLength else {
return nil
}

let textRuns = collectStaticTextRuns(from: element)
// With a throttle, the expensive node walk is reused within the window while the
// caret-placement math below still reruns against the live text and selection, so the
// caret keeps tracking keystrokes inside slightly stale run frames. Deep-walk leaf calls
// pass no throttle: they are already bounded by `DeepGeometryWalkThrottle` upstream.
let textRuns: [(text: String, frame: CGRect)]
if let staticRunThrottle {
textRuns = staticRunThrottle.runs(
focusChangeSequence: focusChangeSequence,
interval: Self.staticRunWalkThrottleInterval
) {
collectStaticTextRuns(from: element)
}
} else {
textRuns = collectStaticTextRuns(from: element)
}

guard !textRuns.isEmpty else { return nil }

Expand Down
45 changes: 45 additions & 0 deletions Cotabby/Services/Focus/FocusSessionScopedCache.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import Foundation

/// Memoizes per-element Accessibility reads for as long as focus stays in a single field.
///
/// Entries are addressed by an element key and are only valid for the `focusChangeSequence`
/// (owned by `FocusTracker`) under which they were stored. The focus resolver otherwise repeats
/// invariant attribute reads (secure-field markers, terminal DOM classes) on every poll tick, and
/// each of those is a synchronous cross-process Accessibility round trip.
///
/// Tying entries to the sequence is the safety property: `elementIdentifier` is CFHash-based and
/// collides across recycled AX nodes, so a cache keyed on identity alone could hand a stale
/// verdict (for example "not secure") to a different field after a focus switch. Any change of
/// sequence is treated as a real field switch and discards every entry.
///
/// Declared as a class so state survives the value-typed `FocusSnapshotResolver`'s non-mutating
/// `resolveSnapshot`, in the same way as `DeepGeometryWalkThrottle` and `FieldStyleCache`.
@MainActor
final class FocusSessionScopedCache<Value> {
    /// Focus-change sequence the stored entries belong to; `nil` until the first lookup.
    private var activeSequence: UInt64?
    /// Memoized results for the active sequence, by element key.
    private var entries: [String: Value] = [:]

    // Without this, a `@MainActor` class with stored properties goes through the isolated-deinit
    // back-deploy path on dealloc, which over-releases and aborts app-hosted test runs. Releasing
    // value types needs no main-actor hop. `EmojiUsageStore` and `SystemMetricsStore` use the same
    // workaround.
    nonisolated deinit {}

    /// Looks up `key` in the current focus session, running `compute` and remembering its result
    /// only when no entry exists yet. The number of entries stays bounded by the handful of
    /// candidates inspected per session.
    ///
    /// - Parameters:
    ///   - key: Element key the value is stored under.
    ///   - focusChangeSequence: Sequence of the current focus session; a value different from
    ///     the one last seen empties the cache before the lookup.
    ///   - compute: Produces the value on a cache miss; not called on a hit.
    /// - Returns: The cached value, or the freshly computed one.
    func value(
        forKey key: String,
        focusChangeSequence: UInt64,
        compute: () -> Value
    ) -> Value {
        startSessionIfNeeded(focusChangeSequence)

        guard let hit = entries[key] else {
            let fresh = compute()
            entries[key] = fresh
            return fresh
        }
        return hit
    }

    /// Drops every entry and adopts `focusChangeSequence` when it differs from the active one.
    private func startSessionIfNeeded(_ focusChangeSequence: UInt64) {
        let isSameSession = (activeSequence == focusChangeSequence)
        if isSameSession {
            return
        }
        activeSequence = focusChangeSequence
        entries.removeAll(keepingCapacity: true)
    }
}
182 changes: 139 additions & 43 deletions Cotabby/Services/Focus/FocusSnapshotResolver.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ struct FocusSnapshotResolver {
/// Carries deep-walk throttle state across the value-typed resolver's non-mutating polls.
private let deepWalkThrottle = DeepGeometryWalkThrottle()

/// Same lifetime trick for the Branch 2.5 static-text-run walk: collected run frames are
/// reused across polls of one field instead of re-walking up to ~300 nodes per tick.
private let staticRunWalkThrottle = StaticTextRunWalkThrottle()

/// Session-scoped caches for AX reads that are invariant while focus stays in one field.
/// Secure-field verdicts gate whether Cotabby operates at all, so they are scoped to the
/// focus-change sequence rather than raw element identity, which CFHash can recycle across
/// fields (see `FocusSessionScopedCache`).
private let secureFieldVerdictCache = FocusSessionScopedCache<Bool>()
private let terminalDetectionCache = FocusSessionScopedCache<Bool>()

/// Caches the resolved field font/color per focused element so the attributed-string AX read
/// happens once per field rather than on every poll. Reference type for the same reason as
/// `deepWalkThrottle`: it carries state across the value-typed resolver's non-mutating polls.
Expand Down Expand Up @@ -72,9 +83,14 @@ struct FocusSnapshotResolver {
let deepDescendants = BrowserAppDetector.needsWebAccessibilityPriming(
bundleIdentifier: bundleIdentifier)
let candidateResolution = resolveCandidate(
around: focusedElement,
around: FocusedElementReading(
element: focusedElement,
role: focusedRole,
subrole: focusedSubrole
),
bundleIdentifier: bundleIdentifier,
deepDescendants: deepDescendants
deepDescendants: deepDescendants,
focusChangeSequence: focusChangeSequence
)
let resolution = candidateResolution.resolution
let diagnosticCandidate = candidateResolution.diagnosticCandidate
Expand Down Expand Up @@ -221,11 +237,18 @@ struct FocusSnapshotResolver {
// terminal while leaving the editor and chat working. Read on the focused element because
// that is exactly where xterm puts the caret (`xterm-helper-textarea`). Computed here — only
// once a real editable field has resolved — so idle/non-editable focus polls don't pay for an
// extra AXDOMClassList round-trip; native apps don't vend the attribute anyway.
let isIntegratedTerminal = TerminalAppDetector.isIntegratedTerminal(
domClassList: AXHelper.stringArrayValue(
for: "AXDOMClassList" as CFString, on: focusedElement) ?? []
)
// extra AXDOMClassList round-trip; native apps don't vend the attribute anyway. Cached per
// focus session: the class list on one focused element cannot change without a field switch
// bumping the sequence, and the uncached read previously cost one round-trip on every poll tick.
let isIntegratedTerminal = terminalDetectionCache.value(
forKey: focusedElementIdentifier,
focusChangeSequence: focusChangeSequence
) {
TerminalAppDetector.isIntegratedTerminal(
domClassList: AXHelper.stringArrayValue(
for: "AXDOMClassList" as CFString, on: focusedElement) ?? []
)
}
// Web-vs-native classification for the caret-geometry trust policy. The DOM-attribute
// signal was computed in `candidateSnapshot` from the attribute list it already fetched,
// so this adds no AX round-trip to the focus poll.
Expand Down Expand Up @@ -294,32 +317,71 @@ struct FocusSnapshotResolver {
/// reading text/selection/caret data from many wrapper and static-text nodes even after the real
/// input target had already been discovered. This preserves the resolver's "first full
/// capability wins" policy while avoiding unnecessary synchronous AX IPC.
///
/// Candidate enumeration is staged the same way: the bounded descendant BFS used for Chromium
/// wrappers costs hundreds of additional AX round trips per pass, and a shallow candidate
/// (focused node, ancestors, their children) wins in the common case — including Chromium
/// hosts that focus the editable directly — so the BFS runs only when no shallow candidate
/// resolves with full capabilities. Evaluation order is unchanged: shallow candidates always
/// preceded BFS appends, so any shallow winner made the BFS results unreachable anyway.
private func resolveCandidate(
around focusedElement: AXUIElement,
around focusedReading: FocusedElementReading,
bundleIdentifier: String,
deepDescendants: Bool
deepDescendants: Bool,
focusChangeSequence: UInt64
) -> FocusCandidateResolution {
var bestPartial: (candidate: AXFocusCandidate, evaluation: FocusCapabilityCandidateEvaluation)?
var inspectedCount = 0

for element in candidateElements(around: focusedElement, deepDescendants: deepDescendants) {
inspectedCount += 1
let candidate = candidateSnapshot(for: element, bundleIdentifier: bundleIdentifier)
let evaluation = FocusCapabilityResolver.evaluate(candidate.resolverCandidate)

if evaluation.hasFullCapabilities {
return FocusCandidateResolution(
resolvedCandidate: candidate,
diagnosticCandidate: candidate,
resolution: FocusCapabilityResolution(
selectedEvaluation: evaluation,
inspectedCandidateCount: inspectedCount
)
func winner(in elements: [AXUIElement]) -> FocusCandidateResolution? {
for element in elements {
inspectedCount += 1
let candidate = candidateSnapshot(
for: element,
bundleIdentifier: bundleIdentifier,
focusChangeSequence: focusChangeSequence,
focusedReading: focusedReading
)
let evaluation = FocusCapabilityResolver.evaluate(candidate.resolverCandidate)

if evaluation.hasFullCapabilities {
return FocusCandidateResolution(
resolvedCandidate: candidate,
diagnosticCandidate: candidate,
resolution: FocusCapabilityResolution(
selectedEvaluation: evaluation,
inspectedCandidateCount: inspectedCount
)
)
}

if bestPartial == nil || evaluation.score > bestPartial!.evaluation.score {
bestPartial = (candidate, evaluation)
}
}

if bestPartial == nil || evaluation.score > bestPartial!.evaluation.score {
bestPartial = (candidate, evaluation)
return nil
}

var seen = Set<String>()
let shallow = shallowCandidateElements(around: focusedReading.element, seen: &seen)
if let resolved = winner(in: shallow.ordered) {
return resolved
}

if deepDescendants {
var deepCandidates: [AXUIElement] = []
appendEditableDescendants(of: [focusedReading.element] + shallow.ancestors) { element in
guard let element else {
return
}
guard seen.insert(AXHelper.elementIdentity(for: element)).inserted else {
return
}
deepCandidates.append(element)
}
if let resolved = winner(in: deepCandidates) {
return resolved
}
}

Expand Down Expand Up @@ -364,11 +426,15 @@ struct FocusSnapshotResolver {
)
}

private func candidateElements(
around focusedElement: AXUIElement, deepDescendants: Bool = false
) -> [AXUIElement] {
/// Enumerates the cheap nearby candidates: the focused node, up to two ancestors, and their
/// children. The Chromium descendant BFS is intentionally not part of this list — see
/// `resolveCandidate` for the staging rationale (Chromium reports focus on a wrapper above the
/// editable, AXWebArea → AXGroup → … → AXTextField, so the BFS exists as the fallback for the
/// cases where this shallow neighborhood misses the real target).
private func shallowCandidateElements(
around focusedElement: AXUIElement, seen: inout Set<String>
) -> (ordered: [AXUIElement], ancestors: [AXUIElement]) {
var ordered: [AXUIElement] = []
var seen = Set<String>()

func append(_ element: AXUIElement?) {
guard let element else {
Expand Down Expand Up @@ -410,15 +476,7 @@ struct FocusSnapshotResolver {
}
}

// Chromium reports focus on a wrapper above the editable (AXWebArea → AXGroup → … →
// AXTextField), so the shallow walk above can miss the real target. Search descendants for
// editable-looking nodes, bounded in depth and count and appending only likely editables
// (not every visited node) so per-tick candidateSnapshot cost stays in check.
if deepDescendants {
appendEditableDescendants(of: [focusedElement] + ancestors, append: append)
}

return ordered
return (ordered, ancestors)
}

/// Bounded BFS for editable-looking descendants, used only for Chromium/Electron. Traverses up
Expand Down Expand Up @@ -604,10 +662,24 @@ struct FocusSnapshotResolver {
}

/// Extracts the AX properties Cotabby needs from one candidate element near the current focus.
private func candidateSnapshot(for element: AXUIElement, bundleIdentifier: String)
-> AXFocusCandidate {
let role = AXHelper.stringValue(for: kAXRoleAttribute as CFString, on: element) ?? "Unknown"
let subrole = AXHelper.stringValue(for: kAXSubroleAttribute as CFString, on: element)
private func candidateSnapshot(
for element: AXUIElement,
bundleIdentifier: String,
focusChangeSequence: UInt64,
focusedReading: FocusedElementReading
) -> AXFocusCandidate {
// `resolveSnapshot` already read the focused element's role pair for diagnostics, and the
// focused element is the winning candidate in the common case; re-reading would repeat two
// AX round trips on every poll tick. `CFEqual` is a local comparison, not an IPC.
let role: String
let subrole: String?
if CFEqual(element, focusedReading.element) {
role = focusedReading.role
subrole = focusedReading.subrole
} else {
role = AXHelper.stringValue(for: kAXRoleAttribute as CFString, on: element) ?? "Unknown"
subrole = AXHelper.stringValue(for: kAXSubroleAttribute as CFString, on: element)
}
let supportedAttributes = Set(AXHelper.attributeNames(on: element))
let supportedParameterizedAttributes = Set(
AXHelper.parameterizedAttributeNames(on: element))
Expand Down Expand Up @@ -712,17 +784,33 @@ struct FocusSnapshotResolver {
supportsFrame: supportedAttributes.contains("AXFrame"),
cocoaAnchorFrame: inputFrameRect,
textValue: textValue,
textSelection: selection
textSelection: selection,
// The run-walk throttle slot is shared across calls, so it is restricted to the
// focused element: that is the per-tick steady-state caller, and scoping prevents
// one slot from serving run frames collected under a different root element.
staticRunThrottle: CFEqual(element, focusedReading.element)
? staticRunWalkThrottle
: nil,
focusChangeSequence: focusChangeSequence
)
}
let caretRect = caretResult?.rect
let caretQuality = caretResult?.quality
let isSecure = isSecureElement(element: element, role: role, subrole: subrole)
// Recorded from the already-fetched attribute list (no extra AX call) so snapshot
// assembly can classify the field as web-rendered without touching the element again.
let vendsDOMAttributes = WebContentFieldDetector.vendsDOMAttributes(supportedAttributes)
let elementIdentifier = AXHelper.elementIdentifier(
for: element, bundleIdentifier: bundleIdentifier)
// Secure-ness is invariant for an element's lifetime, and the three marker probes behind
// it (role description, title, description) are separate AX round trips otherwise paid on
// every poll tick. Session scoping keeps recycled element identities from ever serving a
// stale verdict to a different field.
let isSecure = secureFieldVerdictCache.value(
forKey: elementIdentifier,
focusChangeSequence: focusChangeSequence
) {
isSecureElement(element: element, role: role, subrole: subrole)
}
let resolverCandidate = FocusCapabilityCandidate(
elementIdentifier: elementIdentifier,
role: role,
Expand Down Expand Up @@ -864,6 +952,14 @@ private struct FocusCandidateResolution {
let resolution: FocusCapabilityResolution
}

/// The focused element together with its already-read role pair, so candidate snapshotting can
/// reuse the reads `resolveSnapshot` performed for diagnostics instead of repeating the IPC.
private struct FocusedElementReading {
    /// The focused AX element; candidate snapshotting compares each candidate against it with
    /// `CFEqual` to decide whether the stored role pair applies.
    let element: AXUIElement
    /// Role already read for `element`, reused when a candidate is the focused element itself.
    let role: String
    /// Subrole already read for `element`; may be `nil`.
    let subrole: String?
}

private struct AXTextSelection {
let text: String
let selection: NSRange
Expand Down
Loading