diff --git a/KeyType/Logic/Completion/CompletionController.swift b/KeyType/Logic/Completion/CompletionController.swift index 97c04d5..13fbed6 100644 --- a/KeyType/Logic/Completion/CompletionController.swift +++ b/KeyType/Logic/Completion/CompletionController.swift @@ -17,6 +17,7 @@ import Foundation import LlamaModelRuntime import MacContextCapture import ModelManagement +import ModelProfileGeneration import ModelRuntime import Observation import Personalization @@ -562,10 +563,18 @@ final class CompletionController { // optional side sections are frozen briefly so unrelated history/clipboard/OCR updates do // not rewrite the prompt prefix and destroy KV append reuse mid-burst. let (sideContext, sideContextReused) = promptSideContext(for: promptContext) + // Relevance-filter the frozen history against the *live* beforeCursor so topically-unrelated + // samples (e.g. a bio stored from an earlier session in the same app) are dropped before they + // reach the prompt. This runs at generation time with the current context, not inside the + // 2-second frozen side-context cache, so the judgment always reflects what the user is typing. + let filteredHistory = WritingHistoryFilter.filterByRelevance( + sideContext.previousUserInputs, + beforeCursor: context.beforeCursor + ) let promptResult = KeyTypeModuleGraph.makePromptBuilder().buildPrompt( context: promptContext, customInstructions: settings.promptCustomInstructions(appInstructions: policy.customInstructions), - previousUserInputs: sideContext.previousUserInputs, + previousUserInputs: filteredHistory, pasteboardText: sideContext.pasteboardText, screenText: sideContext.screenText, includeEnvironmentContext: policy.includesEnvironmentContext @@ -577,13 +586,21 @@ final class CompletionController { let healExtraTokens = healSlack > 0 ? 1 : 0 // Completion length is user-configurable (Settings) and maps to the decoder's token/width budget. let length = settings.completionLength + // Clipboard and OCR are background context, not text to reproduce; carry them so the output + // filter can drop a completion that just parrots them verbatim. History is excluded — it is + // already same-app/domain scoped and echoing the user's own recurring phrases is intended. + let injectedContext = Self.injectedContext( + pasteboardText: sideContext.pasteboardText, + screenText: sideContext.screenText + ) let request = CompletionRequest( context: context, prompt: promptResult.prompt, requiredPrefixBytes: requiredPrefixBytes, mode: policy.completionMode, maxCompletionTokens: length.maxCompletionTokens + healExtraTokens, - maxDisplayWidth: length.maxDisplayWidth + healSlack + maxDisplayWidth: length.maxDisplayWidth + healSlack, + injectedContext: injectedContext ) rememberFullPromptDebug( for: request, @@ -591,6 +608,7 @@ final class CompletionController { promptContext: promptContext, tokenHealing: heal.map { FullPromptTokenHealing(head: $0.head, heal: $0.heal) }, sideContext: sideContext, + filteredPreviousUserInputs: filteredHistory, sideContextReused: sideContextReused, policy: policy, completionLength: length, @@ -759,6 +777,7 @@ final class CompletionController { promptContext: TextFieldContext, tokenHealing: FullPromptTokenHealing?, sideContext: FrozenPromptSideContext, + filteredPreviousUserInputs: [String], sideContextReused: Bool, policy: CompletionPolicy, completionLength: CompletionLength, @@ -776,7 +795,7 @@ final class CompletionController { historyEnabled: sideContext.historyEnabled, clipboardEnabled: sideContext.clipboardEnabled, ocrEnabled: sideContext.ocrEnabled, - previousUserInputs: sideContext.previousUserInputs, + previousUserInputs: filteredPreviousUserInputs, pasteboardText: sideContext.pasteboardText, screenText: sideContext.screenText ), @@ -886,11 +905,19 @@ final class CompletionController { return (cached, true) } + // Scope history to the focused app. Cross-app recent samples bleed unrelated content into the + // prompt — e.g. a Notes draft about an API key surfacing as a verbatim suggestion in a fresh + // Gmail message — which the small model tends to parrot. Same-app history still personalizes + // tone/recurring phrases without leaking content across contexts. + // Normalize an empty domain to nil so it can't collapse the same-app filter to `domain == ""` + // and silently drop all real history for the app. + let scopedDomain = context.target.domain.flatMap { $0.isEmpty ? nil : $0 } let query = WritingHistoryQuery( bundleIdentifier: context.target.bundleIdentifier, - domain: context.target.domain, + domain: scopedDomain, typingContext: context.typingContext, - language: context.detectedLanguage + language: context.detectedLanguage, + sameAppOnly: true ) let previousUserInputs = settings.historyEnabled ? history.samples(for: query) @@ -970,6 +997,59 @@ final class CompletionController { case notApplicable } + /// Clipboard + OCR text injected into the prompt, as the echo guard consumes it. History is + /// intentionally excluded (same-app/domain scoped; echoing the user's own phrases is intended). + private static func injectedContext(pasteboardText: String?, screenText: String?) -> [String] { + [pasteboardText, screenText].compactMap { $0 } + } + + /// Re-check the context-dependent suppression nets against the *live* context before re-showing a + /// cached completion. The candidate was filtered once at generation time, but reuse re-shows it + /// without going back through the pipeline, and the inputs those nets key off can change after the + /// fact: + /// - prefix-repetition / suffix-overlap key off `beforeCursor`/`afterCursor`, which grow as the + /// user types through the suggestion — a tail clean at anchor time can become a verbatim + /// repetition (or suffix duplication) of text just typed; + /// - the echo guard keys off injected clipboard/OCR context, which can change mid-burst or differ + /// from when an older reused snapshot was generated. We check it against the currently-frozen + /// side context (already cached, so no hot-path pasteboard read). + /// Returns `true` when the remaining text is still safe to show. + private func reuseRemainingPassesLiveGuards(remaining: String, context: TextFieldContext) -> Bool { + Self.reuseRemainingIsSafe( + remaining: remaining, + context: context, + injectedContext: Self.injectedContext( + pasteboardText: frozenSideContext?.pasteboardText, + screenText: frozenSideContext?.screenText + ) + ) + } + + /// Pure decision behind `reuseRemainingPassesLiveGuards`, factored out so the reuse-safety rules + /// are unit-testable without constructing a controller. `true` when `remaining` is still safe to + /// re-show against the given live context and injected side context. + nonisolated static func reuseRemainingIsSafe( + remaining: String, + context: TextFieldContext, + injectedContext: [String] + ) -> Bool { + guard !remaining.isEmpty else { return true } + if PrefixRepetitionGuard.repeatsPrefix(completion: remaining, beforeCursor: context.beforeCursor) { + return false + } + if SuffixOverlapGuard.duplicatesSuffix( + completion: remaining, + beforeCursor: context.beforeCursor, + afterCursor: context.afterCursor + ) { + return false + } + if ContextEchoGuard.echoesInjectedContext(completion: remaining, injectedContext: injectedContext) { + return false + } + return true + } + @discardableResult private func applyReuseHistoryIfUseful( for live: TextFieldContext, @@ -980,6 +1060,11 @@ final class CompletionController { switch reuseHistory.decision(for: live) { case let .reuse(reuse): + guard reuseRemainingPassesLiveGuards(remaining: reuse.remainingText, context: live) else { + predictionLog.append("REUSE rejected by live guard remaining=\"\(PredictionLog.escape(reuse.remainingText))\"") + clearCompletion() + return .mustRecompute + } anchorText = reuse.anchorText anchorContext = reuse.anchorContext if updateLatestContext { latestContext = live } @@ -1292,6 +1377,10 @@ final class CompletionController { ) -> Bool { switch decision { case let .reuse(reuse): + guard reuseRemainingPassesLiveGuards(remaining: reuse.remainingText, context: optimistic) else { + predictionLog.append("REUSE rejected by live guard remaining=\"\(PredictionLog.escape(reuse.remainingText))\"") + return false + } anchorText = reuse.anchorText anchorContext = reuse.anchorContext latestContext = optimistic @@ -1478,12 +1567,28 @@ final class CompletionController { forFilename: modelFilename, vocabSize: runtime.metadata.vocabularySize ) - let profile = try MmapAutocompleteProfile.open( - at: try ModelContainer.profileURL(family: family), - tokenizerVocabSize: runtime.metadata.vocabularySize, - tokenizerBytes: { try runtime.tokenizer.rawBytes(for: $0) }, - expectedModelFamily: family - ) + let profileURL = try ModelContainer.profileURL(family: family) + func openProfile() throws -> MmapAutocompleteProfile { + try MmapAutocompleteProfile.open( + at: profileURL, + tokenizerVocabSize: runtime.metadata.vocabularySize, + tokenizerBytes: { try runtime.tokenizer.rawBytes(for: $0) }, + expectedModelFamily: family + ) + } + let profile: MmapAutocompleteProfile + do { + profile = try openProfile() + } catch { + // A profile built by an older classifier / schema version fails to open. No other launch + // path rebuilds it (setup only checks the file *exists*), so an app update that changes the + // token classification would otherwise brick completions for existing users. Rebuild it in + // place from the model's tokenizer, then retry. See ADR-021 / ACPF currentSchemaVersion. + Logger(subsystem: "com.pattonium.KeyType", category: "completion") + .error("ACPF profile open failed (\(String(describing: error), privacy: .public)); rebuilding for \(modelFilename, privacy: .public)") + _ = try await ProfileGenerator.generateProfileIfNeeded(forModelFilename: modelFilename) + profile = try openProfile() + } // Apply the telemetry-derived nudges to the decoder defaults: a larger relative cutoff keeps // more branches alive (fewer suppressions), a lower probability floor admits weaker-but-valid // continuations. Bounds are clamped inside `ThresholdTuner`. See ADR-023. diff --git a/KeyType/Logic/Context/ScreenContextController.swift b/KeyType/Logic/Context/ScreenContextController.swift index 55c5c5c..c788a06 100644 --- a/KeyType/Logic/Context/ScreenContextController.swift +++ b/KeyType/Logic/Context/ScreenContextController.swift @@ -99,6 +99,11 @@ final class ScreenContextController { let key = windowKey(for: snapshot) guard key != lastWindowKey else { return } lastWindowKey = key + // Drop the previous window's cached OCR *before* kicking off the new (async) capture, so a + // completion fired in the just-focused window can't be fed the prior window's screen text + // while the fresh capture is still in flight. Without this, switching browser tabs/windows + // leaks the old page's text (e.g. a "2 of 10 …" results counter) into the new one's prompt. + engine.clear() capture(for: snapshot) } @@ -120,7 +125,18 @@ final class ScreenContextController { // screen context carries only the *surrounding* on-screen text. let context = snapshot.context let fieldText = context.beforeCursor + context.afterCursor - engine.refresh(pid: pid, fieldText: fieldText) + // The caret location lets the capturer pick the right window when the app has several open, + // so screen context can't bleed in text from a different window of the same app. `caretRect` + // is in AppKit space (bottom-left origin) but ScreenCaptureKit window frames are in CG space + // (top-left origin), so convert before handing it down — otherwise the Y axes don't match and + // the wrong window (or none) is selected. + let focusPoint = snapshot.caretRect.flatMap { rect -> CGPoint? in + DisplayCoordinateConverter.coreGraphicsPoint( + fromAppKitPoint: CGPoint(x: rect.midX, y: rect.midY), + displays: ScreenDisplayGeometryProvider.current() + ) + } + engine.refresh(pid: pid, fieldText: fieldText, focusPoint: focusPoint) } // MARK: - Eligibility diff --git a/KeyType/Logic/Telemetry/WritingHistoryRecorder.swift b/KeyType/Logic/Telemetry/WritingHistoryRecorder.swift index d7b6e84..d030b43 100644 --- a/KeyType/Logic/Telemetry/WritingHistoryRecorder.swift +++ b/KeyType/Logic/Telemetry/WritingHistoryRecorder.swift @@ -122,6 +122,10 @@ final class WritingHistoryRecorder { guard sample.text.trimmingCharacters( in: .whitespacesAndNewlines ).count >= minimumCharacters else { return } + // Belt-and-suspenders junk gate: skip entries that aren't prose (bare URLs, UUID blobs, + // filesystem paths) before they reach the encrypted DB. Mirrored in WritingHistorySelection + // for samples already on disk from before this guard was introduced. + guard WritingHistoryFilter.isProse(sample.text) else { return } // Re-resolve the policy from the captured metadata: secure/sensitive fields and apps that // disable training-data collection must never contribute samples. diff --git a/KeyTypeTests/KeyTypeTests.swift b/KeyTypeTests/KeyTypeTests.swift index 27cd589..41524b1 100644 --- a/KeyTypeTests/KeyTypeTests.swift +++ b/KeyTypeTests/KeyTypeTests.swift @@ -273,6 +273,48 @@ struct KeyTypeTests { #expect(advanced == nil) } + // MARK: - Reuse re-check (H2) + + @Test func reuseRejectsRemainingThatRepeatsRecentlyTypedText() { + // As the user types through a cached suggestion, beforeCursor grows; a tail that becomes a + // verbatim repetition of just-typed text must not be re-shown via reuse. + let context = TextFieldContext( + beforeCursor: "You can use it to access the OpenAI. And", + target: Self.target + ) + #expect( + CompletionController.reuseRemainingIsSafe( + remaining: " you can use it to access the OpenAI again", + context: context, + injectedContext: [] + ) == false + ) + } + + @Test func reuseRejectsRemainingThatEchoesInjectedClipboard() { + // A cached completion (clean at anchor time) must not be re-shown if it now parrots the + // currently-injected clipboard/OCR context. + let context = TextFieldContext(beforeCursor: "Hi Molly,", target: Self.target) + #expect( + CompletionController.reuseRemainingIsSafe( + remaining: " if you require maintenance of UPS systems or", + context: context, + injectedContext: ["if you require maintenance of UPS systems or backup power, call us."] + ) == false + ) + } + + @Test func reuseAllowsGenuineRemaining() { + let context = TextFieldContext(beforeCursor: "Hi Molly,", target: Self.target) + #expect( + CompletionController.reuseRemainingIsSafe( + remaining: " hope you are doing well today", + context: context, + injectedContext: ["if you require maintenance of UPS systems or backup power, call us."] + ) + ) + } + @Test func promotionCachePromotesLowerRankedBranchWhenTopIsInvalidated() { let cache = Self.promotionCache(candidates: [ "ship it today", diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AlphanumericNormalizer.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AlphanumericNormalizer.swift new file mode 100644 index 0000000..725cea4 --- /dev/null +++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AlphanumericNormalizer.swift @@ -0,0 +1,16 @@ +import Foundation + +/// Shared text normalization for the content-overlap guards (`SuffixOverlapGuard`, +/// `PrefixRepetitionGuard`, `ContextEchoGuard`). Comparisons are done on case-folded alphanumeric +/// scalars only, so differences in whitespace, punctuation, and stray symbol glyphs the model +/// sometimes prepends ("**", "•") don't defeat a match. +enum AlphanumericNormalizer { + /// Case-folded string of only the alphanumeric scalars in `text`. + static func normalize(_ text: String) -> String { + var result = String.UnicodeScalarView() + for scalar in text.lowercased().unicodeScalars where CharacterSet.alphanumerics.contains(scalar) { + result.append(scalar) + } + return String(result) + } +} diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift index 60d7108..a7834da 100644 --- a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift +++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift @@ -135,6 +135,12 @@ public struct CompletionRequest: Equatable { public var mode: CompletionMode public var maxCompletionTokens: Int public var maxDisplayWidth: Int + /// Side-context text injected into the prompt that the user did NOT type — clipboard contents and + /// on-screen OCR text. Carried alongside the request so the output filter can drop a completion + /// that merely parrots it verbatim (`ContextEchoGuard`). Writing-history samples are deliberately + /// excluded: they are scoped to the same app/domain and reproducing the user's own recurring + /// phrases is the point of that feature. + public var injectedContext: [String] public init( context: TextFieldContext, @@ -142,7 +148,8 @@ public struct CompletionRequest: Equatable { requiredPrefixBytes: [UInt8] = [], mode: CompletionMode = .prose, maxCompletionTokens: Int = 4, - maxDisplayWidth: Int = 80 + maxDisplayWidth: Int = 80, + injectedContext: [String] = [] ) { self.context = context self.prompt = prompt @@ -150,6 +157,7 @@ public struct CompletionRequest: Equatable { self.mode = mode self.maxCompletionTokens = maxCompletionTokens self.maxDisplayWidth = maxDisplayWidth + self.injectedContext = injectedContext } } @@ -202,6 +210,26 @@ public enum SuppressionReason: Equatable { /// A mid-line / fill-in-the-middle completion that is too long or too low-probability to show /// without risking a wrong suggestion. case lowConfidenceMidLine + /// The completion reproduces a phrase that is already present in the recent text before the caret. + /// Accepting it would create a verbatim repetition loop. See `PrefixRepetitionGuard`. + case repeatsRecentPrefix + /// The completion verbatim-reproduces a span of injected side context the user did not type + /// (clipboard, on-screen OCR text) — the small model parroting context instead of predicting. + /// See `ContextEchoGuard`. + case echoesInjectedContext + /// The completion contains a reserved model-internal marker (e.g. Gemma's ``, chat/FIM + /// scaffolding) that should have been masked at sample time. Belt-and-suspenders for stale or + /// mis-flagged token profiles. See `TokenClassifier` / `DefaultCandidateFilter.containsReservedMarker`. + case reservedMarker + /// The completion contains a within-candidate token-repetition loop — the same word appears ≥ 3 times + /// ("text 1 1 1", "since 1 1 1"). Model degeneration, not a bleed from side context. + /// See `IntraCompletionRepetitionGuard`. + case intraCompletionRepetition + /// The completion is nothing but markup tags (``, ``, …) in a prose/correction context + /// whose surrounding text contains no markup — Gemma's single-token HTML-tag block surfacing in + /// ordinary writing. Sample-time demotion is the primary defence (see + /// `BiasPolicy.markupTagStaticPenalty`); this is its context-aware output net. See `MarkupTagGuard`. + case markupTagOutsideMarkupContext case noCandidate } diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/ContextEchoGuard.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/ContextEchoGuard.swift new file mode 100644 index 0000000..9eec7b9 --- /dev/null +++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/ContextEchoGuard.swift @@ -0,0 +1,69 @@ +import Foundation + +/// Shared "does this completion reproduce a phrase from some text" test, used by both +/// `PrefixRepetitionGuard` (against the recent typed prefix) and `ContextEchoGuard` (against injected +/// side context). Two shapes are detected on case-folded alphanumerics: +/// +/// 1. **Whole** — the entire (normalized) completion is a substring of the text. A strong signal, so +/// a short match (`minimumWhole`) is enough. +/// 2. **Leading** — the completion *begins* with a run that appears in the text and then diverges, so +/// shape 1 misses it. A leading run of length ≥ `minimumLeading` exists iff the leading slice of +/// exactly that length is a substring (any longer contained run has it as a prefix), so one +/// `contains` decides it. The larger floor keeps chance word collisions from firing. +enum RepeatedSpanDetector { + static func reproduces( + normalizedCompletion: String, + within normalizedText: String, + minimumWhole: Int, + minimumLeading: Int + ) -> Bool { + guard !normalizedCompletion.isEmpty, !normalizedText.isEmpty else { return false } + + if normalizedCompletion.count >= minimumWhole, + normalizedText.contains(normalizedCompletion) { + return true + } + + guard normalizedCompletion.count >= minimumLeading else { return false } + return normalizedText.contains(String(normalizedCompletion.prefix(minimumLeading))) + } +} + +/// Detects completions that merely parrot injected side context — clipboard contents or on-screen +/// OCR text the prompt carries but the user did not type. The small model frequently copies such +/// context verbatim instead of using it as background (e.g. text copied from a localhost page in +/// one browser surfacing as a suggestion in a different app's compose field). +/// +/// Writing-history samples are intentionally NOT passed here: they are already scoped to the same +/// app/domain, and reproducing the user's own recurring phrases (a signature, a stock reply) is the +/// purpose of that personalization — suppressing it would be a regression. +public enum ContextEchoGuard { + + /// `true` when `completion` verbatim-reproduces a span of any string in `injectedContext`. + /// + /// `minimumWhole` is a touch higher than `PrefixRepetitionGuard`'s because the injected corpus is + /// larger (more chance of an incidental short match); `minimumLeading` matches it. + public static func echoesInjectedContext( + completion: String, + injectedContext: [String], + minimumWhole: Int = 12, + minimumLeading: Int = 16 + ) -> Bool { + guard !injectedContext.isEmpty else { return false } + let normalizedCompletion = AlphanumericNormalizer.normalize(completion) + guard !normalizedCompletion.isEmpty else { return false } + + for sample in injectedContext { + let normalizedSample = AlphanumericNormalizer.normalize(sample) + if RepeatedSpanDetector.reproduces( + normalizedCompletion: normalizedCompletion, + within: normalizedSample, + minimumWhole: minimumWhole, + minimumLeading: minimumLeading + ) { + return true + } + } + return false + } +} diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/IntraCompletionRepetitionGuard.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/IntraCompletionRepetitionGuard.swift new file mode 100644 index 0000000..d25da81 --- /dev/null +++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/IntraCompletionRepetitionGuard.swift @@ -0,0 +1,46 @@ +import Foundation + +/// Detects within-completion token-repetition degeneration — a model failure mode where the +/// same word repeats three or more times inside a single candidate ("text 1 1 1", "since 1 1 1"), +/// distinct from the across-prefix loop that `PrefixRepetitionGuard` targets. +/// +/// Words are identified as contiguous runs of alphanumeric characters (case-insensitive, +/// punctuation stripped), so both "1 1 1" and "1, 1, 1" are reliably detected. +/// Fires only when a single word appears ≥ 3 times; normal prose completions never have this shape. +public enum IntraCompletionRepetitionGuard { + + /// `true` when `completion` contains a degenerate within-completion repetition loop + /// (any single word appearing ≥ 3 times). + public static func isDegenerate(_ completion: String) -> Bool { + let words = contentWords(completion) + guard words.count >= 3 else { return false } + var counts: [Substring: Int] = [:] + for word in words { + let n = (counts[word, default: 0]) + 1 + counts[word] = n + if n >= 3 { return true } + } + return false + } + + /// Lowercase alphanumeric runs in `text` (punctuation and whitespace discarded). + /// "1, 1, 1" → ["1","1","1"]; " text 1 1 1" → ["text","1","1","1"]. + static func contentWords(_ text: String) -> [Substring] { + var words: [Substring] = [] + var start: String.Index? = nil + let lowered = text.lowercased() + for idx in lowered.indices { + let ch = lowered[idx] + if ch.isLetter || ch.isNumber { + if start == nil { start = idx } + } else if let s = start { + words.append(lowered[s..` = 200, `` = 215, …), and in thin prose contexts the model surfaces +/// them as suggestions ("my name is" → ""). The primary defence is sample-time demotion via +/// `BiasPolicy.markupTagStaticPenalty`; this guard is the context-aware mirror for finalised +/// candidates, applied by the candidate filter in prose/correction modes only. +/// +/// Deliberately conservative in both directions: +/// - It fires only when the *entire* completion is markup tags (plus whitespace). A candidate that +/// continues the user's own angle-bracket text ("code> to format") has prose content and passes. +/// - It is silent whenever the surrounding field text already contains tag-like markup — a user +/// genuinely writing HTML in a prose-mode field (chat box, CMS textarea) keeps tag completions. +public enum MarkupTagGuard { + + /// Matches a completion consisting solely of one or more whole tags separated by whitespace: + /// `""`, `" "`, `""`. Tag shape mirrors `TokenClassifier.matchesMarkupTag`. + private static let pureMarkupRegex = try? NSRegularExpression( + pattern: #"^\s*(\s*)+$"#, + options: [] + ) + + /// Loose tag detector for the *surrounding* text — attributes allowed (``), since + /// real markup contexts contain them. Used only for the exemption, where a false positive + /// merely means we keep showing tag completions. + private static let contextMarkupRegex = try? NSRegularExpression( + pattern: #"]{0,80}>"#, + options: [] + ) + + /// `true` when `completion` should be suppressed: it is pure markup and neither side of the + /// caret shows the user working with markup. + public static func violates( + completion: String, + beforeCursor: String, + afterCursor: String + ) -> Bool { + guard isPureMarkup(completion) else { return false } + if containsMarkup(beforeCursor) || containsMarkup(afterCursor) { return false } + return true + } + + static func isPureMarkup(_ text: String) -> Bool { + guard !text.isEmpty, let regex = pureMarkupRegex else { return false } + let range = NSRange(text.startIndex.. Bool { + guard !text.isEmpty, let regex = contextMarkupRegex else { return false } + let range = NSRange(text.startIndex.. Bool { + let normalizedCompletion = AlphanumericNormalizer.normalize(completion) + + // Only look back a bounded window — we don't want to suppress completions that share a + // common phrase with text written hours ago in a very long document. + let lookback = String(beforeCursor.suffix(lookbackCharacters)) + let normalizedPrefix = AlphanumericNormalizer.normalize(lookback) + + // Shape 1 (whole) catches a short verbatim repeat; shape 2 (leading) catches a repeat that + // then diverges. See `RepeatedSpanDetector`. + return RepeatedSpanDetector.reproduces( + normalizedCompletion: normalizedCompletion, + within: normalizedPrefix, + minimumWhole: minimumAlphanumericLength, + minimumLeading: minimumLeadingRepeat + ) + } +} diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift index 0695059..3e00aa5 100644 --- a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift +++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift @@ -159,11 +159,7 @@ public enum SuffixOverlapGuard { /// Case-folded string of only the alphanumeric scalars — drops whitespace, punctuation, and any /// stray symbol glyphs the model prepends, so the comparison is on real content. static func normalizedAlphanumerics(_ text: String) -> String { - var result = String.UnicodeScalarView() - for scalar in text.lowercased().unicodeScalars where CharacterSet.alphanumerics.contains(scalar) { - result.append(scalar) - } - return String(result) + AlphanumericNormalizer.normalize(text) } /// Whether the last scalar of `text` is a word character (letter or digit) — i.e. the caret is diff --git a/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/ContextEchoGuardTests.swift b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/ContextEchoGuardTests.swift new file mode 100644 index 0000000..4256e7f --- /dev/null +++ b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/ContextEchoGuardTests.swift @@ -0,0 +1,65 @@ +import AutocompleteCore +import XCTest + +final class ContextEchoGuardTests: XCTestCase { + + func testFiresWhenCompletionEchoesClipboardVerbatim() { + // The reported case: text copied from a localhost page in another browser is injected as + // clipboard context and parroted into a fresh Gmail draft. + let clipboard = "if you require maintenance of UPS systems or backup power, contact us." + XCTAssertTrue( + ContextEchoGuard.echoesInjectedContext( + completion: " if you require maintenance of UPS systems or", + injectedContext: [clipboard] + ) + ) + } + + func testFiresOnLeadingEchoThatThenDiverges() { + let screen = "The private key for the OpenAI API is stored in the vault." + XCTAssertTrue( + ContextEchoGuard.echoesInjectedContext( + completion: " the private key for the OpenAI API is yours to keep forever", + injectedContext: [screen] + ) + ) + } + + func testChecksAllInjectedSources() { + XCTAssertTrue( + ContextEchoGuard.echoesInjectedContext( + completion: " maintenance of UPS systems is required", + injectedContext: ["unrelated clipboard text", "notes about maintenance of UPS systems here"] + ) + ) + } + + func testDoesNotFireWithoutInjectedContext() { + XCTAssertFalse( + ContextEchoGuard.echoesInjectedContext( + completion: " if you require maintenance of UPS systems or", + injectedContext: [] + ) + ) + } + + func testAllowsGenuineCompletionNotInContext() { + let clipboard = "if you require maintenance of UPS systems or backup power, contact us." + XCTAssertFalse( + ContextEchoGuard.echoesInjectedContext( + completion: " hope you are doing well", + injectedContext: [clipboard] + ) + ) + } + + func testDoesNotFireOnShortIncidentalOverlap() { + // A short common run ("if you ") must not be enough to suppress a real continuation. + XCTAssertFalse( + ContextEchoGuard.echoesInjectedContext( + completion: " if you can", + injectedContext: ["if you require maintenance of UPS systems"] + ) + ) + } +} diff --git a/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/IntraCompletionRepetitionGuardTests.swift b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/IntraCompletionRepetitionGuardTests.swift new file mode 100644 index 0000000..dd9ca2e --- /dev/null +++ b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/IntraCompletionRepetitionGuardTests.swift @@ -0,0 +1,88 @@ +import XCTest +@testable import AutocompleteCore + +final class IntraCompletionRepetitionGuardTests: XCTestCase { + + // MARK: - Degenerate cases (should suppress) + + func testDigitTripleSpaceSeparated_isDegenerate() { + XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate(" text 1 1 1")) + } + + func testDigitTripleWithLeadWord_isDegenerate() { + XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate(" since 1 1 1")) + } + + func testDigitTripleWithMultipleLeadWords_isDegenerate() { + XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate(" apartment or my 1 1 1")) + } + + func testWordTriple_isDegenerate() { + XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate(" the the the best option")) + } + + /// Punctuation-separated repetitions: "1, 1, 1" must be caught even though + /// whitespace-splitting gives ["1,", "1,", "1"] — the guard uses alphanumeric runs. + func testPunctuationSeparated_isDegenerate() { + XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate("1, 1, 1")) + } + + func testHyphenSeparated_isDegenerate() { + XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate("go-go-go now")) + } + + // MARK: - Normal completions (must not suppress) + + func testNormalProseSentence_notDegenerate() { + XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(" is a company for the industrial floor.")) + } + + func testSingleWord_notDegenerate() { + XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(" hello")) + } + + func testTwoWords_notDegenerate() { + XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(" good morning")) + } + + /// Two occurrences is below the threshold of three. + func testDoubleRepeat_notDegenerate() { + XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(" apartment or my 1 1")) + } + + func testDoubleRepeatAlt_notDegenerate() { + XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(" text 1 1")) + } + + func testEmptyString_notDegenerate() { + XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate("")) + } + + func testOnlyPunctuation_notDegenerate() { + XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(". . .")) + } + + // MARK: - contentWords helper + + func testContentWords_stripsSpacesAndPunctuation() { + XCTAssertEqual( + IntraCompletionRepetitionGuard.contentWords(" text 1 1 1").map(String.init), + ["text", "1", "1", "1"] + ) + } + + func testContentWords_commaSeparated() { + XCTAssertEqual( + IntraCompletionRepetitionGuard.contentWords("1, 1, 1").map(String.init), + ["1", "1", "1"] + ) + } + + func testContentWords_emptyString() { + XCTAssertTrue(IntraCompletionRepetitionGuard.contentWords("").isEmpty) + } + + func testContentWords_onlyPunctuation() { + XCTAssertTrue(IntraCompletionRepetitionGuard.contentWords(". . .").isEmpty) + } +} diff --git a/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/MarkupTagGuardTests.swift b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/MarkupTagGuardTests.swift new file mode 100644 index 0000000..3427e9a --- /dev/null +++ b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/MarkupTagGuardTests.swift @@ -0,0 +1,95 @@ +import XCTest +@testable import AutocompleteCore + +/// `MarkupTagGuard` — the output net for Gemma's single-token HTML-tag leak (`"my name is"` → +/// `""` in a web chat box). Suppress only pure-markup candidates in markup-free contexts; +/// a user genuinely writing tags must keep their completions. +final class MarkupTagGuardTests: XCTestCase { + + // MARK: - Pure-markup detection + + func testSingleClosingTagIsPureMarkup() { + XCTAssertTrue(MarkupTagGuard.isPureMarkup("")) + } + + func testLeadingSpaceTagIsPureMarkup() { + // The observed leak: token 236743 (" ") + token 215 (""). + XCTAssertTrue(MarkupTagGuard.isPureMarkup(" ")) + } + + func testMultipleTagsArePureMarkup() { + XCTAssertTrue(MarkupTagGuard.isPureMarkup("")) + XCTAssertTrue(MarkupTagGuard.isPureMarkup(" ")) + } + + func testSelfClosingTagIsPureMarkup() { + XCTAssertTrue(MarkupTagGuard.isPureMarkup("
")) + XCTAssertTrue(MarkupTagGuard.isPureMarkup("
")) + } + + func testProseIsNotPureMarkup() { + XCTAssertFalse(MarkupTagGuard.isPureMarkup("john smith")) + } + + func testTagFollowedByProseIsNotPureMarkup() { + // The tag may be continuing the user's own markup — other nets judge the rest. + XCTAssertFalse(MarkupTagGuard.isPureMarkup("
and then some")) + } + + func testPartialBracketTextIsNotPureMarkup() { + XCTAssertFalse(MarkupTagGuard.isPureMarkup("code> to format")) + XCTAssertFalse(MarkupTagGuard.isPureMarkup("<3")) + XCTAssertFalse(MarkupTagGuard.isPureMarkup("a < b")) + } + + func testAttributeBearingTagIsNotPureMarkup() { + // Attribute tags are never single leaked tokens; leave them to context judgement. + XCTAssertFalse(MarkupTagGuard.isPureMarkup(#"
"#)) + } + + func testEmptyStringIsNotPureMarkup() { + XCTAssertFalse(MarkupTagGuard.isPureMarkup("")) + } + + // MARK: - Context exemption + + func testSuppressesPureTagInProseContext() { + XCTAssertTrue(MarkupTagGuard.violates( + completion: " ", + beforeCursor: "my name is", + afterCursor: "" + )) + } + + func testAllowsClosingTagWhenUserIsWritingMarkup() { + XCTAssertFalse(MarkupTagGuard.violates( + completion: "
", + beforeCursor: "wrap it like hello", + afterCursor: "" + )) + } + + func testAllowsTagWhenMarkupFollowsCaret() { + XCTAssertFalse(MarkupTagGuard.violates( + completion: "", + beforeCursor: "add a cell: ", + afterCursor: "" + )) + } + + func testAttributeBearingContextMarkupExempts() { + XCTAssertFalse(MarkupTagGuard.violates( + completion: "", + beforeCursor: #"see this link"#, + afterCursor: "" + )) + } + + func testProseCompletionNeverViolates() { + XCTAssertFalse(MarkupTagGuard.violates( + completion: " john smith", + beforeCursor: "my name is", + afterCursor: "" + )) + } +} diff --git a/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/PrefixRepetitionGuardTests.swift b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/PrefixRepetitionGuardTests.swift new file mode 100644 index 0000000..c387416 --- /dev/null +++ b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/PrefixRepetitionGuardTests.swift @@ -0,0 +1,117 @@ +import AutocompleteCore +import XCTest + +final class PrefixRepetitionGuardTests: XCTestCase { + + // MARK: - Whole-completion repetition + + func testFiresWhenWholeCompletionRepeatsRecentPhrase() { + let before = "This is the private key for the OpenAI API. You can use it to access the OpenAI. And" + XCTAssertTrue( + PrefixRepetitionGuard.repeatsPrefix( + completion: " you can use it to access the OpenAI", + beforeCursor: before + ) + ) + } + + func testIgnoresPunctuationAndCaseDifferences() { + let before = "I went to the AI meetup. I want to write about" + XCTAssertTrue( + PrefixRepetitionGuard.repeatsPrefix( + completion: " i want to write about,", + beforeCursor: before + ) + ) + } + + // MARK: - Leading repetition that then diverges (the loop shape) + + func testFiresWhenCompletionLeadsWithRepeatThenDiverges() { + // The repeated phrase is followed by genuinely new text, so the *whole* completion is no + // longer a substring of the prefix — only the leading run is. + let before = "This is the private key for the OpenAI API. You can use it to access the OpenAI. And" + XCTAssertTrue( + PrefixRepetitionGuard.repeatsPrefix( + completion: " you can use it to access the OpenAI API to do whatever you want", + beforeCursor: before + ) + ) + } + + // MARK: - Negatives + + func testAllowsGenuineContinuation() { + let before = "This is the private key for the OpenAI API. You can use it to access the OpenAI. And" + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix( + completion: " keep it somewhere safe", + beforeCursor: before + ) + ) + } + + func testDoesNotFireOnShortCommonLeadingWord() { + // A short leading collision ("the ") must not be enough to suppress a real continuation. + let before = "I saw the dog run across the" + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix( + completion: " street quickly", + beforeCursor: before + ) + ) + } + + func testDoesNotFireOnShortCompletion() { + let before = "the quick brown fox jumps over the" + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix( + completion: " lazy", + beforeCursor: before + ) + ) + } + + func testLeadingRepeatThresholdBoundary() { + // The leading-divergence shape requires a repeated run of ≥16 normalized alphanumeric chars. + // "abcdefghijklmno" is 15 → must NOT fire on leading-only; "abcdefghijklmnop" is 16 → fires. + let before15 = "abcdefghijklmno was here earlier in the document somewhere" + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix( + completion: "abcdefghijklmno then something new entirely", + beforeCursor: before15 + ), + "15-char leading run is below the threshold" + ) + let before16 = "abcdefghijklmnop was here earlier in the document somewhere" + XCTAssertTrue( + PrefixRepetitionGuard.repeatsPrefix( + completion: "abcdefghijklmnop then something new entirely", + beforeCursor: before16 + ), + "16-char leading run meets the threshold" + ) + } + + func testWholeCompletionRepeatBoundaryIsEightChars() { + // The whole-completion shape uses the lower ≥8 floor; "abcdefg" (7) must not fire. + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix(completion: " abcdefg", beforeCursor: "abcdefg earlier") + ) + XCTAssertTrue( + PrefixRepetitionGuard.repeatsPrefix(completion: " abcdefgh", beforeCursor: "abcdefgh earlier") + ) + } + + func testRespectsLookbackWindow() { + // The repeated phrase sits far outside the lookback window, so it should not be suppressed. + let filler = String(repeating: "x ", count: 400) + let before = "you can use it to access the OpenAI" + filler + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix( + completion: " you can use it to access the OpenAI", + beforeCursor: before + ) + ) + } +} diff --git a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/ConstrainedGeneration.swift b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/ConstrainedGeneration.swift index c61be18..a1becb0 100644 --- a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/ConstrainedGeneration.swift +++ b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/ConstrainedGeneration.swift @@ -90,6 +90,13 @@ public final class ConstrainedGenerationEngine: CompletionGenerating { // admissible tokens aren't masked out and the branch can't silently collapse to // `noCandidate` (ADR-025). constrained: !branch.remainingPrefix.isEmpty, + // Decode-time repetition penalty is scoped to this branch's own emitted tokens, so + // a degenerate loop is demoted in favour of a non-repeating sibling. Suppressed + // while a required prefix is still being satisfied (mid-word healing, ADR-019): that + // path forces a specific continuation that may legitimately repeat an earlier token, + // and demoting it would collapse the only admissible branch. Inert unless the + // penalties are configured (see DecodingConfiguration.presencePenalty). + recentTokens: branch.remainingPrefix.isEmpty ? branch.tokenIDs : [], isAdmissible: { self.tokenAllowed($0, afterRequiredPrefix: branch.remainingPrefix) } ) diff --git a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/DecodingConfiguration.swift b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/DecodingConfiguration.swift index 7968ec7..23ad2c1 100644 --- a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/DecodingConfiguration.swift +++ b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/DecodingConfiguration.swift @@ -48,6 +48,22 @@ public struct DecodingConfiguration: Equatable { /// Weight of the mean per-token suffix-join log-probability added to a branch's cumulative score /// before final ranking. See ADR-057. public var suffixRerankWeight: Float + /// Decode-time repetition control. Subtracted (in logit space, before temperature) from any token + /// that has already been emitted *on the same branch*, demoting degenerate intra-completion loops + /// ("access the OpenAI access the OpenAI") so a non-repeating sibling — or the stop token — wins the + /// beam instead of the controller having to suppress the looped output after the fact. The penalty + /// is suppressed while a branch is still satisfying a required prefix (mid-word healing), so it never + /// demotes a forced continuation (see `ConstrainedGenerationEngine`). + /// + /// `presencePenalty` is applied once if the token appears at all on the branch; `frequencyPenalty` + /// is applied per prior occurrence. Both default to `0` (inert — `SamplerResult` is byte-identical + /// to the un-penalized path), so the production default is unchanged until a value is chosen via the + /// KeyTypeBench sweep. This is a *demotion* lever, not a promotion one: it only reshuffles tokens + /// already in the candidate pool, so it bites on the medium/long completion lengths where loops + /// form and is near-inert at the short (≤4-token) default. + public var presencePenalty: Float + /// See `presencePenalty`. Scaled by the number of prior occurrences of the token on the branch. + public var frequencyPenalty: Float public init( topK: Int = 64, @@ -61,7 +77,9 @@ public struct DecodingConfiguration: Equatable { fimMaxPrefixTokens: Int = 256, fimMaxSuffixTokens: Int = 64, suffixRerankTokenCount: Int = 0, - suffixRerankWeight: Float = 1.0 + suffixRerankWeight: Float = 1.0, + presencePenalty: Float = 0, + frequencyPenalty: Float = 0 ) { self.topK = topK self.topP = topP @@ -75,5 +93,7 @@ public struct DecodingConfiguration: Equatable { self.fimMaxSuffixTokens = fimMaxSuffixTokens self.suffixRerankTokenCount = suffixRerankTokenCount self.suffixRerankWeight = suffixRerankWeight + self.presencePenalty = presencePenalty + self.frequencyPenalty = frequencyPenalty } } diff --git a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift index 8e2474d..293d2f2 100644 --- a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift +++ b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift @@ -90,6 +90,12 @@ public final class DefaultCandidateFilter: CandidateFiltering { // insertable as an inline completion. if !Self.isInsertionSafe(candidate.text) { return .insertionUnsafe } + // 6·: Reserved model-internal markers (Gemma ``, chat/FIM scaffolding). These are + // masked at sample time once the profile is rebuilt (see TokenClassifier); this net is the + // belt-and-suspenders for stale profiles / cross-token concatenations / other models, with + // a distinct reason so telemetry can confirm the masking landed. + if Self.containsReservedMarker(candidate.text) { return .reservedMarker } + // 6a. CJK script net: once the live caret is inside CJK text, a Latin-leading continuation // is almost always pinyin/romanization leakage from the base model or IME composition. // Suppress it rather than showing visibly wrong ghost text. @@ -121,6 +127,54 @@ public final class DefaultCandidateFilter: CandidateFiltering { return .duplicatesAfterCursor } + // The content-overlap nets below judge the text that will actually be inserted. When the + // prompt was healed (ADR-019) the candidate re-emits the already-typed stem (" coll…"); strip + // it so the comparison is against the genuinely-new continuation, not the stem the user typed. + let insertedText = Self.healStripped(candidate.text, request: request) + + // 7b. Prefix-repetition net: the completion reproduces a phrase already in the recent + // preceding text, so accepting it would create a verbatim repetition loop. + // Typical failure: small model predicts "i want to write about" after "…AI meetup." + // because that exact phrase appeared earlier in the text. See PrefixRepetitionGuard. + if PrefixRepetitionGuard.repeatsPrefix( + completion: insertedText, + beforeCursor: request.context.beforeCursor + ) { + return .repeatsRecentPrefix + } + + // 7b'. Intra-completion repetition: the same word appears ≥ 3 times within the candidate + // itself ("text 1 1 1", "since 1 1 1") — model degeneration unrelated to side context. + // Distinct from the prefix-repetition loop above (which checks against already-typed text). + if IntraCompletionRepetitionGuard.isDegenerate(insertedText) { + return .intraCompletionRepetition + } + + // 7b''. Markup-tag net: the candidate is nothing but HTML tags in a prose context with no + // markup in the surrounding text — Gemma's single-token tag block (`` = 215) + // surfacing in ordinary writing. Sample-time demotion (`BiasPolicy.markupTagStaticPenalty`) + // is the primary defence; this context-aware net covers stale profiles and beam paths. + // Code/terminal modes are untouched, and a field already containing markup is exempt. + if request.mode == .prose || request.mode == .correction, + MarkupTagGuard.violates( + completion: insertedText, + beforeCursor: request.context.beforeCursor, + afterCursor: request.context.afterCursor + ) { + return .markupTagOutsideMarkupContext + } + + // 7c. Context-echo net: the completion verbatim-reproduces injected side context the user did + // not type (clipboard / on-screen OCR). The small model parrots such context instead of + // using it as background — e.g. text copied from one app surfacing in another's compose + // field. Writing-history samples are excluded upstream (see `CompletionRequest`). + if ContextEchoGuard.echoesInjectedContext( + completion: insertedText, + injectedContext: request.injectedContext + ) { + return .echoesInjectedContext + } + // 8. Mid-line confidence net. Native FIM is useful only when it is both short and highly // likely; longer middle spans have been low-precision in edge data. Keep this deliberately // conservative so re-enabled mid-line favors suppression over wrong visible text. @@ -158,6 +212,15 @@ public final class DefaultCandidateFilter: CandidateFiltering { return meanLogProbability < minimumMidLineMeanLogProbability } + // MARK: - Heal-aware text + + /// The text that will actually be inserted: for a healed request (ADR-019) the candidate re-emits + /// the already-typed stem, so strip it back off; otherwise the candidate text is inserted as-is. + static func healStripped(_ text: String, request: CompletionRequest) -> String { + guard !request.requiredPrefixBytes.isEmpty else { return text } + return MidWordHealing.strip(text, heal: String(decoding: request.requiredPrefixBytes, as: UTF8.self)) + } + // MARK: - Required prefix /// `true` when `bytes` is consistent with `prefix`: either it begins with the whole prefix or @@ -171,8 +234,8 @@ public final class DefaultCandidateFilter: CandidateFiltering { /// A candidate is unsafe to insert if it is empty / whitespace-only, carries any control /// character (C0 controls including tab and newline, or DEL), or has no alphanumeric content at - /// all. The last rule drops noise-only suggestions (`"..."`, `"…"`, `"—"`) that are never a - /// useful inline continuation; alphanumerics span every script, so CJK/Thai completions pass. + /// all. The alphanumeric rule drops noise-only suggestions (`"..."`, `"…"`, `"—"`); alphanumerics + /// span every script, so CJK/Thai pass. (Reserved markers get their own gate — see `suppressionReason`.) static func isInsertionSafe(_ text: String) -> Bool { if text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { return false } for scalar in text.unicodeScalars { @@ -182,6 +245,30 @@ public final class DefaultCandidateFilter: CandidateFiltering { return true } + /// Regexes for model-internal markers that must never appear in a shown completion: reserved + /// placeholders (``, ``, ``, ``, ``) and chat / + /// FIM scaffolding (`<|…|>`, ``, …). Matched as substrings since a candidate may + /// embed one mid-text. Kept narrow so ordinary `` text the user types is unaffected. + private static let reservedMarkerRegexes: [NSRegularExpression] = { + let patterns = [ + #""#, + #""#, + #""#, + #""#, #""#, + #"<\|[^|>]+\|>"#, + #""#, #""# + ] + return patterns.compactMap { try? NSRegularExpression(pattern: $0, options: [.caseInsensitive]) } + }() + + static func containsReservedMarker(_ text: String) -> Bool { + let range = NSRange(text.startIndex.. Bool { guard request.mode == .prose || request.mode == .correction else { return false } guard let last = request.context.beforeCursor.last, !last.isWhitespace else { return false } @@ -210,9 +297,7 @@ public final class DefaultCandidateFilter: CandidateFiltering { // For a healed request (ADR-019) the candidate re-emits the typed stem (`" coll…"`); strip it // so the leading word is the genuinely-new continuation rather than an empty leading-space run // — otherwise healed mid-word completions slip past the net entirely (ADR-025 follow-up). - let judged = request.requiredPrefixBytes.isEmpty - ? candidate.text - : MidWordHealing.strip(candidate.text, heal: String(decoding: request.requiredPrefixBytes, as: UTF8.self)) + let judged = Self.healStripped(candidate.text, request: request) let lead = CurrentWordTypoGuard.leadingWord(of: judged) guard !lead.isEmpty else { return false } // completion opened on a boundary — not our word @@ -244,9 +329,7 @@ public final class DefaultCandidateFilter: CandidateFiltering { let stem = CurrentWordTypoGuard.trailingWord(of: request.context.beforeCursor) guard !stem.isEmpty else { return false } // model started a fresh word — leave it - let judged = request.requiredPrefixBytes.isEmpty - ? candidate.text - : MidWordHealing.strip(candidate.text, heal: String(decoding: request.requiredPrefixBytes, as: UTF8.self)) + let judged = Self.healStripped(candidate.text, request: request) let lead = CurrentWordTypoGuard.leadingWord(of: judged) guard !lead.isEmpty else { return false } // completion opened on a boundary — not our word diff --git a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Sampling/TokenSampler.swift b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Sampling/TokenSampler.swift index b3a527b..37a951a 100644 --- a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Sampling/TokenSampler.swift +++ b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Sampling/TokenSampler.swift @@ -35,11 +35,26 @@ enum TokenSampler { profile: AutocompleteProfile, configuration: DecodingConfiguration, constrained: Bool = false, + recentTokens: [TokenID] = [], isAdmissible: (TokenID) -> Bool ) -> SamplerResult { guard !logits.isEmpty else { return .empty } let temperature = max(configuration.temperature, 1e-3) + // Decode-time repetition penalty (see `DecodingConfiguration.presencePenalty`). Build the + // per-token occurrence count for this branch once; when no penalty is configured (or the + // branch is empty) `occurrences` stays empty and the scaling loop below is byte-identical to + // the un-penalized path. The penalty adjusts only `value` (step 1) — never `argmaxLogit`, + // which stays on the raw logits so stop/hardStop detection is unaffected (ADR-010). + let presencePenalty = configuration.presencePenalty + let frequencyPenalty = configuration.frequencyPenalty + let penaltyActive = (presencePenalty != 0 || frequencyPenalty != 0) && !recentTokens.isEmpty + var occurrences: [TokenID: Int] = [:] + if penaltyActive { + occurrences.reserveCapacity(recentTokens.count) + for id in recentTokens { occurrences[id, default: 0] += 1 } + } + // 0. Pre-select the highest raw-logit tokens. Running the profile lookups + softmax over // the full vocabulary (150k+ tokens) per branch is the dominant cost; the surviving // candidate pool only ever needs `topK` entries, so restrict the expensive work to a @@ -79,7 +94,11 @@ enum TokenSampler { } if profile.isExcluded(id, mode: mode) { continue } if !isAdmissible(id) { continue } - let value = (logit.logit + profile.bias(for: id, mode: mode)) / temperature + var biased = logit.logit + profile.bias(for: id, mode: mode) + if penaltyActive, let count = occurrences[id], count > 0 { + biased -= presencePenalty + frequencyPenalty * Float(count) + } + let value = biased / temperature scaled.append((id, value)) if value > maxValue { maxValue = value } } diff --git a/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Engine/RepetitionPenaltyTests.swift b/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Engine/RepetitionPenaltyTests.swift new file mode 100644 index 0000000..33a4118 --- /dev/null +++ b/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Engine/RepetitionPenaltyTests.swift @@ -0,0 +1,95 @@ +import AutocompleteCore +@testable import ConstrainedGeneration +import ModelRuntime +import TokenProfiles +import XCTest + +/// Decode-time repetition penalty in `TokenSampler.rank` (see `DecodingConfiguration.presencePenalty`). +/// The penalty demotes tokens already emitted on the same branch so a degenerate intra-completion loop +/// loses the beam to a non-repeating sibling. It is a *demotion* lever applied only to `value`, never +/// to the raw-logit argmax used for stop detection, and is byte-identical to the un-penalized path +/// when no penalty is configured. +final class RepetitionPenaltyTests: XCTestCase { + + /// Three plain word tokens, no flags — admissible everywhere, never excluded, never a stop. + private func makeProfile(vocab: Int) -> InMemoryAutocompleteProfile { + let records = (0.. [TokenLogit] { + values.enumerated().map { TokenLogit(tokenID: TokenID($0.offset), logit: $0.element) } + } + + private func rank( + _ logitValues: [Float], + config: DecodingConfiguration, + recent: [TokenID] + ) -> SamplerResult { + TokenSampler.rank( + logits: logits(logitValues), + mode: .prose, + profile: makeProfile(vocab: logitValues.count), + configuration: config, + recentTokens: recent, + isAdmissible: { _ in true } + ) + } + + /// With penalties at 0 the result is identical regardless of branch history — the inert default. + func testZeroPenaltyIsByteIdenticalToUnpenalized() { + let values: [Float] = [3.0, 2.5, 1.0] + let config = DecodingConfiguration() + let baseline = rank(values, config: config, recent: []) + let withHistory = rank(values, config: config, recent: [0, 0, 0]) + XCTAssertEqual(baseline.tokens, withHistory.tokens) + XCTAssertEqual(baseline.argmaxTokenID, withHistory.argmaxTokenID) + } + + /// A repeated token's probability drops once the presence penalty is active, and a previously + /// lower-ranked sibling overtakes it as the top candidate. + func testPresencePenaltyDemotesRepeatedToken() { + // Token 0 leads on raw logits, token 1 is close behind. + let values: [Float] = [3.0, 2.8, 0.5] + let config = DecodingConfiguration(presencePenalty: 4.0) + + let baseline = rank(values, config: config, recent: []) + XCTAssertEqual(baseline.tokens.first?.tokenID, 0, "token 0 wins with no history") + + let penalized = rank(values, config: config, recent: [0]) + XCTAssertEqual(penalized.tokens.first?.tokenID, 1, "repeated token 0 is demoted below token 1") + // Token 0 either keeps a lower probability or is pushed out of the nucleus entirely (absent ⇒ 0). + let p0 = penalized.tokens.first(where: { $0.tokenID == 0 })?.probability ?? 0 + let base0 = baseline.tokens.first(where: { $0.tokenID == 0 })?.probability ?? 0 + XCTAssertLessThan(p0, base0, "token 0 probability must drop under the penalty") + } + + /// The frequency penalty scales with occurrence count: two prior occurrences demote harder than one. + func testFrequencyPenaltyScalesWithCount() { + let values: [Float] = [3.0, 2.0, 1.0] + let config = DecodingConfiguration(frequencyPenalty: 1.5) + let once = rank(values, config: config, recent: [0]).tokens.first(where: { $0.tokenID == 0 })?.probability ?? 0 + let twice = rank(values, config: config, recent: [0, 0]).tokens.first(where: { $0.tokenID == 0 })?.probability ?? 0 + XCTAssertLessThan(twice, once, "more prior occurrences must penalize harder") + } + + /// H7: the penalty must not move `argmaxTokenID` — it is tracked on raw logits for stop detection. + func testArgmaxUnaffectedByPenalty() { + let values: [Float] = [3.0, 2.8, 0.5] + let config = DecodingConfiguration(presencePenalty: 10.0) + let penalized = rank(values, config: config, recent: [0]) + XCTAssertEqual(penalized.argmaxTokenID, 0, "raw-logit argmax stays token 0 despite the penalty") + } + + /// H6: when the penalty drives the only repeated candidate down, the pool must not collapse to + /// empty — the floor still keeps the non-repeating sibling, avoiding a spurious `noCandidate`. + func testPenaltyDoesNotEmptyThePool() { + let values: [Float] = [5.0, 1.0] + let config = DecodingConfiguration(minBranchProbability: 0.0, presencePenalty: 50.0) + let penalized = rank(values, config: config, recent: [0]) + XCTAssertFalse(penalized.tokens.isEmpty, "a non-repeating sibling must survive") + XCTAssertEqual(penalized.tokens.first?.tokenID, 1) + } +} diff --git a/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift b/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift index 72ffc26..3e13b9c 100644 --- a/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift +++ b/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift @@ -19,7 +19,8 @@ final class CandidateFilterTests: XCTestCase { target: AppTarget = CandidateFilterTests.target, placeholder: String? = nil, labels: [String] = [], - traits: TextFieldTraits = TextFieldTraits() + traits: TextFieldTraits = TextFieldTraits(), + injectedContext: [String] = [] ) -> CompletionRequest { let context = TextFieldContext( beforeCursor: beforeCursor, @@ -36,7 +37,8 @@ final class CandidateFilterTests: XCTestCase { requiredPrefixBytes: requiredPrefixBytes, mode: mode, maxCompletionTokens: maxCompletionTokens, - maxDisplayWidth: maxDisplayWidth + maxDisplayWidth: maxDisplayWidth, + injectedContext: injectedContext ) } @@ -379,6 +381,146 @@ final class CandidateFilterTests: XCTestCase { ) } + // MARK: - Prefix-repetition net + + func testSuppressesPrefixRepetitionLoop() { + let filter = DefaultCandidateFilter() + XCTAssertEqual( + filter.suppressionReason( + for: candidate(" you can use it to access the OpenAI API to do anything"), + request: request(beforeCursor: "You can use it to access the OpenAI. And") + ), + .repeatsRecentPrefix + ) + } + + func testPrefixRepetitionJudgedAfterHealingStem() { + // H1: under healing the candidate re-emits the typed stem (" ex"); the repetition check must + // run on the *inserted* text (stem stripped). The stripped continuation "ample data set here" + // reproduces an earlier phrase, but the RAW candidate ("example data set here") does NOT + // appear contiguously in the prefix — so this only fires if the heal stem is stripped first. + let filter = DefaultCandidateFilter() + XCTAssertEqual( + filter.suppressionReason( + for: candidate(" example data set here"), + request: request( + beforeCursor: "ample data set here is good. Give me an ex", + requiredPrefixBytes: Array(" ex".utf8) + ) + ), + .repeatsRecentPrefix + ) + } + + // MARK: - Reserved-marker net + + func testSuppressesReservedPlaceholderToken() { + let filter = DefaultCandidateFilter() + XCTAssertEqual( + filter.suppressionReason(for: candidate(" "), request: request(beforeCursor: "Hello ")), + .reservedMarker + ) + } + + func testSuppressesEmbeddedChatMarker() { + let filter = DefaultCandidateFilter() + XCTAssertEqual( + filter.suppressionReason(for: candidate(" text to "), request: request(beforeCursor: "send ")), + .reservedMarker + ) + } + + func testGenuineMarkupIsNotSuppressedAsReservedMarker() { + // `

` etc. are ordinary text the user might type; the reserved net must not claim them. + let filter = DefaultCandidateFilter() + XCTAssertNotEqual( + filter.suppressionReason(for: candidate("h2> heading", tokenIDs: [1]), request: request(beforeCursor: "write <")), + .reservedMarker + ) + } + + // MARK: - Markup-tag net + + func testSuppressesPureMarkupTagInProse() { + // The logged failure: "my name is" in a web chat box → " " (Gemma token 215) shown. + let filter = DefaultCandidateFilter() + XCTAssertEqual( + filter.suppressionReason(for: candidate(" "), request: request(beforeCursor: "my name is")), + .markupTagOutsideMarkupContext + ) + } + + func testSuppressesMultiTagCandidateInProse() { + let filter = DefaultCandidateFilter() + XCTAssertEqual( + filter.suppressionReason(for: candidate(""), request: request(beforeCursor: "prisma carlyle")), + .markupTagOutsideMarkupContext + ) + } + + func testKeepsTagWhenUserIsWritingMarkup() { + // Context exemption: the field already contains markup, so a closing tag is wanted. + let filter = DefaultCandidateFilter() + XCTAssertNil( + filter.suppressionReason(for: candidate(""), request: request(beforeCursor: "use bold text")) + ) + } + + func testKeepsTagWhenMarkupFollowsCaret() { + let filter = midLineEnabledFilter() + XCTAssertNil( + filter.suppressionReason( + for: candidate("", tokenIDs: [1], displayWidth: 4), + request: request(beforeCursor: "", afterCursor: "") + ) + ) + } + + func testMarkupNetSkippedInCodeMode() { + let filter = DefaultCandidateFilter() + XCTAssertNil( + filter.suppressionReason(for: candidate(""), request: request(beforeCursor: "some text", mode: .code)) + ) + } + + func testProseCandidateWithTrailingTagIsNotClaimedByMarkupNet() { + // Mixed content is not "pure markup" — other nets decide its fate. + let filter = DefaultCandidateFilter() + XCTAssertNotEqual( + filter.suppressionReason(for: candidate(" smith "), request: request(beforeCursor: "my name is")), + .markupTagOutsideMarkupContext + ) + } + + // MARK: - Context-echo net + + func testSuppressesEchoOfClipboardContext() { + let filter = DefaultCandidateFilter() + XCTAssertEqual( + filter.suppressionReason( + for: candidate(" if you require maintenance of UPS systems or backup"), + request: request( + beforeCursor: "Hi Molly,", + injectedContext: ["if you require maintenance of UPS systems or backup power, call us."] + ) + ), + .echoesInjectedContext + ) + } + + func testKeepsCompletionNotPresentInInjectedContext() { + let filter = DefaultCandidateFilter() + XCTAssertNil( + filter.suppressionReason( + for: candidate(" hope you are well"), + request: request( + beforeCursor: "Hi Molly,", + injectedContext: ["if you require maintenance of UPS systems or backup power, call us."] + ) + ) + ) + } + // MARK: - Dead-end mid-word net (ADR-052) /// A recogniser whose `canCompleteWord` only accepts an explicit set of viable prefixes. diff --git a/Packages/KeyTypeBench/Sources/KeyTypeBench/Datasets/history-echo.jsonl b/Packages/KeyTypeBench/Sources/KeyTypeBench/Datasets/history-echo.jsonl new file mode 100644 index 0000000..c4e14a6 --- /dev/null +++ b/Packages/KeyTypeBench/Sources/KeyTypeBench/Datasets/history-echo.jsonl @@ -0,0 +1,4 @@ +{"id":"history-echo-cooking-vs-kubernetes-001","sourceGroup":"history-echo-synthetic","split":"eval","suites":["edge"],"tags":["history-echo","prose","stale-history"],"contextSources":{"fieldText":"synthetic","appContext":"synthetic"},"source":{"kind":"synthetic","note":"Topic-A draft (cooking) with topically-unrelated topic-B writing history (Kubernetes). Canaries: Kubernetes, ingress, TLS. The model must NOT echo the stale draft."},"context":{"beforeCursor":"To make the tomato sauce, first heat the olive oil over medium heat and then add the","afterCursor":"","detectedLanguage":"en","typingContext":"notes","target":{"appName":"Notes","bundleIdentifier":"com.apple.Notes","windowTitle":"Recipe"},"previousUserInputs":["The Kubernetes ingress controller terminates TLS at the edge and routes traffic to the backend pods through the service mesh sidecar proxy."]},"expected":{"kind":"insert","shownAcceptable":["garlic","onion","onions","minced garlic","chopped onion"]}} +{"id":"history-echo-email-vs-legal-002","sourceGroup":"history-echo-synthetic","split":"eval","suites":["edge"],"tags":["history-echo","prose","stale-history"],"contextSources":{"fieldText":"synthetic","appContext":"synthetic"},"source":{"kind":"synthetic","note":"Topic-A draft (casual email) with topically-unrelated topic-B history (legal contract). Canaries: indemnification, notwithstanding, herein."},"context":{"beforeCursor":"Hey Mike, just wanted to check in and see if you're still free to grab","afterCursor":"","detectedLanguage":"en","typingContext":"email","target":{"appName":"Mail","bundleIdentifier":"com.apple.mail","windowTitle":"Draft"},"previousUserInputs":["The indemnification clause shall survive termination of this agreement notwithstanding any provision to the contrary set forth herein."]},"expected":{"kind":"insert","shownAcceptable":["lunch","coffee","dinner","a coffee","lunch this week"]}} +{"id":"history-echo-weather-vs-code-003","sourceGroup":"history-echo-synthetic","split":"eval","suites":["edge"],"tags":["history-echo","prose","stale-history"],"contextSources":{"fieldText":"synthetic","appContext":"synthetic"},"source":{"kind":"synthetic","note":"Topic-A draft (weather chat) with topically-unrelated topic-B history (Python code). Canaries: numpy, epochs, model.fit."},"context":{"beforeCursor":"The forecast for tomorrow looks sunny with a high of","afterCursor":"","detectedLanguage":"en","typingContext":"message","target":{"appName":"Messages","bundleIdentifier":"com.apple.MobileSMS","windowTitle":"Chat"},"previousUserInputs":["import numpy as np\ndef train(model, data):\n return model.fit(data, epochs=100, batch_size=32)"]},"expected":{"kind":"insert","shownAcceptable":["75","72","80","seventy","around 75 degrees"]}} +{"id":"history-echo-control-signoff-reuse-004","sourceGroup":"history-echo-synthetic","split":"eval","suites":["edge"],"tags":["history-echo","prose","relevant-history","control"],"contextSources":{"fieldText":"synthetic","appContext":"synthetic"},"source":{"kind":"synthetic","note":"CONTROL: relevant recurring sign-off that IS in history and SHOULD be reused. Guards against over-suppression of legitimate personalization."},"context":{"beforeCursor":"Thanks so much for all your help on this. Best regards,","afterCursor":"","detectedLanguage":"en","typingContext":"email","target":{"appName":"Mail","bundleIdentifier":"com.apple.mail","windowTitle":"Draft"},"previousUserInputs":["Thanks so much for all your help on this. Best regards, Alex Johnson","Looking forward to hearing from you. Best regards, Alex Johnson"]},"expected":{"kind":"insert","shownAcceptable":["Alex","Alex Johnson"]}} diff --git a/Packages/KeyTypeBench/Sources/KeyTypeBench/EvaluationPipeline.swift b/Packages/KeyTypeBench/Sources/KeyTypeBench/EvaluationPipeline.swift index d319c18..fec2c7f 100644 --- a/Packages/KeyTypeBench/Sources/KeyTypeBench/EvaluationPipeline.swift +++ b/Packages/KeyTypeBench/Sources/KeyTypeBench/EvaluationPipeline.swift @@ -128,6 +128,9 @@ public final class ProductionCompletionEvaluator { private let modelInfo: BenchmarkModelInfo private let defaultMaxCompletionTokens: Int private let defaultMaxDisplayWidth: Int + /// When false, the case's `previousUserInputs` (writing history) is dropped from the prompt — the + /// history on/off A/B knob. Clipboard/screen side context is unaffected. + private let includeWritingHistory: Bool public init( runtime: LocalModelRuntime, @@ -136,8 +139,10 @@ public final class ProductionCompletionEvaluator { compatibilityStore: AppCompatibilityStore = AppCompatibilityStore(), decodingConfiguration: DecodingConfiguration = DecodingConfiguration(enableFillInMiddle: true), defaultMaxCompletionTokens: Int = 4, - defaultMaxDisplayWidth: Int = 80 + defaultMaxDisplayWidth: Int = 80, + includeWritingHistory: Bool = true ) { + self.includeWritingHistory = includeWritingHistory self.compatibilityStore = compatibilityStore self.engine = ConstrainedGenerationEngine( runtime: runtime, @@ -228,7 +233,7 @@ public final class ProductionCompletionEvaluator { let promptResult = promptBuilder.buildPrompt( context: promptContext, customInstructions: policy.customInstructions, - previousUserInputs: benchmarkCase.context.previousUserInputs, + previousUserInputs: includeWritingHistory ? benchmarkCase.context.previousUserInputs : [], pasteboardText: benchmarkCase.context.clipboardContext, screenText: benchmarkCase.context.screenContext, includeEnvironmentContext: policy.includesEnvironmentContext diff --git a/Packages/KeyTypeBench/Sources/KeyTypeBenchCLI/BenchmarkCommand.swift b/Packages/KeyTypeBench/Sources/KeyTypeBenchCLI/BenchmarkCommand.swift index d3b7920..575176d 100644 --- a/Packages/KeyTypeBench/Sources/KeyTypeBenchCLI/BenchmarkCommand.swift +++ b/Packages/KeyTypeBench/Sources/KeyTypeBenchCLI/BenchmarkCommand.swift @@ -91,6 +91,15 @@ struct Run: AsyncParsableCommand { @Option(name: .long, help: "Weight applied to FIM suffix-rerank score.") var suffixRerankWeight: Float = DecodingConfiguration().suffixRerankWeight + @Option(name: .long, help: "Decode-time presence penalty: subtracted once from any token already emitted on the branch. Sweep knob for the repetition-penalty default.") + var presencePenalty: Float = DecodingConfiguration().presencePenalty + + @Option(name: .long, help: "Decode-time frequency penalty: subtracted per prior occurrence of a token on the branch.") + var frequencyPenalty: Float = DecodingConfiguration().frequencyPenalty + + @Flag(name: .customLong("no-history"), help: "Drop writing-history (previousUserInputs) side context from prompts. A/B knob for the history-on/off experiment.") + var noHistory: Bool = false + @Flag(name: .long, help: "Skip missing model/profile inputs instead of failing.") var skipMissing: Bool = false @@ -176,7 +185,9 @@ struct Run: AsyncParsableCommand { fimMaxPrefixTokens: fimMaxPrefixTokens, fimMaxSuffixTokens: fimMaxSuffixTokens, suffixRerankTokenCount: suffixRerankTokenCount, - suffixRerankWeight: suffixRerankWeight + suffixRerankWeight: suffixRerankWeight, + presencePenalty: presencePenalty, + frequencyPenalty: frequencyPenalty ) let evaluator = ProductionCompletionEvaluator( runtime: runtime, @@ -185,7 +196,8 @@ struct Run: AsyncParsableCommand { compatibilityStore: compatibilityStore, decodingConfiguration: decodingConfiguration, defaultMaxCompletionTokens: maxCompletionTokens, - defaultMaxDisplayWidth: maxDisplayWidth + defaultMaxDisplayWidth: maxDisplayWidth, + includeWritingHistory: !noHistory ) do { diff --git a/Packages/KeyTypeBench/Tests/KeyTypeBenchTests/CommittedDatasetTests.swift b/Packages/KeyTypeBench/Tests/KeyTypeBenchTests/CommittedDatasetTests.swift index 61525f9..2ecd472 100644 --- a/Packages/KeyTypeBench/Tests/KeyTypeBenchTests/CommittedDatasetTests.swift +++ b/Packages/KeyTypeBench/Tests/KeyTypeBenchTests/CommittedDatasetTests.swift @@ -79,6 +79,35 @@ final class CommittedDatasetTests: XCTestCase { } } + /// The history-echo diagnostic fixture (stale/unrelated writing history) is measurement-only — it + /// has no behavioural CI assertion because the right fix layer was upstream selection, not an output + /// guard. This guards the *dataset* itself from rot: every line must decode, the stale-history cases + /// must carry topically-unrelated `previousUserInputs`, and the control case's reuse string must + /// actually appear in its history so the "legitimate reuse is not over-suppressed" check stays valid. + func testHistoryEchoFixtureIsWellFormed() throws { + let url = repositoryRoot() + .appendingPathComponent("Packages/KeyTypeBench/Sources/KeyTypeBench/Datasets/history-echo.jsonl") + let cases = try BenchmarkJSONL.loadCases(from: url) + XCTAssertEqual(cases.count, 4) + + for row in cases { + XCTAssertFalse(row.context.beforeCursor.isEmpty, row.id) + XCTAssertFalse(row.context.previousUserInputs.isEmpty, "\(row.id) must carry writing history") + XCTAssertEqual(row.expected.kind, .insert, row.id) + XCTAssertTrue(row.tags.contains("history-echo"), row.id) + } + + let control = try XCTUnwrap(cases.first { $0.tags.contains("control") }) + let history = control.context.previousUserInputs.joined(separator: "\n") + XCTAssertTrue( + control.expected.shownAcceptable.contains { history.contains($0) }, + "control reuse string must appear in its own writing history" + ) + + let staleCases = cases.filter { $0.tags.contains("stale-history") } + XCTAssertEqual(staleCases.count, 3, "three stale-unrelated-history cases") + } + private func assertShare( _ count: Int, of total: Int, diff --git a/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift b/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift index ac130f8..13125ad 100644 --- a/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift +++ b/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift @@ -865,19 +865,7 @@ enum AXCaretHelper { @MainActor private static func displayGeometries() -> [DisplayGeometry] { - NSScreen.screens.compactMap { screen in - guard let number = screen.deviceDescription[NSDeviceDescriptionKey("NSScreenNumber")] as? NSNumber else { - return nil - } - - let displayID = CGDirectDisplayID(number.uint32Value) - return DisplayGeometry( - appKitFrame: screen.frame, - visibleFrame: screen.visibleFrame, - coreGraphicsBounds: CGDisplayBounds(displayID), - backingScaleFactor: screen.backingScaleFactor - ) - } + ScreenDisplayGeometryProvider.current() } @MainActor @@ -921,6 +909,27 @@ public struct DisplayGeometry: Equatable { } } +/// Reads the live `[DisplayGeometry]` from `NSScreen`. Separated from the pure +/// `DisplayCoordinateConverter` so the conversion math stays testable without a real display. +public enum ScreenDisplayGeometryProvider { + @MainActor + public static func current() -> [DisplayGeometry] { + NSScreen.screens.compactMap { screen in + guard let number = screen.deviceDescription[NSDeviceDescriptionKey("NSScreenNumber")] as? NSNumber else { + return nil + } + + let displayID = CGDirectDisplayID(number.uint32Value) + return DisplayGeometry( + appKitFrame: screen.frame, + visibleFrame: screen.visibleFrame, + coreGraphicsBounds: CGDisplayBounds(displayID), + backingScaleFactor: screen.backingScaleFactor + ) + } + } +} + /// Pure CG <-> AppKit coordinate conversion against a set of synthetic or real /// `DisplayGeometry` values. Kept side-effect-free so unit tests don't need `NSScreen`. public enum DisplayCoordinateConverter { @@ -980,6 +989,31 @@ public enum DisplayCoordinateConverter { ) } + /// Inverse of `appKitRect(fromCoreGraphicsRect:)` for a point: maps an AppKit (bottom-left origin) + /// global point to a CoreGraphics (top-left origin) global point. Needed because caret geometry is + /// stored in AppKit space while ScreenCaptureKit window frames are in CG space. Returns `nil` when + /// the point lands on no known display. + public static func coreGraphicsPoint( + fromAppKitPoint point: CGPoint, + displays: [DisplayGeometry] + ) -> CGPoint? { + guard let display = bestDisplay( + for: CGRect(origin: point, size: .zero), + displays: displays, + keyPath: \.appKitFrame + ) else { + return nil + } + // Invert: appKit.x = appKitFrame.minX + (cg.x - cgBounds.minX) + // appKit.y = appKitFrame.maxY - (cg.y - cgBounds.minY) (height 0) + let localX = point.x - display.appKitFrame.minX + let localY = display.appKitFrame.maxY - point.y + return CGPoint( + x: display.coreGraphicsBounds.minX + localX, + y: display.coreGraphicsBounds.minY + localY + ) + } + private static func bestDisplay( for rect: CGRect, displays: [DisplayGeometry], diff --git a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift index 2fa544a..ba52957 100644 --- a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift +++ b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift @@ -19,19 +19,25 @@ public struct ScreenWindowCandidate: Equatable { public var isOnScreen: Bool /// `windowLayer` — normal app windows are layer 0; menus/panels/overlays sit above. public var layer: Int + /// Front-to-back position (0 = frontmost), from ScreenCaptureKit's window ordering. Used to pick + /// the frontmost window when several overlap the caret. Defaults high so synthetic candidates that + /// don't set it sort last on this key and fall through to the area tiebreak. + public var zOrder: Int public init( windowID: CGWindowID, processID: pid_t, frame: CGRect, isOnScreen: Bool, - layer: Int + layer: Int, + zOrder: Int = .max ) { self.windowID = windowID self.processID = processID self.frame = frame self.isOnScreen = isOnScreen self.layer = layer + self.zOrder = zOrder } } @@ -40,12 +46,18 @@ public enum ScreenWindowSelector { static let minimumWidth: CGFloat = 200 static let minimumHeight: CGFloat = 120 - /// Picks the window to capture for `pid`: the focused app's main content window. Prefers - /// on-screen, normal-layer (0) windows and, among equals, the largest one (tie-broken by the - /// lowest window id for determinism). Returns `nil` when the app has no suitable window. + /// Picks the window to capture for `pid`: the focused app's main content window. When + /// `focusPoint` (the caret location, in global top-left screen coordinates) is supplied and + /// lands inside one or more of the app's windows, only those are considered — this disambiguates + /// multiple windows of the same app so OCR reads the window the user is actually typing in, not + /// just the largest one. (Without it, a second window of the same app could bleed its text into + /// the prompt's screen context.) Among the remaining windows, prefers on-screen, normal-layer (0) + /// ones and, among equals, the largest (tie-broken by the lowest window id for determinism). + /// Returns `nil` when the app has no suitable window. public static func selectWindowID( forPID pid: pid_t, - from candidates: [ScreenWindowCandidate] + from candidates: [ScreenWindowCandidate], + focusPoint: CGPoint? = nil ) -> CGWindowID? { let eligible = candidates.filter { candidate in candidate.processID == pid @@ -54,11 +66,23 @@ public enum ScreenWindowSelector { } guard !eligible.isEmpty else { return nil } - let ranked = eligible.sorted { lhs, rhs in + // If we know where the caret is, prefer the window(s) containing it. Fall back to the full + // set when the point lands in none of them (e.g. caret geometry unavailable/stale), so we + // never regress to returning nil just because the point missed. + let containing = focusPoint.map { point in + eligible.filter { $0.frame.contains(point) } + } ?? [] + let pool = containing.isEmpty ? eligible : containing + + let ranked = pool.sorted { lhs, rhs in if lhs.isOnScreen != rhs.isOnScreen { return lhs.isOnScreen } let lhsNormalLayer = lhs.layer == 0 let rhsNormalLayer = rhs.layer == 0 if lhsNormalLayer != rhsNormalLayer { return lhsNormalLayer } + // Frontmost wins. This is decisive when several windows overlap the caret (the focused + // window is on top); otherwise candidates share the default zOrder and the area tiebreak + // below applies, preserving the largest-content-window heuristic. + if lhs.zOrder != rhs.zOrder { return lhs.zOrder < rhs.zOrder } let lhsArea = lhs.frame.width * lhs.frame.height let rhsArea = rhs.frame.width * rhs.frame.height if lhsArea != rhsArea { return lhsArea > rhsArea } diff --git a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift index 82b304f..7933a14 100644 --- a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift +++ b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift @@ -19,8 +19,9 @@ public protocol ScreenWindowTextCapturing: Sendable { /// Capture the focused window for `pid` and return its OCR'd text, or `nil` if there's no /// suitable window / no recognised text. `fieldText` is the focused field's own text (already /// captured via Accessibility); lines matching it are stripped so screen context doesn't - /// duplicate the field. - func captureWindowText(pid: pid_t, fieldText: String, maxLines: Int, maxChars: Int) async throws -> String? + /// duplicate the field. `focusPoint` (caret location, global top-left screen coordinates) + /// disambiguates multiple windows of the same app so the correct one is read. + func captureWindowText(pid: pid_t, fieldText: String, focusPoint: CGPoint?, maxLines: Int, maxChars: Int) async throws -> String? } /// `ScreenTextProviding` cache fed by an out-of-band capturer. Main-actor isolated: the completion @@ -49,10 +50,11 @@ public final class WindowOCRCaptureEngine: ScreenTextProviding { } /// Kick off a fresh capture for `pid`, superseding any in-flight one. `fieldText` is the focused - /// field's own text, stripped from the OCR so screen context doesn't echo it. Fire-and-forget: - /// the cache updates when the capture completes. A failed/empty capture clears the cache so a - /// stale reading can't outlive the window it came from. - public func refresh(pid: pid_t, fieldText: String) { + /// field's own text, stripped from the OCR so screen context doesn't echo it. `focusPoint` is the + /// caret location (global top-left screen coordinates) used to pick the right window when the app + /// has several. Fire-and-forget: the cache updates when the capture completes. A failed/empty + /// capture clears the cache so a stale reading can't outlive the window it came from. + public func refresh(pid: pid_t, fieldText: String, focusPoint: CGPoint? = nil) { inFlight?.cancel() let capturer = self.capturer let maxLines = self.maxLines @@ -61,6 +63,7 @@ public final class WindowOCRCaptureEngine: ScreenTextProviding { let text = try? await capturer.captureWindowText( pid: pid, fieldText: fieldText, + focusPoint: focusPoint, maxLines: maxLines, maxChars: maxChars ) @@ -89,10 +92,14 @@ public struct ScreenCaptureKitWindowTextCapturer: ScreenWindowTextCapturing { self.maxCaptureDimension = maxCaptureDimension } - public func captureWindowText(pid: pid_t, fieldText: String, maxLines: Int, maxChars: Int) async throws -> String? { + public func captureWindowText(pid: pid_t, fieldText: String, focusPoint: CGPoint?, maxLines: Int, maxChars: Int) async throws -> String? { let content = try await SCShareableContent.excludingDesktopWindows(false, onScreenWindowsOnly: true) - let candidates = content.windows.map(ScreenWindowCandidate.init(window:)) - guard let windowID = ScreenWindowSelector.selectWindowID(forPID: pid, from: candidates), + // `content.windows` is front-to-back; the index is the z-order the selector uses to break ties + // between windows that overlap the caret. + let candidates = content.windows.enumerated().map { index, window in + ScreenWindowCandidate(window: window, zOrder: index) + } + guard let windowID = ScreenWindowSelector.selectWindowID(forPID: pid, from: candidates, focusPoint: focusPoint), let window = content.windows.first(where: { $0.windowID == windowID }) else { return nil } @@ -118,13 +125,14 @@ public struct ScreenCaptureKitWindowTextCapturer: ScreenWindowTextCapturing { } private extension ScreenWindowCandidate { - init(window: SCWindow) { + init(window: SCWindow, zOrder: Int) { self.init( windowID: window.windowID, processID: window.owningApplication?.processID ?? -1, frame: window.frame, isOnScreen: window.isOnScreen, - layer: window.windowLayer + layer: window.windowLayer, + zOrder: zOrder ) } } diff --git a/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift b/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift index 61a3943..82cbf1b 100644 --- a/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift +++ b/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift @@ -60,6 +60,63 @@ final class DisplayCoordinateConverterTests: XCTestCase { XCTAssertEqual(first.minY, 860, accuracy: 0.001) } + func testAppKitPointToCGFlipsAroundDisplayHeight() throws { + // AppKit (bottom-left) y=960 should map back to CG (top-left) y=120 on a 1080-tall display. + let cg = try XCTUnwrap( + DisplayCoordinateConverter.coreGraphicsPoint( + fromAppKitPoint: CGPoint(x: 50, y: 960), + displays: [singleDisplay] + ) + ) + XCTAssertEqual(cg.x, 50, accuracy: 0.001) + XCTAssertEqual(cg.y, 120, accuracy: 0.001) + } + + func testCGRectToAppKitPointRoundTrips() throws { + // The point conversion must invert the rect conversion: a caret's CG midpoint → AppKit → CG + // returns the original midpoint. This is the exact path used for window selection. + let cgRect = CGRect(x: 300, y: 220, width: 2, height: 24) + let appKit = try XCTUnwrap( + DisplayCoordinateConverter.appKitRect(fromCoreGraphicsRect: cgRect, displays: [singleDisplay]) + ) + let backToCG = try XCTUnwrap( + DisplayCoordinateConverter.coreGraphicsPoint( + fromAppKitPoint: CGPoint(x: appKit.midX, y: appKit.midY), + displays: [singleDisplay] + ) + ) + XCTAssertEqual(backToCG.x, cgRect.midX, accuracy: 0.001) + XCTAssertEqual(backToCG.y, cgRect.midY, accuracy: 0.001) + } + + func testAppKitPointToCGOnSecondaryDisplay() throws { + let secondary = DisplayGeometry( + appKitFrame: CGRect(x: 1920, y: 180, width: 1440, height: 900), + visibleFrame: CGRect(x: 1920, y: 204, width: 1440, height: 876), + coreGraphicsBounds: CGRect(x: 1920, y: 0, width: 1440, height: 900), + backingScaleFactor: 2 + ) + // AppKit point inside the secondary display. localY = appKitFrame.maxY(1080) - 1010 = 70, + // so CG y = coreGraphicsBounds.minY(0) + 70 = 70. + let cg = try XCTUnwrap( + DisplayCoordinateConverter.coreGraphicsPoint( + fromAppKitPoint: CGPoint(x: 2500, y: 1010), + displays: [singleDisplay, secondary] + ) + ) + XCTAssertEqual(cg.x, 2500, accuracy: 0.001) + XCTAssertEqual(cg.y, 70, accuracy: 0.001) + } + + func testAppKitPointToCGReturnsNilOutsideAllDisplays() { + XCTAssertNil( + DisplayCoordinateConverter.coreGraphicsPoint( + fromAppKitPoint: CGPoint(x: 9000, y: 9000), + displays: [singleDisplay] + ) + ) + } + func testMultiDisplayPicksContainingDisplay() throws { let primary = singleDisplay // Secondary 1440x900 sitting to the right of the primary in CG space; AppKit places it diff --git a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift index 86a2adb..a8ead11 100644 --- a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift +++ b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift @@ -8,9 +8,10 @@ final class ScreenWindowSelectorTests: XCTestCase { pid: pid_t, frame: CGRect, onScreen: Bool = true, - layer: Int = 0 + layer: Int = 0, + zOrder: Int = .max ) -> ScreenWindowCandidate { - ScreenWindowCandidate(windowID: id, processID: pid, frame: frame, isOnScreen: onScreen, layer: layer) + ScreenWindowCandidate(windowID: id, processID: pid, frame: frame, isOnScreen: onScreen, layer: layer, zOrder: zOrder) } func testReturnsNilWhenNoWindowMatchesPID() { @@ -48,6 +49,46 @@ final class ScreenWindowSelectorTests: XCTestCase { XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates), 2) } + func testFocusPointPicksWindowContainingCaretOverLarger() { + // Two windows of the same app: the caret is in the smaller one, which must win over the + // larger window the area-based ranking would otherwise pick. + let candidates = [ + candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 1600, height: 1000)), + candidate(id: 2, pid: 42, frame: CGRect(x: 1700, y: 0, width: 600, height: 400)) + ] + let caret = CGPoint(x: 1750, y: 50) + XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates, focusPoint: caret), 2) + } + + func testOverlappingWindowsAtCaretPickFrontmost() { + // A small compose window (frontmost, z=0) floats over a large background window (z=1); the + // caret falls inside both. The frontmost must win even though the background is larger. + let candidates = [ + candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 1600, height: 1000), zOrder: 1), + candidate(id: 2, pid: 42, frame: CGRect(x: 100, y: 100, width: 500, height: 400), zOrder: 0) + ] + let caret = CGPoint(x: 200, y: 200) // inside both + XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates, focusPoint: caret), 2) + } + + func testFallbackPrefersFrontmostWhenNoCaret() { + // With no caret info, the frontmost window is a better guess than the largest. + let candidates = [ + candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 1600, height: 1000), zOrder: 1), + candidate(id: 2, pid: 42, frame: CGRect(x: 0, y: 0, width: 800, height: 600), zOrder: 0) + ] + XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates), 2) + } + + func testFocusPointFallsBackToRankingWhenOutsideAllWindows() { + let candidates = [ + candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 400, height: 300)), + candidate(id: 2, pid: 42, frame: CGRect(x: 0, y: 0, width: 1200, height: 800)) + ] + let caret = CGPoint(x: 9000, y: 9000) + XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates, focusPoint: caret), 2) + } + func testCaptureScaleDownscalesLargeWindows() { let scale = ScreenWindowSelector.captureScale(for: CGSize(width: 3200, height: 1800), maxDimension: 1600) XCTAssertEqual(scale, 0.5, accuracy: 0.0001) diff --git a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift index 17b463e..7a0f21c 100644 --- a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift +++ b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift @@ -5,7 +5,7 @@ import XCTest final class WindowOCRCaptureEngineTests: XCTestCase { private struct FakeCapturer: ScreenWindowTextCapturing { let result: String? - func captureWindowText(pid: pid_t, fieldText: String, maxLines: Int, maxChars: Int) async throws -> String? { + func captureWindowText(pid: pid_t, fieldText: String, focusPoint: CGPoint?, maxLines: Int, maxChars: Int) async throws -> String? { result } } diff --git a/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift b/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift index 183771b..9ee0fe3 100644 --- a/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift +++ b/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift @@ -202,6 +202,12 @@ public final class PersistentWritingHistoryStore: WritingHistoryStoring, @unchec .filter(Column("charCount") >= query.minimumCharacters) if let bundle = query.bundleIdentifier, query.sameAppOnly { request = request.filter(Column("appBundleIdentifier") == bundle) + // Web fields: keep only the focused domain's rows so a different tab in the same + // browser can't fill the row budget (and the in-memory selection then drops any + // that slip through). Native apps have a nil domain and are unaffected. + if let domain = query.domain, !domain.isEmpty { + request = request.filter(Column("domain") == domain) + } } if let language = query.language { // Keep rows whose language matches or is unknown (conservative). @@ -228,11 +234,20 @@ public final class PersistentWritingHistoryStore: WritingHistoryStoring, @unchec /// and the shape of the M3 `InMemoryWritingHistoryStore`. enum WritingHistorySelection { static func select(from entries: [WritingHistorySample], query: WritingHistoryQuery) -> [String] { - let candidates = entries.filter { $0.text.count >= query.minimumCharacters } + let candidates = entries.filter { + $0.text.count >= query.minimumCharacters && WritingHistoryFilter.isProse($0.text) + } let sameApp = candidates.filter { entry in guard let bundle = query.bundleIdentifier else { return true } - return entry.appBundleIdentifier == bundle + guard entry.appBundleIdentifier == bundle else { return false } + // For web fields the bundle is the browser, so several sites share it. Require a matching + // domain so content from a different tab (or an unknown-domain sample) can't be treated as + // same-context and bleed in. Native apps have no domain, so this is inert for them. + if let queryDomain = query.domain, !queryDomain.isEmpty { + return entry.domain == queryDomain + } + return true } let crossApp = candidates.filter { entry in guard let bundle = query.bundleIdentifier else { return false } @@ -244,6 +259,10 @@ enum WritingHistorySelection { func take(_ samples: [WritingHistorySample], upTo limit: Int) { for s in samples.prefix(limit) where seen.insert(s.text).inserted { + // Skip near-duplicate drafts (an earlier version of the same text the user kept + // editing). Injecting both wastes the budget and amplifies the model's tendency to + // parrot the most recent matching phrase verbatim. + if picked.contains(where: { isNearDuplicate(s.text, of: $0.text) }) { continue } picked.append(s) } } @@ -266,4 +285,24 @@ enum WritingHistorySelection { } return result } + + /// Word set (lowercased letters/digits) used for cheap near-duplicate detection. `n` is tiny + /// (≤ fetchSize), so the O(picked·words) comparison in `take` is negligible. + static func wordSet(_ text: String) -> Set { + Set(text.lowercased().split(whereSeparator: { !$0.isLetter && !$0.isNumber }).map(String.init)) + } + + /// True when two history samples are near-identical — high word-set overlap (Jaccard ≥ 0.8) or the + /// shorter is fully contained in the longer (an extended draft of the same text). Mirrored in + /// `InMemoryWritingHistoryStore`; keep the two in sync. + static func isNearDuplicate(_ candidate: String, of existing: String) -> Bool { + let a = wordSet(candidate), b = wordSet(existing) + guard a.count >= 3, b.count >= 3 else { return candidate == existing } + let intersection = a.intersection(b).count + let union = a.union(b).count + if union > 0, Double(intersection) / Double(union) >= 0.8 { return true } + let smaller = a.count <= b.count ? a : b + let larger = a.count <= b.count ? b : a + return smaller.isSubset(of: larger) // shorter draft entirely contained in the longer one + } } diff --git a/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift b/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift index d787dba..8715e11 100644 --- a/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift +++ b/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift @@ -41,6 +41,25 @@ final class PersonalizationTests: XCTestCase { XCTAssertTrue(store.samples(for: WritingHistoryQuery(bundleIdentifier: "com.app.mail")).isEmpty) } + func testPersistentStoreDomainScopingExcludesOtherTabs() throws { + // DB-level coverage for the domain filter (the production path): two sites in the same browser + // bundle must not share context, and a nil-domain row must not leak into a domain-scoped query. + let (store, url) = try makeTempStore() + defer { try? FileManager.default.removeItem(at: url) } + + store.record(WritingHistorySample(text: "Draft about quarterly revenue numbers here.", appBundleIdentifier: "com.browser", domain: "mail.google.com")) + store.record(WritingHistorySample(text: "you can use it to access the OpenAI API key.", appBundleIdentifier: "com.browser", domain: "platform.openai.com")) + store.record(WritingHistorySample(text: "Some unknown-domain text from this browser.", appBundleIdentifier: "com.browser", domain: nil)) + + let result = store.samples(for: WritingHistoryQuery( + bundleIdentifier: "com.browser", + domain: "mail.google.com", + minimumCharacters: 1, + sameAppOnly: true + )) + XCTAssertEqual(result, ["Draft about quarterly revenue numbers here."]) + } + func testPersistentStoreDedupesIdenticalSample() throws { let (store, url) = try makeTempStore() defer { try? FileManager.default.removeItem(at: url) } @@ -108,6 +127,87 @@ final class PersonalizationTests: XCTestCase { XCTAssertEqual(result, ["Newer note from this same app here."]) } + func testSameAppOnlyExcludesCrossAppContent() { + // Regression: a recent sample from another app must never be injected when the query is + // same-app-scoped — otherwise unrelated content (e.g. a Notes draft) bleeds into another + // app's prompt and the model parrots it verbatim. + let now = Date() + let entries = [ + WritingHistorySample(text: "you can use it to access the OpenAI API.", appBundleIdentifier: "com.app.notes", updatedAt: now), + WritingHistorySample(text: "Hi Molly, hope you are doing well today.", appBundleIdentifier: "com.app.mail", updatedAt: now.addingTimeInterval(-100)) + ] + let result = WritingHistorySelection.select(from: entries, query: WritingHistoryQuery( + bundleIdentifier: "com.app.mail", + minimumCharacters: 1, + sameAppOnly: true + )) + XCTAssertEqual(result, ["Hi Molly, hope you are doing well today."]) + XCTAssertFalse(result.contains { $0.contains("OpenAI") }, "cross-app content must not leak") + } + + func testSameAppScopingExcludesOtherWebDomains() { + // Two tabs in the same browser (same bundle) must not share context: a sample from another + // site, or one with no recorded domain, must not be injected into the focused domain's prompt. + let now = Date() + let entries = [ + WritingHistorySample(text: "Draft about quarterly revenue numbers.", appBundleIdentifier: "com.browser", domain: "mail.google.com", updatedAt: now), + WritingHistorySample(text: "you can use it to access the OpenAI API.", appBundleIdentifier: "com.browser", domain: "platform.openai.com", updatedAt: now), + WritingHistorySample(text: "Some unknown-domain text from this browser.", appBundleIdentifier: "com.browser", domain: nil, updatedAt: now) + ] + let result = WritingHistorySelection.select(from: entries, query: WritingHistoryQuery( + bundleIdentifier: "com.browser", + domain: "mail.google.com", + minimumCharacters: 1, + sameAppOnly: true + )) + XCTAssertEqual(result, ["Draft about quarterly revenue numbers."]) + } + + func testNativeAppScopingIsUnaffectedByDomain() { + // A native app has no domain; same-app scoping must still return its samples. + let now = Date() + let entries = [ + WritingHistorySample(text: "A note typed in the native app here.", appBundleIdentifier: "com.app.notes", domain: nil, updatedAt: now) + ] + let result = WritingHistorySelection.select(from: entries, query: WritingHistoryQuery( + bundleIdentifier: "com.app.notes", + minimumCharacters: 1, + sameAppOnly: true + )) + XCTAssertEqual(result, ["A note typed in the native app here."]) + } + + func testSelectionDropsNearDuplicateDrafts() { + // The user kept editing one draft; an earlier version and its extension must not both be + // injected (that amplifies verbatim parroting and wastes the budget). + let now = Date() + let entries = [ + WritingHistorySample(text: "i want to write about the AI meetup today", appBundleIdentifier: "com.app", updatedAt: now), + WritingHistorySample(text: "i want to write about the AI meetup", appBundleIdentifier: "com.app", updatedAt: now.addingTimeInterval(-10)), + WritingHistorySample(text: "completely unrelated note about gardening tips", appBundleIdentifier: "com.app", updatedAt: now.addingTimeInterval(-20)) + ] + let result = WritingHistorySelection.select(from: entries, query: WritingHistoryQuery( + bundleIdentifier: "com.app", + minimumCharacters: 1, + longestCount: 0, + mostRecentCount: 8, + crossAppRecentCount: 0 + )) + XCTAssertEqual(result.filter { $0.contains("AI meetup") }.count, 1, "near-duplicate drafts collapse to one") + XCTAssertTrue(result.contains("completely unrelated note about gardening tips"), "distinct samples are kept") + } + + func testNearDuplicateKeepsDistinctSamples() { + XCTAssertFalse(WritingHistorySelection.isNearDuplicate( + "the quarterly revenue report is due friday", + of: "remember to water the office plants every morning" + )) + XCTAssertTrue(WritingHistorySelection.isNearDuplicate( + "thanks so much for the thoughtful feedback today", + of: "thanks so much for the thoughtful feedback" + )) + } + // MARK: - Telemetry func testTelemetryRatesAndPercentiles() { @@ -481,6 +581,137 @@ final class PersonalizationTests: XCTestCase { XCTAssertLessThanOrEqual(a.minBranchProbabilityScale, ThresholdTuner.maxProbabilityScale) } + // MARK: - Writing history quality filter + + func testIsProse_acceptsNormalEmailText() { + XCTAssertTrue(WritingHistoryFilter.isProse("Hi Maya, thanks for the update on the project.")) + XCTAssertTrue(WritingHistoryFilter.isProse("The quarterly report is due on Friday afternoon.")) + } + + func testIsProse_acceptsBioText() { + XCTAssertTrue(WritingHistoryFilter.isProse( + "AI, software, and ideas too good to ignore. Building a company brain for the industrial floor. Breaking things, learning fast." + )) + } + + func testIsProse_rejectsBareURL() { + XCTAssertFalse(WritingHistoryFilter.isProse("https://github.com/shreeraman96")) + XCTAssertFalse(WritingHistoryFilter.isProse("www.example.com")) + } + + func testIsProse_rejectsUUIDBlobEntry() { + // "uuid=..." style entries from captured file-open dialogs + XCTAssertFalse(WritingHistoryFilter.isProse("uuid=EF757712-3FDF-48F4-B026-DB0AEF04AC2B.jpeg")) + } + + func testIsProse_rejectsFilesystemPath() { + XCTAssertFalse(WritingHistoryFilter.isProse("/Users/shreeram/Downloads/report.pdf")) + XCTAssertFalse(WritingHistoryFilter.isProse("/Library/Application Support/KeyType/Models/gemma.bin")) + } + + func testIsProse_rejectsEmptyString() { + XCTAssertFalse(WritingHistoryFilter.isProse("")) + XCTAssertFalse(WritingHistoryFilter.isProse(" ")) + } + + func testFilterByRelevance_dropsZeroOverlapSampleBeyondRecencyFloor() { + let bio = "Building a company brain for the industrial floor. Breaking things, learning fast." + + // With recencyFloor=0, all samples are subject to the Jaccard gate; bio is dropped. + let resultFloorZero = WritingHistoryFilter.filterByRelevance( + [bio], beforeCursor: "Hi Molly, This", recencyFloor: 0 + ) + XCTAssertTrue(resultFloorZero.isEmpty, "unrelated bio must be dropped when no recency floor") + + // With recencyFloor=1 and two samples, the first is kept as style anchor; + // the second (bio) is beyond the floor and dropped. + let recent = "Hi Molly, I hope this finds you well." + let resultWithFloor = WritingHistoryFilter.filterByRelevance( + [recent, bio], beforeCursor: "Hi Molly, This", recencyFloor: 1 + ) + XCTAssertTrue(resultWithFloor.contains(recent), "most-recent sample kept as style anchor") + XCTAssertFalse(resultWithFloor.contains(bio), "unrelated bio beyond floor must be dropped") + + // Both samples within the floor (default recencyFloor=2) → both kept unconditionally. + let resultBothInFloor = WritingHistoryFilter.filterByRelevance( + [recent, bio], beforeCursor: "Hi Molly, This" + ) + XCTAssertEqual(resultBothInFloor.count, 2, "all samples within recency floor are always kept") + } + + func testFilterByRelevance_keepsRelatedSample() { + let techNote = "We are building industrial floor automation systems." + // "industrial" and "floor" overlap with the cursor text + let result = WritingHistoryFilter.filterByRelevance( + [techNote], + beforeCursor: "Here is an update on the industrial floor project." + ) + XCTAssertEqual(result, [techNote], "topically related sample must be kept") + } + + func testFilterByRelevance_skipsFilterWhenCursorHasTooFewContentWords() { + let bio = "Building a company brain for the industrial floor." + // "Hi," has only 1 non-stopword → minimumContentWords not reached → all samples kept + let result = WritingHistoryFilter.filterByRelevance([bio], beforeCursor: "Hi,") + XCTAssertEqual(result, [bio], "filter must be skipped when cursor lacks content words") + } + + func testFilterByRelevance_keepsSignOffWhenCursorContainsSignOffWord() { + let signOff = "Kind regards, Sam" + // "kind" appears in both sample and cursor → kept + let result = WritingHistoryFilter.filterByRelevance( + [signOff], + beforeCursor: "Thanks for the update. Kind" + ) + XCTAssertEqual(result, [signOff], "sign-off kept when leading word appears in cursor") + } + + func testFilterByRelevance_multipleInputsSomeMeetThreshold() { + let bio = "Building a company brain for the industrial floor." + let reply = "Thanks for reaching out about the project timeline." + let recent = "Hi Molly, looking forward to your reply." + // recencyFloor=1 → recent is the style anchor (always kept). + // bio has {building, company, brain, industrial, floor} → 0 overlap with "Hi Molly, Thanks" → dropped + // reply has {thanks, reaching, project, timeline} → "thanks" matches → kept + let result = WritingHistoryFilter.filterByRelevance( + [recent, bio, reply], + beforeCursor: "Hi Molly, Thanks", + recencyFloor: 1 + ) + XCTAssertTrue(result.contains(recent), "recency-floor anchor always kept") + XCTAssertFalse(result.contains(bio), "bio with zero overlap must be dropped beyond floor") + XCTAssertTrue(result.contains(reply), "reply sharing 'thanks' must be kept") + } + + func testPersistentStoreFiltersJunkAtSelectionTime() throws { + // Junk stored via the raw store.record() path (bypassing the recorder's isProse guard, + // which runs only in WritingHistoryRecorder in the main app target). The selection-time + // filter must catch it so existing on-disk junk is cleaned up without a migration. + let (store, url) = try makeTempStore() + defer { try? FileManager.default.removeItem(at: url) } + + // store.record() has no isProse check — it accepts anything, simulating pre-existing junk. + store.record(WritingHistorySample( + text: "https://github.com/shreeraman96", + appBundleIdentifier: "com.browser", + domain: "github.com" + )) + store.record(WritingHistorySample( + text: "The quarterly report is due on Friday afternoon.", + appBundleIdentifier: "com.browser", + domain: "github.com" + )) + + let result = store.samples(for: WritingHistoryQuery( + bundleIdentifier: "com.browser", + domain: "github.com", + minimumCharacters: 1, + sameAppOnly: true + )) + XCTAssertFalse(result.contains("https://github.com/shreeraman96"), "URL junk must be filtered at selection time") + XCTAssertTrue(result.contains("The quarterly report is due on Friday afternoon.")) + } + // MARK: - Keychain func testKeychainPassphraseRoundTripIfAvailable() throws { diff --git a/Packages/Prompting/Sources/Prompting/WritingHistory.swift b/Packages/Prompting/Sources/Prompting/WritingHistory.swift index d67fc3d..dc03f43 100644 --- a/Packages/Prompting/Sources/Prompting/WritingHistory.swift +++ b/Packages/Prompting/Sources/Prompting/WritingHistory.swift @@ -59,7 +59,9 @@ public struct WritingHistoryQuery: Equatable { longestCount: Int = 2, mostRecentCount: Int = 4, crossAppRecentCount: Int = 2, - tokenBudget: Int = 256, + // History is background style/context, not text to reproduce. A large budget let it dominate + // the prompt ~20:1 over the user's typed text and the small model parroted it; keep it modest. + tokenBudget: Int = 128, sameAppOnly: Bool = false ) { self.bundleIdentifier = bundleIdentifier @@ -95,6 +97,7 @@ public struct InMemoryWritingHistoryStore: WritingHistoryProviding { public func samples(for query: WritingHistoryQuery) -> [String] { let candidates = entries.filter { entry in guard entry.text.count >= query.minimumCharacters else { return false } + guard WritingHistoryFilter.isProse(entry.text) else { return false } if query.sameAppOnly, let bundle = query.bundleIdentifier, entry.appBundleIdentifier != bundle { return false @@ -108,7 +111,14 @@ public struct InMemoryWritingHistoryStore: WritingHistoryProviding { let sameApp = candidates.filter { entry in guard let bundle = query.bundleIdentifier else { return true } - return entry.appBundleIdentifier == bundle + guard entry.appBundleIdentifier == bundle else { return false } + // For web fields the bundle is the browser, shared across sites; require a matching domain + // so a different tab's content can't be treated as same-context. Native apps have no + // domain, so this is inert for them. Mirrors `WritingHistorySelection` in Personalization. + if let queryDomain = query.domain, !queryDomain.isEmpty { + return entry.domain == queryDomain + } + return true } let crossApp = candidates.filter { entry in guard let bundle = query.bundleIdentifier else { return false } @@ -120,6 +130,8 @@ public struct InMemoryWritingHistoryStore: WritingHistoryProviding { func take(_ samples: [WritingHistorySample], upTo limit: Int) { for s in samples.prefix(limit) where seen.insert(s.text).inserted { + // Skip near-duplicate drafts; mirrors `WritingHistorySelection` in Personalization. + if picked.contains(where: { Self.isNearDuplicate(s.text, of: $0.text) }) { continue } picked.append(s) } } @@ -137,4 +149,22 @@ public struct InMemoryWritingHistoryStore: WritingHistoryProviding { return Array(picked.prefix(query.fetchSize)).map { $0.text } } + + /// Word set (lowercased letters/digits) for cheap near-duplicate detection. + static func wordSet(_ text: String) -> Set { + Set(text.lowercased().split(whereSeparator: { !$0.isLetter && !$0.isNumber }).map(String.init)) + } + + /// True when two samples are near-identical (Jaccard ≥ 0.8, or the shorter is fully contained in + /// the longer). Mirror of `WritingHistorySelection.isNearDuplicate`; keep the two in sync. + static func isNearDuplicate(_ candidate: String, of existing: String) -> Bool { + let a = wordSet(candidate), b = wordSet(existing) + guard a.count >= 3, b.count >= 3 else { return candidate == existing } + let intersection = a.intersection(b).count + let union = a.union(b).count + if union > 0, Double(intersection) / Double(union) >= 0.8 { return true } + let smaller = a.count <= b.count ? a : b + let larger = a.count <= b.count ? b : a + return smaller.isSubset(of: larger) + } } diff --git a/Packages/Prompting/Sources/Prompting/WritingHistoryFilter.swift b/Packages/Prompting/Sources/Prompting/WritingHistoryFilter.swift new file mode 100644 index 0000000..62b5954 --- /dev/null +++ b/Packages/Prompting/Sources/Prompting/WritingHistoryFilter.swift @@ -0,0 +1,124 @@ +import Foundation + +/// Quality gates applied to writing-history samples before they reach the prompt. +/// +/// **Junk filter** (`isProse`): history samples can contain non-prose entries — URLs captured +/// from browser address bars, UUID-bearing file references, or hex blobs. These waste prompt +/// token budget without aiding style personalization and can mislead the model. +/// +/// **Relevance filter** (`filterByRelevance`): a topically-unrelated history sample (e.g. a +/// user's professional bio stored from a previous Gmail session) can cause the model to +/// paraphrase it into an unrelated draft. Applied at **generation time** with the live +/// `beforeCursor` — not inside the 2-second frozen side-context cache — so the judgment always +/// reflects what the user is currently typing. +/// +/// Trade-off: `filterByRelevance` will occasionally drop stock closing phrases ("Kind regards") +/// when the email body has zero topical overlap with the sign-off. This is accepted because +/// (a) the threshold is conservative (Jaccard ≥ 0.10), (b) once the user has typed the opening +/// word of the sign-off ("Kind") the phrase is kept, and (c) preventing biography bleed into +/// unrelated emails is a higher-priority correctness concern. +public enum WritingHistoryFilter { + + // MARK: - Junk filter + + /// Returns `false` for clearly non-prose entries: bare URLs, UUID blobs, filesystem paths, + /// or text where fewer than 65 % of characters are letters or spaces. + public static func isProse(_ text: String) -> Bool { + let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return false } + + // Bare URL (entire text is a single URL, no surrounding prose) + if !trimmed.contains(" ") { + if trimmed.range(of: #"^\S+://\S+"#, options: .regularExpression) != nil { + return false + } + if trimmed.hasPrefix("www.") { return false } + } + + // Filesystem path (starts with "/" and has ≥ 3 slashes) + if trimmed.hasPrefix("/") && trimmed.filter({ $0 == "/" }).count >= 3 { + return false + } + + // Low letter+space ratio catches UUID blobs, hex strings, mostly-numeric entries. + // Example: "uuid=EF757712-3FDF-48F4-B026-DB0AEF04AC2B.jpeg" → ~38 % → rejected. + let total = trimmed.unicodeScalars.count + let lettersAndSpaces = trimmed.unicodeScalars.filter { + CharacterSet.letters.contains($0) || $0 == " " + }.count + guard Double(lettersAndSpaces) / Double(total) >= 0.65 else { return false } + + return true + } + + // MARK: - Relevance filter + + /// Common English function words excluded when measuring topical overlap. These are + /// ubiquitous across writing contexts and carry no topic signal. + public static let commonEnglishStopwords: Set = [ + "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", + "by", "from", "as", "is", "are", "was", "were", "be", "been", "being", "have", "has", + "had", "do", "does", "did", "will", "would", "could", "should", "may", "might", "can", + "it", "its", "this", "that", "these", "those", "i", "you", "he", "she", "we", "they", + "my", "your", "his", "her", "our", "their", "which", "who", "what", "when", "where", + "how", "all", "not", "more", "some", "about", "up", "out", "if", "no", "so", "than", + "very", "just", "also", "there", "here", "then", "too", "into", "through", + "even", "new", "get", "go", "first", "because", "over", "see", "know", + "me", "him", "us", "them", "am" + ] + + /// Filters `samples` to those with non-trivial topical overlap with `beforeCursor`. + /// + /// The first `recencyFloor` samples are always kept regardless of relevance — they act as + /// style anchors: the user's most-recent writing establishes their current tone and recurring + /// phrases even when the topic differs. Only samples beyond that floor are subject to the + /// Jaccard gate. + /// + /// Returns all samples unchanged when `beforeCursor` has fewer than `minimumContentWords` + /// non-stopword words (short openers lack enough signal to judge topic relevance). + /// + /// A sample is kept if the stopword-filtered, digit-filtered Jaccard similarity between its + /// word set and `beforeCursor`'s word set is ≥ `jaccardThreshold`. + public static func filterByRelevance( + _ samples: [String], + beforeCursor: String, + jaccardThreshold: Double = 0.10, + minimumContentWords: Int = 2, + recencyFloor: Int = 2 + ) -> [String] { + guard !samples.isEmpty else { return samples } + let floor = min(recencyFloor, samples.count) + let anchors = Array(samples.prefix(floor)) + let candidates = Array(samples.dropFirst(floor)) + guard !candidates.isEmpty else { return anchors } + + let cursorWords = contentWordSet(beforeCursor) + guard cursorWords.count >= minimumContentWords else { return samples } + + let filtered = candidates.filter { sample in + let sampleWords = contentWordSet(sample) + guard !sampleWords.isEmpty else { return false } + let intersection = cursorWords.intersection(sampleWords).count + let union = cursorWords.union(sampleWords).count + guard union > 0 else { return false } + return Double(intersection) / Double(union) >= jaccardThreshold + } + return anchors + filtered + } + + /// Stopword-filtered, digit-filtered lowercase content words (≥ 2 characters) in `text`. + /// Pure-digit tokens ("25", "2024") are excluded — they are weak topic signals and cause + /// false matches between topically unrelated samples that share a bare number. + static func contentWordSet(_ text: String) -> Set { + Set( + text.lowercased() + .split(whereSeparator: { !$0.isLetter && !$0.isNumber }) + .map(String.init) + .filter { + $0.count >= 2 + && !commonEnglishStopwords.contains($0) + && !$0.allSatisfy({ $0.isNumber }) + } + ) + } +} diff --git a/Packages/TokenProfiles/Sources/TokenProfiles/Classification/BiasPolicy.swift b/Packages/TokenProfiles/Sources/TokenProfiles/Classification/BiasPolicy.swift index 0c4f5ff..3ae6fcd 100644 --- a/Packages/TokenProfiles/Sources/TokenProfiles/Classification/BiasPolicy.swift +++ b/Packages/TokenProfiles/Sources/TokenProfiles/Classification/BiasPolicy.swift @@ -38,6 +38,16 @@ public enum BiasPolicy { /// Re-enables emoji tokens in emoji mode (cancels `emojiStaticPenalty`). public static let emojiEmojiModeDelta: Float = 3.0 + /// Whole-tag markup tokens (Gemma's ``/`` block) leak into prose when the context is + /// thin: observed shown `` at logprob −0.35 with legitimate runners-up at −1.7…−3.8, so + /// the emoji-sized −3 would not cover the gap. −6 pushes a tag below any plausible prose token + /// while staying finite (the output-stage `MarkupTagGuard` is context-aware; this is not). + public static let markupTagStaticPenalty: Float = -6.0 + /// Re-enables markup-tag tokens where markup is working material (cancels the static penalty): + /// HTML/Markdown in editors (code mode) and editors running inside a terminal. + public static let markupTagCodeModeDelta: Float = 6.0 + public static let markupTagTerminalModeDelta: Float = 6.0 + public static let newlineProseDelta: Float = -2.0 // MARK: - Static bias @@ -62,6 +72,9 @@ public enum BiasPolicy { if flags.contains(.emoji) { bias += emojiStaticPenalty } + if flags.contains(.markupTag) { + bias += markupTagStaticPenalty + } if isRepeatedWhitespace(flags: flags, bytes: bytes) { bias += repeatedWhitespaceStaticPenalty } @@ -104,11 +117,15 @@ public enum BiasPolicy { if flags.contains(.sentenceEnd) { delta += sentenceEndProseBonus } return delta case .code: - if isRepeatedWhitespace(flags: flags, bytes: bytes) { return repeatedWhitespaceCodeBonus } - return 0 + var delta: Float = 0 + if isRepeatedWhitespace(flags: flags, bytes: bytes) { delta += repeatedWhitespaceCodeBonus } + if flags.contains(.markupTag) { delta += markupTagCodeModeDelta } + return delta case .terminal: - if isRepeatedWhitespace(flags: flags, bytes: bytes) { return repeatedWhitespaceTerminalBonus } - return 0 + var delta: Float = 0 + if isRepeatedWhitespace(flags: flags, bytes: bytes) { delta += repeatedWhitespaceTerminalBonus } + if flags.contains(.markupTag) { delta += markupTagTerminalModeDelta } + return delta case .emoji: if flags.contains(.emoji) { return emojiEmojiModeDelta } return 0 diff --git a/Packages/TokenProfiles/Sources/TokenProfiles/Classification/TokenClassifier.swift b/Packages/TokenProfiles/Sources/TokenProfiles/Classification/TokenClassifier.swift index b20eacd..0a731d3 100644 --- a/Packages/TokenProfiles/Sources/TokenProfiles/Classification/TokenClassifier.swift +++ b/Packages/TokenProfiles/Sources/TokenProfiles/Classification/TokenClassifier.swift @@ -49,7 +49,15 @@ public enum TokenClassifier { var flags = TokenProfileFlags() - // SPECIAL: control / user-defined / unknown / unused / known role / chat marker. + // Reserved/placeholder tokens (e.g. Gemma's ``…``) are never valid output, + // but some GGUF conversions fail to set the `.unused` attribute on them (they arrive as + // NORMAL/USER_DEFINED), so the attribute checks below miss them and they leak into suggestions + // as literal "" text. Detect them by rendered byte content as a backstop. Check both + // the raw text and the BPE-marker-stripped form so a "▁"/"Ġ" variant can't + // slip the anchored match. See ADR. + let isReservedPlaceholder = matchesReservedPlaceholder(rawText) || matchesReservedPlaceholder(visibleText) + + // SPECIAL: control / user-defined / unknown / unused / known role / chat marker / reserved. let isSpecial = probe.attr.contains(.control) || probe.attr.contains(.userDefined) @@ -58,6 +66,7 @@ public enum TokenClassifier { || probe.isControl || probe.role != nil || matchesChatMarker(rawText) + || isReservedPlaceholder if isSpecial { flags.insert(.special) } // STOP: EOS / EOT / any EOG-declared token. @@ -70,6 +79,14 @@ public enum TokenClassifier { // CHAT_MARKER: assistant scaffolding text we never want to emit. if matchesChatMarker(rawText) { flags.insert(.chatMarker) } + // MARKUP_TAG: a whole markup tag baked in as one vocab token (Gemma's ``/``/… + // block at ids 168–237 arrives as NORMAL, like the `` case above). Flagged — + // not excluded — so `BiasPolicy` can demote it in prose while code/terminal keep the + // canonical single-token path for genuine HTML/Markdown editing. + if !isSpecial, matchesMarkupTag(rawText) || matchesMarkupTag(visibleText) { + flags.insert(.markupTag) + } + // INVALID_UTF8 (standalone byte fallback or partial multi-byte token). if rawText == nil { flags.insert(.invalidUTF8) } @@ -215,6 +232,47 @@ public enum TokenClassifier { return false } + /// Reserved / never-emitted placeholder tokens identified by their *rendered text* rather than a + /// tokenizer attribute, because some GGUF conversions don't flag them (notably Gemma's + /// ``…`` block, which comes through as NORMAL). Kept deliberately narrow — + /// only the unambiguous model-internal placeholder forms, so genuine `` text the user might + /// type is unaffected. + private static let reservedPlaceholderRegexes: [NSRegularExpression] = { + let patterns = [ + #"^$"#, // Gemma reserved slots + #"^$"#, // other vendors' reserved blocks + #"^$"#, // T5-style sentinel tokens + #"^$"#, #"^$"# // padding / masking placeholders + ] + return patterns.compactMap { try? NSRegularExpression(pattern: $0, options: [.caseInsensitive]) } + }() + + /// A token whose entire rendered text (after optional leading whitespace) is one markup tag: + /// ``, ``, `
`, … Anchored so partial-bracket text (`<3`, `a`) are special-cased + /// out by the caller before this runs. + private static let markupTagRegex = try? NSRegularExpression( + pattern: #"^\s*$"#, + options: [] + ) + + static func matchesMarkupTag(_ text: String?) -> Bool { + guard let text = text, !text.isEmpty, let regex = markupTagRegex else { return false } + let range = NSRange(text.startIndex.. Bool { + guard let text = text, !text.isEmpty else { return false } + let range = NSRange(text.startIndex.. Bool { // General punctuation categories Pc/Pd/Pe/Pf/Pi/Po/Ps. scalar.properties.generalCategory.isPunctuation diff --git a/Packages/TokenProfiles/Sources/TokenProfiles/Format/ACPFFormat.swift b/Packages/TokenProfiles/Sources/TokenProfiles/Format/ACPFFormat.swift index 4b96d99..6d5d993 100644 --- a/Packages/TokenProfiles/Sources/TokenProfiles/Format/ACPFFormat.swift +++ b/Packages/TokenProfiles/Sources/TokenProfiles/Format/ACPFFormat.swift @@ -14,7 +14,9 @@ public enum ACPF { /// file was produced for a different endianness and must be rejected. public static let endianSentinel: UInt16 = 0x0102 - /// First (and currently only) schema version. + /// On-disk *binary format* version. Bump only when the byte layout of the header, sections, or + /// records changes — never for changes to what gets baked into those bytes. Cache-busting for + /// *classifier output* changes is `generatorVersion`'s job, not this field's (see below). public static let currentSchemaVersion: UInt16 = 1 /// One section descriptor per `SectionKind`, in the header's section array. @@ -54,8 +56,17 @@ public enum ACPF { /// bytes. Anything in `0...255` is a real first byte. public static let emptyFirstByte: UInt16 = 256 - /// Identifier stamped into the validation section's `generator_version` slot. - public static let generatorVersion: String = "keytype-acpf-1.0" + /// Identifier stamped into the validation section's `generator_version` slot, and the cache-busting + /// key for the *classifier* output. The tokenizer digest covers only vocab bytes, so a change to + /// `TokenClassifier` (which decides the `.excluded`/`.special` flags baked into every record and the + /// trie) does NOT change the digest and would otherwise leave stale profiles in place. + /// `MmapAutocompleteProfile.init` rejects a profile whose stamped `generator_version` differs from + /// this, forcing `ProfileGenerator` to rebuild. **Bump this whenever `TokenClassifier` output + /// changes**, independent of the binary `currentSchemaVersion`. + /// 1.0 → 1.1: reserved-placeholder exclusion by byte content (Gemma `` leak fix). + /// 1.1 → 1.2: markup-tag flag + prose demotion for Gemma's single-token HTML-tag block + /// (`` shown in prose contexts; cancelled in code/terminal modes). + public static let generatorVersion: String = "keytype-acpf-1.2" } /// Ordinals into the header's section array. **Stable across schema versions** — once an @@ -373,6 +384,7 @@ public enum ACPFOpenError: Error, Equatable, CustomStringConvertible { case modelFamilyMismatch(expected: String, found: String) case vocabSizeMismatch(expected: Int, found: Int) case tokenizerDigestMismatch(expected: ACPFTokenizerDigestValue, found: ACPFTokenizerDigestValue) + case generatorVersionMismatch(expected: String, found: String) case malformedSectionPayload(kind: SectionKind, message: String) public var description: String { @@ -399,6 +411,8 @@ public enum ACPFOpenError: Error, Equatable, CustomStringConvertible { return "ACPF: vocab_size \(found) != expected \(expected)" case let .tokenizerDigestMismatch(expected, found): return "ACPF: tokenizer digest \(found.hexPrefix) != expected \(expected.hexPrefix)" + case let .generatorVersionMismatch(expected, found): + return "ACPF: generator_version '\(found)' != expected '\(expected)' (rebuild required)" case let .malformedSectionPayload(kind, message): return "ACPF: section \(kind) payload is malformed: \(message)" } diff --git a/Packages/TokenProfiles/Sources/TokenProfiles/Storage/MmapAutocompleteProfile.swift b/Packages/TokenProfiles/Sources/TokenProfiles/Storage/MmapAutocompleteProfile.swift index f8cc4ad..72ca8c9 100644 --- a/Packages/TokenProfiles/Sources/TokenProfiles/Storage/MmapAutocompleteProfile.swift +++ b/Packages/TokenProfiles/Sources/TokenProfiles/Storage/MmapAutocompleteProfile.swift @@ -81,14 +81,16 @@ public final class MmapAutocompleteProfile: AutocompleteProfile { at url: URL, expectedVocabSize: Int? = nil, expectedModelFamily: String? = nil, - expectedTokenizerDigest: ACPFTokenizerDigestValue? = nil + expectedTokenizerDigest: ACPFTokenizerDigestValue? = nil, + expectedGeneratorVersion: String? = ACPF.generatorVersion ) throws -> MmapAutocompleteProfile { let data = try Data(contentsOf: url, options: [.alwaysMapped, .uncached]) return try MmapAutocompleteProfile( data: data, expectedVocabSize: expectedVocabSize, expectedModelFamily: expectedModelFamily, - expectedTokenizerDigest: expectedTokenizerDigest + expectedTokenizerDigest: expectedTokenizerDigest, + expectedGeneratorVersion: expectedGeneratorVersion ) } @@ -99,14 +101,16 @@ public final class MmapAutocompleteProfile: AutocompleteProfile { at url: URL, tokenizerVocabSize: Int, tokenizerBytes: (TokenID) throws -> [UInt8], - expectedModelFamily: String? = nil + expectedModelFamily: String? = nil, + expectedGeneratorVersion: String? = ACPF.generatorVersion ) throws -> MmapAutocompleteProfile { let digest = try ACPFTokenizerDigest.digest(vocabSize: tokenizerVocabSize, bytesFor: tokenizerBytes) return try open( at: url, expectedVocabSize: tokenizerVocabSize, expectedModelFamily: expectedModelFamily, - expectedTokenizerDigest: digest + expectedTokenizerDigest: digest, + expectedGeneratorVersion: expectedGeneratorVersion ) } @@ -116,7 +120,8 @@ public final class MmapAutocompleteProfile: AutocompleteProfile { data: Data, expectedVocabSize: Int? = nil, expectedModelFamily: String? = nil, - expectedTokenizerDigest: ACPFTokenizerDigestValue? = nil + expectedTokenizerDigest: ACPFTokenizerDigestValue? = nil, + expectedGeneratorVersion: String? = ACPF.generatorVersion ) throws { // 1. Header sanity. let header = try data.withUnsafeBytes { try ACPFHeaderRaw(reading: $0) } @@ -172,6 +177,20 @@ public final class MmapAutocompleteProfile: AutocompleteProfile { throw ACPFOpenError.tokenizerDigestMismatch(expected: expectedDigest, found: digest) } + // 5b. Generator-version cache-buster. The tokenizer digest covers only vocab bytes, so a + // `TokenClassifier` logic change (which alters the baked `.excluded`/`.special` flags and + // trie) leaves the digest untouched. The VALIDATION section's `generator_version` string + // captures that logic version; reject a profile stamped with anything other than the + // build's expected value so `ProfileGenerator` rebuilds. An empty/missing stamp (older + // profiles, or a section without validation strings) skips the check for back-compat — + // only a present, non-empty, non-matching value is a hard mismatch. + if let expectedGenerator = expectedGeneratorVersion, let validation = sections[.validation] { + let stamped = MmapAutocompleteProfile.readValidationStrings(data: data, section: validation).generatorVersion + if !stamped.isEmpty, stamped != expectedGenerator { + throw ACPFOpenError.generatorVersionMismatch(expected: expectedGenerator, found: stamped) + } + } + // 6. Parse trie preamble (nodeCount, edgeCount) and compute payload offsets. let trieSection = sections[.prefixTrie]! let trieOffset = Int(trieSection.offset) @@ -407,7 +426,16 @@ public final class MmapAutocompleteProfile: AutocompleteProfile { /// Returns the validation section's `(ggufMetadataDigest, generatorVersion, builderHost)` triple. public func validationStrings() -> (ggufMetadataDigest: String, generatorVersion: String, builderHost: String) { - let section = sections[.validation]! + Self.readValidationStrings(data: data, section: sections[.validation]!) + } + + /// Parses the three length-prefixed strings in the VALIDATION section payload. Shared by + /// `validationStrings()` and the init-time `generator_version` check so both decode identically. + /// A truncated/empty section yields empty strings (never a crash). + static func readValidationStrings( + data: Data, + section: ACPFSectionRaw + ) -> (ggufMetadataDigest: String, generatorVersion: String, builderHost: String) { var cursor = Int(section.offset) let end = cursor + Int(section.length) diff --git a/Packages/TokenProfiles/Sources/TokenProfiles/TokenProfiles.swift b/Packages/TokenProfiles/Sources/TokenProfiles/TokenProfiles.swift index 48347dd..7497a90 100644 --- a/Packages/TokenProfiles/Sources/TokenProfiles/TokenProfiles.swift +++ b/Packages/TokenProfiles/Sources/TokenProfiles/TokenProfiles.swift @@ -20,6 +20,10 @@ public struct TokenProfileFlags: OptionSet, Equatable { public static let emoji = TokenProfileFlags(rawValue: 1 << 9) public static let chatMarker = TokenProfileFlags(rawValue: 1 << 10) public static let invalidUTF8 = TokenProfileFlags(rawValue: 1 << 11) + /// A whole markup tag as a single vocab token (Gemma bakes ``, ``, ``, … into + /// its vocabulary as dedicated tokens the GGUF reports as NORMAL). Not excluded — markup is + /// legitimate output in code/terminal modes — but down-biased in prose (see `BiasPolicy`). + public static let markupTag = TokenProfileFlags(rawValue: 1 << 12) } public struct TokenProfileRecord: Equatable { diff --git a/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/BiasPolicyMarkupTagTests.swift b/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/BiasPolicyMarkupTagTests.swift new file mode 100644 index 0000000..9da3a80 --- /dev/null +++ b/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/BiasPolicyMarkupTagTests.swift @@ -0,0 +1,46 @@ +import XCTest +@testable import TokenProfiles + +/// Mode-aware bias for `.markupTag` tokens: penalised in prose (where `` leaked into +/// suggestions), fully re-enabled in code/terminal where markup is working material. Mirrors the +/// emoji penalty/cancel pattern. +final class BiasPolicyMarkupTagTests: XCTestCase { + + private let tagBytes = Array("".utf8) + + func testStaticBiasCarriesMarkupTagPenalty() { + let bias = BiasPolicy.staticBias(flags: [.markupTag], displayWidth: 7, bytes: tagBytes) + XCTAssertEqual(bias, BiasPolicy.markupTagStaticPenalty) + } + + func testCodeModeDeltaCancelsThePenalty() { + let bias = BiasPolicy.staticBias(flags: [.markupTag], displayWidth: 7, bytes: tagBytes) + let delta = BiasPolicy.delta(flags: [.markupTag], mode: .code, bytes: tagBytes) + XCTAssertEqual(bias + delta, 0, "markup tags must be fully re-enabled in code mode") + } + + func testTerminalModeDeltaCancelsThePenalty() { + let bias = BiasPolicy.staticBias(flags: [.markupTag], displayWidth: 7, bytes: tagBytes) + let delta = BiasPolicy.delta(flags: [.markupTag], mode: .terminal, bytes: tagBytes) + XCTAssertEqual(bias + delta, 0, "markup tags must be fully re-enabled in terminal mode") + } + + func testProseModeKeepsThePenalty() { + XCTAssertEqual(BiasPolicy.delta(flags: [.markupTag], mode: .prose, bytes: tagBytes), 0) + } + + func testCorrectionModeKeepsThePenalty() { + XCTAssertEqual(BiasPolicy.delta(flags: [.markupTag], mode: .correction, bytes: tagBytes), 0) + } + + func testPenaltyOutweighsObservedLeakMargin() { + // The leaked `` was shown at logprob −0.35 with legitimate runners-up at −1.7…−3.8; + // the penalty must exceed that gap or the leak persists in flat distributions. + XCTAssertLessThanOrEqual(BiasPolicy.markupTagStaticPenalty, -4.0) + } + + func testExcludedTokenStillInfinitelyNegativeRegardlessOfMarkupFlag() { + let bias = BiasPolicy.staticBias(flags: [.excluded, .special], displayWidth: 10, bytes: Array("".utf8)) + XCTAssertEqual(bias, -Float.infinity) + } +} diff --git a/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/ClassifierFlagTests.swift b/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/ClassifierFlagTests.swift index fe9d444..f41b334 100644 --- a/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/ClassifierFlagTests.swift +++ b/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/ClassifierFlagTests.swift @@ -203,6 +203,59 @@ final class ClassifierFlagTests: XCTestCase { XCTAssertTrue(cls.flags.contains(.excluded)) } + // MARK: - Reserved placeholders flagged by byte content (GGUF attribute missing) + + func testGemmaUnusedPlaceholderExcludedByByteContent() { + // The real failure: Gemma's arrive as NORMAL (no .unused attr) and leaked as + // literal text. They must be classified special + excluded purely from their rendered bytes. + let cls = classify(u8(""), attr: .normal) + XCTAssertTrue(cls.flags.contains(.special), "reserved placeholder must be special") + XCTAssertTrue(cls.flags.contains(.excluded), "reserved placeholder must be excluded from sampling") + } + + func testOtherReservedPlaceholderFormsExcluded() { + for s in ["", "", "", "", ""] { + XCTAssertTrue(classify(u8(s), attr: .normal).flags.contains(.excluded), "\(s) should be excluded") + } + } + + func testGenuineAngleBracketTextIsNotExcluded() { + // Don't over-reach: ordinary markup/text the user might type stays sampleable. + for s in ["

", "", "
", "<3"] { + XCTAssertFalse(classify(u8(s), attr: .normal).flags.contains(.excluded), "\(s) should NOT be excluded") + } + } + + // MARK: - Markup-tag tokens (Gemma's single-token HTML-tag block, ids 168–237) + + func testWholeTagTokensGetMarkupTagFlag() { + // The ``-shown-in-prose failure: these arrive as NORMAL single tokens. They are + // flagged (for the prose bias penalty) but stay sampleable for code/terminal modes. + for s in ["", "", "

", "", "
"] { + let cls = classify(u8(s), attr: .normal) + XCTAssertTrue(cls.flags.contains(.markupTag), "\(s) should be flagged markupTag") + XCTAssertFalse(cls.flags.contains(.excluded), "\(s) must NOT be excluded") + } + } + + func testSentencePieceSpacePrefixedTagGetsMarkupTagFlag() { + XCTAssertTrue(classify(u8("\u{2581}"), attr: .normal).flags.contains(.markupTag)) + } + + func testNonTagAngleBracketTextIsNotMarkupTag() { + for s in ["<3", "a", #""#, "hello", "<", ">"] { + XCTAssertFalse(classify(u8(s), attr: .normal).flags.contains(.markupTag), "\(s) should NOT be markupTag") + } + } + + func testReservedPlaceholderIsNotMarkupTag() { + // `` matches the tag shape but is special/excluded — keep the flags disjoint so + // bias accounting stays single-purpose. + let cls = classify(u8(""), attr: .normal) + XCTAssertTrue(cls.flags.contains(.excluded)) + XCTAssertFalse(cls.flags.contains(.markupTag)) + } + // MARK: - Display width func testDisplayWidthOfASCII() { diff --git a/Packages/TokenProfiles/Tests/TokenProfilesTests/Format/GeneratorVersionTests.swift b/Packages/TokenProfiles/Tests/TokenProfilesTests/Format/GeneratorVersionTests.swift new file mode 100644 index 0000000..89bae40 --- /dev/null +++ b/Packages/TokenProfiles/Tests/TokenProfilesTests/Format/GeneratorVersionTests.swift @@ -0,0 +1,74 @@ +import XCTest +@testable import TokenProfiles + +/// Cache-busting via the VALIDATION section's `generator_version` string. The tokenizer digest +/// covers only vocab bytes, so a `TokenClassifier` logic change (which alters the baked +/// `.excluded`/`.special` flags and trie) leaves the digest unchanged. `generator_version` captures +/// that logic version; `MmapAutocompleteProfile.init` rejects a profile stamped with anything other +/// than the build's expected value so `ProfileGenerator` rebuilds. See `ACPF.generatorVersion`. +final class GeneratorVersionTests: XCTestCase { + + /// The binary format version stays at 1 — the P0 classifier change is a *content* change, busted + /// via `generatorVersion`, not the on-disk layout. Guards against re-introducing the schema bump. + func testSchemaVersionRemainsOne() { + XCTAssertEqual(ACPF.currentSchemaVersion, 1) + } + + private func encode(generatorVersion: String) throws -> Data { + let built = SyntheticVocabFixture.build() + let input = ACPFProfileInput( + modelFamily: built.modelFamily, + vocabSize: built.vocabSize, + tokenizerDigest: built.digest, + entries: built.entries, + ggufMetadataDigest: "synthetic-gguf-digest", + generatorVersion: generatorVersion, + builderHost: "synthetic-host", + buildTimestamp: Date(timeIntervalSince1970: 1_716_000_000), + headerFlags: 0 + ) + return try ACPFWriter.encode(input) + } + + func testMatchingGeneratorVersionOpens() throws { + let data = try encode(generatorVersion: "keytype-acpf-1.1") + XCTAssertNoThrow(try MmapAutocompleteProfile(data: data, expectedGeneratorVersion: "keytype-acpf-1.1")) + } + + func testStaleGeneratorVersionIsRejected() throws { + let data = try encode(generatorVersion: "keytype-acpf-1.0") + XCTAssertThrowsError( + try MmapAutocompleteProfile(data: data, expectedGeneratorVersion: "keytype-acpf-1.1") + ) { error in + guard case let ACPFOpenError.generatorVersionMismatch(expected, found) = error else { + return XCTFail("expected generatorVersionMismatch, got \(error)") + } + XCTAssertEqual(expected, "keytype-acpf-1.1") + XCTAssertEqual(found, "keytype-acpf-1.0") + } + } + + /// Passing `nil` opts out of the check (format round-trip tests that write arbitrary versions). + func testNilExpectationSkipsCheck() throws { + let data = try encode(generatorVersion: "anything-goes") + XCTAssertNoThrow(try MmapAutocompleteProfile(data: data, expectedGeneratorVersion: nil)) + } + + /// Back-compat: a profile with an empty/unstamped generator_version skips the check rather than + /// being rejected, so older profiles without the stamp still open. Only a present, non-empty, + /// non-matching value is a hard mismatch. + func testEmptyStampSkipsCheckForBackCompat() throws { + let data = try encode(generatorVersion: "") + XCTAssertNoThrow(try MmapAutocompleteProfile(data: data, expectedGeneratorVersion: "keytype-acpf-1.1")) + } + + /// The default expectation is the build's current `ACPF.generatorVersion`, so a profile this build + /// produces opens with no explicit argument — and a stale one does not. + func testDefaultExpectationUsesCurrentBuildVersion() throws { + let current = try encode(generatorVersion: ACPF.generatorVersion) + XCTAssertNoThrow(try MmapAutocompleteProfile(data: current)) + + let stale = try encode(generatorVersion: "keytype-acpf-0.0") + XCTAssertThrowsError(try MmapAutocompleteProfile(data: stale)) + } +}