diff --git a/KeyType/Logic/Completion/CompletionController.swift b/KeyType/Logic/Completion/CompletionController.swift
index 97c04d5..13fbed6 100644
--- a/KeyType/Logic/Completion/CompletionController.swift
+++ b/KeyType/Logic/Completion/CompletionController.swift
@@ -17,6 +17,7 @@ import Foundation
 import LlamaModelRuntime
 import MacContextCapture
 import ModelManagement
+import ModelProfileGeneration
 import ModelRuntime
 import Observation
 import Personalization
@@ -562,10 +563,18 @@ final class CompletionController {
         // optional side sections are frozen briefly so unrelated history/clipboard/OCR updates do
         // not rewrite the prompt prefix and destroy KV append reuse mid-burst.
         let (sideContext, sideContextReused) = promptSideContext(for: promptContext)
+        // Relevance-filter the frozen history against the *live* beforeCursor so topically-unrelated
+        // samples (e.g. a bio stored from an earlier session in the same app) are dropped before they
+        // reach the prompt. This runs at generation time with the current context, not inside the
+        // 2-second frozen side-context cache, so the judgment always reflects what the user is typing.
+        let filteredHistory = WritingHistoryFilter.filterByRelevance(
+            sideContext.previousUserInputs,
+            beforeCursor: context.beforeCursor
+        )
         let promptResult = KeyTypeModuleGraph.makePromptBuilder().buildPrompt(
             context: promptContext,
             customInstructions: settings.promptCustomInstructions(appInstructions: policy.customInstructions),
-            previousUserInputs: sideContext.previousUserInputs,
+            previousUserInputs: filteredHistory,
             pasteboardText: sideContext.pasteboardText,
             screenText: sideContext.screenText,
             includeEnvironmentContext: policy.includesEnvironmentContext
@@ -577,13 +586,21 @@ final class CompletionController {
         let healExtraTokens = healSlack > 0 ? 1 : 0
         // Completion length is user-configurable (Settings) and maps to the decoder's token/width budget.
         let length = settings.completionLength
+        // Clipboard and OCR are background context, not text to reproduce; carry them so the output
+        // filter can drop a completion that just parrots them verbatim. History is excluded — it is
+        // already same-app/domain scoped and echoing the user's own recurring phrases is intended.
+        let injectedContext = Self.injectedContext(
+            pasteboardText: sideContext.pasteboardText,
+            screenText: sideContext.screenText
+        )
         let request = CompletionRequest(
             context: context,
             prompt: promptResult.prompt,
             requiredPrefixBytes: requiredPrefixBytes,
             mode: policy.completionMode,
             maxCompletionTokens: length.maxCompletionTokens + healExtraTokens,
-            maxDisplayWidth: length.maxDisplayWidth + healSlack
+            maxDisplayWidth: length.maxDisplayWidth + healSlack,
+            injectedContext: injectedContext
         )
         rememberFullPromptDebug(
             for: request,
@@ -591,6 +608,7 @@ final class CompletionController {
             promptContext: promptContext,
             tokenHealing: heal.map { FullPromptTokenHealing(head: $0.head, heal: $0.heal) },
             sideContext: sideContext,
+            filteredPreviousUserInputs: filteredHistory,
             sideContextReused: sideContextReused,
             policy: policy,
             completionLength: length,
@@ -759,6 +777,7 @@ final class CompletionController {
         promptContext: TextFieldContext,
         tokenHealing: FullPromptTokenHealing?,
         sideContext: FrozenPromptSideContext,
+        filteredPreviousUserInputs: [String],
         sideContextReused: Bool,
         policy: CompletionPolicy,
         completionLength: CompletionLength,
@@ -776,7 +795,7 @@ final class CompletionController {
                 historyEnabled: sideContext.historyEnabled,
                 clipboardEnabled: sideContext.clipboardEnabled,
                 ocrEnabled: sideContext.ocrEnabled,
-                previousUserInputs: sideContext.previousUserInputs,
+                previousUserInputs: filteredPreviousUserInputs,
                 pasteboardText: sideContext.pasteboardText,
                 screenText: sideContext.screenText
             ),
@@ -886,11 +905,19 @@ final class CompletionController {
             return (cached, true)
         }
 
+        // Scope history to the focused app. Cross-app recent samples bleed unrelated content into the
+        // prompt — e.g. a Notes draft about an API key surfacing as a verbatim suggestion in a fresh
+        // Gmail message — which the small model tends to parrot. Same-app history still personalizes
+        // tone/recurring phrases without leaking content across contexts.
+        // Normalize an empty domain to nil so it can't collapse the same-app filter to `domain == ""`
+        // and silently drop all real history for the app.
+        let scopedDomain = context.target.domain.flatMap { $0.isEmpty ? nil : $0 }
         let query = WritingHistoryQuery(
             bundleIdentifier: context.target.bundleIdentifier,
-            domain: context.target.domain,
+            domain: scopedDomain,
             typingContext: context.typingContext,
-            language: context.detectedLanguage
+            language: context.detectedLanguage,
+            sameAppOnly: true
         )
         let previousUserInputs = settings.historyEnabled
             ? history.samples(for: query)
@@ -970,6 +997,59 @@ final class CompletionController {
         case notApplicable
     }
 
+    /// Non-nil clipboard and OCR text, in that order, as the echo guard consumes it. History is
+    /// intentionally excluded (same-app/domain scoped; echoing the user's own phrases is intended).
+    private static func injectedContext(pasteboardText: String?, screenText: String?) -> [String] {
+        [pasteboardText, screenText].compactMap { $0 }
+    }
+
+    /// Re-validates a cached completion's remaining text against the *live* context before it is
+    /// shown again. Reuse re-shows the candidate without another pass through the pipeline, yet the
+    /// inputs its generation-time filters keyed off may have moved since:
+    ///   - `beforeCursor`/`afterCursor` grow as the user types through the suggestion, so a tail clean
+    ///     at anchor time can become a verbatim repetition (or suffix duplication) of just-typed text;
+    ///   - injected clipboard/OCR context can change mid-burst or differ from when an older reused
+    ///     snapshot was generated, so the echo guard runs against the currently-frozen side context
+    ///     (already cached, so no hot-path pasteboard read).
+    /// - Returns: `true` when the remaining text is still safe to show.
+    private func reuseRemainingPassesLiveGuards(remaining: String, context: TextFieldContext) -> Bool {
+        // `frozenSideContext` may be nil; both fields then drop out and the echo check sees `[]`.
+        let liveInjectedContext = Self.injectedContext(
+            pasteboardText: frozenSideContext?.pasteboardText,
+            screenText: frozenSideContext?.screenText
+        )
+        return Self.reuseRemainingIsSafe(
+            remaining: remaining,
+            context: context,
+            injectedContext: liveInjectedContext
+        )
+    }
+
+    /// Pure decision logic behind `reuseRemainingPassesLiveGuards`, kept static so the reuse-safety
+    /// rules can be unit-tested without building a controller. An empty `remaining` is always safe;
+    /// otherwise it must clear the prefix-repetition, suffix-overlap and context-echo guards.
+    nonisolated static func reuseRemainingIsSafe(
+        remaining: String,
+        context: TextFieldContext,
+        injectedContext: [String]
+    ) -> Bool {
+        if remaining.isEmpty { return true }
+
+        // Guards run in a fixed order and the first one that fires ends the check.
+        let typedBefore = context.beforeCursor
+        let repeatsTypedText = PrefixRepetitionGuard.repeatsPrefix(completion: remaining, beforeCursor: typedBefore)
+        guard !repeatsTypedText else { return false }
+
+        let duplicatesSuffix = SuffixOverlapGuard.duplicatesSuffix(
+            completion: remaining,
+            beforeCursor: typedBefore,
+            afterCursor: context.afterCursor
+        )
+        guard !duplicatesSuffix else { return false }
+
+        return !ContextEchoGuard.echoesInjectedContext(completion: remaining, injectedContext: injectedContext)
+    }
+
     @discardableResult
     private func applyReuseHistoryIfUseful(
         for live: TextFieldContext,
@@ -980,6 +1060,11 @@ final class CompletionController {
 
         switch reuseHistory.decision(for: live) {
         case let .reuse(reuse):
+            guard reuseRemainingPassesLiveGuards(remaining: reuse.remainingText, context: live) else {
+                predictionLog.append("REUSE rejected by live guard remaining=\"\(PredictionLog.escape(reuse.remainingText))\"")
+                clearCompletion()
+                return .mustRecompute
+            }
             anchorText = reuse.anchorText
             anchorContext = reuse.anchorContext
             if updateLatestContext { latestContext = live }
@@ -1292,6 +1377,10 @@ final class CompletionController {
     ) -> Bool {
         switch decision {
         case let .reuse(reuse):
+            guard reuseRemainingPassesLiveGuards(remaining: reuse.remainingText, context: optimistic) else {
+                predictionLog.append("REUSE rejected by live guard remaining=\"\(PredictionLog.escape(reuse.remainingText))\"")
+                return false
+            }
             anchorText = reuse.anchorText
             anchorContext = reuse.anchorContext
             latestContext = optimistic
@@ -1478,12 +1567,28 @@ final class CompletionController {
             forFilename: modelFilename,
             vocabSize: runtime.metadata.vocabularySize
         )
-        let profile = try MmapAutocompleteProfile.open(
-            at: try ModelContainer.profileURL(family: family),
-            tokenizerVocabSize: runtime.metadata.vocabularySize,
-            tokenizerBytes: { try runtime.tokenizer.rawBytes(for: $0) },
-            expectedModelFamily: family
-        )
+        let profileURL = try ModelContainer.profileURL(family: family)
+        func openProfile() throws -> MmapAutocompleteProfile {
+            try MmapAutocompleteProfile.open(
+                at: profileURL,
+                tokenizerVocabSize: runtime.metadata.vocabularySize,
+                tokenizerBytes: { try runtime.tokenizer.rawBytes(for: $0) },
+                expectedModelFamily: family
+            )
+        }
+        let profile: MmapAutocompleteProfile
+        do {
+            profile = try openProfile()
+        } catch {
+            // A profile built by an older classifier / schema version fails to open. No other launch
+            // path rebuilds it (setup only checks the file *exists*), so an app update that changes the
+            // token classification would otherwise brick completions for existing users. Rebuild it in
+            // place from the model's tokenizer, then retry. See ADR-021 / ACPF currentSchemaVersion.
+            Logger(subsystem: "com.pattonium.KeyType", category: "completion")
+                .error("ACPF profile open failed (\(String(describing: error), privacy: .public)); rebuilding for \(modelFilename, privacy: .public)")
+            _ = try await ProfileGenerator.generateProfileIfNeeded(forModelFilename: modelFilename)
+            profile = try openProfile()
+        }
         // Apply the telemetry-derived nudges to the decoder defaults: a larger relative cutoff keeps
         // more branches alive (fewer suppressions), a lower probability floor admits weaker-but-valid
         // continuations. Bounds are clamped inside `ThresholdTuner`. See ADR-023.
diff --git a/KeyType/Logic/Context/ScreenContextController.swift b/KeyType/Logic/Context/ScreenContextController.swift
index 55c5c5c..c788a06 100644
--- a/KeyType/Logic/Context/ScreenContextController.swift
+++ b/KeyType/Logic/Context/ScreenContextController.swift
@@ -99,6 +99,11 @@ final class ScreenContextController {
         let key = windowKey(for: snapshot)
         guard key != lastWindowKey else { return }
         lastWindowKey = key
+        // Drop the previous window's cached OCR *before* kicking off the new (async) capture, so a
+        // completion fired in the just-focused window can't be fed the prior window's screen text
+        // while the fresh capture is still in flight. Without this, switching browser tabs/windows
+        // leaks the old page's text (e.g. a "2 of 10 …" results counter) into the new one's prompt.
+        engine.clear()
         capture(for: snapshot)
     }
 
@@ -120,7 +125,18 @@ final class ScreenContextController {
         // screen context carries only the *surrounding* on-screen text.
         let context = snapshot.context
         let fieldText = context.beforeCursor + context.afterCursor
-        engine.refresh(pid: pid, fieldText: fieldText)
+        // The caret location lets the capturer pick the right window when the app has several open,
+        // so screen context can't bleed in text from a different window of the same app. `caretRect`
+        // is in AppKit space (bottom-left origin) but ScreenCaptureKit window frames are in CG space
+        // (top-left origin), so convert before handing it down — otherwise the Y axes don't match and
+        // the wrong window (or none) is selected.
+        let focusPoint = snapshot.caretRect.flatMap { rect -> CGPoint? in
+            DisplayCoordinateConverter.coreGraphicsPoint(
+                fromAppKitPoint: CGPoint(x: rect.midX, y: rect.midY),
+                displays: ScreenDisplayGeometryProvider.current()
+            )
+        }
+        engine.refresh(pid: pid, fieldText: fieldText, focusPoint: focusPoint)
     }
 
     // MARK: - Eligibility
diff --git a/KeyType/Logic/Telemetry/WritingHistoryRecorder.swift b/KeyType/Logic/Telemetry/WritingHistoryRecorder.swift
index d7b6e84..d030b43 100644
--- a/KeyType/Logic/Telemetry/WritingHistoryRecorder.swift
+++ b/KeyType/Logic/Telemetry/WritingHistoryRecorder.swift
@@ -122,6 +122,10 @@ final class WritingHistoryRecorder {
         guard sample.text.trimmingCharacters(
             in: .whitespacesAndNewlines
         ).count >= minimumCharacters else { return }
+        // Belt-and-suspenders junk gate: skip entries that aren't prose (bare URLs, UUID blobs,
+        // filesystem paths) before they reach the encrypted DB. Mirrored in WritingHistorySelection
+        // for samples already on disk from before this guard was introduced.
+        guard WritingHistoryFilter.isProse(sample.text) else { return }
 
         // Re-resolve the policy from the captured metadata: secure/sensitive fields and apps that
         // disable training-data collection must never contribute samples.
diff --git a/KeyTypeTests/KeyTypeTests.swift b/KeyTypeTests/KeyTypeTests.swift
index 27cd589..41524b1 100644
--- a/KeyTypeTests/KeyTypeTests.swift
+++ b/KeyTypeTests/KeyTypeTests.swift
@@ -273,6 +273,48 @@ struct KeyTypeTests {
         #expect(advanced == nil)
     }
 
+    // MARK: - Reuse re-check (H2)
+
+    @Test func reuseRejectsRemainingThatRepeatsRecentlyTypedText() {
+        // beforeCursor keeps growing while the user types through a cached suggestion; once the
+        // remaining tail would repeat just-typed text verbatim, reuse must stop showing it.
+        let typedSoFar = "You can use it to access the OpenAI. And"
+        let live = TextFieldContext(beforeCursor: typedSoFar, target: Self.target)
+
+        // No injected context is supplied, so the echo guard cannot be what rejects the tail.
+        let isSafe = CompletionController.reuseRemainingIsSafe(
+            remaining: " you can use it to access the OpenAI again",
+            context: live,
+            injectedContext: []
+        )
+
+        #expect(!isSafe)
+    }
+
+    @Test func reuseRejectsRemainingThatEchoesInjectedClipboard() {
+        // Clean at anchor time is not enough: a cached completion that now parrots the
+        // currently-injected clipboard/OCR context must not be shown again.
+        let clipboard = "if you require maintenance of UPS systems or backup power, call us."
+        let live = TextFieldContext(beforeCursor: "Hi Molly,", target: Self.target)
+        let isSafe = CompletionController.reuseRemainingIsSafe(
+            remaining: " if you require maintenance of UPS systems or",
+            context: live,
+            injectedContext: [clipboard]
+        )
+        #expect(!isSafe)
+    }
+
+    @Test func reuseAllowsGenuineRemaining() {
+        // A tail unrelated to both the typed text and the injected context stays reusable.
+        let live = TextFieldContext(beforeCursor: "Hi Molly,", target: Self.target)
+        let isSafe = CompletionController.reuseRemainingIsSafe(
+            remaining: " hope you are doing well today",
+            context: live,
+            injectedContext: ["if you require maintenance of UPS systems or backup power, call us."]
+        )
+        #expect(isSafe)
+    }
+
     @Test func promotionCachePromotesLowerRankedBranchWhenTopIsInvalidated() {
         let cache = Self.promotionCache(candidates: [
             "ship it today",
diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AlphanumericNormalizer.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AlphanumericNormalizer.swift
new file mode 100644
index 0000000..725cea4
--- /dev/null
+++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AlphanumericNormalizer.swift
@@ -0,0 +1,16 @@
+import Foundation
+
+/// Text normalization shared by the content-overlap guards (`SuffixOverlapGuard`,
+/// `PrefixRepetitionGuard`, `ContextEchoGuard`). Only lowercased alphanumeric scalars are compared,
+/// so whitespace, punctuation, and stray symbol glyphs the model sometimes prepends ("**", "•")
+/// cannot defeat a match.
+enum AlphanumericNormalizer {
+    /// Lowercases `text` and keeps only the scalars in `CharacterSet.alphanumerics`, in order.
+    static func normalize(_ text: String) -> String {
+        let allowed = CharacterSet.alphanumerics
+        let kept: String.UnicodeScalarView = text.lowercased().unicodeScalars.filter { scalar in
+            allowed.contains(scalar)
+        }
+        return String(kept)
+    }
+}
diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift
index 60d7108..a7834da 100644
--- a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift
+++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift
@@ -135,6 +135,12 @@ public struct CompletionRequest: Equatable {
     public var mode: CompletionMode
     public var maxCompletionTokens: Int
     public var maxDisplayWidth: Int
+    /// Side-context text injected into the prompt that the user did NOT type — clipboard contents and
+    /// on-screen OCR text. Carried alongside the request so the output filter can drop a completion
+    /// that merely parrots it verbatim (`ContextEchoGuard`). Writing-history samples are deliberately
+    /// excluded: they are scoped to the same app/domain and reproducing the user's own recurring
+    /// phrases is the point of that feature.
+    public var injectedContext: [String]
 
     public init(
         context: TextFieldContext,
@@ -142,7 +148,8 @@ public struct CompletionRequest: Equatable {
         requiredPrefixBytes: [UInt8] = [],
         mode: CompletionMode = .prose,
         maxCompletionTokens: Int = 4,
-        maxDisplayWidth: Int = 80
+        maxDisplayWidth: Int = 80,
+        injectedContext: [String] = []
     ) {
         self.context = context
         self.prompt = prompt
@@ -150,6 +157,7 @@ public struct CompletionRequest: Equatable {
         self.mode = mode
         self.maxCompletionTokens = maxCompletionTokens
         self.maxDisplayWidth = maxDisplayWidth
+        self.injectedContext = injectedContext
     }
 }
 
@@ -202,6 +210,26 @@ public enum SuppressionReason: Equatable {
     /// A mid-line / fill-in-the-middle completion that is too long or too low-probability to show
     /// without risking a wrong suggestion.
     case lowConfidenceMidLine
+    /// The completion reproduces a phrase that is already present in the recent text before the caret.
+    /// Accepting it would create a verbatim repetition loop. See `PrefixRepetitionGuard`.
+    case repeatsRecentPrefix
+    /// The completion verbatim-reproduces a span of injected side context the user did not type
+    /// (clipboard, on-screen OCR text) — the small model parroting context instead of predicting.
+    /// See `ContextEchoGuard`.
+    case echoesInjectedContext
+    /// The completion contains a reserved model-internal marker (e.g. Gemma's `<unused56>`, chat/FIM
+    /// scaffolding) that should have been masked at sample time. Belt-and-suspenders for stale or
+    /// mis-flagged token profiles. See `TokenClassifier` / `DefaultCandidateFilter.containsReservedMarker`.
+    case reservedMarker
+    /// The completion contains a within-candidate token-repetition loop — the same word appears ≥ 3 times
+    /// ("text 1 1 1", "since 1 1 1"). Model degeneration, not a bleed from side context.
+    /// See `IntraCompletionRepetitionGuard`.
+    case intraCompletionRepetition
+    /// The completion is nothing but markup tags (`</code>`, `<b>`, …) in a prose/correction context
+    /// whose surrounding text contains no markup — Gemma's single-token HTML-tag block surfacing in
+    /// ordinary writing. Sample-time demotion is the primary defence (see
+    /// `BiasPolicy.markupTagStaticPenalty`); this is its context-aware output net. See `MarkupTagGuard`.
+    case markupTagOutsideMarkupContext
     case noCandidate
 }
 
diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/ContextEchoGuard.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/ContextEchoGuard.swift
new file mode 100644
index 0000000..9eec7b9
--- /dev/null
+++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/ContextEchoGuard.swift
@@ -0,0 +1,69 @@
+import Foundation
+
+/// Common "does this completion reproduce a phrase from some text" test behind both
+/// `PrefixRepetitionGuard` (recent typed prefix) and `ContextEchoGuard` (injected side context).
+/// Both inputs are expected to be normalized already. Two shapes are recognised:
+///
+/// 1. **Whole** — the entire completion is a substring of the text. That is a strong signal, so a
+///    short completion (`minimumWhole` characters) already counts.
+/// 2. **Leading** — the completion *starts* with a run found in the text and then diverges, which
+///    shape 1 misses. Any contained leading run of length ≥ `minimumLeading` has the slice of exactly
+///    that length as its prefix, so a single `contains` on that slice decides it. The larger floor
+///    keeps chance word collisions from firing.
+enum RepeatedSpanDetector {
+    static func reproduces(
+        normalizedCompletion: String,
+        within normalizedText: String,
+        minimumWhole: Int,
+        minimumLeading: Int
+    ) -> Bool {
+        if normalizedCompletion.isEmpty || normalizedText.isEmpty { return false }
+        let length = normalizedCompletion.count
+        // Shape 1: the whole completion occurs in the text and is long enough to count.
+        let wholeMatch = length >= minimumWhole && normalizedText.contains(normalizedCompletion)
+        if wholeMatch { return true }
+        // Shape 2: the completion's first `minimumLeading` characters occur in the text.
+        guard length >= minimumLeading else { return false }
+        let leadingSlice = String(normalizedCompletion.prefix(minimumLeading))
+        return normalizedText.contains(leadingSlice)
+    }
+}
+
+/// Flags completions that simply parrot injected side context — clipboard contents or on-screen
+/// OCR text that the prompt carries but the user never typed. The small model often copies such
+/// context verbatim rather than treating it as background (e.g. text copied from a localhost page
+/// in one browser resurfacing as a suggestion in another app's compose field).
+///
+/// Writing-history samples are deliberately NOT passed in: they are already scoped to the same
+/// app/domain, and reproducing the user's own recurring phrases (a signature, a stock reply) is
+/// what that personalization is for — suppressing it would be a regression.
+public enum ContextEchoGuard {
+
+    /// Reports whether `completion` verbatim-reproduces a span of any entry in `injectedContext`.
+    ///
+    /// The default `minimumWhole` sits a little above `PrefixRepetitionGuard`'s because the injected
+    /// corpus is larger (an incidental short match is likelier); `minimumLeading` matches it.
+    ///
+    /// Returns `false` when `injectedContext` is empty or `completion` normalizes to nothing.
+    public static func echoesInjectedContext(
+        completion: String,
+        injectedContext: [String],
+        minimumWhole: Int = 12,
+        minimumLeading: Int = 16
+    ) -> Bool {
+        if injectedContext.isEmpty { return false }
+        let needle = AlphanumericNormalizer.normalize(completion)
+        if needle.isEmpty { return false }
+
+        // Entries are normalized one at a time; the scan ends at the first entry that matches.
+        return injectedContext.contains { entry in
+            let haystack = AlphanumericNormalizer.normalize(entry)
+            return RepeatedSpanDetector.reproduces(
+                normalizedCompletion: needle,
+                within: haystack,
+                minimumWhole: minimumWhole,
+                minimumLeading: minimumLeading
+            )
+        }
+    }
+}
diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/IntraCompletionRepetitionGuard.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/IntraCompletionRepetitionGuard.swift
new file mode 100644
index 0000000..d25da81
--- /dev/null
+++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/IntraCompletionRepetitionGuard.swift
@@ -0,0 +1,46 @@
+import Foundation
+
+/// Catches within-completion token-repetition degeneration: a model failure mode in which one
+/// word shows up three or more times inside a single candidate ("text 1 1 1", "since 1 1 1").
+/// This is separate from the across-prefix loop that `PrefixRepetitionGuard` targets.
+///
+/// A word is a contiguous run of letter/number characters, compared case-insensitively with
+/// punctuation dropped, so "1 1 1" and "1, 1, 1" are both detected. The occurrences need not be
+/// adjacent; the guard fires as soon as any one word has been seen three times.
+public enum IntraCompletionRepetitionGuard {
+
+    /// Reports whether `completion` contains a degenerate within-completion repetition loop,
+    /// i.e. any single word (see `contentWords`) occurring three or more times.
+    ///
+    /// Occurrences are tallied across the whole completion, so they do not have to be adjacent.
+    /// NOTE(review): ordinary prose that reuses a word three times ("the … the … the") trips this
+    /// as well; presumably acceptable because completions are short — confirm.
+    ///
+    /// - Parameter completion: Candidate completion text.
+    /// - Returns: `true` once some word has been seen for the third time, otherwise `false`.
+    public static func isDegenerate(_ completion: String) -> Bool {
+        var occurrences: [Substring: Int] = [:]
+        for word in contentWords(completion) {
+            let seen = occurrences[word, default: 0] + 1
+            // The tally grows by one per word, so the first count to reach three ends the scan.
+            guard seen < 3 else { return true }
+            occurrences[word] = seen
+        }
+        return false
+    }
+
+    /// Lowercased runs of letter/number characters in `text`, in order of appearance; every other
+    /// character (punctuation, whitespace, symbols) acts as a separator and is discarded.
+    /// "1, 1, 1" → ["1","1","1"]; " text 1 1 1" → ["text","1","1","1"].
+    ///
+    /// - Parameter text: Text to split; it is lowercased before being split.
+    /// - Returns: The words as `Substring`s of the lowercased copy; empty when there are none.
+    static func contentWords(_ text: String) -> [Substring] {
+        // `split` omits empty subsequences by default, so consecutive separators never yield
+        // empty words and leading/trailing separators are ignored.
+        let lowered = text.lowercased()
+        return lowered.split { character in
+            !(character.isLetter || character.isNumber)
+        }
+    }
+}
diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/MarkupTagGuard.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/MarkupTagGuard.swift
new file mode 100644
index 0000000..4feb5af
--- /dev/null
+++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/MarkupTagGuard.swift
@@ -0,0 +1,54 @@
+import Foundation
+
+/// Output-stage net for the markup-tag leak. Gemma's vocabulary carries whole HTML tags as single
+/// NORMAL tokens (`<b>` = 200, `</code>` = 215, …), and in thin prose contexts the model surfaces
+/// them as suggestions ("my name is" → "</code>"). Sample-time demotion through
+/// `BiasPolicy.markupTagStaticPenalty` is the primary defence; this guard is its context-aware
+/// counterpart for finalised candidates, applied by the candidate filter in prose/correction modes.
+///
+/// It is deliberately conservative in both directions:
+/// - It fires only when the *whole* completion is markup tags (plus whitespace). A candidate that
+///   continues the user's own angle-bracket text ("code> to format") has prose content and passes.
+/// - It stays silent whenever the surrounding field text already contains tag-like markup, so a
+///   user really writing HTML in a prose-mode field (chat box, CMS textarea) keeps tag completions.
+public enum MarkupTagGuard {
+
+    /// Matches a completion made up solely of one or more whole tags, optionally separated by
+    /// whitespace: `"</code>"`, `" <b>"`, `"</td></tr>"`. Tag shape mirrors
+    /// `TokenClassifier.matchesMarkupTag`. `nil` if the pattern fails to compile.
+    private static let pureMarkupRegex = try? NSRegularExpression(
+        pattern: #"^\s*(</?[a-zA-Z][a-zA-Z0-9]*( ?/)?>\s*)+$"#,
+        options: []
+    )
+
+    /// Loose tag detector for the *surrounding* text. Attributes are allowed (`<a href="…">`)
+    /// because real markup contexts contain them. It only feeds the exemption, where a false
+    /// positive merely means tag completions keep being shown.
+    private static let contextMarkupRegex = try? NSRegularExpression(
+        pattern: #"</?[a-zA-Z][^<>]{0,80}>"#,
+        options: []
+    )
+
+    /// `true` when `completion` should be suppressed: it is pure markup and neither side of the
+    /// caret shows the user working with markup.
+    public static func violates(completion: String, beforeCursor: String, afterCursor: String) -> Bool {
+        isPureMarkup(completion) && !containsMarkup(beforeCursor) && !containsMarkup(afterCursor)
+    }
+
+    /// Whether `text` is non-empty and consists only of tags and whitespace.
+    static func isPureMarkup(_ text: String) -> Bool {
+        hasMatch(of: pureMarkupRegex, in: text)
+    }
+
+    /// Whether `text` contains at least one tag-like span.
+    static func containsMarkup(_ text: String) -> Bool {
+        hasMatch(of: contextMarkupRegex, in: text)
+    }
+
+    /// Shared matcher: `false` for empty text or a regex that failed to compile.
+    private static func hasMatch(of regex: NSRegularExpression?, in text: String) -> Bool {
+        guard let regex = regex, !text.isEmpty else { return false }
+        let wholeText = NSRange(text.startIndex..<text.endIndex, in: text)
+        return regex.firstMatch(in: text, options: [], range: wholeText) != nil
+    }
+}
diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/PrefixRepetitionGuard.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/PrefixRepetitionGuard.swift
new file mode 100644
index 0000000..69dc43f
--- /dev/null
+++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/PrefixRepetitionGuard.swift
@@ -0,0 +1,49 @@
+import Foundation
+
+/// Guards against completions that would start a verbatim repetition loop by re-emitting a phrase
+/// the user has already written shortly before the cursor.
+///
+/// Motivating failure: after "...AI meetup." the model predicts " i want to write about"
+/// because that exact phrase occurs earlier in the text. If the user accepts it, the sentence
+/// repeats — and the model then predicts the same continuation again, looping
+/// indefinitely.
+///
+/// Matching runs on case-folded alphanumerics over the last `lookbackCharacters` of
+/// `beforeCursor`, and covers two shapes:
+///
+/// 1. **Whole-completion** — the entire suggestion already occurs in the recent text. This is a
+///    strong signal, so a short match (`minimumAlphanumericLength`) suffices.
+/// 2. **Leading** — the suggestion *starts* by reproducing a recent phrase, then diverges
+///    ("…access the OpenAI" + " API to do X"). The full string is no longer a substring, so
+///    shape 1 misses it; it is caught once the repeated leading run is long enough
+///    (`minimumLeadingRepeat`) to be a genuine loop rather than a chance word collision.
+///
+/// The minimum lengths stop short common phrases ("the", "and") from causing false positives.
+public enum PrefixRepetitionGuard {
+
+    /// Returns `true` when `completion` reproduces a phrase from the recent prefix, i.e. accepting
+    /// it would create a repetition. A negative `lookbackCharacters` is treated as zero.
+    public static func repeatsPrefix(
+        completion: String,
+        beforeCursor: String,
+        lookbackCharacters: Int = 300,
+        minimumAlphanumericLength: Int = 8,
+        minimumLeadingRepeat: Int = 16
+    ) -> Bool {
+        let candidate = AlphanumericNormalizer.normalize(completion)
+
+        // Bounded window: a phrase shared with text written long ago in a large document must not
+        // suppress a completion. Clamp to zero because `suffix(_:)` traps on a negative length.
+        let window = beforeCursor.suffix(max(0, lookbackCharacters))
+        let recentText = AlphanumericNormalizer.normalize(String(window))
+
+        // Both thresholds go to `RepeatedSpanDetector`: the whole-completion floor for a short
+        // verbatim repeat, the leading-run floor for a repeat that then diverges.
+        return RepeatedSpanDetector.reproduces(
+            normalizedCompletion: candidate,
+            within: recentText,
+            minimumWhole: minimumAlphanumericLength,
+            minimumLeading: minimumLeadingRepeat
+        )
+    }
+}
diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift
index 0695059..3e00aa5 100644
--- a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift
+++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift
@@ -159,11 +159,7 @@ public enum SuffixOverlapGuard {
     /// Case-folded string of only the alphanumeric scalars — drops whitespace, punctuation, and any
     /// stray symbol glyphs the model prepends, so the comparison is on real content.
     static func normalizedAlphanumerics(_ text: String) -> String {
-        var result = String.UnicodeScalarView()
-        for scalar in text.lowercased().unicodeScalars where CharacterSet.alphanumerics.contains(scalar) {
-            result.append(scalar)
-        }
-        return String(result)
+        AlphanumericNormalizer.normalize(text)
     }
 
     /// Whether the last scalar of `text` is a word character (letter or digit) — i.e. the caret is
diff --git a/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/ContextEchoGuardTests.swift b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/ContextEchoGuardTests.swift
new file mode 100644
index 0000000..4256e7f
--- /dev/null
+++ b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/ContextEchoGuardTests.swift
@@ -0,0 +1,65 @@
+import AutocompleteCore
+import XCTest
+
+final class ContextEchoGuardTests: XCTestCase {
+
+    func testFiresWhenCompletionEchoesClipboardVerbatim() {
+        // Reported case: text copied from a localhost page in another browser gets injected as
+        // clipboard context and is then parroted into a fresh Gmail draft.
+        let copiedText = "if you require maintenance of UPS systems or backup power, contact us."
+        let echoed = ContextEchoGuard.echoesInjectedContext(
+            completion: " if you require maintenance of UPS systems or",
+            injectedContext: [copiedText]
+        )
+
+        XCTAssertTrue(echoed)
+    }
+
+    func testFiresOnLeadingEchoThatThenDiverges() {
+        let onScreenText = "The private key for the OpenAI API is stored in the vault."
+        let echoed = ContextEchoGuard.echoesInjectedContext(
+            completion: " the private key for the OpenAI API is yours to keep forever",
+            injectedContext: [onScreenText]
+        )
+
+        XCTAssertTrue(echoed)
+    }
+
+    func testChecksAllInjectedSources() {
+        // The echoed phrase appears in the second source only.
+        let echoed = ContextEchoGuard.echoesInjectedContext(
+            completion: " maintenance of UPS systems is required",
+            injectedContext: ["unrelated clipboard text", "notes about maintenance of UPS systems here"]
+        )
+        XCTAssertTrue(echoed)
+    }
+
+    func testDoesNotFireWithoutInjectedContext() {
+        // With nothing injected there is nothing to echo.
+        let echoed = ContextEchoGuard.echoesInjectedContext(
+            completion: " if you require maintenance of UPS systems or",
+            injectedContext: []
+        )
+        XCTAssertFalse(echoed)
+    }
+
+    func testAllowsGenuineCompletionNotInContext() {
+        let copiedText = "if you require maintenance of UPS systems or backup power, contact us."
+        let echoed = ContextEchoGuard.echoesInjectedContext(
+            completion: " hope you are doing well",
+            injectedContext: [copiedText]
+        )
+
+        XCTAssertFalse(echoed)
+    }
+
+    func testDoesNotFireOnShortIncidentalOverlap() {
+        // A brief shared run ("if you ") is too little to suppress a real continuation.
+        let echoed = ContextEchoGuard.echoesInjectedContext(
+            completion: " if you can",
+            injectedContext: ["if you require maintenance of UPS systems"]
+        )
+
+        XCTAssertFalse(echoed)
+    }
+}
diff --git a/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/IntraCompletionRepetitionGuardTests.swift b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/IntraCompletionRepetitionGuardTests.swift
new file mode 100644
index 0000000..dd9ca2e
--- /dev/null
+++ b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/IntraCompletionRepetitionGuardTests.swift
@@ -0,0 +1,88 @@
+import XCTest
+@testable import AutocompleteCore
+
+final class IntraCompletionRepetitionGuardTests: XCTestCase {
+
+    // MARK: - Degenerate cases (should suppress)
+
+    func testDigitTripleSpaceSeparated_isDegenerate() {
+        XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate(" text 1 1 1")) // "1" three times
+    }
+
+    func testDigitTripleWithLeadWord_isDegenerate() {
+        XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate(" since 1 1 1"))
+    }
+
+    func testDigitTripleWithMultipleLeadWords_isDegenerate() {
+        XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate(" apartment or my 1 1 1"))
+    }
+
+    func testWordTriple_isDegenerate() {
+        XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate(" the the the best option")) // "the" ×3
+    }
+
+    /// Punctuation-separated repetitions: "1, 1, 1" must be caught even though
+    /// whitespace-splitting gives ["1,", "1,", "1"] — the guard uses alphanumeric runs.
+    func testPunctuationSeparated_isDegenerate() {
+        XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate("1, 1, 1"))
+    }
+
+    func testHyphenSeparated_isDegenerate() {
+        XCTAssertTrue(IntraCompletionRepetitionGuard.isDegenerate("go-go-go now")) // "go" ×3, hyphen-joined
+    }
+
+    // MARK: - Normal completions (must not suppress)
+
+    func testNormalProseSentence_notDegenerate() {
+        XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(" is a company for the industrial floor."))
+    }
+
+    func testSingleWord_notDegenerate() {
+        XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(" hello"))
+    }
+
+    func testTwoWords_notDegenerate() {
+        XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(" good morning"))
+    }
+
+    /// Two occurrences is below the threshold of three.
+    func testDoubleRepeat_notDegenerate() {
+        XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(" apartment or my 1 1")) // "1" only twice
+    }
+
+    func testDoubleRepeatAlt_notDegenerate() {
+        XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(" text 1 1")) // "1" only twice
+    }
+
+    func testEmptyString_notDegenerate() {
+        XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(""))
+    }
+
+    func testOnlyPunctuation_notDegenerate() {
+        XCTAssertFalse(IntraCompletionRepetitionGuard.isDegenerate(". . .")) // no alphanumeric content
+    }
+
+    // MARK: - contentWords helper
+
+    func testContentWords_stripsSpacesAndPunctuation() {
+        XCTAssertEqual(
+            IntraCompletionRepetitionGuard.contentWords(" text 1 1 1").map(String.init), // compare as Strings
+            ["text", "1", "1", "1"]
+        )
+    }
+
+    func testContentWords_commaSeparated() {
+        XCTAssertEqual(
+            IntraCompletionRepetitionGuard.contentWords("1, 1, 1").map(String.init), // commas are dropped
+            ["1", "1", "1"]
+        )
+    }
+
+    func testContentWords_emptyString() {
+        XCTAssertTrue(IntraCompletionRepetitionGuard.contentWords("").isEmpty)
+    }
+
+    func testContentWords_onlyPunctuation() {
+        XCTAssertTrue(IntraCompletionRepetitionGuard.contentWords(". . .").isEmpty)
+    }
+}
diff --git a/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/MarkupTagGuardTests.swift b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/MarkupTagGuardTests.swift
new file mode 100644
index 0000000..3427e9a
--- /dev/null
+++ b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/MarkupTagGuardTests.swift
@@ -0,0 +1,95 @@
+import XCTest
+@testable import AutocompleteCore
+
+/// `MarkupTagGuard` — the output net for Gemma's single-token HTML-tag leak (`"my name is"` →
+/// `"</code>"` in a web chat box). Suppress only pure-markup candidates in markup-free contexts;
+/// a user genuinely writing tags must keep their completions.
+final class MarkupTagGuardTests: XCTestCase {
+
+    // MARK: - Pure-markup detection
+
+    func testSingleClosingTagIsPureMarkup() {
+        XCTAssertTrue(MarkupTagGuard.isPureMarkup("</code>"))
+    }
+
+    func testLeadingSpaceTagIsPureMarkup() {
+        // The observed leak: token 236743 (" ") + token 215 ("</code>").
+        XCTAssertTrue(MarkupTagGuard.isPureMarkup(" </code>"))
+    }
+
+    func testMultipleTagsArePureMarkup() {
+        XCTAssertTrue(MarkupTagGuard.isPureMarkup("</td></tr>")) // adjacent tags
+        XCTAssertTrue(MarkupTagGuard.isPureMarkup("<b> <i>")) // tags separated by whitespace
+    }
+
+    func testSelfClosingTagIsPureMarkup() {
+        XCTAssertTrue(MarkupTagGuard.isPureMarkup("<br/>")) // no space before the slash
+        XCTAssertTrue(MarkupTagGuard.isPureMarkup("<br />")) // one space before the slash
+    }
+
+    func testProseIsNotPureMarkup() {
+        XCTAssertFalse(MarkupTagGuard.isPureMarkup("john smith"))
+    }
+
+    func testTagFollowedByProseIsNotPureMarkup() {
+        // The tag may be continuing the user's own markup — other nets judge the rest.
+        XCTAssertFalse(MarkupTagGuard.isPureMarkup("</b> and then some"))
+    }
+
+    func testPartialBracketTextIsNotPureMarkup() {
+        XCTAssertFalse(MarkupTagGuard.isPureMarkup("code> to format")) // closing bracket only
+        XCTAssertFalse(MarkupTagGuard.isPureMarkup("<3")) // opening bracket only
+        XCTAssertFalse(MarkupTagGuard.isPureMarkup("a < b")) // bare comparison operator
+    }
+
+    func testAttributeBearingTagIsNotPureMarkup() {
+        // Attribute tags are never single leaked tokens; leave them to context judgement.
+        XCTAssertFalse(MarkupTagGuard.isPureMarkup(#"<a href="x">"#))
+    }
+
+    func testEmptyStringIsNotPureMarkup() {
+        XCTAssertFalse(MarkupTagGuard.isPureMarkup(""))
+    }
+
+    // MARK: - Context exemption
+
+    func testSuppressesPureTagInProseContext() {
+        XCTAssertTrue(MarkupTagGuard.violates(
+            completion: " </code>",
+            beforeCursor: "my name is", // plain prose, no tags on either side
+            afterCursor: ""
+        ))
+    }
+
+    func testAllowsClosingTagWhenUserIsWritingMarkup() {
+        XCTAssertFalse(MarkupTagGuard.violates(
+            completion: "</b>",
+            beforeCursor: "wrap it like <b>hello", // markup before the caret exempts the tag
+            afterCursor: ""
+        ))
+    }
+
+    func testAllowsTagWhenMarkupFollowsCaret() {
+        XCTAssertFalse(MarkupTagGuard.violates(
+            completion: "<td>",
+            beforeCursor: "add a cell: ",
+            afterCursor: "</tr></table>" // markup after the caret exempts the tag
+        ))
+    }
+
+    func testAttributeBearingContextMarkupExempts() {
+        XCTAssertFalse(MarkupTagGuard.violates(
+            completion: "</a>",
+            beforeCursor: #"see <a href="https://example.com">this link"#, // tag with an attribute
+            afterCursor: ""
+        ))
+    }
+
+    func testProseCompletionNeverViolates() {
+        XCTAssertFalse(MarkupTagGuard.violates(
+            completion: " john smith", // not pure markup, so context is irrelevant
+            beforeCursor: "my name is",
+            afterCursor: ""
+        ))
+    }
+}
diff --git a/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/PrefixRepetitionGuardTests.swift b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/PrefixRepetitionGuardTests.swift
new file mode 100644
index 0000000..c387416
--- /dev/null
+++ b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/PrefixRepetitionGuardTests.swift
@@ -0,0 +1,117 @@
+import AutocompleteCore
+import XCTest
+
+final class PrefixRepetitionGuardTests: XCTestCase {
+
+    // MARK: - Whole-completion repetition
+
+    func testFiresWhenWholeCompletionRepeatsRecentPhrase() {
+        let before = "This is the private key for the OpenAI API. You can use it to access the OpenAI. And"
+        XCTAssertTrue(
+            PrefixRepetitionGuard.repeatsPrefix(
+                completion: " you can use it to access the OpenAI",
+                beforeCursor: before
+            )
+        )
+    }
+
+    func testIgnoresPunctuationAndCaseDifferences() {
+        let before = "I went to the AI meetup. I want to write about"
+        XCTAssertTrue(
+            PrefixRepetitionGuard.repeatsPrefix(
+                completion: " i want to write about,", // lowercase "i" and a trailing comma
+                beforeCursor: before
+            )
+        )
+    }
+
+    // MARK: - Leading repetition that then diverges (the loop shape)
+
+    func testFiresWhenCompletionLeadsWithRepeatThenDiverges() {
+        // The repeated phrase is followed by genuinely new text, so the *whole* completion is no
+        // longer a substring of the prefix — only the leading run is.
+        let before = "This is the private key for the OpenAI API. You can use it to access the OpenAI. And"
+        XCTAssertTrue(
+            PrefixRepetitionGuard.repeatsPrefix(
+                completion: " you can use it to access the OpenAI API to do whatever you want",
+                beforeCursor: before
+            )
+        )
+    }
+
+    // MARK: - Negatives
+
+    func testAllowsGenuineContinuation() {
+        let before = "This is the private key for the OpenAI API. You can use it to access the OpenAI. And"
+        XCTAssertFalse(
+            PrefixRepetitionGuard.repeatsPrefix(
+                completion: " keep it somewhere safe",
+                beforeCursor: before
+            )
+        )
+    }
+
+    func testDoesNotFireOnShortCommonLeadingWord() {
+        // A short leading collision ("the dog ") must not be enough to suppress a real continuation.
+        let before = "I saw the dog run across the street. Then"
+        XCTAssertFalse(
+            PrefixRepetitionGuard.repeatsPrefix(
+                completion: " the dog barked loudly",
+                beforeCursor: before
+            )
+        )
+    }
+
+    func testDoesNotFireOnShortCompletion() {
+        let before = "the quick brown fox jumps over the"
+        XCTAssertFalse(
+            PrefixRepetitionGuard.repeatsPrefix(
+                completion: " lazy",
+                beforeCursor: before
+            )
+        )
+    }
+
+    func testLeadingRepeatThresholdBoundary() {
+        // The leading-divergence shape requires a repeated run of ≥16 normalized alphanumeric chars.
+        // "abcdefghijklmno" is 15 → must NOT fire on leading-only; "abcdefghijklmnop" is 16 → fires.
+        let before15 = "abcdefghijklmno was here earlier in the document somewhere"
+        XCTAssertFalse(
+            PrefixRepetitionGuard.repeatsPrefix(
+                completion: "abcdefghijklmno then something new entirely",
+                beforeCursor: before15
+            ),
+            "15-char leading run is below the threshold"
+        )
+        let before16 = "abcdefghijklmnop was here earlier in the document somewhere"
+        XCTAssertTrue(
+            PrefixRepetitionGuard.repeatsPrefix(
+                completion: "abcdefghijklmnop then something new entirely",
+                beforeCursor: before16
+            ),
+            "16-char leading run meets the threshold"
+        )
+    }
+
+    func testWholeCompletionRepeatBoundaryIsEightChars() {
+        // The whole-completion shape uses the lower ≥8 floor; "abcdefg" (7) must not fire.
+        XCTAssertFalse(
+            PrefixRepetitionGuard.repeatsPrefix(completion: " abcdefg", beforeCursor: "abcdefg earlier")
+        )
+        XCTAssertTrue(
+            PrefixRepetitionGuard.repeatsPrefix(completion: " abcdefgh", beforeCursor: "abcdefgh earlier")
+        )
+    }
+
+    func testRespectsLookbackWindow() {
+        // The repeated phrase sits far outside the lookback window, so it should not be suppressed.
+        let filler = String(repeating: "x ", count: 400) // 800 characters, past the default lookback
+        let before = "you can use it to access the OpenAI" + filler
+        XCTAssertFalse(
+            PrefixRepetitionGuard.repeatsPrefix(
+                completion: " you can use it to access the OpenAI",
+                beforeCursor: before
+            )
+        )
+    }
+}
diff --git a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/ConstrainedGeneration.swift b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/ConstrainedGeneration.swift
index c61be18..a1becb0 100644
--- a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/ConstrainedGeneration.swift
+++ b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/ConstrainedGeneration.swift
@@ -90,6 +90,13 @@ public final class ConstrainedGenerationEngine: CompletionGenerating {
                     // admissible tokens aren't masked out and the branch can't silently collapse to
                     // `noCandidate` (ADR-025).
                     constrained: !branch.remainingPrefix.isEmpty,
+                    // Decode-time repetition penalty is scoped to this branch's own emitted tokens, so
+                    // a degenerate loop is demoted in favour of a non-repeating sibling. Suppressed
+                    // while a required prefix is still being satisfied (mid-word healing, ADR-019): that
+                    // path forces a specific continuation that may legitimately repeat an earlier token,
+                    // and demoting it would collapse the only admissible branch. Inert unless the
+                    // penalties are configured (see DecodingConfiguration.presencePenalty).
+                    recentTokens: branch.remainingPrefix.isEmpty ? branch.tokenIDs : [],
                     isAdmissible: { self.tokenAllowed($0, afterRequiredPrefix: branch.remainingPrefix) }
                 )
 
diff --git a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/DecodingConfiguration.swift b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/DecodingConfiguration.swift
index 7968ec7..23ad2c1 100644
--- a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/DecodingConfiguration.swift
+++ b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Engine/DecodingConfiguration.swift
@@ -48,6 +48,22 @@ public struct DecodingConfiguration: Equatable {
     /// Weight of the mean per-token suffix-join log-probability added to a branch's cumulative score
     /// before final ranking. See ADR-057.
     public var suffixRerankWeight: Float
+    /// Decode-time repetition control. Subtracted (in logit space, before temperature) from any token
+    /// that has already been emitted *on the same branch*, demoting degenerate intra-completion loops
+    /// ("access the OpenAI access the OpenAI") so a non-repeating sibling — or the stop token — wins the
+    /// beam instead of the controller having to suppress the looped output after the fact. The penalty
+    /// is suppressed while a branch is still satisfying a required prefix (mid-word healing), so it never
+    /// demotes a forced continuation (see `ConstrainedGenerationEngine`).
+    ///
+    /// `presencePenalty` is applied once if the token appears at all on the branch; `frequencyPenalty`
+    /// is applied per prior occurrence. Both default to `0` (inert — `SamplerResult` is byte-identical
+    /// to the un-penalized path), so the production default is unchanged until a value is chosen via the
+    /// KeyTypeBench sweep. This is a *demotion* lever, not a promotion one: it only reshuffles tokens
+    /// already in the candidate pool, so it bites on the medium/long completion lengths where loops
+    /// form and is near-inert at the short (≤4-token) default.
+    public var presencePenalty: Float
+    /// See `presencePenalty`. Scaled by the number of prior occurrences of the token on the branch.
+    public var frequencyPenalty: Float
 
     public init(
         topK: Int = 64,
@@ -61,7 +77,9 @@ public struct DecodingConfiguration: Equatable {
         fimMaxPrefixTokens: Int = 256,
         fimMaxSuffixTokens: Int = 64,
         suffixRerankTokenCount: Int = 0,
-        suffixRerankWeight: Float = 1.0
+        suffixRerankWeight: Float = 1.0,
+        presencePenalty: Float = 0,
+        frequencyPenalty: Float = 0
     ) {
         self.topK = topK
         self.topP = topP
@@ -75,5 +93,7 @@ public struct DecodingConfiguration: Equatable {
         self.fimMaxSuffixTokens = fimMaxSuffixTokens
         self.suffixRerankTokenCount = suffixRerankTokenCount
         self.suffixRerankWeight = suffixRerankWeight
+        self.presencePenalty = presencePenalty
+        self.frequencyPenalty = frequencyPenalty
     }
 }
diff --git a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift
index 8e2474d..293d2f2 100644
--- a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift
+++ b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift
@@ -90,6 +90,12 @@ public final class DefaultCandidateFilter: CandidateFiltering {
         //    insertable as an inline completion.
         if !Self.isInsertionSafe(candidate.text) { return .insertionUnsafe }
 
+        // 6·: Reserved model-internal markers (Gemma `<unused56>`, chat/FIM scaffolding). These are
+        //     masked at sample time once the profile is rebuilt (see TokenClassifier); this net is the
+        //     belt-and-suspenders for stale profiles / cross-token concatenations / other models, with
+        //     a distinct reason so telemetry can confirm the masking landed.
+        if Self.containsReservedMarker(candidate.text) { return .reservedMarker }
+
         // 6a. CJK script net: once the live caret is inside CJK text, a Latin-leading continuation
         //     is almost always pinyin/romanization leakage from the base model or IME composition.
         //     Suppress it rather than showing visibly wrong ghost text.
@@ -121,6 +127,54 @@ public final class DefaultCandidateFilter: CandidateFiltering {
             return .duplicatesAfterCursor
         }
 
+        // The content-overlap nets below judge the text that will actually be inserted. When the
+        // prompt was healed (ADR-019) the candidate re-emits the already-typed stem (" coll…"); strip
+        // it so the comparison is against the genuinely-new continuation, not the stem the user typed.
+        let insertedText = Self.healStripped(candidate.text, request: request)
+
+        // 7b. Prefix-repetition net: the completion reproduces a phrase already in the recent
+        //     preceding text, so accepting it would create a verbatim repetition loop.
+        //     Typical failure: small model predicts "i want to write about" after "…AI meetup."
+        //     because that exact phrase appeared earlier in the text. See PrefixRepetitionGuard.
+        if PrefixRepetitionGuard.repeatsPrefix(
+            completion: insertedText,
+            beforeCursor: request.context.beforeCursor
+        ) {
+            return .repeatsRecentPrefix
+        }
+
+        // 7b'. Intra-completion repetition: the same word appears ≥ 3 times within the candidate
+        //     itself ("text 1 1 1", "since 1 1 1") — model degeneration unrelated to side context.
+        //     Distinct from the prefix-repetition loop above (which checks against already-typed text).
+        if IntraCompletionRepetitionGuard.isDegenerate(insertedText) {
+            return .intraCompletionRepetition
+        }
+
+        // 7b''. Markup-tag net: the candidate is nothing but HTML tags in a prose context with no
+        //     markup in the surrounding text — Gemma's single-token tag block (`</code>` = 215)
+        //     surfacing in ordinary writing. Sample-time demotion (`BiasPolicy.markupTagStaticPenalty`)
+        //     is the primary defence; this context-aware net covers stale profiles and beam paths.
+        //     Code/terminal modes are untouched, and a field already containing markup is exempt.
+        if request.mode == .prose || request.mode == .correction,
+           MarkupTagGuard.violates(
+               completion: insertedText,
+               beforeCursor: request.context.beforeCursor,
+               afterCursor: request.context.afterCursor
+           ) {
+            return .markupTagOutsideMarkupContext
+        }
+
+        // 7c. Context-echo net: the completion verbatim-reproduces injected side context the user did
+        //     not type (clipboard / on-screen OCR). The small model parrots such context instead of
+        //     using it as background — e.g. text copied from one app surfacing in another's compose
+        //     field. Writing-history samples are excluded upstream (see `CompletionRequest`).
+        if ContextEchoGuard.echoesInjectedContext(
+            completion: insertedText,
+            injectedContext: request.injectedContext
+        ) {
+            return .echoesInjectedContext
+        }
+
         // 8. Mid-line confidence net. Native FIM is useful only when it is both short and highly
         //    likely; longer middle spans have been low-precision in edge data. Keep this deliberately
         //    conservative so re-enabled mid-line favors suppression over wrong visible text.
@@ -158,6 +212,15 @@ public final class DefaultCandidateFilter: CandidateFiltering {
         return meanLogProbability < minimumMidLineMeanLogProbability
     }
 
+    // MARK: - Heal-aware text
+
+    /// Text that will really be inserted. A healed request (ADR-019) makes the candidate re-emit
+    /// the stem the user already typed, so that stem is stripped; otherwise `text` is returned as-is.
+    static func healStripped(_ text: String, request: CompletionRequest) -> String {
+        let stemBytes = request.requiredPrefixBytes
+        return stemBytes.isEmpty ? text : MidWordHealing.strip(text, heal: String(decoding: stemBytes, as: UTF8.self))
+    }
+
     // MARK: - Required prefix
 
     /// `true` when `bytes` is consistent with `prefix`: either it begins with the whole prefix or
@@ -171,8 +234,8 @@ public final class DefaultCandidateFilter: CandidateFiltering {
 
     /// A candidate is unsafe to insert if it is empty / whitespace-only, carries any control
     /// character (C0 controls including tab and newline, or DEL), or has no alphanumeric content at
-    /// all. The last rule drops noise-only suggestions (`"..."`, `"…"`, `"—"`) that are never a
-    /// useful inline continuation; alphanumerics span every script, so CJK/Thai completions pass.
+    /// all. The alphanumeric rule drops noise-only suggestions (`"..."`, `"…"`, `"—"`); alphanumerics
+    /// span every script, so CJK/Thai pass. (Reserved markers get their own gate — see `suppressionReason`.)
     static func isInsertionSafe(_ text: String) -> Bool {
         if text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { return false }
         for scalar in text.unicodeScalars {
@@ -182,6 +245,30 @@ public final class DefaultCandidateFilter: CandidateFiltering {
         return true
     }
 
+    /// Compiled patterns for model-internal markers that must never surface in a shown completion:
+    /// reserved placeholders (`<unused56>`, `<reserved_…>`, `<extra_id_…>`, `<pad>`, `<mask>`) and
+    /// chat / FIM scaffolding (`<|…|>`, `<start_of_turn>`, …). They are searched as substrings
+    /// because a marker can sit mid-candidate, and stay narrow so ordinary `<tag>` text is untouched.
+    private static let reservedMarkerRegexes: [NSRegularExpression] = [
+        #"<unused\d+>"#,
+        #"<reserved[_ ]?\d+>"#,
+        #"<extra_id_\d+>"#,
+        #"<pad>"#, #"<mask>"#,
+        #"<\|[^|>]+\|>"#,
+        #"<start_of_turn>"#, #"<end_of_turn>"#
+    ].compactMap { source in
+        // Matching ignores case; a pattern that fails to compile is skipped rather than trapping.
+        try? NSRegularExpression(pattern: source, options: [.caseInsensitive])
+    }
+
+    static func containsReservedMarker(_ text: String) -> Bool {
+        // One reserved-marker pattern matching anywhere in the text is enough to flag it.
+        let fullRange = NSRange(text.startIndex..., in: text)
+        return reservedMarkerRegexes.contains { pattern in
+            pattern.firstMatch(in: text, options: [], range: fullRange) != nil
+        }
+    }
+
     static func hasCJKScriptMismatch(_ text: String, request: CompletionRequest) -> Bool {
         guard request.mode == .prose || request.mode == .correction else { return false }
         guard let last = request.context.beforeCursor.last, !last.isWhitespace else { return false }
@@ -210,9 +297,7 @@ public final class DefaultCandidateFilter: CandidateFiltering {
         // For a healed request (ADR-019) the candidate re-emits the typed stem (`" coll…"`); strip it
         // so the leading word is the genuinely-new continuation rather than an empty leading-space run
         // — otherwise healed mid-word completions slip past the net entirely (ADR-025 follow-up).
-        let judged = request.requiredPrefixBytes.isEmpty
-            ? candidate.text
-            : MidWordHealing.strip(candidate.text, heal: String(decoding: request.requiredPrefixBytes, as: UTF8.self))
+        let judged = Self.healStripped(candidate.text, request: request)
 
         let lead = CurrentWordTypoGuard.leadingWord(of: judged)
         guard !lead.isEmpty else { return false } // completion opened on a boundary — not our word
@@ -244,9 +329,7 @@ public final class DefaultCandidateFilter: CandidateFiltering {
         let stem = CurrentWordTypoGuard.trailingWord(of: request.context.beforeCursor)
         guard !stem.isEmpty else { return false } // model started a fresh word — leave it
 
-        let judged = request.requiredPrefixBytes.isEmpty
-            ? candidate.text
-            : MidWordHealing.strip(candidate.text, heal: String(decoding: request.requiredPrefixBytes, as: UTF8.self))
+        let judged = Self.healStripped(candidate.text, request: request)
 
         let lead = CurrentWordTypoGuard.leadingWord(of: judged)
         guard !lead.isEmpty else { return false } // completion opened on a boundary — not our word
diff --git a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Sampling/TokenSampler.swift b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Sampling/TokenSampler.swift
index b3a527b..37a951a 100644
--- a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Sampling/TokenSampler.swift
+++ b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Sampling/TokenSampler.swift
@@ -35,11 +35,26 @@ enum TokenSampler {
         profile: AutocompleteProfile,
         configuration: DecodingConfiguration,
         constrained: Bool = false,
+        recentTokens: [TokenID] = [],
         isAdmissible: (TokenID) -> Bool
     ) -> SamplerResult {
         guard !logits.isEmpty else { return .empty }
         let temperature = max(configuration.temperature, 1e-3)
 
+        // Decode-time repetition penalty (see `DecodingConfiguration.presencePenalty`). Build the
+        // per-token occurrence count for this branch once; when no penalty is configured (or the
+        // branch is empty) `occurrences` stays empty and the scaling loop below is byte-identical to
+        // the un-penalized path. The penalty adjusts only `value` (step 1) — never `argmaxLogit`,
+        // which stays on the raw logits so stop/hardStop detection is unaffected (ADR-010).
+        let presencePenalty = configuration.presencePenalty
+        let frequencyPenalty = configuration.frequencyPenalty
+        let penaltyActive = (presencePenalty != 0 || frequencyPenalty != 0) && !recentTokens.isEmpty
+        var occurrences: [TokenID: Int] = [:]
+        if penaltyActive {
+            occurrences.reserveCapacity(recentTokens.count)
+            for id in recentTokens { occurrences[id, default: 0] += 1 }
+        }
+
         // 0. Pre-select the highest raw-logit tokens. Running the profile lookups + softmax over
         //    the full vocabulary (150k+ tokens) per branch is the dominant cost; the surviving
         //    candidate pool only ever needs `topK` entries, so restrict the expensive work to a
@@ -79,7 +94,11 @@ enum TokenSampler {
             }
             if profile.isExcluded(id, mode: mode) { continue }
             if !isAdmissible(id) { continue }
-            let value = (logit.logit + profile.bias(for: id, mode: mode)) / temperature
+            var biased = logit.logit + profile.bias(for: id, mode: mode)
+            if penaltyActive, let count = occurrences[id], count > 0 {
+                biased -= presencePenalty + frequencyPenalty * Float(count)
+            }
+            let value = biased / temperature
             scaled.append((id, value))
             if value > maxValue { maxValue = value }
         }
diff --git a/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Engine/RepetitionPenaltyTests.swift b/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Engine/RepetitionPenaltyTests.swift
new file mode 100644
index 0000000..33a4118
--- /dev/null
+++ b/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Engine/RepetitionPenaltyTests.swift
@@ -0,0 +1,95 @@
+import AutocompleteCore
+@testable import ConstrainedGeneration
+import ModelRuntime
+import TokenProfiles
+import XCTest
+
+/// Decode-time repetition penalty in `TokenSampler.rank` (see `DecodingConfiguration.presencePenalty`).
+/// The penalty demotes tokens already emitted on the same branch so a degenerate intra-completion loop
+/// loses the beam to a non-repeating sibling. It is a *demotion* lever applied only to `value`, never
+/// to the raw-logit argmax used for stop detection, and is byte-identical to the un-penalized path
+/// when no penalty is configured.
+final class RepetitionPenaltyTests: XCTestCase {
+
+    /// `vocab` plain word tokens (bytes `w0`, `w1`, …), each built from only an id and bytes — no flags set.
+    private func makeProfile(vocab: Int) -> InMemoryAutocompleteProfile {
+        let records = (0..<vocab).map {
+            TokenProfileRecord(tokenID: TokenID($0), bytes: Array("w\($0)".utf8))
+        }
+        return InMemoryAutocompleteProfile(vocabularySize: vocab, records: records)
+    }
+
+    private func logits(_ values: [Float]) -> [TokenLogit] {
+        values.enumerated().map { TokenLogit(tokenID: TokenID($0.offset), logit: $0.element) }
+    }
+
+    private func rank(
+        _ logitValues: [Float],
+        config: DecodingConfiguration,
+        recent: [TokenID]
+    ) -> SamplerResult {
+        TokenSampler.rank(
+            logits: logits(logitValues),
+            mode: .prose,
+            profile: makeProfile(vocab: logitValues.count),
+            configuration: config,
+            recentTokens: recent,
+            isAdmissible: { _ in true }
+        )
+    }
+
+    /// With penalties at 0 the result is identical regardless of branch history — the inert default.
+    func testZeroPenaltyIsByteIdenticalToUnpenalized() {
+        let values: [Float] = [3.0, 2.5, 1.0]
+        let config = DecodingConfiguration()
+        let baseline = rank(values, config: config, recent: [])
+        let withHistory = rank(values, config: config, recent: [0, 0, 0])
+        XCTAssertEqual(baseline.tokens, withHistory.tokens)
+        XCTAssertEqual(baseline.argmaxTokenID, withHistory.argmaxTokenID)
+    }
+
+    /// A repeated token's probability drops once the presence penalty is active, and a previously
+    /// lower-ranked sibling overtakes it as the top candidate.
+    func testPresencePenaltyDemotesRepeatedToken() {
+        // Token 0 leads on raw logits, token 1 is close behind.
+        let values: [Float] = [3.0, 2.8, 0.5]
+        let config = DecodingConfiguration(presencePenalty: 4.0)
+
+        let baseline = rank(values, config: config, recent: [])
+        XCTAssertEqual(baseline.tokens.first?.tokenID, 0, "token 0 wins with no history")
+
+        let penalized = rank(values, config: config, recent: [0])
+        XCTAssertEqual(penalized.tokens.first?.tokenID, 1, "repeated token 0 is demoted below token 1")
+        // Token 0 either keeps a lower probability or is pushed out of the nucleus entirely (absent ⇒ 0).
+        let p0 = penalized.tokens.first(where: { $0.tokenID == 0 })?.probability ?? 0
+        let base0 = baseline.tokens.first(where: { $0.tokenID == 0 })?.probability ?? 0
+        XCTAssertLessThan(p0, base0, "token 0 probability must drop under the penalty")
+    }
+
+    /// The frequency penalty scales with occurrence count: two prior occurrences demote harder than one.
+    func testFrequencyPenaltyScalesWithCount() {
+        let values: [Float] = [3.0, 2.0, 1.0]
+        let config = DecodingConfiguration(frequencyPenalty: 1.5)
+        let once = rank(values, config: config, recent: [0]).tokens.first(where: { $0.tokenID == 0 })?.probability ?? 0
+        let twice = rank(values, config: config, recent: [0, 0]).tokens.first(where: { $0.tokenID == 0 })?.probability ?? 0
+        XCTAssertLessThan(twice, once, "more prior occurrences must penalize harder")
+    }
+
+    /// H7: the penalty must not move `argmaxTokenID` — it is tracked on raw logits for stop detection.
+    func testArgmaxUnaffectedByPenalty() {
+        let values: [Float] = [3.0, 2.8, 0.5]
+        let config = DecodingConfiguration(presencePenalty: 10.0)
+        let penalized = rank(values, config: config, recent: [0])
+        XCTAssertEqual(penalized.argmaxTokenID, 0, "raw-logit argmax stays token 0 despite the penalty")
+    }
+
+    /// H6: when the penalty drives the only repeated candidate down, the pool must not collapse to
+    /// empty — the floor still keeps the non-repeating sibling, avoiding a spurious `noCandidate`.
+    func testPenaltyDoesNotEmptyThePool() {
+        let values: [Float] = [5.0, 1.0]
+        let config = DecodingConfiguration(minBranchProbability: 0.0, presencePenalty: 50.0)
+        let penalized = rank(values, config: config, recent: [0])
+        XCTAssertFalse(penalized.tokens.isEmpty, "a non-repeating sibling must survive")
+        XCTAssertEqual(penalized.tokens.first?.tokenID, 1)
+    }
+}
diff --git a/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift b/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift
index 72ffc26..3e13b9c 100644
--- a/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift
+++ b/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift
@@ -19,7 +19,8 @@ final class CandidateFilterTests: XCTestCase {
         target: AppTarget = CandidateFilterTests.target,
         placeholder: String? = nil,
         labels: [String] = [],
-        traits: TextFieldTraits = TextFieldTraits()
+        traits: TextFieldTraits = TextFieldTraits(),
+        injectedContext: [String] = []
     ) -> CompletionRequest {
         let context = TextFieldContext(
             beforeCursor: beforeCursor,
@@ -36,7 +37,8 @@ final class CandidateFilterTests: XCTestCase {
             requiredPrefixBytes: requiredPrefixBytes,
             mode: mode,
             maxCompletionTokens: maxCompletionTokens,
-            maxDisplayWidth: maxDisplayWidth
+            maxDisplayWidth: maxDisplayWidth,
+            injectedContext: injectedContext
         )
     }
 
@@ -379,6 +381,146 @@ final class CandidateFilterTests: XCTestCase {
         )
     }
 
+    // MARK: - Prefix-repetition net
+
+    func testSuppressesPrefixRepetitionLoop() {
+        // The candidate restarts a sentence that already sits before the caret.
+        let typed = "You can use it to access the OpenAI. And"
+        let looped = candidate(" you can use it to access the OpenAI API to do anything")
+        let verdict = DefaultCandidateFilter().suppressionReason(
+            for: looped,
+            request: request(beforeCursor: typed)
+        )
+        XCTAssertEqual(verdict, .repeatsRecentPrefix)
+    }
+
+    func testPrefixRepetitionJudgedAfterHealingStem() {
+        // H1: a healed candidate re-emits the typed stem (" ex"), so the repetition check has to
+        // judge the *inserted* text with that stem stripped. Stripped, the continuation
+        // "ample data set here" repeats an earlier phrase; the RAW candidate
+        // ("example data set here") never appears contiguously in the prefix. The net can
+        // therefore only fire when the heal stem is removed first.
+        let healStem = Array(" ex".utf8)
+        let healedRequest = request(
+            beforeCursor: "ample data set here is good. Give me an ex",
+            requiredPrefixBytes: healStem
+        )
+        let verdict = DefaultCandidateFilter().suppressionReason(
+            for: candidate(" example data set here"),
+            request: healedRequest
+        )
+        XCTAssertEqual(verdict, .repeatsRecentPrefix)
+    }
+
+    // MARK: - Reserved-marker net
+
+    func testSuppressesReservedPlaceholderToken() {
+        let verdict = DefaultCandidateFilter().suppressionReason(
+            for: candidate(" <unused56>"),
+            request: request(beforeCursor: "Hello ")
+        )
+        XCTAssertEqual(verdict, .reservedMarker)
+    }
+
+    func testSuppressesEmbeddedChatMarker() {
+        let verdict = DefaultCandidateFilter().suppressionReason(
+            for: candidate(" text to <unused54><unused56>"),
+            request: request(beforeCursor: "send ")
+        )
+        XCTAssertEqual(verdict, .reservedMarker)
+    }
+
+    func testGenuineMarkupIsNotSuppressedAsReservedMarker() {
+        // Ordinary tags such as `<h2>` are text a user may type; the reserved net must leave them be.
+        let verdict = DefaultCandidateFilter().suppressionReason(
+            for: candidate("h2> heading", tokenIDs: [1]),
+            request: request(beforeCursor: "write <")
+        )
+        XCTAssertNotEqual(verdict, .reservedMarker)
+    }
+
+    // MARK: - Markup-tag net
+
+    func testSuppressesPureMarkupTagInProse() {
+        // Regression for a logged failure: "my name is" in a web chat box surfaced " </code>" (Gemma token 215).
+        let verdict = DefaultCandidateFilter().suppressionReason(
+            for: candidate(" </code>"),
+            request: request(beforeCursor: "my name is")
+        )
+        XCTAssertEqual(verdict, .markupTagOutsideMarkupContext)
+    }
+
+    func testSuppressesMultiTagCandidateInProse() {
+        let verdict = DefaultCandidateFilter().suppressionReason(
+            for: candidate("</td></tr>"),
+            request: request(beforeCursor: "prisma carlyle")
+        )
+        XCTAssertEqual(verdict, .markupTagOutsideMarkupContext)
+    }
+
+    func testKeepsTagWhenUserIsWritingMarkup() {
+        // Context exemption: markup already sits before the caret, so a closing tag is wanted.
+        let closingTag = candidate("</b>")
+        let markupRequest = request(beforeCursor: "use <b>bold text")
+        let verdict = DefaultCandidateFilter().suppressionReason(for: closingTag, request: markupRequest)
+        XCTAssertNil(verdict)
+    }
+
+    func testKeepsTagWhenMarkupFollowsCaret() {
+        let cell = candidate("<td>", tokenIDs: [1], displayWidth: 4)
+        let rowRequest = request(beforeCursor: "<tr>", afterCursor: "</tr>")
+        let verdict = midLineEnabledFilter().suppressionReason(
+            for: cell,
+            request: rowRequest
+        )
+        XCTAssertNil(verdict)
+    }
+
+    func testMarkupNetSkippedInCodeMode() {
+        // In `.code` mode the tag is expected to pass even though no markup precedes the caret.
+        let codeRequest = request(beforeCursor: "some text", mode: .code)
+        let verdict = DefaultCandidateFilter().suppressionReason(for: candidate("</code>"), request: codeRequest)
+        XCTAssertNil(verdict)
+    }
+
+    func testProseCandidateWithTrailingTagIsNotClaimedByMarkupNet() {
+        // Prose followed by a tag is mixed content, not "pure markup"; other nets decide its fate.
+        let verdict = DefaultCandidateFilter().suppressionReason(
+            for: candidate(" smith </b>"),
+            request: request(beforeCursor: "my name is")
+        )
+        XCTAssertNotEqual(verdict, .markupTagOutsideMarkupContext)
+    }
+
+    // MARK: - Context-echo net
+
+    func testSuppressesEchoOfClipboardContext() {
+        // The candidate replays a run of words lifted verbatim from the injected side context.
+        let clipboard = "if you require maintenance of UPS systems or backup power, call us."
+        let echoRequest = request(
+            beforeCursor: "Hi Molly,",
+            injectedContext: [clipboard]
+        )
+        let verdict = DefaultCandidateFilter().suppressionReason(
+            for: candidate(" if you require maintenance of UPS systems or backup"),
+            request: echoRequest
+        )
+        XCTAssertEqual(verdict, .echoesInjectedContext)
+    }
+
+    func testKeepsCompletionNotPresentInInjectedContext() {
+        let clipboard = "if you require maintenance of UPS systems or backup power, call us."
+        let unrelatedRequest = request(
+            beforeCursor: "Hi Molly,",
+            injectedContext: [clipboard]
+        )
+        let verdict = DefaultCandidateFilter().suppressionReason(
+            for: candidate(" hope you are well"),
+            request: unrelatedRequest
+        )
+        XCTAssertNil(verdict)
+    }
+
     // MARK: - Dead-end mid-word net (ADR-052)
 
     /// A recogniser whose `canCompleteWord` only accepts an explicit set of viable prefixes.
diff --git a/Packages/KeyTypeBench/Sources/KeyTypeBench/Datasets/history-echo.jsonl b/Packages/KeyTypeBench/Sources/KeyTypeBench/Datasets/history-echo.jsonl
new file mode 100644
index 0000000..c4e14a6
--- /dev/null
+++ b/Packages/KeyTypeBench/Sources/KeyTypeBench/Datasets/history-echo.jsonl
@@ -0,0 +1,4 @@
+{"id":"history-echo-cooking-vs-kubernetes-001","sourceGroup":"history-echo-synthetic","split":"eval","suites":["edge"],"tags":["history-echo","prose","stale-history"],"contextSources":{"fieldText":"synthetic","appContext":"synthetic"},"source":{"kind":"synthetic","note":"Topic-A draft (cooking) with topically-unrelated topic-B writing history (Kubernetes). Canaries: Kubernetes, ingress, TLS. The model must NOT echo the stale draft."},"context":{"beforeCursor":"To make the tomato sauce, first heat the olive oil over medium heat and then add the","afterCursor":"","detectedLanguage":"en","typingContext":"notes","target":{"appName":"Notes","bundleIdentifier":"com.apple.Notes","windowTitle":"Recipe"},"previousUserInputs":["The Kubernetes ingress controller terminates TLS at the edge and routes traffic to the backend pods through the service mesh sidecar proxy."]},"expected":{"kind":"insert","shownAcceptable":["garlic","onion","onions","minced garlic","chopped onion"]}}
+{"id":"history-echo-email-vs-legal-002","sourceGroup":"history-echo-synthetic","split":"eval","suites":["edge"],"tags":["history-echo","prose","stale-history"],"contextSources":{"fieldText":"synthetic","appContext":"synthetic"},"source":{"kind":"synthetic","note":"Topic-A draft (casual email) with topically-unrelated topic-B history (legal contract). Canaries: indemnification, notwithstanding, herein."},"context":{"beforeCursor":"Hey Mike, just wanted to check in and see if you're still free to grab","afterCursor":"","detectedLanguage":"en","typingContext":"email","target":{"appName":"Mail","bundleIdentifier":"com.apple.mail","windowTitle":"Draft"},"previousUserInputs":["The indemnification clause shall survive termination of this agreement notwithstanding any provision to the contrary set forth herein."]},"expected":{"kind":"insert","shownAcceptable":["lunch","coffee","dinner","a coffee","lunch this week"]}}
+{"id":"history-echo-weather-vs-code-003","sourceGroup":"history-echo-synthetic","split":"eval","suites":["edge"],"tags":["history-echo","prose","stale-history"],"contextSources":{"fieldText":"synthetic","appContext":"synthetic"},"source":{"kind":"synthetic","note":"Topic-A draft (weather chat) with topically-unrelated topic-B history (Python code). Canaries: numpy, epochs, model.fit."},"context":{"beforeCursor":"The forecast for tomorrow looks sunny with a high of","afterCursor":"","detectedLanguage":"en","typingContext":"message","target":{"appName":"Messages","bundleIdentifier":"com.apple.MobileSMS","windowTitle":"Chat"},"previousUserInputs":["import numpy as np\ndef train(model, data):\n    return model.fit(data, epochs=100, batch_size=32)"]},"expected":{"kind":"insert","shownAcceptable":["75","72","80","seventy","around 75 degrees"]}}
+{"id":"history-echo-control-signoff-reuse-004","sourceGroup":"history-echo-synthetic","split":"eval","suites":["edge"],"tags":["history-echo","prose","relevant-history","control"],"contextSources":{"fieldText":"synthetic","appContext":"synthetic"},"source":{"kind":"synthetic","note":"CONTROL: relevant recurring sign-off that IS in history and SHOULD be reused. Guards against over-suppression of legitimate personalization."},"context":{"beforeCursor":"Thanks so much for all your help on this. Best regards,","afterCursor":"","detectedLanguage":"en","typingContext":"email","target":{"appName":"Mail","bundleIdentifier":"com.apple.mail","windowTitle":"Draft"},"previousUserInputs":["Thanks so much for all your help on this. Best regards, Alex Johnson","Looking forward to hearing from you. Best regards, Alex Johnson"]},"expected":{"kind":"insert","shownAcceptable":["Alex","Alex Johnson"]}}
diff --git a/Packages/KeyTypeBench/Sources/KeyTypeBench/EvaluationPipeline.swift b/Packages/KeyTypeBench/Sources/KeyTypeBench/EvaluationPipeline.swift
index d319c18..fec2c7f 100644
--- a/Packages/KeyTypeBench/Sources/KeyTypeBench/EvaluationPipeline.swift
+++ b/Packages/KeyTypeBench/Sources/KeyTypeBench/EvaluationPipeline.swift
@@ -128,6 +128,9 @@ public final class ProductionCompletionEvaluator {
     private let modelInfo: BenchmarkModelInfo
     private let defaultMaxCompletionTokens: Int
     private let defaultMaxDisplayWidth: Int
+    /// When false, the case's `previousUserInputs` (writing history) is dropped from the prompt — the
+    /// history on/off A/B knob. Clipboard/screen side context is unaffected.
+    private let includeWritingHistory: Bool
 
     public init(
         runtime: LocalModelRuntime,
@@ -136,8 +139,10 @@ public final class ProductionCompletionEvaluator {
         compatibilityStore: AppCompatibilityStore = AppCompatibilityStore(),
         decodingConfiguration: DecodingConfiguration = DecodingConfiguration(enableFillInMiddle: true),
         defaultMaxCompletionTokens: Int = 4,
-        defaultMaxDisplayWidth: Int = 80
+        defaultMaxDisplayWidth: Int = 80,
+        includeWritingHistory: Bool = true
     ) {
+        self.includeWritingHistory = includeWritingHistory
         self.compatibilityStore = compatibilityStore
         self.engine = ConstrainedGenerationEngine(
             runtime: runtime,
@@ -228,7 +233,7 @@ public final class ProductionCompletionEvaluator {
         let promptResult = promptBuilder.buildPrompt(
             context: promptContext,
             customInstructions: policy.customInstructions,
-            previousUserInputs: benchmarkCase.context.previousUserInputs,
+            previousUserInputs: includeWritingHistory ? benchmarkCase.context.previousUserInputs : [],
             pasteboardText: benchmarkCase.context.clipboardContext,
             screenText: benchmarkCase.context.screenContext,
             includeEnvironmentContext: policy.includesEnvironmentContext
diff --git a/Packages/KeyTypeBench/Sources/KeyTypeBenchCLI/BenchmarkCommand.swift b/Packages/KeyTypeBench/Sources/KeyTypeBenchCLI/BenchmarkCommand.swift
index d3b7920..575176d 100644
--- a/Packages/KeyTypeBench/Sources/KeyTypeBenchCLI/BenchmarkCommand.swift
+++ b/Packages/KeyTypeBench/Sources/KeyTypeBenchCLI/BenchmarkCommand.swift
@@ -91,6 +91,15 @@ struct Run: AsyncParsableCommand {
     @Option(name: .long, help: "Weight applied to FIM suffix-rerank score.")
     var suffixRerankWeight: Float = DecodingConfiguration().suffixRerankWeight
 
+    @Option(name: .long, help: "Decode-time presence penalty: subtracted once from any token already emitted on the branch. Sweep knob for the repetition-penalty default.")
+    var presencePenalty: Float = DecodingConfiguration().presencePenalty
+
+    @Option(name: .long, help: "Decode-time frequency penalty: subtracted per prior occurrence of a token on the branch.")
+    var frequencyPenalty: Float = DecodingConfiguration().frequencyPenalty
+
+    @Flag(name: .customLong("no-history"), help: "Drop writing-history (previousUserInputs) side context from prompts. A/B knob for the history-on/off experiment.")
+    var noHistory: Bool = false
+
     @Flag(name: .long, help: "Skip missing model/profile inputs instead of failing.")
     var skipMissing: Bool = false
 
@@ -176,7 +185,9 @@ struct Run: AsyncParsableCommand {
                 fimMaxPrefixTokens: fimMaxPrefixTokens,
                 fimMaxSuffixTokens: fimMaxSuffixTokens,
                 suffixRerankTokenCount: suffixRerankTokenCount,
-                suffixRerankWeight: suffixRerankWeight
+                suffixRerankWeight: suffixRerankWeight,
+                presencePenalty: presencePenalty,
+                frequencyPenalty: frequencyPenalty
             )
             let evaluator = ProductionCompletionEvaluator(
                 runtime: runtime,
@@ -185,7 +196,8 @@ struct Run: AsyncParsableCommand {
                 compatibilityStore: compatibilityStore,
                 decodingConfiguration: decodingConfiguration,
                 defaultMaxCompletionTokens: maxCompletionTokens,
-                defaultMaxDisplayWidth: maxDisplayWidth
+                defaultMaxDisplayWidth: maxDisplayWidth,
+                includeWritingHistory: !noHistory
             )
 
             do {
diff --git a/Packages/KeyTypeBench/Tests/KeyTypeBenchTests/CommittedDatasetTests.swift b/Packages/KeyTypeBench/Tests/KeyTypeBenchTests/CommittedDatasetTests.swift
index 61525f9..2ecd472 100644
--- a/Packages/KeyTypeBench/Tests/KeyTypeBenchTests/CommittedDatasetTests.swift
+++ b/Packages/KeyTypeBench/Tests/KeyTypeBenchTests/CommittedDatasetTests.swift
@@ -79,6 +79,35 @@ final class CommittedDatasetTests: XCTestCase {
         }
     }
 
+    /// The history-echo diagnostic fixture (stale/unrelated writing history) is measurement-only — it
+    /// has no behavioural CI assertion because the right fix layer was upstream selection, not an output
+    /// guard. This guards the *dataset* itself from rot: every line must decode, every case must be a
+    /// tagged `insert` case with a non-empty `beforeCursor` and writing history, three cases must be
+    /// tagged `stale-history`, and the control case must list a reuse string that appears in its history.
+    func testHistoryEchoFixtureIsWellFormed() throws {
+        let url = repositoryRoot()
+            .appendingPathComponent("Packages/KeyTypeBench/Sources/KeyTypeBench/Datasets/history-echo.jsonl")
+        let cases = try BenchmarkJSONL.loadCases(from: url)
+        XCTAssertEqual(cases.count, 4)
+
+        for row in cases {
+            XCTAssertFalse(row.context.beforeCursor.isEmpty, row.id)
+            XCTAssertFalse(row.context.previousUserInputs.isEmpty, "\(row.id) must carry writing history")
+            XCTAssertEqual(row.expected.kind, .insert, row.id)
+            XCTAssertTrue(row.tags.contains("history-echo"), row.id)
+        }
+
+        let control = try XCTUnwrap(cases.first { $0.tags.contains("control") })
+        let history = control.context.previousUserInputs.joined(separator: "\n")
+        XCTAssertTrue(
+            control.expected.shownAcceptable.contains { history.contains($0) },
+            "control reuse string must appear in its own writing history"
+        )
+
+        let staleCases = cases.filter { $0.tags.contains("stale-history") }
+        XCTAssertEqual(staleCases.count, 3, "three stale-unrelated-history cases")
+    }
+
     private func assertShare(
         _ count: Int,
         of total: Int,
diff --git a/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift b/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift
index ac130f8..13125ad 100644
--- a/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift
+++ b/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift
@@ -865,19 +865,7 @@ enum AXCaretHelper {
 
     @MainActor
     private static func displayGeometries() -> [DisplayGeometry] {
-        NSScreen.screens.compactMap { screen in
-            guard let number = screen.deviceDescription[NSDeviceDescriptionKey("NSScreenNumber")] as? NSNumber else {
-                return nil
-            }
-
-            let displayID = CGDirectDisplayID(number.uint32Value)
-            return DisplayGeometry(
-                appKitFrame: screen.frame,
-                visibleFrame: screen.visibleFrame,
-                coreGraphicsBounds: CGDisplayBounds(displayID),
-                backingScaleFactor: screen.backingScaleFactor
-            )
-        }
+        ScreenDisplayGeometryProvider.current()
     }
 
     @MainActor
@@ -921,6 +909,27 @@ public struct DisplayGeometry: Equatable {
     }
 }
 
+/// Source of the live `[DisplayGeometry]`, read from `NSScreen`. Kept apart from the pure
+/// `DisplayCoordinateConverter` so the conversion math can be tested without a real display.
+public enum ScreenDisplayGeometryProvider {
+    /// Snapshots each `NSScreen`, skipping any whose device description has no `NSScreenNumber`.
+    @MainActor
+    public static func current() -> [DisplayGeometry] {
+        let screenNumberKey = NSDeviceDescriptionKey("NSScreenNumber")
+        var geometries: [DisplayGeometry] = []
+        for screen in NSScreen.screens {
+            guard let number = screen.deviceDescription[screenNumberKey] as? NSNumber else { continue }
+            geometries.append(DisplayGeometry(
+                appKitFrame: screen.frame,
+                visibleFrame: screen.visibleFrame,
+                coreGraphicsBounds: CGDisplayBounds(CGDirectDisplayID(number.uint32Value)),
+                backingScaleFactor: screen.backingScaleFactor
+            ))
+        }
+        return geometries
+    }
+}
+
 /// Pure CG <-> AppKit coordinate conversion against a set of synthetic or real
 /// `DisplayGeometry` values. Kept side-effect-free so unit tests don't need `NSScreen`.
 public enum DisplayCoordinateConverter {
@@ -980,6 +989,31 @@ public enum DisplayCoordinateConverter {
         )
     }
 
+    /// Point-wise inverse of `appKitRect(fromCoreGraphicsRect:)`: converts a global AppKit point
+    /// (bottom-left origin) into the matching global CoreGraphics point (top-left origin). Caret
+    /// geometry is kept in AppKit space whereas ScreenCaptureKit window frames use CG space, hence
+    /// the need. Returns `nil` when `bestDisplay` finds no display for the point.
+    public static func coreGraphicsPoint(
+        fromAppKitPoint point: CGPoint,
+        displays: [DisplayGeometry]
+    ) -> CGPoint? {
+        // Look the point up as a zero-size rect so the rect-based display lookup can be reused.
+        let probe = CGRect(origin: point, size: .zero)
+        guard let host = bestDisplay(for: probe, displays: displays, keyPath: \.appKitFrame) else {
+            return nil
+        }
+        // Forward mapping being inverted (zero-height case):
+        //   appKit.x = appKitFrame.minX + (cg.x - cgBounds.minX)
+        //   appKit.y = appKitFrame.maxY - (cg.y - cgBounds.minY)
+        let cgBounds = host.coreGraphicsBounds
+        let offsetFromLeft = point.x - host.appKitFrame.minX
+        let offsetFromTop = host.appKitFrame.maxY - point.y
+        return CGPoint(
+            x: cgBounds.minX + offsetFromLeft,
+            y: cgBounds.minY + offsetFromTop
+        )
+    }
+
     private static func bestDisplay(
         for rect: CGRect,
         displays: [DisplayGeometry],
diff --git a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift
index 2fa544a..ba52957 100644
--- a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift
+++ b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift
@@ -19,19 +19,25 @@ public struct ScreenWindowCandidate: Equatable {
     public var isOnScreen: Bool
     /// `windowLayer` — normal app windows are layer 0; menus/panels/overlays sit above.
     public var layer: Int
+    /// Front-to-back position (0 = frontmost), from ScreenCaptureKit's window ordering. Used to pick
+    /// the frontmost window when several overlap the caret. Defaults high so synthetic candidates that
+    /// don't set it sort last on this key and fall through to the area tiebreak.
+    public var zOrder: Int
 
     public init(
         windowID: CGWindowID,
         processID: pid_t,
         frame: CGRect,
         isOnScreen: Bool,
-        layer: Int
+        layer: Int,
+        zOrder: Int = .max
     ) {
         self.windowID = windowID
         self.processID = processID
         self.frame = frame
         self.isOnScreen = isOnScreen
         self.layer = layer
+        self.zOrder = zOrder
     }
 }
 
@@ -40,12 +46,18 @@ public enum ScreenWindowSelector {
     static let minimumWidth: CGFloat = 200
     static let minimumHeight: CGFloat = 120
 
-    /// Picks the window to capture for `pid`: the focused app's main content window. Prefers
-    /// on-screen, normal-layer (0) windows and, among equals, the largest one (tie-broken by the
-    /// lowest window id for determinism). Returns `nil` when the app has no suitable window.
+    /// Picks the window to capture for `pid`: the focused app's main content window. When
+    /// `focusPoint` (the caret location, in global top-left screen coordinates) is supplied and
+    /// lands inside one or more of the app's windows, only those are considered — this disambiguates
+    /// multiple windows of the same app so OCR reads the window the user is actually typing in, not
+    /// just the largest one. (Without it, a second window of the same app could bleed its text into
+    /// the prompt's screen context.) Among the remaining windows, prefers on-screen, normal-layer (0)
+    /// ones, then the frontmost (lowest `zOrder`) and, among equals, the largest (tie-broken by the
+    /// lowest window id for determinism). Returns `nil` when the app has no suitable window.
     public static func selectWindowID(
         forPID pid: pid_t,
-        from candidates: [ScreenWindowCandidate]
+        from candidates: [ScreenWindowCandidate],
+        focusPoint: CGPoint? = nil
     ) -> CGWindowID? {
         let eligible = candidates.filter { candidate in
             candidate.processID == pid
@@ -54,11 +66,23 @@ public enum ScreenWindowSelector {
         }
         guard !eligible.isEmpty else { return nil }
 
-        let ranked = eligible.sorted { lhs, rhs in
+        // If we know where the caret is, prefer the window(s) containing it. Fall back to the full
+        // set when the point lands in none of them (e.g. caret geometry unavailable/stale), so we
+        // never regress to returning nil just because the point missed.
+        let containing = focusPoint.map { point in
+            eligible.filter { $0.frame.contains(point) }
+        } ?? []
+        let pool = containing.isEmpty ? eligible : containing
+
+        let ranked = pool.sorted { lhs, rhs in
             if lhs.isOnScreen != rhs.isOnScreen { return lhs.isOnScreen }
             let lhsNormalLayer = lhs.layer == 0
             let rhsNormalLayer = rhs.layer == 0
             if lhsNormalLayer != rhsNormalLayer { return lhsNormalLayer }
+            // Frontmost wins. Candidates built from ScreenCaptureKit carry distinct z-orders, so this
+            // key decides between them (with or without a caret); only candidates left at the default
+            // zOrder tie here and fall through to the area tiebreak below.
+            if lhs.zOrder != rhs.zOrder { return lhs.zOrder < rhs.zOrder }
             let lhsArea = lhs.frame.width * lhs.frame.height
             let rhsArea = rhs.frame.width * rhs.frame.height
             if lhsArea != rhsArea { return lhsArea > rhsArea }
diff --git a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift
index 82b304f..7933a14 100644
--- a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift
+++ b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift
@@ -19,8 +19,9 @@ public protocol ScreenWindowTextCapturing: Sendable {
     /// Capture the focused window for `pid` and return its OCR'd text, or `nil` if there's no
     /// suitable window / no recognised text. `fieldText` is the focused field's own text (already
     /// captured via Accessibility); lines matching it are stripped so screen context doesn't
-    /// duplicate the field.
-    func captureWindowText(pid: pid_t, fieldText: String, maxLines: Int, maxChars: Int) async throws -> String?
+    /// duplicate the field. `focusPoint` (caret location, global top-left screen coordinates)
+    /// disambiguates multiple windows of the same app so the correct one is read.
+    func captureWindowText(pid: pid_t, fieldText: String, focusPoint: CGPoint?, maxLines: Int, maxChars: Int) async throws -> String?
 }
 
 /// `ScreenTextProviding` cache fed by an out-of-band capturer. Main-actor isolated: the completion
@@ -49,10 +50,11 @@ public final class WindowOCRCaptureEngine: ScreenTextProviding {
     }
 
     /// Kick off a fresh capture for `pid`, superseding any in-flight one. `fieldText` is the focused
-    /// field's own text, stripped from the OCR so screen context doesn't echo it. Fire-and-forget:
-    /// the cache updates when the capture completes. A failed/empty capture clears the cache so a
-    /// stale reading can't outlive the window it came from.
-    public func refresh(pid: pid_t, fieldText: String) {
+    /// field's own text, stripped from the OCR so screen context doesn't echo it. `focusPoint` is the
+    /// caret location (global top-left screen coordinates) used to pick the right window when the app
+    /// has several. Fire-and-forget: the cache updates when the capture completes. A failed/empty
+    /// capture clears the cache so a stale reading can't outlive the window it came from.
+    public func refresh(pid: pid_t, fieldText: String, focusPoint: CGPoint? = nil) {
         inFlight?.cancel()
         let capturer = self.capturer
         let maxLines = self.maxLines
@@ -61,6 +63,7 @@ public final class WindowOCRCaptureEngine: ScreenTextProviding {
             let text = try? await capturer.captureWindowText(
                 pid: pid,
                 fieldText: fieldText,
+                focusPoint: focusPoint,
                 maxLines: maxLines,
                 maxChars: maxChars
             )
@@ -89,10 +92,14 @@ public struct ScreenCaptureKitWindowTextCapturer: ScreenWindowTextCapturing {
         self.maxCaptureDimension = maxCaptureDimension
     }
 
-    public func captureWindowText(pid: pid_t, fieldText: String, maxLines: Int, maxChars: Int) async throws -> String? {
+    public func captureWindowText(pid: pid_t, fieldText: String, focusPoint: CGPoint?, maxLines: Int, maxChars: Int) async throws -> String? {
         let content = try await SCShareableContent.excludingDesktopWindows(false, onScreenWindowsOnly: true)
-        let candidates = content.windows.map(ScreenWindowCandidate.init(window:))
-        guard let windowID = ScreenWindowSelector.selectWindowID(forPID: pid, from: candidates),
+        // `content.windows` is treated as front-to-back (NOTE(review): confirm ScreenCaptureKit
+        // guarantees this ordering); the index is the z-order the selector ranks candidates by.
+        let candidates = content.windows.enumerated().map { index, window in
+            ScreenWindowCandidate(window: window, zOrder: index)
+        }
+        guard let windowID = ScreenWindowSelector.selectWindowID(forPID: pid, from: candidates, focusPoint: focusPoint),
               let window = content.windows.first(where: { $0.windowID == windowID }) else {
             return nil
         }
@@ -118,13 +125,14 @@ public struct ScreenCaptureKitWindowTextCapturer: ScreenWindowTextCapturing {
 }
 
 private extension ScreenWindowCandidate {
-    init(window: SCWindow) {
+    init(window: SCWindow, zOrder: Int) {
         self.init(
             windowID: window.windowID,
             processID: window.owningApplication?.processID ?? -1,
             frame: window.frame,
             isOnScreen: window.isOnScreen,
-            layer: window.windowLayer
+            layer: window.windowLayer,
+            zOrder: zOrder
         )
     }
 }
diff --git a/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift b/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift
index 61a3943..82cbf1b 100644
--- a/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift
+++ b/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift
@@ -60,6 +60,63 @@ final class DisplayCoordinateConverterTests: XCTestCase {
         XCTAssertEqual(first.minY, 860, accuracy: 0.001)
     }
 
+    func testAppKitPointToCGFlipsAroundDisplayHeight() throws {
+        // AppKit (bottom-left) y=960 should map back to CG (top-left) y=120 on a 1080-tall display.
+        let cg = try XCTUnwrap(
+            DisplayCoordinateConverter.coreGraphicsPoint(
+                fromAppKitPoint: CGPoint(x: 50, y: 960),
+                displays: [singleDisplay]
+            )
+        )
+        XCTAssertEqual(cg.x, 50, accuracy: 0.001)
+        XCTAssertEqual(cg.y, 120, accuracy: 0.001)
+    }
+
+    func testCGRectToAppKitPointRoundTrips() throws {
+        // The point conversion must invert the rect conversion: a caret's CG midpoint → AppKit → CG
+        // returns the original midpoint. This is the exact path used for window selection.
+        let cgRect = CGRect(x: 300, y: 220, width: 2, height: 24)
+        let appKit = try XCTUnwrap(
+            DisplayCoordinateConverter.appKitRect(fromCoreGraphicsRect: cgRect, displays: [singleDisplay])
+        )
+        let backToCG = try XCTUnwrap(
+            DisplayCoordinateConverter.coreGraphicsPoint(
+                fromAppKitPoint: CGPoint(x: appKit.midX, y: appKit.midY),
+                displays: [singleDisplay]
+            )
+        )
+        XCTAssertEqual(backToCG.x, cgRect.midX, accuracy: 0.001)
+        XCTAssertEqual(backToCG.y, cgRect.midY, accuracy: 0.001)
+    }
+
+    func testAppKitPointToCGOnSecondaryDisplay() throws {
+        let secondary = DisplayGeometry(
+            appKitFrame: CGRect(x: 1920, y: 180, width: 1440, height: 900),
+            visibleFrame: CGRect(x: 1920, y: 204, width: 1440, height: 876),
+            coreGraphicsBounds: CGRect(x: 1920, y: 0, width: 1440, height: 900),
+            backingScaleFactor: 2
+        )
+        // AppKit point inside the secondary display. localY = appKitFrame.maxY(1080) - 1010 = 70,
+        // so CG y = coreGraphicsBounds.minY(0) + 70 = 70.
+        let cg = try XCTUnwrap(
+            DisplayCoordinateConverter.coreGraphicsPoint(
+                fromAppKitPoint: CGPoint(x: 2500, y: 1010),
+                displays: [singleDisplay, secondary]
+            )
+        )
+        XCTAssertEqual(cg.x, 2500, accuracy: 0.001)
+        XCTAssertEqual(cg.y, 70, accuracy: 0.001)
+    }
+
+    func testAppKitPointToCGReturnsNilOutsideAllDisplays() {
+        XCTAssertNil(
+            DisplayCoordinateConverter.coreGraphicsPoint(
+                fromAppKitPoint: CGPoint(x: 9000, y: 9000),
+                displays: [singleDisplay]
+            )
+        )
+    }
+
     func testMultiDisplayPicksContainingDisplay() throws {
         let primary = singleDisplay
         // Secondary 1440x900 sitting to the right of the primary in CG space; AppKit places it
diff --git a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift
index 86a2adb..a8ead11 100644
--- a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift
+++ b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift
@@ -8,9 +8,10 @@ final class ScreenWindowSelectorTests: XCTestCase {
         pid: pid_t,
         frame: CGRect,
         onScreen: Bool = true,
-        layer: Int = 0
+        layer: Int = 0,
+        zOrder: Int = .max
     ) -> ScreenWindowCandidate {
-        ScreenWindowCandidate(windowID: id, processID: pid, frame: frame, isOnScreen: onScreen, layer: layer)
+        ScreenWindowCandidate(windowID: id, processID: pid, frame: frame, isOnScreen: onScreen, layer: layer, zOrder: zOrder)
     }
 
     func testReturnsNilWhenNoWindowMatchesPID() {
@@ -48,6 +49,46 @@ final class ScreenWindowSelectorTests: XCTestCase {
         XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates), 2)
     }
 
+    func testFocusPointPicksWindowContainingCaretOverLarger() {
+        // Two windows of the same app: the caret is in the smaller one, which must win over the
+        // larger window the area-based ranking would otherwise pick.
+        let candidates = [
+            candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 1600, height: 1000)),
+            candidate(id: 2, pid: 42, frame: CGRect(x: 1700, y: 0, width: 600, height: 400))
+        ]
+        let caret = CGPoint(x: 1750, y: 50)
+        XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates, focusPoint: caret), 2)
+    }
+
+    func testOverlappingWindowsAtCaretPickFrontmost() {
+        // A small compose window (frontmost, z=0) floats over a large background window (z=1); the
+        // caret falls inside both. The frontmost must win even though the background is larger.
+        let candidates = [
+            candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 1600, height: 1000), zOrder: 1),
+            candidate(id: 2, pid: 42, frame: CGRect(x: 100, y: 100, width: 500, height: 400), zOrder: 0)
+        ]
+        let caret = CGPoint(x: 200, y: 200) // inside both
+        XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates, focusPoint: caret), 2)
+    }
+
+    func testFallbackPrefersFrontmostWhenNoCaret() {
+        // With no caret info, the frontmost window is a better guess than the largest.
+        let candidates = [
+            candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 1600, height: 1000), zOrder: 1),
+            candidate(id: 2, pid: 42, frame: CGRect(x: 0, y: 0, width: 800, height: 600), zOrder: 0)
+        ]
+        XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates), 2)
+    }
+
+    func testFocusPointFallsBackToRankingWhenOutsideAllWindows() {
+        let candidates = [
+            candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 400, height: 300)),
+            candidate(id: 2, pid: 42, frame: CGRect(x: 0, y: 0, width: 1200, height: 800))
+        ]
+        let caret = CGPoint(x: 9000, y: 9000)
+        XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates, focusPoint: caret), 2)
+    }
+
     func testCaptureScaleDownscalesLargeWindows() {
         let scale = ScreenWindowSelector.captureScale(for: CGSize(width: 3200, height: 1800), maxDimension: 1600)
         XCTAssertEqual(scale, 0.5, accuracy: 0.0001)
diff --git a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift
index 17b463e..7a0f21c 100644
--- a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift
+++ b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift
@@ -5,7 +5,7 @@ import XCTest
 final class WindowOCRCaptureEngineTests: XCTestCase {
     private struct FakeCapturer: ScreenWindowTextCapturing {
         let result: String?
-        func captureWindowText(pid: pid_t, fieldText: String, maxLines: Int, maxChars: Int) async throws -> String? {
+        func captureWindowText(pid: pid_t, fieldText: String, focusPoint: CGPoint?, maxLines: Int, maxChars: Int) async throws -> String? {
             result
         }
     }
diff --git a/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift b/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift
index 183771b..9ee0fe3 100644
--- a/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift
+++ b/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift
@@ -202,6 +202,12 @@ public final class PersistentWritingHistoryStore: WritingHistoryStoring, @unchec
                     .filter(Column("charCount") >= query.minimumCharacters)
                 if let bundle = query.bundleIdentifier, query.sameAppOnly {
                     request = request.filter(Column("appBundleIdentifier") == bundle)
+                    // Web fields: keep only the focused domain's rows so a different tab in the same
+                    // browser can't fill the row budget (the in-memory selection re-checks the domain
+                    // as a backstop). Queries with a nil/empty domain (native apps) are unaffected.
+                    if let domain = query.domain, !domain.isEmpty {
+                        request = request.filter(Column("domain") == domain)
+                    }
                 }
                 if let language = query.language {
                     // Keep rows whose language matches or is unknown (conservative).
@@ -228,11 +234,20 @@ public final class PersistentWritingHistoryStore: WritingHistoryStoring, @unchec
 /// and the shape of the M3 `InMemoryWritingHistoryStore`.
 enum WritingHistorySelection {
     static func select(from entries: [WritingHistorySample], query: WritingHistoryQuery) -> [String] {
-        let candidates = entries.filter { $0.text.count >= query.minimumCharacters }
+        let candidates = entries.filter {
+            $0.text.count >= query.minimumCharacters && WritingHistoryFilter.isProse($0.text)
+        }
 
         let sameApp = candidates.filter { entry in
             guard let bundle = query.bundleIdentifier else { return true }
-            return entry.appBundleIdentifier == bundle
+            guard entry.appBundleIdentifier == bundle else { return false }
+            // For web fields the bundle is the browser, so several sites share it. Require a matching
+            // domain so content from a different tab (or an unknown-domain sample) can't be treated as
+            // same-context and bleed in. Native apps have no domain, so this is inert for them.
+            if let queryDomain = query.domain, !queryDomain.isEmpty {
+                return entry.domain == queryDomain
+            }
+            return true
         }
         let crossApp = candidates.filter { entry in
             guard let bundle = query.bundleIdentifier else { return false }
@@ -244,6 +259,10 @@ enum WritingHistorySelection {
 
         func take(_ samples: [WritingHistorySample], upTo limit: Int) {
             for s in samples.prefix(limit) where seen.insert(s.text).inserted {
+                // Skip near-duplicate drafts (an earlier version of the same text the user kept
+                // editing). Injecting both wastes the budget and amplifies the model's tendency to
+                // parrot the most recent matching phrase verbatim.
+                if picked.contains(where: { isNearDuplicate(s.text, of: $0.text) }) { continue }
                 picked.append(s)
             }
         }
@@ -266,4 +285,24 @@ enum WritingHistorySelection {
         }
         return result
     }
+
+    /// Word set (lowercased letters/digits) used for cheap near-duplicate detection. `n` is tiny
+    /// (≤ fetchSize), so the O(picked·words) comparison in `take` is negligible.
+    static func wordSet(_ text: String) -> Set<String> {
+        Set(text.lowercased().split(whereSeparator: { !$0.isLetter && !$0.isNumber }).map(String.init))
+    }
+
+    /// True when two samples are near-identical: word-set Jaccard ≥ 0.8, or the smaller word set is a
+    /// subset of the larger (order/repeats ignored). If either has fewer than 3 distinct words, only
+    /// exact equality counts. Mirrored in `InMemoryWritingHistoryStore`; keep the two in sync.
+    static func isNearDuplicate(_ candidate: String, of existing: String) -> Bool {
+        let a = wordSet(candidate), b = wordSet(existing)
+        guard a.count >= 3, b.count >= 3 else { return candidate == existing }
+        let intersection = a.intersection(b).count
+        let union = a.union(b).count
+        if union > 0, Double(intersection) / Double(union) >= 0.8 { return true }
+        let smaller = a.count <= b.count ? a : b
+        let larger = a.count <= b.count ? b : a
+        return smaller.isSubset(of: larger)   // every word of the smaller set appears in the larger
+    }
 }
diff --git a/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift b/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift
index d787dba..8715e11 100644
--- a/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift
+++ b/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift
@@ -41,6 +41,25 @@ final class PersonalizationTests: XCTestCase {
         XCTAssertTrue(store.samples(for: WritingHistoryQuery(bundleIdentifier: "com.app.mail")).isEmpty)
     }
 
+    func testPersistentStoreDomainScopingExcludesOtherTabs() throws {
+        // DB-level coverage for the domain filter (the production path): two sites in the same browser
+        // bundle must not share context, and a nil-domain row must not leak into a domain-scoped query.
+        let (store, url) = try makeTempStore()
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        store.record(WritingHistorySample(text: "Draft about quarterly revenue numbers here.", appBundleIdentifier: "com.browser", domain: "mail.google.com"))
+        store.record(WritingHistorySample(text: "you can use it to access the OpenAI API key.", appBundleIdentifier: "com.browser", domain: "platform.openai.com"))
+        store.record(WritingHistorySample(text: "Some unknown-domain text from this browser.", appBundleIdentifier: "com.browser", domain: nil))
+
+        let result = store.samples(for: WritingHistoryQuery(
+            bundleIdentifier: "com.browser",
+            domain: "mail.google.com",
+            minimumCharacters: 1,
+            sameAppOnly: true
+        ))
+        XCTAssertEqual(result, ["Draft about quarterly revenue numbers here."])
+    }
+
     func testPersistentStoreDedupesIdenticalSample() throws {
         let (store, url) = try makeTempStore()
         defer { try? FileManager.default.removeItem(at: url) }
@@ -108,6 +127,87 @@ final class PersonalizationTests: XCTestCase {
         XCTAssertEqual(result, ["Newer note from this same app here."])
     }
 
+    func testSameAppOnlyExcludesCrossAppContent() {
+        // Regression: a recent sample from another app must never be injected when the query is
+        // same-app-scoped — otherwise unrelated content (e.g. a Notes draft) bleeds into another
+        // app's prompt and the model parrots it verbatim.
+        let now = Date()
+        let entries = [
+            WritingHistorySample(text: "you can use it to access the OpenAI API.", appBundleIdentifier: "com.app.notes", updatedAt: now),
+            WritingHistorySample(text: "Hi Molly, hope you are doing well today.", appBundleIdentifier: "com.app.mail", updatedAt: now.addingTimeInterval(-100))
+        ]
+        let result = WritingHistorySelection.select(from: entries, query: WritingHistoryQuery(
+            bundleIdentifier: "com.app.mail",
+            minimumCharacters: 1,
+            sameAppOnly: true
+        ))
+        XCTAssertEqual(result, ["Hi Molly, hope you are doing well today."])
+        XCTAssertFalse(result.contains { $0.contains("OpenAI") }, "cross-app content must not leak")
+    }
+
+    func testSameAppScopingExcludesOtherWebDomains() {
+        // Two tabs in the same browser (same bundle) must not share context: a sample from another
+        // site, or one with no recorded domain, must not be injected into the focused domain's prompt.
+        let now = Date()
+        let entries = [
+            WritingHistorySample(text: "Draft about quarterly revenue numbers.", appBundleIdentifier: "com.browser", domain: "mail.google.com", updatedAt: now),
+            WritingHistorySample(text: "you can use it to access the OpenAI API.", appBundleIdentifier: "com.browser", domain: "platform.openai.com", updatedAt: now),
+            WritingHistorySample(text: "Some unknown-domain text from this browser.", appBundleIdentifier: "com.browser", domain: nil, updatedAt: now)
+        ]
+        let result = WritingHistorySelection.select(from: entries, query: WritingHistoryQuery(
+            bundleIdentifier: "com.browser",
+            domain: "mail.google.com",
+            minimumCharacters: 1,
+            sameAppOnly: true
+        ))
+        XCTAssertEqual(result, ["Draft about quarterly revenue numbers."])
+    }
+
+    func testNativeAppScopingIsUnaffectedByDomain() {
+        // A native app has no domain; same-app scoping must still return its samples.
+        let now = Date()
+        let entries = [
+            WritingHistorySample(text: "A note typed in the native app here.", appBundleIdentifier: "com.app.notes", domain: nil, updatedAt: now)
+        ]
+        let result = WritingHistorySelection.select(from: entries, query: WritingHistoryQuery(
+            bundleIdentifier: "com.app.notes",
+            minimumCharacters: 1,
+            sameAppOnly: true
+        ))
+        XCTAssertEqual(result, ["A note typed in the native app here."])
+    }
+
+    func testSelectionDropsNearDuplicateDrafts() {
+        // The user kept editing one draft; an earlier version and its extension must not both be
+        // injected (that amplifies verbatim parroting and wastes the budget).
+        let now = Date()
+        let entries = [
+            WritingHistorySample(text: "i want to write about the AI meetup today", appBundleIdentifier: "com.app", updatedAt: now),
+            WritingHistorySample(text: "i want to write about the AI meetup", appBundleIdentifier: "com.app", updatedAt: now.addingTimeInterval(-10)),
+            WritingHistorySample(text: "completely unrelated note about gardening tips", appBundleIdentifier: "com.app", updatedAt: now.addingTimeInterval(-20))
+        ]
+        let result = WritingHistorySelection.select(from: entries, query: WritingHistoryQuery(
+            bundleIdentifier: "com.app",
+            minimumCharacters: 1,
+            longestCount: 0,
+            mostRecentCount: 8,
+            crossAppRecentCount: 0
+        ))
+        XCTAssertEqual(result.filter { $0.contains("AI meetup") }.count, 1, "near-duplicate drafts collapse to one")
+        XCTAssertTrue(result.contains("completely unrelated note about gardening tips"), "distinct samples are kept")
+    }
+
+    func testNearDuplicateKeepsDistinctSamples() {
+        XCTAssertFalse(WritingHistorySelection.isNearDuplicate(
+            "the quarterly revenue report is due friday",
+            of: "remember to water the office plants every morning"
+        ))
+        XCTAssertTrue(WritingHistorySelection.isNearDuplicate(
+            "thanks so much for the thoughtful feedback today",
+            of: "thanks so much for the thoughtful feedback"
+        ))
+    }
+
     // MARK: - Telemetry
 
     func testTelemetryRatesAndPercentiles() {
@@ -481,6 +581,137 @@ final class PersonalizationTests: XCTestCase {
         XCTAssertLessThanOrEqual(a.minBranchProbabilityScale, ThresholdTuner.maxProbabilityScale)
     }
 
+    // MARK: - Writing history quality filter
+
+    func testIsProse_acceptsNormalEmailText() {
+        XCTAssertTrue(WritingHistoryFilter.isProse("Hi Maya, thanks for the update on the project."))
+        XCTAssertTrue(WritingHistoryFilter.isProse("The quarterly report is due on Friday afternoon."))
+    }
+
+    func testIsProse_acceptsBioText() {
+        XCTAssertTrue(WritingHistoryFilter.isProse(
+            "AI, software, and ideas too good to ignore. Building a company brain for the industrial floor. Breaking things, learning fast."
+        ))
+    }
+
+    func testIsProse_rejectsBareURL() {
+        XCTAssertFalse(WritingHistoryFilter.isProse("https://github.com/shreeraman96"))
+        XCTAssertFalse(WritingHistoryFilter.isProse("www.example.com"))
+    }
+
+    func testIsProse_rejectsUUIDBlobEntry() {
+        // "uuid=..." style entries from captured file-open dialogs
+        XCTAssertFalse(WritingHistoryFilter.isProse("uuid=EF757712-3FDF-48F4-B026-DB0AEF04AC2B.jpeg"))
+    }
+
+    func testIsProse_rejectsFilesystemPath() {
+        XCTAssertFalse(WritingHistoryFilter.isProse("/Users/shreeram/Downloads/report.pdf"))
+        XCTAssertFalse(WritingHistoryFilter.isProse("/Library/Application Support/KeyType/Models/gemma.bin"))
+    }
+
+    func testIsProse_rejectsEmptyString() {
+        XCTAssertFalse(WritingHistoryFilter.isProse(""))
+        XCTAssertFalse(WritingHistoryFilter.isProse("   "))
+    }
+
+    func testFilterByRelevance_dropsZeroOverlapSampleBeyondRecencyFloor() {
+        let bio = "Building a company brain for the industrial floor. Breaking things, learning fast."
+
+        // With recencyFloor=0, all samples are subject to the Jaccard gate; bio is dropped.
+        let resultFloorZero = WritingHistoryFilter.filterByRelevance(
+            [bio], beforeCursor: "Hi Molly, This", recencyFloor: 0
+        )
+        XCTAssertTrue(resultFloorZero.isEmpty, "unrelated bio must be dropped when no recency floor")
+
+        // With recencyFloor=1 and two samples, the first is kept as style anchor;
+        // the second (bio) is beyond the floor and dropped.
+        let recent = "Hi Molly, I hope this finds you well."
+        let resultWithFloor = WritingHistoryFilter.filterByRelevance(
+            [recent, bio], beforeCursor: "Hi Molly, This", recencyFloor: 1
+        )
+        XCTAssertTrue(resultWithFloor.contains(recent), "most-recent sample kept as style anchor")
+        XCTAssertFalse(resultWithFloor.contains(bio), "unrelated bio beyond floor must be dropped")
+
+        // Both samples within the floor (default recencyFloor=2) → both kept unconditionally.
+        let resultBothInFloor = WritingHistoryFilter.filterByRelevance(
+            [recent, bio], beforeCursor: "Hi Molly, This"
+        )
+        XCTAssertEqual(resultBothInFloor.count, 2, "all samples within recency floor are always kept")
+    }
+
+    func testFilterByRelevance_keepsRelatedSample() {
+        let techNote = "We are building industrial floor automation systems."
+        // "industrial" and "floor" overlap with the cursor text
+        let result = WritingHistoryFilter.filterByRelevance(
+            [techNote],
+            beforeCursor: "Here is an update on the industrial floor project."
+        )
+        XCTAssertEqual(result, [techNote], "topically related sample must be kept")
+    }
+
+    func testFilterByRelevance_skipsFilterWhenCursorHasTooFewContentWords() {
+        let bio = "Building a company brain for the industrial floor."
+        // "Hi," has only 1 non-stopword → minimumContentWords not reached → all samples kept
+        let result = WritingHistoryFilter.filterByRelevance([bio], beforeCursor: "Hi,")
+        XCTAssertEqual(result, [bio], "filter must be skipped when cursor lacks content words")
+    }
+
+    func testFilterByRelevance_keepsSignOffWhenCursorContainsSignOffWord() {
+        let signOff = "Kind regards, Sam"
+        // "kind" appears in both sample and cursor → kept
+        let result = WritingHistoryFilter.filterByRelevance(
+            [signOff],
+            beforeCursor: "Thanks for the update. Kind"
+        )
+        XCTAssertEqual(result, [signOff], "sign-off kept when leading word appears in cursor")
+    }
+
+    func testFilterByRelevance_multipleInputsSomeMeetThreshold() {
+        let bio = "Building a company brain for the industrial floor."
+        let reply = "Thanks for reaching out about the project timeline."
+        let recent = "Hi Molly, looking forward to your reply."
+        // recencyFloor=1 → recent is the style anchor (always kept).
+        // bio has {building, company, brain, industrial, floor} → 0 overlap with "Hi Molly, Thanks" → dropped
+        // reply has {thanks, reaching, project, timeline} → "thanks" matches → kept
+        let result = WritingHistoryFilter.filterByRelevance(
+            [recent, bio, reply],
+            beforeCursor: "Hi Molly, Thanks",
+            recencyFloor: 1
+        )
+        XCTAssertTrue(result.contains(recent), "recency-floor anchor always kept")
+        XCTAssertFalse(result.contains(bio), "bio with zero overlap must be dropped beyond floor")
+        XCTAssertTrue(result.contains(reply), "reply sharing 'thanks' must be kept")
+    }
+
+    func testPersistentStoreFiltersJunkAtSelectionTime() throws {
+        // Junk stored via the raw store.record() path (bypassing the recorder's isProse guard,
+        // which runs only in WritingHistoryRecorder in the main app target). The selection-time
+        // filter must exclude it so existing on-disk junk never surfaces, without a migration.
+        let (store, url) = try makeTempStore()
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        // store.record() has no isProse check — it accepts anything, simulating pre-existing junk.
+        store.record(WritingHistorySample(
+            text: "https://github.com/shreeraman96",
+            appBundleIdentifier: "com.browser",
+            domain: "github.com"
+        ))
+        store.record(WritingHistorySample(
+            text: "The quarterly report is due on Friday afternoon.",
+            appBundleIdentifier: "com.browser",
+            domain: "github.com"
+        ))
+
+        let result = store.samples(for: WritingHistoryQuery(
+            bundleIdentifier: "com.browser",
+            domain: "github.com",
+            minimumCharacters: 1,
+            sameAppOnly: true
+        ))
+        XCTAssertFalse(result.contains("https://github.com/shreeraman96"), "URL junk must be filtered at selection time")
+        XCTAssertTrue(result.contains("The quarterly report is due on Friday afternoon."))
+    }
+
     // MARK: - Keychain
 
     func testKeychainPassphraseRoundTripIfAvailable() throws {
diff --git a/Packages/Prompting/Sources/Prompting/WritingHistory.swift b/Packages/Prompting/Sources/Prompting/WritingHistory.swift
index d67fc3d..dc03f43 100644
--- a/Packages/Prompting/Sources/Prompting/WritingHistory.swift
+++ b/Packages/Prompting/Sources/Prompting/WritingHistory.swift
@@ -59,7 +59,9 @@ public struct WritingHistoryQuery: Equatable {
         longestCount: Int = 2,
         mostRecentCount: Int = 4,
         crossAppRecentCount: Int = 2,
-        tokenBudget: Int = 256,
+        // History is background style/context, not text to reproduce. A large budget let it dominate
+        // the prompt ~20:1 over the user's typed text and the small model parroted it; keep it modest.
+        tokenBudget: Int = 128,
         sameAppOnly: Bool = false
     ) {
         self.bundleIdentifier = bundleIdentifier
@@ -95,6 +97,7 @@ public struct InMemoryWritingHistoryStore: WritingHistoryProviding {
     public func samples(for query: WritingHistoryQuery) -> [String] {
         let candidates = entries.filter { entry in
             guard entry.text.count >= query.minimumCharacters else { return false }
+            guard WritingHistoryFilter.isProse(entry.text) else { return false }
             if query.sameAppOnly, let bundle = query.bundleIdentifier,
                entry.appBundleIdentifier != bundle {
                 return false
@@ -108,7 +111,14 @@ public struct InMemoryWritingHistoryStore: WritingHistoryProviding {
 
         let sameApp = candidates.filter { entry in
             guard let bundle = query.bundleIdentifier else { return true }
-            return entry.appBundleIdentifier == bundle
+            guard entry.appBundleIdentifier == bundle else { return false }
+            // For web fields the bundle is the browser, shared across sites; require a matching domain
+            // so a different tab's content can't be treated as same-context. Native apps have no
+            // domain, so this is inert for them. Mirrors `WritingHistorySelection` in Personalization.
+            if let queryDomain = query.domain, !queryDomain.isEmpty {
+                return entry.domain == queryDomain
+            }
+            return true
         }
         let crossApp = candidates.filter { entry in
             guard let bundle = query.bundleIdentifier else { return false }
@@ -120,6 +130,8 @@ public struct InMemoryWritingHistoryStore: WritingHistoryProviding {
 
         func take(_ samples: [WritingHistorySample], upTo limit: Int) {
             for s in samples.prefix(limit) where seen.insert(s.text).inserted {
+                // Skip near-duplicate drafts; mirrors `WritingHistorySelection` in Personalization.
+                if picked.contains(where: { Self.isNearDuplicate(s.text, of: $0.text) }) { continue }
                 picked.append(s)
             }
         }
@@ -137,4 +149,22 @@ public struct InMemoryWritingHistoryStore: WritingHistoryProviding {
 
         return Array(picked.prefix(query.fetchSize)).map { $0.text }
     }
+
+    /// Distinct lowercased alphanumeric words of `text`; input to the cheap near-duplicate check.
+    static func wordSet(_ text: String) -> Set<String> {
+        Set(text.lowercased().split { !($0.isLetter || $0.isNumber) }.map { String($0) })
+    }
+
+    /// Whether two samples are near-identical: word-set Jaccard ≥ 0.8, or the smaller word set is a
+    /// subset of the larger. Mirror of `WritingHistorySelection.isNearDuplicate`; keep the two in sync.
+    static func isNearDuplicate(_ candidate: String, of existing: String) -> Bool {
+        let lhs = wordSet(candidate)
+        let rhs = wordSet(existing)
+        // With fewer than three distinct words on either side, only exact text equality counts.
+        if lhs.count < 3 || rhs.count < 3 { return candidate == existing }
+        let shared = lhs.intersection(rhs).count
+        let combined = lhs.union(rhs).count
+        if combined > 0, Double(shared) / Double(combined) >= 0.8 { return true }
+        return lhs.count <= rhs.count ? lhs.isSubset(of: rhs) : rhs.isSubset(of: lhs)
+    }
 }
diff --git a/Packages/Prompting/Sources/Prompting/WritingHistoryFilter.swift b/Packages/Prompting/Sources/Prompting/WritingHistoryFilter.swift
new file mode 100644
index 0000000..62b5954
--- /dev/null
+++ b/Packages/Prompting/Sources/Prompting/WritingHistoryFilter.swift
@@ -0,0 +1,124 @@
+import Foundation
+
+/// Quality gates applied to writing-history samples before they reach the prompt.
+///
+/// **Junk filter** (`isProse`): history samples can contain non-prose entries — URLs captured
+/// from browser address bars, UUID-bearing file references, or hex blobs. These waste prompt
+/// token budget without aiding style personalization and can mislead the model.
+///
+/// **Relevance filter** (`filterByRelevance`): a topically-unrelated history sample (e.g. a
+/// user's professional bio stored from a previous Gmail session) can cause the model to
+/// paraphrase it into an unrelated draft. Applied at **generation time** with the live
+/// `beforeCursor` — not inside the 2-second frozen side-context cache — so the judgment always
+/// reflects what the user is currently typing.
+///
+/// Trade-off: `filterByRelevance` will occasionally drop stock closing phrases ("Kind regards")
+/// when the email body has zero topical overlap with the sign-off. This is accepted because
+/// (a) the threshold is conservative (Jaccard ≥ 0.10), (b) once the user has typed the opening
+/// word of the sign-off ("Kind") the phrase is kept, and (c) preventing biography bleed into
+/// unrelated emails is a higher-priority correctness concern.
+public enum WritingHistoryFilter {
+
+    // MARK: - Junk filter
+
+    /// Returns `false` for clearly non-prose entries: bare URLs, UUID blobs, filesystem paths, or text
+    /// where fewer than 65% of its Unicode scalars are letters or spaces (empty text is rejected too).
+    public static func isProse(_ text: String) -> Bool {
+        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
+        guard !trimmed.isEmpty else { return false }
+
+        // Bare URL (entire text is a single URL, no surrounding prose)
+        if !trimmed.contains(" ") {
+            if trimmed.range(of: #"^\S+://\S+"#, options: .regularExpression) != nil {
+                return false
+            }
+            if trimmed.hasPrefix("www.") { return false }
+        }
+
+        // Filesystem path (starts with "/" and has ≥ 3 slashes)
+        if trimmed.hasPrefix("/") && trimmed.filter({ $0 == "/" }).count >= 3 {
+            return false
+        }
+
+        // Low letter+space ratio catches UUID blobs, hex strings, mostly-numeric entries.
+        // Example: "uuid=EF757712-3FDF-48F4-B026-DB0AEF04AC2B.jpeg" → ~38 % → rejected.
+        let total = trimmed.unicodeScalars.count
+        let lettersAndSpaces = trimmed.unicodeScalars.filter {
+            CharacterSet.letters.contains($0) || $0 == " "
+        }.count
+        guard Double(lettersAndSpaces) / Double(total) >= 0.65 else { return false }
+
+        return true
+    }
+
+    // MARK: - Relevance filter
+
+    /// Common English function words excluded when measuring topical overlap. These are
+    /// ubiquitous across writing contexts and carry no topic signal.
+    public static let commonEnglishStopwords: Set<String> = [
+        "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with",
+        "by", "from", "as", "is", "are", "was", "were", "be", "been", "being", "have", "has",
+        "had", "do", "does", "did", "will", "would", "could", "should", "may", "might", "can",
+        "it", "its", "this", "that", "these", "those", "i", "you", "he", "she", "we", "they",
+        "my", "your", "his", "her", "our", "their", "which", "who", "what", "when", "where",
+        "how", "all", "not", "more", "some", "about", "up", "out", "if", "no", "so", "than",
+        "very", "just", "also", "there", "here", "then", "too", "into", "through",
+        "even", "new", "get", "go", "first", "because", "over", "see", "know",
+        "me", "him", "us", "them", "am"
+    ]
+
+    /// Filters `samples` to those with non-trivial topical overlap with `beforeCursor`.
+    ///
+    /// The first `recencyFloor` samples are always kept regardless of relevance — they act as
+    /// style anchors: the user's most-recent writing establishes their current tone and recurring
+    /// phrases even when the topic differs. Only samples beyond that floor are subject to the
+    /// Jaccard gate. A negative `recencyFloor` is treated as 0 (no anchors) rather than trapping.
+    ///
+    /// Returns all samples unchanged when `beforeCursor` has fewer than `minimumContentWords`
+    /// non-stopword words (short openers lack enough signal to judge topic relevance).
+    ///
+    /// A sample is kept if the stopword-filtered, digit-filtered Jaccard similarity between its
+    /// word set and `beforeCursor`'s word set is ≥ `jaccardThreshold`.
+    public static func filterByRelevance(
+        _ samples: [String],
+        beforeCursor: String,
+        jaccardThreshold: Double = 0.10,
+        minimumContentWords: Int = 2,
+        recencyFloor: Int = 2
+    ) -> [String] {
+        guard !samples.isEmpty else { return samples }
+        let floor = max(0, min(recencyFloor, samples.count))
+        let anchors = Array(samples.prefix(floor))
+        let candidates = Array(samples.dropFirst(floor))
+        guard !candidates.isEmpty else { return anchors }
+
+        let cursorWords = contentWordSet(beforeCursor)
+        guard cursorWords.count >= minimumContentWords else { return samples }
+
+        let filtered = candidates.filter { sample in
+            let sampleWords = contentWordSet(sample)
+            guard !sampleWords.isEmpty else { return false }
+            let intersection = cursorWords.intersection(sampleWords).count
+            let union = cursorWords.union(sampleWords).count
+            // `sampleWords` is non-empty here, so `union` ≥ 1 and the division below is safe.
+            return Double(intersection) / Double(union) >= jaccardThreshold
+        }
+        return anchors + filtered
+    }
+
+    /// Stopword-filtered, digit-filtered lowercase content words (≥ 2 characters) in `text`.
+    /// Pure-digit tokens ("25", "2024") are excluded — they are weak topic signals and cause
+    /// false matches between topically unrelated samples that share a bare number.
+    static func contentWordSet(_ text: String) -> Set<String> {
+        Set(
+            text.lowercased()
+                .split(whereSeparator: { !$0.isLetter && !$0.isNumber })
+                .map(String.init)
+                .filter {
+                    $0.count >= 2
+                        && !commonEnglishStopwords.contains($0)
+                        && !$0.allSatisfy({ $0.isNumber })
+                }
+        )
+    }
+}
diff --git a/Packages/TokenProfiles/Sources/TokenProfiles/Classification/BiasPolicy.swift b/Packages/TokenProfiles/Sources/TokenProfiles/Classification/BiasPolicy.swift
index 0c4f5ff..3ae6fcd 100644
--- a/Packages/TokenProfiles/Sources/TokenProfiles/Classification/BiasPolicy.swift
+++ b/Packages/TokenProfiles/Sources/TokenProfiles/Classification/BiasPolicy.swift
@@ -38,6 +38,16 @@ public enum BiasPolicy {
     /// Re-enables emoji tokens in emoji mode (cancels `emojiStaticPenalty`).
     public static let emojiEmojiModeDelta: Float = 3.0
 
+    /// Whole-tag markup tokens (Gemma's `<b>`/`</code>` block) leak into prose when the context is
+    /// thin: a leaked `</code>` was shown at logprob −0.35 with legitimate runners-up at −1.7…−3.8, so
+    /// the emoji-sized −3 would not cover the gap. −6 pushes a tag below any plausible prose token
+    /// while staying finite (the output-stage `MarkupTagGuard` is context-aware; this is not).
+    public static let markupTagStaticPenalty: Float = -6.0
+    /// Re-enables markup-tag tokens where markup is working material (cancels the static penalty):
+    /// HTML/Markdown in editors (code mode) and editors running inside a terminal.
+    public static let markupTagCodeModeDelta: Float = 6.0
+    public static let markupTagTerminalModeDelta: Float = 6.0
+
     public static let newlineProseDelta: Float = -2.0
 
     // MARK: - Static bias
@@ -62,6 +72,9 @@ public enum BiasPolicy {
         if flags.contains(.emoji) {
             bias += emojiStaticPenalty
         }
+        if flags.contains(.markupTag) {
+            bias += markupTagStaticPenalty
+        }
         if isRepeatedWhitespace(flags: flags, bytes: bytes) {
             bias += repeatedWhitespaceStaticPenalty
         }
@@ -104,11 +117,15 @@ public enum BiasPolicy {
             if flags.contains(.sentenceEnd) { delta += sentenceEndProseBonus }
             return delta
         case .code:
-            if isRepeatedWhitespace(flags: flags, bytes: bytes) { return repeatedWhitespaceCodeBonus }
-            return 0
+            var delta: Float = 0
+            if isRepeatedWhitespace(flags: flags, bytes: bytes) { delta += repeatedWhitespaceCodeBonus }
+            if flags.contains(.markupTag) { delta += markupTagCodeModeDelta }
+            return delta
         case .terminal:
-            if isRepeatedWhitespace(flags: flags, bytes: bytes) { return repeatedWhitespaceTerminalBonus }
-            return 0
+            var delta: Float = 0
+            if isRepeatedWhitespace(flags: flags, bytes: bytes) { delta += repeatedWhitespaceTerminalBonus }
+            if flags.contains(.markupTag) { delta += markupTagTerminalModeDelta }
+            return delta
         case .emoji:
             if flags.contains(.emoji) { return emojiEmojiModeDelta }
             return 0
diff --git a/Packages/TokenProfiles/Sources/TokenProfiles/Classification/TokenClassifier.swift b/Packages/TokenProfiles/Sources/TokenProfiles/Classification/TokenClassifier.swift
index b20eacd..0a731d3 100644
--- a/Packages/TokenProfiles/Sources/TokenProfiles/Classification/TokenClassifier.swift
+++ b/Packages/TokenProfiles/Sources/TokenProfiles/Classification/TokenClassifier.swift
@@ -49,7 +49,15 @@ public enum TokenClassifier {
 
         var flags = TokenProfileFlags()
 
-        // SPECIAL: control / user-defined / unknown / unused / known role / chat marker.
+        // Reserved/placeholder tokens (e.g. Gemma's `<unused0>`…`<unusedN>`) are never valid output,
+        // but some GGUF conversions fail to set the `.unused` attribute on them (they arrive as
+        // NORMAL/USER_DEFINED), so the attribute checks below miss them and they leak into suggestions
+        // as literal "<unused56>" text. Detect them by rendered byte content as a backstop. Check both
+        // the raw text and the BPE-marker-stripped form so a "▁<unused56>"/"Ġ<unused56>" variant can't
+        // slip the anchored match. See ADR.
+        let isReservedPlaceholder = matchesReservedPlaceholder(rawText) || matchesReservedPlaceholder(visibleText)
+
+        // SPECIAL: control / user-defined / unknown / unused / known role / chat marker / reserved.
         let isSpecial =
             probe.attr.contains(.control)
             || probe.attr.contains(.userDefined)
@@ -58,6 +66,7 @@ public enum TokenClassifier {
             || probe.isControl
             || probe.role != nil
             || matchesChatMarker(rawText)
+            || isReservedPlaceholder
         if isSpecial { flags.insert(.special) }
 
         // STOP: EOS / EOT / any EOG-declared token.
@@ -70,6 +79,14 @@ public enum TokenClassifier {
         // CHAT_MARKER: assistant scaffolding text we never want to emit.
         if matchesChatMarker(rawText) { flags.insert(.chatMarker) }
 
+        // MARKUP_TAG: a whole markup tag baked in as one vocab token (Gemma's `<b>`/`</code>`/…
+        // block at ids 168–237 arrives as NORMAL, like the `<unused56>` case above). Flagged —
+        // not excluded — so `BiasPolicy` can demote it in prose while code/terminal keep the
+        // canonical single-token path for genuine HTML/Markdown editing.
+        if !isSpecial, matchesMarkupTag(rawText) || matchesMarkupTag(visibleText) {
+            flags.insert(.markupTag)
+        }
+
         // INVALID_UTF8 (standalone byte fallback or partial multi-byte token).
         if rawText == nil { flags.insert(.invalidUTF8) }
 
@@ -215,6 +232,47 @@ public enum TokenClassifier {
         return false
     }
 
+    /// Reserved / never-emitted placeholder tokens identified by their *rendered text* rather than a
+    /// tokenizer attribute, because some GGUF conversions don't flag them (notably Gemma's
+    /// `<unused0>`…`<unusedN>` block, which comes through as NORMAL). Kept deliberately narrow —
+    /// only the unambiguous model-internal placeholder forms, so genuine `<tag>` text the user might
+    /// type is unaffected.
+    private static let reservedPlaceholderRegexes: [NSRegularExpression] = [
+        #"^<unused\d+>$"#,            // Gemma reserved slots
+        #"^<reserved[_ ]?\d+>$"#,     // other vendors' reserved blocks
+        #"^<extra_id_\d+>$"#,         // T5-style sentinel tokens
+        #"^<pad>$"#,                  // padding placeholder
+        #"^<mask>$"#                  // masking placeholder
+    ].compactMap { pattern in
+        try? NSRegularExpression(pattern: pattern, options: [.caseInsensitive])
+    }
+
+    /// A token whose entire rendered text (after optional leading whitespace) is one markup tag:
+    /// `<b>`, `</code>`, `<br/>`, … Anchored so partial-bracket text (`<3`, `a<b`) and
+    /// attribute-bearing tags never match; reserved placeholders (`<unused56>`) are special-cased
+    /// out by the caller before this runs.
+    private static let markupTagRegex = try? NSRegularExpression(
+        pattern: #"^\s*</?[a-zA-Z][a-zA-Z0-9]*( ?/)?>$"#,
+        options: []
+    )
+
+    /// Whether `text` is non-empty and matches `markupTagRegex` (false if that regex is unavailable).
+    static func matchesMarkupTag(_ text: String?) -> Bool {
+        guard let regex = markupTagRegex, let candidate = text, !candidate.isEmpty else { return false }
+        return regex.firstMatch(in: candidate, options: [], range: NSRange(candidate.startIndex..., in: candidate)) != nil
+    }
+
+    static func matchesReservedPlaceholder(_ text: String?) -> Bool {
+        // nil or empty text can never be a placeholder.
+        guard let candidate = text, !candidate.isEmpty else { return false }
+        let wholeText = NSRange(candidate.startIndex..., in: candidate)
+        // Each pattern in `reservedPlaceholderRegexes` is tried against the full text; a single hit
+        // is enough to classify the token as a reserved placeholder.
+        return reservedPlaceholderRegexes.contains { regex in
+            regex.firstMatch(in: candidate, options: [], range: wholeText) != nil
+        }
+    }
+
     private static func isPunctuation(_ scalar: Unicode.Scalar) -> Bool {
         // General punctuation categories Pc/Pd/Pe/Pf/Pi/Po/Ps.
         scalar.properties.generalCategory.isPunctuation
diff --git a/Packages/TokenProfiles/Sources/TokenProfiles/Format/ACPFFormat.swift b/Packages/TokenProfiles/Sources/TokenProfiles/Format/ACPFFormat.swift
index 4b96d99..6d5d993 100644
--- a/Packages/TokenProfiles/Sources/TokenProfiles/Format/ACPFFormat.swift
+++ b/Packages/TokenProfiles/Sources/TokenProfiles/Format/ACPFFormat.swift
@@ -14,7 +14,9 @@ public enum ACPF {
     /// file was produced for a different endianness and must be rejected.
     public static let endianSentinel: UInt16 = 0x0102
 
-    /// First (and currently only) schema version.
+    /// On-disk *binary format* version. Bump only when the byte layout of the header, sections, or
+    /// records changes — never for changes to what gets baked into those bytes. Cache-busting for
+    /// *classifier output* changes is `generatorVersion`'s job, not this field's (see below).
     public static let currentSchemaVersion: UInt16 = 1
 
     /// One section descriptor per `SectionKind`, in the header's section array.
@@ -54,8 +56,17 @@ public enum ACPF {
     /// bytes. Anything in `0...255` is a real first byte.
     public static let emptyFirstByte: UInt16 = 256
 
-    /// Identifier stamped into the validation section's `generator_version` slot.
-    public static let generatorVersion: String = "keytype-acpf-1.0"
+    /// Identifier stamped into the validation section's `generator_version` slot, and the cache-busting
+    /// key for the *classifier* output. The tokenizer digest covers only vocab bytes, so a change to
+    /// `TokenClassifier` (which decides the `.excluded`/`.special` flags baked into every record and the
+    /// trie) does NOT change the digest and would otherwise leave stale profiles in place.
+    /// `MmapAutocompleteProfile.init` rejects a profile whose stamped `generator_version` differs from
+    /// this, forcing `ProfileGenerator` to rebuild. **Bump this whenever `TokenClassifier` output
+    /// changes**, independent of the binary `currentSchemaVersion`.
+    ///   1.0 → 1.1: reserved-placeholder exclusion by byte content (Gemma `<unusedN>` leak fix).
+    ///   1.1 → 1.2: markup-tag flag + prose demotion for Gemma's single-token HTML-tag block
+    ///              (`</code>` shown in prose contexts; cancelled in code/terminal modes).
+    public static let generatorVersion: String = "keytype-acpf-1.2"
 }
 
 /// Ordinals into the header's section array. **Stable across schema versions** — once an
@@ -373,6 +384,7 @@ public enum ACPFOpenError: Error, Equatable, CustomStringConvertible {
     case modelFamilyMismatch(expected: String, found: String)
     case vocabSizeMismatch(expected: Int, found: Int)
     case tokenizerDigestMismatch(expected: ACPFTokenizerDigestValue, found: ACPFTokenizerDigestValue)
+    case generatorVersionMismatch(expected: String, found: String)
     case malformedSectionPayload(kind: SectionKind, message: String)
 
     public var description: String {
@@ -399,6 +411,8 @@ public enum ACPFOpenError: Error, Equatable, CustomStringConvertible {
             return "ACPF: vocab_size \(found) != expected \(expected)"
         case let .tokenizerDigestMismatch(expected, found):
             return "ACPF: tokenizer digest \(found.hexPrefix) != expected \(expected.hexPrefix)"
+        case let .generatorVersionMismatch(expected, found):
+            return "ACPF: generator_version '\(found)' != expected '\(expected)' (rebuild required)"
         case let .malformedSectionPayload(kind, message):
             return "ACPF: section \(kind) payload is malformed: \(message)"
         }
diff --git a/Packages/TokenProfiles/Sources/TokenProfiles/Storage/MmapAutocompleteProfile.swift b/Packages/TokenProfiles/Sources/TokenProfiles/Storage/MmapAutocompleteProfile.swift
index f8cc4ad..72ca8c9 100644
--- a/Packages/TokenProfiles/Sources/TokenProfiles/Storage/MmapAutocompleteProfile.swift
+++ b/Packages/TokenProfiles/Sources/TokenProfiles/Storage/MmapAutocompleteProfile.swift
@@ -81,14 +81,16 @@ public final class MmapAutocompleteProfile: AutocompleteProfile {
         at url: URL,
         expectedVocabSize: Int? = nil,
         expectedModelFamily: String? = nil,
-        expectedTokenizerDigest: ACPFTokenizerDigestValue? = nil
+        expectedTokenizerDigest: ACPFTokenizerDigestValue? = nil,
+        expectedGeneratorVersion: String? = ACPF.generatorVersion
     ) throws -> MmapAutocompleteProfile {
         let data = try Data(contentsOf: url, options: [.alwaysMapped, .uncached])
         return try MmapAutocompleteProfile(
             data: data,
             expectedVocabSize: expectedVocabSize,
             expectedModelFamily: expectedModelFamily,
-            expectedTokenizerDigest: expectedTokenizerDigest
+            expectedTokenizerDigest: expectedTokenizerDigest,
+            expectedGeneratorVersion: expectedGeneratorVersion
         )
     }
 
@@ -99,14 +101,16 @@ public final class MmapAutocompleteProfile: AutocompleteProfile {
         at url: URL,
         tokenizerVocabSize: Int,
         tokenizerBytes: (TokenID) throws -> [UInt8],
-        expectedModelFamily: String? = nil
+        expectedModelFamily: String? = nil,
+        expectedGeneratorVersion: String? = ACPF.generatorVersion
     ) throws -> MmapAutocompleteProfile {
         let digest = try ACPFTokenizerDigest.digest(vocabSize: tokenizerVocabSize, bytesFor: tokenizerBytes)
         return try open(
             at: url,
             expectedVocabSize: tokenizerVocabSize,
             expectedModelFamily: expectedModelFamily,
-            expectedTokenizerDigest: digest
+            expectedTokenizerDigest: digest,
+            expectedGeneratorVersion: expectedGeneratorVersion
         )
     }
 
@@ -116,7 +120,8 @@ public final class MmapAutocompleteProfile: AutocompleteProfile {
         data: Data,
         expectedVocabSize: Int? = nil,
         expectedModelFamily: String? = nil,
-        expectedTokenizerDigest: ACPFTokenizerDigestValue? = nil
+        expectedTokenizerDigest: ACPFTokenizerDigestValue? = nil,
+        expectedGeneratorVersion: String? = ACPF.generatorVersion
     ) throws {
         // 1. Header sanity.
         let header = try data.withUnsafeBytes { try ACPFHeaderRaw(reading: $0) }
@@ -172,6 +177,20 @@ public final class MmapAutocompleteProfile: AutocompleteProfile {
             throw ACPFOpenError.tokenizerDigestMismatch(expected: expectedDigest, found: digest)
         }
 
+        // 5b. Generator-version cache-buster. The tokenizer digest covers only vocab bytes, so a
+        //     `TokenClassifier` logic change (which alters the baked `.excluded`/`.special` flags and
+        //     trie) leaves the digest untouched. The VALIDATION section's `generator_version` string
+        //     captures that logic version; reject a profile stamped with anything other than the
+        //     build's expected value so `ProfileGenerator` rebuilds. An empty/missing stamp (older
+        //     profiles, or a section without validation strings) skips the check for back-compat —
+        //     only a present, non-empty, non-matching value is a hard mismatch.
+        if let expectedGenerator = expectedGeneratorVersion, let validation = sections[.validation] {
+            let stamped = MmapAutocompleteProfile.readValidationStrings(data: data, section: validation).generatorVersion
+            if !stamped.isEmpty, stamped != expectedGenerator {
+                throw ACPFOpenError.generatorVersionMismatch(expected: expectedGenerator, found: stamped)
+            }
+        }
+
         // 6. Parse trie preamble (nodeCount, edgeCount) and compute payload offsets.
         let trieSection = sections[.prefixTrie]!
         let trieOffset = Int(trieSection.offset)
@@ -407,7 +426,16 @@ public final class MmapAutocompleteProfile: AutocompleteProfile {
 
     /// Returns the validation section's `(ggufMetadataDigest, generatorVersion, builderHost)` triple.
     public func validationStrings() -> (ggufMetadataDigest: String, generatorVersion: String, builderHost: String) {
-        let section = sections[.validation]!
+        Self.readValidationStrings(data: data, section: sections[.validation]!)
+    }
+
+    /// Parses the three length-prefixed strings in the VALIDATION section payload. Shared by
+    /// `validationStrings()` and the init-time `generator_version` check so both decode identically.
+    /// A truncated/empty section yields empty strings (never a crash).
+    static func readValidationStrings(
+        data: Data,
+        section: ACPFSectionRaw
+    ) -> (ggufMetadataDigest: String, generatorVersion: String, builderHost: String) {
         var cursor = Int(section.offset)
         let end = cursor + Int(section.length)
 
diff --git a/Packages/TokenProfiles/Sources/TokenProfiles/TokenProfiles.swift b/Packages/TokenProfiles/Sources/TokenProfiles/TokenProfiles.swift
index 48347dd..7497a90 100644
--- a/Packages/TokenProfiles/Sources/TokenProfiles/TokenProfiles.swift
+++ b/Packages/TokenProfiles/Sources/TokenProfiles/TokenProfiles.swift
@@ -20,6 +20,10 @@ public struct TokenProfileFlags: OptionSet, Equatable {
     public static let emoji = TokenProfileFlags(rawValue: 1 << 9)
     public static let chatMarker = TokenProfileFlags(rawValue: 1 << 10)
     public static let invalidUTF8 = TokenProfileFlags(rawValue: 1 << 11)
+    /// A whole markup tag as a single vocab token (Gemma bakes `<b>`, `</code>`, `<table>`, … into
+    /// its vocabulary as dedicated tokens the GGUF reports as NORMAL). Not excluded — markup is
+    /// legitimate output in code/terminal modes — but down-biased in prose (see `BiasPolicy`).
+    public static let markupTag = TokenProfileFlags(rawValue: 1 << 12)
 }
 
 public struct TokenProfileRecord: Equatable {
diff --git a/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/BiasPolicyMarkupTagTests.swift b/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/BiasPolicyMarkupTagTests.swift
new file mode 100644
index 0000000..9da3a80
--- /dev/null
+++ b/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/BiasPolicyMarkupTagTests.swift
@@ -0,0 +1,46 @@
+import XCTest
+@testable import TokenProfiles
+
+/// Mode-aware bias for `.markupTag` tokens: penalised in prose (where `</code>` leaked into
+/// suggestions), fully re-enabled in code/terminal where markup is working material. Mirrors the
+/// emoji penalty/cancel pattern.
+final class BiasPolicyMarkupTagTests: XCTestCase {
+
+    private let tagBytes = Array("</code>".utf8)
+
+    func testStaticBiasCarriesMarkupTagPenalty() {
+        let bias = BiasPolicy.staticBias(flags: [.markupTag], displayWidth: 7, bytes: tagBytes)
+        XCTAssertEqual(bias, BiasPolicy.markupTagStaticPenalty)
+    }
+
+    func testCodeModeDeltaCancelsThePenalty() {
+        let bias = BiasPolicy.staticBias(flags: [.markupTag], displayWidth: 7, bytes: tagBytes)
+        let delta = BiasPolicy.delta(flags: [.markupTag], mode: .code, bytes: tagBytes)
+        XCTAssertEqual(bias + delta, 0, "markup tags must be fully re-enabled in code mode")
+    }
+
+    func testTerminalModeDeltaCancelsThePenalty() {
+        let bias = BiasPolicy.staticBias(flags: [.markupTag], displayWidth: 7, bytes: tagBytes)
+        let delta = BiasPolicy.delta(flags: [.markupTag], mode: .terminal, bytes: tagBytes)
+        XCTAssertEqual(bias + delta, 0, "markup tags must be fully re-enabled in terminal mode")
+    }
+
+    func testProseModeKeepsThePenalty() {
+        XCTAssertEqual(BiasPolicy.delta(flags: [.markupTag], mode: .prose, bytes: tagBytes), 0)
+    }
+
+    func testCorrectionModeKeepsThePenalty() {
+        XCTAssertEqual(BiasPolicy.delta(flags: [.markupTag], mode: .correction, bytes: tagBytes), 0)
+    }
+
+    func testPenaltyOutweighsObservedLeakMargin() {
+        // The leaked `</code>` was shown at logprob −0.35 with legitimate runners-up at −1.7…−3.8;
+        // the penalty must exceed that gap or the leak persists in flat distributions.
+        XCTAssertLessThanOrEqual(BiasPolicy.markupTagStaticPenalty, -4.0)
+    }
+
+    func testExcludedTokenStillInfinitelyNegativeRegardlessOfMarkupFlag() {
+        let bias = BiasPolicy.staticBias(flags: [.excluded, .special, .markupTag], displayWidth: 10, bytes: Array("<unused56>".utf8))
+        XCTAssertEqual(bias, -Float.infinity)
+    }
+}
diff --git a/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/ClassifierFlagTests.swift b/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/ClassifierFlagTests.swift
index fe9d444..f41b334 100644
--- a/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/ClassifierFlagTests.swift
+++ b/Packages/TokenProfiles/Tests/TokenProfilesTests/Classification/ClassifierFlagTests.swift
@@ -203,6 +203,59 @@ final class ClassifierFlagTests: XCTestCase {
         XCTAssertTrue(cls.flags.contains(.excluded))
     }
 
+    // MARK: - Reserved placeholders flagged by byte content (GGUF attribute missing)
+
+    func testGemmaUnusedPlaceholderExcludedByByteContent() {
+        // Regression: Gemma ships <unusedN> with a NORMAL attribute (no .unused), so they once leaked
+        // as literal text. Their rendered bytes alone must yield both special and excluded.
+        let flags = classify(u8("<unused56>"), attr: .normal).flags
+        XCTAssertTrue(flags.contains(.special), "reserved placeholder must be special")
+        XCTAssertTrue(flags.contains(.excluded), "reserved placeholder must be excluded from sampling")
+    }
+
+    func testOtherReservedPlaceholderFormsExcluded() {
+        for text in ["<unused0>", "<reserved_12>", "<extra_id_3>", "<pad>", "<mask>"] {
+            XCTAssertTrue(classify(u8(text), attr: .normal).flags.contains(.excluded), "\(text) should be excluded")
+        }
+    }
+
+    func testGenuineAngleBracketTextIsNotExcluded() {
+        // Guard against over-matching: everyday markup and text a user could type must remain sampleable.
+        for text in ["<h2>", "</code>", "<div>", "<3"] {
+            XCTAssertFalse(classify(u8(text), attr: .normal).flags.contains(.excluded), "\(text) should NOT be excluded")
+        }
+    }
+
+    // MARK: - Markup-tag tokens (Gemma's single-token HTML-tag block, ids 168–237)
+
+    func testWholeTagTokensGetMarkupTagFlag() {
+        // Regression for `</code>` surfacing in prose: whole-tag tokens come in as NORMAL single tokens.
+        // They get the flag (so prose can penalise them) yet remain sampleable for code/terminal modes.
+        for tag in ["<b>", "</code>", "<table>", "</blockquote>", "<br/>"] {
+            let flags = classify(u8(tag), attr: .normal).flags
+            XCTAssertTrue(flags.contains(.markupTag), "\(tag) should be flagged markupTag")
+            XCTAssertFalse(flags.contains(.excluded), "\(tag) must NOT be excluded")
+        }
+    }
+
+    func testSentencePieceSpacePrefixedTagGetsMarkupTagFlag() {  // U+2581 "▁" is the SentencePiece space marker
+        XCTAssertTrue(classify(u8("\u{2581}<b>"), attr: .normal).flags.contains(.markupTag))
+    }
+
+    func testNonTagAngleBracketTextIsNotMarkupTag() {
+        for text in ["<3", "a<b", "code>", #"<a href="x">"#, "hello", "<", ">"] {
+            XCTAssertFalse(classify(u8(text), attr: .normal).flags.contains(.markupTag), "\(text) should NOT be markupTag")
+        }
+    }
+
+    func testReservedPlaceholderIsNotMarkupTag() {
+        // Although `<unused56>` is tag-shaped, it is special/excluded; the two flags must never
+        // overlap, which keeps each bias adjustment tied to exactly one purpose.
+        let flags = classify(u8("<unused56>"), attr: .normal).flags
+        XCTAssertTrue(flags.contains(.excluded))
+        XCTAssertFalse(flags.contains(.markupTag))
+    }
+
     // MARK: - Display width
 
     func testDisplayWidthOfASCII() {
diff --git a/Packages/TokenProfiles/Tests/TokenProfilesTests/Format/GeneratorVersionTests.swift b/Packages/TokenProfiles/Tests/TokenProfilesTests/Format/GeneratorVersionTests.swift
new file mode 100644
index 0000000..89bae40
--- /dev/null
+++ b/Packages/TokenProfiles/Tests/TokenProfilesTests/Format/GeneratorVersionTests.swift
@@ -0,0 +1,74 @@
+import XCTest
+@testable import TokenProfiles
+
+/// Cache-busting via the VALIDATION section's `generator_version` string. The tokenizer digest
+/// covers only vocab bytes, so a `TokenClassifier` logic change (which alters the baked
+/// `.excluded`/`.special` flags and trie) leaves the digest unchanged. `generator_version` captures
+/// that logic version; `MmapAutocompleteProfile.init` rejects a profile stamped with anything other
+/// than the build's expected value so `ProfileGenerator` rebuilds. See `ACPF.generatorVersion`.
+final class GeneratorVersionTests: XCTestCase {
+    /// The binary format version stays at 1 — the P0 classifier change is a *content* change, busted
+    /// via `generatorVersion`, not the on-disk layout. Guards against re-introducing the schema bump.
+    func testSchemaVersionRemainsOne() {
+        XCTAssertEqual(ACPF.currentSchemaVersion, 1)
+    }
+
+    private func encode(generatorVersion: String) throws -> Data {
+        let built = SyntheticVocabFixture.build()
+        return try ACPFWriter.encode(ACPFProfileInput(
+            modelFamily: built.modelFamily,
+            vocabSize: built.vocabSize,
+            tokenizerDigest: built.digest,
+            entries: built.entries,
+            ggufMetadataDigest: "synthetic-gguf-digest",
+            generatorVersion: generatorVersion,
+            builderHost: "synthetic-host",
+            buildTimestamp: Date(timeIntervalSince1970: 1_716_000_000),
+            headerFlags: 0
+        ))
+    }
+
+    /// Fails unless `error` is `generatorVersionMismatch` carrying exactly the given stamps.
+    private func assertMismatch(_ error: Error, expected: String, found: String, line: UInt = #line) {
+        guard case let ACPFOpenError.generatorVersionMismatch(gotExpected, gotFound) = error
+        else { return XCTFail("expected generatorVersionMismatch, got \(error)", line: line) }
+        XCTAssertEqual(gotExpected, expected, line: line)
+        XCTAssertEqual(gotFound, found, line: line)
+    }
+
+    func testMatchingGeneratorVersionOpens() throws {
+        let data = try encode(generatorVersion: "keytype-acpf-1.1")
+        XCTAssertNoThrow(try MmapAutocompleteProfile(data: data, expectedGeneratorVersion: "keytype-acpf-1.1"))
+    }
+
+    func testStaleGeneratorVersionIsRejected() throws {
+        let data = try encode(generatorVersion: "keytype-acpf-1.0")
+        XCTAssertThrowsError(
+            try MmapAutocompleteProfile(data: data, expectedGeneratorVersion: "keytype-acpf-1.1")
+        ) { assertMismatch($0, expected: "keytype-acpf-1.1", found: "keytype-acpf-1.0") }
+    }
+
+    /// Passing `nil` opts out of the check (format round-trip tests that write arbitrary versions).
+    func testNilExpectationSkipsCheck() throws {
+        let data = try encode(generatorVersion: "anything-goes")
+        XCTAssertNoThrow(try MmapAutocompleteProfile(data: data, expectedGeneratorVersion: nil))
+    }
+
+    /// Back-compat: an empty/unstamped generator_version skips the check instead of being rejected, so older
+    /// profiles without the stamp still open. Only a present, non-empty, non-matching value is a hard mismatch.
+    func testEmptyStampSkipsCheckForBackCompat() throws {
+        let data = try encode(generatorVersion: "")
+        XCTAssertNoThrow(try MmapAutocompleteProfile(data: data, expectedGeneratorVersion: "keytype-acpf-1.1"))
+    }
+
+    /// The default expectation is the build's current `ACPF.generatorVersion`: a profile this build produces
+    /// opens with no explicit argument, and a stale one is rejected specifically as a version mismatch.
+    func testDefaultExpectationUsesCurrentBuildVersion() throws {
+        let current = try encode(generatorVersion: ACPF.generatorVersion)
+        XCTAssertNoThrow(try MmapAutocompleteProfile(data: current))
+        let stale = try encode(generatorVersion: "keytype-acpf-0.0")
+        XCTAssertThrowsError(try MmapAutocompleteProfile(data: stale)) {
+            assertMismatch($0, expected: ACPF.generatorVersion, found: "keytype-acpf-0.0")
+        }
+    }
+}