From 898ab610af71f7a3e1d3f07c4f37f50eb52b4f41 Mon Sep 17 00:00:00 2001
From: Michael Ramos <mdramos8@gmail.com>
Date: Tue, 14 Apr 2026 15:35:21 -0700
Subject: [PATCH 1/4] feat(plan-diff): word-level inline diff rendering

Add a two-pass hierarchical diff (diffLines outer pass,
diffWordsWithSpace inner pass) so that modified plan blocks render with
inline insertions/deletions in context, instead of showing the whole
old block struck through above the whole new block. Resolves #560.

Engine (packages/ui/utils/planDiffEngine.ts):
- computeInlineDiff runs a second-pass word diff on modified blocks
  that pass a whitelist gate (paragraph/heading/list-item with matching
  structural fields).
- Sentinel substitution atomizes inline-code spans, markdown links, and
  fenced code blocks before diffWordsWithSpace runs, so diff markers
  never land inside backticks, link hrefs, or across fence boundaries.
  Fence regex uses a backreference so variable-length (e.g., 4-backtick
  wrapping 3-backtick) fences are matched atomically.
- Annotation context for an inline-diffed modified block now captures
  both old and new content (as "- old" / "+ new" lines, built by
  getBlockContent in PlanCleanDiffView.tsx) so comments on
  struck-through words preserve that text in the exported feedback.

Renderer (packages/ui/components/plan-diff/PlanCleanDiffView.tsx):
- New InlineModifiedBlock component renders a modified block as one
  structural wrapper with <ins>/<del> wrappers inside, parsed through
  the local InlineMarkdown in a single pass so markdown delimiter pairs
  survive across token boundaries.
- InlineMarkdown extended to recognize <ins>/<del> tag passthrough
  (with recursive parsing of the wrapped content) and to recursively
  parse link anchor text so diff markers inside links render correctly.
- The plain-text stop-char scanner now also stops at '<', so the loop
  re-enters and dispatches <ins>/<del> tags instead of swallowing them
  as plain text.
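- Click-to-annotate now works in every editor mode, including selection
  (previously the block-level onClick was disabled there). Redline and
  quickLabel clicks behave as before; every other mode opens the comment
  popover directly from the block-level onClick.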

Mode switcher (packages/ui/components/plan-diff/PlanDiffModeSwitcher.tsx):
- Adds a third "Classic" tab between Rendered and Raw. Rendered is the
  new word-level default (labeled "exp"); Classic forces the legacy
  block-level stacked fallback for every modified block.

Styling (packages/ui/theme.css, packages/editor/index.css):
- plan-diff-word-added / plan-diff-word-removed utility classes for
  inline highlights with box-decoration-break: clone across line wraps.
- Inline <code> inside the diff wrappers picks up a tinted background
  so code-pill changes read unambiguously green/red.
- New plan-diff-modified class (amber border) for inline-diff modified
  blocks, matching the GitHub/VSCode convention of green=add,
  red=remove, yellow=both.

Tests (packages/ui/utils/planDiffEngine.test.ts):
- 18 tests covering block-level diff behavior, the engine's
  qualification gate, structural-field matching, sentinel round-trip
  for inline code, and token content for common edit patterns. Link and
  fence sentinels are not yet covered by a dedicated test.

For provenance purposes, this commit was AI-assisted.
---
 bun.lock                                      |  16 +-
 packages/editor/index.css                     |  14 +
 .../plan-diff/PlanCleanDiffView.tsx           | 251 +++++++++++++---
 .../plan-diff/PlanDiffModeSwitcher.tsx        |  30 +-
 .../components/plan-diff/PlanDiffViewer.tsx   |   7 +-
 packages/ui/theme.css                         |  34 +++
 packages/ui/utils/planDiffEngine.test.ts      | 178 +++++++++++
 packages/ui/utils/planDiffEngine.ts           | 283 +++++++++++++++++-
 8 files changed, 761 insertions(+), 52 deletions(-)
 create mode 100644 packages/ui/utils/planDiffEngine.test.ts
diff --git a/bun.lock b/bun.lock
index d5274736..633e9ebd 100644
--- a/bun.lock
+++ b/bun.lock
@@ -5,18 +5,18 @@
     "": {
       "name": "plannotator",
       "dependencies": {
-        "@anthropic-ai/claude-agent-sdk": "0.2.92",
-        "@openai/codex-sdk": "0.118.0",
+        "@anthropic-ai/claude-agent-sdk": "^0.2.92",
+        "@openai/codex-sdk": "^0.118.0",
         "@opencode-ai/sdk": "^1.3.0",
         "@pierre/diffs": "^1.1.12",
-        "diff": "8.0.4",
+        "diff": "^8.0.4",
         "dockview-react": "^5.2.0",
         "dompurify": "^3.3.3",
-        "marked": "17.0.6",
+        "marked": "^17.0.6",
       },
       "devDependencies": {
         "@types/dompurify": "^3.2.0",
-        "@types/node": "25.5.2",
+        "@types/node": "^25.5.2",
         "@types/turndown": "^5.0.6",
         "bun-types": "^1.3.11",
       },
@@ -64,7 +64,7 @@
     },
     "apps/opencode-plugin": {
       "name": "@plannotator/opencode",
-      "version": "0.17.9",
+      "version": "0.17.10",
       "dependencies": {
         "@opencode-ai/plugin": "^1.1.10",
       },
@@ -86,7 +86,7 @@
     },
     "apps/pi-extension": {
       "name": "@plannotator/pi-extension",
-      "version": "0.17.9",
+      "version": "0.17.10",
       "dependencies": {
         "@joplin/turndown-plugin-gfm": "^1.0.64",
         "turndown": "^7.2.4",
@@ -176,7 +176,7 @@
     },
     "packages/server": {
       "name": "@plannotator/server",
-      "version": "0.17.9",
+      "version": "0.17.10",
       "dependencies": {
         "@plannotator/ai": "workspace:*",
         "@plannotator/shared": "workspace:*",
diff --git a/packages/editor/index.css b/packages/editor/index.css
index f10a9317..35cefb9b 100644
--- a/packages/editor/index.css
+++ b/packages/editor/index.css
@@ -148,6 +148,20 @@ pre code.hljs .hljs-code {
   background: oklch(from var(--destructive) l c h / 0.06);
 }
 
+/* Clean diff view - modified content (mix of additions and deletions in one
+   block, rendered inline via word-level diff). Amber/yellow border matches
+   the GitHub / VSCode convention that green=add, red=remove, yellow=both. */
+.plan-diff-modified {
+  border-left: 3px solid var(--warning);
+  background: oklch(from var(--warning) l c h / 0.06);
+  padding-left: 0.75rem;
+  border-radius: 0 0.25rem 0.25rem 0;
+  margin: 0.25rem 0;
+}
+.light .plan-diff-modified {
+  background: oklch(from var(--warning) l c h / 0.06);
+}
+
 /* Clean diff view - unchanged (dimmed) */
 .plan-diff-unchanged {
   /* handled via opacity in component */
diff --git a/packages/ui/components/plan-diff/PlanCleanDiffView.tsx b/packages/ui/components/plan-diff/PlanCleanDiffView.tsx
index e1747ce0..01687ff9 100644
--- a/packages/ui/components/plan-diff/PlanCleanDiffView.tsx
+++ b/packages/ui/components/plan-diff/PlanCleanDiffView.tsx
@@ -12,7 +12,11 @@ import { parseMarkdownToBlocks, computeListIndices } from "../../utils/parser";
 import { ListMarker } from "../ListMarker";
 import type { Block, Annotation, EditorMode, ImageAttachment } from "../../types";
 import { AnnotationType } from "../../types";
-import type { PlanDiffBlock } from "../../utils/planDiffEngine";
+import type {
+  PlanDiffBlock,
+  InlineDiffToken,
+  InlineDiffWrap,
+} from "../../utils/planDiffEngine";
 import type { QuickLabel } from "../../utils/quickLabels";
 import { AnnotationToolbar } from "../AnnotationToolbar";
 import { CommentPopover } from "../CommentPopover";
@@ -26,6 +30,13 @@ interface PlanCleanDiffViewProps {
   onSelectAnnotation?: (id: string | null) => void;
   selectedAnnotationId?: string | null;
   mode?: EditorMode;
+  /**
+   * When true (default), modified blocks that passed the qualification gate
+   * render with inline word-level highlights. When false, every modified
+   * block falls back to the stacked old-struck / new-green layout — the
+   * "Classic" diff view exposed in the mode switcher.
+   */
+  wordLevel?: boolean;
 }
 
 export const PlanCleanDiffView: React.FC<PlanCleanDiffViewProps> = ({
@@ -35,6 +46,7 @@ export const PlanCleanDiffView: React.FC<PlanCleanDiffViewProps> = ({
   onSelectAnnotation,
   selectedAnnotationId = null,
   mode = "selection",
+  wordLevel = true,
 }) => {
   const modeRef = useRef<EditorMode>(mode);
   const onAddAnnotationRef = useRef(onAddAnnotation);
@@ -111,12 +123,26 @@ export const PlanCleanDiffView: React.FC<PlanCleanDiffViewProps> = ({
     return set;
   }, [annotations]);
 
-  /** Resolve content for a diff block section (handles modified blocks with old/new sides) */
-  const getBlockContent = useCallback((block: PlanDiffBlock, diffContext: Annotation['diffContext']) =>
-    block.type === 'modified' && diffContext === 'removed'
-      ? block.oldContent || block.content
-      : block.content
-  , []);
+  /**
+   * Resolve content for a diff block section (handles modified blocks with
+   * old/new sides). For inline-diff modified blocks — one clickable target
+   * with diffContext 'modified' — we capture BOTH sides in git-diff shape
+   * so comments about a struck-through deleted word preserve that word in
+   * the exported feedback, instead of sending only the new content.
+   */
+  const getBlockContent = useCallback((block: PlanDiffBlock, diffContext: Annotation['diffContext']) => {
+    if (block.type === 'modified') {
+      if (diffContext === 'removed') return block.oldContent || block.content;
+      if (
+        diffContext === 'modified' &&
+        block.oldContent &&
+        block.oldContent !== block.content
+      ) {
+        return `- ${block.oldContent.trimEnd()}\n+ ${block.content.trimEnd()}`;
+      }
+    }
+    return block.content;
+  }, []);
 
   const createDiffAnnotation = useCallback((
     block: PlanDiffBlock,
@@ -240,7 +266,10 @@ export const PlanCleanDiffView: React.FC<PlanCleanDiffViewProps> = ({
   const handleBlockClick = useCallback((block: PlanDiffBlock, index: number, element: HTMLElement, diffContext: Annotation['diffContext']) => {
     if (modeRef.current === 'redline') {
       createDiffAnnotation(block, index, diffContext, AnnotationType.DELETION);
-    } else if (modeRef.current === 'comment') {
+    } else if (modeRef.current === 'quickLabel') {
+      setQuickLabelPicker({ anchorEl: element, block, index, diffContext });
+    } else {
+      // selection or comment → open the comment popover directly on click
       const content = getBlockContent(block, diffContext);
       setCommentPopover({
         anchorEl: element,
@@ -249,8 +278,6 @@ export const PlanCleanDiffView: React.FC<PlanCleanDiffViewProps> = ({
         index,
         diffContext,
       });
-    } else if (modeRef.current === 'quickLabel') {
-      setQuickLabelPicker({ anchorEl: element, block, index, diffContext });
     }
   }, [createDiffAnnotation, getBlockContent]);
 
@@ -267,9 +294,10 @@ export const PlanCleanDiffView: React.FC<PlanCleanDiffViewProps> = ({
           hoveredIndex={hoveredBlock?.index ?? null}
           hoveredDiffContext={hoveredBlock?.diffContext}
           isBlockAnnotated={isBlockAnnotated}
+          wordLevel={wordLevel}
           onHover={onAddAnnotation ? (el, diffContext) => handleHover(el, block, index, diffContext) : undefined}
           onLeave={onAddAnnotation ? handleLeave : undefined}
-          onClick={onAddAnnotation && mode !== 'selection' ? (el, diffContext) => handleBlockClick(block, index, el, diffContext) : undefined}
+          onClick={onAddAnnotation ? (el, diffContext) => handleBlockClick(block, index, el, diffContext) : undefined}
         />
       ))}
 
@@ -326,18 +354,20 @@ interface DiffBlockRendererProps {
   hoveredIndex: number | null;
   hoveredDiffContext?: Annotation['diffContext'];
   isBlockAnnotated: (index: number) => boolean;
+  /** When false, force block-level fallback even if inlineTokens is populated. */
+  wordLevel: boolean;
   onHover?: (element: HTMLElement, diffContext: Annotation['diffContext']) => void;
   onLeave?: () => void;
   onClick?: (element: HTMLElement, diffContext: Annotation['diffContext']) => void;
 }
 
 const DiffBlockRenderer: React.FC<DiffBlockRendererProps> = ({
-  block, index, hoveredIndex, hoveredDiffContext, isBlockAnnotated, onHover, onLeave, onClick,
+  block, index, hoveredIndex, hoveredDiffContext, isBlockAnnotated, wordLevel, onHover, onLeave, onClick,
 }) => {
   const hoverProps = (diffContext: Annotation['diffContext']) => onHover ? {
-    onMouseEnter: (e: React.MouseEvent<HTMLDivElement>) => onHover(e.currentTarget, diffContext),
+    onMouseEnter: (e: React.MouseEvent<HTMLElement>) => onHover(e.currentTarget, diffContext),
     onMouseLeave: () => onLeave?.(),
-    onClick: onClick ? (e: React.MouseEvent<HTMLDivElement>) => onClick(e.currentTarget, diffContext) : undefined,
+    onClick: onClick ? (e: React.MouseEvent<HTMLElement>) => onClick(e.currentTarget, diffContext) : undefined,
     style: { cursor: 'pointer' } as React.CSSProperties,
   } : {};
 
@@ -381,6 +411,21 @@ const DiffBlockRenderer: React.FC<DiffBlockRendererProps> = ({
       );
 
     case "modified":
+      // When the engine populated inlineTokens, we render a single in-context
+      // block with <ins>/<del> spans inside the structural wrapper. Falls
+      // back to the stacked strike-through rendering when tokens are absent
+      // (gate rejected: code/table/structural mismatch/inline-code hazard).
+      if (wordLevel && block.inlineTokens && block.inlineWrap) {
+        return (
+          <InlineModifiedBlock
+            tokens={block.inlineTokens}
+            wrap={block.inlineWrap}
+            index={index}
+            ringClass={ringClass('modified')}
+            hoverProps={hoverProps('modified')}
+          />
+        );
+      }
       return (
         <div data-diff-block-index={index}>
           <div
@@ -403,6 +448,119 @@ const DiffBlockRenderer: React.FC<DiffBlockRendererProps> = ({
   }
 };
 
+// --- Shared block-rendering style helpers ---
+// Kept as module-scope constants so InlineModifiedBlock and SimpleBlockRenderer
+// share a single source of truth for heading/paragraph/list-item styling.
+
+const HEADING_STYLE_BY_LEVEL: Record<number, string> = {
+  1: "text-2xl font-bold mb-4 mt-6 first:mt-0 tracking-tight",
+  2: "text-xl font-semibold mb-3 mt-8 text-foreground/90",
+  3: "text-base font-semibold mb-2 mt-6 text-foreground/80",
+};
+const HEADING_STYLE_FALLBACK = "text-base font-semibold mb-2 mt-4";
+const headingStyleFor = (level: number): string =>
+  HEADING_STYLE_BY_LEVEL[level] || HEADING_STYLE_FALLBACK;
+
+const PARAGRAPH_CLASS = "mb-4 leading-relaxed text-foreground/90 text-[15px]";
+const LIST_ITEM_ROW_CLASS = "flex gap-3 my-1.5";
+const listItemIndentRem = (level: number): string => `${level * 1.25}rem`;
+const listItemTextClass = (isCheckbox: boolean, checked?: boolean): string =>
+  `text-sm leading-relaxed ${isCheckbox && checked ? "text-muted-foreground line-through" : "text-foreground/90"}`;
+
+// --- Inline word-diff renderer for modified blocks ---
+
+interface InlineModifiedBlockProps {
+  tokens: InlineDiffToken[];
+  wrap: InlineDiffWrap;
+  index: number;
+  ringClass: string;
+  hoverProps: {
+    onMouseEnter?: (e: React.MouseEvent<HTMLElement>) => void;
+    onMouseLeave?: () => void;
+    onClick?: (e: React.MouseEvent<HTMLElement>) => void;
+    style?: React.CSSProperties;
+  };
+}
+
+/**
+ * Renders a 'modified' diff block in-context: one structural wrapper
+ * (h1-h6, p, or list-item div) containing a single InlineMarkdown parse
+ * over a unified string with <ins>/<del> tags wrapping changed tokens.
+ * Preserves markdown AST context across token boundaries (bold pairs,
+ * links) which per-token rendering would break.
+ */
+const InlineModifiedBlock: React.FC<InlineModifiedBlockProps> = ({
+  tokens,
+  wrap,
+  index,
+  ringClass,
+  hoverProps,
+}) => {
+  const unified = tokens
+    .map((t) => {
+      if (t.type === "added") return `<ins>${t.value}</ins>`;
+      if (t.type === "removed") return `<del>${t.value}</del>`;
+      return t.value;
+    })
+    .join("");
+
+  // Modified blocks rendered inline carry BOTH additions and deletions, so
+  // their border/background uses the amber "modified" class — not the green
+  // "added" one. Inline <ins>/<del> word highlights render on top unchanged.
+  const wrapperBase = `plan-diff-modified transition-shadow ${ringClass}`;
+  const { style: hoverStyle, ...hoverRest } = hoverProps;
+
+  if (wrap.type === "heading") {
+    const level = wrap.level || 1;
+    const Tag = `h${level}` as keyof React.JSX.IntrinsicElements;
+    return (
+      <Tag
+        data-diff-block-index={index}
+        className={`${headingStyleFor(level)} ${wrapperBase}`}
+        style={hoverStyle}
+        {...hoverRest}
+      >
+        <InlineMarkdown text={unified} />
+      </Tag>
+    );
+  }
+
+  if (wrap.type === "list-item") {
+    const listLevel = wrap.listLevel || 0;
+    const isCheckbox = wrap.checked !== undefined;
+    return (
+      <div
+        data-diff-block-index={index}
+        className={`${LIST_ITEM_ROW_CLASS} ${wrapperBase}`}
+        style={{ marginLeft: listItemIndentRem(listLevel), ...hoverStyle }}
+        {...hoverRest}
+      >
+        <ListMarker
+          level={listLevel}
+          ordered={wrap.ordered}
+          orderedIndex={wrap.orderedStart ?? 1}
+          checked={wrap.checked}
+        />
+        <span className={listItemTextClass(isCheckbox, wrap.checked)}>
+          <InlineMarkdown text={unified} />
+        </span>
+      </div>
+    );
+  }
+
+  // paragraph
+  return (
+    <p
+      data-diff-block-index={index}
+      className={`${PARAGRAPH_CLASS} ${wrapperBase}`}
+      style={hoverStyle}
+      {...hoverRest}
+    >
+      <InlineMarkdown text={unified} />
+    </p>
+  );
+};
+
 // --- Rendering components (unchanged from main) ---
 
 const MarkdownChunk: React.FC<{ content: string }> = ({ content }) => {
@@ -435,16 +593,10 @@ const MarkdownChunk: React.FC<{ content: string }> = ({ content }) => {
 const SimpleBlockRenderer: React.FC<{ block: Block; orderedIndex?: number | null }> = ({ block, orderedIndex }) => {
   switch (block.type) {
     case "heading": {
-      const Tag = `h${block.level || 1}` as keyof React.JSX.IntrinsicElements;
-      const styles =
-        {
-          1: "text-2xl font-bold mb-4 mt-6 first:mt-0 tracking-tight",
-          2: "text-xl font-semibold mb-3 mt-8 text-foreground/90",
-          3: "text-base font-semibold mb-2 mt-6 text-foreground/80",
-        }[block.level || 1] || "text-base font-semibold mb-2 mt-4";
-
+      const level = block.level || 1;
+      const Tag = `h${level}` as keyof React.JSX.IntrinsicElements;
       return (
-        <Tag className={styles}>
+        <Tag className={headingStyleFor(level)}>
           <InlineMarkdown text={block.content} />
         </Tag>
       );
@@ -466,22 +618,20 @@ const SimpleBlockRenderer: React.FC<{ block: Block; orderedIndex?: number | null
     }
 
     case "list-item": {
-      const indent = (block.level || 0) * 1.25;
+      const listLevel = block.level || 0;
       const isCheckbox = block.checked !== undefined;
       return (
         <div
-          className="flex gap-3 my-1.5"
-          style={{ marginLeft: `${indent}rem` }}
+          className={LIST_ITEM_ROW_CLASS}
+          style={{ marginLeft: listItemIndentRem(listLevel) }}
         >
           <ListMarker
-            level={block.level || 0}
+            level={listLevel}
             ordered={block.ordered}
             orderedIndex={orderedIndex}
             checked={block.checked}
           />
-          <span
-            className={`text-sm leading-relaxed ${isCheckbox && block.checked ? "text-muted-foreground line-through" : "text-foreground/90"}`}
-          >
+          <span className={listItemTextClass(isCheckbox, block.checked)}>
             <InlineMarkdown text={block.content} />
           </span>
         </div>
@@ -536,7 +686,7 @@ const SimpleBlockRenderer: React.FC<{ block: Block; orderedIndex?: number | null
 
     default:
       return (
-        <p className="mb-4 leading-relaxed text-foreground/90 text-[15px]">
+        <p className={PARAGRAPH_CLASS}>
           <InlineMarkdown text={block.content} />
         </p>
       );
@@ -580,8 +730,36 @@ const InlineMarkdown: React.FC<{ text: string }> = ({ text }) => {
   let previousChar = "";
 
   while (remaining.length > 0) {
+    // Plan-diff word markers: <ins>...</ins> and <del>...</del>. These are
+    // emitted by PlanCleanDiffView's modified-block renderer when the
+    // diff engine populates `inlineTokens`. Content is recursively parsed
+    // so inline formatting inside a diff token (e.g., **bold** on an
+    // added word) still renders.
+    let match = remaining.match(/^<(ins|del)>([\s\S]+?)<\/\1>/);
+    if (match) {
+      const tag = match[1] as "ins" | "del";
+      const className =
+        tag === "ins" ? "plan-diff-word-added" : "plan-diff-word-removed";
+      if (tag === "ins") {
+        parts.push(
+          <ins key={key++} className={className}>
+            <InlineMarkdown text={match[2]} />
+          </ins>
+        );
+      } else {
+        parts.push(
+          <del key={key++} className={className}>
+            <InlineMarkdown text={match[2]} />
+          </del>
+        );
+      }
+      remaining = remaining.slice(match[0].length);
+      previousChar = match[0][match[0].length - 1] || previousChar;
+      continue;
+    }
+
     // Bold: **text** ([\s\S]+? allows matching across hard line breaks)
-    let match = remaining.match(/^\*\*([\s\S]+?)\*\*/);
+    match = remaining.match(/^\*\*([\s\S]+?)\*\*/);
     if (match) {
       parts.push(
         <strong key={key++} className="font-semibold">
@@ -629,6 +807,9 @@ const InlineMarkdown: React.FC<{ text: string }> = ({ text }) => {
 
     match = remaining.match(/^\[([^\]]+)\]\(([^)]+)\)/);
     if (match) {
+      // Recursively parse the anchor text so <ins>/<del> diff tags (and
+      // other inline markdown) inside the link render correctly instead of
+      // showing up as literal HTML tag text.
       parts.push(
         <a
           key={key++}
@@ -637,7 +818,7 @@ const InlineMarkdown: React.FC<{ text: string }> = ({ text }) => {
           rel="noopener noreferrer"
           className="text-primary underline underline-offset-2 hover:text-primary/80"
         >
-          {match[1]}
+          <InlineMarkdown text={match[1]} />
         </a>
       );
       remaining = remaining.slice(match[0].length);
@@ -658,7 +839,9 @@ const InlineMarkdown: React.FC<{ text: string }> = ({ text }) => {
       continue;
     }
 
-    const nextSpecial = remaining.slice(1).search(/[\*_`\[!]/);
+    // Include '<' so the loop re-enters when an <ins>/<del> tag is next,
+    // rather than swallowing it as plain text.
+    const nextSpecial = remaining.slice(1).search(/[\*_`\[!<]/);
     if (nextSpecial === -1) {
       parts.push(remaining);
       previousChar = remaining[remaining.length - 1] || previousChar;
diff --git a/packages/ui/components/plan-diff/PlanDiffModeSwitcher.tsx b/packages/ui/components/plan-diff/PlanDiffModeSwitcher.tsx
index 55f69684..c5c01ed1 100644
--- a/packages/ui/components/plan-diff/PlanDiffModeSwitcher.tsx
+++ b/packages/ui/components/plan-diff/PlanDiffModeSwitcher.tsx
@@ -6,7 +6,7 @@
 
 import React from "react";
 
-export type PlanDiffMode = "clean" | "raw";
+export type PlanDiffMode = "clean" | "classic" | "raw";
 
 interface PlanDiffModeSwitcherProps {
   mode: PlanDiffMode;
@@ -21,6 +21,7 @@ export const PlanDiffModeSwitcher: React.FC<PlanDiffModeSwitcherProps> = ({
     <div className="inline-flex items-center bg-muted/50 rounded-lg p-0.5 border border-border/30">
       <button
         onClick={() => onChange("clean")}
+        title="Word-level inline diff (experimental)"
         className={`flex items-center gap-1.5 px-2.5 py-1.5 rounded-md text-xs font-medium transition-all ${
           mode === "clean"
             ? "bg-background text-foreground shadow-sm"
@@ -46,6 +47,33 @@ export const PlanDiffModeSwitcher: React.FC<PlanDiffModeSwitcherProps> = ({
           />
         </svg>
         Rendered
+        <span className="text-[9px] uppercase tracking-wider opacity-60 ml-0.5">
+          exp
+        </span>
+      </button>
+      <button
+        onClick={() => onChange("classic")}
+        title="Block-level stacked diff (old above new)"
+        className={`flex items-center gap-1.5 px-2.5 py-1.5 rounded-md text-xs font-medium transition-all ${
+          mode === "classic"
+            ? "bg-background text-foreground shadow-sm"
+            : "text-muted-foreground hover:text-foreground"
+        }`}
+      >
+        <svg
+          className="w-3.5 h-3.5"
+          fill="none"
+          viewBox="0 0 24 24"
+          stroke="currentColor"
+          strokeWidth={2}
+        >
+          <path
+            strokeLinecap="round"
+            strokeLinejoin="round"
+            d="M4 6h16M4 12h16M4 18h16"
+          />
+        </svg>
+        Classic
       </button>
       <button
         onClick={() => onChange("raw")}
diff --git a/packages/ui/components/plan-diff/PlanDiffViewer.tsx b/packages/ui/components/plan-diff/PlanDiffViewer.tsx
index f1a3cf01..2830e843 100644
--- a/packages/ui/components/plan-diff/PlanDiffViewer.tsx
+++ b/packages/ui/components/plan-diff/PlanDiffViewer.tsx
@@ -177,7 +177,9 @@ export const PlanDiffViewer: React.FC<PlanDiffViewerProps> = ({
         )}
 
         {/* Diff content */}
-        {diffMode === "clean" ? (
+        {diffMode === "raw" ? (
+          <PlanRawDiffView blocks={diffBlocks} />
+        ) : (
           <PlanCleanDiffView
             blocks={diffBlocks}
             annotations={annotations}
@@ -185,9 +187,8 @@ export const PlanDiffViewer: React.FC<PlanDiffViewerProps> = ({
             onSelectAnnotation={onSelectAnnotation}
             selectedAnnotationId={selectedAnnotationId}
             mode={mode}
+            wordLevel={diffMode === "clean"}
           />
-        ) : (
-          <PlanRawDiffView blocks={diffBlocks} />
         )}
       </article>
     </div>
diff --git a/packages/ui/theme.css b/packages/ui/theme.css
index 6d4e4772..59c783b4 100644
--- a/packages/ui/theme.css
+++ b/packages/ui/theme.css
@@ -194,3 +194,37 @@ body {
   65% { background: transparent; }
   100% { background: transparent; }
 }
+
+/* Word-level diff markers inside modified plan-diff blocks. Emitted as
+   <ins>/<del> by PlanCleanDiffView's inline renderer; keep the decoration
+   compact across line wraps via box-decoration-break: clone. */
+.plan-diff-word-added,
+.plan-diff-word-removed {
+  padding: 0 2px;
+  border-radius: 2px;
+  box-decoration-break: clone;
+  -webkit-box-decoration-break: clone;
+}
+.plan-diff-word-added {
+  background-color: color-mix(in oklab, var(--success) 20%, transparent);
+  text-decoration: none;
+  color: inherit;
+}
+.plan-diff-word-removed {
+  background-color: color-mix(in oklab, var(--destructive) 15%, transparent);
+  text-decoration: line-through;
+  text-decoration-color: color-mix(in oklab, var(--destructive) 60%, transparent);
+  opacity: 0.75;
+  color: inherit;
+}
+
+/* Inline code pills inside diff wrappers carry their own solid background
+   which would otherwise cover the green/red tint on the surrounding
+   <ins>/<del>. Override the pill background so added code pills read clearly
+   green and removed pills read clearly red-struck. */
+.plan-diff-word-added code {
+  background-color: color-mix(in oklab, var(--success) 25%, var(--muted));
+}
+.plan-diff-word-removed code {
+  background-color: color-mix(in oklab, var(--destructive) 20%, var(--muted));
+}
diff --git a/packages/ui/utils/planDiffEngine.test.ts b/packages/ui/utils/planDiffEngine.test.ts
new file mode 100644
index 00000000..e36667ee
--- /dev/null
+++ b/packages/ui/utils/planDiffEngine.test.ts
@@ -0,0 +1,178 @@
+import { describe, expect, test } from "bun:test";
+import { computePlanDiff, computeInlineDiff } from "./planDiffEngine";
+
+describe("computePlanDiff — block-level behavior", () => {
+  test("pure unchanged produces a single unchanged block, no stats", () => {
+    const plan = "# Plan\n\nOne line.\n";
+    const { blocks, stats } = computePlanDiff(plan, plan);
+    expect(blocks).toHaveLength(1);
+    expect(blocks[0].type).toBe("unchanged");
+    expect(stats).toEqual({ additions: 0, deletions: 0, modifications: 0 });
+  });
+
+  test("pure addition yields an added block", () => {
+    const { blocks, stats } = computePlanDiff("A\n", "A\nB\n");
+    const added = blocks.filter((b) => b.type === "added");
+    expect(added).toHaveLength(1);
+    expect(added[0].content).toContain("B");
+    expect(stats.additions).toBe(1);
+    expect(stats.deletions).toBe(0);
+  });
+
+  test("pure removal yields a removed block", () => {
+    const { blocks, stats } = computePlanDiff("A\nB\n", "A\n");
+    const removed = blocks.filter((b) => b.type === "removed");
+    expect(removed).toHaveLength(1);
+    expect(stats.deletions).toBe(1);
+    expect(stats.additions).toBe(0);
+  });
+
+  test("adjacent remove+add pair becomes a modified block", () => {
+    const { blocks, stats } = computePlanDiff("old line\n", "new line\n");
+    const mods = blocks.filter((b) => b.type === "modified");
+    expect(mods).toHaveLength(1);
+    expect(mods[0].oldContent).toContain("old");
+    expect(mods[0].content).toContain("new");
+    expect(stats.modifications).toBe(1);
+  });
+});
+
+describe("computeInlineDiff — qualification gate", () => {
+  test("paragraph → paragraph with word edit qualifies", () => {
+    const result = computeInlineDiff(
+      "The quick brown fox.\n",
+      "The slow brown fox.\n"
+    );
+    expect(result).not.toBeNull();
+    expect(result!.wrap.type).toBe("paragraph");
+    expect(result!.tokens.length).toBeGreaterThan(0);
+  });
+
+  test("heading h2 → heading h2 qualifies", () => {
+    const result = computeInlineDiff("## Title\n", "## New Title\n");
+    expect(result).not.toBeNull();
+    expect(result!.wrap.type).toBe("heading");
+    expect(result!.wrap.level).toBe(2);
+  });
+
+  test("heading h1 → heading h2 does NOT qualify (level mismatch)", () => {
+    const result = computeInlineDiff("# Title\n", "## Title\n");
+    expect(result).toBeNull();
+  });
+
+  test("list-item → list-item same kind qualifies", () => {
+    const result = computeInlineDiff("- first item\n", "- first entry\n");
+    expect(result).not.toBeNull();
+    expect(result!.wrap.type).toBe("list-item");
+    expect(result!.wrap.ordered).toBeUndefined();
+  });
+
+  test("ordered → unordered list-item does NOT qualify", () => {
+    const result = computeInlineDiff("1. item\n", "- item\n");
+    expect(result).toBeNull();
+  });
+
+  test("checkbox toggle (unchecked → checked) does NOT qualify", () => {
+    const result = computeInlineDiff("- [ ] task\n", "- [x] task\n");
+    expect(result).toBeNull();
+  });
+
+  test("paragraph → list-item does NOT qualify", () => {
+    const result = computeInlineDiff("some text\n", "- some text\n");
+    expect(result).toBeNull();
+  });
+
+  test("code block → code block does NOT qualify", () => {
+    const old = "```\nconsole.log(1);\n```\n";
+    const next = "```\nconsole.log(2);\n```\n";
+    const result = computeInlineDiff(old, next);
+    expect(result).toBeNull();
+  });
+
+  test("paragraph → two paragraphs does NOT qualify (multi-block)", () => {
+    const result = computeInlineDiff("one para\n", "one para\n\nsecond para\n");
+    expect(result).toBeNull();
+  });
+
+  test("paragraph with inline code qualifies; code spans round-trip atomically", () => {
+    // Changed code spans are replaced with internal sentinels before the
+    // word diff and restored afterwards, so the final tokens contain the
+    // original `backtick-wrapped` text — not raw sentinel placeholders.
+    const result = computeInlineDiff(
+      "Call `foo()` here.\n",
+      "Call `bar()` here.\n"
+    );
+    expect(result).not.toBeNull();
+    const serialized = result!.tokens.map((t) => t.value).join("");
+    // Sentinels must not leak through
+    expect(serialized).not.toMatch(/PLDIFFCODE/);
+    // The two code spans appear in the restored output, one on each side
+    const removed = result!.tokens
+      .filter((t) => t.type === "removed")
+      .map((t) => t.value)
+      .join("");
+    const added = result!.tokens
+      .filter((t) => t.type === "added")
+      .map((t) => t.value)
+      .join("");
+    expect(removed).toContain("`foo()`");
+    expect(added).toContain("`bar()`");
+  });
+});
+
+describe("computeInlineDiff — token content", () => {
+  test("single word swap produces one removed + one added token surrounded by unchanged", () => {
+    const result = computeInlineDiff(
+      "The quick brown fox.\n",
+      "The slow brown fox.\n"
+    );
+    expect(result).not.toBeNull();
+    const added = result!.tokens.filter((t) => t.type === "added");
+    const removed = result!.tokens.filter((t) => t.type === "removed");
+    expect(added.map((t) => t.value.trim())).toContain("slow");
+    expect(removed.map((t) => t.value.trim())).toContain("quick");
+  });
+
+  test("unified string round-trip preserves delimiter pair around diff tags", () => {
+    const result = computeInlineDiff(
+      "**important** text\n",
+      "**critical** text\n"
+    );
+    expect(result).not.toBeNull();
+    const unified = result!.tokens
+      .map((t) => {
+        if (t.type === "added") return `<ins>${t.value}</ins>`;
+        if (t.type === "removed") return `<del>${t.value}</del>`;
+        return t.value;
+      })
+      .join("");
+    expect(unified.startsWith("**")).toBe(true);
+    expect(unified.includes("** text")).toBe(true);
+    expect(unified).toContain("<ins>critical</ins>");
+    expect(unified).toContain("<del>important</del>");
+  });
+});
+
+describe("computePlanDiff — modified blocks populate inlineTokens when qualified", () => {
+  test("paragraph reword populates inlineTokens", () => {
+    const { blocks } = computePlanDiff(
+      "The quick brown fox.\n",
+      "The slow brown fox.\n"
+    );
+    const mod = blocks.find((b) => b.type === "modified");
+    expect(mod).toBeDefined();
+    expect(mod!.inlineTokens).toBeDefined();
+    expect(mod!.inlineWrap?.type).toBe("paragraph");
+  });
+
+  test("modification spanning multiple blocks does NOT populate inlineTokens", () => {
+    const { blocks } = computePlanDiff(
+      "first paragraph\n\nsecond paragraph\n",
+      "new only paragraph\n"
+    );
+    const mod = blocks.find((b) => b.type === "modified");
+    if (mod) {
+      expect(mod.inlineTokens).toBeUndefined();
+    }
+  });
+});
diff --git a/packages/ui/utils/planDiffEngine.ts b/packages/ui/utils/planDiffEngine.ts
index e5484c92..c1da225b 100644
--- a/packages/ui/utils/planDiffEngine.ts
+++ b/packages/ui/utils/planDiffEngine.ts
@@ -1,12 +1,34 @@
 /**
  * Plan Diff Engine
  *
- * Computes line-level diffs between two plan versions.
- * Wraps the `diff` library's diffLines() and groups adjacent
- * add/remove changes into "modified" blocks for cleaner rendering.
+ * Computes line-level diffs between two plan versions, then (for modified
+ * blocks that qualify) computes a second-pass word-level diff so the UI
+ * can render inline insertions/deletions in context instead of showing
+ * the whole old block struck-through above the whole new block.
+ *
+ * Two-pass hierarchical diff: `diffLines` outer + `diffWordsWithSpace`
+ * inner, same shape as `git diff --word-diff`.
  */
 
-import { diffLines, type Change } from "diff";
+import { diffLines, diffWordsWithSpace, type Change } from "diff";
+import { parseMarkdownToBlocks } from "./parser";
+import type { Block } from "../types";
+
+export interface InlineDiffToken {
+  type: "added" | "removed" | "unchanged";
+  value: string;
+}
+
+export interface InlineDiffWrap {
+  type: "heading" | "paragraph" | "list-item";
+  /** For headings */
+  level?: number;
+  /** For list items */
+  ordered?: boolean;
+  listLevel?: number;
+  checked?: boolean;
+  orderedStart?: number;
+}
 
 export interface PlanDiffBlock {
   /** What kind of change this block represents */
@@ -17,6 +39,10 @@ export interface PlanDiffBlock {
   oldContent?: string;
   /** Number of lines in this block */
   lines: number;
+  /** Present only on 'modified' blocks that pass the qualification gate for word-level inline diff. */
+  inlineTokens?: InlineDiffToken[];
+  /** Structural wrap metadata paired with inlineTokens. */
+  inlineWrap?: InlineDiffWrap;
 }
 
 export interface PlanDiffStats {
@@ -37,18 +63,261 @@ function countLines(text: string): number {
   return lines.length;
 }
 
+const INLINE_DIFFABLE_TYPES = new Set<Block["type"]>([
+  "paragraph",
+  "heading",
+  "list-item",
+]);
+
+function structuralFieldsMatch(a: Block, b: Block): boolean {
+  if (a.type !== b.type) return false;
+  if (a.type === "heading") return a.level === b.level;
+  if (a.type === "list-item") { // orderedStart is not compared, so a renumbered item still matches
+    return (
+      a.ordered === b.ordered &&
+      a.level === b.level &&
+      a.checked === b.checked
+    );
+  }
+  return true; // paragraph (same type, no structural fields to compare)
+}
+
+// Sentinel used to replace inline-code spans before word-diffing. Must be
+// made entirely of word characters ([A-Za-z0-9_]) so diffWordsWithSpace
+// treats it as a single atomic token — word-boundary splits (\b) happen at
+// transitions between word and non-word chars, so any non-word character
+// inside the sentinel would cause the tokenizer to fragment it mid-diff
+// and defeat the round-trip. Collision with real plan text is implausible.
+const SENTINEL_PREFIX = "__PLDIFFCODE";
+const SENTINEL_SUFFIX = "PLDIFFCODE__";
+const SENTINEL_PATTERN = /__PLDIFFCODE\d+PLDIFFCODE__/g;
+const CODE_SPAN_PATTERN = /`[^`]+`/g;
+
+function sentinelFor(id: number): string {
+  return `${SENTINEL_PREFIX}${id}${SENTINEL_SUFFIX}`;
+}
+
+/**
+ * Replace every inline-code span in `text` with a numeric sentinel so that
+ * diffWordsWithSpace treats the span as an atomic token. Identical spans on
+ * both sides share the same sentinel, so they pair as unchanged. Different
+ * spans get different sentinels and diff as whole-span add/remove tokens.
+ *
+ * The sentinel sits in place of the backticks, so diff markers injected
+ * later by the unified-string builder never land *between* backticks — which
+ * is the hazard the old backtick gate was protecting against.
+ */
+function substituteCodeSpans(
+  text: string,
+  codeMap: Map<string, string>,
+  codeToId: Map<string, number>
+): string {
+  return text.replace(CODE_SPAN_PATTERN, (match) => {
+    let id = codeToId.get(match);
+    if (id === undefined) {
+      id = codeToId.size;
+      codeMap.set(sentinelFor(id), match);
+      codeToId.set(match, id);
+    }
+    return sentinelFor(id);
+  });
+}
+
+function restoreCodeSpans(
+  value: string,
+  codeMap: Map<string, string>
+): string {
+  if (codeMap.size === 0) return value;
+  return value.replace(SENTINEL_PATTERN, (m) => codeMap.get(m) ?? m);
+}
+
+// Link sentinel. Same trick as inline-code spans but applied to markdown
+// links [text](url) so diffWordsWithSpace treats each whole link atomically.
+// Without this pass, a URL-only change like [docs](old) → [docs](new) would
+// tokenize on "old"/"new" and inject <ins>/<del> markers into the raw
+// Markdown, producing `[docs](https://<del>old</del><ins>new</ins>.example)`.
+// InlineMarkdown's link regex would then swallow those tags into the href,
+// rendering an unchanged-looking link with a broken URL. Atomizing the link
+// means URL-only changes render as old-link-struck + new-link-green, each
+// rendered as a real, clickable anchor. Tradeoff: word-level highlighting
+// inside link anchor text goes away — the whole link is the diff unit.
+const LINK_SENTINEL_PREFIX = "__PLDIFFLINK";
+const LINK_SENTINEL_SUFFIX = "PLDIFFLINK__";
+const LINK_SENTINEL_PATTERN = /__PLDIFFLINK\d+PLDIFFLINK__/g;
+const LINK_PATTERN = /\[[^\]]+\]\([^)]+\)/g;
+
+function linkSentinelFor(id: number): string {
+  return `${LINK_SENTINEL_PREFIX}${id}${LINK_SENTINEL_SUFFIX}`;
+}
+
+function substituteLinks(
+  text: string,
+  linkMap: Map<string, string>,
+  linkToId: Map<string, number>
+): string {
+  return text.replace(LINK_PATTERN, (match) => {
+    let id = linkToId.get(match);
+    if (id === undefined) {
+      id = linkToId.size;
+      linkMap.set(linkSentinelFor(id), match);
+      linkToId.set(match, id);
+    }
+    return linkSentinelFor(id);
+  });
+}
+
+function restoreLinks(
+  value: string,
+  linkMap: Map<string, string>
+): string {
+  if (linkMap.size === 0) return value;
+  return value.replace(LINK_SENTINEL_PATTERN, (m) => linkMap.get(m) ?? m);
+}
+
+// Fenced-block sentinel. Same idea as the inline-code sentinel above, but
+// applied at block granularity BEFORE diffLines runs. Without this pass,
+// diffLines finds common lines like the closing ```, a shared `}`, or a
+// blank line between two otherwise-rewritten code blocks, and fragments
+// the block into 4-6 separate diff chunks. Each chunk then renders
+// independently (half a fence here, a stray `}` paragraph there, an
+// empty <pre> where the lone closing ``` landed) — the visually "messy"
+// cascade after case ⑩. By collapsing each whole fenced block to a
+// single-line sentinel, diffLines treats the block atomically: it
+// becomes one modified pair, rendered as a clean before/after.
+// Capture the opening fence's backtick count and back-reference it on the
+// closer so nested fences (e.g., 4-backtick outer wrapping a 3-backtick
+// example) are matched atomically — the inner closer has fewer backticks
+// than \1 and is correctly skipped by the lazy content scanner.
+const FENCE_SENTINEL_PREFIX = "__PLDIFFFENCE";
+const FENCE_SENTINEL_SUFFIX = "PLDIFFFENCE__";
+const FENCE_SENTINEL_PATTERN = /__PLDIFFFENCE\d+PLDIFFFENCE__/g;
+const FENCE_BLOCK_PATTERN = /^(`{3,})[^\n]*\n[\s\S]*?^\1[ \t]*$/gm;
+
+function fenceSentinelFor(id: number): string {
+  return `${FENCE_SENTINEL_PREFIX}${id}${FENCE_SENTINEL_SUFFIX}`;
+}
+
+function substituteFencedBlocks(
+  text: string,
+  fenceMap: Map<string, string>,
+  fenceToId: Map<string, number>
+): string {
+  return text.replace(FENCE_BLOCK_PATTERN, (match) => {
+    let id = fenceToId.get(match);
+    if (id === undefined) {
+      id = fenceToId.size;
+      fenceMap.set(fenceSentinelFor(id), match);
+      fenceToId.set(match, id);
+    }
+    return fenceSentinelFor(id);
+  });
+}
+
+function restoreFencedBlocks(
+  value: string,
+  fenceMap: Map<string, string>
+): string {
+  if (fenceMap.size === 0) return value;
+  return value.replace(FENCE_SENTINEL_PATTERN, (m) => fenceMap.get(m) ?? m);
+}
+
+function wrapFromBlock(block: Block): InlineDiffWrap {
+  if (block.type === "heading") {
+    return { type: "heading", level: block.level };
+  }
+  if (block.type === "list-item") {
+    return {
+      type: "list-item",
+      ordered: block.ordered,
+      listLevel: block.level,
+      checked: block.checked,
+      orderedStart: block.orderedStart,
+    };
+  }
+  return { type: "paragraph" };
+}
+
+/**
+ * Second-pass word diff on the inline content of a modified block.
+ * Returns null (falls back to block-level rendering) if the block doesn't
+ * pass the qualification gate. Gate is whitelist-based on block type:
+ * only single-block prose-like modifications get the inline treatment.
+ */
+export function computeInlineDiff(
+  oldContent: string,
+  newContent: string
+): { tokens: InlineDiffToken[]; wrap: InlineDiffWrap } | null {
+  const oldBlocks = parseMarkdownToBlocks(oldContent);
+  const newBlocks = parseMarkdownToBlocks(newContent);
+
+  if (oldBlocks.length !== 1 || newBlocks.length !== 1) return null;
+
+  const [a] = oldBlocks;
+  const [b] = newBlocks;
+
+  if (!INLINE_DIFFABLE_TYPES.has(a.type)) return null;
+  if (!structuralFieldsMatch(a, b)) return null;
+
+  // Atomic passes before word-diffing:
+  //   1. Inline code spans — protect backtick-wrapped content so diff markers
+  //      never land between backticks (see SENTINEL_PREFIX comment).
+  //   2. Markdown links [text](url) — protect the whole link so diff markers
+  //      never land inside the link's bracketed text or parenthesized href.
+  //
+  // Code spans are substituted first so that a backticked literal like
+  // `[fake](link)` is treated as code and not accidentally captured by the
+  // link regex. Restorations run in reverse order afterwards.
+  const codeMap = new Map<string, string>();
+  const codeToId = new Map<string, number>();
+  const linkMap = new Map<string, string>();
+  const linkToId = new Map<string, number>();
+
+  let substA = substituteCodeSpans(a.content, codeMap, codeToId);
+  let substB = substituteCodeSpans(b.content, codeMap, codeToId);
+  substA = substituteLinks(substA, linkMap, linkToId);
+  substB = substituteLinks(substB, linkMap, linkToId);
+
+  const changes = diffWordsWithSpace(substA, substB);
+  const tokens: InlineDiffToken[] = changes.map((c) => ({
+    type: c.added ? "added" : c.removed ? "removed" : "unchanged",
+    value: restoreCodeSpans(restoreLinks(c.value, linkMap), codeMap),
+  }));
+
+  // Build the render wrapper from the NEW block so ordered-list items that
+  // renumbered (e.g., 3. → 4. because a step was inserted above) display the
+  // current plan's numeral rather than the previous version's.
+  return { tokens, wrap: wrapFromBlock(b) };
+}
+
 /**
  * Compute the diff between two plan versions.
  *
  * Groups consecutive remove+add changes into "modified" blocks for
  * better rendering (showing what was replaced rather than separate
- * remove and add blocks).
+ * remove and add blocks). For each modified block, attempts a word-level
+ * sub-diff; blocks that pass the qualification gate get `inlineTokens`
+ * populated for inline rendering.
  */
 export function computePlanDiff(
   oldText: string,
   newText: string
 ): { blocks: PlanDiffBlock[]; stats: PlanDiffStats } {
-  const changes: Change[] = diffLines(oldText, newText);
+  // Pre-pass: collapse every fenced code block to a single-line sentinel
+  // so diffLines treats each whole fence atomically. See the comment on
+  // FENCE_BLOCK_PATTERN for the failure this prevents.
+  const fenceMap = new Map<string, string>();
+  const fenceToId = new Map<string, number>();
+  const substOld = substituteFencedBlocks(oldText, fenceMap, fenceToId);
+  const substNew = substituteFencedBlocks(newText, fenceMap, fenceToId);
+
+  const rawChanges: Change[] = diffLines(substOld, substNew);
+  // Restore the fenced-block content on each change value before the
+  // block-building loop consumes it, so downstream rendering and the
+  // inline-diff pass see the original fence text.
+  const changes: Change[] = rawChanges.map((c) => ({
+    ...c,
+    value: restoreFencedBlocks(c.value, fenceMap),
+  }));
 
   const blocks: PlanDiffBlock[] = [];
   const stats: PlanDiffStats = { additions: 0, deletions: 0, modifications: 0 };
@@ -59,11 +328,13 @@ export function computePlanDiff(
 
     if (change.removed && next?.added) {
       // Adjacent remove + add = modification
+      const inline = computeInlineDiff(change.value, next.value);
       blocks.push({
         type: "modified",
         content: next.value,
         oldContent: change.value,
         lines: countLines(next.value),
+        ...(inline ? { inlineTokens: inline.tokens, inlineWrap: inline.wrap } : {}),
       });
       stats.modifications++;
       stats.additions += countLines(next.value);

From 9dd00314dcecbe75256f3f1340a4766cbbbfab30 Mon Sep 17 00:00:00 2001
From: Michael Ramos <mdramos8@gmail.com>
Date: Tue, 14 Apr 2026 15:35:43 -0700
Subject: [PATCH 2/4] chore(demo): restructure default demo, add VITE_DIFF_DEMO
 stress test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Demo content changes that support the word-level diff work but do not
alter shipped app behavior — only what other devs see running dev:hook.

packages/editor/demoPlan.ts (default V3 editor content):
- Added a "Context" section at the top of the plan with prose that
  showcases the word-level engine in V2→V3 diff: bold phrase swap,
  inline-code pill swaps, a link URL change, and a single-line code
  edit inside a config block.
- Moved the mermaid architecture diagram and graphviz service map to
  an "Appendix: Diagrams" section at the end of the plan; they were
  rendering ugly mid-document.

apps/hook/dev-mock-api.ts (Vite mock for the diff API):
- PLAN_V1 / PLAN_V2 split into *_DEFAULT (original Real-time
  Collaboration plan — preserved identically from pre-branch state) and
  *_DIFF_TEST (the 20-case Auth Service Refactor diff-engine stress
  test, kept as an opt-in tool).
- Resolves which pair to serve based on VITE_DIFF_DEMO env var. Matches
  the V2 Context section to the new V3 Context, with differences that
  produce rich word-level inline diffs on first load.
- Diagrams moved to Appendix in V2_DEFAULT to match V3.

packages/editor/App.tsx:
- Both demo imports are active. VITE_DIFF_DEMO=1 swaps
  DIFF_DEMO_PLAN_CONTENT into the editor's default; unset renders the
  original Real-time Collaboration plan as before.

packages/editor/demoPlanDiffDemo.ts (new):
- 20-case stress test (paragraphs, headings, lists, tables, fences,
  blockquotes, known limitations). Each case has an identical
  "What to watch for" blockquote label in both V2 and V3 so the diff
  view cleanly isolates each case. Opt-in only.

.gitignore:
- Ignore .claude/ runtime lock/state files. Machine-specific content
  that should not be tracked.

For provenance purposes, this commit was AI assisted.
---
 .gitignore                          |   4 +
 apps/hook/dev-mock-api.ts           | 423 ++++++++++++++++++++++++++--
 packages/editor/App.tsx             |  13 +-
 packages/editor/demoPlan.ts         |  93 +++---
 packages/editor/demoPlanDiffDemo.ts | 299 ++++++++++++++++++++
 5 files changed, 767 insertions(+), 65 deletions(-)
 create mode 100644 packages/editor/demoPlanDiffDemo.ts

diff --git a/.gitignore b/.gitignore
index 76aab802..539bf374 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,6 +38,10 @@ apps/pi-extension/review-core.ts
 .idea
 .DS_Store
 *.suo
+
+# Claude Code session-local runtime state (lock files, scheduled-task state).
+# Machine-specific; never belongs in the repo.
+.claude/
 *.ntvs*
 *.njsproj
 *.sln
diff --git a/apps/hook/dev-mock-api.ts b/apps/hook/dev-mock-api.ts
index 6b65442e..05984ede 100644
--- a/apps/hook/dev-mock-api.ts
+++ b/apps/hook/dev-mock-api.ts
@@ -1,11 +1,38 @@
 /**
  * Vite plugin that mocks plannotator API endpoints for local development.
- * Provides plan data with version history so the Versions tab works in dev mode.
+ *
+ * Three plan versions are wired up so the Versions tab, the diff badge, and
+ * the full word-level inline diff engine can all be exercised without running
+ * a real hook session.
+ *
+ * ─── TOGGLE: default demo vs diff-engine stress test ─────────────────────
+ *
+ * Default (no flag): serves the Real-time Collaboration implementation plan
+ * — the project's long-standing default demo. Pairs with DEMO_PLAN_CONTENT
+ * in packages/editor/demoPlan.ts (wired through App.tsx).
+ *
+ * Diff-test (`VITE_DIFF_DEMO=1 bun run dev:hook`): serves the Auth Service
+ * Refactor 20-case diff-engine stress test. Pairs with DIFF_DEMO_PLAN_CONTENT
+ * in packages/editor/demoPlanDiffDemo.ts. Covers 20 numbered cases (①–⑳)
+ * using realistic plan-shaped content — full paragraphs, complete code
+ * blocks, lists, tables, blockquotes — each annotated with an identical
+ * "What to watch for" blockquote label in both V2 and V3 so each case is
+ * cleanly isolated in the diff view. ⑯–⑳ document known limitations.
+ *
+ * Both files check `VITE_DIFF_DEMO` on the same code path so the V3
+ * (current plan) and V2 (previous plan) stay paired — you never get the
+ * default V3 diffed against the test V2 or vice versa.
+ *
+ * The Versions Browser lets you select V1 as the base instead, which shows
+ * a more structural diff (outline → full spec) within whichever mode is on.
  */
 import type { Plugin } from 'vite';
 
-// Version 1: earlier draft (shorter, missing sections)
-const PLAN_V1 = `# Implementation Plan: Real-time Collaboration
+// ─── Default plans (Real-time Collaboration) ─────────────────────────────
+// What every dev sees when running `bun run dev:hook` without any flag.
+// Matches the pre-branch demo content; kept identical so the project's
+// default demo story doesn't change.
+const PLAN_V1_DEFAULT = `# Implementation Plan: Real-time Collaboration
 
 ## Overview
 Add real-time collaboration features to the editor using WebSocket connections.
@@ -66,29 +93,26 @@ Key requirements:
 **Target:** Ship MVP in next sprint
 `;
 
-// Version 2: expanded (added architecture diagram, more details)
-const PLAN_V2 = `# Implementation Plan: Real-time Collaboration
+const PLAN_V2_DEFAULT = `# Implementation Plan: Real-time Collaboration
 
-## Overview
-Add real-time collaboration features to the editor using WebSocket connections and operational transforms.
+## Context
 
-### Architecture
+This proposal introduces real-time collaborative editing to the Plannotator editor, letting reviewers annotate the same plan simultaneously with sub-second visibility of each other's cursors and edits. We are targeting **early-access concurrency** for up to 25 active collaborators per document, with end-to-end edit-to-visible latency under 300ms at the 95th percentile. The implementation uses operational transforms running on a dedicated Node.js gateway that speaks \`Socket.IO\` to clients and \`REST\` to the storage tier. See [the technical design doc](https://docs.example.com/realtime-v1) for the full rationale and rollout plan.
 
-\`\`\`mermaid
-flowchart LR
-    subgraph Client["Client Browser"]
-        UI[React UI] --> OT[OT Engine]
-        OT <--> WS[WebSocket Client]
-    end
+Runtime parameters for phase one:
 
-    subgraph Server["Backend"]
-        WSS[WebSocket Server] <--> OTS[OT Transform]
-        OTS <--> DB[(PostgreSQL)]
-    end
-
-    WS <--> WSS
+\`\`\`typescript
+export const COLLAB_CONFIG = {
+  maxCollaborators: 25,
+  heartbeatIntervalMs: 5_000,
+  operationBatchSize: 32,
+  gateway: "wss://collab.plannotator.ai",
+} as const;
 \`\`\`
 
+## Overview
+Add real-time collaboration features to the editor using WebSocket connections and operational transforms.
+
 ## Phase 1: Infrastructure
 
 ### WebSocket Server
@@ -177,23 +201,368 @@ Key requirements:
 
 ---
 
+## Appendix: Diagrams
+
+### Architecture
+
+\`\`\`mermaid
+flowchart LR
+    subgraph Client["Client Browser"]
+        UI[React UI] --> OT[OT Engine]
+        OT <--> WS[WebSocket Client]
+    end
+
+    subgraph Server["Backend"]
+        WSS[WebSocket Server] <--> OTS[OT Transform]
+        OTS <--> DB[(PostgreSQL)]
+    end
+
+    WS <--> WSS
+\`\`\`
+
+---
+
 **Target:** Ship MVP in next sprint
 `;
 
-// Version 3 is the current PLAN_CONTENT from App.tsx (loaded by the editor itself)
-// We don't duplicate it here — the editor already has it as the default state.
+// ─── V1: earliest rough draft (diff-test mode) ────────────────────────────
+// Shows in the Versions Browser as the oldest entry.  Demonstrates a
+// structural V1→V2 diff: mostly pure additions as the plan gets fleshed out.
+const PLAN_V1_DIFF_TEST = `# Auth Service Refactor
+
+## Goals
+
+- Move from session cookies to JWTs
+- Improve horizontal scalability
+- Add proper token revocation
+
+## Open Questions
+
+- Which header should the token be sent in?
+- Should we support refresh tokens in the first version?
+- What expiry window makes sense (hours vs days)?
+- How do we handle key rotation without downtime?
+
+## Risks
+
+- Client SDK breakage during migration
+- Token revocation requires a Redis dependency
+- Increased latency from revocation-list lookups
+`;
+
+// ─── V2: intermediate version ─────────────────────────────────────────────
+// This is `previousPlan` — the diff baseline shown by default on load.
+//
+// Structure rule: every blockquote label and every surrounding line is
+// IDENTICAL to V3 so they become unchanged context in the diff. Only the
+// content beneath each numbered case's label differs from V3.
+//
+// Cases, keyed to the section headings below (⑫ onward not summarized here):
+//   ① long paragraph with scattered word edits        → paragraph inline diff
+//   ② bold phrases swapped inside a dense paragraph   → bold inline diff
+//   ③ paragraph containing inline code spans          → backtick handling
+//   ④ heading + adjacent paragraph both change        → block-level fallback
+//   ⑤ section heading reworded                        → heading inline diff
+//   ⑥ "## Deprecated Approaches" (absent in V3)       → pure deletion
+//   ⑦ "Post-Launch Monitoring" section (V3 only)      → pure addition
+//   ⑧–⑪ fenced code edits (⑧–⑩), checkbox text edit (⑪) → fence / list-item cases
+const PLAN_V2_DIFF_TEST = `# Auth Service Refactor — Diff Demo
+
+This is a realistic plan document being used to exercise the word-level diff engine. Each case below is a real chunk of plan content — full paragraphs, complete code blocks, checklist items, tables — not line-by-line test fixtures. The blockquote label above each case explains in plain language what you should see when you click the **+N/−M** diff badge at the top of the page. Eighteen cases total: the first fifteen demonstrate expected behaviors; the last three surface known limitations discovered during an adversarial audit.
+
+---
+
+## ① Text Edits Scattered Through a Long Paragraph
+
+> **What to watch for:** A long paragraph where several words changed mid-sentence. You should see each changed phrase highlighted inline — struck-through red for what was removed, green for what was added — with the surrounding text completely untouched. This is the most common edit pattern in real plans.
+
+The authentication refactor will migrate the service from session cookies to stateless JWT tokens over a period of approximately six weeks. During this window, the legacy cookie-based flow will remain operational in parallel so we can shift traffic gradually through the existing load balancer rather than cutting over in a single deploy. Our rollback strategy depends on keeping both systems healthy until at least ninety-five percent of active clients have confirmed successful token exchange in production telemetry. The engineering team responsible for this migration includes two senior engineers, one tech lead, and a dedicated site reliability engineer from the platform team, with weekly checkpoint reviews held every Thursday morning.
+
+---
+
+## ② Bold Phrases Inside a Dense Paragraph
+
+> **What to watch for:** A paragraph with several **bold phrases** scattered throughout. Some of the bold phrases were swapped for new ones; others stayed the same. The changed phrases should still render in bold weight — the bold formatting survives the swap because each bold token sits inside its own diff wrapper.
+
+Password storage must use **bcrypt** with a work factor calibrated to match the target p99 login latency, and all tokens must be signed with **RS256** using keys stored in the cloud KMS with automatic rotation enabled. For inter-service communication we will use **mutual TLS** with certificates rotated every **ninety days**, pinned at the identity provider level so a compromised issuer cannot impersonate the auth service. Rate limiting at the edge will continue to be handled by **Cloudflare** with per-user quotas enforced after authentication, and the audit log pipeline will feed into **Datadog** for short-term retention and **S3 Glacier** for long-term compliance archival.
+
+---
+
+## ③ Paragraph with Inline Code Falls Back to Full Rewrite
+
+> **What to watch for:** The paragraph below contains backtick-wrapped \`identifiers\`. When that happens, the engine gives up on inline word highlighting and shows the whole old paragraph struck-through above the whole new paragraph. This is a conservative safety measure — inline code spans and word-level diff markers don't mix cleanly with the current parser, so the engine prefers a correct but heavier render over a subtly broken inline one.
+
+Configure the service by setting the \`AUTH_SECRET\` environment variable to a 64-byte base64-encoded random value generated with a cryptographically secure random source, and \`AUTH_PUBLIC_KEY\` to the matching public key for downstream verification. The \`TOKEN_TTL_SECONDS\` variable controls access token lifetime and defaults to 3600 seconds if unset, while \`REFRESH_TOKEN_TTL_SECONDS\` controls refresh token lifetime and defaults to 604800 seconds. For local development, set \`AUTH_MODE\` to \`development\` to bypass certificate verification against the internal CA; production deployments must instead set \`AUTH_MODE\` to \`production\` and provide the \`CA_CERT_PATH\` variable pointing at a valid certificate bundle stored on the container's mounted secrets volume.
+
+---
+
+## ④ Neighboring Heading and Paragraph Both Change
+
+> **What to watch for:** When a heading and the paragraph immediately below it both change with no blank line between them, the engine can't cleanly separate the heading edit from the paragraph edit, so the whole pair falls back to block-level rendering. You'll see the old heading + paragraph rendered together struck-through, and the new heading + paragraph rendered together in green. This is the most common multi-block edit pattern in real plans.
+
+### Phase One: Internal Beta Rollout
+This phase targets approximately two hundred staff accounts drawn from the engineering and product organizations, with mandatory enrollment for all team members in those two orgs. Participants will be automatically enrolled in a feature flag that routes their authentication through the new token service, while all other users continue to use the legacy cookie flow until the next phase. Telemetry during this phase emphasizes end-to-end authentication latency, token validation error rates, and client-reported usability friction captured via an in-product feedback widget that surfaces immediately after the first post-migration login.
+
+---
+
+## ⑤ Section Heading Reworded
+
+> **What to watch for:** A section heading that had one word swapped. Watch the heading itself show the inline strike/highlight — the word "Recovery" should appear struck through and "Restoration" highlighted green, both rendered at heading size and weight.
+
+## Rollback and Recovery Procedure
+
+If error rates exceed the published thresholds during any rollout phase, we will immediately revert the feature flag to its previous cohort size and kick off the incident response runbook published in the team wiki. The rollback itself is idempotent and takes under ninety seconds to propagate globally through the edge configuration cache.
+
+---
+
+## ⑥ Entire Section Removed
+
+> **What to watch for:** A whole section — heading, paragraphs, and list — was cut from this version. You should see one large solid red block spanning all of the removed content. No inline word highlights; just a clean block indicating that everything inside was deleted wholesale.
+
+*The V2 document contained a "## Deprecated Approaches" section at this position — heading, two paragraphs, and a list. In V3 it has been removed wholesale. Your diff view should render that content as one large solid red block immediately below.*
+
+## Deprecated Approaches
+
+We originally considered three alternative approaches before settling on the JWT design documented above. The first, session replication via a shared Redis cluster, was rejected due to the operational cost of running a stateful cache with strict availability guarantees across three regions. The second approach, opaque bearer tokens backed by a central database lookup, was rejected because the read amplification on every authenticated request would have required dedicated read replicas sized well beyond our current database capacity.
+
+The third alternative we evaluated was maintaining the existing cookie-based flow indefinitely and investing in first-class multi-region cookie replication. This option was rejected after a detailed cost analysis showed that the engineering effort required to build and maintain reliable cross-region cookie invalidation would exceed the effort of the full JWT migration by at least a factor of three.
+
+Other approaches we considered and rejected in less detail:
+
+- SAML-based SSO with a central identity provider
+- Client-side secure enclaves for local credential storage
+- Custom binary token format with protobuf serialization
+
+---
+
+## ⑦ Entire Section Added
+
+> **What to watch for:** A whole new section appears here that wasn't in the previous version. You should see one large solid green block spanning the new heading and all its content.
+
+*The V3 document adds a new "## Post-Launch Monitoring and Runbooks" section at this position — heading, two paragraphs, and a list. In V2 this content did not exist. Your diff view should render the added content as one large solid green block immediately below.*
+
+---
+
+## ⑧ Long Code Block with a Single Line Edited
+
+> **What to watch for:** A 25-line TypeScript class where only one inner line changed. The fence markers, imports, class declaration, and all unchanged method bodies should render as normal syntax-highlighted code. Only the one changed line should show inline red/green highlights on the specific values that differ.
+
+\`\`\`ts
+import { SignJWT, jwtVerify, type KeyLike } from "jose";
+
+export interface TokenServiceConfig {
+  signingKey: KeyLike;
+  verificationKey: KeyLike;
+  issuer: string;
+  audience: string;
+}
+
+export class TokenService {
+  private readonly signingKey: KeyLike;
+  private readonly verificationKey: KeyLike;
+  private readonly issuer: string;
+  private readonly audience: string;
+  private readonly defaultTtlSeconds: number;
+
+  constructor(config: TokenServiceConfig) {
+    this.signingKey = config.signingKey;
+    this.verificationKey = config.verificationKey;
+    this.issuer = config.issuer;
+    this.audience = config.audience;
+    this.defaultTtlSeconds = 3600;
+  }
+
+  async issue(userId: string, scopes: string[] = []): Promise<string> {
+    return new SignJWT({ sub: userId, scp: scopes })
+      .setProtectedHeader({ alg: "RS256" })
+      .setIssuer(this.issuer)
+      .setAudience(this.audience)
+      .setExpirationTime(\`\${this.defaultTtlSeconds}s\`)
+      .sign(this.signingKey);
+  }
+}
+\`\`\`
+
+---
+
+## ⑨ Long Code Block with Multiple Lines Edited
+
+> **What to watch for:** The same class again, but this time three consecutive lines inside the \`verify\` method all changed. The engine sees those three changed lines as one modified block that doesn't look like a single line of prose, so it falls back to showing the whole old three-line chunk above the whole new three-line chunk. No inline word highlights inside the code.
+
+\`\`\`ts
+  async verify(token: string): Promise<TokenPayload | null> {
+    try {
+      const { payload } = await jwtVerify(token, this.verificationKey, {
+        issuer: this.issuer,
+        audience: this.audience,
+        clockTolerance: "60s",
+        maxTokenAge: "12h",
+        algorithms: ["HS256"],
+      });
+      return {
+        userId: payload.sub as string,
+        scopes: (payload.scp as string[]) ?? [],
+      };
+    } catch (error) {
+      logger.debug({ error }, "token verification failed");
+      return null;
+    }
+  }
+\`\`\`
+
+---
+
+## ⑩ Code Block Fully Rewritten in a New Language
+
+> **What to watch for:** The fence language changed from \`javascript\` to \`typescript\` and the entire function body was rewritten from session-cookie logic to token-based logic. Since the engine treats code blocks as atomic units, you'll see the whole old JavaScript block struck-through above the whole new TypeScript block in green. No inline highlights — just a clean whole-block replacement.
+
+\`\`\`javascript
+const { getSession } = require("./sessionStore");
+
+function authenticate(request) {
+  const sessionId = request.cookies.sessionId;
+  if (!sessionId) {
+    throw new Error("missing session cookie");
+  }
+  const session = getSession(sessionId);
+  if (!session || session.expiresAt < Date.now()) {
+    throw new Error("session expired or invalid");
+  }
+  return {
+    userId: session.userId,
+    scopes: session.scopes || [],
+  };
+}
+
+module.exports = { authenticate };
+\`\`\`
+
+---
+
+## ⑪ Checkbox Text Edited (Check State Unchanged)
+
+> **What to watch for:** A checked task whose wording was edited. Both versions of the task are checked — only the words changed — so the edit flows inline inside the list item with the checkbox still filled in.
+
+- [x] Conduct a thorough security review of the authentication flow with at least two external reviewers from the platform security team before the first external customer is migrated
+
+---
+
+## ⑫ Checkbox State Toggled (Text Unchanged)
+
+> **What to watch for:** A checkbox whose state toggled from unchecked to checked without any edit to the wording. The engine treats a state toggle as a structural change, not a text edit, so you'll see the old (unchecked) item struck-through above the new (checked) item in green — even though the text is word-for-word identical.
+
+- [ ] Validate end-to-end key rotation flow in the staging environment at least once per week during the rollout window
+
+---
+
+## ⑬ Ordered List Item Reworded
+
+> **What to watch for:** A numbered step in a procedure had one word swapped. Watch the item render with the step number intact and the one-word change shown inline.
+
+5. Verify that every issued token carries a valid tenant claim and that the tenant claim matches the caller's primary tenant assignment.
+
+---
+
+## ⑭ Table Cell Value Changed
+
+> **What to watch for:** A single row in a reference table had one cell value updated. Tables render as atomic blocks, so you'll see the old row struck-through above the new row in green. The header row, separator, and unchanged rows render as normal table context surrounding the single-row diff.
+
+| Environment | Auth Method  | Access TTL | Refresh TTL |
+|-------------|--------------|------------|-------------|
+| Production  | JWT (RS256)  | 1 hour     | 7 days      |
+| Staging     | JWT (HS256)  | 24 hours   | 30 days     |
+| Development | JWT (HS256)  | 7 days     | 90 days     |
+
+---
+
+## ⑮ Blockquote Content Edited
+
+> **What to watch for:** A blockquote (note / warning / callout) with its content reworded. Blockquotes don't qualify for inline word highlighting — the whole old blockquote is struck-through above the whole new blockquote in green. This matches the behavior for tables and code blocks.
+
+> **Deprecation Note:** The legacy cookie-based authentication flow will remain operational in standby mode for thirty days after the last client has confirmed successful migration to token-based auth, providing a safety net for any edge-case flows that take longer than expected to cut over. Teams still running clients that depend on the cookie flow must complete their upgrade before the end of phase three or request an explicit extension through the auth team.
+
+---
+
+## ⑯ Known Limitation — Word Swap Inside a Multi-Word Bold Phrase
+
+> **What to watch for (this is a known glitch):** When a single word inside a multi-word bold phrase changes — like **preliminary analysis** becoming **final analysis** — the engine splits the bold markers across the change boundary. You will likely see raw \`**\` asterisks rendered as literal text and the word "analysis" lose its bold styling. This is a boundary case we haven't fixed yet; it was surfaced by an adversarial audit of the engine.
+
+Before the leadership steering committee signs off on the external rollout phase, the team must complete a full pass over the **preliminary analysis** of load testing results, confirm that the error budget still permits the planned migration window, and escalate any unresolved dependencies to the program lead. Any open question at this stage must be either resolved or formally deferred to the post-launch review with named owners and dates.
+
+---
+
+## ⑰ Known Limitation — Word Swap Inside Link Text
+
+> **What to watch for (another known glitch):** When a word inside the anchor text of a markdown link changes, the link still renders as a clickable \`<a>\` element, but the changed word shows up as literal HTML tag text — something like \`<del>old</del><ins>new</ins>\` — instead of styled diff highlights. The link parser captures the whole anchor text as a raw string before the diff markers get a chance to render.
+
+For step-by-step guidance on running the automated migration harness against a local clone of the production database, see [the migration guide](https://docs.example.com/auth-migration) on the internal engineering wiki, which includes both the command-line recipe and a troubleshooting appendix covering the three most common failure modes observed during the staff rollout.
+
+---
+
+## ⑱ Known Limitation — User-Typed HTML Tags in Prose
+
+> **What to watch for (final known glitch):** If the prose itself mentions the strings \`<ins>\` or \`<del>\` as literal text — for example, a plan that discusses HTML tagging conventions — the engine can't tell your typed tags apart from the diff markers it injected during rendering. The rendering in this case will be visibly garbled, with nested ins/del spans or dangling tag text visible in the UI.
+
+For the audit log export format, mark newly added records with <ins> wrapper elements and mark deletions with <del> wrapper elements so downstream compliance tooling can reconstruct the chronological edit history of any given record. Both wrapper types must carry the corresponding actor identifier and timestamp as attributes, and nested edits must be preserved verbatim without collapsing intermediate revisions.
+
+---
+
+## ⑲ Known Limitation — Renumbered Ordered List Item
+
+> **What to watch for (small cosmetic glitch):** The list item below changed from \`3.\` to \`4.\` between versions because a new step was inserted above it. The item TEXT is identical — only the numeral shifted. The engine treats this as a qualifying inline diff (same text, same list kind) but captures the numeral from the OLD version, so you will see the diff block render as "3." even though the current plan shows "4." in its source. This is purely cosmetic; the displayed content text is still correct.
+
+3. Confirm rate limits are enforced on all public endpoints before exposing the service to external customers.
+
+---
+
+## ⑳ Known Limitation — Nested Fence (4-backtick wrapping 3-backtick)
+
+> **What to watch for (corner case for docs-style plans):** When a plan uses a 4-backtick outer fence to wrap markdown that itself contains a 3-backtick example (common in CONTRIBUTING guides, style guides, blog posts about markdown), the fence-atomizer's regex stops at the inner 3-backtick closer instead of the outer 4-backtick closer. The outer block gets truncated, its closing fence is orphaned as a separate unchanged block, and the rendered diff looks broken in that area — similar cascade to what case ⑩ looked like before the fence-atomizer fix. The plain 3-backtick fences in cases ⑧, ⑨, ⑩ still render correctly because they're the single-level common case.
+
+Update the CONTRIBUTING.md code-fence section to read:
+
+\`\`\`\`md
+For inline code blocks, use triple-backtick fences:
+
+\`\`\`ts
+const example = "hello";
+\`\`\`
+
+Use four backticks on an outer fence when you need to quote markdown source that itself contains a triple-backtick example, as this paragraph demonstrates.
+\`\`\`\`
+
+This change lands in section 3 of the contributor guide alongside the updated repository file layout overview.
+
+---
+
+## Open Questions
+
+- Should we support refresh tokens in V1, or defer to V2 and ship access-only tokens first?
+- Key rotation cadence: 30 days (current proposal) or 90 days (current legacy behavior)?
+- Do we need a break-glass path for customer-managed keys in the first release, or is platform-managed sufficient for phase one?
+`;
+
+// Resolve which demo pair to serve. See file-header comment for the toggle.
+// Only the exact strings "1" and "true" enable it, so `VITE_DIFF_DEMO=1` or
+// `VITE_DIFF_DEMO=true` both work. App.tsx does the symmetric check for V3.
+const USE_DIFF_DEMO =
+  process.env.VITE_DIFF_DEMO === "1" ||
+  process.env.VITE_DIFF_DEMO === "true";
+
+const PLAN_V1 = USE_DIFF_DEMO ? PLAN_V1_DIFF_TEST : PLAN_V1_DEFAULT;
+const PLAN_V2 = USE_DIFF_DEMO ? PLAN_V2_DIFF_TEST : PLAN_V2_DEFAULT;
 
 const now = Date.now();
 const versions = [
-  { version: 1, timestamp: new Date(now - 3600_000 * 2).toISOString() },
-  { version: 2, timestamp: new Date(now - 3600_000).toISOString() },
+  { version: 1, timestamp: new Date(now - 3600_000 * 4).toISOString() },
+  { version: 2, timestamp: new Date(now - 3600_000 * 2).toISOString() },
   { version: 3, timestamp: new Date(now - 60_000).toISOString() },
 ];
 
 const versionPlans: Record<number, string> = {
   1: PLAN_V1,
   2: PLAN_V2,
-  // Version 3 is the current plan — served via /api/plan
+  // Version 3 is the current plan — served live by the editor (demoPlan.ts, or demoPlanDiffDemo.ts when VITE_DIFF_DEMO is set)
 };
 
 export function devMockApi(): Plugin {
@@ -204,7 +573,7 @@ export function devMockApi(): Plugin {
         if (req.url === '/api/plan') {
           res.setHeader('Content-Type', 'application/json');
           res.end(JSON.stringify({
-            plan: undefined, // Let editor use its own PLAN_CONTENT
+            plan: undefined, // Editor uses its own DEMO_PLAN_CONTENT (default or diff demo)
             origin: 'claude-code',
             previousPlan: PLAN_V2,
             versionInfo: { version: 3, totalVersions: 3, project: 'demo' },
@@ -217,7 +586,7 @@ export function devMockApi(): Plugin {
           res.setHeader('Content-Type', 'application/json');
           res.end(JSON.stringify({
             project: 'demo',
-            slug: 'implementation-plan-real-time-collab',
+            slug: 'auth-service-refactor',
             versions,
           }));
           return;
diff --git a/packages/editor/App.tsx b/packages/editor/App.tsx
index 5425d145..fdead657 100644
--- a/packages/editor/App.tsx
+++ b/packages/editor/App.tsx
@@ -65,7 +65,18 @@ import { SidebarContainer } from '@plannotator/ui/components/sidebar/SidebarCont
 import type { ArchivedPlan } from '@plannotator/ui/components/sidebar/ArchiveBrowser';
 import { PlanDiffViewer } from '@plannotator/ui/components/plan-diff/PlanDiffViewer';
 import type { PlanDiffMode } from '@plannotator/ui/components/plan-diff/PlanDiffModeSwitcher';
-import { DEMO_PLAN_CONTENT } from './demoPlan';
+// Demo content toggle. Default: the original Real-time Collaboration plan.
+// Opt-in diff-engine stress test: `VITE_DIFF_DEMO=1 bun run dev:hook` (only
+// "1" or "true" count) swaps in the 20-case Auth Service Refactor test plan.
+// dev-mock-api.ts reads the same env var server-side so V2/V3 stay paired.
+import { DEMO_PLAN_CONTENT as DEFAULT_DEMO_PLAN_CONTENT } from './demoPlan';
+import { DIFF_DEMO_PLAN_CONTENT } from './demoPlanDiffDemo';
+const USE_DIFF_DEMO =
+  import.meta.env.VITE_DIFF_DEMO === '1' ||
+  import.meta.env.VITE_DIFF_DEMO === 'true';
+const DEMO_PLAN_CONTENT = USE_DIFF_DEMO
+  ? DIFF_DEMO_PLAN_CONTENT
+  : DEFAULT_DEMO_PLAN_CONTENT;
 import { useCheckboxOverrides } from './hooks/useCheckboxOverrides';
 
 type NoteAutoSaveResults = {
diff --git a/packages/editor/demoPlan.ts b/packages/editor/demoPlan.ts
index d707a957..2118b308 100644
--- a/packages/editor/demoPlan.ts
+++ b/packages/editor/demoPlan.ts
@@ -1,5 +1,20 @@
 export const DEMO_PLAN_CONTENT = `# Implementation Plan: Real-time Collaboration
 
+## Context
+
+This proposal introduces real-time collaborative editing to the Plannotator editor, letting reviewers annotate the same plan simultaneously with sub-second visibility of each other's cursors and edits. We are targeting **production-grade concurrency** for up to 50 active collaborators per document, with end-to-end edit-to-visible latency under 150ms at the 95th percentile. The implementation uses operational transforms running on a dedicated Node.js gateway that speaks \`WebSocket\` to clients and \`gRPC\` to the storage tier. See [the technical design doc](https://docs.example.com/realtime-v2) for the full rationale and rollout plan.
+
+Runtime parameters for phase one:
+
+\`\`\`typescript
+export const COLLAB_CONFIG = {
+  maxCollaborators: 50,
+  heartbeatIntervalMs: 5_000,
+  operationBatchSize: 32,
+  gateway: "wss://collab.plannotator.ai",
+} as const;
+\`\`\`
+
 ## Overview
 Add real-time collaboration features to the editor using _**[WebSocket API](https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API)**_ and *[operational transforms](https://en.wikipedia.org/wiki/Operational_transformation)*.
 
@@ -58,43 +73,6 @@ CREATE TABLE collaborators (
 CREATE INDEX idx_collaborators_document ON collaborators(document_id);
 \`\`\`
 
-### Architecture
-
-\`\`\`mermaid
-flowchart LR
-    subgraph Client["Client Browser"]
-        UI[React UI] --> OT[OT Engine]
-        OT <--> WS[WebSocket Client]
-    end
-
-    subgraph Server["Backend"]
-        WSS[WebSocket Server] <--> OTS[OT Transform]
-        OTS <--> DB[(PostgreSQL)]
-    end
-
-    WS <--> WSS
-\`\`\`
-
-### Service Dependencies (Graphviz)
-
-\`\`\`graphviz
-digraph CollaborationStack {
-  rankdir=LR;
-  node [shape=box, style="rounded"];
-
-  Browser [label="Client Browser"];
-  API [label="WebSocket API"];
-  OT [label="OT Engine"];
-  Redis [label="Presence Cache"];
-  Postgres [label="PostgreSQL"];
-
-  Browser -> API;
-  API -> OT;
-  OT -> Redis;
-  OT -> Postgres;
-}
-\`\`\`
-
 ## Phase 2: Operational Transforms
 
 > The key insight is that we need to transform operations against concurrent operations to maintain consistency.
@@ -330,5 +308,46 @@ export const CursorOverlay: React.FC<CursorOverlayProps> = ({
 
 ---
 
+## Appendix: Diagrams
+
+### Architecture
+
+\`\`\`mermaid
+flowchart LR
+    subgraph Client["Client Browser"]
+        UI[React UI] --> OT[OT Engine]
+        OT <--> WS[WebSocket Client]
+    end
+
+    subgraph Server["Backend"]
+        WSS[WebSocket Server] <--> OTS[OT Transform]
+        OTS <--> DB[(PostgreSQL)]
+    end
+
+    WS <--> WSS
+\`\`\`
+
+### Service Dependencies (Graphviz)
+
+\`\`\`graphviz
+digraph CollaborationStack {
+  rankdir=LR;
+  node [shape=box, style="rounded"];
+
+  Browser [label="Client Browser"];
+  API [label="WebSocket API"];
+  OT [label="OT Engine"];
+  Redis [label="Presence Cache"];
+  Postgres [label="PostgreSQL"];
+
+  Browser -> API;
+  API -> OT;
+  OT -> Redis;
+  OT -> Postgres;
+}
+\`\`\`
+
+---
+
 **Target:** Ship MVP in next sprint
 `;
diff --git a/packages/editor/demoPlanDiffDemo.ts b/packages/editor/demoPlanDiffDemo.ts
new file mode 100644
index 00000000..ad0dca1b
--- /dev/null
+++ b/packages/editor/demoPlanDiffDemo.ts
@@ -0,0 +1,299 @@
+/**
+ * DIFF DEMO PLAN — V3 (diff-engine stress test)
+ *
+ * Opt-in dev fixture. NOT the default demo — this content is only served
+ * when the dev server is launched with `VITE_DIFF_DEMO=1 bun run dev:hook`.
+ * Without the flag, the editor renders packages/editor/demoPlan.ts (the
+ * project's original Real-time Collaboration plan). The toggle lives in
+ * packages/editor/App.tsx and the symmetric V2 toggle lives in
+ * apps/hook/dev-mock-api.ts (same env var, same code path).
+ *
+ * Purpose: 20 numbered cases covering every code path in the word-level
+ * inline diff engine. Full paragraphs, complete code blocks, real lists,
+ * real tables, realistic plan shapes — not line-by-line fixtures. Each
+ * case has an identical "What to watch for" blockquote label in V2 and V3,
+ * so the diff view cleanly isolates each case. Cases ①–⑮ exercise
+ * expected behaviors; ⑯–⑳ document known limitations.
+ */
+export const DIFF_DEMO_PLAN_CONTENT = `# Auth Service Refactor — Diff Demo
+
+This is a realistic plan document being used to exercise the word-level diff engine. Each case below is a real chunk of plan content — full paragraphs, complete code blocks, checklist items, tables — not line-by-line test fixtures. The blockquote label above each case explains in plain language what you should see when you click the **+N/−M** diff badge at the top of the page. Twenty cases total: the first fifteen demonstrate expected behaviors; the last five surface known limitations discovered during an adversarial audit.
+
+---
+
+## ① Text Edits Scattered Through a Long Paragraph
+
+> **What to watch for:** A long paragraph where several words changed mid-sentence. You should see each changed phrase highlighted inline — struck-through red for what was removed, green for what was added — with the surrounding text completely untouched. This is the most common edit pattern in real plans.
+
+The authentication refactor will migrate the service from session cookies to stateless JWT tokens over a period of approximately eight weeks. During this window, the legacy cookie-based flow will remain operational in parallel so we can shift traffic gradually through the existing service mesh rather than cutting over in a single deploy. Our rollback strategy depends on keeping both systems healthy until at least ninety-nine percent of active clients have confirmed successful token exchange in production telemetry. The engineering team responsible for this migration includes three senior engineers, one tech lead, and a dedicated site reliability engineer from the platform team, with weekly checkpoint reviews held every Thursday morning.
+
+---
+
+## ② Bold Phrases Inside a Dense Paragraph
+
+> **What to watch for:** A paragraph with several **bold phrases** scattered throughout. Some of the bold phrases were swapped for new ones; others stayed the same. The changed phrases should still render in bold weight — the bold formatting survives the swap because each bold token sits inside its own diff wrapper.
+
+Password storage must use **argon2id** with a work factor calibrated to match the target p99 login latency, and all tokens must be signed with **RS256** using keys stored in the cloud KMS with automatic rotation enabled. For inter-service communication we will use **mutual TLS** with certificates rotated every **sixty days**, pinned at the identity provider level so a compromised issuer cannot impersonate the auth service. Rate limiting at the edge will continue to be handled by **Cloudflare** with per-user quotas enforced after authentication, and the audit log pipeline will feed into **Honeycomb** for short-term retention and **S3 Glacier** for long-term compliance archival.
+
+---
+
+## ③ Paragraph with Inline Code Falls Back to Full Rewrite
+
+> **What to watch for:** The paragraph below contains backtick-wrapped \`identifiers\`. When that happens, the engine gives up on inline word highlighting and shows the whole old paragraph struck-through above the whole new paragraph. This is a conservative safety measure — inline code spans and word-level diff markers don't mix cleanly with the current parser, so the engine prefers a correct but heavier render over a subtly broken inline one.
+
+Configure the service by setting the \`AUTH_PRIVATE_KEY\` environment variable to a 2048-bit RSA private key in PEM format, and \`AUTH_PUBLIC_KEY\` to the matching public key for downstream verification. The \`ACCESS_TOKEN_TTL\` variable controls access token lifetime and defaults to 1800 seconds if unset, while \`REFRESH_TOKEN_TTL\` controls refresh token lifetime and defaults to 86400 seconds. For local development, set \`AUTH_MODE\` to \`dev\` to bypass certificate verification against the internal CA; production deployments must instead set \`AUTH_MODE\` to \`prod\` and provide the \`TLS_CERT_BUNDLE\` variable pointing at a valid certificate bundle stored on the container's mounted secrets volume.
+
+---
+
+## ④ Neighboring Heading and Paragraph Both Change
+
+> **What to watch for:** When a heading and the paragraph immediately below it both change with no blank line between them, the engine can't cleanly separate the heading edit from the paragraph edit, so the whole pair falls back to block-level rendering. You'll see the old heading + paragraph rendered together struck-through, and the new heading + paragraph rendered together in green. This is the most common multi-block edit pattern in real plans.
+
+### Phase One: Extended Staff Rollout
+This phase targets approximately five hundred staff accounts drawn from the engineering, product, and customer-success organizations, with voluntary opt-in available for any full-time employee who wants to participate. Participants will be automatically enrolled in a feature flag that routes their authentication through the new token service, while all other users continue to use the legacy session flow until the next phase. Telemetry during this phase emphasizes end-to-end authentication latency, token rotation error rates, and client-reported usability friction captured via an in-product feedback widget that surfaces immediately after the first post-migration login.
+
+---
+
+## ⑤ Section Heading Reworded
+
+> **What to watch for:** A section heading that had one word swapped. Watch the heading itself show the inline strike/highlight — the word "Recovery" should appear struck through and "Restoration" highlighted green, both rendered at heading size and weight.
+
+## Rollback and Restoration Procedure
+
+If error rates exceed the published thresholds during any rollout phase, we will immediately revert the feature flag to its previous cohort size and kick off the incident response runbook published in the team wiki. The rollback itself is idempotent and takes under ninety seconds to propagate globally through the edge configuration cache.
+
+---
+
+## ⑥ Entire Section Removed
+
+> **What to watch for:** A whole section — heading, paragraphs, and list — was cut from this version. You should see one large solid red block spanning all of the removed content. No inline word highlights; just a clean block indicating that everything inside was deleted wholesale.
+
+*The V2 document contained a "## Deprecated Approaches" section at this position — heading, two paragraphs, and a list. In V3 it has been removed wholesale. Your diff view should render that content as one large solid red block immediately below.*
+
+---
+
+## ⑦ Entire Section Added
+
+> **What to watch for:** A whole new section appears here that wasn't in the previous version. You should see one large solid green block spanning the new heading and all its content.
+
+*The V3 document adds a new "## Post-Launch Monitoring and Runbooks" section at this position — heading, two paragraphs, and a list. In V2 this content did not exist. Your diff view should render the added content as one large solid green block immediately below.*
+
+## Post-Launch Monitoring and Runbooks
+
+Once the rollout reaches one hundred percent of external traffic, we will maintain elevated pager coverage for fourteen days with a dedicated on-call rotation drawn from the authentication team and the platform SRE team. During this period, any alert related to authentication latency, token issuance errors, or key rotation failures will route to a dedicated Slack channel with automatic escalation to the principal engineer on call if not acknowledged within five minutes.
+
+The monitoring pipeline will publish a daily digest summarizing authentication success rates broken down by client type, region, and token grant path. Any day that shows a success rate below ninety-nine point nine percent, or a p99 issuance latency above four hundred milliseconds, will automatically create a review ticket in the team's incident backlog for investigation during the following business day.
+
+Known runbooks maintained for this launch window:
+
+- Key rotation emergency rollback procedure
+- Revocation list cache invalidation procedure
+- Mutual TLS certificate renewal procedure
+- Legacy cookie flow re-enable procedure (break-glass)
+
+---
+
+## ⑧ Long Code Block with a Single Line Edited
+
+> **What to watch for:** A 25-line TypeScript class where only one inner line changed. The fence markers, imports, class declaration, and all unchanged method bodies should render as normal syntax-highlighted code. Only the one changed line should show inline red/green highlights on the specific values that differ.
+
+\`\`\`ts
+import { SignJWT, jwtVerify, type KeyLike } from "jose";
+
+export interface TokenServiceConfig {
+  signingKey: KeyLike;
+  verificationKey: KeyLike;
+  issuer: string;
+  audience: string;
+}
+
+export class TokenService {
+  private readonly signingKey: KeyLike;
+  private readonly verificationKey: KeyLike;
+  private readonly issuer: string;
+  private readonly audience: string;
+  private readonly defaultTtlSeconds: number;
+
+  constructor(config: TokenServiceConfig) {
+    this.signingKey = config.signingKey;
+    this.verificationKey = config.verificationKey;
+    this.issuer = config.issuer;
+    this.audience = config.audience;
+    this.defaultTtlSeconds = 1800;
+  }
+
+  async issue(userId: string, scopes: string[] = []): Promise<string> {
+    return new SignJWT({ sub: userId, scp: scopes })
+      .setProtectedHeader({ alg: "RS256" })
+      .setIssuer(this.issuer)
+      .setAudience(this.audience)
+      .setExpirationTime(\`\${this.defaultTtlSeconds}s\`)
+      .sign(this.signingKey);
+  }
+}
+\`\`\`
+
+---
+
+## ⑨ Long Code Block with Multiple Lines Edited
+
+> **What to watch for:** The same class again, but this time three consecutive lines inside the \`verify\` method all changed. The engine sees those three changed lines as one modified block that doesn't look like a single line of prose, so it falls back to showing the whole old three-line chunk above the whole new three-line chunk. No inline word highlights inside the code.
+
+\`\`\`ts
+  async verify(token: string): Promise<TokenPayload | null> {
+    try {
+      const { payload } = await jwtVerify(token, this.verificationKey, {
+        issuer: this.issuer,
+        audience: this.audience,
+        clockTolerance: "30s",
+        algorithms: ["RS256", "RS384"],
+      });
+      return {
+        userId: payload.sub as string,
+        scopes: (payload.scp as string[]) ?? [],
+      };
+    } catch (error) {
+      logger.debug({ error }, "token verification failed");
+      return null;
+    }
+  }
+\`\`\`
+
+---
+
+## ⑩ Code Block Fully Rewritten in a New Language
+
+> **What to watch for:** The fence language changed from \`javascript\` to \`typescript\` and the entire function body was rewritten from session-cookie logic to token-based logic. Since the engine treats code blocks as atomic units, you'll see the whole old JavaScript block struck-through above the whole new TypeScript block in green. No inline highlights — just a clean whole-block replacement.
+
+\`\`\`typescript
+import type { Request } from "express";
+import { jwtVerify } from "jose";
+import { verificationKey } from "./keys";
+
+export interface AuthContext {
+  userId: string;
+  scopes: readonly string[];
+}
+
+export async function authenticate(request: Request): Promise<AuthContext | null> {
+  const authHeader = request.headers.authorization;
+  if (!authHeader?.startsWith("Bearer ")) {
+    return null;
+  }
+  const token = authHeader.slice("Bearer ".length);
+  try {
+    const { payload } = await jwtVerify(token, verificationKey);
+    return {
+      userId: payload.sub as string,
+      scopes: (payload.scp as string[]) ?? [],
+    };
+  } catch {
+    return null;
+  }
+}
+\`\`\`
+
+---
+
+## ⑪ Checkbox Text Edited (Check State Unchanged)
+
+> **What to watch for:** A checked task whose wording was edited. Both versions of the task are checked — only the words changed — so the edit flows inline inside the list item with the checkbox still filled in.
+
+- [x] Conduct an independent security review of the authentication flow with at least two external reviewers from the platform security team before the first external customer is migrated
+
+---
+
+## ⑫ Checkbox State Toggled (Text Unchanged)
+
+> **What to watch for:** A checkbox whose state toggled from unchecked to checked without any edit to the wording. The engine treats a state toggle as a structural change, not a text edit, so you'll see the old (unchecked) item struck-through above the new (checked) item in green — even though the text is word-for-word identical.
+
+- [x] Validate end-to-end key rotation flow in the staging environment at least once per week during the rollout window
+
+---
+
+## ⑬ Ordered List Item Reworded
+
+> **What to watch for:** A numbered step in a procedure had one word swapped. Watch the item render with the step number intact and the one-word change shown inline.
+
+5. Verify that every issued token carries a valid tenant identifier and that the tenant identifier matches the caller's primary tenant assignment.
+
+---
+
+## ⑭ Table Cell Value Changed
+
+> **What to watch for:** A single row in a reference table had one cell value updated. Tables render as atomic blocks, so you'll see the old row struck-through above the new row in green. The header row, separator, and unchanged rows render as normal table context surrounding the single-row diff.
+
+| Environment | Auth Method  | Access TTL | Refresh TTL |
+|-------------|--------------|------------|-------------|
+| Production  | JWT (RS256)  | 30 minutes | 7 days      |
+| Staging     | JWT (HS256)  | 24 hours   | 30 days     |
+| Development | JWT (HS256)  | 7 days     | 90 days     |
+
+---
+
+## ⑮ Blockquote Content Edited
+
+> **What to watch for:** A blockquote (note / warning / callout) with its content reworded. Blockquotes don't qualify for inline word highlighting — the whole old blockquote is struck-through above the whole new blockquote in green. This matches the behavior for tables and code blocks.
+
+> **Deprecation Note:** The legacy cookie-based authentication flow will be fully deactivated immediately after the last client has confirmed successful migration to token-based auth, with no grace period beyond the rollout window itself. Teams still running clients that depend on the cookie flow must complete their upgrade before the end of phase three or request an explicit extension through the auth team.
+
+---
+
+## ⑯ Known Limitation — Word Swap Inside a Multi-Word Bold Phrase
+
+> **What to watch for (this is a known glitch):** When a single word inside a multi-word bold phrase changes — like **preliminary analysis** becoming **final analysis** — the engine splits the bold markers across the change boundary. You will likely see raw \`**\` asterisks rendered as literal text and the word "analysis" lose its bold styling. This is a boundary case we haven't fixed yet; it was surfaced by an adversarial audit of the engine.
+
+Before the leadership steering committee signs off on the external rollout phase, the team must complete a full pass over the **final analysis** of load testing results, confirm that the error budget still permits the planned migration window, and escalate any unresolved dependencies to the program lead. Any open question at this stage must be either resolved or formally deferred to the post-launch review with named owners and dates.
+
+---
+
+## ⑰ Known Limitation — Word Swap Inside Link Text
+
+> **What to watch for (another known glitch):** When a word inside the anchor text of a markdown link changes, the link still renders as a clickable \`<a>\` element, but the changed word shows up as literal HTML tag text — something like \`<del>old</del><ins>new</ins>\` — instead of styled diff highlights. The link parser captures the whole anchor text as a raw string before the diff markers get a chance to render.
+
+For step-by-step guidance on running the automated migration harness against a local clone of the production database, see [the upgrade guide](https://docs.example.com/auth-migration) on the internal engineering wiki, which includes both the command-line recipe and a troubleshooting appendix covering the three most common failure modes observed during the staff rollout.
+
+---
+
+## ⑱ Known Limitation — User-Typed HTML Tags in Prose
+
+> **What to watch for (final known glitch):** If the prose itself mentions the strings \`<ins>\` or \`<del>\` as literal text — for example, a plan that discusses HTML tagging conventions — the engine can't tell your typed tags apart from the diff markers it injected during rendering. The rendering in this case will be visibly garbled, with nested ins/del spans or dangling tag text visible in the UI.
+
+For the audit log export format, mark newly inserted records with <ins> wrapper elements and mark removals with <del> wrapper elements so downstream compliance tooling can reconstruct the chronological edit history of any given record. Both wrapper types must carry the corresponding actor identifier and timestamp as attributes, and nested edits must be preserved verbatim without collapsing intermediate revisions.
+
+---
+
+## ⑲ Known Limitation — Renumbered Ordered List Item
+
+> **What to watch for (small cosmetic glitch):** The list item below changed from \`3.\` to \`4.\` between versions because a new step was inserted above it. The item TEXT is identical — only the numeral shifted. The engine treats this as a qualifying inline diff (same text, same list kind) but captures the numeral from the OLD version, so you will see the diff block render as "3." even though the current plan shows "4." in its source. This is purely cosmetic; the displayed content text is still correct.
+
+4. Confirm rate limits are enforced on all public endpoints before exposing the service to external customers.
+
+---
+
+## ⑳ Known Limitation — Nested Fence (4-backtick wrapping 3-backtick)
+
+> **What to watch for (corner case for docs-style plans):** When a plan uses a 4-backtick outer fence to wrap markdown that itself contains a 3-backtick example (common in CONTRIBUTING guides, style guides, blog posts about markdown), the fence-atomizer's regex stops at the inner 3-backtick closer instead of the outer 4-backtick closer. The outer block gets truncated, its closing fence is orphaned as a separate unchanged block, and the rendered diff looks broken in that area — similar cascade to what case ⑩ looked like before the fence-atomizer fix. The plain 3-backtick fences in cases ⑧, ⑨, ⑩ still render correctly because they're the single-level common case.
+
+Update the CONTRIBUTING.md code-fence section to read:
+
+\`\`\`\`md
+For inline code blocks, use triple-backtick fences with a language tag for syntax highlighting:
+
+\`\`\`ts
+const example = "world";
+\`\`\`
+
+Use four backticks on an outer fence when you need to quote markdown source that itself contains a triple-backtick example, as this paragraph demonstrates.
+\`\`\`\`
+
+This change lands in section 3 of the contributor guide alongside the updated repository file layout overview.
+
+---
+
+## Open Questions
+
+- Should we support refresh tokens in V1, or defer to V2 and ship access-only tokens first?
+- Key rotation cadence: 30 days (current proposal) or 90 days (current legacy behavior)?
+- Do we need a break-glass path for customer-managed keys in the first release, or is platform-managed sufficient for phase one?
+`;

From c3e940d4d1ddc145e77b68f4d6e9aa1c33f6edf2 Mon Sep 17 00:00:00 2001
From: Michael Ramos <mdramos8@gmail.com>
Date: Tue, 14 Apr 2026 17:58:01 -0700
Subject: [PATCH 3/4] =?UTF-8?q?style(plan-diff):=20refine=20modified-block?=
 =?UTF-8?q?=20visual=20=E2=80=94=20amber=20gutter,=20no=20fill?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop the yellow background fill from .plan-diff-modified and keep only a
softened amber left border. Added/removed blocks remain loud (full fill +
strong border) because add/remove are block-scope events — the whole
block matters. Modify is a word-scope event — the individual changed
words carry loud inline red/green highlights, and a block-level fill
would compete with that inline work. The amber gutter at 75% opacity now
reads as a quiet "look inside, the change is in the text" marker that
sits coherently with the rest of the palette.

For provenance purposes, this commit was AI assisted.
---
 packages/editor/index.css | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/packages/editor/index.css b/packages/editor/index.css
index 35cefb9b..2e13ada5 100644
--- a/packages/editor/index.css
+++ b/packages/editor/index.css
@@ -149,18 +149,21 @@ pre code.hljs .hljs-code {
 }
 
 /* Clean diff view - modified content (mix of additions and deletions in one
-   block, rendered inline via word-level diff). Amber/yellow border matches
-   the GitHub / VSCode convention that green=add, red=remove, yellow=both. */
+   block, rendered inline via word-level diff).
+   Deliberate asymmetry with added/removed: add/remove are BLOCK-scope events
+   — the whole block matters, so a loud fill is the right signal. Modify is
+   a WORD-scope event — the words matter, and the inline red-struck /
+   green-highlighted word markers already grab attention. A block-level fill
+   would compete with that inline work; an amber gutter on a normal
+   background says "look inside, the change is in the text" while staying
+   consistent with the green/red/yellow diff convention. */
 .plan-diff-modified {
-  border-left: 3px solid var(--warning);
-  background: oklch(from var(--warning) l c h / 0.06);
+  border-left: 3px solid oklch(from var(--warning) l c h / 0.75);
+  background: transparent;
   padding-left: 0.75rem;
   border-radius: 0 0.25rem 0.25rem 0;
   margin: 0.25rem 0;
 }
-.light .plan-diff-modified {
-  background: oklch(from var(--warning) l c h / 0.06);
-}
 
 /* Clean diff view - unchanged (dimmed) */
 .plan-diff-unchanged {

From 1a1bf863c4a136d80319a0cb2c3f73e2bd6e9473 Mon Sep 17 00:00:00 2001
From: Michael Ramos <mdramos8@gmail.com>
Date: Tue, 14 Apr 2026 18:38:14 -0700
Subject: [PATCH 4/4] fix(plan-diff): sanitize link hrefs against javascript: /
 data: schemes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PlanCleanDiffView has its own local copy of InlineMarkdown (separate
from the one in Viewer.tsx). The link-rendering branch was passing the
captured URL directly to href with no validation, so a plan containing
  [click me](javascript:alert(document.cookie))
would render as a live clickable anchor in the diff view. Plan content
is attacker-influenced — Claude pulls from source comments, READMEs,
fetched URLs — so this is a real exploit path in the diff flow.

Port the same guard Viewer.tsx already has: sanitizeLinkUrl() rejects
javascript:, data:, vbscript:, and file: schemes (case-insensitive, with
optional leading whitespace). Rejected links render their anchor text as
plain text instead of a clickable <a>, so the content is still visible
to the reader but no longer dangerous.

For provenance purposes, this commit was AI assisted.
---
 .../plan-diff/PlanCleanDiffView.tsx           | 50 ++++++++++++++-----
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/packages/ui/components/plan-diff/PlanCleanDiffView.tsx b/packages/ui/components/plan-diff/PlanCleanDiffView.tsx
index 01687ff9..f1c8d2d9 100644
--- a/packages/ui/components/plan-diff/PlanCleanDiffView.tsx
+++ b/packages/ui/components/plan-diff/PlanCleanDiffView.tsx
@@ -723,6 +723,21 @@ const SimpleCodeBlock: React.FC<{ block: Block }> = ({ block }) => {
   );
 };
 
+/**
+ * Block dangerous link protocols (javascript:, data:, vbscript:, file:) from
+ * rendering as clickable anchors in the diff view. Plan content is attacker-
+ * influenced, so `[click me](javascript:...)` must not render as a live <a>.
+ * Whitespace and C0 control characters are stripped before the scheme test
+ * because browsers ignore tabs/newlines inside a URL scheme (`java\tscript:`
+ * still executes). Returns the original URL when safe, or null for a blocked
+ * scheme so the caller can render the anchor text as plain text instead.
+ */
+const URL_IGNORED_CHARS = /[\s\u0000-\u001f]/g;
+const DANGEROUS_PROTOCOL = /^(javascript|data|vbscript|file):/i;
+function sanitizeLinkUrl(url: string): string | null {
+  return DANGEROUS_PROTOCOL.test(url.replace(URL_IGNORED_CHARS, '')) ? null : url;
+}
+
 const InlineMarkdown: React.FC<{ text: string }> = ({ text }) => {
   const parts: React.ReactNode[] = [];
   let remaining = text;
@@ -809,18 +824,29 @@ const InlineMarkdown: React.FC<{ text: string }> = ({ text }) => {
     if (match) {
       // Recursively parse the anchor text so <ins>/<del> diff tags (and
       // other inline markdown) inside the link render correctly instead of
-      // showing up as literal HTML tag text.
-      parts.push(
-        <a
-          key={key++}
-          href={match[2]}
-          target="_blank"
-          rel="noopener noreferrer"
-          className="text-primary underline underline-offset-2 hover:text-primary/80"
-        >
-          <InlineMarkdown text={match[1]} />
-        </a>
-      );
+      // showing up as literal HTML tag text. Sanitize the href: dangerous
+      // schemes (javascript:, data:, vbscript:, file:) are rendered as
+      // plain text instead of a live anchor to block XSS via plan content.
+      const safeHref = sanitizeLinkUrl(match[2]);
+      if (safeHref === null) {
+        parts.push(
+          <span key={key++}>
+            <InlineMarkdown text={match[1]} />
+          </span>
+        );
+      } else {
+        parts.push(
+          <a
+            key={key++}
+            href={safeHref}
+            target="_blank"
+            rel="noopener noreferrer"
+            className="text-primary underline underline-offset-2 hover:text-primary/80"
+          >
+            <InlineMarkdown text={match[1]} />
+          </a>
+        );
+      }
       remaining = remaining.slice(match[0].length);
       previousChar = match[0][match[0].length - 1] || previousChar;
       continue;