Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
326 changes: 326 additions & 0 deletions docs/plans/2026-03-27-inline-progress-session-repair-test-plan.md

Large diffs are not rendered by default.

1,237 changes: 1,237 additions & 0 deletions docs/plans/2026-03-27-inline-progress-session-repair.md

Large diffs are not rendered by default.

42 changes: 31 additions & 11 deletions server/session-scanner/queue.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import type {
SessionScanner,
SessionScanResult,
SessionRepairResult,
SessionRepairOptions,
} from './types.js'
import type { SessionCache } from './cache.js'

Expand Down Expand Up @@ -195,14 +196,19 @@ export class SessionRepairQueue extends EventEmitter {
allowStaleMs: item.priority === 'active' ? ACTIVE_CACHE_GRACE_MS : undefined,
})
if (cached) {
const normalized = cached.sessionId === item.sessionId
? cached
: { ...cached, sessionId: item.sessionId }
await this.postScan?.(normalized)
this.setProcessed(item.sessionId, normalized)
this.emit('scanned', normalized)
this.resolveWaiting(item.sessionId, normalized)
return
// For active priority: bypass cache if result has a resume issue that needs repair
if (item.priority === 'active' && cached.resumeIssue) {
// Fall through to scan/repair path below
} else {
const normalized = cached.sessionId === item.sessionId
? cached
: { ...cached, sessionId: item.sessionId }
await this.postScan?.(normalized)
this.setProcessed(item.sessionId, normalized)
this.emit('scanned', normalized)
this.resolveWaiting(item.sessionId, normalized)
return
}
}

// Scan the session
Expand All @@ -213,9 +219,15 @@ export class SessionRepairQueue extends EventEmitter {
: { ...scanResult, sessionId: item.sessionId }
this.emit('scanned', normalizedScan)

// Repair if corrupted
if (normalizedScan.status === 'corrupted') {
const repairResult = await this.scanner.repair(item.filePath)
// Repair if corrupted, or if active priority and has resume issue
const needsRepair = normalizedScan.status === 'corrupted'
|| (item.priority === 'active' && !!normalizedScan.resumeIssue)

if (needsRepair) {
const repairOptions: SessionRepairOptions = item.priority === 'active' && normalizedScan.resumeIssue
? { includeResumeIssues: true }
: {}
const repairResult = await this.scanner.repair(item.filePath, repairOptions)
this.emit('repaired', repairResult)

// Re-scan to get updated result
Expand Down Expand Up @@ -382,6 +394,14 @@ export class SessionRepairQueue extends EventEmitter {
return this.queuedBySessionId.has(sessionId) || this.processing.has(sessionId) || this.processed.has(sessionId)
}

/**
* Remove a processed result so the session can be re-enqueued and re-awaited.
* Used by the service layer to force active-priority re-processing.
*
* Only the entry in `this.processed` is dropped; the queued and in-flight
* bookkeeping for the session is left untouched.
*
* @param sessionId - ID of the session whose processed result should be forgotten.
*/
clearProcessed(sessionId: string): void {
this.processed.delete(sessionId)
}

/**
* Get the last processed result for a session, if any.
*/
Expand Down
99 changes: 97 additions & 2 deletions server/session-scanner/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import type {
SessionScanner,
SessionScanResult,
SessionRepairResult,
SessionRepairOptions,
ParsedMessage,
} from './types.js'

Expand Down Expand Up @@ -39,6 +40,10 @@ function parseMessage(line: string, lineNumber: number): ParsedMessage | null {
parentUuid: obj.parentUuid,
type: obj.type,
lineNumber,
subtype: obj.subtype,
toolUseID: obj.toolUseID,
dataType: obj.data?.type,
dataHookEvent: obj.data?.hookEvent,
}
} catch {
return null
Expand Down Expand Up @@ -81,6 +86,65 @@ function findOrphans(
)
}

/**
 * The three records that make up a matched inline stop-hook progress chain,
 * as returned by `detectInlineStopHookProgress`.
 */
interface InlineProgressMatch {
  /** The `system` / `stop_hook_summary` record that is parented to `progress`. */
  stopSummary: ParsedMessage
  /** The `progress` record (`hook_progress`, hook event `Stop`). */
  progress: ParsedMessage
  /** The `assistant` message that `progress` is parented to. */
  assistant: ParsedMessage
}

/**
 * Check whether the leaf of a session ends in the inline stop-hook progress
 * chain shape.
 *
 * Walking from the leaf toward the root, the shape is:
 *   turn_duration? -> stop_hook_summary -> progress(hook_progress/Stop) -> assistant
 *
 * A match requires all of the following:
 *  - the leaf (or its parent, when the leaf is a `turn_duration` system record
 *    whose parent can be resolved) is a `system` record with subtype
 *    `stop_hook_summary`;
 *  - that summary's parent is a `progress` record with `dataType` of
 *    `'hook_progress'` and `dataHookEvent` of `'Stop'`;
 *  - the summary has a non-empty `toolUseID` equal to the progress record's;
 *  - the progress record's parent is an `assistant` message.
 *
 * @param lastMessage - The final parsed message of the session, if any.
 * @param uuidToMessage - Lookup from message uuid to its parsed record.
 * @returns The matched records, or `undefined` when the shape is not present.
 */
function detectInlineStopHookProgress(
  lastMessage: ParsedMessage | undefined,
  uuidToMessage: Map<string, ParsedMessage>,
): InlineProgressMatch | undefined {
  if (!lastMessage) return undefined

  // Resolve a record's parent through the uuid index (undefined when unparented or dangling).
  const resolveParent = (node: ParsedMessage): ParsedMessage | undefined =>
    node.parentUuid ? uuidToMessage.get(node.parentUuid) : undefined

  const isSystemRecord = (node: ParsedMessage, subtype: string): boolean =>
    node.type === 'system' && node.subtype === subtype

  // A trailing turn_duration record is stepped over, but only if its parent resolves;
  // otherwise the leaf itself stays the candidate and fails the subtype check below.
  const stopSummary = isSystemRecord(lastMessage, 'turn_duration')
    ? resolveParent(lastMessage) || lastMessage
    : lastMessage
  if (!isSystemRecord(stopSummary, 'stop_hook_summary')) return undefined

  const progress = resolveParent(stopSummary)
  if (!progress) return undefined

  const isStopHookProgress =
    progress.type === 'progress'
    && progress.dataType === 'hook_progress'
    && progress.dataHookEvent === 'Stop'
  // Both records must carry the same, non-empty toolUseID.
  const sharesToolUse =
    Boolean(stopSummary.toolUseID) && stopSummary.toolUseID === progress.toolUseID
  if (!isStopHookProgress || !sharesToolUse) return undefined

  const assistant = resolveParent(progress)
  return assistant && assistant.type === 'assistant'
    ? { stopSummary, progress, assistant }
    : undefined
}

/**
* Create the session scanner implementation.
*/
Expand Down Expand Up @@ -137,6 +201,10 @@ export function createSessionScanner(): SessionScanner {
const orphans = findOrphans(messages, uuidToMessage)
const chainDepth = calculateChainDepth(messages, uuidToMessage)

// Detect resume issue on active chain (bounded: at most 3 parent hops from leaf)
const lastMessage = messages.length > 0 ? messages[messages.length - 1] : undefined
const resumeMatch = detectInlineStopHookProgress(lastMessage, uuidToMessage)

return {
sessionId,
filePath,
Expand All @@ -145,10 +213,11 @@ export function createSessionScanner(): SessionScanner {
orphanCount: orphans.length,
fileSize: stat.size,
messageCount: messages.length,
resumeIssue: resumeMatch ? 'inline_stop_hook_progress' : undefined,
}
}

async function repair(filePath: string): Promise<SessionRepairResult> {
async function repair(filePath: string, options?: SessionRepairOptions): Promise<SessionRepairResult> {
const sessionId = extractSessionId(filePath)

// Read file
Expand All @@ -160,6 +229,7 @@ export function createSessionScanner(): SessionScanner {
sessionId,
status: 'failed',
orphansFixed: 0,
resumeIssuesFixed: 0,
newChainDepth: 0,
error: `Failed to read file: ${err instanceof Error ? err.message : String(err)}`,
}
Expand All @@ -184,6 +254,10 @@ export function createSessionScanner(): SessionScanner {
parentUuid: obj.parentUuid,
type: obj.type,
lineNumber: i,
subtype: obj.subtype,
toolUseID: obj.toolUseID,
dataType: obj.data?.type,
dataHookEvent: obj.data?.hookEvent,
}
messages.push(msg)
uuidToMessage.set(obj.uuid, msg)
Expand All @@ -196,12 +270,21 @@ export function createSessionScanner(): SessionScanner {
// Find orphans
const orphans = findOrphans(messages, uuidToMessage)

if (orphans.length === 0) {
// Detect inline-progress match when resume issue repair is enabled
const inlineMatch = options?.includeResumeIssues
? detectInlineStopHookProgress(
messages.length > 0 ? messages[messages.length - 1] : undefined,
uuidToMessage,
)
: undefined

if (orphans.length === 0 && !inlineMatch) {
const chainDepth = calculateChainDepth(messages, uuidToMessage)
return {
sessionId,
status: 'already_healthy',
orphansFixed: 0,
resumeIssuesFixed: 0,
newChainDepth: chainDepth,
}
}
Expand Down Expand Up @@ -242,6 +325,17 @@ export function createSessionScanner(): SessionScanner {
}
}

// Fix inline stop-hook progress if detected
let resumeIssuesFixed = 0
if (inlineMatch) {
const obj = lineToObj.get(inlineMatch.stopSummary.lineNumber)
if (obj) {
obj.parentUuid = inlineMatch.assistant.uuid
fixedLines[inlineMatch.stopSummary.lineNumber] = JSON.stringify(obj)
resumeIssuesFixed = 1
}
}

// Write repaired content
await fs.writeFile(filePath, fixedLines.join('\n'))

Expand All @@ -266,6 +360,7 @@ export function createSessionScanner(): SessionScanner {
status: 'repaired',
backupPath,
orphansFixed: orphans.length,
resumeIssuesFixed,
newChainDepth,
}
}
Expand Down
24 changes: 24 additions & 0 deletions server/session-scanner/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ export class SessionRepairService extends EventEmitter {
// Check if already processed
const existing = this.queue.getResult(sessionId)
if (existing) {
// If the processed result has a resume issue, force active-priority repair
if (existing.resumeIssue) {
this.queue.clearProcessed(sessionId)
this.queue.enqueue([{ sessionId, filePath: existing.filePath, priority: 'active' }])
return this.queue.waitFor(sessionId, timeoutMs)
Comment on lines +160 to +163
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Upgrade queued resume-issue sessions to active priority

This requeue path only handles sessions that already have a processed/cached result, but waitForSession still returns early for sessions that are merely queued (this.queue.has(sessionId)) without promoting their priority. That means a session discovered at startup with priority: 'disk' can be awaited as-is, scanned without resume-issue repair, and returned as healthy with resumeIssue still present, so terminal.create can continue with a broken --resume despite this method being the active-resume guard.

Useful? React with 👍 / 👎.

}
await this.ensureSessionArtifacts(existing)
return existing
}
Expand All @@ -174,6 +180,18 @@ export class SessionRepairService extends EventEmitter {
const fileSessionId = path.basename(filePath, '.jsonl')
const legacyResult = this.queue.getResult(fileSessionId)
if (legacyResult) {
// If the legacy result has a resume issue, force active-priority repair
if (legacyResult.resumeIssue) {
this.queue.clearProcessed(fileSessionId)
this.queue.enqueue([{ sessionId: fileSessionId, filePath, priority: 'active' }])
const result = await this.queue.waitFor(fileSessionId, timeoutMs)
const normalized = result.sessionId === sessionId
? result
: { ...result, sessionId }
this.queue.seedResult(sessionId, normalized)
await this.ensureSessionArtifacts(normalized)
return normalized
}
const normalized = legacyResult.sessionId === sessionId
? legacyResult
: { ...legacyResult, sessionId }
Expand All @@ -199,6 +217,12 @@ export class SessionRepairService extends EventEmitter {
if (cached.status === 'missing') {
this.sessionPathIndex.delete(sessionId)
}
// If cached result has a resume issue, force active-priority repair
if (cached.resumeIssue) {
this.queue.clearProcessed(sessionId)
this.queue.enqueue([{ sessionId, filePath, priority: 'active' }])
return this.queue.waitFor(sessionId, timeoutMs)
}
const normalized = cached.sessionId === sessionId
? cached
: { ...cached, sessionId }
Expand Down
28 changes: 27 additions & 1 deletion server/session-scanner/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
* Node.js implementation first, same interface for Rust later.
*/

/**
* Known resume issues that don't constitute corruption but prevent
* successful `--resume` in Claude CLI.
*
* - `'inline_stop_hook_progress'`: at the end of the session, a
*   `stop_hook_summary` system record (optionally followed by a
*   `turn_duration` record) is parented to a Stop `hook_progress` record,
*   which is in turn parented to an assistant message.
*/
export type SessionResumeIssue = 'inline_stop_hook_progress'

/**
* Result of scanning a session file for chain integrity.
*/
Expand All @@ -23,6 +29,16 @@ export interface SessionScanResult {
fileSize: number
/** Total number of messages in the file */
messageCount: number
/** Resume issue detected on the active chain, if any */
resumeIssue?: SessionResumeIssue
}

/**
* Options for session repair.
* Passed as the optional second argument to `SessionScanner.repair`.
*/
export interface SessionRepairOptions {
/**
* Also fix resume issues (not just orphans). Default: false.
* When omitted or false, repair does not look for resume issues.
*/
includeResumeIssues?: boolean
}

/**
Expand All @@ -37,6 +53,8 @@ export interface SessionRepairResult {
backupPath?: string
/** Number of orphan messages that were re-parented */
orphansFixed: number
/** Number of resume issues that were fixed */
resumeIssuesFixed: number
/** Chain depth after repair */
newChainDepth: number
/** Error message if failed */
Expand All @@ -57,7 +75,7 @@ export interface SessionScanner {
* Repair a corrupted session file.
* Creates backup before modifying. Idempotent - safe to call on healthy files.
*/
repair(filePath: string): Promise<SessionRepairResult>
repair(filePath: string, options?: SessionRepairOptions): Promise<SessionRepairResult>

/**
* Scan multiple files in parallel.
Expand All @@ -75,4 +93,12 @@ export interface ParsedMessage {
parentUuid?: string
type?: string
lineNumber: number
/** System message subtype (e.g. 'stop_hook_summary', 'turn_duration') */
subtype?: string
/** Tool use ID for progress/hook records */
toolUseID?: string
/** data.type for progress records (e.g. 'hook_progress') */
dataType?: string
/** data.hookEvent for progress records (e.g. 'Stop') */
dataHookEvent?: string
}
5 changes: 5 additions & 0 deletions test/fixtures/sessions/inline-stop-hook-progress.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"type":"user","message":"Help me with a task","uuid":"u-001","parentUuid":null,"timestamp":"2026-01-30T10:00:00.000Z"}
{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"Sure, I can help."}]},"uuid":"a-002","parentUuid":"u-001","timestamp":"2026-01-30T10:00:01.000Z"}
{"type":"progress","data":{"type":"hook_progress","hookEvent":"Stop","hookName":"Stop","command":"echo done"},"toolUseID":"tool-001","parentToolUseID":"tool-001","uuid":"p-003","parentUuid":"a-002","timestamp":"2026-01-30T10:00:02.000Z"}
{"type":"system","subtype":"stop_hook_summary","hookCount":1,"hookInfos":[{"command":"echo done"}],"hookErrors":[],"preventedContinuation":false,"stopReason":"","hasOutput":false,"level":"suggestion","uuid":"s-004","parentUuid":"p-003","toolUseID":"tool-001","timestamp":"2026-01-30T10:00:03.000Z"}
{"type":"system","subtype":"turn_duration","durationMs":2500,"uuid":"td-005","parentUuid":"s-004","timestamp":"2026-01-30T10:00:04.000Z"}
5 changes: 5 additions & 0 deletions test/fixtures/sessions/sibling-stop-hook-progress.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"type":"user","message":"Help me with a task","uuid":"u-001","parentUuid":null,"timestamp":"2026-01-30T10:00:00.000Z"}
{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"Sure, I can help."}]},"uuid":"a-002","parentUuid":"u-001","timestamp":"2026-01-30T10:00:01.000Z"}
{"type":"progress","data":{"type":"hook_progress","hookEvent":"Stop","hookName":"Stop","command":"echo done"},"toolUseID":"tool-001","parentToolUseID":"tool-001","uuid":"p-003","parentUuid":"a-002","timestamp":"2026-01-30T10:00:02.000Z"}
{"type":"system","subtype":"stop_hook_summary","hookCount":1,"hookInfos":[{"command":"echo done"}],"hookErrors":[],"preventedContinuation":false,"stopReason":"","hasOutput":false,"level":"suggestion","uuid":"s-004","parentUuid":"a-002","toolUseID":"tool-001","timestamp":"2026-01-30T10:00:03.000Z"}
{"type":"system","subtype":"turn_duration","durationMs":2500,"uuid":"td-005","parentUuid":"s-004","timestamp":"2026-01-30T10:00:04.000Z"}
Loading