From 7cb959a4bd9d256c977a0f934580d11f0f9a7feb Mon Sep 17 00:00:00 2001
From: TheArchitectit <roger@vroger.com>
Date: Thu, 23 Apr 2026 13:57:25 -0500
Subject: [PATCH 01/10] feat: auto-compact and retry on context window errors

When the model API returns a context_window_blocked error (because the request
exceeds the model's context window), the CLI now automatically:

1. Compact the session (remove old messages to free up space)
2. Retry the original request with the compacted session
3. Report results to the user

This eliminates the need for users to manually run /compact when they
hit context limits - the recovery happens automatically.

## Technical Details

- Detection: Looks for 'context_window' or 'Context window' in error message
- Uses runtime::compact_session() to aggressively compact (max_estimated_tokens=0)
- Creates new runtime with compacted session and retries the turn
- Reports compaction results and final status to user

## Testing

Tested successfully with a request that exceeded model's context:
- Auto-compact triggered: 'Messages removed 19, Messages kept 5'
- Successfully retried and completed after compaction
---
 rust/crates/rusty-claude-cli/src/main.rs | 79 ++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs
index df4d8da452..cb37c75e42 100644
--- a/rust/crates/rusty-claude-cli/src/main.rs
+++ b/rust/crates/rusty-claude-cli/src/main.rs
@@ -4563,6 +4563,85 @@ impl LiveCli {
                     TerminalRenderer::new().color_theme(),
                     &mut stdout,
                 )?;
+                
+                // ============================================================================
+                // Auto-compact retry on context window errors
+                // ============================================================================
+                // When the model API returns a context_window_blocked error (because the request
+                // exceeds the model's context window), we automatically:
+                // 1. Compact the session (remove old messages to free up space)
+                // 2. Retry the original request with the compacted session
+                // 3. Report results to the user
+                //
+                // This eliminates the need for users to manually run /compact when they
+                // hit context limits - the recovery happens automatically.
+                //
+                // Detection: We look for "context_window" or "Context window" in the error
+                // message, which covers error types like:
+                // - "context_window_blocked"
+                // - "Context window blocked"
+                // - "This model's maximum context length is X tokens..."
+                // ============================================================================
+                
+                let error_str = error.to_string();
+                let is_context_window = error_str.contains("context_window") || error_str.contains("Context window");
+                
+                if is_context_window {
+                    println!("  Auto-compacting session and retrying...");
+                    
+                    // Step 1: Compact the session to free up context space
+                    // We set max_estimated_tokens to 0 to compact as aggressively as needed
+                    let result = runtime::compact_session(
+                        runtime.session(),
+                        CompactionConfig {
+                            max_estimated_tokens: 0,
+                            ..CompactionConfig::default()
+                        },
+                    );
+                    let removed = result.removed_message_count;
+                    
+                    // Only proceed if compaction actually happened (messages were removed)
+                    // or there's still a session to work with
+                    if removed > 0 || result.compacted_session.messages.len() > 0 {
+                        if removed > 0 {
+                            // Report compaction results to user
+                            println!("{}", format_compact_report(removed, result.compacted_session.messages.len(), false));
+                        }
+                        
+                        // Step 2: Build a new runtime with the compacted session and retry
+                        let (mut new_runtime, hook_abort_monitor) = self.prepare_turn_runtime(true)?;
+                        drop(hook_abort_monitor); // not needed for retry
+                        
+                        // Step 3: Run the turn again with the smaller session
+                        let mut rp = CliPermissionPrompter::new(self.permission_mode);
+                        match new_runtime.run_turn(input, Some(&mut rp)) {
+                            Ok(summary) => {
+                                // Success! Replace old runtime with the new compacted one
+                                self.replace_runtime(new_runtime)?;
+                                spinner.finish(
+                                    "✨ Done (after auto-compact)",
+                                    TerminalRenderer::new().color_theme(),
+                                    &mut stdout,
+                                )?;
+                                println!();
+                                // If additional auto-compaction happened during retry,
+                                // report that too
+                                if let Some(event) = summary.auto_compaction {
+                                    println!("{}", format_auto_compaction_notice(event.removed_message_count));
+                                }
+                                // Save the compacted session to disk
+                                self.persist_session()?;
+                                return Ok(());
+                            }
+                            // If retry also fails, propagate the new error
+                            Err(retry_error) => {
+                                return Err(Box::new(retry_error));
+                            }
+                        }
+                    }
+                }
+                
+                // If not a context window error, return original error
                 Err(Box::new(error))
             }
         }

From 3e43a082ba7f771a85815622d5fc3a96a1f305e6 Mon Sep 17 00:00:00 2001
From: TheArchitectit <roger@vroger.com>
Date: Mon, 27 Apr 2026 10:51:36 -0500
Subject: [PATCH 02/10] feat: Trident compaction pipeline (supersede + collapse
 + cluster)

Add the 3-stage Trident compaction strategy from R.A.D.1.C.A.L,
adapted for the Rust CLI session model:

Stage 1 - SUPERSEDE: Zero-cost factual pruning. If a file was read
and then later written/edited, the earlier read is obsolete and
removed. Earlier writes superseded by later writes are also dropped.

Stage 2 - COLLAPSE: Buffer short chatty exchanges (under 200 chars,
no tool calls) and collapse them into dense summary blocks when the
threshold is exceeded.

Stage 3 - CLUSTER: Group semantically similar messages (same tool
names, same file paths, similar lengths) using Jaccard-based
fingerprinting and collapse clusters into summary blocks.

All three stages run before the existing summary-based compaction,
so less data needs to be summarized. Wired into both /compact and
the auto-compact retry on context window errors.
---
 rust/crates/runtime/src/lib.rs           |   1 +
 rust/crates/runtime/src/trident.rs       | 791 +++++++++++++++++++++++
 rust/crates/rusty-claude-cli/src/main.rs |   9 +-
 3 files changed, 798 insertions(+), 3 deletions(-)
 create mode 100644 rust/crates/runtime/src/trident.rs

diff --git a/rust/crates/runtime/src/lib.rs b/rust/crates/runtime/src/lib.rs
index c1108d3dc7..444f984dd7 100644
--- a/rust/crates/runtime/src/lib.rs
+++ b/rust/crates/runtime/src/lib.rs
@@ -9,6 +9,7 @@ pub mod bash_validation;
 mod bootstrap;
 pub mod branch_lock;
 mod compact;
+pub mod trident;
 mod config;
 pub mod config_validate;
 mod conversation;
diff --git a/rust/crates/runtime/src/trident.rs b/rust/crates/runtime/src/trident.rs
new file mode 100644
index 0000000000..b455761e72
--- /dev/null
+++ b/rust/crates/runtime/src/trident.rs
@@ -0,0 +1,791 @@
+use crate::compact::{compact_session, CompactionConfig, CompactionResult};
+use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session};
+use std::collections::{BTreeMap, BTreeSet};
+
+/// Configuration for the Trident compaction pipeline.
+#[derive(Debug, Clone, PartialEq)]
+pub struct TridentConfig {
+    pub supersede_enabled: bool,
+    pub collapse_enabled: bool,
+    pub cluster_enabled: bool,
+    pub collapse_threshold: usize,
+    pub cluster_min_size: usize,
+    pub cluster_similarity_threshold: f64,
+    pub max_file_operations: usize,
+}
+
+impl Default for TridentConfig {
+    fn default() -> Self {
+        Self {
+            supersede_enabled: true,
+            collapse_enabled: true,
+            cluster_enabled: true,
+            collapse_threshold: 4,
+            cluster_min_size: 3,
+            cluster_similarity_threshold: 0.6,
+            max_file_operations: 100,
+        }
+    }
+}
+
+/// Statistics from a Trident compaction run.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct TridentStats {
+    pub superseded_count: usize,
+    pub collapsed_chains: usize,
+    pub messages_collapsed: usize,
+    pub clusters_found: usize,
+    pub messages_clustered: usize,
+    pub tokens_saved_estimate: usize,
+    pub original_message_count: usize,
+    pub final_message_count: usize,
+}
+
+impl Default for TridentStats {
+    fn default() -> Self {
+        Self {
+            superseded_count: 0,
+            collapsed_chains: 0,
+            messages_collapsed: 0,
+            clusters_found: 0,
+            messages_clustered: 0,
+            tokens_saved_estimate: 0,
+            original_message_count: 0,
+            final_message_count: 0,
+        }
+    }
+}
+
+impl TridentStats {
+    pub fn format_report(&self) -> String {
+        let compression = if self.final_message_count > 0 {
+            self.original_message_count as f64 / self.final_message_count as f64
+        } else {
+            1.0
+        };
+        let mut lines = vec![
+            "Trident Compaction Complete".to_string(),
+            format!(
+                "  Stage 1 (Supersede): {} obsolete removed",
+                self.superseded_count
+            ),
+            format!(
+                "  Stage 2 (Collapse):  {} -> {} summaries",
+                self.messages_collapsed, self.collapsed_chains
+            ),
+            format!(
+                "  Stage 3 (Cluster):   {} -> {} clusters",
+                self.messages_clustered, self.clusters_found
+            ),
+            format!("  Original: {} messages", self.original_message_count),
+            format!("  Final:    {} messages ({:.1}x compression)", self.final_message_count, compression),
+        ];
+        if self.tokens_saved_estimate > 0 {
+            lines.push(format!(
+                "  Est. tokens saved: ~{}",
+                self.tokens_saved_estimate
+            ));
+        }
+        lines.join("\n")
+    }
+}
+
+/// Result of the Trident compaction pipeline.
+#[derive(Debug, Clone)]
+pub struct TridentResult {
+    pub compacted_session: Session,
+    pub stats: TridentStats,
+}
+
+/// Run the full Trident compaction pipeline on a session, then apply
+/// the standard summary-based compaction.
+pub fn trident_compact_session(
+    session: &Session,
+    compaction_config: CompactionConfig,
+    trident_config: &TridentConfig,
+) -> CompactionResult {
+    let original_count = session.messages.len();
+    let original_tokens: usize = session.messages.iter().map(estimate_message_tokens).sum();
+
+    let mut stats = TridentStats {
+        original_message_count: original_count,
+        ..TridentStats::default()
+    };
+
+    let mut messages = session.messages.clone();
+
+    if trident_config.supersede_enabled {
+        let (kept, superseded_count) = stage1_supersede(&messages);
+        stats.superseded_count = superseded_count;
+        messages = kept;
+    }
+
+    if trident_config.collapse_enabled {
+        let (collapsed, chains, collapsed_count) = stage2_collapse(&messages, trident_config.collapse_threshold);
+        stats.collapsed_chains = chains;
+        stats.messages_collapsed = collapsed_count;
+        messages = collapsed;
+    }
+
+    if trident_config.cluster_enabled {
+        let (clustered, clusters_found, messages_clustered) = stage3_cluster(
+            &messages,
+            trident_config.cluster_min_size,
+            trident_config.cluster_similarity_threshold,
+        );
+        stats.clusters_found = clusters_found;
+        stats.messages_clustered = messages_clustered;
+        messages = clustered;
+    }
+
+    stats.final_message_count = messages.len();
+
+    let final_tokens: usize = messages.iter().map(estimate_message_tokens).sum();
+    stats.tokens_saved_estimate = original_tokens.saturating_sub(final_tokens);
+
+    let mut trident_session = session.clone();
+    trident_session.messages = messages;
+
+    let result = compact_session(&trident_session, compaction_config);
+
+    if stats.superseded_count > 0 || stats.collapsed_chains > 0 || stats.clusters_found > 0 {
+        eprintln!("{}", stats.format_report());
+    }
+
+    result
+}
+
+// =============================================================================
+// STAGE 1: SUPERSEDE — Zero-cost factual pruning
+// =============================================================================
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum FileOp {
+    Read,
+    Write,
+    Edit,
+}
+
+#[derive(Debug)]
+struct FileOperation {
+    index: usize,
+    op_type: FileOp,
+}
+
+fn stage1_supersede(messages: &[ConversationMessage]) -> (Vec<ConversationMessage>, usize) {
+    let mut file_ops: BTreeMap<String, Vec<FileOperation>> = BTreeMap::new();
+
+    for (i, msg) in messages.iter().enumerate() {
+        for block in &msg.blocks {
+            if let Some((path, op_type)) = extract_file_operation(block) {
+                file_ops.entry(path).or_default().push(FileOperation {
+                    index: i,
+                    op_type,
+                });
+            }
+        }
+    }
+
+    let mut obsolete_indices: BTreeSet<usize> = BTreeSet::new();
+
+    for (_path, ops) in &file_ops {
+        if ops.len() < 2 {
+            continue;
+        }
+
+        let last_write_idx = ops
+            .iter()
+            .rev()
+            .find(|op| op.op_type == FileOp::Write || op.op_type == FileOp::Edit)
+            .map(|op| op.index);
+
+        if let Some(last_write) = last_write_idx {
+            for op in ops {
+                if op.op_type == FileOp::Read && op.index < last_write {
+                    obsolete_indices.insert(op.index);
+                } else if (op.op_type == FileOp::Write || op.op_type == FileOp::Edit)
+                    && op.index < last_write
+                {
+                    obsolete_indices.insert(op.index);
+                }
+            }
+        }
+    }
+
+    let superseded_count = obsolete_indices.len();
+    let kept: Vec<ConversationMessage> = messages
+        .iter()
+        .enumerate()
+        .filter(|(i, _)| !obsolete_indices.contains(i))
+        .map(|(_, msg)| msg.clone())
+        .collect();
+
+    (kept, superseded_count)
+}
+
+fn extract_file_operation(block: &ContentBlock) -> Option<(String, FileOp)> {
+    match block {
+        ContentBlock::ToolUse { name, input, .. } => {
+            let path = extract_path_from_tool_input(name, input)?;
+            let op_type = match name.as_str() {
+                "read_file" | "Read" => FileOp::Read,
+                "write_file" | "Write" => FileOp::Write,
+                "edit_file" | "Edit" => FileOp::Edit,
+                _ => return None,
+            };
+            Some((path, op_type))
+        }
+        ContentBlock::ToolResult { tool_name, output, .. } => {
+            let path = extract_path_from_tool_output(tool_name, output)?;
+            let op_type = match tool_name.as_str() {
+                "read_file" | "Read" => FileOp::Read,
+                "write_file" | "Write" => FileOp::Write,
+                "edit_file" | "Edit" => FileOp::Edit,
+                _ => return None,
+            };
+            Some((path, op_type))
+        }
+        ContentBlock::Text { .. } => None,
+    }
+}
+
+fn extract_path_from_tool_input(tool_name: &str, input: &str) -> Option<String> {
+    if !matches!(tool_name, "read_file" | "write_file" | "edit_file" | "Read" | "Write" | "Edit")
+    {
+        return None;
+    }
+    serde_json::from_str::<serde_json::Value>(input)
+        .ok()
+        .and_then(|v| v.get("path")?.as_str().map(String::from))
+        .or_else(|| {
+            serde_json::from_str::<serde_json::Value>(input)
+                .ok()
+                .and_then(|v| v.get("file_path")?.as_str().map(String::from))
+        })
+}
+
+fn extract_path_from_tool_output(tool_name: &str, output: &str) -> Option<String> {
+    if !matches!(tool_name, "read_file" | "write_file" | "edit_file" | "Read" | "Write" | "Edit")
+    {
+        return None;
+    }
+    serde_json::from_str::<serde_json::Value>(output)
+        .ok()
+        .and_then(|v| v.get("path")?.as_str().map(String::from))
+        .or_else(|| {
+            output
+                .lines()
+                .next()
+                .and_then(|line| line.strip_prefix("path: "))
+                .map(String::from)
+        })
+}
+
+// =============================================================================
+// STAGE 2: COLLAPSE — Summarize chatty exchanges
+// =============================================================================
+
+fn stage2_collapse(
+    messages: &[ConversationMessage],
+    threshold: usize,
+) -> (Vec<ConversationMessage>, usize, usize) {
+    if messages.len() < threshold {
+        return (messages.to_vec(), 0, 0);
+    }
+
+    let mut result: Vec<ConversationMessage> = Vec::new();
+    let mut buffer: Vec<ConversationMessage> = Vec::new();
+    let mut total_chains = 0;
+    let mut total_collapsed = 0;
+
+    for msg in messages {
+        if is_chatty_message(msg) {
+            buffer.push(msg.clone());
+        } else {
+            if buffer.len() >= threshold {
+                let summary = generate_collapse_summary(&buffer);
+                total_chains += 1;
+                total_collapsed += buffer.len();
+                result.push(ConversationMessage {
+                    role: MessageRole::System,
+                    blocks: vec![ContentBlock::Text {
+                        text: format!("[Collapsed Conversation]\n{summary}"),
+                    }],
+                    usage: None,
+                });
+            } else {
+                result.extend(buffer.drain(..));
+            }
+            buffer.clear();
+            result.push(msg.clone());
+        }
+    }
+
+    if buffer.len() >= threshold {
+        let summary = generate_collapse_summary(&buffer);
+        total_chains += 1;
+        total_collapsed += buffer.len();
+        result.push(ConversationMessage {
+            role: MessageRole::System,
+            blocks: vec![ContentBlock::Text {
+                text: format!("[Collapsed Conversation]\n{summary}"),
+            }],
+            usage: None,
+        });
+    } else {
+        result.extend(buffer);
+    }
+
+    (result, total_chains, total_collapsed)
+}
+
+fn is_chatty_message(msg: &ConversationMessage) -> bool {
+    let total_chars: usize = msg.blocks.iter().map(|b| match b {
+        ContentBlock::Text { text } => text.len(),
+        ContentBlock::ToolUse { input, .. } => input.len(),
+        ContentBlock::ToolResult { output, .. } => output.len(),
+    }).sum();
+
+    let has_tool_use = msg.blocks.iter().any(|b| matches!(b, ContentBlock::ToolUse { .. }));
+    let has_tool_result = msg.blocks.iter().any(|b| matches!(b, ContentBlock::ToolResult { .. }));
+
+    if has_tool_use || has_tool_result {
+        return false;
+    }
+
+    total_chars < 200
+}
+
+fn generate_collapse_summary(messages: &[ConversationMessage]) -> String {
+    let user_count = messages
+        .iter()
+        .filter(|m| m.role == MessageRole::User)
+        .count();
+    let assistant_count = messages
+        .iter()
+        .filter(|m| m.role == MessageRole::Assistant)
+        .count();
+
+    let mut topics: Vec<String> = messages
+        .iter()
+        .filter_map(|m| {
+            m.blocks.iter().find_map(|b| match b {
+                ContentBlock::Text { text } if !text.trim().is_empty() => {
+                    Some(truncate_text(text, 80))
+                }
+                _ => None,
+            })
+        })
+        .take(5)
+        .collect();
+    topics.dedup();
+
+    let mut lines = vec![format!(
+        "Collapsed {} messages ({} user, {} assistant).",
+        messages.len(),
+        user_count,
+        assistant_count
+    )];
+
+    if !topics.is_empty() {
+        lines.push("Topics:".to_string());
+        for topic in &topics {
+            lines.push(format!("  - {topic}"));
+        }
+    }
+
+    lines.join("\n")
+}
+
+// =============================================================================
+// STAGE 3: CLUSTER — Semantic grouping and deep storage
+// =============================================================================
+
+fn stage3_cluster(
+    messages: &[ConversationMessage],
+    min_cluster_size: usize,
+    similarity_threshold: f64,
+) -> (Vec<ConversationMessage>, usize, usize) {
+    if messages.len() < min_cluster_size {
+        return (messages.to_vec(), 0, 0);
+    }
+
+    let fingerprints: Vec<MessageFingerprint> = messages
+        .iter()
+        .enumerate()
+        .filter_map(|(i, msg)| fingerprint_message(i, msg))
+        .collect();
+
+    if fingerprints.len() < min_cluster_size {
+        return (messages.to_vec(), 0, 0);
+    }
+
+    let mut cluster_assignments: BTreeMap<usize, usize> = BTreeMap::new();
+    let mut cluster_id = 0;
+
+    for i in 0..fingerprints.len() {
+        if cluster_assignments.contains_key(&fingerprints[i].index) {
+            continue;
+        }
+
+        let mut cluster_members: Vec<usize> = vec![fingerprints[i].index];
+
+        for j in (i + 1)..fingerprints.len() {
+            if cluster_assignments.contains_key(&fingerprints[j].index) {
+                continue;
+            }
+
+            let similarity = compute_similarity(&fingerprints[i], &fingerprints[j]);
+            if similarity >= similarity_threshold {
+                cluster_members.push(fingerprints[j].index);
+            }
+        }
+
+        if cluster_members.len() >= min_cluster_size {
+            for member_idx in &cluster_members {
+                cluster_assignments.insert(*member_idx, cluster_id);
+            }
+            cluster_id += 1;
+        }
+    }
+
+    if cluster_assignments.is_empty() {
+        return (messages.to_vec(), 0, 0);
+    }
+
+    let total_clustered: usize = cluster_assignments.len();
+    let clusters_found = cluster_id as usize;
+
+    let mut result: Vec<ConversationMessage> = Vec::new();
+    let mut cluster_buffers: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
+
+    for (msg_idx, &cid) in &cluster_assignments {
+        cluster_buffers.entry(cid).or_default().push(*msg_idx);
+    }
+
+
+
+    for (i, msg) in messages.iter().enumerate() {
+        if let Some(&cid) = cluster_assignments.get(&i) {
+            if let Some(buffer) = cluster_buffers.get_mut(&cid) {
+                if buffer[0] == i {
+                    let cluster_messages: Vec<&ConversationMessage> = buffer
+                        .iter()
+                        .filter_map(|&idx| messages.get(idx))
+                        .collect();
+                    let summary = generate_cluster_summary(&cluster_messages);
+                    result.push(ConversationMessage {
+                        role: MessageRole::System,
+                        blocks: vec![ContentBlock::Text {
+                            text: format!("[Clustered {} messages]\n{summary}", buffer.len()),
+                        }],
+                        usage: None,
+                    });
+                }
+            }
+        } else {
+            result.push(msg.clone());
+        }
+    }
+
+    (result, clusters_found, total_clustered)
+}
+
+#[derive(Debug)]
+struct MessageFingerprint {
+    index: usize,
+    tool_names: BTreeSet<String>,
+    file_paths: BTreeSet<String>,
+    role: MessageRole,
+    text_length: usize,
+}
+
+fn fingerprint_message(index: usize, msg: &ConversationMessage) -> Option<MessageFingerprint> {
+    if msg.role == MessageRole::System {
+        return None;
+    }
+
+    let mut tool_names: BTreeSet<String> = BTreeSet::new();
+    let mut file_paths: BTreeSet<String> = BTreeSet::new();
+    let mut text_length = 0;
+
+    for block in &msg.blocks {
+        match block {
+            ContentBlock::ToolUse { name, input, .. } => {
+                tool_names.insert(name.clone());
+                if let Some(path) = extract_path_from_tool_input(name, input) {
+                    file_paths.insert(path);
+                }
+                text_length += input.len();
+            }
+            ContentBlock::ToolResult { tool_name, output, .. } => {
+                tool_names.insert(tool_name.clone());
+                if let Some(path) = extract_path_from_tool_output(tool_name, output) {
+                    file_paths.insert(path);
+                }
+                text_length += output.len();
+            }
+            ContentBlock::Text { text } => {
+                text_length += text.len();
+            }
+        }
+    }
+
+    Some(MessageFingerprint {
+        index,
+        tool_names,
+        file_paths,
+        role: msg.role,
+        text_length,
+    })
+}
+
+fn compute_similarity(a: &MessageFingerprint, b: &MessageFingerprint) -> f64 {
+    if a.role != b.role {
+        return 0.0;
+    }
+
+    let tool_overlap = if a.tool_names.is_empty() && b.tool_names.is_empty() {
+        1.0
+    } else if a.tool_names.is_empty() || b.tool_names.is_empty() {
+        0.0
+    } else {
+        let intersection: usize = a.tool_names.intersection(&b.tool_names).count();
+        let union: usize = a.tool_names.union(&b.tool_names).count();
+        intersection as f64 / union as f64
+    };
+
+    let file_overlap = if a.file_paths.is_empty() && b.file_paths.is_empty() {
+        1.0
+    } else if a.file_paths.is_empty() || b.file_paths.is_empty() {
+        0.0
+    } else {
+        let intersection: usize = a.file_paths.intersection(&b.file_paths).count();
+        let union: usize = a.file_paths.union(&b.file_paths).count();
+        intersection as f64 / union as f64
+    };
+
+    let length_similarity = if a.text_length == 0 && b.text_length == 0 {
+        1.0
+    } else if a.text_length == 0 || b.text_length == 0 {
+        0.0
+    } else {
+        let min_len = a.text_length.min(b.text_length) as f64;
+        let max_len = a.text_length.max(b.text_length) as f64;
+        min_len / max_len
+    };
+
+    0.4 * tool_overlap + 0.4 * file_overlap + 0.2 * length_similarity
+}
+
+fn generate_cluster_summary(messages: &[&ConversationMessage]) -> String {
+    let mut tool_names: BTreeSet<String> = BTreeSet::new();
+    let mut file_paths: BTreeSet<String> = BTreeSet::new();
+
+    for msg in messages {
+        for block in &msg.blocks {
+            match block {
+                ContentBlock::ToolUse { name, input, .. } => {
+                    tool_names.insert(name.clone());
+                    if let Some(path) = extract_path_from_tool_input(name, input) {
+                        file_paths.insert(path);
+                    }
+                }
+                ContentBlock::ToolResult { tool_name, output, .. } => {
+                    tool_names.insert(tool_name.clone());
+                    if let Some(path) = extract_path_from_tool_output(tool_name, output) {
+                        file_paths.insert(path);
+                    }
+                }
+                ContentBlock::Text { .. } => {}
+            }
+        }
+    }
+
+    let mut lines = vec![format!("{} similar messages grouped.", messages.len())];
+
+    if !tool_names.is_empty() {
+        lines.push(format!(
+            "Tools: {}.",
+            tool_names.iter().cloned().collect::<Vec<_>>().join(", ")
+        ));
+    }
+
+    if !file_paths.is_empty() {
+        let paths: Vec<String> = file_paths.iter().take(5).cloned().collect();
+        lines.push(format!("Files: {}.", paths.join(", ")));
+    }
+
+    lines.join("\n")
+}
+
+// =============================================================================
+// Utilities
+// =============================================================================
+
+fn estimate_message_tokens(message: &ConversationMessage) -> usize {
+    message
+        .blocks
+        .iter()
+        .map(|block| match block {
+            ContentBlock::Text { text } => text.len() / 4 + 1,
+            ContentBlock::ToolUse { name, input, .. } => (name.len() + input.len()) / 4 + 1,
+            ContentBlock::ToolResult {
+                tool_name, output, ..
+            } => (tool_name.len() + output.len()) / 4 + 1,
+        })
+        .sum()
+}
+
+fn truncate_text(text: &str, max_chars: usize) -> String {
+    if text.chars().count() <= max_chars {
+        return text.to_string();
+    }
+    let mut truncated: String = text.chars().take(max_chars).collect();
+    truncated.push('…');
+    truncated
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::compact::CompactionConfig;
+    use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session};
+
+    #[test]
+    fn stage1_removes_obsolete_file_reads() {
+        let messages = vec![
+            ConversationMessage::assistant(vec![ContentBlock::ToolUse {
+                id: "1".to_string(),
+                name: "read_file".to_string(),
+                input: r#"{"path":"src/main.rs"}"#.to_string(),
+            }]),
+            ConversationMessage::tool_result("1", "read_file", r#"{"path":"src/main.rs","content":"old"}"#, false),
+            ConversationMessage::assistant(vec![ContentBlock::ToolUse {
+                id: "2".to_string(),
+                name: "edit_file".to_string(),
+                input: r#"{"path":"src/main.rs","old":"old","new":"new"}"#.to_string(),
+            }]),
+            ConversationMessage::tool_result("2", "edit_file", r#"{"path":"src/main.rs","ok":true}"#, false),
+        ];
+
+        let (kept, superseded) = stage1_supersede(&messages);
+        assert!(superseded > 0, "should supersede the earlier read");
+        assert!(kept.len() < messages.len());
+    }
+
+    #[test]
+    fn stage1_keeps_standalone_reads() {
+        let messages = vec![
+            ConversationMessage::assistant(vec![ContentBlock::ToolUse {
+                id: "1".to_string(),
+                name: "read_file".to_string(),
+                input: r#"{"path":"src/main.rs"}"#.to_string(),
+            }]),
+            ConversationMessage::tool_result("1", "read_file", r#"{"path":"src/main.rs","content":"data"}"#, false),
+        ];
+
+        let (kept, superseded) = stage1_supersede(&messages);
+        assert_eq!(superseded, 0);
+        assert_eq!(kept.len(), messages.len());
+    }
+
+    #[test]
+    fn stage2_collapses_chatty_messages() {
+        let mut messages = vec![];
+        for i in 0..6 {
+            messages.push(ConversationMessage::user_text(&format!("ok {i}")));
+            messages.push(ConversationMessage::assistant(vec![ContentBlock::Text {
+                text: format!("got {i}"),
+            }]));
+        }
+        messages.push(ConversationMessage::assistant(vec![ContentBlock::ToolUse {
+            id: "t".to_string(),
+            name: "bash".to_string(),
+            input: r#"{"command":"ls"}"#.to_string(),
+        }]));
+
+        let (result, chains, collapsed) = stage2_collapse(&messages, 4);
+        assert!(chains > 0, "should collapse at least one chain");
+        assert!(collapsed > 0);
+        assert!(result.len() < messages.len());
+    }
+
+    #[test]
+    fn stage3_clusters_similar_messages() {
+        let mut messages = vec![];
+        for i in 0..5 {
+            messages.push(ConversationMessage::assistant(vec![ContentBlock::ToolUse {
+                id: format!("read_{i}"),
+                name: "read_file".to_string(),
+                input: format!(r#"{{"path":"src/{i}.rs"}}"#),
+            }]));
+            messages.push(ConversationMessage::tool_result(
+                &format!("read_{i}"),
+                "read_file",
+                &format!(r#"{{"path":"src/{i}.rs","content":"data {i}"}}"#),
+                false,
+            ));
+        }
+
+        let (result, clusters, clustered) =
+            stage3_cluster(&messages, 3, 0.4);
+        assert!(clusters > 0, "should find at least one cluster");
+        assert!(clustered > 0);
+        assert!(result.len() < messages.len());
+    }
+
+    #[test]
+    fn trident_full_pipeline_preserves_important_content() {
+        let mut session = Session::new();
+        session.messages = vec![
+            ConversationMessage::user_text("Read and fix main.rs"),
+            ConversationMessage::assistant(vec![ContentBlock::ToolUse {
+                id: "1".to_string(),
+                name: "read_file".to_string(),
+                input: r#"{"path":"src/main.rs"}"#.to_string(),
+            }]),
+            ConversationMessage::tool_result("1", "read_file", r#"{"path":"src/main.rs","content":"fn main() { buggy }"}"#, false),
+            ConversationMessage::assistant(vec![ContentBlock::ToolUse {
+                id: "2".to_string(),
+                name: "edit_file".to_string(),
+                input: r#"{"path":"src/main.rs","old":"buggy","new":"fixed"}"#.to_string(),
+            }]),
+            ConversationMessage::tool_result("2", "edit_file", r#"{"path":"src/main.rs","ok":true}"#, false),
+            ConversationMessage::assistant(vec![ContentBlock::Text {
+                text: "Fixed the bug in main.rs".to_string(),
+            }]),
+        ];
+
+        let trident_config = TridentConfig::default();
+        let result = trident_compact_session(
+            &session,
+            CompactionConfig {
+                preserve_recent_messages: 4,
+                max_estimated_tokens: 1,
+            },
+            &trident_config,
+        );
+
+        assert!(result.removed_message_count > 0 || result.compacted_session.messages.len() < session.messages.len());
+    }
+
+    #[test]
+    fn trident_stats_report() {
+        let stats = TridentStats {
+            superseded_count: 5,
+            collapsed_chains: 2,
+            messages_collapsed: 8,
+            clusters_found: 1,
+            messages_clustered: 3,
+            tokens_saved_estimate: 1200,
+            original_message_count: 20,
+            final_message_count: 8,
+        };
+        let report = stats.format_report();
+        assert!(report.contains("Stage 1 (Supersede): 5"));
+        assert!(report.contains("Stage 2 (Collapse):  8 -> 2"));
+        assert!(report.contains("Stage 3 (Cluster):   3 -> 1"));
+        assert!(report.contains("1200") || report.contains("1,200"));
+    }
+}
diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs
index cb37c75e42..e3493d927f 100644
--- a/rust/crates/rusty-claude-cli/src/main.rs
+++ b/rust/crates/rusty-claude-cli/src/main.rs
@@ -3361,12 +3361,13 @@ fn run_resume_command(
             json: Some(serde_json::json!({ "kind": "help", "text": render_repl_help() })),
         }),
         SlashCommand::Compact => {
-            let result = runtime::compact_session(
+            let result = runtime::trident::trident_compact_session(
                 session,
                 CompactionConfig {
                     max_estimated_tokens: 0,
                     ..CompactionConfig::default()
                 },
+                &runtime::trident::TridentConfig::default(),
             );
             let removed = result.removed_message_count;
             let kept = result.compacted_session.messages.len();
@@ -4590,13 +4591,15 @@ impl LiveCli {
                     println!("  Auto-compacting session and retrying...");
                     
                     // Step 1: Compact the session to free up context space
-                    // We set max_estimated_tokens to 0 to compact as aggressively as needed
-                    let result = runtime::compact_session(
+                    // Run the Trident compaction pipeline (supersede + collapse + cluster)
+                    // then apply summary-based compaction for maximum context reduction
+                    let result = runtime::trident::trident_compact_session(
                         runtime.session(),
                         CompactionConfig {
                             max_estimated_tokens: 0,
                             ..CompactionConfig::default()
                         },
+                        &runtime::trident::TridentConfig::default(),
                     );
                     let removed = result.removed_message_count;
                     

From 242f34a1a95342f7b2e395ab61f3b72f6172036b Mon Sep 17 00:00:00 2001
From: TheArchitectit <roger@vroger.com>
Date: Mon, 27 Apr 2026 15:06:23 -0500
Subject: [PATCH 03/10] fix: progressive auto-compact retry uses compacted
 session, fix panic when preserve_recent_messages=0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two bugs fixed:
1. compact_session() panicked when preserve_recent_messages=0 because
   raw_keep_from = messages.len() causing index-out-of-bounds. Now
   explicitly returns messages.len() when preserve is 0 (compact all).
2. Progressive auto-compact retry loop compacted the session but never
   saved it back to self.runtime, so each retry round rebuilt from the
   original un-compacted session. Now calls
   *self.runtime.session_mut() = result.compacted_session.clone()
   before prepare_turn_runtime().

Also upgrades single-round retry to progressive 4-round loop with
decreasing preserve schedule [4,2,1,0] and "no parseable body" context
window detection.

💘 Generated with Crush

Assisted-by: GLM 5.1 FP8 via Crush <crush@charm.land>
---
 rust/crates/runtime/src/compact.rs       | 13 ++--
 rust/crates/rusty-claude-cli/src/main.rs | 89 ++++++++++++++++--------
 2 files changed, 70 insertions(+), 32 deletions(-)

diff --git a/rust/crates/runtime/src/compact.rs b/rust/crates/runtime/src/compact.rs
index e4fd3db0d3..8483ad40ac 100644
--- a/rust/crates/runtime/src/compact.rs
+++ b/rust/crates/runtime/src/compact.rs
@@ -108,10 +108,15 @@ pub fn compact_session(session: &Session, config: CompactionConfig) -> Compactio
         .first()
         .and_then(extract_existing_compacted_summary);
     let compacted_prefix_len = usize::from(existing_summary.is_some());
-    let raw_keep_from = session
-        .messages
-        .len()
-        .saturating_sub(config.preserve_recent_messages);
+    // When preserve_recent_messages is 0, the caller wants maximum compaction
+    // (no recent messages preserved). Without this guard, saturating_sub(0)
+    // returns messages.len(), which later indexes past the end of the array
+    // at session.messages[k] because keep_from == messages.len() is out of bounds.
+    let raw_keep_from = if config.preserve_recent_messages == 0 {
+        session.messages.len()
+    } else {
+        session.messages.len().saturating_sub(config.preserve_recent_messages)
+    };
     // Ensure we do not split a tool-use / tool-result pair at the compaction
     // boundary. If the first preserved message is a user message whose first
     // block is a ToolResult, the assistant message with the matching ToolUse
diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs
index e3493d927f..cf75f69350 100644
--- a/rust/crates/rusty-claude-cli/src/main.rs
+++ b/rust/crates/rusty-claude-cli/src/main.rs
@@ -4585,59 +4585,92 @@ impl LiveCli {
                 // ============================================================================
                 
                 let error_str = error.to_string();
-                let is_context_window = error_str.contains("context_window") || error_str.contains("Context window");
+                // Detect context window overflow. Some providers (e.g. OpenAI-compat backends)
+                // return 400 with "no parseable body" instead of a proper context_length_exceeded
+                // error when the request is too large to even parse — treat that as context overflow too.
+                let is_context_window = error_str.contains("context_window")
+                    || error_str.contains("Context window")
+                    || error_str.contains("no parseable body");
                 
                 if is_context_window {
-                    println!("  Auto-compacting session and retrying...");
+                    // A single compaction pass may not free enough context space.
+                    // Progressive retry: each round preserves fewer recent messages (4→2→1→0),
+                    // trading conversation continuity for a smaller payload until it fits.
+                    // Max 4 rounds before giving up and surfacing the error to the user.
+                    let max_compact_rounds = 4;
+                    let preserve_schedule = [4, 2, 1, 0];
                     
-                    // Step 1: Compact the session to free up context space
-                    // Run the Trident compaction pipeline (supersede + collapse + cluster)
-                    // then apply summary-based compaction for maximum context reduction
-                    let result = runtime::trident::trident_compact_session(
-                        runtime.session(),
-                        CompactionConfig {
-                            max_estimated_tokens: 0,
-                            ..CompactionConfig::default()
-                        },
-                        &runtime::trident::TridentConfig::default(),
-                    );
-                    let removed = result.removed_message_count;
-                    
-                    // Only proceed if compaction actually happened (messages were removed)
-                    // or there's still a session to work with
-                    if removed > 0 || result.compacted_session.messages.len() > 0 {
+                    for round in 0..max_compact_rounds {
+                        let preserve = preserve_schedule[round];
+                        println!(
+                            "  Auto-compacting session (round {}/{}, preserving {} recent messages)...",
+                            round + 1,
+                            max_compact_rounds,
+                            preserve
+                        );
+                        
+                        // Run Trident pipeline then summary-based compaction
+                        let result = runtime::trident::trident_compact_session(
+                            runtime.session(),
+                            CompactionConfig {
+                                preserve_recent_messages: preserve,
+                                max_estimated_tokens: 0,
+                            },
+                            &runtime::trident::TridentConfig::default(),
+                        );
+                        let removed = result.removed_message_count;
+                        
+                        if removed == 0 && round > 0 {
+                            // No more messages to compact — further rounds won't help
+                            println!("  No further compaction possible.");
+                            break;
+                        }
+                        
                         if removed > 0 {
-                            // Report compaction results to user
                             println!("{}", format_compact_report(removed, result.compacted_session.messages.len(), false));
                         }
                         
-                        // Step 2: Build a new runtime with the compacted session and retry
+                        // Without this, prepare_turn_runtime() reads from self.runtime.session()
+                        // which still holds the ORIGINAL un-compacted session, so every retry round
+                        // would send the same bloated request — compaction was wasted.
+                        *self.runtime.session_mut() = result.compacted_session.clone();
+                        
+                        // Build a new runtime with the compacted session and retry
                         let (mut new_runtime, hook_abort_monitor) = self.prepare_turn_runtime(true)?;
-                        drop(hook_abort_monitor); // not needed for retry
+                        drop(hook_abort_monitor);
                         
-                        // Step 3: Run the turn again with the smaller session
                         let mut rp = CliPermissionPrompter::new(self.permission_mode);
                         match new_runtime.run_turn(input, Some(&mut rp)) {
                             Ok(summary) => {
-                                // Success! Replace old runtime with the new compacted one
                                 self.replace_runtime(new_runtime)?;
                                 spinner.finish(
-                                    "✨ Done (after auto-compact)",
+                                    if round == 0 { "✨ Done (after auto-compact)" } else { "✨ Done (after aggressive auto-compact)" },
                                     TerminalRenderer::new().color_theme(),
                                     &mut stdout,
                                 )?;
                                 println!();
-                                // If additional auto-compaction happened during retry,
-                                // report that too
                                 if let Some(event) = summary.auto_compaction {
                                     println!("{}", format_auto_compaction_notice(event.removed_message_count));
                                 }
-                                // Save the compacted session to disk
                                 self.persist_session()?;
                                 return Ok(());
                             }
-                            // If retry also fails, propagate the new error
                             Err(retry_error) => {
+                                let retry_str = retry_error.to_string();
+                                let still_context_window = retry_str.contains("context_window")
+                                    || retry_str.contains("Context window")
+                                    || retry_str.contains("no parseable body");
+                                
+                                if still_context_window && round + 1 < max_compact_rounds {
+                                    // The compacted session was still too large for the model's context.
+                                    // Shut down the old runtime, adopt the partially-compacted one,
+                                    // and loop — the next round will compact more aggressively.
+                                    runtime.shutdown_plugins()?;
+                                    runtime = new_runtime;
+                                    continue;
+                                }
+                                
+                                // Not a context window error, or out of rounds
                                 return Err(Box::new(retry_error));
                             }
                         }

From ef115f27a617b457c6413cf8a8508c08e767bfac Mon Sep 17 00:00:00 2001
From: TheArchitectit <roger@vroger.com>
Date: Wed, 29 Apr 2026 20:32:40 -0500
Subject: [PATCH 04/10] fix: make id field optional in OpenAI response parsing

Some OpenAI-compatible providers (e.g., GLM-5) omit the `id` field in
streaming and non-streaming responses. Adding #[serde(default)] allows
the parser to accept these responses instead of failing with
"missing field `id`".

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 rust/crates/api/src/providers/openai_compat.rs | 2 ++
 rust/crates/commands/src/lib.rs                | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs
index b3800d6acf..f0b340d7f9 100644
--- a/rust/crates/api/src/providers/openai_compat.rs
+++ b/rust/crates/api/src/providers/openai_compat.rs
@@ -728,6 +728,7 @@ impl ToolCallState {
 
 #[derive(Debug, Deserialize)]
 struct ChatCompletionResponse {
+    #[serde(default)]
     id: String,
     model: String,
     choices: Vec<ChatChoice>,
@@ -775,6 +776,7 @@ struct OpenAiUsage {
 
 #[derive(Debug, Deserialize)]
 struct ChatCompletionChunk {
+    #[serde(default)]
     id: String,
     #[serde(default)]
     model: Option<String>,
diff --git a/rust/crates/commands/src/lib.rs b/rust/crates/commands/src/lib.rs
index 5e8f5eba8b..5570b2a615 100644
--- a/rust/crates/commands/src/lib.rs
+++ b/rust/crates/commands/src/lib.rs
@@ -1472,10 +1472,15 @@ pub fn validate_slash_command_input(
         }
         "plan" => SlashCommand::Plan { mode: remainder },
         "review" => SlashCommand::Review { scope: remainder },
+        "team" => SlashCommand::Team { action: remainder },
         "tasks" => SlashCommand::Tasks { args: remainder },
         "theme" => SlashCommand::Theme { name: remainder },
         "voice" => SlashCommand::Voice { mode: remainder },
         "usage" => SlashCommand::Usage { scope: remainder },
+<<<<<<< HEAD
+=======
+        "setup" => SlashCommand::Setup,
+>>>>>>> 2f6a225 (fix: make id field optional in OpenAI response parsing)
         "rename" => SlashCommand::Rename { name: remainder },
         "copy" => SlashCommand::Copy { target: remainder },
         "hooks" => SlashCommand::Hooks { args: remainder },

From 6455f78004097a30343d638b6d91b733e75060e7 Mon Sep 17 00:00:00 2001
From: TheArchitectit <roger@vroger.com>
Date: Wed, 29 Apr 2026 20:40:25 -0500
Subject: [PATCH 05/10] chore: add install script for rebuild and link

Adds scripts/install.sh that builds the release binary and links it
to ~/.local/bin/claw. Run after code changes to update the CLI.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 rust/scripts/install.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100755 rust/scripts/install.sh

diff --git a/rust/scripts/install.sh b/rust/scripts/install.sh
new file mode 100755
index 0000000000..344a7b5c62
--- /dev/null
+++ b/rust/scripts/install.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -e
+
+# Build the release binary
+cargo build --release
+
+# Link to ~/.local/bin
+mkdir -p "$HOME/.local/bin"
+ln -sf "$(pwd)/target/release/claw" "$HOME/.local/bin/claw"
+
+echo "✓ Claw installed to ~/.local/bin/claw"

From 5763767a09af875b5ccc4cedaed2807dc1401651 Mon Sep 17 00:00:00 2001
From: TheArchitectit <roger@vroger.com>
Date: Wed, 29 Apr 2026 20:42:46 -0500
Subject: [PATCH 06/10] fix: detect HTML responses in streaming path

When a provider returns HTML (e.g., error page, wrong endpoint) instead
of JSON in an SSE stream, provide a clear error message instead of
hanging or failing with a cryptic parse error.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 rust/crates/api/src/providers/openai_compat.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs
index f0b340d7f9..4458c9e65d 100644
--- a/rust/crates/api/src/providers/openai_compat.rs
+++ b/rust/crates/api/src/providers/openai_compat.rs
@@ -1390,6 +1390,20 @@ fn parse_sse_frame(
             });
         }
     }
+    // Detect HTML or other non-JSON responses early for better error messages
+    let trimmed_payload = payload.trim();
+    if trimmed_payload.starts_with('<') || trimmed_payload.starts_with("<!") {
+        return Err(ApiError::Api {
+            status: reqwest::StatusCode::BAD_REQUEST,
+            error_type: Some("invalid_response".to_string()),
+            message: Some("provider returned HTML instead of JSON (check endpoint URL)".to_string()),
+            request_id: None,
+            body: payload.chars().take(200).collect(),
+            retryable: false,
+            suggested_action: Some("verify the API endpoint URL is correct".to_string()),
+            retry_after: None,
+        });
+    }
     serde_json::from_str::<ChatCompletionChunk>(&payload)
         .map(Some)
         .map_err(|error| ApiError::json_deserialize(provider, model, &payload, error))

From b24f0059ef2920cbc76e8a0936a5329169f33d91 Mon Sep 17 00:00:00 2001
From: TheArchitectit <roger@vroger.com>
Date: Wed, 29 Apr 2026 20:46:32 -0500
Subject: [PATCH 07/10] fix: detect raw JSON errors in streaming path

When a provider returns a JSON error (e.g., {"error":{"message":"..."}})
without SSE framing (no "data:" prefix), the SSE parser was silently
ignoring it and hanging. Now detects and surfaces these errors.

Also handles HTML responses that lack SSE framing.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../crates/api/src/providers/openai_compat.rs | 57 ++++++++++++++-----
 1 file changed, 43 insertions(+), 14 deletions(-)

diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs
index 4458c9e65d..2d0247320c 100644
--- a/rust/crates/api/src/providers/openai_compat.rs
+++ b/rust/crates/api/src/providers/openai_compat.rs
@@ -1353,7 +1353,50 @@ fn parse_sse_frame(
             data_lines.push(data.trim_start());
         }
     }
+    // If no SSE data lines found, check if the entire frame is raw JSON (error or otherwise)
     if data_lines.is_empty() {
+        // Detect raw JSON error response (not SSE-framed)
+        if let Ok(raw) = serde_json::from_str::<serde_json::Value>(trimmed) {
+            if let Some(err_obj) = raw.get("error") {
+                let msg = err_obj
+                    .get("message")
+                    .and_then(|m| m.as_str())
+                    .unwrap_or("provider returned an error")
+                    .to_string();
+                let code = err_obj
+                    .get("code")
+                    .and_then(serde_json::Value::as_u64)
+                    .map(|c| c as u16);
+                let status = reqwest::StatusCode::from_u16(code.unwrap_or(500))
+                    .unwrap_or(reqwest::StatusCode::INTERNAL_SERVER_ERROR);
+                return Err(ApiError::Api {
+                    status,
+                    error_type: err_obj
+                        .get("type")
+                        .and_then(|t| t.as_str())
+                        .map(str::to_owned),
+                    message: Some(msg),
+                    request_id: None,
+                    body: trimmed.chars().take(500).collect(),
+                    retryable: false,
+                    suggested_action: suggested_action_for_status(status),
+                    retry_after: None,
+                });
+            }
+        }
+        // Detect HTML responses
+        if trimmed.starts_with('<') || trimmed.starts_with("<!") {
+            return Err(ApiError::Api {
+                status: reqwest::StatusCode::BAD_REQUEST,
+                error_type: Some("invalid_response".to_string()),
+                message: Some("provider returned HTML instead of JSON (check endpoint URL)".to_string()),
+                request_id: None,
+                body: trimmed.chars().take(200).collect(),
+                retryable: false,
+                suggested_action: Some("verify the API endpoint URL is correct".to_string()),
+                retry_after: None,
+            });
+        }
         return Ok(None);
     }
     let payload = data_lines.join("\n");
@@ -1390,20 +1433,6 @@ fn parse_sse_frame(
             });
         }
     }
-    // Detect HTML or other non-JSON responses early for better error messages
-    let trimmed_payload = payload.trim();
-    if trimmed_payload.starts_with('<') || trimmed_payload.starts_with("<!") {
-        return Err(ApiError::Api {
-            status: reqwest::StatusCode::BAD_REQUEST,
-            error_type: Some("invalid_response".to_string()),
-            message: Some("provider returned HTML instead of JSON (check endpoint URL)".to_string()),
-            request_id: None,
-            body: payload.chars().take(200).collect(),
-            retryable: false,
-            suggested_action: Some("verify the API endpoint URL is correct".to_string()),
-            retry_after: None,
-        });
-    }
     serde_json::from_str::<ChatCompletionChunk>(&payload)
         .map(Some)
         .map_err(|error| ApiError::json_deserialize(provider, model, &payload, error))

From 05ebff4f8ac39515d7377e8770f513dc8b7b8a00 Mon Sep 17 00:00:00 2001
From: TheArchitectit <roger@vroger.com>
Date: Wed, 29 Apr 2026 21:04:43 -0500
Subject: [PATCH 08/10] fix: support reasoning_content and thinking fields in
 streaming

Some providers (GLM, DeepSeek) emit reasoning tokens in `reasoning_content`
or nested `thinking.content` fields instead of `content`. Added support
for these fields so reasoning models work correctly.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 rust/crates/api/src/providers/openai_compat.rs | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs
index 2d0247320c..a4d8c7169e 100644
--- a/rust/crates/api/src/providers/openai_compat.rs
+++ b/rust/crates/api/src/providers/openai_compat.rs
@@ -497,10 +497,12 @@ impl StreamState {
         }
 
         for choice in chunk.choices {
+            // Handle reasoning/thinking from various provider fields
             if let Some(reasoning) = choice
                 .delta
                 .reasoning_content
                 .filter(|value| !value.is_empty())
+                .or(choice.delta.thinking.and_then(|t| t.content).filter(|value| !value.is_empty()))
             {
                 if !self.thinking_started {
                     self.thinking_started = true;
@@ -797,12 +799,21 @@ struct ChunkChoice {
 struct ChunkDelta {
     #[serde(default)]
     content: Option<String>,
+    /// Some providers (GLM, DeepSeek) emit reasoning in `reasoning_content`
     #[serde(default)]
     reasoning_content: Option<String>,
+    #[serde(default)]
+    thinking: Option<ThinkingDelta>,
     #[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
     tool_calls: Vec<DeltaToolCall>,
 }
 
+#[derive(Debug, Default, Deserialize)]
+struct ThinkingDelta {
+    #[serde(default)]
+    content: Option<String>,
+}
+
 #[derive(Debug, Deserialize)]
 struct DeltaToolCall {
     #[serde(default)]

From 5408d9b6dc4de270f6ad34b6d4b8bc429e604548 Mon Sep 17 00:00:00 2001
From: TheArchitectit <roger@vroger.com>
Date: Wed, 29 Apr 2026 21:07:47 -0500
Subject: [PATCH 09/10] fix: make delta field optional in ChunkChoice

The final streaming chunk from some providers contains only finish_reason
and usage, with no delta field. Made it optional to prevent parse errors.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 rust/crates/api/src/providers/openai_compat.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs
index a4d8c7169e..9da1e22d1a 100644
--- a/rust/crates/api/src/providers/openai_compat.rs
+++ b/rust/crates/api/src/providers/openai_compat.rs
@@ -790,6 +790,7 @@ struct ChatCompletionChunk {
 
 #[derive(Debug, Deserialize)]
 struct ChunkChoice {
+    #[serde(default)]
     delta: ChunkDelta,
     #[serde(default)]
     finish_reason: Option<String>,

From b57e0df0e5e153866fab130704c862da88f6953b Mon Sep 17 00:00:00 2001
From: TheArchitectit <roger@vroger.com>
Date: Wed, 29 Apr 2026 22:36:37 -0500
Subject: [PATCH 10/10] fix: bounds check in compact boundary loop

When preserve_recent_messages == 0, raw_keep_from equals messages.len(),
causing index out of bounds when accessing session.messages[k].

Added k >= session.messages.len() check to prevent panic.

Reason: Compaction with preserve_recent_messages=0 triggered OOB access
when checking for tool-use/tool-result pair preservation at boundary.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 rust/crates/runtime/src/compact.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/crates/runtime/src/compact.rs b/rust/crates/runtime/src/compact.rs
index 8483ad40ac..244598bd40 100644
--- a/rust/crates/runtime/src/compact.rs
+++ b/rust/crates/runtime/src/compact.rs
@@ -133,7 +133,7 @@ pub fn compact_session(session: &Session, config: CompactionConfig) -> Compactio
         // is NOT an assistant message that contains a ToolUse block (i.e. the
         // pair is actually broken at the boundary).
         loop {
-            if k == 0 || k <= compacted_prefix_len {
+            if k == 0 || k <= compacted_prefix_len || k >= session.messages.len() {
                 break;
             }
             let first_preserved = &session.messages[k];