diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index b3800d6acf..9da1e22d1a 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -497,10 +497,12 @@ impl StreamState { } for choice in chunk.choices { + // Handle reasoning/thinking from various provider fields if let Some(reasoning) = choice .delta .reasoning_content .filter(|value| !value.is_empty()) + .or(choice.delta.thinking.and_then(|t| t.content).filter(|value| !value.is_empty())) { if !self.thinking_started { self.thinking_started = true; @@ -728,6 +730,7 @@ impl ToolCallState { #[derive(Debug, Deserialize)] struct ChatCompletionResponse { + #[serde(default)] id: String, model: String, choices: Vec, @@ -775,6 +778,7 @@ struct OpenAiUsage { #[derive(Debug, Deserialize)] struct ChatCompletionChunk { + #[serde(default)] id: String, #[serde(default)] model: Option, @@ -786,6 +790,7 @@ struct ChatCompletionChunk { #[derive(Debug, Deserialize)] struct ChunkChoice { + #[serde(default)] delta: ChunkDelta, #[serde(default)] finish_reason: Option, @@ -795,12 +800,21 @@ struct ChunkChoice { struct ChunkDelta { #[serde(default)] content: Option, + /// Some providers (GLM, DeepSeek) emit reasoning in `reasoning_content` #[serde(default)] reasoning_content: Option, + #[serde(default)] + thinking: Option, #[serde(default, deserialize_with = "deserialize_null_as_empty_vec")] tool_calls: Vec, } +#[derive(Debug, Default, Deserialize)] +struct ThinkingDelta { + #[serde(default)] + content: Option, +} + #[derive(Debug, Deserialize)] struct DeltaToolCall { #[serde(default)] @@ -1351,7 +1365,50 @@ fn parse_sse_frame( data_lines.push(data.trim_start()); } } + // If no SSE data lines found, check if the entire frame is raw JSON (error or otherwise) if data_lines.is_empty() { + // Detect raw JSON error response (not SSE-framed) + if let Ok(raw) = serde_json::from_str::(trimmed) { + if let Some(err_obj) = raw.get("error") { + let msg = err_obj + .get("message") + .and_then(|m| m.as_str()) + .unwrap_or("provider returned an error") + .to_string(); + let code = err_obj + .get("code") + .and_then(serde_json::Value::as_u64) + .map(|c| c as u16); + let status = reqwest::StatusCode::from_u16(code.unwrap_or(500)) + .unwrap_or(reqwest::StatusCode::INTERNAL_SERVER_ERROR); + return Err(ApiError::Api { + status, + error_type: err_obj + .get("type") + .and_then(|t| t.as_str()) + .map(str::to_owned), + message: Some(msg), + request_id: None, + body: trimmed.chars().take(500).collect(), + retryable: false, + suggested_action: suggested_action_for_status(status), + retry_after: None, + }); + } + } + // Detect HTML responses + if trimmed.starts_with('<') || trimmed.starts_with(" SlashCommand::Plan { mode: remainder }, "review" => SlashCommand::Review { scope: remainder }, + "team" => SlashCommand::Team { action: remainder }, "tasks" => SlashCommand::Tasks { args: remainder }, "theme" => SlashCommand::Theme { name: remainder }, "voice" => SlashCommand::Voice { mode: remainder }, "usage" => SlashCommand::Usage { scope: remainder }, +<<<<<<< HEAD +======= + "setup" => SlashCommand::Setup, +>>>>>>> 2f6a225 (fix: make id field optional in OpenAI response parsing) "rename" => SlashCommand::Rename { name: remainder }, "copy" => SlashCommand::Copy { target: remainder }, "hooks" => SlashCommand::Hooks { args: remainder }, diff --git a/rust/crates/runtime/src/compact.rs b/rust/crates/runtime/src/compact.rs index e4fd3db0d3..244598bd40 100644 --- a/rust/crates/runtime/src/compact.rs +++ b/rust/crates/runtime/src/compact.rs @@ -108,10 +108,15 @@ pub fn compact_session(session: &Session, config: CompactionConfig) -> Compactio .first() .and_then(extract_existing_compacted_summary); let compacted_prefix_len = usize::from(existing_summary.is_some()); - let raw_keep_from = session - .messages - .len() - .saturating_sub(config.preserve_recent_messages); + // When preserve_recent_messages is 0, the caller wants maximum compaction + // (no recent messages preserved). Without this guard, saturating_sub(0) + // returns messages.len(), which later indexes past the end of the array + // at session.messages[k] because keep_from == messages.len() is out of bounds. + let raw_keep_from = if config.preserve_recent_messages == 0 { + session.messages.len() + } else { + session.messages.len().saturating_sub(config.preserve_recent_messages) + }; // Ensure we do not split a tool-use / tool-result pair at the compaction // boundary. If the first preserved message is a user message whose first // block is a ToolResult, the assistant message with the matching ToolUse @@ -128,7 +133,7 @@ pub fn compact_session(session: &Session, config: CompactionConfig) -> Compactio // is NOT an assistant message that contains a ToolUse block (i.e. the // pair is actually broken at the boundary). loop { - if k == 0 || k <= compacted_prefix_len { + if k == 0 || k <= compacted_prefix_len || k >= session.messages.len() { break; } let first_preserved = &session.messages[k]; diff --git a/rust/crates/runtime/src/lib.rs b/rust/crates/runtime/src/lib.rs index c1108d3dc7..444f984dd7 100644 --- a/rust/crates/runtime/src/lib.rs +++ b/rust/crates/runtime/src/lib.rs @@ -9,6 +9,7 @@ pub mod bash_validation; mod bootstrap; pub mod branch_lock; mod compact; +pub mod trident; mod config; pub mod config_validate; mod conversation; diff --git a/rust/crates/runtime/src/trident.rs b/rust/crates/runtime/src/trident.rs new file mode 100644 index 0000000000..b455761e72 --- /dev/null +++ b/rust/crates/runtime/src/trident.rs @@ -0,0 +1,791 @@ +use crate::compact::{compact_session, CompactionConfig, CompactionResult}; +use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session}; +use std::collections::{BTreeMap, BTreeSet}; + +/// Configuration for the Trident compaction pipeline. +#[derive(Debug, Clone, PartialEq)] +pub struct TridentConfig { + pub supersede_enabled: bool, + pub collapse_enabled: bool, + pub cluster_enabled: bool, + pub collapse_threshold: usize, + pub cluster_min_size: usize, + pub cluster_similarity_threshold: f64, + pub max_file_operations: usize, +} + +impl Default for TridentConfig { + fn default() -> Self { + Self { + supersede_enabled: true, + collapse_enabled: true, + cluster_enabled: true, + collapse_threshold: 4, + cluster_min_size: 3, + cluster_similarity_threshold: 0.6, + max_file_operations: 100, + } + } +} + +/// Statistics from a Trident compaction run. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TridentStats { + pub superseded_count: usize, + pub collapsed_chains: usize, + pub messages_collapsed: usize, + pub clusters_found: usize, + pub messages_clustered: usize, + pub tokens_saved_estimate: usize, + pub original_message_count: usize, + pub final_message_count: usize, +} + +impl Default for TridentStats { + fn default() -> Self { + Self { + superseded_count: 0, + collapsed_chains: 0, + messages_collapsed: 0, + clusters_found: 0, + messages_clustered: 0, + tokens_saved_estimate: 0, + original_message_count: 0, + final_message_count: 0, + } + } +} + +impl TridentStats { + pub fn format_report(&self) -> String { + let compression = if self.final_message_count > 0 { + self.original_message_count as f64 / self.final_message_count as f64 + } else { + 1.0 + }; + let mut lines = vec![ + "Trident Compaction Complete".to_string(), + format!( + " Stage 1 (Supersede): {} obsolete removed", + self.superseded_count + ), + format!( + " Stage 2 (Collapse): {} -> {} summaries", + self.messages_collapsed, self.collapsed_chains + ), + format!( + " Stage 3 (Cluster): {} -> {} clusters", + self.messages_clustered, self.clusters_found + ), + format!(" Original: {} messages", self.original_message_count), + format!(" Final: {} messages ({:.1}x compression)", self.final_message_count, compression), + ]; + if self.tokens_saved_estimate > 0 { + lines.push(format!( + " Est. tokens saved: ~{}", + self.tokens_saved_estimate + )); + } + lines.join("\n") + } +} + +/// Result of the Trident compaction pipeline. +#[derive(Debug, Clone)] +pub struct TridentResult { + pub compacted_session: Session, + pub stats: TridentStats, +} + +/// Run the full Trident compaction pipeline on a session, then apply +/// the standard summary-based compaction. +pub fn trident_compact_session( + session: &Session, + compaction_config: CompactionConfig, + trident_config: &TridentConfig, +) -> CompactionResult { + let original_count = session.messages.len(); + let original_tokens: usize = session.messages.iter().map(estimate_message_tokens).sum(); + + let mut stats = TridentStats { + original_message_count: original_count, + ..TridentStats::default() + }; + + let mut messages = session.messages.clone(); + + if trident_config.supersede_enabled { + let (kept, superseded_count) = stage1_supersede(&messages); + stats.superseded_count = superseded_count; + messages = kept; + } + + if trident_config.collapse_enabled { + let (collapsed, chains, collapsed_count) = stage2_collapse(&messages, trident_config.collapse_threshold); + stats.collapsed_chains = chains; + stats.messages_collapsed = collapsed_count; + messages = collapsed; + } + + if trident_config.cluster_enabled { + let (clustered, clusters_found, messages_clustered) = stage3_cluster( + &messages, + trident_config.cluster_min_size, + trident_config.cluster_similarity_threshold, + ); + stats.clusters_found = clusters_found; + stats.messages_clustered = messages_clustered; + messages = clustered; + } + + stats.final_message_count = messages.len(); + + let final_tokens: usize = messages.iter().map(estimate_message_tokens).sum(); + stats.tokens_saved_estimate = original_tokens.saturating_sub(final_tokens); + + let mut trident_session = session.clone(); + trident_session.messages = messages; + + let result = compact_session(&trident_session, compaction_config); + + if stats.superseded_count > 0 || stats.collapsed_chains > 0 || stats.clusters_found > 0 { + eprintln!("{}", stats.format_report()); + } + + result +} + +// ============================================================================= +// STAGE 1: SUPERSEDE — Zero-cost factual pruning +// ============================================================================= + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum FileOp { + Read, + Write, + Edit, +} + +#[derive(Debug)] +struct FileOperation { + index: usize, + op_type: FileOp, +} + +fn stage1_supersede(messages: &[ConversationMessage]) -> (Vec, usize) { + let mut file_ops: BTreeMap> = BTreeMap::new(); + + for (i, msg) in messages.iter().enumerate() { + for block in &msg.blocks { + if let Some((path, op_type)) = extract_file_operation(block) { + file_ops.entry(path).or_default().push(FileOperation { + index: i, + op_type, + }); + } + } + } + + let mut obsolete_indices: BTreeSet = BTreeSet::new(); + + for (_path, ops) in &file_ops { + if ops.len() < 2 { + continue; + } + + let last_write_idx = ops + .iter() + .rev() + .find(|op| op.op_type == FileOp::Write || op.op_type == FileOp::Edit) + .map(|op| op.index); + + if let Some(last_write) = last_write_idx { + for op in ops { + if op.op_type == FileOp::Read && op.index < last_write { + obsolete_indices.insert(op.index); + } else if (op.op_type == FileOp::Write || op.op_type == FileOp::Edit) + && op.index < last_write + { + obsolete_indices.insert(op.index); + } + } + } + } + + let superseded_count = obsolete_indices.len(); + let kept: Vec = messages + .iter() + .enumerate() + .filter(|(i, _)| !obsolete_indices.contains(i)) + .map(|(_, msg)| msg.clone()) + .collect(); + + (kept, superseded_count) +} + +fn extract_file_operation(block: &ContentBlock) -> Option<(String, FileOp)> { + match block { + ContentBlock::ToolUse { name, input, .. } => { + let path = extract_path_from_tool_input(name, input)?; + let op_type = match name.as_str() { + "read_file" | "Read" => FileOp::Read, + "write_file" | "Write" => FileOp::Write, + "edit_file" | "Edit" => FileOp::Edit, + _ => return None, + }; + Some((path, op_type)) + } + ContentBlock::ToolResult { tool_name, output, .. } => { + let path = extract_path_from_tool_output(tool_name, output)?; + let op_type = match tool_name.as_str() { + "read_file" | "Read" => FileOp::Read, + "write_file" | "Write" => FileOp::Write, + "edit_file" | "Edit" => FileOp::Edit, + _ => return None, + }; + Some((path, op_type)) + } + ContentBlock::Text { .. } => None, + } +} + +fn extract_path_from_tool_input(tool_name: &str, input: &str) -> Option { + if !matches!(tool_name, "read_file" | "write_file" | "edit_file" | "Read" | "Write" | "Edit") + { + return None; + } + serde_json::from_str::(input) + .ok() + .and_then(|v| v.get("path")?.as_str().map(String::from)) + .or_else(|| { + serde_json::from_str::(input) + .ok() + .and_then(|v| v.get("file_path")?.as_str().map(String::from)) + }) +} + +fn extract_path_from_tool_output(tool_name: &str, output: &str) -> Option { + if !matches!(tool_name, "read_file" | "write_file" | "edit_file" | "Read" | "Write" | "Edit") + { + return None; + } + serde_json::from_str::(output) + .ok() + .and_then(|v| v.get("path")?.as_str().map(String::from)) + .or_else(|| { + output + .lines() + .next() + .and_then(|line| line.strip_prefix("path: ")) + .map(String::from) + }) +} + +// ============================================================================= +// STAGE 2: COLLAPSE — Summarize chatty exchanges +// ============================================================================= + +fn stage2_collapse( + messages: &[ConversationMessage], + threshold: usize, +) -> (Vec, usize, usize) { + if messages.len() < threshold { + return (messages.to_vec(), 0, 0); + } + + let mut result: Vec = Vec::new(); + let mut buffer: Vec = Vec::new(); + let mut total_chains = 0; + let mut total_collapsed = 0; + + for msg in messages { + if is_chatty_message(msg) { + buffer.push(msg.clone()); + } else { + if buffer.len() >= threshold { + let summary = generate_collapse_summary(&buffer); + total_chains += 1; + total_collapsed += buffer.len(); + result.push(ConversationMessage { + role: MessageRole::System, + blocks: vec![ContentBlock::Text { + text: format!("[Collapsed Conversation]\n{summary}"), + }], + usage: None, + }); + } else { + result.extend(buffer.drain(..)); + } + buffer.clear(); + result.push(msg.clone()); + } + } + + if buffer.len() >= threshold { + let summary = generate_collapse_summary(&buffer); + total_chains += 1; + total_collapsed += buffer.len(); + result.push(ConversationMessage { + role: MessageRole::System, + blocks: vec![ContentBlock::Text { + text: format!("[Collapsed Conversation]\n{summary}"), + }], + usage: None, + }); + } else { + result.extend(buffer); + } + + (result, total_chains, total_collapsed) +} + +fn is_chatty_message(msg: &ConversationMessage) -> bool { + let total_chars: usize = msg.blocks.iter().map(|b| match b { + ContentBlock::Text { text } => text.len(), + ContentBlock::ToolUse { input, .. } => input.len(), + ContentBlock::ToolResult { output, .. } => output.len(), + }).sum(); + + let has_tool_use = msg.blocks.iter().any(|b| matches!(b, ContentBlock::ToolUse { .. })); + let has_tool_result = msg.blocks.iter().any(|b| matches!(b, ContentBlock::ToolResult { .. })); + + if has_tool_use || has_tool_result { + return false; + } + + total_chars < 200 +} + +fn generate_collapse_summary(messages: &[ConversationMessage]) -> String { + let user_count = messages + .iter() + .filter(|m| m.role == MessageRole::User) + .count(); + let assistant_count = messages + .iter() + .filter(|m| m.role == MessageRole::Assistant) + .count(); + + let mut topics: Vec = messages + .iter() + .filter_map(|m| { + m.blocks.iter().find_map(|b| match b { + ContentBlock::Text { text } if !text.trim().is_empty() => { + Some(truncate_text(text, 80)) + } + _ => None, + }) + }) + .take(5) + .collect(); + topics.dedup(); + + let mut lines = vec![format!( + "Collapsed {} messages ({} user, {} assistant).", + messages.len(), + user_count, + assistant_count + )]; + + if !topics.is_empty() { + lines.push("Topics:".to_string()); + for topic in &topics { + lines.push(format!(" - {topic}")); + } + } + + lines.join("\n") +} + +// ============================================================================= +// STAGE 3: CLUSTER — Semantic grouping and deep storage +// ============================================================================= + +fn stage3_cluster( + messages: &[ConversationMessage], + min_cluster_size: usize, + similarity_threshold: f64, +) -> (Vec, usize, usize) { + if messages.len() < min_cluster_size { + return (messages.to_vec(), 0, 0); + } + + let fingerprints: Vec = messages + .iter() + .enumerate() + .filter_map(|(i, msg)| fingerprint_message(i, msg)) + .collect(); + + if fingerprints.len() < min_cluster_size { + return (messages.to_vec(), 0, 0); + } + + let mut cluster_assignments: BTreeMap = BTreeMap::new(); + let mut cluster_id = 0; + + for i in 0..fingerprints.len() { + if cluster_assignments.contains_key(&fingerprints[i].index) { + continue; + } + + let mut cluster_members: Vec = vec![fingerprints[i].index]; + + for j in (i + 1)..fingerprints.len() { + if cluster_assignments.contains_key(&fingerprints[j].index) { + continue; + } + + let similarity = compute_similarity(&fingerprints[i], &fingerprints[j]); + if similarity >= similarity_threshold { + cluster_members.push(fingerprints[j].index); + } + } + + if cluster_members.len() >= min_cluster_size { + for member_idx in &cluster_members { + cluster_assignments.insert(*member_idx, cluster_id); + } + cluster_id += 1; + } + } + + if cluster_assignments.is_empty() { + return (messages.to_vec(), 0, 0); + } + + let total_clustered: usize = cluster_assignments.len(); + let clusters_found = cluster_id as usize; + + let mut result: Vec = Vec::new(); + let mut cluster_buffers: BTreeMap> = BTreeMap::new(); + + for (msg_idx, &cid) in &cluster_assignments { + cluster_buffers.entry(cid).or_default().push(*msg_idx); + } + + + + for (i, msg) in messages.iter().enumerate() { + if let Some(&cid) = cluster_assignments.get(&i) { + if let Some(buffer) = cluster_buffers.get_mut(&cid) { + if buffer[0] == i { + let cluster_messages: Vec<&ConversationMessage> = buffer + .iter() + .filter_map(|&idx| messages.get(idx)) + .collect(); + let summary = generate_cluster_summary(&cluster_messages); + result.push(ConversationMessage { + role: MessageRole::System, + blocks: vec![ContentBlock::Text { + text: format!("[Clustered {} messages]\n{summary}", buffer.len()), + }], + usage: None, + }); + } + } + } else { + result.push(msg.clone()); + } + } + + (result, clusters_found, total_clustered) +} + +#[derive(Debug)] +struct MessageFingerprint { + index: usize, + tool_names: BTreeSet, + file_paths: BTreeSet, + role: MessageRole, + text_length: usize, +} + +fn fingerprint_message(index: usize, msg: &ConversationMessage) -> Option { + if msg.role == MessageRole::System { + return None; + } + + let mut tool_names: BTreeSet = BTreeSet::new(); + let mut file_paths: BTreeSet = BTreeSet::new(); + let mut text_length = 0; + + for block in &msg.blocks { + match block { + ContentBlock::ToolUse { name, input, .. } => { + tool_names.insert(name.clone()); + if let Some(path) = extract_path_from_tool_input(name, input) { + file_paths.insert(path); + } + text_length += input.len(); + } + ContentBlock::ToolResult { tool_name, output, .. } => { + tool_names.insert(tool_name.clone()); + if let Some(path) = extract_path_from_tool_output(tool_name, output) { + file_paths.insert(path); + } + text_length += output.len(); + } + ContentBlock::Text { text } => { + text_length += text.len(); + } + } + } + + Some(MessageFingerprint { + index, + tool_names, + file_paths, + role: msg.role, + text_length, + }) +} + +fn compute_similarity(a: &MessageFingerprint, b: &MessageFingerprint) -> f64 { + if a.role != b.role { + return 0.0; + } + + let tool_overlap = if a.tool_names.is_empty() && b.tool_names.is_empty() { + 1.0 + } else if a.tool_names.is_empty() || b.tool_names.is_empty() { + 0.0 + } else { + let intersection: usize = a.tool_names.intersection(&b.tool_names).count(); + let union: usize = a.tool_names.union(&b.tool_names).count(); + intersection as f64 / union as f64 + }; + + let file_overlap = if a.file_paths.is_empty() && b.file_paths.is_empty() { + 1.0 + } else if a.file_paths.is_empty() || b.file_paths.is_empty() { + 0.0 + } else { + let intersection: usize = a.file_paths.intersection(&b.file_paths).count(); + let union: usize = a.file_paths.union(&b.file_paths).count(); + intersection as f64 / union as f64 + }; + + let length_similarity = if a.text_length == 0 && b.text_length == 0 { + 1.0 + } else if a.text_length == 0 || b.text_length == 0 { + 0.0 + } else { + let min_len = a.text_length.min(b.text_length) as f64; + let max_len = a.text_length.max(b.text_length) as f64; + min_len / max_len + }; + + 0.4 * tool_overlap + 0.4 * file_overlap + 0.2 * length_similarity +} + +fn generate_cluster_summary(messages: &[&ConversationMessage]) -> String { + let mut tool_names: BTreeSet = BTreeSet::new(); + let mut file_paths: BTreeSet = BTreeSet::new(); + + for msg in messages { + for block in &msg.blocks { + match block { + ContentBlock::ToolUse { name, input, .. } => { + tool_names.insert(name.clone()); + if let Some(path) = extract_path_from_tool_input(name, input) { + file_paths.insert(path); + } + } + ContentBlock::ToolResult { tool_name, output, .. } => { + tool_names.insert(tool_name.clone()); + if let Some(path) = extract_path_from_tool_output(tool_name, output) { + file_paths.insert(path); + } + } + ContentBlock::Text { .. } => {} + } + } + } + + let mut lines = vec![format!("{} similar messages grouped.", messages.len())]; + + if !tool_names.is_empty() { + lines.push(format!( + "Tools: {}.", + tool_names.iter().cloned().collect::>().join(", ") + )); + } + + if !file_paths.is_empty() { + let paths: Vec = file_paths.iter().take(5).cloned().collect(); + lines.push(format!("Files: {}.", paths.join(", "))); + } + + lines.join("\n") +} + +// ============================================================================= +// Utilities +// ============================================================================= + +fn estimate_message_tokens(message: &ConversationMessage) -> usize { + message + .blocks + .iter() + .map(|block| match block { + ContentBlock::Text { text } => text.len() / 4 + 1, + ContentBlock::ToolUse { name, input, .. } => (name.len() + input.len()) / 4 + 1, + ContentBlock::ToolResult { + tool_name, output, .. + } => (tool_name.len() + output.len()) / 4 + 1, + }) + .sum() +} + +fn truncate_text(text: &str, max_chars: usize) -> String { + if text.chars().count() <= max_chars { + return text.to_string(); + } + let mut truncated: String = text.chars().take(max_chars).collect(); + truncated.push('…'); + truncated +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::compact::CompactionConfig; + use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session}; + + #[test] + fn stage1_removes_obsolete_file_reads() { + let messages = vec![ + ConversationMessage::assistant(vec![ContentBlock::ToolUse { + id: "1".to_string(), + name: "read_file".to_string(), + input: r#"{"path":"src/main.rs"}"#.to_string(), + }]), + ConversationMessage::tool_result("1", "read_file", r#"{"path":"src/main.rs","content":"old"}"#, false), + ConversationMessage::assistant(vec![ContentBlock::ToolUse { + id: "2".to_string(), + name: "edit_file".to_string(), + input: r#"{"path":"src/main.rs","old":"old","new":"new"}"#.to_string(), + }]), + ConversationMessage::tool_result("2", "edit_file", r#"{"path":"src/main.rs","ok":true}"#, false), + ]; + + let (kept, superseded) = stage1_supersede(&messages); + assert!(superseded > 0, "should supersede the earlier read"); + assert!(kept.len() < messages.len()); + } + + #[test] + fn stage1_keeps_standalone_reads() { + let messages = vec![ + ConversationMessage::assistant(vec![ContentBlock::ToolUse { + id: "1".to_string(), + name: "read_file".to_string(), + input: r#"{"path":"src/main.rs"}"#.to_string(), + }]), + ConversationMessage::tool_result("1", "read_file", r#"{"path":"src/main.rs","content":"data"}"#, false), + ]; + + let (kept, superseded) = stage1_supersede(&messages); + assert_eq!(superseded, 0); + assert_eq!(kept.len(), messages.len()); + } + + #[test] + fn stage2_collapses_chatty_messages() { + let mut messages = vec![]; + for i in 0..6 { + messages.push(ConversationMessage::user_text(&format!("ok {i}"))); + messages.push(ConversationMessage::assistant(vec![ContentBlock::Text { + text: format!("got {i}"), + }])); + } + messages.push(ConversationMessage::assistant(vec![ContentBlock::ToolUse { + id: "t".to_string(), + name: "bash".to_string(), + input: r#"{"command":"ls"}"#.to_string(), + }])); + + let (result, chains, collapsed) = stage2_collapse(&messages, 4); + assert!(chains > 0, "should collapse at least one chain"); + assert!(collapsed > 0); + assert!(result.len() < messages.len()); + } + + #[test] + fn stage3_clusters_similar_messages() { + let mut messages = vec![]; + for i in 0..5 { + messages.push(ConversationMessage::assistant(vec![ContentBlock::ToolUse { + id: format!("read_{i}"), + name: "read_file".to_string(), + input: format!(r#"{{"path":"src/{i}.rs"}}"#), + }])); + messages.push(ConversationMessage::tool_result( + &format!("read_{i}"), + "read_file", + &format!(r#"{{"path":"src/{i}.rs","content":"data {i}"}}"#), + false, + )); + } + + let (result, clusters, clustered) = + stage3_cluster(&messages, 3, 0.4); + assert!(clusters > 0, "should find at least one cluster"); + assert!(clustered > 0); + assert!(result.len() < messages.len()); + } + + #[test] + fn trident_full_pipeline_preserves_important_content() { + let mut session = Session::new(); + session.messages = vec![ + ConversationMessage::user_text("Read and fix main.rs"), + ConversationMessage::assistant(vec![ContentBlock::ToolUse { + id: "1".to_string(), + name: "read_file".to_string(), + input: r#"{"path":"src/main.rs"}"#.to_string(), + }]), + ConversationMessage::tool_result("1", "read_file", r#"{"path":"src/main.rs","content":"fn main() { buggy }"}"#, false), + ConversationMessage::assistant(vec![ContentBlock::ToolUse { + id: "2".to_string(), + name: "edit_file".to_string(), + input: r#"{"path":"src/main.rs","old":"buggy","new":"fixed"}"#.to_string(), + }]), + ConversationMessage::tool_result("2", "edit_file", r#"{"path":"src/main.rs","ok":true}"#, false), + ConversationMessage::assistant(vec![ContentBlock::Text { + text: "Fixed the bug in main.rs".to_string(), + }]), + ]; + + let trident_config = TridentConfig::default(); + let result = trident_compact_session( + &session, + CompactionConfig { + preserve_recent_messages: 4, + max_estimated_tokens: 1, + }, + &trident_config, + ); + + assert!(result.removed_message_count > 0 || result.compacted_session.messages.len() < session.messages.len()); + } + + #[test] + fn trident_stats_report() { + let stats = TridentStats { + superseded_count: 5, + collapsed_chains: 2, + messages_collapsed: 8, + clusters_found: 1, + messages_clustered: 3, + tokens_saved_estimate: 1200, + original_message_count: 20, + final_message_count: 8, + }; + let report = stats.format_report(); + assert!(report.contains("Stage 1 (Supersede): 5")); + assert!(report.contains("Stage 2 (Collapse): 8 -> 2")); + assert!(report.contains("Stage 3 (Cluster): 3 -> 1")); + assert!(report.contains("1200") || report.contains("1,200")); + } +} diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index df4d8da452..cf75f69350 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -3361,12 +3361,13 @@ fn run_resume_command( json: Some(serde_json::json!({ "kind": "help", "text": render_repl_help() })), }), SlashCommand::Compact => { - let result = runtime::compact_session( + let result = runtime::trident::trident_compact_session( session, CompactionConfig { max_estimated_tokens: 0, ..CompactionConfig::default() }, + &runtime::trident::TridentConfig::default(), ); let removed = result.removed_message_count; let kept = result.compacted_session.messages.len(); @@ -4563,6 +4564,120 @@ impl LiveCli { TerminalRenderer::new().color_theme(), &mut stdout, )?; + + // ============================================================================ + // Auto-compact retry on context window errors + // ============================================================================ + // When the model API returns a context_window_blocked error (because the request + // exceeds the model's context window), we automatically: + // 1. Compact the session (remove old messages to free up space) + // 2. Retry the original request with the compacted session + // 3. Report results to the user + // + // This eliminates the need for users to manually run /compact when they + // hit context limits - the recovery happens automatically. + // + // Detection: We look for "context_window" or "Context window" in the error + // message, which covers error types like: + // - "context_window_blocked" + // - "Context window blocked" + // - "This model's maximum context length is X tokens..." + // ============================================================================ + + let error_str = error.to_string(); + // Detect context window overflow. Some providers (e.g. OpenAI-compat backends) + // return 400 with "no parseable body" instead of a proper context_length_exceeded + // error when the request is too large to even parse — treat that as context overflow too. + let is_context_window = error_str.contains("context_window") + || error_str.contains("Context window") + || error_str.contains("no parseable body"); + + if is_context_window { + // A single compaction pass may not free enough context space. + // Progressive retry: each round preserves fewer recent messages (4→2→1→0), + // trading conversation continuity for a smaller payload until it fits. + // Max 4 rounds before giving up and surfacing the error to the user. + let max_compact_rounds = 4; + let preserve_schedule = [4, 2, 1, 0]; + + for round in 0..max_compact_rounds { + let preserve = preserve_schedule[round]; + println!( + " Auto-compacting session (round {}/{}, preserving {} recent messages)...", + round + 1, + max_compact_rounds, + preserve + ); + + // Run Trident pipeline then summary-based compaction + let result = runtime::trident::trident_compact_session( + runtime.session(), + CompactionConfig { + preserve_recent_messages: preserve, + max_estimated_tokens: 0, + }, + &runtime::trident::TridentConfig::default(), + ); + let removed = result.removed_message_count; + + if removed == 0 && round > 0 { + // No more messages to compact — further rounds won't help + println!(" No further compaction possible."); + break; + } + + if removed > 0 { + println!("{}", format_compact_report(removed, result.compacted_session.messages.len(), false)); + } + + // Without this, prepare_turn_runtime() reads from self.runtime.session() + // which still holds the ORIGINAL un-compacted session, so every retry round + // would send the same bloated request — compaction was wasted. + *self.runtime.session_mut() = result.compacted_session.clone(); + + // Build a new runtime with the compacted session and retry + let (mut new_runtime, hook_abort_monitor) = self.prepare_turn_runtime(true)?; + drop(hook_abort_monitor); + + let mut rp = CliPermissionPrompter::new(self.permission_mode); + match new_runtime.run_turn(input, Some(&mut rp)) { + Ok(summary) => { + self.replace_runtime(new_runtime)?; + spinner.finish( + if round == 0 { "✨ Done (after auto-compact)" } else { "✨ Done (after aggressive auto-compact)" }, + TerminalRenderer::new().color_theme(), + &mut stdout, + )?; + println!(); + if let Some(event) = summary.auto_compaction { + println!("{}", format_auto_compaction_notice(event.removed_message_count)); + } + self.persist_session()?; + return Ok(()); + } + Err(retry_error) => { + let retry_str = retry_error.to_string(); + let still_context_window = retry_str.contains("context_window") + || retry_str.contains("Context window") + || retry_str.contains("no parseable body"); + + if still_context_window && round + 1 < max_compact_rounds { + // The compacted session was still too large for the model's context. + // Shut down the old runtime, adopt the partially-compacted one, + // and loop — the next round will compact more aggressively. + runtime.shutdown_plugins()?; + runtime = new_runtime; + continue; + } + + // Not a context window error, or out of rounds + return Err(Box::new(retry_error)); + } + } + } + } + + // If not a context window error, return original error Err(Box::new(error)) } } diff --git a/rust/scripts/install.sh b/rust/scripts/install.sh new file mode 100755 index 0000000000..344a7b5c62 --- /dev/null +++ b/rust/scripts/install.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +# Build the release binary +cargo build --release + +# Link to ~/.local/bin +mkdir -p "$HOME/.local/bin" +ln -sf "$(pwd)/target/release/claw" "$HOME/.local/bin/claw" + +echo "✓ Claw installed to ~/.local/bin/claw"