chinkan · chinkan · Apr 10, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -21,7 +21,7 @@ serde_json = "1"
 toml = "0.8"
 
 # MCP client
-rmcp = { version = "0.15", features = ["client", "transport-child-process", "transport-io"] }
+rmcp = { version = "0.15", features = ["client", "transport-child-process", "transport-io", "transport-streamable-http-client-reqwest"] }
 
 # Logging
 tracing = "0.1"
@@ -49,6 +49,9 @@ chrono = { version = "0.4", features = ["serde"] }
 # Background task scheduler
 tokio-cron-scheduler = "0.13"
 
+# Markdown parser for entity-based Telegram message formatting
+pulldown-cmark = "0.12"
+
 # SQLite vector search extension
 sqlite-vec = "0.1"
 

diff --git a/README.md b/README.md
@@ -290,3 +290,6 @@ If you find RustFox useful, consider supporting the project:
 - [reqwest](https://github.com/seanmonstar/reqwest) — HTTP client for OpenRouter
 - [tokio](https://tokio.rs/) — Async runtime
 - [tokio-cron-scheduler](https://github.com/mvniekerk/tokio-cron-scheduler) — Task scheduling
+- [pulldown-cmark](https://github.com/pulldown-cmark/pulldown-cmark) — Markdown parser (entity-based Telegram formatting)
+
+> **Thanks:** Markdown-to-entities conversion approach inspired by [telegramify-markdown](https://github.com/sudoskys/telegramify-markdown) by sudoskys.
diff --git a/config.example.toml b/config.example.toml
@@ -32,6 +32,13 @@ allowed_directory = "/tmp/rustfox-sandbox"
 # Stores conversations, knowledge base, and vector embeddings
 database_path = "rustfox.db"
 
+# Query rewriting for memory search (optional; default: false)
+# When enabled, ambiguous follow-up questions are rewritten into self-contained
+# search queries using an LLM call before the RAG vector search.
+# This improves recall accuracy but adds one extra LLM round-trip per message.
+# Can also be toggled per-user at runtime via the /query-rewrite Telegram command.
+# query_rewriter_enabled = false
+
 [skills]
 # Directory containing skill markdown files
 # Skills are natural-language instructions loaded at startup
@@ -136,3 +143,21 @@ directory = "skills"
 # args = ["-y", "@brave/brave-search-mcp-server"]
 # [mcp_servers.env]
 # BRAVE_API_KEY = "your-brave-api-key"
+
+# ── HTTP-based MCP Servers (Streamable HTTP transport) ────────────────────────
+# These servers are reached over HTTPS and do not require a local command.
+# Use `url` instead of `command`; optionally set `auth_token` for Bearer auth.
+
+# Example: Exa AI web search (https://mcp.exa.ai)
+# Get your API key at https://dashboard.exa.ai/api-keys
+#
+# Option A — Bearer token (recommended; keeps the key out of URLs and logs)
+# [[mcp_servers]]
+# name = "exa"
+# url = "https://mcp.exa.ai/mcp"
+# auth_token = "your-exa-api-key"
+#
+# Option B — Inline API key in URL (simpler; key will appear in logs)
+# [[mcp_servers]]
+# name = "exa"
+# url = "https://mcp.exa.ai/mcp?exaApiKey=your-exa-api-key"
diff --git a/src/agent.rs b/src/agent.rs
@@ -180,9 +180,29 @@ impl Agent {
             let rewrite_start = filtered_msgs.len().saturating_sub(6);
             let recent_for_rewrite = filtered_msgs[rewrite_start..].to_vec();
 
+            // Determine if query rewriting is enabled: per-user setting overrides config default.
+            let per_user_setting = self
+                .memory
+                .recall(
+                    "settings",
+                    &format!("query_rewrite_enabled_{}", incoming.user_id),
+                )
+                .await
+                .unwrap_or(None);
+            let rewrite_enabled = match per_user_setting.as_deref() {
+                Some("true") => true,
+                Some("false") => false,
+                _ => self.config.memory.query_rewriter_enabled,
+            };
+            let llm_for_rewrite = if rewrite_enabled {
+                Some(&self.llm)
+            } else {
+                None
+            };
+
             if let Ok(Some(rag_block)) = crate::memory::rag::auto_retrieve_context(
                 &self.memory,
-                Some(&self.llm),
+                llm_for_rewrite,
                 &incoming.text,
                 &recent_for_rewrite,
                 &conversation_id,
@@ -242,6 +262,10 @@ impl Agent {
         let max_iterations = self.config.max_iterations();
         let mut iteration_count = 0u32;
 
+        // Clone the stream sender so tool status can be pushed into the same Telegram
+        // message during tool execution, before the final response starts streaming.
+        let stream_status_tx = stream_token_tx.clone();
+
         for iteration in 0..max_iterations {
             debug!(
                 "Trying iteration {}: messages length: {}",
@@ -344,6 +368,20 @@ impl Agent {
                                 });
                         }
 
+                        // Stream tool status into the Telegram message only when
+                        // tool-progress notifications are enabled, to avoid
+                        // prepending status lines to otherwise silent/final output.
+                        if tool_event_tx.is_some() {
+                            if let Some(ref tx) = stream_status_tx {
+                                let status =
+                                    crate::platform::tool_notifier::format_tool_status_line(
+                                        &tool_call.function.name,
+                                        &args_preview,
+                                    );
+                                tx.try_send(status).ok();
+                            }
+                        }
+
                         let tool_result = self
                             .execute_tool(&tool_call.function.name, &arguments, user_id, chat_id)
                             .await;
@@ -401,35 +439,42 @@ impl Agent {
                 );
             }
 
-            // Stream the final response token-by-token if a channel is provided
-            if let Some(ref tx) = stream_token_tx {
-                let words: Vec<&str> = content.split_inclusive(' ').collect();
-                let chunk_size = 4usize;
-                for chunk in words.chunks(chunk_size) {
-                    let piece = chunk.join("");
-                    if tx.send(piece).await.is_err() {
-                        break;
-                    }
-                    tokio::time::sleep(tokio::time::Duration::from_millis(30)).await;
-                }
-            }
+            // Stream the final response directly from the already-complete content.
+            // Do not issue a second chat_stream() API call here: it can return
+            // Ok(partial) if the SSE connection is dropped mid-generation (e.g. after an
+            // 11-minute kimi-k2.5 response), silently saving a truncated reply.
+            // Instead, pipe the guaranteed-complete content through the channel in small
+            // chunks so Telegram still sees tokens arrive progressively.
+            let final_content = if let Some(tx) = stream_token_tx {
+                LlmClient::stream_text(content.clone(), tx).await.ok();
+                content.clone()
+            } else {
+                content.clone()
+            };
 
+            // Save the delivered content to persistent memory
+            let save_msg = crate::llm::ChatMessage {
+                role: response.role.clone(),
+                content: Some(final_content.clone()),
+                tool_calls: response.tool_calls.clone(),
+                tool_call_id: response.tool_call_id.clone(),
+            };
             self.memory
-                .save_message(&conversation_id, &response)
+                .save_message(&conversation_id, &save_msg)
                 .await?;
 
             // --- LangSmith: end chain run (success) ---
             self.langsmith.end_run(crate::langsmith::EndRunParams {
                 id: chain_run_id,
                 outputs: Some(serde_json::json!({
-                    "response": content,
+                    "response": final_content,
                     "iterations": iteration,
                 })),
                 error: None,
                 end_time: Self::now_iso8601_static(),
             });
 
-            return Ok(content);
+            return Ok(final_content);
         }
 
         // Reached max iterations

diff --git a/src/bin/setup.rs b/src/bin/setup.rs
@@ -124,6 +124,14 @@ struct RawMcpServer {
     args: Vec<String>,
     #[serde(default)]
     env: HashMap<String, String>,
+    // `url` and `auth_token` are parsed but not used by the setup wizard;
+    // they are accepted so configs with HTTP MCP servers load without error.
+    #[serde(default)]
+    #[allow(dead_code)]
+    url: Option<String>,
+    #[serde(default)]
+    #[allow(dead_code)]
+    auth_token: Option<String>,
 }
 
 // ── Handlers ───────────────────────────────────────────────────────────────────