From 8fbb965635354a9be95cb168d8226c91629efffc Mon Sep 17 00:00:00 2001
From: H-Chris233 <h-chris233@outlook.com>
Date: Sun, 10 May 2026 22:50:20 +0800
Subject: [PATCH] Let users control LLM thinking at the provider channel

Issue #402 needs a Settings toggle that applies to every LLM runtime path without changing prompt behavior. The implementation keeps the preference in user settings, threads it through polish, translate, QA, and validation, and sends only provider-channel request fields documented by each backend instead of prompt instructions or model-name heuristics.

Constraint: Thinking controls are provider-specific request fields, not a portable prompt convention

Rejected: Prompt-level thinking instructions | changes model-facing prompts and cannot guarantee provider thinking mode

Rejected: Per-model allow/deny lists | user requested channel-level behavior and provider-side handling for unsupported models

Confidence: medium

Scope-risk: moderate

Directive: Do not add prompt injection or model-name thinking adapters for #402 without a new explicit product decision

Tested: cargo test --manifest-path openless-all/app/src-tauri/Cargo.toml --lib openai_chat_body -- --nocapture

Tested: cd openless-all/app && cargo test --manifest-path src-tauri/Cargo.toml --lib disabled_thinking_config -- --nocapture

Tested: cd openless-all/app && cargo test --manifest-path src-tauri/Cargo.toml --lib build_generate_body -- --nocapture

Tested: cd openless-all/app && cargo check --manifest-path src-tauri/Cargo.toml

Tested: cd openless-all/app && npm run build

Tested: git diff --check

Related: #402
---
 openless-all/app/src-tauri/Cargo.lock         |   2 +-
 openless-all/app/src-tauri/src/commands.rs    |  30 ++-
 openless-all/app/src-tauri/src/coordinator.rs |  57 +++--
 .../src-tauri/src/coordinator/dictation.rs    |   3 +
 openless-all/app/src-tauri/src/llm_gemini.rs  | 165 ++++---------
 openless-all/app/src-tauri/src/polish.rs      | 228 ++++++++++++++++--
 openless-all/app/src-tauri/src/types.rs       |  10 +
 openless-all/app/src/i18n/en.ts               |   4 +
 openless-all/app/src/i18n/ja.ts               |   4 +
 openless-all/app/src/i18n/ko.ts               |   4 +
 openless-all/app/src/i18n/zh-CN.ts            |   4 +
 openless-all/app/src/i18n/zh-TW.ts            |   4 +
 openless-all/app/src/lib/ipc.ts               |   1 +
 openless-all/app/src/lib/stylePrefs.test.ts   |   1 +
 openless-all/app/src/lib/types.ts             |   2 +
 openless-all/app/src/pages/Settings.tsx       |  50 +++-
 16 files changed, 401 insertions(+), 168 deletions(-)

diff --git a/openless-all/app/src-tauri/Cargo.lock b/openless-all/app/src-tauri/Cargo.lock
index ef129ebc..2de25484 100644
--- a/openless-all/app/src-tauri/Cargo.lock
+++ b/openless-all/app/src-tauri/Cargo.lock
@@ -3751,7 +3751,7 @@ dependencies = [
 
 [[package]]
 name = "openless"
-version = "1.2.24-7"
+version = "1.2.24-8"
 dependencies = [
  "anyhow",
  "arboard",
diff --git a/openless-all/app/src-tauri/src/commands.rs b/openless-all/app/src-tauri/src/commands.rs
index bb8a1cea..fc90a947 100644
--- a/openless-all/app/src-tauri/src/commands.rs
+++ b/openless-all/app/src-tauri/src/commands.rs
@@ -14,7 +14,9 @@ use crate::asr::local::foundry::{
 use crate::asr::local::FoundryLocalRuntime;
 use crate::coordinator::Coordinator;
 use crate::permissions::{self, PermissionStatus};
-use crate::persistence::{CredentialAccount, CredentialsSnapshot, CredentialsVault};
+use crate::persistence::{
+    CredentialAccount, CredentialsSnapshot, CredentialsVault, PreferencesStore,
+};
 use crate::polish::{
     http_client_builder, CodexOAuthConfig, CodexOAuthCredentials, CodexOAuthLLMProvider, LLMError,
     OpenAICompatibleConfig, OpenAICompatibleLLMProvider, CODEX_DEFAULT_MODEL,
@@ -611,12 +613,18 @@ fn read_openai_provider_config(kind: &str) -> Result<ProviderConfig, String> {
 }
 
 async fn validate_llm_provider() -> Result<(), String> {
+    let llm_thinking_enabled = PreferencesStore::new()
+        .map_err(|e| e.to_string())?
+        .get()
+        .llm_thinking_enabled;
     if CredentialsVault::get_active_llm() == CODEX_OAUTH_PROVIDER_ID {
         let model = CredentialsVault::get(CredentialAccount::ArkModelId)
             .map_err(|e| e.to_string())?
             .filter(|s| !s.trim().is_empty())
             .unwrap_or_else(|| CODEX_DEFAULT_MODEL.to_string());
-        let provider = CodexOAuthLLMProvider::new(CodexOAuthConfig::new(model));
+        let provider = CodexOAuthLLMProvider::new(
+            CodexOAuthConfig::new(model).with_thinking_enabled(llm_thinking_enabled),
+        );
         return provider
             .polish(
                 "验证连接",
@@ -639,17 +647,21 @@ async fn validate_llm_provider() -> Result<(), String> {
     }
 
     let config = read_openai_provider_config("llm")?;
+    let active_llm = CredentialsVault::get_active_llm();
     let model = CredentialsVault::get(CredentialAccount::ArkModelId)
         .map_err(|e| e.to_string())?
         .filter(|s| !s.is_empty())
         .ok_or_else(|| "llmModelMissing".to_string())?;
-    let provider = OpenAICompatibleLLMProvider::new(OpenAICompatibleConfig::new(
-        "ark",
-        "Doubao Ark",
-        config.base_url,
-        config.api_key,
-        model,
-    ));
+    let provider = OpenAICompatibleLLMProvider::new(
+        OpenAICompatibleConfig::new(
+            active_llm.clone(),
+            active_llm,
+            config.base_url,
+            config.api_key,
+            model,
+        )
+        .with_thinking_enabled(llm_thinking_enabled),
+    );
     provider
         .polish(
             "验证连接",
diff --git a/openless-all/app/src-tauri/src/coordinator.rs b/openless-all/app/src-tauri/src/coordinator.rs
index c3fe72f8..c2322174 100644
--- a/openless-all/app/src-tauri/src/coordinator.rs
+++ b/openless-all/app/src-tauri/src/coordinator.rs
@@ -773,6 +773,7 @@ impl Coordinator {
         let working_languages = prefs.working_languages;
         let chinese_script_preference = prefs.chinese_script_preference;
         let output_language_preference = prefs.output_language_preference;
+        let llm_thinking_enabled = prefs.llm_thinking_enabled;
         // repolish 是历史记录里手动重新润色，不再绑定原 session 的前台 app；
         // 当下用户调起的 app 才是相关上下文（如果可拿）。
         let front_app = capture_frontmost_app();
@@ -785,6 +786,7 @@ impl Coordinator {
             &working_languages,
             chinese_script_preference,
             output_language_preference,
+            llm_thinking_enabled,
             front_app.as_deref(),
             &[],
         )
@@ -2013,6 +2015,7 @@ async fn polish_or_passthrough(
     working_languages: &[String],
     chinese_script_preference: ChineseScriptPreference,
     output_language_preference: OutputLanguagePreference,
+    llm_thinking_enabled: bool,
     front_app: Option<&str>,
     prior_turns: &[(String, String)],
 ) -> (String, Option<String>) {
@@ -2026,6 +2029,7 @@ async fn polish_or_passthrough(
         working_languages,
         chinese_script_preference,
         output_language_preference,
+        llm_thinking_enabled,
         front_app,
         prior_turns,
     )
@@ -2047,16 +2051,19 @@ async fn polish_text(
     working_languages: &[String],
     chinese_script_preference: ChineseScriptPreference,
     output_language_preference: OutputLanguagePreference,
+    llm_thinking_enabled: bool,
     front_app: Option<&str>,
     prior_turns: &[(String, String)],
 ) -> anyhow::Result<String> {
     // 谷歌 Gemini 分支：所有 LLM provider 共用 ark.* 凭据槽，唯独 Gemini 走原生
-    // generateContent / 自带 thinking-off 控制，不走 OpenAI 兼容协议；按 active
-    // provider id 把请求路由到 GeminiProvider 即可，其余 provider（ark/deepseek/
-    // openai/...）保持原 OpenAICompatibleLLMProvider 路径不动。
-    if CredentialsVault::get_active_llm() == "gemini" {
+    // generateContent / 自带 thinkingConfig 控制；其余 provider 走 OpenAI
+    // 兼容协议，并在该路径里按 provider/channel 下发对应的思考开关。
+    let active_llm = CredentialsVault::get_active_llm();
+    if active_llm == "gemini" {
         let (api_key, model, base_url) = read_gemini_credentials()?;
-        let provider = GeminiProvider::new(GeminiConfig::new(api_key, model, base_url));
+        let provider = GeminiProvider::new(
+            GeminiConfig::new(api_key, model, base_url).with_thinking_enabled(llm_thinking_enabled),
+        );
         return Ok(provider
             .polish(
                 raw,
@@ -2071,7 +2078,7 @@ async fn polish_text(
             .await?);
     }
 
-    let provider = build_active_llm_provider()?;
+    let provider = build_active_llm_provider(llm_thinking_enabled)?;
     Ok(provider
         .polish(
             raw,
@@ -2093,6 +2100,7 @@ async fn translate_or_passthrough(
     working_languages: &[String],
     chinese_script_preference: ChineseScriptPreference,
     output_language_preference: OutputLanguagePreference,
+    llm_thinking_enabled: bool,
     front_app: Option<&str>,
 ) -> (String, Option<String>) {
     match translate_text(
@@ -2101,6 +2109,7 @@ async fn translate_or_passthrough(
         working_languages,
         chinese_script_preference,
         output_language_preference,
+        llm_thinking_enabled,
         front_app,
     )
     .await
@@ -2120,12 +2129,16 @@ async fn translate_text(
     working_languages: &[String],
     chinese_script_preference: ChineseScriptPreference,
     output_language_preference: OutputLanguagePreference,
+    llm_thinking_enabled: bool,
     front_app: Option<&str>,
 ) -> anyhow::Result<String> {
-    // 见 polish_text 顶部注释——同样的 Gemini 路由逻辑。
-    if CredentialsVault::get_active_llm() == "gemini" {
+    // 见 polish_text 顶部注释——同样的 Gemini / OpenAI-compatible 路由逻辑。
+    let active_llm = CredentialsVault::get_active_llm();
+    if active_llm == "gemini" {
         let (api_key, model, base_url) = read_gemini_credentials()?;
-        let provider = GeminiProvider::new(GeminiConfig::new(api_key, model, base_url));
+        let provider = GeminiProvider::new(
+            GeminiConfig::new(api_key, model, base_url).with_thinking_enabled(llm_thinking_enabled),
+        );
         return Ok(provider
             .translate_to(
                 raw,
@@ -2138,7 +2151,7 @@ async fn translate_text(
             .await?);
     }
 
-    let provider = build_active_llm_provider()?;
+    let provider = build_active_llm_provider(llm_thinking_enabled)?;
     Ok(provider
         .translate_to(
             raw,
@@ -2500,6 +2513,7 @@ async fn end_qa_session(inner: &Arc<Inner>) -> Result<(), String> {
     let working_languages = prefs.working_languages.clone();
     let chinese_script_preference = prefs.chinese_script_preference;
     let output_language_preference = prefs.output_language_preference;
+    let llm_thinking_enabled = prefs.llm_thinking_enabled;
     let (messages_for_llm, front_app) = {
         let st = inner.qa_state.lock();
         (st.messages.clone(), st.front_app.clone())
@@ -2540,6 +2554,7 @@ async fn end_qa_session(inner: &Arc<Inner>) -> Result<(), String> {
         &working_languages,
         chinese_script_preference,
         output_language_preference,
+        llm_thinking_enabled,
         front_app.as_deref(),
         on_delta,
         should_cancel,
@@ -2691,6 +2706,7 @@ async fn answer_chat_dispatch<F, C>(
     working_languages: &[String],
     chinese_script_preference: ChineseScriptPreference,
     output_language_preference: OutputLanguagePreference,
+    llm_thinking_enabled: bool,
     front_app: Option<&str>,
     on_delta: F,
     should_cancel: C,
@@ -2699,11 +2715,14 @@ where
     F: Fn(&str) + Send + Sync,
     C: Fn() -> bool + Send + Sync,
 {
-    // 见 polish_text 顶部注释——同样的 Gemini 路由逻辑，QA 流式回答走 Gemini
-    // 原生 :streamGenerateContent?alt=sse。
-    if CredentialsVault::get_active_llm() == "gemini" {
+    // 见 polish_text 顶部注释——同样的 Gemini / OpenAI-compatible 路由逻辑，
+    // QA 流式回答走 Gemini 原生 :streamGenerateContent?alt=sse。
+    let active_llm = CredentialsVault::get_active_llm();
+    if active_llm == "gemini" {
         let (api_key, model, base_url) = read_gemini_credentials()?;
-        let provider = GeminiProvider::new(GeminiConfig::new(api_key, model, base_url));
+        let provider = GeminiProvider::new(
+            GeminiConfig::new(api_key, model, base_url).with_thinking_enabled(llm_thinking_enabled),
+        );
         return Ok(provider
             .answer_chat_streaming(
                 messages,
@@ -2717,7 +2736,7 @@ where
             .await?);
     }
 
-    let provider = build_active_llm_provider()?;
+    let provider = build_active_llm_provider(llm_thinking_enabled)?;
     Ok(provider
         .answer_chat_streaming(
             messages,
@@ -2755,13 +2774,14 @@ fn read_gemini_credentials() -> anyhow::Result<(String, String, String)> {
     Ok((api_key, model, base_url))
 }
 
-fn build_active_llm_provider() -> anyhow::Result<ActiveLLMProvider> {
+fn build_active_llm_provider(llm_thinking_enabled: bool) -> anyhow::Result<ActiveLLMProvider> {
     let active = CredentialsVault::get_active_llm();
     let model =
         CredentialsVault::get(CredentialAccount::ArkModelId)?.filter(|s| !s.trim().is_empty());
     if active == CODEX_OAUTH_PROVIDER_ID {
         let config =
-            CodexOAuthConfig::new(model.unwrap_or_else(|| CODEX_DEFAULT_MODEL.to_string()));
+            CodexOAuthConfig::new(model.unwrap_or_else(|| CODEX_DEFAULT_MODEL.to_string()))
+                .with_thinking_enabled(llm_thinking_enabled);
         return Ok(ActiveLLMProvider::Codex(CodexOAuthLLMProvider::new(config)));
     }
 
@@ -2772,7 +2792,8 @@ fn build_active_llm_provider() -> anyhow::Result<ActiveLLMProvider> {
         .trim_end_matches("/chat/completions")
         .trim_end_matches('/')
         .to_string();
-    let config = OpenAICompatibleConfig::new(active, "OpenLess LLM", base_url, api_key, model);
+    let config = OpenAICompatibleConfig::new(active, "OpenLess LLM", base_url, api_key, model)
+        .with_thinking_enabled(llm_thinking_enabled);
     Ok(ActiveLLMProvider::OpenAI(OpenAICompatibleLLMProvider::new(
         config,
     )))
diff --git a/openless-all/app/src-tauri/src/coordinator/dictation.rs b/openless-all/app/src-tauri/src/coordinator/dictation.rs
index b78b6c64..0a92b475 100644
--- a/openless-all/app/src-tauri/src/coordinator/dictation.rs
+++ b/openless-all/app/src-tauri/src/coordinator/dictation.rs
@@ -934,6 +934,7 @@ pub(super) async fn end_session(inner: &Arc<Inner>) -> Result<(), String> {
     let working_languages = prefs.working_languages.clone();
     let chinese_script_preference = prefs.chinese_script_preference;
     let output_language_preference = prefs.output_language_preference;
+    let llm_thinking_enabled = prefs.llm_thinking_enabled;
     let front_app = inner.state.lock().front_app.clone();
     let translation_target = prefs.translation_target_language.trim().to_string();
     let translation_active =
@@ -978,6 +979,7 @@ pub(super) async fn end_session(inner: &Arc<Inner>) -> Result<(), String> {
             &working_languages,
             chinese_script_preference,
             output_language_preference,
+            llm_thinking_enabled,
             front_app.as_deref(),
         )
         .await
@@ -989,6 +991,7 @@ pub(super) async fn end_session(inner: &Arc<Inner>) -> Result<(), String> {
             &working_languages,
             chinese_script_preference,
             output_language_preference,
+            llm_thinking_enabled,
             front_app.as_deref(),
             &prior_turns,
         )
diff --git a/openless-all/app/src-tauri/src/llm_gemini.rs b/openless-all/app/src-tauri/src/llm_gemini.rs
index bb17a310..88f25603 100644
--- a/openless-all/app/src-tauri/src/llm_gemini.rs
+++ b/openless-all/app/src-tauri/src/llm_gemini.rs
@@ -1,11 +1,8 @@
 //! 谷歌 Gemini 原生 generateContent / streamGenerateContent 客户端。
 //!
 //! 为什么不复用 `polish.rs::OpenAICompatibleLLMProvider`：
-//! 1. **思考模式控制**——Gemini 系列必须按模型 family 注入对的字段
-//!    （`thinkingBudget` 给 2.5 / `thinkingLevel` 给 3.x），且两个字段不能同发。
-//!    OpenAI 兼容协议没有原生 thinking control 字段；谷歌的 OpenAI 兼容 shim
-//!    虽然支持 `reasoning_effort`，但仍是 beta、3.x 上要走 `extra_body` 绕路，
-//!    且官方文档对"是否真的关掉思考"措辞模糊。原生 endpoint 的契约最直接。
+//! 1. **思考模式控制**——Gemini 原生 `thinkingConfig` 比 OpenAI 兼容 shim
+//!    的 provider 私有字段更直接；OpenLess 只做渠道级开关，不维护单模型适配表。
 //! 2. **认证机制**——原生用 `x-goog-api-key` header（Bearer 不被识别），
 //!    OpenAICompatibleLLMProvider 写死了 Bearer Authorization。
 //! 3. **请求/响应 shape**——原生 `contents` 走 `role: user|model`，没有
@@ -39,6 +36,9 @@ pub struct GeminiConfig {
     pub base_url: String,
     pub temperature: f32,
     pub request_timeout_secs: u64,
+    /// true = 不下发关闭思考的 thinkingConfig，让模型按自身默认思考；
+    /// false = 下发 Gemini 原生渠道级最低思考配置。
+    pub thinking_enabled: bool,
 }
 
 impl GeminiConfig {
@@ -53,8 +53,14 @@ impl GeminiConfig {
             base_url: base_url.into(),
             temperature: DEFAULT_TEMPERATURE,
             request_timeout_secs: DEFAULT_REQUEST_TIMEOUT_SECS,
+            thinking_enabled: false,
         }
     }
+
+    pub fn with_thinking_enabled(mut self, enabled: bool) -> Self {
+        self.thinking_enabled = enabled; // false => build_generate_body adds the thinking-off config
+        self
+    }
 }
 
 pub struct GeminiProvider {
@@ -132,7 +138,8 @@ impl GeminiProvider {
 
         log::info!(
             "[llm] POST {} provider=gemini model={} translate=true",
-            url, self.config.model
+            url,
+            self.config.model
         );
 
         let body_text = self.send_unary(&url, &body).await?;
@@ -179,11 +186,11 @@ impl GeminiProvider {
             .await
     }
 
-    /// `generationConfig` 注入：温度 + 按模型 family 的 thinkingConfig。
+    /// `generationConfig` 注入：温度 + 渠道级 thinkingConfig。
     fn build_generate_body(&self, system_prompt: &str, contents: Vec<Value>) -> Value {
         let mut generation_config = json!({ "temperature": self.config.temperature });
-        if let Some(thinking) = thinking_config_for(&self.config.model) {
-            generation_config["thinkingConfig"] = thinking;
+        if !self.config.thinking_enabled {
+            generation_config["thinkingConfig"] = disabled_thinking_config();
         }
         json!({
             "systemInstruction": system_instruction(system_prompt),
@@ -403,9 +410,7 @@ fn drain_complete_sse_events(buffer: &mut Vec<u8>) -> Vec<String> {
             Ok(s) => s.to_string(),
             Err(e) => {
                 // 完整 event 自身 UTF-8 不合法（极少见，可能是上游异常）：丢弃此 event 不让流挂掉。
-                log::warn!(
-                    "[llm] gemini SSE event has invalid UTF-8 (skipping): {e}"
-                );
+                log::warn!("[llm] gemini SSE event has invalid UTF-8 (skipping): {e}");
                 buffer.drain(..end + delim_len);
                 continue;
             }
@@ -419,7 +424,10 @@ fn drain_complete_sse_events(buffer: &mut Vec<u8>) -> Vec<String> {
 /// 多轮 polish 的 contents 序列。
 /// 输入约定：`prior_turns` 与 polish.rs 一致（最新在前 newest-first），
 /// chat 时间序为 oldest-first，所以这里 `iter().rev()` 反转。
-fn build_polish_history_contents(prior_turns: &[(String, String)], user_prompt: &str) -> Vec<Value> {
+fn build_polish_history_contents(
+    prior_turns: &[(String, String)],
+    user_prompt: &str,
+) -> Vec<Value> {
     let mut contents: Vec<Value> = Vec::with_capacity(prior_turns.len() * 2 + 1);
     for (raw, polished) in prior_turns.iter().rev() {
         contents.push(user_content(&crate::polish::prompts::user_prompt(raw)));
@@ -436,38 +444,23 @@ fn qa_messages_to_contents(messages: &[QaChatMessage]) -> Vec<Value> {
     messages
         .iter()
         .map(|m| {
-            let role = if m.role == "assistant" { "model" } else { "user" };
+            let role = if m.role == "assistant" {
+                "model"
+            } else {
+                "user"
+            };
             json!({ "role": role, "parts": [{ "text": m.content }] })
         })
         .collect()
 }
 
-/// 按模型 ID 选 thinkingConfig。返回 None 表示该模型官方明示无法关思考
-/// （目前仅 gemini-2.5-pro），交由模型默认行为。
+/// Gemini 原生通道的关闭/最低思考请求。
 ///
-/// 字段对应矩阵（已对 ai.google.dev/gemini-api/docs/thinking 交叉核对）：
-/// - `gemini-2.5-flash`, `gemini-2.5-flash-lite`  → `thinkingBudget = 0`（完全关）
-/// - `gemini-2.5-pro`                              → 不可关（官方 N/A），不下发字段
-/// - `gemini-3*-pro*`                              → `thinkingLevel = "low"`（最低档）
-/// - `gemini-3*-flash*`                            → `thinkingLevel = "minimal"`（Flash-only）
-/// - 其它未知                                       → `thinkingLevel = "minimal"`（"尽量关"兜底）
-///
-/// 注意：thinkingBudget 与 thinkingLevel 不能同发——文档没明示但属未定义行为。
-fn thinking_config_for(model: &str) -> Option<Value> {
-    let m = model.to_ascii_lowercase();
-    if m.starts_with("gemini-2.5-pro") {
-        return None;
-    }
-    if m.starts_with("gemini-2.5-flash") {
-        return Some(json!({ "thinkingBudget": 0 }));
-    }
-    if m.starts_with("gemini-3") {
-        if m.contains("pro") {
-            return Some(json!({ "thinkingLevel": "low" }));
-        }
-        return Some(json!({ "thinkingLevel": "minimal" }));
-    }
-    Some(json!({ "thinkingLevel": "minimal" }))
+/// OpenLess keeps no per-model Gemini table: with thinking on, no thinkingConfig is sent; with
+/// it off, this sends `thinkingBudget = 0`, the thinkingConfig value that expresses "no thinking".
+/// A model that rejects the field or cannot fully stop thinking is left to the Gemini API.
+fn disabled_thinking_config() -> Value {
+    json!({ "thinkingBudget": 0 })
 }
 
 fn generate_content_url(base_url: &str, model: &str) -> String {
@@ -495,8 +488,8 @@ fn extract_assistant_content(body: &str) -> Result<String, LLMError> {
         .and_then(|c| c.get("parts"))
         .and_then(|p| p.as_array())
         .ok_or_else(|| LLMError::ParseError("missing content.parts".into()))?;
-    // 把所有 part.text 拼起来。模型在 thinking on 时可能产出多段，但我们已禁
-    // 思考；仍按 array 处理是为了不被 future-proof 单 part vs 多 part 的差异坑到。
+    // 把所有 part.text 拼起来。开启思考时模型可能产出多段；逐段拼接避免
+    // future-proof 单 part vs 多 part 的差异坑到。
     let mut buf = String::new();
     for part in parts {
         if let Some(t) = part.get("text").and_then(|v| v.as_str()) {
@@ -516,70 +509,22 @@ mod tests {
     use super::*;
 
     #[test]
-    fn thinking_config_2_5_flash_disables_thinking_via_budget_zero() {
-        let v = thinking_config_for("gemini-2.5-flash").unwrap();
-        assert_eq!(v, json!({ "thinkingBudget": 0 }));
-        // flash-lite 同款
-        let v2 = thinking_config_for("gemini-2.5-flash-lite").unwrap();
-        assert_eq!(v2, json!({ "thinkingBudget": 0 }));
-    }
-
-    #[test]
-    fn thinking_config_2_5_pro_returns_none_because_cannot_disable() {
-        assert!(thinking_config_for("gemini-2.5-pro").is_none());
-    }
-
-    #[test]
-    fn thinking_config_3_x_pro_uses_low_thinking_level() {
-        let v = thinking_config_for("gemini-3.1-pro-preview").unwrap();
-        assert_eq!(v, json!({ "thinkingLevel": "low" }));
-    }
-
-    #[test]
-    fn thinking_config_3_x_flash_uses_minimal() {
-        let v = thinking_config_for("gemini-3-flash-preview").unwrap();
-        assert_eq!(v, json!({ "thinkingLevel": "minimal" }));
-        let v2 = thinking_config_for("gemini-3.1-flash-lite").unwrap();
-        assert_eq!(v2, json!({ "thinkingLevel": "minimal" }));
-    }
-
-    #[test]
-    fn thinking_config_unknown_falls_back_to_minimal_level() {
-        // 未来未知 ID 兜底到 minimal——尽量"关"，避免默认放出长思考。
-        let v = thinking_config_for("gemini-99-future-model").unwrap();
-        assert_eq!(v, json!({ "thinkingLevel": "minimal" }));
-    }
-
-    #[test]
-    fn thinking_config_never_emits_both_budget_and_level() {
-        // 不变量：任一返回值最多含 `thinkingBudget` 或 `thinkingLevel` 中的一个，
-        // 不能同发——文档没明示但属未定义行为，回归就立刻暴露。
-        for model in [
-            "gemini-2.5-flash",
-            "gemini-2.5-flash-lite",
-            "gemini-3.1-pro-preview",
-            "gemini-3-flash-preview",
-            "gemini-3.1-flash-lite",
-            "gemini-3.1-flash-lite-preview",
-            "gemini-99-future-model",
-        ] {
-            let v = thinking_config_for(model).unwrap();
-            let obj = v.as_object().unwrap();
-            let has_budget = obj.contains_key("thinkingBudget");
-            let has_level = obj.contains_key("thinkingLevel");
-            assert!(
-                has_budget ^ has_level,
-                "model {model} 同时下发 budget 与 level，违反单字段不变量: {v}"
-            );
-        }
+    fn disabled_thinking_config_uses_channel_level_budget_zero() {
+        assert_eq!(disabled_thinking_config(), json!({ "thinkingBudget": 0 }));
     }
 
     #[test]
     fn generate_content_url_handles_trailing_slash_in_base_url() {
         let a = generate_content_url("https://x/v1beta", "gemini-2.5-flash");
         let b = generate_content_url("https://x/v1beta/", "gemini-2.5-flash");
-        assert_eq!(a, "https://x/v1beta/models/gemini-2.5-flash:generateContent");
-        assert_eq!(b, "https://x/v1beta/models/gemini-2.5-flash:generateContent");
+        assert_eq!(
+            a,
+            "https://x/v1beta/models/gemini-2.5-flash:generateContent"
+        );
+        assert_eq!(
+            b,
+            "https://x/v1beta/models/gemini-2.5-flash:generateContent"
+        );
     }
 
     #[test]
@@ -652,8 +597,8 @@ mod tests {
     }
 
     #[test]
-    fn build_generate_body_2_5_flash_includes_thinking_budget_zero() {
-        let cfg = GeminiConfig::new("k", "gemini-2.5-flash", "https://x/v1beta");
+    fn build_generate_body_disabled_includes_channel_level_thinking_budget_zero() {
+        let cfg = GeminiConfig::new("k", "any-gemini-model", "https://x/v1beta");
         let provider = GeminiProvider::new(cfg);
         let body = provider.build_generate_body("SYS", vec![user_content("hi")]);
         assert_eq!(
@@ -665,13 +610,14 @@ mod tests {
     }
 
     #[test]
-    fn build_generate_body_2_5_pro_omits_thinking_config() {
-        let cfg = GeminiConfig::new("k", "gemini-2.5-pro", "https://x/v1beta");
+    fn build_generate_body_thinking_enabled_omits_thinking_config() {
+        let cfg = GeminiConfig::new("k", "gemini-2.5-flash", "https://x/v1beta")
+            .with_thinking_enabled(true);
         let provider = GeminiProvider::new(cfg);
         let body = provider.build_generate_body("SYS", vec![user_content("hi")]);
         assert!(
             body["generationConfig"].get("thinkingConfig").is_none(),
-            "2.5 Pro 不能关思考，generationConfig 不应含 thinkingConfig 字段"
+            "开启思考模式时不下发关闭思考的 thinkingConfig"
         );
     }
 
@@ -744,15 +690,4 @@ mod tests {
         assert_eq!(events, vec!["data: ok", "data: ok2"]);
         assert!(buf.is_empty());
     }
-
-    #[test]
-    fn build_generate_body_3_x_pro_emits_low_thinking_level() {
-        let cfg = GeminiConfig::new("k", "gemini-3.1-pro-preview", "https://x/v1beta");
-        let provider = GeminiProvider::new(cfg);
-        let body = provider.build_generate_body("SYS", vec![]);
-        assert_eq!(
-            body["generationConfig"]["thinkingConfig"],
-            json!({ "thinkingLevel": "low" })
-        );
-    }
 }
diff --git a/openless-all/app/src-tauri/src/polish.rs b/openless-all/app/src-tauri/src/polish.rs
index b7da2e24..a9ab3115 100644
--- a/openless-all/app/src-tauri/src/polish.rs
+++ b/openless-all/app/src-tauri/src/polish.rs
@@ -33,6 +33,10 @@ pub struct OpenAICompatibleConfig {
     pub extra_headers: HashMap<String, String>,
     pub temperature: f32,
     pub request_timeout_secs: u64,
+    /// true = 让支持的 OpenAI-compatible provider 启用推理 / 思考；
+    /// false = 按渠道级官方参数关闭或压低思考。不做模型白名单判断，
+    /// 具体模型兼容性交给 provider 处理。
+    pub thinking_enabled: bool,
 }
 
 impl OpenAICompatibleConfig {
@@ -52,8 +56,14 @@ impl OpenAICompatibleConfig {
             extra_headers: HashMap::new(),
             temperature: DEFAULT_TEMPERATURE,
             request_timeout_secs: DEFAULT_REQUEST_TIMEOUT_SECS,
+            thinking_enabled: false,
         }
     }
+
+    pub fn with_thinking_enabled(mut self, enabled: bool) -> Self {
+        self.thinking_enabled = enabled; // read by apply_openai_compatible_thinking_control
+        self
+    }
 }
 
 #[derive(Debug, Error)]
@@ -315,12 +325,7 @@ impl OpenAICompatibleLLMProvider {
     ) -> Result<String, LLMError> {
         let url = chat_completions_url(&self.config.base_url);
         let messages = build_polish_history_messages(system_prompt, prior_turns, user_prompt);
-        let body = json!({
-            "model": self.config.model,
-            "stream": false,
-            "temperature": self.config.temperature,
-            "messages": messages,
-        });
+        let body = self.chat_body(false, messages);
 
         log::info!(
             "[llm] POST {} provider={} model={} prior_turns={}",
@@ -340,15 +345,13 @@ impl OpenAICompatibleLLMProvider {
         user_prompt: &str,
     ) -> Result<String, LLMError> {
         let url = chat_completions_url(&self.config.base_url);
-        let body = json!({
-            "model": self.config.model,
-            "stream": false,
-            "temperature": self.config.temperature,
-            "messages": [
-                { "role": "system", "content": system_prompt },
-                { "role": "user", "content": user_prompt },
+        let body = self.chat_body(
+            false,
+            vec![
+                json!({ "role": "system", "content": system_prompt }),
+                json!({ "role": "user", "content": user_prompt }),
             ],
-        });
+        );
 
         log::info!(
             "[llm] POST {} provider={} model={}",
@@ -360,6 +363,17 @@ impl OpenAICompatibleLLMProvider {
         self.send_chat_request(&url, &body).await
     }
 
+    fn chat_body(&self, stream: bool, messages: Vec<Value>) -> Value {
+        let mut body = json!({
+            "model": self.config.model,
+            "stream": stream, // callers pass false for unary requests, true for streaming chat
+            "temperature": self.config.temperature,
+            "messages": messages,
+        });
+        apply_openai_compatible_thinking_control(&mut body, &self.config); // no-op if unmapped
+        body
+    }
+
     /// 共用的 HTTP send + body 解析。chat_completion / chat_completion_with_polish_history
     /// 各自构造好 body 后都调到这里，避免 30 行 send/parse 重复。
     async fn send_chat_request(
@@ -430,12 +444,7 @@ impl OpenAICompatibleLLMProvider {
         }
 
         let url = chat_completions_url(&self.config.base_url);
-        let body = json!({
-            "model": self.config.model,
-            "stream": true,
-            "temperature": self.config.temperature,
-            "messages": msgs,
-        });
+        let body = self.chat_body(true, msgs);
 
         log::info!(
             "[llm] POST {} provider={} model={} chat_turns={} stream=true",
@@ -585,6 +594,11 @@ impl CodexOAuthConfig {
         self.auth_path = Some(auth_path);
         self
     }
+
+    pub fn with_thinking_enabled(mut self, enabled: bool) -> Self {
+        self.reasoning_effort = Some(if enabled { "medium" } else { "low" }.to_string());
+        self // reasoning_effort is now always Some; "off" means "low", not a hard disable
+    }
 }
 
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -1158,6 +1172,72 @@ fn unix_now_secs() -> u64 {
         .unwrap_or(0)
 }
 
+/// Attaches the provider channel's thinking-control field to a chat request body.
+///
+/// The field is chosen from `config.provider_id` alone, never from the model name;
+/// provider ids without a known control leave `body` untouched.
+fn apply_openai_compatible_thinking_control(body: &mut Value, config: &OpenAICompatibleConfig) {
+    let control = match openai_compatible_thinking_control(&config.provider_id) {
+        Some(control) => control,
+        None => return,
+    };
+    let enabled = config.thinking_enabled;
+    let (field, value) = match control {
+        // OpenAI Chat Completions takes `reasoning_effort` as a channel-level request field.
+        // "Off" is pinned to "low" so no model allowlist is needed; models that do not
+        // support the field are left for the provider to handle.
+        ThinkingControl::ReasoningEffort => (
+            "reasoning_effort",
+            json!(if enabled { "medium" } else { "low" }),
+        ),
+        ThinkingControl::EnableThinking => ("enable_thinking", json!(enabled)),
+        ThinkingControl::OpenRouterReasoning => (
+            "reasoning",
+            json!({
+                "effort": if enabled { "medium" } else { "none" },
+                // QA/polish output only shows the final answer, so reasoning text
+                // must not reach the UI even when it is generated.
+                "exclude": true,
+            }),
+        ),
+        ThinkingControl::DeepSeekThinking => (
+            "thinking",
+            json!({
+                "type": if enabled { "enabled" } else { "disabled" },
+            }),
+        ),
+    };
+    body[field] = value;
+}
+
+/// Request-field shape a provider channel uses to switch model thinking on or off.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum ThinkingControl {
+    /// Top-level `reasoning_effort` string ("medium" / "low").
+    ReasoningEffort,
+    /// Top-level `enable_thinking` boolean.
+    EnableThinking,
+    /// `reasoning` object carrying `effort` and `exclude`.
+    OpenRouterReasoning,
+    /// `thinking` object whose `type` is "enabled" or "disabled".
+    DeepSeekThinking,
+}
+
+/// Looks up the thinking-control field for a provider id (surrounding whitespace
+/// ignored); `None` means the channel has no known control.
+fn openai_compatible_thinking_control(provider_id: &str) -> Option<ThinkingControl> {
+    const CHANNEL_CONTROLS: [(&str, ThinkingControl); 5] = [
+        ("deepseek", ThinkingControl::DeepSeekThinking),
+        ("openrouterFree", ThinkingControl::OpenRouterReasoning),
+        ("alibabaCoding", ThinkingControl::EnableThinking),
+        ("openai", ThinkingControl::ReasoningEffort),
+        ("codingPlanX", ThinkingControl::ReasoningEffort),
+    ];
+    let id = provider_id.trim();
+    CHANNEL_CONTROLS
+        .iter()
+        .find(|(known, _)| *known == id)
+        .map(|&(_, control)| control)
+}
+
 /// 把 working_languages + front_app 拼成 system prompt 头部前提：
 ///     # 上下文
 ///     用户的工作语言：…
@@ -2055,6 +2117,124 @@ mod tests {
         );
     }
 
+    #[test]
+    fn openai_chat_body_adds_reasoning_effort_for_openai_channel() {
+        let provider = OpenAICompatibleLLMProvider::new(
+            OpenAICompatibleConfig::new(
+                "openai",
+                "OpenAI",
+                "https://api.openai.com/v1",
+                "k",
+                "any-model",
+            )
+            .with_thinking_enabled(true),
+        );
+
+        let body = provider.chat_body(false, vec![json!({ "role": "user", "content": "hi" })]);
+
+        assert_eq!(body["reasoning_effort"], "medium");
+    }
+
+    #[test]
+    fn openai_chat_body_lowers_reasoning_when_disabled_for_channel() {
+        let provider = OpenAICompatibleLLMProvider::new(OpenAICompatibleConfig::new(
+            "codingPlanX",
+            "Coding Plan X",
+            "https://api.codingplanx.ai/v1",
+            "k",
+            "any-model",
+        ));
+
+        let body = provider.chat_body(false, vec![json!({ "role": "user", "content": "hi" })]);
+
+        assert_eq!(body["reasoning_effort"], "low");
+    }
+
+    #[test]
+    fn openai_chat_body_adds_enable_thinking_for_alibaba_channel() {
+        let provider = OpenAICompatibleLLMProvider::new(
+            OpenAICompatibleConfig::new(
+                "alibabaCoding",
+                "Alibaba Coding",
+                "https://coding-intl.dashscope.aliyuncs.com/v1",
+                "k",
+                "any-model",
+            )
+            .with_thinking_enabled(true),
+        );
+
+        let body = provider.chat_body(false, vec![json!({ "role": "user", "content": "hi" })]);
+
+        assert_eq!(body["enable_thinking"], true);
+    }
+
+    #[test]
+    fn openai_chat_body_adds_openrouter_reasoning_control() {
+        let provider = OpenAICompatibleLLMProvider::new(OpenAICompatibleConfig::new(
+            "openrouterFree",
+            "OpenRouter",
+            "https://openrouter.ai/api/v1",
+            "k",
+            "openai/gpt-5-mini",
+        ));
+
+        let body = provider.chat_body(true, vec![json!({ "role": "user", "content": "hi" })]);
+
+        assert_eq!(body["reasoning"]["effort"], "none");
+        assert_eq!(body["reasoning"]["exclude"], true);
+    }
+
+    #[test]
+    fn openai_chat_body_adds_openrouter_reasoning_by_channel_not_model() {
+        let provider = OpenAICompatibleLLMProvider::new(OpenAICompatibleConfig::new(
+            "openrouterFree",
+            "OpenRouter",
+            "https://openrouter.ai/api/v1",
+            "k",
+            "qwen/qwen3-coder:free",
+        ));
+
+        let body = provider.chat_body(true, vec![json!({ "role": "user", "content": "hi" })]);
+
+        assert_eq!(body["reasoning"]["effort"], "none");
+        assert_eq!(body["reasoning"]["exclude"], true);
+    }
+
+    #[test]
+    fn openai_chat_body_adds_deepseek_thinking_toggle_by_channel() {
+        let provider = OpenAICompatibleLLMProvider::new(OpenAICompatibleConfig::new(
+            "deepseek",
+            "DeepSeek",
+            "https://api.deepseek.com/v1",
+            "k",
+            "any-model",
+        ));
+
+        let body = provider.chat_body(false, vec![json!({ "role": "user", "content": "hi" })]);
+
+        assert_eq!(body["thinking"]["type"], "disabled");
+    }
+
+    #[test]
+    fn openai_chat_body_omits_thinking_control_for_unknown_provider() {
+        let provider = OpenAICompatibleLLMProvider::new(
+            OpenAICompatibleConfig::new(
+                "custom",
+                "Custom",
+                "https://example.test/v1",
+                "k",
+                "custom-model",
+            )
+            .with_thinking_enabled(true),
+        );
+
+        let body = provider.chat_body(false, vec![json!({ "role": "user", "content": "hi" })]);
+        // No channel-specific thinking field may leak into an unknown provider's request.
+        for field in ["reasoning_effort", "enable_thinking", "reasoning", "thinking"] {
+            assert!(body.get(field).is_none(), "unexpected field {}", field);
+        }
+    }
+
     #[test]
     fn structured_prompt_includes_dense_github_request_example() {
         let prompt = prompts::system_prompt(PolishMode::Structured);
@@ -2257,6 +2437,13 @@ mod tests {
         );
     }
 
+    #[test]
+    fn codex_oauth_config_lowers_reasoning_when_thinking_disabled() {
+        let config = CodexOAuthConfig::new("gpt-5.5").with_thinking_enabled(false);
+
+        assert_eq!(config.reasoning_effort.as_deref(), Some("low"));
+    }
+
     #[tokio::test]
     async fn codex_oauth_provider_streams_text_from_codex_responses() {
         let auth_path = write_codex_auth_fixture("acct-openless", unix_now_secs() + 3600);
@@ -2277,6 +2464,7 @@ mod tests {
             assert!(request_text.contains(r#""stream":true"#));
             assert!(request_text.contains(r#""role":"developer"#));
             assert!(request_text.contains(r#""type":"input_text"#));
+            assert!(request_text.contains(r#""reasoning":{"effort":"medium"}"#));
             assert!(!request_text.contains(r#""temperature":"#));
 
             let body = concat!(
diff --git a/openless-all/app/src-tauri/src/types.rs b/openless-all/app/src-tauri/src/types.rs
index 5f658b24..3aac1cb5 100644
--- a/openless-all/app/src-tauri/src/types.rs
+++ b/openless-all/app/src-tauri/src/types.rs
@@ -168,6 +168,11 @@ pub struct UserPreferences {
     pub microphone_device_name: String,
     pub active_asr_provider: String, // "volcengine" | "apple-speech" | ...
     pub active_llm_provider: String, // "ark" | "openai" | ...
+    /// LLM thinking toggle. Defaults to false, keeping the prior "minimize thinking" behavior.
+    /// Gemini uses its native thinkingConfig; OpenAI-compatible paths send only official
+    /// per-provider/channel fields: no prompt injection, no model allowlists. See issue #402.
+    #[serde(default)]
+    pub llm_thinking_enabled: bool,
     /// Windows/Linux 粘贴成功后是否恢复用户原剪贴板。默认 true 跟历史行为一致；
     /// 关掉就把听写文本留在剪贴板，让 simulate_paste 实际没生效时用户能 Ctrl+V 找回。
     /// macOS 走 AX 直写，不受这个开关影响。详见 issue #111。
@@ -319,6 +324,8 @@ struct UserPreferencesWire {
     microphone_device_name: String,
     active_asr_provider: String,
     active_llm_provider: String,
+    #[serde(default)]
+    llm_thinking_enabled: bool,
     restore_clipboard_after_paste: bool,
     #[serde(default)]
     paste_shortcut: PasteShortcut,
@@ -372,6 +379,7 @@ impl Default for UserPreferencesWire {
             microphone_device_name: prefs.microphone_device_name,
             active_asr_provider: prefs.active_asr_provider,
             active_llm_provider: prefs.active_llm_provider,
+            llm_thinking_enabled: prefs.llm_thinking_enabled,
             restore_clipboard_after_paste: prefs.restore_clipboard_after_paste,
             paste_shortcut: prefs.paste_shortcut,
             allow_non_tsf_insertion_fallback: prefs.allow_non_tsf_insertion_fallback,
@@ -422,6 +430,7 @@ impl<'de> Deserialize<'de> for UserPreferences {
             microphone_device_name: wire.microphone_device_name,
             active_asr_provider: wire.active_asr_provider,
             active_llm_provider: wire.active_llm_provider,
+            llm_thinking_enabled: wire.llm_thinking_enabled,
             restore_clipboard_after_paste: wire.restore_clipboard_after_paste,
             paste_shortcut: wire.paste_shortcut,
             allow_non_tsf_insertion_fallback: wire.allow_non_tsf_insertion_fallback,
@@ -539,6 +548,7 @@ impl Default for UserPreferences {
             microphone_device_name: String::new(),
             active_asr_provider: default_active_asr_provider(),
             active_llm_provider: "ark".into(),
+            llm_thinking_enabled: false,
             restore_clipboard_after_paste: true,
             paste_shortcut: PasteShortcut::default(),
             allow_non_tsf_insertion_fallback: true,
diff --git a/openless-all/app/src/i18n/en.ts b/openless-all/app/src/i18n/en.ts
index 7e5a9228..a58e295f 100644
--- a/openless-all/app/src/i18n/en.ts
+++ b/openless-all/app/src/i18n/en.ts
@@ -401,6 +401,10 @@ export const en: typeof zhCN = {
       apiKeyLabel: 'API Key',
       baseUrlLabel: 'Base URL',
       modelLabel: 'Model',
+      thinkingModeLabel: 'Thinking',
+      thinkingModeOn: 'On',
+      thinkingModeOff: 'Off',
+      thinkingModeHint: 'Off disables or minimizes thinking with provider-level official parameters. On enables thinking by channel defaults. No prompt injection or per-model adapters.',
       bailianVocabularyIdLabel: 'Hotword Vocabulary ID (optional)',
       bailianVocabularyIdNote: 'If you have created a DashScope hotword vocabulary, enter its vocab-... ID. Leave blank to skip hotwords.',
       appIdLabel: 'App ID',
diff --git a/openless-all/app/src/i18n/ja.ts b/openless-all/app/src/i18n/ja.ts
index 6d34ffc5..e5a6eca1 100644
--- a/openless-all/app/src/i18n/ja.ts
+++ b/openless-all/app/src/i18n/ja.ts
@@ -403,6 +403,10 @@ export const ja: typeof zhCN = {
       apiKeyLabel: 'API キー',
       baseUrlLabel: 'エンドポイント',
       modelLabel: 'モデル',
+      thinkingModeLabel: '思考',
+      thinkingModeOn: 'オン',
+      thinkingModeOff: 'オフ',
+      thinkingModeHint: 'オフではチャネル単位の公式パラメーターで思考を無効化または最小化します。オンではチャネル既定で思考を有効化します。prompt 注入やモデル別の個別対応は行いません。',
       bailianVocabularyIdLabel: 'ホットワード Vocabulary ID（任意）',
       bailianVocabularyIdNote: 'DashScope でホットワード辞書を作成済みの場合は vocab-... ID を入力します。空欄なら送信しません。',
       appIdLabel: 'App ID（アプリケーション ID）',
diff --git a/openless-all/app/src/i18n/ko.ts b/openless-all/app/src/i18n/ko.ts
index bef46493..535513e1 100644
--- a/openless-all/app/src/i18n/ko.ts
+++ b/openless-all/app/src/i18n/ko.ts
@@ -403,6 +403,10 @@ export const ko: typeof zhCN = {
       apiKeyLabel: 'API 키',
       baseUrlLabel: '엔드포인트',
       modelLabel: '모델',
+      thinkingModeLabel: '사고',
+      thinkingModeOn: '켜짐',
+      thinkingModeOff: '꺼짐',
+      thinkingModeHint: '꺼짐은 채널 단위 공식 파라미터로 사고를 끄거나 최소화합니다. 켜짐은 채널 기본값으로 사고를 켭니다. prompt 주입이나 모델별 어댑터는 사용하지 않습니다.',
       bailianVocabularyIdLabel: '핫워드 Vocabulary ID(선택)',
       bailianVocabularyIdNote: 'DashScope에서 핫워드 사전을 만들었다면 vocab-... ID를 입력하세요. 비워 두면 핫워드를 전송하지 않습니다.',
       appIdLabel: 'App ID(애플리케이션 ID)',
diff --git a/openless-all/app/src/i18n/zh-CN.ts b/openless-all/app/src/i18n/zh-CN.ts
index 6123d1ef..6c0aa3ff 100644
--- a/openless-all/app/src/i18n/zh-CN.ts
+++ b/openless-all/app/src/i18n/zh-CN.ts
@@ -399,6 +399,10 @@ export const zhCN = {
       apiKeyLabel: 'API 密钥',
       baseUrlLabel: '接口地址',
       modelLabel: '模型',
+      thinkingModeLabel: '思考',
+      thinkingModeOn: '开启',
+      thinkingModeOff: '关闭',
+      thinkingModeHint: '关闭时按渠道级官方参数关闭或压低思考；开启时按渠道默认启用思考。不注入 prompt，也不做单模型适配。',
       bailianVocabularyIdLabel: '热词 Vocabulary ID（可选）',
       bailianVocabularyIdNote: '如已在百炼创建热词表，可填写 vocab-...；留空则不下发热词。',
       appIdLabel: 'App ID（应用 ID）',
diff --git a/openless-all/app/src/i18n/zh-TW.ts b/openless-all/app/src/i18n/zh-TW.ts
index b9119b6d..a10ccab5 100644
--- a/openless-all/app/src/i18n/zh-TW.ts
+++ b/openless-all/app/src/i18n/zh-TW.ts
@@ -401,6 +401,10 @@ export const zhTW: typeof zhCN = {
       apiKeyLabel: 'API 密鑰',
       baseUrlLabel: '接口地址',
       modelLabel: '模型',
+      thinkingModeLabel: '思考',
+      thinkingModeOn: '開啟',
+      thinkingModeOff: '關閉',
+      thinkingModeHint: '關閉時按渠道級官方參數關閉或降低思考；開啟時按渠道預設啟用思考。不注入 prompt，也不做單模型適配。',
       bailianVocabularyIdLabel: '熱詞 Vocabulary ID（可選）',
       bailianVocabularyIdNote: '如已在百煉建立熱詞表，可填寫 vocab-...；留空則不下發熱詞。',
       appIdLabel: 'App ID（應用 ID）',
diff --git a/openless-all/app/src/lib/ipc.ts b/openless-all/app/src/lib/ipc.ts
index 4b347e4f..e974e393 100644
--- a/openless-all/app/src/lib/ipc.ts
+++ b/openless-all/app/src/lib/ipc.ts
@@ -57,6 +57,7 @@ const mockSettings: UserPreferences = {
   microphoneDeviceName: '',
   activeAsrProvider: 'foundry-local-whisper',
   activeLlmProvider: 'ark',
+  llmThinkingEnabled: false,
   restoreClipboardAfterPaste: true,
   pasteShortcut: 'ctrlV',
   allowNonTsfInsertionFallback: true,
diff --git a/openless-all/app/src/lib/stylePrefs.test.ts b/openless-all/app/src/lib/stylePrefs.test.ts
index bed26970..3767bfaf 100644
--- a/openless-all/app/src/lib/stylePrefs.test.ts
+++ b/openless-all/app/src/lib/stylePrefs.test.ts
@@ -26,6 +26,7 @@ const previousPrefs: UserPreferences = {
   microphoneDeviceName: '',
   activeAsrProvider: 'volcengine',
   activeLlmProvider: 'ark',
+  llmThinkingEnabled: false,
   restoreClipboardAfterPaste: true,
   pasteShortcut: 'ctrlV',
   allowNonTsfInsertionFallback: true,
diff --git a/openless-all/app/src/lib/types.ts b/openless-all/app/src/lib/types.ts
index 56678d88..b4e6e164 100644
--- a/openless-all/app/src/lib/types.ts
+++ b/openless-all/app/src/lib/types.ts
@@ -147,6 +147,8 @@ export interface UserPreferences {
   microphoneDeviceName: string;
   activeAsrProvider: string;
   activeLlmProvider: string;
+  /** LLM thinking-mode toggle. Off by default, keeping the existing minimize-thinking behavior. See issue #402. */
+  llmThinkingEnabled: boolean;
   /** 仅 Windows/Linux：粘贴成功后是否恢复用户原剪贴板。默认 true。详见 issue #111。 */
   restoreClipboardAfterPaste: boolean;
   /** 仅 Windows/Linux：模拟粘贴时按下的快捷键。详见 issue #360：kitty/alacritty
diff --git a/openless-all/app/src/pages/Settings.tsx b/openless-all/app/src/pages/Settings.tsx
index 7f2cf83c..8b4b92c8 100644
--- a/openless-all/app/src/pages/Settings.tsx
+++ b/openless-all/app/src/pages/Settings.tsx
@@ -1139,6 +1139,29 @@ export function Toggle({ on, onToggle }: { on: boolean; onToggle?: (next: boolea
   );
 }
 
+/**
+ * Compact thinking-mode switch: a caption, the shared Toggle, and an On/Off
+ * state label. The hint text is exposed as the wrapper's native `title` tooltip.
+ */
+function LlmThinkingToggle({ enabled, onToggle }: { enabled: boolean; onToggle: (next: boolean) => void }) {
+  const { t } = useTranslation();
+  const captionStyle = { fontSize: 11.5, color: 'var(--ol-ink-4)' };
+  const stateStyle = { fontSize: 11.5, color: enabled ? 'var(--ol-blue)' : 'var(--ol-ink-4)' };
+  const stateLabel = enabled
+    ? t('settings.providers.thinkingModeOn')
+    : t('settings.providers.thinkingModeOff');
+  return (
+    <div
+      title={t('settings.providers.thinkingModeHint')}
+      style={{ display: 'flex', alignItems: 'center', gap: 6, paddingLeft: 2, whiteSpace: 'nowrap' }}
+    >
+      <span style={captionStyle}>{t('settings.providers.thinkingModeLabel')}</span>
+      <Toggle on={enabled} onToggle={onToggle} />
+      <span style={stateStyle}>{stateLabel}</span>
+    </div>
+  );
+}
+
 const LLM_PRESETS = [
   {
     id: 'ark',
@@ -1168,9 +1192,8 @@ const LLM_PRESETS = [
     // 谷歌官方 Gemini API（原生 generateContent，不走 OpenAI 兼容 shim）。
     // baseUrl 末尾 /v1beta 是当前 Generally Available 的 path（ai.google.dev/api）。
     // 后端 llm_gemini.rs 会拼成 `{baseUrl}/models/{model}:generateContent`，
-    // 并按模型 family 注入 thinkingConfig 强制关思考（2.5 flash 系列 thinkingBudget=0；
-    // 3.x pro 走 thinkingLevel="low"；3.x flash 走 thinkingLevel="minimal"；
-    // 2.5 pro 官方明示无法关闭思考）。模型列表用 ProviderTools「拉取模型」按钮取，
+    // 并按 Gemini 原生通道级 thinkingConfig 关闭或压低思考，不在前端维护模型适配表。
+    // 模型列表用 ProviderTools「拉取模型」按钮取，
     // 由 commands.rs::fetch_provider_models 识别 generativelanguage 域名后按 Gemini shape 解析。
     id: 'gemini',
     nameKey: 'gemini',
@@ -1332,6 +1355,16 @@ function ProvidersSection() {
     }
   };
 
+  // Writes the LLM thinking toggle into preferences via updatePrefs. Does nothing while
+  // `prefs` is unset; a failed update is logged and reported through emitSaved('failed', …).
+  const onLlmThinkingToggle = (enabled: boolean) => {
+    if (!prefs) return;
+    void updatePrefs(current => ({ ...current, llmThinkingEnabled: enabled })).catch(error => {
+      console.error('[settings] failed to update LLM thinking mode', error);
+      emitSaved('failed', t('common.operationFailed'));
+    });
+  };
+
   const onAsrProviderChange = async (id: AsrPresetId) => {
     setAsrProvider(id);
     const seq = ++asrSwitchSeqRef.current;
@@ -1420,7 +1451,14 @@ function ProvidersSection() {
           </>
         )}
         <CredentialField key={`${committedLlmProvider}:model:${llmModelRevision}`} label={t('settings.providers.modelLabel')} account="ark.model_id"
-          placeholder={preset.modelPlaceholder || 'model-name'} mono />
+          placeholder={preset.modelPlaceholder || 'model-name'} mono
+          trailing={(
+            <LlmThinkingToggle
+              enabled={prefs?.llmThinkingEnabled ?? false}
+              onToggle={onLlmThinkingToggle}
+            />
+          )}
+        />
         <ProviderTools key={committedLlmProvider} kind="llm" modelAccount="ark.model_id" onModelSelected={() => setLlmModelRevision(v => v + 1)} />
       </Card>
 
@@ -1884,9 +1922,10 @@ interface CredentialFieldProps {
   mono?: boolean;
   mask?: boolean;
   defaultValue?: string;
+  trailing?: ReactNode;
 }
 
-function CredentialField({ label, account, placeholder, mono, mask, defaultValue }: CredentialFieldProps) {
+function CredentialField({ label, account, placeholder, mono, mask, defaultValue, trailing }: CredentialFieldProps) {
   const { t } = useTranslation();
   const [value, setValue] = useState('');
   const [revealed, setRevealed] = useState(false);
@@ -2023,6 +2062,7 @@ function CredentialField({ label, account, placeholder, mono, mask, defaultValue
             <Icon name="check" size={13} />
           </button>
         )}
+        {trailing}
         {mask && (
           <button
             onClick={() => setRevealed(r => !r)}