diff --git a/openless-all/app/src-tauri/Cargo.lock b/openless-all/app/src-tauri/Cargo.lock index ef129ebc..2de25484 100644 --- a/openless-all/app/src-tauri/Cargo.lock +++ b/openless-all/app/src-tauri/Cargo.lock @@ -3751,7 +3751,7 @@ dependencies = [ [[package]] name = "openless" -version = "1.2.24-7" +version = "1.2.24-8" dependencies = [ "anyhow", "arboard", diff --git a/openless-all/app/src-tauri/src/commands.rs b/openless-all/app/src-tauri/src/commands.rs index bb8a1cea..fc90a947 100644 --- a/openless-all/app/src-tauri/src/commands.rs +++ b/openless-all/app/src-tauri/src/commands.rs @@ -14,7 +14,9 @@ use crate::asr::local::foundry::{ use crate::asr::local::FoundryLocalRuntime; use crate::coordinator::Coordinator; use crate::permissions::{self, PermissionStatus}; -use crate::persistence::{CredentialAccount, CredentialsSnapshot, CredentialsVault}; +use crate::persistence::{ + CredentialAccount, CredentialsSnapshot, CredentialsVault, PreferencesStore, +}; use crate::polish::{ http_client_builder, CodexOAuthConfig, CodexOAuthCredentials, CodexOAuthLLMProvider, LLMError, OpenAICompatibleConfig, OpenAICompatibleLLMProvider, CODEX_DEFAULT_MODEL, @@ -611,12 +613,18 @@ fn read_openai_provider_config(kind: &str) -> Result { } async fn validate_llm_provider() -> Result<(), String> { + let llm_thinking_enabled = PreferencesStore::new() + .map_err(|e| e.to_string())? + .get() + .llm_thinking_enabled; if CredentialsVault::get_active_llm() == CODEX_OAUTH_PROVIDER_ID { let model = CredentialsVault::get(CredentialAccount::ArkModelId) .map_err(|e| e.to_string())? .filter(|s| !s.trim().is_empty()) .unwrap_or_else(|| CODEX_DEFAULT_MODEL.to_string()); - let provider = CodexOAuthLLMProvider::new(CodexOAuthConfig::new(model)); + let provider = CodexOAuthLLMProvider::new( + CodexOAuthConfig::new(model).with_thinking_enabled(llm_thinking_enabled), + ); return provider .polish( "验证连接", @@ -639,17 +647,21 @@ async fn validate_llm_provider() -> Result<(), String> { } let config = read_openai_provider_config("llm")?; + let active_llm = CredentialsVault::get_active_llm(); let model = CredentialsVault::get(CredentialAccount::ArkModelId) .map_err(|e| e.to_string())? .filter(|s| !s.is_empty()) .ok_or_else(|| "llmModelMissing".to_string())?; - let provider = OpenAICompatibleLLMProvider::new(OpenAICompatibleConfig::new( - "ark", - "Doubao Ark", - config.base_url, - config.api_key, - model, - )); + let provider = OpenAICompatibleLLMProvider::new( + OpenAICompatibleConfig::new( + active_llm.clone(), + active_llm, + config.base_url, + config.api_key, + model, + ) + .with_thinking_enabled(llm_thinking_enabled), + ); provider .polish( "验证连接", diff --git a/openless-all/app/src-tauri/src/coordinator.rs b/openless-all/app/src-tauri/src/coordinator.rs index c3fe72f8..c2322174 100644 --- a/openless-all/app/src-tauri/src/coordinator.rs +++ b/openless-all/app/src-tauri/src/coordinator.rs @@ -773,6 +773,7 @@ impl Coordinator { let working_languages = prefs.working_languages; let chinese_script_preference = prefs.chinese_script_preference; let output_language_preference = prefs.output_language_preference; + let llm_thinking_enabled = prefs.llm_thinking_enabled; // repolish 是历史记录里手动重新润色,不再绑定原 session 的前台 app; // 当下用户调起的 app 才是相关上下文(如果可拿)。 let front_app = capture_frontmost_app(); @@ -785,6 +786,7 @@ impl Coordinator { &working_languages, chinese_script_preference, output_language_preference, + llm_thinking_enabled, front_app.as_deref(), &[], ) @@ -2013,6 +2015,7 @@ async fn polish_or_passthrough( working_languages: &[String], chinese_script_preference: ChineseScriptPreference, output_language_preference: OutputLanguagePreference, + llm_thinking_enabled: bool, front_app: Option<&str>, prior_turns: &[(String, String)], ) -> (String, Option) { @@ -2026,6 +2029,7 @@ async fn polish_or_passthrough( working_languages, chinese_script_preference, output_language_preference, + llm_thinking_enabled, front_app, prior_turns, ) @@ -2047,16 +2051,19 @@ async fn polish_text( working_languages: &[String], chinese_script_preference: ChineseScriptPreference, output_language_preference: OutputLanguagePreference, + llm_thinking_enabled: bool, front_app: Option<&str>, prior_turns: &[(String, String)], ) -> anyhow::Result { // 谷歌 Gemini 分支:所有 LLM provider 共用 ark.* 凭据槽,唯独 Gemini 走原生 - // generateContent / 自带 thinking-off 控制,不走 OpenAI 兼容协议;按 active - // provider id 把请求路由到 GeminiProvider 即可,其余 provider(ark/deepseek/ - // openai/...)保持原 OpenAICompatibleLLMProvider 路径不动。 - if CredentialsVault::get_active_llm() == "gemini" { + // generateContent / 自带 thinkingConfig 控制;其余 provider 走 OpenAI + // 兼容协议,并在该路径里按 provider/channel 下发对应的思考开关。 + let active_llm = CredentialsVault::get_active_llm(); + if active_llm == "gemini" { let (api_key, model, base_url) = read_gemini_credentials()?; - let provider = GeminiProvider::new(GeminiConfig::new(api_key, model, base_url)); + let provider = GeminiProvider::new( + GeminiConfig::new(api_key, model, base_url).with_thinking_enabled(llm_thinking_enabled), + ); return Ok(provider .polish( raw, @@ -2071,7 +2078,7 @@ async fn polish_text( .await?); } - let provider = build_active_llm_provider()?; + let provider = build_active_llm_provider(llm_thinking_enabled)?; Ok(provider .polish( raw, @@ -2093,6 +2100,7 @@ async fn translate_or_passthrough( working_languages: &[String], chinese_script_preference: ChineseScriptPreference, output_language_preference: OutputLanguagePreference, + llm_thinking_enabled: bool, front_app: Option<&str>, ) -> (String, Option) { match translate_text( @@ -2101,6 +2109,7 @@ async fn translate_or_passthrough( working_languages, chinese_script_preference, output_language_preference, + llm_thinking_enabled, front_app, ) .await @@ -2120,12 +2129,16 @@ async fn translate_text( working_languages: &[String], chinese_script_preference: ChineseScriptPreference, output_language_preference: OutputLanguagePreference, + llm_thinking_enabled: bool, front_app: Option<&str>, ) -> anyhow::Result { - // 见 polish_text 顶部注释——同样的 Gemini 路由逻辑。 - if CredentialsVault::get_active_llm() == "gemini" { + // 见 polish_text 顶部注释——同样的 Gemini / OpenAI-compatible 路由逻辑。 + let active_llm = CredentialsVault::get_active_llm(); + if active_llm == "gemini" { let (api_key, model, base_url) = read_gemini_credentials()?; - let provider = GeminiProvider::new(GeminiConfig::new(api_key, model, base_url)); + let provider = GeminiProvider::new( + GeminiConfig::new(api_key, model, base_url).with_thinking_enabled(llm_thinking_enabled), + ); return Ok(provider .translate_to( raw, @@ -2138,7 +2151,7 @@ async fn translate_text( .await?); } - let provider = build_active_llm_provider()?; + let provider = build_active_llm_provider(llm_thinking_enabled)?; Ok(provider .translate_to( raw, @@ -2500,6 +2513,7 @@ async fn end_qa_session(inner: &Arc) -> Result<(), String> { let working_languages = prefs.working_languages.clone(); let chinese_script_preference = prefs.chinese_script_preference; let output_language_preference = prefs.output_language_preference; + let llm_thinking_enabled = prefs.llm_thinking_enabled; let (messages_for_llm, front_app) = { let st = inner.qa_state.lock(); (st.messages.clone(), st.front_app.clone()) @@ -2540,6 +2554,7 @@ async fn end_qa_session(inner: &Arc) -> Result<(), String> { &working_languages, chinese_script_preference, output_language_preference, + llm_thinking_enabled, front_app.as_deref(), on_delta, should_cancel, @@ -2691,6 +2706,7 @@ async fn answer_chat_dispatch( working_languages: &[String], chinese_script_preference: ChineseScriptPreference, output_language_preference: OutputLanguagePreference, + llm_thinking_enabled: bool, front_app: Option<&str>, on_delta: F, should_cancel: C, @@ -2699,11 +2715,14 @@ where F: Fn(&str) + Send + Sync, C: Fn() -> bool + Send + Sync, { - // 见 polish_text 顶部注释——同样的 Gemini 路由逻辑,QA 流式回答走 Gemini - // 原生 :streamGenerateContent?alt=sse。 - if CredentialsVault::get_active_llm() == "gemini" { + // 见 polish_text 顶部注释——同样的 Gemini / OpenAI-compatible 路由逻辑, + // QA 流式回答走 Gemini 原生 :streamGenerateContent?alt=sse。 + let active_llm = CredentialsVault::get_active_llm(); + if active_llm == "gemini" { let (api_key, model, base_url) = read_gemini_credentials()?; - let provider = GeminiProvider::new(GeminiConfig::new(api_key, model, base_url)); + let provider = GeminiProvider::new( + GeminiConfig::new(api_key, model, base_url).with_thinking_enabled(llm_thinking_enabled), + ); return Ok(provider .answer_chat_streaming( messages, @@ -2717,7 +2736,7 @@ where .await?); } - let provider = build_active_llm_provider()?; + let provider = build_active_llm_provider(llm_thinking_enabled)?; Ok(provider .answer_chat_streaming( messages, @@ -2755,13 +2774,14 @@ fn read_gemini_credentials() -> anyhow::Result<(String, String, String)> { Ok((api_key, model, base_url)) } -fn build_active_llm_provider() -> anyhow::Result { +fn build_active_llm_provider(llm_thinking_enabled: bool) -> anyhow::Result { let active = CredentialsVault::get_active_llm(); let model = CredentialsVault::get(CredentialAccount::ArkModelId)?.filter(|s| !s.trim().is_empty()); if active == CODEX_OAUTH_PROVIDER_ID { let config = - CodexOAuthConfig::new(model.unwrap_or_else(|| CODEX_DEFAULT_MODEL.to_string())); + CodexOAuthConfig::new(model.unwrap_or_else(|| CODEX_DEFAULT_MODEL.to_string())) + .with_thinking_enabled(llm_thinking_enabled); return Ok(ActiveLLMProvider::Codex(CodexOAuthLLMProvider::new(config))); } @@ -2772,7 +2792,8 @@ fn build_active_llm_provider() -> anyhow::Result { .trim_end_matches("/chat/completions") .trim_end_matches('/') .to_string(); - let config = OpenAICompatibleConfig::new(active, "OpenLess LLM", base_url, api_key, model); + let config = OpenAICompatibleConfig::new(active, "OpenLess LLM", base_url, api_key, model) + .with_thinking_enabled(llm_thinking_enabled); Ok(ActiveLLMProvider::OpenAI(OpenAICompatibleLLMProvider::new( config, ))) diff --git a/openless-all/app/src-tauri/src/coordinator/dictation.rs b/openless-all/app/src-tauri/src/coordinator/dictation.rs index b78b6c64..0a92b475 100644 --- a/openless-all/app/src-tauri/src/coordinator/dictation.rs +++ b/openless-all/app/src-tauri/src/coordinator/dictation.rs @@ -934,6 +934,7 @@ pub(super) async fn end_session(inner: &Arc) -> Result<(), String> { let working_languages = prefs.working_languages.clone(); let chinese_script_preference = prefs.chinese_script_preference; let output_language_preference = prefs.output_language_preference; + let llm_thinking_enabled = prefs.llm_thinking_enabled; let front_app = inner.state.lock().front_app.clone(); let translation_target = prefs.translation_target_language.trim().to_string(); let translation_active = @@ -978,6 +979,7 @@ pub(super) async fn end_session(inner: &Arc) -> Result<(), String> { &working_languages, chinese_script_preference, output_language_preference, + llm_thinking_enabled, front_app.as_deref(), ) .await @@ -989,6 +991,7 @@ pub(super) async fn end_session(inner: &Arc) -> Result<(), String> { &working_languages, chinese_script_preference, output_language_preference, + llm_thinking_enabled, front_app.as_deref(), &prior_turns, ) diff --git a/openless-all/app/src-tauri/src/llm_gemini.rs b/openless-all/app/src-tauri/src/llm_gemini.rs index bb17a310..88f25603 100644 --- a/openless-all/app/src-tauri/src/llm_gemini.rs +++ b/openless-all/app/src-tauri/src/llm_gemini.rs @@ -1,11 +1,8 @@ //! 谷歌 Gemini 原生 generateContent / streamGenerateContent 客户端。 //! //! 为什么不复用 `polish.rs::OpenAICompatibleLLMProvider`: -//! 1. **思考模式控制**——Gemini 系列必须按模型 family 注入对的字段 -//! (`thinkingBudget` 给 2.5 / `thinkingLevel` 给 3.x),且两个字段不能同发。 -//! OpenAI 兼容协议没有原生 thinking control 字段;谷歌的 OpenAI 兼容 shim -//! 虽然支持 `reasoning_effort`,但仍是 beta、3.x 上要走 `extra_body` 绕路, -//! 且官方文档对"是否真的关掉思考"措辞模糊。原生 endpoint 的契约最直接。 +//! 1. **思考模式控制**——Gemini 原生 `thinkingConfig` 比 OpenAI 兼容 shim +//! 的 provider 私有字段更直接;OpenLess 只做渠道级开关,不维护单模型适配表。 //! 2. **认证机制**——原生用 `x-goog-api-key` header(Bearer 不被识别), //! OpenAICompatibleLLMProvider 写死了 Bearer Authorization。 //! 3. **请求/响应 shape**——原生 `contents` 走 `role: user|model`,没有 @@ -39,6 +36,9 @@ pub struct GeminiConfig { pub base_url: String, pub temperature: f32, pub request_timeout_secs: u64, + /// true = 不下发关闭思考的 thinkingConfig,让模型按自身默认思考; + /// false = 下发 Gemini 原生渠道级最低思考配置。 + pub thinking_enabled: bool, } impl GeminiConfig { @@ -53,8 +53,14 @@ impl GeminiConfig { base_url: base_url.into(), temperature: DEFAULT_TEMPERATURE, request_timeout_secs: DEFAULT_REQUEST_TIMEOUT_SECS, + thinking_enabled: false, } } + + pub fn with_thinking_enabled(mut self, enabled: bool) -> Self { + self.thinking_enabled = enabled; + self + } } pub struct GeminiProvider { @@ -132,7 +138,8 @@ impl GeminiProvider { log::info!( "[llm] POST {} provider=gemini model={} translate=true", - url, self.config.model + url, + self.config.model ); let body_text = self.send_unary(&url, &body).await?; @@ -179,11 +186,11 @@ impl GeminiProvider { .await } - /// `generationConfig` 注入:温度 + 按模型 family 的 thinkingConfig。 + /// `generationConfig` 注入:温度 + 渠道级 thinkingConfig。 fn build_generate_body(&self, system_prompt: &str, contents: Vec) -> Value { let mut generation_config = json!({ "temperature": self.config.temperature }); - if let Some(thinking) = thinking_config_for(&self.config.model) { - generation_config["thinkingConfig"] = thinking; + if !self.config.thinking_enabled { + generation_config["thinkingConfig"] = disabled_thinking_config(); } json!({ "systemInstruction": system_instruction(system_prompt), @@ -403,9 +410,7 @@ fn drain_complete_sse_events(buffer: &mut Vec) -> Vec { Ok(s) => s.to_string(), Err(e) => { // 完整 event 自身 UTF-8 不合法(极少见,可能是上游异常):丢弃此 event 不让流挂掉。 - log::warn!( - "[llm] gemini SSE event has invalid UTF-8 (skipping): {e}" - ); + log::warn!("[llm] gemini SSE event has invalid UTF-8 (skipping): {e}"); buffer.drain(..end + delim_len); continue; } @@ -419,7 +424,10 @@ fn drain_complete_sse_events(buffer: &mut Vec) -> Vec { /// 多轮 polish 的 contents 序列。 /// 输入约定:`prior_turns` 与 polish.rs 一致(最新在前 newest-first), /// chat 时间序为 oldest-first,所以这里 `iter().rev()` 反转。 -fn build_polish_history_contents(prior_turns: &[(String, String)], user_prompt: &str) -> Vec { +fn build_polish_history_contents( + prior_turns: &[(String, String)], + user_prompt: &str, +) -> Vec { let mut contents: Vec = Vec::with_capacity(prior_turns.len() * 2 + 1); for (raw, polished) in prior_turns.iter().rev() { contents.push(user_content(&crate::polish::prompts::user_prompt(raw))); @@ -436,38 +444,23 @@ fn qa_messages_to_contents(messages: &[QaChatMessage]) -> Vec { messages .iter() .map(|m| { - let role = if m.role == "assistant" { "model" } else { "user" }; + let role = if m.role == "assistant" { + "model" + } else { + "user" + }; json!({ "role": role, "parts": [{ "text": m.content }] }) }) .collect() } -/// 按模型 ID 选 thinkingConfig。返回 None 表示该模型官方明示无法关思考 -/// (目前仅 gemini-2.5-pro),交由模型默认行为。 +/// Gemini 原生通道的关闭/最低思考请求。 /// -/// 字段对应矩阵(已对 ai.google.dev/gemini-api/docs/thinking 交叉核对): -/// - `gemini-2.5-flash`, `gemini-2.5-flash-lite` → `thinkingBudget = 0`(完全关) -/// - `gemini-2.5-pro` → 不可关(官方 N/A),不下发字段 -/// - `gemini-3*-pro*` → `thinkingLevel = "low"`(最低档) -/// - `gemini-3*-flash*` → `thinkingLevel = "minimal"`(Flash-only) -/// - 其它未知 → `thinkingLevel = "minimal"`("尽量关"兜底) -/// -/// 注意:thinkingBudget 与 thinkingLevel 不能同发——文档没明示但属未定义行为。 -fn thinking_config_for(model: &str) -> Option { - let m = model.to_ascii_lowercase(); - if m.starts_with("gemini-2.5-pro") { - return None; - } - if m.starts_with("gemini-2.5-flash") { - return Some(json!({ "thinkingBudget": 0 })); - } - if m.starts_with("gemini-3") { - if m.contains("pro") { - return Some(json!({ "thinkingLevel": "low" })); - } - return Some(json!({ "thinkingLevel": "minimal" })); - } - Some(json!({ "thinkingLevel": "minimal" })) +/// OpenLess 不维护 Gemini 单模型适配表;开启时不下发 thinkingConfig,关闭时 +/// 使用官方 thinkingConfig 中可表达“关闭思考”的 `thinkingBudget = 0`。若某个 +/// 具体模型不支持该字段或不能完全关闭思考,交由 Gemini API 自身处理。 +fn disabled_thinking_config() -> Value { + json!({ "thinkingBudget": 0 }) } fn generate_content_url(base_url: &str, model: &str) -> String { @@ -495,8 +488,8 @@ fn extract_assistant_content(body: &str) -> Result { .and_then(|c| c.get("parts")) .and_then(|p| p.as_array()) .ok_or_else(|| LLMError::ParseError("missing content.parts".into()))?; - // 把所有 part.text 拼起来。模型在 thinking on 时可能产出多段,但我们已禁 - // 思考;仍按 array 处理是为了不被 future-proof 单 part vs 多 part 的差异坑到。 + // 把所有 part.text 拼起来。开启思考时模型可能产出多段;逐段拼接避免 + // future-proof 单 part vs 多 part 的差异坑到。 let mut buf = String::new(); for part in parts { if let Some(t) = part.get("text").and_then(|v| v.as_str()) { @@ -516,70 +509,22 @@ mod tests { use super::*; #[test] - fn thinking_config_2_5_flash_disables_thinking_via_budget_zero() { - let v = thinking_config_for("gemini-2.5-flash").unwrap(); - assert_eq!(v, json!({ "thinkingBudget": 0 })); - // flash-lite 同款 - let v2 = thinking_config_for("gemini-2.5-flash-lite").unwrap(); - assert_eq!(v2, json!({ "thinkingBudget": 0 })); - } - - #[test] - fn thinking_config_2_5_pro_returns_none_because_cannot_disable() { - assert!(thinking_config_for("gemini-2.5-pro").is_none()); - } - - #[test] - fn thinking_config_3_x_pro_uses_low_thinking_level() { - let v = thinking_config_for("gemini-3.1-pro-preview").unwrap(); - assert_eq!(v, json!({ "thinkingLevel": "low" })); - } - - #[test] - fn thinking_config_3_x_flash_uses_minimal() { - let v = thinking_config_for("gemini-3-flash-preview").unwrap(); - assert_eq!(v, json!({ "thinkingLevel": "minimal" })); - let v2 = thinking_config_for("gemini-3.1-flash-lite").unwrap(); - assert_eq!(v2, json!({ "thinkingLevel": "minimal" })); - } - - #[test] - fn thinking_config_unknown_falls_back_to_minimal_level() { - // 未来未知 ID 兜底到 minimal——尽量"关",避免默认放出长思考。 - let v = thinking_config_for("gemini-99-future-model").unwrap(); - assert_eq!(v, json!({ "thinkingLevel": "minimal" })); - } - - #[test] - fn thinking_config_never_emits_both_budget_and_level() { - // 不变量:任一返回值最多含 `thinkingBudget` 或 `thinkingLevel` 中的一个, - // 不能同发——文档没明示但属未定义行为,回归就立刻暴露。 - for model in [ - "gemini-2.5-flash", - "gemini-2.5-flash-lite", - "gemini-3.1-pro-preview", - "gemini-3-flash-preview", - "gemini-3.1-flash-lite", - "gemini-3.1-flash-lite-preview", - "gemini-99-future-model", - ] { - let v = thinking_config_for(model).unwrap(); - let obj = v.as_object().unwrap(); - let has_budget = obj.contains_key("thinkingBudget"); - let has_level = obj.contains_key("thinkingLevel"); - assert!( - has_budget ^ has_level, - "model {model} 同时下发 budget 与 level,违反单字段不变量: {v}" - ); - } + fn disabled_thinking_config_uses_channel_level_budget_zero() { + assert_eq!(disabled_thinking_config(), json!({ "thinkingBudget": 0 })); } #[test] fn generate_content_url_handles_trailing_slash_in_base_url() { let a = generate_content_url("https://x/v1beta", "gemini-2.5-flash"); let b = generate_content_url("https://x/v1beta/", "gemini-2.5-flash"); - assert_eq!(a, "https://x/v1beta/models/gemini-2.5-flash:generateContent"); - assert_eq!(b, "https://x/v1beta/models/gemini-2.5-flash:generateContent"); + assert_eq!( + a, + "https://x/v1beta/models/gemini-2.5-flash:generateContent" + ); + assert_eq!( + b, + "https://x/v1beta/models/gemini-2.5-flash:generateContent" + ); } #[test] @@ -652,8 +597,8 @@ mod tests { } #[test] - fn build_generate_body_2_5_flash_includes_thinking_budget_zero() { - let cfg = GeminiConfig::new("k", "gemini-2.5-flash", "https://x/v1beta"); + fn build_generate_body_disabled_includes_channel_level_thinking_budget_zero() { + let cfg = GeminiConfig::new("k", "any-gemini-model", "https://x/v1beta"); let provider = GeminiProvider::new(cfg); let body = provider.build_generate_body("SYS", vec![user_content("hi")]); assert_eq!( @@ -665,13 +610,14 @@ mod tests { } #[test] - fn build_generate_body_2_5_pro_omits_thinking_config() { - let cfg = GeminiConfig::new("k", "gemini-2.5-pro", "https://x/v1beta"); + fn build_generate_body_thinking_enabled_omits_thinking_config() { + let cfg = GeminiConfig::new("k", "gemini-2.5-flash", "https://x/v1beta") + .with_thinking_enabled(true); let provider = GeminiProvider::new(cfg); let body = provider.build_generate_body("SYS", vec![user_content("hi")]); assert!( body["generationConfig"].get("thinkingConfig").is_none(), - "2.5 Pro 不能关思考,generationConfig 不应含 thinkingConfig 字段" + "开启思考模式时不下发关闭思考的 thinkingConfig" ); } @@ -744,15 +690,4 @@ mod tests { assert_eq!(events, vec!["data: ok", "data: ok2"]); assert!(buf.is_empty()); } - - #[test] - fn build_generate_body_3_x_pro_emits_low_thinking_level() { - let cfg = GeminiConfig::new("k", "gemini-3.1-pro-preview", "https://x/v1beta"); - let provider = GeminiProvider::new(cfg); - let body = provider.build_generate_body("SYS", vec![]); - assert_eq!( - body["generationConfig"]["thinkingConfig"], - json!({ "thinkingLevel": "low" }) - ); - } } diff --git a/openless-all/app/src-tauri/src/polish.rs b/openless-all/app/src-tauri/src/polish.rs index b7da2e24..a9ab3115 100644 --- a/openless-all/app/src-tauri/src/polish.rs +++ b/openless-all/app/src-tauri/src/polish.rs @@ -33,6 +33,10 @@ pub struct OpenAICompatibleConfig { pub extra_headers: HashMap, pub temperature: f32, pub request_timeout_secs: u64, + /// true = 让支持的 OpenAI-compatible provider 启用推理 / 思考; + /// false = 按渠道级官方参数关闭或压低思考。不做模型白名单判断, + /// 具体模型兼容性交给 provider 处理。 + pub thinking_enabled: bool, } impl OpenAICompatibleConfig { @@ -52,8 +56,14 @@ impl OpenAICompatibleConfig { extra_headers: HashMap::new(), temperature: DEFAULT_TEMPERATURE, request_timeout_secs: DEFAULT_REQUEST_TIMEOUT_SECS, + thinking_enabled: false, } } + + pub fn with_thinking_enabled(mut self, enabled: bool) -> Self { + self.thinking_enabled = enabled; + self + } } #[derive(Debug, Error)] @@ -315,12 +325,7 @@ impl OpenAICompatibleLLMProvider { ) -> Result { let url = chat_completions_url(&self.config.base_url); let messages = build_polish_history_messages(system_prompt, prior_turns, user_prompt); - let body = json!({ - "model": self.config.model, - "stream": false, - "temperature": self.config.temperature, - "messages": messages, - }); + let body = self.chat_body(false, messages); log::info!( "[llm] POST {} provider={} model={} prior_turns={}", @@ -340,15 +345,13 @@ impl OpenAICompatibleLLMProvider { user_prompt: &str, ) -> Result { let url = chat_completions_url(&self.config.base_url); - let body = json!({ - "model": self.config.model, - "stream": false, - "temperature": self.config.temperature, - "messages": [ - { "role": "system", "content": system_prompt }, - { "role": "user", "content": user_prompt }, + let body = self.chat_body( + false, + vec![ + json!({ "role": "system", "content": system_prompt }), + json!({ "role": "user", "content": user_prompt }), ], - }); + ); log::info!( "[llm] POST {} provider={} model={}", @@ -360,6 +363,17 @@ impl OpenAICompatibleLLMProvider { self.send_chat_request(&url, &body).await } + fn chat_body(&self, stream: bool, messages: Vec) -> Value { + let mut body = json!({ + "model": self.config.model, + "stream": stream, + "temperature": self.config.temperature, + "messages": messages, + }); + apply_openai_compatible_thinking_control(&mut body, &self.config); + body + } + /// 共用的 HTTP send + body 解析。chat_completion / chat_completion_with_polish_history /// 各自构造好 body 后都调到这里,避免 30 行 send/parse 重复。 async fn send_chat_request( @@ -430,12 +444,7 @@ impl OpenAICompatibleLLMProvider { } let url = chat_completions_url(&self.config.base_url); - let body = json!({ - "model": self.config.model, - "stream": true, - "temperature": self.config.temperature, - "messages": msgs, - }); + let body = self.chat_body(true, msgs); log::info!( "[llm] POST {} provider={} model={} chat_turns={} stream=true", @@ -585,6 +594,11 @@ impl CodexOAuthConfig { self.auth_path = Some(auth_path); self } + + pub fn with_thinking_enabled(mut self, enabled: bool) -> Self { + self.reasoning_effort = Some(if enabled { "medium" } else { "low" }.to_string()); + self + } } #[derive(Clone, Debug, PartialEq, Eq)] @@ -1158,6 +1172,54 @@ fn unix_now_secs() -> u64 { .unwrap_or(0) } +fn apply_openai_compatible_thinking_control(body: &mut Value, config: &OpenAICompatibleConfig) { + match openai_compatible_thinking_control(&config.provider_id) { + Some(ThinkingControl::ReasoningEffort) => { + // OpenAI Chat Completions 的 reasoning_effort 是渠道级请求字段。 + // 关闭时统一压到 low,避免引入模型白名单;不支持该字段的模型由 provider 自行处理。 + body["reasoning_effort"] = json!(if config.thinking_enabled { + "medium" + } else { + "low" + }); + } + Some(ThinkingControl::EnableThinking) => { + body["enable_thinking"] = json!(config.thinking_enabled); + } + Some(ThinkingControl::OpenRouterReasoning) => { + body["reasoning"] = json!({ + "effort": if config.thinking_enabled { "medium" } else { "none" }, + // OpenLess 的 QA/润色输出只展示最终答案;推理内容即使生成,也不应进 UI。 + "exclude": true, + }); + } + Some(ThinkingControl::DeepSeekThinking) => { + body["thinking"] = json!({ + "type": if config.thinking_enabled { "enabled" } else { "disabled" }, + }); + } + None => {} + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ThinkingControl { + ReasoningEffort, + EnableThinking, + OpenRouterReasoning, + DeepSeekThinking, +} + +fn openai_compatible_thinking_control(provider_id: &str) -> Option { + match provider_id.trim() { + "deepseek" => Some(ThinkingControl::DeepSeekThinking), + "openrouterFree" => Some(ThinkingControl::OpenRouterReasoning), + "alibabaCoding" => Some(ThinkingControl::EnableThinking), + "openai" | "codingPlanX" => Some(ThinkingControl::ReasoningEffort), + _ => None, + } +} + /// 把 working_languages + front_app 拼成 system prompt 头部前提: /// # 上下文 /// 用户的工作语言:… @@ -2055,6 +2117,124 @@ mod tests { ); } + #[test] + fn openai_chat_body_adds_reasoning_effort_for_openai_channel() { + let provider = OpenAICompatibleLLMProvider::new( + OpenAICompatibleConfig::new( + "openai", + "OpenAI", + "https://api.openai.com/v1", + "k", + "any-model", + ) + .with_thinking_enabled(true), + ); + + let body = provider.chat_body(false, vec![json!({ "role": "user", "content": "hi" })]); + + assert_eq!(body["reasoning_effort"], "medium"); + } + + #[test] + fn openai_chat_body_lowers_reasoning_when_disabled_for_channel() { + let provider = OpenAICompatibleLLMProvider::new(OpenAICompatibleConfig::new( + "codingPlanX", + "Coding Plan X", + "https://api.codingplanx.ai/v1", + "k", + "any-model", + )); + + let body = provider.chat_body(false, vec![json!({ "role": "user", "content": "hi" })]); + + assert_eq!(body["reasoning_effort"], "low"); + } + + #[test] + fn openai_chat_body_adds_enable_thinking_for_alibaba_channel() { + let provider = OpenAICompatibleLLMProvider::new( + OpenAICompatibleConfig::new( + "alibabaCoding", + "Alibaba Coding", + "https://coding-intl.dashscope.aliyuncs.com/v1", + "k", + "any-model", + ) + .with_thinking_enabled(true), + ); + + let body = provider.chat_body(false, vec![json!({ "role": "user", "content": "hi" })]); + + assert_eq!(body["enable_thinking"], true); + } + + #[test] + fn openai_chat_body_adds_openrouter_reasoning_control() { + let provider = OpenAICompatibleLLMProvider::new(OpenAICompatibleConfig::new( + "openrouterFree", + "OpenRouter", + "https://openrouter.ai/api/v1", + "k", + "openai/gpt-5-mini", + )); + + let body = provider.chat_body(true, vec![json!({ "role": "user", "content": "hi" })]); + + assert_eq!(body["reasoning"]["effort"], "none"); + assert_eq!(body["reasoning"]["exclude"], true); + } + + #[test] + fn openai_chat_body_adds_openrouter_reasoning_by_channel_not_model() { + let provider = OpenAICompatibleLLMProvider::new(OpenAICompatibleConfig::new( + "openrouterFree", + "OpenRouter", + "https://openrouter.ai/api/v1", + "k", + "qwen/qwen3-coder:free", + )); + + let body = provider.chat_body(true, vec![json!({ "role": "user", "content": "hi" })]); + + assert_eq!(body["reasoning"]["effort"], "none"); + assert_eq!(body["reasoning"]["exclude"], true); + } + + #[test] + fn openai_chat_body_adds_deepseek_thinking_toggle_by_channel() { + let provider = OpenAICompatibleLLMProvider::new(OpenAICompatibleConfig::new( + "deepseek", + "DeepSeek", + "https://api.deepseek.com/v1", + "k", + "any-model", + )); + + let body = provider.chat_body(false, vec![json!({ "role": "user", "content": "hi" })]); + + assert_eq!(body["thinking"]["type"], "disabled"); + } + + #[test] + fn openai_chat_body_omits_thinking_control_for_unknown_provider() { + let provider = OpenAICompatibleLLMProvider::new( + OpenAICompatibleConfig::new( + "custom", + "Custom", + "https://example.test/v1", + "k", + "custom-model", + ) + .with_thinking_enabled(true), + ); + + let body = provider.chat_body(false, vec![json!({ "role": "user", "content": "hi" })]); + + assert!(body.get("reasoning_effort").is_none()); + assert!(body.get("enable_thinking").is_none()); + assert!(body.get("reasoning").is_none()); + } + #[test] fn structured_prompt_includes_dense_github_request_example() { let prompt = prompts::system_prompt(PolishMode::Structured); @@ -2257,6 +2437,13 @@ mod tests { ); } + #[test] + fn codex_oauth_config_lowers_reasoning_when_thinking_disabled() { + let config = CodexOAuthConfig::new("gpt-5.5").with_thinking_enabled(false); + + assert_eq!(config.reasoning_effort.as_deref(), Some("low")); + } + #[tokio::test] async fn codex_oauth_provider_streams_text_from_codex_responses() { let auth_path = write_codex_auth_fixture("acct-openless", unix_now_secs() + 3600); @@ -2277,6 +2464,7 @@ mod tests { assert!(request_text.contains(r#""stream":true"#)); assert!(request_text.contains(r#""role":"developer"#)); assert!(request_text.contains(r#""type":"input_text"#)); + assert!(request_text.contains(r#""reasoning":{"effort":"medium"}"#)); assert!(!request_text.contains(r#""temperature":"#)); let body = concat!( diff --git a/openless-all/app/src-tauri/src/types.rs b/openless-all/app/src-tauri/src/types.rs index 5f658b24..3aac1cb5 100644 --- a/openless-all/app/src-tauri/src/types.rs +++ b/openless-all/app/src-tauri/src/types.rs @@ -168,6 +168,11 @@ pub struct UserPreferences { pub microphone_device_name: String, pub active_asr_provider: String, // "volcengine" | "apple-speech" | ... pub active_llm_provider: String, // "ark" | "openai" | ... + /// LLM 思考模式开关。默认 false 以保持既有「尽量关闭思考」行为; + /// Gemini 走原生 thinkingConfig,OpenAI-compatible 路径仅按 provider/channel + /// 下发官方渠道级字段,不用 prompt 注入,也不做模型白名单适配。详见 issue #402。 + #[serde(default)] + pub llm_thinking_enabled: bool, /// Windows/Linux 粘贴成功后是否恢复用户原剪贴板。默认 true 跟历史行为一致; /// 关掉就把听写文本留在剪贴板,让 simulate_paste 实际没生效时用户能 Ctrl+V 找回。 /// macOS 走 AX 直写,不受这个开关影响。详见 issue #111。 @@ -319,6 +324,8 @@ struct UserPreferencesWire { microphone_device_name: String, active_asr_provider: String, active_llm_provider: String, + #[serde(default)] + llm_thinking_enabled: bool, restore_clipboard_after_paste: bool, #[serde(default)] paste_shortcut: PasteShortcut, @@ -372,6 +379,7 @@ impl Default for UserPreferencesWire { microphone_device_name: prefs.microphone_device_name, active_asr_provider: prefs.active_asr_provider, active_llm_provider: prefs.active_llm_provider, + llm_thinking_enabled: prefs.llm_thinking_enabled, restore_clipboard_after_paste: prefs.restore_clipboard_after_paste, paste_shortcut: prefs.paste_shortcut, allow_non_tsf_insertion_fallback: prefs.allow_non_tsf_insertion_fallback, @@ -422,6 +430,7 @@ impl<'de> Deserialize<'de> for UserPreferences { microphone_device_name: wire.microphone_device_name, active_asr_provider: wire.active_asr_provider, active_llm_provider: wire.active_llm_provider, + llm_thinking_enabled: wire.llm_thinking_enabled, restore_clipboard_after_paste: wire.restore_clipboard_after_paste, paste_shortcut: wire.paste_shortcut, allow_non_tsf_insertion_fallback: wire.allow_non_tsf_insertion_fallback, @@ -539,6 +548,7 @@ impl Default for UserPreferences { microphone_device_name: String::new(), active_asr_provider: default_active_asr_provider(), active_llm_provider: "ark".into(), + llm_thinking_enabled: false, restore_clipboard_after_paste: true, paste_shortcut: PasteShortcut::default(), allow_non_tsf_insertion_fallback: true, diff --git a/openless-all/app/src/i18n/en.ts b/openless-all/app/src/i18n/en.ts index 7e5a9228..a58e295f 100644 --- a/openless-all/app/src/i18n/en.ts +++ b/openless-all/app/src/i18n/en.ts @@ -401,6 +401,10 @@ export const en: typeof zhCN = { apiKeyLabel: 'API Key', baseUrlLabel: 'Base URL', modelLabel: 'Model', + thinkingModeLabel: 'Thinking', + thinkingModeOn: 'On', + thinkingModeOff: 'Off', + thinkingModeHint: 'Off disables or minimizes thinking with provider-level official parameters. On enables thinking by channel defaults. No prompt injection or per-model adapters.', bailianVocabularyIdLabel: 'Hotword Vocabulary ID (optional)', bailianVocabularyIdNote: 'If you have created a DashScope hotword vocabulary, enter its vocab-... ID. Leave blank to skip hotwords.', appIdLabel: 'App ID', diff --git a/openless-all/app/src/i18n/ja.ts b/openless-all/app/src/i18n/ja.ts index 6d34ffc5..e5a6eca1 100644 --- a/openless-all/app/src/i18n/ja.ts +++ b/openless-all/app/src/i18n/ja.ts @@ -403,6 +403,10 @@ export const ja: typeof zhCN = { apiKeyLabel: 'API キー', baseUrlLabel: 'エンドポイント', modelLabel: 'モデル', + thinkingModeLabel: '思考', + thinkingModeOn: 'オン', + thinkingModeOff: 'オフ', + thinkingModeHint: 'オフではチャネル単位の公式パラメーターで思考を無効化または最小化します。オンではチャネル既定で思考を有効化します。prompt 注入やモデル別適配は行いません。', bailianVocabularyIdLabel: 'ホットワード Vocabulary ID(任意)', bailianVocabularyIdNote: 'DashScope でホットワード辞書を作成済みの場合は vocab-... ID を入力します。空欄なら送信しません。', appIdLabel: 'App ID(アプリケーション ID)', diff --git a/openless-all/app/src/i18n/ko.ts b/openless-all/app/src/i18n/ko.ts index bef46493..535513e1 100644 --- a/openless-all/app/src/i18n/ko.ts +++ b/openless-all/app/src/i18n/ko.ts @@ -403,6 +403,10 @@ export const ko: typeof zhCN = { apiKeyLabel: 'API 키', baseUrlLabel: '엔드포인트', modelLabel: '모델', + thinkingModeLabel: '사고', + thinkingModeOn: '켜짐', + thinkingModeOff: '꺼짐', + thinkingModeHint: '꺼짐은 채널 단위 공식 파라미터로 사고를 끄거나 최소화합니다. 켜짐은 채널 기본값으로 사고를 켭니다. prompt 주입이나 모델별 어댑터는 사용하지 않습니다.', bailianVocabularyIdLabel: '핫워드 Vocabulary ID(선택)', bailianVocabularyIdNote: 'DashScope에서 핫워드 사전을 만들었다면 vocab-... ID를 입력하세요. 비워 두면 핫워드를 전송하지 않습니다.', appIdLabel: 'App ID(애플리케이션 ID)', diff --git a/openless-all/app/src/i18n/zh-CN.ts b/openless-all/app/src/i18n/zh-CN.ts index 6123d1ef..6c0aa3ff 100644 --- a/openless-all/app/src/i18n/zh-CN.ts +++ b/openless-all/app/src/i18n/zh-CN.ts @@ -399,6 +399,10 @@ export const zhCN = { apiKeyLabel: 'API 密钥', baseUrlLabel: '接口地址', modelLabel: '模型', + thinkingModeLabel: '思考', + thinkingModeOn: '开启', + thinkingModeOff: '关闭', + thinkingModeHint: '关闭时按渠道级官方参数关闭或压低思考;开启时按渠道默认启用思考。不注入 prompt,也不做单模型适配。', bailianVocabularyIdLabel: '热词 Vocabulary ID(可选)', bailianVocabularyIdNote: '如已在百炼创建热词表,可填写 vocab-...;留空则不下发热词。', appIdLabel: 'App ID(应用 ID)', diff --git a/openless-all/app/src/i18n/zh-TW.ts b/openless-all/app/src/i18n/zh-TW.ts index b9119b6d..a10ccab5 100644 --- a/openless-all/app/src/i18n/zh-TW.ts +++ b/openless-all/app/src/i18n/zh-TW.ts @@ -401,6 +401,10 @@ export const zhTW: typeof zhCN = { apiKeyLabel: 'API 密鑰', baseUrlLabel: '接口地址', modelLabel: '模型', + thinkingModeLabel: '思考', + thinkingModeOn: '開啟', + thinkingModeOff: '關閉', + thinkingModeHint: '關閉時按渠道級官方參數關閉或降低思考;開啟時按渠道預設啟用思考。不注入 prompt,也不做單模型適配。', bailianVocabularyIdLabel: '熱詞 Vocabulary ID(可選)', bailianVocabularyIdNote: '如已在百煉建立熱詞表,可填寫 vocab-...;留空則不下發熱詞。', appIdLabel: 'App ID(應用 ID)', diff --git a/openless-all/app/src/lib/ipc.ts b/openless-all/app/src/lib/ipc.ts index 4b347e4f..e974e393 100644 --- a/openless-all/app/src/lib/ipc.ts +++ b/openless-all/app/src/lib/ipc.ts @@ -57,6 +57,7 @@ const mockSettings: UserPreferences = { microphoneDeviceName: '', activeAsrProvider: 'foundry-local-whisper', activeLlmProvider: 'ark', + llmThinkingEnabled: false, restoreClipboardAfterPaste: true, pasteShortcut: 'ctrlV', allowNonTsfInsertionFallback: true, diff --git a/openless-all/app/src/lib/stylePrefs.test.ts b/openless-all/app/src/lib/stylePrefs.test.ts index bed26970..3767bfaf 100644 --- a/openless-all/app/src/lib/stylePrefs.test.ts +++ b/openless-all/app/src/lib/stylePrefs.test.ts @@ -26,6 +26,7 @@ const previousPrefs: UserPreferences = { microphoneDeviceName: '', activeAsrProvider: 'volcengine', activeLlmProvider: 'ark', + llmThinkingEnabled: false, restoreClipboardAfterPaste: true, pasteShortcut: 'ctrlV', allowNonTsfInsertionFallback: true, diff --git a/openless-all/app/src/lib/types.ts b/openless-all/app/src/lib/types.ts index 56678d88..b4e6e164 100644 --- a/openless-all/app/src/lib/types.ts +++ b/openless-all/app/src/lib/types.ts @@ -147,6 +147,8 @@ export interface UserPreferences { microphoneDeviceName: string; activeAsrProvider: string; activeLlmProvider: string; + /** LLM 思考模式开关。默认关闭,保持既有尽量关闭思考的行为。详见 issue #402。 */ + llmThinkingEnabled: boolean; /** 仅 Windows/Linux:粘贴成功后是否恢复用户原剪贴板。默认 true。详见 issue #111。 */ restoreClipboardAfterPaste: boolean; /** 仅 Windows/Linux:模拟粘贴时按下的快捷键。详见 issue #360:kitty/alacritty diff --git a/openless-all/app/src/pages/Settings.tsx b/openless-all/app/src/pages/Settings.tsx index 7f2cf83c..8b4b92c8 100644 --- a/openless-all/app/src/pages/Settings.tsx +++ b/openless-all/app/src/pages/Settings.tsx @@ -1139,6 +1139,30 @@ export function Toggle({ on, onToggle }: { on: boolean; onToggle?: (next: boolea ); } +function LlmThinkingToggle({ enabled, onToggle }: { enabled: boolean; onToggle: (next: boolean) => void }) { + const { t } = useTranslation(); + return ( +
+ + {t('settings.providers.thinkingModeLabel')} + + + + {enabled ? t('settings.providers.thinkingModeOn') : t('settings.providers.thinkingModeOff')} + +
+ ); +} + const LLM_PRESETS = [ { id: 'ark', @@ -1168,9 +1192,8 @@ const LLM_PRESETS = [ // 谷歌官方 Gemini API(原生 generateContent,不走 OpenAI 兼容 shim)。 // baseUrl 末尾 /v1beta 是当前 Generally Available 的 path(ai.google.dev/api)。 // 后端 llm_gemini.rs 会拼成 `{baseUrl}/models/{model}:generateContent`, - // 并按模型 family 注入 thinkingConfig 强制关思考(2.5 flash 系列 thinkingBudget=0; - // 3.x pro 走 thinkingLevel="low";3.x flash 走 thinkingLevel="minimal"; - // 2.5 pro 官方明示无法关闭思考)。模型列表用 ProviderTools「拉取模型」按钮取, + // 并按 Gemini 原生通道级 thinkingConfig 关闭或压低思考,不在前端维护模型适配表。 + // 模型列表用 ProviderTools「拉取模型」按钮取, // 由 commands.rs::fetch_provider_models 识别 generativelanguage 域名后按 Gemini shape 解析。 id: 'gemini', nameKey: 'gemini', @@ -1332,6 +1355,14 @@ function ProvidersSection() { } }; + const onLlmThinkingToggle = (enabled: boolean) => { + if (!prefs) return; + void updatePrefs(current => ({ ...current, llmThinkingEnabled: enabled })).catch(error => { + console.error('[settings] failed to update LLM thinking mode', error); + emitSaved('failed', t('common.operationFailed')); + }); + }; + const onAsrProviderChange = async (id: AsrPresetId) => { setAsrProvider(id); const seq = ++asrSwitchSeqRef.current; @@ -1420,7 +1451,14 @@ function ProvidersSection() { )} + placeholder={preset.modelPlaceholder || 'model-name'} mono + trailing={( + + )} + /> setLlmModelRevision(v => v + 1)} /> @@ -1884,9 +1922,10 @@ interface CredentialFieldProps { mono?: boolean; mask?: boolean; defaultValue?: string; + trailing?: ReactNode; } -function CredentialField({ label, account, placeholder, mono, mask, defaultValue }: CredentialFieldProps) { +function CredentialField({ label, account, placeholder, mono, mask, defaultValue, trailing }: CredentialFieldProps) { const { t } = useTranslation(); const [value, setValue] = useState(''); const [revealed, setRevealed] = useState(false); @@ -2023,6 +2062,7 @@ function CredentialField({ label, account, placeholder, mono, mask, defaultValue )} + {trailing} {mask && (