diff --git a/src-tauri/src/proxy/usage/parser.rs b/src-tauri/src/proxy/usage/parser.rs index cf2f1982..7ed49445 100644 --- a/src-tauri/src/proxy/usage/parser.rs +++ b/src-tauri/src/proxy/usage/parser.rs @@ -333,16 +333,41 @@ impl TokenUsage { { usage.output_tokens = output as u32; } - // OpenRouter 转换后的流式响应:input_tokens 也在 message_delta 中 - // 如果 message_start 中没有 input_tokens,则从 message_delta 获取 - if usage.input_tokens == 0 { - if let Some(input) = - delta_usage.get("input_tokens").and_then(|v| v.as_u64()) + // 部分 Anthropic 兼容上游(如 Qwen、MiniMax)在 message_start + // 中将 fresh+cached 合并报告为 input_tokens,导致虚高。 + // 当 message_delta 提供了更小的正值 input_tokens 时,优先采用 + // delta 的值,并同步更新缓存计数以避免重复计算。 + if let Some(input) = + delta_usage.get("input_tokens").and_then(|v| v.as_u64()) + { + let delta_input = input as u32; + if delta_input > 0 + && (usage.input_tokens == 0 || delta_input < usage.input_tokens) { - usage.input_tokens = input as u32; + usage.input_tokens = delta_input; + // 同步采用 delta 中的缓存计数 + if let Some(cache_read) = delta_usage + .get("cache_read_input_tokens") + .and_then(|v| v.as_u64()) + { + usage.cache_read_tokens = cache_read as u32; + } + if let Some(cache_creation) = delta_usage + .get("cache_creation_input_tokens") + .and_then(|v| v.as_u64()) + { + usage.cache_creation_tokens = cache_creation as u32; + } + } + } else { + // OpenRouter 转换后的流式响应:input_tokens 仅在 + // message_delta 中且 message_start 未提供时的回退路径 + if usage.input_tokens == 0 { + // (no input_tokens in delta, keep start value) } } // 从 message_delta 中处理缓存命中(cache_read_input_tokens) + // 仅当上面未从 delta 同步时才回退 if usage.cache_read_tokens == 0 { if let Some(cache_read) = delta_usage .get("cache_read_input_tokens") @@ -352,7 +377,6 @@ impl TokenUsage { } } // 从 message_delta 中处理缓存创建(cache_creation_input_tokens) - // 注: 现在 zhipu 没有返回 cache_creation_input_tokens 字段 if usage.cache_creation_tokens == 0 { if let Some(cache_creation) = delta_usage .get("cache_creation_input_tokens") @@ -1137,4 +1161,102 @@ mod tests 
{ assert_eq!(usage.output_tokens, 50); assert_eq!(usage.model, Some("gpt-4o".to_string())); } + + #[test] + fn test_claude_stream_delta_input_override_inflated_start() { + // Some providers (Qwen, MiniMax) report fresh+cached as input_tokens in + // message_start, inflating the count. message_delta provides the correct + // (smaller) value that should override. + let events = vec![ + json!({ + "type": "message_start", + "message": { + "model": "claude-sonnet-4-20250514", + "usage": { + "input_tokens": 10000, + "cache_read_input_tokens": 8000, + "cache_creation_input_tokens": 0 + } + } + }), + json!({ + "type": "message_delta", + "usage": { + "output_tokens": 50, + "input_tokens": 2000, + "cache_read_input_tokens": 1500, + "cache_creation_input_tokens": 0 + } + }), + ]; + + let usage = TokenUsage::from_claude_stream_events(&events).unwrap(); + // Delta input_tokens (2000) < start input_tokens (10000), so override + assert_eq!(usage.input_tokens, 2000); + assert_eq!(usage.output_tokens, 50); + // Cache counts synced from delta + assert_eq!(usage.cache_read_tokens, 1500); + assert_eq!(usage.cache_creation_tokens, 0); + } + + #[test] + fn test_claude_stream_delta_input_not_overridden_when_larger() { + // If delta input_tokens is larger than start, keep the start value + let events = vec![ + json!({ + "type": "message_start", + "message": { + "model": "claude-sonnet-4-20250514", + "usage": { + "input_tokens": 1000, + "cache_read_input_tokens": 200, + "cache_creation_input_tokens": 0 + } + } + }), + json!({ + "type": "message_delta", + "usage": { + "output_tokens": 50, + "input_tokens": 2000 + } + }), + ]; + + let usage = TokenUsage::from_claude_stream_events(&events).unwrap(); + // Delta (2000) > start (1000), so keep start value + assert_eq!(usage.input_tokens, 1000); + assert_eq!(usage.output_tokens, 50); + } + + #[test] + fn test_claude_stream_delta_input_overrides_when_start_is_zero() { + // When start has zero input_tokens, delta should always be adopted + let 
events = vec![ + json!({ + "type": "message_start", + "message": { + "model": "claude-sonnet-4-20250514", + "usage": { + "input_tokens": 0, + "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 0 + } + } + }), + json!({ + "type": "message_delta", + "usage": { + "output_tokens": 50, + "input_tokens": 500, + "cache_read_input_tokens": 100 + } + }), + ]; + + let usage = TokenUsage::from_claude_stream_events(&events).unwrap(); + assert_eq!(usage.input_tokens, 500); + assert_eq!(usage.output_tokens, 50); + assert_eq!(usage.cache_read_tokens, 100); + } } diff --git a/src-tauri/src/services/coding_plan.rs b/src-tauri/src/services/coding_plan.rs index 565ee2cf..a6530f73 100644 --- a/src-tauri/src/services/coding_plan.rs +++ b/src-tauri/src/services/coding_plan.rs @@ -181,12 +181,26 @@ async fn query_kimi(api_key: &str) -> SubscriptionQuota { // ── 智谱 GLM ──────────────────────────────────────────────── -async fn query_zhipu(api_key: &str) -> SubscriptionQuota { +/// 根据用户配置的 base_url 确定智谱配额查询端点。 +/// 中国大陆用户使用 open.bigmodel.cn,国际用户使用 api.z.ai。 +fn zhipu_quota_base(base_url: &str) -> &'static str { + let url = base_url.to_lowercase(); + if url.contains("bigmodel.cn") { + "https://open.bigmodel.cn" + } else { + "https://api.z.ai" + } +} + +async fn query_zhipu(base_url: &str, api_key: &str) -> SubscriptionQuota { let client = crate::proxy::http_client::get(); - // 统一走 api.z.ai 国际站(中国站 bigmodel.cn 有反爬机制) + let quota_url = format!( + "{}/api/monitor/usage/quota/limit", + zhipu_quota_base(base_url) + ); let resp = client - .get("https://api.z.ai/api/monitor/usage/quota/limit") + .get(&quota_url) .header("Authorization", api_key) // 注意:智谱不加 Bearer 前缀 .header("Content-Type", "application/json") .header("Accept-Language", "en-US,en") @@ -266,6 +280,15 @@ async fn query_zhipu(api_key: &str) -> SubscriptionQuota { } } + // 按 nextResetTime 排序:缺失的(刚重置的 5 小时桶)排在前面, + // 有值的按重置时间升序排列,确保显示顺序正确。 + tiers.sort_by(|a, b| match (&a.resets_at, &b.resets_at) { + (None, None) => 
std::cmp::Ordering::Equal, + (None, Some(_)) => std::cmp::Ordering::Less, + (Some(_), None) => std::cmp::Ordering::Greater, + (Some(a_time), Some(b_time)) => a_time.cmp(b_time), + }); + // 套餐等级存入 credential_message let level = data .get("level") @@ -442,10 +465,131 @@ pub async fn get_coding_plan_quota( let quota = match provider { CodingPlanProvider::Kimi => query_kimi(api_key).await, - CodingPlanProvider::ZhipuCn | CodingPlanProvider::ZhipuEn => query_zhipu(api_key).await, + CodingPlanProvider::ZhipuCn | CodingPlanProvider::ZhipuEn => { + query_zhipu(base_url, api_key).await + } CodingPlanProvider::MiniMaxCn => query_minimax(api_key, true).await, CodingPlanProvider::MiniMaxEn => query_minimax(api_key, false).await, }; Ok(quota) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn zhipu_quota_base_cn() { + assert_eq!( + zhipu_quota_base("https://open.bigmodel.cn/api/paas/v4"), + "https://open.bigmodel.cn" + ); + } + + #[test] + fn zhipu_quota_base_en() { + assert_eq!( + zhipu_quota_base("https://api.z.ai/api/paas/v4"), + "https://api.z.ai" + ); + } + + #[test] + fn zhipu_quota_base_case_insensitive() { + assert_eq!( + zhipu_quota_base("https://OPEN.BIGMODEL.CN/api/paas/v4"), + "https://open.bigmodel.cn" + ); + assert_eq!( + zhipu_quota_base("https://Api.Z.AI/api/paas/v4"), + "https://api.z.ai" + ); + } + + #[test] + fn detect_provider_case_insensitive() { + assert!(matches!( + detect_provider("https://OPEN.BIGMODEL.CN/api/paas/v4"), + Some(CodingPlanProvider::ZhipuCn) + )); + assert!(matches!( + detect_provider("https://Api.Z.AI/api/paas/v4"), + Some(CodingPlanProvider::ZhipuEn) + )); + } + + #[test] + fn zhipu_quota_base_matches_detect_provider_for_cn() { + // Ensure zhipu_quota_base and detect_provider agree on CN vs EN + let urls = [ + "https://open.bigmodel.cn/api/paas/v4", + "https://OPEN.BIGMODEL.CN/api/paas/v4", + "https://api.z.ai/api/paas/v4", + "https://Api.Z.AI/api/paas/v4", + ]; + for url in &urls { + let provider = detect_provider(url); + 
let base = zhipu_quota_base(url); + match provider { + Some(CodingPlanProvider::ZhipuCn) => { + assert_eq!(base, "https://open.bigmodel.cn", "CN mismatch for {url}") + } + Some(CodingPlanProvider::ZhipuEn) => { + assert_eq!(base, "https://api.z.ai", "EN mismatch for {url}") + } + _ => panic!("unexpected provider for {url}"), + } + } + } + + #[test] + fn zhipu_tier_sorting_none_resets_at_first() { + // When the 5-hour bucket has 0% utilization, nextResetTime is absent. + // Tiers with None resets_at should sort before those with Some. + let mut tiers = vec![ + QuotaTier { + name: "weekly".to_string(), + utilization: 50.0, + resets_at: Some("2026-06-15T00:00:00Z".to_string()), + }, + QuotaTier { + name: "five_hour".to_string(), + utilization: 0.0, + resets_at: None, + }, + ]; + tiers.sort_by(|a, b| match (&a.resets_at, &b.resets_at) { + (None, None) => std::cmp::Ordering::Equal, + (None, Some(_)) => std::cmp::Ordering::Less, + (Some(_), None) => std::cmp::Ordering::Greater, + (Some(a_time), Some(b_time)) => a_time.cmp(b_time), + }); + assert_eq!(tiers[0].name, "five_hour"); + assert_eq!(tiers[1].name, "weekly"); + } + + #[test] + fn zhipu_tier_sorting_by_reset_time_ascending() { + let mut tiers = vec![ + QuotaTier { + name: "weekly".to_string(), + utilization: 50.0, + resets_at: Some("2026-06-15T00:00:00Z".to_string()), + }, + QuotaTier { + name: "five_hour".to_string(), + utilization: 30.0, + resets_at: Some("2026-06-10T12:00:00Z".to_string()), + }, + ]; + tiers.sort_by(|a, b| match (&a.resets_at, &b.resets_at) { + (None, None) => std::cmp::Ordering::Equal, + (None, Some(_)) => std::cmp::Ordering::Less, + (Some(_), None) => std::cmp::Ordering::Greater, + (Some(a_time), Some(b_time)) => a_time.cmp(b_time), + }); + assert_eq!(tiers[0].name, "five_hour"); + assert_eq!(tiers[1].name, "weekly"); + } +}