diff --git a/docs/superpowers/plans/2026-05-06-windows-local-asr.md b/docs/superpowers/plans/2026-05-06-windows-local-asr.md new file mode 100644 index 00000000..01785022 --- /dev/null +++ b/docs/superpowers/plans/2026-05-06-windows-local-asr.md @@ -0,0 +1,1396 @@ +# Windows Local ASR Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a Windows-only `foundry-local-whisper` ASR provider so new Windows users can dictate through OpenLess without external ASR keys or Windows Win+H Voice Typing. + +**Architecture:** Keep `coordinator::Coordinator` as the single owner of dictation state. Add a Windows Foundry Local Whisper provider that buffers existing recorder PCM, transcribes it locally, then returns `RawTranscript` into the existing polish, Windows TSF IME insertion, and history pipeline. + +**Tech Stack:** Tauri 2, Rust, React/TypeScript, Foundry Local Rust SDK, reqwest multipart REST call to local `/v1/audio/transcriptions`, existing Windows TSF IME backend. + +--- + +## File Map + +- Modify `openless-all/app/src-tauri/Cargo.toml`: add Windows-only Foundry Local SDK dependency after a compile probe. +- Create `openless-all/app/src-tauri/src/asr/wav.rs`: shared WAV encoder for Whisper HTTP and Foundry Local. +- Modify `openless-all/app/src-tauri/src/asr/mod.rs`: export `wav` and Windows Foundry Local modules. +- Modify `openless-all/app/src-tauri/src/asr/whisper.rs`: use the shared WAV encoder. +- Create `openless-all/app/src-tauri/src/asr/local/foundry.rs`: provider id, model registry, runtime status structs, and Windows runtime/proxy exports. +- Create `openless-all/app/src-tauri/src/asr/local/foundry_runtime.rs`: Windows-only Foundry Local SDK wrapper for model status, download, load, endpoint discovery, and local transcription. 
+- Create `openless-all/app/src-tauri/src/asr/local/foundry_provider.rs`: `FoundryLocalWhisperAsr` implementing `AudioConsumer` and producing `RawTranscript`. +- Modify `openless-all/app/src-tauri/src/asr/local/mod.rs`: keep Qwen3 macOS exports and add Foundry Whisper exports. +- Modify `openless-all/app/src-tauri/src/types.rs`: add Windows local ASR preferences and Windows default provider. +- Modify `openless-all/app/src-tauri/src/persistence.rs`: align credentials active ASR default with Windows local ASR for new installs. +- Modify `openless-all/app/src-tauri/src/commands.rs`: expose Foundry Local settings/status/download/test commands and ASR credential status. +- Modify `openless-all/app/src-tauri/src/lib.rs`: manage a shared Foundry Local runtime and register commands. +- Modify `openless-all/app/src-tauri/src/coordinator.rs`: add `ActiveAsr::FoundryLocalWhisper`, provider startup, transcribe branch, timeout, cancel, and preload/release hooks. +- Modify `openless-all/app/src/lib/localAsr.ts`: add Foundry Local IPC types and wrapper functions. +- Modify `openless-all/app/src/lib/types.ts` and `openless-all/app/src/lib/ipc.ts`: add preferences/mock defaults. +- Modify `openless-all/app/src/pages/Settings.tsx`: add `foundry-local-whisper` provider preset and local ASR hint behavior. +- Modify `openless-all/app/src/pages/LocalAsr.tsx`: show Windows Foundry Local model/runtime controls alongside macOS Qwen3. +- Modify `openless-all/app/src/i18n/zh-CN.ts` and `openless-all/app/src/i18n/en.ts`: add user-facing strings. +- Modify `openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1`: add a local ASR mode that does not require Volcengine credentials. 
+ +## Implementation Tasks + +### Task 1: Shared WAV Encoder + +**Files:** +- Create: `openless-all/app/src-tauri/src/asr/wav.rs` +- Modify: `openless-all/app/src-tauri/src/asr/mod.rs` +- Modify: `openless-all/app/src-tauri/src/asr/whisper.rs` + +- [ ] **Step 1: Write the shared WAV encoder and its tests** + +Add this file: + +```rust +//! WAV helpers for ASR providers that accept complete audio files. + +/// Encode 16 kHz / mono / 16-bit little-endian PCM as a RIFF WAV file. +pub fn encode_wav_16k_mono(pcm: &[u8]) -> Vec<u8> { + let sample_rate: u32 = 16_000; + let num_channels: u16 = 1; + let bits_per_sample: u16 = 16; + let byte_rate = sample_rate * num_channels as u32 * (bits_per_sample as u32 / 8); + let block_align = num_channels * (bits_per_sample / 8); + let data_size = pcm.len() as u32; + let chunk_size = 36 + data_size; + + let mut wav = Vec::with_capacity(44 + pcm.len()); + wav.extend_from_slice(b"RIFF"); + wav.extend_from_slice(&chunk_size.to_le_bytes()); + wav.extend_from_slice(b"WAVE"); + wav.extend_from_slice(b"fmt "); + wav.extend_from_slice(&16u32.to_le_bytes()); + wav.extend_from_slice(&1u16.to_le_bytes()); + wav.extend_from_slice(&num_channels.to_le_bytes()); + wav.extend_from_slice(&sample_rate.to_le_bytes()); + wav.extend_from_slice(&byte_rate.to_le_bytes()); + wav.extend_from_slice(&block_align.to_le_bytes()); + wav.extend_from_slice(&bits_per_sample.to_le_bytes()); + wav.extend_from_slice(b"data"); + wav.extend_from_slice(&data_size.to_le_bytes()); + wav.extend_from_slice(pcm); + wav +} + +#[cfg(test)] +mod tests { + use super::encode_wav_16k_mono; + + #[test] + fn wav_header_matches_16k_mono_pcm() { + let pcm = [0x01, 0x00, 0xff, 0x7f]; + let wav = encode_wav_16k_mono(&pcm); + + assert_eq!(&wav[0..4], b"RIFF"); + assert_eq!(u32::from_le_bytes(wav[4..8].try_into().unwrap()), 40); + assert_eq!(&wav[8..12], b"WAVE"); + assert_eq!(&wav[12..16], b"fmt "); + assert_eq!(u16::from_le_bytes(wav[20..22].try_into().unwrap()), 1); + 
assert_eq!(u16::from_le_bytes(wav[22..24].try_into().unwrap()), 1); + assert_eq!(u32::from_le_bytes(wav[24..28].try_into().unwrap()), 16_000); + assert_eq!(u16::from_le_bytes(wav[34..36].try_into().unwrap()), 16); + assert_eq!(&wav[36..40], b"data"); + assert_eq!(u32::from_le_bytes(wav[40..44].try_into().unwrap()), 4); + assert_eq!(&wav[44..], &pcm); + } +} +``` + +- [ ] **Step 2: Run the new unit test and verify the module is not wired yet** + +Run: + +```powershell +cargo test --manifest-path openless-all/app/src-tauri/Cargo.toml wav_header_matches_16k_mono_pcm +``` + +Expected: the command exits successfully but runs 0 tests for this filter (`0 passed`, everything else filtered out), because `wav.rs` is not yet declared in `asr/mod.rs` and Cargo does not compile undeclared module files. If the test already runs, the module was registered early. + +- [ ] **Step 3: Register the module and replace Whisper's private encoder** + +In `openless-all/app/src-tauri/src/asr/mod.rs`, add: + +```rust +pub mod wav; +``` + +In `openless-all/app/src-tauri/src/asr/whisper.rs`, add: + +```rust +use crate::asr::wav::encode_wav_16k_mono; +``` + +Then remove the private `fn encode_wav_16k_mono(pcm: &[u8]) -> Vec<u8>` from the bottom of `whisper.rs`. + +- [ ] **Step 4: Run the WAV test** + +Run: + +```powershell +cargo test --manifest-path openless-all/app/src-tauri/Cargo.toml wav_header_matches_16k_mono_pcm +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```powershell +git add -- openless-all/app/src-tauri/src/asr/mod.rs openless-all/app/src-tauri/src/asr/whisper.rs openless-all/app/src-tauri/src/asr/wav.rs +git commit -m "refactor(asr): share wav encoding" +``` + +### Task 2: Provider Constants, Preferences, and Defaults + +**Files:** +- Create: `openless-all/app/src-tauri/src/asr/local/foundry.rs` +- Modify: `openless-all/app/src-tauri/src/asr/local/mod.rs` +- Modify: `openless-all/app/src-tauri/src/types.rs` +- Modify: `openless-all/app/src-tauri/src/persistence.rs` +- Modify: `openless-all/app/src/lib/types.ts` +- Modify: `openless-all/app/src/lib/ipc.ts` + +- [ ] **Step 1: Add provider constants and model registry** + +Create `openless-all/app/src-tauri/src/asr/local/foundry.rs`: + +```rust +use serde::Serialize; + +pub const PROVIDER_ID: &str = "foundry-local-whisper"; +pub const DEFAULT_MODEL_ALIAS: &str = "whisper-small"; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct FoundryWhisperModel { + pub alias: &'static str, + pub display_name: &'static str, + pub quality_tier: &'static str, +} + +pub const MODELS: &[FoundryWhisperModel] = &[ + FoundryWhisperModel { + alias: "whisper-small", + display_name: "Whisper Small", + quality_tier: "balanced", + }, + FoundryWhisperModel { + alias: "whisper-base", + display_name: "Whisper Base", + quality_tier: "low-resource", + }, + FoundryWhisperModel { + alias: "whisper-tiny", + display_name: "Whisper Tiny", + quality_tier: "smoke-test", + }, +]; + +pub fn is_foundry_local_whisper(id: &str) -> bool { + id == PROVIDER_ID +} + +pub fn model_alias_is_known(alias: &str) -> bool { + MODELS.iter().any(|model| model.alias == alias) +} + +pub fn default_language_hint() -> Option { + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn provider_id_is_stable() { + assert!(is_foundry_local_whisper("foundry-local-whisper")); + assert!(!is_foundry_local_whisper("local-qwen3")); + 
} + + #[test] + fn default_model_is_registered() { + assert!(model_alias_is_known(DEFAULT_MODEL_ALIAS)); + } +} +``` + +- [ ] **Step 2: Export the Foundry module** + +In `openless-all/app/src-tauri/src/asr/local/mod.rs`, add: + +```rust +pub mod foundry; +``` + +- [ ] **Step 3: Add Rust preferences** + +In `openless-all/app/src-tauri/src/types.rs`, add fields to `UserPreferences` after `local_asr_keep_loaded_secs`: + +```rust +/// Windows Foundry Local Whisper 当前激活的模型 alias。 +#[serde(default = "default_foundry_local_asr_model")] +pub foundry_local_asr_model: String, +/// Windows Foundry Local Whisper 语言 hint。空串 = 自动检测。 +#[serde(default)] +pub foundry_local_asr_language_hint: String, +/// Windows Foundry Local Whisper 模型在 runtime 中保持加载多久。 +#[serde(default = "default_local_asr_keep_loaded_secs")] +pub foundry_local_asr_keep_loaded_secs: u32, +``` + +Add the default helper: + +```rust +fn default_foundry_local_asr_model() -> String { + crate::asr::local::foundry::DEFAULT_MODEL_ALIAS.into() +} +``` + +Update `impl Default for UserPreferences`: + +```rust +active_asr_provider: default_active_asr_provider(), +foundry_local_asr_model: default_foundry_local_asr_model(), +foundry_local_asr_language_hint: String::new(), +foundry_local_asr_keep_loaded_secs: default_local_asr_keep_loaded_secs(), +``` + +Add this helper near the existing preference defaults: + +```rust +fn default_active_asr_provider() -> String { + #[cfg(target_os = "windows")] + { + return crate::asr::local::foundry::PROVIDER_ID.into(); + } + #[cfg(not(target_os = "windows"))] + { + "volcengine".into() + } +} +``` + +- [ ] **Step 4: Align credentials active ASR default** + +In `openless-all/app/src-tauri/src/persistence.rs`, replace `creds_default_asr()` with: + +```rust +fn creds_default_asr() -> String { + #[cfg(target_os = "windows")] + { + return crate::asr::local::foundry::PROVIDER_ID.into(); + } + #[cfg(not(target_os = "windows"))] + { + "volcengine".into() + } +} +``` + +- [ ] **Step 5: Add TypeScript 
preference fields** + +In `openless-all/app/src/lib/types.ts`, add: + +```ts + foundryLocalAsrModel: string; + foundryLocalAsrLanguageHint: string; + foundryLocalAsrKeepLoadedSecs: number; +``` + +In `openless-all/app/src/lib/ipc.ts`, update mock defaults: + +```ts + activeAsrProvider: 'foundry-local-whisper', + foundryLocalAsrModel: 'whisper-small', + foundryLocalAsrLanguageHint: '', + foundryLocalAsrKeepLoadedSecs: 300, +``` + +- [ ] **Step 6: Run default and provider tests** + +Run: + +```powershell +cargo test --manifest-path openless-all/app/src-tauri/Cargo.toml -- provider_id_is_stable default_model_is_registered +npm --prefix openless-all/app run build +``` + +Expected: Rust tests PASS; TypeScript build PASS. + +- [ ] **Step 7: Commit** + +```powershell +git add -- openless-all/app/src-tauri/src/asr/local/foundry.rs openless-all/app/src-tauri/src/asr/local/mod.rs openless-all/app/src-tauri/src/types.rs openless-all/app/src-tauri/src/persistence.rs openless-all/app/src/lib/types.ts openless-all/app/src/lib/ipc.ts +git commit -m "feat(asr): add Foundry local provider defaults" +``` + +### Task 3: Foundry Runtime Compile Probe + +**Files:** +- Modify: `openless-all/app/src-tauri/Cargo.toml` +- Create: `openless-all/app/src-tauri/src/asr/local/foundry_runtime.rs` +- Modify: `openless-all/app/src-tauri/src/asr/local/foundry.rs` +- Modify: `openless-all/app/src-tauri/src/asr/local/mod.rs` + +- [ ] **Step 1: Add the official Windows SDK dependency** + +Run: + +```powershell +cd openless-all/app/src-tauri +cargo add foundry-local-sdk --features winml --target 'cfg(target_os = "windows")' +``` + +Expected: `Cargo.toml` gains a Windows-only `foundry-local-sdk` dependency and `Cargo.lock` is updated. 
+ +- [ ] **Step 2: Add runtime status types** + +Append to `openless-all/app/src-tauri/src/asr/local/foundry.rs`: + +```rust +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct FoundryRuntimeStatus { + pub provider_id: String, + pub available: bool, + pub active_model: String, + pub loaded_model_id: Option, + pub endpoint: Option, + pub error: Option, +} + +impl FoundryRuntimeStatus { + pub fn unavailable(active_model: String, error: impl Into) -> Self { + Self { + provider_id: PROVIDER_ID.into(), + available: false, + active_model, + loaded_model_id: None, + endpoint: None, + error: Some(error.into()), + } + } +} +``` + +- [ ] **Step 3: Add the minimal Windows runtime wrapper** + +Create `openless-all/app/src-tauri/src/asr/local/foundry_runtime.rs`: + +```rust +#[cfg(target_os = "windows")] +mod imp { + use anyhow::{Context, Result}; + use parking_lot::Mutex; + + use super::super::foundry::{FoundryRuntimeStatus, PROVIDER_ID}; + use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager}; + + #[derive(Debug, Clone)] + struct LoadedModel { + alias: String, + model_id: String, + endpoint: String, + } + + pub struct FoundryLocalRuntime { + loaded: Mutex>, + } + + impl Default for FoundryLocalRuntime { + fn default() -> Self { + Self::new() + } + } + + impl FoundryLocalRuntime { + pub fn new() -> Self { + Self { + loaded: Mutex::new(None), + } + } + + pub fn status_snapshot(&self, active_model: &str) -> FoundryRuntimeStatus { + let loaded = self.loaded.lock().clone(); + FoundryRuntimeStatus { + provider_id: PROVIDER_ID.into(), + available: true, + active_model: active_model.to_string(), + loaded_model_id: loaded.as_ref().map(|model| model.model_id.clone()), + endpoint: loaded.as_ref().map(|model| model.endpoint.clone()), + error: None, + } + } + + pub async fn ensure_loaded(&self, alias: &str) -> Result<(String, String)> { + if let Some(loaded) = self.loaded.lock().as_ref() { + if loaded.alias == alias { + return 
Ok((loaded.model_id.clone(), loaded.endpoint.clone())); + } + } + + let manager = + FoundryLocalManager::create(FoundryLocalConfig::new("openless")) + .context("initialize Foundry Local manager")?; + manager + .download_and_register_eps_with_progress(None, |_ep, _percent| {}) + .await + .context("download/register Foundry execution providers")?; + let model = manager + .catalog() + .get_model(alias) + .await + .with_context(|| format!("get Foundry model {alias}"))?; + if !model.is_cached().await.context("check Foundry model cache")? { + model.download(Some(|_percent| {})).await.context("download Foundry model")?; + } + model.load().await.context("load Foundry model")?; + manager.start_web_service().await.context("start Foundry web service")?; + let endpoint = manager + .urls() + .context("read Foundry web service urls")? + .first() + .cloned() + .context("Foundry web service returned no endpoint")?; + let model_id = model.id().to_string(); + + *self.loaded.lock() = Some(LoadedModel { + alias: alias.to_string(), + model_id: model_id.clone(), + endpoint: endpoint.clone(), + }); + Ok((model_id, endpoint)) + } + + pub fn release_now(&self) { + self.loaded.lock().take(); + } + } +} + +#[cfg(target_os = "windows")] +pub use imp::FoundryLocalRuntime; + +#[cfg(not(target_os = "windows"))] +pub struct FoundryLocalRuntime; + +#[cfg(not(target_os = "windows"))] +impl FoundryLocalRuntime { + pub fn new() -> Self { + Self + } + + pub fn status_snapshot( + &self, + active_model: &str, + ) -> super::foundry::FoundryRuntimeStatus { + super::foundry::FoundryRuntimeStatus::unavailable( + active_model.to_string(), + "Foundry Local Whisper is only available on Windows", + ) + } + + pub fn release_now(&self) {} +} +``` + +- [ ] **Step 4: Export the runtime** + +In `openless-all/app/src-tauri/src/asr/local/mod.rs`, add: + +```rust +pub mod foundry_runtime; +pub use foundry_runtime::FoundryLocalRuntime; +``` + +- [ ] **Step 5: Compile-check the SDK API** + +Run: + +```powershell +cargo 
check --manifest-path openless-all/app/src-tauri/Cargo.toml +``` + +Expected: PASS. If the Foundry SDK names differ from Microsoft Learn, update only `foundry_runtime.rs` and rerun until this command passes before continuing. + +- [ ] **Step 6: Commit** + +```powershell +git add -- openless-all/app/src-tauri/Cargo.toml openless-all/app/src-tauri/Cargo.lock openless-all/app/src-tauri/src/asr/local/foundry.rs openless-all/app/src-tauri/src/asr/local/foundry_runtime.rs openless-all/app/src-tauri/src/asr/local/mod.rs +git commit -m "feat(asr): add Foundry local runtime wrapper" +``` + +### Task 4: Foundry Local Whisper Provider + +**Files:** +- Create: `openless-all/app/src-tauri/src/asr/local/foundry_provider.rs` +- Modify: `openless-all/app/src-tauri/src/asr/local/mod.rs` + +- [ ] **Step 1: Add provider with fakeable HTTP transcription** + +Create `openless-all/app/src-tauri/src/asr/local/foundry_provider.rs`: + +```rust +#[cfg(target_os = "windows")] +use std::sync::Arc; + +use anyhow::{Context, Result}; +use parking_lot::Mutex; + +use crate::asr::wav::encode_wav_16k_mono; +use crate::asr::RawTranscript; + +#[cfg(target_os = "windows")] +use super::foundry_runtime::FoundryLocalRuntime; + +pub struct FoundryLocalWhisperAsr { + #[cfg(target_os = "windows")] + runtime: Arc, + model_alias: String, + language_hint: Option, + buffer: Mutex>, + client: reqwest::Client, +} + +impl FoundryLocalWhisperAsr { + #[cfg(target_os = "windows")] + pub fn new( + runtime: Arc, + model_alias: String, + language_hint: Option, + ) -> Self { + Self { + runtime, + model_alias, + language_hint, + buffer: Mutex::new(Vec::new()), + client: reqwest::Client::new(), + } + } + + pub async fn transcribe(&self) -> Result { + let pcm = self.buffer.lock().clone(); + if pcm.is_empty() { + return Ok(RawTranscript { + text: String::new(), + duration_ms: 0, + }); + } + let duration_ms = (pcm.len() as u64 / 2) * 1000 / 16_000; + let raw = self.transcribe_pcm(&pcm).await?; + self.buffer.lock().clear(); + 
Ok(RawTranscript { + text: raw.trim().to_string(), + duration_ms, + }) + } + + #[cfg(target_os = "windows")] + async fn transcribe_pcm(&self, pcm: &[u8]) -> Result { + let (model_id, endpoint) = self.runtime.ensure_loaded(&self.model_alias).await?; + self.post_transcription(&endpoint, &model_id, pcm).await + } + + #[cfg(not(target_os = "windows"))] + async fn transcribe_pcm(&self, _pcm: &[u8]) -> Result { + anyhow::bail!("Foundry Local Whisper is only available on Windows") + } + + async fn post_transcription( + &self, + endpoint: &str, + model_id: &str, + pcm: &[u8], + ) -> Result { + let wav = encode_wav_16k_mono(pcm); + let wav_part = reqwest::multipart::Part::bytes(wav) + .file_name("openless-foundry.wav") + .mime_str("audio/wav") + .context("set Foundry transcription MIME type")?; + let mut form = reqwest::multipart::Form::new() + .part("file", wav_part) + .text("model", model_id.to_string()) + .text("response_format", "json".to_string()); + if let Some(language) = self.language_hint.as_deref().filter(|s| !s.trim().is_empty()) { + form = form.text("language", language.to_string()); + } + let url = format!("{}/v1/audio/transcriptions", endpoint.trim_end_matches('/')); + let response = self + .client + .post(url) + .multipart(form) + .send() + .await + .context("Foundry Local transcription request failed")?; + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + anyhow::bail!("Foundry Local transcription HTTP {status}: {body}"); + } + let json: serde_json::Value = response + .json() + .await + .context("parse Foundry Local transcription response")?; + Ok(json["text"].as_str().unwrap_or("").to_string()) + } + + pub fn cancel(&self) { + self.buffer.lock().clear(); + } +} + +impl crate::recorder::AudioConsumer for FoundryLocalWhisperAsr { + fn consume_pcm_chunk(&self, pcm: &[u8]) { + self.buffer.lock().extend_from_slice(pcm); + } +} +``` + +- [ ] **Step 2: Export the provider** + +In 
`openless-all/app/src-tauri/src/asr/local/mod.rs`, add: + +```rust +pub mod foundry_provider; +pub use foundry_provider::FoundryLocalWhisperAsr; +``` + +- [ ] **Step 3: Run cargo check** + +Run: + +```powershell +cargo check --manifest-path openless-all/app/src-tauri/Cargo.toml +``` + +Expected: PASS. + +- [ ] **Step 4: Commit** + +```powershell +git add -- openless-all/app/src-tauri/src/asr/local/foundry_provider.rs openless-all/app/src-tauri/src/asr/local/mod.rs +git commit -m "feat(asr): add Foundry local Whisper provider" +``` + +### Task 5: Backend Commands and Runtime State + +**Files:** +- Modify: `openless-all/app/src-tauri/src/commands.rs` +- Modify: `openless-all/app/src-tauri/src/lib.rs` + +- [ ] **Step 1: Manage runtime in Tauri** + +In `openless-all/app/src-tauri/src/lib.rs`, after the local Qwen download manager: + +```rust +let foundry_local_runtime = Arc::new(asr::local::FoundryLocalRuntime::new()); +``` + +Add `.manage(foundry_local_runtime.clone())` to the Tauri builder. 
+ +- [ ] **Step 2: Add command result type and status command** + +In `commands.rs`, import: + +```rust +use crate::asr::local::foundry::{ + model_alias_is_known, FoundryRuntimeStatus, DEFAULT_MODEL_ALIAS, + PROVIDER_ID as FOUNDRY_LOCAL_PROVIDER_ID, +}; +use crate::asr::local::FoundryLocalRuntime; +``` + +Add commands: + +```rust +#[tauri::command] +pub fn foundry_local_asr_status( + coord: CoordinatorState<'_>, + runtime: State<'_, Arc>, +) -> FoundryRuntimeStatus { + let prefs = coord.prefs().get(); + let active_model = if model_alias_is_known(&prefs.foundry_local_asr_model) { + prefs.foundry_local_asr_model + } else { + DEFAULT_MODEL_ALIAS.to_string() + }; + runtime.status_snapshot(&active_model) +} + +#[tauri::command] +pub fn foundry_local_asr_set_model( + coord: CoordinatorState<'_>, + model_alias: String, +) -> Result<(), String> { + if !model_alias_is_known(&model_alias) { + return Err(format!("unknown Foundry Whisper model alias: {model_alias}")); + } + let mut prefs = coord.prefs().get(); + prefs.foundry_local_asr_model = model_alias; + coord.prefs().set(prefs).map_err(|e| e.to_string()) +} + +#[tauri::command] +pub fn foundry_local_asr_set_language_hint( + coord: CoordinatorState<'_>, + language_hint: String, +) -> Result<(), String> { + let normalized = language_hint.trim().to_string(); + if !normalized.is_empty() + && (normalized.len() != 2 || !normalized.bytes().all(|b| b.is_ascii_lowercase())) + { + return Err("language hint must be empty or ISO 639-1 lowercase code".to_string()); + } + let mut prefs = coord.prefs().get(); + prefs.foundry_local_asr_language_hint = normalized; + coord.prefs().set(prefs).map_err(|e| e.to_string()) +} +``` + +- [ ] **Step 3: Make credential status treat Foundry as credential-free** + +In `asr_configured_for_provider`, add: + +```rust +if provider == FOUNDRY_LOCAL_PROVIDER_ID { + return true; +} +``` + +- [ ] **Step 4: Register commands** + +In `lib.rs` `invoke_handler`, add: + +```rust 
+commands::foundry_local_asr_status, +commands::foundry_local_asr_set_model, +commands::foundry_local_asr_set_language_hint, +``` + +- [ ] **Step 5: Add command tests** + +In `commands.rs` tests, add: + +```rust +#[test] +fn credentials_status_treats_foundry_local_asr_as_configured() { + assert!(asr_configured_for_provider( + crate::asr::local::foundry::PROVIDER_ID, + &CredentialsSnapshot::default() + )); +} +``` + +- [ ] **Step 6: Run tests and build** + +Run: + +```powershell +cargo test --manifest-path openless-all/app/src-tauri/Cargo.toml credentials_status_treats_foundry_local_asr_as_configured +cargo check --manifest-path openless-all/app/src-tauri/Cargo.toml +``` + +Expected: PASS. + +- [ ] **Step 7: Commit** + +```powershell +git add -- openless-all/app/src-tauri/src/commands.rs openless-all/app/src-tauri/src/lib.rs +git commit -m "feat(asr): expose Foundry local ASR status" +``` + +### Task 6: Coordinator Integration + +**Files:** +- Modify: `openless-all/app/src-tauri/src/coordinator.rs` + +- [ ] **Step 1: Add runtime to `Inner`** + +Import Foundry types: + +```rust +#[cfg(target_os = "windows")] +use crate::asr::local::{foundry, FoundryLocalRuntime, FoundryLocalWhisperAsr}; +``` + +Add field to `Inner`: + +```rust +#[cfg(target_os = "windows")] +foundry_local_runtime: Arc, +``` + +Initialize it in `Coordinator::new()`: + +```rust +#[cfg(target_os = "windows")] +foundry_local_runtime: Arc::new(FoundryLocalRuntime::new()), +``` + +- [ ] **Step 2: Add active ASR variant** + +Add to `ActiveAsr`: + +```rust +#[cfg(target_os = "windows")] +FoundryLocalWhisper(Arc), +``` + +Update `cancel_active_asr`: + +```rust +#[cfg(target_os = "windows")] +ActiveAsr::FoundryLocalWhisper(local) => local.cancel(), +``` + +- [ ] **Step 3: Start Foundry local provider in `begin_session`** + +After `let active_asr = CredentialsVault::get_active_asr();`, add before Whisper-compatible branch: + +```rust +#[cfg(target_os = "windows")] +if 
foundry::is_foundry_local_whisper(&active_asr) { + let prefs = inner.prefs.get(); + let model_alias = if foundry::model_alias_is_known(&prefs.foundry_local_asr_model) { + prefs.foundry_local_asr_model.clone() + } else { + foundry::DEFAULT_MODEL_ALIAS.to_string() + }; + let language_hint = prefs + .foundry_local_asr_language_hint + .trim() + .to_string(); + let language_hint = if language_hint.is_empty() { + None + } else { + Some(language_hint) + }; + let local = Arc::new(FoundryLocalWhisperAsr::new( + Arc::clone(&inner.foundry_local_runtime), + model_alias, + language_hint, + )); + store_asr_for_session( + inner, + current_session_id, + ActiveAsr::FoundryLocalWhisper(Arc::clone(&local)), + ); + let consumer: Arc = local; + start_recorder_and_enter_listening(inner, current_session_id, &active_asr, consumer) + .await?; + return Ok(()); +} +``` + +- [ ] **Step 4: Transcribe Foundry local results in `end_session`** + +Add a match branch next to `ActiveAsr::Whisper`: + +```rust +#[cfg(target_os = "windows")] +ActiveAsr::FoundryLocalWhisper(local) => { + let timeout_duration = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS); + match tokio::time::timeout(timeout_duration, local.transcribe()).await { + Ok(Ok(r)) => r, + Ok(Err(e)) => { + log::error!("[coord] Foundry Local Whisper transcribe failed: {e:#}"); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some(format!("本地识别失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(e.to_string()); + } + Err(_) => { + log::error!( + "[coord] Foundry Local Whisper 全局超时 {} 秒", + COORDINATOR_GLOBAL_TIMEOUT_SECS + ); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some("识别超时".to_string()), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + 
schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err("foundry local global timeout".to_string()); + } + } +} +``` + +- [ ] **Step 5: Relax ASR credential gate** + +In `ensure_asr_credentials`, add before local Qwen3: + +```rust +#[cfg(target_os = "windows")] +if foundry::is_foundry_local_whisper(&active_asr) { + return Ok(()); +} +``` + +- [ ] **Step 6: Add coordinator tests for fallback routing** + +Add tests in `coordinator.rs` tests: + +```rust +#[test] +fn foundry_local_provider_is_not_whisper_compatible_cloud_provider() { + assert!(!is_whisper_compatible_provider( + crate::asr::local::foundry::PROVIDER_ID + )); +} +``` + +- [ ] **Step 7: Run backend checks** + +Run: + +```powershell +cargo test --manifest-path openless-all/app/src-tauri/Cargo.toml foundry_local_provider_is_not_whisper_compatible_cloud_provider +cargo check --manifest-path openless-all/app/src-tauri/Cargo.toml +``` + +Expected: PASS. + +- [ ] **Step 8: Commit** + +```powershell +git add -- openless-all/app/src-tauri/src/coordinator.rs +git commit -m "feat(asr): route dictation through Foundry local Whisper" +``` + +### Task 7: Frontend IPC and Settings Provider + +**Files:** +- Modify: `openless-all/app/src/lib/localAsr.ts` +- Modify: `openless-all/app/src/pages/Settings.tsx` +- Modify: `openless-all/app/src/i18n/zh-CN.ts` +- Modify: `openless-all/app/src/i18n/en.ts` + +- [ ] **Step 1: Add TypeScript IPC wrappers** + +In `openless-all/app/src/lib/localAsr.ts`, add: + +```ts +export interface FoundryLocalAsrStatus { + providerId: string; + available: boolean; + activeModel: string; + loadedModelId: string | null; + endpoint: string | null; + error: string | null; +} + +export function getFoundryLocalAsrStatus(): Promise { + return invokeOrMock('foundry_local_asr_status', undefined, () => ({ + providerId: 'foundry-local-whisper', + available: true, + activeModel: 'whisper-small', + loadedModelId: null, + endpoint: null, + error: null, + })); +} + +export function 
setFoundryLocalAsrModel(modelAlias: string): Promise { + return invokeOrMock('foundry_local_asr_set_model', { modelAlias }, () => undefined); +} + +export function setFoundryLocalAsrLanguageHint(languageHint: string): Promise { + return invokeOrMock( + 'foundry_local_asr_set_language_hint', + { languageHint }, + () => undefined, + ); +} +``` + +- [ ] **Step 2: Add provider preset** + +In `Settings.tsx`, add to `ASR_PRESETS` before `local-qwen3`: + +```ts +{ id: 'foundry-local-whisper', nameKey: 'asrFoundryLocalWhisper', baseUrl: '', model: '' }, +``` + +Update the union type automatically via `as const`. + +- [ ] **Step 3: Render local provider hint** + +Change: + +```tsx +) : committedAsrProvider === 'local-qwen3' ? ( + +) : ( +``` + +to: + +```tsx +) : committedAsrProvider === 'local-qwen3' || committedAsrProvider === 'foundry-local-whisper' ? ( + +) : ( +``` + +Change `LocalAsrProviderHint` signature: + +```tsx +function LocalAsrProviderHint({ provider }: { provider: 'local-qwen3' | 'foundry-local-whisper' }) { +``` + +Use provider-specific text: + +```tsx +const hintKey = provider === 'foundry-local-whisper' + ? 'settings.providers.foundryLocalAsrHint' + : 'settings.providers.localAsrHint'; +``` + +- [ ] **Step 4: Add i18n strings** + +In `zh-CN.ts` under `settings.providers.presets`: + +```ts +asrFoundryLocalWhisper: '本地 Whisper(Foundry Local)', +``` + +Under `settings.providers`: + +```ts +foundryLocalAsrHint: 'Windows 本地 Whisper 在本机运行,无需 ASR API Key。首次使用需下载 Foundry Local 运行组件和 Whisper 模型;LLM 润色仍按你配置的模型供应商调用。', +``` + +In `en.ts` add: + +```ts +asrFoundryLocalWhisper: 'Local Whisper (Foundry Local)', +foundryLocalAsrHint: 'Windows local Whisper runs on this device and does not need an ASR API key. First use downloads Foundry Local runtime components and a Whisper model; LLM polishing still uses your configured LLM provider.', +``` + +- [ ] **Step 5: Build frontend** + +Run: + +```powershell +npm --prefix openless-all/app run build +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit** + +```powershell +git add -- openless-all/app/src/lib/localAsr.ts openless-all/app/src/pages/Settings.tsx openless-all/app/src/i18n/zh-CN.ts openless-all/app/src/i18n/en.ts +git commit -m "feat(ui): add Foundry local ASR provider" +``` + +### Task 8: Local ASR Page for Windows Foundry Models + +**Files:** +- Modify: `openless-all/app/src/pages/LocalAsr.tsx` +- Modify: `openless-all/app/src/i18n/zh-CN.ts` +- Modify: `openless-all/app/src/i18n/en.ts` + +- [ ] **Step 1: Load Foundry status on Local ASR page** + +In `LocalAsr.tsx`, import: + +```ts +getFoundryLocalAsrStatus, +setFoundryLocalAsrModel, +setFoundryLocalAsrLanguageHint, +type FoundryLocalAsrStatus, +``` + +Add state: + +```ts +const [foundryStatus, setFoundryStatus] = useState(null); +``` + +Add refresh function: + +```ts +const refreshFoundryStatus = async () => { + try { + const status = await getFoundryLocalAsrStatus(); + setFoundryStatus(status); + } catch (err) { + console.warn('[localAsr] Foundry status query failed', err); + } +}; +``` + +Call it inside `refresh()`: + +```ts +void refreshFoundryStatus(); +``` + +- [ ] **Step 2: Add Windows Foundry model controls** + +Add this block after the top page header: + +```tsx + +
+
+
+ {t('localAsr.foundryTitle')} +
+
+ {t('localAsr.foundryDesc')} +
+
+ + {foundryStatus?.available ? t('localAsr.runtimeReady') : t('localAsr.runtimeUnavailable')} + +
+
+ + +
+ {foundryStatus?.error && ( +
+ {foundryStatus.error} +
+ )} +
+``` + +- [ ] **Step 3: Add i18n strings** + +In `zh-CN.ts` under `localAsr`: + +```ts +foundryTitle: 'Windows 本地 Whisper', +foundryDesc: '使用 Microsoft Foundry Local 在本机转写语音。无需 ASR API Key;首次使用会准备运行组件和 Whisper 模型。', +runtimeReady: '运行时可用', +runtimeUnavailable: '运行时不可用', +foundryModelLabel: 'Whisper 模型', +languageHintLabel: '识别语言', +languageAuto: '自动检测', +languageZh: '优先中文', +languageEn: '优先英文', +``` + +Add matching English strings in `en.ts`. + +- [ ] **Step 4: Build frontend** + +Run: + +```powershell +npm --prefix openless-all/app run build +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```powershell +git add -- openless-all/app/src/pages/LocalAsr.tsx openless-all/app/src/i18n/zh-CN.ts openless-all/app/src/i18n/en.ts +git commit -m "feat(ui): manage Windows local Whisper" +``` + +### Task 9: Windows Smoke Script Local ASR Mode + +**Files:** +- Modify: `openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1` + +- [ ] **Step 1: Add ASR mode parameter** + +Add parameter: + +```powershell +[ValidateSet("volcengine", "foundry-local-whisper")] +[string]$AsrProvider = "volcengine", +``` + +- [ ] **Step 2: Write active ASR preference for smoke** + +In `Set-HoldHotkeyPreference`, replace the active ASR default line with: + +```powershell +if ($null -eq $prefs.activeAsrProvider) { + $prefs | Add-Member -NotePropertyName activeAsrProvider -NotePropertyValue $AsrProvider +} else { + $prefs.activeAsrProvider = $AsrProvider +} +``` + +- [ ] **Step 3: Skip Volcengine credential requirement for local ASR** + +Replace: + +```powershell +if ($RequireJsonCredentials -and (-not $credentialStatus.VolcengineConfigured -or -not $credentialStatus.ArkConfigured)) { + throw "Real ASR regression requires configured Volcengine ASR and Ark LLM credentials." 
+} +``` + +with: + +```powershell +if ($RequireJsonCredentials -and $AsrProvider -eq "volcengine" -and (-not $credentialStatus.VolcengineConfigured -or -not $credentialStatus.ArkConfigured)) { + throw "Real ASR regression requires configured Volcengine ASR and Ark LLM credentials." +} +if ($RequireJsonCredentials -and $AsrProvider -eq "foundry-local-whisper" -and (-not $credentialStatus.ArkConfigured)) { + Write-Warning "Ark LLM credentials are not configured; local ASR smoke will accept raw transcript fallback." +} +``` + +- [ ] **Step 4: Add no Win+H log assertion** + +After history verification, add: + +```powershell +$logText = Get-Content -Raw $logPath +if ($logText -match "Win\+H|Voice Typing|Windows\.Media\.SpeechRecognition|SAPI") { + throw "Unexpected Windows system dictation path appeared in OpenLess log." +} +``` + +- [ ] **Step 5: Run script syntax check** + +Run: + +```powershell +powershell -NoProfile -ExecutionPolicy Bypass -Command "`$null = [scriptblock]::Create((Get-Content -Raw '.\openless-all\app\scripts\windows-real-asr-insertion-smoke.ps1')); 'ok'" +``` + +Expected: prints `ok`. + +- [ ] **Step 6: Commit** + +```powershell +git add -- openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1 +git commit -m "test(windows): add local ASR smoke mode" +``` + +### Task 10: End-to-End Verification + +**Files:** +- No code changes unless a verification step exposes a bug. + +- [ ] **Step 1: Run backend unit and type checks** + +Run: + +```powershell +cargo test --manifest-path openless-all/app/src-tauri/Cargo.toml +cargo check --manifest-path openless-all/app/src-tauri/Cargo.toml +``` + +Expected: PASS. + +- [ ] **Step 2: Run frontend build** + +Run: + +```powershell +npm --prefix openless-all/app run build +``` + +Expected: PASS. 
+ +- [ ] **Step 3: Run no Win+H source search** + +Run: + +```powershell +rg -n "Win\+H|Voice Typing|Windows\.Media\.SpeechRecognition|SAPI|SendInput.*H" openless-all/app/src-tauri/src openless-all/app/windows-ime openless-all/app/src +``` + +Expected: no matches except documentation or explicit negative test strings. + +- [ ] **Step 4: Run local ASR smoke on Windows** + +Run after building a Windows executable: + +```powershell +powershell -ExecutionPolicy Bypass -File .\openless-all\app\scripts\windows-real-asr-insertion-smoke.ps1 -AsrProvider foundry-local-whisper -Target notepad -ManualSpeech -AllowClipboardFallback +``` + +Expected: + +- OpenLess observes hotkey and starts session. +- No Windows Voice Typing panel appears. +- History receives a new item with non-empty `rawTranscript` and `finalText`. +- If Ark is not configured, `finalText` equals raw transcript or records polish fallback. +- Notepad receives the final text through TSF or permitted fallback. + +- [ ] **Step 5: Confirm verification did not create file changes** + +Run: + +```powershell +git status --short +``` + +Expected: no output. If a verification step exposed a code defect, stop this task and write a new focused fix task before continuing. + +## Self-Review + +Spec coverage: + +- No Win+H: Task 10 source search and smoke log assertion cover it. +- Existing interaction: Task 6 routes through `Coordinator`; no UI shortcut path bypasses recorder/capsule. +- Local transcript into polish/history: Task 6 returns `RawTranscript` before existing polish and history code. +- First-use UX: Tasks 7 and 8 expose provider and runtime/model state. +- Windows TSF insertion unchanged: Task 6 leaves `insert_with_windows_ime_first` intact. +- Offline behavior after cache: Task 3 runtime caches loaded model state; Task 10 smoke can be repeated after model download. + +Placeholder scan: + +- This plan contains no unresolved placeholders or unspecified file paths. 
+ +Type consistency: + +- Provider id is consistently `foundry-local-whisper`. +- Rust preference fields are `foundry_local_asr_model`, `foundry_local_asr_language_hint`, and `foundry_local_asr_keep_loaded_secs`. +- TypeScript preference fields use camelCase equivalents. diff --git a/docs/superpowers/specs/2026-05-06-windows-local-asr-design.md b/docs/superpowers/specs/2026-05-06-windows-local-asr-design.md new file mode 100644 index 00000000..069db72c --- /dev/null +++ b/docs/superpowers/specs/2026-05-06-windows-local-asr-design.md @@ -0,0 +1,247 @@ +# Windows 本地 ASR 设计 + +## 背景 + +OpenLess 的产品契约是:全局热键启动听写,胶囊显示录音状态,ASR 产出 transcript,现有 LLM provider 做润色、翻译或语义处理,再通过当前平台插入链路写回光标位置并保存历史。 + +Windows 新用户目前仍需要配置外部 ASR provider,才能完成真实听写。目标是在 Windows 上提供一个不依赖外部 ASR API Key 的本地识别方案,同时不调用 `Win+H`,不显示 Windows Voice Typing 系统面板,不绕开现有 polish、insert 和 history 流水线。 + +已确认的边界: + +- Windows `Win+H` / Voice Typing 是用户级系统功能,没有适合 OpenLess 嵌入并拿回 transcript 的公开 API。 +- `SendInput` 模拟 `Win+H` 只会打开系统面板,OpenLess 拿不到 transcript,也无法 polish 或写 history。 +- `Windows.Media.SpeechRecognition` 对普通 desktop app 的支持和授权路径不适合作为主线。 +- SAPI COM 可做 desktop dictation,但质量和现代体验不足以满足高品质目标。 + +## 官方资料核对 + +核对时间:2026-05-06。 + +Microsoft Learn 当前资料显示: + +- Foundry Local 是本地 AI runtime,支持 Windows、macOS Apple silicon 和 Linux,提供 C#、JavaScript、Rust、Python SDK;本地推理数据不离开设备,首次模型和执行 provider 下载仍需要网络。 +- Foundry Local catalog 覆盖 chat completion 和 audio transcription;音频转写示例明确使用 Whisper 模型。 +- Rust SDK 在 Windows 上使用 `foundry-local-sdk --features winml`,Windows 包集成 Windows ML runtime。 +- Rust native audio API 当前文档示例是:下载并 load Whisper 模型后 `model.create_audio_client()`,再调用 `audio_client.transcribe(file_path).await`。 +- Foundry Local 也能启动 OpenAI-compatible local REST service;REST endpoint `POST /v1/audio/transcriptions` 接收 multipart `file`、`model`,可选 `language`、`temperature`、`response_format`,返回 `text`。 +- REST service 的端口是动态分配,文档要求通过 SDK 暴露的 endpoint / urls 获取,不要硬编码。 +- CLI 是开发和管理辅助工具,不是应用集成主线;生产应用应使用 SDK 嵌入 runtime。 +- 
Foundry Local 仍是 preview,API、安装和分发方式可能变动。 + +主要来源: + +- https://learn.microsoft.com/en-us/azure/foundry-local/what-is-foundry-local +- https://learn.microsoft.com/en-us/azure/foundry-local/get-started +- https://learn.microsoft.com/en-us/azure/foundry-local/how-to/how-to-transcribe-audio +- https://learn.microsoft.com/en-us/azure/foundry-local/reference/reference-rest +- https://learn.microsoft.com/en-us/azure/foundry-local/reference/reference-sdk-current +- https://learn.microsoft.com/en-us/azure/foundry-local/how-to/how-to-use-foundry-local-cli +- https://learn.microsoft.com/en-us/azure/foundry-local/concepts/foundry-local-architecture + +## 目标 + +- Windows 新用户无需 Volcengine、Whisper HTTP、DashScope 等外部 ASR API Key,即可完成听写。 +- 不调用 `Win+H`,用户完全看不到 Windows Voice Typing 弹窗。 +- 现有交互不变:热键、OpenLess capsule、录音状态、转写、LLM polish / 翻译、插入、历史保存都走当前主流水线。 +- LLM polish 仍沿用用户配置的 OpenAI-compatible LLM provider;LLM 未配置或失败时插入原始 transcript。 +- 本地 ASR 缺 runtime / 模型时给出可操作引导,而不是静默失败。 +- 下载完成后可离线识别;首次模型 / execution provider 下载可以联网。 + +## 非目标 + +- 不把 Windows Voice Typing、SAPI 或系统听写面板嵌入 OpenLess。 +- 不在本阶段把 LLM polish 也改成本地模型;本设计只解决 ASR。 +- 不把大型模型直接打进默认 Windows 安装包,除非后续逐项确认模型 license、再分发条款、安装包体积和 updater 影响。 +- 不重写 Windows TSF IME 插入链路。 +- 不保证所有隔离目标窗口都能 TSF 上屏;现有 TSF / Unicode / clipboard fallback 策略继续负责插入可用性。 + +## 现有系统切入点 + +主听写状态机集中在 `openless-all/app/src-tauri/src/coordinator.rs`: + +- `ActiveAsr` 当前有 `Volcengine`、`Whisper`,以及 macOS-only `Local`。 +- `begin_session` 从 `CredentialsVault::get_active_asr()` 读取 active provider,再分流到 local Qwen3、OpenAI-compatible Whisper 或 Volcengine。 +- `end_session` 统一取得 `RawTranscript` 后,继续走 `polish_or_passthrough` / `translate_or_passthrough`、Windows TSF-first 插入和 history append。 +- `ensure_asr_credentials` 是录音前的 provider gate;本地 ASR 需要在这里改成“无需云凭据,但需要 runtime / model ready”。 +- `is_whisper_compatible_provider` 只覆盖云端 OpenAI-compatible `/audio/transcriptions` provider;Foundry Local 不应塞进这里,因为它需要 runtime / model lifecycle。 + +现有本地 ASR 模块在 
`openless-all/app/src-tauri/src/asr/local/`: + +- provider id 是 `local-qwen3`,模型枚举是 `qwen3-asr-0.6b` / `qwen3-asr-1.7b`。 +- `LocalAsrCache` 目前只在 macOS 持有 `QwenAsrEngine`。 +- 下载页和 IPC 命令已覆盖 model status、下载、删除、test、preload、release,但 UI 文案和目录语义都强绑定 Qwen3-ASR。 +- Windows 端 `engine_available` 当前为 false,设置页提示“仅 macOS 已支持”。 + +Windows 插入链路已经满足本需求: + +- 会话开始时 `prepare_session()` 捕获当前输入法 profile 并临时激活 OpenLess TSF。 +- 会话结束时 `insert_with_windows_ime_first()` 通过 named pipe 把最终文本提交给 TSF DLL。 +- TSF DLL 在目标应用内调用 `ITfInsertAtSelection::InsertTextAtSelection`。 +- TSF 失败后按用户偏好走 Unicode `SendInput` 或 clipboard fallback。 + +## 推荐方案 + +新增 Windows-only provider:`foundry-local-whisper`。 + +实现上分两层: + +1. `FoundryLocalWhisperAsr`:形状对齐 `WhisperBatchASR` 和 `LocalQwenAsr`,实现 `AudioConsumer`,录音阶段 buffer 16 kHz mono i16 PCM,stop 后编码 WAV 并调用 Foundry Local。 +2. `FoundryLocalRuntime`:封装 Foundry Local SDK 的初始化、catalog 查询、execution provider 下载、模型下载、模型加载、endpoint 获取和卸载 / keep-loaded 管理。 + +MVP 调用路径建议先用 Foundry Local SDK 启动 local REST service,再调用 `/v1/audio/transcriptions`。原因: + +- OpenLess 已经有成熟的 multipart WAV 转写路径。 +- REST API 文档明确支持 `language` 参数,便于后续中文 / 中英混输策略调优。 +- SDK 仍负责动态端口、模型下载和加载,避免硬编码本地服务地址。 +- 后续如果 Rust native audio client 提供足够参数和稳定 API,可以把 REST 调用替换为纯 native audio client。 + +## Provider 与模型命名 + +新增 id: + +- `foundry-local-whisper`:Windows 主线本地 ASR。 + +模型别名: + +- 默认:`whisper-small`。 +- 低配选项:`whisper-base`。 +- 调试选项:`whisper-tiny`。 + +默认不强制 `language=zh`。中英混输时让 Whisper 自动检测更稳,避免英文产品名、代码词或中英夹杂被错误归入单一中文模式。后续可在高级设置里增加“优先中文识别”,仅用户明确选择时传 `language=zh`。 + +不要把 `foundry-local-whisper` 混入现有 `local-qwen3` provider。两者模型来源、runtime、平台支持和下载语义不同,应共享“本地 ASR 管理”页面的外壳,但后端 provider 和模型 registry 要分开。 + +## 会话时序 + +1. 用户按当前 OpenLess 全局热键。 +2. `Coordinator` 进入 `Starting`,Windows 侧准备 TSF IME session。 +3. `ensure_asr_credentials` 识别 active provider 是 `foundry-local-whisper`: + - runtime 可用且模型已缓存:继续; + - 模型未缓存:返回可操作错误,胶囊显示“请先下载本地语音模型”,不开始录音; + - runtime 初始化失败:显示“本地语音运行时不可用”,引导设置页。 +4. 
创建 `FoundryLocalWhisperAsr`,把它作为 `AudioConsumer` 传给 `Recorder::start`。 +5. 录音期间 recorder 继续向 consumer 推 PCM,capsule 继续显示电平。 +6. 用户再次按热键或松开热键结束录音。 +7. `end_session` 停 recorder,调用 `FoundryLocalWhisperAsr::transcribe()`: + - PCM buffer 编码成临时 WAV; + - 确保模型 loaded; + - 通过 SDK endpoint 调 `/v1/audio/transcriptions`; + - 解析 `{ text }` 为 `RawTranscript`。 +8. 后续完全复用现有逻辑:空 transcript guard、polish / translate、Chinese script preference、Windows TSF-first insert、history append、capsule Done。 + +## 首次使用 UX + +Windows 新用户默认 active ASR 使用 `foundry-local-whisper`,但只在“没有现有 preferences / credentials active ASR”的新安装路径生效,不覆盖老用户。 + +设置页增加或改造“本地语音识别”区: + +- 显示 runtime 状态:可用、初始化中、不可用。 +- 显示 execution provider 状态:已注册、需要下载、下载中、失败。 +- 显示模型列表:`whisper-small`、`whisper-base`、`whisper-tiny`,尺寸和 license 从 Foundry catalog / REST metadata 获取。 +- 提供一键下载 / 取消 / 删除 / 设为默认 / 加载并测试。 +- 下载完成后后台 preload,减少第一次热键录音结束后的等待。 + +首次按热键但模型缺失时: + +- 不调用 Win+H。 +- 不弹系统 Voice Typing。 +- 不开始录音,避免用户说完才发现没有模型。 +- capsule 显示短错误,主窗口跳到本地语音识别页或给出“下载模型”入口。 + +## 质量与性能评估 + +中文 / 中英混输: + +- Whisper 系列对普通话和英文都可用,但 `tiny/base/small` 本地模型质量通常低于云端大模型 ASR 或 Whisper large。 +- `whisper-small` 更适合作为默认质量档;`whisper-base` 用于低配机器。 +- 热词 bias 当前不会直接进入 Whisper 解码;词汇表仍可作为 LLM polish 上下文和 history 命中统计使用。 + +首次延迟: + +- 首次下载 execution provider 和模型可能需要数分钟,取决于网络和硬件。 +- 首次 load 模型可能需要数秒;应在切换 provider / 下载完成后后台 preload。 +- 单次转写是 batch 型,不是 Volcengine 那种 streaming final;capsule 可保持“转写中”直到返回。 + +模型体积: + +- 体积不硬编码。UI 通过 Foundry catalog / REST metadata 显示当前真实 `fileSizeMb`。 +- 安装包不内置模型,避免 release artifact 暴涨和 license 风险。 + +离线能力: + +- 模型和 execution provider 下载完成后,ASR 推理可离线。 +- LLM polish 仍取决于用户配置的 LLM provider;LLM 不可用时按现有规则插入 raw transcript。 + +隐私: + +- ASR 音频在本机处理,不发送到外部 ASR 服务。 +- 首次下载模型和组件会访问 Foundry catalog / Microsoft 分发源。 +- LLM polish 仍可能把 transcript 发送到用户配置的 LLM endpoint;设置页文案需要明确区分“ASR 本地”和“LLM 仍按配置调用”。 + +## Windows 安装器与分发 + +MVP 不修改 Windows TSF IME 注册流程。 + +需要验证: + +- `foundry-local-sdk --features winml` 在 Tauri Windows build 中会引入哪些 DLL、runtime 
文件和 redistributable 要求。 +- NSIS / MSI 是否能自动收集这些 native 依赖。 +- Windows release workflow 当前对 NSIS / MSI 有固定红线,不能把 bundler 两轮 invoke、`-sice:ICE80` repair 或 `bash` shell 约束顺手改掉。 +- 如果 Foundry Local runtime 需要额外安装或动态下载组件,UI 必须把“正在准备本地语音运行时”作为一键流程的一部分,而不是要求用户手动跑 `winget`。 + +## 失败与 fallback + +- Foundry runtime 缺失或初始化失败:不开始录音,提示本地语音运行时不可用,保留用户切回云 ASR 的入口。 +- 模型未下载:不开始录音,提示下载模型。 +- 模型下载失败:保留 partial / retry 状态,不切换到 Win+H。 +- 转写超时:沿用 coordinator global timeout,写失败状态,不插入空文本。 +- 转写返回空:沿用 `emptyTranscript` history guard。 +- LLM polish 失败:插入 raw transcript,history 标记 `polishFailed`。 +- TSF 提交失败:按现有 `allow_non_tsf_insertion_fallback` 走 Unicode / clipboard fallback;关闭 fallback 时标记 `windowsImeTsfRequired`。 + +## 文件与模块边界 + +后续实现计划触碰范围: + +- `openless-all/app/src-tauri/Cargo.toml`:Windows 依赖增加 Foundry Local Rust SDK,必要时启用 `winml` feature。 +- `openless-all/app/src-tauri/src/asr/local/`:拆出 provider-neutral local ASR registry,新增 Foundry Whisper runtime / provider;保留 macOS Qwen3 代码。 +- `openless-all/app/src-tauri/src/coordinator.rs`:扩展 `ActiveAsr`,在 `begin_session` 和 `end_session` 分支接入 `FoundryLocalWhisperAsr`。 +- `openless-all/app/src-tauri/src/commands.rs`:新增 Windows local Whisper runtime/model status、download、test、preload 命令,或把现有 `local_asr_*` 扩展成多 backend。 +- `openless-all/app/src-tauri/src/types.rs`:新增 Windows local ASR preferences,如 active Foundry Whisper model、keep-loaded 时长、语言 hint。 +- `openless-all/app/src/lib/localAsr.ts`、`src/pages/LocalAsr.tsx`、`src/pages/Settings.tsx`、`src/i18n/*`:展示 Windows 本地语音识别和模型管理。 +- `openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1`:增加 local ASR 模式,不再强制 Volcengine 凭据。 + +Rust 叶子模块仍只依赖 `types.rs` 和自身 provider 内部类型。跨模块编排继续放在 `coordinator.rs`。 + +## 验证计划 + +静态与单元验证: + +- `asr_configured_for_provider("foundry-local-whisper")` 返回 true,不要求云端 API Key。 +- `ensure_asr_credentials` 对模型缺失返回明确错误。 +- fake Foundry endpoint 返回 `{ "text": "..." 
}` 时,`FoundryLocalWhisperAsr` 能把 PCM 编成 WAV 并产出 `RawTranscript`。 +- model id、provider id、prefs default 的序列化和迁移测试。 + +集成验证: + +- Windows 真机启动 OpenLess,active ASR 为 `foundry-local-whisper`,未配置 Volcengine / Whisper HTTP。 +- 首次缺模型时按热键,不出现 Win+H 面板,不开始录音,提示下载模型。 +- 下载模型后聚焦 Notepad,按热键录音,说测试短句,结束后 history 新增 session,`rawTranscript` 非空,`finalText` 非空。 +- Ark / LLM 未配置时,最终插入 raw transcript,并按现有 polish fallback 规则记录。 +- Ark / LLM 已配置时,transcript 进入现有 polish / translation 逻辑。 +- Windows TSF IME 已安装时 `insertStatus=inserted`;禁用 TSF 或目标不支持时按当前 fallback 策略表现。 +- 断网后重复已下载模型的听写,ASR 仍可完成;LLM 不可用时 raw transcript 不丢。 + +No Win+H 验证: + +- 代码搜索确认没有 `Win+H`、Voice Typing、`Windows.Media.SpeechRecognition`、SAPI dictation 调用路径。 +- 真机 smoke 过程中截图或窗口枚举确认没有 Voice Typing 面板窗口。 +- 日志只出现 OpenLess recorder、Foundry local ASR、polish、Windows IME / fallback 插入事件。 + +## 开放风险 + +- Foundry Local preview API 可能变化,尤其是 Rust audio client 和 WinML package 分发。 +- Foundry Local 的 Whisper 模型质量和中文标点风格需要真机样本验证,不能只靠官方能力声明。 +- 首次 execution provider 下载和模型下载的错误码、进度回调、缓存位置需要实测。 +- Windows installer 对 SDK native 依赖的收集需要 release workflow 验证。 +- 如果 Foundry Local runtime 无法在 Tauri app 内稳定嵌入,备选路线是用 SDK 管理 local REST service;若 REST 也不稳定,再评估 `whisper.cpp` / ONNX Runtime 自管路线。 diff --git a/openless-all/app/scripts/check-hotkey-recorder.mjs b/openless-all/app/scripts/check-hotkey-recorder.mjs new file mode 100644 index 00000000..f14aff3a --- /dev/null +++ b/openless-all/app/scripts/check-hotkey-recorder.mjs @@ -0,0 +1,22 @@ +import * as esbuild from 'esbuild'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const tmp = await mkdtemp(join(tmpdir(), 'openless-hotkey-recorder-')); +const outfile = join(tmp, 'hotkey-recorder-test.mjs'); + +try { + await esbuild.build({ + entryPoints: [fileURLToPath(new URL('../src/lib/hotkeyRecorder.test.ts', import.meta.url))], + outfile, + bundle: true, + platform: 
'node', + format: 'esm', + logLevel: 'silent', + }); + await import(pathToFileURL(outfile).href); +} finally { + await rm(tmp, { recursive: true, force: true }); +} diff --git a/openless-all/app/scripts/check-window-hotkey-fallback.mjs b/openless-all/app/scripts/check-window-hotkey-fallback.mjs new file mode 100644 index 00000000..0891d4b2 --- /dev/null +++ b/openless-all/app/scripts/check-window-hotkey-fallback.mjs @@ -0,0 +1,22 @@ +import * as esbuild from 'esbuild'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const tmp = await mkdtemp(join(tmpdir(), 'openless-window-hotkey-fallback-')); +const outfile = join(tmp, 'window-hotkey-fallback-test.mjs'); + +try { + await esbuild.build({ + entryPoints: [fileURLToPath(new URL('../src/lib/windowHotkeyFallback.test.ts', import.meta.url))], + outfile, + bundle: true, + platform: 'node', + format: 'esm', + logLevel: 'silent', + }); + await import(pathToFileURL(outfile).href); +} finally { + await rm(tmp, { recursive: true, force: true }); +} diff --git a/openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1 b/openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1 index 4c92d1e4..5f76f934 100644 --- a/openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1 +++ b/openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1 @@ -2,6 +2,8 @@ param( [string]$ExePath = "", [ValidateSet("notepad", "browser", "wt-cmd", "wt-powershell", "win32edit")] [string]$Target = "notepad", + [ValidateSet("volcengine", "foundry-local-whisper")] + [string]$AsrProvider = "volcengine", [string]$Phrase = "OpenLess Windows real regression", [int]$TimeoutSeconds = 120, [int]$VirtualKey = 0xA3, @@ -118,7 +120,11 @@ function Set-HoldHotkeyPreference($Path) { if ($null -eq $prefs.enabledModes) { $prefs | Add-Member -NotePropertyName enabledModes -NotePropertyValue @("light", "structured", "formal", 
"raw") } if ($null -eq $prefs.launchAtLogin) { $prefs | Add-Member -NotePropertyName launchAtLogin -NotePropertyValue $false } if ($null -eq $prefs.showCapsule) { $prefs | Add-Member -NotePropertyName showCapsule -NotePropertyValue $true } - if ($null -eq $prefs.activeAsrProvider) { $prefs | Add-Member -NotePropertyName activeAsrProvider -NotePropertyValue "volcengine" } + if ($null -eq $prefs.PSObject.Properties["activeAsrProvider"]) { + $prefs | Add-Member -NotePropertyName activeAsrProvider -NotePropertyValue $AsrProvider + } else { + $prefs.activeAsrProvider = $AsrProvider + } if ($null -eq $prefs.activeLlmProvider) { $prefs | Add-Member -NotePropertyName activeLlmProvider -NotePropertyValue "ark" } if ($null -eq $prefs.restoreClipboardAfterPaste) { $prefs | Add-Member -NotePropertyName restoreClipboardAfterPaste -NotePropertyValue $true @@ -129,6 +135,297 @@ function Set-HoldHotkeyPreference($Path) { return $previous } +function Ensure-OpenLessCredentialNative { + if ("OpenLessCredentialNative" -as [type]) { + return + } + + Add-Type @" +using System; +using System.ComponentModel; +using System.Runtime.InteropServices; + +public static class OpenLessCredentialNative { + [DllImport("advapi32.dll", CharSet = CharSet.Unicode, SetLastError = true)] + public static extern bool CredRead(string target, UInt32 type, UInt32 reservedFlag, out IntPtr credentialPtr); + + [DllImport("advapi32.dll", CharSet = CharSet.Unicode, SetLastError = true)] + public static extern bool CredWrite(ref OpenLessCredentialNativeCredential credential, UInt32 flags); + + [DllImport("advapi32.dll", CharSet = CharSet.Unicode, SetLastError = true)] + public static extern bool CredDelete(string target, UInt32 type, UInt32 flags); + + [DllImport("advapi32.dll", SetLastError = true)] + public static extern void CredFree(IntPtr buffer); +} + +[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)] +public struct OpenLessCredentialNativeCredential { + public UInt32 Flags; + public UInt32 
Type; + public string TargetName; + public string Comment; + public System.Runtime.InteropServices.ComTypes.FILETIME LastWritten; + public UInt32 CredentialBlobSize; + public IntPtr CredentialBlob; + public UInt32 Persist; + public UInt32 AttributeCount; + public IntPtr Attributes; + public string TargetAlias; + public string UserName; +} +"@ +} + +function Get-OpenLessCredentialTarget($Account) { + return "$Account.com.openless.app" +} + +function Get-OpenLessKeyringPassword($Account) { + Ensure-OpenLessCredentialNative + $target = Get-OpenLessCredentialTarget $Account + $ptr = [IntPtr]::Zero + $ok = [OpenLessCredentialNative]::CredRead($target, 1, 0, [ref]$ptr) + if (-not $ok) { + $errorCode = [Runtime.InteropServices.Marshal]::GetLastWin32Error() + if ($errorCode -eq 1168) { + return $null + } + throw (New-Object ComponentModel.Win32Exception($errorCode, "Read Windows Credential Manager entry $target failed")) + } + + try { + $credential = [Runtime.InteropServices.Marshal]::PtrToStructure($ptr, [type][OpenLessCredentialNativeCredential]) + if ($credential.CredentialBlobSize -eq 0) { + return "" + } + $bytes = New-Object byte[] $credential.CredentialBlobSize + [Runtime.InteropServices.Marshal]::Copy($credential.CredentialBlob, $bytes, 0, $bytes.Length) + return [Text.Encoding]::Unicode.GetString($bytes) + } finally { + [OpenLessCredentialNative]::CredFree($ptr) + } +} + +function Set-OpenLessKeyringPassword($Account, $Password) { + Ensure-OpenLessCredentialNative + $target = Get-OpenLessCredentialTarget $Account + $bytes = [Text.Encoding]::Unicode.GetBytes($Password) + $blob = [IntPtr]::Zero + if ($bytes.Length -gt 0) { + $blob = [Runtime.InteropServices.Marshal]::AllocHGlobal($bytes.Length) + [Runtime.InteropServices.Marshal]::Copy($bytes, 0, $blob, $bytes.Length) + } + + try { + $credential = [OpenLessCredentialNativeCredential]::new() + $credential.Flags = 0 + $credential.Type = 1 + $credential.TargetName = $target + $credential.Comment = "keyring v3.6.3" + 
$credential.CredentialBlobSize = $bytes.Length + $credential.CredentialBlob = $blob + $credential.Persist = 3 + $credential.AttributeCount = 0 + $credential.Attributes = [IntPtr]::Zero + $credential.TargetAlias = "" + $credential.UserName = $Account + $ok = [OpenLessCredentialNative]::CredWrite([ref]$credential, 0) + if (-not $ok) { + $errorCode = [Runtime.InteropServices.Marshal]::GetLastWin32Error() + throw (New-Object ComponentModel.Win32Exception($errorCode, "Write Windows Credential Manager entry $target failed")) + } + } finally { + if ($blob -ne [IntPtr]::Zero) { + [Runtime.InteropServices.Marshal]::FreeHGlobal($blob) + } + } +} + +function Remove-OpenLessKeyringPassword($Account) { + Ensure-OpenLessCredentialNative + $target = Get-OpenLessCredentialTarget $Account + $ok = [OpenLessCredentialNative]::CredDelete($target, 1, 0) + if (-not $ok) { + $errorCode = [Runtime.InteropServices.Marshal]::GetLastWin32Error() + if ($errorCode -ne 1168) { + throw (New-Object ComponentModel.Win32Exception($errorCode, "Delete Windows Credential Manager entry $target failed")) + } + } +} + +function Split-OpenLessCredentialJson($Json) { + $chunks = @() + for ($start = 0; $start -lt $Json.Length; $start += 1000) { + $len = [Math]::Min(1000, $Json.Length - $start) + $chunks += $Json.Substring($start, $len) + } + if ($chunks.Count -eq 0) { + $chunks += "" + } + return ,$chunks +} + +function Get-OpenLessVaultCredentials { + $manifestText = Get-OpenLessKeyringPassword "credentials.v1" + if ([string]::IsNullOrWhiteSpace($manifestText)) { + return $null + } + $manifest = $manifestText | ConvertFrom-Json + if ($manifest.openless_credentials_storage -ne "chunked" -or $manifest.version -ne 1) { + throw "Unsupported OpenLess credential vault manifest." 
+ } + + $json = "" + for ($i = 0; $i -lt [int]$manifest.chunks; $i++) { + $chunkAccount = if ($null -ne $manifest.PSObject.Properties["generation"] -and -not [string]::IsNullOrWhiteSpace($manifest.generation)) { + "credentials.v1.chunk.$($manifest.generation).$i" + } else { + "credentials.v1.chunk.$i" + } + $chunk = Get-OpenLessKeyringPassword $chunkAccount + if ($null -eq $chunk) { + throw "Missing OpenLess credential vault chunk $i." + } + $json += $chunk + } + return $json +} + +function Set-OpenLessVaultCredentials($Json, $PreviousManifestJson) { + $previousManifest = $null + if (-not [string]::IsNullOrWhiteSpace($PreviousManifestJson)) { + $previousManifest = $PreviousManifestJson | ConvertFrom-Json + } + + $chunks = Split-OpenLessCredentialJson $Json + for ($i = 0; $i -lt $chunks.Count; $i++) { + Set-OpenLessKeyringPassword "credentials.v1.chunk.$i" $chunks[$i] + } + + $manifest = [pscustomobject]@{ + openless_credentials_storage = "chunked" + version = 1 + chunks = $chunks.Count + } + Set-OpenLessKeyringPassword "credentials.v1" ($manifest | ConvertTo-Json -Compress) + + if ($null -ne $previousManifest -and $null -ne $previousManifest.PSObject.Properties["chunks"]) { + if ($null -ne $previousManifest.PSObject.Properties["generation"] -and -not [string]::IsNullOrWhiteSpace($previousManifest.generation)) { + for ($i = 0; $i -lt [int]$previousManifest.chunks; $i++) { + Remove-OpenLessKeyringPassword "credentials.v1.chunk.$($previousManifest.generation).$i" + } + } else { + for ($i = $chunks.Count; $i -lt [int]$previousManifest.chunks; $i++) { + Remove-OpenLessKeyringPassword "credentials.v1.chunk.$i" + } + } + } +} + +function Restore-ActiveAsrCredential($Snapshot, $Path) { + if ($null -eq $Snapshot) { + return + } + if ($Snapshot.HadVault) { + $manifest = $Snapshot.VaultManifestJson | ConvertFrom-Json + $chunks = Split-OpenLessCredentialJson $Snapshot.VaultJson + $usesGeneratedChunks = $null -ne $manifest.PSObject.Properties["generation"] -and -not 
[string]::IsNullOrWhiteSpace($manifest.generation) + for ($i = 0; $i -lt $chunks.Count; $i++) { + $account = if ($usesGeneratedChunks) { + "credentials.v1.chunk.$($manifest.generation).$i" + } else { + "credentials.v1.chunk.$i" + } + Set-OpenLessKeyringPassword $account $chunks[$i] + } + Set-OpenLessKeyringPassword "credentials.v1" $Snapshot.VaultManifestJson + if ($usesGeneratedChunks) { + for ($i = 0; $i -lt $Snapshot.WrittenVaultChunks; $i++) { + Remove-OpenLessKeyringPassword "credentials.v1.chunk.$i" + } + } else { + for ($i = $chunks.Count; $i -lt $Snapshot.WrittenVaultChunks; $i++) { + Remove-OpenLessKeyringPassword "credentials.v1.chunk.$i" + } + } + } else { + Remove-OpenLessKeyringPassword "credentials.v1" + for ($i = 0; $i -lt $Snapshot.WrittenVaultChunks; $i++) { + Remove-OpenLessKeyringPassword "credentials.v1.chunk.$i" + } + } + + if ($null -eq $Snapshot.LegacyJson) { + Remove-Item -LiteralPath $Path -Force -ErrorAction SilentlyContinue + } else { + Write-TextUtf8 $Path $Snapshot.LegacyJson + } +} + +function Set-ActiveAsrCredential($Path) { + $previousLegacy = Read-TextUtf8 $Path + $previousManifest = Get-OpenLessKeyringPassword "credentials.v1" + $previousVault = Get-OpenLessVaultCredentials + $source = if (-not [string]::IsNullOrWhiteSpace($previousVault)) { $previousVault } else { $previousLegacy } + if ([string]::IsNullOrWhiteSpace($source)) { + $credentials = [pscustomobject]@{ + version = 1 + active = [pscustomobject]@{ + asr = $AsrProvider + llm = "ark" + } + providers = [pscustomobject]@{ + asr = [pscustomobject]@{} + llm = [pscustomobject]@{} + } + } + } else { + $credentials = $source | ConvertFrom-Json + if ($null -eq $credentials.PSObject.Properties["active"]) { + $credentials | Add-Member -NotePropertyName active -NotePropertyValue ([pscustomobject]@{}) + } elseif ($null -eq $credentials.active) { + $credentials.active = [pscustomobject]@{} + } + if ($null -eq $credentials.active.PSObject.Properties["asr"]) { + $credentials.active | 
Add-Member -NotePropertyName asr -NotePropertyValue $AsrProvider + } else { + $credentials.active.asr = $AsrProvider + } + if ($null -eq $credentials.active.PSObject.Properties["llm"]) { + $credentials.active | Add-Member -NotePropertyName llm -NotePropertyValue "ark" + } + if ($null -eq $credentials.PSObject.Properties["providers"]) { + $credentials | Add-Member -NotePropertyName providers -NotePropertyValue ([pscustomobject]@{}) + } elseif ($null -eq $credentials.providers) { + $credentials.providers = [pscustomobject]@{} + } + if ($null -eq $credentials.providers.PSObject.Properties["asr"]) { + $credentials.providers | Add-Member -NotePropertyName asr -NotePropertyValue ([pscustomobject]@{}) + } elseif ($null -eq $credentials.providers.asr) { + $credentials.providers.asr = [pscustomobject]@{} + } + if ($null -eq $credentials.providers.PSObject.Properties["llm"]) { + $credentials.providers | Add-Member -NotePropertyName llm -NotePropertyValue ([pscustomobject]@{}) + } elseif ($null -eq $credentials.providers.llm) { + $credentials.providers.llm = [pscustomobject]@{} + } + } + $json = $credentials | ConvertTo-Json -Depth 12 -Compress + $chunks = Split-OpenLessCredentialJson $json + Set-OpenLessVaultCredentials $json $previousManifest + if ([string]::IsNullOrWhiteSpace($previousVault)) { + Write-TextUtf8 $Path ($credentials | ConvertTo-Json -Depth 12) + } + return [pscustomobject]@{ + LegacyJson = $previousLegacy + VaultJson = $previousVault + VaultManifestJson = $previousManifest + HadVault = -not [string]::IsNullOrWhiteSpace($previousVault) + WrittenVaultChunks = $chunks.Count + } +} + function Wait-LogPattern($Path, $Pattern, $TimeoutSeconds) { $deadline = (Get-Date).AddSeconds($TimeoutSeconds) while ((Get-Date) -lt $deadline) { @@ -593,50 +890,76 @@ function Speak-TestPhrase($Text) { } $credentialStatus = Get-OpenLessCredentialStatus -if ($RequireJsonCredentials -and (-not $credentialStatus.VolcengineConfigured -or -not $credentialStatus.ArkConfigured)) { - 
throw "Real ASR regression requires configured Volcengine ASR and Ark LLM credentials." +if ($RequireJsonCredentials) { + if ($AsrProvider -eq "volcengine" -and (-not $credentialStatus.VolcengineConfigured -or -not $credentialStatus.ArkConfigured)) { + throw "Real ASR regression requires configured Volcengine ASR and Ark LLM credentials when ASR=volcengine." + } + if ($AsrProvider -eq "foundry-local-whisper" -and -not $credentialStatus.ArkConfigured) { + Write-Warning "Ark LLM credentials are not configured; local ASR smoke accepts the existing raw transcript fallback when LLM is unconfigured." + } } if (-not $credentialStatus.VolcengineConfigured -or -not $credentialStatus.ArkConfigured) { - Write-Warning "Legacy credentials.json is incomplete; continuing because the app uses the OS credential vault." + $missingCredentialParts = @() + if (-not $credentialStatus.VolcengineConfigured) { $missingCredentialParts += "Volcengine ASR" } + if (-not $credentialStatus.ArkConfigured) { $missingCredentialParts += "Ark LLM" } + $providerCredentialNote = if ($AsrProvider -eq "volcengine") { + "ASR=volcengine needs Volcengine ASR and Ark LLM credentials unless the app resolves them from the OS credential vault." + } else { + "ASR=foundry-local-whisper does not require Volcengine credentials; Ark LLM is optional because raw transcript fallback is accepted." + } + Write-Warning "Legacy credentials.json is incomplete ($($missingCredentialParts -join ', ')); $providerCredentialNote Continuing because the app may use the OS credential vault." 
} $logPath = Join-Path $env:LOCALAPPDATA "OpenLess\Logs\openless.log" $historyPath = Join-Path $env:APPDATA "OpenLess\history.json" $preferencesPath = Join-Path $env:APPDATA "OpenLess\preferences.json" -$baselineCount = Get-HistoryCount $historyPath -$previousPreferences = Set-HoldHotkeyPreference $preferencesPath -$previousClipboard = Get-Clipboard -Raw -ErrorAction SilentlyContinue -$clipboardSentinel = "OPENLESS_OLD_CLIPBOARD_SENTINEL_$(Get-Date -Format 'yyyyMMddHHmmssfff')" -Restore-ClipboardValue $clipboardSentinel +$credentialsPath = Join-Path $env:APPDATA "OpenLess\credentials.json" +$inputTarget = $null +$openless = $null +$previousPreferences = $null +$previousCredentials = $null +$previousClipboard = $null $debugTranscriptPath = $null -if (-not [string]::IsNullOrWhiteSpace($InjectedTranscriptText)) { - $debugTranscriptPath = Join-Path $env:TEMP "openless-debug-transcript.txt" - Write-TextUtf8 $debugTranscriptPath $InjectedTranscriptText -} +$preferencesRewritten = $false +$credentialsRewritten = $false +$clipboardCaptured = $false -Get-Process openless -ErrorAction SilentlyContinue | Stop-Process -Force -Remove-Item -LiteralPath $logPath -Force -ErrorAction SilentlyContinue - -Write-Host "== Real ASR + direct insertion smoke ($Target) ==" -$env:OPENLESS_SHOW_MAIN_ON_START = "1" -$env:OPENLESS_ACCEPT_SYNTHETIC_HOTKEY_EVENTS = "1" -if ($DebugHotkeyEvents) { - $env:OPENLESS_DEBUG_HOTKEY_EVENTS = "1" -} -if ($debugTranscriptPath) { - $env:OPENLESS_DEBUG_TRANSCRIPT_FILE = $debugTranscriptPath -} try { - $openless = Start-Process -FilePath $ExePath -WorkingDirectory (Split-Path $ExePath -Parent) -PassThru -} finally { - Remove-Item Env:OPENLESS_SHOW_MAIN_ON_START -ErrorAction SilentlyContinue - Remove-Item Env:OPENLESS_ACCEPT_SYNTHETIC_HOTKEY_EVENTS -ErrorAction SilentlyContinue - Remove-Item Env:OPENLESS_DEBUG_HOTKEY_EVENTS -ErrorAction SilentlyContinue - Remove-Item Env:OPENLESS_DEBUG_TRANSCRIPT_FILE -ErrorAction SilentlyContinue -} + $baselineCount = 
Get-HistoryCount $historyPath + $previousClipboard = Get-Clipboard -Raw -ErrorAction SilentlyContinue + $clipboardCaptured = $true + $previousPreferences = Set-HoldHotkeyPreference $preferencesPath + $preferencesRewritten = $true + $previousCredentials = Set-ActiveAsrCredential $credentialsPath + $credentialsRewritten = $true + $clipboardSentinel = "OPENLESS_OLD_CLIPBOARD_SENTINEL_$(Get-Date -Format 'yyyyMMddHHmmssfff')" + Restore-ClipboardValue $clipboardSentinel + if (-not [string]::IsNullOrWhiteSpace($InjectedTranscriptText)) { + $debugTranscriptPath = Join-Path $env:TEMP "openless-debug-transcript.txt" + Write-TextUtf8 $debugTranscriptPath $InjectedTranscriptText + } + + Get-Process openless -ErrorAction SilentlyContinue | Stop-Process -Force + Remove-Item -LiteralPath $logPath -Force -ErrorAction SilentlyContinue + + Write-Host "== Real ASR + direct insertion smoke ($Target, ASR=$AsrProvider) ==" + $env:OPENLESS_SHOW_MAIN_ON_START = "1" + $env:OPENLESS_ACCEPT_SYNTHETIC_HOTKEY_EVENTS = "1" + if ($DebugHotkeyEvents) { + $env:OPENLESS_DEBUG_HOTKEY_EVENTS = "1" + } + if ($debugTranscriptPath) { + $env:OPENLESS_DEBUG_TRANSCRIPT_FILE = $debugTranscriptPath + } + try { + $openless = Start-Process -FilePath $ExePath -WorkingDirectory (Split-Path $ExePath -Parent) -PassThru + } finally { + Remove-Item Env:OPENLESS_SHOW_MAIN_ON_START -ErrorAction SilentlyContinue + Remove-Item Env:OPENLESS_ACCEPT_SYNTHETIC_HOTKEY_EVENTS -ErrorAction SilentlyContinue + Remove-Item Env:OPENLESS_DEBUG_HOTKEY_EVENTS -ErrorAction SilentlyContinue + Remove-Item Env:OPENLESS_DEBUG_TRANSCRIPT_FILE -ErrorAction SilentlyContinue + } -$inputTarget = $null -try { if (-not (Wait-LogPattern $logPath "hotkey listener installed|Windows low-level keyboard hook" 20)) { throw "Windows low-level keyboard hook was not installed." } @@ -705,6 +1028,14 @@ try { Write-Host "[ok] History updated. 
raw='$($latest.rawTranscript)'" Write-Host "[ok] Final text length=$($latest.finalText.Length), insertStatus=$($latest.insertStatus)" Write-Host "[ok] $Target readback length=$($targetText.Length)" + + if (Test-Path $logPath) { + $logText = Get-Content -Raw -Encoding UTF8 $logPath + $forbiddenNativeDictationPattern = "Win\+H|Voice Typing|Windows\.Media\.SpeechRecognition|SpeechRecognizer|SAPI" + if ($logText -match $forbiddenNativeDictationPattern) { + throw "OpenLess log contains a native Windows dictation route marker; this smoke must use the OpenLess pipeline." + } + } } finally { Release-Hotkey if ($null -ne $inputTarget) { @@ -721,15 +1052,22 @@ try { } } Get-Process openless -ErrorAction SilentlyContinue | Stop-Process -Force - if ($null -eq $previousPreferences) { - Remove-Item -LiteralPath $preferencesPath -Force -ErrorAction SilentlyContinue - } else { - Write-TextUtf8 $preferencesPath $previousPreferences + if ($preferencesRewritten) { + if ($null -eq $previousPreferences) { + Remove-Item -LiteralPath $preferencesPath -Force -ErrorAction SilentlyContinue + } else { + Write-TextUtf8 $preferencesPath $previousPreferences + } + } + if ($credentialsRewritten) { + Restore-ActiveAsrCredential $previousCredentials $credentialsPath + } + if ($clipboardCaptured) { + Restore-ClipboardValue $previousClipboard } - Restore-ClipboardValue $previousClipboard if ($debugTranscriptPath) { Remove-Item -LiteralPath $debugTranscriptPath -Force -ErrorAction SilentlyContinue } } -Write-Host "Real ASR + direct insertion smoke ($Target) passed." +Write-Host "Real ASR + direct insertion smoke ($Target, ASR=$AsrProvider) passed." 
diff --git a/openless-all/app/src-tauri/Cargo.lock b/openless-all/app/src-tauri/Cargo.lock index d7340f89..80595e24 100644 --- a/openless-all/app/src-tauri/Cargo.lock +++ b/openless-all/app/src-tauri/Cargo.lock @@ -110,7 +110,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -121,7 +121,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -226,6 +226,19 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "async-openai" +version = "0.33.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc48c3deb4ad9a2ee8c8e364c79eb0f74e69e17ed7e883d55988b90ea44fe986" +dependencies = [ + "bytes", + "derive_builder", + "getrandom 0.3.4", + "serde", + "serde_json", +] + [[package]] name = "async-process" version = "2.5.0" @@ -547,6 +560,25 @@ dependencies = [ "serde", ] +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "cairo-rs" version = "0.18.5" @@ -767,6 +799,12 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + [[package]] name = "cookie" version = "0.18.1" @@ -956,6 
+994,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "217698eaf96b4a3f0bc4f3662aaa55bdf913cd54d7204591faa790070c6d0853" + [[package]] name = "crc32fast" version = "1.5.0" @@ -1035,14 +1088,38 @@ version = "0.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + [[package]] name = "darling" version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.23.0", + "darling_macro 0.23.0", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", ] [[package]] @@ -1058,13 +1135,24 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core 0.20.11", + "quote", + "syn 2.0.117", +] + [[package]] name = "darling_macro" version = "0.23.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ - "darling_core", + "darling_core 0.23.0", "quote", "syn 2.0.117", ] @@ -1110,6 +1198,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "deflate64" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac6b926516df9c60bfa16e107b21086399f8285a44ca9711344b9e553c5146e2" + [[package]] name = "deranged" version = "0.5.8" @@ -1131,6 +1225,37 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.117", +] + [[package]] name = "derive_more" version = "2.1.1" @@ -1201,7 +1326,7 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1442,7 +1567,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -1628,6 +1753,27 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "foundry-local-sdk" +version = "1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6f6b9ce8ef529348022814444562c54d7216417e6ed89af3d56807f52e5788" +dependencies = [ + "async-openai", + "futures-core", + "libloading 0.8.9", + "reqwest 0.12.28", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tokio-util", + "ureq", + "urlencoding", + "zip 2.4.2", +] + [[package]] name = "fst" version = "0.4.7" @@ -2041,6 +2187,25 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "h2" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap 2.14.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "half" version = "2.7.1" @@ -2174,6 +2339,7 @@ dependencies = [ "bytes", "futures-channel", "futures-core", + "h2", "http", "http-body", "httparse", @@ -2234,9 +2400,11 @@ dependencies = [ "percent-encoding", "pin-project-lite", "socket2", + "system-configuration", "tokio", "tower-service", "tracing", + "windows-registry", ] [[package]] @@ -2251,7 +2419,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.58.0", + "windows-core 0.61.2", ] [[package]] @@ -2829,6 +2997,27 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lzma-rs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "297e814c836ae64db86b36cf2a557ba54368d03f6afcd7d947c266692f71115e" +dependencies = [ + "byteorder", + "crc", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + 
"pkg-config", +] + [[package]] name = "mach2" version = "0.4.3" @@ -3576,6 +3765,7 @@ dependencies = [ "enigo", "env_logger", "ferrous-opencc", + "foundry-local-sdk", "futures-util", "global-hotkey", "keyring", @@ -3675,7 +3865,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -3752,6 +3942,16 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" +[[package]] +name = "pbkdf2" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest", + "hmac", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -4365,8 +4565,10 @@ checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ "base64 0.22.1", "bytes", + "encoding_rs", "futures-core", "futures-util", + "h2", "http", "http-body", "http-body-util", @@ -4376,6 +4578,7 @@ dependencies = [ "hyper-util", "js-sys", "log", + "mime", "mime_guess", "native-tls", "percent-encoding", @@ -4534,7 +4737,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4543,6 +4746,7 @@ version = "0.23.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" dependencies = [ + "log", "once_cell", "ring", "rustls-pki-types", @@ -4591,7 +4795,7 @@ dependencies = [ "security-framework 3.7.0", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4914,7 +5118,7 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"cf2ebbe86054f9b45bc3881e865683ccfaccce97b9b4cb53f3039d67f355a334" dependencies = [ - "darling", + "darling 0.23.0", "proc-macro2", "quote", "syn 2.0.117", @@ -5079,7 +5283,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -5230,6 +5434,27 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "system-configuration" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" +dependencies = [ + "bitflags 2.11.1", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys 0.8.7", + "libc", +] + [[package]] name = "system-deps" version = "6.2.2" @@ -5562,7 +5787,7 @@ dependencies = [ "tokio", "url", "windows-sys 0.60.2", - "zip", + "zip 4.6.1", ] [[package]] @@ -5675,7 +5900,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -5857,6 +6082,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-tungstenite" version = "0.24.0" @@ -6150,7 +6386,7 @@ checksum = "f2f6fb2847f6742cd76af783a2a2c49e9375d0a111c7bef6f71cd9e738c72d6e" dependencies = [ "memoffset", "tempfile", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -6224,6 +6460,35 @@ version = "0.9.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "ureq" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea7109cdcd5864d4eeb1b58a1648dc9bf520360d7af16ec26d0a9354bafcfc0" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "percent-encoding", + "rustls", + "rustls-pki-types", + "ureq-proto", + "utf8-zero", + "webpki-roots", +] + +[[package]] +name = "ureq-proto" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e994ba84b0bd1b1b0cf92878b7ef898a5c1760108fe7b6010327e274917a808c" +dependencies = [ + "base64 0.22.1", + "http", + "httparse", + "log", +] + [[package]] name = "url" version = "2.5.8" @@ -6237,6 +6502,12 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "urlpattern" version = "0.3.0" @@ -6255,6 +6526,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8-zero" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8c0a043c9540bae7c578c88f91dda8bd82e59ae27c21baca69c8b191aaf5a6e" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -6633,7 +6910,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -6871,6 +7148,17 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-registry" +version = "0.6.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" +dependencies = [ + "windows-link 0.2.1", + "windows-result 0.4.1", + "windows-strings 0.5.1", +] + [[package]] name = "windows-result" version = "0.1.2" @@ -6898,6 +7186,15 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link 0.2.1", +] + [[package]] name = "windows-strings" version = "0.1.0" @@ -6917,6 +7214,15 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link 0.2.1", +] + [[package]] name = "windows-sys" version = "0.45.0" @@ -7504,6 +7810,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9cc00251562a284751c9973bace760d86c0276c471b4be569fe6b068ee97a56" +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yoke" version = "0.8.2" @@ -7738,6 +8053,36 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "zip" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50" +dependencies = [ + "aes", + "arbitrary", + "bzip2", + "constant_time_eq", + "crc32fast", + "crossbeam-utils", + "deflate64", + "displaydoc", + "flate2", + "getrandom 0.3.4", + "hmac", + "indexmap 2.14.0", + "lzma-rs", + "memchr", + "pbkdf2", + "sha1", + "thiserror 
2.0.18", + "time", + "xz2", + "zeroize", + "zopfli", + "zstd", +] + [[package]] name = "zip" version = "4.6.1" @@ -7756,6 +8101,46 @@ version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" +[[package]] +name = "zopfli" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249" +dependencies = [ + "bumpalo", + "crc32fast", + "log", + "simd-adler32", +] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "zune-core" version = "0.5.1" diff --git a/openless-all/app/src-tauri/Cargo.toml b/openless-all/app/src-tauri/Cargo.toml index 2fd9bdfb..3d0a8c2d 100644 --- a/openless-all/app/src-tauri/Cargo.toml +++ b/openless-all/app/src-tauri/Cargo.toml @@ -77,6 +77,7 @@ objc2-app-kit = "0.2" libc = "0.2" [target.'cfg(target_os = "windows")'.dependencies] +foundry-local-sdk = { version = "1.1.0", features = ["winml"] } raw-window-handle = "0.6" windows = { version = "0.58", features = [ "Win32_Foundation", diff --git a/openless-all/app/src-tauri/src/asr/local/foundry.rs b/openless-all/app/src-tauri/src/asr/local/foundry.rs new file mode 100644 index 00000000..4415f102 --- /dev/null +++ 
b/openless-all/app/src-tauri/src/asr/local/foundry.rs @@ -0,0 +1,263 @@ +use serde::Serialize; + +pub const PROVIDER_ID: &str = "foundry-local-whisper"; +pub const DEFAULT_MODEL_ALIAS: &str = "whisper-small"; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +#[allow(dead_code)] +pub struct FoundryWhisperModel { + pub alias: &'static str, + pub display_name: &'static str, + pub quality_tier: &'static str, +} + +#[allow(dead_code)] +pub const MODELS: &[FoundryWhisperModel] = &[ + FoundryWhisperModel { + alias: "whisper-small", + display_name: "Whisper Small", + quality_tier: "balanced", + }, + FoundryWhisperModel { + alias: "whisper-base", + display_name: "Whisper Base", + quality_tier: "low-resource", + }, + FoundryWhisperModel { + alias: "whisper-tiny", + display_name: "Whisper Tiny", + quality_tier: "smoke-test", + }, +]; + +#[allow(dead_code)] +pub fn is_foundry_local_whisper(id: &str) -> bool { + id == PROVIDER_ID +} + +#[allow(dead_code)] +pub fn model_alias_is_known(alias: &str) -> bool { + MODELS.iter().any(|model| model.alias == alias) +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +#[allow(dead_code)] +pub struct FoundryCatalogModel { + pub alias: String, + pub display_name: String, + pub cached: bool, + pub file_size_mb: Option, +} + +impl FoundryCatalogModel { + #[allow(dead_code)] + pub fn from_static(model: &FoundryWhisperModel) -> Self { + Self { + alias: model.alias.to_string(), + display_name: model.display_name.to_string(), + cached: false, + file_size_mb: None, + } + } +} + +#[allow(dead_code)] +pub fn static_catalog_models() -> Vec { + MODELS + .iter() + .map(FoundryCatalogModel::from_static) + .collect() +} + +#[allow(dead_code)] +pub fn default_language_hint() -> Option { + None +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +#[allow(dead_code)] +pub enum FoundryPreparePhase { + Runtime, + Model, + Load, + 
Finished, + Failed, +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +#[serde(rename_all = "camelCase")] +#[allow(dead_code)] +pub struct FoundryPrepareProgressPayload { + pub phase: FoundryPreparePhase, + pub model_alias: String, + pub label: String, + pub percent: Option, + pub error: Option, +} + +impl FoundryPrepareProgressPayload { + #[allow(dead_code)] + pub fn new( + phase: FoundryPreparePhase, + model_alias: impl Into, + label: impl Into, + percent: Option, + error: Option, + ) -> Self { + Self { + phase, + model_alias: model_alias.into(), + label: label.into(), + percent: percent.map(|value| value.clamp(0.0, 100.0)), + error, + } + } + + #[allow(dead_code)] + pub fn runtime(model_alias: impl Into, label: impl Into, percent: f64) -> Self { + Self::new( + FoundryPreparePhase::Runtime, + model_alias, + label, + Some(percent), + None, + ) + } + + #[allow(dead_code)] + pub fn model(model_alias: impl Into, label: impl Into, percent: f64) -> Self { + Self::new( + FoundryPreparePhase::Model, + model_alias, + label, + Some(percent), + None, + ) + } + + #[allow(dead_code)] + pub fn load(model_alias: impl Into, label: impl Into, percent: f64) -> Self { + Self::new( + FoundryPreparePhase::Load, + model_alias, + label, + Some(percent), + None, + ) + } + + #[allow(dead_code)] + pub fn finished(model_alias: impl Into, label: impl Into) -> Self { + Self::new( + FoundryPreparePhase::Finished, + model_alias, + label, + Some(100.0), + None, + ) + } + + #[allow(dead_code)] + pub fn failed( + model_alias: impl Into, + label: impl Into, + error: impl Into, + ) -> Self { + Self::new( + FoundryPreparePhase::Failed, + model_alias, + label, + None, + Some(error.into()), + ) + } +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +#[allow(dead_code)] +pub struct FoundryRuntimeStatus { + pub provider_id: String, + pub available: bool, + pub active_model: String, + pub loaded_model_id: Option, + pub endpoint: Option, + pub error: Option, +} + +impl 
FoundryRuntimeStatus { + #[allow(dead_code)] + pub fn unavailable(active_model: String, error: impl Into) -> Self { + Self { + provider_id: PROVIDER_ID.into(), + available: false, + active_model, + loaded_model_id: None, + endpoint: None, + error: Some(error.into()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn provider_id_is_stable() { + assert!(is_foundry_local_whisper("foundry-local-whisper")); + assert!(!is_foundry_local_whisper("local-qwen3")); + } + + #[test] + fn default_model_is_registered() { + assert!(model_alias_is_known(DEFAULT_MODEL_ALIAS)); + } + + #[test] + fn unavailable_runtime_status_uses_native_audio_shape() { + let status = FoundryRuntimeStatus::unavailable("whisper-base".to_string(), "not ready"); + + assert_eq!(status.provider_id, PROVIDER_ID); + assert!(!status.available); + assert_eq!(status.active_model, "whisper-base"); + assert_eq!(status.loaded_model_id, None); + assert_eq!(status.endpoint, None); + assert_eq!(status.error.as_deref(), Some("not ready")); + } + + #[test] + fn static_foundry_catalog_preserves_ui_order() { + let catalog = static_catalog_models(); + + assert_eq!( + catalog + .iter() + .map(|model| model.alias.as_str()) + .collect::>(), + vec!["whisper-small", "whisper-base", "whisper-tiny"] + ); + assert!(catalog.iter().all(|model| !model.cached)); + } + + #[test] + fn foundry_prepare_progress_payload_uses_expected_event_shape() { + let payload = FoundryPrepareProgressPayload::new( + FoundryPreparePhase::Model, + "whisper-small", + "download model", + Some(42.4), + None, + ); + let value = serde_json::to_value(payload).unwrap(); + + assert_eq!(value["phase"], "model"); + assert_eq!(value["modelAlias"], "whisper-small"); + assert_eq!(value["label"], "download model"); + assert_eq!(value["percent"], 42.4); + assert_eq!(value["error"], serde_json::Value::Null); + } +} diff --git a/openless-all/app/src-tauri/src/asr/local/foundry_provider.rs 
b/openless-all/app/src-tauri/src/asr/local/foundry_provider.rs new file mode 100644 index 00000000..830b1ad6 --- /dev/null +++ b/openless-all/app/src-tauri/src/asr/local/foundry_provider.rs @@ -0,0 +1,353 @@ +#![allow(dead_code)] // Task 6 接入 coordinator 后这些路径会变成运行时路径。 + +#[cfg(target_os = "windows")] +use std::fs::{self, OpenOptions}; +#[cfg(target_os = "windows")] +use std::io::Write; +#[cfg(target_os = "windows")] +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU64, Ordering}; +#[cfg(target_os = "windows")] +use std::sync::Arc; + +#[cfg(target_os = "windows")] +use anyhow::Context; +use anyhow::Result; +use parking_lot::Mutex; +#[cfg(target_os = "windows")] +use uuid::Uuid; + +use crate::asr::wav::encode_wav_16k_mono; +use crate::asr::RawTranscript; + +#[cfg(target_os = "windows")] +use super::foundry_runtime::FoundryLocalRuntime; + +pub struct FoundryLocalWhisperAsr { + #[cfg(target_os = "windows")] + runtime: Arc, + model_alias: String, + language_hint: Option, + buffer: Mutex>, + cancel_generation: AtomicU64, +} + +impl FoundryLocalWhisperAsr { + #[cfg(target_os = "windows")] + pub fn new( + runtime: Arc, + model_alias: String, + language_hint: Option, + ) -> Self { + Self { + runtime, + model_alias, + language_hint: normalize_language_hint(language_hint), + buffer: Mutex::new(Vec::new()), + cancel_generation: AtomicU64::new(0), + } + } + + #[cfg(not(target_os = "windows"))] + pub fn new(model_alias: String, language_hint: Option) -> Self { + Self { + model_alias, + language_hint: normalize_language_hint(language_hint), + buffer: Mutex::new(Vec::new()), + cancel_generation: AtomicU64::new(0), + } + } + + pub fn model_alias(&self) -> &str { + &self.model_alias + } + + pub fn language_hint(&self) -> Option<&str> { + self.language_hint.as_deref() + } + + pub async fn transcribe(&self, audio_timeout: std::time::Duration) -> Result { + let cancel_generation = self.cancel_generation.load(Ordering::SeqCst); + let pcm = self.buffer.lock().clone(); + 
if pcm.is_empty() { + return Ok(RawTranscript { + text: String::new(), + duration_ms: 0, + }); + } + + let result = self.transcribe_inner(&pcm, audio_timeout).await; + if self.cancel_generation.load(Ordering::SeqCst) != cancel_generation { + anyhow::bail!("Foundry Local Whisper transcription cancelled"); + } + if foundry_transcribe_attempt_consumes_buffer(&result) { + self.buffer.lock().clear(); + } + result + } + + async fn transcribe_inner( + &self, + pcm: &[u8], + audio_timeout: std::time::Duration, + ) -> Result { + let duration_ms = pcm_duration_ms(pcm); + + #[cfg(not(target_os = "windows"))] + { + let _ = pcm; + anyhow::bail!( + "Foundry Local Whisper is only available on Windows: {}", + self.model_alias + ); + } + + #[cfg(target_os = "windows")] + { + let wav_file = TempWavFile::create(pcm)?; + let text = self + .runtime + .transcribe_audio_file( + &self.model_alias, + self.language_hint(), + wav_file.path(), + audio_timeout, + ) + .await + .with_context(|| { + format!( + "transcribe audio file with Foundry Local Whisper model {}", + self.model_alias + ) + })?; + + Ok(RawTranscript { + text: trim_transcript_text(&text), + duration_ms, + }) + } + } + + pub fn cancel(&self) { + self.cancel_generation.fetch_add(1, Ordering::SeqCst); + #[cfg(target_os = "windows")] + self.runtime.request_cancel_prepare(); + self.buffer.lock().clear(); + } +} + +impl crate::recorder::AudioConsumer for FoundryLocalWhisperAsr { + fn consume_pcm_chunk(&self, pcm: &[u8]) { + self.buffer.lock().extend_from_slice(pcm); + } +} + +fn pcm_duration_ms(pcm: &[u8]) -> u64 { + (pcm.len() as u64 / 2) * 1000 / 16_000 +} + +fn pcm_to_wav(pcm: &[u8]) -> Vec { + let samples: Vec = pcm + .chunks_exact(2) + .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]])) + .collect(); + encode_wav_16k_mono(&samples) +} + +#[cfg(target_os = "windows")] +struct TempWavFile { + path: PathBuf, +} + +#[cfg(target_os = "windows")] +impl TempWavFile { + fn create(pcm: &[u8]) -> Result { + let dir = 
foundry_temp_dir(); + fs::create_dir_all(&dir).with_context(|| format!("create {}", dir.display()))?; + let path = dir.join(format!("foundry-whisper-{}.wav", Uuid::new_v4())); + let wav = pcm_to_wav(pcm); + let mut file = OpenOptions::new() + .write(true) + .create_new(true) + .open(&path) + .with_context(|| format!("create {}", path.display()))?; + + if let Err(err) = file.write_all(&wav) { + drop(file); + remove_partial_temp_wav(&path); + return Err(err).with_context(|| format!("write {}", path.display())); + } + if let Err(err) = file.sync_all() { + drop(file); + remove_partial_temp_wav(&path); + return Err(err).with_context(|| format!("sync {}", path.display())); + } + + Ok(Self { path }) + } + + fn path(&self) -> &Path { + &self.path + } +} + +#[cfg(target_os = "windows")] +impl Drop for TempWavFile { + fn drop(&mut self) { + match fs::remove_file(&self.path) { + Ok(()) => {} + Err(err) if err.kind() == std::io::ErrorKind::NotFound => {} + Err(err) => { + log::warn!( + "[foundry-asr] 清理临时 WAV 失败 {}: {err}", + self.path.display() + ); + } + } + } +} + +#[cfg(target_os = "windows")] +fn remove_partial_temp_wav(path: &Path) { + match fs::remove_file(path) { + Ok(()) => {} + Err(err) if err.kind() == std::io::ErrorKind::NotFound => {} + Err(err) => { + log::warn!( + "[foundry-asr] 清理未完成的临时 WAV 失败 {}: {err}", + path.display() + ); + } + } +} + +#[cfg(target_os = "windows")] +fn foundry_temp_dir() -> PathBuf { + std::env::temp_dir() + .join("OpenLess") + .join("foundry-local-asr") +} + +fn normalize_language_hint(language_hint: Option) -> Option { + language_hint + .map(|hint| hint.trim().to_string()) + .filter(|hint| !hint.is_empty()) +} + +fn trim_transcript_text(text: &str) -> String { + text.trim().to_string() +} + +fn foundry_transcribe_attempt_consumes_buffer(result: &Result) -> bool { + let _ = result; + true +} + +#[cfg(test)] +mod tests { + use crate::recorder::AudioConsumer; + + #[cfg(target_os = "windows")] + fn test_provider() -> ( + 
super::FoundryLocalWhisperAsr, + std::sync::Arc, + ) { + use std::sync::Arc; + + let runtime = Arc::new(super::FoundryLocalRuntime::new()); + ( + super::FoundryLocalWhisperAsr::new( + Arc::clone(&runtime), + "whisper-small".into(), + Some(" zh ".into()), + ), + runtime, + ) + } + + #[cfg(not(target_os = "windows"))] + fn test_provider() -> super::FoundryLocalWhisperAsr { + super::FoundryLocalWhisperAsr::new("whisper-small".into(), Some(" zh ".into())) + } + + #[test] + fn foundry_provider_duration_uses_16k_i16_pcm() { + let pcm = vec![0u8; 32_000]; + + assert_eq!(super::pcm_duration_ms(&pcm), 1000); + } + + #[test] + fn foundry_provider_wav_ignores_odd_trailing_byte() { + let pcm = [0x01, 0x00, 0xff, 0x7f, 0xee]; + let wav = super::pcm_to_wav(&pcm); + + assert_eq!(&wav[0..4], b"RIFF"); + assert_eq!(u32::from_le_bytes(wav[40..44].try_into().unwrap()), 4); + assert_eq!(&wav[44..], &[0x01, 0x00, 0xff, 0x7f]); + } + + #[cfg(target_os = "windows")] + #[test] + fn foundry_provider_temp_wav_drop_removes_file() { + let pcm = [0x01, 0x00, 0xff, 0x7f]; + let path = { + let temp = super::TempWavFile::create(&pcm).unwrap(); + let path = temp.path().to_path_buf(); + + assert!(path.exists()); + + path + }; + + assert!(!path.exists()); + } + + #[test] + fn foundry_provider_normalizes_language_hint_and_text() { + assert_eq!( + super::normalize_language_hint(Some(" zh ".into())), + Some("zh".into()) + ); + assert_eq!(super::normalize_language_hint(Some(" ".into())), None); + assert_eq!(super::trim_transcript_text(" hello\r\n"), "hello"); + } + + #[test] + fn foundry_transcribe_attempt_consumes_buffer_even_on_error() { + let result: anyhow::Result<()> = Err(anyhow::anyhow!("transient runtime error")); + + assert!(super::foundry_transcribe_attempt_consumes_buffer(&result)); + } + + #[test] + fn foundry_provider_cancel_clears_buffer() { + #[cfg(target_os = "windows")] + let (provider, _) = test_provider(); + #[cfg(not(target_os = "windows"))] + let provider = test_provider(); + + 
provider.consume_pcm_chunk(&[1, 0, 2, 0]); + provider.cancel(); + + assert!(provider.buffer.lock().is_empty()); + assert_eq!( + provider + .cancel_generation + .load(std::sync::atomic::Ordering::SeqCst), + 1 + ); + assert_eq!(provider.model_alias(), "whisper-small"); + assert_eq!(provider.language_hint(), Some("zh")); + } + + #[cfg(target_os = "windows")] + #[test] + fn foundry_provider_cancel_requests_runtime_prepare_cancel() { + let (provider, runtime) = test_provider(); + + provider.cancel(); + + assert!(runtime.cancel_prepare_requested_for_tests()); + } +} diff --git a/openless-all/app/src-tauri/src/asr/local/foundry_runtime.rs b/openless-all/app/src-tauri/src/asr/local/foundry_runtime.rs new file mode 100644 index 00000000..d3b4fece --- /dev/null +++ b/openless-all/app/src-tauri/src/asr/local/foundry_runtime.rs @@ -0,0 +1,586 @@ +#[cfg(target_os = "windows")] +#[allow(dead_code)] +mod imp { + use std::path::Path; + use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }; + + use anyhow::{Context, Result}; + use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager, Model}; + use parking_lot::Mutex; + use tokio::sync::Mutex as AsyncMutex; + + use crate::asr::local::foundry::{ + FoundryCatalogModel, FoundryPrepareProgressPayload, FoundryRuntimeStatus, MODELS, + PROVIDER_ID, + }; + + type FoundryPrepareProgressCallback = + Arc; + + #[derive(Clone)] + struct LoadedModel { + alias: String, + model_id: String, + model: Arc, + } + + #[derive(Default)] + struct RuntimeState { + manager: Option<&'static FoundryLocalManager>, + loaded: Option, + } + + pub struct FoundryLocalRuntime { + lifecycle: AsyncMutex<()>, + cancel_prepare: AtomicBool, + state: Mutex, + } + + impl Default for FoundryLocalRuntime { + fn default() -> Self { + Self::new() + } + } + + impl FoundryLocalRuntime { + pub fn new() -> Self { + Self { + lifecycle: AsyncMutex::new(()), + cancel_prepare: AtomicBool::new(false), + state: Mutex::new(RuntimeState::default()), + } + } + + pub fn 
status_snapshot(&self, active_model: &str) -> FoundryRuntimeStatus { + match self.manager() { + Ok(_) => { + let state = self.state.lock(); + FoundryRuntimeStatus { + provider_id: PROVIDER_ID.into(), + available: true, + active_model: active_model.to_string(), + loaded_model_id: state + .loaded + .as_ref() + .map(|loaded| loaded.model_id.clone()), + endpoint: None, + error: None, + } + } + Err(error) => FoundryRuntimeStatus::unavailable( + active_model.to_string(), + format!("Foundry Local runtime unavailable: {error:#}"), + ), + } + } + + pub async fn ensure_loaded(&self, alias: &str) -> Result { + self.ensure_loaded_with_progress(alias, |_| {}).await + } + + pub async fn ensure_loaded_with_progress( + &self, + alias: &str, + progress: F, + ) -> Result + where + F: Fn(FoundryPrepareProgressPayload) + Send + Sync + 'static, + { + let _lifecycle = self.lifecycle.lock().await; + self.cancel_prepare.store(false, Ordering::SeqCst); + let progress: FoundryPrepareProgressCallback = Arc::new(progress); + Ok(self.ensure_loaded_locked(alias, progress).await?.model_id) + } + + pub fn request_cancel_prepare(&self) { + self.cancel_prepare.store(true, Ordering::SeqCst); + } + + #[cfg(test)] + pub(crate) fn cancel_prepare_requested_for_tests(&self) -> bool { + self.cancel_prepare.load(Ordering::SeqCst) + } + + pub async fn catalog_snapshot(&self) -> Result> { + let _lifecycle = self.lifecycle.lock().await; + let manager = self.manager()?; + let mut catalog = Vec::with_capacity(MODELS.len()); + for known in MODELS { + let model = manager + .catalog() + .get_model(known.alias) + .await + .with_context(|| format!("get Foundry catalog model {}", known.alias))?; + let info = model.info(); + let cached = model.is_cached().await.unwrap_or(info.cached); + catalog.push(FoundryCatalogModel { + alias: known.alias.to_string(), + display_name: info + .display_name + .clone() + .unwrap_or_else(|| known.display_name.to_string()), + cached, + file_size_mb: info.file_size_mb, + }); + } + 
Ok(catalog) + } + + pub async fn transcribe_audio_file( + &self, + alias: &str, + language_hint: Option<&str>, + audio_path: &Path, + audio_timeout: std::time::Duration, + ) -> Result { + let _lifecycle = self.lifecycle.lock().await; + self.cancel_prepare.store(false, Ordering::SeqCst); + let model = self + .ensure_loaded_locked(alias, Arc::new(|_| {})) + .await? + .model; + let mut client = model.create_audio_client(); + if let Some(language_hint) = normalized_language_hint(language_hint) { + client = client.language(language_hint); + } + let result = tokio::time::timeout(audio_timeout, client.transcribe(audio_path)) + .await + .with_context(|| { + format!( + "transcribe audio with Foundry model {alias} timed out after {} seconds", + audio_timeout.as_secs() + ) + })? + .with_context(|| format!("transcribe audio with Foundry model {alias}"))?; + Ok(result.text) + } + + pub async fn release_now(&self) -> Result<()> { + let _lifecycle = self.lifecycle.lock().await; + self.release_now_locked().await + } + + async fn ensure_loaded_locked( + &self, + alias: &str, + progress: FoundryPrepareProgressCallback, + ) -> Result { + if let Some(loaded) = self.cached_loaded_model(alias) { + progress.as_ref()(FoundryPrepareProgressPayload::finished( + alias, + "Foundry model already loaded", + )); + return Ok(loaded); + } + + let previous_loaded = self.loaded_for_different_alias(alias); + + self.check_prepare_cancelled()?; + let manager = self.manager()?; + progress.as_ref()(FoundryPrepareProgressPayload::runtime( + alias, + "Foundry Local runtime components", + 0.0, + )); + let runtime_progress = Arc::clone(&progress); + let runtime_alias = alias.to_string(); + manager + .download_and_register_eps_with_progress( + None, + move |ep_name: &str, percent: f64| { + let label = if ep_name.trim().is_empty() { + "Foundry Local runtime components".to_string() + } else { + format!("Foundry Local runtime component: {ep_name}") + }; + 
runtime_progress.as_ref()(FoundryPrepareProgressPayload::runtime( + runtime_alias.clone(), + label, + percent, + )); + }, + ) + .await + .context("download/register Foundry execution providers")?; + progress.as_ref()(FoundryPrepareProgressPayload::runtime( + alias, + "Foundry Local runtime components", + 100.0, + )); + self.check_prepare_cancelled()?; + + let model = manager + .catalog() + .get_model(alias) + .await + .with_context(|| format!("get Foundry model {alias}"))?; + + let model_label = model_display_label(alias); + if !model + .is_cached() + .await + .context("check Foundry model cache")? + { + progress.as_ref()(FoundryPrepareProgressPayload::model( + alias, + model_label.clone(), + 0.0, + )); + let model_progress = Arc::clone(&progress); + let model_alias = alias.to_string(); + let model_label_for_progress = model_label.clone(); + model + .download(Some(move |percent: f64| { + model_progress.as_ref()(FoundryPrepareProgressPayload::model( + model_alias.clone(), + model_label_for_progress.clone(), + percent, + )); + })) + .await + .with_context(|| format!("download Foundry model {alias}"))?; + progress.as_ref()(FoundryPrepareProgressPayload::model( + alias, + model_label.clone(), + 100.0, + )); + } else { + progress.as_ref()(FoundryPrepareProgressPayload::model( + alias, + format!("{model_label} already downloaded"), + 100.0, + )); + } + + self.check_prepare_cancelled()?; + progress.as_ref()(FoundryPrepareProgressPayload::load( + alias, + model_label.clone(), + 0.0, + )); + let model_id = model.id().to_string(); + if previous_loaded + .as_ref() + .is_some_and(|previous| previous.model_id == model_id) + { + progress.as_ref()(FoundryPrepareProgressPayload::load( + alias, + model_label.clone(), + 100.0, + )); + let loaded = LoadedModel { + alias: alias.to_string(), + model_id, + model, + }; + *self.state.lock() = RuntimeState { + manager: Some(manager), + loaded: Some(loaded.clone()), + }; + progress.as_ref()(FoundryPrepareProgressPayload::finished( + alias, 
+ format!("{model_label} ready"), + )); + return Ok(loaded); + } + + let unloaded_previous = if let Some(previous) = previous_loaded.as_ref() { + Self::unload_model(previous).await?; + self.clear_loaded_if_model_id(&previous.model_id); + Some(previous.clone()) + } else { + None + }; + if let Err(error) = self.check_prepare_cancelled() { + self.rollback_prepare_error(manager, unloaded_previous.as_ref(), alias, error) + .await?; + } + if let Err(error) = model + .load() + .await + .with_context(|| format!("load Foundry model {alias}")) + { + self.rollback_prepare_error(manager, unloaded_previous.as_ref(), alias, error) + .await?; + } + if self.cancel_prepare.load(Ordering::SeqCst) { + if let Err(error) = model + .unload() + .await + .with_context(|| format!("unload cancelled Foundry model {alias}")) + { + self.rollback_prepare_error(manager, unloaded_previous.as_ref(), alias, error) + .await?; + } + self.rollback_prepare_error( + manager, + unloaded_previous.as_ref(), + alias, + anyhow::anyhow!("Foundry Local Whisper prepare cancelled"), + ) + .await?; + } + progress.as_ref()(FoundryPrepareProgressPayload::load( + alias, + model_label.clone(), + 100.0, + )); + + let loaded = LoadedModel { + alias: alias.to_string(), + model_id, + model, + }; + *self.state.lock() = RuntimeState { + manager: Some(manager), + loaded: Some(loaded.clone()), + }; + progress.as_ref()(FoundryPrepareProgressPayload::finished( + alias, + format!("{model_label} ready"), + )); + Ok(loaded) + } + + async fn release_now_locked(&self) -> Result<()> { + if let Some(loaded) = self.loaded_model_snapshot() { + Self::unload_model(&loaded).await?; + self.clear_loaded_if_model_id(&loaded.model_id); + } + Ok(()) + } + + async fn restore_loaded_model( + &self, + manager: &'static FoundryLocalManager, + loaded: &LoadedModel, + ) -> Result<()> { + loaded + .model + .load() + .await + .with_context(|| format!("restore Foundry model {}", loaded.model_id))?; + *self.state.lock() = RuntimeState { + manager: 
Some(manager), + loaded: Some(loaded.clone()), + }; + Ok(()) + } + + async fn rollback_prepare_error( + &self, + manager: &'static FoundryLocalManager, + previous: Option<&LoadedModel>, + alias: &str, + error: anyhow::Error, + ) -> Result<()> { + if let Some(previous) = previous { + if let Err(restore_error) = self.restore_loaded_model(manager, previous).await { + return Err(error).with_context(|| { + format!( + "prepare Foundry model {alias} failed; also failed to restore previous Foundry model {}: {restore_error:#}", + previous.model_id + ) + }); + } + } + Err(error) + } + + fn cached_loaded_model(&self, alias: &str) -> Option { + self.state + .lock() + .loaded + .as_ref() + .filter(|loaded| loaded.alias == alias) + .cloned() + } + + fn manager(&self) -> Result<&'static FoundryLocalManager> { + if let Some(manager) = self.state.lock().manager { + return Ok(manager); + } + + let manager = + FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples")) + .context("initialize Foundry Local manager")?; + self.state.lock().manager = Some(manager); + Ok(manager) + } + + fn loaded_model_snapshot(&self) -> Option { + self.state.lock().loaded.clone() + } + + fn loaded_for_different_alias(&self, alias: &str) -> Option { + self.state + .lock() + .loaded + .as_ref() + .filter(|loaded| loaded.alias != alias) + .cloned() + } + + fn clear_loaded_if_model_id(&self, model_id: &str) { + let mut state = self.state.lock(); + if state + .loaded + .as_ref() + .is_some_and(|loaded| loaded.model_id == model_id) + { + state.loaded.take(); + } + } + + async fn unload_model(loaded: &LoadedModel) -> Result<()> { + loaded + .model + .unload() + .await + .with_context(|| format!("unload Foundry model {}", loaded.model_id))?; + Ok(()) + } + + fn check_prepare_cancelled(&self) -> Result<()> { + if self.cancel_prepare.load(Ordering::SeqCst) { + anyhow::bail!("Foundry Local Whisper prepare cancelled"); + } + Ok(()) + } + } + + fn model_display_label(alias: &str) -> String { + 
MODELS + .iter() + .find(|model| model.alias == alias) + .map(|model| model.display_name.to_string()) + .unwrap_or_else(|| alias.to_string()) + } + + fn normalized_language_hint(language_hint: Option<&str>) -> Option { + language_hint + .map(str::trim) + .filter(|hint| !hint.is_empty()) + .map(str::to_string) + } + + #[cfg(test)] + mod lifecycle_tests { + use super::{normalized_language_hint, FoundryLocalRuntime}; + + #[test] + fn runtime_has_async_lifecycle_gate() { + let runtime = FoundryLocalRuntime::new(); + + assert!(runtime.lifecycle.try_lock().is_ok()); + } + + #[test] + fn runtime_normalizes_language_hint_before_audio_client() { + assert_eq!( + normalized_language_hint(Some(" zh ")), + Some("zh".to_string()) + ); + assert_eq!(normalized_language_hint(Some("")), None); + assert_eq!(normalized_language_hint(None), None); + } + } +} + +#[cfg(target_os = "windows")] +pub use imp::FoundryLocalRuntime; + +#[cfg(not(target_os = "windows"))] +pub struct FoundryLocalRuntime; + +#[cfg(not(target_os = "windows"))] +impl Default for FoundryLocalRuntime { + fn default() -> Self { + Self::new() + } +} + +#[cfg(not(target_os = "windows"))] +impl FoundryLocalRuntime { + pub fn new() -> Self { + Self + } + + pub fn status_snapshot(&self, active_model: &str) -> super::foundry::FoundryRuntimeStatus { + super::foundry::FoundryRuntimeStatus::unavailable( + active_model.to_string(), + "Foundry Local Whisper is only available on Windows", + ) + } + + pub async fn ensure_loaded(&self, alias: &str) -> anyhow::Result { + anyhow::bail!("Foundry Local Whisper is only available on Windows: {alias}"); + } + + pub async fn ensure_loaded_with_progress( + &self, + alias: &str, + _progress: F, + ) -> anyhow::Result + where + F: Fn(super::foundry::FoundryPrepareProgressPayload) + Send + Sync + 'static, + { + anyhow::bail!("Foundry Local Whisper is only available on Windows: {alias}"); + } + + pub fn request_cancel_prepare(&self) {} + + pub async fn catalog_snapshot( + &self, + ) -> 
anyhow::Result> { + Ok(super::foundry::static_catalog_models()) + } + + pub async fn transcribe_audio_file( + &self, + alias: &str, + _language_hint: Option<&str>, + _audio_path: &std::path::Path, + _audio_timeout: std::time::Duration, + ) -> anyhow::Result { + anyhow::bail!("Foundry Local Whisper is only available on Windows: {alias}"); + } + + pub async fn release_now(&self) -> anyhow::Result<()> { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::FoundryLocalRuntime; + + #[test] + fn new_runtime_reports_native_audio_status_shape() { + let runtime = FoundryLocalRuntime::new(); + let status = runtime.status_snapshot("whisper-small"); + + assert_eq!(status.provider_id, crate::asr::local::foundry::PROVIDER_ID); + assert_eq!(status.active_model, "whisper-small"); + assert_eq!(status.loaded_model_id, None); + assert_eq!(status.endpoint, None); + if status.available { + assert_eq!(status.error, None); + } else { + assert!(status.error.is_some()); + } + } + + #[tokio::test] + async fn new_runtime_release_now_has_real_async_unload_contract() { + let runtime = FoundryLocalRuntime::new(); + + runtime.release_now().await.unwrap(); + + let status = runtime.status_snapshot("whisper-small"); + assert_eq!(status.loaded_model_id, None); + } +} diff --git a/openless-all/app/src-tauri/src/asr/local/mod.rs b/openless-all/app/src-tauri/src/asr/local/mod.rs index 52fd1051..98ae85b5 100644 --- a/openless-all/app/src-tauri/src/asr/local/mod.rs +++ b/openless-all/app/src-tauri/src/asr/local/mod.rs @@ -5,11 +5,18 @@ pub mod cache; pub mod download; +pub mod foundry; +pub mod foundry_provider; +pub mod foundry_runtime; mod local_provider; pub mod models; pub mod test_run; pub use cache::LocalAsrCache; +#[allow(unused_imports)] +pub use foundry_provider::FoundryLocalWhisperAsr; +#[allow(unused_imports)] +pub use foundry_runtime::FoundryLocalRuntime; #[cfg(target_os = "macos")] mod qwen_engine; diff --git a/openless-all/app/src-tauri/src/asr/mod.rs 
b/openless-all/app/src-tauri/src/asr/mod.rs index d7347091..203cdea9 100644 --- a/openless-all/app/src-tauri/src/asr/mod.rs +++ b/openless-all/app/src-tauri/src/asr/mod.rs @@ -8,6 +8,7 @@ mod frame; pub mod local; pub mod volcengine; +pub mod wav; pub mod whisper; pub use volcengine::{VolcengineCredentials, VolcengineStreamingASR}; diff --git a/openless-all/app/src-tauri/src/asr/wav.rs b/openless-all/app/src-tauri/src/asr/wav.rs new file mode 100644 index 00000000..91503d15 --- /dev/null +++ b/openless-all/app/src-tauri/src/asr/wav.rs @@ -0,0 +1,61 @@ +//! WAV helpers for ASR providers that accept complete audio files. + +/// Encode 16 kHz / mono / 16-bit little-endian PCM samples as a RIFF WAV file. +pub fn encode_wav_16k_mono(samples: &[i16]) -> Vec { + let sample_rate: u32 = 16_000; + let num_channels: u16 = 1; + let bits_per_sample: u16 = 16; + let bytes_per_sample = bits_per_sample as u32 / 8; + let byte_rate = sample_rate * num_channels as u32 * bytes_per_sample; + let block_align = num_channels * (bits_per_sample / 8); + let data_size = samples.len() as u32 * bytes_per_sample; + let chunk_size = 36 + data_size; + + let mut wav = Vec::with_capacity(44 + data_size as usize); + wav.extend_from_slice(b"RIFF"); + wav.extend_from_slice(&chunk_size.to_le_bytes()); + wav.extend_from_slice(b"WAVE"); + wav.extend_from_slice(b"fmt "); + wav.extend_from_slice(&16u32.to_le_bytes()); + wav.extend_from_slice(&1u16.to_le_bytes()); + wav.extend_from_slice(&num_channels.to_le_bytes()); + wav.extend_from_slice(&sample_rate.to_le_bytes()); + wav.extend_from_slice(&byte_rate.to_le_bytes()); + wav.extend_from_slice(&block_align.to_le_bytes()); + wav.extend_from_slice(&bits_per_sample.to_le_bytes()); + wav.extend_from_slice(b"data"); + wav.extend_from_slice(&data_size.to_le_bytes()); + for sample in samples { + wav.extend_from_slice(&sample.to_le_bytes()); + } + wav +} + +#[cfg(test)] +mod tests { + use super::encode_wav_16k_mono; + + #[test] + fn wav_header_matches_16k_mono_pcm() 
{ + let samples = [1i16, i16::MAX, i16::MIN, -2i16]; + let wav = encode_wav_16k_mono(&samples); + + assert_eq!(&wav[0..4], b"RIFF"); + assert_eq!(u32::from_le_bytes(wav[4..8].try_into().unwrap()), 44); + assert_eq!(&wav[8..12], b"WAVE"); + assert_eq!(&wav[12..16], b"fmt "); + assert_eq!(u32::from_le_bytes(wav[16..20].try_into().unwrap()), 16); + assert_eq!(u16::from_le_bytes(wav[20..22].try_into().unwrap()), 1); + assert_eq!(u16::from_le_bytes(wav[22..24].try_into().unwrap()), 1); + assert_eq!(u32::from_le_bytes(wav[24..28].try_into().unwrap()), 16_000); + assert_eq!(u32::from_le_bytes(wav[28..32].try_into().unwrap()), 32_000); + assert_eq!(u16::from_le_bytes(wav[32..34].try_into().unwrap()), 2); + assert_eq!(u16::from_le_bytes(wav[34..36].try_into().unwrap()), 16); + assert_eq!(&wav[36..40], b"data"); + assert_eq!(u32::from_le_bytes(wav[40..44].try_into().unwrap()), 8); + assert_eq!( + &wav[44..], + &[0x01, 0x00, 0xff, 0x7f, 0x00, 0x80, 0xfe, 0xff] + ); + } +} diff --git a/openless-all/app/src-tauri/src/asr/whisper.rs b/openless-all/app/src-tauri/src/asr/whisper.rs index c2465052..465657a0 100644 --- a/openless-all/app/src-tauri/src/asr/whisper.rs +++ b/openless-all/app/src-tauri/src/asr/whisper.rs @@ -4,6 +4,7 @@ use anyhow::{Context, Result}; use parking_lot::Mutex; +use crate::asr::wav::encode_wav_16k_mono; use crate::asr::RawTranscript; pub struct WhisperBatchASR { @@ -56,7 +57,11 @@ impl WhisperBatchASR { anyhow::bail!("Whisper API key missing"); } - let wav = encode_wav_16k_mono(pcm); + let samples: Vec = pcm + .chunks_exact(2) + .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]])) + .collect(); + let wav = encode_wav_16k_mono(&samples); let base_url = self.base_url.trim_end_matches('/'); let url = format!("{}/audio/transcriptions", base_url); @@ -99,30 +104,3 @@ impl crate::recorder::AudioConsumer for WhisperBatchASR { self.buffer.lock().extend_from_slice(pcm); } } - -fn encode_wav_16k_mono(pcm: &[u8]) -> Vec { - let sample_rate: u32 = 16_000; - let 
num_channels: u16 = 1; - let bits_per_sample: u16 = 16; - let byte_rate = sample_rate * num_channels as u32 * (bits_per_sample as u32 / 8); - let block_align = num_channels * (bits_per_sample / 8); - let data_size = pcm.len() as u32; - let chunk_size = 36 + data_size; - - let mut wav = Vec::with_capacity(44 + pcm.len()); - wav.extend_from_slice(b"RIFF"); - wav.extend_from_slice(&chunk_size.to_le_bytes()); - wav.extend_from_slice(b"WAVE"); - wav.extend_from_slice(b"fmt "); - wav.extend_from_slice(&16u32.to_le_bytes()); - wav.extend_from_slice(&1u16.to_le_bytes()); // PCM - wav.extend_from_slice(&num_channels.to_le_bytes()); - wav.extend_from_slice(&sample_rate.to_le_bytes()); - wav.extend_from_slice(&byte_rate.to_le_bytes()); - wav.extend_from_slice(&block_align.to_le_bytes()); - wav.extend_from_slice(&bits_per_sample.to_le_bytes()); - wav.extend_from_slice(b"data"); - wav.extend_from_slice(&data_size.to_le_bytes()); - wav.extend_from_slice(pcm); - wav -} diff --git a/openless-all/app/src-tauri/src/commands.rs b/openless-all/app/src-tauri/src/commands.rs index e80ef597..6ee090ba 100644 --- a/openless-all/app/src-tauri/src/commands.rs +++ b/openless-all/app/src-tauri/src/commands.rs @@ -8,6 +8,11 @@ use serde::Serialize; use serde_json::Value; use tauri::{AppHandle, Emitter, Manager, State, Window}; +use crate::asr::local::foundry::{ + model_alias_is_known, FoundryCatalogModel, FoundryPrepareProgressPayload, FoundryRuntimeStatus, + DEFAULT_MODEL_ALIAS, PROVIDER_ID as FOUNDRY_LOCAL_PROVIDER_ID, +}; +use crate::asr::local::FoundryLocalRuntime; use crate::coordinator::Coordinator; use crate::permissions::{self, PermissionStatus}; use crate::persistence::{CredentialAccount, CredentialsSnapshot, CredentialsVault}; @@ -250,7 +255,7 @@ fn asr_configured_for_provider(provider: &str, snap: &CredentialsSnapshot) -> bo if provider == "volcengine" { return volcengine_configured(snap); } - if provider == crate::asr::local::PROVIDER_ID { + if provider == 
crate::asr::local::PROVIDER_ID || active_foundry_asr_is_supported(provider) { // 本地 ASR 不依赖云端凭据。 return true; } @@ -268,6 +273,31 @@ fn configured(field: &Option) -> bool { .unwrap_or(false) } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct LocalAsrReleasePlan { + qwen: bool, + foundry: bool, +} + +fn local_asr_release_plan_for_provider(provider: &str) -> LocalAsrReleasePlan { + LocalAsrReleasePlan { + qwen: provider != crate::asr::local::PROVIDER_ID, + foundry: provider != FOUNDRY_LOCAL_PROVIDER_ID, + } +} + +async fn release_foundry_runtime_if_inactive( + runtime: &Arc, + release_foundry: bool, +) { + if release_foundry { + runtime.request_cancel_prepare(); + if let Err(error) = runtime.release_now().await { + log::warn!("[foundry-asr] release inactive runtime failed: {error:#}"); + } + } +} + #[tauri::command] pub fn set_credential(window: Window, account: String, value: String) -> Result<(), String> { ensure_main_window(&window)?; @@ -280,20 +310,27 @@ pub fn set_credential(window: Window, account: String, value: String) -> Result< } #[tauri::command] -pub fn set_active_asr_provider( +pub async fn set_active_asr_provider( coord: CoordinatorState<'_>, + runtime: State<'_, Arc>, provider: String, ) -> Result<(), String> { + if provider == FOUNDRY_LOCAL_PROVIDER_ID && !active_foundry_asr_is_supported(&provider) { + return Err("Foundry Local Whisper is only available on Windows".to_string()); + } CredentialsVault::set_active_asr_provider(&provider).map_err(|e| e.to_string())?; + let release_plan = local_asr_release_plan_for_provider(&provider); if provider == crate::asr::local::PROVIDER_ID { // 切到本地 ASR → 后台预加载模型,下次按 hotkey 时不必等数秒。 coord.preload_local_asr_in_background(); - } else { + } + if release_plan.qwen { // 切回云端 → 用户已不需要本地引擎,立刻释放 1.2GB+ RAM;不释放的话只会等到 // schedule_local_asr_release 的下一次 dictation 才触发,而切回云端后根本不会 // 再走 local 路径,引擎会驻留到进程退出。 coord.release_local_asr_engine(); } + release_foundry_runtime_if_inactive(runtime.inner(), 
release_plan.foundry).await; Ok(()) } @@ -419,6 +456,11 @@ async fn validate_llm_provider() -> Result<(), String> { } async fn validate_asr_provider() -> Result<(), String> { + let active_asr = CredentialsVault::get_active_asr(); + if active_asr_is_keyless_for_validation(&active_asr) { + return Ok(()); + } + let config = read_openai_provider_config("asr")?; let model = CredentialsVault::get(CredentialAccount::AsrModel) .map_err(|e| e.to_string())? @@ -427,6 +469,22 @@ async fn validate_asr_provider() -> Result<(), String> { validate_asr_transcription(&config, model.trim()).await } +fn active_asr_is_keyless_for_validation(provider: &str) -> bool { + provider == crate::asr::local::PROVIDER_ID || active_foundry_asr_is_supported(provider) +} + +fn active_foundry_asr_is_supported(provider: &str) -> bool { + #[cfg(target_os = "windows")] + { + provider == FOUNDRY_LOCAL_PROVIDER_ID + } + #[cfg(not(target_os = "windows"))] + { + let _ = provider; + false + } +} + async fn validate_asr_transcription(config: &ProviderConfig, model: &str) -> Result<(), String> { const MAX_ASR_VALIDATE_BODY_BYTES: usize = 1024 * 1024; let url = asr_transcriptions_url(&config.base_url)?; @@ -1086,15 +1144,24 @@ fn reject_hotkey_collisions(prefs: &UserPreferences) -> Result<(), String> { reject_qa_switch_style_hotkey_overlap(qa_hotkey, &prefs.switch_style_hotkey)?; reject_qa_open_app_hotkey_overlap(qa_hotkey, &prefs.open_app_hotkey)?; } - reject_dictation_translation_hotkey_overlap(&prefs.dictation_hotkey, &prefs.translation_hotkey)?; - reject_dictation_switch_style_hotkey_overlap(&prefs.dictation_hotkey, &prefs.switch_style_hotkey)?; + reject_dictation_translation_hotkey_overlap( + &prefs.dictation_hotkey, + &prefs.translation_hotkey, + )?; + reject_dictation_switch_style_hotkey_overlap( + &prefs.dictation_hotkey, + &prefs.switch_style_hotkey, + )?; reject_dictation_open_app_hotkey_overlap(&prefs.dictation_hotkey, &prefs.open_app_hotkey)?; reject_translation_switch_style_hotkey_overlap( 
&prefs.translation_hotkey, &prefs.switch_style_hotkey, )?; reject_translation_open_app_hotkey_overlap(&prefs.translation_hotkey, &prefs.open_app_hotkey)?; - reject_switch_style_open_app_hotkey_overlap(&prefs.switch_style_hotkey, &prefs.open_app_hotkey)?; + reject_switch_style_open_app_hotkey_overlap( + &prefs.switch_style_hotkey, + &prefs.open_app_hotkey, + )?; Ok(()) } @@ -1109,7 +1176,11 @@ fn reject_dictation_switch_style_hotkey_overlap( dictation: &ShortcutBinding, switch_style: &ShortcutBinding, ) -> Result<(), String> { - reject_hotkey_overlap(dictation, switch_style, "切换风格快捷键不能和听写快捷键相同") + reject_hotkey_overlap( + dictation, + switch_style, + "切换风格快捷键不能和听写快捷键相同", + ) } fn reject_dictation_open_app_hotkey_overlap( @@ -1144,7 +1215,11 @@ fn reject_translation_switch_style_hotkey_overlap( translation: &ShortcutBinding, switch_style: &ShortcutBinding, ) -> Result<(), String> { - reject_hotkey_overlap(translation, switch_style, "切换风格快捷键不能和翻译快捷键相同") + reject_hotkey_overlap( + translation, + switch_style, + "切换风格快捷键不能和翻译快捷键相同", + ) } fn reject_translation_open_app_hotkey_overlap( @@ -1158,7 +1233,11 @@ fn reject_switch_style_open_app_hotkey_overlap( switch_style: &ShortcutBinding, open_app: &ShortcutBinding, ) -> Result<(), String> { - reject_hotkey_overlap(switch_style, open_app, "打开应用快捷键不能和切换风格快捷键相同") + reject_hotkey_overlap( + switch_style, + open_app, + "打开应用快捷键不能和切换风格快捷键相同", + ) } fn shortcut_bindings_overlap(left: &ShortcutBinding, right: &ShortcutBinding) -> bool { @@ -1269,10 +1348,7 @@ pub fn local_asr_cancel_download( } #[tauri::command] -pub fn local_asr_delete_model( - coord: CoordinatorState<'_>, - model_id: String, -) -> Result<(), String> { +pub fn local_asr_delete_model(coord: CoordinatorState<'_>, model_id: String) -> Result<(), String> { let id = ModelId::from_str(&model_id).ok_or_else(|| format!("unknown model id: {model_id}"))?; // 如果内存里加载的就是要删的这个模型,先释放:否则 mmap 残留指向已 unlink 的文件, // 且 RAM 直到下次切模型 / 用户手动按"释放"才回收。 @@ -1330,6 +1406,130 @@ pub fn 
local_asr_set_keep_loaded_secs( coord.prefs().set(prefs).map_err(|e| e.to_string()) } +// ───────────────────── Windows local ASR (Foundry Local Whisper) ───────────────────── + +fn active_foundry_model_from_prefs(prefs: &UserPreferences) -> String { + if model_alias_is_known(&prefs.foundry_local_asr_model) { + prefs.foundry_local_asr_model.clone() + } else { + DEFAULT_MODEL_ALIAS.to_string() + } +} + +fn validate_foundry_model_alias(model_alias: &str) -> Result<(), String> { + if model_alias_is_known(model_alias) { + Ok(()) + } else { + Err(format!( + "unknown Foundry Whisper model alias: {model_alias}" + )) + } +} + +fn normalize_foundry_language_hint(language_hint: &str) -> Result { + let normalized = language_hint.trim().to_string(); + if normalized.is_empty() + || (normalized.len() == 2 && normalized.bytes().all(|b| b.is_ascii_lowercase())) + { + Ok(normalized) + } else { + Err("language hint must be empty or ISO 639-1 lowercase code".to_string()) + } +} + +#[tauri::command] +pub fn foundry_local_asr_status( + coord: CoordinatorState<'_>, + runtime: State<'_, Arc>, +) -> FoundryRuntimeStatus { + let prefs = coord.prefs().get(); + let active_model = active_foundry_model_from_prefs(&prefs); + runtime.status_snapshot(&active_model) +} + +#[tauri::command] +pub async fn foundry_local_asr_catalog( + runtime: State<'_, Arc>, +) -> Result, String> { + runtime + .catalog_snapshot() + .await + .map_err(|e| format!("{e:#}")) +} + +#[tauri::command] +pub fn foundry_local_asr_set_model( + coord: CoordinatorState<'_>, + model_alias: String, +) -> Result<(), String> { + validate_foundry_model_alias(&model_alias)?; + let mut prefs = coord.prefs().get(); + prefs.foundry_local_asr_model = model_alias; + coord.prefs().set(prefs).map_err(|e| e.to_string()) +} + +#[tauri::command] +pub fn foundry_local_asr_set_language_hint( + coord: CoordinatorState<'_>, + language_hint: String, +) -> Result<(), String> { + let normalized = normalize_foundry_language_hint(&language_hint)?; + let 
mut prefs = coord.prefs().get(); + prefs.foundry_local_asr_language_hint = normalized; + coord.prefs().set(prefs).map_err(|e| e.to_string()) +} + +#[tauri::command] +pub async fn foundry_local_asr_prepare( + app: AppHandle, + runtime: State<'_, Arc>, + model_alias: String, +) -> Result { + validate_foundry_model_alias(&model_alias)?; + let progress_app = app.clone(); + let result = runtime + .ensure_loaded_with_progress(&model_alias, move |payload| { + emit_foundry_prepare_progress(&progress_app, payload); + }) + .await; + match result { + Ok(model_id) => Ok(model_id), + Err(error) => { + let message = format!("{error:#}"); + emit_foundry_prepare_progress( + &app, + FoundryPrepareProgressPayload::failed( + model_alias, + "Foundry Local Whisper prepare failed", + message.clone(), + ), + ); + Err(message) + } + } +} + +#[tauri::command] +pub fn foundry_local_asr_cancel_prepare( + runtime: State<'_, Arc>, +) -> Result<(), String> { + runtime.request_cancel_prepare(); + Ok(()) +} + +#[tauri::command] +pub async fn foundry_local_asr_release( + runtime: State<'_, Arc>, +) -> Result<(), String> { + runtime.release_now().await.map_err(|e| format!("{e:#}")) +} + +fn emit_foundry_prepare_progress(app: &AppHandle, payload: FoundryPrepareProgressPayload) { + if let Err(error) = app.emit("foundry-local-asr-prepare-progress", payload) { + log::warn!("[foundry-asr] emit prepare progress failed: {error}"); + } +} + /// 把当前会话的 openless.log 复制到用户选择的位置(前端用 plugin-dialog 拿 target_path)。 /// 路径来自 lib::log_dir_path() —— mac: ~/Library/Logs/OpenLess/openless.log, /// windows: %LOCALAPPDATA%\OpenLess\Logs\openless.log。 @@ -1352,9 +1552,12 @@ fn _ensure_snapshot_used(_: CredentialsSnapshot) {} #[cfg(test)] mod tests { use super::{ + active_asr_is_keyless_for_validation, active_foundry_model_from_prefs, asr_configured_for_provider, asr_transcriptions_url, fetch_provider_models, - llm_configured_for_snapshot, models_url, parse_model_ids, persist_settings, - ProviderConfig, SettingsWriter, + 
llm_configured_for_snapshot, local_asr_release_plan_for_provider, models_url, + normalize_foundry_language_hint, parse_model_ids, persist_settings, + release_foundry_runtime_if_inactive, validate_foundry_model_alias, ProviderConfig, + SettingsWriter, }; use crate::persistence::CredentialsSnapshot; use crate::types::{ @@ -1407,6 +1610,112 @@ mod tests { crate::asr::local::PROVIDER_ID, &snapshot() )); + #[cfg(target_os = "windows")] + assert!(asr_configured_for_provider( + crate::asr::local::foundry::PROVIDER_ID, + &snapshot() + )); + #[cfg(not(target_os = "windows"))] + assert!(!asr_configured_for_provider( + crate::asr::local::foundry::PROVIDER_ID, + &snapshot() + )); + } + + #[test] + fn credentials_status_treats_foundry_local_asr_as_configured() { + #[cfg(target_os = "windows")] + { + assert!(asr_configured_for_provider( + crate::asr::local::foundry::PROVIDER_ID, + &CredentialsSnapshot::default() + )); + } + #[cfg(not(target_os = "windows"))] + { + assert!(!asr_configured_for_provider( + crate::asr::local::foundry::PROVIDER_ID, + &CredentialsSnapshot::default() + )); + } + } + + #[test] + fn local_asr_providers_skip_external_validation() { + assert!(active_asr_is_keyless_for_validation( + crate::asr::local::PROVIDER_ID + )); + #[cfg(target_os = "windows")] + assert!(active_asr_is_keyless_for_validation( + crate::asr::local::foundry::PROVIDER_ID + )); + #[cfg(not(target_os = "windows"))] + assert!(!active_asr_is_keyless_for_validation( + crate::asr::local::foundry::PROVIDER_ID + )); + assert!(!active_asr_is_keyless_for_validation("volcengine")); + assert!(!active_asr_is_keyless_for_validation("whisper")); + } + + #[test] + fn provider_switch_release_plan_covers_inactive_local_runtimes() { + let qwen = local_asr_release_plan_for_provider(crate::asr::local::PROVIDER_ID); + assert!(!qwen.qwen); + assert!(qwen.foundry); + + let foundry = local_asr_release_plan_for_provider(crate::asr::local::foundry::PROVIDER_ID); + assert!(foundry.qwen); + assert!(!foundry.foundry); 
+ + let cloud = local_asr_release_plan_for_provider("volcengine"); + assert!(cloud.qwen); + assert!(cloud.foundry); + } + + #[cfg(target_os = "windows")] + #[tokio::test] + async fn provider_switch_release_requests_foundry_prepare_cancel_first() { + let runtime = std::sync::Arc::new(crate::asr::local::FoundryLocalRuntime::new()); + + release_foundry_runtime_if_inactive(&runtime, true).await; + + assert!(runtime.cancel_prepare_requested_for_tests()); + } + + #[test] + fn foundry_language_hint_accepts_empty_and_lowercase_iso_639_1() { + assert_eq!(normalize_foundry_language_hint("").unwrap(), ""); + assert_eq!(normalize_foundry_language_hint(" ").unwrap(), ""); + assert_eq!(normalize_foundry_language_hint("zh").unwrap(), "zh"); + assert_eq!(normalize_foundry_language_hint(" en ").unwrap(), "en"); + } + + #[test] + fn foundry_language_hint_rejects_non_lowercase_iso_639_1() { + assert!(normalize_foundry_language_hint("ZH").is_err()); + assert!(normalize_foundry_language_hint("zho").is_err()); + assert!(normalize_foundry_language_hint("z1").is_err()); + } + + #[test] + fn foundry_model_alias_validation_rejects_unknown_alias() { + assert!( + validate_foundry_model_alias(crate::asr::local::foundry::DEFAULT_MODEL_ALIAS).is_ok() + ); + assert!(validate_foundry_model_alias("whisper-large").is_err()); + } + + #[test] + fn foundry_active_model_pref_falls_back_to_default_for_unknown_alias() { + let prefs = UserPreferences { + foundry_local_asr_model: "whisper-large".to_string(), + ..Default::default() + }; + + assert_eq!( + active_foundry_model_from_prefs(&prefs), + crate::asr::local::foundry::DEFAULT_MODEL_ALIAS + ); } #[test] @@ -1506,6 +1815,7 @@ mod tests { hotkey: HotkeyBinding { trigger: HotkeyTrigger::RightControl, mode: HotkeyMode::Toggle, + ..Default::default() }, qa_hotkey: Some(ShortcutBinding { primary: ";".to_string(), @@ -1539,6 +1849,7 @@ mod tests { hotkey: HotkeyBinding { trigger: HotkeyTrigger::Custom, mode: HotkeyMode::Toggle, + keys: None, }, 
custom_combo_hotkey: Some(ComboBinding { primary: "D".into(), @@ -1563,6 +1874,7 @@ mod tests { hotkey: HotkeyBinding { trigger: HotkeyTrigger::RightControl, mode: HotkeyMode::Toggle, + keys: None, }, dictation_hotkey: ShortcutBinding { primary: "D".into(), @@ -1588,6 +1900,7 @@ mod tests { hotkey: HotkeyBinding { trigger: HotkeyTrigger::RightControl, mode: HotkeyMode::Toggle, + keys: None, }, custom_combo_hotkey: Some(ComboBinding { primary: "D".into(), diff --git a/openless-all/app/src-tauri/src/coordinator.rs b/openless-all/app/src-tauri/src/coordinator.rs index 82dda2a4..9f4c6ca8 100644 --- a/openless-all/app/src-tauri/src/coordinator.rs +++ b/openless-all/app/src-tauri/src/coordinator.rs @@ -16,6 +16,8 @@ use parking_lot::Mutex; use tauri::{async_runtime, AppHandle, Emitter, Manager}; use uuid::Uuid; +#[cfg(target_os = "windows")] +use crate::asr::local::{foundry, FoundryLocalRuntime, FoundryLocalWhisperAsr}; use crate::asr::{ DictionaryHotword, RawTranscript, VolcengineCredentials, VolcengineStreamingASR, WhisperBatchASR, @@ -57,11 +59,21 @@ enum SessionPhase { enum ActiveAsr { Volcengine(Arc), Whisper(Arc), + #[cfg(target_os = "windows")] + FoundryLocalWhisper(Arc), /// 本地 Qwen3-ASR;只在 macOS + 模型已下载时可达。 #[cfg(target_os = "macos")] Local(Arc), } +fn asr_transcribe_uses_global_timeout(asr: &ActiveAsr) -> bool { + match asr { + #[cfg(target_os = "windows")] + ActiveAsr::FoundryLocalWhisper(_) => false, + _ => true, + } +} + struct SessionResource { session_id: u64, resource: T, @@ -146,6 +158,8 @@ struct Inner { /// 本地 Qwen3-ASR 引擎缓存。跨会话复用,避免每次重加载 1.2GB+ 模型。 /// 释放时机由 prefs.local_asr_keep_loaded_secs 决定。 local_asr_cache: Arc, + #[cfg(target_os = "windows")] + foundry_local_runtime: Arc, recorder: Mutex>>, recording_mute: Mutex, hotkey: Mutex>, @@ -235,6 +249,54 @@ struct PreparedWindowsImeSessionSlot { impl Coordinator { pub fn new() -> Self { + #[cfg(target_os = "windows")] + { + Self::new_with_foundry_runtime(Arc::new(FoundryLocalRuntime::new())) + } + + 
#[cfg(not(target_os = "windows"))] + { + let history = HistoryStore::new().unwrap_or_else(|e| { + log::error!("[coord] HistoryStore init failed: {e}; falling back to empty"); + HistoryStore::new().expect("history store init") + }); + let prefs = PreferencesStore::new().expect("preferences store init"); + let vocab = DictionaryStore::new().expect("dictionary store init"); + + Self { + inner: Arc::new(Inner { + app: Mutex::new(None), + history, + prefs, + vocab, + inserter: TextInserter::new(), + state: Mutex::new(SessionState::default()), + asr: Mutex::new(None), + recorder: Mutex::new(None), + recording_mute: Mutex::new(SharedRecordingMuteState::new()), + hotkey: Mutex::new(None), + hotkey_status: Mutex::new(HotkeyStatus::default()), + hotkey_trigger_held: AtomicBool::new(false), + shortcut_recording_active: AtomicBool::new(false), + combo_hotkey: Mutex::new(None), + translation_hotkey: Mutex::new(None), + switch_style_hotkey: Mutex::new(None), + open_app_hotkey: Mutex::new(None), + translation_modifier_seen: AtomicBool::new(false), + qa_hotkey: Mutex::new(None), + qa_state: Mutex::new(QaSessionState::default()), + capsule_layout: Mutex::new(None), + qa_asr: Mutex::new(None), + qa_recorder: Mutex::new(None), + qa_stream_cancelled: Arc::new(AtomicBool::new(false)), + local_asr_cache: Arc::new(crate::asr::local::LocalAsrCache::new()), + }), + } + } + } + + #[cfg(target_os = "windows")] + pub fn new_with_foundry_runtime(foundry_local_runtime: Arc) -> Self { let history = HistoryStore::new().unwrap_or_else(|e| { log::error!("[coord] HistoryStore init failed: {e}; falling back to empty"); HistoryStore::new().expect("history store init") @@ -249,9 +311,7 @@ impl Coordinator { prefs, vocab, inserter: TextInserter::new(), - #[cfg(target_os = "windows")] windows_ime: WindowsImeSessionController::new(), - #[cfg(target_os = "windows")] prepared_windows_ime_session: Arc::new(Mutex::new(Vec::new())), state: Mutex::new(SessionState::default()), asr: Mutex::new(None), @@ -273,6 
+333,7 @@ impl Coordinator { qa_recorder: Mutex::new(None), qa_stream_cancelled: Arc::new(AtomicBool::new(false)), local_asr_cache: Arc::new(crate::asr::local::LocalAsrCache::new()), + foundry_local_runtime, }), } } @@ -664,6 +725,7 @@ impl Coordinator { let binding = crate::types::HotkeyBinding { trigger: dictation_trigger.unwrap_or(crate::types::HotkeyTrigger::Custom), mode: prefs.hotkey.mode, + keys: None, }; if dictation_trigger.is_some() { take_combo_hotkey_on_main_thread(&self.inner); @@ -813,6 +875,7 @@ fn hotkey_supervisor_loop(inner: Arc) { let binding = crate::types::HotkeyBinding { trigger, mode: prefs.hotkey.mode, + keys: None, }; match HotkeyMonitor::start(binding, tx) { Ok(monitor) => { @@ -1662,6 +1725,8 @@ fn cancel_active_asr(asr: ActiveAsr) { match asr { ActiveAsr::Volcengine(v) => v.cancel(), ActiveAsr::Whisper(w) => w.cancel(), + #[cfg(target_os = "windows")] + ActiveAsr::FoundryLocalWhisper(local) => local.cancel(), #[cfg(target_os = "macos")] ActiveAsr::Local(local) => local.cancel(), } @@ -1927,6 +1992,8 @@ async fn begin_session(inner: &Arc) -> Result<(), String> { return Err(message); } + let active_asr = CredentialsVault::get_active_asr(); + if let Err(message) = ensure_microphone_permission(inner) { log::warn!("[coord] microphone permission gate failed: {message}"); emit_capsule( @@ -1947,7 +2014,35 @@ async fn begin_session(inner: &Arc) -> Result<(), String> { // Recorder::start 成功后再发,确保「用户看到录音条」时 mic 已经在 capture。 // 之前在这一行就 emit 会让用户看到录音条后立刻开口,但 mic 还在 cpal init // 窗口(50-200ms)内 → 开头几个字物理上录不到。详见 issue 备注。 - let active_asr = CredentialsVault::get_active_asr(); + #[cfg(target_os = "windows")] + if foundry::is_foundry_local_whisper(&active_asr) { + let prefs = inner.prefs.get(); + let model_alias = if foundry::model_alias_is_known(&prefs.foundry_local_asr_model) { + prefs.foundry_local_asr_model.clone() + } else { + foundry::DEFAULT_MODEL_ALIAS.to_string() + }; + let language_hint = prefs.foundry_local_asr_language_hint.trim().to_string(); 
+ let language_hint = if language_hint.is_empty() { + None + } else { + Some(language_hint) + }; + let local = Arc::new(FoundryLocalWhisperAsr::new( + Arc::clone(&inner.foundry_local_runtime), + model_alias, + language_hint, + )); + store_asr_for_session( + inner, + current_session_id, + ActiveAsr::FoundryLocalWhisper(Arc::clone(&local)), + ); + let consumer: Arc = local; + start_recorder_and_enter_listening(inner, current_session_id, &active_asr, consumer) + .await?; + return Ok(()); + } #[cfg(target_os = "macos")] if crate::asr::local::is_local_qwen3(&active_asr) { @@ -2355,8 +2450,10 @@ async fn end_session(inner: &Arc) -> Result<(), String> { } }; + let uses_global_timeout = asr_transcribe_uses_global_timeout(&asr); let raw = match asr { ActiveAsr::Volcengine(asr) => { + debug_assert!(uses_global_timeout); if let Err(e) = asr.send_last_frame().await { log::error!("[coord] send last frame failed: {e}"); } @@ -2403,6 +2500,7 @@ async fn end_session(inner: &Arc) -> Result<(), String> { } } ActiveAsr::Whisper(w) => { + debug_assert!(uses_global_timeout); // Whisper 也添加类似的超时保护 let timeout_duration = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS); match tokio::time::timeout(timeout_duration, w.transcribe()).await { @@ -2442,8 +2540,47 @@ async fn end_session(inner: &Arc) -> Result<(), String> { } } } + #[cfg(target_os = "windows")] + ActiveAsr::FoundryLocalWhisper(local) => { + debug_assert!(!uses_global_timeout); + match local + .transcribe(foundry_audio_transcribe_timeout_duration()) + .await + { + Ok(r) => { + schedule_foundry_local_asr_release(inner, current_session_id); + r + } + Err(e) => { + if inner.state.lock().cancelled { + log::info!( + "[coord] Foundry Local Whisper transcribe cancelled — discarding transcript" + ); + schedule_foundry_local_asr_release(inner, current_session_id); + restore_prepared_windows_ime_session(inner, current_session_id); + set_phase_idle_if_session_matches(inner, current_session_id); + return Ok(()); + } + 
log::error!("[coord] Foundry Local Whisper transcribe failed: {e:#}"); + schedule_foundry_local_asr_release(inner, current_session_id); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some(format!("本地识别失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(e.to_string()); + } + } + } #[cfg(target_os = "macos")] ActiveAsr::Local(local) => { + debug_assert!(uses_global_timeout); // 与 Volcengine/Whisper 一致包一层 global timeout(来自 origin/main)。 // 注:缓存命中时 transcribe 不含 load 时间;冷启动 load 已在 build_local_qwen3 // 提前完成,所以 15s 给 transcribe 本身足够。 @@ -2998,6 +3135,14 @@ fn ensure_asr_credentials() -> Result<(), String> { } } + if crate::asr::local::foundry::is_foundry_local_whisper(&active_asr) { + #[cfg(not(target_os = "windows"))] + { + return Err("Foundry Local Whisper 当前仅支持 Windows".to_string()); + } + return Ok(()); + } + if is_whisper_compatible_provider(&active_asr) { let api_key = CredentialsVault::get(CredentialAccount::AsrApiKey) .ok() @@ -3017,6 +3162,22 @@ fn ensure_asr_credentials() -> Result<(), String> { } } +#[cfg(test)] +fn is_keyless_local_asr_provider(id: &str) -> bool { + if crate::asr::local::is_local_qwen3(id) { + return true; + } + #[cfg(target_os = "windows")] + { + crate::asr::local::foundry::is_foundry_local_whisper(id) + } + #[cfg(not(target_os = "windows"))] + { + let _ = id; + false + } +} + #[cfg(target_os = "macos")] fn ensure_local_qwen3_model_ready() -> Result<(), String> { let prefs = || -> Result { @@ -3053,6 +3214,34 @@ fn schedule_local_asr_release(inner: &Arc) { }); } +#[cfg(target_os = "windows")] +fn foundry_local_asr_release_keep_secs(inner: &Arc) -> u32 { + inner.prefs.get().foundry_local_asr_keep_loaded_secs +} + +#[cfg(target_os = "windows")] +fn foundry_release_session_is_current(inner: &Arc, session_id: u64) -> bool { + inner.state.lock().session_id == 
session_id +} + +#[cfg(target_os = "windows")] +fn schedule_foundry_local_asr_release(inner: &Arc, session_id: u64) { + let keep_secs = foundry_local_asr_release_keep_secs(inner); + let runtime = Arc::clone(&inner.foundry_local_runtime); + let inner = Arc::clone(inner); + tauri::async_runtime::spawn(async move { + if keep_secs > 0 { + tokio::time::sleep(std::time::Duration::from_secs(keep_secs as u64)).await; + } + if !foundry_release_session_is_current(&inner, session_id) { + return; + } + if let Err(error) = runtime.release_now().await { + log::warn!("[foundry-asr] scheduled release failed: {error:#}"); + } + }); +} + #[cfg(target_os = "macos")] async fn build_local_qwen3( inner: &Arc, @@ -3858,6 +4047,90 @@ mod tests { assert!(!window_key_matches_trigger(HotkeyTrigger::Fn, "Fn", "Fn")); } + #[test] + fn foundry_local_provider_is_keyless_and_not_whisper_compatible() { + #[cfg(target_os = "windows")] + assert!(is_keyless_local_asr_provider( + crate::asr::local::foundry::PROVIDER_ID + )); + #[cfg(not(target_os = "windows"))] + assert!(!is_keyless_local_asr_provider( + crate::asr::local::foundry::PROVIDER_ID + )); + assert!(!is_whisper_compatible_provider( + crate::asr::local::foundry::PROVIDER_ID + )); + } + + #[cfg(target_os = "windows")] + #[test] + fn coordinator_shares_app_foundry_runtime() { + let runtime = Arc::new(crate::asr::local::FoundryLocalRuntime::new()); + let coordinator = Coordinator::new_with_foundry_runtime(Arc::clone(&runtime)); + + assert!(Arc::ptr_eq( + &runtime, + &coordinator.inner.foundry_local_runtime + )); + } + + #[cfg(target_os = "windows")] + #[test] + fn foundry_transcribe_skips_global_timeout_for_first_run_provisioning() { + let provider = Arc::new(crate::asr::local::FoundryLocalWhisperAsr::new( + Arc::new(crate::asr::local::FoundryLocalRuntime::new()), + crate::asr::local::foundry::DEFAULT_MODEL_ALIAS.to_string(), + None, + )); + let active_asr = ActiveAsr::FoundryLocalWhisper(provider); + + 
assert!(!asr_transcribe_uses_global_timeout(&active_asr)); + } + + #[cfg(target_os = "windows")] + #[test] + fn foundry_audio_transcribe_timeout_is_separate_from_prepare() { + let timeout = foundry_audio_transcribe_timeout_duration(); + + assert_eq!( + timeout, + std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS) + ); + } + + #[cfg(target_os = "windows")] + #[test] + fn foundry_release_uses_foundry_keep_loaded_preference() { + let runtime = Arc::new(crate::asr::local::FoundryLocalRuntime::new()); + let coordinator = Coordinator::new_with_foundry_runtime(runtime); + let mut prefs = coordinator.inner.prefs.get(); + prefs.local_asr_keep_loaded_secs = 3; + prefs.foundry_local_asr_keep_loaded_secs = 7; + coordinator.inner.prefs.set(prefs).unwrap(); + + assert_eq!(foundry_local_asr_release_keep_secs(&coordinator.inner), 7); + } + + #[cfg(target_os = "windows")] + #[test] + fn foundry_release_guard_rejects_stale_session() { + let runtime = Arc::new(crate::asr::local::FoundryLocalRuntime::new()); + let coordinator = Coordinator::new_with_foundry_runtime(runtime); + let old_session_id = coordinator.inner.state.lock().session_id; + + assert!(foundry_release_session_is_current( + &coordinator.inner, + old_session_id + )); + + coordinator.inner.state.lock().session_id = old_session_id.wrapping_add(1); + + assert!(!foundry_release_session_is_current( + &coordinator.inner, + old_session_id + )); + } + #[test] fn resolve_ark_endpoint_rejects_blank_key_without_custom_endpoint() { assert_eq!( @@ -3981,6 +4254,7 @@ mod tests { hotkey: crate::types::HotkeyBinding { trigger: HotkeyTrigger::RightControl, mode: HotkeyMode::Hold, + keys: None, }, ..Default::default() }) @@ -4186,6 +4460,11 @@ const CAPSULE_AUTO_HIDE_DELAY_MS: u64 = 2000; /// 只在 ASR 超时机制失效时作为最后的防线触发。 const COORDINATOR_GLOBAL_TIMEOUT_SECS: u64 = 15; +#[cfg(target_os = "windows")] +fn foundry_audio_transcribe_timeout_duration() -> std::time::Duration { + 
std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS) +} + /// begin_session 中各 await 之间的 cancel race 检查结果。 enum BeginOutcome { /// 启动 continuation 属于旧 session;不能改动当前 session 状态。 diff --git a/openless-all/app/src-tauri/src/lib.rs b/openless-all/app/src-tauri/src/lib.rs index 96dcb30f..3d79a17d 100644 --- a/openless-all/app/src-tauri/src/lib.rs +++ b/openless-all/app/src-tauri/src/lib.rs @@ -51,6 +51,12 @@ use tauri::{AppHandle, Emitter, LogicalPosition, LogicalSize, Manager, RunEvent, #[cfg_attr(mobile, tauri::mobile_entry_point)] pub fn run() { + let foundry_local_runtime = Arc::new(asr::local::FoundryLocalRuntime::new()); + #[cfg(target_os = "windows")] + let coordinator = Arc::new(coordinator::Coordinator::new_with_foundry_runtime( + Arc::clone(&foundry_local_runtime), + )); + #[cfg(not(target_os = "windows"))] let coordinator = Arc::new(coordinator::Coordinator::new()); let local_asr_download_manager = Arc::new(asr::local::DownloadManager::new()); @@ -76,6 +82,7 @@ pub fn run() { )) .manage(coordinator.clone()) .manage(local_asr_download_manager.clone()) + .manage(foundry_local_runtime.clone()) .manage(commands::MicrophoneMonitorState::new(None)) .manage(commands::TrayMicrophoneMenuState::new(Vec::new())) .setup(move |app| { @@ -277,6 +284,13 @@ pub fn run() { commands::local_asr_release_engine, commands::local_asr_preload, commands::local_asr_set_keep_loaded_secs, + commands::foundry_local_asr_status, + commands::foundry_local_asr_catalog, + commands::foundry_local_asr_set_model, + commands::foundry_local_asr_set_language_hint, + commands::foundry_local_asr_prepare, + commands::foundry_local_asr_cancel_prepare, + commands::foundry_local_asr_release, commands::export_error_log, restart_app, ]) diff --git a/openless-all/app/src-tauri/src/persistence.rs b/openless-all/app/src-tauri/src/persistence.rs index a4b41ac1..f559b4a7 100644 --- a/openless-all/app/src-tauri/src/persistence.rs +++ b/openless-all/app/src-tauri/src/persistence.rs @@ -171,7 +171,14 
@@ impl Default for CredsActive { } fn creds_default_asr() -> String { - "volcengine".into() + #[cfg(target_os = "windows")] + { + return crate::asr::local::foundry::PROVIDER_ID.into(); + } + #[cfg(not(target_os = "windows"))] + { + "volcengine".into() + } } fn creds_default_llm() -> String { "ark".into() diff --git a/openless-all/app/src-tauri/src/types.rs b/openless-all/app/src-tauri/src/types.rs index f6c0e83d..5c4b8ca3 100644 --- a/openless-all/app/src-tauri/src/types.rs +++ b/openless-all/app/src-tauri/src/types.rs @@ -186,6 +186,15 @@ pub struct UserPreferences { /// 默认 300(5 分钟):兼顾连续听写不重加载、长时间不用释放 1.2GB+ RAM。 #[serde(default = "default_local_asr_keep_loaded_secs")] pub local_asr_keep_loaded_secs: u32, + /// Windows Foundry Local Whisper 当前激活的模型 alias。 + #[serde(default = "default_foundry_local_asr_model")] + pub foundry_local_asr_model: String, + /// Windows Foundry Local Whisper 语言 hint。空字符串 = 自动检测。 + #[serde(default)] + pub foundry_local_asr_language_hint: String, + /// Windows Foundry Local Whisper 模型在 runtime 中保持加载多久。 + #[serde(default = "default_local_asr_keep_loaded_secs")] + pub foundry_local_asr_keep_loaded_secs: u32, } fn default_local_asr_model() -> String { @@ -200,6 +209,21 @@ fn default_local_asr_keep_loaded_secs() -> u32 { 300 } +fn default_foundry_local_asr_model() -> String { + crate::asr::local::foundry::DEFAULT_MODEL_ALIAS.into() +} + +fn default_active_asr_provider() -> String { + #[cfg(target_os = "windows")] + { + return crate::asr::local::foundry::PROVIDER_ID.into(); + } + #[cfg(not(target_os = "windows"))] + { + "volcengine".into() + } +} + #[derive(Debug, Clone, Deserialize)] #[serde(default, rename_all = "camelCase")] struct UserPreferencesWire { @@ -234,6 +258,12 @@ struct UserPreferencesWire { local_asr_mirror: String, #[serde(default = "default_local_asr_keep_loaded_secs")] local_asr_keep_loaded_secs: u32, + #[serde(default = "default_foundry_local_asr_model")] + foundry_local_asr_model: String, + #[serde(default)] + 
foundry_local_asr_language_hint: String, + #[serde(default = "default_local_asr_keep_loaded_secs")] + foundry_local_asr_keep_loaded_secs: u32, } impl Default for UserPreferencesWire { @@ -265,6 +295,9 @@ impl Default for UserPreferencesWire { local_asr_active_model: prefs.local_asr_active_model, local_asr_mirror: prefs.local_asr_mirror, local_asr_keep_loaded_secs: prefs.local_asr_keep_loaded_secs, + foundry_local_asr_model: prefs.foundry_local_asr_model, + foundry_local_asr_language_hint: prefs.foundry_local_asr_language_hint, + foundry_local_asr_keep_loaded_secs: prefs.foundry_local_asr_keep_loaded_secs, } } } @@ -310,6 +343,9 @@ impl<'de> Deserialize<'de> for UserPreferences { local_asr_active_model: wire.local_asr_active_model, local_asr_mirror: wire.local_asr_mirror, local_asr_keep_loaded_secs: wire.local_asr_keep_loaded_secs, + foundry_local_asr_model: wire.foundry_local_asr_model, + foundry_local_asr_language_hint: wire.foundry_local_asr_language_hint, + foundry_local_asr_keep_loaded_secs: wire.foundry_local_asr_keep_loaded_secs, }) } } @@ -394,7 +430,7 @@ impl Default for UserPreferences { show_capsule: true, mute_during_recording: false, microphone_device_name: String::new(), - active_asr_provider: "volcengine".into(), + active_asr_provider: default_active_asr_provider(), active_llm_provider: "ark".into(), restore_clipboard_after_paste: true, allow_non_tsf_insertion_fallback: true, @@ -411,6 +447,9 @@ impl Default for UserPreferences { local_asr_active_model: default_local_asr_model(), local_asr_mirror: default_local_asr_mirror(), local_asr_keep_loaded_secs: default_local_asr_keep_loaded_secs(), + foundry_local_asr_model: default_foundry_local_asr_model(), + foundry_local_asr_language_hint: String::new(), + foundry_local_asr_keep_loaded_secs: default_local_asr_keep_loaded_secs(), } } } @@ -608,6 +647,7 @@ impl HotkeyTrigger { pub enum HotkeyMode { Toggle, Hold, + DoubleClick, } #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] @@ -628,11 
+668,140 @@ impl HotkeyAdapterKind { } } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "camelCase")] +pub struct HotkeyKey { + pub code: String, +} + +impl HotkeyKey { + pub fn new(code: impl Into) -> Self { + Self { code: code.into() } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(default, rename_all = "camelCase")] pub struct HotkeyBinding { pub trigger: HotkeyTrigger, pub mode: HotkeyMode, + pub keys: Option>, +} + +impl HotkeyBinding { + pub fn effective_codes(&self) -> Vec { + let Some(keys) = &self.keys else { + let code = legacy_trigger_code(self.trigger); + return if code.is_empty() { + Vec::new() + } else { + vec![code.to_string()] + }; + }; + keys.iter() + .map(|key| key.code.trim().to_string()) + .filter(|code| !code.is_empty()) + .collect() + } + + pub fn display_label(&self) -> String { + let codes = self.effective_codes(); + if codes.is_empty() { + return "未设置".to_string(); + } + codes + .iter() + .map(|code| display_hotkey_code(code)) + .collect::>() + .join("+") + } +} + +fn legacy_trigger_code(trigger: HotkeyTrigger) -> &'static str { + match trigger { + HotkeyTrigger::RightOption | HotkeyTrigger::RightAlt => "AltRight", + HotkeyTrigger::LeftOption => "AltLeft", + HotkeyTrigger::RightControl => "ControlRight", + HotkeyTrigger::LeftControl => "ControlLeft", + HotkeyTrigger::RightCommand => "MetaRight", + #[cfg(target_os = "windows")] + HotkeyTrigger::Fn => "ControlRight", + #[cfg(not(target_os = "windows"))] + HotkeyTrigger::Fn => "Fn", + HotkeyTrigger::Custom => "", + } +} + +fn display_hotkey_code(code: &str) -> String { + let label = match code { + "ControlLeft" => "左Ctrl", + "ControlRight" => "右 Control", + "AltLeft" => "左Alt", + "AltRight" => "右Alt", + "ShiftLeft" => "左Shift", + "ShiftRight" => "右Shift", + "MetaLeft" | "OSLeft" => "左Win", + "MetaRight" | "OSRight" => "右Win", + "Fn" => "Fn", + "FnLock" 
=> "FnLock", + "CapsLock" => "CapsLock", + "ScrollLock" => "ScrLock", + "Pause" => "Pause", + "PrintScreen" => "PrtSc", + "Backspace" => "Backspace", + "Tab" => "Tab", + "Enter" => "Enter", + "Space" => "Space", + "Insert" => "Insert", + "Delete" => "Delete", + "Home" => "Home", + "End" => "End", + "PageUp" => "PageUp", + "PageDown" => "PageDown", + "ArrowUp" => "Up", + "ArrowDown" => "Down", + "ArrowLeft" => "Left", + "ArrowRight" => "Right", + "NumpadAdd" => "Num+", + "NumpadSubtract" => "Num-", + "NumpadMultiply" => "Num*", + "NumpadDivide" => "Num/", + "NumpadDecimal" => "Num.", + "NumpadEnter" => "NumEnter", + "Mouse4" => "Mouse4", + "Mouse5" => "Mouse5", + "Backquote" => "`", + "Minus" => "-", + "Equal" => "=", + "BracketLeft" => "[", + "BracketRight" => "]", + "Backslash" => "\\", + "Semicolon" => ";", + "Quote" => "'", + "Comma" => ",", + "Period" => ".", + "Slash" => "/", + _ => "", + }; + if !label.is_empty() { + return label.to_string(); + } + if let Some(letter) = code.strip_prefix("Key") { + if letter.len() == 1 { + return letter.to_string(); + } + } + if let Some(digit) = code.strip_prefix("Digit") { + if digit.len() == 1 { + return digit.to_string(); + } + } + if let Some(num) = code.strip_prefix("Numpad") { + if num.len() == 1 && num.as_bytes()[0].is_ascii_digit() { + return format!("Num{num}"); + } + } + code.to_string() } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] @@ -686,7 +855,7 @@ impl HotkeyCapability { supports_side_specific_modifiers: true, explicit_fallback_available: false, status_hint: Some( - "默认建议使用“右 Control + 切换式说话”;若更习惯按住说话,可在录音设置里切回。若无响应,可在权限页查看 hook 安装状态。" + "默认建议使用“右Ctrl + 单击”;若更习惯按住说话,可在录音设置里切回“按住”。若无响应,可在权限页查看 hook 安装状态。" .into(), ), }; @@ -780,6 +949,7 @@ impl Default for HotkeyBinding { Self { trigger: HotkeyTrigger::RightControl, mode: HotkeyMode::Toggle, + keys: Some(vec![HotkeyKey::new("ControlRight")]), } } @@ -788,6 +958,7 @@ impl Default for HotkeyBinding { Self { trigger: HotkeyTrigger::RightOption, 
mode: HotkeyMode::Toggle, + keys: Some(vec![HotkeyKey::new("AltRight")]), } } } @@ -908,4 +1079,57 @@ mod tests { assert_eq!(prefs.dictation_hotkey.primary, "Space"); assert_eq!(prefs.dictation_hotkey.modifiers, vec!["ctrl"]); } + + #[test] + fn legacy_hotkey_trigger_still_produces_effective_key_codes() { + let binding: HotkeyBinding = + serde_json::from_str(r#"{"trigger":"rightControl","mode":"toggle"}"#).unwrap(); + + assert_eq!(binding.effective_codes(), vec!["ControlRight".to_string()]); + assert_eq!(binding.display_label(), "右 Control"); + } + + #[cfg(target_os = "windows")] + #[test] + fn legacy_fn_trigger_uses_windows_control_right_alias() { + let binding: HotkeyBinding = + serde_json::from_str(r#"{"trigger":"fn","mode":"toggle"}"#).unwrap(); + + assert_eq!(binding.effective_codes(), vec!["ControlRight".to_string()]); + } + + #[test] + fn hotkey_binding_supports_combo_side_keys_mouse_and_double_click_mode() { + let binding = HotkeyBinding { + trigger: HotkeyTrigger::RightControl, + mode: HotkeyMode::DoubleClick, + keys: Some(vec![ + HotkeyKey::new("ControlLeft"), + HotkeyKey::new("AltLeft"), + HotkeyKey::new("Mouse4"), + ]), + }; + + assert_eq!( + binding.effective_codes(), + vec![ + "ControlLeft".to_string(), + "AltLeft".to_string(), + "Mouse4".to_string() + ] + ); + assert_eq!(binding.display_label(), "左Ctrl+左Alt+Mouse4"); + + let json = serde_json::to_value(&binding).unwrap(); + assert_eq!(json["mode"], "doubleClick"); + } + + #[test] + fn explicit_empty_hotkey_keys_clear_the_binding() { + let binding: HotkeyBinding = + serde_json::from_str(r#"{"trigger":"rightControl","mode":"toggle","keys":[]}"#) + .unwrap(); + + assert!(binding.effective_codes().is_empty()); + } } diff --git a/openless-all/app/src/App.tsx b/openless-all/app/src/App.tsx index d047bf72..37b82aed 100644 --- a/openless-all/app/src/App.tsx +++ b/openless-all/app/src/App.tsx @@ -10,6 +10,10 @@ import { handleWindowHotkeyEvent, isTauri, } from './lib/ipc'; +import { + 
isWindowHotkeyKeyboardCandidate, + windowMouseHotkeyCode, +} from './lib/windowHotkeyFallback'; import { QaPanel } from './pages/QaPanel'; import { HotkeySettingsProvider } from './state/HotkeySettingsContext'; @@ -110,7 +114,7 @@ export function App({ isCapsule, isQa }: AppProps) { useEffect(() => { if (!isTauri || os !== 'win') return; const forwardKey = (event: KeyboardEvent) => { - if (!isWindowHotkeyCandidate(event)) return; + if (!isWindowHotkeyKeyboardCandidate(event)) return; void handleWindowHotkeyEvent( event.type as 'keydown' | 'keyup', event.key, @@ -118,11 +122,25 @@ export function App({ isCapsule, isQa }: AppProps) { event.repeat, ).catch(error => console.warn('[window-hotkey] forward failed', error)); }; + const forwardMouse = (event: MouseEvent) => { + const code = windowMouseHotkeyCode(event.button); + if (!code) return; + void handleWindowHotkeyEvent( + event.type === 'mousedown' ? 'keydown' : 'keyup', + code, + code, + false, + ).catch(error => console.warn('[window-hotkey] mouse forward failed', error)); + }; window.addEventListener('keydown', forwardKey, true); window.addEventListener('keyup', forwardKey, true); + window.addEventListener('mousedown', forwardMouse, true); + window.addEventListener('mouseup', forwardMouse, true); return () => { window.removeEventListener('keydown', forwardKey, true); window.removeEventListener('keyup', forwardKey, true); + window.removeEventListener('mousedown', forwardMouse, true); + window.removeEventListener('mouseup', forwardMouse, true); }; }, [os]); @@ -136,16 +154,6 @@ export function App({ isCapsule, isQa }: AppProps) { ); } -function isWindowHotkeyCandidate(event: KeyboardEvent): boolean { - return ( - event.key === 'Escape' || - event.code === 'ControlRight' || - event.code === 'ControlLeft' || - event.code === 'AltRight' || - event.code === 'MetaRight' - ); -} - function StartupShell() { // 用透明背景:main window 是 transparent + macOSPrivateApi(NSVisualEffectView 磨砂)。 // 之前用 
linear-gradient(rgba(245,245,247,0.96)...) 会盖过 macOS vibrancy,启动时 diff --git a/openless-all/app/src/i18n/en.ts b/openless-all/app/src/i18n/en.ts index 0c22e0c4..2a8b4153 100644 --- a/openless-all/app/src/i18n/en.ts +++ b/openless-all/app/src/i18n/en.ts @@ -323,6 +323,7 @@ export const en: typeof zhCN = { asrZhipu: 'Zhipu GLM-ASR', asrGroq: 'Groq Whisper-large-v3', asrWhisper: 'OpenAI Whisper (compatible)', + asrFoundryLocalWhisper: 'Local Whisper (Foundry Local)', asrLocalQwen3: 'Local Qwen3-ASR', }, volcengineAppKeyLabel: 'APP ID', @@ -330,6 +331,7 @@ export const en: typeof zhCN = { volcengineResourceIdLabel: 'Resource ID', volcengineMappingNote: 'Secret Key is not required right now. Resource ID defaults to volc.bigasr.sauc.duration.', localAsrHint: 'Local Qwen3-ASR runs entirely on this machine. No API key needed — just download the model from HuggingFace.', + foundryLocalAsrHint: 'Windows local Whisper runs on this device and does not need an ASR API key. First use downloads Foundry Local runtime components and a Whisper model; LLM polishing still uses your configured LLM provider.', localAsrPerformanceWarning: 'Local inference runs on CPU + Apple Silicon Accelerate; each transcription takes **several seconds longer than cloud ASR**, and Chinese / dialect accuracy is **typically lower** than Volcengine or Whisper turbo. Use it for offline, privacy-sensitive, or no-cloud-API scenarios.', localAsrReady: '{{model}} downloaded', localAsrNotReady: '{{model}} not downloaded', @@ -558,14 +560,53 @@ export const en: typeof zhCN = { localAsr: { kicker: 'LOCAL ASR', title: 'Models', - desc: 'Local Qwen3-ASR engine and model manager. Models download from HuggingFace and run fully offline. Streaming on Windows is tracked in repo issues.', - engineUnavailable: 'The local engine is not bundled on this platform yet (macOS only; Windows tracked in issue #256). You can still download models, but they cannot be activated here.', + desc: 'Manage on-device ASR models. 
Windows can use Microsoft Foundry Local Whisper; Qwen3-ASR model management stays separate.', + qwenTitle: 'Qwen3-ASR model manager', + engineUnavailable: 'The Qwen3-ASR inference engine is not bundled on this platform. You can still download models, but Qwen3-ASR cannot be activated here yet.', + foundryTitle: 'Windows Foundry Local Whisper', + foundryDesc: 'Windows uses Microsoft Foundry Local Whisper to recognize speech on this device with no ASR API key. First prepare downloads local runtime components and a model, then loads it. LLM polishing still uses your configured LLM provider; if none is configured, the existing raw transcript fallback still applies.', + foundryAvailable: 'Runtime available', + foundryUnavailable: 'Runtime unavailable', + foundrySelectedModel: 'Selected model', + foundryActiveModel: 'Current default alias', + foundryLoadedModel: 'Loaded model', + foundryNotLoaded: 'Not loaded', + foundryError: 'Foundry status', + foundrySetDefault: 'Set default / Enable Windows local ASR', + foundryEnabling: 'Enabling…', + foundryPrepare: 'Prepare / Download / Load', + foundryPreparing: 'Preparing…', + foundryReleasing: 'Releasing…', + foundryRetryPrepare: 'Continue / Retry prepare', + foundryCancelPrepare: 'Cancel prepare', + foundryCancelRequested: 'Cancel requested', + foundryCancelling: 'Cancelling…', + foundryCancelBestEffort: 'The Foundry SDK does not expose a download cancel token yet. OpenLess has requested cancellation and will stop the next load step after the current SDK step returns. 
You can continue / retry prepare later.', + foundryPrepareRuntime: 'Prepare runtime components', + foundryPrepareModel: 'Download model', + foundryPrepareLoad: 'Load model', + foundryPrepareModelSkipped: 'Model already downloaded; download skipped', + foundryPrepareDone: 'Done', + foundryPrepareWaiting: 'Waiting', + foundryApproxSizeMb: 'about {{mb}} MB', + foundryLanguageLabel: 'Recognition language', + foundryLanguageAuto: 'Auto', + foundryLanguageZh: 'Chinese zh', + foundryLanguageEn: 'English en', + foundryLanguageDesc: 'For Chinese dictation, choose Chinese. For mixed Chinese and English, try Auto first; choose Chinese if Chinese speech is recognized as English.', + foundryModelSmall: 'Whisper Small (default / balanced)', + foundryModelSmallDesc: 'Default balanced option for quality and resource use.', + foundryModelBase: 'Whisper Base (faster / lower resource)', + foundryModelBaseDesc: 'Faster with lower resource use for lightweight daily dictation.', + foundryModelTiny: 'Whisper Tiny (fastest / smoke test)', + foundryModelTinyDesc: 'Fastest check option for confirming the Foundry path works.', mirrorLabel: 'Download mirror', mirrorDesc: 'huggingface.co is the official source; hf-mirror.com is a community mirror friendlier to Mainland China networks.', mirrorHuggingface: 'HuggingFace official (huggingface.co)', mirrorHfMirror: 'Mainland mirror (hf-mirror.com)', activeBadge: 'In use', downloadedBadge: 'Downloaded', + notDownloadedBadge: 'Not downloaded', download: 'Download', resume: 'Resume', cancel: 'Cancel', @@ -576,7 +617,7 @@ export const en: typeof zhCN = { files: 'files', sizeLoading: 'Fetching size…', sizeUnknown: 'Size unknown', - performanceWarning: 'Local inference runs on CPU + Apple Silicon Accelerate. **First transcription loads the model (a few seconds)**, and each subsequent one is several seconds slower than cloud ASR. Chinese / dialect accuracy is typically lower than Volcengine / Whisper turbo. 
Best for offline, privacy-sensitive, or no-cloud-API scenarios.', + performanceWarning: 'Local ASR is best for offline, privacy-sensitive, or no-cloud-ASR-API scenarios. First use may take time because runtime and model downloads plus loading all happen locally.', test: 'Load & Test', testRunning: 'Testing…', testHeading: 'Built-in audio test', @@ -585,12 +626,12 @@ export const en: typeof zhCN = { testStats: 'Audio {{audio}}s · Load {{load}}s · Transcribe {{transcribe}}s · Backend {{backend}}', testFailed: 'Test failed', engineStatusLabel: 'Engine in memory', - engineLoaded: 'Loaded: {{model}} (~1.2-3.4 GB RAM)', - engineUnloaded: 'Not loaded (first transcription will load it, ~3-5 s)', + engineLoaded: 'Loaded: {{model}}', + engineUnloaded: 'Not loaded (first transcription must load the model)', loadNow: 'Load now', releaseNow: 'Release now', keepLoadedLabel: 'Keep loaded for', - keepLoadedDesc: 'How long the engine stays in memory after the last use, before being freed.', + keepLoadedDesc: 'How long Qwen3-ASR stays in memory after the last use, before being freed.', keepImmediate: 'Release immediately', keep1min: '1 minute after last use', keep5min: '5 minutes after last use (default)', diff --git a/openless-all/app/src/i18n/ja.ts b/openless-all/app/src/i18n/ja.ts index 45c8f8f8..93852631 100644 --- a/openless-all/app/src/i18n/ja.ts +++ b/openless-all/app/src/i18n/ja.ts @@ -325,6 +325,7 @@ export const ja: typeof zhCN = { asrZhipu: 'Zhipu GLM-ASR', asrGroq: 'Groq Whisper-large-v3', asrWhisper: 'OpenAI Whisper(互換)', + asrFoundryLocalWhisper: 'ローカル Whisper(Foundry Local)', asrLocalQwen3: 'ローカル Qwen3-ASR', }, volcengineAppKeyLabel: 'APP ID', @@ -332,6 +333,7 @@ export const ja: typeof zhCN = { volcengineResourceIdLabel: 'Resource ID', volcengineMappingNote: 'Secret Key は現在不要。Resource ID のデフォルトは volc.bigasr.sauc.duration。', localAsrHint: 'ローカル Qwen3-ASR は本機で実行されるため API Key 不要。HuggingFace からモデルをダウンロードすればすぐに利用できます。', + foundryLocalAsrHint: 'Windows ローカル Whisper 
は本機で実行され、ASR API Key は不要です。初回使用時に Foundry Local ランタイムコンポーネントと Whisper モデルをダウンロードします。LLM 整文は引き続き設定済みの LLM プロバイダーを使用します。', localAsrPerformanceWarning: 'ローカル推論は CPU + Apple Silicon Accelerate で動作するため、1 回の転写時間は **クラウド ASR より数秒長くなります**。中国語認識精度や方言/訛り対応も **通常は** Volcengine / Whisper turbo に劣ります。ネットワーク制限下またはプライバシー重視の場合に選択してください。', localAsrReady: '{{model}} ダウンロード済み', localAsrNotReady: '{{model}} 未ダウンロード', @@ -544,7 +546,7 @@ export const ja: typeof zhCN = { rightCommand: '右 Command', fn: 'Fn (地球キー)', rightAlt: '右 Alt', - custom: 'カスタム', + custom: 'カスタム組み合わせ…', }, fallback: 'グローバルショートカット', modeHoldSuffix: '(押し続けて話す)', @@ -560,14 +562,53 @@ export const ja: typeof zhCN = { localAsr: { kicker: 'ローカル ASR', title: 'モデル設定', - desc: 'ローカル Qwen3-ASR エンジンとモデル管理。HuggingFace から本機にモデルをダウンロードすれば、ネット接続なしで認識可能。Windows のストリーミング推論は issue を参照。', - engineUnavailable: '現在のプラットフォームではローカル推論エンジンが未統合(macOS のみ対応、Windows は issue #256 で追跡)。モデルのダウンロードは可能ですが、有効化はできません。', + desc: '本機の ASR モデルを管理します。Windows では Microsoft Foundry Local Whisper を使用でき、Qwen3-ASR のモデル管理は独立しています。', + qwenTitle: 'Qwen3-ASR モデル管理', + engineUnavailable: '現在のプラットフォームには Qwen3-ASR 推論エンジンが同梱されていません。モデルのダウンロードは可能ですが、ここではまだ Qwen3-ASR を有効化できません。', + foundryTitle: 'Windows Foundry Local Whisper', + foundryDesc: 'Windows では Microsoft Foundry Local Whisper が本機上で音声を認識し、ASR API Key は不要です。初回準備ではローカル実行コンポーネントとモデルをダウンロードして読み込みます。LLM 整文は設定済みの LLM プロバイダーを引き続き使用し、未設定の場合は従来どおり生の転写結果にフォールバックします。', + foundryAvailable: 'ランタイム利用可能', + foundryUnavailable: 'ランタイム利用不可', + foundrySelectedModel: '選択中のモデル', + foundryActiveModel: '現在の既定 alias', + foundryLoadedModel: '読み込み済みモデル', + foundryNotLoaded: '未読み込み', + foundryError: 'Foundry 状態', + foundrySetDefault: '既定に設定 / Windows ローカル ASR を有効化', + foundryEnabling: '有効化中…', + foundryPrepare: '準備 / ダウンロード / 読み込み', + foundryPreparing: '準備中…', + foundryReleasing: '解放中…', + foundryRetryPrepare: '準備を続行 / 再試行', + foundryCancelPrepare: '準備をキャンセル', + foundryCancelRequested: 'キャンセル要求済み', + foundryCancelling: 'キャンセル中…', + 
foundryCancelBestEffort: 'Foundry SDK は現在ダウンロード用のキャンセルトークンを公開していません。OpenLess はキャンセルを要求済みで、現在の SDK ステップが戻った後に次の読み込みステップを停止します。後で準備の続行 / 再試行ができます。', + foundryPrepareRuntime: 'ランタイムコンポーネントを準備', + foundryPrepareModel: 'モデルをダウンロード', + foundryPrepareLoad: 'モデルを読み込み', + foundryPrepareModelSkipped: 'モデルはダウンロード済みのため、ダウンロードをスキップ', + foundryPrepareDone: '完了', + foundryPrepareWaiting: '待機中', + foundryApproxSizeMb: '約 {{mb}} MB', + foundryLanguageLabel: '認識言語', + foundryLanguageAuto: '自動', + foundryLanguageZh: '中国語 zh', + foundryLanguageEn: '英語 en', + foundryLanguageDesc: '中国語のディクテーションでは中国語を推奨します。中英混在ではまず自動を試し、中国語が英語として認識される場合は中国語を選んでください。', + foundryModelSmall: 'Whisper Small(既定 / バランス)', + foundryModelSmallDesc: '品質とリソース使用量のバランスを取った既定オプション。', + foundryModelBase: 'Whisper Base(高速 / 低リソース)', + foundryModelBaseDesc: 'より高速でリソース消費が少なく、日常の軽量ディクテーションに適しています。', + foundryModelTiny: 'Whisper Tiny(最速 / スモークテスト)', + foundryModelTinyDesc: 'Foundry 経路が動作するか確認するための最速オプション。', mirrorLabel: 'ダウンロードミラー', mirrorDesc: '公式ソースは海外ネットワークで安定。hf-mirror.com は中国コミュニティ運営のミラー。', mirrorHuggingface: 'HuggingFace 公式 (huggingface.co)', mirrorHfMirror: '中国ミラー (hf-mirror.com)', activeBadge: '使用中', downloadedBadge: 'ダウンロード済み', + notDownloadedBadge: '未ダウンロード', download: 'ダウンロード', resume: '続行', cancel: 'キャンセル', diff --git a/openless-all/app/src/i18n/ko.ts b/openless-all/app/src/i18n/ko.ts index b7003c37..2bbc9628 100644 --- a/openless-all/app/src/i18n/ko.ts +++ b/openless-all/app/src/i18n/ko.ts @@ -325,6 +325,7 @@ export const ko: typeof zhCN = { asrZhipu: 'Zhipu GLM-ASR', asrGroq: 'Groq Whisper-large-v3', asrWhisper: 'OpenAI Whisper(호환)', + asrFoundryLocalWhisper: '로컬 Whisper(Foundry Local)', asrLocalQwen3: '로컬 Qwen3-ASR', }, volcengineAppKeyLabel: 'APP ID', @@ -332,6 +333,7 @@ export const ko: typeof zhCN = { volcengineResourceIdLabel: 'Resource ID', volcengineMappingNote: 'Secret Key 는 현재 입력 불필요. Resource ID 기본값은 volc.bigasr.sauc.duration.', localAsrHint: '로컬 Qwen3-ASR 은 본 기기에서 실행되며 API Key 가 필요 없습니다. 
HuggingFace 에서 모델을 로컬로 다운로드하면 즉시 사용 가능합니다.', + foundryLocalAsrHint: 'Windows 로컬 Whisper 는 이 기기에서 실행되며 ASR API Key 가 필요 없습니다. 첫 사용 시 Foundry Local 런타임 구성 요소와 Whisper 모델을 다운로드합니다. LLM 정리는 계속 설정된 LLM 공급자를 사용합니다.', localAsrPerformanceWarning: '로컬 추론은 CPU + Apple Silicon Accelerate 에서 동작하므로, 한 번의 전사 시간이 **클라우드 ASR 보다 몇 초 더 걸립니다**. 중국어 인식 정확도와 방언/억양 대응도 **일반적으로** Volcengine / Whisper turbo 에 미치지 못합니다. 네트워크 제한 또는 프라이버시가 중요한 경우에 사용하세요.', localAsrReady: '{{model}} 다운로드됨', localAsrNotReady: '{{model}} 다운로드되지 않음', @@ -544,7 +546,7 @@ export const ko: typeof zhCN = { rightCommand: '오른쪽 Command', fn: 'Fn (지구본 키)', rightAlt: '오른쪽 Alt', - custom: '사용자 정의', + custom: '사용자 지정 조합…', }, fallback: '전역 단축키', modeHoldSuffix: '(눌러서 말하기)', @@ -560,14 +562,53 @@ export const ko: typeof zhCN = { localAsr: { kicker: '로컬 ASR', title: '모델 설정', - desc: '로컬 Qwen3-ASR 엔진과 모델 관리. HuggingFace 에서 본 기기로 모델을 다운로드하면 네트워크 없이 인식 가능. Windows 의 스트리밍 추론은 저장소 issue 참조.', - engineUnavailable: '현재 플랫폼은 로컬 추론 엔진이 통합되지 않았습니다(macOS 만 지원, Windows 는 issue #256 추적). 모델은 다운로드 가능하지만 일시적으로 활성화할 수 없습니다.', + desc: '기기 내 ASR 모델을 관리합니다. Windows 에서는 Microsoft Foundry Local Whisper 를 사용할 수 있으며, Qwen3-ASR 모델 관리는 별도로 유지됩니다.', + qwenTitle: 'Qwen3-ASR 모델 관리', + engineUnavailable: '현재 플랫폼에는 Qwen3-ASR 추론 엔진이 포함되어 있지 않습니다. 모델은 다운로드할 수 있지만 여기서는 아직 Qwen3-ASR 을 활성화할 수 없습니다.', + foundryTitle: 'Windows Foundry Local Whisper', + foundryDesc: 'Windows 는 Microsoft Foundry Local Whisper 로 이 기기에서 음성을 인식하며 ASR API Key 가 필요 없습니다. 첫 준비 시 로컬 런타임 구성 요소와 모델을 다운로드한 뒤 로드합니다. 
LLM 정리는 계속 설정된 LLM 공급자를 사용하며, 설정되지 않은 경우 기존 원문 전사 폴백을 그대로 사용합니다.', + foundryAvailable: '런타임 사용 가능', + foundryUnavailable: '런타임 사용 불가', + foundrySelectedModel: '선택한 모델', + foundryActiveModel: '현재 기본 alias', + foundryLoadedModel: '로드된 모델', + foundryNotLoaded: '로드되지 않음', + foundryError: 'Foundry 상태', + foundrySetDefault: '기본값으로 설정 / Windows 로컬 ASR 활성화', + foundryEnabling: '활성화 중…', + foundryPrepare: '준비 / 다운로드 / 로드', + foundryPreparing: '준비 중…', + foundryReleasing: '해제 중…', + foundryRetryPrepare: '준비 계속 / 다시 시도', + foundryCancelPrepare: '준비 취소', + foundryCancelRequested: '취소 요청됨', + foundryCancelling: '취소 중…', + foundryCancelBestEffort: 'Foundry SDK 는 현재 다운로드 취소 토큰을 공개하지 않습니다. OpenLess 는 취소를 요청했으며 현재 SDK 단계가 반환된 뒤 다음 로드 단계를 중지합니다. 나중에 준비를 계속하거나 다시 시도할 수 있습니다.', + foundryPrepareRuntime: '런타임 구성 요소 준비', + foundryPrepareModel: '모델 다운로드', + foundryPrepareLoad: '모델 로드', + foundryPrepareModelSkipped: '모델이 이미 다운로드되어 다운로드 단계를 건너뜀', + foundryPrepareDone: '완료', + foundryPrepareWaiting: '대기 중', + foundryApproxSizeMb: '약 {{mb}} MB', + foundryLanguageLabel: '인식 언어', + foundryLanguageAuto: '자동', + foundryLanguageZh: '중국어 zh', + foundryLanguageEn: '영어 en', + foundryLanguageDesc: '중국어 받아쓰기는 중국어를 권장합니다. 
중영 혼합 입력은 먼저 자동을 사용하고, 중국어가 영어로 인식되면 중국어를 선택하세요.', + foundryModelSmall: 'Whisper Small(기본 / 균형)', + foundryModelSmallDesc: '품질과 리소스 사용량을 균형 있게 맞춘 기본 옵션.', + foundryModelBase: 'Whisper Base(더 빠름 / 낮은 리소스)', + foundryModelBaseDesc: '더 빠르고 리소스를 적게 사용해 가벼운 일상 받아쓰기에 적합합니다.', + foundryModelTiny: 'Whisper Tiny(가장 빠름 / 스모크 테스트)', + foundryModelTinyDesc: 'Foundry 경로가 작동하는지 확인하기 위한 가장 빠른 옵션.', mirrorLabel: '다운로드 미러', mirrorDesc: '공식 소스는 해외 네트워크에서 안정적; hf-mirror.com 은 중국 커뮤니티가 운영하는 미러.', mirrorHuggingface: 'HuggingFace 공식 (huggingface.co)', mirrorHfMirror: '중국 미러 (hf-mirror.com)', activeBadge: '사용 중', downloadedBadge: '다운로드됨', + notDownloadedBadge: '다운로드되지 않음', download: '다운로드', resume: '계속 다운로드', cancel: '취소', diff --git a/openless-all/app/src/i18n/zh-CN.ts b/openless-all/app/src/i18n/zh-CN.ts index 97f72673..bdc97244 100644 --- a/openless-all/app/src/i18n/zh-CN.ts +++ b/openless-all/app/src/i18n/zh-CN.ts @@ -321,6 +321,7 @@ export const zhCN = { asrZhipu: '智谱 GLM-ASR', asrGroq: 'Groq Whisper-large-v3', asrWhisper: 'OpenAI Whisper(兼容)', + asrFoundryLocalWhisper: '本地 Whisper(Foundry Local)', asrLocalQwen3: '本地 Qwen3-ASR', }, volcengineAppKeyLabel: 'APP ID', @@ -328,6 +329,7 @@ export const zhCN = { volcengineResourceIdLabel: 'Resource ID', volcengineMappingNote: 'Secret Key 当前无需填写。Resource ID 默认使用 volc.bigasr.sauc.duration。', localAsrHint: '本地 Qwen3-ASR 在本机运行,无需 API Key。模型从 HuggingFace 下载到本地后即可使用。', + foundryLocalAsrHint: 'Windows 本地 Whisper 在本机运行,无需 ASR API Key。首次使用会下载 Foundry Local 运行组件和 Whisper 模型;LLM 润色仍按你配置的 LLM 提供商调用。', localAsrPerformanceWarning: '本地推理跑在 CPU + Apple Silicon Accelerate 上,单次转写时间会**比云端 ASR 长几秒**;中文识别准确率与方言/口音表现也**通常不如**火山引擎 / Whisper turbo。请按需取舍:网络受限或对隐私敏感时再用本地。', localAsrReady: '{{model}} 已下载', localAsrNotReady: '{{model}} 未下载', @@ -556,14 +558,53 @@ export const zhCN = { localAsr: { kicker: '本地 ASR', title: '模型设置', - desc: '本地 Qwen3-ASR 引擎与模型管理。模型从 HuggingFace 下载到本机,无需联网即可识别。Windows 端流式推理跟踪见仓库 issue。', - engineUnavailable: '当前平台暂未集成本地推理引擎(仅 macOS 
已支持,Windows 端跟踪 issue #256)。可下载模型,但暂时无法启用。', + desc: '管理本机 ASR 模型。Windows 可使用 Microsoft Foundry Local Whisper;Qwen3-ASR 模型管理保持独立。', + qwenTitle: 'Qwen3-ASR 模型管理', + engineUnavailable: '当前平台暂未集成 Qwen3-ASR 推理引擎。可下载模型,但暂时无法启用 Qwen3-ASR。', + foundryTitle: 'Windows Foundry Local Whisper', + foundryDesc: 'Windows 使用 Microsoft Foundry Local Whisper 在本机识别语音,无需 ASR API Key。首次准备会在本机下载运行组件和模型并加载;LLM 润色仍使用你已配置的 LLM 提供商,未配置时沿用原始转写回退。', + foundryAvailable: '运行时可用', + foundryUnavailable: '运行时不可用', + foundrySelectedModel: '选择模型', + foundryActiveModel: '当前默认 alias', + foundryLoadedModel: '已加载模型', + foundryNotLoaded: '未加载', + foundryError: 'Foundry 状态', + foundrySetDefault: '设为默认 / 启用 Windows 本地 ASR', + foundryEnabling: '正在启用…', + foundryPrepare: '准备 / 下载 / 加载', + foundryPreparing: '正在准备…', + foundryReleasing: '正在释放…', + foundryRetryPrepare: '继续准备 / 重试', + foundryCancelPrepare: '取消准备', + foundryCancelRequested: '已请求取消', + foundryCancelling: '正在取消…', + foundryCancelBestEffort: 'Foundry SDK 当前未暴露下载取消令牌;OpenLess 已请求取消,会在当前 SDK 步骤返回后停止后续加载。可稍后继续准备 / 重试。', + foundryPrepareRuntime: '准备运行时组件', + foundryPrepareModel: '下载模型', + foundryPrepareLoad: '加载模型', + foundryPrepareModelSkipped: '模型已下载,跳过下载阶段', + foundryPrepareDone: '已完成', + foundryPrepareWaiting: '等待中', + foundryApproxSizeMb: '约 {{mb}} MB', + foundryLanguageLabel: '识别语言', + foundryLanguageAuto: '自动', + foundryLanguageZh: '中文 zh', + foundryLanguageEn: '英文 en', + foundryLanguageDesc: '中文听写建议选中文;中英混输可先用自动,若中文被识别成英文再选中文。', + foundryModelSmall: 'Whisper Small(默认 / 平衡)', + foundryModelSmallDesc: '默认平衡选项,兼顾质量与资源占用。', + foundryModelBase: 'Whisper Base(更快 / 更省资源)', + foundryModelBaseDesc: '更快、资源占用更低,适合日常轻量使用。', + foundryModelTiny: 'Whisper Tiny(最快 / 冒烟测试)', + foundryModelTinyDesc: '最快的检查选项,适合确认 Foundry 路径可用。', mirrorLabel: '下载镜像源', mirrorDesc: '官方源在国外网络更稳;hf-mirror.com 是国内社区维护的镜像。', mirrorHuggingface: 'HuggingFace 官方 (huggingface.co)', mirrorHfMirror: '国内镜像 (hf-mirror.com)', activeBadge: '当前使用', downloadedBadge: '已下载', + notDownloadedBadge: 
'未下载', download: '下载', resume: '继续下载', cancel: '取消', @@ -574,7 +615,7 @@ export const zhCN = { files: '文件', sizeLoading: '正在查询尺寸…', sizeUnknown: '尺寸未知', - performanceWarning: '本地推理跑在 CPU + Apple Silicon Accelerate 上,**首次转写需要加载模型(数秒)**,之后单次转写也会比云端 ASR 慢若干秒;中文识别准确率与方言/口音表现通常不如火山引擎 / Whisper turbo。适用场景:离线 / 隐私敏感 / 不愿付费云 API。', + performanceWarning: '本地 ASR 适合离线、隐私敏感或不想使用云端 ASR API 的场景。首次使用可能需要较长时间,因为运行时、模型下载和加载都在本机完成。', test: '加载并测试', testRunning: '测试中…', testHeading: '内置音频测试', @@ -583,12 +624,12 @@ export const zhCN = { testStats: '音频时长 {{audio}}s · 加载 {{load}}s · 推理 {{transcribe}}s · 后端 {{backend}}', testFailed: '测试失败', engineStatusLabel: '内存中的引擎', - engineLoaded: '已加载:{{model}}(约占 1.2-3.4 GB 内存)', - engineUnloaded: '未加载(首次听写需先加载,约 3-5 秒)', + engineLoaded: '已加载:{{model}}', + engineUnloaded: '未加载(首次听写需先加载模型)', loadNow: '立即加载', releaseNow: '立即释放', keepLoadedLabel: '保持加载多久', - keepLoadedDesc: '决定本地 ASR 用完后多久从内存释放,避免长期占用 1+ GB RAM。', + keepLoadedDesc: '决定 Qwen3-ASR 用完后多久从内存释放,避免长期占用内存。', keepImmediate: '说完话立即释放', keep1min: '上次使用后 1 分钟', keep5min: '上次使用后 5 分钟(默认)', diff --git a/openless-all/app/src/i18n/zh-TW.ts b/openless-all/app/src/i18n/zh-TW.ts index 44d92096..7c352e40 100644 --- a/openless-all/app/src/i18n/zh-TW.ts +++ b/openless-all/app/src/i18n/zh-TW.ts @@ -323,6 +323,7 @@ export const zhTW: typeof zhCN = { asrZhipu: '智譜 GLM-ASR', asrGroq: 'Groq Whisper-large-v3', asrWhisper: 'OpenAI Whisper(兼容)', + asrFoundryLocalWhisper: '本地 Whisper(Foundry Local)', asrLocalQwen3: '本地 Qwen3-ASR', }, volcengineAppKeyLabel: 'APP ID', @@ -330,6 +331,7 @@ export const zhTW: typeof zhCN = { volcengineResourceIdLabel: 'Resource ID', volcengineMappingNote: 'Secret Key 當前無需填寫。Resource ID 默認使用 volc.bigasr.sauc.duration。', localAsrHint: '本地 Qwen3-ASR 在本機運行,無需 API Key。模型從 HuggingFace 下載到本地後即可使用。', + foundryLocalAsrHint: 'Windows 本地 Whisper 在本機運行,無需 ASR API Key。首次使用會下載 Foundry Local 運行組件和 Whisper 模型;LLM 潤色仍按你配置的 LLM 提供商調用。', localAsrPerformanceWarning: '本地推理跑在 CPU + Apple Silicon Accelerate 
上,**首次轉寫需要加載模型(數秒)**,之後單次轉寫也會比雲端 ASR 慢若干秒;中文識別準確率與方言/口音表現通常不如火山引擎 / Whisper turbo。適用場景:離線 / 隱私敏感 / 不願付費雲 API。', localAsrReady: '{{model}} 已下載', localAsrNotReady: '{{model}} 未下載', @@ -558,14 +560,53 @@ export const zhTW: typeof zhCN = { localAsr: { kicker: '本地 ASR', title: '模型設置', - desc: '本地 Qwen3-ASR 引擎與模型管理。模型從 HuggingFace 下載到本機,無需聯網即可識別。Windows 端流式推理跟蹤見倉庫 issue。', - engineUnavailable: '當前平臺暫未集成本地推理引擎(僅 macOS 已支持,Windows 端跟蹤 issue #256)。可下載模型,但暫時無法啟用。', + desc: '管理本機 ASR 模型。Windows 可使用 Microsoft Foundry Local Whisper;Qwen3-ASR 模型管理保持獨立。', + qwenTitle: 'Qwen3-ASR 模型管理', + engineUnavailable: '當前平臺暫未集成 Qwen3-ASR 推理引擎。可下載模型,但暫時無法啟用 Qwen3-ASR。', + foundryTitle: 'Windows Foundry Local Whisper', + foundryDesc: 'Windows 使用 Microsoft Foundry Local Whisper 在本機識別語音,無需 ASR API Key。首次準備會在本機下載運行組件和模型並加載;LLM 潤色仍使用你已配置的 LLM 提供商,未配置時沿用原始轉寫回退。', + foundryAvailable: '運行時可用', + foundryUnavailable: '運行時不可用', + foundrySelectedModel: '選擇模型', + foundryActiveModel: '當前默認 alias', + foundryLoadedModel: '已加載模型', + foundryNotLoaded: '未加載', + foundryError: 'Foundry 狀態', + foundrySetDefault: '設為默認 / 啟用 Windows 本地 ASR', + foundryEnabling: '正在啟用…', + foundryPrepare: '準備 / 下載 / 加載', + foundryPreparing: '正在準備…', + foundryReleasing: '正在釋放…', + foundryRetryPrepare: '繼續準備 / 重試', + foundryCancelPrepare: '取消準備', + foundryCancelRequested: '已請求取消', + foundryCancelling: '正在取消…', + foundryCancelBestEffort: 'Foundry SDK 目前未暴露下載取消令牌;OpenLess 已請求取消,會在當前 SDK 步驟返回後停止後續加載。可稍後繼續準備 / 重試。', + foundryPrepareRuntime: '準備運行時組件', + foundryPrepareModel: '下載模型', + foundryPrepareLoad: '加載模型', + foundryPrepareModelSkipped: '模型已下載,跳過下載階段', + foundryPrepareDone: '已完成', + foundryPrepareWaiting: '等待中', + foundryApproxSizeMb: '約 {{mb}} MB', + foundryLanguageLabel: '識別語言', + foundryLanguageAuto: '自動', + foundryLanguageZh: '中文 zh', + foundryLanguageEn: '英文 en', + foundryLanguageDesc: '中文聽寫建議選中文;中英混輸可先用自動,若中文被識別成英文再選中文。', + foundryModelSmall: 'Whisper Small(默認 / 平衡)', + foundryModelSmallDesc: '默認平衡選項,兼顧質量與資源佔用。', + 
foundryModelBase: 'Whisper Base(更快 / 更省資源)', + foundryModelBaseDesc: '更快、資源佔用更低,適合日常輕量使用。', + foundryModelTiny: 'Whisper Tiny(最快 / 冒煙測試)', + foundryModelTinyDesc: '最快的檢查選項,適合確認 Foundry 路徑可用。', mirrorLabel: '下載鏡像源', mirrorDesc: '官方源在國外網絡更穩;hf-mirror.com 是國內社區維護的鏡像。', mirrorHuggingface: 'HuggingFace 官方 (huggingface.co)', mirrorHfMirror: '國內鏡像 (hf-mirror.com)', activeBadge: '當前使用', downloadedBadge: '已下載', + notDownloadedBadge: '未下載', download: '下載', resume: '繼續下載', cancel: '取消', @@ -576,7 +617,7 @@ export const zhTW: typeof zhCN = { files: '文件', sizeLoading: '正在查詢尺寸…', sizeUnknown: '尺寸未知', - performanceWarning: '本地推理跑在 CPU + Apple Silicon Accelerate 上,**首次轉寫需要加載模型(數秒)**,之後單次轉寫也會比雲端 ASR 慢若干秒;中文識別準確率與方言/口音表現通常不如火山引擎 / Whisper turbo。適用場景:離線 / 隱私敏感 / 不願付費雲 API。', + performanceWarning: '本地 ASR 適合離線、隱私敏感或不想使用雲端 ASR API 的場景。首次使用可能需要較長時間,因為運行時、模型下載和加載都在本機完成。', test: '加載並測試', testRunning: '測試中…', testHeading: '內置音頻測試', @@ -585,12 +626,12 @@ export const zhTW: typeof zhCN = { testStats: '音頻時長 {{audio}}s · 加載 {{load}}s · 推理 {{transcribe}}s · 後端 {{backend}}', testFailed: '測試失敗', engineStatusLabel: '內存中的引擎', - engineLoaded: '已加載:{{model}}(約佔 1.2-3.4 GB 內存)', - engineUnloaded: '未加載(首次聽寫需先加載,約 3-5 秒)', + engineLoaded: '已加載:{{model}}', + engineUnloaded: '未加載(首次聽寫需先加載模型)', loadNow: '立即加載', releaseNow: '立即釋放', keepLoadedLabel: '保持加載多久', - keepLoadedDesc: '決定本地 ASR 用完後多久從內存釋放,避免長期佔用 1+ GB RAM。', + keepLoadedDesc: '決定 Qwen3-ASR 用完後多久從內存釋放,避免長期佔用內存。', keepImmediate: '說完話立即釋放', keep1min: '上次使用後 1 分鐘', keep5min: '上次使用後 5 分鐘(默認)', diff --git a/openless-all/app/src/lib/hotkey.ts b/openless-all/app/src/lib/hotkey.ts index 048febf2..d9cff152 100644 --- a/openless-all/app/src/lib/hotkey.ts +++ b/openless-all/app/src/lib/hotkey.ts @@ -39,7 +39,9 @@ export function getHotkeyStartStopLabel( const trigger = getHotkeyTriggerLabel(binding?.trigger); const suffix = binding?.mode === 'hold' ? i18n.t('hotkey.modeHoldSuffix') - : i18n.t('hotkey.modeToggleSuffix'); + : binding?.mode === 'doubleClick' + ? 
i18n.t('hotkey.modeDoubleClickSuffix') + : i18n.t('hotkey.modeToggleSuffix'); return `${trigger}${suffix}`; } @@ -63,9 +65,110 @@ export function getHotkeyUsageHint( const trigger = getHotkeyTriggerLabel(binding?.trigger); return binding?.mode === 'hold' ? i18n.t('hotkey.usageHold', { trigger }) + : binding?.mode === 'doubleClick' + ? i18n.t('hotkey.usageDoubleClick', { trigger }) : i18n.t('hotkey.usageToggle', { trigger }); } +export function getHotkeyBindingCodes(binding: HotkeyBinding | null | undefined): string[] { + if (!binding) return []; + if (Array.isArray(binding.keys)) { + return binding.keys.map(key => key.code.trim()).filter(Boolean); + } + const legacy = legacyTriggerCode(binding.trigger); + return legacy ? [legacy] : []; +} + +export function getHotkeyBindingLabel(binding: HotkeyBinding | null | undefined): string { + const codes = getHotkeyBindingCodes(binding); + if (codes.length === 0) return i18n.t('hotkey.unset'); + return codes.map(getHotkeyCodeLabel).join('+'); +} + +export function getHotkeyCodeLabel(code: string): string { + const zh = i18n.language.toLowerCase().startsWith('zh'); + const labels: Record = { + ControlLeft: zh ? '左Ctrl' : 'Left Ctrl', + ControlRight: zh ? '右Ctrl' : 'Right Ctrl', + AltLeft: zh ? '左Alt' : 'Left Alt', + AltRight: zh ? '右Alt' : 'Right Alt', + ShiftLeft: zh ? '左Shift' : 'Left Shift', + ShiftRight: zh ? '右Shift' : 'Right Shift', + MetaLeft: zh ? '左Win' : 'Left Win', + MetaRight: zh ? '右Win' : 'Right Win', + OSLeft: zh ? '左Win' : 'Left Win', + OSRight: zh ? 
'右Win' : 'Right Win', + Fn: 'Fn', + FnLock: 'FnLock', + CapsLock: 'CapsLock', + ScrollLock: 'ScrLock', + Pause: 'Pause', + PrintScreen: 'PrtSc', + Backspace: 'Backspace', + Tab: 'Tab', + Enter: 'Enter', + Space: 'Space', + Insert: 'Insert', + Delete: 'Delete', + Home: 'Home', + End: 'End', + PageUp: 'PageUp', + PageDown: 'PageDown', + ArrowUp: 'Up', + ArrowDown: 'Down', + ArrowLeft: 'Left', + ArrowRight: 'Right', + ContextMenu: 'Menu', + NumpadAdd: 'Num+', + NumpadSubtract: 'Num-', + NumpadMultiply: 'Num*', + NumpadDivide: 'Num/', + NumpadDecimal: 'Num.', + NumpadEnter: 'NumEnter', + Mouse4: 'Mouse4', + Mouse5: 'Mouse5', + Backquote: '`', + Minus: '-', + Equal: '=', + BracketLeft: '[', + BracketRight: ']', + Backslash: '\\', + Semicolon: ';', + Quote: "'", + Comma: ',', + Period: '.', + Slash: '/', + }; + if (labels[code]) return labels[code]; + const letter = code.match(/^Key([A-Z])$/); + if (letter) return letter[1]; + const digit = code.match(/^Digit([0-9])$/); + if (digit) return digit[1]; + const numpad = code.match(/^Numpad([0-9])$/); + if (numpad) return `Num${numpad[1]}`; + return code; +} + +function legacyTriggerCode(trigger: HotkeyTrigger | null | undefined): string | null { + switch (trigger) { + case 'rightOption': + case 'rightAlt': + return 'AltRight'; + case 'leftOption': + return 'AltLeft'; + case 'rightControl': + return 'ControlRight'; + case 'leftControl': + return 'ControlLeft'; + case 'rightCommand': + return 'MetaRight'; + case 'fn': + return 'Fn'; + default: + return null; + } +} + /** 把 ComboBinding 或 QaHotkeyBinding 格式化为可读标签,如 "⌘⇧D" / "Ctrl+Shift+D"。 */ export function formatComboLabel(binding: ComboBinding | QaHotkeyBinding | ShortcutBinding): string { const parts: string[] = []; diff --git a/openless-all/app/src/lib/hotkeyRecorder.test.ts b/openless-all/app/src/lib/hotkeyRecorder.test.ts new file mode 100644 index 00000000..a1a5841a --- /dev/null +++ b/openless-all/app/src/lib/hotkeyRecorder.test.ts @@ -0,0 +1,85 @@ +import { + 
createHotkeyRecorderState, + orderHotkeyCodes, + updateHotkeyRecorderState, +} from './hotkeyRecorder'; + +function assertEqual(actual: T, expected: T, name: string) { + if (actual !== expected) { + throw new Error(`${name}: expected ${expected}, got ${actual}`); + } +} + +function assertDeepEqual(actual: unknown, expected: unknown, name: string) { + const actualJson = JSON.stringify(actual); + const expectedJson = JSON.stringify(expected); + if (actualJson !== expectedJson) { + throw new Error(`${name}: expected ${expectedJson}, got ${actualJson}`); + } +} + +function apply( + state = createHotkeyRecorderState(), + code: string, + pressed: boolean, +) { + const next = updateHotkeyRecorderState(state, code, pressed); + return next; +} + +{ + let result = apply(undefined, 'ControlLeft', true); + assertDeepEqual(result.state.draftCodes, ['ControlLeft'], 'tracks first pressed key'); + assertEqual(result.commitCodes, null, 'does not commit until release'); + + result = apply(result.state, 'ControlLeft', false); + assertDeepEqual(result.commitCodes, ['ControlLeft'], 'commits single key on release'); + + result = apply(createHotkeyRecorderState(), 'KeyK', true); + assertDeepEqual(result.state.draftCodes, ['KeyK'], 'starts a new recording state cleanly'); + assertEqual(result.commitCodes, null, 'new keydown does not include old released keys'); +} + +{ + let result = apply(undefined, 'ControlLeft', true); + result = apply(result.state, 'KeyK', true); + assertDeepEqual(result.state.draftCodes, ['ControlLeft', 'KeyK'], 'records keyboard combo draft'); + + result = apply(result.state, 'ControlLeft', false); + assertEqual(result.commitCodes, null, 'keyboard combo waits for final release'); + assertDeepEqual(result.state.draftCodes, ['ControlLeft', 'KeyK'], 'released combo member stays in draft only'); + + result = apply(result.state, 'KeyK', false); + assertDeepEqual(result.commitCodes, ['ControlLeft', 'KeyK'], 'keyboard combo commits after all keys release'); + 
assertDeepEqual(result.state, createHotkeyRecorderState(), 'state resets after commit'); +} + +{ + let result = apply(undefined, 'Mouse4', true); + assertDeepEqual(result.state.draftCodes, ['Mouse4'], 'tracks mouse button as draft'); + assertEqual(result.commitCodes, null, 'mouse button does not commit on mousedown'); + + result = apply(result.state, 'ControlLeft', true); + assertDeepEqual(result.state.draftCodes, ['ControlLeft', 'Mouse4'], 'records keyboard plus mouse combo'); + assertEqual(result.commitCodes, null, 'combo does not commit while inputs remain pressed'); + + result = apply(result.state, 'Mouse4', false); + assertEqual(result.commitCodes, null, 'releasing one combo member does not commit early'); + + result = apply(result.state, 'ControlLeft', false); + assertDeepEqual(result.commitCodes, ['ControlLeft', 'Mouse4'], 'commits combo after final release'); +} + +{ + let result = apply(undefined, 'ControlLeft', true); + result = apply(result.state, 'Mouse5', true); + assertDeepEqual(result.state.draftCodes, ['ControlLeft', 'Mouse5'], 'records mouse button pressed after keyboard'); + + result = apply(result.state, 'ControlLeft', false); + assertEqual(result.commitCodes, null, 'mouse combo keeps waiting while mouse remains pressed'); + + result = apply(result.state, 'Mouse5', false); + assertDeepEqual(result.commitCodes, ['ControlLeft', 'Mouse5'], 'commits mouse-last combo after mouse release'); +} + +assertDeepEqual(orderHotkeyCodes(['Mouse4', 'ControlLeft']), ['ControlLeft', 'Mouse4'], 'orders mouse after modifiers'); diff --git a/openless-all/app/src/lib/hotkeyRecorder.ts b/openless-all/app/src/lib/hotkeyRecorder.ts new file mode 100644 index 00000000..8ab692f5 --- /dev/null +++ b/openless-all/app/src/lib/hotkeyRecorder.ts @@ -0,0 +1,70 @@ +export interface HotkeyRecorderState { + pressedCodes: string[]; + draftCodes: string[]; +} + +export interface HotkeyRecorderUpdate { + state: HotkeyRecorderState; + commitCodes: string[] | null; +} + +export 
function createHotkeyRecorderState(): HotkeyRecorderState { + return { + pressedCodes: [], + draftCodes: [], + }; +} + +export function updateHotkeyRecorderState( + state: HotkeyRecorderState, + code: string, + pressed: boolean, +): HotkeyRecorderUpdate { + const active = new Set(state.pressedCodes); + if (pressed) { + active.add(code); + } else { + active.delete(code); + } + + const pressedCodes = orderHotkeyCodes([...active]); + const draftCodes = pressed ? pressedCodes : state.draftCodes; + const shouldCommit = !pressed && pressedCodes.length === 0 && draftCodes.length > 0; + + return { + state: shouldCommit ? createHotkeyRecorderState() : { pressedCodes, draftCodes }, + commitCodes: shouldCommit ? draftCodes : null, + }; +} + +export function orderHotkeyCodes(codes: string[]): string[] { + const seen = new Set(); + return codes + .filter(code => { + if (!code || seen.has(code)) return false; + seen.add(code); + return true; + }) + .sort((a, b) => hotkeyCodeRank(a) - hotkeyCodeRank(b)); +} + +function hotkeyCodeRank(code: string): number { + const index = HOTKEY_CODE_ORDER.indexOf(code); + if (index >= 0) return index; + if (/^Key[A-Z]$/.test(code)) return 100 + code.charCodeAt(3); + if (/^Digit[0-9]$/.test(code)) return 200 + Number(code.slice(5)); + if (/^F([1-9]|1[0-9]|2[0-4])$/.test(code)) return 300 + Number(code.slice(1)); + if (/^Numpad[0-9]$/.test(code)) return 400 + Number(code.slice(6)); + return 1000; +} + +const HOTKEY_CODE_ORDER = [ + 'ControlLeft', 'ControlRight', 'AltLeft', 'AltRight', 'ShiftLeft', 'ShiftRight', + 'MetaLeft', 'MetaRight', 'Fn', 'FnLock', 'CapsLock', 'ScrollLock', 'Pause', + 'PrintScreen', 'Backspace', 'Tab', 'Enter', 'Space', 'Insert', 'Delete', 'Home', + 'End', 'PageUp', 'PageDown', 'ArrowUp', 'ArrowDown', 'ArrowLeft', 'ArrowRight', + 'ContextMenu', 'Backquote', 'Minus', 'Equal', 'BracketLeft', 'BracketRight', + 'Backslash', 'Semicolon', 'Quote', 'Comma', 'Period', 'Slash', 'NumpadAdd', + 'NumpadSubtract', 'NumpadMultiply', 
'NumpadDivide', 'NumpadDecimal', 'NumpadEnter', + 'Mouse4', 'Mouse5', +]; diff --git a/openless-all/app/src/lib/ipc.ts b/openless-all/app/src/lib/ipc.ts index a0aa78af..3807725b 100644 --- a/openless-all/app/src/lib/ipc.ts +++ b/openless-all/app/src/lib/ipc.ts @@ -44,7 +44,7 @@ export async function invokeOrMock( // ── Mock fixtures ────────────────────────────────────────────────────── const mockSettings: UserPreferences = { - hotkey: { trigger: 'rightControl', mode: 'toggle' }, + hotkey: { trigger: 'rightControl', mode: 'toggle', keys: [{ code: 'ControlRight' }] }, dictationHotkey: { primary: 'RightControl', modifiers: [] }, defaultMode: 'structured', enabledModes: ['raw', 'light', 'structured', 'formal'], @@ -52,7 +52,7 @@ const mockSettings: UserPreferences = { showCapsule: true, muteDuringRecording: false, microphoneDeviceName: '', - activeAsrProvider: 'volcengine', + activeAsrProvider: 'foundry-local-whisper', activeLlmProvider: 'ark', restoreClipboardAfterPaste: true, allowNonTsfInsertionFallback: true, @@ -69,6 +69,9 @@ const mockSettings: UserPreferences = { localAsrActiveModel: 'qwen3-asr-0.6b', localAsrMirror: 'huggingface', localAsrKeepLoadedSecs: 300, + foundryLocalAsrModel: 'whisper-small', + foundryLocalAsrLanguageHint: '', + foundryLocalAsrKeepLoadedSecs: 300, }; const mockHotkeyCapability: HotkeyCapability = { @@ -78,11 +81,11 @@ const mockHotkeyCapability: HotkeyCapability = { supportsModifierOnlyTrigger: true, supportsSideSpecificModifiers: true, explicitFallbackAvailable: false, - statusHint: '默认建议使用“右 Control + 切换式说话”;若更习惯按住说话,可在录音设置里切回。若无响应,可在权限页查看 hook 安装状态。', + statusHint: '默认建议使用“右Ctrl + 单击”;若更习惯按住说话,可在录音设置里切回“按住”。若无响应,可在权限页查看 hook 安装状态。', }; const mockCredentialsStatus: CredentialsStatus = { - activeAsrProvider: 'volcengine', + activeAsrProvider: 'foundry-local-whisper', activeLlmProvider: 'ark', asrConfigured: true, llmConfigured: true, diff --git a/openless-all/app/src/lib/localAsr.ts b/openless-all/app/src/lib/localAsr.ts index 
ceacc19c..24741dd2 100644 --- a/openless-all/app/src/lib/localAsr.ts +++ b/openless-all/app/src/lib/localAsr.ts @@ -1,4 +1,4 @@ -// localAsr.ts — IPC + 事件类型 for 本地 Qwen3-ASR 引擎与模型管理。 +// localAsr.ts — IPC + 事件类型 for 本地 ASR 引擎与模型管理。 // // 后端命令定义:openless-all/app/src-tauri/src/commands.rs `local_asr_*` // 事件:local-asr-download-progress / local-asr-token @@ -55,6 +55,85 @@ export interface LocalAsrDownloadProgress { error: string | null; } +export interface FoundryLocalAsrStatus { + providerId: string; + available: boolean; + activeModel: string; + loadedModelId: string | null; + endpoint: string | null; + error: string | null; +} + +export type FoundryLocalAsrModelAlias = 'whisper-small' | 'whisper-base' | 'whisper-tiny'; +export type FoundryLocalAsrLanguageHint = '' | 'zh' | 'en'; + +export interface FoundryLocalAsrCatalogModel { + alias: FoundryLocalAsrModelAlias; + displayName: string; + cached: boolean; + fileSizeMb: number | null; +} + +export type FoundryPreparePhase = + | 'runtime' + | 'model' + | 'load' + | 'finished' + | 'failed'; + +export interface FoundryPrepareProgress { + phase: FoundryPreparePhase; + modelAlias: string; + label: string; + percent: number | null; + error: string | null; +} + +export interface FoundryLocalAsrModelOption { + alias: FoundryLocalAsrModelAlias; + labelKey: string; + descKey: string; +} + +export const FOUNDRY_LOCAL_ASR_MODELS: FoundryLocalAsrModelOption[] = [ + { + alias: 'whisper-small', + labelKey: 'localAsr.foundryModelSmall', + descKey: 'localAsr.foundryModelSmallDesc', + }, + { + alias: 'whisper-base', + labelKey: 'localAsr.foundryModelBase', + descKey: 'localAsr.foundryModelBaseDesc', + }, + { + alias: 'whisper-tiny', + labelKey: 'localAsr.foundryModelTiny', + descKey: 'localAsr.foundryModelTinyDesc', + }, +]; + +const MOCK_FOUNDRY_CATALOG: FoundryLocalAsrCatalogModel[] = [ + { + alias: 'whisper-small', + displayName: 'Whisper Small', + cached: false, + fileSizeMb: 967, + }, + { + alias: 'whisper-base', + displayName: 
'Whisper Base', + cached: true, + fileSizeMb: 291, + }, + { + alias: 'whisper-tiny', + displayName: 'Whisper Tiny', + cached: false, + fileSizeMb: 151, + }, +]; + const MOCK_SETTINGS: LocalAsrSettings = { providerId: 'local-qwen3', activeModel: 'qwen3-asr-0.6b', @@ -175,3 +254,42 @@ export function preloadLocalAsr(): Promise { export function setLocalAsrKeepLoadedSecs(seconds: number): Promise { return invokeOrMock('local_asr_set_keep_loaded_secs', { seconds }, () => undefined); } + +export function getFoundryLocalAsrStatus(): Promise { + return invokeOrMock('foundry_local_asr_status', undefined, () => ({ + providerId: 'foundry-local-whisper', + available: true, + activeModel: 'whisper-small', + loadedModelId: null, + endpoint: null, + error: null, + })); +} + +export function getFoundryLocalAsrCatalog(): Promise { + return invokeOrMock('foundry_local_asr_catalog', undefined, () => MOCK_FOUNDRY_CATALOG); +} + +export function setFoundryLocalAsrModel(modelAlias: string): Promise { + return invokeOrMock('foundry_local_asr_set_model', { modelAlias }, () => undefined); +} + +export function setFoundryLocalAsrLanguageHint(languageHint: string): Promise { + return invokeOrMock( + 'foundry_local_asr_set_language_hint', + { languageHint }, + () => undefined, + ); +} + +export function prepareFoundryLocalAsr(modelAlias: string): Promise { + return invokeOrMock('foundry_local_asr_prepare', { modelAlias }, () => `mock-${modelAlias}`); +} + +export function cancelFoundryLocalAsrPrepare(): Promise { + return invokeOrMock('foundry_local_asr_cancel_prepare', undefined, () => undefined); +} + +export function releaseFoundryLocalAsr(): Promise { + return invokeOrMock('foundry_local_asr_release', undefined, () => undefined); +} diff --git a/openless-all/app/src/lib/types.ts b/openless-all/app/src/lib/types.ts index 71513411..415b549b 100644 --- a/openless-all/app/src/lib/types.ts +++ b/openless-all/app/src/lib/types.ts @@ -51,11 +51,16 @@ export type HotkeyTrigger = | 'rightAlt' | 
'custom'; -export type HotkeyMode = 'toggle' | 'hold'; +export type HotkeyMode = 'toggle' | 'hold' | 'doubleClick'; + +export interface HotkeyKey { + code: string; +} export interface HotkeyBinding { trigger: HotkeyTrigger; mode: HotkeyMode; + keys?: HotkeyKey[] | null; } export type HotkeyAdapterKind = 'macEventTap' | 'windowsLowLevel' | 'rdev'; @@ -154,6 +159,12 @@ export interface UserPreferences { /** 本地 ASR 引擎在内存中的保留时长(秒)。0 = 说完话即释放; * 300 = 默认 5 分钟;86400 ≈ 不释放(保持加载)。 */ localAsrKeepLoadedSecs: number; + /** Windows Foundry Local Whisper 当前激活的模型 alias。 */ + foundryLocalAsrModel: string; + /** Windows Foundry Local Whisper 语言 hint。空字符串表示自动检测。 */ + foundryLocalAsrLanguageHint: string; + /** Windows Foundry Local Whisper 模型在 runtime 中保持加载的秒数。 */ + foundryLocalAsrKeepLoadedSecs: number; } export interface MicrophoneDevice { diff --git a/openless-all/app/src/lib/windowHotkeyFallback.test.ts b/openless-all/app/src/lib/windowHotkeyFallback.test.ts new file mode 100644 index 00000000..3cee5692 --- /dev/null +++ b/openless-all/app/src/lib/windowHotkeyFallback.test.ts @@ -0,0 +1,42 @@ +import { + isWindowHotkeyKeyboardCandidate, + windowMouseHotkeyCode, +} from './windowHotkeyFallback'; + +function assertEqual(actual: T, expected: T, name: string) { + if (actual !== expected) { + throw new Error(`${name}: expected ${expected}, got ${actual}`); + } +} + +function keyboardEvent(code: string, key = code): KeyboardEvent { + return { code, key } as KeyboardEvent; +} + +assertEqual( + isWindowHotkeyKeyboardCandidate(keyboardEvent('KeyK', 'k')), + true, + 'fallback forwards letter hotkeys', +); + +assertEqual( + isWindowHotkeyKeyboardCandidate(keyboardEvent('CapsLock')), + true, + 'fallback forwards CapsLock hotkeys', +); + +assertEqual( + isWindowHotkeyKeyboardCandidate(keyboardEvent('F12')), + true, + 'fallback forwards function key hotkeys', +); + +assertEqual( + isWindowHotkeyKeyboardCandidate(keyboardEvent('Numpad7')), + true, + 'fallback forwards numpad digit hotkeys', 
+); + +assertEqual(windowMouseHotkeyCode(3), 'Mouse4', 'fallback maps Mouse4'); +assertEqual(windowMouseHotkeyCode(4), 'Mouse5', 'fallback maps Mouse5'); +assertEqual(windowMouseHotkeyCode(0), null, 'fallback ignores primary mouse button'); diff --git a/openless-all/app/src/lib/windowHotkeyFallback.ts b/openless-all/app/src/lib/windowHotkeyFallback.ts new file mode 100644 index 00000000..52a7804f --- /dev/null +++ b/openless-all/app/src/lib/windowHotkeyFallback.ts @@ -0,0 +1,27 @@ +export function isWindowHotkeyKeyboardCandidate(event: KeyboardEvent): boolean { + const code = event.code; + if (event.key === 'Escape' || code === 'Escape') return true; + if (SUPPORTED_WINDOW_HOTKEY_CODES.has(code)) return true; + if (/^Key[A-Z]$/.test(code)) return true; + if (/^Digit[0-9]$/.test(code)) return true; + if (/^F([1-9]|1[0-9]|2[0-4])$/.test(code)) return true; + if (/^Numpad[0-9]$/.test(code)) return true; + return false; +} + +export function windowMouseHotkeyCode(button: number): string | null { + if (button === 3) return 'Mouse4'; + if (button === 4) return 'Mouse5'; + return null; +} + +const SUPPORTED_WINDOW_HOTKEY_CODES = new Set([ + 'ControlLeft', 'ControlRight', 'AltLeft', 'AltRight', 'ShiftLeft', 'ShiftRight', + 'MetaLeft', 'MetaRight', 'CapsLock', 'ScrollLock', 'Pause', 'PrintScreen', + 'Backspace', 'Tab', 'Enter', 'Space', 'Insert', 'Delete', 'Home', 'End', + 'PageUp', 'PageDown', 'ArrowUp', 'ArrowDown', 'ArrowLeft', 'ArrowRight', + 'ContextMenu', 'NumpadAdd', 'NumpadSubtract', 'NumpadMultiply', 'NumpadDivide', + 'NumpadDecimal', 'NumpadEnter', 'Backquote', 'Minus', 'Equal', 'BracketLeft', + 'BracketRight', 'Backslash', 'Semicolon', 'Quote', 'Comma', 'Period', 'Slash', + 'Fn', 'FnLock', +]); diff --git a/openless-all/app/src/pages/LocalAsr.tsx b/openless-all/app/src/pages/LocalAsr.tsx index 9b166a40..161cd319 100644 --- a/openless-all/app/src/pages/LocalAsr.tsx +++ b/openless-all/app/src/pages/LocalAsr.tsx @@ -1,4 +1,4 @@ -// LocalAsr.tsx — 本地 Qwen3-ASR 模型管理页。 
+// LocalAsr.tsx — 本地 ASR 模型管理页。 // // 功能: // - 顶部:当前激活模型 + 镜像源切换 @@ -11,25 +11,39 @@ import { useEffect, useMemo, useRef, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { isTauri, setActiveAsrProvider } from '../lib/ipc'; import { + FOUNDRY_LOCAL_ASR_MODELS, + cancelFoundryLocalAsrPrepare, cancelLocalAsrDownload, deleteLocalAsrModel, downloadLocalAsrModel, fetchLocalAsrRemoteInfo, + getFoundryLocalAsrCatalog, + getFoundryLocalAsrStatus, getLocalAsrEngineStatus, getLocalAsrSettings, listLocalAsrModels, + prepareFoundryLocalAsr, preloadLocalAsr, + releaseFoundryLocalAsr, releaseLocalAsrEngine, + setFoundryLocalAsrLanguageHint, + setFoundryLocalAsrModel, setLocalAsrActiveModel, setLocalAsrKeepLoadedSecs, setLocalAsrMirror, testLocalAsrModel, + type FoundryLocalAsrCatalogModel, + type FoundryLocalAsrLanguageHint, + type FoundryLocalAsrModelAlias, + type FoundryLocalAsrStatus, + type FoundryPrepareProgress, type LocalAsrDownloadProgress, type LocalAsrEngineStatus, type LocalAsrModelStatus, type LocalAsrSettings, type LocalAsrTestResult, } from '../lib/localAsr'; +import { useHotkeySettings } from '../state/HotkeySettingsContext'; import { Btn, Card, PageHeader, Pill } from './_atoms'; interface RemoteSize { @@ -41,17 +55,26 @@ interface RemoteSize { export function LocalAsr() { const { t } = useTranslation(); + const { prefs, updatePrefs } = useHotkeySettings(); const [settings, setSettings] = useState(null); const [models, setModels] = useState([]); const [progress, setProgress] = useState>({}); const [remoteSizes, setRemoteSizes] = useState>({}); const [error, setError] = useState(null); const [busyModelId, setBusyModelId] = useState(null); + const [foundryStatus, setFoundryStatus] = useState(null); + const [foundryCatalog, setFoundryCatalog] = useState([]); + const [selectedFoundryAlias, setSelectedFoundryAlias] = useState('whisper-small'); + const [foundryBusy, setFoundryBusy] = useState<'enable' | 'prepare' | 'release' | 
null>(null); + const [foundryProgress, setFoundryProgress] = useState(null); + const [foundryCancelRequested, setFoundryCancelRequested] = useState(false); const [testingModelId, setTestingModelId] = useState(null); const [testResults, setTestResults] = useState>({}); const [engineStatus, setEngineStatus] = useState(null); const refreshTimer = useRef(null); + const foundryRefreshTimer = useRef(null); const engineStatusTimer = useRef(null); + const foundrySelectionDirty = useRef(false); const refreshEngineStatus = async () => { try { @@ -62,6 +85,35 @@ export function LocalAsr() { } }; + const refreshFoundryStatus = async () => { + try { + const status = await getFoundryLocalAsrStatus(); + setFoundryStatus(status); + if (!foundrySelectionDirty.current && isFoundryAlias(status.activeModel)) { + setSelectedFoundryAlias(status.activeModel); + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + setFoundryStatus({ + providerId: 'foundry-local-whisper', + available: false, + activeModel: selectedFoundryAlias, + loadedModelId: null, + endpoint: null, + error: message, + }); + } + }; + + const refreshFoundryCatalog = async () => { + try { + const catalog = await getFoundryLocalAsrCatalog(); + setFoundryCatalog(catalog); + } catch (err) { + console.warn('[localAsr] Foundry catalog query failed', err); + } + }; + const refresh = async () => { try { setError(null); @@ -69,6 +121,8 @@ export function LocalAsr() { setSettings(s); setModels(list); void refreshEngineStatus(); + void refreshFoundryStatus(); + void refreshFoundryCatalog(); // 拉远端真实尺寸(每个模型一次,结果留缓存) void Promise.all( list.map(async m => { @@ -169,6 +223,37 @@ export function LocalAsr() { // eslint-disable-next-line react-hooks/exhaustive-deps }, []); + useEffect(() => { + if (!isTauri) return; + let unlisten: undefined | (() => void); + let cancelled = false; + (async () => { + const { listen } = await import('@tauri-apps/api/event'); + const off = await 
listen('foundry-local-asr-prepare-progress', e => { + const payload = e.payload; + setFoundryProgress(payload); + if (payload.phase === 'finished' || payload.phase === 'failed') { + if (foundryRefreshTimer.current) window.clearTimeout(foundryRefreshTimer.current); + foundryRefreshTimer.current = window.setTimeout(() => { + void refreshFoundryStatus(); + void refreshFoundryCatalog(); + }, 200); + } + }); + if (cancelled) { + off(); + } else { + unlisten = off; + } + })().catch(err => console.warn('[localAsr] Foundry prepare subscribe failed', err)); + return () => { + cancelled = true; + if (unlisten) unlisten(); + if (foundryRefreshTimer.current) window.clearTimeout(foundryRefreshTimer.current); + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + const handleSetActiveModel = async (modelId: string) => { setBusyModelId(modelId); try { @@ -183,6 +268,96 @@ export function LocalAsr() { } }; + const syncFoundryPrefs = async (modelAlias: FoundryLocalAsrModelAlias, enableProvider: boolean) => { + await updatePrefs(current => ({ + ...current, + activeAsrProvider: enableProvider ? 'foundry-local-whisper' : current.activeAsrProvider, + foundryLocalAsrModel: modelAlias, + })); + }; + + const handleFoundryLanguageChange = async (languageHint: FoundryLocalAsrLanguageHint) => { + try { + setError(null); + await setFoundryLocalAsrLanguageHint(languageHint); + await updatePrefs(current => ({ + ...current, + foundryLocalAsrLanguageHint: languageHint, + })); + } catch (e) { + setError(e instanceof Error ? e.message : String(e)); + } + }; + + const handleEnableFoundry = async () => { + if (!foundryAvailable) return; + setFoundryBusy('enable'); + try { + setError(null); + await setFoundryLocalAsrModel(selectedFoundryAlias); + await setActiveAsrProvider('foundry-local-whisper'); + await syncFoundryPrefs(selectedFoundryAlias, true); + foundrySelectionDirty.current = false; + await refreshFoundryStatus(); + } catch (e) { + setError(e instanceof Error ? 
e.message : String(e)); + } finally { + setFoundryBusy(null); + } + }; + + const handlePrepareFoundry = async () => { + if (!foundryAvailable) return; + setFoundryBusy('prepare'); + setFoundryCancelRequested(false); + setFoundryProgress({ + phase: 'runtime', + modelAlias: selectedFoundryAlias, + label: t('localAsr.foundryPrepareRuntime'), + percent: 0, + error: null, + }); + try { + setError(null); + await setFoundryLocalAsrModel(selectedFoundryAlias); + await syncFoundryPrefs(selectedFoundryAlias, false); + await prepareFoundryLocalAsr(selectedFoundryAlias); + foundrySelectionDirty.current = false; + await refreshFoundryStatus(); + await refreshFoundryCatalog(); + } catch (e) { + setError(e instanceof Error ? e.message : String(e)); + await refreshFoundryStatus(); + await refreshFoundryCatalog(); + } finally { + setFoundryBusy(null); + setFoundryCancelRequested(false); + } + }; + + const handleCancelFoundryPrepare = async () => { + if (foundryBusy !== 'prepare') return; + setFoundryCancelRequested(true); + try { + await cancelFoundryLocalAsrPrepare(); + } catch (e) { + setError(e instanceof Error ? e.message : String(e)); + } + }; + + const handleReleaseFoundry = async () => { + setFoundryBusy('release'); + try { + setError(null); + await releaseFoundryLocalAsr(); + await refreshFoundryStatus(); + } catch (e) { + setError(e instanceof Error ? e.message : String(e)); + } finally { + setFoundryBusy(null); + } + }; + const handleDownload = async (modelId: string) => { setBusyModelId(modelId); try { @@ -275,6 +450,31 @@ export function LocalAsr() { }; const engineAvailable = settings?.engineAvailable ?? false; + const foundryAvailable = foundryStatus?.available === true; + const foundryDefault = prefs?.activeAsrProvider === 'foundry-local-whisper'; + const selectedFoundryModel = FOUNDRY_LOCAL_ASR_MODELS.find( + model => model.alias === selectedFoundryAlias, + ) ?? 
FOUNDRY_LOCAL_ASR_MODELS[0]; + const selectedFoundryCatalog = foundryCatalog.find(model => model.alias === selectedFoundryAlias); + const selectedFoundryDisplayName = selectedFoundryCatalog?.displayName ?? t(selectedFoundryModel.labelKey); + const selectedFoundrySizeMb = formatFoundrySizeMb(selectedFoundryCatalog?.fileSizeMb); + const selectedFoundrySizeLabel = selectedFoundrySizeMb + ? t('localAsr.foundryApproxSizeMb', { mb: selectedFoundrySizeMb }) + : t('localAsr.sizeUnknown'); + const selectedFoundryDownloadLabel = selectedFoundryCatalog?.cached + ? t('localAsr.downloadedBadge') + : t('localAsr.notDownloadedBadge'); + const selectedFoundryLanguageHint = normalizeFoundryLanguageHintForUi( + prefs?.foundryLocalAsrLanguageHint ?? '', + ); + const foundryPrepareLabel = + foundryBusy === 'prepare' + ? foundryCancelRequested + ? t('localAsr.foundryCancelling') + : t('localAsr.foundryPreparing') + : foundryProgress?.phase === 'failed' + ? t('localAsr.foundryRetryPrepare') + : t('localAsr.foundryPrepare'); return (
@@ -291,6 +491,154 @@ export function LocalAsr() {
+ +
+
+
+
+
+ {t('localAsr.foundryTitle')} +
+ {foundryDefault && {t('localAsr.activeBadge')}} + + {foundryStatus?.available + ? t('localAsr.foundryAvailable') + : t('localAsr.foundryUnavailable')} + +
+
+ {t('localAsr.foundryDesc')} +
+
+
+ + +
+
+ +
+
+ {t('localAsr.foundrySelectedModel')}: + {selectedFoundryDisplayName} + · {selectedFoundrySizeLabel} · {selectedFoundryDownloadLabel} + · {t(selectedFoundryModel.descKey)} +
+
+ {t('localAsr.foundryLanguageLabel')}: + {selectedFoundryLanguageHint + ? t(`localAsr.foundryLanguage${selectedFoundryLanguageHint === 'zh' ? 'Zh' : 'En'}`) + : t('localAsr.foundryLanguageAuto')} + · {t('localAsr.foundryLanguageDesc')} +
+
+ {t('localAsr.foundryActiveModel')}: + {foundryStatus?.activeModel ?? 'whisper-small'} +
+
+ {t('localAsr.foundryLoadedModel')}: + {foundryStatus?.loadedModelId ?? t('localAsr.foundryNotLoaded')} +
+ {foundryStatus?.error && ( +
+ {t('localAsr.foundryError')}: + {foundryStatus.error} +
+ )} +
+ + {(foundryBusy === 'prepare' || foundryProgress) && ( + + )} + +
+ void handleEnableFoundry()}> + {foundryBusy === 'enable' ? t('localAsr.foundryEnabling') : t('localAsr.foundrySetDefault')} + + void handlePrepareFoundry()}> + {foundryPrepareLabel} + + {foundryBusy === 'prepare' && ( + void handleCancelFoundryPrepare()}> + {foundryCancelRequested + ? t('localAsr.foundryCancelRequested') + : t('localAsr.foundryCancelPrepare')} + + )} + void handleReleaseFoundry()}> + {foundryBusy === 'release' ? t('localAsr.foundryReleasing') : t('localAsr.releaseNow')} + +
+
+
+ {!engineAvailable && (
@@ -299,6 +647,10 @@ export function LocalAsr() { )} +
+ {t('localAsr.qwenTitle')} +
+
@@ -416,6 +768,93 @@ export function LocalAsr() { ); } +function FoundryPrepareProgressBlock({ + progress, + modelCached, + cancelRequested, +}: { + progress: FoundryPrepareProgress | null; + modelCached: boolean; + cancelRequested: boolean; +}) { + const { t } = useTranslation(); + const stages = [ + { phase: 'runtime', label: t('localAsr.foundryPrepareRuntime') }, + { phase: 'model', label: t('localAsr.foundryPrepareModel') }, + { phase: 'load', label: t('localAsr.foundryPrepareLoad') }, + ] as const; + const currentIndex = progress ? stages.findIndex(stage => stage.phase === progress.phase) : -1; + + return ( +
+ {stages.map((stage, index) => { + const finished = progress?.phase === 'finished' || currentIndex > index; + const skippedCachedModel = + stage.phase === 'model' && + modelCached && + (progress?.phase === 'load' || progress?.phase === 'finished'); + const active = progress?.phase === stage.phase; + const failed = progress?.phase === 'failed'; + const percent = finished || skippedCachedModel + ? 100 + : active + ? Math.max(0, Math.min(100, progress?.percent ?? 0)) + : 0; + const detail = skippedCachedModel + ? t('localAsr.foundryPrepareModelSkipped') + : active + ? progress?.label + : finished + ? t('localAsr.foundryPrepareDone') + : t('localAsr.foundryPrepareWaiting'); + return ( +
+
+ + {stage.label} + + + {failed ? t('localAsr.failed') : `${Math.round(percent)}%`} + +
+
+
+
+
+ {detail} +
+
+ ); + })} + {cancelRequested && ( +
+ {t('localAsr.foundryCancelBestEffort')} +
+ )} + {progress?.phase === 'failed' && progress.error && ( +
+ {progress.error} +
+ )} +
+ ); +} + interface ModelRowProps { model: LocalAsrModelStatus; remoteSize?: RemoteSize; @@ -610,6 +1049,19 @@ function TestResultBlock({ result }: { result: LocalAsrTestResult | { error: str ); } +function isFoundryAlias(value: string): value is FoundryLocalAsrModelAlias { + return FOUNDRY_LOCAL_ASR_MODELS.some(model => model.alias === value); +} + +function normalizeFoundryLanguageHintForUi(value: string): FoundryLocalAsrLanguageHint { + return value === 'zh' || value === 'en' ? value : ''; +} + +function formatFoundrySizeMb(fileSizeMb: number | null | undefined): string | null { + if (typeof fileSizeMb !== 'number' || fileSizeMb <= 0) return null; + return Math.round(fileSizeMb).toLocaleString(); +} + function formatBytes(n: number): string { if (n < 1024) return `${n} B`; if (n < 1024 * 1024) return `${(n / 1024).toFixed(1)} KB`; diff --git a/openless-all/app/src/pages/Overview.tsx b/openless-all/app/src/pages/Overview.tsx index 946dfbfa..c81846d1 100644 --- a/openless-all/app/src/pages/Overview.tsx +++ b/openless-all/app/src/pages/Overview.tsx @@ -29,6 +29,7 @@ const ASR_NAME_KEY_BY_ID: Record = { zhipu: 'asrZhipu', groq: 'asrGroq', whisper: 'asrWhisper', + 'foundry-local-whisper': 'asrFoundryLocalWhisper', 'local-qwen3': 'asrLocalQwen3', }; diff --git a/openless-all/app/src/pages/QaPanel.tsx b/openless-all/app/src/pages/QaPanel.tsx index 959b6e87..e90c0812 100644 --- a/openless-all/app/src/pages/QaPanel.tsx +++ b/openless-all/app/src/pages/QaPanel.tsx @@ -14,7 +14,7 @@ import { useEffect, useMemo, useRef, useState, type CSSProperties } from 'react' import { useTranslation } from 'react-i18next'; import { getSettings, isTauri, qaWindowDismiss, qaWindowPin } from '../lib/ipc'; import type { QaChatMessage, QaStatePayload, UserPreferences } from '../lib/types'; -import { getHotkeyTriggerLabel } from '../lib/hotkey'; +import { getHotkeyBindingLabel } from '../lib/hotkey'; import { renderQaMarkdown, renderQaPlainText } from '../lib/qaMarkdown'; const 
SELECTION_PREVIEW_MAX = 60; @@ -121,7 +121,7 @@ export function QaPanel() { // webview,没有 HotkeySettingsContext;如果用户在主窗口改了录音键, // 浮窗里的 "{recordHotkey}" 文案必须立刻跟上,否则会一直停在旧值。 const prefsHandle = await listen('prefs:changed', event => { - setRecordHotkeyLabel(getHotkeyTriggerLabel(event.payload?.hotkey?.trigger)); + setRecordHotkeyLabel(getHotkeyBindingLabel(event.payload?.hotkey)); }); if (cancelled) { stateHandle(); @@ -172,7 +172,7 @@ export function QaPanel() { void getSettings() .then(prefs => { if (cancelled) return; - setRecordHotkeyLabel(getHotkeyTriggerLabel(prefs.hotkey?.trigger)); + setRecordHotkeyLabel(getHotkeyBindingLabel(prefs.hotkey)); }) .catch(err => { console.warn('[QaPanel] load hotkey label failed', err); diff --git a/openless-all/app/src/pages/SelectionAsk.tsx b/openless-all/app/src/pages/SelectionAsk.tsx index db58ef18..ce1b8c43 100644 --- a/openless-all/app/src/pages/SelectionAsk.tsx +++ b/openless-all/app/src/pages/SelectionAsk.tsx @@ -18,7 +18,6 @@ export function SelectionAsk() { const defaultQaHotkey = defaultQaShortcut(); const defaultHotkeyLabel = formatComboLabel(defaultQaHotkey); const recordHotkeyLabel = prefs ? 
formatComboLabel(prefs.dictationHotkey) : '快捷键'; - if (!prefs) { return ( <> @@ -139,7 +138,6 @@ export function SelectionAsk() { ); } - // 卡片标题行右侧开关:与 Style 页面顶栏的 36×20 toggle 同款,保持全局视觉一致。 function CardHeaderToggle({ title, diff --git a/openless-all/app/src/pages/Settings.tsx b/openless-all/app/src/pages/Settings.tsx index b667fdb7..14edcf9c 100644 --- a/openless-all/app/src/pages/Settings.tsx +++ b/openless-all/app/src/pages/Settings.tsx @@ -7,9 +7,16 @@ import { useTranslation } from 'react-i18next'; import { Icon } from '../components/Icon'; import { ShortcutRecorder } from '../components/ShortcutRecorder'; import { isDialogStatus, UpdateDialog, useAutoUpdate } from '../components/AutoUpdate'; +import { detectOS } from '../components/WindowChrome'; import { APP_VERSION_LABEL } from '../lib/appVersion'; import { isHotkeyModeMigrationNoticeActive } from '../lib/hotkeyMigration'; -import { defaultQaShortcut } from '../lib/hotkey'; +import { + defaultQaShortcut, + getHotkeyBindingCodes, + getHotkeyBindingLabel, + getHotkeyCodeLabel, +} from '../lib/hotkey'; +import { createHotkeyRecorderState, orderHotkeyCodes, updateHotkeyRecorderState } from '../lib/hotkeyRecorder'; import { checkAccessibilityPermission, checkMicrophonePermission, @@ -37,6 +44,7 @@ import { } from '../lib/ipc'; import type { HotkeyCapability, + HotkeyBinding, HotkeyMode, HotkeyStatus, HotkeyTrigger, @@ -69,7 +77,6 @@ interface SettingsProps { embedded?: boolean; initialSection?: SettingsSectionId; } - // "关于" tab 已移除(内容并入外层 SettingsModal 的 About 页,避免设置内外重复入口)。 export type SettingsSectionId = 'recording' | 'providers' | 'shortcuts' | 'permissions' | 'language'; @@ -433,6 +440,149 @@ function RecordingSection() { ); } +function HotkeyRecorder({ + binding, + onCommit, +}: { + binding: HotkeyBinding; + onCommit: (codes: string[]) => void; +}) { + const { t } = useTranslation(); + const [recording, setRecording] = useState(false); + const [draftCodes, setDraftCodes] = useState([]); + const 
recorderStateRef = useRef(createHotkeyRecorderState()); + const recordingRef = useRef(false); + + const resetRecording = () => { + recordingRef.current = false; + recorderStateRef.current = createHotkeyRecorderState(); + setDraftCodes([]); + setRecording(false); + }; + + const commitCodes = (codes: string[]) => { + const ordered = orderHotkeyCodes(codes); + resetRecording(); + onCommit(ordered); + }; + + const startRecording = () => { + recordingRef.current = true; + recorderStateRef.current = createHotkeyRecorderState(); + setDraftCodes([]); + setRecording(true); + }; + + useEffect(() => { + if (!recording) return undefined; + + const stopEvent = (event: Event) => { + event.preventDefault(); + event.stopPropagation(); + }; + + const applyHotkeyCode = (code: string, pressed: boolean) => { + if (!recordingRef.current) return; + const next = updateHotkeyRecorderState(recorderStateRef.current, code, pressed); + recorderStateRef.current = next.state; + setDraftCodes(next.state.draftCodes); + if (next.commitCodes) commitCodes(next.commitCodes); + }; + + const onKeyDown = (event: KeyboardEvent) => { + stopEvent(event); + if (event.key === 'Escape' || event.code === 'Escape') { + resetRecording(); + return; + } + const code = normalizeKeyboardHotkeyCode(event); + if (!code) return; + applyHotkeyCode(code, true); + }; + + const onKeyUp = (event: KeyboardEvent) => { + stopEvent(event); + if (!recordingRef.current) return; + if (event.key === 'Escape' || event.code === 'Escape') { + resetRecording(); + return; + } + const code = normalizeKeyboardHotkeyCode(event); + if (!code) return; + applyHotkeyCode(code, false); + }; + + const onMouseDown = (event: MouseEvent) => { + const code = mouseButtonToHotkeyCode(event.button); + if (!code) return; + stopEvent(event); + applyHotkeyCode(code, true); + }; + + const onMouseUp = (event: MouseEvent) => { + const code = mouseButtonToHotkeyCode(event.button); + if (!code) return; + stopEvent(event); + applyHotkeyCode(code, false); + }; + 
+ window.addEventListener('keydown', onKeyDown, true); + window.addEventListener('keyup', onKeyUp, true); + window.addEventListener('mousedown', onMouseDown, true); + window.addEventListener('mouseup', onMouseUp, true); + return () => { + window.removeEventListener('keydown', onKeyDown, true); + window.removeEventListener('keyup', onKeyUp, true); + window.removeEventListener('mousedown', onMouseDown, true); + window.removeEventListener('mouseup', onMouseUp, true); + }; + }, [recording]); + + const label = recording + ? draftCodes.length > 0 + ? draftCodes.map(getHotkeyCodeLabel).join('+') + : t('settings.recording.hotkeyRecording') + : getHotkeyBindingLabel(binding); + const hasKeys = getHotkeyBindingCodes(binding).length > 0; + + return ( +
+ +
+ ); +} + function MicrophonePickerDialog({ devices, selectedName, @@ -731,6 +881,45 @@ function MicrophonePickerDialog({ ); } +function inferLegacyTrigger(codes: string[], fallback: HotkeyTrigger): HotkeyTrigger { + if (codes.includes('ControlRight')) return 'rightControl'; + if (codes.includes('ControlLeft')) return 'leftControl'; + if (codes.includes('AltRight')) return 'rightAlt'; + if (codes.includes('AltLeft')) return 'leftOption'; + if (codes.includes('MetaRight')) return 'rightCommand'; + if (codes.includes('Fn')) return 'fn'; + return fallback; +} + +function normalizeKeyboardHotkeyCode(event: KeyboardEvent): string | null { + if (event.key === 'Fn' || event.code === 'Fn') return 'Fn'; + if (event.key === 'FnLock' || event.code === 'FnLock') return 'FnLock'; + const code = event.code === 'OSLeft' ? 'MetaLeft' : event.code === 'OSRight' ? 'MetaRight' : event.code; + if (SUPPORTED_HOTKEY_CODES.has(code)) return code; + if (/^Key[A-Z]$/.test(code)) return code; + if (/^Digit[0-9]$/.test(code)) return code; + if (/^F([1-9]|1[0-9]|2[0-4])$/.test(code)) return code; + if (/^Numpad[0-9]$/.test(code)) return code; + return null; +} + +function mouseButtonToHotkeyCode(button: number): string | null { + if (button === 3) return 'Mouse4'; + if (button === 4) return 'Mouse5'; + return null; +} + +const SUPPORTED_HOTKEY_CODES = new Set([ + 'ControlLeft', 'ControlRight', 'AltLeft', 'AltRight', 'ShiftLeft', 'ShiftRight', + 'MetaLeft', 'MetaRight', 'CapsLock', 'ScrollLock', 'Pause', 'PrintScreen', + 'Backspace', 'Tab', 'Enter', 'Space', 'Insert', 'Delete', 'Home', 'End', + 'PageUp', 'PageDown', 'ArrowUp', 'ArrowDown', 'ArrowLeft', 'ArrowRight', + 'ContextMenu', 'NumpadAdd', 'NumpadSubtract', 'NumpadMultiply', 'NumpadDivide', + 'NumpadDecimal', 'NumpadEnter', 'Backquote', 'Minus', 'Equal', 'BracketLeft', + 'BracketRight', 'Backslash', 'Semicolon', 'Quote', 'Comma', 'Period', 'Slash', + 'Fn', 'FnLock', +]); + function LevelMeter({ level }: { level: number }) { const 
amplified = Math.min(1, Math.max(0, level * 4.5)); const bars = [0.25, 0.5, 0.75, 1, 0.75, 0.5]; @@ -866,6 +1055,7 @@ const ASR_PRESETS = [ { id: 'zhipu', nameKey: 'asrZhipu', baseUrl: 'https://open.bigmodel.cn/api/paas/v4', model: 'glm-asr-2512' }, { id: 'groq', nameKey: 'asrGroq', baseUrl: 'https://api.groq.com/openai/v1', model: 'whisper-large-v3-turbo' }, { id: 'whisper', nameKey: 'asrWhisper', baseUrl: 'https://api.openai.com/v1', model: 'whisper-1' }, + { id: 'foundry-local-whisper', nameKey: 'asrFoundryLocalWhisper', baseUrl: '', model: '' }, // 本地 Qwen3-ASR:无 baseUrl/model 配置,模型在「模型设置」页下载与切换。 { id: 'local-qwen3', nameKey: 'asrLocalQwen3', baseUrl: '', model: '' }, ] as const; @@ -890,6 +1080,10 @@ function ProvidersSection() { const asrSwitchSeqRef = useRef(0); const [llmModelRevision, setLlmModelRevision] = useState(0); const [asrModelRevision, setAsrModelRevision] = useState(0); + const os = detectOS(); + const visibleAsrPresets = ASR_PRESETS.filter( + p => p.id !== 'foundry-local-whisper' || os === 'win', + ); useEffect(() => { if (!prefs) return; @@ -897,11 +1091,11 @@ function ProvidersSection() { const llmId = knownLlm ? knownLlm.id : 'custom'; setLlmProvider(llmId); setCommittedLlmProvider(llmId); - const knownAsr = ASR_PRESETS.find(x => x.id === prefs.activeAsrProvider); + const knownAsr = visibleAsrPresets.find(x => x.id === prefs.activeAsrProvider); const asrId = knownAsr ? knownAsr.id : 'volcengine'; setAsrProvider(asrId); setCommittedAsrProvider(asrId); - }, [prefs]); + }, [prefs, os]); // issue #219 / #220 P2: // 1. 立刻 setLlmProvider —— 受控 立刻切到新厂商,但凭据字段还在显示旧 entry,placeholder // 会先于实际数据切换、视觉上对不上。 const preset = LLM_PRESETS.find(p => p.id === committedLlmProvider) ?? 
LLM_PRESETS[LLM_PRESETS.length - 1]; - const asrPreset = ASR_PRESETS.find(p => p.id === committedAsrProvider); + const asrPreset = visibleAsrPresets.find(p => p.id === committedAsrProvider); return ( <> @@ -1012,7 +1206,7 @@ function ProvidersSection() { onChange={e => onAsrProviderChange(e.target.value as AsrPresetId)} style={{ ...inputStyle, maxWidth: 200 }} > - {ASR_PRESETS.map(p => ( + {visibleAsrPresets.map(p => ( ))} @@ -1043,8 +1237,8 @@ function ProvidersSection() { {t('settings.providers.volcengineMappingNote')}
- ) : committedAsrProvider === 'local-qwen3' ? ( - + ) : committedAsrProvider === 'local-qwen3' || committedAsrProvider === 'foundry-local-whisper' ? ( + ) : ( <> @@ -1366,6 +1560,79 @@ const miniBtnStyle: CSSProperties = { transition: 'background 0.16s var(--ol-motion-quick), border-color 0.16s var(--ol-motion-quick), color 0.16s var(--ol-motion-quick)', }; +const recordingHotkeyControlWidth = 178; + +const hotkeyRecorderButtonStyle: CSSProperties = { + width: recordingHotkeyControlWidth, + height: 32, + padding: '0 8px 0 11px', + border: '0.5px solid var(--ol-line-strong)', + borderRadius: 8, + background: 'var(--ol-surface-2)', + display: 'inline-flex', + alignItems: 'center', + justifyContent: 'space-between', + gap: 8, + fontFamily: 'var(--ol-font-mono)', + fontSize: 12.5, + cursor: 'default', + transition: 'background 0.16s var(--ol-motion-quick), border-color 0.16s var(--ol-motion-quick), color 0.16s var(--ol-motion-quick)', +}; + +const recordingHotkeySegmentedStyle: CSSProperties = { + width: recordingHotkeyControlWidth, + display: 'inline-flex', + padding: 2, + borderRadius: 8, + background: 'rgba(0,0,0,0.05)', +}; + +const recordingHotkeyGroupStyle: CSSProperties = { + display: 'grid', + gridTemplateColumns: 'auto', + rowGap: 10, + justifyItems: 'start', +}; + +const recordingHotkeyLineStyle: CSSProperties = { + display: 'grid', + gridTemplateColumns: '64px auto', + alignItems: 'center', + columnGap: 10, +}; + +const recordingHotkeyFieldLabelStyle: CSSProperties = { + fontSize: 12, + color: 'var(--ol-ink-4)', + textAlign: 'right', + whiteSpace: 'nowrap', +}; + +const recordingHotkeyStatusStyle: CSSProperties = { + marginLeft: 74, + fontSize: 12, + lineHeight: 1.3, +}; + +const hotkeyRecorderLabelStyle: CSSProperties = { + minWidth: 0, + overflow: 'hidden', + textOverflow: 'ellipsis', + whiteSpace: 'nowrap', +}; + +const hotkeyClearButtonStyle: CSSProperties = { + width: 18, + height: 18, + borderRadius: 999, + display: 'inline-flex', + alignItems: 
'center', + justifyContent: 'center', + flexShrink: 0, + background: 'rgba(0,0,0,0.2)', + color: '#fff', +}; + const iconBtnStyle: CSSProperties = { width: 32, height: 32, border: '0.5px solid var(--ol-line-strong)', @@ -1683,7 +1950,7 @@ export function AboutUpdateControl({ tagline }: { tagline: string }) { return ( <>
- {tagline} · {APP_VERSION_LABEL} + {tagline} · {APP_VERSION_LABEL} {u.checking ? t('settings.about.checkingUpdate') : t('settings.about.checkUpdateBtn')} @@ -1742,45 +2009,100 @@ function adapterDisplayName(adapter: HotkeyCapability['adapter'] | HotkeyStatus[ /// 本地 Qwen3-ASR 在 Settings → 服务商区里**不**让用户填空——展示当前激活模型 /// 是否已下载、列出所有已下载模型 + 删除按钮,并提示性能/质量预期,引导跳到 /// 「模型设置」页做下载。 -function LocalAsrProviderHint() { +function LocalAsrProviderHint({ + provider, + selectedProvider, +}: { + provider: 'local-qwen3' | 'foundry-local-whisper'; + selectedProvider: AsrPresetId; +}) { const { t } = useTranslation(); const [settings, setSettings] = useState(null); const [models, setModels] = useState([]); const [loading, setLoading] = useState(true); const [deletingId, setDeletingId] = useState(null); + const refreshSeqRef = useRef(0); + const providerStateRef = useRef({ provider, selectedProvider }); + providerStateRef.current = { provider, selectedProvider }; - const refresh = async () => { + const qwenReadyForFetch = () => { + const state = providerStateRef.current; + return state.provider === 'local-qwen3' && state.selectedProvider === 'local-qwen3'; + }; + + const refresh = async (seq: number) => { try { const [s, list] = await Promise.all([getLocalAsrSettings(), listLocalAsrModels()]); + if (seq !== refreshSeqRef.current) { + return; + } setSettings(s); setModels(list); } catch (err) { + if (seq !== refreshSeqRef.current) { + return; + } console.warn('[settings] load local asr status failed', err); } finally { + if (seq === refreshSeqRef.current) { + setLoading(false); + } + } + }; + + const beginRefresh = () => { + const seq = ++refreshSeqRef.current; + setSettings(null); + setModels([]); + setDeletingId(null); + if (provider !== selectedProvider) { + setLoading(true); + return; + } + if (provider === 'foundry-local-whisper') { setLoading(false); + return; } + setLoading(true); + void refresh(seq); }; useEffect(() => { - void refresh(); - }, []); + beginRefresh(); + return () 
=> { + refreshSeqRef.current += 1; + }; + }, [provider, selectedProvider]); const goToLocalAsr = () => { window.dispatchEvent(new CustomEvent(NAVIGATE_LOCAL_ASR_EVENT)); }; const handleDelete = async (modelId: string) => { + const seq = refreshSeqRef.current; + if (!qwenReadyForFetch()) { + return; + } setDeletingId(modelId); try { await deleteLocalAsrModel(modelId); - await refresh(); + if (seq !== refreshSeqRef.current || !qwenReadyForFetch()) { + return; + } + beginRefresh(); } catch (err) { console.warn('[settings] delete local model failed', err); } finally { - setDeletingId(null); + if (seq === refreshSeqRef.current && provider === 'local-qwen3') { + setDeletingId(null); + } } }; + const hintKey = provider === 'foundry-local-whisper' + ? 'settings.providers.foundryLocalAsrHint' + : 'settings.providers.localAsrHint'; + if (loading) { return (
@@ -1793,6 +2115,21 @@ function LocalAsrProviderHint() { const isReady = active?.isDownloaded ?? false; const downloaded = models.filter(m => m.isDownloaded); + if (provider === 'foundry-local-whisper') { + return ( +
+
+ {t(hintKey)} +
+
+ + {t('settings.providers.localAsrManage')} + +
+
+ ); + } + return (
{/* 性能/质量预期警告 —— 用户硬要求要写清楚 */} @@ -1809,7 +2146,7 @@ function LocalAsrProviderHint() {
- {t('settings.providers.localAsrHint')} + {t(hintKey)}
{/* 当前激活模型状态 + 跳转按钮 */}