From ec0d1cc3d8f3ae930ffde109bd8131d4fe3bd91e Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 31 May 2026 06:25:13 -0700 Subject: [PATCH] fix: harden unstuck voice input and use glm air --- .../live-demo/deploy/docker-compose.vps.yml | 2 +- EF-COACH/live-demo/lib/llm-client.mjs | 6 +- EF-COACH/live-demo/public/app.js | 79 +++++++++++++------ .../scripts/build-hostinger-compose.mjs | 6 +- EF-COACH/live-demo/test/chat-ui.test.mjs | 9 +++ EF-COACH/live-demo/test/compose.test.mjs | 6 +- EF-COACH/live-demo/test/llm-client.test.mjs | 8 +- 7 files changed, 80 insertions(+), 36 deletions(-) diff --git a/EF-COACH/live-demo/deploy/docker-compose.vps.yml b/EF-COACH/live-demo/deploy/docker-compose.vps.yml index e3c0fb9..7c842ee 100644 --- a/EF-COACH/live-demo/deploy/docker-compose.vps.yml +++ b/EF-COACH/live-demo/deploy/docker-compose.vps.yml @@ -8,7 +8,7 @@ services: PORT: "3000" LLM_PROVIDER: zai-coding-plan ZAI_API_KEY: ${ZAI_API_KEY:?ZAI_API_KEY required} - OPENAI_MODEL: glm-5.1 + OPENAI_MODEL: glm-4.5-air COACH_RATE_LIMIT_MAX: "12" COACH_RATE_LIMIT_WINDOW_MS: "600000" COACH_MAX_CONCURRENT_MODEL_CALLS: "2" diff --git a/EF-COACH/live-demo/lib/llm-client.mjs b/EF-COACH/live-demo/lib/llm-client.mjs index e57b713..e7936c9 100644 --- a/EF-COACH/live-demo/lib/llm-client.mjs +++ b/EF-COACH/live-demo/lib/llm-client.mjs @@ -1,7 +1,7 @@ const DEFAULT_RESPONSES_MODEL = "gpt-5.5"; const DEFAULT_COMPATIBLE_MODEL = "Qwen3.5-0.8B-Q4_K_M"; const ZAI_CODING_PLAN_BASE_URL = "https://api.z.ai/api/coding/paas/v4"; -const ZAI_GLM_51_MODEL = "glm-5.1"; +const ZAI_GLM_AIR_MODEL = "glm-4.5-air"; const DEFAULT_LLM_TIMEOUT_MS = 25_000; const GLM_THINKING_MAX_TOKENS = 1200; const HOME_OPENAI_PROVIDERS = new Set(["home-openai", "nucbox-openai"]); @@ -26,13 +26,13 @@ export function resolveLlmConfig(env = process.env) { if (provider === "zai-coding-plan") { const apiKey = env.ZAI_API_KEY || env.OPENAI_API_KEY || env.LLM_API_KEY; - const model = env.OPENAI_MODEL || env.LLM_MODEL || ZAI_GLM_51_MODEL; + const model = env.OPENAI_MODEL || env.LLM_MODEL || ZAI_GLM_AIR_MODEL; if (!apiKey) { throw new Error("ZAI_API_KEY is required for Z.AI GLM Coding Plan mode."); } return { provider, - providerLabel: "Z.AI GLM-5.1 (medium reasoning)", + providerLabel: "Z.AI GLM-4.5-Air", model, apiKey, chatCompletionsUrl: `${ZAI_CODING_PLAN_BASE_URL}/chat/completions`, diff --git a/EF-COACH/live-demo/public/app.js b/EF-COACH/live-demo/public/app.js index bf6e4f3..dea57cf 100644 --- a/EF-COACH/live-demo/public/app.js +++ b/EF-COACH/live-demo/public/app.js @@ -22,6 +22,8 @@ const thread = []; const heldItems = []; let energyLevel = null; let currentRecognition = null; +let voiceBaseDraft = ""; +let lastVoiceTranscript = ""; let loadingTimers = []; let lastTrackedDraftBucket = null; @@ -448,20 +450,46 @@ function getSpeechRecognition() { function getVoiceErrorMessage(error) { if (error === "not-allowed" || error === "service-not-allowed") { - return "Microphone permission was blocked. You can still type or use a chip."; + return "Microphone permission was blocked. I focused the box for keyboard dictation or fragments."; } if (error === "no-speech") { - return "No speech caught. Try Mic again and say the messy version."; + return "No speech caught. Try Mic again, or use keyboard dictation in the box."; } if (error === "audio-capture") { - return "No microphone was found. The chips still work."; + return "No microphone was found. I focused the box for keyboard dictation or fragments."; } - return "Voice typing did not start. The chips still work."; + return "Voice typing did not start. I focused the box for keyboard dictation or fragments."; +} + +function focusManualDictationFallback(status, starterText = "") { + if (starterText && !message.value.trim()) { + insertText(starterText); + } else { + message.focus(); + message.setSelectionRange?.(message.value.length, message.value.length); + } + setVoiceState(status, false); +} + +function extractRecognitionTranscript(event) { + return Array.from(event.results || []) + .map((result) => result[0]?.transcript || "") + .join(" ") + .replace(/\s+/g, " ") + .trim(); +} + +function applyVoiceTranscript(transcript) { + lastVoiceTranscript = transcript; + message.value = [voiceBaseDraft, transcript].filter(Boolean).join(" ").trim(); + resizeComposer(); + updateDraftStatus(); + saveDraft(); } function startVoiceInput() { if (currentRecognition) { - stopVoiceInput(); + stopVoiceInput("Stopped listening. Captured text stays in the box."); return; } @@ -469,33 +497,39 @@ function startVoiceInput() { if (!SpeechRecognition) { trackChat("voice failed", { reason: "unsupported" }); - setVoiceState("Voice typing is not available here. I put a no-typing prompt in the box."); - insertText("I'm too overloaded to type. Help me start."); + focusManualDictationFallback( + "Voice typing is not available in this browser. I focused the box for keyboard dictation.", + "I'm too overloaded to type. Help me start.", + ); return; } - const recognition = new SpeechRecognition(); + let recognition; + try { + recognition = new SpeechRecognition(); + } catch { + trackChat("voice failed", { reason: "constructor" }); + focusManualDictationFallback("Voice typing did not initialize. I focused the box for keyboard dictation."); + return; + } let endedWithError = false; - recognition.lang = "en-US"; + voiceBaseDraft = message.value.trim(); + lastVoiceTranscript = ""; + recognition.lang = navigator.language || "en-US"; recognition.interimResults = true; - recognition.continuous = false; + recognition.continuous = true; + recognition.maxAlternatives = 1; currentRecognition = recognition; recognition.addEventListener("start", () => { trackChat("voice started"); - setVoiceState("Listening. Say the messy version.", true); + setVoiceState("Listening. Say the messy version. Tap Mic again to stop.", true); }); recognition.addEventListener("result", (event) => { - const transcript = Array.from(event.results) - .map((result) => result[0]?.transcript || "") - .join(" ") - .trim(); + const transcript = extractRecognitionTranscript(event); if (transcript) { - message.value = transcript; - resizeComposer(); - updateDraftStatus(); - saveDraft(); + applyVoiceTranscript(transcript); trackChat("voice transcript received", { input_length_bucket: getLengthBucket(transcript) }); voiceStatus.textContent = "Captured speech. Send when ready, or keep talking."; } @@ -510,7 +544,7 @@ function startVoiceInput() { setVoiceState( endedWithError ? voiceStatus.textContent - : message.value.trim() + : lastVoiceTranscript || message.value.trim() ? "Captured. Press Enter or Send." : "No speech captured. Try Mic again or use a chip.", false, @@ -521,14 +555,15 @@ function startVoiceInput() { recognition.addEventListener("error", (event) => { endedWithError = true; trackChat("voice failed", { reason: event.error || "unknown" }); - setVoiceState(getVoiceErrorMessage(event.error), false); + focusManualDictationFallback(getVoiceErrorMessage(event.error)); }); try { recognition.start(); } catch { currentRecognition = null; - setVoiceState("Voice typing did not start. Try a chip or type fragments.", false); + trackChat("voice failed", { reason: "start" }); + focusManualDictationFallback("Voice typing did not start. I focused the box for keyboard dictation."); } } diff --git a/EF-COACH/live-demo/scripts/build-hostinger-compose.mjs b/EF-COACH/live-demo/scripts/build-hostinger-compose.mjs index c613e8d..ca25dbb 100644 --- a/EF-COACH/live-demo/scripts/build-hostinger-compose.mjs +++ b/EF-COACH/live-demo/scripts/build-hostinger-compose.mjs @@ -8,7 +8,7 @@ const pathPrefix = process.env.UNSTUCK_LIVE_PATH_PREFIX || "/unstuck"; const liveProvider = process.env.UNSTUCK_LIVE_PROVIDER || "vps-local"; const isZaiCodingPlan = liveProvider === "zai-coding-plan"; const isHomeInference = liveProvider === "home-openai" || liveProvider === "nucbox-openai"; -const model = process.env.UNSTUCK_LIVE_MODEL || (isZaiCodingPlan ? "glm-5.1" : "Qwen3.5-0.8B-Q4_K_M"); +const model = process.env.UNSTUCK_LIVE_MODEL || (isZaiCodingPlan ? "glm-4.5-air" : "Qwen3.5-0.8B-Q4_K_M"); const baseUrl = isZaiCodingPlan ? "https://api.z.ai/api/coding/paas/v4" : process.env.UNSTUCK_LIVE_BASE_URL || "http://llama-local:8085/v1"; @@ -18,7 +18,7 @@ const contextBase = process.env.UNSTUCK_CONTEXT_BASE || "https://unstuck.kyanite const outputPath = process.argv[2]; const routeRule = `Host(\`${host}\`) || (Host(\`${fallbackHost}\`) && PathPrefix(\`${pathPrefix}\`))`; -const liveHtml = String.raw`Unstuck

Unstuck

Unstuck

Mic

`; +const liveHtml = String.raw`U

Unstuck

Unstuck

Mic

`; const pageBr = brotliCompressSync(Buffer.from(liveHtml), { params: { [constants.BROTLI_PARAM_QUALITY]: constants.BROTLI_MAX_QUALITY }, }).toString("base64"); @@ -37,7 +37,7 @@ const H={};let A=0;function ck(q){let o=q.headers.origin,k=(q.headers["x-forward C(async(q,r)=>{try{const u=new URL(q.url,"http://x");if(q.method==="GET"&&u.pathname==="/api/config")return S(r,200,{model:process.env.OPENAI_MODEL});if(q.method==="GET"&&u.pathname==="/"){mk("view");r.writeHead(200,{"content-type":"text/html"});return r.end(page)}if(q.method==="POST"&&u.pathname==="/api/coach"){ck(q);if(!/json/i.test(q.headers["content-type"]||""))throw Error("415 json");const b=await bd(q),msg=cl(b.message),ctx=cl(b.context,800);if(!msg)return S(r,400,{error:"message required"});mk("chat");const p={model:process.env.OPENAI_MODEL,max_tokens:1200${thinking},messages:[{role:"system",content:await cc()},...hi(b.history),{role:"user",content:ctx?msg+"\n\nContext:\n"+ctx:msg}]};let rr;A++;try{rr=await fetch(process.env.OPENAI_BASE_URL.replace(/\/$/,"")+"/chat/completions",{method:"POST",headers:{authorization:"Bearer "+process.env.OPENAI_API_KEY,"content-type":"application/json"},signal:AbortSignal.timeout(25e3),body:JSON.stringify(p)})}finally{A--}if(!rr.ok)throw Error("llm");const j=await rr.json(),text=j.choices?.[0]?.message?.content?.trim();if(!text)throw Error("no text");mk("reply");return S(r,200,{reply:sh(text)})}const api=u.pathname.startsWith("/api/");S(r,api?405:404,{error:api?"bad method":"not found"})}catch(e){mk("error");const m=/^(\d+) (.+)/.exec(e.message);S(r,m?+m[1]:502,{error:m?m[2]:"demo failed"})}}).listen(process.env.PORT||3000,"0.0.0.0");`; const compose = `services: - unstuck-coach-live: + u: image: node:22-alpine working_dir: /app command: diff --git a/EF-COACH/live-demo/test/chat-ui.test.mjs b/EF-COACH/live-demo/test/chat-ui.test.mjs index 3e57c1a..c2a903e 100644 --- a/EF-COACH/live-demo/test/chat-ui.test.mjs +++ b/EF-COACH/live-demo/test/chat-ui.test.mjs @@ -73,8 +73,17 @@ test("public demo includes the support panel low-friction sidecar", () => { assert.match(script, /function startVoiceInput/); assert.match(script, /function stopVoiceInput/); assert.match(script, /function getSpeechRecognition/); + assert.match(script, /function focusManualDictationFallback/); + assert.match(script, /function extractRecognitionTranscript/); + assert.match(script, /function applyVoiceTranscript/); + assert.match(script, /let voiceBaseDraft/); assert.match(script, /interimResults = true/); + assert.match(script, /continuous = true/); + assert.match(script, /maxAlternatives = 1/); + assert.match(script, /navigator\.language \|\| "en-US"/); assert.match(script, /Microphone permission was blocked/); + assert.match(script, /keyboard dictation/); + assert.match(script, /message\.setSelectionRange/); assert.match(script, /function inferState/); assert.match(script, /const state = inferState\(lastUser\)/); assert.doesNotMatch(script, /inferState\(`\\$\\{lastUser\\} \\$\\{lastAssistant\\}`\)/); diff --git a/EF-COACH/live-demo/test/compose.test.mjs b/EF-COACH/live-demo/test/compose.test.mjs index 77839cd..d5f838b 100644 --- a/EF-COACH/live-demo/test/compose.test.mjs +++ b/EF-COACH/live-demo/test/compose.test.mjs @@ -75,7 +75,7 @@ async function startGeneratedServer(t) { CONTEXT_BASE_URL: "http://127.0.0.1:9", OPENAI_BASE_URL: "http://127.0.0.1:9/v1", OPENAI_API_KEY: "test-key", - OPENAI_MODEL: "glm-5.1", + OPENAI_MODEL: "glm-4.5-air", }, stdio: ["ignore", "ignore", "pipe"], }); @@ -246,7 +246,7 @@ test("generated VPS-local server forces plain coach output instead of protocol l assert.match(result.stdout, /AbortSignal\.timeout/); }); -test("generated Hostinger compose targets GLM-5.1 with medium reasoning", () => { +test("generated Hostinger compose targets GLM-4.5-Air with thinking mode", () => { const result = spawnSync("node", ["live-demo/scripts/build-hostinger-compose.mjs"], { cwd: new URL("../..", import.meta.url), encoding: "utf8", @@ -260,7 +260,7 @@ test("generated Hostinger compose targets GLM-5.1 with medium reasoning", () => assert.ok(result.stdout.length < 8192, `compose was ${result.stdout.length} bytes`); assert.match(result.stdout, /OPENAI_BASE_URL: https:\/\/api\.z\.ai\/api\/coding\/paas\/v4/); - assert.match(result.stdout, /OPENAI_MODEL: glm-5\.1/); + assert.match(result.stdout, /OPENAI_MODEL: glm-4\.5-air/); assert.match(result.stdout, /thinking:\{type:"enabled"\}/); assert.doesNotMatch(result.stdout, /test-zai-secret/); }); diff --git a/EF-COACH/live-demo/test/llm-client.test.mjs b/EF-COACH/live-demo/test/llm-client.test.mjs index 22d6e17..fa17f8c 100644 --- a/EF-COACH/live-demo/test/llm-client.test.mjs +++ b/EF-COACH/live-demo/test/llm-client.test.mjs @@ -22,15 +22,15 @@ test("resolveLlmConfig supports OpenAI-compatible VPS-local endpoints", () => { assert.equal(config.chatCompletionsUrl, "http://host.docker.internal:8085/v1/chat/completions"); }); -test("resolveLlmConfig supports GLM-5.1 with medium reasoning", () => { +test("resolveLlmConfig supports GLM-4.5-Air with thinking mode", () => { const config = resolveLlmConfig({ LLM_PROVIDER: "zai-coding-plan", ZAI_API_KEY: "zai-secret", }); assert.equal(config.provider, "zai-coding-plan"); - assert.equal(config.providerLabel, "Z.AI GLM-5.1 (medium reasoning)"); - assert.equal(config.model, "glm-5.1"); + assert.equal(config.providerLabel, "Z.AI GLM-4.5-Air"); + assert.equal(config.model, "glm-4.5-air"); assert.equal(config.chatCompletionsUrl, "https://api.z.ai/api/coding/paas/v4/chat/completions"); assert.equal(config.chatMaxTokens, 1200); assert.deepEqual(config.chatExtraBody, { @@ -83,7 +83,7 @@ test("request builders put the coach contract in server-side instructions", () = const glm = buildChatCompletionsRequest({ instructions, messages, - model: "glm-5.1", + model: "glm-4.5-air", maxTokens: 1200, extraBody: { thinking: { type: "enabled" } }, });