diff --git a/frontend/src/components/message/PromptInput.tsx b/frontend/src/components/message/PromptInput.tsx
index b5f63a18..624d3c33 100644
--- a/frontend/src/components/message/PromptInput.tsx
+++ b/frontend/src/components/message/PromptInput.tsx
@@ -1241,7 +1241,7 @@ return (
- {isMobile && showScrollButton ? (
+ {isMobile && showScrollButton && !showVoiceFeedback ? (
+ )
+}
+
export function VoiceStatusOverlay({ show, label, state }: VoiceStatusOverlayProps) {
if (!show || !label || !state) {
return null
}
const isLoading = state === 'starting' || state === 'processing' || state === 'sending'
- const showLoadingText = state !== 'starting'
- const topLabel = state === 'readyToSend'
- ? 'Release'
- : state === 'starting'
- ? 'Starting'
- : state === 'processing'
- ? 'Transcribe'
- : state === 'sending'
- ? 'Sending'
- : 'Swipe'
- const bottomLabel = state === 'starting'
- ? 'Mic'
- : state === 'processing'
- ? 'Speech'
- : state === 'sending'
- ? 'Prompt'
- : state === 'readyToSend'
- ? 'Send'
- : 'Send'
const actionWords = state === 'readyToSend'
? ['Release', 'To', 'Send']
: ['Swipe', 'To', 'Send']
@@ -47,12 +52,11 @@ export function VoiceStatusOverlay({ show, label, state }: VoiceStatusOverlayPro
{isLoading ? (
- <>
+ state === 'processing' ? (
+
+ ) : (
- {showLoadingText && (
-
{topLabel}
- )}
- >
+ )
) : (
<>
@@ -64,11 +68,9 @@ export function VoiceStatusOverlay({ show, label, state }: VoiceStatusOverlayPro
>
)}
- {isLoading && showLoadingText ? (
- {bottomLabel}
- ) : !isLoading ? (
+ {!isLoading && (
- ) : null}
+ )}
)
diff --git a/frontend/src/hooks/useSTT.ts b/frontend/src/hooks/useSTT.ts
index f6ddac00..f4dc5e5c 100644
--- a/frontend/src/hooks/useSTT.ts
+++ b/frontend/src/hooks/useSTT.ts
@@ -5,6 +5,8 @@ import { AudioRecorder } from '@/lib/audioRecorder'
import { sttApi } from '@/api/stt'
import { DEFAULT_STT_CONFIG } from '@/api/types/settings'
+const STT_START_TIMEOUT_MS = 10_000 // Delay (ms) passed to setTimeout in startRecording; when it elapses first, the start attempt is rejected as timed out.
+
export function useSTT(userId = 'default') {
const { preferences } = useSettings(userId)
const [isRecording, setIsRecording] = useState(false)
@@ -21,6 +23,8 @@ export function useSTT(userId = 'default') {
const userIdRef = useRef(userId)
const errorTimeoutRef = useRef | null>(null)
const lastProcessedBlobRef = useRef(null)
+ const startupTimeoutRef = useRef | null>(null)
+ const startOpIdRef = useRef(0)
useEffect(() => {
userIdRef.current = userId
@@ -183,6 +187,26 @@ export function useSTT(userId = 'default') {
}
}, [isEnabled, isExternalProvider, setupAudioRecorder])
+ const clearStartupTimeout = useCallback(() => { // Cancels the pending startup timer, if one is stored, and empties the ref.
+ if (startupTimeoutRef.current) { // Only truthy while a timer handle is held in the ref.
+ clearTimeout(startupTimeoutRef.current)
+ startupTimeoutRef.current = null // Reset so a repeated call skips the branch above.
+ }
+ }, []) // Empty dependency list: the body reads nothing but the ref.
+
+ const abortAndResetOnTimeout = useCallback(() => { // Aborts the active capture backend, then sets the hook to idle with a timeout error. No parameters, no return value.
+ if (isExternalProvider && audioRecorder.current) { // External provider AND a recorder instance already exists.
+ audioRecorder.current.abort()
+ } else { // NOTE(review): also reached when isExternalProvider is true but audioRecorder.current is unset - confirm recognizer.current is always usable here.
+ recognizer.current.abort()
+ }
+ setIsRecording(false)
+ setIsProcessing(false)
+ setState('idle')
+ setIsError(true)
+ setError('Microphone start timed out') // Same wording as the Error the startup timers in startRecording reject with.
+ }, [isExternalProvider]) // Only isExternalProvider is listed; the other values used are refs and setters (presumably useState setters - verify).
+
const startRecording = useCallback(async (): Promise => {
if (!isSupported) {
setError('Speech recognition is not supported in this browser')
@@ -202,6 +226,9 @@ export function useSTT(userId = 'default') {
setError(null)
lastProcessedBlobRef.current = null
+ const startOpId = ++startOpIdRef.current
+ clearStartupTimeout()
+
if (isExternalProvider) {
if (!audioRecorder.current) {
audioRecorder.current = new AudioRecorder()
@@ -210,11 +237,30 @@ export function useSTT(userId = 'default') {
try {
setIsProcessing(true)
- await audioRecorder.current.start()
+
+ const startupPromise = audioRecorder.current.start()
+ const timeoutPromise = new Promise((_, reject) => {
+ startupTimeoutRef.current = setTimeout(() => {
+ if (startOpIdRef.current !== startOpId) return
+ reject(new Error('Microphone start timed out'))
+ }, STT_START_TIMEOUT_MS)
+ })
+
+ await Promise.race([startupPromise, timeoutPromise])
+ clearStartupTimeout()
+
+ if (startOpIdRef.current !== startOpId) return false
+
setIsProcessing(false)
return true
} catch (err) {
+ clearStartupTimeout()
+ if (startOpIdRef.current !== startOpId) return false
setIsProcessing(false)
+ if (err instanceof Error && err.message === 'Microphone start timed out') {
+ abortAndResetOnTimeout()
+ return false
+ }
setIsError(true)
setError(err instanceof Error ? err.message : 'Failed to start recording')
return false
@@ -228,16 +274,35 @@ export function useSTT(userId = 'default') {
try {
setIsProcessing(true)
- await recognizer.current.start(options)
+
+ const startupPromise = recognizer.current.start(options)
+ const timeoutPromise = new Promise((_, reject) => {
+ startupTimeoutRef.current = setTimeout(() => {
+ if (startOpIdRef.current !== startOpId) return
+ reject(new Error('Microphone start timed out'))
+ }, STT_START_TIMEOUT_MS)
+ })
+
+ await Promise.race([startupPromise, timeoutPromise])
+ clearStartupTimeout()
+
+ if (startOpIdRef.current !== startOpId) return false
+
return true
} catch (err) {
+ clearStartupTimeout()
+ if (startOpIdRef.current !== startOpId) return false
setIsProcessing(false)
+ if (err instanceof Error && err.message === 'Microphone start timed out') {
+ abortAndResetOnTimeout()
+ return false
+ }
setIsError(true)
setError(err instanceof Error ? err.message : 'Failed to start recording')
return false
}
}
- }, [isSupported, isEnabled, isExternalProvider, config.language, setupAudioRecorder])
+ }, [isSupported, isEnabled, isExternalProvider, config.language, setupAudioRecorder, clearStartupTimeout, abortAndResetOnTimeout])
const stopRecording = useCallback(() => {
if (isExternalProvider && audioRecorder.current) {
@@ -286,8 +351,9 @@ export function useSTT(userId = 'default') {
useEffect(() => {
return () => {
if (errorTimeoutRef.current) clearTimeout(errorTimeoutRef.current)
+ clearStartupTimeout()
}
- }, [])
+ }, [clearStartupTimeout])
return {
isRecording,