|
1062 | 1062 | }); |
1063 | 1063 | }); |
1064 | 1064 |
|
1065 | | - // STT record button — start recording via mic |
| 1065 | + // STT record button — start recording via mic (card mode) |
1066 | 1066 | container.querySelectorAll('.ai-stt-record').forEach(function (btn) { |
1067 | 1067 | btn.addEventListener('click', function (e) { |
1068 | 1068 | e.preventDefault(); |
|
1079 | 1079 | return; |
1080 | 1080 | } |
1081 | 1081 |
|
1082 | | - // Start the STT engine |
1083 | | - M.speechToText.start(); |
1084 | | - |
1085 | 1082 | // Switch button state |
1086 | 1083 | this.style.display = 'none'; |
1087 | 1084 | var stopBtn = card.querySelector('.ai-stt-stop'); |
|
1092 | 1089 | var resultDiv = card.querySelector('.ai-stt-result'); |
1093 | 1090 | var resultText = card.querySelector('.ai-stt-result-text'); |
1094 | 1091 | if (resultDiv) resultDiv.style.display = ''; |
1095 | | - if (resultText) resultText.textContent = '🎤 Listening… speak now'; |
| 1092 | + if (resultText) { |
| 1093 | + resultText.innerHTML = '<span class="stt-interim">Listening… speak now</span>'; |
| 1094 | + } |
| 1095 | + |
| 1096 | + // Accumulated transcription for this recording session |
| 1097 | + var accumulated = ''; |
| 1098 | + var lastChunkNorm = ''; // normalized last chunk for dedup |
| 1099 | + |
| 1100 | + // Start in card mode — text routes to the card, not the editor |
| 1101 | + M.speechToText.startForCard( |
| 1102 | + // onText — receives a final transcription chunk, drops repeats of the previous chunk, appends the rest to `accumulated` and shows it in the card
| 1103 | + function (text) {
| 1104 | + if (!text || !text.trim()) return; // ignore empty or whitespace-only results
| 1105 | + var chunk = text.trim();
| 1106 | +
| 1107 | + // Dedup against the previously accepted chunk only: compare lowercased text with non-word, non-space characters removed, and skip when either string contains the other. NOTE(review): this is a substring match, so a short genuine utterance (e.g. "no" right after "I know") is dropped too — confirm that is acceptable.
| 1108 | + var normalizedChunk = chunk.toLowerCase().replace(/[^\w\s]/g, '').trim();
| 1109 | + if (lastChunkNorm && normalizedChunk) { // comparison is skipped when either normalized string is empty
| 1110 | + if (lastChunkNorm.includes(normalizedChunk) || normalizedChunk.includes(lastChunkNorm)) {
| 1111 | + console.log('🎤 STT card: skipping duplicate chunk', JSON.stringify(chunk));
| 1112 | + return;
| 1113 | + }
| 1114 | + }
| 1115 | +
| 1116 | + lastChunkNorm = normalizedChunk; // remembered for the next dedup comparison
| 1117 | + accumulated += (accumulated ? ' ' : '') + chunk; // chunks are joined with a single space
| 1118 | + if (resultText) {
| 1119 | + resultText.textContent = accumulated; // textContent: the transcript is rendered as plain text, replacing any interim span
| 1120 | + }
| 1121 | + },
| 1122 | + // onInterim — live interim/partial text |
| 1123 | + function (interim) { |
| 1124 | + if (!resultText) return; |
| 1125 | + if (!interim) { |
| 1126 | + // Interim cleared — show accumulated or listening status |
| 1127 | + resultText.innerHTML = accumulated |
| 1128 | + ? accumulated |
| 1129 | + : '<span class="stt-interim">Listening… speak now</span>'; |
| 1130 | + } else { |
| 1131 | + // Show accumulated + current interim preview |
| 1132 | + resultText.innerHTML = accumulated |
| 1133 | + ? accumulated + ' <span class="stt-interim">' + escapeHtml(interim) + '</span>' |
| 1134 | + : '<span class="stt-interim">' + escapeHtml(interim) + '</span>'; |
| 1135 | + } |
| 1136 | + } |
| 1137 | + ); |
1096 | 1138 |
|
1097 | 1139 | M.showToast && M.showToast('🎤 Recording started — speak now', 'info'); |
1098 | 1140 | }); |
1099 | 1141 | }); |
1100 | 1142 |
|
1101 | | - // STT stop button — stop recording and capture transcription |
| 1143 | + // STT stop button — stop recording (card mode) |
1102 | 1144 | container.querySelectorAll('.ai-stt-stop').forEach(function (btn) { |
1103 | 1145 | btn.addEventListener('click', function (e) { |
1104 | 1146 | e.preventDefault(); |
1105 | 1147 | e.stopPropagation(); |
1106 | 1148 | var card = this.closest('.ai-stt-card'); |
1107 | 1149 | if (!card) return; |
1108 | 1150 |
|
1109 | | - // Stop the STT engine |
| 1151 | + // Stop the STT engine in card mode |
1110 | 1152 | if (M.speechToText && M.speechToText.isListening()) { |
1111 | | - M.speechToText.stop(); |
| 1153 | + M.speechToText.stopForCard(); |
1112 | 1154 | } |
1113 | 1155 |
|
1114 | 1156 | // Switch button state |
|
1117 | 1159 | if (recordBtn) recordBtn.style.display = ''; |
1118 | 1160 | card.classList.remove('ai-stt-recording'); |
1119 | 1161 |
|
1120 | | - // Grab whatever was transcribed from the editor |
1121 | | - // The STT engine inserts text at cursor — read the latest editor content |
| 1162 | + // Finalize the result area |
1122 | 1163 | var resultText = card.querySelector('.ai-stt-result-text'); |
1123 | | - if (resultText && resultText.textContent === '🎤 Listening… speak now') { |
1124 | | - resultText.textContent = '⏳ Processing transcription…'; |
1125 | | - // Give a moment for final STT result to arrive |
1126 | | - setTimeout(function () { |
1127 | | - if (resultText.textContent === '⏳ Processing transcription…') { |
1128 | | - resultText.textContent = '(No speech detected — try again)'; |
1129 | | - } |
1130 | | - }, 3000); |
| 1164 | + if (resultText) { |
| 1165 | + // Strip any remaining interim spans to reveal final text |
| 1166 | + var interimSpans = resultText.querySelectorAll('.stt-interim'); |
| 1167 | + interimSpans.forEach(function (s) { s.remove(); }); |
| 1168 | + var finalText = resultText.textContent.trim(); |
| 1169 | + if (!finalText) { |
| 1170 | + resultText.textContent = '(No speech detected — try again)'; |
| 1171 | + } else { |
| 1172 | + resultText.textContent = finalText; |
| 1173 | + } |
1131 | 1174 | } |
1132 | 1175 |
|
1133 | 1176 | M.showToast && M.showToast('🎤 Recording stopped', 'info'); |
|
0 commit comments