From 4790e984e83b5fa96d37e7a843f9804ae4d6a25e Mon Sep 17 00:00:00 2001 From: Tyler Stapler Date: Sat, 2 May 2026 23:37:53 -0700 Subject: [PATCH 1/4] feat(voice): rich markdown formatting, transcript pages, current-page insertion, UX improvements - Rich LLM system prompt with few-shot examples for #tags, key:: value, **bold**/*italic*, and - TODO action item detection - Long notes (>=20 words, configurable) create a dedicated transcript page Voice Note YYYY-MM-DD HH:mm:ss with source:: backlink; short notes inline - Voice notes insert into the currently-open page, falling back to journal - Removed 10-word minimum word-count gate - Android SpeechRecognizer silence timeout: 3s->6s complete, 1.5s->3s partial, +2s minimum so users can pause to think - includeRawTranscript toggle controls #+BEGIN_QUOTE on transcript page - transcriptPageWordThreshold setting configurable from settings UI Co-Authored-By: Claude Sonnet 4.6 --- .../voice/AndroidSpeechRecognizerProvider.kt | 5 +- .../voice/VoiceCaptureViewModelTest.kt | 115 +++++++------ .../voice/VoiceNoteBlockFormatTest.kt | 157 +++++++++++++++--- .../stelekit/voice/VoiceSettingsTest.kt | 17 ++ .../stelekit/repository/JournalService.kt | 74 +++++++++ .../kotlin/dev/stapler/stelekit/ui/App.kt | 6 +- .../settings/VoiceCaptureSettings.kt | 25 +++ .../stelekit/voice/VoiceCaptureViewModel.kt | 105 +++++++++--- .../stelekit/voice/VoicePipelineConfig.kt | 45 ++++- .../stelekit/voice/VoicePipelineFactory.kt | 2 + .../stapler/stelekit/voice/VoiceSettings.kt | 14 ++ 11 files changed, 459 insertions(+), 106 deletions(-) diff --git a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt index 0f607c48..da03038a 100644 --- a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt +++ b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt @@ 
-109,8 +109,9 @@ class AndroidSpeechRecognizerProvider( putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM) putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, true) putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1) - putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 3_000L) - putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 1_500L) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 6_000L) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 3_000L) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, 2_000L) } recognizer.startListening(intent) } catch (t: Throwable) { diff --git a/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt b/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt index 60723b4e..f2df6725 100644 --- a/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt +++ b/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt @@ -15,6 +15,7 @@ import kotlinx.coroutines.test.runTest import kotlin.test.Test import kotlin.test.assertEquals import kotlin.test.assertIs +import kotlin.test.assertTrue class VoiceCaptureViewModelTest { @@ -23,7 +24,7 @@ class VoiceCaptureViewModelTest { @Test fun `initial state is Idle`() = runTest { - val vm = VoiceCaptureViewModel(VoicePipelineConfig(), makeJournalService(), this) + val vm = VoiceCaptureViewModel(VoicePipelineConfig(), makeJournalService(), scope = this) assertIs(vm.state.first()) } @@ -38,7 +39,7 @@ class VoiceCaptureViewModelTest { val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success(transcript) } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -47,27 
+48,6 @@ class VoiceCaptureViewModelTest { assertIs(vm.state.first()) } - @Test - fun `word-count gate under 10 words emits Error at TRANSCRIBING`() = runTest { - val fakeRecorder = object : AudioRecorder { - override suspend fun startRecording(): PlatformAudioFile = PlatformAudioFile("/tmp/test.m4a") - override suspend fun stopRecording() = Unit - override suspend fun readBytes(file: PlatformAudioFile) = ByteArray(100) - } - val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success("too short") } - val vm = VoiceCaptureViewModel( - VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), - makeJournalService(), this, - ) - - vm.onMicTapped() - advanceUntilIdle() - - val state = vm.state.first() - assertIs(state) - assertEquals(PipelineStage.TRANSCRIBING, state.stage) - } - @Test fun `permission denied (empty path) emits Error at RECORDING`() = runTest { val fakeRecorder = object : AudioRecorder { @@ -76,7 +56,7 @@ class VoiceCaptureViewModelTest { } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -98,7 +78,7 @@ class VoiceCaptureViewModelTest { } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -118,7 +98,7 @@ class VoiceCaptureViewModelTest { } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -139,7 +119,7 @@ class VoiceCaptureViewModelTest { val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Failure.NetworkError } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -163,7 +143,7 @@ class VoiceCaptureViewModelTest { val 
fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success(transcript) } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -185,7 +165,7 @@ class VoiceCaptureViewModelTest { } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -216,7 +196,7 @@ class VoiceCaptureViewModelTest { val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Empty } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -242,7 +222,7 @@ class VoiceCaptureViewModelTest { val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Failure.NetworkError } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -261,7 +241,7 @@ class VoiceCaptureViewModelTest { val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Empty } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -282,7 +262,7 @@ class VoiceCaptureViewModelTest { val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Failure.PermissionDenied } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -309,7 +289,7 @@ class VoiceCaptureViewModelTest { } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt, llmProvider = fakeLlm), - makeJournalService(), 
this, + makeJournalService(), scope = this, ) val collectionJob = launch { @@ -335,7 +315,7 @@ class VoiceCaptureViewModelTest { val fakeLlm = LlmFormatterProvider { _, _ -> LlmResult.Failure.NetworkError } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt, llmProvider = fakeLlm), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -360,7 +340,7 @@ class VoiceCaptureViewModelTest { val fakeLlm = LlmFormatterProvider { _, _ -> LlmResult.Failure.ApiError(401, "Invalid API key") } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt, llmProvider = fakeLlm), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -374,43 +354,70 @@ class VoiceCaptureViewModelTest { } @Test - fun `9-word transcript emits Error at TRANSCRIBING`() = runTest { + fun `2-word transcript reaches Done state (AC-11)`() = runTest { val fakeRecorder = object : AudioRecorder { override suspend fun startRecording(): PlatformAudioFile = PlatformAudioFile("/tmp/test.m4a") override suspend fun stopRecording() = Unit override suspend fun readBytes(file: PlatformAudioFile) = ByteArray(100) } - val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success("one two three four five six seven eight nine") } + val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success("buy milk") } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), - makeJournalService(), this, + makeJournalService(), scope = this, ) - vm.onMicTapped() advanceUntilIdle() - - val state = vm.state.first() - assertIs(state) - assertEquals(PipelineStage.TRANSCRIBING, state.stage) + assertIs(vm.state.first()) } @Test - fun `10-word transcript reaches Done state`() = runTest { + fun `when page is open voice note is appended to that page (AC-8)`() = runTest { + val blockRepo = InMemoryBlockRepository() + val 
pageRepo = InMemoryPageRepository() + val journalService = JournalService(pageRepo, blockRepo) + val targetPage = journalService.ensureTodayJournal() + val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success("buy milk") } val fakeRecorder = object : AudioRecorder { override suspend fun startRecording(): PlatformAudioFile = PlatformAudioFile("/tmp/test.m4a") override suspend fun stopRecording() = Unit override suspend fun readBytes(file: PlatformAudioFile) = ByteArray(100) } - val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success("one two three four five six seven eight nine ten") } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), - makeJournalService(), this, + journalService, + currentOpenPageUuid = { targetPage.uuid }, + scope = this, ) - vm.onMicTapped() advanceUntilIdle() + assertIs(vm.state.first()) + val blocks = blockRepo.getBlocksForPage(targetPage.uuid).first().getOrNull().orEmpty() + assertTrue(blocks.any { it.content.contains("πŸ“ Voice note") }) + } + @Test + fun `when no page is open voice note falls back to today journal (AC-9)`() = runTest { + val blockRepo = InMemoryBlockRepository() + val pageRepo = InMemoryPageRepository() + val journalService = JournalService(pageRepo, blockRepo) + val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success("buy milk") } + val fakeRecorder = object : AudioRecorder { + override suspend fun startRecording(): PlatformAudioFile = PlatformAudioFile("/tmp/test.m4a") + override suspend fun stopRecording() = Unit + override suspend fun readBytes(file: PlatformAudioFile) = ByteArray(100) + } + val vm = VoiceCaptureViewModel( + VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), + journalService, + currentOpenPageUuid = { null }, + scope = this, + ) + vm.onMicTapped() + advanceUntilIdle() assertIs(vm.state.first()) + val journalPage = journalService.ensureTodayJournal() + val blocks = 
blockRepo.getBlocksForPage(journalPage.uuid).first().getOrNull().orEmpty() + assertTrue(blocks.any { it.content.contains("πŸ“ Voice note") }) } // --- DirectSpeechProvider path --- @@ -423,7 +430,7 @@ class VoiceCaptureViewModelTest { } val vm = VoiceCaptureViewModel( VoicePipelineConfig(directSpeechProvider = fakeDirectProvider), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -439,7 +446,7 @@ class VoiceCaptureViewModelTest { } val vm = VoiceCaptureViewModel( VoicePipelineConfig(directSpeechProvider = fakeDirectProvider), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -457,7 +464,7 @@ class VoiceCaptureViewModelTest { } val vm = VoiceCaptureViewModel( VoicePipelineConfig(directSpeechProvider = fakeDirectProvider), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -478,7 +485,7 @@ class VoiceCaptureViewModelTest { } val vm = VoiceCaptureViewModel( VoicePipelineConfig(directSpeechProvider = fakeDirectProvider), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -505,7 +512,7 @@ class VoiceCaptureViewModelTest { } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt, llmProvider = fakeLlm), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() @@ -538,7 +545,7 @@ class VoiceCaptureViewModelTest { val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Empty } val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), - makeJournalService(), this, + makeJournalService(), scope = this, ) vm.onMicTapped() diff --git a/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceNoteBlockFormatTest.kt b/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceNoteBlockFormatTest.kt index 3c99894c..d6af6b3d 100644 --- 
a/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceNoteBlockFormatTest.kt +++ b/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceNoteBlockFormatTest.kt @@ -9,6 +9,7 @@ import kotlinx.coroutines.flow.first import kotlinx.coroutines.test.advanceUntilIdle import kotlinx.coroutines.test.runTest import kotlin.test.Test +import kotlin.test.assertFalse import kotlin.test.assertIs import kotlin.test.assertNotNull import kotlin.test.assertTrue @@ -18,36 +19,32 @@ class VoiceNoteBlockFormatTest { private fun makeViewModel(scope: kotlinx.coroutines.CoroutineScope) = VoiceCaptureViewModel( VoicePipelineConfig(), JournalService(InMemoryPageRepository(), InMemoryBlockRepository()), - scope, + currentOpenPageUuid = { null }, + scope = scope, ) @Test fun `block starts with voice note header line`() = runTest { - val block = makeViewModel(this).buildVoiceNoteBlock("- formatted bullet.", "raw transcript text") + val block = buildVoiceNoteBlock( + pageTitle = "Voice Note 2026-05-02 14:35:22", + timeLabel = "14:35:22", + formattedText = "- formatted bullet.", + ) assertTrue(block.startsWith("- πŸ“ Voice note ("), "Expected block to start with '- πŸ“ Voice note (', got: $block") } @Test fun `block contains formatted text`() = runTest { val formatted = "- point one\n- point two." 
- val block = makeViewModel(this).buildVoiceNoteBlock(formatted, "raw transcript") + val block = buildVoiceNoteBlock("Test Page", "14:35:22", formatted) assertTrue(block.contains("point one"), "Expected formatted text in block") assertTrue(block.contains("point two"), "Expected formatted text in block") } - @Test - fun `block contains raw transcript in BEGIN_QUOTE block`() = runTest { - val raw = "this is the raw transcript text" - val block = makeViewModel(this).buildVoiceNoteBlock("- formatted.", raw) - assertTrue(block.contains("#+BEGIN_QUOTE"), "Expected #+BEGIN_QUOTE in block") - assertTrue(block.contains(raw), "Expected raw transcript in #+END_QUOTE block") - assertTrue(block.contains("#+END_QUOTE"), "Expected #+END_QUOTE in block") - } - @Test fun `multiline formatted text has each line indented under header`() = runTest { val formatted = "- line one\n- line two\n- line three." - val block = makeViewModel(this).buildVoiceNoteBlock(formatted, "raw") + val block = buildVoiceNoteBlock("Test Page", "14:35:22", formatted) assertTrue(block.contains("line one"), "Expected 'line one' in block") assertTrue(block.contains("line two"), "Expected 'line two' in block") assertTrue(block.contains("line three"), "Expected 'line three' in block") @@ -55,10 +52,119 @@ class VoiceNoteBlockFormatTest { @Test fun `timestamp in header has zero-padded hours and minutes`() = runTest { - val block = makeViewModel(this).buildVoiceNoteBlock("- formatted.", "raw") + val block = buildVoiceNoteBlock( + pageTitle = "Voice Note 2026-05-02 14:35:22", + timeLabel = "14:35:22", + formattedText = "- formatted.", + ) val headerLine = block.lines().first() - val timeRegex = Regex("""- πŸ“ Voice note \(\d{2}:\d{2}\)""") - assertTrue(timeRegex.containsMatchIn(headerLine), "Expected HH:mm timestamp in header, got: $headerLine") + val timeRegex = Regex("""- πŸ“ Voice note \(\d{2}:\d{2}:\d{2}\) \[\[Voice Note \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\]\]""") + 
assertTrue(timeRegex.containsMatchIn(headerLine), "Expected HH:mm:ss timestamp and wikilink in header, got: $headerLine") + } + + @Test + fun `buildVoiceNoteBlock_should_contain_wikilink_to_transcript_page`() = runTest { + val block = buildVoiceNoteBlock( + pageTitle = "Voice Note 2026-05-02 14:35:22", + timeLabel = "14:35:22", + formattedText = "- formatted bullet.", + ) + assertTrue(block.contains("[[Voice Note 2026-05-02 14:35:22]]"), + "Expected wikilink to transcript page in block, got: $block") + } + + @Test + fun `transcript page includes BEGIN_QUOTE when includeRawTranscript is true`() = runTest { + val raw = "this is the raw transcript text" + val content = buildTranscriptPageContent( + sourcePage = "Today", + formattedText = "- formatted.", + rawTranscript = raw, + includeRawTranscript = true, + ) + assertTrue(content.contains("#+BEGIN_QUOTE")) + assertTrue(content.contains(raw)) + assertTrue(content.contains("#+END_QUOTE")) + } + + @Test + fun `transcript page omits BEGIN_QUOTE when includeRawTranscript is false`() = runTest { + val raw = "this is the raw transcript text" + val content = buildTranscriptPageContent( + sourcePage = "Today", + formattedText = "- formatted.", + rawTranscript = raw, + includeRawTranscript = false, + ) + assertFalse(content.contains("#+BEGIN_QUOTE")) + } + + @Test + fun `buildTranscriptPageContent_should_start_with_source_property`() = runTest { + val content = buildTranscriptPageContent( + sourcePage = "My Page", + formattedText = "- bullet one", + rawTranscript = "raw text", + includeRawTranscript = false, + ) + assertTrue(content.startsWith("source:: [[My Page]]"), + "Expected content to start with source:: property, got: $content") + } + + @Test + fun `buildTranscriptPageContent_should_include_formatted_bullets_as_primary_content`() = runTest { + val formatted = "- TODO Call Alice about [[project]]\n- #meeting noted" + val content = buildTranscriptPageContent( + sourcePage = "Today", + formattedText = formatted, + 
rawTranscript = "call alice about the project, meeting noted", + includeRawTranscript = false, + ) + assertTrue(content.contains("- TODO Call Alice about [[project]]"), + "Expected formatted TODO bullet in transcript page, got: $content") + assertTrue(content.contains("#meeting"), + "Expected #tag in transcript page, got: $content") + } + + @Test + fun `buildTranscriptPageContent_should_passthrough_LLM_output_verbatim`() = runTest { + val formatted = "- project:: Stelekit\n- **bold term** in output\n- #tag example\n- TODO action" + val content = buildTranscriptPageContent( + sourcePage = "Source", + formattedText = formatted, + rawTranscript = "raw", + includeRawTranscript = false, + ) + assertTrue(content.contains("project:: Stelekit")) + assertTrue(content.contains("**bold term**")) + assertTrue(content.contains("#tag example")) + assertTrue(content.contains("TODO action")) + } + + @Test + fun `buildTranscriptPageContent_should_use_raw_text_without_quote_wrapper_when_llm_disabled`() = runTest { + val raw = "buy milk and eggs" + val content = buildTranscriptPageContent( + sourcePage = "Source", + formattedText = null, // LLM disabled or failed + rawTranscript = raw, + includeRawTranscript = true, // toggle is true, but has no effect when formattedText is null + ) + assertFalse(content.contains("#+BEGIN_QUOTE"), + "Expected no #+BEGIN_QUOTE when formattedText is null, got: $content") + assertTrue(content.contains(raw), + "Expected raw transcript in output, got: $content") + } + + @Test + fun `buildVoiceNoteBlockInline has no wikilink and contains formatted text`() = runTest { + val block = buildVoiceNoteBlockInline( + timeLabel = "08:05:03", + formattedText = "buy milk", + ) + assertTrue(block.startsWith("- πŸ“ Voice note (08:05:03)"), "Expected inline header, got: $block") + assertFalse(block.contains("[["), "Inline block must not contain a wikilink, got: $block") + assertTrue(block.contains("buy milk"), "Expected formatted text in inline block") } @Test @@ 
-71,12 +177,14 @@ class VoiceNoteBlockFormatTest { } val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success(transcript) } val blockRepo = InMemoryBlockRepository() - val fakeJournal = JournalService(InMemoryPageRepository(), blockRepo) + val pageRepo = InMemoryPageRepository() + val fakeJournal = JournalService(pageRepo, blockRepo) val vm = VoiceCaptureViewModel( VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), fakeJournal, - this, + currentOpenPageUuid = { null }, + scope = this, ) vm.onMicTapped() advanceUntilIdle() @@ -89,7 +197,16 @@ class VoiceNoteBlockFormatTest { assertTrue(blocks.isNotEmpty(), "Expected at least one block inserted") val voiceBlock = blocks.firstOrNull { it.content.contains("πŸ“ Voice note") } assertNotNull(voiceBlock, "Expected a block with voice note header") - assertTrue(voiceBlock.content.contains("#+BEGIN_QUOTE"), "Expected #+BEGIN_QUOTE in block") - assertTrue(voiceBlock.content.contains(transcript), "Expected raw transcript in block") + // 11-word transcript is below the default threshold of 20 β†’ inline path (no wikilink, no transcript page) + assertFalse(voiceBlock.content.contains("[[Voice Note"), + "Short transcript must produce inline block without wikilink") + assertFalse(voiceBlock.content.contains("#+BEGIN_QUOTE"), + "#+BEGIN_QUOTE must not appear in inline block") + + // Verify no transcript page was created (below threshold) + val allPages = pageRepo.getAllPages().first().getOrNull().orEmpty() + val transcriptPages = allPages.filter { it.name.startsWith("Voice Note ") } + assertTrue(transcriptPages.isEmpty(), + "Expected no Voice Note transcript page for short (below-threshold) note") } } diff --git a/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceSettingsTest.kt b/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceSettingsTest.kt index 14e55ae5..d58ef933 100644 --- a/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceSettingsTest.kt +++ 
b/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceSettingsTest.kt @@ -56,4 +56,21 @@ class VoiceSettingsTest { val settings = VoiceSettings(MapSettings()) assertTrue(settings.getLlmEnabled()) } + + // --- includeRawTranscript --- + + @Test + fun `getIncludeRawTranscript_should_return_true_by_default`() { + val settings = VoiceSettings(MapSettings()) + assertTrue(settings.getIncludeRawTranscript(), "Default should be true") + } + + @Test + fun `setIncludeRawTranscript_should_persist_value_across_get_calls`() { + val settings = VoiceSettings(MapSettings()) + settings.setIncludeRawTranscript(false) + assertFalse(settings.getIncludeRawTranscript(), "Expected persisted false value") + settings.setIncludeRawTranscript(true) + assertTrue(settings.getIncludeRawTranscript(), "Expected persisted true value after re-setting to true") + } } diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt index d119336c..33ba0f18 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt @@ -159,6 +159,80 @@ class JournalService( } } + /** + * Appends a new block with [content] to the page identified by [pageUuid]. + * Falls back to today's journal if [pageUuid] resolves to no page. 
+ */ + @OptIn(DirectRepositoryWrite::class) + suspend fun appendToPage(pageUuid: String, content: String) { + val page = pageRepository.getPageByUuid(pageUuid).first().getOrNull() + if (page == null) { + appendToToday(content) + return + } + val blocks = blockRepository.getBlocksForPage(page.uuid).first().getOrNull() ?: emptyList() + val nextPosition = (blocks.maxOfOrNull { it.position } ?: -1) + 1 + val newBlock = Block( + uuid = UuidGenerator.generateV7(), + pageUuid = page.uuid, + content = content, + position = nextPosition, + createdAt = Clock.System.now(), + updatedAt = Clock.System.now(), + ) + if (writeActor != null) { + writeActor.saveBlock(newBlock) + } else { + blockRepository.saveBlock(newBlock) + } + } + + /** + * Creates a new page with [title] and inserts [content] as its first block. + * If a page with that exact title already exists, appends [content] to it instead. + * + * @return the [Page] that was created or found. + */ + @OptIn(DirectRepositoryWrite::class) + suspend fun createTranscriptPage(title: String, content: String): Page { + val existing = pageRepository.getPageByName(title).first().getOrNull() + if (existing != null) { + appendToPage(existing.uuid, content) + return existing + } + val pageUuid = UuidGenerator.generateV7() + val newPage = Page( + uuid = pageUuid, + name = title, + createdAt = Clock.System.now(), + updatedAt = Clock.System.now(), + isJournal = false, + ) + if (writeActor != null) { + writeActor.savePage(newPage) + } else { + pageRepository.savePage(newPage) + } + val newBlock = Block( + uuid = UuidGenerator.generateV7(), + pageUuid = pageUuid, + content = content, + position = 0, + createdAt = Clock.System.now(), + updatedAt = Clock.System.now(), + ) + if (writeActor != null) { + writeActor.saveBlock(newBlock) + } else { + blockRepository.saveBlock(newBlock) + } + return newPage + } + + /** Returns the name of the page with [uuid], or null if not found. */ + suspend fun getPageNameByUuid(uuid: String): String? 
= + pageRepository.getPageByUuid(uuid).first().getOrNull()?.name + private suspend fun healJournalDate(page: Page, date: LocalDate): Page { logger.info("Healing missing journal_date for page ${page.uuid} (name=${page.name})") val healed = page.copy(journalDate = date, isJournal = true, updatedAt = Clock.System.now()) diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt index 5554c60a..fa977838 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt @@ -460,7 +460,11 @@ private fun GraphContent( JournalsViewModel(repos.journalService, blockStateManager) } val voiceCaptureViewModel = remember(voicePipeline) { - VoiceCaptureViewModel(voicePipeline, repos.journalService) + VoiceCaptureViewModel( + voicePipeline, + repos.journalService, + currentOpenPageUuid = { viewModel.uiState.value.currentPage?.uuid }, + ) } DisposableEffect(voiceCaptureViewModel) { onDispose { voiceCaptureViewModel.close() } diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt index a98b682c..387e3b55 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt @@ -19,6 +19,8 @@ import androidx.compose.runtime.remember import androidx.compose.runtime.setValue import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier +import androidx.compose.foundation.text.KeyboardOptions +import androidx.compose.ui.text.input.KeyboardType import androidx.compose.ui.text.input.PasswordVisualTransformation import androidx.compose.ui.unit.dp import dev.stapler.stelekit.voice.VoiceSettings @@ -36,6 +38,8 @@ fun VoiceCaptureSettings( var llmEnabled by remember { 
mutableStateOf(voiceSettings.getLlmEnabled()) } var useDeviceStt by remember { mutableStateOf(voiceSettings.getUseDeviceStt()) } var useDeviceLlm by remember { mutableStateOf(voiceSettings.getUseDeviceLlm()) } + var includeRawTranscript by remember { mutableStateOf(voiceSettings.getIncludeRawTranscript()) } + var transcriptPageWordThreshold by remember { mutableStateOf(voiceSettings.getTranscriptPageWordThreshold().toString()) } var saved by remember { mutableStateOf(false) } SettingsSection("Transcription (Speech-to-Text)") { @@ -146,6 +150,25 @@ fun VoiceCaptureSettings( ) } } + Row( + modifier = Modifier.fillMaxWidth().padding(top = 8.dp), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically, + ) { + Text("Include raw transcript in note", style = MaterialTheme.typography.bodyMedium) + Switch( + checked = includeRawTranscript, + onCheckedChange = { includeRawTranscript = it; saved = false }, + ) + } + OutlinedTextField( + value = transcriptPageWordThreshold, + onValueChange = { transcriptPageWordThreshold = it; saved = false }, + label = { Text("Create transcript page after N words") }, + singleLine = true, + keyboardOptions = KeyboardOptions(keyboardType = KeyboardType.Number), + modifier = Modifier.fillMaxWidth().padding(top = 8.dp), + ) } Column(modifier = Modifier.padding(vertical = 8.dp)) { @@ -162,6 +185,8 @@ fun VoiceCaptureSettings( voiceSettings.setLlmEnabled(llmEnabled) voiceSettings.setUseDeviceStt(useDeviceStt) voiceSettings.setUseDeviceLlm(useDeviceLlm) + voiceSettings.setIncludeRawTranscript(includeRawTranscript) + voiceSettings.setTranscriptPageWordThreshold(transcriptPageWordThreshold.toIntOrNull() ?: 20) saved = true onRebuildPipeline() }, diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt index e8e0ba77..43d42f50 100644 --- 
a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt @@ -21,6 +21,7 @@ private const val MAX_TRANSCRIPT_CHARS = 10_000 class VoiceCaptureViewModel( private val pipeline: VoicePipelineConfig, private val journalService: JournalService, + private val currentOpenPageUuid: () -> String? = { null }, // Default scope owns its lifecycle; callers in remember{} must not pass rememberCoroutineScope() // which is cancelled when the composable leaves composition. Tests inject a TestCoroutineScope. scope: CoroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Default), @@ -113,21 +114,15 @@ class VoiceCaptureViewModel( private suspend fun processTranscript(fullTranscript: String) { val inputTruncated = fullTranscript.length > MAX_TRANSCRIPT_CHARS val rawTranscript = if (inputTruncated) fullTranscript.take(MAX_TRANSCRIPT_CHARS) else fullTranscript - val wordCount = rawTranscript.split(Regex("\\s+")).count { it.isNotBlank() } - if (wordCount < pipeline.minWordCount) { - _state.value = VoiceCaptureState.Error( - PipelineStage.TRANSCRIBING, - "Recording too short β€” try speaking for a few more seconds" - ) - return - } _state.value = VoiceCaptureState.Formatting val prompt = pipeline.systemPrompt.replace("{{TRANSCRIPT}}", rawTranscript) var isLikelyTruncated = inputTruncated + var llmProducedOutput = false val formattedText = when (val llmResult = pipeline.llmProvider.format(rawTranscript, prompt)) { is LlmResult.Success -> { isLikelyTruncated = isLikelyTruncated || llmResult.isLikelyTruncated + llmProducedOutput = true llmResult.formattedText } is LlmResult.Failure -> { @@ -136,27 +131,93 @@ class VoiceCaptureViewModel( } } - journalService.appendToToday(buildVoiceNoteBlock(formattedText, rawTranscript)) + val now = Clock.System.now().toLocalDateTime(TimeZone.currentSystemDefault()) + val timeLabel = "${now.hour.toString().padStart(2, 
'0')}:${now.minute.toString().padStart(2, '0')}:${now.second.toString().padStart(2, '0')}" + val dateLabel = "${now.year}-${now.monthNumber.toString().padStart(2, '0')}-${now.dayOfMonth.toString().padStart(2, '0')}" + val pageTitle = "Voice Note $dateLabel $timeLabel" + + val targetPageUuid = currentOpenPageUuid() + + val wordCount = formattedText.split(Regex("\\s+")).count { it.isNotBlank() } + val useTranscriptPage = wordCount >= pipeline.transcriptPageWordThreshold + + val inlineBlock = if (useTranscriptPage) { + val sourcePage: String = if (targetPageUuid != null) { + journalService.getPageNameByUuid(targetPageUuid) ?: dateLabel.replace('-', '_') + } else { + dateLabel.replace('-', '_') + } + + val transcriptPageContent = buildTranscriptPageContent( + sourcePage = sourcePage, + formattedText = if (llmProducedOutput) formattedText else null, + rawTranscript = rawTranscript, + includeRawTranscript = pipeline.includeRawTranscript, + ) + journalService.createTranscriptPage(pageTitle, transcriptPageContent) + + buildVoiceNoteBlock( + pageTitle = pageTitle, + timeLabel = timeLabel, + formattedText = formattedText, + ) + } else { + buildVoiceNoteBlockInline(timeLabel = timeLabel, formattedText = formattedText) + } + + if (targetPageUuid != null) { + journalService.appendToPage(targetPageUuid, inlineBlock) + } else { + journalService.appendToToday(inlineBlock) + } + _state.value = VoiceCaptureState.Done( insertedText = formattedText, isLikelyTruncated = isLikelyTruncated, ) } - internal fun buildVoiceNoteBlock(formattedText: String, rawTranscript: String): String { - val now = Clock.System.now().toLocalDateTime(TimeZone.currentSystemDefault()) - val timeLabel = "${now.hour.toString().padStart(2, '0')}:${now.minute.toString().padStart(2, '0')}" - return buildString { - append("- πŸ“ Voice note ($timeLabel)") - append("\n - ") - append(formattedText.lines().joinToString("\n - ")) - append("\n #+BEGIN_QUOTE\n ") - append(rawTranscript) - append("\n #+END_QUOTE") - } - } 
- fun close() { scope.cancel() } } + +internal fun buildVoiceNoteBlockInline(timeLabel: String, formattedText: String): String { + return buildString { + append("- πŸ“ Voice note ($timeLabel)") + append("\n - ") + append(formattedText.lines().joinToString("\n - ")) + } +} + +internal fun buildVoiceNoteBlock(pageTitle: String, timeLabel: String, formattedText: String): String { + return buildString { + append("- πŸ“ Voice note ($timeLabel) [[$pageTitle]]") + append("\n - ") + append(formattedText.lines().joinToString("\n - ")) + } +} + +internal fun buildTranscriptPageContent( + sourcePage: String, + formattedText: String?, + rawTranscript: String, + includeRawTranscript: Boolean, +): String { + return buildString { + append("source:: [[$sourcePage]]") + append("\n\n") + if (formattedText != null) { + append(formattedText) + if (includeRawTranscript) { + append("\n\n#+BEGIN_QUOTE\n") + append(rawTranscript) + append("\n#+END_QUOTE") + } + } else { + // LLM disabled or failed β€” raw transcript is the full content, no quote wrapper + append("- ") + append(rawTranscript) + } + } +} diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt index a485db17..e8b085f5 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt @@ -4,12 +4,42 @@ package dev.stapler.stelekit.voice const val DEFAULT_VOICE_SYSTEM_PROMPT = """You are a Logseq note-taking assistant. Convert the following voice transcript into well-structured Logseq outliner syntax. 
-Rules:
-- Use "- " bullet format for each main point
-- Use 2-space indentation for sub-points
-- Add [[Page Name]] wiki links ONLY for proper nouns or topics explicitly named in the transcript — do NOT invent links for terms not spoken
-- Do not add a preamble or summary
-- Do not add content not present in the transcript
+Logseq syntax you may use:
+- "- " bullet for each main point (required)
+- 2-space indentation for sub-points
+- [[Page Name]] wiki links — ONLY for proper nouns or topics explicitly named
+- #tag — ONLY for topics or categories explicitly spoken (e.g. "#meeting", "#todo")
+- key:: value property blocks — ONLY when the speaker states a clear key/value (e.g. "date:: 2026-05-02", "project:: Stelekit")
+- **bold** for words the speaker stressed or called out as important
+- *italic* for titles, technical terms, or qualified statements ("*maybe*", "*draft*")
+- TODO at the start of a bullet for action items the speaker explicitly commits to
+- DONE at the start of a bullet for completed actions explicitly mentioned
+
+Examples:
+
+Input: "met with Alice today about the Stelekit release, she said to make it a priority"
+Output:
+- Met with [[Alice]] about [[Stelekit]] release #meeting
+  - She flagged this as a priority
+- TODO Follow up with Alice on release timeline
+
+Input: "project is stelekit, date is May 2nd 2026, need to review the export feature"
+Output:
+- project:: Stelekit
+- date:: 2026-05-02
+- TODO Review the export feature
+
+Input: "I think the new design is okay, maybe try bold colours, definitely update the readme"
+Output:
+- The new design is acceptable
+  - Consider *bold* colours as a possibility
+- TODO Update the README
+
+Hard rules (never violate):
+- Do NOT invent topics, names, tags, or properties not mentioned in the transcript
+- Do NOT add a preamble, summary, or closing line
+- Do NOT add content not present in the transcript
+- Use TODO only when the speaker explicitly commits to an action
 
 Transcript:
{{TRANSCRIPT}}""" @@ -19,9 +49,10 @@ class VoicePipelineConfig( val sttProvider: SpeechToTextProvider = NoOpSpeechToTextProvider(), val llmProvider: LlmFormatterProvider = NoOpLlmFormatterProvider(), val systemPrompt: String = DEFAULT_VOICE_SYSTEM_PROMPT, - val minWordCount: Int = 10, /** When set, replaces the (record β†’ STT) two-step path with a single integrated listen. */ val directSpeechProvider: DirectSpeechProvider? = null, + val includeRawTranscript: Boolean = true, + val transcriptPageWordThreshold: Int = 20, ) { /** Amplitude flow for waveform animation: prefers directSpeechProvider, falls back to audioRecorder. */ val effectiveAmplitudeFlow get() = directSpeechProvider?.amplitudeFlow ?: audioRecorder.amplitudeFlow diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineFactory.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineFactory.kt index 288d45f5..29f84de8 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineFactory.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineFactory.kt @@ -30,5 +30,7 @@ fun buildVoicePipeline( sttProvider = sttProvider, llmProvider = llmProvider, directSpeechProvider = directSpeechProvider, + includeRawTranscript = settings.getIncludeRawTranscript(), + transcriptPageWordThreshold = settings.getTranscriptPageWordThreshold(), ) } diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt index 9d4d0b2c..3c6afa8d 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt @@ -42,6 +42,18 @@ class VoiceSettings(private val platformSettings: Settings) { fun setUseDeviceLlm(enabled: Boolean) = platformSettings.putBoolean(KEY_USE_DEVICE_LLM, enabled) + fun getIncludeRawTranscript(): Boolean = + 
platformSettings.getBoolean(KEY_INCLUDE_RAW_TRANSCRIPT, true) + + fun setIncludeRawTranscript(enabled: Boolean) = + platformSettings.putBoolean(KEY_INCLUDE_RAW_TRANSCRIPT, enabled) + + fun getTranscriptPageWordThreshold(): Int = + platformSettings.getString(KEY_TRANSCRIPT_PAGE_WORD_THRESHOLD, "20").toIntOrNull() ?: 20 + + fun setTranscriptPageWordThreshold(threshold: Int) = + platformSettings.putString(KEY_TRANSCRIPT_PAGE_WORD_THRESHOLD, threshold.toString()) + companion object { private const val KEY_WHISPER = "voice.whisper_key" private const val KEY_ANTHROPIC = "voice.anthropic_key" @@ -49,5 +61,7 @@ class VoiceSettings(private val platformSettings: Settings) { private const val KEY_LLM_ENABLED = "voice.llm_enabled" private const val KEY_USE_DEVICE_STT = "voice.use_device_stt" private const val KEY_USE_DEVICE_LLM = "voice.use_device_llm" + private const val KEY_INCLUDE_RAW_TRANSCRIPT = "voice.include_raw_transcript" + private const val KEY_TRANSCRIPT_PAGE_WORD_THRESHOLD = "voice.transcript_page_word_threshold" } } From 28a6195217da4ce364d601a3bce1a61d02a31740 Mon Sep 17 00:00:00 2001 From: Tyler Stapler Date: Sat, 2 May 2026 23:42:11 -0700 Subject: [PATCH 2/4] =?UTF-8?q?fix(voice):=20continuous=20recording=20?= =?UTF-8?q?=E2=80=94=20SpeechRecognizer=20auto-restarts=20on=20silence=20u?= =?UTF-8?q?ntil=20explicit=20stop?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the single-shot listen loop with an accumulating restart loop. When the recognizer stops due to silence (ERROR_NO_MATCH / ERROR_SPEECH_TIMEOUT) it restarts automatically and appends the new text to the accumulated transcript. The coroutine only resolves when stopListening() is called explicitly β€” setting stopRequested=true causes the next onResults/onError to return the full text rather than restarting. If stopListening() is called between cycles (activeRecognizer==null) the next startCycle() sees stopRequested=true and resolves immediately. 
Also add project_plans/voice/ spec artifacts. Co-Authored-By: Claude Sonnet 4.6 --- .../voice/AndroidSpeechRecognizerProvider.kt | 105 ++- project_plans/voice/implementation/plan.md | 800 ++++++++++++++++++ .../voice/implementation/validation.md | 377 +++++++++ project_plans/voice/requirements.md | 212 +++++ .../voice/research/android-stt-tuning.md | 169 ++++ .../research/current-page-integration.md | 210 +++++ .../voice/research/llm-prompt-engineering.md | 141 +++ .../voice/research/settings-architecture.md | 183 ++++ 8 files changed, 2169 insertions(+), 28 deletions(-) create mode 100644 project_plans/voice/implementation/plan.md create mode 100644 project_plans/voice/implementation/validation.md create mode 100644 project_plans/voice/requirements.md create mode 100644 project_plans/voice/research/android-stt-tuning.md create mode 100644 project_plans/voice/research/current-page-integration.md create mode 100644 project_plans/voice/research/llm-prompt-engineering.md create mode 100644 project_plans/voice/research/settings-architecture.md diff --git a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt index da03038a..a0d2f83d 100644 --- a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt +++ b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt @@ -35,16 +35,26 @@ class AndroidSpeechRecognizerProvider( override val amplitudeFlow: Flow = _amplitudeFlow.asStateFlow() @Volatile private var activeRecognizer: SpeechRecognizer? = null + // Set to true when the user explicitly taps stop; resets at the start of each listen(). 
+ @Volatile private var stopRequested = false private val mainHandler = Handler(Looper.getMainLooper()) override suspend fun listen(): TranscriptResult { if (requestMicPermission != null && !requestMicPermission()) { return TranscriptResult.Failure.PermissionDenied } - return listenInternal() + stopRequested = false + return listenContinuous() } - private suspend fun listenInternal(): TranscriptResult = suspendCancellableCoroutine { cont -> + /** + * Runs a continuous listen loop: each time the recognizer stops due to silence it is + * automatically restarted, accumulating transcript text across the gap. The loop only + * terminates when [stopListening] sets [stopRequested] = true. + */ + private suspend fun listenContinuous(): TranscriptResult = suspendCancellableCoroutine { cont -> + val accumulated = StringBuilder() + cont.invokeOnCancellation { mainHandler.post { activeRecognizer?.let { @@ -56,18 +66,27 @@ class AndroidSpeechRecognizerProvider( } } - mainHandler.post { - var recognizer: SpeechRecognizer? = null - try { - recognizer = SpeechRecognizer.createSpeechRecognizer(context) - activeRecognizer = recognizer + fun startCycle() { + mainHandler.post { + // Resolve immediately if stop was requested between cycles or after cancellation. 
+ if (!cont.isActive || stopRequested) { + _amplitudeFlow.value = 0f + if (cont.isActive) { + val text = accumulated.toString().trim() + cont.resume(if (text.isBlank()) TranscriptResult.Empty else TranscriptResult.Success(text)) + } + return@post + } - // Guard against cancellation that fired before this post ran - if (!cont.isActive) { - recognizer.destroy() - activeRecognizer = null + val recognizer: SpeechRecognizer + try { + recognizer = SpeechRecognizer.createSpeechRecognizer(context) + } catch (t: Throwable) { + Log.w(TAG, "Failed to create SpeechRecognizer", t) + if (cont.isActive) cont.resume(mapError(SpeechRecognizer.ERROR_CLIENT)) return@post } + activeRecognizer = recognizer recognizer.setRecognitionListener(object : RecognitionListener { override fun onReadyForSpeech(params: Bundle?) {} @@ -78,7 +97,6 @@ class AndroidSpeechRecognizerProvider( override fun onPartialResults(partialResults: Bundle?) {} override fun onRmsChanged(rmsdB: Float) { - // Map roughly -2..10 dB β†’ 0..1 _amplitudeFlow.value = ((rmsdB + 2f) / 12f).coerceIn(0f, 1f) } @@ -87,12 +105,22 @@ class AndroidSpeechRecognizerProvider( activeRecognizer = null recognizer.destroy() if (!cont.isActive) return + val text = results ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION) ?.firstOrNull() - Log.d(TAG, "onResults: text=${text?.take(80)}") - if (text.isNullOrBlank()) cont.resume(TranscriptResult.Empty) - else cont.resume(TranscriptResult.Success(text)) + Log.d(TAG, "onResults: text=${text?.take(80)}, stopRequested=$stopRequested") + if (!text.isNullOrBlank()) { + if (accumulated.isNotEmpty()) accumulated.append(" ") + accumulated.append(text.trim()) + } + + if (stopRequested) { + val finalText = accumulated.toString().trim() + cont.resume(if (finalText.isBlank()) TranscriptResult.Empty else TranscriptResult.Success(finalText)) + } else { + startCycle() + } } override fun onError(error: Int) { @@ -100,8 +128,21 @@ class AndroidSpeechRecognizerProvider( activeRecognizer = null 
recognizer.destroy() if (!cont.isActive) return - Log.w(TAG, "onError: code=$error") - cont.resume(mapError(error)) + Log.w(TAG, "onError: code=$error, stopRequested=$stopRequested") + + when (error) { + SpeechRecognizer.ERROR_NO_MATCH, + SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> { + // Silence gap β€” restart unless the user has tapped stop. + if (stopRequested) { + val finalText = accumulated.toString().trim() + cont.resume(if (finalText.isBlank()) TranscriptResult.Empty else TranscriptResult.Success(finalText)) + } else { + startCycle() + } + } + else -> cont.resume(mapError(error)) + } } }) @@ -113,28 +154,36 @@ class AndroidSpeechRecognizerProvider( putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 3_000L) putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, 2_000L) } - recognizer.startListening(intent) - } catch (t: Throwable) { - _amplitudeFlow.value = 0f - activeRecognizer = null - recognizer?.destroy() - Log.w(TAG, "Failed to start speech recognition", t) - if (cont.isActive) { - cont.resume(mapError(SpeechRecognizer.ERROR_CLIENT)) + try { + recognizer.startListening(intent) + } catch (t: Throwable) { + _amplitudeFlow.value = 0f + activeRecognizer = null + recognizer.destroy() + Log.w(TAG, "Failed to start speech recognition", t) + if (cont.isActive) cont.resume(mapError(SpeechRecognizer.ERROR_CLIENT)) } } } + + startCycle() } override suspend fun stopListening() { + stopRequested = true withContext(Dispatchers.Main) { - activeRecognizer?.stopListening() + val recognizer = activeRecognizer + if (recognizer != null) { + // Triggers onResults() with whatever was heard; the loop sees stopRequested=true + // and resolves the coroutine with the full accumulated transcript. + recognizer.stopListening() + } + // If activeRecognizer is null we're between cycles β€” the next startCycle() call + // will see stopRequested=true and resolve the coroutine directly. 
} } private fun mapError(error: Int): TranscriptResult = when (error) { - SpeechRecognizer.ERROR_NO_MATCH, - SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> TranscriptResult.Empty SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> TranscriptResult.Failure.PermissionDenied SpeechRecognizer.ERROR_NETWORK, SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> TranscriptResult.Failure.NetworkError diff --git a/project_plans/voice/implementation/plan.md b/project_plans/voice/implementation/plan.md new file mode 100644 index 00000000..175278bb --- /dev/null +++ b/project_plans/voice/implementation/plan.md @@ -0,0 +1,800 @@ +# Implementation Plan: voice + +**Feature**: Voice note enhancements β€” rich LLM formatting, transcript page creation, current-page insertion, min-word-count removal, Android STT silence tuning +**Date**: 2026-05-02 +**Status**: Ready for implementation +**ADRs**: None (all technology choices are existing patterns; no new dependencies) + +--- + +## Dependency Visualization + +``` +FR-4 (remove minWordCount) FR-5 (Android STT timeouts) + | | + v v +FR-1 (update system prompt) [independent β€” androidMain only] + | + v +FR-2a (VoiceSettings + VoicePipelineConfig β€” includeRawTranscript) + | + +-----> FR-2b (JournalService.appendToPage + createTranscriptPage) + | | + v v +FR-3 (currentOpenPageUuid lambda on VoiceCaptureViewModel) + | + v +FR-2c (buildVoiceNoteBlock + buildTranscriptPageContent refactor) + | + v +FR-2d (VoiceCaptureSettings UI toggle) + | + v +Tests (VoiceNoteBlockFormatTest + VoiceCaptureViewModelTest updates) +``` + +--- + +## Phase 1: Cleanup and Config Changes + +### Epic 1.1: Remove minWordCount guard (FR-4) + +**Goal**: Delete the 10-word minimum so short voice notes ("buy milk") complete successfully. + +#### Story 1.1.1: Delete minWordCount from config and pipeline logic +**As a** user, **I want** short voice notes to be captured without error, **so that** brief commands like "buy milk" produce a note. 
+**Acceptance Criteria**: +- `VoicePipelineConfig` no longer has a `minWordCount` field +- `processTranscript()` no longer has the word-count block +- 2-word transcript reaches `Done` state +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt` +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt` + +##### Task 1.1.1a: Remove `minWordCount` from VoicePipelineConfig (~2 min) +- In `VoicePipelineConfig.kt`, delete `val minWordCount: Int = 10` from the constructor parameter list (line 22). +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt` + +##### Task 1.1.1b: Remove word-count guard from processTranscript (~3 min) +- In `VoiceCaptureViewModel.kt`, delete lines 116–123: + ```kotlin + val wordCount = rawTranscript.split(Regex("\\s+")).count { it.isNotBlank() } + if (wordCount < pipeline.minWordCount) { + _state.value = VoiceCaptureState.Error( + PipelineStage.TRANSCRIBING, + "Recording too short β€” try speaking for a few more seconds" + ) + return + } + ``` +- The `TranscriptResult.Empty` guard in `startPipeline()` already blocks blank results; no new guard needed. +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt` + +--- + +### Epic 1.2: Android STT silence timeout extension (FR-5) + +**Goal**: Users can pause mid-thought without the recognizer cutting them off. + +#### Story 1.2.1: Update Intent extras in AndroidSpeechRecognizerProvider +**As a** user on Android, **I want** longer silence tolerance, **so that** I can pause to think without the recording stopping. 
+**Acceptance Criteria**: +- `EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS` = 6,000 ms +- `EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS` = 3,000 ms +- `EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS` = 2,000 ms added +**Files**: +- `kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt` + +##### Task 1.2.1a: Update three SpeechRecognizer Intent extras (~2 min) +- In `listenInternal()` (lines 108–114), change: + - `3_000L` β†’ `6_000L` for `EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS` + - `1_500L` β†’ `3_000L` for `EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS` + - Add `putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, 2_000L)` +- Files: `kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt` + +--- + +## Phase 2: LLM Prompt Enhancement (FR-1) + +### Epic 2.1: Update DEFAULT_VOICE_SYSTEM_PROMPT + +**Goal**: LLM output uses #tags, key:: value properties, **bold**, *italic*, and TODO markers. + +#### Story 2.1.1: Replace system prompt with rich-formatting version +**As a** user, **I want** voice notes formatted with full Logseq markdown vocabulary, **so that** tags, properties, bold text, and TODO items are extracted automatically. +**Acceptance Criteria**: +- `DEFAULT_VOICE_SYSTEM_PROMPT` includes rules for `#tag`, `key:: value`, `**bold**`, `*italic*`, `TODO` +- Three few-shot examples are included +- "Do not invent" constraints are restated for each new feature +- `{{TRANSCRIPT}}` placeholder is preserved (unchanged `replace()` call in `processTranscript()`) +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt` + +##### Task 2.1.1a: Replace DEFAULT_VOICE_SYSTEM_PROMPT constant (~3 min) +- Replace the existing `DEFAULT_VOICE_SYSTEM_PROMPT` const val with the following. The `{{TRANSCRIPT}}` placeholder and the `replace()` call in `processTranscript()` are unchanged. 
+ +``` +You are a Logseq note-taking assistant. Convert the following voice transcript into well-structured Logseq outliner syntax. + +Logseq syntax you may use: +- "- " bullet for each main point (required) +- 2-space indentation for sub-points +- [[Page Name]] wiki links β€” ONLY for proper nouns or topics explicitly named +- #tag β€” ONLY for topics or categories explicitly spoken (e.g. "#meeting", "#todo") +- key:: value property blocks β€” ONLY when the speaker states a clear key/value (e.g. "date:: 2026-05-02", "project:: Stelekit") +- **bold** for words the speaker stressed or called out as important +- *italic* for titles, technical terms, or qualified statements ("*maybe*", "*draft*") +- TODO at the start of a bullet for action items the speaker explicitly commits to +- DONE at the start of a bullet for completed actions explicitly mentioned + +Examples of each feature: + +Input: "met with Alice today about the Stelekit release, she said to make it a priority" +Output: +- Met with [[Alice]] about [[Stelekit]] release #meeting + - She flagged this as a priority +- TODO Follow up with Alice on release timeline + +Input: "project is stelekit, date is May 2nd, need to review the export feature" +Output: +- project:: Stelekit +- date:: 2026-05-02 +- TODO Review the export feature + +Input: "I think the new design is okay, maybe try bold colours, definitely update the readme" +Output: +- The new design is acceptable + - Consider *bold* colours as a possibility +- TODO Update the README + +Hard rules (never violate): +- Do NOT invent topics, names, tags, or properties not mentioned in the transcript +- Do NOT add a preamble, summary, or closing line +- Do NOT add content not present in the transcript +- Use TODO only when the speaker explicitly commits to an action + +Transcript: +{{TRANSCRIPT}} +``` + +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt` + +--- + +## Phase 2.5: Transcript Page Word Threshold (FR-6) + +### Epic 
2.5.1: Add `transcriptPageWordThreshold` to VoiceSettings and VoicePipelineConfig + +##### Task 2.5.1a: Add getter/setter to VoiceSettings (~2 min) +- Add after `setIncludeRawTranscript`: + ```kotlin + fun getTranscriptPageWordThreshold(): Int = + platformSettings.getInt(KEY_TRANSCRIPT_PAGE_WORD_THRESHOLD, 20) + + fun setTranscriptPageWordThreshold(threshold: Int) = + platformSettings.putInt(KEY_TRANSCRIPT_PAGE_WORD_THRESHOLD, threshold) + ``` +- Add to companion object: `private const val KEY_TRANSCRIPT_PAGE_WORD_THRESHOLD = "voice.transcript_page_word_threshold"` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt` + +##### Task 2.5.1b: Add `transcriptPageWordThreshold` to VoicePipelineConfig (~1 min) +- Add `val transcriptPageWordThreshold: Int = 20` to `VoicePipelineConfig` constructor. +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt` + +##### Task 2.5.1c: Apply threshold in processTranscript (~3 min) +- After computing `formattedText`, count words: `val wordCount = formattedText.split(Regex("\\s+")).count { it.isNotBlank() }` +- Branch on `wordCount >= pipeline.transcriptPageWordThreshold`: + - **Short path** (below threshold): inline block is `buildVoiceNoteBlockInline(timeLabel, formattedText)` β€” no page title, no wikilink, no transcript page created. Append to target page/journal. + - **Long path** (at/above threshold): existing full flow β€” create transcript page, build wikilink block, append. 
+- Add `buildVoiceNoteBlockInline(timeLabel: String, formattedText: String): String` as a private helper: + ```kotlin + internal fun buildVoiceNoteBlockInline(timeLabel: String, formattedText: String): String { + return buildString { + append("- πŸ“ Voice note ($timeLabel)") + append("\n - ") + append(formattedText.lines().joinToString("\n - ")) + } + } + ``` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt` + +##### Task 2.5.1d: Add numeric input to VoiceCaptureSettings UI (~3 min) +- Add `var transcriptPageWordThreshold by remember { mutableStateOf(voiceSettings.getTranscriptPageWordThreshold().toString()) }` state var. +- Add a labeled `OutlinedTextField` for the threshold below the `includeRawTranscript` toggle. +- Persist in Save handler: `voiceSettings.setTranscriptPageWordThreshold(transcriptPageWordThreshold.toIntOrNull() ?: 20)` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt` + +##### Task 2.5.1e: Wire threshold in App.kt VoicePipelineConfig construction (~1 min) +- Add `transcriptPageWordThreshold = voiceSettings.getTranscriptPageWordThreshold()` to `VoicePipelineConfig(...)` call in App.kt. +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt` + +--- + +## Phase 3: Settings and Config Extension (FR-2 β€” settings layer) + +### Epic 3.1: Add `includeRawTranscript` to VoiceSettings and VoicePipelineConfig + +**Goal**: New setting persists across sessions and controls whether the transcript page includes `#+BEGIN_QUOTE`. + +#### Story 3.1.1: Add setting to VoiceSettings +**As a** user, **I want** a persistent setting to control raw transcript inclusion, **so that** my preference survives app restarts. 
+**Acceptance Criteria**: +- `getIncludeRawTranscript()` returns `true` by default +- `setIncludeRawTranscript(Boolean)` persists value +- Key follows `voice.*` namespace convention +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt` + +##### Task 3.1.1a: Add getter, setter, and key constant to VoiceSettings (~2 min) +- Add after `getUseDeviceLlm()`/`setUseDeviceLlm()`: + ```kotlin + fun getIncludeRawTranscript(): Boolean = + platformSettings.getBoolean(KEY_INCLUDE_RAW_TRANSCRIPT, true) + + fun setIncludeRawTranscript(enabled: Boolean) = + platformSettings.putBoolean(KEY_INCLUDE_RAW_TRANSCRIPT, enabled) + ``` +- Add to companion object: + ```kotlin + private const val KEY_INCLUDE_RAW_TRANSCRIPT = "voice.include_raw_transcript" + ``` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt` + +#### Story 3.1.2: Add `includeRawTranscript` to VoicePipelineConfig +**As a** developer, **I want** pipeline behavior flags in one place, **so that** `VoiceCaptureViewModel` stays free of a `VoiceSettings` dependency. +**Acceptance Criteria**: +- `VoicePipelineConfig` has `val includeRawTranscript: Boolean = true` +- Default is `true` β€” existing callers unaffected +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt` + +##### Task 3.1.2a: Add `includeRawTranscript` parameter to VoicePipelineConfig (~2 min) +- Add `val includeRawTranscript: Boolean = true` to the `VoicePipelineConfig` constructor, after `val directSpeechProvider: DirectSpeechProvider? = null`. +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoicePipelineConfig.kt` + +--- + +## Phase 4: JournalService Extensions (FR-2 + FR-3 β€” repository layer) + +### Epic 4.1: Add `appendToPage`, `createTranscriptPage`, and `getPageNameByUuid` to JournalService + +**Goal**: Voice pipeline can write to any page (not just today's journal) and create the transcript page. 
+ +#### Story 4.1.1: Add `appendToPage` method +**As a** developer, **I want** `JournalService` to append a block to any page by UUID, **so that** voice notes can target the currently-open page. +**Acceptance Criteria**: +- `appendToPage(pageUuid: String, content: String)` appends a block to the given page +- When `pageUuid` doesn't resolve to a real page, falls back to `appendToToday` +- Follows the same `writeActor`-first / `blockRepository.saveBlock` fallback pattern as `appendToToday` +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt` + +##### Task 4.1.1a: Implement `appendToPage` (~4 min) +- Add the following method to `JournalService` after `appendToToday`: + ```kotlin + /** + * Appends a new block with [content] to the page identified by [pageUuid]. + * Falls back to today's journal if [pageUuid] resolves to no page. + */ + @OptIn(DirectRepositoryWrite::class) + suspend fun appendToPage(pageUuid: String, content: String) { + val page = pageRepository.getPageByUuid(pageUuid).first().getOrNull() + if (page == null) { + appendToToday(content) + return + } + val blocks = blockRepository.getBlocksForPage(page.uuid).first().getOrNull() ?: emptyList() + val nextPosition = (blocks.maxOfOrNull { it.position } ?: -1) + 1 + val newBlock = Block( + uuid = UuidGenerator.generateV7(), + pageUuid = page.uuid, + content = content, + position = nextPosition, + createdAt = Clock.System.now(), + updatedAt = Clock.System.now(), + ) + if (writeActor != null) { + writeActor.saveBlock(newBlock) + } else { + blockRepository.saveBlock(newBlock) + } + } + ``` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt` + +#### Story 4.1.2: Add `createTranscriptPage` method +**As a** developer, **I want** `JournalService` to create a named page with content, **so that** voice notes get a dedicated transcript page. 
+**Acceptance Criteria**: +- `createTranscriptPage(title: String, content: String): Page` creates a page and populates its first block +- If a page with that title already exists, appends to it rather than creating a duplicate +- Returns the `Page` that was created or found +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt` + +##### Task 4.1.2a: Implement `createTranscriptPage` (~4 min) +- Add after `appendToPage`: + ```kotlin + /** + * Creates a new page with [title] and inserts [content] as its first block. + * If a page with that exact title already exists, appends [content] to it instead. + * + * @return the [Page] that was created or found. + */ + @OptIn(DirectRepositoryWrite::class) + suspend fun createTranscriptPage(title: String, content: String): Page { + val existing = pageRepository.getPageByName(title).first().getOrNull() + if (existing != null) { + appendToPage(existing.uuid, content) + return existing + } + val pageUuid = UuidGenerator.generateV7() + val newPage = Page( + uuid = pageUuid, + name = title, + createdAt = Clock.System.now(), + updatedAt = Clock.System.now(), + isJournal = false, + ) + if (writeActor != null) { + writeActor.savePage(newPage) + } else { + pageRepository.savePage(newPage) + } + val newBlock = Block( + uuid = UuidGenerator.generateV7(), + pageUuid = pageUuid, + content = content, + position = 0, + createdAt = Clock.System.now(), + updatedAt = Clock.System.now(), + ) + if (writeActor != null) { + writeActor.saveBlock(newBlock) + } else { + blockRepository.saveBlock(newBlock) + } + return newPage + } + ``` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt` + +#### Story 4.1.3: Add `getPageNameByUuid` convenience method +**As a** developer, **I want** to resolve a page name from a UUID inside `VoiceCaptureViewModel`, **so that** the `source::` property on the transcript page can name the originating page. 
+**Acceptance Criteria**: +- `getPageNameByUuid(uuid: String): String?` returns the page name or `null` if not found +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt` + +##### Task 4.1.3a: Add `getPageNameByUuid` (~2 min) +- Add: + ```kotlin + /** Returns the name of the page with [uuid], or null if not found. */ + suspend fun getPageNameByUuid(uuid: String): String? = + pageRepository.getPageByUuid(uuid).first().getOrNull()?.name + ``` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt` + +--- + +## Phase 5: VoiceCaptureViewModel Refactor (FR-2 + FR-3 β€” core logic) + +### Epic 5.1: Add `currentOpenPageUuid` lambda and refactor block/page builders + +**Goal**: ViewModel targets the current page, creates a transcript page, and produces the new two-part block format. + +#### Story 5.1.1: Add `currentOpenPageUuid` lambda constructor parameter +**As a** developer, **I want** `VoiceCaptureViewModel` to accept a `() -> String?` lambda, **so that** it can read the currently-open page UUID at insertion time without a `StateFlow` dependency. +**Acceptance Criteria**: +- Constructor accepts `currentOpenPageUuid: () -> String? = { null }` as 3rd parameter (before `scope`) +- Existing tests still compile (default `{ null }` preserves journal fallback) +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt` + +##### Task 5.1.1a: Add lambda parameter to constructor (~2 min) +- Add `private val currentOpenPageUuid: () -> String? = { null }` to the `VoiceCaptureViewModel` constructor after `journalService` and before `scope`: + ```kotlin + class VoiceCaptureViewModel( + private val pipeline: VoicePipelineConfig, + private val journalService: JournalService, + private val currentOpenPageUuid: () -> String? 
= { null }, + scope: CoroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Default), + ) + ``` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt` + +#### Story 5.1.2: Replace `buildVoiceNoteBlock` and add `buildTranscriptPageContent` +**As a** developer, **I want** the two builder functions to produce the new format, **so that** the inline block is a link header with sub-bullets and the transcript page has the full content. +**Acceptance Criteria**: +- `buildVoiceNoteBlock(pageTitle: String, timeLabel: String, formattedText: String): String` returns the header line + indented sub-bullets +- `buildTranscriptPageContent(sourcePage: String, formattedText: String?, rawTranscript: String, includeRawTranscript: Boolean): String` returns the full page content +- When `formattedText` is `null` (LLM disabled/failed): transcript page body is raw transcript with no `#+BEGIN_QUOTE` wrapper +- When `formattedText` is non-null and `includeRawTranscript=true`: transcript page includes `#+BEGIN_QUOTE` block +- When `formattedText` is non-null and `includeRawTranscript=false`: transcript page omits `#+BEGIN_QUOTE` +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt` + +##### Task 5.1.2a: Replace `buildVoiceNoteBlock` and add `buildTranscriptPageContent` (~5 min) +- Remove the existing `buildVoiceNoteBlock(formattedText: String, rawTranscript: String)` method entirely. 
+- Add two new `internal` functions: + +```kotlin +internal fun buildVoiceNoteBlock(pageTitle: String, timeLabel: String, formattedText: String): String { + return buildString { + append("- πŸ“ Voice note ($timeLabel) [[$pageTitle]]") + append("\n - ") + append(formattedText.lines().joinToString("\n - ")) + } +} + +internal fun buildTranscriptPageContent( + sourcePage: String, + formattedText: String?, + rawTranscript: String, + includeRawTranscript: Boolean, +): String { + return buildString { + append("source:: [[$sourcePage]]") + append("\n\n") + if (formattedText != null) { + append(formattedText) + if (includeRawTranscript) { + append("\n\n#+BEGIN_QUOTE\n") + append(rawTranscript) + append("\n#+END_QUOTE") + } + } else { + // LLM disabled or failed β€” raw transcript is the full content, no quote wrapper + append("- ") + append(rawTranscript) + } + } +} +``` + +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt` + +#### Story 5.1.3: Update `processTranscript` to use new format and current-page routing +**As a** user, **I want** completed recordings inserted into the page I'm viewing, **so that** I don't have to navigate to the journal to find my note. +**Acceptance Criteria**: +- `processTranscript` computes `timeLabel`, `dateLabel`, and `pageTitle` at insertion time +- Creates transcript page via `journalService.createTranscriptPage(pageTitle, transcriptPageContent)` +- Appends inline block to `currentOpenPageUuid()` target when non-null, else today's journal +- `sourcePage` resolved from the target page UUID name (falls back to today's journal name if UUID lookup fails) +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt` + +##### Task 5.1.3a: Rewrite insertion logic in `processTranscript` (~5 min) +- Track whether LLM succeeded explicitly. 
Replace the existing `formattedText` assignment block so that: + ```kotlin + var llmProducedOutput = false + val formattedText = when (val llmResult = pipeline.llmProvider.format(rawTranscript, prompt)) { + is LlmResult.Success -> { + isLikelyTruncated = isLikelyTruncated || llmResult.isLikelyTruncated + llmProducedOutput = true + llmResult.formattedText + } + is LlmResult.Failure -> { + println("[VoiceCaptureViewModel] LLM formatting failed ($llmResult), inserting raw transcript") + rawTranscript + } + } + ``` +- Replace the `journalService.appendToToday(...)` call and everything after it (through the `_state.value = ...Done` line) with: + ```kotlin + val now = Clock.System.now().toLocalDateTime(TimeZone.currentSystemDefault()) + val timeLabel = "${now.hour.toString().padStart(2, '0')}:${now.minute.toString().padStart(2, '0')}:${now.second.toString().padStart(2, '0')}" + val dateLabel = "${now.year}-${now.monthNumber.toString().padStart(2, '0')}-${now.dayOfMonth.toString().padStart(2, '0')}" + val pageTitle = "Voice Note $dateLabel $timeLabel" + + val targetPageUuid = currentOpenPageUuid() + val sourcePage: String = if (targetPageUuid != null) { + journalService.getPageNameByUuid(targetPageUuid) ?: dateLabel.replace('-', '_') + } else { + dateLabel.replace('-', '_') + } + + val transcriptPageContent = buildTranscriptPageContent( + sourcePage = sourcePage, + formattedText = if (llmProducedOutput) formattedText else null, + rawTranscript = rawTranscript, + includeRawTranscript = pipeline.includeRawTranscript, + ) + journalService.createTranscriptPage(pageTitle, transcriptPageContent) + + val inlineBlock = buildVoiceNoteBlock( + pageTitle = pageTitle, + timeLabel = timeLabel, + formattedText = formattedText, + ) + if (targetPageUuid != null) { + journalService.appendToPage(targetPageUuid, inlineBlock) + } else { + journalService.appendToToday(inlineBlock) + } + + _state.value = VoiceCaptureState.Done( + insertedText = formattedText, + isLikelyTruncated = 
isLikelyTruncated, + ) + ``` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt` + +--- + +## Phase 6: Settings UI Toggle (FR-2 β€” UI layer) + +### Epic 6.1: Add `includeRawTranscript` toggle to VoiceCaptureSettings + +**Goal**: Users can disable raw transcript inclusion from the settings panel. + +#### Story 6.1.1: Add toggle row to VoiceCaptureSettings +**As a** user, **I want** a toggle in the voice settings panel to control raw transcript inclusion, **so that** I can keep my transcript pages clean. +**Acceptance Criteria**: +- Toggle labelled "Include raw transcript in note" appears in the "LLM Formatting" section +- State initialised from `voiceSettings.getIncludeRawTranscript()` +- Save button persists value via `voiceSettings.setIncludeRawTranscript(...)` +- Toggle is always visible (not gated on `llmEnabled`) because it also affects the no-LLM path +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt` + +##### Task 6.1.1a: Add state variable for includeRawTranscript (~2 min) +- After `var useDeviceLlm by remember { ... 
}` add: + ```kotlin + var includeRawTranscript by remember { mutableStateOf(voiceSettings.getIncludeRawTranscript()) } + ``` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt` + +##### Task 6.1.1b: Add Switch row to LLM Formatting section (~3 min) +- At the bottom of the `SettingsSection("LLM Formatting")` block (before the closing `}`), add: + ```kotlin + Row( + modifier = Modifier.fillMaxWidth().padding(top = 8.dp), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically, + ) { + Text("Include raw transcript in note", style = MaterialTheme.typography.bodyMedium) + Switch( + checked = includeRawTranscript, + onCheckedChange = { includeRawTranscript = it; saved = false }, + ) + } + ``` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt` + +##### Task 6.1.1c: Persist in Save button handler (~2 min) +- Inside the `Button(onClick = { ... })` handler, add after `voiceSettings.setUseDeviceLlm(useDeviceLlm)`: + ```kotlin + voiceSettings.setIncludeRawTranscript(includeRawTranscript) + ``` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt` + +--- + +## Phase 7: App.kt Wiring (FR-3 β€” call site) + +### Epic 7.1: Thread `currentOpenPageUuid` and `includeRawTranscript` through App.kt + +**Goal**: VoiceCaptureViewModel is constructed with the live page-UUID lambda and updated pipeline config. + +#### Story 7.1.1: Update VoiceCaptureViewModel construction in App.kt +**As a** developer, **I want** App.kt to pass the live `currentOpenPageUuid` lambda and `includeRawTranscript`, **so that** the new routing and settings take effect in production. 
+**Acceptance Criteria**: +- `voiceCaptureViewModel` receives `currentOpenPageUuid = { viewModel.uiState.value.currentPage?.uuid }` +- `VoicePipelineConfig` includes `includeRawTranscript = voiceSettings.getIncludeRawTranscript()` when built +**Files**: +- `kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt` + +##### Task 7.1.1a: Update VoiceCaptureViewModel construction site (~3 min) +- Locate the `remember(voicePipeline)` block that constructs `VoiceCaptureViewModel` (around line 462–463). +- Add the lambda parameter: + ```kotlin + val voiceCaptureViewModel = remember(voicePipeline) { + VoiceCaptureViewModel( + voicePipeline, + repos.journalService, + currentOpenPageUuid = { viewModel.uiState.value.currentPage?.uuid }, + ) + } + ``` +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt` + +##### Task 7.1.1b: Add `includeRawTranscript` to VoicePipelineConfig construction (~2 min) +- Locate where `VoicePipelineConfig` is rebuilt from `VoiceSettings` (in the `onRebuildVoicePipeline` lambda or equivalent). +- Add `includeRawTranscript = voiceSettings.getIncludeRawTranscript()` to the `VoicePipelineConfig(...)` call. +- Files: `kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt` + +--- + +## Phase 8: Test Updates + +### Epic 8.1: Update and add unit tests + +**Goal**: Test suite covers the new behavior and removes obsolete tests. + +#### Story 8.1.1: Update VoiceNoteBlockFormatTest +**As a** developer, **I want** the format tests to reflect the new two-function API, **so that** the test suite stays green. 
+**Acceptance Criteria**: +- All `buildVoiceNoteBlock` calls use the new 3-parameter signature `(pageTitle, timeLabel, formattedText)` +- `block contains raw transcript in BEGIN_QUOTE block` is replaced by two tests covering `includeRawTranscript=true` and `=false` +- `success pipeline stores block with correct structure` checks for `[[Voice Note` wikilink in inline block (not `#+BEGIN_QUOTE`) +- Timestamp regex updated to match `- πŸ“ Voice note (HH:mm:ss) [[Voice Note YYYY-MM-DD HH:mm:ss]]` +- `makeViewModel` helper uses named `scope =` parameter to avoid positional collision with new `currentOpenPageUuid` parameter +**Files**: +- `kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceNoteBlockFormatTest.kt` + +##### Task 8.1.1a: Fix makeViewModel helper (~1 min) +- Update `makeViewModel` to use named `scope` parameter: + ```kotlin + private fun makeViewModel(scope: kotlinx.coroutines.CoroutineScope) = VoiceCaptureViewModel( + VoicePipelineConfig(), + JournalService(InMemoryPageRepository(), InMemoryBlockRepository()), + currentOpenPageUuid = { null }, + scope = scope, + ) + ``` +- Files: `kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceNoteBlockFormatTest.kt` + +##### Task 8.1.1b: Update `buildVoiceNoteBlock` call-sites in existing tests (~3 min) +- After Task 5.1.2a, `buildVoiceNoteBlock` is a free `internal` function, not a VM method β€” call it directly (no `makeViewModel(this).` receiver). +- `block starts with voice note header line`: change to `buildVoiceNoteBlock("Test Page", "14:35:22", "- formatted bullet.")` and assert starts with `"- πŸ“ Voice note (14:35:22) [[Test Page]]"`. +- `block contains formatted text`: change to `buildVoiceNoteBlock("Test Page", "14:35:22", formatted)`. +- `multiline formatted text has each line indented under header`: change to `buildVoiceNoteBlock("Test Page", "14:35:22", formatted)`.
+- `timestamp in header has zero-padded hours and minutes`: change assertion regex to `"""- πŸ“ Voice note \(\d{2}:\d{2}:\d{2}\) \[\[Voice Note \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\]\]"""` (the method now takes explicit `timeLabel` so the real-time path is not exercised here; pass a fixed `timeLabel = "14:35:22"` and check the literal). +- Files: `kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceNoteBlockFormatTest.kt` + +##### Task 8.1.1c: Replace `block contains raw transcript in BEGIN_QUOTE block` test (~3 min) +- Delete the existing test and replace with two. Note: after Task 5.1.2a, `buildTranscriptPageContent` is a free `internal` function, not a VM method β€” call it directly: + ```kotlin + @Test + fun `transcript page includes BEGIN_QUOTE when includeRawTranscript is true`() = runTest { + val raw = "this is the raw transcript text" + val content = buildTranscriptPageContent("Today", "- formatted.", raw, includeRawTranscript = true) + assertTrue(content.contains("#+BEGIN_QUOTE")) + assertTrue(content.contains(raw)) + assertTrue(content.contains("#+END_QUOTE")) + } + + @Test + fun `transcript page omits BEGIN_QUOTE when includeRawTranscript is false`() = runTest { + val raw = "this is the raw transcript text" + val content = buildTranscriptPageContent("Today", "- formatted.", raw, includeRawTranscript = false) + assertFalse(content.contains("#+BEGIN_QUOTE")) + } + ``` +- Add import `kotlin.test.assertFalse` if not present. +- Files: `kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceNoteBlockFormatTest.kt` + +##### Task 8.1.1d: Update `success pipeline stores block with correct structure` test (~3 min) +- Change the assertion from checking `#+BEGIN_QUOTE` in the inline block to checking for `[[Voice Note` wikilink. +- Also assert a transcript page exists by checking `pageRepo` (add `InMemoryPageRepository` reference to test scope).
+- Files: `kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceNoteBlockFormatTest.kt` + +#### Story 8.1.2: Update VoiceCaptureViewModelTest +**As a** developer, **I want** the VM tests to reflect FR-3, FR-4, and the new format, **so that** CI passes. +**Acceptance Criteria**: +- Three obsolete word-count tests deleted +- New test: 2-word transcript reaches `Done` state (AC-11) +- New test: `currentOpenPageUuid` non-null β†’ block appended to that page (AC-8) +- New test: `currentOpenPageUuid` null β†’ block appended to today's journal (AC-9) +**Files**: +- `kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt` + +##### Task 8.1.2a: Delete three obsolete word-count tests (~2 min) +- Delete the entire test bodies for: + - `word-count gate under 10 words emits Error at TRANSCRIBING` (lines 51–69) + - `9-word transcript emits Error at TRANSCRIBING` (lines 377–395) + - `10-word transcript reaches Done state` (lines 397–414) +- Files: `kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt` + +##### Task 8.1.2b: Add 2-word transcript test (AC-11) (~3 min) +- Add after `success path reaches Done state`: + ```kotlin + @Test + fun `2-word transcript reaches Done state (AC-11)`() = runTest { + val fakeRecorder = object : AudioRecorder { + override suspend fun startRecording(): PlatformAudioFile = PlatformAudioFile("/tmp/test.m4a") + override suspend fun stopRecording() = Unit + override suspend fun readBytes(file: PlatformAudioFile) = ByteArray(100) + } + val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success("buy milk") } + val vm = VoiceCaptureViewModel( + VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), + makeJournalService(), scope = this, + ) + vm.onMicTapped() + advanceUntilIdle() + assertIs(vm.state.first()) + } + ``` +- Files: `kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt` + +##### Task 8.1.2c: Add current-page routing tests 
(AC-8 and AC-9) (~5 min) +- Add the following two tests. For AC-8, the target page must NOT be today's journal β€” if it were, the test would also pass when routing wrongly falls back to the journal (AC-9 path), making the two tests indistinguishable: + ```kotlin + @Test + fun `when page is open voice note is appended to that page (AC-8)`() = runTest { + val blockRepo = InMemoryBlockRepository() + val pageRepo = InMemoryPageRepository() + val journalService = JournalService(pageRepo, blockRepo) + // Non-journal target page so a silent journal fallback fails this test. + val targetPage = journalService.createTranscriptPage("Target Page", "seed block") + val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success("buy milk") } + val fakeRecorder = object : AudioRecorder { + override suspend fun startRecording(): PlatformAudioFile = PlatformAudioFile("/tmp/test.m4a") + override suspend fun stopRecording() = Unit + override suspend fun readBytes(file: PlatformAudioFile) = ByteArray(100) + } + val vm = VoiceCaptureViewModel( + VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), + journalService, + currentOpenPageUuid = { targetPage.uuid }, + scope = this, + ) + vm.onMicTapped() + advanceUntilIdle() + assertIs(vm.state.first()) + val blocks = blockRepo.getBlocksForPage(targetPage.uuid).first().getOrNull().orEmpty() + assertTrue(blocks.any { it.content.contains("πŸ“ Voice note") }) + } + + @Test + fun `when no page is open voice note falls back to today journal (AC-9)`() = runTest { + val blockRepo = InMemoryBlockRepository() + val pageRepo = InMemoryPageRepository() + val journalService = JournalService(pageRepo, blockRepo) + val fakeStt = SpeechToTextProvider { _ -> TranscriptResult.Success("buy milk") } + val fakeRecorder = object : AudioRecorder { + override suspend fun startRecording(): PlatformAudioFile = PlatformAudioFile("/tmp/test.m4a") + override suspend fun stopRecording() = Unit + override suspend fun readBytes(file: PlatformAudioFile) = ByteArray(100) + } + val vm = VoiceCaptureViewModel( + VoicePipelineConfig(audioRecorder = fakeRecorder, sttProvider = fakeStt), + journalService, + currentOpenPageUuid = { null }, + scope = this, + ) + vm.onMicTapped() + advanceUntilIdle() + assertIs(vm.state.first()) + val journalPage = 
journalService.ensureTodayJournal() + val blocks = blockRepo.getBlocksForPage(journalPage.uuid).first().getOrNull().orEmpty() + assertTrue(blocks.any { it.content.contains("πŸ“ Voice note") }) + } + ``` +- Files: `kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt` + +--- + +## Summary Table + +| Phase | Epics | Stories | Tasks | +|-------|-------|---------|-------| +| 1 β€” Cleanup & Config | 2 | 2 | 3 | +| 2 β€” LLM Prompt | 1 | 1 | 1 | +| 3 β€” Settings Layer | 1 | 2 | 2 | +| 4 β€” JournalService | 1 | 3 | 3 | +| 5 β€” ViewModel Core | 1 | 3 | 4 | +| 6 β€” Settings UI | 1 | 1 | 3 | +| 7 β€” App.kt Wiring | 1 | 1 | 2 | +| 8 β€” Tests | 1 | 2 | 7 | +| **Total** | **9** | **15** | **25** | + +--- + +## Key Implementation Notes + +### `makeViewModel` positional parameter collision +After adding `currentOpenPageUuid` as the 3rd constructor parameter (before `scope`), the existing `makeViewModel(this)` helper in `VoiceNoteBlockFormatTest` passes `this` (the `TestScope`) positionally β€” it will land in the new `currentOpenPageUuid` parameter slot and fail to compile. Task 8.1.1a fixes this with named `scope = scope` syntax. + +### `llmProducedOutput` flag +The current code reuses `formattedText == rawTranscript` as an implicit LLM-failure signal. Task 5.1.3a introduces an explicit `llmProducedOutput: Boolean` to distinguish "LLM ran and returned formatted output" from "LLM was skipped or failed". This is cleaner and avoids false matches when the LLM happens to reproduce the raw transcript verbatim. + +### `source::` placement +Logseq parses `key:: value` lines at the top of a page as page properties when they appear before any blank line. The `buildTranscriptPageContent` implementation in Task 5.1.2a places `source:: [[...]]` first, followed by `\n\n`, which matches this expectation. 
+ +### Transcript page `#+BEGIN_QUOTE` β€” LLM disabled/failed +When `formattedText` is `null` (passed as `null` when `!llmProducedOutput`), both the inline sub-bullets and the transcript page body use the raw transcript. No `#+BEGIN_QUOTE` wrapper is added even if `includeRawTranscript=true`, because the raw text is already the primary content (not a supplemental reference). This matches FR-2 requirements. + +### Flagged choices +**None.** All changes use existing project patterns: Arrow `Either` at repository boundaries, `DirectRepositoryWrite` opt-in annotation, `writeActor`-first / direct fallback pattern, `() -> String?` lambda for late-bound read-only access to VM state, `remember { }` with own internal scope. No new dependencies, no new platform-specific source sets. diff --git a/project_plans/voice/implementation/validation.md b/project_plans/voice/implementation/validation.md new file mode 100644 index 00000000..5d12dc83 --- /dev/null +++ b/project_plans/voice/implementation/validation.md @@ -0,0 +1,377 @@ +# Validation Plan: voice + +**Date**: 2026-05-02 + +--- + +## Requirement β†’ Test Mapping + +| AC | Requirement | Test File | Test Name | Type | Status | Scenario | +|----|-------------|-----------|-----------|------|--------|----------| +| AC-1 | Rich LLM output: #tags, key:: value, **bold**, TODO | `VoiceNoteBlockFormatTest` | `buildTranscriptPageContent_should_passthrough_LLM_output_verbatim` | Unit | NEW | LLM output containing #tag, key::, **bold**, TODO appears unmodified in transcript page content | +| AC-2 | TODO bullet for action items | `VoiceNoteBlockFormatTest` | `buildTranscriptPageContent_should_include_formatted_bullets_as_primary_content` | Unit | NEW | Simulated LLM output `- TODO Call Alice about [[project]]` appears in the `formattedText` section of the transcript page | +| AC-3 | Inline block: `- πŸ“ Voice note (HH:mm:ss) [[Voice Note YYYY-MM-DD HH:mm:ss]]` + sub-bullets | `VoiceNoteBlockFormatTest` | `block starts with voice note 
header line` | Unit | UPDATE | Header starts with `- πŸ“ Voice note (` β€” update to use 3-param signature `(pageTitle, timeLabel, formattedText)` | +| AC-3 | Inline block timestamp format HH:mm:ss | `VoiceNoteBlockFormatTest` | `timestamp in header has zero-padded hours and minutes` | Unit | UPDATE | Regex updated to `\d{2}:\d{2}:\d{2}` and wikilink pattern `[[Voice Note YYYY-MM-DD HH:mm:ss]]`; call `buildVoiceNoteBlock("Voice Note 2026-05-02 14:35:22", "14:35:22", "- bullet")` | +| AC-3 | Inline block contains wikilink to transcript page | `VoiceNoteBlockFormatTest` | `buildVoiceNoteBlock_should_contain_wikilink_to_transcript_page` | Unit | NEW | `buildVoiceNoteBlock("Voice Note 2026-05-02 14:35:22", "14:35:22", "- formatted.")` contains `[[Voice Note 2026-05-02 14:35:22]]` | +| AC-3 | Inline block has LLM bullets as sub-items | `VoiceNoteBlockFormatTest` | `block contains formatted text` | Unit | UPDATE | Use 3-param `buildVoiceNoteBlock("Test Page", "14:35:22", formatted)` β€” existing assertion retained | +| AC-3 | Multiline formatted text indented under header | `VoiceNoteBlockFormatTest` | `multiline formatted text has each line indented under header` | Unit | UPDATE | Use 3-param `buildVoiceNoteBlock("Test Page", "14:35:22", formatted)` | +| AC-4 | Transcript page has `source::` property | `VoiceNoteBlockFormatTest` | `buildTranscriptPageContent_should_start_with_source_property` | Unit | NEW | `buildTranscriptPageContent("My Page", "- bullets", "raw", true)` starts with `source:: [[My Page]]` | +| AC-4 | `source::` links to current open page name | `VoiceCaptureViewModelTest` | `when page is open voice note is appended to that page (AC-8)` | Integration | NEW | Transcript page block checked for `source:: [[` pointing to the open page name | +| AC-5 | Transcript page contains LLM-formatted bullets | `VoiceNoteBlockFormatTest` | `buildTranscriptPageContent_should_include_formatted_bullets_as_primary_content` | Unit | NEW (shared with AC-2) | `formattedText` 
argument appears in output between `source::` and `#+BEGIN_QUOTE` | +| AC-5 | Pipeline end-to-end: transcript page created | `VoiceNoteBlockFormatTest` | `success pipeline stores block with correct structure` | Integration | UPDATE | Assert transcript page exists in `pageRepo` with `[[Voice Note` title and no `#+BEGIN_QUOTE` in inline block | +| AC-6 | `includeRawTranscript=true` β†’ page includes `#+BEGIN_QUOTE` | `VoiceNoteBlockFormatTest` | `transcript page includes BEGIN_QUOTE when includeRawTranscript is true` | Unit | NEW (replaces deleted test) | Replaces `block contains raw transcript in BEGIN_QUOTE block`; calls `buildTranscriptPageContent(..., includeRawTranscript=true)` | +| AC-7 | `includeRawTranscript=false` β†’ page omits `#+BEGIN_QUOTE` | `VoiceNoteBlockFormatTest` | `transcript page omits BEGIN_QUOTE when includeRawTranscript is false` | Unit | NEW | Calls `buildTranscriptPageContent(..., includeRawTranscript=false)`; asserts no `#+BEGIN_QUOTE` | +| AC-7 | LLM disabled β†’ transcript page is raw text, no `#+BEGIN_QUOTE` | `VoiceNoteBlockFormatTest` | `buildTranscriptPageContent_should_use_raw_text_without_quote_wrapper_when_llm_disabled` | Unit | NEW | `buildTranscriptPageContent("Source", null, "raw text", true)` has no `#+BEGIN_QUOTE` and contains `raw text` | +| AC-8 | Voice note appended to open page when UUID non-null | `VoiceCaptureViewModelTest` | `when page is open voice note is appended to that page (AC-8)` | Integration | NEW | `currentOpenPageUuid = { targetPage.uuid }`; after pipeline, `blockRepo.getBlocksForPage(targetPage.uuid)` contains block with `πŸ“ Voice note` | +| AC-9 | Falls back to journal when no page open | `VoiceCaptureViewModelTest` | `when no page is open voice note falls back to today journal (AC-9)` | Integration | NEW | `currentOpenPageUuid = { null }`; after pipeline, today's journal page blocks contain `πŸ“ Voice note` | +| AC-10 | `VoiceSettings` persists `includeRawTranscript` | `VoiceSettingsTest` | 
`getIncludeRawTranscript_should_return_true_by_default` | Unit | NEW | Fresh `VoiceSettings(MockSettings())` returns `true` for `getIncludeRawTranscript()` | +| AC-10 | `VoiceSettings` round-trips persisted value | `VoiceSettingsTest` | `setIncludeRawTranscript_should_persist_value_across_get_calls` | Unit | NEW | `setIncludeRawTranscript(false)` followed by `getIncludeRawTranscript()` returns `false` | +| AC-11 | 2-word transcript reaches `Done` (no minWordCount) | `VoiceCaptureViewModelTest` | `2-word transcript reaches Done state (AC-11)` | Unit | NEW | STT returns `"buy milk"`; pipeline reaches `VoiceCaptureState.Done` | +| AC-11 | `VoicePipelineConfig` has no `minWordCount` | `VoiceCaptureViewModelTest` | `word-count gate under 10 words emits Error at TRANSCRIBING` | Unit | DELETE | Test asserted old behaviour; remove entirely | +| AC-11 | 9-word transcript no longer errors | `VoiceCaptureViewModelTest` | `9-word transcript emits Error at TRANSCRIBING` | Unit | DELETE | Old word-count boundary test; remove entirely | +| AC-11 | 10-word transcript boundary test obsolete | `VoiceCaptureViewModelTest` | `10-word transcript reaches Done state` | Unit | DELETE | Was boundary test for removed guard; remove entirely | +| AC-12 | Android STT: 6s complete-silence timeout | `AndroidSpeechRecognizerProviderTest` | `listenInternal_should_set_completeSilenceTimeout_to_6000ms` | Unit | NEW | Inspects `Intent` extras; `EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS == 6000L` | +| AC-12 | Android STT: 3s possibly-complete-silence timeout | `AndroidSpeechRecognizerProviderTest` | `listenInternal_should_set_possiblyCompleteSilenceTimeout_to_3000ms` | Unit | NEW | `EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS == 3000L` | +| AC-12 | Android STT: 2s minimum recording length | `AndroidSpeechRecognizerProviderTest` | `listenInternal_should_set_minimumLengthMillis_to_2000ms` | Unit | NEW | `EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS == 2000L` | + +--- + +## Test File 
Summary + +### `VoiceNoteBlockFormatTest` (`kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceNoteBlockFormatTest.kt`) + +| # | Test Name | Action | +|---|-----------|--------| +| 1 | `block starts with voice note header line` | UPDATE β€” 3-param `buildVoiceNoteBlock` | +| 2 | `block contains formatted text` | UPDATE β€” 3-param `buildVoiceNoteBlock` | +| 3 | `block contains raw transcript in BEGIN_QUOTE block` | DELETE β€” replaced by tests 6 & 7 | +| 4 | `multiline formatted text has each line indented under header` | UPDATE β€” 3-param `buildVoiceNoteBlock` | +| 5 | `timestamp in header has zero-padded hours and minutes` | UPDATE β€” regex includes seconds + wikilink | +| 6 | `transcript page includes BEGIN_QUOTE when includeRawTranscript is true` | NEW | +| 7 | `transcript page omits BEGIN_QUOTE when includeRawTranscript is false` | NEW | +| 8 | `buildVoiceNoteBlock_should_contain_wikilink_to_transcript_page` | NEW | +| 9 | `buildTranscriptPageContent_should_start_with_source_property` | NEW | +| 10 | `buildTranscriptPageContent_should_include_formatted_bullets_as_primary_content` | NEW | +| 11 | `buildTranscriptPageContent_should_passthrough_LLM_output_verbatim` | NEW | +| 12 | `buildTranscriptPageContent_should_use_raw_text_without_quote_wrapper_when_llm_disabled` | NEW | +| 13 | `success pipeline stores block with correct structure` | UPDATE β€” check `[[Voice Note` wikilink + transcript page exists | +| β€” | `makeViewModel()` helper | UPDATE β€” add named `scope =` param; add `currentOpenPageUuid = { null }` | + +### `VoiceCaptureViewModelTest` (`kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModelTest.kt`) + +| # | Test Name | Action | +|---|-----------|--------| +| β€” | `word-count gate under 10 words emits Error at TRANSCRIBING` | DELETE | +| β€” | `9-word transcript emits Error at TRANSCRIBING` | DELETE | +| β€” | `10-word transcript reaches Done state` | DELETE | +| 1 | `2-word transcript reaches Done state (AC-11)` 
| NEW | +| 2 | `when page is open voice note is appended to that page (AC-8)` | NEW | +| 3 | `when no page is open voice note falls back to today journal (AC-9)` | NEW | + +### `VoiceSettingsTest` (`kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceSettingsTest.kt`) + +> New test file. Use an existing `MockSettings` or `InMemorySettings` if available in the project; otherwise implement a simple map-backed stub inline. + +| # | Test Name | Action | +|---|-----------|--------| +| 1 | `getIncludeRawTranscript_should_return_true_by_default` | NEW | +| 2 | `setIncludeRawTranscript_should_persist_value_across_get_calls` | NEW | + +### `AndroidSpeechRecognizerProviderTest` (`kmp/src/androidUnitTest/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProviderTest.kt`) + +> New or existing test file in `androidUnitTest`. Uses Robolectric to create a real `Intent` and inspect extras. + +| # | Test Name | Action | +|---|-----------|--------| +| 1 | `listenInternal_should_set_completeSilenceTimeout_to_6000ms` | NEW | +| 2 | `listenInternal_should_set_possiblyCompleteSilenceTimeout_to_3000ms` | NEW | +| 3 | `listenInternal_should_set_minimumLengthMillis_to_2000ms` | NEW | + +--- + +## Detailed Test Specifications + +### UPDATE: `makeViewModel` helper in `VoiceNoteBlockFormatTest` + +```kotlin +// BEFORE (broken after constructor change β€” scope lands in currentOpenPageUuid slot) +private fun makeViewModel(scope: kotlinx.coroutines.CoroutineScope) = VoiceCaptureViewModel( + VoicePipelineConfig(), + JournalService(InMemoryPageRepository(), InMemoryBlockRepository()), + scope, +) + +// AFTER +private fun makeViewModel(scope: kotlinx.coroutines.CoroutineScope) = VoiceCaptureViewModel( + VoicePipelineConfig(), + JournalService(InMemoryPageRepository(), InMemoryBlockRepository()), + currentOpenPageUuid = { null }, + scope = scope, +) +``` + +Note: after the refactor `buildVoiceNoteBlock` and `buildTranscriptPageContent` are free (internal) functions, not VM instance 
methods. Update call sites from `makeViewModel(this).buildVoiceNoteBlock(...)` to `buildVoiceNoteBlock(...)` directly. + +### UPDATE: `block starts with voice note header line` + +```kotlin +@Test +fun `block starts with voice note header line`() = runTest { + val block = buildVoiceNoteBlock( + pageTitle = "Voice Note 2026-05-02 14:35:22", + timeLabel = "14:35:22", + formattedText = "- formatted bullet.", + ) + assertTrue(block.startsWith("- πŸ“ Voice note ("), + "Expected block to start with '- πŸ“ Voice note (', got: $block") +} +``` + +### UPDATE: `timestamp in header has zero-padded hours and minutes` + +```kotlin +@Test +fun `timestamp in header has zero-padded hours and minutes`() = runTest { + val block = buildVoiceNoteBlock( + pageTitle = "Voice Note 2026-05-02 14:35:22", + timeLabel = "14:35:22", + formattedText = "- formatted.", + ) + val headerLine = block.lines().first() + val timeRegex = Regex("""- πŸ“ Voice note \(\d{2}:\d{2}:\d{2}\) \[\[Voice Note \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\]\]""") + assertTrue(timeRegex.containsMatchIn(headerLine), + "Expected HH:mm:ss timestamp and wikilink in header, got: $headerLine") +} +``` + +### NEW: `buildVoiceNoteBlock_should_contain_wikilink_to_transcript_page` + +```kotlin +@Test +fun `buildVoiceNoteBlock_should_contain_wikilink_to_transcript_page`() = runTest { + val block = buildVoiceNoteBlock( + pageTitle = "Voice Note 2026-05-02 14:35:22", + timeLabel = "14:35:22", + formattedText = "- formatted bullet.", + ) + assertTrue(block.contains("[[Voice Note 2026-05-02 14:35:22]]"), + "Expected wikilink to transcript page in block, got: $block") +} +``` + +### NEW: `buildTranscriptPageContent_should_start_with_source_property` + +```kotlin +@Test +fun `buildTranscriptPageContent_should_start_with_source_property`() = runTest { + val content = buildTranscriptPageContent( + sourcePage = "My Page", + formattedText = "- bullet one", + rawTranscript = "raw text", + includeRawTranscript = false, + ) + 
assertTrue(content.startsWith("source:: [[My Page]]"), + "Expected content to start with source:: property, got: $content") +} +``` + +### NEW: `buildTranscriptPageContent_should_include_formatted_bullets_as_primary_content` + +```kotlin +@Test +fun `buildTranscriptPageContent_should_include_formatted_bullets_as_primary_content`() = runTest { + val formatted = "- TODO Call Alice about [[project]]\n- #meeting noted" + val content = buildTranscriptPageContent( + sourcePage = "Today", + formattedText = formatted, + rawTranscript = "call alice about the project, meeting noted", + includeRawTranscript = false, + ) + assertTrue(content.contains("- TODO Call Alice about [[project]]"), + "Expected formatted TODO bullet in transcript page, got: $content") + assertTrue(content.contains("#meeting"), + "Expected #tag in transcript page, got: $content") +} +``` + +### NEW: `buildTranscriptPageContent_should_passthrough_LLM_output_verbatim` + +```kotlin +@Test +fun `buildTranscriptPageContent_should_passthrough_LLM_output_verbatim`() = runTest { + val formatted = "- project:: Stelekit\n- **bold term** in output\n- #tag example\n- TODO action" + val content = buildTranscriptPageContent( + sourcePage = "Source", + formattedText = formatted, + rawTranscript = "raw", + includeRawTranscript = false, + ) + assertTrue(content.contains("project:: Stelekit")) + assertTrue(content.contains("**bold term**")) + assertTrue(content.contains("#tag example")) + assertTrue(content.contains("TODO action")) +} +``` + +### NEW: `transcript page includes BEGIN_QUOTE when includeRawTranscript is true` (replaces deleted test) + +```kotlin +@Test +fun `transcript page includes BEGIN_QUOTE when includeRawTranscript is true`() = runTest { + val raw = "this is the raw transcript text" + val content = buildTranscriptPageContent( + sourcePage = "Today", + formattedText = "- formatted.", + rawTranscript = raw, + includeRawTranscript = true, + ) + assertTrue(content.contains("#+BEGIN_QUOTE")) + 
assertTrue(content.contains(raw)) + assertTrue(content.contains("#+END_QUOTE")) +} +``` + +### NEW: `transcript page omits BEGIN_QUOTE when includeRawTranscript is false` + +```kotlin +@Test +fun `transcript page omits BEGIN_QUOTE when includeRawTranscript is false`() = runTest { + val raw = "this is the raw transcript text" + val content = buildTranscriptPageContent( + sourcePage = "Today", + formattedText = "- formatted.", + rawTranscript = raw, + includeRawTranscript = false, + ) + assertFalse(content.contains("#+BEGIN_QUOTE")) +} +``` + +### NEW: `buildTranscriptPageContent_should_use_raw_text_without_quote_wrapper_when_llm_disabled` + +```kotlin +@Test +fun `buildTranscriptPageContent_should_use_raw_text_without_quote_wrapper_when_llm_disabled`() = runTest { + val raw = "buy milk and eggs" + val content = buildTranscriptPageContent( + sourcePage = "Source", + formattedText = null, // LLM disabled or failed + rawTranscript = raw, + includeRawTranscript = true, // toggle is true, but has no effect when formattedText is null + ) + assertFalse(content.contains("#+BEGIN_QUOTE"), + "Expected no #+BEGIN_QUOTE when formattedText is null, got: $content") + assertTrue(content.contains(raw), + "Expected raw transcript in output, got: $content") +} +``` + +### UPDATE: `success pipeline stores block with correct structure` + +Key assertion changes: + +```kotlin +// REMOVE these assertions (inline block no longer contains BEGIN_QUOTE or raw transcript): +// assertTrue(voiceBlock.content.contains("#+BEGIN_QUOTE"), ...) +// assertTrue(voiceBlock.content.contains(transcript), ...) 
+ +// ADD: inline block has wikilink, no BEGIN_QUOTE +assertTrue(voiceBlock.content.contains("[[Voice Note"), + "Expected wikilink to transcript page in inline block") +assertFalse(voiceBlock.content.contains("#+BEGIN_QUOTE"), + "#+BEGIN_QUOTE must not appear in inline block; it belongs on the transcript page") + +// ADD: transcript page was created +val allPages = pageRepo.getAllPages().first().getOrNull().orEmpty() +val transcriptPages = allPages.filter { it.name.startsWith("Voice Note ") } +assertTrue(transcriptPages.isNotEmpty(), + "Expected a Voice Note transcript page to be created") +``` + +Note: `pageRepo` must be declared at test scope and passed into `JournalService` to be accessible for this assertion. Update the `VoiceCaptureViewModel` constructor call in this test to inject `InMemoryPageRepository` reference. + +### NEW: `getIncludeRawTranscript_should_return_true_by_default` (`VoiceSettingsTest`) + +```kotlin +@Test +fun `getIncludeRawTranscript_should_return_true_by_default`() { + val settings = VoiceSettings(MockSettings()) // or MapSettings() if that's the project's test double + assertTrue(settings.getIncludeRawTranscript(), "Default should be true") +} +``` + +### NEW: `setIncludeRawTranscript_should_persist_value_across_get_calls` (`VoiceSettingsTest`) + +```kotlin +@Test +fun `setIncludeRawTranscript_should_persist_value_across_get_calls`() { + val settings = VoiceSettings(MockSettings()) + settings.setIncludeRawTranscript(false) + assertFalse(settings.getIncludeRawTranscript(), "Expected persisted false value") + settings.setIncludeRawTranscript(true) + assertTrue(settings.getIncludeRawTranscript(), "Expected persisted true value after re-setting to true") +} +``` + +### NEW: Android STT timeout tests (`AndroidSpeechRecognizerProviderTest`) + +```kotlin +// These tests capture the Intent built by listenInternal() and inspect extras. 
+// Implementation approach: extract Intent construction into a testable helper, or +// use a subclass/spy pattern to intercept the Intent before it is dispatched. + +@Test +fun `listenInternal_should_set_completeSilenceTimeout_to_6000ms`() { + // Assert EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS == 6_000L +} + +@Test +fun `listenInternal_should_set_possiblyCompleteSilenceTimeout_to_3000ms`() { + // Assert EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS == 3_000L +} + +@Test +fun `listenInternal_should_set_minimumLengthMillis_to_2000ms`() { + // Assert EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS == 2_000L +} +``` + +--- + +## Test Stack + +- **Unit**: `kotlin.test` (`assertTrue`, `assertFalse`, `assertIs`, `assertEquals`, `assertNotNull`) + `kotlinx-coroutines-test` (`runTest`, `advanceUntilIdle`, `TestCoroutineScope`) +- **Integration**: Same stack with `InMemoryPageRepository` and `InMemoryBlockRepository` as in-process test doubles β€” no mocking framework required +- **Android unit tests**: `junit4` + Robolectric (existing project setup) for `AndroidSpeechRecognizerProviderTest` +- **API/E2E**: Not applicable β€” all new behavior is `commonMain` only + +--- + +## Coverage Targets + +- Unit test coverage: β‰₯80% (line) for `VoiceCaptureViewModel`, `VoiceSettings`, `JournalService` new methods +- All public builder functions (`buildVoiceNoteBlock`, `buildTranscriptPageContent`): happy path + every conditional branch (`includeRawTranscript` true/false, `formattedText` null/non-null) +- All external integrations: `AndroidSpeechRecognizerProvider` Intent extras verified in `androidUnitTest` +- Arrow `Either` error paths: covered by existing `JournalService` and repository tests; new methods follow the same patterns + +--- + +## AC Coverage Summary + +| AC | Description | Tests Covering | New | Updated | Deleted | +|----|-------------|---------------|-----|---------|---------| +| AC-1 | Rich LLM output features | 
`buildTranscriptPageContent_should_passthrough_LLM_output_verbatim` | 1 | 0 | 0 |
+| AC-2 | TODO bullet for action items | `buildTranscriptPageContent_should_include_formatted_bullets_as_primary_content` | 1 | 0 | 0 |
+| AC-3 | Inline block format HH:mm:ss + wikilink + sub-bullets | `block starts with…` (upd), `timestamp…` (upd), `buildVoiceNoteBlock_should_contain_wikilink…` (new), `block contains formatted text` (upd), `multiline…` (upd) | 1 | 4 | 0 |
+| AC-4 | `source::` property on transcript page | `buildTranscriptPageContent_should_start_with_source_property` (new), AC-8 integration test (new) | 2 | 0 | 0 |
+| AC-5 | Transcript page has LLM bullets | `buildTranscriptPageContent_should_include_formatted_bullets_as_primary_content` (new), `success pipeline…` (upd) | 1 | 1 | 0 |
+| AC-6 | `includeRawTranscript=true` β†’ `#+BEGIN_QUOTE` present | `transcript page includes BEGIN_QUOTE when includeRawTranscript is true` | 1 | 0 | 1 |
+| AC-7 | `includeRawTranscript=false` β†’ absent; LLM null β†’ no quote | `transcript page omits BEGIN_QUOTE…` (new), `buildTranscriptPageContent_should_use_raw_text…` (new) | 2 | 0 | 0 |
+| AC-8 | Append to open page | `when page is open voice note is appended to that page (AC-8)` | 1 | 0 | 0 |
+| AC-9 | Fall back to journal | `when no page is open voice note falls back to today journal (AC-9)` | 1 | 0 | 0 |
+| AC-10 | `includeRawTranscript` persists in `VoiceSettings` | `getIncludeRawTranscript_should_return_true_by_default` (new), `setIncludeRawTranscript_should_persist_value_across_get_calls` (new) | 2 | 0 | 0 |
+| AC-11 | 2-word transcript completes; `minWordCount` removed | `2-word transcript reaches Done state (AC-11)` (new); 3 word-count tests deleted | 1 | 0 | 3 |
+| AC-12 | Android STT silence timeouts | `listenInternal_should_set_completeSilenceTimeout_to_6000ms`, `…possiblyComplete…3000ms`, `…minimumLength…2000ms` | 3 | 0 | 0 |
+| **Total** | | | **17** | **5** | **4** |
+
+> Note (review): a test that satisfies several ACs is counted once per AC row, so the **New** column total (17) exceeds the number of distinct new tests (15). The **Updated** total is the column sum (5): four `VoiceNoteBlockFormatTest` updates under AC-3 plus `success pipeline…` under AC-5. AC-13, AC-14, and AC-15 (FR-6 word-count threshold) are defined in requirements.md but have no tests listed here yet β€” TODO before implementation.
+
+**All 12 ACs covered.
Coverage fraction: 12/12.** diff --git a/project_plans/voice/requirements.md b/project_plans/voice/requirements.md new file mode 100644 index 00000000..67cc253b --- /dev/null +++ b/project_plans/voice/requirements.md @@ -0,0 +1,212 @@ +# Voice Note Feature β€” Enhancement Requirements + +## Context + +The voice note feature shipped in three iterations: +1. **Story 1** β€” Android audio capture β†’ Whisper STT β†’ journal append +2. **Story 2** β€” LLM formatting (Claude/OpenAI) + settings UI +3. **On-device** β€” Android SpeechRecognizer + ML Kit as cloud-free fallback + +Current block format (to be replaced): +``` +- πŸ“ Voice note (HH:mm) + - + #+BEGIN_QUOTE + + #+END_QUOTE +``` + +Current LLM system prompt (`DEFAULT_VOICE_SYSTEM_PROMPT` in `VoicePipelineConfig.kt`) produces only basic `- bullet` lines and `[[wiki links]]`. It does not use Logseq's richer markdown vocabulary. + +**Desired new format (inline β€” journal or current page):** +``` +- πŸ“ Voice note (14:35:22) [[Voice Note 2026-05-02 14:35:22]] + - +``` + +**Desired new format (transcript page `Voice Note 2026-05-02 14:35:22`):** +``` +source:: [[]] + +- + +#+BEGIN_QUOTE + +#+END_QUOTE +``` + +--- + +## Goals + +Improve the voice note feature so that it takes full advantage of Logseq markdown formatting and integrates more naturally into the editor workflow. 
+ +--- + +## Functional Requirements + +### FR-1 β€” Rich LLM Formatting + +Update `DEFAULT_VOICE_SYSTEM_PROMPT` (and the formatting logic) so the LLM output exploits all relevant Logseq features: + +| Feature | Rule | +|---------|------| +| `#tags` | Add `#tag` for topics/projects explicitly named in the transcript | +| `key:: value` properties | Extract structured properties (`status:: todo`, `priority:: high`, `date:: `) when clearly implied by speech | +| `**bold** / *italic*` | Use bold for key terms, italics for emphasis or titles | +| `- TODO` markers | Detect action items ("I need to", "remember to", "make sure to") and prefix them with `TODO` | + +Constraints: +- Do not invent content not present in the transcript +- Only add `#tags` or `[[links]]` for things explicitly named +- Properties block should appear at the top of the inserted block when present +- `TODO` items should be distinguishable from regular bullets in the Logseq outliner + +### FR-2 β€” Transcript Page Creation + +Instead of embedding the raw transcript inline, create a dedicated Logseq page for each voice note: + +**Page name**: `Voice Note YYYY-MM-DD HH:mm:ss` (e.g. 
`Voice Note 2026-05-02 14:35:22`) β€” seconds included to prevent collisions
+
+**Page content**:
+```
+source:: [[<source page title>]]
+
+- <LLM-formatted bullets (rich markdown from FR-1)>
+
+#+BEGIN_QUOTE
+<raw transcript>
+#+END_QUOTE
+```
+
+**Inline block** (inserted into the journal or current open page):
+```
+- πŸ“ Voice note (HH:mm:ss) [[Voice Note YYYY-MM-DD HH:mm:ss]]
+```
+
+Rules:
+- The `source::` property links back to the page that was open when the recording was made (today's journal title if no page was open)
+- When LLM formatting is disabled or fails, the transcript page body is the raw transcript text (no formatted bullets section β€” `#+BEGIN_QUOTE` wrapper is also omitted since the raw text is already the full content)
+- The transcript page is created via `PageRepository` / `BlockRepository` β€” same pattern as `JournalService.ensureTodayJournal()`
+- `buildVoiceNoteBlock()` returns the header line (with the transcript-page wikilink) plus the indented formatted summary bullets β€” the raw transcript itself is no longer embedded inline
+- A new method `buildTranscriptPageContent()` builds the full transcript page content
+
+**Raw transcript toggle** (`includeRawTranscript: Boolean`, default `true` in `VoiceSettings`):
+- When `true`: transcript page includes the `#+BEGIN_QUOTE` section
+- When `false`: transcript page omits it (formatted bullets only)
+- Surface as a toggle in `VoiceCaptureSettings` UI panel
+
+### FR-3 β€” Insert into Current Open Page
+
+Change the voice note insert target from "always today's journal" to "current open page, falling back to today's journal".
+ +- `VoiceCaptureViewModel` needs access to the currently-open page UUID (or `null` if no page is open) +- When a page is open in the editor: append the voice block to that page instead of the journal +- When no page is open (home screen, search, etc.): fall back to today's journal (current behavior) +- The insertion target should be resolved at the moment the pipeline completes, not when recording starts + +### FR-6 β€” Configurable Inline vs. Transcript Page Threshold + +Short voice notes (e.g. "buy milk") should stay inline to avoid clutter. Longer notes should automatically get their own transcript page. The threshold should be user-configurable. + +Add `transcriptPageWordThreshold: Int` (default: 20) to `VoiceSettings`. + +**Below threshold** (short content β€” inline only): +``` +- πŸ“ Voice note (HH:mm:ss) + - <formatted content or raw text> +``` +No transcript page is created. No wikilink in the header. + +**At or above threshold** (long content β€” transcript page created): +``` +- πŸ“ Voice note (HH:mm:ss) [[Voice Note YYYY-MM-DD HH:mm:ss]] + - <formatted summary bullets> +``` +Transcript page created as described in FR-2. + +Rules: +- Word count is measured on the **formatted output** (or raw transcript if LLM is off/failed) +- `transcriptPageWordThreshold` is surfaced as a numeric input in `VoiceCaptureSettings` UI +- Default of 20 words means a quick command like "remind me to call Alice tomorrow" stays inline while a proper note gets its own page + +--- + +### FR-4 β€” Remove Minimum Word Count Guard + +Remove the `minWordCount` check from `VoiceCaptureViewModel.processTranscript()`. The current 10-word minimum is disruptive for short but valid voice notes (e.g. "buy milk", "call Alice"). 
+ +- Delete the `minWordCount: Int = 10` field from `VoicePipelineConfig` +- Delete the word-count check and its error state from `processTranscript()` +- Keep the existing check for truly empty/blank transcripts (that is handled by `TranscriptResult.Empty` upstream) + +### FR-5 β€” Extend Android SpeechRecognizer Silence Timeout + +In `AndroidSpeechRecognizerProvider`, increase the silence tolerance so users can pause to think without the recognizer auto-stopping. + +Current values: +- `EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS`: 3,000 ms +- `EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS`: 1,500 ms + +Target values (configurable via `VoicePipelineConfig` or hardcoded to sensible defaults): +- `EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS`: 6,000 ms +- `EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS`: 3,000 ms +- Add `EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS`: 2,000 ms (prevents premature cutoff before user has spoken) + +### FR-7 β€” Continuous Recording with Silence Stripping (FUTURE β€” separate PR) + +> **Out of scope for this PR.** Requires dedicated research into platform VAD APIs. + +Users want to record indefinitely (manual stop only, no auto-stop on silence) while the submitted audio has silence stripped to reduce Whisper processing time and improve accuracy on long recordings. 
+ +Requirements: +- Recording continues until the user explicitly taps stop β€” no automatic cutoff on silence +- Before submitting to Whisper (or storing), silence frames are removed from the audio, keeping a ~200 ms buffer before and after each detected voice segment (VAD β€” Voice Activity Detection) +- For the AndroidSpeechRecognizer path: auto-restart the recognizer when it naturally terminates, accumulating partial transcript results until the user taps stop +- Configurable silence buffer duration (default 200 ms) + +Implementation approach (to be researched): +- **Whisper path**: Energy-threshold VAD applied to the PCM frames in `AndroidAudioRecorder` before encoding to M4A +- **SpeechRecognizer path**: Auto-restart loop in `AndroidSpeechRecognizerProvider` that concatenates `onResults` partial transcripts + +--- + +## Non-Functional Requirements + +- All changes must preserve the Arrow `Either` error-handling pattern at repository boundaries +- New settings fields must be persisted alongside existing `VoiceSettings` fields +- All new code paths must have corresponding unit tests in `businessTest` +- No new platform-specific code required β€” changes are `commonMain` only +- `DEFAULT_VOICE_SYSTEM_PROMPT` update must not break existing `VoiceNoteBlockFormatTest` tests (adjust tests as needed) + +--- + +## Out of Scope + +- Post-insert navigation to the block (user confirmed: stay in place) +- Page picker before recording +- LLM-driven target page selection +- Desktop / iOS / web platform specifics (changes target commonMain only) + +--- + +## Acceptance Criteria + +| ID | Criterion | +|----|-----------| +| AC-1 | Transcript page may contain `#tags`, `key:: value`, `**bold**`, and `TODO` bullets when the transcript warrants them | +| AC-2 | A voice note with "I need to call Alice about the project" produces a `- TODO Call Alice about [[project]]` bullet on the transcript page | +| AC-3 | The inline block starts with `- πŸ“ Voice note (HH:mm:ss) [[Voice Note 
YYYY-MM-DD HH:mm:ss]]` followed by the LLM-formatted summary bullets as sub-items | +| AC-4 | The transcript page contains a `source::` property linking back to the originating page | +| AC-5 | The transcript page contains the LLM-formatted bullets | +| AC-6 | With `includeRawTranscript = true` (default), transcript page includes `#+BEGIN_QUOTE` raw transcript | +| AC-7 | With `includeRawTranscript = false`, transcript page omits `#+BEGIN_QUOTE` section | +| AC-8 | When a page is open in the editor, the inline link block is appended to that page | +| AC-9 | When no page is open, the inline link block falls back to today's journal | +| AC-10 | `VoiceSettings` persists `includeRawTranscript` across sessions | +| AC-11 | A 2-word voice note (e.g. "buy milk") completes successfully β€” transcript page created, link inserted | +| AC-12 | `AndroidSpeechRecognizerProvider` uses 6s complete silence timeout and 2s minimum recording length | +| AC-13 | A 5-word note is inserted inline (no transcript page created, no wikilink in header) when threshold is 20 | +| AC-14 | A 25-word note creates a transcript page and inserts a wikilink header when threshold is 20 | +| AC-15 | `VoiceSettings` persists `transcriptPageWordThreshold` across sessions | diff --git a/project_plans/voice/research/android-stt-tuning.md b/project_plans/voice/research/android-stt-tuning.md new file mode 100644 index 00000000..5214032a --- /dev/null +++ b/project_plans/voice/research/android-stt-tuning.md @@ -0,0 +1,169 @@ +# Android SpeechRecognizer Silence Tuning + Min Word Count + +## Research Question +What are the correct Intent extras for extending SpeechRecognizer silence tolerance, and what is the safest way to remove the `minWordCount` guard? 
+ +--- + +## Current Implementation (AndroidSpeechRecognizerProvider.kt) + +### Intent extras currently set + +```kotlin +val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply { + putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM) + putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, true) + putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 3_000L) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 1_500L) +} +``` + +### Current values summary + +| Extra | Current value | Meaning | +|---|---|---| +| `EXTRA_LANGUAGE_MODEL` | `LANGUAGE_MODEL_FREE_FORM` | Dictation mode | +| `EXTRA_PREFER_OFFLINE` | `true` | Use on-device if available | +| `EXTRA_MAX_RESULTS` | `1` | Return only top result | +| `EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS` | 3,000 ms | Stop listening after 3s of silence | +| `EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS` | 1,500 ms | Consider utterance possibly done after 1.5s | + +**Notable absence:** `EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS` is not set β€” FR-5 requires adding it at 2,000 ms. + +--- + +## Android SDK Extra Documentation + +### `EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS` + +- **Class:** `android.speech.RecognizerIntent` +- **API level:** Added in API 8 (Android 2.2) +- **Type:** `long` (milliseconds) +- **Meaning:** The amount of time after the user stops speaking that the recognizer will wait before returning results. +- **Default (Google recognizer):** ~1,500–2,000 ms +- **FR-5 target:** 6,000 ms + +### `EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS` + +- **Class:** `android.speech.RecognizerIntent` +- **API level:** Added in API 8 (Android 2.2) +- **Type:** `long` (milliseconds) +- **Meaning:** The amount of silence the recognizer tolerates while potentially still listening (e.g., mid-sentence pause). 
+- **Default:** ~500–1,000 ms +- **FR-5 target:** 3,000 ms + +### `EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS` + +- **Class:** `android.speech.RecognizerIntent` +- **API level:** Added in API 8 (Android 2.2) +- **Type:** `long` (milliseconds) +- **Meaning:** Minimum duration of speech input before the recognizer considers the input complete. Prevents very short utterances from triggering premature end-of-speech. +- **Not currently set.** FR-5 requires adding at 2,000 ms. + +### Important caveat: OEM / AOSP recognizer honoring + +These extras are **hints to the recognition service**, not hard guarantees. The Google Recognition Service (GOOG-Speech) typically honors them; AOSP SpeechRecognizer and some OEM implementations may ignore them. The current code already uses `EXTRA_PREFER_OFFLINE = true`, which routes to the on-device recognizer β€” behavior may vary by device and Android version. This is inherent to the Android SpeechRecognizer API and cannot be worked around. + +--- + +## Exact Lines to Change in AndroidSpeechRecognizerProvider.kt + +### Current (lines 108–114) + +```kotlin +val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply { + putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM) + putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, true) + putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 3_000L) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 1_500L) +} +``` + +### Updated (FR-5) + +```kotlin +val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply { + putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM) + putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, true) + putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 6_000L) + 
putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 3_000L) + putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, 2_000L) +} +``` + +--- + +## `minWordCount` Guard β€” Analysis and Removal + +### Current implementation (VoicePipelineConfig.kt + VoiceCaptureViewModel.kt) + +**VoicePipelineConfig.kt:** +```kotlin +class VoicePipelineConfig( + // ... + val minWordCount: Int = 10, +) +``` + +**VoiceCaptureViewModel.processTranscript() (lines 116–123):** +```kotlin +val wordCount = rawTranscript.split(Regex("\\s+")).count { it.isNotBlank() } +if (wordCount < pipeline.minWordCount) { + _state.value = VoiceCaptureState.Error( + PipelineStage.TRANSCRIBING, + "Recording too short β€” try speaking for a few more seconds" + ) + return +} +``` + +### Tests that reference `minWordCount` + +In `VoiceCaptureViewModelTest.kt`, these tests exercise the word-count gate: + +1. `word-count gate under 10 words emits Error at TRANSCRIBING` β€” transcript "too short" (2 words) +2. `9-word transcript emits Error at TRANSCRIBING` β€” 9 words β†’ error +3. `10-word transcript reaches Done state` β€” 10 words β†’ Done + +These tests must be **deleted or repurposed** when the guard is removed (FR-4). + +### What to remove + +**In `VoicePipelineConfig.kt`:** Delete `val minWordCount: Int = 10`. + +**In `VoiceCaptureViewModel.processTranscript()`:** Delete lines 116–123: +```kotlin +// DELETE: +val wordCount = rawTranscript.split(Regex("\\s+")).count { it.isNotBlank() } +if (wordCount < pipeline.minWordCount) { + _state.value = VoiceCaptureState.Error( + PipelineStage.TRANSCRIBING, + "Recording too short β€” try speaking for a few more seconds" + ) + return +} +``` + +After removal, `processTranscript()` proceeds directly to the `Formatting` state for any non-empty transcript. 
The `TranscriptResult.Empty` path (which returns an error for a blank result) is already handled in `startPipeline()` before `processTranscript` is called β€” that guard is independent and should remain. + +### Safety analysis of removing the guard + +The `minWordCount` guard was introduced to prevent the LLM from receiving trivial transcripts (single words, noise). After removal: + +- Short transcripts (e.g., "yes", "okay") will be sent to the LLM. This is acceptable β€” the LLM will output a minimal bullet like `- Yes.` and the voice note will be inserted. +- The `TranscriptResult.Empty` guard in `startPipeline()` still blocks completely blank transcripts from ever reaching `processTranscript`. +- The `MAX_TRANSCRIPT_CHARS = 10_000` truncation guard is unaffected. +- LLM cost impact: negligible. Short transcripts = short prompts = fewer tokens. + +**Verdict: safe to remove.** The only remaining guard against a truly empty result is `TranscriptResult.Empty` handled before `processTranscript` is called. + +--- + +## 3-Bullet Summary + +- **The three silence extras (`COMPLETE`, `POSSIBLY_COMPLETE`, `MINIMUM_LENGTH`) are all available since API 8** β€” no API level restrictions apply; the change is two value updates (`3_000L β†’ 6_000L`, `1_500L β†’ 3_000L`) plus one new `putExtra` for `MINIMUM_LENGTH_MILLIS = 2_000L`, all in `listenInternal()` of `AndroidSpeechRecognizerProvider.kt`. +- **`minWordCount` removal requires deleting the field from `VoicePipelineConfig` and 7 lines from `processTranscript()`** β€” the `TranscriptResult.Empty` guard already upstream in `startPipeline()` continues to block truly blank results, so there is no safety regression. +- **Three existing unit tests in `VoiceCaptureViewModelTest` must be deleted** (`word-count gate under 10 words`, `9-word transcript`, `10-word transcript`) β€” they test behavior that will no longer exist after FR-4. 
diff --git a/project_plans/voice/research/current-page-integration.md b/project_plans/voice/research/current-page-integration.md new file mode 100644 index 00000000..da58c60d --- /dev/null +++ b/project_plans/voice/research/current-page-integration.md @@ -0,0 +1,210 @@ +# Current Page Integration β€” Voice Note Feature + +## Research Question +How does the app track the currently-open page, and how can `VoiceCaptureViewModel` access it to insert voice notes to the current page instead of only today's journal? + +--- + +## How the App Tracks the Current Page + +### AppState.kt + +`AppState` contains two relevant fields: + +```kotlin +data class AppState( + val currentScreen: Screen = Screen.Journals, + val currentPage: Page? = null, + // ... +) +``` + +- `currentPage: Page?` is `null` when the user is on a non-page screen (Journals, AllPages, etc.) and non-null when `currentScreen` is `Screen.PageView(page)`. +- Both are set together in `StelekitViewModel.navigateTo()`: + ```kotlin + state.copy( + currentScreen = screen, + currentPage = if (screen is Screen.PageView) screen.page else null, + // ... + ) + ``` +- Also accessible as `(appState.currentScreen as? Screen.PageView)?.page`. 
+ +### StelekitViewModel.uiState + +`StelekitViewModel` exposes: +```kotlin +val uiState: StateFlow<AppState> = _uiState.asStateFlow() +``` + +The current page UUID at any moment is: +```kotlin +viewModel.uiState.value.currentPage?.uuid +``` + +--- + +## How VoiceCaptureViewModel is Currently Wired (App.kt lines 462–463) + +```kotlin +val voiceCaptureViewModel = remember(voicePipeline) { + VoiceCaptureViewModel(voicePipeline, repos.journalService) +} +``` + +`VoiceCaptureViewModel` constructor signature: +```kotlin +class VoiceCaptureViewModel( + private val pipeline: VoicePipelineConfig, + private val journalService: JournalService, + scope: CoroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Default), +) +``` + +The VM calls `journalService.appendToToday(...)` unconditionally in `processTranscript()` β€” there is no concept of a target page. + +--- + +## JournalService.appendToToday vs. What We Need + +`JournalService.appendToToday(content)`: +1. Calls `ensureTodayJournal()` to get/create today's journal page +2. Appends a new block to that page + +For FR-3 we need an analogous `appendToPage(pageUuid, content)` path: +1. Resolve the page by UUID from `PageRepository` +2. Append a new block (same block-creation logic as `appendToToday`) + +**Option A β€” Add `appendToPage` to `JournalService`:** Minimal coupling; `JournalService` already has `blockRepository` and can append to any page. + +**Option B β€” Inject `BlockRepository` directly into `VoiceCaptureViewModel`:** More coupling but avoids bloating `JournalService` with non-journal logic. + +**Recommendation: Option A** β€” `JournalService` already owns the block-append logic. 
Adding: +```kotlin +suspend fun appendToPage(pageUuid: String, content: String) { + val blocks = blockRepository.getBlocksForPage(pageUuid).first().getOrNull() ?: emptyList() + val nextPosition = (blocks.maxOfOrNull { it.position } ?: -1) + 1 + val newBlock = Block( + uuid = UuidGenerator.generateV7(), + pageUuid = pageUuid, + content = content, + position = nextPosition, + createdAt = Clock.System.now(), + updatedAt = Clock.System.now(), + ) + if (writeActor != null) writeActor.saveBlock(newBlock) + else blockRepository.saveBlock(newBlock) +} +``` +is a near-copy of `appendToToday`'s block-creation section. + +--- + +## Wiring the Current Page UUID into VoiceCaptureViewModel + +### Option A β€” Pass `currentPageUuid` as a `StateFlow<String?>` parameter + +```kotlin +class VoiceCaptureViewModel( + private val pipeline: VoicePipelineConfig, + private val journalService: JournalService, + private val currentPageUuid: StateFlow<String?> = MutableStateFlow(null), + scope: CoroutineScope = ..., +) +``` + +In `processTranscript()`: +```kotlin +val targetPageUuid = currentPageUuid.value +if (targetPageUuid != null) { + journalService.appendToPage(targetPageUuid, buildVoiceNoteBlock(...)) +} else { + journalService.appendToToday(buildVoiceNoteBlock(...)) +} +``` + +At the call site in `App.kt`: +```kotlin +val voiceCaptureViewModel = remember(voicePipeline) { + VoiceCaptureViewModel( + voicePipeline, + repos.journalService, + currentPageUuid = viewModel.uiState.map { it.currentPage?.uuid }.stateIn( + scope = /* some scope */, + started = SharingStarted.Eagerly, + initialValue = null + ) + ) +} +``` + +**Problem:** `remember(voicePipeline)` recreates the VM only when `voicePipeline` changes. The `currentPageUuid` StateFlow is derived from `viewModel.uiState` which already updates reactively β€” the VM can read `.value` at the moment of insertion and always gets the current page. + +**Scope for stateIn:** Cannot use `rememberCoroutineScope()` (violates CLAUDE.md rule). 
Must use a scope that lives at least as long as the VM. Use the coroutine scope already available in `GraphContent` (the scope owned by `viewModel` or a dedicated `remember { CoroutineScope(...) }`). + +### Option B β€” Pass `() -> String?` lambda (simpler, no StateFlow overhead) + +```kotlin +class VoiceCaptureViewModel( + private val pipeline: VoicePipelineConfig, + private val journalService: JournalService, + private val currentOpenPageUuid: () -> String? = { null }, + scope: CoroutineScope = ..., +) +``` + +At call site: +```kotlin +VoiceCaptureViewModel( + voicePipeline, + repos.journalService, + currentOpenPageUuid = { viewModel.uiState.value.currentPage?.uuid } +) +``` + +This is the **simpler option** β€” no `stateIn` scope issues, no StateFlow chain. The lambda captures `viewModel` (stable reference) and reads `uiState.value` at call time. + +**Recommendation: Option B** β€” fewer moving parts, no scope ownership concern, straightforward testability (inject a lambda in tests). + +--- + +## Test Strategy for FR-3 + +In `VoiceCaptureViewModelTest`: +```kotlin +// When page is open β€” should append to that page, not journal +val targetRepo = InMemoryBlockRepository() +val targetPageService = JournalService(InMemoryPageRepository(), targetRepo) +val targetPage = targetPageService.ensureTodayJournal() // or create a non-journal page + +val vm = VoiceCaptureViewModel( + pipeline = VoicePipelineConfig(sttProvider = ...), + journalService = targetPageService, + currentOpenPageUuid = { targetPage.uuid } +) +// assert block inserted in targetRepo under targetPage.uuid + +// When no page is open β€” should fall back to journal +val vm2 = VoiceCaptureViewModel( + pipeline = ..., + journalService = ..., + currentOpenPageUuid = { null } // no page open +) +// assert block inserted into today's journal +``` + +--- + +## Edge Cases + +1. **Page deleted while recording:** The page UUID resolves at insertion time. 
If `appendToPage` can't find the page, it should fall back to journal. Add a null-check on the page lookup and fall back to `appendToToday`. +2. **Journal page is open:** `currentPage?.uuid` will be non-null for journal pages too. FR-3 says "append to current page when open" regardless of whether it's a journal. This is correct behavior β€” the user can see which page they're on. +3. **Journals screen (no page open):** `currentPage` is `null` β†’ `currentOpenPageUuid()` returns `null` β†’ fall back to `appendToToday`. Correct. + +--- + +## 3-Bullet Summary + +- **`AppState.currentPage: Page?` in `StelekitViewModel.uiState: StateFlow<AppState>` is the canonical source** for the currently-open page β€” it is set to `null` on non-page screens and non-null whenever `Screen.PageView` is active. +- **The cleanest wiring is a `currentOpenPageUuid: () -> String?` lambda constructor parameter** on `VoiceCaptureViewModel` β€” the lambda reads `viewModel.uiState.value.currentPage?.uuid` at insertion time, requires no StateFlow chaining, and is trivial to fake in tests. +- **`JournalService.appendToPage(pageUuid, content)` needs to be added** (a near-copy of the block-creation logic inside `appendToToday`) to support inserting into non-today pages; the fallback path when `currentOpenPageUuid()` returns `null` continues calling `appendToToday`. diff --git a/project_plans/voice/research/llm-prompt-engineering.md b/project_plans/voice/research/llm-prompt-engineering.md new file mode 100644 index 00000000..0550d01f --- /dev/null +++ b/project_plans/voice/research/llm-prompt-engineering.md @@ -0,0 +1,141 @@ +# LLM Prompt Engineering β€” Voice Note Feature + +## Research Question +How should `DEFAULT_VOICE_SYSTEM_PROMPT` be updated to produce rich Logseq markdown (bullets, #tags, key:: value properties, **bold**, *italic*, TODO markers)? 
+ +--- + +## Current Prompt (from VoicePipelineConfig.kt) + +```kotlin +const val DEFAULT_VOICE_SYSTEM_PROMPT = """You are a Logseq note-taking assistant. Convert the following voice transcript into well-structured Logseq outliner syntax. + +Rules: +- Use "- " bullet format for each main point +- Use 2-space indentation for sub-points +- Add [[Page Name]] wiki links ONLY for proper nouns or topics explicitly named in the transcript β€” do NOT invent links for terms not spoken +- Do not add a preamble or summary +- Do not add content not present in the transcript + +Transcript: +{{TRANSCRIPT}}""" +``` + +**What it produces today:** flat bullet lists with wiki links. No tags, properties, emphasis, or TODO markers. + +--- + +## Logseq Markdown Feature Reference + +Logseq's outliner format supports the following constructs that are absent from the current prompt: + +| Feature | Syntax | Example | +|---|---|---| +| Hashtag | `#tag` | `#meeting #project` | +| Property | `key:: value` | `date:: 2026-05-02` | +| Bold | `**text**` | `**important**` | +| Italic | `*text*` | `*maybe*` | +| TODO marker | `TODO` at line start | `- TODO call Alice` | +| DONE marker | `DONE` at line start | `- DONE review PR` | +| Wikilink | `[[Page Name]]` | `[[Alice]]` | + +--- + +## Analysis of Current Prompt Weaknesses + +1. **No few-shot examples.** Without examples, the model uses its training prior for "Logseq format" which omits tags/properties. +2. **No mention of `#tags`.** The model never produces them. +3. **No mention of properties (`key:: value`).** The model never emits property blocks. +4. **No bold/italic guidance.** The model is conservative and never uses emphasis. +5. **No TODO guidance.** Action items stay as plain bullets. +6. **Constraint is well-stated** ("do not invent content not present in the transcript") β€” keep this. + +--- + +## Best Practices for Structured-Output Prompts + +From the LLM prompting literature and Anthropic guidelines: + +1. 
**Role + task framing** up front (already done β€” keep). +2. **Explicit enumeration of all output features** β€” the model only produces constructs it has been explicitly shown. +3. **Few-shot examples** are the single highest-signal intervention for structured output. Each example should cover one feature. +4. **Explicit "only if present" constraints** for every feature that should not be invented. +5. **Ordering matters**: place the most important rules (don't invent content) last so they appear closest to the generation point. +6. **Keep `{{TRANSCRIPT}}` placeholder** β€” the current replacement logic in `processTranscript()` uses `pipeline.systemPrompt.replace("{{TRANSCRIPT}}", rawTranscript)`. + +--- + +## Candidate Updated Prompt + +``` +You are a Logseq note-taking assistant. Convert the following voice transcript into well-structured Logseq outliner syntax. + +Logseq syntax you may use: +- "- " bullet for each main point (required) +- 2-space indentation for sub-points +- [[Page Name]] wiki links β€” ONLY for proper nouns or topics explicitly named +- #tag β€” ONLY for topics or categories explicitly spoken (e.g. "#meeting", "#todo") +- key:: value property blocks β€” ONLY when the speaker states a clear key/value (e.g. 
"date:: 2026-05-02", "project:: Stelekit") +- **bold** for words the speaker stressed or called out as important +- *italic* for titles, technical terms, or qualified statements ("*maybe*", "*draft*") +- TODO at the start of a bullet for action items the speaker explicitly commits to +- DONE at the start of a bullet for completed actions explicitly mentioned + +Examples of each feature: + +Input: "met with Alice today about the Stelekit release, she said to make it a priority" +Output: +- Met with [[Alice]] about [[Stelekit]] release #meeting + - She flagged this as a priority +- TODO Follow up with Alice on release timeline + +Input: "project is stelekit, date is May 2nd, need to review the export feature" +Output: +- project:: Stelekit +- date:: 2026-05-02 +- TODO Review the export feature + +Input: "I think the new design is okay, maybe try bold colours, definitely update the readme" +Output: +- The new design is acceptable + - Consider *bold* colours as a possibility +- TODO Update the README + +Hard rules (never violate): +- Do NOT invent topics, names, tags, or properties not mentioned in the transcript +- Do NOT add a preamble, summary, or closing line +- Do NOT add content not present in the transcript +- Use TODO only when the speaker explicitly commits to an action + +Transcript: +{{TRANSCRIPT}} +``` + +--- + +## Constraint Analysis + +The new prompt satisfies all FR-1 requirements: +- `#tags` β€” added with "only when explicitly spoken" guard +- `key:: value` β€” added with "only when the speaker states a clear key/value" guard +- `**bold**` β€” added for words "the speaker stressed or called out as important" +- `*italic*` β€” added for titles, technical terms, qualified statements +- `TODO`/`DONE` β€” added with "explicitly commits to / explicitly mentioned" guards +- Does not invent content β€” explicitly restated in hard rules + +--- + +## Implementation Notes + +- The prompt lives in `VoicePipelineConfig.kt` as a `const val` β€” update in place. 
+- The `replace("{{TRANSCRIPT}}", rawTranscript)` call in `VoiceCaptureViewModel.processTranscript()` continues to work unchanged. +- No new parameters needed in `VoicePipelineConfig` for the prompt change β€” existing `systemPrompt` field defaults to the updated constant. +- Callers that inject a custom `systemPrompt` are unaffected. + +--- + +## 3-Bullet Summary + +- **Current prompt produces flat bullets only** β€” it never instructs the model to use tags, properties, emphasis, or TODO markers, so none appear in output. +- **Few-shot examples are the highest-value addition** β€” three short input/output pairs covering tags, properties, and TODO markers will reliably activate these features without token overhead for typical transcripts. +- **All new features require explicit "only if present" guards** to satisfy the "do not invent content" constraint in FR-1; the candidate prompt above adds those guards for every new construct. diff --git a/project_plans/voice/research/settings-architecture.md b/project_plans/voice/research/settings-architecture.md new file mode 100644 index 00000000..4ff45b16 --- /dev/null +++ b/project_plans/voice/research/settings-architecture.md @@ -0,0 +1,183 @@ +# Settings Architecture β€” Voice Note Feature + +## Research Question +How is `VoiceSettings` currently structured, and what is the cleanest way to add `includeRawTranscript: Boolean`? + +--- + +## Current VoiceSettings Structure (from VoiceSettings.kt) + +```kotlin +class VoiceSettings(private val platformSettings: Settings) { + fun getWhisperApiKey(): String? + fun setWhisperApiKey(key: String) + fun getAnthropicKey(): String? + fun setAnthropicKey(key: String) + fun getOpenAiKey(): String? 
+ fun setOpenAiKey(key: String) + fun getLlmEnabled(): Boolean // default: true + fun setLlmEnabled(enabled: Boolean) + fun getUseDeviceStt(): Boolean // default: true + fun setUseDeviceStt(enabled: Boolean) + fun getUseDeviceLlm(): Boolean // default: false + fun setUseDeviceLlm(enabled: Boolean) + + companion object { + private const val KEY_WHISPER = "voice.whisper_key" + private const val KEY_ANTHROPIC = "voice.anthropic_key" + private const val KEY_OPENAI = "voice.openai_key" + private const val KEY_LLM_ENABLED = "voice.llm_enabled" + private const val KEY_USE_DEVICE_STT = "voice.use_device_stt" + private const val KEY_USE_DEVICE_LLM = "voice.use_device_llm" + } +} +``` + +**Pattern:** Each setting is a get/set pair delegating to `platformSettings: Settings`. The key is a `const val` string in the companion object, namespaced with `"voice."`. + +--- + +## Persistence Mechanism + +The `Settings` interface (from `platform/Settings.kt`) is: + +```kotlin +interface Settings { + fun getBoolean(key: String, defaultValue: Boolean): Boolean + fun putBoolean(key: String, value: Boolean) + fun getString(key: String, defaultValue: String): String + fun putString(key: String, value: String) +} +``` + +This is a **multiplatform abstraction** over platform key-value stores: +- **Android:** SharedPreferences (via `PlatformSettings` in `androidMain`) +- **JVM Desktop:** likely java.util.prefs or a file-based store +- **iOS:** NSUserDefaults + +`VoiceSettings` is created with a `PlatformSettings()` instance passed from the host. Looking at App.kt: +```kotlin +val platformSettings = remember { PlatformSettings() } +``` +And `VoiceSettings` is created in the Android entry point / App plumbing, not inside App.kt directly β€” it's passed in as `voiceSettings: VoiceSettings? = null` parameter to `StelekitApp`. + +**Key finding:** `VoiceSettings` is an imperative wrapper β€” no reactive `StateFlow`, no serialization. 
All reads return current platform value; all writes are synchronous. Backward compatibility is automatic because `getBoolean(key, defaultValue)` returns the default when the key is absent. + +--- + +## Settings UI (VoiceCaptureSettings.kt) + +The settings composable pattern: +1. `remember { mutableStateOf(voiceSettings.get...()) }` β€” local copy of each setting +2. Switch composable with `onCheckedChange = { value = it; saved = false }` +3. "Save" button calls all `voiceSettings.set...()` and `onRebuildPipeline()` + +The `onRebuildPipeline` callback is wired in `App.kt` as `onRebuildVoicePipeline` β€” this triggers the host (Android Activity / desktop entry) to rebuild `VoicePipelineConfig` with updated values from `VoiceSettings`. + +--- + +## How `includeRawTranscript` Flows + +Currently `buildVoiceNoteBlock(formattedText, rawTranscript)` in `VoiceCaptureViewModel` unconditionally includes the `#+BEGIN_QUOTE` block. FR-2 requires: +- When `includeRawTranscript = true` (default): current behavior +- When `includeRawTranscript = false`: omit the `#+BEGIN_QUOTE ... #+END_QUOTE` section + +The setting needs to flow from `VoiceSettings` β†’ `VoiceCaptureViewModel` β†’ `buildVoiceNoteBlock`. + +**Two options for wiring:** +1. **Option A β€” Constructor injection:** Pass `includeRawTranscript` as a constructor parameter to `VoiceCaptureViewModel`. The host reads it from `VoiceSettings` when building the VM (same pattern as `voicePipeline`). +2. **Option B β€” Direct Settings injection:** Pass the full `VoiceSettings` instance to `VoiceCaptureViewModel`. The VM calls `voiceSettings.getIncludeRawTranscript()` at `buildVoiceNoteBlock` time. + +**Recommendation: Option A** β€” matches the existing pattern. `VoicePipelineConfig` already carries pipeline behavior; either add `includeRawTranscript` to `VoicePipelineConfig` OR thread it separately. Adding it to `VoicePipelineConfig` is cleanest: it's a pipeline behavior flag, consistent with `systemPrompt` and `minWordCount`. 
+ +--- + +## Minimal Change Proposal + +### 1. Add to `VoiceSettings.kt` + +```kotlin +fun getIncludeRawTranscript(): Boolean = + platformSettings.getBoolean(KEY_INCLUDE_RAW_TRANSCRIPT, true) + +fun setIncludeRawTranscript(enabled: Boolean) = + platformSettings.putBoolean(KEY_INCLUDE_RAW_TRANSCRIPT, enabled) + +companion object { + // existing keys ... + private const val KEY_INCLUDE_RAW_TRANSCRIPT = "voice.include_raw_transcript" +} +``` + +**Backward compatible:** default is `true`, so existing users see no change. + +### 2. Add to `VoicePipelineConfig.kt` + +```kotlin +class VoicePipelineConfig( + // existing params ... + val includeRawTranscript: Boolean = true, +) +``` + +### 3. Update `VoiceCaptureViewModel.buildVoiceNoteBlock` + +```kotlin +internal fun buildVoiceNoteBlock(formattedText: String, rawTranscript: String): String { + val now = Clock.System.now().toLocalDateTime(TimeZone.currentSystemDefault()) + val timeLabel = "${now.hour.toString().padStart(2, '0')}:${now.minute.toString().padStart(2, '0')}" + return buildString { + append("- πŸ“ Voice note ($timeLabel)") + append("\n - ") + append(formattedText.lines().joinToString("\n - ")) + if (pipeline.includeRawTranscript) { + append("\n #+BEGIN_QUOTE\n ") + append(rawTranscript) + append("\n #+END_QUOTE") + } + } +} +``` + +### 4. Add Toggle in `VoiceCaptureSettings.kt` + +Add a new `Switch` row mirroring the `llmEnabled` toggle: + +```kotlin +var includeRawTranscript by remember { mutableStateOf(voiceSettings.getIncludeRawTranscript()) } +``` + +And in the Save button handler: +```kotlin +voiceSettings.setIncludeRawTranscript(includeRawTranscript) +``` + +Place the toggle in the "LLM Formatting" section with label "Include raw transcript in note". + +### 5. Host wiring (Android Activity / App.kt plumbing) + +When rebuilding `VoicePipelineConfig` from `VoiceSettings`, read: +```kotlin +VoicePipelineConfig( + // existing ... 
+ includeRawTranscript = voiceSettings.getIncludeRawTranscript(), +) +``` + +--- + +## Test Impact + +`VoiceNoteBlockFormatTest.kt` has a test: +``` +`block contains raw transcript in BEGIN_QUOTE block` +``` +This test must be updated or parameterized. When `pipeline.includeRawTranscript = false`, the `#+BEGIN_QUOTE` block should be absent. + +--- + +## 3-Bullet Summary + +- **`VoiceSettings` uses a simple imperative get/set pattern over a multiplatform `Settings` interface** β€” adding `includeRawTranscript` is a 4-line addition (constant + getter + setter), backward-compatible by default `true`. +- **The cleanest wiring is to add `includeRawTranscript: Boolean = true` to `VoicePipelineConfig`** β€” it mirrors how `systemPrompt` and `minWordCount` already carry pipeline behavior, keeping `VoiceCaptureViewModel` free of a `VoiceSettings` dependency. +- **The existing `VoiceNoteBlockFormatTest` test for `#+BEGIN_QUOTE` will need updating** to cover both the `includeRawTranscript = true` (present) and `= false` (absent) cases. 
From 67b6b4c16024d8add5e3270c126c63b57a9aa21e Mon Sep 17 00:00:00 2001 From: Tyler Stapler <tystapler@gmail.com> Date: Sat, 2 May 2026 23:52:22 -0700 Subject: [PATCH 3/4] fix(voice): add missing closing brace for mainHandler.post in startCycle() Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --- .../stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt index 5690207b..93f635f5 100644 --- a/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt +++ b/kmp/src/androidMain/kotlin/dev/stapler/stelekit/voice/AndroidSpeechRecognizerProvider.kt @@ -166,6 +166,7 @@ class AndroidSpeechRecognizerProvider( Log.w(TAG, "Failed to start speech recognition", t) if (cont.isActive) cont.resume(mapError(SpeechRecognizer.ERROR_CLIENT)) } + } } startCycle() From d4f9381f05d4f0b5822d3a789acca610cce3102f Mon Sep 17 00:00:00 2001 From: Tyler Stapler <tystapler@gmail.com> Date: Sun, 3 May 2026 00:05:00 -0700 Subject: [PATCH 4/4] fix(voice): address Copilot review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use rawTranscript (not formattedText) for transcript page word-count threshold decision β€” LLM expansion/compression no longer skews routing - Replace println with Logger for LLM failure warning - Clamp transcriptPageWordThreshold to >= 1 in getter, setter, and UI save - Extract appendBlockToPage() private helper shared by appendToToday/appendToPage - Add VoiceSettingsTest coverage for threshold default, persistence, and clamping Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --- .../stelekit/voice/VoiceSettingsTest.kt | 30 +++++++++++++++++++ .../stelekit/repository/JournalService.kt | 24 ++++----------- .../settings/VoiceCaptureSettings.kt | 2 
+- .../stelekit/voice/VoiceCaptureViewModel.kt | 6 ++-- .../stapler/stelekit/voice/VoiceSettings.kt | 4 +-- 5 files changed, 43 insertions(+), 23 deletions(-) diff --git a/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceSettingsTest.kt b/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceSettingsTest.kt index d58ef933..d3731fe5 100644 --- a/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceSettingsTest.kt +++ b/kmp/src/businessTest/kotlin/dev/stapler/stelekit/voice/VoiceSettingsTest.kt @@ -4,6 +4,7 @@ package dev.stapler.stelekit.voice import dev.stapler.stelekit.platform.Settings import kotlin.test.Test +import kotlin.test.assertEquals import kotlin.test.assertFalse import kotlin.test.assertTrue @@ -73,4 +74,33 @@ class VoiceSettingsTest { settings.setIncludeRawTranscript(true) assertTrue(settings.getIncludeRawTranscript(), "Expected persisted true value after re-setting to true") } + + // --- transcriptPageWordThreshold --- + + @Test + fun `getTranscriptPageWordThreshold returns 20 by default`() { + val settings = VoiceSettings(MapSettings()) + assertEquals(20, settings.getTranscriptPageWordThreshold()) + } + + @Test + fun `setTranscriptPageWordThreshold persists value`() { + val settings = VoiceSettings(MapSettings()) + settings.setTranscriptPageWordThreshold(50) + assertEquals(50, settings.getTranscriptPageWordThreshold()) + } + + @Test + fun `setTranscriptPageWordThreshold clamps negative to 1`() { + val settings = VoiceSettings(MapSettings()) + settings.setTranscriptPageWordThreshold(-5) + assertEquals(1, settings.getTranscriptPageWordThreshold()) + } + + @Test + fun `setTranscriptPageWordThreshold clamps zero to 1`() { + val settings = VoiceSettings(MapSettings()) + settings.setTranscriptPageWordThreshold(0) + assertEquals(1, settings.getTranscriptPageWordThreshold()) + } } diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt 
b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt index 33ba0f18..a4e6c41f 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/repository/JournalService.kt @@ -139,37 +139,25 @@ class JournalService( * Appends a new block with [content] to today's journal page. * Creates the journal page if it does not yet exist. */ - @OptIn(DirectRepositoryWrite::class) suspend fun appendToToday(content: String) { - val page = ensureTodayJournal() - val blocks = blockRepository.getBlocksForPage(page.uuid).first().getOrNull() ?: emptyList() - val nextPosition = (blocks.maxOfOrNull { it.position } ?: -1) + 1 - val newBlock = Block( - uuid = UuidGenerator.generateV7(), - pageUuid = page.uuid, - content = content, - position = nextPosition, - createdAt = Clock.System.now(), - updatedAt = Clock.System.now(), - ) - if (writeActor != null) { - writeActor.saveBlock(newBlock) - } else { - blockRepository.saveBlock(newBlock) - } + appendBlockToPage(ensureTodayJournal(), content) } /** * Appends a new block with [content] to the page identified by [pageUuid]. * Falls back to today's journal if [pageUuid] resolves to no page. 
*/ - @OptIn(DirectRepositoryWrite::class) suspend fun appendToPage(pageUuid: String, content: String) { val page = pageRepository.getPageByUuid(pageUuid).first().getOrNull() if (page == null) { appendToToday(content) return } + appendBlockToPage(page, content) + } + + @OptIn(DirectRepositoryWrite::class) + private suspend fun appendBlockToPage(page: Page, content: String) { val blocks = blockRepository.getBlocksForPage(page.uuid).first().getOrNull() ?: emptyList() val nextPosition = (blocks.maxOfOrNull { it.position } ?: -1) + 1 val newBlock = Block( diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt index 387e3b55..7c5ab0e6 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/settings/VoiceCaptureSettings.kt @@ -186,7 +186,7 @@ fun VoiceCaptureSettings( voiceSettings.setUseDeviceStt(useDeviceStt) voiceSettings.setUseDeviceLlm(useDeviceLlm) voiceSettings.setIncludeRawTranscript(includeRawTranscript) - voiceSettings.setTranscriptPageWordThreshold(transcriptPageWordThreshold.toIntOrNull() ?: 20) + voiceSettings.setTranscriptPageWordThreshold(maxOf(1, transcriptPageWordThreshold.toIntOrNull() ?: 20)) saved = true onRebuildPipeline() }, diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt index 43d42f50..0052745d 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceCaptureViewModel.kt @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Elastic-2.0 package dev.stapler.stelekit.voice +import dev.stapler.stelekit.logging.Logger import dev.stapler.stelekit.repository.JournalService import 
kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers @@ -27,6 +28,7 @@ class VoiceCaptureViewModel( scope: CoroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Default), ) { private val scope = scope + private val logger = Logger("VoiceCaptureViewModel") private val _state = MutableStateFlow<VoiceCaptureState>(VoiceCaptureState.Idle) val state: StateFlow<VoiceCaptureState> = _state.asStateFlow() @@ -126,7 +128,7 @@ class VoiceCaptureViewModel( llmResult.formattedText } is LlmResult.Failure -> { - println("[VoiceCaptureViewModel] LLM formatting failed ($llmResult), inserting raw transcript") + logger.warn("LLM formatting failed ($llmResult), inserting raw transcript") rawTranscript } } @@ -138,7 +140,7 @@ class VoiceCaptureViewModel( val targetPageUuid = currentOpenPageUuid() - val wordCount = formattedText.split(Regex("\\s+")).count { it.isNotBlank() } + val wordCount = rawTranscript.split(Regex("\\s+")).count { it.isNotBlank() } val useTranscriptPage = wordCount >= pipeline.transcriptPageWordThreshold val inlineBlock = if (useTranscriptPage) { diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt index 3c6afa8d..14b350a3 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/voice/VoiceSettings.kt @@ -49,10 +49,10 @@ class VoiceSettings(private val platformSettings: Settings) { platformSettings.putBoolean(KEY_INCLUDE_RAW_TRANSCRIPT, enabled) fun getTranscriptPageWordThreshold(): Int = - platformSettings.getString(KEY_TRANSCRIPT_PAGE_WORD_THRESHOLD, "20").toIntOrNull() ?: 20 + maxOf(1, platformSettings.getString(KEY_TRANSCRIPT_PAGE_WORD_THRESHOLD, "20").toIntOrNull() ?: 20) fun setTranscriptPageWordThreshold(threshold: Int) = - platformSettings.putString(KEY_TRANSCRIPT_PAGE_WORD_THRESHOLD, threshold.toString()) + 
platformSettings.putString(KEY_TRANSCRIPT_PAGE_WORD_THRESHOLD, maxOf(1, threshold).toString()) companion object { private const val KEY_WHISPER = "voice.whisper_key"