diff --git a/README.md b/README.md index 6749ecb..be9810d 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,34 @@ # iva_mobile -A new Flutter project. +Voice-to-text mobile client built with Flutter. ## Getting Started -This project is a starting point for a Flutter application. +### Dependencies + +Audio and speech dependencies live in `pubspec.yaml`: + +- `record` – microphone capture and amplitude stream. +- `speech_to_text` – on-device speech recognition with partial results. +- `provider` – MVVM state injection. + +### Platform permissions + +- Android: `android/app/src/main/AndroidManifest.xml` declares + `android.permission.RECORD_AUDIO`. +- iOS: `ios/Runner/Info.plist` includes `NSMicrophoneUsageDescription` and + `NSSpeechRecognitionUsageDescription` strings. + +### Run locally + +``` +flutter pub get +flutter run +``` + +The app boots to the voice screen. Tap the microphone to request permission and +start recording. The waveform animates from live amplitude data and the +transcription updates as speech is recognized. A few resources to get you started if this is your first Flutter project: diff --git a/android/app/src/main/AndroidManifest.xml b/android/app/src/main/AndroidManifest.xml index 101579e..4b89e27 100644 --- a/android/app/src/main/AndroidManifest.xml +++ b/android/app/src/main/AndroidManifest.xml @@ -1,4 +1,5 @@ + LaunchScreen UIMainStoryboardFile Main - UISupportedInterfaceOrientations - - UIInterfaceOrientationPortrait - UIInterfaceOrientationLandscapeLeft - UIInterfaceOrientationLandscapeRight - +UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + +NSMicrophoneUsageDescription +This app requires microphone access to capture your voice. +NSSpeechRecognitionUsageDescription +Speech recognition is used to transcribe your voice into text. 
UISupportedInterfaceOrientations~ipad UIInterfaceOrientationPortrait diff --git a/lib/core/services/audio_capture.dart b/lib/core/services/audio_capture.dart new file mode 100644 index 0000000..333e00e --- /dev/null +++ b/lib/core/services/audio_capture.dart @@ -0,0 +1,98 @@ +import 'dart:async'; +import 'dart:io'; + +import 'package:flutter/foundation.dart'; +import 'package:path_provider/path_provider.dart'; +import 'package:record/record.dart'; + +enum AudioPermissionStatus { granted, denied, restricted } + +abstract class AudioCaptureService { + Future ensurePermission(); + Future start(); + Future pause(); + Future resume(); + Future stop(); + Stream get amplitudeStream; // 0.0 to 1.0 +} + +class AudioCaptureServiceImpl implements AudioCaptureService { + AudioCaptureServiceImpl({AudioRecorder? recorder}) + : _recorder = recorder ?? AudioRecorder(); + + final AudioRecorder _recorder; + final StreamController _levelController = + StreamController.broadcast(); + StreamSubscription? _subscription; + + @override + Stream get amplitudeStream => _levelController.stream; + + @override + Future ensurePermission() async { + final has = await _recorder.hasPermission(); + return has ? 
AudioPermissionStatus.granted : AudioPermissionStatus.denied; + } + + @override + Future start() async { + if (!await _recorder.isRecording()) { + final dir = await getTemporaryDirectory(); + final file = File( + '${dir.path}/iva_rec_${DateTime.now().millisecondsSinceEpoch}.m4a', + ); + await _recorder.start( + const RecordConfig(encoder: AudioEncoder.aacLc), + path: file.path, + ); + } + _subscription ??= _recorder + .onAmplitudeChanged(const Duration(milliseconds: 80)) + .listen((amp) { + final normalized = _normalizeDb(amp.current); + if (!_levelController.isClosed) { + _levelController.add(normalized); + } + }); + } + + @override + Future pause() async { + if (await _recorder.isRecording()) { + await _recorder.pause(); + } + } + + @override + Future resume() async { + if (await _recorder.isPaused()) { + await _recorder.resume(); + } + } + + @override + Future stop() async { + try { + await _subscription?.cancel(); + } finally { + _subscription = null; + } + if (await _recorder.isRecording() || await _recorder.isPaused()) { + await _recorder.stop(); + } + } + + double _normalizeDb(double db) { + // Map [-45dB, 0dB] to [0, 1], clamp outside + const minDb = -45.0; + const maxDb = 0.0; + final clamped = db.clamp(minDb, maxDb); + return (clamped - minDb) / (maxDb - minDb); + } + + @mustCallSuper + void dispose() { + _levelController.close(); + _subscription?.cancel(); + } +} diff --git a/lib/core/services/speech_recognition.dart b/lib/core/services/speech_recognition.dart new file mode 100644 index 0000000..7d00ce1 --- /dev/null +++ b/lib/core/services/speech_recognition.dart @@ -0,0 +1,71 @@ +import 'dart:async'; + +import 'package:speech_to_text/speech_to_text.dart' as stt; + +abstract class SpeechRecognitionService { + Future initialize(); + Future startListening({bool partialResults = true}); + Future stopListening(); + Future cancel(); + Stream get transcriptionStream; // incremental text +} + +class SpeechRecognitionServiceImpl implements 
SpeechRecognitionService { + SpeechRecognitionServiceImpl({stt.SpeechToText? engine}) + : _engine = engine ?? stt.SpeechToText(); + + final stt.SpeechToText _engine; + final StreamController _controller = + StreamController.broadcast(); + + @override + Stream get transcriptionStream => _controller.stream; + + @override + Future initialize() async { + final available = await _engine.initialize( + onStatus: (_) {}, + onError: (e) {}, + ); + return available; + } + + @override + Future startListening({bool partialResults = true}) async { + if (!_engine.isAvailable) { + final ok = await initialize(); + if (!ok) return; + } + await _engine.listen( + onResult: (result) { + final text = result.recognizedWords; + if (!_controller.isClosed) { + _controller.add(text); + } + }, + listenOptions: stt.SpeechListenOptions( + listenMode: stt.ListenMode.dictation, + partialResults: partialResults, + cancelOnError: true, + ), + ); + } + + @override + Future stopListening() async { + if (_engine.isListening) { + await _engine.stop(); + } + } + + @override + Future cancel() async { + if (_engine.isListening) { + await _engine.cancel(); + } + } + + void dispose() { + _controller.close(); + } +} diff --git a/lib/features/voice_to_text/view/voice_to_text_model.dart b/lib/features/voice_to_text/view/voice_to_text_model.dart index 71ec762..80e41af 100644 --- a/lib/features/voice_to_text/view/voice_to_text_model.dart +++ b/lib/features/voice_to_text/view/voice_to_text_model.dart @@ -1,9 +1,13 @@ import 'dart:async'; +import 'dart:collection'; -import 'package:flutter/material.dart'; +import 'package:flutter/foundation.dart'; +import 'package:iva_mobile/core/services/audio_capture.dart'; +import 'package:iva_mobile/core/services/speech_recognition.dart'; abstract class VoiceToTextModel extends Listenable { - List get transcript; // already tokenized words or segments + List get transcript; // tokenized words/segments for display + String get transcribedText; int get activeWordIndex; bool get 
isCursorVisible; List get waveformData; @@ -14,6 +18,7 @@ abstract class VoiceToTextModel extends Listenable { void setActiveWord(int index); void toggleCursorVisibility(bool visible); + void updateTranscription(String text, {int? activeWordIndex}); void updateWaveform(List amplitudes); void startTimer(); void pauseTimer(); @@ -24,24 +29,31 @@ abstract class VoiceToTextModel extends Listenable { void stopRecording(); void restartRecording(); void discardRecording(); - - // Other state already planned (timer, waveform, recording commands) lives here too. } enum RecordingState { idle, recording, paused, stopped } class VoiceToTextModelState extends ChangeNotifier implements VoiceToTextModel { VoiceToTextModelState({ - required List initialTranscript, + List initialTranscript = const [], int initialActiveWordIndex = 0, bool initialCursorVisible = true, RecordingState initialRecordingState = RecordingState.idle, - }) : _transcript = List.unmodifiable(initialTranscript), - _activeWordIndex = initialActiveWordIndex, + this.audio, + this.speech, + int waveformWindow = 48, + }) : _transcriptWords = List.from(initialTranscript), + _activeWordIndex = 0, _isCursorVisible = initialCursorVisible, - _recordingState = initialRecordingState; + _recordingState = initialRecordingState, + _waveformWindowSize = waveformWindow { + _activeWordIndex = _normalizeActiveIndex(initialActiveWordIndex); + _seedAmplitudeWindow(); + } - final List _transcript; + final AudioCaptureService? audio; + final SpeechRecognitionService? speech; + List _transcriptWords; int _activeWordIndex; bool _isCursorVisible; List _waveformData = const []; @@ -51,9 +63,18 @@ class VoiceToTextModelState extends ChangeNotifier implements VoiceToTextModel { bool _isTimerRunning = false; Timer? _timer; RecordingState _recordingState; + StreamSubscription? _ampSub; + StreamSubscription? _sttSub; + final List _amplitudeWindow = []; + final int _waveformWindowSize; + String? 
_lastError; + // No mock fallback: live amplitude must be provided by platform service + + @override + List get transcript => UnmodifiableListView(_transcriptWords); @override - List get transcript => _transcript; + String get transcribedText => _transcriptWords.join(' '); @override int get activeWordIndex => _activeWordIndex; @@ -62,7 +83,7 @@ class VoiceToTextModelState extends ChangeNotifier implements VoiceToTextModel { bool get isCursorVisible => _isCursorVisible; @override - List get waveformData => _waveformData; + List get waveformData => UnmodifiableListView(_waveformData); @override Stream> get waveformStream => _waveformController.stream; @@ -76,11 +97,13 @@ class VoiceToTextModelState extends ChangeNotifier implements VoiceToTextModel { @override RecordingState get recordingState => _recordingState; + String? get lastError => _lastError; + @override void setActiveWord(int index) { - if (index == _activeWordIndex) return; - if (index < 0 || index >= _transcript.length) return; - _activeWordIndex = index; + final normalized = _normalizeActiveIndex(index); + if (normalized == _activeWordIndex) return; + _activeWordIndex = normalized; notifyListeners(); } @@ -91,63 +114,57 @@ class VoiceToTextModelState extends ChangeNotifier implements VoiceToTextModel { notifyListeners(); } + @override + void updateTranscription(String text, {int? activeWordIndex}) { + final words = _normalizeTranscript(text); + final changed = _applyTranscription(words, activeWordIndex); + if (changed) { + notifyListeners(); + } + } + @override void updateWaveform(List amplitudes) { - _waveformData = List.unmodifiable(amplitudes); - notifyListeners(); + final normalized = amplitudes + .map((value) => value.isNaN ? 
0.0 : value.clamp(0.0, 1.0)) + .toList(growable: false); + final changed = !listEquals(normalized, _waveformData); + _waveformData = List.unmodifiable(normalized); if (!_waveformController.isClosed) { _waveformController.add(_waveformData); } + if (changed) { + notifyListeners(); + } } @override void startTimer() { - if (_isTimerRunning) return; - _isTimerRunning = true; - _timer ??= Timer.periodic(const Duration(seconds: 1), (_) { - _elapsedDuration += const Duration(seconds: 1); - notifyListeners(); - }); - notifyListeners(); + _startTimer(); } @override void pauseTimer() { - if (!_isTimerRunning && _timer == null) return; - _isTimerRunning = false; - _timer?.cancel(); - _timer = null; - notifyListeners(); + _pauseTimer(); } @override void resetTimer() { - final hadElapsed = _elapsedDuration != Duration.zero; - final wasRunning = _isTimerRunning || _timer != null; - _elapsedDuration = Duration.zero; - if (wasRunning) { - _timer?.cancel(); - _timer = null; - _isTimerRunning = false; - } - if (hadElapsed || wasRunning) { - notifyListeners(); - } + _resetTimer(); } @override void startRecording() { - if (_recordingState == RecordingState.recording) return; - _recordingState = RecordingState.recording; - startTimer(); - notifyListeners(); + _beginRecording(); } @override void pauseRecording() { if (_recordingState != RecordingState.recording) return; _recordingState = RecordingState.paused; - pauseTimer(); + _pauseTimer(notify: false); + audio?.pause(); + speech?.stopListening(); notifyListeners(); } @@ -155,7 +172,9 @@ class VoiceToTextModelState extends ChangeNotifier implements VoiceToTextModel { void resumeRecording() { if (_recordingState != RecordingState.paused) return; _recordingState = RecordingState.recording; - startTimer(); + _startTimer(notify: false); + audio?.resume(); + speech?.startListening(partialResults: true); notifyListeners(); } @@ -163,33 +182,187 @@ class VoiceToTextModelState extends ChangeNotifier implements VoiceToTextModel { void 
stopRecording() { if (_recordingState == RecordingState.stopped) return; _recordingState = RecordingState.stopped; - resetTimer(); + _resetTimer(notify: false); + _teardownStreams(); + audio?.stop(); + speech?.stopListening(); notifyListeners(); } @override void restartRecording() { + _applyTranscription(const [], 0); + _seedAmplitudeWindow(); + _resetTimer(notify: false); _recordingState = RecordingState.recording; - resetTimer(); - startTimer(); + _startTimer(notify: false); + _setupStreams(); + audio?.start(); + speech?.startListening(partialResults: true); notifyListeners(); } @override void discardRecording() { + final transcriptChanged = _applyTranscription(const [], 0); + _seedAmplitudeWindow(); + final timerChanged = _resetTimer(notify: false); + _teardownStreams(); + audio?.stop(); + speech?.cancel(); + final previousState = _recordingState; _recordingState = RecordingState.idle; - _waveformData = const []; - if (!_waveformController.isClosed) { - _waveformController.add(_waveformData); + if (transcriptChanged || + timerChanged || + previousState != RecordingState.idle) { + notifyListeners(); } - resetTimer(); - notifyListeners(); } @override void dispose() { _timer?.cancel(); _waveformController.close(); + _teardownStreams(); super.dispose(); } + + Future _beginRecording() async { + if (_recordingState == RecordingState.recording) { + _startTimer(notify: false); + return; + } + if (audio != null) { + final status = await audio!.ensurePermission(); + if (status != AudioPermissionStatus.granted) { + _lastError = 'Microphone permission is required to record'; + notifyListeners(); + return; + } + } + _recordingState = RecordingState.recording; + _startTimer(notify: false); + _setupStreams(); + await audio?.start(); + await speech?.startListening(partialResults: true); + // Web implementation uses getUserMedia; on unsupported env, ensurePermission returns denied + notifyListeners(); + } + + void _setupStreams() { + _teardownStreams(); + if (audio != null) { + 
_ampSub = audio!.amplitudeStream.listen(_pushAmplitude); + } + if (speech != null) { + _sttSub = speech!.transcriptionStream.listen((text) { + updateTranscription(text); + }); + } + } + + void _teardownStreams() { + _ampSub?.cancel(); + _sttSub?.cancel(); + _ampSub = null; + _sttSub = null; + _amplitudeWindow.clear(); + } + + void _pushAmplitude(double level) { + _amplitudeWindow.add(level.clamp(0.0, 1.0)); + if (_amplitudeWindow.length > _waveformWindowSize) { + _amplitudeWindow.removeAt(0); + } + updateWaveform(List.from(_amplitudeWindow)); + } + + void _seedAmplitudeWindow() { + _amplitudeWindow + ..clear() + ..addAll(List.filled(_waveformWindowSize, 0.0)); + updateWaveform(List.from(_amplitudeWindow)); + } + + // No mock amplitude functions + + bool _applyTranscription(List words, int? activeWordIndex) { + final normalizedWords = List.from(words); + final normalizedIndex = _normalizeActiveIndex( + activeWordIndex, + wordCount: normalizedWords.length, + ); + final wordsChanged = !listEquals(normalizedWords, _transcriptWords); + final indexChanged = normalizedIndex != _activeWordIndex; + if (!wordsChanged && !indexChanged) { + return false; + } + _transcriptWords = normalizedWords; + _activeWordIndex = normalizedIndex; + return true; + } + + bool _startTimer({bool notify = true}) { + if (_isTimerRunning) return false; + _isTimerRunning = true; + _timer ??= Timer.periodic(const Duration(seconds: 1), (_) { + _elapsedDuration += const Duration(seconds: 1); + notifyListeners(); + }); + if (notify) { + notifyListeners(); + } + return true; + } + + bool _pauseTimer({bool notify = true}) { + if (!_isTimerRunning && _timer == null) return false; + _isTimerRunning = false; + _timer?.cancel(); + _timer = null; + if (notify) { + notifyListeners(); + } + return true; + } + + bool _resetTimer({bool notify = true}) { + final hadElapsed = _elapsedDuration != Duration.zero; + final wasRunning = _isTimerRunning || _timer != null; + if (!hadElapsed && !wasRunning) { + return 
false; + } + _elapsedDuration = Duration.zero; + if (wasRunning) { + _timer?.cancel(); + _timer = null; + _isTimerRunning = false; + } + if (notify) { + notifyListeners(); + } + return true; + } + + int _normalizeActiveIndex(int? index, {int? wordCount}) { + final total = wordCount ?? _transcriptWords.length; + if (total == 0) { + return 0; + } + final desired = index ?? (total - 1); + if (desired <= 0) { + return 0; + } + if (desired >= total) { + return total - 1; + } + return desired; + } + + List _normalizeTranscript(String text) { + return text + .split(RegExp(r'\s+')) + .where((segment) => segment.isNotEmpty) + .toList(growable: false); + } } diff --git a/lib/features/voice_to_text/view/voice_to_text_screen.dart b/lib/features/voice_to_text/view/voice_to_text_screen.dart index 978051f..dbc4ece 100644 --- a/lib/features/voice_to_text/view/voice_to_text_screen.dart +++ b/lib/features/voice_to_text/view/voice_to_text_screen.dart @@ -1,8 +1,9 @@ -import 'dart:async'; -import 'dart:math' as math; +// no-op import 'package:flutter/material.dart'; import 'package:provider/provider.dart'; +import 'package:iva_mobile/core/services/audio_capture.dart'; +import 'package:iva_mobile/core/services/speech_recognition.dart'; import '../widget/control_buttons.dart'; import '../widget/text_display.dart'; @@ -16,7 +17,7 @@ class VoiceToTextScreen extends StatelessWidget { @override Widget build(BuildContext context) { return ChangeNotifierProvider( - create: (_) => VoiceToTextModelState( + create: (context) => VoiceToTextModelState( initialTranscript: const [ 'Herman', 'is', @@ -35,6 +36,8 @@ class VoiceToTextScreen extends StatelessWidget { 'kin', ], initialActiveWordIndex: 14, + audio: context.read(), + speech: context.read(), ), child: const _VoiceToTextView(), ); @@ -49,53 +52,34 @@ class _VoiceToTextView extends StatefulWidget { } class _VoiceToTextViewState extends State<_VoiceToTextView> { - static const int _waveformSampleCount = 48; - static const Duration 
_waveformTick = Duration(milliseconds: 100); - - final math.Random _random = math.Random(); - Timer? _waveformTimer; + String? _shownError; @override void initState() { super.initState(); - WidgetsBinding.instance.addPostFrameCallback((_) { - final model = context.read(); - _pushWaveformSample(model); - _waveformTimer = Timer.periodic( - _waveformTick, - (_) => _pushWaveformSample(model), - ); - }); + // No-op: Real waveform is driven by audio amplitude when recording (issue #7) } @override void dispose() { - _waveformTimer?.cancel(); super.dispose(); } - void _pushWaveformSample(VoiceToTextModel model) { - final halfCount = _waveformSampleCount ~/ 2; - final leading = List.generate(halfCount, (index) { - final phase = index / halfCount * math.pi; - final base = (math.sin(phase) * 0.5) + 0.5; - final noise = (_random.nextDouble() - 0.5) * 0.15; - return (base + noise).clamp(0.0, 1.0); - }); - final trailing = List.from(leading.reversed); - final sample = [...leading, ...trailing]; - if (sample.length < _waveformSampleCount) { - sample.addAll( - List.filled(_waveformSampleCount - sample.length, 0.3), - ); - } - model.updateWaveform(sample); - } - @override Widget build(BuildContext context) { final VoiceToTextModelState model = context.watch(); + // Report new errors softly via SnackBar + WidgetsBinding.instance.addPostFrameCallback((_) { + final err = model.lastError; + if (err != null && err != _shownError && mounted) { + _shownError = err; + ScaffoldMessenger.of( + context, + ).showSnackBar(SnackBar(content: Text(err))); + } + }); + return Scaffold( backgroundColor: const Color.fromARGB(255, 227, 227, 198), body: SafeArea( @@ -125,6 +109,7 @@ class _VoiceToTextViewState extends State<_VoiceToTextView> { barWidth: 3, spacing: 6, backgroundColor: Colors.transparent, + minBarHeight: 2, ), ), ), diff --git a/lib/features/voice_to_text/widget/waveform.dart b/lib/features/voice_to_text/widget/waveform.dart index c34d0ad..f6a01ce 100644 --- 
a/lib/features/voice_to_text/widget/waveform.dart +++ b/lib/features/voice_to_text/widget/waveform.dart @@ -13,6 +13,7 @@ class WaveformWidget extends StatefulWidget { this.spacing = 6, this.backgroundColor = Colors.transparent, this.animationDuration = const Duration(milliseconds: 120), + this.minBarHeight = 2, }); final List amplitudes; @@ -22,6 +23,7 @@ class WaveformWidget extends StatefulWidget { final double spacing; final Color backgroundColor; final Duration animationDuration; + final double minBarHeight; @override State createState() => _WaveformWidgetState(); @@ -115,6 +117,7 @@ class _WaveformWidgetState extends State barColor: widget.barColor, barWidth: widget.barWidth, spacing: widget.spacing, + minBarHeight: widget.minBarHeight, ), ), ), @@ -128,12 +131,14 @@ class WaveformPainter extends CustomPainter { required this.barColor, required this.barWidth, required this.spacing, + this.minBarHeight = 2, }); final List amplitudes; final Color barColor; final double barWidth; final double spacing; + final double minBarHeight; static List normalize(List values) { return values @@ -147,6 +152,20 @@ class WaveformPainter extends CustomPainter { @override void paint(Canvas canvas, Size size) { if (amplitudes.isEmpty) { + // Draw a flat baseline using minBarHeight + final paint = Paint() + ..color = barColor + ..strokeWidth = barWidth + ..strokeCap = StrokeCap.round; + final centerY = size.height / 2; + final half = minBarHeight / 2; + final totalWidth = barWidth + spacing; // minimal placeholder segment + final startX = (size.width - totalWidth) / 2; + canvas.drawLine( + Offset(startX, centerY - half), + Offset(startX, centerY + half), + paint, + ); return; } @@ -162,7 +181,8 @@ class WaveformPainter extends CustomPainter { for (var i = 0; i < amplitudes.length; i++) { final normalized = amplitudes[i].clamp(0.0, 1.0).toDouble(); - final barHeight = normalized * maxHeight; + final barHeight = + (normalized * (maxHeight - minBarHeight)) + minBarHeight; final x = 
startX + i * (barWidth + spacing); final top = (maxHeight - barHeight) / 2; canvas.drawLine(Offset(x, top), Offset(x, top + barHeight), paint); @@ -189,6 +209,7 @@ class WaveformStream extends StatelessWidget { this.spacing = 6, this.backgroundColor = Colors.transparent, this.animationDuration = const Duration(milliseconds: 120), + this.minBarHeight = 2, }); final Stream> stream; @@ -199,6 +220,7 @@ class WaveformStream extends StatelessWidget { final double spacing; final Color backgroundColor; final Duration animationDuration; + final double minBarHeight; @override Widget build(BuildContext context) { @@ -215,6 +237,7 @@ class WaveformStream extends StatelessWidget { spacing: spacing, backgroundColor: backgroundColor, animationDuration: animationDuration, + minBarHeight: minBarHeight, ); }, ); diff --git a/lib/main.dart b/lib/main.dart index eed1679..e7d7068 100644 --- a/lib/main.dart +++ b/lib/main.dart @@ -1,5 +1,8 @@ import 'package:flutter/material.dart'; +import 'package:provider/provider.dart'; +import 'core/services/audio_capture.dart'; +import 'core/services/speech_recognition.dart'; import 'features/voice_to_text/view/voice_to_text_screen.dart'; void main() { @@ -11,15 +14,23 @@ class VoiceToTextApp extends StatelessWidget { @override Widget build(BuildContext context) { - return MaterialApp( - debugShowCheckedModeBanner: false, - title: 'IVA Voice', - theme: ThemeData( - colorScheme: ColorScheme.fromSeed(seedColor: Colors.black87), - scaffoldBackgroundColor: const Color(0xFFE3E3C6), - useMaterial3: true, + return MultiProvider( + providers: [ + Provider(create: (_) => AudioCaptureServiceImpl()), + Provider( + create: (_) => SpeechRecognitionServiceImpl(), + ), + ], + child: MaterialApp( + debugShowCheckedModeBanner: false, + title: 'IVA Voice', + theme: ThemeData( + colorScheme: ColorScheme.fromSeed(seedColor: Colors.black87), + scaffoldBackgroundColor: const Color(0xFFE3E3C6), + useMaterial3: true, + ), + home: const VoiceToTextScreen(), ), - home: 
const VoiceToTextScreen(), ); } } diff --git a/linux/flutter/generated_plugin_registrant.cc b/linux/flutter/generated_plugin_registrant.cc index e71a16d..9209285 100644 --- a/linux/flutter/generated_plugin_registrant.cc +++ b/linux/flutter/generated_plugin_registrant.cc @@ -6,6 +6,10 @@ #include "generated_plugin_registrant.h" +#include void fl_register_plugins(FlPluginRegistry* registry) { + g_autoptr(FlPluginRegistrar) record_linux_registrar = + fl_plugin_registry_get_registrar_for_plugin(registry, "RecordLinuxPlugin"); + record_linux_plugin_register_with_registrar(record_linux_registrar); } diff --git a/linux/flutter/generated_plugins.cmake b/linux/flutter/generated_plugins.cmake index 2e1de87..29e96ee 100644 --- a/linux/flutter/generated_plugins.cmake +++ b/linux/flutter/generated_plugins.cmake @@ -3,6 +3,7 @@ # list(APPEND FLUTTER_PLUGIN_LIST + record_linux ) list(APPEND FLUTTER_FFI_PLUGIN_LIST diff --git a/macos/Flutter/GeneratedPluginRegistrant.swift b/macos/Flutter/GeneratedPluginRegistrant.swift index cccf817..d63568b 100644 --- a/macos/Flutter/GeneratedPluginRegistrant.swift +++ b/macos/Flutter/GeneratedPluginRegistrant.swift @@ -5,6 +5,12 @@ import FlutterMacOS import Foundation +import path_provider_foundation +import record_darwin +import speech_to_text_macos func RegisterGeneratedPlugins(registry: FlutterPluginRegistry) { + PathProviderPlugin.register(with: registry.registrar(forPlugin: "PathProviderPlugin")) + RecordPlugin.register(with: registry.registrar(forPlugin: "RecordPlugin")) + SpeechToTextMacosPlugin.register(with: registry.registrar(forPlugin: "SpeechToTextMacosPlugin")) } diff --git a/pubspec.lock b/pubspec.lock index 8f3e021..a48033f 100644 --- a/pubspec.lock +++ b/pubspec.lock @@ -121,6 +121,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.3.3" + ffi: + dependency: transitive + description: + name: ffi + sha256: "289279317b4b16eb2bb7e271abccd4bf84ec9bdcbe999e278a94b804f5630418" + url: "https://pub.dev" + source: hosted + 
version: "2.1.4" file: dependency: transitive description: @@ -129,6 +137,14 @@ packages: url: "https://pub.dev" source: hosted version: "7.0.1" + fixnum: + dependency: transitive + description: + name: fixnum + sha256: b6dc7065e46c974bc7c5f143080a6764ec7a4be6da1285ececdc37be96de53be + url: "https://pub.dev" + source: hosted + version: "1.1.1" flutter: dependency: "direct main" description: flutter @@ -152,6 +168,11 @@ packages: description: flutter source: sdk version: "0.0.0" + flutter_web_plugins: + dependency: transitive + description: flutter + source: sdk + version: "0.0.0" freezed_annotation: dependency: transitive description: @@ -288,6 +309,78 @@ packages: url: "https://pub.dev" source: hosted version: "1.9.1" + path_provider: + dependency: "direct main" + description: + name: path_provider + sha256: "50c5dd5b6e1aaf6fb3a78b33f6aa3afca52bf903a8a5298f53101fdaee55bbcd" + url: "https://pub.dev" + source: hosted + version: "2.1.5" + path_provider_android: + dependency: transitive + description: + name: path_provider_android + sha256: "993381400e94d18469750e5b9dcb8206f15bc09f9da86b9e44a9b0092a0066db" + url: "https://pub.dev" + source: hosted + version: "2.2.18" + path_provider_foundation: + dependency: transitive + description: + name: path_provider_foundation + sha256: "16eef174aacb07e09c351502740fa6254c165757638eba1e9116b0a781201bbd" + url: "https://pub.dev" + source: hosted + version: "2.4.2" + path_provider_linux: + dependency: transitive + description: + name: path_provider_linux + sha256: f7a1fe3a634fe7734c8d3f2766ad746ae2a2884abe22e241a8b301bf5cac3279 + url: "https://pub.dev" + source: hosted + version: "2.2.1" + path_provider_platform_interface: + dependency: transitive + description: + name: path_provider_platform_interface + sha256: "88f5779f72ba699763fa3a3b06aa4bf6de76c8e5de842cf6f29e2e06476c2334" + url: "https://pub.dev" + source: hosted + version: "2.1.2" + path_provider_windows: + dependency: transitive + description: + name: path_provider_windows 
+ sha256: bd6f00dbd873bfb70d0761682da2b3a2c2fccc2b9e84c495821639601d81afe7 + url: "https://pub.dev" + source: hosted + version: "2.3.0" + pedantic: + dependency: transitive + description: + name: pedantic + sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602" + url: "https://pub.dev" + source: hosted + version: "1.11.1" + platform: + dependency: transitive + description: + name: platform + sha256: "5d6b1b0036a5f331ebc77c850ebc8506cbc1e9416c27e59b439f917a902a4984" + url: "https://pub.dev" + source: hosted + version: "3.1.6" + plugin_platform_interface: + dependency: transitive + description: + name: plugin_platform_interface + sha256: "4820fbfdb9478b1ebae27888254d445073732dae3d6ea81f0b7e06d5dedc3f02" + url: "https://pub.dev" + source: hosted + version: "2.1.8" provider: dependency: "direct main" description: @@ -312,6 +405,70 @@ packages: url: "https://pub.dev" source: hosted version: "1.5.0" + record: + dependency: "direct main" + description: + name: record + sha256: "9dbc6ff3e784612f90a9b001373c45ff76b7a08abd2bd9fdf72c242320c8911c" + url: "https://pub.dev" + source: hosted + version: "6.1.1" + record_android: + dependency: transitive + description: + name: record_android + sha256: "854627cd78d8d66190377f98477eee06ca96ab7c9f2e662700daf33dbf7e6673" + url: "https://pub.dev" + source: hosted + version: "1.4.2" + record_ios: + dependency: transitive + description: + name: record_ios + sha256: "13e241ed9cbc220534a40ae6b66222e21288db364d96dd66fb762ebd3cb77c71" + url: "https://pub.dev" + source: hosted + version: "1.1.2" + record_linux: + dependency: transitive + description: + name: record_linux + sha256: "235b1f1fb84e810f8149cc0c2c731d7d697f8d1c333b32cb820c449bf7bb72d8" + url: "https://pub.dev" + source: hosted + version: "1.2.1" + record_macos: + dependency: transitive + description: + name: record_macos + sha256: "2849068bb59072f300ad63ed146e543d66afaef8263edba4de4834fc7c8d4d35" + url: "https://pub.dev" + source: hosted + version: "1.1.1" + 
record_platform_interface: + dependency: transitive + description: + name: record_platform_interface + sha256: b0065fdf1ec28f5a634d676724d388a77e43ce7646fb049949f58c69f3fcb4ed + url: "https://pub.dev" + source: hosted + version: "1.4.0" + record_web: + dependency: transitive + description: + name: record_web + sha256: "4f0adf20c9ccafcc02d71111fd91fba1ca7b17a7453902593e5a9b25b74a5c56" + url: "https://pub.dev" + source: hosted + version: "1.2.0" + record_windows: + dependency: transitive + description: + name: record_windows + sha256: "223258060a1d25c62bae18282c16783f28581ec19401d17e56b5205b9f039d78" + url: "https://pub.dev" + source: hosted + version: "1.0.7" sky_engine: dependency: transitive description: flutter @@ -325,6 +482,38 @@ packages: url: "https://pub.dev" source: hosted version: "1.10.1" + speech_to_text: + dependency: "direct main" + description: + name: speech_to_text + sha256: c07557664974afa061f221d0d4186935bea4220728ea9446702825e8b988db04 + url: "https://pub.dev" + source: hosted + version: "7.3.0" + speech_to_text_platform_interface: + dependency: transitive + description: + name: speech_to_text_platform_interface + sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114 + url: "https://pub.dev" + source: hosted + version: "2.3.0" + speech_to_text_windows: + dependency: transitive + description: + name: speech_to_text_windows + sha256: "2c9846d18253c7bbe059a276297ef9f27e8a2745dead32192525beb208195072" + url: "https://pub.dev" + source: hosted + version: "1.0.0+beta.8" + sprintf: + dependency: transitive + description: + name: sprintf + sha256: "1fc9ffe69d4df602376b52949af107d8f5703b77cda567c4d7d86a0693120f23" + url: "https://pub.dev" + source: hosted + version: "7.0.0" stack_trace: dependency: transitive description: @@ -373,6 +562,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.4.0" + uuid: + dependency: transitive + description: + name: uuid + sha256: 
a5be9ef6618a7ac1e964353ef476418026db906c4facdedaa299b7a2e71690ff + url: "https://pub.dev" + source: hosted + version: "4.5.1" vector_math: dependency: transitive description: @@ -397,6 +594,22 @@ packages: url: "https://pub.dev" source: hosted version: "1.1.3" + web: + dependency: transitive + description: + name: web + sha256: "868d88a33d8a87b18ffc05f9f030ba328ffefba92d6c127917a2ba740f9cfe4a" + url: "https://pub.dev" + source: hosted + version: "1.1.1" + xdg_directories: + dependency: transitive + description: + name: xdg_directories + sha256: "7a3f37b05d989967cdddcbb571f1ea834867ae2faa29725fd085180e0883aa15" + url: "https://pub.dev" + source: hosted + version: "1.1.0" yaml: dependency: transitive description: @@ -407,4 +620,4 @@ packages: version: "3.1.3" sdks: dart: ">=3.9.2 <4.0.0" - flutter: ">=3.18.0-18.0.pre.54" + flutter: ">=3.29.0" diff --git a/pubspec.yaml b/pubspec.yaml index 05df03c..409205c 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -33,6 +33,9 @@ dependencies: provider: ^6.1.5+1 flutter_localizations: sdk: flutter + record: ^6.1.1 + speech_to_text: ^7.3.0 + path_provider: ^2.1.5 # The following adds the Cupertino Icons font to your application. # Use with the CupertinoIcons class for iOS style icons. 
diff --git a/test/features/voice_to_text/view/voice_to_text_model_test.dart b/test/features/voice_to_text/view/voice_to_text_model_test.dart index 978dca5..e46c1ff 100644 --- a/test/features/voice_to_text/view/voice_to_text_model_test.dart +++ b/test/features/voice_to_text/view/voice_to_text_model_test.dart @@ -14,11 +14,13 @@ void main() { initialCursorVisible: false, ); - expect(model.transcript, transcript); + expect(model.transcript, equals(transcript)); expect(() => model.transcript.add('extra'), throwsUnsupportedError); expect(model.activeWordIndex, 1); expect(model.isCursorVisible, isFalse); - expect(model.waveformData, isEmpty); + expect(model.waveformData, isNotEmpty); + expect(model.waveformData.every((v) => v == 0.0), isTrue); + expect(model.transcribedText, 'Herman is recording'); }); test('updates active word and notifies listeners', () { @@ -36,7 +38,7 @@ void main() { expect(notifyCount, 1); }); - test('ignores out of range indices', () { + test('clamps out of range indices', () { final model = VoiceToTextModelState(initialTranscript: transcript); var notifyCount = 0; model.addListener(() => notifyCount++); @@ -44,8 +46,8 @@ void main() { model.setActiveWord(-1); model.setActiveWord(transcript.length); - expect(model.activeWordIndex, 0); - expect(notifyCount, 0); + expect(model.activeWordIndex, transcript.length - 1); + expect(notifyCount, 1); }); test('toggles cursor visibility and notifies once per change', () { @@ -77,11 +79,11 @@ void main() { model.updateWaveform([0.1, 0.5, 1.2]); - expect(model.waveformData, [0.1, 0.5, 1.2]); + expect(model.waveformData, equals([0.1, 0.5, 1.0])); expect(() => model.waveformData.add(0.3), throwsUnsupportedError); await Future.delayed(Duration.zero); expect(events, [ - [0.1, 0.5, 1.2], + [0.1, 0.5, 1.0], ]); expect(notifyCount, 1); @@ -133,6 +135,25 @@ void main() { }); }); + test('updateTranscription replaces transcript and active word', () { + final model = VoiceToTextModelState(initialTranscript: transcript); 
+ var notifyCount = 0; + model.addListener(() => notifyCount++); + + model.updateTranscription('Hello world from iva', activeWordIndex: 1); + + expect(model.transcript, equals(['Hello', 'world', 'from', 'iva'])); + expect(model.transcribedText, 'Hello world from iva'); + expect(model.activeWordIndex, 1); + expect(notifyCount, 1); + + model.updateTranscription('Hello world from iva', activeWordIndex: 1); + expect(notifyCount, 1, reason: 'no change, no notification'); + + model.updateTranscription('Hello world from iva'); + expect(model.activeWordIndex, 3); + }); + test('recording state transitions control timer lifecycle', () { fakeAsync((async) { final model = VoiceToTextModelState(initialTranscript: transcript); @@ -161,15 +182,19 @@ void main() { model.restartRecording(); expect(model.recordingState, RecordingState.recording); expect(model.elapsedDuration, Duration.zero); + expect(model.transcript, isEmpty); async.elapse(const Duration(seconds: 1)); expect(model.elapsedDuration, const Duration(seconds: 1)); + model.updateTranscription('partial text'); model.updateWaveform([0.2, 0.4]); model.discardRecording(); expect(model.recordingState, RecordingState.idle); expect(model.elapsedDuration, Duration.zero); - expect(model.waveformData, isEmpty); + expect(model.waveformData, isNotEmpty); + expect(model.waveformData.every((v) => v == 0.0), isTrue); + expect(model.transcript, isEmpty); model.dispose(); }); diff --git a/windows/flutter/generated_plugin_registrant.cc b/windows/flutter/generated_plugin_registrant.cc index 8b6d468..458cde3 100644 --- a/windows/flutter/generated_plugin_registrant.cc +++ b/windows/flutter/generated_plugin_registrant.cc @@ -6,6 +6,9 @@ #include "generated_plugin_registrant.h" +#include <record_windows/record_windows_plugin_c_api.h> void RegisterPlugins(flutter::PluginRegistry* registry) { + RecordWindowsPluginCApiRegisterWithRegistrar( + registry->GetRegistrarForPlugin("RecordWindowsPluginCApi")); } diff --git a/windows/flutter/generated_plugins.cmake
b/windows/flutter/generated_plugins.cmake index b93c4c3..e9670a9 100644 --- a/windows/flutter/generated_plugins.cmake +++ b/windows/flutter/generated_plugins.cmake @@ -3,6 +3,7 @@ # list(APPEND FLUTTER_PLUGIN_LIST + record_windows ) list(APPEND FLUTTER_FFI_PLUGIN_LIST