Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,12 @@ for await detection in listener.detections() {
}
```

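Pass `echoCancellation: true` to route the microphone through the platform's voice-processing I/O unit, which subtracts the device's own playback (e.g. your app's TTS) from the captured signal so the listener reacts only to the user's voice. It defaults to `false` (raw capture). On iOS, enabling it also switches the audio session mode from `.measurement` to `.voiceChat`. If voice processing cannot be enabled, starting the listener throws `WakeWordError.echoCancellationUnavailable`.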

```swift
let listener = WakeWordListener(model: model, threshold: 0.5, debounce: 2.0, echoCancellation: true)
```

The mel spectrogram and speech embedding `.onnx` models ship inside the Swift package; only the classifier ships with your app. Audio at any sample rate is resampled to 16 kHz internally via `AVAudioConverter` (matches the Rust crate's 22050–384000 Hz input range); the listener handles mic-hardware resampling automatically. ONNX Runtime with the CoreML Execution Provider dispatches to ANE / GPU / CPU by default (override via `executionProvider:`).

Add `NSMicrophoneUsageDescription` to Info.plist (and `com.apple.security.device.audio-input` on sandboxed macOS apps) for listener use. A runnable SwiftUI demo (iOS + macOS) lives in [examples/ios_wakeword/](examples/ios_wakeword/).
Expand Down
6 changes: 6 additions & 0 deletions swift/Sources/LiveKitWakeWord/WakeWordError.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ public enum WakeWordError: Error, LocalizedError, Sendable {
/// The ONNX Runtime raised an error during session creation or
/// inference.
case runtimeFailure(underlying: Error)
/// Acoustic echo cancellation was requested (``WakeWordListener`` created
/// with `echoCancellation: true`) but the platform's voice-processing I/O
/// unit could not be enabled.
case echoCancellationUnavailable(underlying: Error)

public var errorDescription: String? {
switch self {
Expand All @@ -54,6 +58,8 @@ public enum WakeWordError: Error, LocalizedError, Sendable {
return "LiveKitWakeWord: resampling failed."
case .runtimeFailure(let underlying):
return "LiveKitWakeWord: ONNX Runtime error (\(underlying))."
case .echoCancellationUnavailable(let underlying):
return "LiveKitWakeWord: could not enable echo cancellation (\(underlying))."
}
}
}
21 changes: 19 additions & 2 deletions swift/Sources/LiveKitWakeWord/WakeWordListener.swift
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public struct Detection: Sendable {
public actor WakeWordListener {
public let threshold: Float
public let debounce: TimeInterval
public let echoCancellation: Bool

private let model: WakeWordModel
private var engine: AVAudioEngine?
Expand Down Expand Up @@ -71,16 +72,22 @@ public actor WakeWordListener {
/// same utterance.
/// - windowSeconds: Length of the rolling audio window fed to the
/// model. 2 s matches the Rust crate's recommendation.
/// - echoCancellation: When `true`, the microphone is routed through the
/// platform's voice-processing I/O unit so audio the device is playing
/// out (e.g. an assistant's own TTS) is removed from the captured
/// signal. Defaults to `false` (raw capture).
public init(
model: WakeWordModel,
threshold: Float = 0.5,
debounce: TimeInterval = 2.0,
windowSeconds: Double = 2.0
windowSeconds: Double = 2.0,
echoCancellation: Bool = false
) {
self.model = model
self.threshold = threshold
self.debounce = debounce
self.windowSeconds = windowSeconds
self.echoCancellation = echoCancellation
}

/// Start capturing audio and running inference. Must be called after
Expand All @@ -90,12 +97,22 @@ public actor WakeWordListener {

#if os(iOS)
let session = AVAudioSession.sharedInstance()
try session.setCategory(.playAndRecord, mode: .measurement, options: [.defaultToSpeaker])
let mode: AVAudioSession.Mode = echoCancellation ? .voiceChat : .measurement
try session.setCategory(.playAndRecord, mode: mode, options: [.defaultToSpeaker])
try session.setActive(true, options: [])
#endif

let engine = AVAudioEngine()
let input = engine.inputNode

if echoCancellation {
do {
try input.setVoiceProcessingEnabled(true)
} catch {
throw WakeWordError.echoCancellationUnavailable(underlying: error)
}
}

let hwFormat = input.inputFormat(forBus: 0)
guard hwFormat.sampleRate > 0 else {
throw WakeWordError.unsupportedSampleRate(rate: 0)
Expand Down
Loading