Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,9 @@ For full architectural details, see [project/docs/system_design.md](project/docs

| Component | Description |
|-----------|-------------|
| **AudioIn** | Audio capture from a selectable input device (including virtual cables) |
| **Mic** | Microphone audio capture via sounddevice |
| **RemoteAudioIn** | Audio capture from a remote bridge server over WebSocket |
| **Camera** | Webcam video capture with device enumeration |
| **VideoPlayer** | Video file playback (looping) |
| **TextInput** | Text input from the node UI |
Expand Down Expand Up @@ -295,7 +297,9 @@ For full architectural details, see [project/docs/system_design.md](project/docs

| Component | Description |
|-----------|-------------|
| **AudioOut** | Audio playback to a selectable output device (including virtual cables) |
| **Speaker** | Audio playback via sounddevice |
| **RemoteAudioOut** | Audio playback to a remote bridge server over WebSocket |
| **VideoStream** | JPEG video stream to node UI |
| **TextDisplay** | Text display in node UI |
| **OSCFace** | VRChat facial animation via OSC (with emotion detection) |
Expand Down
4 changes: 4 additions & 0 deletions backend/src/lib/audio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from src.lib.audio.audio_in import AudioIn as AudioIn
from src.lib.audio.audio_out import AudioOut as AudioOut
from src.lib.audio.mic import Mic as Mic
from src.lib.audio.mic_browser import MicBrowser as MicBrowser
from src.lib.audio.remote_audio_in import RemoteAudioIn as RemoteAudioIn
from src.lib.audio.remote_audio_out import RemoteAudioOut as RemoteAudioOut
from src.lib.audio.speaker import Speaker as Speaker
from src.lib.audio.speaker_browser import SpeakerBrowser as SpeakerBrowser
from src.lib.audio.vad import VAD as VAD
Expand Down
59 changes: 59 additions & 0 deletions backend/src/lib/audio/audio_in.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from __future__ import annotations

from typing import Any

import sounddevice as sd
from pydantic import BaseModel, ConfigDict

from src.lib.audio.devices import (
coerce_sounddevice_device,
list_audio_input_devices,
)
from src.lib.audio.mic import MicOutputs
from src.core.component import ThreadedComponent, Tag
from src.core.frames import AudioFrame


class AudioInConfig(BaseModel):
    # Settings for the AudioIn capture node.
    #
    # NOTE(review): the empty "device" entry under "options" presumably tells
    # the node UI that the choices for `device` are supplied at runtime (see
    # AudioIn.get_options) -- confirm against the component framework.
    model_config = ConfigDict(
        json_schema_extra={
            "options": {"device": {}},
        },
    )

    # Device selector as text; it is passed through
    # coerce_sounddevice_device() before being handed to sounddevice.
    device: str = ""
    # Capture sample rate in Hz.
    sample_rate: int = 48000
    # Number of channels to capture.
    channels: int = 2
    # Duration of each emitted block, in milliseconds.
    frame_ms: int = 20


class AudioIn(ThreadedComponent[tuple[()], MicOutputs]):
    # Source node: reads fixed-size int16 blocks from a sounddevice input
    # stream and sends each one as an AudioFrame on `outputs.audio`.
    tags = Tag(io={"source"}, functionality={"audio"})
    description = "Captures audio from any system **input device**. The node exposes the same `audio` output as `Mic`, but adds a dynamic device dropdown so you can listen to virtual cables or other capture devices."

    def __init__(self, config: AudioInConfig = AudioInConfig()) -> None:
        """Store the config and precompute the stream parameters from it."""
        super().__init__()
        self.config = config

        cfg = self.config
        self._sample_rate = cfg.sample_rate
        self._channels = cfg.channels
        # Block length in samples per channel for one `frame_ms` slice.
        self._frame_samples = int(self._sample_rate * cfg.frame_ms / 1000)
        self._device = coerce_sounddevice_device(cfg.device)

    @classmethod
    def get_options(cls, values: dict[str, Any]) -> dict[str, Any]:
        """Return the current input-device choices for the `device` field."""
        device_choices = list_audio_input_devices()
        return {"config": {"device": device_choices}}

    def run(self, inputs: tuple[()], outputs: MicOutputs) -> None:
        """Capture audio until `stop_event` is set, emitting one frame per block."""
        stream_kwargs = dict(
            samplerate=self._sample_rate,
            channels=self._channels,
            dtype="int16",
            blocksize=self._frame_samples,
            device=self._device,
            latency="low",
        )
        with sd.InputStream(**stream_kwargs) as stream:
            while not self.stop_event.is_set():
                # read() also returns an overflow flag, which is ignored here.
                block, _overflowed = stream.read(self._frame_samples)
                frame = AudioFrame.new(
                    data=block.copy(),
                    sample_rate=self._sample_rate,
                    channels=self._channels,
                )
                outputs.audio.send(frame)
61 changes: 61 additions & 0 deletions backend/src/lib/audio/audio_out.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from __future__ import annotations

from typing import Any

import numpy as np
import sounddevice as sd
from pydantic import BaseModel, ConfigDict

from src.lib.audio.devices import (
coerce_sounddevice_device,
list_audio_output_devices,
)
from src.lib.audio.speaker import SpeakerInputs
from src.core.component import ThreadedComponent, Tag
from src.core.frames import AudioDataFormat


class AudioOutConfig(BaseModel):
    # Settings for the AudioOut playback node.
    #
    # NOTE(review): the empty "device" entry under "options" presumably tells
    # the node UI that the choices for `device` are supplied at runtime (see
    # AudioOut.get_options) -- confirm against the component framework.
    model_config = ConfigDict(
        json_schema_extra={
            "options": {"device": {}},
        },
    )

    # Device selector as text; it is passed through
    # coerce_sounddevice_device() before being handed to sounddevice.
    device: str = ""
    # Playback sample rate in Hz.
    sample_rate: int = 48000
    # Number of channels to play.
    channels: int = 1


class AudioOut(ThreadedComponent[SpeakerInputs, tuple[()]]):
    # Sink node: writes every frame arriving on `inputs.audio` to a
    # sounddevice output stream as int16 samples.
    tags = Tag(io={"sink"}, functionality={"audio"})
    description = "Plays `AudioFrame` data to any system **output device**. The node exposes the same `audio` input as `Speaker`, but adds a dynamic device dropdown so you can target virtual cables or specific playback devices."

    def __init__(self, config: AudioOutConfig = AudioOutConfig()) -> None:
        """Store the config and precompute the stream parameters from it."""
        super().__init__()
        self.config = config

        cfg = self.config
        self._sample_rate = cfg.sample_rate
        self._channels = cfg.channels
        self._device = coerce_sounddevice_device(cfg.device)

    @classmethod
    def get_options(cls, values: dict[str, Any]) -> dict[str, Any]:
        """Return the current output-device choices for the `device` field."""
        device_choices = list_audio_output_devices()
        return {"config": {"device": device_choices}}

    def run(self, inputs: SpeakerInputs, outputs: tuple[()]) -> None:
        """Play incoming frames until the input ends or yields `None`."""
        stream_kwargs = dict(
            samplerate=self._sample_rate,
            channels=self._channels,
            dtype="int16",
            device=self._device,
            latency="low",
        )
        with sd.OutputStream(**stream_kwargs) as stream:
            for frame in inputs.audio:
                if frame is None:
                    break

                # Ask the frame for PCM16 bytes at this node's rate/channels.
                pcm_bytes = frame.get(
                    sample_rate=self._sample_rate,
                    num_channels=self._channels,
                    data_format=AudioDataFormat.PCM16,
                )

                # View the bytes as int16 and shape them (frames, channels).
                samples = np.frombuffer(pcm_bytes, dtype=np.int16)
                stream.write(samples.reshape(-1, self._channels))
120 changes: 120 additions & 0 deletions backend/src/lib/audio/devices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from __future__ import annotations

from typing import Any, Literal, TypeAlias

import sounddevice as sd

# Which side of a device is being listed: capture ("input") or playback ("output").
AudioDeviceDirection = Literal["input", "output"]
# One selectable entry as built by list_audio_devices():
# {"value": <device index as a string, or "" for the default>, "label": <display text>}.
AudioDeviceOption: TypeAlias = dict[str, str]


def _hostapi_names() -> list[str]:
    """Return the name of every host API, in the order sounddevice reports them.

    An entry without a ``name`` falls back to ``"Host API <position>"``.
    Returns an empty list when the host-API query raises.
    """
    try:
        hostapis = sd.query_hostapis()
    except Exception:
        return []

    names: list[str] = []
    for position, api in enumerate(hostapis):
        names.append(str(api.get("name", f"Host API {position}")))
    return names


def _default_device_index(direction: AudioDeviceDirection) -> int | None:
    """Return the default device index for ``direction``, or ``None``.

    ``None`` is returned when the defaults cannot be read, when the entry is
    ``None`` or not convertible to ``int``, or when the index is negative.
    """
    try:
        # sd.default.device unpacks into an (input, output) pair.
        input_default, output_default = sd.default.device
    except Exception:
        return None

    if direction == "input":
        raw = input_default
    else:
        raw = output_default

    if raw is None:
        return None

    try:
        index = int(raw)
    except (TypeError, ValueError):
        return None

    if index < 0:
        return None
    return index


def _format_device_label(
index: int,
device: dict[str, Any],
hostapis: list[str],
) -> str:
hostapi_index = int(device.get("hostapi", -1) or -1)
hostapi_name = (
hostapis[hostapi_index]
if 0 <= hostapi_index < len(hostapis)
else "Unknown Host API"
)
name = str(device.get("name", f"Device {index}"))
default_sample_rate = int(float(device.get("default_samplerate", 0) or 0))
return f"{name} [{hostapi_name}] (#{index}, {default_sample_rate} Hz)"


def list_audio_devices(
    direction: AudioDeviceDirection,
    *,
    include_default: bool = True,
) -> list[AudioDeviceOption]:
    """List the devices usable for ``direction`` as value/label options.

    Args:
        direction: ``"input"`` to list capture devices, otherwise playback.
        include_default: When true, the first option has the empty value
            ``""`` and a "System Default" label, extended with the default
            device's own label when its index is known and in range.

    Returns:
        One option per device with at least one channel in the requested
        direction, whose ``value`` is the device index as a string. Returns
        an empty list when the device query raises.
    """
    try:
        devices = sd.query_devices()
    except Exception:
        return []

    if direction == "input":
        max_channels_key = "max_input_channels"
    else:
        max_channels_key = "max_output_channels"
    hostapis = _hostapi_names()

    options: list[AudioDeviceOption] = []

    if include_default:
        default_label = "System Default"
        default_index = _default_device_index(direction)
        if default_index is not None and default_index < len(devices):
            described = _format_device_label(
                default_index, devices[default_index], hostapis
            )
            default_label = f"System Default: {described}"
        options.append({"value": "", "label": default_label})

    # Keep only devices that expose channels in the requested direction.
    options.extend(
        {
            "value": str(index),
            "label": _format_device_label(index, device, hostapis),
        }
        for index, device in enumerate(devices)
        if int(device.get(max_channels_key, 0) or 0) > 0
    )

    return options


def list_audio_input_devices(
    *,
    include_default: bool = True,
) -> list[AudioDeviceOption]:
    """Return the options from ``list_audio_devices`` for input devices."""
    direction: AudioDeviceDirection = "input"
    return list_audio_devices(direction, include_default=include_default)


def list_audio_output_devices(
    *, include_default: bool = True
) -> list[AudioDeviceOption]:
    """Return the options from ``list_audio_devices`` for output devices."""
    direction: AudioDeviceDirection = "output"
    return list_audio_devices(direction, include_default=include_default)


def coerce_sounddevice_device(value: str | None) -> int | str | None:
if value is None:
return None

stripped = value.strip()
if not stripped:
return None

if stripped.lstrip("-").isdigit():
return int(stripped)

return stripped
107 changes: 107 additions & 0 deletions backend/src/lib/audio/remote_audio_in.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from __future__ import annotations

from websockets.sync.client import ClientConnection, connect
from pydantic import BaseModel, TypeAdapter

from src.lib.audio.mic import MicOutputs
from src.lib.audio.remote_audio_protocol import (
RemoteAudioHello,
RemoteAudioResponse,
)
from src.core.component import ThreadedComponent, Tag
from src.core.frames import AudioFrame


class RemoteAudioInConfig(BaseModel):
    # Settings for the RemoteAudioIn WebSocket source node.

    # WebSocket endpoint that RemoteAudioIn connects to.
    server_url: str = "ws://127.0.0.1:8765/ws/audio"

    # Audio format sent in the hello message and stamped on every frame.
    sample_rate: int = 48000
    channels: int = 2

    # Frame duration in milliseconds, sent in the hello message.
    frame_ms: int = 20

    # Upper bound, in seconds, for the doubling reconnect delay.
    max_reconnect_delay: float = 10.0


class RemoteAudioIn(ThreadedComponent[tuple[()], MicOutputs]):
    # Source node: connects to a WebSocket server, performs a JSON hello/ack
    # handshake, then forwards every binary message as an AudioFrame. On any
    # error it reconnects with a doubling delay until stopped.
    tags = Tag(io={"source"}, functionality={"audio"})
    description = "Receives `AudioFrame` data from a remote audio bridge over **WebSocket**. Use it when the game runs on another machine and you want OpenNeuro to hear the remote system's game audio."

    def __init__(self, config: RemoteAudioInConfig = RemoteAudioInConfig()) -> None:
        """Store the config; no connection is opened until `run`."""
        super().__init__()
        self.config = config
        # Currently open connection, if any.
        self._ws: ClientConnection | None = None
        self._response_adapter: TypeAdapter[RemoteAudioResponse] = TypeAdapter(
            RemoteAudioResponse
        )

    def stop(self) -> None:
        """Signal the component to stop and close the open connection, if any."""
        super().stop()
        self._close_ws()

    def _close_ws(self) -> None:
        """Close the current connection, ignoring errors, and forget it."""
        connection = self._ws
        if connection is not None:
            try:
                connection.close()
            except Exception:
                # Best-effort close.
                pass
            finally:
                self._ws = None

    def _connect(self) -> ClientConnection:
        """Open the connection and complete the hello/ack handshake.

        Raises:
            RuntimeError: If the handshake reply is not text, or the server
                reports failure (the message is the server's `error`).
        """
        cfg = self.config
        connection = connect(
            cfg.server_url,
            max_size=8 * 1024 * 1024,
            ping_interval=20,
            ping_timeout=20,
        )
        # Record the connection before the handshake so it can be closed
        # by _close_ws() even if the handshake fails.
        self._ws = connection

        greeting = RemoteAudioHello(
            role="audio_in",
            sample_rate=cfg.sample_rate,
            channels=cfg.channels,
            frame_ms=cfg.frame_ms,
        )
        connection.send(greeting.model_dump_json())

        handshake_reply = connection.recv()
        if not isinstance(handshake_reply, str):
            raise RuntimeError("RemoteAudioIn expected a text handshake response")

        verdict = self._response_adapter.validate_json(handshake_reply)
        if not verdict.ok:
            raise RuntimeError(verdict.error)
        return connection

    def run(self, inputs: tuple[()], outputs: MicOutputs) -> None:
        """Receive audio until stopped, reconnecting after failures.

        The reconnect delay starts at 1 second, doubles after each failure up
        to `max_reconnect_delay`, and resets once a connection succeeds.
        """
        reconnect_delay = 1.0

        while not self.stop_event.is_set():
            try:
                connection = self._connect()
                reconnect_delay = 1.0

                while not self.stop_event.is_set():
                    message = connection.recv()
                    if isinstance(message, str):
                        raise RuntimeError(
                            f"RemoteAudioIn expected binary audio, got text: {message}"
                        )
                    frame = AudioFrame.new(
                        data=message,
                        sample_rate=self.config.sample_rate,
                        channels=self.config.channels,
                    )
                    outputs.audio.send(frame)
            except Exception as exc:
                # Errors raised after a stop request are not reported.
                if self.stop_event.is_set():
                    return
                print(f"[RemoteAudioIn] {exc} (reconnecting in {reconnect_delay:.1f}s)")
                # wait() returning True means stop was requested meanwhile.
                if self.stop_event.wait(reconnect_delay):
                    return
                reconnect_delay = min(
                    reconnect_delay * 2, self.config.max_reconnect_delay
                )
            finally:
                # Runs on every path, including the early returns above.
                self._close_ws()
Loading
Loading