Skip to content

Commit 0af2f17

Browse files
authored
local VAD implementation (#507)
## Description Added a local VAD implementation that shows whether the local peer is talking or not. I added a useLocalVAD hook that is called from useVAD and reports VAD for the local peer depending on whether their id is present in the peerIds array. ## Motivation and Context This change was requested by our client. ## Documentation impact - [X] Documentation update required - [ ] Documentation updated [in another PR](_) - [ ] No documentation update required ## Types of changes - [ ] Bug fix (non-breaking change which fixes an issue) - [X] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
1 parent 412c889 commit 0af2f17

10 files changed

Lines changed: 250 additions & 30 deletions

File tree

examples/mobile-client/fishjam-chat/app.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@
7171
}
7272
}
7373
],
74-
["../common/plugins/build/withLocalWebrtcPaths.js"]
74+
[
75+
"../common/plugins/build/withLocalWebrtcPaths.js"
76+
]
7577
],
7678
"experiments": {
7779
"typedRoutes": true

examples/mobile-client/fishjam-chat/components/VideosGrid.tsx

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { RTCView, usePeers } from '@fishjam-cloud/react-native-client';
1+
import { RTCView, usePeers, useVAD } from '@fishjam-cloud/react-native-client';
22
import React, { useCallback, useMemo } from 'react';
33
import type { ListRenderItemInfo } from 'react-native';
44
import { FlatList, StyleSheet, Text, View } from 'react-native';
@@ -21,6 +21,9 @@ const GridTrackItem = ({
2121
peer.track?.stream && !peer.track?.metadata?.paused
2222
? peer.track.stream
2323
: null;
24+
const vadStatus = useVAD({ peerIds: [peer.peerId] });
25+
const isPeerSpeaking =
26+
vadStatus[peer.peerId] && peer.track?.metadata?.type === 'camera';
2427

2528
return (
2629
<View style={styles.trackContainer}>
@@ -31,6 +34,10 @@ const GridTrackItem = ({
3134
backgroundColor: peer.isLocal
3235
? BrandColors.seaBlue60
3336
: BrandColors.darkBlue60,
37+
borderColor: isPeerSpeaking
38+
? BrandColors.seaBlue80
39+
: BrandColors.darkBlue100,
40+
borderWidth: isPeerSpeaking ? 3 : 2,
3441
},
3542
]}>
3643
{mediaStream ? (

examples/mobile-client/fishjam-chat/utils/tracks.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
import type { PeerWithTracks, Track } from '@fishjam-cloud/react-native-client';
1+
import type { PeerId, PeerWithTracks, Track } from '@fishjam-cloud/react-native-client';
22

33
export type GridTrack = {
44
track: Track | null;
5-
peerId: string;
5+
peerId: PeerId;
66
isLocal: boolean;
77
isVadActive: boolean;
88
aspectRatio: number | null;
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import { useContext, useEffect, useState } from "react";
2+
3+
import { FishjamClientContext } from "../contexts/fishjamClient";
4+
import type { PeerId } from "../types/public";
5+
import { usePeers } from "./usePeers";
6+
7+
// This is a dBov-to-linear conversion. -32 dBov number is taken from backend VAD threshold
8+
// formula for dBov to linear conversion: linear = 10 ^ (dBov / 20)
9+
// So -32 dBov = 10^(-32/20) ≈ 0.025. This is the minimum audio level considered "speech".
10+
const THRESHOLD = 10 ** (-32 / 20);
11+
12+
// Number of consecutive "silence" ticks before we consider speech to have stopped. Helps with smoothing out brief pauses in speech.
13+
const SILENCE_DEBOUNCE_TICKS = 2;
14+
15+
/**
16+
* Client-side voice activity detection for the local peer.
17+
*
18+
* Polls the local microphone's audio level every 100ms and derives a speech/silence
19+
* state from it. A level above ~0.025 (approximately −32 dBov, scaled to [0, 1])
20+
* is treated as speech. Silence is debounced over 2 consecutive ticks (~200ms)
21+
* to prevent rapid flapping.
22+
*
23+
* This is purely client-side — it does not signal other peers. Remote participants
24+
* receive the local peer's VAD status via backend `vadNotification` messages.
25+
*
26+
* @internal Used by `useVAD` when the local peer's id is included in `peerIds`.
27+
* @returns A record mapping the local peer's id to its current speaking state,
28+
* or an empty object if `options.disabled` is true, the local peer is not available, or no microphone track is found.
29+
*/
30+
export const useLocalVAD = (options: { disabled: boolean }): Record<PeerId, boolean> => {
31+
const fishjamClient = useContext(FishjamClientContext);
32+
const [isSpeaking, setIsSpeaking] = useState(false);
33+
const { localPeer } = usePeers();
34+
const localPeerId = localPeer?.id;
35+
const microphoneTrackId = localPeer?.microphoneTrack?.trackId;
36+
37+
useEffect(() => {
38+
if (options.disabled || !localPeerId || !microphoneTrackId) return;
39+
40+
let silenceTicks = 0;
41+
let timeoutId: ReturnType<typeof setTimeout>;
42+
43+
const poll = async () => {
44+
const trackAudio = await fishjamClient?.current?.getLocalTrackAudioLevel(microphoneTrackId);
45+
if (trackAudio != null && trackAudio.level > THRESHOLD) {
46+
silenceTicks = 0;
47+
setIsSpeaking(true);
48+
} else {
49+
silenceTicks += 1;
50+
if (silenceTicks >= SILENCE_DEBOUNCE_TICKS) {
51+
setIsSpeaking(false);
52+
}
53+
}
54+
55+
timeoutId = setTimeout(poll, 100);
56+
};
57+
58+
timeoutId = setTimeout(poll, 0);
59+
60+
return () => {
61+
clearTimeout(timeoutId);
62+
setIsSpeaking(false);
63+
};
64+
}, [options.disabled, fishjamClient, localPeerId, microphoneTrackId]);
65+
66+
if (!localPeerId || options.disabled || !microphoneTrackId) return {};
67+
return { [localPeerId]: isSpeaking };
68+
};

packages/react-client/src/hooks/useVAD.ts

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,90 +1,105 @@
1-
import type { TrackContext, VadStatus } from "@fishjam-cloud/ts-client";
1+
import type { FishjamTrackContext, VadStatus } from "@fishjam-cloud/ts-client";
22
import { useContext, useEffect, useMemo, useState } from "react";
33

44
import { FishjamClientStateContext } from "../contexts/fishjamState";
55
import type { PeerId, TrackId } from "../types/public";
6+
import { useLocalVAD } from "./useLocalVAD";
67

78
/**
8-
* Voice activity detection. Use this hook to check if voice is detected in audio track for given peer(s).
9+
* Voice activity detection. Use this hook to check if voice is detected in the audio track for given peer(s).
910
*
10-
* @param options - Options object containing `peerIds` - a list of ids of peers to subscribe to for voice activity detection notifications.
11+
* Remote peer VAD is driven by `vadNotification` messages from the backend.
12+
* If the local peer's id is included in `peerIds`, local VAD is determined client-side
13+
* by polling the microphone's audio level (see `useLocalVAD`).
14+
*
15+
* @param options - Options object.
16+
* @param options.peerIds - List of peer ids to subscribe to for VAD notifications.
17+
* Include the local peer's id to also track whether the local user is speaking.
1118
*
1219
* Example usage:
1320
* ```tsx
1421
* import { useVAD, type PeerId } from "@fishjam-cloud/react-client";
22+
*
1523
* function WhoIsTalkingComponent({ peerIds }: { peerIds: PeerId[] }) {
16-
* const peersInfo = useVAD({peerIds});
24+
* const peersInfo = useVAD({ peerIds });
1725
* const activePeers = (Object.keys(peersInfo) as PeerId[]).filter((peerId) => peersInfo[peerId]);
1826
*
1927
* return "Now talking: " + activePeers.join(", ");
2028
* }
2129
* ```
2230
* @category Connection
2331
* @group Hooks
24-
* @returns Each key is a peerId and the boolean value indicates if voice activity is currently detected for that peer.
32+
* @returns A record where each key is a peer id and the boolean value indicates
33+
* whether voice activity is currently detected for that peer.
2534
*/
2635
export const useVAD = (options: { peerIds: ReadonlyArray<PeerId> }): Record<PeerId, boolean> => {
2736
const { peerIds } = options;
2837
const clientState = useContext(FishjamClientStateContext);
2938
if (!clientState) throw Error("useVAD must be used within FishjamProvider");
39+
const showLocalPeerVAD = useMemo(
40+
() => (clientState.localPeer?.id ? peerIds.includes(clientState.localPeer?.id) : false),
41+
[clientState.localPeer?.id, peerIds],
42+
);
3043

3144
const micTracksWithSelectedPeerIds = useMemo(
3245
() =>
3346
Object.values(clientState.peers)
3447
.filter((peer) => peerIds.includes(peer.id))
3548
.map((peer) => ({
3649
peerId: peer.id,
37-
microphoneTracks: Array.from(peer.tracks.values()).filter(({ metadata }) => metadata?.type === "microphone"),
50+
microphoneTrack: Array.from(peer.tracks.values()).find(({ metadata }) => metadata?.type === "microphone"),
3851
})),
3952
[clientState.peers, peerIds],
4053
);
4154

4255
const getDefaultVadStatuses = () =>
4356
micTracksWithSelectedPeerIds.reduce<Record<PeerId, Record<TrackId, VadStatus>>>(
44-
(mappedTracks, peer) => ({
57+
(mappedTracks, { peerId, microphoneTrack }) => ({
4558
...mappedTracks,
46-
[peer.peerId]: peer.microphoneTracks.reduce(
47-
(vadStatuses, track) => ({ ...vadStatuses, [track.trackId]: track.vadStatus }),
48-
{},
49-
),
59+
[peerId]: microphoneTrack ? { [microphoneTrack.trackId]: microphoneTrack.vadStatus } : {},
5060
}),
5161
{},
5262
);
5363

5464
const [_vadStatuses, setVadStatuses] = useState<Record<PeerId, Record<TrackId, VadStatus>>>(getDefaultVadStatuses);
5565

5666
useEffect(() => {
57-
const unsubs = micTracksWithSelectedPeerIds.map(({ peerId, microphoneTracks }) => {
58-
const updateVadStatus = (track: TrackContext) => {
67+
const unsubs = micTracksWithSelectedPeerIds.map(({ peerId, microphoneTrack }) => {
68+
const updateVadStatus = (track: FishjamTrackContext) => {
5969
setVadStatuses((prev) => ({
6070
...prev,
6171
[peerId]: { ...prev[peerId], [track.trackId]: track.vadStatus },
6272
}));
6373
};
6474

65-
microphoneTracks.forEach((track) => {
66-
track.on("voiceActivityChanged", updateVadStatus);
67-
});
75+
if (microphoneTrack) {
76+
microphoneTrack.on("voiceActivityChanged", updateVadStatus);
77+
}
6878

6979
return () => {
70-
microphoneTracks.forEach((track) => {
71-
track.off("voiceActivityChanged", updateVadStatus);
72-
});
80+
if (microphoneTrack) {
81+
microphoneTrack.off("voiceActivityChanged", updateVadStatus);
82+
}
7383
};
7484
});
7585

7686
return () => unsubs.forEach((unsub) => unsub());
7787
}, [micTracksWithSelectedPeerIds]);
7888

89+
const localVAD = useLocalVAD({ disabled: !showLocalPeerVAD });
90+
7991
const vadStatuses = useMemo(
8092
() =>
81-
Object.fromEntries(
82-
Object.entries(_vadStatuses).map(([peerId, tracks]) => [
83-
peerId,
84-
Object.values(tracks).some((vad) => vad === "speech"),
85-
]),
86-
) satisfies Record<PeerId, boolean>,
87-
[_vadStatuses],
93+
({
94+
...Object.fromEntries(
95+
Object.entries(_vadStatuses).map(([peerId, tracks]) => [
96+
peerId,
97+
Object.values(tracks).some((vad) => vad === "speech"),
98+
]),
99+
),
100+
...localVAD,
101+
}) satisfies Record<PeerId, boolean>,
102+
[_vadStatuses, localVAD],
88103
);
89104

90105
return vadStatuses;

packages/ts-client/src/FishjamClient.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -948,4 +948,22 @@ export class FishjamClient<PeerMetadata = GenericMetadata, ServerMetadata = Gene
948948
public cleanup() {
949949
this.reconnectManager.cleanup();
950950
}
951+
952+
/**
953+
* Returns the current audio level for a local track.
954+
*
955+
* The `level` represents a normalized audio level in the range 0.0–1.0,
956+
* derived from WebRTC statistics for the given local audio track.
957+
*
958+
* This method returns `null` when the WebRTC layer is not initialized, when the track
959+
* cannot be found among local tracks, or when audio statistics are not yet or no longer
960+
* available for the track.
961+
*
962+
* @param trackId - The ID of the local track to query.
963+
* @returns A promise resolving to an object containing the audio `level`, or `null`
964+
* if the track is unknown or stats are not available.
965+
*/
966+
public getLocalTrackAudioLevel(trackId: string): Promise<{ level: number } | null> {
967+
return this.webrtc?.getLocalTrackAudioLevel(trackId) ?? Promise.resolve(null);
968+
}
951969
}

packages/webrtc-client/src/tracks/Local.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,4 +346,8 @@ export class Local {
346346
localTrack.addTrackToConnection();
347347
});
348348
};
349+
350+
public getLocalTrackAudioLevel = async (trackId: TrackId): Promise<{ level: number } | null> => {
351+
return this.localTracks[trackId]?.getAudioLevel() ?? null;
352+
};
349353
}

packages/webrtc-client/src/tracks/LocalTrack.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,20 @@ export class LocalTrack implements TrackCommon {
248248
);
249249
};
250250

251+
public getAudioLevel = async (): Promise<{ level: number } | null> => {
252+
if (!this.sender) return null;
253+
254+
try {
255+
const stats = await this.sender.getStats();
256+
const source = [...stats.values()].find(
257+
(r) => r.type === 'media-source' && r.kind === 'audio' && typeof r.audioLevel === 'number',
258+
);
259+
return source ? { level: source.audioLevel } : null;
260+
} catch {
261+
return null;
262+
}
263+
};
264+
251265
public createTrackVariantBitratesEvent = () => {
252266
// TODO implement this when simulcast is supported
253267
// return generateCustomEvent({

packages/webrtc-client/src/webRTCEndpoint.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,25 @@ export class WebRTCEndpoint extends (EventEmitter as new () => TypedEmitter<Requ
119119
this.sendMediaEvent({ connect });
120120
};
121121

122+
/**
123+
* Returns the current audio level for a local audio track, if available.
124+
*
125+
* This method only works for local **audio** tracks that have been negotiated
126+
* with the remote peer and for which an underlying `RTCRtpSender` and
127+
* statistics are available.
128+
*
129+
* @param trackId - Identifier of the local track to query, as used when
130+
* adding or managing local tracks on this endpoint.
131+
* @returns A promise that resolves to `{ level: number }` when an audio
132+
* level can be determined for the given track, or `null` if:
133+
* - the track does not exist,
134+
* - the track is not an audio track,
135+
* - the track has not yet been negotiated / no sender exists
136+
*/
137+
public getLocalTrackAudioLevel(trackId: string): Promise<{ level: number } | null> {
138+
return this.local.getLocalTrackAudioLevel(trackId);
139+
}
140+
122141
/**
123142
* Feeds media event received from RTC Engine to {@link WebRTCEndpoint}.
124143
* This function should be called whenever some media event from RTC Engine

0 commit comments

Comments
 (0)