diff --git a/examples/mobile-client/fishjam-chat/app.json b/examples/mobile-client/fishjam-chat/app.json
index b44374e14..a336812d3 100644
--- a/examples/mobile-client/fishjam-chat/app.json
+++ b/examples/mobile-client/fishjam-chat/app.json
@@ -71,7 +71,9 @@
         }
       }
     ],
-    ["../common/plugins/build/withLocalWebrtcPaths.js"]
+    [
+      "../common/plugins/build/withLocalWebrtcPaths.js"
+    ]
   ],
   "experiments": {
     "typedRoutes": true
diff --git a/examples/mobile-client/fishjam-chat/components/VideosGrid.tsx b/examples/mobile-client/fishjam-chat/components/VideosGrid.tsx
index 82d7b56bb..83e9723ee 100644
--- a/examples/mobile-client/fishjam-chat/components/VideosGrid.tsx
+++ b/examples/mobile-client/fishjam-chat/components/VideosGrid.tsx
@@ -1,4 +1,4 @@
-import { RTCView, usePeers } from '@fishjam-cloud/react-native-client';
+import { RTCView, usePeers, useVAD } from '@fishjam-cloud/react-native-client';
 import React, { useCallback, useMemo } from 'react';
 import type { ListRenderItemInfo } from 'react-native';
 import { FlatList, StyleSheet, Text, View } from 'react-native';
@@ -21,6 +21,9 @@ const GridTrackItem = ({
     peer.track?.stream && !peer.track?.metadata?.paused
       ? peer.track.stream
       : null;
+  const vadStatus = useVAD({ peerIds: [peer.peerId] });
+  const isPeerSpeaking =
+    vadStatus[peer.peerId] && peer.track?.metadata?.type === 'camera';

   return (
@@ -31,6 +34,10 @@
           backgroundColor: peer.isLocal
             ? BrandColors.seaBlue60
             : BrandColors.darkBlue60,
+          borderColor: isPeerSpeaking
+            ? BrandColors.seaBlue80
+            : BrandColors.darkBlue100,
+          borderWidth: isPeerSpeaking ? 3 : 2,
         },
       ]}>
       {mediaStream ? (
diff --git a/examples/mobile-client/fishjam-chat/utils/tracks.ts b/examples/mobile-client/fishjam-chat/utils/tracks.ts
index 5bff3937f..762da9270 100644
--- a/examples/mobile-client/fishjam-chat/utils/tracks.ts
+++ b/examples/mobile-client/fishjam-chat/utils/tracks.ts
@@ -1,8 +1,8 @@
-import type { PeerWithTracks, Track } from '@fishjam-cloud/react-native-client';
+import type { PeerId, PeerWithTracks, Track } from '@fishjam-cloud/react-native-client';

 export type GridTrack = {
   track: Track | null;
-  peerId: string;
+  peerId: PeerId;
   isLocal: boolean;
   isVadActive: boolean;
   aspectRatio: number | null;
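Worth noting for the `GridTrackItem` change above: `useVAD` recomputes its subscriptions whenever the `peerIds` array identity changes (its `useMemo` depends on the array reference, as shown in the `useVAD.ts` hunk further down), so passing a fresh `[peer.peerId]` literal re-subscribes on every render. A minimal sketch of a render-stable call site; the `SpeakingIndicator` component is hypothetical, not part of this diff:

```tsx
import type { PeerId } from '@fishjam-cloud/react-native-client';
import { useVAD } from '@fishjam-cloud/react-native-client';
import React, { useMemo } from 'react';
import { Text } from 'react-native';

// Illustrative component, not part of this diff.
const SpeakingIndicator = ({ peerId }: { peerId: PeerId }) => {
  // Keep the array identity stable so the hook's effect does not re-run each render.
  const peerIds = useMemo(() => [peerId], [peerId]);
  const vadStatus = useVAD({ peerIds });

  return vadStatus[peerId] ? <Text>Speaking</Text> : null;
};
```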
diff --git a/packages/react-client/src/hooks/useLocalVAD.ts b/packages/react-client/src/hooks/useLocalVAD.ts
new file mode 100644
index 000000000..6ca4a3f59
--- /dev/null
+++ b/packages/react-client/src/hooks/useLocalVAD.ts
@@ -0,0 +1,68 @@
+import { useContext, useEffect, useState } from "react";
+
+import { FishjamClientContext } from "../contexts/fishjamClient";
+import type { PeerId } from "../types/public";
+import { usePeers } from "./usePeers";
+
+// dBov-to-linear conversion; the -32 dBov value is taken from the backend VAD threshold.
+// Formula: linear = 10 ^ (dBov / 20), so -32 dBov = 10^(-32/20) ≈ 0.025.
+// This is the minimum audio level considered "speech".
+const THRESHOLD = 10 ** (-32 / 20);
+
+// Number of consecutive "silence" ticks before we consider speech to have stopped.
+// Helps smooth out brief pauses in speech.
+const SILENCE_DEBOUNCE_TICKS = 2;
+
+/**
+ * Client-side voice activity detection for the local peer.
+ *
+ * Polls the local microphone's audio level every 100ms and derives a speech/silence
+ * state from it. A level above ~0.025 (approximately -32 dBov, scaled to [0, 1])
+ * is treated as speech. Silence is debounced over 2 consecutive ticks (~200ms)
+ * to prevent rapid flapping.
+ *
+ * This is purely client-side; it does not signal other peers. Remote participants
+ * receive the local peer's VAD status via backend `vadNotification` messages.
+ *
+ * @internal Used by `useVAD` when the local peer's id is included in `peerIds`.
+ * @returns A record mapping the local peer's id to its current speaking state,
+ * or an empty object if `options.disabled` is true, the local peer is not
+ * available, or no microphone track is found.
+ */
+export const useLocalVAD = (options: { disabled: boolean }): Record<PeerId, boolean> => {
+  const fishjamClient = useContext(FishjamClientContext);
+  const [isSpeaking, setIsSpeaking] = useState(false);
+  const { localPeer } = usePeers();
+  const localPeerId = localPeer?.id;
+  const microphoneTrackId = localPeer?.microphoneTrack?.trackId;
+
+  useEffect(() => {
+    if (options.disabled || !localPeerId || !microphoneTrackId) return;
+
+    let silenceTicks = 0;
+    let timeoutId: ReturnType<typeof setTimeout>;
+
+    const poll = async () => {
+      const trackAudio = await fishjamClient?.current?.getLocalTrackAudioLevel(microphoneTrackId);
+      if (trackAudio != null && trackAudio.level > THRESHOLD) {
+        silenceTicks = 0;
+        setIsSpeaking(true);
+      } else {
+        silenceTicks += 1;
+        if (silenceTicks >= SILENCE_DEBOUNCE_TICKS) {
+          setIsSpeaking(false);
+        }
+      }
+
+      timeoutId = setTimeout(poll, 100);
+    };
+
+    timeoutId = setTimeout(poll, 0);
+
+    return () => {
+      clearTimeout(timeoutId);
+      setIsSpeaking(false);
+    };
+  }, [options.disabled, fishjamClient, localPeerId, microphoneTrackId]);
+
+  if (!localPeerId || options.disabled || !microphoneTrackId) return {};
+  return { [localPeerId]: isSpeaking };
+};
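The constants above are easy to sanity-check; a quick sketch of the arithmetic (illustrative only):

```ts
// dBov-to-linear conversion used by THRESHOLD above.
const dBovToLinear = (dBov: number): number => 10 ** (dBov / 20);

console.log(dBovToLinear(0)); // 1 (digital full scale)
console.log(dBovToLinear(-32)); // ≈ 0.0251, the value of THRESHOLD

// With a 100ms poll interval, SILENCE_DEBOUNCE_TICKS = 2 means the hook
// reports "not speaking" only after ~200ms of continuous sub-threshold audio.
```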
diff --git a/packages/react-client/src/hooks/useVAD.ts b/packages/react-client/src/hooks/useVAD.ts
index 008e0e123..128223693 100644
--- a/packages/react-client/src/hooks/useVAD.ts
+++ b/packages/react-client/src/hooks/useVAD.ts
@@ -1,19 +1,27 @@
-import type { TrackContext, VadStatus } from "@fishjam-cloud/ts-client";
+import type { FishjamTrackContext, VadStatus } from "@fishjam-cloud/ts-client";
 import { useContext, useEffect, useMemo, useState } from "react";

 import { FishjamClientStateContext } from "../contexts/fishjamState";
 import type { PeerId, TrackId } from "../types/public";
+import { useLocalVAD } from "./useLocalVAD";

 /**
- * Voice activity detection. Use this hook to check if voice is detected in audio track for given peer(s).
+ * Voice activity detection. Use this hook to check whether voice is detected in the audio track of the given peer(s).
  *
- * @param options - Options object containing `peerIds` - a list of ids of peers to subscribe to for voice activity detection notifications.
+ * Remote peer VAD is driven by `vadNotification` messages from the backend.
+ * If the local peer's id is included in `peerIds`, local VAD is determined client-side
+ * by polling the microphone's audio level (see `useLocalVAD`).
+ *
+ * @param options - Options object.
+ * @param options.peerIds - List of peer ids to subscribe to for VAD notifications.
+ * Include the local peer's id to also track whether the local user is speaking.
  *
  * Example usage:
  * ```tsx
  * import { useVAD, type PeerId } from "@fishjam-cloud/react-client";
+ *
  * function WhoIsTalkingComponent({ peerIds }: { peerIds: PeerId[] }) {
- *   const peersInfo = useVAD({peerIds});
+ *   const peersInfo = useVAD({ peerIds });
  *   const activePeers = (Object.keys(peersInfo) as PeerId[]).filter((peerId) => peersInfo[peerId]);
  *
  *   return "Now talking: " + activePeers.join(", ");
@@ -21,12 +29,17 @@ import type { PeerId, TrackId } from "../types/public";
  * ```
  * @category Connection
  * @group Hooks
- * @returns Each key is a peerId and the boolean value indicates if voice activity is currently detected for that peer.
+ * @returns A record where each key is a peer id and the boolean value indicates
+ * whether voice activity is currently detected for that peer.
  */
 export const useVAD = (options: { peerIds: ReadonlyArray<PeerId> }): Record<PeerId, boolean> => {
   const { peerIds } = options;
   const clientState = useContext(FishjamClientStateContext);
   if (!clientState) throw Error("useVAD must be used within FishjamProvider");
+  const showLocalPeerVAD = useMemo(
+    () => (clientState.localPeer?.id ? peerIds.includes(clientState.localPeer?.id) : false),
+    [clientState.localPeer?.id, peerIds],
+  );

   const micTracksWithSelectedPeerIds = useMemo(
     () =>
@@ -34,19 +47,16 @@ export const useVAD = (options: { peerIds: ReadonlyArray<PeerId> }): Record<PeerId, boolean> => {
         .filter((peer) => peerIds.includes(peer.id))
         .map((peer) => ({
           peerId: peer.id,
-          microphoneTracks: Array.from(peer.tracks.values()).filter(({ metadata }) => metadata?.type === "microphone"),
+          microphoneTrack: Array.from(peer.tracks.values()).find(({ metadata }) => metadata?.type === "microphone"),
         })),
     [clientState.peers, peerIds],
   );

   const getDefaultVadStatuses = () =>
     micTracksWithSelectedPeerIds.reduce<Record<PeerId, Record<TrackId, VadStatus>>>(
-      (mappedTracks, peer) => ({
+      (mappedTracks, { peerId, microphoneTrack }) => ({
         ...mappedTracks,
-        [peer.peerId]: peer.microphoneTracks.reduce(
-          (vadStatuses, track) => ({ ...vadStatuses, [track.trackId]: track.vadStatus }),
-          {},
-        ),
+        [peerId]: microphoneTrack ? { [microphoneTrack.trackId]: microphoneTrack.vadStatus } : {},
       }),
       {},
     );
@@ -54,37 +64,42 @@ export const useVAD = (options: { peerIds: ReadonlyArray<PeerId> }): Record<PeerId, boolean> => {
   const [_vadStatuses, setVadStatuses] = useState<Record<PeerId, Record<TrackId, VadStatus>>>(getDefaultVadStatuses);

   useEffect(() => {
-    const unsubs = micTracksWithSelectedPeerIds.map(({ peerId, microphoneTracks }) => {
-      const updateVadStatus = (track: TrackContext) => {
+    const unsubs = micTracksWithSelectedPeerIds.map(({ peerId, microphoneTrack }) => {
+      const updateVadStatus = (track: FishjamTrackContext) => {
         setVadStatuses((prev) => ({
           ...prev,
           [peerId]: { ...prev[peerId], [track.trackId]: track.vadStatus },
         }));
       };

-      microphoneTracks.forEach((track) => {
-        track.on("voiceActivityChanged", updateVadStatus);
-      });
+      if (microphoneTrack) {
+        microphoneTrack.on("voiceActivityChanged", updateVadStatus);
+      }

       return () => {
-        microphoneTracks.forEach((track) => {
-          track.off("voiceActivityChanged", updateVadStatus);
-        });
+        if (microphoneTrack) {
+          microphoneTrack.off("voiceActivityChanged", updateVadStatus);
+        }
       };
     });

     return () => unsubs.forEach((unsub) => unsub());
   }, [micTracksWithSelectedPeerIds]);

+  const localVAD = useLocalVAD({ disabled: !showLocalPeerVAD });
+
   const vadStatuses = useMemo(
     () =>
-      Object.fromEntries(
-        Object.entries(_vadStatuses).map(([peerId, tracks]) => [
-          peerId,
-          Object.values(tracks).some((vad) => vad === "speech"),
-        ]),
-      ) satisfies Record<PeerId, boolean>,
-    [_vadStatuses],
+      ({
+        ...Object.fromEntries(
+          Object.entries(_vadStatuses).map(([peerId, tracks]) => [
+            peerId,
+            Object.values(tracks).some((vad) => vad === "speech"),
+          ]),
+        ),
+        ...localVAD,
+      }) satisfies Record<PeerId, boolean>,
+    [_vadStatuses, localVAD],
   );

   return vadStatuses;
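To exercise the new local-peer path, a caller just includes the local peer's id in `peerIds`. A sketch under the assumption that `usePeers` from `@fishjam-cloud/react-client` exposes `localPeer` and `remotePeers` (the exact shape of `usePeers` is not shown in this diff):

```tsx
import { usePeers, useVAD } from "@fishjam-cloud/react-client";
import { useMemo } from "react";

// Illustrative component, not part of this diff.
function SpeakerList() {
  // Assumption: usePeers() returns { localPeer, remotePeers }.
  const { localPeer, remotePeers } = usePeers();
  const peerIds = useMemo(
    () => [...(localPeer ? [localPeer.id] : []), ...remotePeers.map((p) => p.id)],
    [localPeer, remotePeers],
  );
  // Including the local id makes useVAD merge in the client-side local VAD result.
  const vad = useVAD({ peerIds });

  return <p>Now talking: {peerIds.filter((id) => vad[id]).join(", ")}</p>;
}
```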
diff --git a/packages/ts-client/src/FishjamClient.ts b/packages/ts-client/src/FishjamClient.ts
index 992bc862f..88a3f87b9 100644
--- a/packages/ts-client/src/FishjamClient.ts
+++ b/packages/ts-client/src/FishjamClient.ts
@@ -948,4 +948,22 @@ export class FishjamClient {
+
+  public getLocalTrackAudioLevel(trackId: TrackId): Promise<{ level: number } | null> {
+    return this.webrtc?.getLocalTrackAudioLevel(trackId) ?? Promise.resolve(null);
+  }
 }
diff --git a/packages/webrtc-client/src/tracks/Local.ts b/packages/webrtc-client/src/tracks/Local.ts
index d9ece6bb3..83a4b488c 100644
--- a/packages/webrtc-client/src/tracks/Local.ts
+++ b/packages/webrtc-client/src/tracks/Local.ts
@@ -346,4 +346,8 @@ export class Local {
       localTrack.addTrackToConnection();
     });
   };
+
+  public getLocalTrackAudioLevel = async (trackId: TrackId): Promise<{ level: number } | null> => {
+    return this.localTracks[trackId]?.getAudioLevel() ?? null;
+  };
 }
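With `getLocalTrackAudioLevel` now public on `FishjamClient`, a caller can build a simple mic level meter on top of it. A minimal sketch; `client` (a connected `FishjamClient`) and `micTrackId` (the local microphone track's id) are assumed to exist in the surrounding code:

```ts
// Poll the local mic level every 100ms and log it in dBov.
const meter = setInterval(async () => {
  const audio = await client.getLocalTrackAudioLevel(micTrackId);
  if (audio) {
    // level is linear in [0, 1]; 20 * log10(level) converts back to dBov.
    const dBov = 20 * Math.log10(audio.level || Number.EPSILON);
    console.log(`mic level: ${dBov.toFixed(1)} dBov`);
  }
}, 100);

// Stop polling when done:
// clearInterval(meter);
```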
diff --git a/packages/webrtc-client/src/tracks/LocalTrack.ts b/packages/webrtc-client/src/tracks/LocalTrack.ts
index ee865649c..f6e191b32 100644
--- a/packages/webrtc-client/src/tracks/LocalTrack.ts
+++ b/packages/webrtc-client/src/tracks/LocalTrack.ts
@@ -248,6 +248,20 @@ export class LocalTrack implements TrackCommon {
     );
   };

+  public getAudioLevel = async (): Promise<{ level: number } | null> => {
+    if (!this.sender) return null;
+
+    try {
+      const stats = await this.sender.getStats();
+      const source = [...stats.values()].find(
+        (r) => r.type === 'media-source' && r.kind === 'audio' && typeof r.audioLevel === 'number',
+      );
+      return source ? { level: source.audioLevel } : null;
+    } catch {
+      return null;
+    }
+  };
+
   public createTrackVariantBitratesEvent = () => {
     // TODO implement this when simulcast is supported
     // return generateCustomEvent({
diff --git a/packages/webrtc-client/src/webRTCEndpoint.ts b/packages/webrtc-client/src/webRTCEndpoint.ts
index b2e9aeb87..a1d49a2ca 100644
--- a/packages/webrtc-client/src/webRTCEndpoint.ts
+++ b/packages/webrtc-client/src/webRTCEndpoint.ts
@@ -119,6 +119,25 @@ export class WebRTCEndpoint extends (EventEmitter as new () => TypedEmitter<Required<WebRTCEndpointEvents>>) {
+
+  public getLocalTrackAudioLevel(trackId: TrackId): Promise<{ level: number } | null> {
+    return this.local.getLocalTrackAudioLevel(trackId);
+  }
+
   /**
    * Feeds media event received from RTC Engine to {@link WebRTCEndpoint}.
    * This function should be called whenever some media event from RTC Engine
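`getAudioLevel` relies on the standard `media-source` entry of the WebRTC stats API, whose `audioLevel` field is the pre-encoder level in [0, 1]. The same lookup can be written against a bare `RTCRtpSender`, independent of the classes above (standard browser APIs only; illustrative):

```ts
async function readMicLevel(sender: RTCRtpSender): Promise<number | null> {
  const stats = await sender.getStats();
  for (const report of stats.values()) {
    // 'media-source' reports describe the track feeding the sender.
    if (report.type === 'media-source' && report.kind === 'audio') {
      return typeof report.audioLevel === 'number' ? report.audioLevel : null;
    }
  }
  return null;
}
```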
diff --git a/packages/webrtc-client/tests/methods/getLocalTrackAudioLevelMethod.test.ts b/packages/webrtc-client/tests/methods/getLocalTrackAudioLevelMethod.test.ts
new file mode 100644
index 000000000..4844753c0
--- /dev/null
+++ b/packages/webrtc-client/tests/methods/getLocalTrackAudioLevelMethod.test.ts
@@ -0,0 +1,73 @@
+import { expect, it, vi } from 'vitest';
+
+import { WebRTCEndpoint } from '../../src';
+import { serializeServerMediaEvent } from '../../src/mediaEvent';
+import { createConnectedEventWithOneEndpoint, mockTrack } from '../fixtures';
+import { mockMediaStream, mockRTCPeerConnection } from '../mocks';
+
+it('getLocalTrackAudioLevel returns null for unknown track id', async () => {
+  const webRTCEndpoint = new WebRTCEndpoint();
+  const serializedEvent = serializeServerMediaEvent({ connected: createConnectedEventWithOneEndpoint() });
+  webRTCEndpoint.receiveMediaEvent(serializedEvent);
+
+  const result = await webRTCEndpoint.getLocalTrackAudioLevel('non-existent-track-id');
+
+  expect(result).toBeNull();
+});
+
+it('getLocalTrackAudioLevel returns null when track has no sender', async () => {
+  mockRTCPeerConnection();
+  mockMediaStream();
+
+  const webRTCEndpoint = new WebRTCEndpoint();
+  const serializedEvent = serializeServerMediaEvent({ connected: createConnectedEventWithOneEndpoint() });
+  webRTCEndpoint.receiveMediaEvent(serializedEvent);
+  webRTCEndpoint.addTrack(mockTrack);
+
+  const [trackId] = Object.keys(webRTCEndpoint['local']['localTracks']);
+
+  const result = await webRTCEndpoint.getLocalTrackAudioLevel(trackId!);
+
+  // The sender is not set until the offer/answer exchange, so getAudioLevel returns null.
+  expect(result).toBeNull();
+});
+
+it('getLocalTrackAudioLevel returns audio level from sender stats', async () => {
+  mockRTCPeerConnection();
+  mockMediaStream();
+
+  const webRTCEndpoint = new WebRTCEndpoint();
+  const serializedEvent = serializeServerMediaEvent({ connected: createConnectedEventWithOneEndpoint() });
+  webRTCEndpoint.receiveMediaEvent(serializedEvent);
+  webRTCEndpoint.addTrack(mockTrack);
+
+  const [trackId] = Object.keys(webRTCEndpoint['local']['localTracks']);
+  const localTrack = webRTCEndpoint['local']['localTracks'][trackId!];
+
+  const statsMap = new Map([['report-1', { type: 'media-source', kind: 'audio', audioLevel: 0.42 }]]);
+  localTrack!['sender'] = { getStats: vi.fn().mockResolvedValue(statsMap) } as any;
+
+  const result = await webRTCEndpoint.getLocalTrackAudioLevel(trackId!);
+
+  expect(result).toEqual({ level: 0.42 });
+});
+
+it('getLocalTrackAudioLevel returns null when stats have no audio media-source report', async () => {
+  mockRTCPeerConnection();
+  mockMediaStream();
+
+  const webRTCEndpoint = new WebRTCEndpoint();
+  const serializedEvent = serializeServerMediaEvent({ connected: createConnectedEventWithOneEndpoint() });
+  webRTCEndpoint.receiveMediaEvent(serializedEvent);
+  webRTCEndpoint.addTrack(mockTrack);
+
+  const [trackId] = Object.keys(webRTCEndpoint['local']['localTracks']);
+  const localTrack = webRTCEndpoint['local']['localTracks'][trackId!];
+
+  const statsMap = new Map([['report-1', { type: 'media-source', kind: 'video', videoWidth: 1280 }]]);
+  localTrack!['sender'] = { getStats: vi.fn().mockResolvedValue(statsMap) } as any;
+
+  const result = await webRTCEndpoint.getLocalTrackAudioLevel(trackId!);
+
+  expect(result).toBeNull();
+});