Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 186 additions & 12 deletions demo/app.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { ChatMode, ConnectionState } from '@sdk/types';
import { useEffect, useRef, useState } from 'preact/hooks';
import { useCallback, useEffect, useRef, useState } from 'preact/hooks';

import './app.css';
import { agentId, clientKey, debug, didApiUrl, didSocketApiUrl } from './environment';
Expand All @@ -14,35 +14,179 @@ export function App() {
// Stream options forwarded to useAgentManager via `streamOptions`.
const [sessionTimeout, setSessionTimeout] = useState<number | undefined>();
const [compatibilityMode, setCompatibilityMode] = useState<'on' | 'off' | 'auto'>();
const [fluent, setFluent] = useState(false);
// Whether the user wants the microphone on; bound to the "Microphone" checkbox.
const [enableMicrophone, setEnableMicrophone] = useState(true);
// The captured microphone stream is stored twice: as state and in a ref. The callbacks and
// effects in this component read the ref, so they always see the latest stream.
const [microphoneStream, setMicrophoneStream] = useState<MediaStream | undefined>(undefined);
const microphoneStreamRef = useRef<MediaStream | undefined>(undefined);
// Audio input devices offered in the device picker, and the deviceId currently chosen
// ('' means no explicit choice has been made yet).
const [audioInputDevices, setAudioInputDevices] = useState<MediaDeviceInfo[]>([]);
const [selectedAudioDeviceId, setSelectedAudioDeviceId] = useState<string>('');

// Target <video> element that receives the agent's media stream.
const videoRef = useRef<HTMLVideoElement>(null);

const { srcObject, connectionState, messages, isSpeaking, connect, disconnect, speak, chat, interrupt } =
useAgentManager({
debug,
agentId,
baseURL: didApiUrl,
wsURL: didSocketApiUrl,
mode,
enableAnalytics: false,
auth: { type: 'key', clientKey },
streamOptions: { streamWarmup: warmup, sessionTimeout, compatibilityMode, fluent },
});
// Session state and actions from the agent manager hook. The last four entries are the
// microphone API: publish/unpublish a local MediaStream, whether the current agent supports
// microphone input at all (`microphoneEnabled`), and whether a stream is currently published.
const {
srcObject,
connectionState,
messages,
isSpeaking,
connect,
disconnect,
speak,
chat,
interrupt,
publishMicrophoneStream,
unpublishMicrophoneStream,
microphoneEnabled,
isMicrophonePublished,
} = useAgentManager({
debug,
agentId,
baseURL: didApiUrl,
wsURL: didSocketApiUrl,
mode,
enableAnalytics: false,
auth: { type: 'key', clientKey },
streamOptions: { streamWarmup: warmup, sessionTimeout, compatibilityMode, fluent },
});

/**
 * Releases the locally captured microphone stream, if one is held: stops every track,
 * clears the ref and resets the mirrored `microphoneStream` state.
 * Does nothing when no stream is held.
 */
const cleanupMicrophoneStream = useCallback(() => {
  const activeStream = microphoneStreamRef.current;
  if (!activeStream) {
    return;
  }

  for (const mediaTrack of activeStream.getTracks()) {
    mediaTrack.stop();
  }
  microphoneStreamRef.current = undefined;
  setMicrophoneStream(undefined);
}, []);

/**
 * Refreshes the list of audio input devices shown in the device picker.
 *
 * Steps:
 *  1. Open a throwaway audio stream (presumably to trigger the permission prompt so that
 *     device labels are populated — confirm against target browsers).
 *  2. Enumerate devices, then always stop the throwaway stream so the microphone is not
 *     left capturing in the background.
 *  3. Keep only `audioinput` devices and hide ones whose label contains "blackhole" or
 *     "virtual", unless that would leave nothing to show.
 *  4. Pre-select the first device if the user has not chosen one yet.
 *
 * Failures are logged and otherwise ignored; the previous device list is kept.
 */
const updateAudioDevices = useCallback(async () => {
  try {
    const permissionStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const devices = await navigator.mediaDevices.enumerateDevices().finally(() => {
      // The stream was only needed for the permission grant; release the capture right away.
      permissionStream.getTracks().forEach(track => track.stop());
    });

    const audioInputs = devices.filter(device => device.kind === 'audioinput');
    const realDevices = audioInputs.filter(device => {
      const label = device.label.toLowerCase();
      return !label.includes('blackhole') && !label.includes('virtual');
    });
    const devicesToShow = realDevices.length > 0 ? realDevices : audioInputs;

    setAudioInputDevices(devicesToShow);

    const [firstDevice] = devicesToShow;
    if (firstDevice) {
      // Functional update: keeps an existing selection without making this callback depend on
      // `selectedAudioDeviceId`, so picking a device no longer re-opens the microphone.
      setSelectedAudioDeviceId(current => current || firstDevice.deviceId);
    }
  } catch (error) {
    console.error('Failed to enumerate audio devices:', error);
  }
}, []);

/**
 * Handles the "Microphone" checkbox.
 *
 * - Not connected: only records the preference; capture happens later on connect.
 * - Connected, turning off: unpublishes the stream, then clears the preference.
 * - Connected, turning on: captures the selected device if no stream is held yet, publishes
 *   it, then sets the preference.
 *
 * If publishing does not succeed, a stream captured by this call is released again so the
 * microphone is not left open while the checkbox still reads "off".
 *
 * @param enabled Desired microphone state taken from the checkbox.
 */
const handleMicrophoneToggle = useCallback(
  async (enabled: boolean) => {
    if (connectionState !== ConnectionState.Connected) {
      setEnableMicrophone(enabled);
      return;
    }

    if (!enabled) {
      if (!unpublishMicrophoneStream) {
        return;
      }
      try {
        await unpublishMicrophoneStream();
        setEnableMicrophone(false);
      } catch (error) {
        console.error('Failed to unpublish microphone stream:', error);
      }
      return;
    }

    // Tracks whether this call opened the stream, so only that stream is released on failure.
    let acquiredHere = false;
    if (!microphoneStreamRef.current) {
      try {
        const audioConstraints: MediaStreamConstraints['audio'] = selectedAudioDeviceId
          ? { deviceId: { exact: selectedAudioDeviceId } }
          : true;
        const stream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints });
        setMicrophoneStream(stream);
        microphoneStreamRef.current = stream;
        acquiredHere = true;
      } catch (error) {
        console.error('Failed to get microphone access:', error);
        alert('Failed to access microphone. Please check permissions.');
        return;
      }
    }

    const streamToPublish = microphoneStreamRef.current;
    if (!streamToPublish) {
      return;
    }

    let published = false;
    if (publishMicrophoneStream) {
      try {
        await publishMicrophoneStream(streamToPublish);
        setEnableMicrophone(true);
        published = true;
      } catch (error) {
        console.error('Failed to publish microphone stream:', error);
      }
    }

    // Skip the release if the ref was replaced while awaiting; that stream belongs to someone else.
    if (!published && acquiredHere && microphoneStreamRef.current === streamToPublish) {
      streamToPublish.getTracks().forEach(track => track.stop());
      microphoneStreamRef.current = undefined;
      setMicrophoneStream(undefined);
    }
  },
  [connectionState, selectedAudioDeviceId, publishMicrophoneStream, unpublishMicrophoneStream]
);

/**
 * Main action button handler.
 *
 * - In the `New` or `Fail` state: captures the microphone first when it is enabled and no
 *   stream is held yet (aborting with an alert if capture fails), then connects.
 * - In the `Connected` state with non-empty `text`: asks the agent to speak that text.
 * - Otherwise: does nothing.
 */
async function onClick() {
  const canStartSession = connectionState === ConnectionState.New || connectionState === ConnectionState.Fail;

  if (!canStartSession) {
    if (connectionState === ConnectionState.Connected && text) {
      await speak(text);
    }
    return;
  }

  const needsMicrophone = enableMicrophone && !microphoneStreamRef.current;
  if (needsMicrophone) {
    const audio: MediaStreamConstraints['audio'] = selectedAudioDeviceId
      ? { deviceId: { exact: selectedAudioDeviceId } }
      : true;

    try {
      const capturedStream = await navigator.mediaDevices.getUserMedia({ audio });
      setMicrophoneStream(capturedStream);
      microphoneStreamRef.current = capturedStream;
    } catch (error) {
      console.error('Failed to get microphone access:', error);
      alert('Failed to access microphone. Please check permissions.');
      return;
    }
  }

  await connect();
}

// Release the microphone when the component unmounts (the callback itself is the cleanup).
useEffect(() => cleanupMicrophoneStream, [cleanupMicrophoneStream]);

// Turning the microphone preference off releases any stream that is still held.
useEffect(() => {
  if (enableMicrophone || !microphoneStreamRef.current) {
    return;
  }
  cleanupMicrophoneStream();
}, [enableMicrophone, cleanupMicrophoneStream]);

// Refresh the device list whenever the microphone is enabled.
useEffect(() => {
  if (!enableMicrophone) {
    return;
  }
  updateAudioDevices();
}, [enableMicrophone, updateAudioDevices]);

// Attach the agent's media stream to the <video> element once both exist.
useEffect(() => {
  const videoElement = videoRef.current;
  if (!srcObject || !videoElement) {
    return;
  }
  videoElement.srcObject = srcObject;
}, [srcObject]);

// Auto-publish: once connected, if the microphone is wanted and supported, a stream is held,
// and nothing is published yet, publish it. Failures are logged, never thrown.
useEffect(() => {
  const pendingStream = microphoneStreamRef.current;
  const wantsPublish =
    connectionState === ConnectionState.Connected &&
    enableMicrophone &&
    microphoneEnabled &&
    !isMicrophonePublished;

  if (!wantsPublish || !publishMicrophoneStream || !pendingStream) {
    return;
  }

  publishMicrophoneStream(pendingStream).catch(error => {
    if (error) {
      console.error('Failed to publish microphone stream:', error);
    }
  });
}, [connectionState, enableMicrophone, microphoneEnabled, publishMicrophoneStream, isMicrophonePublished]);

return (
<div id="app">
<section>
Expand Down Expand Up @@ -105,7 +249,37 @@ export function App() {
/>
Fluent
</label>

{microphoneEnabled && (
<label>
<input
type="checkbox"
name="microphone"
checked={enableMicrophone}
onChange={e => handleMicrophoneToggle(e.currentTarget.checked)}
/>
Microphone
</label>
)}
</div>
{microphoneEnabled && enableMicrophone && audioInputDevices.length > 0 && (
<div className="input-options" style={{ marginTop: '10px' }}>
<label>
Audio Input Device:
<select
value={selectedAudioDeviceId}
onChange={e => setSelectedAudioDeviceId(e.currentTarget.value)}
disabled={connectionState === ConnectionState.Connected}
style={{ marginLeft: '10px', minWidth: '200px' }}>
{audioInputDevices.map(device => (
<option key={device.deviceId} value={device.deviceId}>
{device.label || `Device ${device.deviceId.substring(0, 8)}`}
</option>
))}
</select>
</label>
</div>
)}
</fieldset>
</div>
</section>
Expand Down
45 changes: 42 additions & 3 deletions demo/hooks/useAgentManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ import {
StreamType,
StreamingState,
} from '@sdk/types';
import { useCallback, useEffect, useState } from 'preact/hooks';
import { useCallback, useEffect, useMemo, useState } from 'preact/hooks';

interface UseAgentManagerOptions {
agentId: string;
baseURL: string;
wsURL: string;
mode: ChatMode;
mode?: ChatMode;
auth: Auth;
streamOptions?: {
streamWarmup?: boolean;
Expand All @@ -35,7 +35,7 @@ export function useAgentManager(props: UseAgentManagerOptions) {
agentId,
baseURL,
wsURL,
mode,
mode = ChatMode.Functional,
auth,
enableAnalytics,
externalId,
Expand All @@ -52,6 +52,7 @@ export function useAgentManager(props: UseAgentManagerOptions) {
const [srcObject, setSrcObject] = useState<MediaStream | null>(null);
const [agentManager, setAgentManager] = useState<AgentManager | null>(null);
const [connectionState, setConnectionState] = useState<ConnectionState>(ConnectionState.New);
const [isMicrophonePublished, setIsMicrophonePublished] = useState(false);
const streamType = agentManager?.getStreamType();

useEffect(() => {
Expand Down Expand Up @@ -183,6 +184,40 @@ export function useAgentManager(props: UseAgentManagerOptions) {
}
}, [agentManager, connectionState]);

// Review note (medium severity): the disconnect callback resets agentManager, srcObject,
// connectionState and messages but not `isMicrophonePublished`. Left at `true` from a previous
// session, the flag makes the consumer's auto-publish check (`!isMicrophonePublished`) skip
// publishing after a reconnect. Clearing it whenever there is no manager or the connection is
// back in its initial state restores that path.
// NOTE(review): relies on disconnect clearing `agentManager` and/or returning to
// `ConnectionState.New`, as the review describes — confirm against the disconnect callback.
useEffect(() => {
  if (!agentManager || connectionState === ConnectionState.New) {
    setIsMicrophonePublished(false);
  }
}, [agentManager, connectionState]);

/**
 * Publishes a local microphone stream through the agent manager and marks it as published.
 *
 * Resolves without doing anything (after a warning) when the manager does not exist yet;
 * the callback identity changes with `agentManager`, so callers keyed on it can try again.
 *
 * @param stream Microphone MediaStream to publish.
 * @throws Error when the manager does not expose `publishMicrophoneStream`.
 */
const publishMicrophoneStream = useCallback(
  async (stream: MediaStream) => {
    if (!agentManager) {
      console.warn('Agent manager is not initialized yet. Will retry when ready.');
      return;
    }
    if (!agentManager.publishMicrophoneStream) {
      throw new Error('publishMicrophoneStream is not available for this streaming manager');
    }
    await agentManager.publishMicrophoneStream(stream);
    setIsMicrophonePublished(true);
  },
  [agentManager]
);

/**
 * Stops publishing the microphone through the agent manager and clears the published flag.
 *
 * Resolves without doing anything (after a warning) when the manager does not exist yet.
 *
 * @throws Error when the manager does not expose `unpublishMicrophoneStream`.
 */
const unpublishMicrophoneStream = useCallback(async () => {
  const manager = agentManager;

  if (!manager) {
    console.warn('Agent manager is not initialized yet.');
    return;
  }

  const supportsUnpublish = Boolean(manager.unpublishMicrophoneStream);
  if (!supportsUnpublish) {
    throw new Error('unpublishMicrophoneStream is not available for this streaming manager');
  }

  // Call as a method so `this` stays bound to the manager.
  await manager.unpublishMicrophoneStream();
  setIsMicrophonePublished(false);
}, [agentManager]);

/**
 * True only when the current agent's presenter type is 'expressive' and the manager exposes
 * a `publishMicrophoneStream` function. Recomputed when `agentManager` changes.
 */
const microphoneEnabled = useMemo(() => {
  const isExpressivePresenter = agentManager?.agent?.presenter?.type === 'expressive';
  const canPublish = typeof agentManager?.publishMicrophoneStream === 'function';

  return isExpressivePresenter && canPublish;
}, [agentManager]);

return {
connectionState,
messages,
Expand All @@ -193,5 +228,9 @@ export function useAgentManager(props: UseAgentManagerOptions) {
speak,
chat,
interrupt,
publishMicrophoneStream,
unpublishMicrophoneStream,
microphoneEnabled,
isMicrophonePublished,
};
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@d-id/client-sdk",
"private": false,
"version": "1.1.22",
"version": "1.1.23",
"type": "module",
"description": "d-id client sdk",
"repository": {
Expand Down
17 changes: 17 additions & 0 deletions src/services/agent-manager/connect-to-manager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ jest.mock('../../config/environment', () => ({
// Replace the timestamp trackers with stubs whose `get` returns a fixed value, so tests can
// assert exact latency numbers. Individual tests may override `get` with mockReturnValue.
jest.mock('../analytics/timestamp-tracker', () => ({
latencyTimestampTracker: { reset: jest.fn(), update: jest.fn(), get: jest.fn(() => 1000) },
interruptTimestampTracker: { reset: jest.fn(), update: jest.fn(), get: jest.fn(() => 500) },
streamReadyTimestampTracker: { reset: jest.fn(), update: jest.fn(), get: jest.fn(() => 1500) },
}));

describe('connect-to-manager', () => {
Expand Down Expand Up @@ -236,6 +237,7 @@ describe('connect-to-manager', () => {
let onConnectionStateChange: (state: ConnectionState) => void;
let onVideoStateChange: (state: StreamingState, statsReport?: any) => void;
let onAgentActivityStateChange: (state: AgentActivityState) => void;
let onStreamReady: (() => void) | undefined;

beforeEach(async () => {
// Initialize callbacks to avoid undefined errors
Expand All @@ -247,6 +249,7 @@ describe('connect-to-manager', () => {
onConnectionStateChange = options.callbacks.onConnectionStateChange;
onVideoStateChange = options.callbacks.onVideoStateChange;
onAgentActivityStateChange = options.callbacks.onAgentActivityStateChange;
onStreamReady = options.callbacks.onStreamReady;

return new Promise(resolve => {
setTimeout(() => {
Expand Down Expand Up @@ -370,6 +373,20 @@ describe('connect-to-manager', () => {
);
});
});

// Verifies that the onStreamReady callback reports a 'ready' analytics event carrying the
// latency read from the (mocked) stream-ready timestamp tracker.
describe('onStreamReady', () => {
  it('should track mixpanel event with latency when stream is ready', () => {
    const trackerModule = require('../analytics/timestamp-tracker');
    trackerModule.streamReadyTimestampTracker.get.mockReturnValue(2000);

    onStreamReady?.();

    const expectedPayload = { event: 'ready', latency: 2000 };
    expect(mockAnalytics.track).toHaveBeenCalledWith('agent-chat', expectedPayload);
  });
});
});

describe('Stream Options Mapping', () => {
Expand Down
Loading
Loading