Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions demo/app.css
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,16 @@ input[type='text'][placeholder='Chat Mode (Functional | Conversational)']:focus
background-color: black;
}

/* Local camera preview element: a full-width 4:3 box with a black backdrop and rounded border. */
.camera-preview {
width: 100%;
aspect-ratio: 4 / 3;
border-radius: 8px;
background-color: #000;
border: 1px solid #444;
/* Scale the video to cover the whole box, cropping any overflow. */
object-fit: cover;
/* Flip the picture horizontally so the preview behaves like a mirror. */
transform: scaleX(-1);
}

/* Outlines the element with a 2px solid #ffcc00 border. */
.animated {
border: 2px solid #ffcc00;
}
Expand Down
145 changes: 144 additions & 1 deletion demo/app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,15 @@ export function App() {
const [audioInputDevices, setAudioInputDevices] = useState<MediaDeviceInfo[]>([]);
const [selectedAudioDeviceId, setSelectedAudioDeviceId] = useState<string>('');

// Whether the user has switched the camera on; drives the checkbox, the device picker and the preview.
const [isCameraOn, setIsCameraOn] = useState(false);
// The local camera stream, kept in state so the preview effect re-runs when it changes.
const [cameraStream, setCameraStream] = useState<MediaStream | undefined>(undefined);
// Video input devices found by enumerateDevices, and the deviceId currently chosen in the picker.
const [videoInputDevices, setVideoInputDevices] = useState<MediaDeviceInfo[]>([]);
const [selectedVideoDeviceId, setSelectedVideoDeviceId] = useState<string>('');
// Same stream as `cameraStream`, held in a ref so callbacks and effects can read it
// without listing it as a dependency.
const cameraStreamRef = useRef<MediaStream | undefined>(undefined);
// Guards the one-time default device selection; reset when the camera stream is cleaned up.
const hasSetDefaultVideoDevice = useRef(false);

// Video element that receives `srcObject` from the agent manager hook.
const videoRef = useRef<HTMLVideoElement>(null);
// Video element that shows the local camera preview.
const cameraPreviewRef = useRef<HTMLVideoElement>(null);

const {
srcObject,
Expand All @@ -36,6 +44,10 @@ export function App() {
unpublishMicrophoneStream,
microphoneEnabled,
isMicrophonePublished,
publishCameraStream,
unpublishCameraStream,
isCameraPublished,
cameraEnabled,
} = useAgentManager({
debug,
agentId,
Expand All @@ -55,6 +67,68 @@ export function App() {
}
}, []);

/**
 * Releases the local camera: stops every track of the held stream, clears the
 * ref and the state copy, and re-arms the default-device selection.
 * Does nothing when no stream is currently held.
 */
const cleanupCameraStream = useCallback(() => {
  const activeStream = cameraStreamRef.current;
  if (!activeStream) {
    return;
  }

  for (const track of activeStream.getTracks()) {
    track.stop();
  }
  cameraStreamRef.current = undefined;
  setCameraStream(undefined);
  hasSetDefaultVideoDevice.current = false;
}, []);

/**
 * Closes the agent connection and then always releases the local camera,
 * even when `disconnect` rejects (the rejection still propagates to the caller).
 */
const handleDisconnect = useCallback(async () => {
try {
await disconnect();
} finally {
// Runs on success and on failure so the camera never stays on after a disconnect attempt.
cleanupCameraStream();
setIsCameraOn(false);
}
}, [disconnect, cleanupCameraStream]);

/**
 * Refreshes the list of video input devices and, once per camera session,
 * picks the default entry for the device selector.
 *
 * When a camera stream is already live it is reused as proof of permission
 * instead of opening a second, temporary stream, and its device becomes the
 * default selection so the picker matches the camera that is actually in use.
 * Failures are logged and otherwise ignored.
 */
const updateVideoDevices = useCallback(async () => {
  try {
    const currentTrack = cameraStreamRef.current?.getVideoTracks()[0];
    const liveTrack = currentTrack?.readyState === 'live' ? currentTrack : undefined;

    if (!liveTrack) {
      // No live camera yet: request (and immediately release) a stream so the
      // permission prompt is shown before the devices are enumerated.
      const tempStream = await navigator.mediaDevices.getUserMedia({ video: true });
      tempStream.getTracks().forEach(track => track.stop());
    }

    const devices = await navigator.mediaDevices.enumerateDevices();
    const videoInputs = devices.filter(device => device.kind === 'videoinput');
    setVideoInputDevices(videoInputs);

    if (videoInputs.length > 0 && !hasSetDefaultVideoDevice.current) {
      hasSetDefaultVideoDevice.current = true;
      // Prefer the device backing the live stream; fall back to the first entry.
      const activeDeviceId = liveTrack?.getSettings().deviceId;
      const activeDevice = videoInputs.find(device => device.deviceId === activeDeviceId);
      setSelectedVideoDeviceId((activeDevice ?? videoInputs[0]).deviceId);
    }
  } catch (error) {
    console.error('Failed to enumerate video devices:', error);
  }
}, []);

/**
 * Turns the local camera on or off.
 *
 * Enabling requests a stream for the selected device (or any camera when none
 * is selected) and stores it in both the ref and the state. If access fails,
 * the user is alerted and the toggle state is left unchanged.
 * Disabling unpublishes the camera (errors are logged only) and releases the
 * local stream.
 *
 * @param enabled - true to switch the camera on, false to switch it off
 */
const handleCameraToggle = useCallback(
  async (enabled: boolean) => {
    if (enabled) {
      try {
        const videoConstraints: MediaStreamConstraints['video'] = selectedVideoDeviceId
          ? { deviceId: { exact: selectedVideoDeviceId } }
          : true;
        const stream = await navigator.mediaDevices.getUserMedia({ video: videoConstraints });

        // Another enable request may have stored a stream while this one was
        // pending. Stop it before replacing the ref, otherwise its tracks
        // become unreachable and the camera stays on.
        const previousStream = cameraStreamRef.current;
        if (previousStream && previousStream !== stream) {
          previousStream.getTracks().forEach(track => track.stop());
        }

        cameraStreamRef.current = stream;
        setCameraStream(stream);
      } catch (error) {
        console.error('Failed to get camera access:', error);
        alert('Failed to access camera. Please check permissions.');
        return;
      }
    } else {
      try {
        await unpublishCameraStream();
      } catch (error) {
        console.error('Failed to unpublish camera stream:', error);
      }
      cleanupCameraStream();
    }
    setIsCameraOn(enabled);
  },
  [selectedVideoDeviceId, unpublishCameraStream, cleanupCameraStream]
);

const updateAudioDevices = useCallback(async () => {
try {
await navigator.mediaDevices.getUserMedia({ audio: true });
Expand Down Expand Up @@ -163,6 +237,36 @@ export function App() {
}
}, [enableMicrophone, updateAudioDevices]);

// Registers cleanupCameraStream as this effect's cleanup function, so the camera
// tracks are stopped when the component unmounts.
useEffect(() => {
return cleanupCameraStream;
}, [cleanupCameraStream]);

// Keeps the preview element pointed at the current camera stream
// (or detaches it when there is no stream).
useEffect(() => {
  const previewElement = cameraPreviewRef.current;
  if (!previewElement) {
    return;
  }
  previewElement.srcObject = cameraStream ?? null;
}, [cameraStream]);

// Refreshes the video device list each time the camera is switched on.
useEffect(() => {
  if (!isCameraOn) {
    return;
  }
  // Fire-and-forget: updateVideoDevices handles its own errors.
  void updateVideoDevices();
}, [isCameraOn, updateVideoDevices]);

// Publishes the local camera stream once the connection is up, the camera is on
// and nothing has been published yet. Publish failures are logged only.
useEffect(() => {
  const isConnected = connectionState === ConnectionState.Connected;
  if (!isConnected || !isCameraOn || isCameraPublished) {
    return;
  }

  const localStream = cameraStreamRef.current;
  if (!publishCameraStream || !localStream) {
    return;
  }

  publishCameraStream(localStream).catch(error => {
    console.error('Failed to publish camera stream:', error);
  });
}, [connectionState, isCameraOn, publishCameraStream, isCameraPublished]);

useEffect(() => {
if (srcObject && videoRef.current) {
videoRef.current.srcObject = srcObject;
Expand Down Expand Up @@ -225,7 +329,7 @@ export function App() {
Interrupt
</button>

<button onClick={disconnect} disabled={connectionState !== ConnectionState.Connected}>
<button onClick={handleDisconnect} disabled={connectionState !== ConnectionState.Connected}>
Close Connection
</button>

Expand Down Expand Up @@ -261,6 +365,18 @@ export function App() {
Microphone
</label>
)}

{cameraEnabled && (
<label>
<input
type="checkbox"
name="camera"
checked={isCameraOn}
onChange={e => handleCameraToggle(e.currentTarget.checked)}
/>
Camera
</label>
)}
</div>
{microphoneEnabled && enableMicrophone && audioInputDevices.length > 0 && (
<div className="input-options" style={{ marginTop: '10px' }}>
Expand All @@ -280,6 +396,33 @@ export function App() {
</label>
</div>
)}
{isCameraOn && videoInputDevices.length > 0 && (
<div className="input-options" style={{ marginTop: '10px' }}>
<label>
Camera Device:
<select
value={selectedVideoDeviceId}
onChange={e => setSelectedVideoDeviceId(e.currentTarget.value)}
disabled={connectionState === ConnectionState.Connected}
style={{ marginLeft: '10px', minWidth: '200px' }}>
{videoInputDevices.map(device => (
<option key={device.deviceId} value={device.deviceId}>
{device.label || `Camera ${device.deviceId.substring(0, 8)}`}
</option>
))}
</select>
</label>
</div>
)}
{isCameraOn && (
<video
ref={cameraPreviewRef}
className="camera-preview"
autoPlay
playsInline
muted
/>
)}
</fieldset>
</div>
</section>
Expand Down
40 changes: 40 additions & 0 deletions demo/hooks/useAgentManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ export function useAgentManager(props: UseAgentManagerOptions) {
const [agentManager, setAgentManager] = useState<AgentManager | null>(null);
const [connectionState, setConnectionState] = useState<ConnectionState>(ConnectionState.New);
const [isMicrophonePublished, setIsMicrophonePublished] = useState(false);
const [isCameraPublished, setIsCameraPublished] = useState(false);
const streamType = agentManager?.getStreamType();

useEffect(() => {
Expand Down Expand Up @@ -80,6 +81,8 @@ export function useAgentManager(props: UseAgentManagerOptions) {

if (state !== ConnectionState.Connected) {
setAgentManager(null);
setIsMicrophonePublished(false);
setIsCameraPublished(false);
}
},
onVideoStateChange(state) {
Expand Down Expand Up @@ -131,6 +134,8 @@ export function useAgentManager(props: UseAgentManagerOptions) {
setSrcObject(null);
setConnectionState(ConnectionState.New);
setMessages([]);
setIsMicrophonePublished(false);
setIsCameraPublished(false);
}
}, [agentManager]);

Expand Down Expand Up @@ -211,13 +216,44 @@ export function useAgentManager(props: UseAgentManagerOptions) {
setIsMicrophonePublished(false);
}, [agentManager]);

/**
 * Publishes a camera stream through the current agent manager and flags the
 * camera as published once the manager call resolves.
 * Logs a warning and returns without publishing when no agent manager exists yet.
 *
 * @param stream - the camera MediaStream to publish
 * @throws Error when the agent manager does not provide publishCameraStream
 */
const publishCameraStream = useCallback(
  async (stream: MediaStream) => {
    if (agentManager) {
      if (typeof agentManager.publishCameraStream !== 'function') {
        throw new Error('publishCameraStream is not available for this streaming manager');
      }
      await agentManager.publishCameraStream(stream);
      setIsCameraPublished(true);
      return;
    }

    console.warn('Agent manager is not initialized yet. Will retry when ready.');
  },
  [agentManager]
);

/**
 * Unpublishes the camera stream through the current agent manager and clears
 * the published flag once the manager call resolves.
 * Logs a warning and returns when no agent manager exists yet.
 *
 * @throws Error when the agent manager does not provide unpublishCameraStream
 */
const unpublishCameraStream = useCallback(async () => {
  if (agentManager) {
    if (typeof agentManager.unpublishCameraStream !== 'function') {
      throw new Error('unpublishCameraStream is not available for this streaming manager');
    }
    await agentManager.unpublishCameraStream();
    setIsCameraPublished(false);
    return;
  }

  console.warn('Agent manager is not initialized yet.');
}, [agentManager]);

// True when the agent's presenter type is 'expressive' and the agent manager
// exposes a publishMicrophoneStream function.
const microphoneEnabled = useMemo(() => {
  const hasExpressivePresenter = agentManager?.agent?.presenter?.type === 'expressive';
  const canPublishMicrophone = typeof agentManager?.publishMicrophoneStream === 'function';
  return hasExpressivePresenter && canPublishMicrophone;
}, [agentManager]);

// True when the agent has vision enabled and the agent manager exposes a
// publishCameraStream function.
const cameraEnabled = useMemo(() => {
  const visionEnabled = Boolean(agentManager?.agent?.vision?.enabled);
  const canPublishCamera = typeof agentManager?.publishCameraStream === 'function';
  return visionEnabled && canPublishCamera;
}, [agentManager]);

return {
connectionState,
messages,
Expand All @@ -232,5 +268,9 @@ export function useAgentManager(props: UseAgentManagerOptions) {
unpublishMicrophoneStream,
microphoneEnabled,
isMicrophonePublished,
publishCameraStream,
unpublishCameraStream,
isCameraPublished,
cameraEnabled,
};
}
53 changes: 53 additions & 0 deletions src/services/agent-manager/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,59 @@ describe('createAgentManager', () => {
});
});

describe('publishCameraStream', () => {
  let manager: AgentManager;

  // Each case runs against a freshly created, connected manager.
  beforeEach(async () => {
    manager = await createAgentManager('agent-123', mockOptions);
    await manager.connect();
  });

  it('should publish camera stream when available', async () => {
    const publishSpy = jest.fn().mockResolvedValue(undefined);
    mockStreamingManager.publishCameraStream = publishSpy;
    const cameraStream = new MediaStream();

    await manager.publishCameraStream?.(cameraStream);

    // The manager must hand the very same stream to the streaming manager.
    expect(publishSpy).toHaveBeenCalledWith(cameraStream);
  });

  it('should throw error when publishCameraStream is not available', async () => {
    // Simulate a streaming manager without camera support.
    mockStreamingManager.publishCameraStream = undefined;

    const publishAttempt = manager.publishCameraStream?.(new MediaStream());

    await expect(publishAttempt).rejects.toThrow(
      'publishCameraStream is not available for this streaming manager'
    );
  });
});

describe('unpublishCameraStream', () => {
  let manager: AgentManager;

  // Each case runs against a freshly created, connected manager.
  beforeEach(async () => {
    manager = await createAgentManager('agent-123', mockOptions);
    await manager.connect();
  });

  it('should unpublish camera stream when available', async () => {
    const unpublishSpy = jest.fn().mockResolvedValue(undefined);
    mockStreamingManager.unpublishCameraStream = unpublishSpy;

    await manager.unpublishCameraStream?.();

    expect(unpublishSpy).toHaveBeenCalled();
  });

  it('should throw error when unpublishCameraStream is not available', async () => {
    // Simulate a streaming manager without camera support.
    mockStreamingManager.unpublishCameraStream = undefined;

    const unpublishAttempt = manager.unpublishCameraStream?.();

    await expect(unpublishAttempt).rejects.toThrow(
      'unpublishCameraStream is not available for this streaming manager'
    );
  });
});

describe('DirectPlayback mode', () => {
it('should not create socket manager in DirectPlayback mode', async () => {
const directPlaybackOptions = { ...mockOptions, mode: ChatMode.DirectPlayback };
Expand Down
12 changes: 12 additions & 0 deletions src/services/agent-manager/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,18 @@ export async function createAgentManager(agent: string, options: AgentManagerOpt
}
return items.streamingManager.unpublishMicrophoneStream();
},
/**
 * Forwards a camera stream to the active streaming manager.
 *
 * @param stream - the camera MediaStream to publish
 * @throws Error when there is no streaming manager or it lacks publishCameraStream
 */
async publishCameraStream(stream: MediaStream) {
  const streamingManager = items.streamingManager;
  if (!streamingManager?.publishCameraStream) {
    throw new Error('publishCameraStream is not available for this streaming manager');
  }

  return streamingManager.publishCameraStream(stream);
},
/**
 * Asks the active streaming manager to unpublish the camera stream.
 *
 * @throws Error when there is no streaming manager or it lacks unpublishCameraStream
 */
async unpublishCameraStream() {
  const streamingManager = items.streamingManager;
  if (!streamingManager?.unpublishCameraStream) {
    throw new Error('unpublishCameraStream is not available for this streaming manager');
  }

  return streamingManager.unpublishCameraStream();
},
async chat(userMessage: string) {
const validateChatRequest = () => {
if (isChatModeWithoutChat(mode)) {
Expand Down
14 changes: 14 additions & 0 deletions src/services/streaming-manager/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,20 @@ export type StreamingManager<T extends CreateStreamOptions | CreateSessionV2Opti
*/
unpublishMicrophoneStream?(): Promise<void>;

/**
 * Publishes a camera video stream to the LiveKit room.
 * Can be called after the connection is established to enable vision.
 * Optional: supported only by the LiveKit streaming manager.
 *
 * @param stream - the camera MediaStream to publish
 */
publishCameraStream?(stream: MediaStream): Promise<void>;

/**
 * Unpublishes the currently published camera stream.
 * Can be called after the connection is established to disable vision.
 * Optional: supported only by the LiveKit streaming manager.
 */
unpublishCameraStream?(): Promise<void>;

/**
* Session identifier information, should be returned in the body of all streaming requests
*/
Expand Down
Loading
Loading