diff --git a/.github/workflows/codex-pr-review.yml b/.github/workflows/codex-inline-pr-review.yml
similarity index 65%
rename from .github/workflows/codex-pr-review.yml
rename to .github/workflows/codex-inline-pr-review.yml
index 21c282ce..453f70af 100644
--- a/.github/workflows/codex-pr-review.yml
+++ b/.github/workflows/codex-inline-pr-review.yml
@@ -1,4 +1,4 @@
-name: Codex PR Review
+name: Codex Inline PR Review
permissions:
contents: read
pull-requests: write
@@ -10,7 +10,7 @@ on:
jobs:
codex-pr-review:
- uses: de-id/cicd-shared/.github/workflows/codex-pr-review.yaml@main
+ uses: de-id/cicd-shared/.github/workflows/codex-inline-pr-review.yaml@main
secrets: inherit
diff --git a/README.md b/README.md
index 9e4e5f05..304b05ef 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,12 @@ The D-ID Agents SDK provides a seamless integration pathway for embedding your c
With a streamlined and user-friendly workflow, you can easily harness the capabilities of the D-ID Agents and Streams API right out of the box.
+The SDK supports three avatar types:
+
+- **Talks (V2)** — Photo-based presenters using WebRTC streaming.
+- **Clips (V3)** — Pre-built presenter avatars using WebRTC streaming.
+- **Expressives (V4)** — Next-generation avatars using LiveKit-based streaming, supporting microphone input and always-on fluent mode.
+
**Please note:** This SDK is designed for front-end development only. The creation of Agents and Knowledge bases should be handled through the [Agents API](https://docs.d-id.com/reference/agents-overview) or directly within the [D-ID Studio](https://studio.d-id.com/agents).
## ✴️ Getting Started
@@ -25,7 +31,7 @@ Follow these steps:
2. Create a new Agent with the required options - Image, voice, etc.
3. In the [Agents gallery](https://studio.d-id.com/agents), hover with your mouse over the created Agent, then click on the `[...]` button
4. Click on `> Embed` button
-5. Set the list of allowed domains for your Agent, for example: `http://localhost`
+5. Set the list of allowed domains for your Agent, for example: `http://localhost`
This is an additional security measurement: your Agent can be accessed only from the domains allowed by you.
6. In the code snippet section, fetch the `data-client-key` and the `data-agent-id`, these will be used later to access your Agent.
@@ -46,11 +52,11 @@ In your front-end application,
1. Import the Agents SDK library
2. Paste the `data-agent-id` obtained in the prerequisites step in the `agentId` variable
3. Paste the `data-client-key` obtained in the prerequisites step in the `auth.clientKey` variable
-4. Define an object called `callbacks`.
+4. Define an object called `callbacks`.
This will be explained in the [Usage section](#➤-%EF%B8%8F-callback-functions) in this guide.
-5. Define an object called `streamOptions` [optional]
+5. Define an object called `streamOptions` [optional — v2/v3 avatars only]
This will be explained in the [Usage section](#➤-%EF%B8%8F-stream-options) in this guide.
-6. Create an instance of the `createAgentManger` object called `agentManager` with the values created above.
+6. Create an instance called `agentManager` by calling `createAgentManager` with the values created above.
This will be explained later in the [Usage section](#➤-%EF%B8%8F-agent-manager) in this guide.
Example:
@@ -68,7 +74,7 @@ let auth = { type: 'key', clientKey: 'Z3123asdaczxSXSAasdcxzcashDY6MGSASFsafxSDd
// 4. Define the SDK callbacks functions in this object
const callbacks = {};
-// 5. Define the Stream Options object (Optional)
+// 5. Define the Stream Options object (Optional — v2/v3 avatars only)
let streamOptions = { compatibilityMode: 'auto', streamWarmup: true };
//....Rest of the APP's code here....//
@@ -86,21 +92,21 @@ let agentManager = await sdk.createAgentManager(agentId, { auth, callbacks, stre
The `agentManager` object created during initialization has several built-in parameters that might come in handy.
-- **`agentManager.agent`**
- Displaying all of the Agent's saved information (Same as the following [endpoint](/reference/getagent))
-- **`agentManager.starterMessages`**
- Displaying the Agent's defined Starter Messages.
+- **`agentManager.agent`**
+ Displaying all of the Agent's saved information (Same as the following [endpoint](/reference/getagent))
+- **`agentManager.starterMessages`**
+ Displaying the Agent's defined Starter Messages.
#### **Built-in Methods**
The `agentManager` object created during initialization has several built-in methods that allow you to interact with your Agent.
-- **`agentManager.connect()`**
- Method to create a new connection with an Agent (new WebRTC connection, web-socket, new Agent chat ID)
+- **`agentManager.connect()`**
+ Method to create a new connection with an Agent (new WebRTC connection, web-socket, new Agent chat ID)
-- **`agentManager.speak({type, input})`**
- Method to make your Agent stream back a video based on a text or audio file.
- (Similar to [Talks Streams](https://docs.d-id.com/reference/talks-streams-overview) / [Clips Streams API](https://docs.d-id.com/reference/clips-streams-overview))
+- **`agentManager.speak({type, input})`**
+ Method to make your Agent stream back a video based on a text or audio file.
+ (Similar to [Talks Streams](https://docs.d-id.com/reference/talks-streams-overview) / [Clips Streams API](https://docs.d-id.com/reference/clips-streams-overview))
```javascript Text - JavaScript
let speak = agentManager.speak({
@@ -118,30 +124,51 @@ The `agentManager` object created during initialization has several built-in met
)
```
-- **`agentManager.chat(string)`**
- Method to send a message to your Agent and get a streamed video based on its answer (LLM)
+- **`agentManager.chat(string)`**
+ Method to send a message to your Agent and get a streamed video based on its answer (LLM)
```javascript JavaScript
let chat = agentManager.chat('What is the distance to the moon?');
```
-- **`agentManager.rate(messageID, score)`**
- Method to rate the Agent's answer in the chat - for future analytics and insights.
+- **`agentManager.rate(messageID, score)`**
+ Method to rate the Agent's answer in the chat - for future analytics and insights.
+
+- **`agentManager.reconnect()`**
+ Method to reconnect to the Agent when the session expires and continue the conversation on the same chat ID.
+
+- **`agentManager.disconnect()`**
+ Method to close the existing connection and chat with the Agent.
+
+- **`agentManager.interrupt(interrupt)`**
+ Method to interrupt the current video stream mid-playback.
+ Supported for Fluent streams (V3 Pro Avatars) and all Expressive (V4) agents.
-- **`agentManager.reconnect()`**
- Method to reconnect to the Agent when the session expires and continue the conversation on the same chat ID.
+- **`agentManager.publishMicrophoneStream(stream)`**
+ **Supported only with Expressive (V4) agents.**
+ Method to publish a microphone audio track to the session. Call after `connect()` to enable voice input.
-- **`agentManager.disconnect()`**
- Method to close the existing connection and chat with the Agent.
+ ```javascript
+ const micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+ await agentManager.publishMicrophoneStream(micStream);
+ ```
+
+- **`agentManager.unpublishMicrophoneStream()`**
+ **Supported only with Expressive (V4) agents.**
+ Method to stop and remove the currently published microphone track from the session.
+
+ ```javascript
+ await agentManager.unpublishMicrophoneStream();
+ ```
### ➤ ✴️ Callback Functions
Callback functions enable you to manage various events throughout the SDK lifecycle. Each function is linked to one or more methods within the built-in `agentManager` and triggers automatically to handle specific events efficiently
-- **`onSrcObjectReady(value)`:**
- [**MANDATORY for using the SDK**] - Linking the Streamed video and audio to the HTML element.
- The `value` of this callback function is passed to the HTML video element in the following function.
- Triggered when `agentManager.connect(), agentManager.reconnect(), agentManager.disconnect()` are called.
+- **`onSrcObjectReady(value)`:**
+ [**MANDATORY for using the SDK**] - Linking the Streamed video and audio to the HTML element.
+ The `value` of this callback function is passed to the HTML video element in the following function.
+ Triggered when `agentManager.connect(), agentManager.reconnect(), agentManager.disconnect()` are called.
```javascript
onSrcObjectReady(value) {
@@ -151,9 +178,9 @@ Callback functions enable you to manage various events throughout the SDK lifecy
}
```
-- **`onVideoStateChange(state)`:**
- Displaying the state of the streamed video, used for switching the HTML element's source between the idle and streamed videos.
- Triggered when `agentManager.chat() and agentManager.speak()` are called.
+- **`onVideoStateChange(state)`:**
+ Displaying the state of the streamed video, used for switching the HTML element's source between the idle and streamed videos.
+ Triggered when `agentManager.chat()` and `agentManager.speak()` are called.
```javascript
onVideoStateChange(state) {
@@ -170,9 +197,9 @@ Callback functions enable you to manage various events throughout the SDK lifecy
}
```
-- **`onConnectionStateChange(state):`**
- Displaying the different connection states with the Agent's WebRTC stream connection
- Triggered when `agentManager.connect(), agentManager.reconnect(), agentManager.disconnect()` are called.
+- **`onConnectionStateChange(state):`**
+ Displaying the different states of the Agent's WebRTC stream connection.
+ Triggered when `agentManager.connect(), agentManager.reconnect(), agentManager.disconnect()` are called.
```javascript
onConnectionStateChange(state) {
@@ -187,10 +214,10 @@ Callback functions enable you to manage various events throughout the SDK lifecy
state: ['new', 'fail', 'connecting', 'connected', 'disconnected', 'closed'];
```
-- **`onNewMessage(messages, type)`:**
- Displaying the chat messages array when a new message is sent to the chat.
- `type`: `answer` indicates the full answer replied in the streamed video.
- `role`: `user`, `assistant`(Agent)
+- **`onNewMessage(messages, type)`:**
+ Displaying the chat messages array when a new message is sent to the chat.
+ `type`: `answer` indicates the full answer replied in the streamed video.
+ `role`: `user`, `assistant`(Agent)
Triggered when `agentManager.chat()` is called:
@@ -227,8 +254,21 @@ Callback functions enable you to manage various events throughout the SDK lifecy
];
```
-- **`onError(error, errorData)`:**
- Throwing an error and displaying the error message when things go badly.
+- **`onConnectivityStateChange(state)`:**
+ Triggered when the user's internet connectivity state changes, estimated by real-time bitrate.
+
+ ```javascript
+ onConnectivityStateChange(state) {
+ console.log("onConnectivityStateChange(): ", state)
+ }
+ ```
+
+ ```javascript Example Values
+ state: ['STRONG', 'WEAK', 'UNKNOWN'];
+ ```
+
+- **`onError(error, errorData)`:**
+ Throwing an error and displaying the error message when things go badly.
```javascript
onError(error, errorData) {
@@ -236,33 +276,41 @@ Callback functions enable you to manage various events throughout the SDK lifecy
}
```
-### ➤ ✴️ Stream Options
+### ➤ ✴️ Stream Options (v2/v3 avatars only)
-- **`compatibilityMode`**:
- Defines the video codec to be used in the stream.
- When set to `"on"`: VP8 will be used.
- When set to `"off"`: H264 will be used
- When set to `"auto"` - the codec will be selected according to the browser [Default]
+> **Note:** `streamOptions` apply only to Talks (V2) and Clips (V3) agents. Expressive (V4) avatars manage transport settings automatically and do not use these options.
+
+- **`compatibilityMode`**:
+ Defines the video codec to be used in the stream.
+ When set to `"on"`: VP8 will be used.
+ When set to `"off"`: H264 will be used.
+ When set to `"auto"`: the codec will be selected according to the browser. [Default]
-- **`streamWarmup`**:
- Allowed values:
- `true` - warmup video will be streamed when the connection is established.
- `false` - no warmup video [Default]
+- **`streamWarmup`**:
+ Allowed values:
+ `true` - warmup video will be streamed when the connection is established.
+ `false` - no warmup video [Default]
-- **`sessionTimeout`**:
- **Can only be used with proper permissions**
- Maximum duration (in seconds) between messages before the session times out.
- Max value: `300`
+- **`sessionTimeout`**:
+ **Can only be used with proper permissions**
+ Maximum duration (in seconds) between messages before the session times out.
+ Max value: `300`
-- **`outputResolution`**:
- **Supported only with Talk presenters (photo-based).**
- The output resolution sets the maximum height or width pixels of the streamed video.
- When resolution is not configured, it defaults to the agent output resolution.
+- **`outputResolution`**:
+ **Supported only with Talk presenters (photo-based).**
+ The output resolution sets the maximum height or width, in pixels, of the streamed video.
+ When resolution is not configured, it defaults to the agent output resolution.
Allowed values: `150 - 1080`
+- **`fluent`**:
+ **Supported with Agents created with V3 Pro Avatars. Always enabled for V4 Avatars.**
+ Allowed values:
+ `true` - Fluent streaming (one video for Idle/Talking states)
+ `false` - Legacy streaming mode (2 video elements)
+
## ✴️ See it in Action
-Explore our demo repository on GitHub to see the Agents SDK in action!
+Explore our demo repository on GitHub to see the Agents SDK in action!
This repository features a sample project crafted in Vanilla JavaScript and Vite, utilizing the Agents SDK to help you get started swiftly.
[GitHub Demo Repository](https://github.com/de-id/Agents-SDK-Demo)
diff --git a/src/auth/get-auth-header.test.ts b/src/auth/get-auth-header.test.ts
index 7fbaff11..7608ab96 100644
--- a/src/auth/get-auth-header.test.ts
+++ b/src/auth/get-auth-header.test.ts
@@ -106,6 +106,14 @@ describe('getAuthHeader', () => {
expect(result).toBe('Basic ' + btoa('user:pass'));
});
+
+ it('should return pre-encoded token without double-encoding', () => {
+ const preEncodedToken = btoa('user:pass');
+ const auth: Auth = { type: 'basic', token: preEncodedToken };
+ const result = getAuthHeader(auth);
+
+ expect(result).toBe(`Basic ${preEncodedToken}`);
+ });
});
describe('Client-Key auth', () => {
diff --git a/src/auth/get-auth-header.ts b/src/auth/get-auth-header.ts
index 10668863..2e3eddc1 100644
--- a/src/auth/get-auth-header.ts
+++ b/src/auth/get-auth-header.ts
@@ -23,7 +23,7 @@ export function getAuthHeader(auth: Auth, externalId?: string) {
if (auth.type === 'bearer') {
return `Bearer ${auth.token}`;
} else if (auth.type === 'basic') {
- return `Basic ${btoa(`${auth.username}:${auth.password}`)}`;
+ return `Basic ${'token' in auth ? auth.token : btoa(`${auth.username}:${auth.password}`)}`;
} else if (auth.type === 'key') {
return `Client-Key ${auth.clientKey}.${getExternalId(externalId)}_${sessionKey}`;
} else {
diff --git a/src/services/agent-manager/connect-to-manager.test.ts b/src/services/agent-manager/connect-to-manager.test.ts
index ae7e4cf9..8e626a2d 100644
--- a/src/services/agent-manager/connect-to-manager.test.ts
+++ b/src/services/agent-manager/connect-to-manager.test.ts
@@ -596,6 +596,7 @@ describe('connect-to-manager', () => {
...mockAgent,
presenter: {
type: 'expressive' as const,
+ presenter_id: 'expressive-presenter-123',
voice: { type: Providers.Microsoft, voice_id: 'voice-123' },
},
};
diff --git a/src/services/streaming-manager/factory.test.ts b/src/services/streaming-manager/factory.test.ts
index 6e0a7aa7..1c6c118a 100644
--- a/src/services/streaming-manager/factory.test.ts
+++ b/src/services/streaming-manager/factory.test.ts
@@ -83,6 +83,7 @@ describe('createStreamingManager', () => {
const agent = AgentFactory.build({
presenter: {
type: 'expressive',
+ presenter_id: 'expressive-presenter-123',
voice: {
type: Providers.Microsoft,
voice_id: 'voice-123',
@@ -105,6 +106,7 @@ describe('createStreamingManager', () => {
const agent = AgentFactory.build({
presenter: {
type: 'expressive',
+ presenter_id: 'expressive-presenter-123',
voice: {
type: Providers.Microsoft,
voice_id: 'voice-123',
diff --git a/src/types/auth.ts b/src/types/auth.ts
index 581a1938..0678a7ae 100644
--- a/src/types/auth.ts
+++ b/src/types/auth.ts
@@ -3,11 +3,7 @@ export interface BearerToken {
token: string;
}
-export interface BasicAuth {
- type: 'basic';
- username: string;
- password: string;
-}
+export type BasicAuth = { type: 'basic'; token: string } | { type: 'basic'; username: string; password: string };
export interface ClientKeyAuth {
type: 'key';
clientKey: string;
diff --git a/src/types/entities/agents/presenter.ts b/src/types/entities/agents/presenter.ts
index 6f20af93..464e6718 100644
--- a/src/types/entities/agents/presenter.ts
+++ b/src/types/entities/agents/presenter.ts
@@ -30,4 +30,5 @@ export interface ClipPresenter extends BasePresenter {
export interface ExpresivePresenter extends BasePresenter {
type: 'expressive';
+ presenter_id: string;
}
diff --git a/src/utils/agent.ts b/src/utils/agent.ts
index 58d50d0a..ae2a4a24 100644
--- a/src/utils/agent.ts
+++ b/src/utils/agent.ts
@@ -2,7 +2,27 @@ import { Agent, VideoType } from '@sdk/types';
type AgentType = 'clip_v2' | Agent['presenter']['type'];
+export type PresenterType = 'v4' | 'v3-pro' | 'v2';
+
export const getAgentType = (presenter: Agent['presenter']): AgentType =>
presenter.type === 'clip' && presenter.presenter_id.startsWith('v2_') ? 'clip_v2' : presenter.type;
+export const getPresenterType = (presenter: Agent['presenter']): PresenterType => {
+ switch (presenter.type) {
+ case 'expressive':
+ return 'v4';
+ case 'clip':
+ return 'v3-pro';
+ case 'talk':
+ return 'v2';
+ }
+};
+
+export const getPresenterIdentifier = (presenter: Agent['presenter']): string => {
+ if (presenter.type === 'talk') {
+ return presenter.source_url;
+ }
+ return presenter.presenter_id;
+};
+
export const isStreamsV2Agent = (type: AgentType): boolean => type === VideoType.Expressive;
diff --git a/src/utils/analytics.ts b/src/utils/analytics.ts
index 25705a47..657e8963 100644
--- a/src/utils/analytics.ts
+++ b/src/utils/analytics.ts
@@ -1,5 +1,5 @@
import { Agent } from '@sdk/types/index';
-import { getAgentType } from './agent';
+import { getAgentType, getPresenterIdentifier, getPresenterType } from './agent';
export function getAnalyticsInfo(agent: Agent) {
const mobileOrDesktop = () => {
@@ -38,6 +38,8 @@ export function getAgentInfo(agent: Agent) {
return {
agentType: getAgentType(agent.presenter),
+ presenterType: getPresenterType(agent.presenter),
+ presenter: getPresenterIdentifier(agent.presenter),
owner_id: agent.owner_id ?? '',
promptVersion: agent.llm?.prompt_version,
behavior: {