From e6c8017f131be59795c6b64b2b00fd04c6de6d9f Mon Sep 17 00:00:00 2001 From: orgoro <20637412+orgoro@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:08:27 +0200 Subject: [PATCH 1/4] Update and rename codex-pr-review.yml to codex-inline-pr-review.yml --- .../{codex-pr-review.yml => codex-inline-pr-review.yml} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{codex-pr-review.yml => codex-inline-pr-review.yml} (65%) diff --git a/.github/workflows/codex-pr-review.yml b/.github/workflows/codex-inline-pr-review.yml similarity index 65% rename from .github/workflows/codex-pr-review.yml rename to .github/workflows/codex-inline-pr-review.yml index 21c282ce..453f70af 100644 --- a/.github/workflows/codex-pr-review.yml +++ b/.github/workflows/codex-inline-pr-review.yml @@ -1,4 +1,4 @@ -name: Codex PR Review +name: Codex Inline PR Review permissions: contents: read pull-requests: write @@ -10,7 +10,7 @@ on: jobs: codex-pr-review: - uses: de-id/cicd-shared/.github/workflows/codex-pr-review.yaml@main + uses: de-id/cicd-shared/.github/workflows/codex-inline-pr-review.yaml@main secrets: inherit From c82edb95414a4ff2e9752792a2bea2cbfb1269dd Mon Sep 17 00:00:00 2001 From: dor-eitan <164745144+dor-eitan@users.noreply.github.com> Date: Tue, 3 Mar 2026 14:55:22 +0200 Subject: [PATCH 2/4] fix: support pre-encoded basic auth tokens (#334) * fix: support pre-encoded basic auth tokens from admin dashboard Closes #12 Co-Authored-By: Claude Opus 4.6 * style: fix prettier formatting in auth types Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 --- src/auth/get-auth-header.test.ts | 8 ++++++++ src/auth/get-auth-header.ts | 2 +- src/types/auth.ts | 6 +----- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/auth/get-auth-header.test.ts b/src/auth/get-auth-header.test.ts index 7fbaff11..7608ab96 100644 --- a/src/auth/get-auth-header.test.ts +++ b/src/auth/get-auth-header.test.ts @@ -106,6 +106,14 @@ describe('getAuthHeader', () => { expect(result).toBe('Basic ' + btoa('user:pass')); }); + + it('should return pre-encoded token without double-encoding', () => { + const preEncodedToken = btoa('user:pass'); + const auth: Auth = { type: 'basic', token: preEncodedToken }; + const result = getAuthHeader(auth); + + expect(result).toBe(`Basic ${preEncodedToken}`); + }); }); describe('Client-Key auth', () => { diff --git a/src/auth/get-auth-header.ts b/src/auth/get-auth-header.ts index 10668863..2e3eddc1 100644 --- a/src/auth/get-auth-header.ts +++ b/src/auth/get-auth-header.ts @@ -23,7 +23,7 @@ export function getAuthHeader(auth: Auth, externalId?: string) { if (auth.type === 'bearer') { return `Bearer ${auth.token}`; } else if (auth.type === 'basic') { - return `Basic ${btoa(`${auth.username}:${auth.password}`)}`; + return `Basic ${'token' in auth ? auth.token : btoa(`${auth.username}:${auth.password}`)}`; } else if (auth.type === 'key') { return `Client-Key ${auth.clientKey}.${getExternalId(externalId)}_${sessionKey}`; } else { diff --git a/src/types/auth.ts b/src/types/auth.ts index 581a1938..0678a7ae 100644 --- a/src/types/auth.ts +++ b/src/types/auth.ts @@ -3,11 +3,7 @@ export interface BearerToken { token: string; } -export interface BasicAuth { - type: 'basic'; - username: string; - password: string; -} +export type BasicAuth = { type: 'basic'; token: string } | { type: 'basic'; username: string; password: string }; export interface ClientKeyAuth { type: 'key'; clientKey: string; From 43517ac736ca7efe019a2b4f32d1e30012b73356 Mon Sep 17 00:00:00 2001 From: Arik Sfaradi Date: Mon, 16 Mar 2026 10:51:40 +0200 Subject: [PATCH 3/4] update sdk guide according expressives (#335) --- README.md | 162 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 105 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 9e4e5f05..304b05ef 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,12 @@ The D-ID Agents SDK provides a seamless integration pathway for embedding your c With a streamlined and user-friendly workflow, you can easily harness the capabilities of the D-ID Agents and Streams API right out of the box. +The SDK supports three avatar types: + +- **Talks (V2)** — Photo-based presenters using WebRTC streaming. +- **Clips (V3)** — Pre-built presenter avatars using WebRTC streaming. +- **Expressives (V4)** — Next-generation avatars using LiveKit-based streaming, supporting microphone input and always-on fluent mode. + **Please note:** This SDK is designed for front-end development only. The creation of Agents and Knowledge bases should be handled through the [Agents API](https://docs.d-id.com/reference/agents-overview) or directly within the [D-ID Studio](https://studio.d-id.com/agents). ## ✴️ Getting Started @@ -25,7 +31,7 @@ Follow these steps: 2. Create a new Agent with the required options - Image, voice, etc. 3. In the [Agents gallery](https://studio.d-id.com/agents), hover with your mouse over the created Agent, then click on the `[...]` button 4. Click on ` Embed` button -5. Set the list of allowed domains for your Agent, for example: `http://localhost` +5. Set the list of allowed domains for your Agent, for example: `http://localhost` This is an additional security measurement: your Agent can be accessed only from the domains allowed by you. 6. In the code snippet section, fetch the `data-client-key` and the `data-agent-id`, these will be used later to access your Agent. @@ -46,11 +52,11 @@ In your front-end application, 1. Import the Agents SDK library 2. Paste the `data-agent-id` obtained in the prerequisites step in the `agentId` variable 3. Paste the `data-client-key` obtained in the prerequisites step in the `auth.clientKey` variable -4. Define an object called `callbacks`. +4. Define an object called `callbacks`. This will be explained in the [Usage section](#➤-%EF%B8%8F-callback-functions) in this guide. -5. Define an object called `streamOptions` [optional] +5. Define an object called `streamOptions` [optional — v2/v3 avatars only] This will be explained in the [Usage section](#➤-%EF%B8%8F-stream-options) in this guide. -6. Create an instance of the `createAgentManger` object called `agentManager` with the values created above. +6. Create an instance of the `createAgentManger` object called `agentManager` with the values created above. This will be explained later in the [Usage section](#➤-%EF%B8%8F-agent-manager) in this guide. Example: @@ -68,7 +74,7 @@ let auth = { type: 'key', clientKey: 'Z3123asdaczxSXSAasdcxzcashDY6MGSASFsafxSDd // 4. Define the SDK callbacks functions in this object const callbacks = {}; -// 5. Define the Stream Options object (Optional) +// 5. Define the Stream Options object (Optional — v2/v3 avatars only) let streamOptions = { compatibilityMode: 'auto', streamWarmup: true }; //....Rest of the APP's code here....// @@ -86,21 +92,21 @@ let agentManager = await sdk.createAgentManager(agentId, { auth, callbacks, stre The `agentManager` object created during initialization has several built-in parameters that might come in handy. -- **`agentManager.agent`** - Displaying all of the Agent's saved information (Same as the following [endpoint](/reference/getagent)) -- **`agentManager.starterMessages`** - Displaying the Agent's defined Starter Messages. +- **`agentManager.agent`** + Displaying all of the Agent's saved information (Same as the following [endpoint](/reference/getagent)) +- **`agentManager.starterMessages`** + Displaying the Agent's defined Starter Messages. #### **Built-in Methods** The `agentManager` object created during initialization has several built-in methods that allow you to interact with your Agent. -- **`agentManager.connect()`** - Method to create a new connection with an Agent (new WebRTC connection, web-socket, new Agent chat ID) +- **`agentManager.connect()`** + Method to create a new connection with an Agent (new WebRTC connection, web-socket, new Agent chat ID) -- **`agentManager.speak({type, input})`** - Method to make your Agent stream back a video based on a text or audio file. - (Similar to [Talks Streams](https://docs.d-id.com/reference/talks-streams-overview) / [Clips Streams API](https://docs.d-id.com/reference/clips-streams-overview)) +- **`agentManager.speak({type, input})`** + Method to make your Agent stream back a video based on a text or audio file. + (Similar to [Talks Streams](https://docs.d-id.com/reference/talks-streams-overview) / [Clips Streams API](https://docs.d-id.com/reference/clips-streams-overview)) ```javascript Text - JavaScript let speak = agentManager.speak({ @@ -118,30 +124,51 @@ The `agentManager` object created during initialization has several built-in met ) ``` -- **`agentManager.chat(string)`** - Method to send a message to your Agent and get a streamed video based on its answer (LLM) +- **`agentManager.chat(string)`** + Method to send a message to your Agent and get a streamed video based on its answer (LLM) ```javascript JavaScript let chat = agentManager.chat('What is the distance to the moon?'); ``` -- **`agentManager.rate(messageID, score)`** - Method to rate the Agent's answer in the chat - for future analytics and insights. +- **`agentManager.rate(messageID, score)`** + Method to rate the Agent's answer in the chat - for future analytics and insights. + +- **`agentManager.reconnect()`** + Method to reconnect to the Agent when the session expires and continue the conversation on the same chat ID. + +- **`agentManager.disconnect()`** + Method to close the existing connection and chat with the Agent. + +- **`agentManager.interrupt(interrupt)`** + Method to interrupt the current video stream mid-playback. + Supported for Fluent streams (V3 Pro Avatars) and all Expressive (V4) agents. -- **`agentManager.reconnect()`** - Method to reconnect to the Agent when the session expires and continue the conversation on the same chat ID. +- **`agentManager.publishMicrophoneStream(stream)`** + **Supported only with Expressive (V4) agents.** + Method to publish a microphone audio track to the session. Call after `connect()` to enable voice input. -- **`agentManager.disconnect()`** - Method to close the existing connection and chat with the Agent. + ```javascript + const micStream = await navigator.mediaDevices.getUserMedia({ audio: true }); + await agentManager.publishMicrophoneStream(micStream); + ``` + +- **`agentManager.unpublishMicrophoneStream()`** + **Supported only with Expressive (V4) agents.** + Method to stop and remove the currently published microphone track from the session. + + ```javascript + await agentManager.unpublishMicrophoneStream(); + ``` ### ➤ ✴️ Callback Functions Callback functions enable you to manage various events throughout the SDK lifecycle. Each function is linked to one or more methods within the built-in `agentManager` and triggers automatically to handle specific events efficiently -- **`onSrcObjectReady(value)`:** - [**MANDATORY for using the SDK**] - Linking the Streamed video and audio to the HTML element. - The `value` of this callback function is passed to the HTML video element in the following function. - Triggered when `agentManager.connect(), agentManager.reconnect(), agentManager.disconnect()` are called. +- **`onSrcObjectReady(value)`:** + [**MANDATORY for using the SDK**] - Linking the Streamed video and audio to the HTML element. + The `value` of this callback function is passed to the HTML video element in the following function. + Triggered when `agentManager.connect(), agentManager.reconnect(), agentManager.disconnect()` are called. ```javascript onSrcObjectReady(value) { @@ -151,9 +178,9 @@ Callback functions enable you to manage various events throughout the SDK lifecy } ``` -- **`onVideoStateChange(state)`:** - Displaying the state of the streamed video, used for switching the HTML element's source between the idle and streamed videos. - Triggered when `agentManager.chat() and agentManager.speak()` are called. +- **`onVideoStateChange(state)`:** + Displaying the state of the streamed video, used for switching the HTML element's source between the idle and streamed videos. + Triggered when `agentManager.chat() and agentManager.speak()` are called. ```javascript onVideoStateChange(state) { @@ -170,9 +197,9 @@ Callback functions enable you to manage various events throughout the SDK lifecy } ``` -- **`onConnectionStateChange(state):`** - Displaying the different connection states with the Agent's WebRTC stream connection - Triggered when `agentManager.connect(), agentManager.reconnect(), agentManager.disconnect()` are called. +- **`onConnectionStateChange(state):`** + Displaying the different connection states with the Agent's WebRTC stream connection + Triggered when `agentManager.connect(), agentManager.reconnect(), agentManager.disconnect()` are called. ```javascript onConnectionStateChange(state) { @@ -187,10 +214,10 @@ Callback functions enable you to manage various events throughout the SDK lifecy state: ['new', 'fail', 'connecting', 'connected', 'disconnected', 'closed']; ``` -- **`onNewMessage(messages, type)`:** - Displaying the chat messages array when a new message is sent to the chat. - `type`: `answer` indicates the full answer replied in the streamed video. - `role`: `user`, `assistant`(Agent) +- **`onNewMessage(messages, type)`:** + Displaying the chat messages array when a new message is sent to the chat. + `type`: `answer` indicates the full answer replied in the streamed video. + `role`: `user`, `assistant`(Agent) Triggered when `agentManager.chat()` is called: @@ -227,8 +254,21 @@ Callback functions enable you to manage various events throughout the SDK lifecy ]; ``` -- **`onError(error, errorData)`:** - Throwing an error and displaying the error message when things go badly. +- **`onConnectivityStateChange(state)`:** + Triggered when the user's internet connectivity state changes, estimated by real-time bitrate. + + ```javascript + onConnectivityStateChange(state) { + console.log("onConnectivityStateChange(): ", state) + } + ``` + + ```javascript Example Values + state: ['STRONG', 'WEAK', 'UNKNOWN']; + ``` + +- **`onError(error, errorData)`:** + Throwing an error and displaying the error message when things go badly. ```javascript onError(error, errorData) { @@ -236,33 +276,41 @@ Callback functions enable you to manage various events throughout the SDK lifecy } ``` -### ➤ ✴️ Stream Options +### ➤ ✴️ Stream Options (v2/v3 avatars only) -- **`compatibilityMode`**: - Defines the video codec to be used in the stream. - When set to `"on"`: VP8 will be used. - When set to `"off"`: H264 will be used - When set to `"auto"` - the codec will be selected according to the browser [Default] +> **Note:** `streamOptions` apply only to Talks (V2) and Clips (V3) agents. Expressive (V4) avatars manage transport settings automatically and do not use these options. + +- **`compatibilityMode`**: + Defines the video codec to be used in the stream. + When set to `"on"`: VP8 will be used. + When set to `"off"`: H264 will be used + When set to `"auto"` - the codec will be selected according to the browser [Default]
-- **`streamWarmup`**: - Allowed values: - `true` - warmup video will be streamed when the connection is established. - `false` - no warmup video [Default] +- **`streamWarmup`**: + Allowed values: + `true` - warmup video will be streamed when the connection is established. + `false` - no warmup video [Default]
-- **`sessionTimeout`**: - **Can only be used with proper permissions** - Maximum duration (in seconds) between messages before the session times out. - Max value: `300` +- **`sessionTimeout`**: + **Can only be used with proper permissions** + Maximum duration (in seconds) between messages before the session times out. + Max value: `300`
-- **`outputResolution`**: - **Supported only with Talk presenters (photo-based).** - The output resolution sets the maximum height or width pixels of the streamed video. - When resolution is not configured, it defaults to the agent output resolution. +- **`outputResolution`**: + **Supported only with Talk presenters (photo-based).** + The output resolution sets the maximum height or width pixels of the streamed video. + When resolution is not configured, it defaults to the agent output resolution. Allowed values: `150 - 1080` +- **`fluent`**: + **Supported with Agents created with V3 Pro Avatars. Always enabled for V4 Avatars.** + Allowed values: + `true` - Fluent streaming (one video for Idle/Talking states) + `false` - Legacy streaming mode (2 video elements) + ## ✴️ See it in Action -Explore our demo repository on GitHub to see the Agents SDK in action! +Explore our demo repository on GitHub to see the Agents SDK in action! This repository features a sample project crafted in Vanilla JavaScript and Vite, utilizing the Agents SDK to help you get started swiftly. [GitHub Demo Repository](https://github.com/de-id/Agents-SDK-Demo) From 6c0b1d070a1f1d1d8b45b19fa9ab92c2fdcb9cc7 Mon Sep 17 00:00:00 2001 From: dor-eitan <164745144+dor-eitan@users.noreply.github.com> Date: Mon, 16 Mar 2026 18:23:20 +0200 Subject: [PATCH 4/4] add presenterType and presenter properties to agent-video analytics (#336) Add presenterType (v4/v3-pro/v2) and presenter (name/url) to enriched analytics properties so they are included in agent-video and all other Mixpanel events. Also adds missing presenter_id to ExpresivePresenter type to match the backend DTO. Co-authored-by: Claude Opus 4.6 (1M context) --- .../agent-manager/connect-to-manager.test.ts | 1 + .../streaming-manager/factory.test.ts | 2 ++ src/types/entities/agents/presenter.ts | 1 + src/utils/agent.ts | 20 +++++++++++++++++++ src/utils/analytics.ts | 4 +++- 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/services/agent-manager/connect-to-manager.test.ts b/src/services/agent-manager/connect-to-manager.test.ts index ae7e4cf9..8e626a2d 100644 --- a/src/services/agent-manager/connect-to-manager.test.ts +++ b/src/services/agent-manager/connect-to-manager.test.ts @@ -596,6 +596,7 @@ describe('connect-to-manager', () => { ...mockAgent, presenter: { type: 'expressive' as const, + presenter_id: 'expressive-presenter-123', voice: { type: Providers.Microsoft, voice_id: 'voice-123' }, }, }; diff --git a/src/services/streaming-manager/factory.test.ts b/src/services/streaming-manager/factory.test.ts index 6e0a7aa7..1c6c118a 100644 --- a/src/services/streaming-manager/factory.test.ts +++ b/src/services/streaming-manager/factory.test.ts @@ -83,6 +83,7 @@ describe('createStreamingManager', () => { const agent = AgentFactory.build({ presenter: { type: 'expressive', + presenter_id: 'expressive-presenter-123', voice: { type: Providers.Microsoft, voice_id: 'voice-123', @@ -105,6 +106,7 @@ describe('createStreamingManager', () => { const agent = AgentFactory.build({ presenter: { type: 'expressive', + presenter_id: 'expressive-presenter-123', voice: { type: Providers.Microsoft, voice_id: 'voice-123', diff --git a/src/types/entities/agents/presenter.ts b/src/types/entities/agents/presenter.ts index 6f20af93..464e6718 100644 --- a/src/types/entities/agents/presenter.ts +++ b/src/types/entities/agents/presenter.ts @@ -30,4 +30,5 @@ export interface ClipPresenter extends BasePresenter { export interface ExpresivePresenter extends BasePresenter { type: 'expressive'; + presenter_id: string; } diff --git a/src/utils/agent.ts b/src/utils/agent.ts index 58d50d0a..ae2a4a24 100644 --- a/src/utils/agent.ts +++ b/src/utils/agent.ts @@ -2,7 +2,27 @@ import { Agent, VideoType } from '@sdk/types'; type AgentType = 'clip_v2' | Agent['presenter']['type']; +export type PresenterType = 'v4' | 'v3-pro' | 'v2'; + export const getAgentType = (presenter: Agent['presenter']): AgentType => presenter.type === 'clip' && presenter.presenter_id.startsWith('v2_') ? 'clip_v2' : presenter.type; +export const getPresenterType = (presenter: Agent['presenter']): PresenterType => { + switch (presenter.type) { + case 'expressive': + return 'v4'; + case 'clip': + return 'v3-pro'; + case 'talk': + return 'v2'; + } +}; + +export const getPresenterIdentifier = (presenter: Agent['presenter']): string => { + if (presenter.type === 'talk') { + return presenter.source_url; + } + return presenter.presenter_id; +}; + export const isStreamsV2Agent = (type: AgentType): boolean => type === VideoType.Expressive; diff --git a/src/utils/analytics.ts b/src/utils/analytics.ts index 25705a47..657e8963 100644 --- a/src/utils/analytics.ts +++ b/src/utils/analytics.ts @@ -1,5 +1,5 @@ import { Agent } from '@sdk/types/index'; -import { getAgentType } from './agent'; +import { getAgentType, getPresenterIdentifier, getPresenterType } from './agent'; export function getAnalyticsInfo(agent: Agent) { const mobileOrDesktop = () => { @@ -38,6 +38,8 @@ export function getAgentInfo(agent: Agent) { return { agentType: getAgentType(agent.presenter), + presenterType: getPresenterType(agent.presenter), + presenter: getPresenterIdentifier(agent.presenter), owner_id: agent.owner_id ?? '', promptVersion: agent.llm?.prompt_version, behavior: {