livekit · btpod · Jun 11, 2026
diff --git a/.changeset/speechsdk-tts-plugin.md b/.changeset/speechsdk-tts-plugin.md
@@ -0,0 +1,5 @@
+---
+'@livekit/agents-plugin-speechsdk': patch
+---
+
+Add the speech-sdk TTS plugin: synthesis across 15 providers via one `provider/model` string, including providers without a dedicated plugin (Murf, Smallest.ai, fal.ai-hosted open-weight models).
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -126,7 +126,7 @@ Plugin capabilities by type:
 
 - **LLM**: openai, google, baseten, mistralai
 - **STT**: deepgram (v1+v2), openai, baseten, sarvam (v1/v2/v3), mistralai, inworld, cartesia
-- **TTS**: cartesia, elevenlabs, deepgram, openai, neuphonic, resemble, rime, inworld, baseten, sarvam (v1/v2/v3), mistralai, fishaudio, hume
+- **TTS**: cartesia, elevenlabs, deepgram, openai, neuphonic, resemble, rime, inworld, baseten, sarvam (v1/v2/v3), mistralai, fishaudio, hume, speechsdk (multi-provider)
 - **VAD**: silero (ONNX-based, local)
 - **EOU/Turn Detection**: livekit (HuggingFace + ONNX)
 - **Realtime**: openai (+ responses/, ws/ modules), google (beta), xai, phonic

diff --git a/README.md b/README.md
@@ -82,6 +82,7 @@ Currently, only the following plugins are supported:
 | [@livekit/agents-plugin-phonic](https://www.npmjs.com/package/@livekit/agents-plugin-phonic)         | Realtime      |
 | [@livekit/agents-plugin-fishaudio](https://www.npmjs.com/package/@livekit/agents-plugin-fishaudio)   | TTS           |
 | [@livekit/agents-plugin-hume](https://www.npmjs.com/package/@livekit/agents-plugin-hume)             | TTS           |
+| [@livekit/agents-plugin-speechsdk](https://www.npmjs.com/package/@livekit/agents-plugin-speechsdk)   | TTS           |
 
 ## Docs and guides
 

diff --git a/plugins/speechsdk/README.md b/plugins/speechsdk/README.md
@@ -0,0 +1,42 @@
+<!--
+SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+
+SPDX-License-Identifier: Apache-2.0
+-->
+# speech-sdk plugin for LiveKit Agents
+
+The Agents Framework is designed for building realtime, programmable
+participants that run on servers. Use it to create conversational, multi-modal
+voice agents that can see, hear, and understand.
+
+This package contains the [speech-sdk](https://github.com/Jellypod-Inc/speech-sdk)
+plugin, which allows for voice synthesis across 15 TTS providers through a
+single `provider/model` string, including providers without a dedicated
+LiveKit plugin (Murf, Smallest.ai, and fal.ai-hosted open-weight models such
+as Kokoro):
+
+```ts
+import * as speechsdk from '@livekit/agents-plugin-speechsdk';
+
+const tts = new speechsdk.TTS({ model: 'openai/gpt-4o-mini-tts', voice: 'alloy' });
+// or: { model: 'murf/FALCON', voice: 'en-US-amara' }
+// or: { model: 'fal-ai/kokoro/american-english', voice: 'af_heart' }
+```
+
+By default, calls go directly to the selected provider using your own API key
+from the provider's standard environment variable (`OPENAI_API_KEY`,
+`MURF_API_KEY`, and so on). Optionally, setting `SPEECHBASE_API_KEY` routes
+the same `provider/model` strings through
+[speechbase.ai](https://speechbase.ai), a hosted gateway, so a single key
+covers every provider.
+
+Synthesis is non-streaming (`AgentSession` wraps it in a sentence-level
+`StreamAdapter` automatically). For latency-critical production agents, a
+dedicated provider plugin with native WebSocket streaming remains the better
+choice when one exists; this plugin is useful for evaluating providers and
+for reaching providers without a dedicated plugin. Output is delivered as raw
+16-bit little-endian PCM (24 kHz by default; other native rates are
+resampled).
+
+See the [repository](https://github.com/livekit/agents-js) for more information
+about the framework as a whole.
diff --git a/plugins/speechsdk/api-extractor.json b/plugins/speechsdk/api-extractor.json
@@ -0,0 +1,5 @@
+{
+    "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
+    "extends": "../../api-extractor-shared.json",
+    "mainEntryPointFilePath": "./dist/index.d.ts"
+}
diff --git a/plugins/speechsdk/package.json b/plugins/speechsdk/package.json
@@ -0,0 +1,52 @@
+{
+  "name": "@livekit/agents-plugin-speechsdk",
+  "version": "1.4.5",
+  "description": "speech-sdk multi-provider TTS plugin for LiveKit Node Agents",
+  "main": "dist/index.js",
+  "require": "dist/index.cjs",
+  "types": "dist/index.d.ts",
+  "exports": {
+    "import": {
+      "types": "./dist/index.d.ts",
+      "default": "./dist/index.js"
+    },
+    "require": {
+      "types": "./dist/index.d.cts",
+      "default": "./dist/index.cjs"
+    }
+  },
+  "author": "LiveKit",
+  "type": "module",
+  "repository": "git@github.com:livekit/agents-js.git",
+  "license": "Apache-2.0",
+  "files": [
+    "dist",
+    "src",
+    "README.md"
+  ],
+  "scripts": {
+    "build": "tsup --onSuccess \"pnpm build:types\"",
+    "build:types": "tsc --declaration --emitDeclarationOnly && node ../../scripts/copyDeclarationOutput.js",
+    "clean": "rm -rf dist",
+    "clean:build": "pnpm clean && pnpm build",
+    "lint": "eslint -f unix \"src/**/*.{ts,js}\"",
+    "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
+    "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
+  },
+  "dependencies": {
+    "@speech-sdk/core": "^0.14.0"
+  },
+  "devDependencies": {
+    "@livekit/agents": "workspace:*",
+    "@livekit/agents-plugin-openai": "workspace:*",
+    "@livekit/agents-plugins-test": "workspace:*",
+    "@livekit/rtc-node": "catalog:",
+    "@microsoft/api-extractor": "^7.35.0",
+    "tsup": "^8.3.5",
+    "typescript": "^5.0.0"
+  },
+  "peerDependencies": {
+    "@livekit/agents": "workspace:*",
+    "@livekit/rtc-node": "catalog:"
+  }
+}
diff --git a/plugins/speechsdk/src/index.ts b/plugins/speechsdk/src/index.ts
@@ -0,0 +1,19 @@
+// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { Plugin } from '@livekit/agents';
+
+export * from './models.js';
+export * from './tts.js';
+
+class SpeechSDKPlugin extends Plugin { // metadata descriptor for this package
+  constructor() {
+    super({
+      title: 'speechsdk',
+      version: __PACKAGE_VERSION__, // not declared in this file; presumably a build-time define (confirm)
+      package: __PACKAGE_NAME__, // not declared in this file; presumably a build-time define (confirm)
+    });
+  }
+}
+
+Plugin.registerPlugin(new SpeechSDKPlugin()); // runs once, as a side effect of importing this module
diff --git a/plugins/speechsdk/src/models.ts b/plugins/speechsdk/src/models.ts
@@ -0,0 +1,28 @@
+// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * TTS providers supported by speech-sdk: the prefix before the first `/` of a `provider/model` id.
+ */
+export type TTSProviders =
+  | 'cartesia'
+  | 'deepgram'
+  | 'elevenlabs'
+  | 'fal-ai'
+  | 'fish-audio'
+  | 'google'
+  | 'hume'
+  | 'inworld'
+  | 'minimax'
+  | 'mistral'
+  | 'murf'
+  | 'openai'
+  | 'resemble'
+  | 'smallest-ai'
+  | 'xai';
+
+/**
+ * A `provider/model` string, e.g. `openai/gpt-4o-mini-tts`; the model part may itself contain `/`.
+ */
+export type TTSModels = `${TTSProviders}/${string}`;
diff --git a/plugins/speechsdk/src/tts.test.ts b/plugins/speechsdk/src/tts.test.ts
@@ -0,0 +1,39 @@
+// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { STT } from '@livekit/agents-plugin-openai';
+import { tts } from '@livekit/agents-plugins-test';
+import { describe, expect, it } from 'vitest';
+import { TTS } from './tts.js';
+
+describe('SpeechSDK TTS model strings', () => { // these cases only construct TTS; none calls synthesis
+  it('rejects a model without a provider prefix', () => { // input has no '/' at all
+    expect(() => new TTS({ model: 'gpt-4o-mini-tts', speechbaseApiKey: undefined })).toThrow(
+      /provider\/model/,
+    );
+  });
+
+  it('rejects an unknown provider prefix', () => { // 'acme' is not a member of TTSProviders
+    expect(() => new TTS({ model: 'acme/some-model', speechbaseApiKey: undefined })).toThrow(
+      /Unknown speech-sdk provider/,
+    );
+  });
+
+  // NOTE(review): unlike the two cases above, this one does not pass `speechbaseApiKey: undefined` —
+  // confirm against tts.ts that the result does not depend on the environment.
+  it('splits path-style model ids on the first slash only', () => {
+    const instance = new TTS({ model: 'fal-ai/kokoro/american-english' });
+    expect(instance.provider).toEqual('fal-ai');
+    expect(instance.model).toEqual('fal-ai/kokoro/american-english'); // model keeps the full string
+  });
+});
+
+const hasOpenAIKey = Boolean(process.env.OPENAI_API_KEY); // gates the live suite below
+
+if (hasOpenAIKey) {
+  describe('SpeechSDK TTS', async () => { // suite from @livekit/agents-plugins-test; TTS uses defaults
+    await tts(new TTS(), new STT({ useRealtime: false, model: 'whisper-1' }), { streaming: false });
+  });
+} else {
+  describe('SpeechSDK TTS', () => {
+    it.skip('requires OPENAI_API_KEY', () => {}); // reported as skipped when the key is absent
+  });
+}