From 96ee459db7eb635ef9f0312f31ce3654af18e087 Mon Sep 17 00:00:00 2001 From: Bilal Tahir Date: Thu, 11 Jun 2026 00:40:29 +0000 Subject: [PATCH] feat(speechsdk): add speech-sdk multi-provider TTS plugin New @livekit/agents-plugin-speechsdk package: non-streaming TTS across 15 providers through one provider/model string, including providers without a dedicated plugin (Murf, Smallest.ai, fal.ai-hosted open-weight models). Synthesis requests raw PCM and resamples to the configured frame rate with AudioResampler when a provider's native rate differs. speech-sdk's internal retry is disabled so the framework's ChunkedStream retry policy owns retries. Co-Authored-By: Claude Fable 5 --- .changeset/speechsdk-tts-plugin.md | 5 + CLAUDE.md | 2 +- README.md | 1 + plugins/speechsdk/README.md | 42 ++++ plugins/speechsdk/api-extractor.json | 5 + plugins/speechsdk/package.json | 52 +++++ plugins/speechsdk/src/index.ts | 19 ++ plugins/speechsdk/src/models.ts | 28 +++ plugins/speechsdk/src/tts.test.ts | 39 ++++ plugins/speechsdk/src/tts.ts | 278 +++++++++++++++++++++++++++ plugins/speechsdk/tsconfig.json | 14 ++ plugins/speechsdk/tsup.config.ts | 7 + pnpm-lock.yaml | 81 ++++++++ turbo.json | 1 + 14 files changed, 573 insertions(+), 1 deletion(-) create mode 100644 .changeset/speechsdk-tts-plugin.md create mode 100644 plugins/speechsdk/README.md create mode 100644 plugins/speechsdk/api-extractor.json create mode 100644 plugins/speechsdk/package.json create mode 100644 plugins/speechsdk/src/index.ts create mode 100644 plugins/speechsdk/src/models.ts create mode 100644 plugins/speechsdk/src/tts.test.ts create mode 100644 plugins/speechsdk/src/tts.ts create mode 100644 plugins/speechsdk/tsconfig.json create mode 100644 plugins/speechsdk/tsup.config.ts diff --git a/.changeset/speechsdk-tts-plugin.md b/.changeset/speechsdk-tts-plugin.md new file mode 100644 index 000000000..8ec015f28 --- /dev/null +++ b/.changeset/speechsdk-tts-plugin.md @@ -0,0 +1,5 @@ +--- 
+'@livekit/agents-plugin-speechsdk': patch +--- + +Add the speech-sdk TTS plugin: synthesis across 15 providers via one `provider/model` string, including providers without a dedicated plugin (Murf, Smallest.ai, fal.ai-hosted open-weight models). diff --git a/CLAUDE.md b/CLAUDE.md index 14d8b5b82..1c52da0d6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -126,7 +126,7 @@ Plugin capabilities by type: - **LLM**: openai, google, baseten, mistralai - **STT**: deepgram (v1+v2), openai, baseten, sarvam (v1/v2/v3), mistralai, inworld, cartesia -- **TTS**: cartesia, elevenlabs, deepgram, openai, neuphonic, resemble, rime, inworld, baseten, sarvam (v1/v2/v3), mistralai, fishaudio, hume +- **TTS**: cartesia, elevenlabs, deepgram, openai, neuphonic, resemble, rime, inworld, baseten, sarvam (v1/v2/v3), mistralai, fishaudio, hume, speechsdk (multi-provider) - **VAD**: silero (ONNX-based, local) - **EOU/Turn Detection**: livekit (HuggingFace + ONNX) - **Realtime**: openai (+ responses/, ws/ modules), google (beta), xai, phonic diff --git a/README.md b/README.md index 8b7de522f..0b83b9dc0 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ Currently, only the following plugins are supported: | [@livekit/agents-plugin-phonic](https://www.npmjs.com/package/@livekit/agents-plugin-phonic) | Realtime | | [@livekit/agents-plugin-fishaudio](https://www.npmjs.com/package/@livekit/agents-plugin-fishaudio) | TTS | | [@livekit/agents-plugin-hume](https://www.npmjs.com/package/@livekit/agents-plugin-hume) | TTS | +| [@livekit/agents-plugin-speechsdk](https://www.npmjs.com/package/@livekit/agents-plugin-speechsdk) | TTS | ## Docs and guides diff --git a/plugins/speechsdk/README.md b/plugins/speechsdk/README.md new file mode 100644 index 000000000..0dc2f0b18 --- /dev/null +++ b/plugins/speechsdk/README.md @@ -0,0 +1,42 @@ + +# speech-sdk plugin for LiveKit Agents + +The Agents Framework is designed for building realtime, programmable +participants that run on servers. 
Use it to create conversational, multi-modal +voice agents that can see, hear, and understand. + +This package contains the [speech-sdk](https://github.com/Jellypod-Inc/speech-sdk) +plugin, which allows for voice synthesis across 15 TTS providers through a +single `provider/model` string, including providers without a dedicated +LiveKit plugin (Murf, Smallest.ai, and fal.ai-hosted open-weight models such +as Kokoro): + +```ts +import * as speechsdk from '@livekit/agents-plugin-speechsdk'; + +const tts = new speechsdk.TTS({ model: 'openai/gpt-4o-mini-tts', voice: 'alloy' }); +// or: { model: 'murf/FALCON', voice: 'en-US-amara' } +// or: { model: 'fal-ai/kokoro/american-english', voice: 'af_heart' } +``` + +Calls go directly to the selected provider using your own API key from the +provider's standard environment variable (`OPENAI_API_KEY`, `MURF_API_KEY`, +and so on). Optionally, setting `SPEECHBASE_API_KEY` routes the same +`provider/model` strings through [speechbase.ai](https://speechbase.ai), a +hosted gateway, so one key covers every provider; without it, calls go +directly to the provider. + +Synthesis is non-streaming (`AgentSession` wraps it in a sentence-level +`StreamAdapter` automatically). For latency-critical production agents, a +dedicated provider plugin with native WebSocket streaming remains the better +choice when one exists; this plugin is useful for evaluating providers and +for reaching providers without a dedicated plugin. Output is delivered as raw +16-bit little-endian PCM (24 kHz by default; other native rates are +resampled). + +See the [repository](https://github.com/livekit/agents-js) for more information +about the framework as a whole. 
diff --git a/plugins/speechsdk/api-extractor.json b/plugins/speechsdk/api-extractor.json new file mode 100644 index 000000000..cf6493161 --- /dev/null +++ b/plugins/speechsdk/api-extractor.json @@ -0,0 +1,5 @@ +{ + "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", + "extends": "../../api-extractor-shared.json", + "mainEntryPointFilePath": "./dist/index.d.ts" +} diff --git a/plugins/speechsdk/package.json b/plugins/speechsdk/package.json new file mode 100644 index 000000000..1f3a3776e --- /dev/null +++ b/plugins/speechsdk/package.json @@ -0,0 +1,52 @@ +{ + "name": "@livekit/agents-plugin-speechsdk", + "version": "1.4.5", + "description": "speech-sdk multi-provider TTS plugin for LiveKit Node Agents", + "main": "dist/index.js", + "require": "dist/index.cjs", + "types": "dist/index.d.ts", + "exports": { + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + }, + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + } + }, + "author": "LiveKit", + "type": "module", + "repository": "git@github.com:livekit/agents-js.git", + "license": "Apache-2.0", + "files": [ + "dist", + "src", + "README.md" + ], + "scripts": { + "build": "tsup --onSuccess \"pnpm build:types\"", + "build:types": "tsc --declaration --emitDeclarationOnly && node ../../scripts/copyDeclarationOutput.js", + "clean": "rm -rf dist", + "clean:build": "pnpm clean && pnpm build", + "lint": "eslint -f unix \"src/**/*.{ts,js}\"", + "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript", + "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose" + }, + "dependencies": { + "@speech-sdk/core": "^0.14.0" + }, + "devDependencies": { + "@livekit/agents": "workspace:*", + "@livekit/agents-plugin-openai": "workspace:*", + "@livekit/agents-plugins-test": "workspace:*", + "@livekit/rtc-node": "catalog:", + "@microsoft/api-extractor": 
"^7.35.0", + "tsup": "^8.3.5", + "typescript": "^5.0.0" + }, + "peerDependencies": { + "@livekit/agents": "workspace:*", + "@livekit/rtc-node": "catalog:" + } +} diff --git a/plugins/speechsdk/src/index.ts b/plugins/speechsdk/src/index.ts new file mode 100644 index 000000000..8343608aa --- /dev/null +++ b/plugins/speechsdk/src/index.ts @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 +import { Plugin } from '@livekit/agents'; + +export * from './models.js'; +export * from './tts.js'; + +class SpeechSDKPlugin extends Plugin { + constructor() { + super({ + title: 'speechsdk', + version: __PACKAGE_VERSION__, + package: __PACKAGE_NAME__, + }); + } +} + +Plugin.registerPlugin(new SpeechSDKPlugin()); diff --git a/plugins/speechsdk/src/models.ts b/plugins/speechsdk/src/models.ts new file mode 100644 index 000000000..c242ed8c8 --- /dev/null +++ b/plugins/speechsdk/src/models.ts @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +/** + * TTS providers supported by speech-sdk, used as the prefix of a `provider/model` string. + */ +export type TTSProviders = + | 'cartesia' + | 'deepgram' + | 'elevenlabs' + | 'fal-ai' + | 'fish-audio' + | 'google' + | 'hume' + | 'inworld' + | 'minimax' + | 'mistral' + | 'murf' + | 'openai' + | 'resemble' + | 'smallest-ai' + | 'xai'; + +/** + * A `provider/model` string, e.g. `openai/gpt-4o-mini-tts` or `elevenlabs/eleven_flash_v2_5`. + */ +export type TTSModels = `${TTSProviders}/${string}`; diff --git a/plugins/speechsdk/src/tts.test.ts b/plugins/speechsdk/src/tts.test.ts new file mode 100644 index 000000000..112c33737 --- /dev/null +++ b/plugins/speechsdk/src/tts.test.ts @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +import { STT } from '@livekit/agents-plugin-openai'; +import { tts } from '@livekit/agents-plugins-test'; +import { describe, expect, it } from 'vitest'; +import { TTS } from './tts.js'; + +describe('SpeechSDK TTS model strings', () => { + it('rejects a model without a provider prefix', () => { + expect(() => new TTS({ model: 'gpt-4o-mini-tts', speechbaseApiKey: undefined })).toThrow( + /provider\/model/, + ); + }); + + it('rejects an unknown provider prefix', () => { + expect(() => new TTS({ model: 'acme/some-model', speechbaseApiKey: undefined })).toThrow( + /Unknown speech-sdk provider/, + ); + }); + + it('splits path-style model ids on the first slash only', () => { + const instance = new TTS({ model: 'fal-ai/kokoro/american-english' }); + expect(instance.provider).toEqual('fal-ai'); + expect(instance.model).toEqual('fal-ai/kokoro/american-english'); + }); +}); + +const hasOpenAIKey = Boolean(process.env.OPENAI_API_KEY); + +if (hasOpenAIKey) { + describe('SpeechSDK TTS', async () => { + await tts(new TTS(), new STT({ useRealtime: false, model: 'whisper-1' }), { streaming: false }); + }); +} else { + describe('SpeechSDK TTS', () => { + it.skip('requires OPENAI_API_KEY', () => {}); + }); +} diff --git a/plugins/speechsdk/src/tts.ts b/plugins/speechsdk/src/tts.ts new file mode 100644 index 000000000..0fff0d6aa --- /dev/null +++ b/plugins/speechsdk/src/tts.ts @@ -0,0 +1,278 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. 
+//
+// SPDX-License-Identifier: Apache-2.0
+import {
+  type APIConnectOptions,
+  APIError,
+  APIStatusError,
+  AudioByteStream,
+  shortuuid,
+  tts,
+} from '@livekit/agents';
+import { AudioFrame, AudioResampler } from '@livekit/rtc-node';
+import type * as speechProviders from '@speech-sdk/core/providers';
+import type { ResolvedModel } from '@speech-sdk/core/types';
+import type { TTSModels, TTSProviders } from './models.js';
+
+const SPEECHSDK_TTS_CHANNELS = 1;
+const DEFAULT_SAMPLE_RATE = 24000;
+const DEFAULT_MODEL: TTSModels = 'openai/gpt-4o-mini-tts';
+const DEFAULT_VOICE = 'alloy';
+
+const RETRYABLE_STATUS_CODES = new Set([408, 429]);
+const PCM_RATE_REGEX = /rate=(\d+)/;
+
+type ProvidersModule = typeof speechProviders;
+type SpeechModelFactory = (config: { apiKey?: string }) => (modelId: string) => ResolvedModel;
+
+const PROVIDER_FACTORIES: Record<TTSProviders, (mod: ProvidersModule) => SpeechModelFactory> = {
+  cartesia: (mod) => mod.createCartesia,
+  deepgram: (mod) => mod.createDeepgram,
+  elevenlabs: (mod) => mod.createElevenLabs,
+  'fal-ai': (mod) => mod.createFal,
+  'fish-audio': (mod) => mod.createFishAudio,
+  google: (mod) => mod.createGoogle,
+  hume: (mod) => mod.createHume,
+  inworld: (mod) => mod.createInworld,
+  minimax: (mod) => mod.createMiniMax,
+  mistral: (mod) => mod.createMistral,
+  murf: (mod) => mod.createMurf,
+  openai: (mod) => mod.createOpenAI,
+  resemble: (mod) => mod.createResemble,
+  'smallest-ai': (mod) => mod.createSmallestAI,
+  xai: (mod) => mod.createXai,
+};
+
+const isKnownProvider = (provider: string): provider is TTSProviders =>
+  provider in PROVIDER_FACTORIES;
+
+// fal model ids are path-style ("kokoro/american-english"), so split on the first slash only.
+const parseModel = (model: string): { provider: string; modelId: string } => {
+  const slash = model.indexOf('/');
+  if (slash <= 0 || slash === model.length - 1) {
+    throw new Error(
+      `Invalid speech-sdk model "${model}": expected a "provider/model" string, e.g. "${DEFAULT_MODEL}"`,
+    );
+  }
+  return { provider: model.slice(0, slash), modelId: model.slice(slash + 1) };
+};
+
+export interface TTSOptions {
+  /**
+   * Model as a `provider/model` string, e.g. `openai/gpt-4o-mini-tts` or
+   * `elevenlabs/eleven_flash_v2_5`. The prefix selects which provider API is called.
+   */
+  model: TTSModels | string;
+  /** Voice ID, as defined by the selected provider. */
+  voice: string;
+  /** Sample rate of emitted frames in Hz. Audio returned at another native rate is resampled. */
+  sampleRate: number;
+  /**
+   * Provider API key. Defaults to the selected provider's standard environment variable
+   * (e.g. `OPENAI_API_KEY`, `ELEVENLABS_API_KEY`, `MURF_API_KEY`).
+   */
+  apiKey?: string;
+  /**
+   * SpeechBase gateway API key, defaulting to `SPEECHBASE_API_KEY`. When set, `provider/model`
+   * strings are routed through the speechbase.ai gateway with this single key; when unset,
+   * calls go directly to the provider with your own key.
+   */
+  speechbaseApiKey?: string;
+  /** Additional provider-specific request options forwarded to speech-sdk. */
+  providerOptions?: Record<string, unknown>;
+}
+
+const defaultTTSOptions: TTSOptions = {
+  model: DEFAULT_MODEL,
+  voice: DEFAULT_VOICE,
+  sampleRate: DEFAULT_SAMPLE_RATE,
+  speechbaseApiKey: process.env.SPEECHBASE_API_KEY,
+};
+
+const validateModel = (opts: TTSOptions) => {
+  const { provider } = parseModel(opts.model);
+  if (!opts.speechbaseApiKey && !isKnownProvider(provider)) {
+    throw new Error(
+      `Unknown speech-sdk provider "${provider}", expected one of: ${Object.keys(PROVIDER_FACTORIES).join(', ')}`,
+    );
+  }
+};
+
+const resolveSpeechModel = async (opts: TTSOptions): Promise<string | ResolvedModel> => {
+  if (opts.speechbaseApiKey) {
+    return opts.model;
+  }
+  const { provider, modelId } = parseModel(opts.model);
+  if (!isKnownProvider(provider)) {
+    throw new Error(`Unknown speech-sdk provider "${provider}"`);
+  }
+  const providers = await import('@speech-sdk/core/providers');
+  const factory = PROVIDER_FACTORIES[provider](providers);
+  return factory(opts.apiKey !== undefined ? { apiKey: opts.apiKey } : {})(modelId);
+};
+
+const toAPIError = async (error: unknown): Promise<unknown> => {
+  const { ApiError: SpeechApiError, SpeechSDKError } = await import('@speech-sdk/core');
+  if (error instanceof SpeechApiError) {
+    return new APIStatusError({
+      message: error.message,
+      options: {
+        statusCode: error.statusCode,
+        retryable: RETRYABLE_STATUS_CODES.has(error.statusCode) || error.statusCode >= 500,
+      },
+    });
+  }
+  if (error instanceof SpeechSDKError) {
+    return new APIError(error.message, { retryable: false });
+  }
+  return error;
+};
+
+export class TTS extends tts.TTS {
+  #opts: TTSOptions;
+  label = 'speechsdk.TTS';
+  private abortController = new AbortController();
+
+  get model(): string {
+    return this.#opts.model;
+  }
+
+  get provider(): string {
+    return parseModel(this.#opts.model).provider;
+  }
+
+  /**
+   * Create a new instance of speech-sdk TTS.
+   *
+   * @remarks
+   * The provider's API key must be set in its standard environment variable (e.g.
+   * `OPENAI_API_KEY` for `openai/...` models) or passed via the `apiKey` option.
+   */
+  constructor(opts: Partial<TTSOptions> = {}) {
+    const merged = { ...defaultTTSOptions, ...opts };
+    super(merged.sampleRate, SPEECHSDK_TTS_CHANNELS, { streaming: false });
+    validateModel(merged);
+    this.#opts = merged;
+  }
+
+  updateOptions(opts: { model?: TTSModels | string; voice?: string }) {
+    const merged = { ...this.#opts, ...opts };
+    validateModel(merged);
+    this.#opts = merged;
+  }
+
+  synthesize(
+    text: string,
+    connOptions?: APIConnectOptions,
+    abortSignal?: AbortSignal,
+  ): ChunkedStream {
+    const signal = abortSignal
+      ? AbortSignal.any([abortSignal, this.abortController.signal])
+      : this.abortController.signal;
+    return new ChunkedStream(this, text, this.#opts, connOptions, signal);
+  }
+
+  stream(): tts.SynthesizeStream {
+    throw new Error('Streaming is not supported on SpeechSDK TTS');
+  }
+
+  async close(): Promise<void> {
+    this.abortController.abort();
+  }
+}
+
+export class ChunkedStream extends tts.ChunkedStream {
+  label = 'speechsdk.ChunkedStream';
+  #opts: TTSOptions;
+
+  constructor(
+    tts: TTS,
+    text: string,
+    opts: TTSOptions,
+    connOptions?: APIConnectOptions,
+    abortSignal?: AbortSignal,
+  ) {
+    super(text, tts, connOptions, abortSignal);
+    this.#opts = opts;
+  }
+
+  protected async run() {
+    try {
+      const { generateSpeech } = await import('@speech-sdk/core');
+      const model = await resolveSpeechModel(this.#opts);
+      // maxRetries: 0 disables speech-sdk's internal retry; the retry policy in
+      // tts.ChunkedStream (connOptions) owns retries to avoid multiplying attempts.
+      const result = await generateSpeech({
+        model,
+        text: this.inputText,
+        voice: this.#opts.voice,
+        output: { format: 'pcm' },
+        providerOptions: this.#opts.providerOptions,
+        apiKey: this.#opts.speechbaseApiKey,
+        maxRetries: 0,
+        abortSignal: this.abortController.signal,
+      });
+
+      const requestId = shortuuid();
+      const frames = this.#toFrames(result.audio.uint8Array, result.audio.mediaType);
+
+      let lastFrame: AudioFrame | undefined;
+      const sendLastFrame = (segmentId: string, final: boolean) => {
+        if (lastFrame) {
+          this.queue.put({ requestId, segmentId, frame: lastFrame, final });
+          lastFrame = undefined;
+        }
+      };
+
+      for (const frame of frames) {
+        sendLastFrame(requestId, false);
+        lastFrame = frame;
+      }
+      sendLastFrame(requestId, true);
+    } catch (error) {
+      if (this.abortController.signal.aborted) {
+        return;
+      }
+      if (error instanceof Error && error.name === 'AbortError') {
+        return;
+      }
+      throw await toAPIError(error);
+    } finally {
+      this.queue.close();
+    }
+  }
+
+  #toFrames(pcm: Uint8Array, mediaType: string): AudioFrame[] {
+    const rateMatch = PCM_RATE_REGEX.exec(mediaType);
+    if (!rateMatch) {
+      throw new APIError(`speech-sdk returned unexpected mediaType "${mediaType}", expected PCM`, {
+        retryable: false,
+      });
+    }
+    const nativeRate = Number(rateMatch[1]);
+    const bstream = new AudioByteStream(this.#opts.sampleRate, SPEECHSDK_TTS_CHANNELS);
+
+    if (nativeRate === this.#opts.sampleRate) {
+      return [...bstream.write(pcm), ...bstream.flush()];
+    }
+
+    const aligned = pcm.byteOffset % 2 === 0 ? pcm : pcm.slice();
+    const samples = new Int16Array(
+      aligned.buffer,
+      aligned.byteOffset,
+      Math.floor(aligned.byteLength / 2),
+    );
+    const nativeFrame = new AudioFrame(samples, nativeRate, SPEECHSDK_TTS_CHANNELS, samples.length);
+    const resampler = new AudioResampler(nativeRate, this.#opts.sampleRate, SPEECHSDK_TTS_CHANNELS);
+    const frames: AudioFrame[] = [];
+    try {
+      for (const resampled of [...resampler.push(nativeFrame), ...resampler.flush()]) {
+        frames.push(...bstream.write(resampled.data));
+      }
+    } finally {
+      resampler.close();
+    }
+    frames.push(...bstream.flush());
+    return frames;
+  }
+}
diff --git a/plugins/speechsdk/tsconfig.json b/plugins/speechsdk/tsconfig.json new file mode 100644 index 000000000..41188b488 --- /dev/null +++ b/plugins/speechsdk/tsconfig.json @@ -0,0 +1,14 @@ +{ + "extends": "../../tsconfig.json", + "include": ["./src"], + "compilerOptions": { + "rootDir": "./src", + "declarationDir": "./dist", + "outDir": "./dist" + }, + "typedocOptions": { + "name": "plugins/agents-plugin-speechsdk", + "entryPointStrategy": "resolve", + "entryPoints": ["src/index.ts"] + } +} diff --git a/plugins/speechsdk/tsup.config.ts b/plugins/speechsdk/tsup.config.ts new file mode 100644 index 000000000..8ca20961f --- /dev/null +++ b/plugins/speechsdk/tsup.config.ts @@ -0,0 +1,7 @@ +import { defineConfig } from 'tsup'; + +import defaults from '../../tsup.config'; + +export default defineConfig({ + ...defaults, +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7a3ebdd0c..dbec21809 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1296,6 +1296,34 @@ importers: specifier: ^5.0.0 version: 5.9.3 + plugins/speechsdk: + dependencies: + '@speech-sdk/core': + specifier: ^0.14.0 + version: 0.14.0 + devDependencies: + '@livekit/agents': + specifier: workspace:* + version: link:../../agents + '@livekit/agents-plugin-openai': + specifier: workspace:* + version: link:../openai + '@livekit/agents-plugins-test': + specifier: workspace:* + version: link:../test +
'@livekit/rtc-node': + specifier: 'catalog:' + version: 0.13.29 + '@microsoft/api-extractor': + specifier: ^7.35.0 + version: 7.43.7(@types/node@25.6.0) + tsup: + specifier: ^8.3.5 + version: 8.4.0(@microsoft/api-extractor@7.43.7(@types/node@25.6.0))(postcss@8.5.9)(tsx@4.21.0)(typescript@5.9.3) + typescript: + specifier: ^5.0.0 + version: 5.9.3 + plugins/tavus: dependencies: livekit-server-sdk: @@ -2217,6 +2245,11 @@ packages: '@manypkg/get-packages@1.1.3': resolution: {integrity: sha512-fo+QhuU3qE/2TQMQmbVMqaQ6EWbMhi4ABWP+O4AM1NqPBuy0OrApV5LO6BrrgnhtAHS2NH6RrVk9OL181tTi8A==} + '@mediabunny/mp3-encoder@1.46.0': + resolution: {integrity: sha512-SG69nm0ntRR4yc5TP9LG1/oxrh5PcoIEZwFm8BXecj3xNF0V+PlnpyJ1IQVS/JEXRIOvBJQ09YFknVGnJNAJrQ==} + peerDependencies: + mediabunny: ^1.0.0 + '@microsoft/api-extractor-model@7.28.17': resolution: {integrity: sha512-b2AfLP33oEVtWLeNavSBRdyDa8sKlXjN4pdhBnC4HLontOtjILhL1ERAmZObF4PWSyChnnC2vjb47C9WKCFRGg==} @@ -2652,6 +2685,9 @@ packages: '@rushstack/ts-command-line@4.21.0': resolution: {integrity: sha512-z38FLUCn8M9FQf19gJ9eltdwkvc47PxvJmVZS6aKwbBAa3Pis3r3A+ZcBCVPNb9h/Tbga+i0tHdzoSGUoji9GQ==} + '@speech-sdk/core@0.14.0': + resolution: {integrity: sha512-oxVzxpV2eQ9Y8I/tJCUulD3FYf8f4y4WM//eyz7MPPuhnawpAv1j54v49EW5XoxlEOPHJaDf1+Vwmr5X5gnk1Q==} + '@standard-schema/spec@1.1.0': resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==} @@ -2673,6 +2709,12 @@ packages: '@types/deep-eql@4.0.2': resolution: {integrity: sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==} + '@types/dom-mediacapture-transform@0.1.11': + resolution: {integrity: sha512-Y2p+nGf1bF2XMttBnsVPHUWzRRZzqUoJAKmiP10b5umnO6DDrWI0BrGDJy1pOHoOULVmGSfFNkQrAlC5dcj6nQ==} + + '@types/dom-webcodecs@0.1.13': + resolution: {integrity: sha512-O5hkiFIcjjszPIYyUSyvScyvrBoV3NOEEZx/pMlsu44TKzWNkLVBBxnxJz42in5n3QIolYOcBYFCPZZ0h8SkwQ==} + '@types/estree@1.0.8': resolution: {integrity: 
sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==} @@ -3861,6 +3903,10 @@ packages: resolution: {integrity: sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw==} engines: {node: '>= 0.4'} + is-network-error@1.3.2: + resolution: {integrity: sha512-PhBY86zaxNZUuWP6h13Vu5oFe0XY6/UlKzQnYFELzGVHygP3MxmvTfYSG7GN3aIab/iWudSMgjSnG9Dq+nHrgA==} + engines: {node: '>=16'} + is-number-object@1.0.7: resolution: {integrity: sha512-k1U0IRzLMo7ZlYIfzRu23Oh6MiIFasgpb9X76eqfFZAqwH44UI4KTBvBYIZ1dSL9ZzChTB9ShHfLkR4pdW5krQ==} engines: {node: '>= 0.4'} @@ -4131,6 +4177,9 @@ packages: resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} engines: {node: '>= 0.4'} + mediabunny@1.46.0: + resolution: {integrity: sha512-HIaMQ6PVMndrFb9sNCh9/uAsLFBfLzyAgbk3kN7ZqlRRq//9RPYqIOQtXYpkbWBZU8ZJkaAUNLDtwTcdKo7yQw==} + merge2@1.4.1: resolution: {integrity: sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==} engines: {node: '>= 8'} @@ -4322,6 +4371,10 @@ packages: resolution: {integrity: sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==} engines: {node: '>=8'} + p-retry@8.0.0: + resolution: {integrity: sha512-kFVqH1HxOHp8LupNsOys7bSV09VYTRLxarH/mokO4Rqhk6wGi70E0jh4VzvVGXfEVNggHoHLAMWsQqHyU1Ey9A==} + engines: {node: '>=22'} + p-try@2.2.0: resolution: {integrity: sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==} engines: {node: '>=6'} @@ -5920,6 +5973,10 @@ snapshots: globby: 11.1.0 read-yaml-file: 1.1.0 + '@mediabunny/mp3-encoder@1.46.0(mediabunny@1.46.0)': + dependencies: + mediabunny: 1.46.0 + '@microsoft/api-extractor-model@7.28.17(@types/node@22.19.1)': dependencies: '@microsoft/tsdoc': 0.14.2 @@ -6425,6 +6482,13 @@ snapshots: transitivePeerDependencies: - '@types/node' + '@speech-sdk/core@0.14.0': + 
dependencies: + '@mediabunny/mp3-encoder': 1.46.0(mediabunny@1.46.0) + mediabunny: 1.46.0 + p-retry: 8.0.0 + zod: 4.3.6 + '@standard-schema/spec@1.1.0': {} '@trivago/prettier-plugin-sort-imports@4.3.0(prettier@3.2.5)': @@ -6448,6 +6512,12 @@ snapshots: '@types/deep-eql@4.0.2': {} + '@types/dom-mediacapture-transform@0.1.11': + dependencies: + '@types/dom-webcodecs': 0.1.13 + + '@types/dom-webcodecs@0.1.13': {} + '@types/estree@1.0.8': {} '@types/estree@1.0.9': {} @@ -7871,6 +7941,8 @@ snapshots: is-negative-zero@2.0.3: {} + is-network-error@1.3.2: {} + is-number-object@1.0.7: dependencies: has-tostringtag: 1.0.2 @@ -8115,6 +8187,11 @@ snapshots: math-intrinsics@1.1.0: {} + mediabunny@1.46.0: + dependencies: + '@types/dom-mediacapture-transform': 0.1.11 + '@types/dom-webcodecs': 0.1.13 + merge2@1.4.1: {} micromatch@4.0.8: @@ -8307,6 +8384,10 @@ snapshots: '@types/retry': 0.12.0 retry: 0.13.1 + p-retry@8.0.0: + dependencies: + is-network-error: 1.3.2 + p-try@2.2.0: {} package-json-from-dist@1.0.1: {} diff --git a/turbo.json b/turbo.json index c3b1e3ee6..56986853f 100644 --- a/turbo.json +++ b/turbo.json @@ -95,6 +95,7 @@ "MINIMAX_API_KEY", "MINIMAX_BASE_URL", "MISTRAL_API_KEY", + "SPEECHBASE_API_KEY", "VITEST" ], "pipeline": {