From 3a551eceb7e0ca2a1e4a36a93fd07206c37a7524 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Fri, 20 Mar 2026 11:49:52 +0530 Subject: [PATCH 1/2] feat(voice_agent): add Azure TTS and STT provider support --- .../client/src/config/voice-providers.ts | 83 ++++++++++++++++++- .../stt-configs/CreateSttConfigDialog.tsx | 41 +++++++-- .../stt-configs/EditSttConfigDialog.tsx | 29 ++++++- .../tts-configs/CreateTtsConfigDialog.tsx | 43 ++++++++-- .../tts-configs/EditTtsConfigDialog.tsx | 29 ++++++- wavefront/client/src/types/stt-config.ts | 11 ++- wavefront/client/src/types/tts-config.ts | 13 ++- .../services/pipecat_service.py | 2 + .../call_processing/services/stt_service.py | 42 +++++++++- .../call_processing/services/tts_service.py | 71 +++++++++++++++- ...3b06cfe7f_add_region_to_tts_stt_configs.py | 33 ++++++++ .../db_repo_module/models/stt_config.py | 1 + .../db_repo_module/models/tts_config.py | 1 + .../controllers/stt_config_controller.py | 3 + .../controllers/tts_config_controller.py | 3 + .../voice_agents_module/models/stt_schemas.py | 5 ++ .../voice_agents_module/models/tts_schemas.py | 5 ++ .../services/stt_config_service.py | 3 + .../services/tts_config_service.py | 3 + 19 files changed, 396 insertions(+), 25 deletions(-) create mode 100644 wavefront/server/modules/db_repo_module/db_repo_module/alembic/versions/2026_03_19_1716-c153b06cfe7f_add_region_to_tts_stt_configs.py diff --git a/wavefront/client/src/config/voice-providers.ts b/wavefront/client/src/config/voice-providers.ts index 66592821..6c679b17 100644 --- a/wavefront/client/src/config/voice-providers.ts +++ b/wavefront/client/src/config/voice-providers.ts @@ -44,7 +44,7 @@ export interface VoiceProvidersConfig { */ export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = { tts: { - providers: ['elevenlabs', 'deepgram', 'cartesia', 'sarvam'] as const, + providers: ['elevenlabs', 'deepgram', 'cartesia', 'sarvam', 'azure'] as const, configs: { elevenlabs: { name: 'ElevenLabs', @@ -217,10 +217,61 @@ export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = { }, }, }, + azure: { + name: 'Azure', + badge: { + bg: 'bg-sky-100', + text: 'text-sky-700', + }, + parameters: { + style: { + type: 'string' as const, + default: '', + description: 'Speaking style (e.g. cheerful, sad, angry)', + placeholder: 'cheerful', + }, + rate: { + type: 'string' as const, + default: '', + description: 'Speech rate (e.g. +10%, fast, slow)', + placeholder: '+0%', + }, + pitch: { + type: 'string' as const, + default: '', + description: 'Pitch adjustment (e.g. +0Hz, high, low)', + placeholder: '+0Hz', + }, + role: { + type: 'string' as const, + default: '', + description: 'Voice role for expression (e.g. YoungAdultFemale)', + placeholder: 'YoungAdultFemale', + }, + style_degree: { + type: 'string' as const, + default: '', + description: 'Intensity of speaking style (0.01 to 2.0)', + placeholder: '1.0', + }, + volume: { + type: 'string' as const, + default: '', + description: 'Volume level (e.g. +20%, loud, x-soft)', + placeholder: '+0%', + }, + sample_rate: { + type: 'number' as const, + default: undefined, + description: 'Audio sample rate in Hz', + placeholder: '16000', + }, + }, + }, }, }, stt: { - providers: ['deepgram', 'sarvam', 'elevenlabs'] as const, + providers: ['deepgram', 'sarvam', 'elevenlabs', 'azure'] as const, configs: { deepgram: { name: 'Deepgram', @@ -352,6 +403,34 @@ export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = { }, }, }, + azure: { + name: 'Azure', + badge: { + bg: 'bg-sky-100', + text: 'text-sky-700', + }, + parameters: { + endpoint_id: { + type: 'string' as const, + default: '', + description: 'Custom model endpoint ID (optional)', + placeholder: '', + }, + sample_rate: { + type: 'number' as const, + default: undefined, + description: 'Audio sample rate in Hz', + placeholder: '8000', + }, + ttfs_p99_latency: { + type: 'number' as const, + default: undefined, + description: 'P99 latency threshold in seconds for first speech detection', + placeholder: '1.5', + step: 0.1, + }, + }, + }, }, }, }; diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx index dd9e48c9..c147fd44 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx @@ -30,12 +30,18 @@ import React, { useEffect, useState } from 'react'; import { useForm } from 'react-hook-form'; import { z } from 'zod'; -const createSttConfigSchema = z.object({ - display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'), - description: z.string().max(500, 'Description must be 500 characters or less').optional(), - provider: z.enum(['deepgram', 'sarvam', 'elevenlabs'] as [string, ...string[]]), - api_key: z.string().min(1, 'API key is required'), -}); +const createSttConfigSchema = z + .object({ + display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'), + description: z.string().max(500, 'Description must be 500 characters or less').optional(), + provider: z.enum(['deepgram', 'sarvam', 'elevenlabs', 'azure'] as [string, ...string[]]), + api_key: z.string().min(1, 'API key is required'), + region: z.string().optional(), + }) + .refine((data) => data.provider !== 'azure' || (data.region && data.region.trim().length > 0), { + message: 'Region is required for Azure', + path: ['region'], + }); type CreateSttConfigInput = z.infer; @@ -56,9 +62,12 @@ const CreateSttConfigDialog: React.FC = ({ isOpen, o description: '', provider: 'deepgram', api_key: '', + region: '', }, }); + const selectedProvider = form.watch('provider'); + // Reset form when dialog closes useEffect(() => { if (!isOpen) { @@ -67,6 +76,7 @@ const CreateSttConfigDialog: React.FC = ({ isOpen, o description: '', provider: 'deepgram', api_key: '', + region: '', }); } }, [isOpen, form]); @@ -79,6 +89,7 @@ const CreateSttConfigDialog: React.FC = ({ isOpen, o description: data.description?.trim() || null, provider: data.provider as SttProvider, api_key: data.api_key.trim(), + region: data.region?.trim() || null, }); notifySuccess('STT configuration created successfully'); onSuccess?.(); @@ -186,6 +197,24 @@ const CreateSttConfigDialog: React.FC = ({ isOpen, o )} /> + {selectedProvider === 'azure' && ( + ( + + + Region * + + + + + + + )} + /> + )} + Security Note: API keys are stored securely and never returned in API responses. diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/EditSttConfigDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/EditSttConfigDialog.tsx index 8213a558..c4eabf5a 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/EditSttConfigDialog.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/EditSttConfigDialog.tsx @@ -33,8 +33,9 @@ import { z } from 'zod'; const updateSttConfigSchema = z.object({ display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'), description: z.string().max(500, 'Description must be 500 characters or less').optional(), - provider: z.enum(['deepgram', 'sarvam', 'elevenlabs'] as [string, ...string[]]), + provider: z.enum(['deepgram', 'sarvam', 'elevenlabs', 'azure'] as [string, ...string[]]), api_key: z.string().optional(), + region: z.string().optional(), }); type UpdateSttConfigInput = z.infer; @@ -57,9 +58,12 @@ const EditSttConfigDialog: React.FC = ({ isOpen, onOpe description: config.description || '', provider: config.provider, api_key: '', + region: config.region || '', }, }); + const selectedProvider = form.watch('provider'); + // Reset form when dialog opens or config changes useEffect(() => { if (isOpen && config) { @@ -68,6 +72,7 @@ const EditSttConfigDialog: React.FC = ({ isOpen, onOpe description: config.description || '', provider: config.provider, api_key: '', + region: config.region || '', }); } }, [isOpen, config, form]); @@ -84,6 +89,10 @@ const EditSttConfigDialog: React.FC = ({ isOpen, onOpe updateData.api_key = data.api_key.trim(); } + if (data.region !== undefined) { + updateData.region = data.region?.trim() || null; + } + await floConsoleService.sttConfigService.updateSttConfig(config.id, updateData); notifySuccess('STT configuration updated successfully'); onSuccess?.(); @@ -196,6 +205,24 @@ const EditSttConfigDialog: React.FC = ({ isOpen, onOpe )} /> + {selectedProvider === 'azure' && ( + ( + + + Region * + + + + + + + )} + /> + )} +