Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit. Hold Shift + click to select a range.
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,11 @@ const createVoiceAgentSchema = z.object({
tts_config_id: z.string().min(1, 'TTS configuration is required'),
stt_config_id: z.string().min(1, 'STT configuration is required'),
telephony_config_id: z.string().min(1, 'Telephony configuration is required'),
tts_voice_id: z.string().min(1, 'TTS Voice ID is required'),
tts_voice_ids: z
.record(z.string(), z.string().min(1, 'Voice ID must not be empty'))
.refine((val) => Object.keys(val).length > 0, {
message: 'At least one voice ID is required',
}),
system_prompt: z.string().min(1, 'System prompt is required'),
welcome_message: z.string().min(1, 'Welcome message is required'),
conversation_config: z.string().optional(),
Expand All @@ -83,6 +87,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
const [creating, setCreating] = useState(false);
const [ttsParameters, setTtsParameters] = useState<Record<string, unknown>>({});
const [sttParameters, setSttParameters] = useState<Record<string, unknown>>({});
const [voiceIdState, setVoiceIdState] = useState<Record<string, string>>({ en: '' });

// Fetch configs for dropdowns
const { data: llmConfigs = [] } = useGetLLMConfigs(appId);
Expand All @@ -99,7 +104,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
tts_config_id: '',
stt_config_id: '',
telephony_config_id: '',
tts_voice_id: '',
tts_voice_ids: { en: '' },
system_prompt: '',
welcome_message: '',
conversation_config: '{}',
Expand All @@ -114,6 +119,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
// Watch config selections to determine providers
const watchedTtsConfigId = form.watch('tts_config_id');
const watchedSttConfigId = form.watch('stt_config_id');
const watchedSupportedLanguages = form.watch('supported_languages');

// Get selected providers
const selectedTtsProvider = ttsConfigs.find((c) => c.id === watchedTtsConfigId)?.provider;
Expand All @@ -132,6 +138,21 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
}
}, [selectedSttProvider, isOpen]);

// Sync voice ID state with language changes
useEffect(() => {
if (isOpen && watchedSupportedLanguages) {
setVoiceIdState((prev) => {
const newState: Record<string, string> = {};
// Preserve existing voice IDs for languages still selected
watchedSupportedLanguages.forEach((lang) => {
newState[lang] = prev[lang] || '';
});
form.setValue('tts_voice_ids', newState);
return newState;
});
}
}, [watchedSupportedLanguages, isOpen]);

// Reset form when dialog closes
useEffect(() => {
if (!isOpen) {
Expand All @@ -142,7 +163,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
tts_config_id: '',
stt_config_id: '',
telephony_config_id: '',
tts_voice_id: '',
tts_voice_ids: { en: '' },
system_prompt: '',
welcome_message: '',
conversation_config: '{}',
Expand All @@ -154,6 +175,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
});
setTtsParameters({});
setSttParameters({});
setVoiceIdState({ en: '' });
}
}, [isOpen, form]);

Expand Down Expand Up @@ -232,7 +254,7 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
tts_config_id: data.tts_config_id.trim(),
stt_config_id: data.stt_config_id.trim(),
telephony_config_id: data.telephony_config_id.trim(),
tts_voice_id: data.tts_voice_id.trim(),
tts_voice_ids: data.tts_voice_ids,
tts_parameters: Object.keys(builtTtsParameters).length > 0 ? builtTtsParameters : null,
stt_parameters: Object.keys(builtSttParameters).length > 0 ? builtSttParameters : null,
system_prompt: data.system_prompt.trim(),
Expand Down Expand Up @@ -710,17 +732,31 @@ const CreateVoiceAgentDialog: React.FC<CreateVoiceAgentDialogProps> = ({ isOpen,
<h4 className="text-sm font-medium">TTS Voice Settings</h4>
<FormField
control={form.control}
name="tts_voice_id"
name="tts_voice_ids"
render={({ field }) => (
<FormItem>
<FormLabel>
TTS Voice ID<span className="text-red-500">*</span>
TTS Voice IDs<span className="text-red-500">*</span>
</FormLabel>
<FormControl>
<Input placeholder="e.g., alloy, echo, fable (OpenAI) or voice ID (ElevenLabs)" {...field} />
</FormControl>
<div className="space-y-3">
{watchedSupportedLanguages.map((langCode) => (
<div key={langCode} className="flex items-center gap-3">
<Label className="w-24 text-sm font-medium">{getLanguageDisplayName(langCode)}:</Label>
<Input
placeholder={`Voice ID for ${getLanguageDisplayName(langCode)}`}
value={voiceIdState[langCode] || ''}
onChange={(e) => {
const newState = { ...voiceIdState, [langCode]: e.target.value };
setVoiceIdState(newState);
field.onChange(newState);
}}
className="flex-1"
/>
</div>
))}
</div>
<FormDescription>
Provider-specific voice identifier (e.g., for Deepgram: aura-2-helena-en)
Provider-specific voice identifiers per language (e.g., "aura-2-helena-en" for Deepgram)
</FormDescription>
<FormMessage />
</FormItem>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ const updateVoiceAgentSchema = z.object({
tts_config_id: z.string().min(1, 'TTS configuration is required'),
stt_config_id: z.string().min(1, 'STT configuration is required'),
telephony_config_id: z.string().min(1, 'Telephony configuration is required'),
tts_voice_id: z.string().min(1, 'TTS Voice ID is required'),
tts_voice_ids: z.record(z.string(), z.string()).optional(),
system_prompt: z.string().min(1, 'System prompt is required'),
welcome_message: z.string().min(1, 'Welcome message is required'),
conversation_config: z.string().optional(),
Expand Down Expand Up @@ -92,6 +92,7 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
// State for TTS/STT parameters (managed separately from form)
const [ttsParameters, setTtsParameters] = useState<Record<string, unknown>>({});
const [sttParameters, setSttParameters] = useState<Record<string, unknown>>({});
const [voiceIdState, setVoiceIdState] = useState<Record<string, string>>(agent.tts_voice_ids || { en: '' });

const form = useForm<UpdateVoiceAgentInput>({
resolver: zodResolver(updateVoiceAgentSchema),
Expand All @@ -102,7 +103,7 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
tts_config_id: agent.tts_config_id,
stt_config_id: agent.stt_config_id,
telephony_config_id: agent.telephony_config_id,
tts_voice_id: agent.tts_voice_id,
tts_voice_ids: agent.tts_voice_ids,
system_prompt: agent.system_prompt,
welcome_message: agent.welcome_message,
conversation_config: agent.conversation_config ? JSON.stringify(agent.conversation_config, null, 2) : '{}',
Expand All @@ -117,6 +118,7 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
// Watch for config changes to determine providers
const watchedTtsConfigId = form.watch('tts_config_id');
const watchedSttConfigId = form.watch('stt_config_id');
const watchedSupportedLanguages = form.watch('supported_languages');

const selectedTtsProvider = ttsConfigs.find((c) => c.id === watchedTtsConfigId)?.provider;
const selectedSttProvider = sttConfigs.find((c) => c.id === watchedSttConfigId)?.provider;
Expand All @@ -134,7 +136,7 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
tts_config_id: agent.tts_config_id,
stt_config_id: agent.stt_config_id,
telephony_config_id: agent.telephony_config_id,
tts_voice_id: agent.tts_voice_id,
tts_voice_ids: agent.tts_voice_ids,
system_prompt: agent.system_prompt,
welcome_message: agent.welcome_message,
conversation_config: agent.conversation_config ? JSON.stringify(agent.conversation_config, null, 2) : '{}',
Expand All @@ -144,6 +146,7 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
supported_languages: agent.supported_languages || ['en'],
default_language: agent.default_language || 'en',
});
setVoiceIdState(agent.tts_voice_ids || { en: '' });
}
}, [isOpen, agent, form]);

Expand All @@ -161,6 +164,21 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
}
}, [isOpen, agent.stt_parameters]);

// Sync voice ID state with language changes
useEffect(() => {
if (isOpen && watchedSupportedLanguages) {
setVoiceIdState((prev) => {
const newState: Record<string, string> = {};
// Preserve existing voice IDs for languages still selected
watchedSupportedLanguages.forEach((lang) => {
newState[lang] = prev[lang] || '';
});
form.setValue('tts_voice_ids', newState);
return newState;
});
}
}, [watchedSupportedLanguages, isOpen]);

// Helper functions to update parameters
const setTtsParameter = (key: string, value: unknown) => {
setTtsParameters((prev) => ({ ...prev, [key]: value }));
Expand Down Expand Up @@ -266,8 +284,8 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
requestData.telephony_config_id = data.telephony_config_id;
}

if (data.tts_voice_id.trim() !== agent.tts_voice_id) {
requestData.tts_voice_id = data.tts_voice_id.trim();
if (JSON.stringify(data.tts_voice_ids) !== JSON.stringify(agent.tts_voice_ids)) {
requestData.tts_voice_ids = data.tts_voice_ids;
}

// Check if TTS parameters changed
Expand Down Expand Up @@ -757,17 +775,31 @@ const EditVoiceAgentDialog: React.FC<EditVoiceAgentDialogProps> = ({
<h4 className="text-sm font-medium">TTS Voice Settings</h4>
<FormField
control={form.control}
name="tts_voice_id"
name="tts_voice_ids"
render={({ field }) => (
<FormItem>
<FormLabel>
TTS Voice ID<span className="text-red-500">*</span>
TTS Voice IDs<span className="text-red-500">*</span>
</FormLabel>
<FormControl>
<Input placeholder="e.g., alloy, echo, fable (OpenAI) or voice ID (ElevenLabs)" {...field} />
</FormControl>
<div className="space-y-3">
{watchedSupportedLanguages.map((langCode) => (
<div key={langCode} className="flex items-center gap-3">
<Label className="w-24 text-sm font-medium">{getLanguageDisplayName(langCode)}:</Label>
<Input
placeholder={`Voice ID for ${getLanguageDisplayName(langCode)}`}
value={voiceIdState[langCode] || ''}
onChange={(e) => {
const newState = { ...voiceIdState, [langCode]: e.target.value };
setVoiceIdState(newState);
field.onChange(newState);
}}
className="flex-1"
/>
</div>
))}
</div>
<FormDescription>
Provider-specific voice identifier (e.g., for Deepgram: aura-2-helena-en)
Provider-specific voice identifiers per language (e.g., "aura-2-helena-en" for Deepgram)
</FormDescription>
<FormMessage />
</FormItem>
Expand Down
6 changes: 3 additions & 3 deletions wavefront/client/src/types/voice-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export interface VoiceAgent {
telephony_config_id: string;
system_prompt: string;
welcome_message: string;
tts_voice_id: string;
tts_voice_ids: Record<string, string>;
tts_parameters: Record<string, unknown> | null;
stt_parameters: Record<string, unknown> | null;
conversation_config: Record<string, unknown> | null;
Expand All @@ -39,7 +39,7 @@ export interface CreateVoiceAgentRequest {
telephony_config_id: string;
system_prompt: string;
welcome_message: string;
tts_voice_id: string;
tts_voice_ids: Record<string, string>;
tts_parameters?: Record<string, unknown> | null;
stt_parameters?: Record<string, unknown> | null;
conversation_config?: Record<string, unknown> | null;
Expand All @@ -63,7 +63,7 @@ export interface UpdateVoiceAgentRequest {
telephony_config_id?: string;
system_prompt?: string;
welcome_message?: string;
tts_voice_id?: string;
tts_voice_ids?: Record<string, string>;
tts_parameters?: Record<string, unknown> | null;
stt_parameters?: Record<string, unknown> | null;
conversation_config?: Record<string, unknown> | null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
from pipecat.runner.types import WebSocketRunnerArguments
from pipecat.runner.utils import parse_telephony_websocket

# from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
# from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
from pipecat.serializers.twilio import TwilioFrameSerializer
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
Expand Down Expand Up @@ -119,6 +117,8 @@ async def inbound_webhook(

# Pass parameters to WebSocket stream
stream.parameter(name='voice_agent_id', value=agent_id)
stream.parameter(name='customer_number', value=From)
stream.parameter(name='agent_number', value=To)

connect.append(stream)
response.append(connect)
Expand All @@ -134,13 +134,15 @@ async def inbound_webhook(

@webhook_router.post('/twiml')
async def twiml_endpoint(
From: str = Form(...),
To: str = Form(...),
voice_agent_id: str = Query(...),
welcome_message_audio_url: str = Query(default=''),
):
"""
Twilio TwiML endpoint

Called by Twilio when call connects (directly or via inbound webhook redirect).
Called by Twilio when call connects (directly or via outbound webhook redirect).
Returns TwiML XML with WebSocket connection instructions.

Query params:
Expand Down Expand Up @@ -181,6 +183,8 @@ async def twiml_endpoint(

# Pass parameters to WebSocket stream
stream.parameter(name='voice_agent_id', value=voice_agent_id)
stream.parameter(name='customer_number', value=To)
stream.parameter(name='agent_number', value=From)
Comment on lines +186 to +187
Copy link
Copy Markdown

@coderabbitai coderabbitai bot Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Critical: From/To mapping is swapped compared to inbound_webhook.

In the twiml_endpoint, the customer/agent number assignment is reversed:

  • Line 188: customer_number=To (should be From)
  • Line 189: agent_number=From (should be To)

In Twilio webhooks, From is the caller (customer) and To is the number being called (agent). Compare with inbound_webhook at lines 122-123 which correctly uses customer_number=From and agent_number=To.

🐛 Fix the swapped parameters
     # Pass parameters to WebSocket stream
     stream.parameter(name='voice_agent_id', value=voice_agent_id)
-    stream.parameter(name='customer_number', value=To)
-    stream.parameter(name='agent_number', value=From)
+    stream.parameter(name='customer_number', value=From)
+    stream.parameter(name='agent_number', value=To)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
stream.parameter(name='customer_number', value=To)
stream.parameter(name='agent_number', value=From)
stream.parameter(name='customer_number', value=From)
stream.parameter(name='agent_number', value=To)
🤖 Prompt for AI Agents
In
`@wavefront/server/apps/call_processing/call_processing/controllers/webhook_controller.py`
around lines 188 - 189, The twiml_endpoint is incorrectly swapping Twilio
parameters: change the stream.parameter assignments so customer_number uses From
and agent_number uses To (mirror the correct mapping used in inbound_webhook);
locate the stream.parameter calls inside the twiml_endpoint function and swap
the values for customer_number and agent_number (replace customer_number=To with
customer_number=From and agent_number=From with agent_number=To).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is actually an outbound web hook

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rootflo-hardik, you're absolutely right—thank you for the clarification! I apologize for the confusion.

For outbound calls initiated by Twilio:

  • From is the agent/service number (the caller)
  • To is the customer number (the called party)

So the mapping in twiml_endpoint is correct for outbound calls:

stream.parameter(name='customer_number', value=To)    # Customer is being called
stream.parameter(name='agent_number', value=From)     # Agent is the caller

This is the opposite of inbound_webhook, which handles incoming calls where the customer is calling in. My original comment incorrectly assumed twiml_endpoint was also for inbound calls. The code is working as intended for outbound scenarios.


✏️ Learnings added
Learnt from: rootflo-hardik
Repo: rootflo/wavefront PR: 207
File: wavefront/server/apps/call_processing/call_processing/controllers/webhook_controller.py:186-187
Timestamp: 2026-02-02T12:35:05.547Z
Learning: In Twilio voice webhooks, the twiml_endpoint function in wavefront/server/apps/call_processing/call_processing/controllers/webhook_controller.py handles outbound calls where From is the agent/service number and To is the customer number, which is the reverse of inbound calls.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


connect.append(stream)
response.append(connect)
Expand Down Expand Up @@ -223,12 +227,20 @@ async def websocket_endpoint(
# Extract parameters from stream
body_data = call_data.get('body', {})
voice_agent_id = body_data.get('voice_agent_id')
customer_number = body_data.get('customer_number')
# agent_number = body_data.get('agent_number')

if not voice_agent_id:
logger.error('voice_agent_id not found in stream parameters')
await websocket.close(code=1008, reason='Missing voice_agent_id')
return

if not customer_number:
logger.warning(
'customer_number not found in stream parameters, using empty string'
)
customer_number = ''

logger.info(f'Voice agent ID: {voice_agent_id}')

# Convert voice_agent_id to UUID
Expand Down Expand Up @@ -263,13 +275,12 @@ async def websocket_endpoint(
vad_analyzer=SileroVADAnalyzer(
params=VADParams(
confidence=0.7, # Default is 0.7, can lower to 0.4-0.5 for faster detection
start_secs=0.15, # Default is 0.2, keep it
stop_secs=0.8, # KEY: Lower from default 0.8 for faster cutoff (should be 0.2 for smart turn detection)
start_secs=0.2, # Default is 0.2, keep it
stop_secs=0.2, # KEY: Lower from default 0.8 for faster cutoff (should be 0.2 for smart turn detection)
min_volume=0.6, # Default is 0.6, adjust based on your audio quality
),
), # Voice Activity Detection
serializer=serializer,
# turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
),
)

Expand All @@ -282,6 +293,7 @@ async def websocket_endpoint(
tts_config=configs['tts_config'],
stt_config=configs['stt_config'],
tools=configs['tools'],
customer_number=customer_number,
)

except Exception as e:
Expand Down
Loading