diff --git a/README.md b/README.md
index 5a77053..1d286e3 100644
--- a/README.md
+++ b/README.md
@@ -65,6 +65,9 @@ echo "VAPI_TOKEN=your-token-here" > .env.dev
 | `npm run pull:prod` | Pull resources from prod |
 | `npm run apply:dev` | Push local YAML files to Vapi (dev) |
 | `npm run apply:prod` | Push local YAML files to Vapi (prod) |
+| `npm run call:dev -- -a <name>` | Start a WebSocket call to an assistant (dev) |
+| `npm run call:dev -- -s <name>` | Start a WebSocket call to a squad (dev) |
+| `npm run call:prod -- -a <name>` | Start a WebSocket call to an assistant (prod) |
 
 ### Basic Workflow
 
@@ -82,6 +85,111 @@ npm run apply:dev
 
 ## How-To Guides
 
+### How to Make a WebSocket Call to an Assistant or Squad
+
+Test your assistants and squads directly from the terminal over real-time voice calls.
+
+**Prerequisites (optional, but recommended for audio):**
+
+```bash
+# For microphone input and audio playback
+npm install mic speaker
+
+# macOS may require additional setup:
+brew install sox
+```
+
+> **Note:** The call script works without these dependencies, but it will only show transcripts (no audio I/O).
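+
+The script reads credentials from the same `.env` files as `pull` and `apply`. A minimal sketch of what it looks for (`VAPI_BASE_URL` is optional and defaults to `https://api.vapi.ai`):
+
+```bash
+# .env.dev
+VAPI_TOKEN=your-token-here
+VAPI_BASE_URL=https://api.vapi.ai
+```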
+
+**Step 1:** Ensure your assistant/squad is deployed
+
+```bash
+npm run apply:dev
+```
+
+**Step 2:** Start the call
+
+```bash
+# Call an assistant
+npm run call:dev -- -a my-assistant
+
+# Call a nested assistant (in a subdirectory)
+npm run call:dev -- -a company-1/inbound-support
+
+# Call a squad
+npm run call:dev -- -s my-squad
+
+# Call in production
+npm run call:prod -- -a my-assistant
+```
+
+**CLI Options:**
+
+| Flag | Description |
+|------|-------------|
+| `-a <name>` | Call an assistant by name |
+| `-s <name>` | Call a squad by name |
+
+**Step 3:** Grant microphone permissions
+
+On first run, the script checks for microphone permissions:
+
+- **macOS**: You may see a system permission prompt. Grant access in System Preferences > Security & Privacy > Privacy > Microphone
+- **Linux**: Ensure ALSA is configured and your user has access to audio devices
+- **Windows**: You may be prompted to grant microphone access
+
+**Step 4:** Speak into your microphone
+
+The terminal will show:
+
+- šŸŽ¤ Your speech transcripts
+- šŸ¤– Assistant responses
+- šŸ“ž Call status updates
+
+**Step 5:** End the call
+
+Press `Ctrl+C` to end the call gracefully.
+
+**Example output:**
+
+```
+šŸš€ Starting WebSocket call
+   Environment: dev
+   assistant: my-assistant
+
+šŸŽ¤ Checking microphone permissions...
+āœ… Microphone permission granted
+
+   UUID: 88d807a0-854a-4a95-960f-6b69921ff877
+
+šŸ“ž Creating call...
+šŸ“ž Call ID: abc123-def456
+šŸ”Œ Connecting to WebSocket...
+āœ… Connected!
+šŸŽ¤ Speak into your microphone...
+   Press Ctrl+C to end the call
+
+šŸ’¬ Assistant started speaking...
+šŸ¤– Assistant: Hi there, this is Alex from TechSolutions customer support. How can I help you today?
+šŸŽ¤ You: I need help with my account
+šŸ¤– Assistant: I'd be happy to help you with your account. Could you tell me a bit more about what's happening?
+
+^C
+šŸ‘‹ Ending call...
+šŸ““ Call ended (code: 1000)
+```
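+
+On the wire, the WebSocket carries raw PCM audio as binary frames and JSON control messages as text frames. Two representative control messages, as modeled by the handler in `src/call.ts` (the full set is defined by the Vapi API):
+
+```json
+{ "type": "transcript", "role": "assistant", "transcriptType": "final", "transcript": "Hi there!" }
+{ "type": "speech-update", "role": "user", "status": "started" }
+```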
+
+**Troubleshooting:**
+
+| Issue | Solution |
+|-------|----------|
+| `Assistant not found` | Run `npm run apply:dev` first to deploy |
+| `Squad not found` | Ensure squads are added to the state file |
+| `mic module not installed` | Run `npm install mic` |
+| `speaker module not installed` | Run `npm install speaker` |
+| No audio on macOS | Install sox: `brew install sox` |
+| Microphone permission denied | Check system privacy settings |
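+
+Assistant and squad names are resolved to UUIDs through `.vapi-state.<env>.json`, which `apply` writes. An illustrative sketch of the fields the call script reads (the authoritative shape lives in `src/types.ts`):
+
+```json
+{
+  "assistants": { "my-assistant": "88d807a0-854a-4a95-960f-6b69921ff877" },
+  "squads": { "my-squad": "..." }
+}
+```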
+
+---
+
 ### How to Add a New Tool
 
 **Step 1:** Create a new YAML file in `resources/tools/`
@@ -460,6 +568,7 @@ vapi-gitops/
 ā”œā”€ā”€ src/
 │   ā”œā”€ā”€ apply.ts      # Apply entry point & functions
 │   ā”œā”€ā”€ pull.ts       # Pull entry point & functions
+│   ā”œā”€ā”€ call.ts       # WebSocket call script
 │   ā”œā”€ā”€ types.ts      # TypeScript interfaces
 │   ā”œā”€ā”€ config.ts     # Environment & configuration
 │   ā”œā”€ā”€ api.ts        # Vapi HTTP client
diff --git a/package.json b/package.json
index 7830874..5f85d0d 100644
--- a/package.json
+++ b/package.json
@@ -9,6 +9,8 @@
     "apply:prod": "tsx src/apply.ts prod",
     "pull:dev": "tsx src/pull.ts dev",
     "pull:prod": "tsx src/pull.ts prod",
+    "call:dev": "tsx src/call.ts dev",
+    "call:prod": "tsx src/call.ts prod",
     "build": "tsc --noEmit"
   },
   "devDependencies": {
@@ -18,5 +20,9 @@
   },
   "dependencies": {
     "yaml": "^2.7.0"
+  },
+  "optionalDependencies": {
+    "mic": "^2.1.2",
+    "speaker": "^0.5.5"
   }
-}
+}
\ No newline at end of file
diff --git a/src/call.ts b/src/call.ts
new file mode 100644
index 0000000..64e28f4
--- /dev/null
+++ b/src/call.ts
@@ -0,0 +1,616 @@
+import { existsSync, readFileSync } from "fs";
+import { join, dirname } from "path";
+import { fileURLToPath } from "url";
+import { createRequire } from "module";
+import { execSync } from "child_process";
+import * as readline from "readline";
+import type { Environment, StateFile } from "./types.ts";
+import { VALID_ENVIRONMENTS } from "./types.ts";
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Configuration
+// ─────────────────────────────────────────────────────────────────────────────
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const BASE_DIR = join(__dirname, "..");
+
+// This module runs as ESM (see import.meta.url above), so create a CommonJS
+// require explicitly for lazily loading the optional native audio modules.
+const require = createRequire(import.meta.url);
+
+type ResourceType = "assistant" | "squad";
+
+interface CallConfig {
+  env: Environment;
+  target: string;
+  resourceType: ResourceType;
+  token: string;
+  baseUrl: string;
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Argument Parsing
+// ─────────────────────────────────────────────────────────────────────────────
+
+function printUsage(): void {
+  console.error("āŒ Usage: npm run call:<env> -- -a <assistant-name>");
+  console.error("          npm run call:<env> -- -s <squad-name>");
+  console.error("");
+  console.error("   Options:");
+  console.error("     -a <name>   Call an assistant by name");
+  console.error("     -s <name>   Call a squad by name");
+  console.error("");
+  console.error("   Examples:");
+  console.error("     npm run call:dev -- -a my-assistant");
+  console.error("     npm run call:dev -- -a company-1/inbound-support");
+  console.error("     npm run call:prod -- -s my-squad");
+}
+
+function parseArgs(): CallConfig {
+  const args = process.argv.slice(2);
+
+  if (args.length < 3) {
+    printUsage();
+    process.exit(1);
+  }
+
+  const env = args[0] as Environment;
+
+  if (!VALID_ENVIRONMENTS.includes(env)) {
+    console.error(`āŒ Invalid environment: ${env}`);
+    console.error(`   Must be one of: ${VALID_ENVIRONMENTS.join(", ")}`);
+    process.exit(1);
+  }
+
+  // Parse flags
+  let resourceType: ResourceType | null = null;
+  let target: string | null = null;
+
+  for (let i = 1; i < args.length; i++) {
+    const arg = args[i];
+    if (arg === "-a" || arg === "--assistant") {
+      if (resourceType) {
+        console.error("āŒ Cannot specify both -a and -s");
+        process.exit(1);
+      }
+      resourceType = "assistant";
+      target = args[++i];
+    } else if (arg === "-s" || arg === "--squad") {
+      if (resourceType) {
+        console.error("āŒ Cannot specify both -a and -s");
+        process.exit(1);
+      }
+      resourceType = "squad";
+      target = args[++i];
+    }
+  }
+
+  if (!resourceType || !target) {
+    console.error("āŒ Must specify either -a <assistant-name> or -s <squad-name>");
+    printUsage();
+    process.exit(1);
+  }
+
+  // Load environment variables
+  const { token, baseUrl } = loadEnvFile(env);
+
+  return { env, target, resourceType, token, baseUrl };
+}
+
+function loadEnvFile(env: string): { token: string; baseUrl: string } {
+  const envFiles = [
+    join(BASE_DIR, `.env.${env}`),
+    join(BASE_DIR, `.env.${env}.local`),
+    join(BASE_DIR, ".env.local"),
+  ];
+
+  const envVars: Record<string, string> = {};
+
+  for (const envFile of envFiles) {
+    if (existsSync(envFile)) {
+      const content = readFileSync(envFile, "utf-8");
+      for (const line of content.split("\n")) {
+        const trimmed = line.trim();
+        if (!trimmed || trimmed.startsWith("#")) continue;
+
+        const eqIndex = trimmed.indexOf("=");
+        if (eqIndex === -1) continue;
+
+        const key = trimmed.slice(0, eqIndex).trim();
+        let value = trimmed.slice(eqIndex + 1).trim();
+
+        // Strip surrounding quotes
+        if (
+          (value.startsWith('"') && value.endsWith('"')) ||
+          (value.startsWith("'") && value.endsWith("'"))
+        ) {
+          value = value.slice(1, -1);
+        }
+
+        // Earlier files in the list take precedence
+        if (envVars[key] === undefined) {
+          envVars[key] = value;
+        }
+      }
+    }
+  }
+
+  const token = process.env.VAPI_TOKEN || envVars.VAPI_TOKEN;
+  const baseUrl = process.env.VAPI_BASE_URL || envVars.VAPI_BASE_URL || "https://api.vapi.ai";
+
+  if (!token) {
+    console.error("āŒ VAPI_TOKEN environment variable is required");
+    console.error(`   Create a .env.${env} file with: VAPI_TOKEN=your-token`);
+    process.exit(1);
+  }
+
+  return { token, baseUrl };
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Permission Check
+// ─────────────────────────────────────────────────────────────────────────────
+
+async function checkMicrophonePermission(): Promise<boolean> {
+  const platform = process.platform;
+
+  if (platform === "darwin") {
+    // macOS - verify microphone access by attempting a brief recording
+    console.log("šŸŽ¤ Checking microphone permissions...");
+
+    try {
+      // Check if sox/rec is available
+      execSync("which sox", { stdio: "pipe" });
+
+      // Try a quick recording to trigger the system permission prompt
+      console.log("   Testing microphone access (this may prompt for permission)...");
+      execSync("rec -q -t raw -r 16000 -b 16 -c 1 -e signed-integer /dev/null trim 0 0.1 2>/dev/null || true", {
+        timeout: 5000,
+        stdio: "pipe",
+      });
+
+      console.log("āœ… Microphone permission granted\n");
+      return true;
+    } catch {
+      // sox not installed or permission denied
+      console.log("āš ļø  Could not verify microphone access.");
+      console.log("   If prompted, please grant microphone permission in System Preferences.");
+      console.log("   System Preferences > Security & Privacy > Privacy > Microphone\n");
+
+      // Ask user to continue anyway
+      const shouldContinue = await askUserConfirmation(
+        "Continue without confirmed microphone access? (y/n): "
+      );
+      return shouldContinue;
+    }
+  } else if (platform === "linux") {
+    // Linux - check if audio devices are accessible
+    console.log("šŸŽ¤ Checking audio devices...");
+
+    try {
+      // Check for ALSA capture devices
+      execSync("arecord -l 2>/dev/null | grep -q card", { stdio: "pipe" });
+      console.log("āœ… Audio recording devices found\n");
+      return true;
+    } catch {
+      console.log("āš ļø  No audio recording devices found.");
+      console.log("   Make sure your microphone is connected and ALSA is configured.\n");
+
+      const shouldContinue = await askUserConfirmation(
+        "Continue without confirmed microphone access? (y/n): "
+      );
+      return shouldContinue;
+    }
+  } else if (platform === "win32") {
+    // Windows - just inform the user
+    console.log("šŸŽ¤ On Windows, you may be prompted to grant microphone access.\n");
+    return true;
+  }
+
+  return true;
+}
+
+function askUserConfirmation(question: string): Promise<boolean> {
+  return new Promise<boolean>((resolve) => {
+    const rl = readline.createInterface({
+      input: process.stdin,
+      output: process.stdout,
+    });
+
+    rl.question(question, (answer) => {
+      rl.close();
+      resolve(answer.toLowerCase() === "y" || answer.toLowerCase() === "yes");
+    });
+  });
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// State Loading
+// ─────────────────────────────────────────────────────────────────────────────
+
+function loadState(env: Environment): StateFile {
+  const stateFilePath = join(BASE_DIR, `.vapi-state.${env}.json`);
+
+  if (!existsSync(stateFilePath)) {
+    console.error(`āŒ State file not found: .vapi-state.${env}.json`);
+    console.error(`   Run 'npm run apply:${env}' first to create resources`);
+    process.exit(1);
+  }
+
+  try {
+    const content = readFileSync(stateFilePath, "utf-8");
+    return JSON.parse(content) as StateFile;
+  } catch (error) {
+    console.error(`āŒ Failed to parse state file: ${error}`);
+    process.exit(1);
+  }
+}
+
+function resolveTarget(
+  state: StateFile,
+  target: string,
+  resourceType: ResourceType
+): string {
+  if (resourceType === "squad") {
+    const squads = (state as StateFile & { squads?: Record<string, string> }).squads || {};
+    const uuid = squads[target];
+    if (!uuid) {
+      console.error(`āŒ Squad not found: ${target}`);
+      console.error("   Available squads:");
+      const squadKeys = Object.keys(squads);
+      if (squadKeys.length === 0) {
+        console.error("     (no squads in state file)");
+      } else {
+        squadKeys.forEach((k) => console.error(`     - ${k}`));
+      }
+      process.exit(1);
+    }
+    return uuid;
+  } else {
+    const uuid = state.assistants[target];
+    if (!uuid) {
+      console.error(`āŒ Assistant not found: ${target}`);
+      console.error("   Available assistants:");
+      const assistantKeys = Object.keys(state.assistants);
+      if (assistantKeys.length === 0) {
+        console.error("     (no assistants in state file)");
+      } else {
+        assistantKeys.forEach((k) => console.error(`     - ${k}`));
+      }
+      process.exit(1);
+    }
+    return uuid;
+  }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Call Creation
+// ─────────────────────────────────────────────────────────────────────────────
+
+interface CreateCallResponse {
+  id: string;
+  transport?: {
+    websocketCallUrl?: string;
+  };
+}
+
+async function createCall(
+  config: CallConfig,
+  targetId: string
+): Promise<CreateCallResponse> {
+  const url = `${config.baseUrl}/call`;
+
+  const body: Record<string, unknown> = {
+    transport: {
+      provider: "vapi.websocket",
+      audioFormat: {
+        format: "pcm_s16le",
+        container: "raw",
+        sampleRate: 16000,
+      },
+    },
+  };
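+
+  // pcm_s16le at 16 kHz mono works out to 16,000 samples/s Ɨ 2 bytes =
+  // 32,000 bytes/s of raw audio in each direction; with container "raw"
+  // there is no framing beyond the WebSocket frames themselves.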
+
+  if (config.resourceType === "squad") {
+    body.squadId = targetId;
+  } else {
+    body.assistantId = targetId;
+  }
+
+  console.log(`šŸ“ž Creating call...`);
+
+  const response = await fetch(url, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${config.token}`,
+    },
+    body: JSON.stringify(body),
+  });
+
+  if (!response.ok) {
+    const errorText = await response.text();
+    console.error(`āŒ Failed to create call: ${response.status}`);
+    console.error(`   ${errorText}`);
+    process.exit(1);
+  }
+
+  return response.json() as Promise<CreateCallResponse>;
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// WebSocket Connection
+// ─────────────────────────────────────────────────────────────────────────────
+
+interface TranscriptMessage {
+  type: "transcript";
+  role: "user" | "assistant";
+  transcriptType: "partial" | "final";
+  transcript: string;
+}
+
+interface SpeechUpdateMessage {
+  type: "speech-update";
+  role: "user" | "assistant";
+  status: "started" | "stopped";
+}
+
+interface CallEndedMessage {
+  type: "call-ended";
+  reason?: string;
+}
+
+type ControlMessage = TranscriptMessage | SpeechUpdateMessage | CallEndedMessage | { type: string };
+
+async function connectWebSocket(websocketUrl: string, config: CallConfig): Promise<void> {
+  return new Promise<void>((resolve, reject) => {
+    console.log(`šŸ”Œ Connecting to WebSocket...`);
+
+    // Node's built-in (undici) WebSocket officially takes only protocols as
+    // its second argument; passing a non-standard options bag with headers
+    // is why the cast goes through unknown.
+    const ws = new WebSocket(websocketUrl, {
+      headers: {
+        Authorization: `Bearer ${config.token}`,
+      },
+    } as unknown as string[]);
+
+    // Receive binary audio frames as ArrayBuffer rather than Blob
+    ws.binaryType = "arraybuffer";
+
+    let audioContext: ReturnType<typeof createAudioContext> | null = null;
+    let micStream: ReturnType<typeof createMicrophoneStream> | null = null;
+    let isConnected = false;
+    let lastTranscript = "";
+    let cleanedUp = false;
+
+    // Graceful shutdown (idempotent: runs once even if both a signal and
+    // the close event trigger it)
+    const cleanup = () => {
+      if (cleanedUp) return;
+      cleanedUp = true;
+      console.log("\nšŸ‘‹ Ending call...");
+      if (micStream) {
+        micStream.stop();
+      }
+      if (audioContext) {
+        audioContext.close();
+      }
+      if (ws.readyState === WebSocket.OPEN) {
+        ws.close();
+      }
+      resolve();
+    };
+
+    process.on("SIGINT", cleanup);
+    process.on("SIGTERM", cleanup);
+
+    ws.onopen = () => {
+      console.log("āœ… Connected!");
+      console.log("šŸŽ¤ Speak into your microphone...");
+      console.log("   Press Ctrl+C to end the call\n");
+      isConnected = true;
+
+      // Start audio capture
+      try {
+        audioContext = createAudioContext();
+        micStream = createMicrophoneStream((audioData: Buffer) => {
+          if (ws.readyState === WebSocket.OPEN) {
+            ws.send(audioData);
+          }
+        });
+      } catch (error) {
+        console.error("āš ļø  Could not start microphone:", error);
+        console.log("   Continuing without microphone input...");
+      }
+    };
+
+    ws.onmessage = (event) => {
+      if (event.data instanceof Buffer || event.data instanceof ArrayBuffer) {
+        // Binary audio data from the assistant
+        if (audioContext) {
+          audioContext.playAudio(event.data);
+        }
+      } else {
+        // Control message (JSON)
+        try {
+          const message = JSON.parse(event.data as string) as ControlMessage;
+          handleControlMessage(message, lastTranscript, (t) => { lastTranscript = t; });
+        } catch {
+          // Ignore parse errors
+        }
+      }
+    };
+
+    ws.onerror = (error) => {
+      console.error("āŒ WebSocket error:", error);
+      if (!isConnected) {
+        reject(error);
+      }
+    };
+
+    ws.onclose = (event) => {
+      console.log(`\nšŸ““ Call ended (code: ${event.code})`);
+      cleanup();
+    };
+  });
+}
+
+function handleControlMessage(
+  message: ControlMessage,
+  lastTranscript: string,
+  setLastTranscript: (t: string) => void
+): void {
+  switch (message.type) {
+    case "transcript": {
+      const tm = message as TranscriptMessage;
+      const prefix = tm.role === "user" ? "šŸŽ¤ You" : "šŸ¤– Assistant";
+
+      if (tm.transcriptType === "final") {
+        // Clear the partial line and print the final transcript
+        process.stdout.write("\r" + " ".repeat(lastTranscript.length + 20) + "\r");
+        console.log(`${prefix}: ${tm.transcript}`);
+        setLastTranscript("");
+      } else {
+        // Show partial (overwrite the previous partial in place)
+        const line = `${prefix}: ${tm.transcript}`;
+        process.stdout.write("\r" + " ".repeat(lastTranscript.length + 20) + "\r");
+        process.stdout.write(line);
+        setLastTranscript(line);
+      }
+      break;
+    }
+    case "speech-update": {
+      const sm = message as SpeechUpdateMessage;
+      if (sm.status === "started") {
+        const who = sm.role === "user" ? "You" : "Assistant";
+        console.log(`\nšŸ’¬ ${who} started speaking...`);
+      }
+      break;
+    }
+    case "call-ended": {
+      const cm = message as CallEndedMessage;
+      console.log(`\nšŸ“ž Call ended: ${cm.reason || "unknown reason"}`);
+      break;
+    }
+    default:
+      // Ignore other message types
+      break;
+  }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Audio Utilities (lazy wrappers around optional native modules)
+// ─────────────────────────────────────────────────────────────────────────────
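+
+// The speaker and mic settings below (mono, 16-bit, 16 kHz) deliberately
+// mirror the transport audioFormat requested in createCall(); a mismatch
+// would not fail loudly but would play audio at the wrong speed or as noise.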
+
+function createAudioContext(): { playAudio: (data: Buffer | ArrayBuffer) => void; close: () => void } {
+  // Lazily load the optional speaker module
+  let Speaker: typeof import("speaker") | null = null;
+  let speakerInstance: InstanceType<typeof import("speaker")> | null = null;
+
+  try {
+    // Dynamic require for the optional dependency
+    Speaker = require("speaker");
+    speakerInstance = new Speaker!({
+      channels: 1,
+      bitDepth: 16,
+      sampleRate: 16000,
+    });
+  } catch {
+    console.warn("āš ļø  'speaker' module not installed. Audio playback disabled.");
+    console.warn("   Install with: npm install speaker");
+  }
+
+  return {
+    playAudio: (data: Buffer | ArrayBuffer) => {
+      if (speakerInstance) {
+        const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data);
+        speakerInstance.write(buffer);
+      }
+    },
+    close: () => {
+      if (speakerInstance) {
+        speakerInstance.end();
+      }
+    },
+  };
+}
+
+function createMicrophoneStream(
+  onData: (data: Buffer) => void
+): { stop: () => void } {
+  let mic: typeof import("mic") | null = null;
+  let micInstance: ReturnType<typeof import("mic")> | null = null;
+
+  try {
+    mic = require("mic");
+    micInstance = mic!({
+      rate: "16000",
+      channels: "1",
+      bitwidth: "16",
+      encoding: "signed-integer",
+      endian: "little",
+      device: "default",
+    });
+
+    const micInputStream = micInstance!.getAudioStream();
+
+    micInputStream.on("data", (data: Buffer) => {
+      onData(data);
+    });
+
+    micInputStream.on("error", (error: Error) => {
+      console.error("Microphone error:", error);
+    });
+
+    micInstance!.start();
+  } catch (error) {
+    console.warn("āš ļø  'mic' module not installed or microphone unavailable.");
+    console.warn("   Install with: npm install mic");
+    console.warn("   Error:", error);
+  }
+
+  return {
+    stop: () => {
+      if (micInstance) {
+        micInstance.stop();
+      }
+    },
+  };
+}
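+
+// Quick loopback check of the audio path, independent of any call: pipe the
+// microphone straight to the speaker for five seconds (assumes both optional
+// modules are installed; expect feedback unless you wear headphones).
+// Uncomment to try:
+//
+//   const playback = createAudioContext();
+//   const capture = createMicrophoneStream((chunk) => playback.playAudio(chunk));
+//   setTimeout(() => { capture.stop(); playback.close(); }, 5000);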
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Main
+// ─────────────────────────────────────────────────────────────────────────────
+
+async function main() {
+  const config = parseArgs();
+
+  console.log(`\nšŸš€ Starting WebSocket call`);
+  console.log(`   Environment: ${config.env}`);
+  console.log(`   ${config.resourceType}: ${config.target}\n`);
+
+  // Check microphone permissions first
+  const hasPermission = await checkMicrophonePermission();
+  if (!hasPermission) {
+    console.log("āŒ Call cancelled due to microphone permission issues.");
+    process.exit(1);
+  }
+
+  const state = loadState(config.env);
+  const targetId = resolveTarget(state, config.target, config.resourceType);
+
+  console.log(`   UUID: ${targetId}\n`);
+
+  const call = await createCall(config, targetId);
+
+  if (!call.transport?.websocketCallUrl) {
+    console.error("āŒ No WebSocket URL in response");
+    console.error("   Response:", JSON.stringify(call, null, 2));
+    process.exit(1);
+  }
+
+  console.log(`šŸ“ž Call ID: ${call.id}`);
+
+  await connectWebSocket(call.transport.websocketCallUrl, config);
+}
+
+main().catch((error) => {
+  console.error("āŒ Fatal error:", error);
+  process.exit(1);
+});