From 0c547bc79c6e5d8b48df7fb16cf2fec5f337b143 Mon Sep 17 00:00:00 2001
From: tsushanth <78000697+tsushanth@users.noreply.github.com>
Date: Thu, 11 Jun 2026 10:26:29 -0700
Subject: [PATCH] fix(openai): drop ?model= on native /realtime STT URL to
 avoid invalid_model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OpenAI's native wss://api.openai.com/.../realtime endpoint now treats a
?model= query param on the WebSocket upgrade URL as selecting a
conversation session, and rejects the subsequent transcription-mode
session.update with error.invalid_request_error.invalid_model (close
4000). Every transcription model (gpt-4o-mini-transcribe,
gpt-4o-transcribe, whisper-1) currently fails this way against the
native endpoint, so realtime STT through this plugin produces zero
transcripts.

The ?model= upgrade-URL convention exists for OpenAI-compatible proxies
(LiteLLM, Cloudflare AI Gateway, etc.) that route at the HTTP upgrade
without parsing the first JSON frame — see #1467. Drop the param only
when the host is api.openai.com; non-OpenAI hosts still receive the
model on the URL so proxy routing keeps working. The model is conveyed
to OpenAI via session.update → audio.input.transcription.model instead.

Updates the corresponding URL builder test and adds a second test for
the explicit-OpenAI-baseURL path.

Closes #1756
---
 ...penai-stt-native-realtime-invalid-model.md | 18 ++++++++++++++++++
 plugins/openai/src/stt.test.ts                | 16 ++++++++++++++--
 plugins/openai/src/stt.ts                     | 19 ++++++++++++-------
 3 files changed, 44 insertions(+), 9 deletions(-)
 create mode 100644 .changeset/fix-openai-stt-native-realtime-invalid-model.md

diff --git a/.changeset/fix-openai-stt-native-realtime-invalid-model.md b/.changeset/fix-openai-stt-native-realtime-invalid-model.md
new file mode 100644
index 000000000..4b4d8e714
--- /dev/null
+++ b/.changeset/fix-openai-stt-native-realtime-invalid-model.md
@@ -0,0 +1,18 @@
+---
+'@livekit/agents-plugin-openai': patch
+---
+
+Fix `openai` realtime STT (transcription session) failing on every model
+with `invalid_request_error.invalid_model` when connecting directly to
+`wss://api.openai.com/.../realtime`.
+
+OpenAI's native endpoint now treats a `?model=` query param on the
+WebSocket upgrade URL as selecting a conversation session, so the
+subsequent transcription-mode `session.update` is rejected — surfacing
+as `invalid_model` and a `4000` close. Drop the `?model=` parameter
+when the host is `api.openai.com` (the model is conveyed via
+`session.update → audio.input.transcription.model` instead).
+
+OpenAI-compatible proxies (LiteLLM, Cloudflare AI Gateway, etc.) still
+receive the model on the upgrade URL so they can route by model before
+the first frame, preserving the original intent of #1467.
diff --git a/plugins/openai/src/stt.test.ts b/plugins/openai/src/stt.test.ts
index 107ca4b5d..46edf619e 100644
--- a/plugins/openai/src/stt.test.ts
+++ b/plugins/openai/src/stt.test.ts
@@ -74,14 +74,26 @@ describe('OpenAI STT options', () => {
 });
 
 describe('buildRealtimeSttUrl', () => {
-  it('points at OpenAI realtime with intent and model when no baseURL is set', () => {
+  it('points at OpenAI realtime with intent but omits model on the native endpoint', () => {
+    // On OpenAI's native /realtime endpoint, `?model=` selects a conversation
+    // session, so the transcription-mode session.update that follows is rejected
+    // with invalid_model; the model is conveyed via that session.update instead.
     const url = new URL(buildRealtimeSttUrl(undefined, 'gpt-realtime-whisper'));
 
     expect(url.protocol).toBe('wss:');
     expect(url.host).toBe('api.openai.com');
     expect(url.pathname).toBe('/v1/realtime');
     expect(url.searchParams.get('intent')).toBe('transcription');
-    expect(url.searchParams.get('model')).toBe('gpt-realtime-whisper');
+    expect(url.searchParams.get('model')).toBe(null);
+  });
+
+  it('omits the model when an explicit baseURL still points at api.openai.com', () => {
+    const url = new URL(buildRealtimeSttUrl('https://api.openai.com/v1', 'gpt-4o-mini-transcribe'));
+
+    expect(url.host).toBe('api.openai.com');
+    expect(url.pathname).toBe('/v1/realtime');
+    expect(url.searchParams.get('intent')).toBe('transcription');
+    expect(url.searchParams.get('model')).toBe(null);
   });
 
   it('upgrades https baseURL to wss and appends /realtime when path is /v1', () => {
diff --git a/plugins/openai/src/stt.ts b/plugins/openai/src/stt.ts
index 9b2ee417c..1da789516 100644
--- a/plugins/openai/src/stt.ts
+++ b/plugins/openai/src/stt.ts
@@ -28,12 +28,15 @@ const DEFAULT_REALTIME_MODEL = 'gpt-realtime-whisper';
 /**
  * Build the realtime transcription WebSocket URL.
  *
- * Includes the model on the upgrade URL so OpenAI-compatible gateways
- * (which can only see the URL at the WebSocket upgrade, not the subsequent
- * `session.update` frame) can route by model. Mirrors the existing
- * convention in `realtime/realtime_model.ts` for the conversational
- * Realtime API. OpenAI's native endpoint accepts and ignores the
- * parameter, so this is a no-op for direct connections.
+ * For OpenAI-compatible gateways (LiteLLM, Cloudflare AI Gateway, etc.) the
+ * model is included on the upgrade URL so the gateway can route by model
+ * before the subsequent `session.update` frame arrives. OpenAI's own
+ * `wss://api.openai.com/.../realtime` endpoint, on the other hand, treats a
+ * `?model=` query param as selecting a conversation session and rejects the
+ * subsequent transcription-mode `session.update` with
+ * `error.invalid_request_error.invalid_model`, so the model is intentionally
+ * omitted for native OpenAI connections — the model is conveyed via
+ * `session.update → audio.input.transcription.model` instead.
  *
  * The scheme of `baseURL` is respected: `http://` maps to `ws://`
  * and `https://` maps to `wss://`.
@@ -56,7 +59,9 @@ export function buildRealtimeSttUrl(baseURL: string | undefined, model: string):
   }
 
   url.searchParams.set('intent', 'transcription');
-  url.searchParams.set('model', model);
+  if (url.hostname !== 'api.openai.com') {
+    url.searchParams.set('model', model);
+  }
   return url.toString();
 }