From 3ca140679761ccf2c6156f132145720e2509979b Mon Sep 17 00:00:00 2001 From: Lalit Gupta Date: Wed, 11 Mar 2026 00:06:42 +0530 Subject: [PATCH 1/8] =?UTF-8?q?feat:=20remove=20transcript=20from=20channe?= =?UTF-8?q?l=20config,=20add=20isPrimary=20=E2=80=94=20sync=20with=20Pytho?= =?UTF-8?q?n=20SDK?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove `transcript` field from RecordingChannelConfig and ChannelConfig - Add `isPrimary` field to RecordingChannelConfig, ChannelConfig, and Channel class - Channel.toDict() now includes is_primary - startSession primary video selection respects isPrimary flag - Bump version to 0.2.3 - Update test/index.ts quickstart --- package.json | 2 +- src/capture/captureClient.ts | 9 +- src/capture/channel.ts | 3 + src/capture/index.ts | 4 +- src/capture/types.ts | 4 +- src/types/capture.ts | 4 +- test/index.ts | 243 +++++++++++++++++++++++++++++++++++ 7 files changed, 258 insertions(+), 11 deletions(-) create mode 100644 test/index.ts diff --git a/package.json b/package.json index c4f1392..70d1735 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "videodb", - "version": "0.2.2", + "version": "0.2.3", "description": "A NodeJS wrapper for VideoDB's API written in TypeScript", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/src/capture/captureClient.ts b/src/capture/captureClient.ts index e8160a5..f73221d 100644 --- a/src/capture/captureClient.ts +++ b/src/capture/captureClient.ts @@ -39,8 +39,8 @@ import { * await client.startSession({ * sessionId: 'ss-xxx', // Required: from CaptureSession.id * channels: [ - * { channelId: 'mic:default', type: 'audio', record: true, transcript: true }, - * { channelId: 'display:1', type: 'video', record: true }, + * { channelId: 'mic:default', type: 'audio', record: true, store: true }, + * { channelId: 'display:1', type: 'video', record: true, store: true }, * ], * }); * @@ -237,15 +237,16 @@ export class 
CaptureClient extends EventEmitter implements ChannelClient { (channel as { channel_id?: string }).channel_id ?? channel.channelId, type: channel.type, record: channel.record ?? true, - transcript: channel.transcript ?? false, store: channel.store ?? true, + is_primary: channel.isPrimary ?? false, })); if (channels.some(ch => !ch.channel_id)) { throw new Error('channels must include channelId for each channel'); } - const primaryVideo = channels.find(ch => ch.type === 'video'); + const primaryVideo = channels.find(ch => ch.is_primary && ch.type === 'video') + || channels.find(ch => ch.type === 'video'); await this.binaryManager.sendCommand('startRecording', { uploadToken: this.sessionToken, diff --git a/src/capture/channel.ts b/src/capture/channel.ts index 88ffa70..6bc7cab 100644 --- a/src/capture/channel.ts +++ b/src/capture/channel.ts @@ -12,6 +12,7 @@ export class Channel { public readonly name: string; public readonly type: 'audio' | 'video'; public store: boolean = false; + public isPrimary: boolean = false; /** Reference to the CaptureClient for pause/resume operations */ #client: ChannelClient | null = null; @@ -87,6 +88,7 @@ export class Channel { name: string; record: boolean; store: boolean; + is_primary: boolean; } { return { channel_id: this.id, @@ -94,6 +96,7 @@ export class Channel { name: this.name, record: true, store: this.store, + is_primary: this.isPrimary, }; } diff --git a/src/capture/index.ts b/src/capture/index.ts index 2010c5f..79c3baa 100644 --- a/src/capture/index.ts +++ b/src/capture/index.ts @@ -27,8 +27,8 @@ * await client.startSession({ * sessionId: 'ss-xxx', // Required: from CaptureSession.id * channels: [ - * { channelId: 'mic:default', type: 'audio', record: true, transcript: true }, - * { channelId: 'display:1', type: 'video', record: true }, + * { channelId: 'mic:default', type: 'audio', record: true, store: true }, + * { channelId: 'display:1', type: 'video', record: true, store: true }, * ], * }); * diff --git 
a/src/capture/types.ts b/src/capture/types.ts index fc62dd9..f1659e8 100644 --- a/src/capture/types.ts +++ b/src/capture/types.ts @@ -59,10 +59,10 @@ export interface RecordingChannelConfig { type: 'audio' | 'video'; /** Whether to record this channel */ record?: boolean; - /** Whether to enable transcription */ - transcript?: boolean; /** Whether to store the recording */ store?: boolean; + /** Whether this is the primary video channel */ + isPrimary?: boolean; } /** diff --git a/src/types/capture.ts b/src/types/capture.ts index 30d39f3..fa64453 100644 --- a/src/types/capture.ts +++ b/src/types/capture.ts @@ -66,10 +66,10 @@ export interface ChannelConfig { type: ChannelTypeValue; /** Whether to record this channel */ record?: boolean; - /** Whether to enable transcription for this channel (audio only) */ - transcript?: boolean; /** Whether to store the recorded content */ store?: boolean; + /** Whether this is the primary video channel */ + isPrimary?: boolean; } /** diff --git a/test/index.ts b/test/index.ts new file mode 100644 index 0000000..5623ec4 --- /dev/null +++ b/test/index.ts @@ -0,0 +1,243 @@ +import 'dotenv/config'; +import { + connect, + WebSocketChannel, +} from 'videodb'; +import type { RecordingChannelConfig } from 'videodb/capture'; + +import { CaptureClient } from 'videodb/capture'; + +const API_KEY = process.env.VIDEODB_API_KEY; +const COLLECTION_ID = process.env.VIDEODB_COLLECTION_ID || 'default'; + +if (!API_KEY) { + throw new Error('VIDEODB_API_KEY is required. 
Set it in your .env file.'); +} + +async function main() { + console.log('============================================================'); + console.log('VideoDB Capture - Node.js Quickstart'); + console.log('============================================================\n'); + + // --- Connect --- + console.log('Connecting to VideoDB...'); + const conn = connect({ apiKey: API_KEY }); + const coll = await conn.getCollection(COLLECTION_ID); + console.log(`Using collection: ${coll.id}`); + + // --- WebSocket --- + console.log('Connecting WebSocket...'); + const ws = await conn.connectWebsocket(COLLECTION_ID); + await ws.connect(); + console.log(`WebSocket connected: ${ws.connectionId}`); + + // --- Session --- + console.log('Creating capture session...'); + const session = await coll.createCaptureSession({ + endUserId: 'quickstart-user', + wsConnectionId: ws.connectionId, + metadata: { app: 'node-quickstart' }, + }); + console.log(`Session created: ${session.id}`); + + const token = await conn.generateClientToken(3600); + console.log('Client token generated'); + + // --- Capture Client --- + const client = new CaptureClient({ sessionToken: token }); + + console.log('\nRequesting permissions...'); + await client.requestPermission('microphone'); + await client.requestPermission('screen-capture'); + + console.log('Discovering channels...'); + const channels = await client.listChannels(); + for (const ch of channels.all()) { + console.log(` - ${ch.id} (${ch.type}): ${ch.name}`); + } + + const micChannel = channels.mics.default; + const displayChannel = channels.displays.default; + const systemAudioChannel = channels.systemAudio.default; + + // record: true enables recording, store: true persists to VideoDB after capture stops. 
+ const captureChannels: RecordingChannelConfig[] = []; + if (micChannel) { + captureChannels.push({ + channelId: micChannel.id, + type: 'audio', + record: true, + store: true, + }); + } + if (displayChannel) { + captureChannels.push({ + channelId: displayChannel.id, + type: 'video', + record: true, + store: true, + }); + } + if (systemAudioChannel) { + captureChannels.push({ + channelId: systemAudioChannel.id, + type: 'audio', + record: true, + store: true, + }); + } + + if (captureChannels.length === 0) { + console.log('No channels found.'); + return; + } + + console.log( + `\nStarting capture with ${captureChannels.length} channel(s):` + ); + for (const ch of captureChannels) { + console.log(` - ${ch.channelId}`); + } + + await client.startSession({ + sessionId: session.id, + channels: captureChannels, + }); + console.log('Capture started!'); + + // --- AI Pipelines --- + console.log('\nWaiting for session to become active...'); + await new Promise((resolve) => setTimeout(resolve, 3000)); + await session.refresh(); + console.log(`Session status: ${session.status}`); + + const audioStreams = session.rtstreams.filter((rts) => + rts.mediaTypes?.includes('audio') + ); + const videoStream = session.rtstreams.find((rts) => + rts.mediaTypes?.includes('video') + ); + + // Start AI on audio streams (mic + system audio) + for (const stream of audioStreams) { + console.log(` Starting audio indexing on: ${stream.id}`); + try { + await stream.indexAudio({ + prompt: 'Summarize what is being discussed', + batchConfig: { type: 'time', value: 30 }, + socketId: ws.connectionId, + }); + console.log(` Audio indexing started: ${stream.id}`); + } catch (e) { + console.error(` Failed to start audio indexing on ${stream.id}:`, e); + } + } + + // Start AI on video stream + if (videoStream) { + console.log(` Starting visual indexing on: ${videoStream.id}`); + try { + await videoStream.indexVisuals({ + prompt: 'In one sentence, describe what is on screen', + batchConfig: { type: 'time', 
value: 3, frameCount: 3 }, + socketId: ws.connectionId, + }); + console.log(` Visual indexing started: ${videoStream.id}`); + } catch (e) { + console.error(' Failed to start visual indexing:', e); + } + } + + // --- Real-time Events --- + console.log('\n============================================================'); + console.log('Recording... Press Enter to stop (or Ctrl+C to force quit)'); + console.log('============================================================\n'); + + let isShuttingDown = false; + const shutdown = async () => { + if (isShuttingDown) return; + isShuttingDown = true; + + console.log('\nStopping capture...'); + try { + await client.stopSession(); + } catch {} + try { + await client.shutdown(); + } catch {} + try { + await ws.close(); + } catch {} + + console.log('Capture stopped.'); + console.log('\n============================================================'); + console.log("What's next?"); + console.log( + ' - Try different indexAudio() prompts for richer insights' + ); + console.log(' - Build alerts with sceneIndex.createAlert()'); + console.log(' - Explore the full SDK: https://docs.videodb.io'); + console.log('============================================================'); + process.exit(0); + }; + + // Listen for Enter key to stop gracefully + process.stdin.setRawMode?.(false); + process.stdin.resume(); + process.stdin.once('data', () => shutdown()); + + process.on('SIGINT', shutdown); + process.on('SIGTERM', shutdown); + + try { + for await (const msg of ws.receive()) { + if (isShuttingDown) break; + + const channel = (msg.channel || msg.type || 'event') as string; + const data = (msg.data || {}) as Record; + // Extract short source label from rtstream_name (e.g., "Capture mic - cap-83e6" -> "mic") + const rawSource = (msg.rtstream_name || '') as string; + const sourceMatch = rawSource.match(/Capture (\w+)/); + const label = sourceMatch ? `:${sourceMatch[1]}` : (rawSource ? 
`:${rawSource}` : ''); + + if (channel === WebSocketChannel.transcript) { + const text = data.text || msg.text; + if (text) console.log(`[Transcript${label}] ${text}`); + } else if (channel === WebSocketChannel.spokenIndex) { + const text = (data.text || msg.text) as string; + if (text?.trim()) { + console.log(`\n${'*'.repeat(50)}`); + console.log(`[Audio Index${label}] ${text}`); + console.log('*'.repeat(50)); + } + } else if (channel === WebSocketChannel.sceneIndex) { + const text = (data.text || msg.text) as string; + if (text?.trim()) { + console.log(`\n${'*'.repeat(50)}`); + console.log(`[Visual Index${label}] ${text}`); + console.log('*'.repeat(50)); + } + } else if (channel === WebSocketChannel.captureSession) { + const status = data.status as string; + console.log(`\n[Session] ${status}`); + } else if (channel === WebSocketChannel.alert) { + const text = (data.text || msg.text) as string; + if (text) console.log(`\n[Alert${label}] ${text}`); + } + } + } catch (e) { + if (!isShuttingDown) { + console.error('WebSocket error:', e); + } + } + + if (!isShuttingDown) { + console.log('WebSocket connection closed unexpectedly'); + await shutdown(); + } +} + +main().catch((e) => { + console.error('Fatal error:', e); + process.exit(1); +}); From 138b78184c711599997cdcab4dd1868a47c2bd9a Mon Sep 17 00:00:00 2001 From: Lalit Gupta Date: Tue, 17 Mar 2026 11:08:04 +0530 Subject: [PATCH 2/8] chore: update capture binary to v0.2.9 Clean tarballs (no macOS ._* resource fork files). 
--- package.json | 8 ++++---- src/capture/installer.ts | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/package.json b/package.json index 70d1735..b4fb442 100644 --- a/package.json +++ b/package.json @@ -40,11 +40,11 @@ }, "binaryConfig": { "baseUrl": "https://artifacts.videodb.io/capture", - "version": "0.2.8", + "version": "0.2.9", "checksums": { - "darwin-arm64": "4aab67e524c2541bebbae24b8dd845da5d7f74fba006ce860a4914844e185c5d", - "darwin-x64": "ac67dc1a9edd2094d36e6961ed1dabab3e8b3e3e2a61655a49195b7e518901ca", - "win32-x64": "f19110d9b632c0149088abc09f4c86e0f43f64ce4b52a73bca6eb20789e156d0" + "darwin-arm64": "50ed352dc820287d720c28da8bdba0029cd0aeeb035c84f64487ff35edfaca86", + "darwin-x64": "3716f0f6bf12fc253448f90678628a0d829cd32646928e7cd84f5e24f19796d9", + "win32-x64": "dc01f195f25580edeb590b183753ceea9bab439cb36344805655a0e9a017175c" } }, "repository": { diff --git a/src/capture/installer.ts b/src/capture/installer.ts index 2169321..a1bcef6 100644 --- a/src/capture/installer.ts +++ b/src/capture/installer.ts @@ -37,11 +37,11 @@ export class RecorderInstaller { // Default binary config - can be overridden or loaded from package.json this.binaryConfig = binaryConfig || { baseUrl: 'https://artifacts.videodb.io/capture', - version: '0.2.8', + version: '0.2.9', checksums: { - 'darwin-x64': 'ac67dc1a9edd2094d36e6961ed1dabab3e8b3e3e2a61655a49195b7e518901ca', - 'darwin-arm64': '4aab67e524c2541bebbae24b8dd845da5d7f74fba006ce860a4914844e185c5d', - 'win32-x64': 'f19110d9b632c0149088abc09f4c86e0f43f64ce4b52a73bca6eb20789e156d0', + 'darwin-x64': '3716f0f6bf12fc253448f90678628a0d829cd32646928e7cd84f5e24f19796d9', + 'darwin-arm64': '50ed352dc820287d720c28da8bdba0029cd0aeeb035c84f64487ff35edfaca86', + 'win32-x64': 'dc01f195f25580edeb590b183753ceea9bab439cb36344805655a0e9a017175c', }, }; From 51008fc3b3bccb7023f55c344fe4de4209c7f206 Mon Sep 17 00:00:00 2001 From: Lalit Gupta Date: Wed, 18 Mar 2026 00:55:42 +0530 Subject: [PATCH 3/8] fix: clip 
literal types, shot URLs from search, caption warning, generateTranscript language code - clip() params: contentType and modelName now use literal union types - SearchResult passes streamLink/playerUrl from API response to Shot - SearchResponse type: stream_url -> stream_link to match server field name - CaptionAsset: console.warn when src='auto' about indexing requirement - generateTranscript: add languageCode param --- src/core/editor.ts | 6 ++++++ src/core/search/searchResult.ts | 2 ++ src/core/video.ts | 9 +++++---- src/types/response.ts | 3 ++- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/core/editor.ts b/src/core/editor.ts index c059115..b128f61 100644 --- a/src/core/editor.ts +++ b/src/core/editor.ts @@ -643,6 +643,12 @@ export class CaptionAsset { constructor(config: CaptionAssetConfig = {}) { this.src = config.src ?? 'auto'; + if (this.src === 'auto') { + console.warn( + "CaptionAsset(src='auto'): the video must be indexed " + + '(e.g. video.indexSpokenWords()) for captions to be generated.' + ); + } this.font = config.font ?? new FontStyling(); this.primaryColor = config.primaryColor ?? '&H00FFFFFF'; this.secondaryColor = config.secondaryColor ?? 
'&H000000FF'; diff --git a/src/core/search/searchResult.ts b/src/core/search/searchResult.ts index f49af89..804dbee 100644 --- a/src/core/search/searchResult.ts +++ b/src/core/search/searchResult.ts @@ -39,6 +39,8 @@ export class SearchResult { sceneIndexId: doc.sceneIndexId, sceneIndexName: doc.sceneIndexName, metadata: doc.metadata, + streamUrl: doc.streamLink, + playerUrl: doc.playerUrl, }) ); } diff --git a/src/core/video.ts b/src/core/video.ts index 3213555..5dd87ee 100644 --- a/src/core/video.ts +++ b/src/core/video.ts @@ -236,11 +236,12 @@ export class Video implements IVideo { * @returns Success status or transcript data */ public generateTranscript = async ( - force: boolean = false + force: boolean = false, + languageCode?: string ): Promise<{ success: boolean; message: string } | Transcript> => { const res = await this.#vhttp.post( [video, this.id, transcription], - { force } + { force, language_code: languageCode } ); const transcript = res.data?.wordTimestamps; @@ -818,8 +819,8 @@ export class Video implements IVideo { */ public clip = async ( prompt: string, - contentType: string, - modelName: string + contentType: 'spoken' | 'visual' | 'multimodal', + modelName: 'basic' | 'pro' | 'ultra' ): Promise => { type ClipResponse = { results: Array<{ diff --git a/src/types/response.ts b/src/types/response.ts index 68f81cd..3a11eb1 100644 --- a/src/types/response.ts +++ b/src/types/response.ts @@ -143,7 +143,8 @@ export type SearchResponse = { end: number; score: number; start: number; - stream_url: string; + stream_link?: string; + player_url?: string; text: string; scene_index_id?: string; scene_index_name?: string; From 9740fc0c59808af86f5a0c03693dd8a89fd9d552 Mon Sep 17 00:00:00 2001 From: Lalit Gupta Date: Wed, 18 Mar 2026 11:02:02 +0530 Subject: [PATCH 4/8] fix: remove record param from channel config and toDict --- src/capture/captureClient.ts | 5 ++--- src/capture/channel.ts | 2 -- src/capture/index.ts | 4 ++-- src/capture/types.ts | 2 -- 
src/types/capture.ts | 2 -- 5 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/capture/captureClient.ts b/src/capture/captureClient.ts index f73221d..7f72675 100644 --- a/src/capture/captureClient.ts +++ b/src/capture/captureClient.ts @@ -39,8 +39,8 @@ import { * await client.startSession({ * sessionId: 'ss-xxx', // Required: from CaptureSession.id * channels: [ - * { channelId: 'mic:default', type: 'audio', record: true, store: true }, - * { channelId: 'display:1', type: 'video', record: true, store: true }, + * { channelId: 'mic:default', type: 'audio', store: true }, + * { channelId: 'display:1', type: 'video', store: true }, * ], * }); * @@ -236,7 +236,6 @@ export class CaptureClient extends EventEmitter implements ChannelClient { channel_id: (channel as { channel_id?: string }).channel_id ?? channel.channelId, type: channel.type, - record: channel.record ?? true, store: channel.store ?? true, is_primary: channel.isPrimary ?? false, })); diff --git a/src/capture/channel.ts b/src/capture/channel.ts index 6bc7cab..7ccc2dd 100644 --- a/src/capture/channel.ts +++ b/src/capture/channel.ts @@ -86,7 +86,6 @@ export class Channel { channel_id: string; type: string; name: string; - record: boolean; store: boolean; is_primary: boolean; } { @@ -94,7 +93,6 @@ export class Channel { channel_id: this.id, type: this.type, name: this.name, - record: true, store: this.store, is_primary: this.isPrimary, }; diff --git a/src/capture/index.ts b/src/capture/index.ts index 79c3baa..0b4a93f 100644 --- a/src/capture/index.ts +++ b/src/capture/index.ts @@ -27,8 +27,8 @@ * await client.startSession({ * sessionId: 'ss-xxx', // Required: from CaptureSession.id * channels: [ - * { channelId: 'mic:default', type: 'audio', record: true, store: true }, - * { channelId: 'display:1', type: 'video', record: true, store: true }, + * { channelId: 'mic:default', type: 'audio', store: true }, + * { channelId: 'display:1', type: 'video', store: true }, * ], * }); * diff --git 
a/src/capture/types.ts b/src/capture/types.ts index f1659e8..8a2c458 100644 --- a/src/capture/types.ts +++ b/src/capture/types.ts @@ -57,8 +57,6 @@ export interface RecordingChannelConfig { channelId: string; /** Channel type */ type: 'audio' | 'video'; - /** Whether to record this channel */ - record?: boolean; /** Whether to store the recording */ store?: boolean; /** Whether this is the primary video channel */ diff --git a/src/types/capture.ts b/src/types/capture.ts index fa64453..e34f50a 100644 --- a/src/types/capture.ts +++ b/src/types/capture.ts @@ -64,8 +64,6 @@ export interface ChannelConfig { channelId: string; /** Type of the channel */ type: ChannelTypeValue; - /** Whether to record this channel */ - record?: boolean; /** Whether to store the recorded content */ store?: boolean; /** Whether this is the primary video channel */ From f8890eccbc253919fbf7aedf6dd627d7ff5e388e Mon Sep 17 00:00:00 2001 From: Lalit Gupta Date: Wed, 18 Mar 2026 11:40:40 +0530 Subject: [PATCH 5/8] chore: remove test quickstart file --- test/index.ts | 243 -------------------------------------------------- 1 file changed, 243 deletions(-) delete mode 100644 test/index.ts diff --git a/test/index.ts b/test/index.ts deleted file mode 100644 index 5623ec4..0000000 --- a/test/index.ts +++ /dev/null @@ -1,243 +0,0 @@ -import 'dotenv/config'; -import { - connect, - WebSocketChannel, -} from 'videodb'; -import type { RecordingChannelConfig } from 'videodb/capture'; - -import { CaptureClient } from 'videodb/capture'; - -const API_KEY = process.env.VIDEODB_API_KEY; -const COLLECTION_ID = process.env.VIDEODB_COLLECTION_ID || 'default'; - -if (!API_KEY) { - throw new Error('VIDEODB_API_KEY is required. 
Set it in your .env file.'); -} - -async function main() { - console.log('============================================================'); - console.log('VideoDB Capture - Node.js Quickstart'); - console.log('============================================================\n'); - - // --- Connect --- - console.log('Connecting to VideoDB...'); - const conn = connect({ apiKey: API_KEY }); - const coll = await conn.getCollection(COLLECTION_ID); - console.log(`Using collection: ${coll.id}`); - - // --- WebSocket --- - console.log('Connecting WebSocket...'); - const ws = await conn.connectWebsocket(COLLECTION_ID); - await ws.connect(); - console.log(`WebSocket connected: ${ws.connectionId}`); - - // --- Session --- - console.log('Creating capture session...'); - const session = await coll.createCaptureSession({ - endUserId: 'quickstart-user', - wsConnectionId: ws.connectionId, - metadata: { app: 'node-quickstart' }, - }); - console.log(`Session created: ${session.id}`); - - const token = await conn.generateClientToken(3600); - console.log('Client token generated'); - - // --- Capture Client --- - const client = new CaptureClient({ sessionToken: token }); - - console.log('\nRequesting permissions...'); - await client.requestPermission('microphone'); - await client.requestPermission('screen-capture'); - - console.log('Discovering channels...'); - const channels = await client.listChannels(); - for (const ch of channels.all()) { - console.log(` - ${ch.id} (${ch.type}): ${ch.name}`); - } - - const micChannel = channels.mics.default; - const displayChannel = channels.displays.default; - const systemAudioChannel = channels.systemAudio.default; - - // record: true enables recording, store: true persists to VideoDB after capture stops. 
- const captureChannels: RecordingChannelConfig[] = []; - if (micChannel) { - captureChannels.push({ - channelId: micChannel.id, - type: 'audio', - record: true, - store: true, - }); - } - if (displayChannel) { - captureChannels.push({ - channelId: displayChannel.id, - type: 'video', - record: true, - store: true, - }); - } - if (systemAudioChannel) { - captureChannels.push({ - channelId: systemAudioChannel.id, - type: 'audio', - record: true, - store: true, - }); - } - - if (captureChannels.length === 0) { - console.log('No channels found.'); - return; - } - - console.log( - `\nStarting capture with ${captureChannels.length} channel(s):` - ); - for (const ch of captureChannels) { - console.log(` - ${ch.channelId}`); - } - - await client.startSession({ - sessionId: session.id, - channels: captureChannels, - }); - console.log('Capture started!'); - - // --- AI Pipelines --- - console.log('\nWaiting for session to become active...'); - await new Promise((resolve) => setTimeout(resolve, 3000)); - await session.refresh(); - console.log(`Session status: ${session.status}`); - - const audioStreams = session.rtstreams.filter((rts) => - rts.mediaTypes?.includes('audio') - ); - const videoStream = session.rtstreams.find((rts) => - rts.mediaTypes?.includes('video') - ); - - // Start AI on audio streams (mic + system audio) - for (const stream of audioStreams) { - console.log(` Starting audio indexing on: ${stream.id}`); - try { - await stream.indexAudio({ - prompt: 'Summarize what is being discussed', - batchConfig: { type: 'time', value: 30 }, - socketId: ws.connectionId, - }); - console.log(` Audio indexing started: ${stream.id}`); - } catch (e) { - console.error(` Failed to start audio indexing on ${stream.id}:`, e); - } - } - - // Start AI on video stream - if (videoStream) { - console.log(` Starting visual indexing on: ${videoStream.id}`); - try { - await videoStream.indexVisuals({ - prompt: 'In one sentence, describe what is on screen', - batchConfig: { type: 'time', 
value: 3, frameCount: 3 }, - socketId: ws.connectionId, - }); - console.log(` Visual indexing started: ${videoStream.id}`); - } catch (e) { - console.error(' Failed to start visual indexing:', e); - } - } - - // --- Real-time Events --- - console.log('\n============================================================'); - console.log('Recording... Press Enter to stop (or Ctrl+C to force quit)'); - console.log('============================================================\n'); - - let isShuttingDown = false; - const shutdown = async () => { - if (isShuttingDown) return; - isShuttingDown = true; - - console.log('\nStopping capture...'); - try { - await client.stopSession(); - } catch {} - try { - await client.shutdown(); - } catch {} - try { - await ws.close(); - } catch {} - - console.log('Capture stopped.'); - console.log('\n============================================================'); - console.log("What's next?"); - console.log( - ' - Try different indexAudio() prompts for richer insights' - ); - console.log(' - Build alerts with sceneIndex.createAlert()'); - console.log(' - Explore the full SDK: https://docs.videodb.io'); - console.log('============================================================'); - process.exit(0); - }; - - // Listen for Enter key to stop gracefully - process.stdin.setRawMode?.(false); - process.stdin.resume(); - process.stdin.once('data', () => shutdown()); - - process.on('SIGINT', shutdown); - process.on('SIGTERM', shutdown); - - try { - for await (const msg of ws.receive()) { - if (isShuttingDown) break; - - const channel = (msg.channel || msg.type || 'event') as string; - const data = (msg.data || {}) as Record; - // Extract short source label from rtstream_name (e.g., "Capture mic - cap-83e6" -> "mic") - const rawSource = (msg.rtstream_name || '') as string; - const sourceMatch = rawSource.match(/Capture (\w+)/); - const label = sourceMatch ? `:${sourceMatch[1]}` : (rawSource ? 
`:${rawSource}` : ''); - - if (channel === WebSocketChannel.transcript) { - const text = data.text || msg.text; - if (text) console.log(`[Transcript${label}] ${text}`); - } else if (channel === WebSocketChannel.spokenIndex) { - const text = (data.text || msg.text) as string; - if (text?.trim()) { - console.log(`\n${'*'.repeat(50)}`); - console.log(`[Audio Index${label}] ${text}`); - console.log('*'.repeat(50)); - } - } else if (channel === WebSocketChannel.sceneIndex) { - const text = (data.text || msg.text) as string; - if (text?.trim()) { - console.log(`\n${'*'.repeat(50)}`); - console.log(`[Visual Index${label}] ${text}`); - console.log('*'.repeat(50)); - } - } else if (channel === WebSocketChannel.captureSession) { - const status = data.status as string; - console.log(`\n[Session] ${status}`); - } else if (channel === WebSocketChannel.alert) { - const text = (data.text || msg.text) as string; - if (text) console.log(`\n[Alert${label}] ${text}`); - } - } - } catch (e) { - if (!isShuttingDown) { - console.error('WebSocket error:', e); - } - } - - if (!isShuttingDown) { - console.log('WebSocket connection closed unexpectedly'); - await shutdown(); - } -} - -main().catch((e) => { - console.error('Fatal error:', e); - process.exit(1); -}); From f69d483e6b69091da671309d5064e797d460f7ab Mon Sep 17 00:00:00 2001 From: Lalit Gupta Date: Wed, 18 Mar 2026 12:07:17 +0530 Subject: [PATCH 6/8] fix: suppress verbose binary info/debug logs from console output Only surface warnings and errors from the capture binary stderr. Matches Python SDK behavior (logs binary stderr at DEBUG level). 
--- src/capture/binaryManager.ts | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/capture/binaryManager.ts b/src/capture/binaryManager.ts index 5612b7f..4259d97 100644 --- a/src/capture/binaryManager.ts +++ b/src/capture/binaryManager.ts @@ -189,7 +189,10 @@ export class BinaryManager extends EventEmitter { stderrRl.on('line', (line: string) => { this.appendError(line); - console.error(`[Capture Binary]: ${line}`); + // Only surface warnings and errors; suppress info/debug noise + if (!line.includes('[info]') && !line.includes('[debug]')) { + console.error(`[Capture Binary]: ${line}`); + } }); // Handle stdout (protocol messages) @@ -207,9 +210,11 @@ export class BinaryManager extends EventEmitter { } catch (e) { console.error('Failed to parse protocol message:', line, e); } - } else { - // Non-protocol output (debug logs from binary) - console.log(`[Capture Binary Debug]: ${line}`); + } else if (line.trim()) { + // Non-protocol output — only show if not info/debug noise + if (!line.includes('[info]') && !line.includes('[debug]')) { + console.log(`[Capture Binary]: ${line}`); + } } }); From 0ba5d9dd8f70682526112e6c489572413bd06c6b Mon Sep 17 00:00:00 2001 From: Lalit Gupta Date: Wed, 18 Mar 2026 13:17:58 +0530 Subject: [PATCH 7/8] chore: update capture binary to v0.2.10, revert log filtering, add changelog - Update binary version to 0.2.10 with new checksums (installer.ts + package.json) - Revert verbose log filtering in binaryManager.ts - Add CHANGELOG entry for 0.2.3 --- CHANGELOG.md | 17 +++++++++++++++++ package-lock.json | 4 ++-- package.json | 8 ++++---- src/capture/binaryManager.ts | 13 ++++--------- src/capture/installer.ts | 8 ++++---- 5 files changed, 31 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index db747b4..59e3f9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +## [0.2.3] (2026-03-18) + +### Changed + +- Synced channel config with Python SDK: removed 
`transcript` field, added `isPrimary` to `RecordingChannelConfig`, `ChannelConfig`, and `Channel` +- Removed `record` from `RecordingChannelConfig`, `ChannelConfig`, and `Channel.toDict()` (always sent as `true` server-side) +- Updated capture binary to v0.2.10 + +### Fixed + +- Clip literal types for `contentType` and `modelName` parameters +- Shot URLs from search results +- `CaptionAsset` now warns when `src` is `'auto'` that the video must be indexed for captions to be generated +- `generateTranscript` language code parameter + +--- + ## [0.2.2] (2026-03-10) ### Added diff --git a/package-lock.json b/package-lock.json index e5d9457..a35a17d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "videodb", - "version": "0.2.0", + "version": "0.2.3", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "videodb", - "version": "0.2.0", + "version": "0.2.3", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { diff --git a/package.json b/package.json index b4fb442..de29d26 100644 --- a/package.json +++ b/package.json @@ -40,11 +40,11 @@ }, "binaryConfig": { "baseUrl": "https://artifacts.videodb.io/capture", - "version": "0.2.9", + "version": "0.2.10", "checksums": { - "darwin-arm64": "50ed352dc820287d720c28da8bdba0029cd0aeeb035c84f64487ff35edfaca86", - "darwin-x64": "3716f0f6bf12fc253448f90678628a0d829cd32646928e7cd84f5e24f19796d9", - "win32-x64": "dc01f195f25580edeb590b183753ceea9bab439cb36344805655a0e9a017175c" + "darwin-arm64": "fc4be7de94153aa9f492b014db7b4f7378e45c3c6f1b5f3f838c2c007bde832f", + "darwin-x64": "bdfc3aa33a961ff532a99639ea95c181d51baee74a1eda555598ce45c30908ac", + "win32-x64": "3f9b9a355edc54dd06cef051b0ec7ed55df6beef6eb9e299fa6ba5f02ba3a50a" } }, "repository": { diff --git a/src/capture/binaryManager.ts b/src/capture/binaryManager.ts index 4259d97..5612b7f 100644 --- a/src/capture/binaryManager.ts +++ b/src/capture/binaryManager.ts @@ -189,10 +189,7 @@ export class BinaryManager extends EventEmitter { stderrRl.on('line', (line: string) => { this.appendError(line); - // Only surface warnings and errors; suppress
info/debug noise - if (!line.includes('[info]') && !line.includes('[debug]')) { - console.error(`[Capture Binary]: ${line}`); - } + console.error(`[Capture Binary]: ${line}`); }); // Handle stdout (protocol messages) @@ -210,11 +207,9 @@ export class BinaryManager extends EventEmitter { } catch (e) { console.error('Failed to parse protocol message:', line, e); } - } else if (line.trim()) { - // Non-protocol output — only show if not info/debug noise - if (!line.includes('[info]') && !line.includes('[debug]')) { - console.log(`[Capture Binary]: ${line}`); - } + } else { + // Non-protocol output (debug logs from binary) + console.log(`[Capture Binary Debug]: ${line}`); } }); diff --git a/src/capture/installer.ts b/src/capture/installer.ts index a1bcef6..367d406 100644 --- a/src/capture/installer.ts +++ b/src/capture/installer.ts @@ -37,11 +37,11 @@ export class RecorderInstaller { // Default binary config - can be overridden or loaded from package.json this.binaryConfig = binaryConfig || { baseUrl: 'https://artifacts.videodb.io/capture', - version: '0.2.9', + version: '0.2.10', checksums: { - 'darwin-x64': '3716f0f6bf12fc253448f90678628a0d829cd32646928e7cd84f5e24f19796d9', - 'darwin-arm64': '50ed352dc820287d720c28da8bdba0029cd0aeeb035c84f64487ff35edfaca86', - 'win32-x64': 'dc01f195f25580edeb590b183753ceea9bab439cb36344805655a0e9a017175c', + 'darwin-x64': 'bdfc3aa33a961ff532a99639ea95c181d51baee74a1eda555598ce45c30908ac', + 'darwin-arm64': 'fc4be7de94153aa9f492b014db7b4f7378e45c3c6f1b5f3f838c2c007bde832f', + 'win32-x64': '3f9b9a355edc54dd06cef051b0ec7ed55df6beef6eb9e299fa6ba5f02ba3a50a', }, }; From 97c530d44bc98f0ffb0c912b165ba92e3cdccce0 Mon Sep 17 00:00:00 2001 From: Lalit Gupta Date: Wed, 18 Mar 2026 21:32:56 +0530 Subject: [PATCH 8/8] feat: separate camera channels from displays in Channels grouping Channels with `camera:` prefix now go to `channels.cameras` instead of polluting `channels.displays`. 
Cameras are excluded from `all()` since camera capture is not yet supported. --- src/capture/channel.ts | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/capture/channel.ts b/src/capture/channel.ts index 7ccc2dd..d97831c 100644 --- a/src/capture/channel.ts +++ b/src/capture/channel.ts @@ -151,19 +151,23 @@ export class Channels { public displays: ChannelList; /** System audio channels */ public systemAudio: ChannelList; + /** Camera channels (not yet supported for capture) */ + public cameras: ChannelList; constructor( mics: AudioChannel[] = [], displays: VideoChannel[] = [], - systemAudio: AudioChannel[] = [] + systemAudio: AudioChannel[] = [], + cameras: VideoChannel[] = [] ) { this.mics = new ChannelList(...mics); this.displays = new ChannelList(...displays); this.systemAudio = new ChannelList(...systemAudio); + this.cameras = new ChannelList(...cameras); } /** - * Return a flat list of all channels + * Return a flat list of all capturable channels (excludes cameras) */ public all(): Channel[] { return [ @@ -174,7 +178,7 @@ export class Channels { } toString(): string { - return `Channels(mics=${this.mics.length}, displays=${this.displays.length}, systemAudio=${this.systemAudio.length})`; + return `Channels(mics=${this.mics.length}, displays=${this.displays.length}, systemAudio=${this.systemAudio.length}, cameras=${this.cameras.length})`; } } @@ -210,6 +214,7 @@ export function groupChannels( const mics: AudioChannel[] = []; const displays: VideoChannel[] = []; const systemAudio: AudioChannel[] = []; + const cameras: VideoChannel[] = []; for (const ch of channels) { const channelId = ch.channelId; @@ -223,6 +228,8 @@ export function groupChannels( displays.push(new VideoChannel(ch, client)); } else if (channelId.startsWith('system_audio:')) { systemAudio.push(new AudioChannel(ch, client)); + } else if (channelId.startsWith('camera:')) { + cameras.push(new VideoChannel(ch, client)); } else if (ch.type === 'audio') { // 
Fallback for unknown audio channels mics.push(new AudioChannel(ch, client)); @@ -232,5 +239,5 @@ export function groupChannels( } } - return new Channels(mics, displays, systemAudio); + return new Channels(mics, displays, systemAudio, cameras); }