From a72ffc94febc09794be33d1ac2f07d0d8b4dd7e8 Mon Sep 17 00:00:00 2001
From: Claude
Date: Tue, 5 May 2026 15:30:15 +0000
Subject: [PATCH] Sync types and TEE retry logic with Python SDK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Brings the TS SDK's LLM response surface in line with the Python SDK.

- TextGenerationOutput: replace `transactionHash` with the Python field
  set — `dataSettlementTransactionHash`, `dataSettlementBlobId`,
  `chatOutput`, `completionOutput`, `usage`, `paymentHash`,
  `teeSignature`, `teeTimestamp`, `teeId`, `teeEndpoint`,
  `teePaymentAddress`, `finishReason`.
- StreamChunk: add TEE attestation and data settlement fields populated
  on the final chunk; rename `is_final` to `isFinal` and the exported
  `StreamUsage` type to `TokenUsage`. SSE parser now handles servers
  that send the non-streaming `message` key in a delta event (mirrors
  Python's `from_sse_data`).
- X402SettlementMode: align enum values with the wire protocol used by
  the Python SDK (`private` / `batch` / `individual`).
- TEE_LLM: refresh the model list (GPT-5 family, Claude
  Sonnet/Haiku/Opus 4.5+, Gemini 3, Grok 4 family, ByteDance Seed).
- Add `ResponseFormat` type with json_object/json_schema enforcement
  (and the Anthropic + json_object guard from the Python SDK).
- llm.ts: read x-settlement-tx-hash and x-settlement-walrus-blob-id
  response headers; flatten Anthropic content-block arrays; fall back
  to non-streaming for tool-call requests and emit a single final
  StreamChunk; only retry on connection-level failures (preserve the
  HTTP-status no-retry rule).
- teeConnection.ts: dedupe concurrent reconnect() calls and guard the
  refresh loop after close.
---
 examples/llm_chat.ts         |  12 +-
 examples/llm_chat_stream.ts  |   2 +-
 src/__tests__/client.test.ts |  12 +-
 src/index.ts                 |   3 +-
 src/llm.ts                   | 236 +++++++++++++++++++++++++++++------
 src/teeConnection.ts         |  25 +++-
 src/types.ts                 | 176 +++++++++++++++++++++-----
 7 files changed, 383 insertions(+), 83 deletions(-)

diff --git a/examples/llm_chat.ts b/examples/llm_chat.ts
index dedca6e..e70e51c 100644
--- a/examples/llm_chat.ts
+++ b/examples/llm_chat.ts
@@ -22,11 +22,19 @@ async function main() {
   const result = await client.llm.chat({
     model: TEE_LLM.GPT_4_1_2025_04_14,
     messages,
-    x402SettlementMode: X402SettlementMode.SETTLE_METADATA,
+    x402SettlementMode: X402SettlementMode.INDIVIDUAL_FULL,
   });
 
   console.log(`Response: ${result.chatOutput?.content}`);
-  console.log(`Payment hash: ${result.transactionHash}`);
+  console.log(`Payment hash: ${result.paymentHash ?? "(none)"}`);
"(none)"}`); + if (result.dataSettlementTransactionHash) { + console.log( + `Data settlement tx: ${result.dataSettlementTransactionHash}`, + ); + } + if (result.teeSignature) { + console.log(`TEE signature: ${result.teeSignature.slice(0, 16)}…`); + } } main().catch((err) => { diff --git a/examples/llm_chat_stream.ts b/examples/llm_chat_stream.ts index 71e1685..e70f4a3 100644 --- a/examples/llm_chat_stream.ts +++ b/examples/llm_chat_stream.ts @@ -20,7 +20,7 @@ async function main() { const stream = client.llm.chat({ model: TEE_LLM.GPT_4_1_2025_04_14, messages, - x402SettlementMode: X402SettlementMode.SETTLE_METADATA, + x402SettlementMode: X402SettlementMode.INDIVIDUAL_FULL, stream: true, maxTokens: 1000, }); diff --git a/src/__tests__/client.test.ts b/src/__tests__/client.test.ts index 8c36b66..6e40192 100644 --- a/src/__tests__/client.test.ts +++ b/src/__tests__/client.test.ts @@ -23,14 +23,16 @@ describe("Client construction", () => { describe("Public exports", () => { it("exposes TEE_LLM models with provider/model format", () => { - expect(TEE_LLM.CLAUDE_3_5_HAIKU).toBe("anthropic/claude-3.5-haiku"); - expect(TEE_LLM.GPT_4O).toBe("openai/gpt-4o"); + expect(TEE_LLM.CLAUDE_HAIKU_4_5).toBe("anthropic/claude-haiku-4-5"); + expect(TEE_LLM.GPT_5).toBe("openai/gpt-5"); + expect(TEE_LLM.GEMINI_3_FLASH).toBe("google/gemini-3-flash-preview"); + expect(TEE_LLM.GROK_4).toBe("x-ai/grok-4"); }); it("exposes X402SettlementMode values matching the wire protocol", () => { - expect(X402SettlementMode.SETTLE).toBe("settle"); - expect(X402SettlementMode.SETTLE_BATCH).toBe("settle-batch"); - expect(X402SettlementMode.SETTLE_METADATA).toBe("settle-metadata"); + expect(X402SettlementMode.PRIVATE).toBe("private"); + expect(X402SettlementMode.BATCH_HASHED).toBe("batch"); + expect(X402SettlementMode.INDIVIDUAL_FULL).toBe("individual"); }); it("defaults the settlement network to base", () => { diff --git a/src/index.ts b/src/index.ts index 1955ba4..4986103 100644 --- a/src/index.ts +++ b/src/index.ts @@ -8,11 +8,12 @@ export type { ChatParams, ClientConfig, CompletionParams, + ResponseFormat, StreamChoice, StreamChunk, StreamDelta, - StreamUsage, TextGenerationOutput, + TokenUsage, Tool, ToolFunction, } from "./types"; diff --git a/src/llm.ts b/src/llm.ts index c9ad1eb..bcea80f 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -6,11 +6,14 @@ import { privateKeyToAccount } from "viem/accounts"; import type { Agent } from "undici"; import { ChatParams, + ChatMessage, CompletionParams, OpenGradientError, + ResponseFormat, StreamChoice, StreamChunk, TextGenerationOutput, + TokenUsage, X402SettlementMode, } from "./types"; import type { ActiveTEE, TEEConnection } from "./teeConnection"; @@ -18,6 +21,11 @@ import type { ActiveTEE, TEEConnection } from "./teeConnection"; const X402_PLACEHOLDER_API_KEY = "0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"; const X402_PROCESSING_HASH_HEADER = "x-processing-hash"; +const X402_DATA_SETTLEMENT_TX_HASH_HEADER = "x-settlement-tx-hash"; +const X402_DATA_SETTLEMENT_BLOB_ID_HEADER = "x-settlement-walrus-blob-id"; + +const CHAT_ENDPOINT = "/v1/chat/completions"; +const COMPLETION_ENDPOINT = "/v1/completions"; export interface LLMConfig { privateKey: `0x${string}`; @@ -57,7 +65,7 @@ export class LLM { maxTokens = 100, stopSequence, temperature = 0.0, - x402SettlementMode = X402SettlementMode.SETTLE_BATCH, + x402SettlementMode = X402SettlementMode.BATCH_HASHED, } = params; const payload: Record = { @@ -68,17 +76,27 @@ export class LLM { }; if (stopSequence && 
 
-    const { response } = await this.requestWithRetry(
-      "/v1/completions",
+    const { response, tee } = await this.requestWithRetry(
+      COMPLETION_ENDPOINT,
       payload,
       x402SettlementMode,
     );
 
-    const result = (await response.json()) as { completion?: string };
+    const result = (await response.json()) as {
+      completion?: string;
+      tee_signature?: string;
+      tee_timestamp?: string;
+    };
 
     return {
-      transactionHash: "external",
       completionOutput: result.completion,
-      paymentHash: response.headers.get(X402_PROCESSING_HASH_HEADER) ?? "",
+      paymentHash: response.headers.get(X402_PROCESSING_HASH_HEADER) ?? undefined,
+      dataSettlementTransactionHash: dataSettlementTxHash(response),
+      dataSettlementBlobId: dataSettlementBlobId(response),
+      teeSignature: result.tee_signature,
+      teeTimestamp: result.tee_timestamp,
+      teeId: tee.teeId,
+      teeEndpoint: tee.endpoint,
+      teePaymentAddress: tee.paymentAddress,
     };
   }
 
@@ -93,7 +111,23 @@
   chat(
     params: ChatParams & { stream?: boolean },
   ): Promise<TextGenerationOutput> | AsyncIterable<StreamChunk> {
+    if (params.responseFormat?.type === "json_object") {
+      const provider = params.model.split("/")[0];
+      if (provider === "anthropic") {
+        throw new OpenGradientError(
+          "Anthropic models do not support response_format type 'json_object'. " +
+            "Use { type: 'json_schema', jsonSchema: {...} } with an explicit schema instead.",
+        );
+      }
+    }
+
     if (params.stream) {
+      // Tool-call streaming responses from the TEE proxy omit tool call
+      // content from SSE events; fall back to non-streaming and emit a single
+      // final chunk. Mirrors `_chat_tools_as_stream` in the Python SDK.
+      if (params.tools && params.tools.length) {
+        return this.chatToolsAsStream(params);
+      }
       return this.chatStream(params);
     }
     return this.chatNonStreaming(params);
   }
 
@@ -103,17 +137,22 @@ private async chatNonStreaming(
     params: ChatParams,
   ): Promise<TextGenerationOutput> {
     const payload = this.buildChatPayload(params, false);
-    const { response } = await this.requestWithRetry(
-      "/v1/chat/completions",
+    const settlementMode =
+      params.x402SettlementMode ?? X402SettlementMode.BATCH_HASHED;
+    const { response, tee } = await this.requestWithRetry(
+      CHAT_ENDPOINT,
       payload,
-      params.x402SettlementMode ?? X402SettlementMode.SETTLE_BATCH,
+      settlementMode,
     );
 
     const result = (await response.json()) as {
       choices?: Array<{
-        message?: any;
+        message?: ChatMessage;
         finish_reason?: string;
       }>;
+      usage?: TokenUsage;
+      tee_signature?: string;
+      tee_timestamp?: string;
     };
 
     const choices = result.choices;
@@ -123,21 +162,64 @@
       );
     }
 
+    const message = choices[0].message ?? { role: "assistant" };
+    // Some providers (Anthropic via the proxy) return content as an array of
+    // typed blocks; flatten to a plain string for parity with Python.
+    if (Array.isArray((message as any).content)) {
+      message.content = ((message as any).content as any[])
+        .filter((b) => b && typeof b === "object" && b.type === "text")
+        .map((b) => b.text ?? "")
+        .join(" ")
+        .trim();
+    }
+
     return {
-      transactionHash: "external",
       finishReason: choices[0].finish_reason,
-      chatOutput: choices[0].message,
-      paymentHash: response.headers.get(X402_PROCESSING_HASH_HEADER) ?? "",
+      chatOutput: message,
+      usage: result.usage,
+      paymentHash: response.headers.get(X402_PROCESSING_HASH_HEADER) ?? undefined,
+      dataSettlementTransactionHash: dataSettlementTxHash(response),
+      dataSettlementBlobId: dataSettlementBlobId(response),
+      teeSignature: result.tee_signature,
+      teeTimestamp: result.tee_timestamp,
+      teeId: tee.teeId,
+      teeEndpoint: tee.endpoint,
+      teePaymentAddress: tee.paymentAddress,
     };
   }
 
   private async *chatStream(params: ChatParams): AsyncIterable<StreamChunk> {
     const payload = this.buildChatPayload(params, true);
-    const { response } = await this.requestWithRetry(
-      "/v1/chat/completions",
-      payload,
-      params.x402SettlementMode ?? X402SettlementMode.SETTLE_BATCH,
-    );
+    const settlementMode =
+      params.x402SettlementMode ?? X402SettlementMode.BATCH_HASHED;
+
+    let response: Response;
+    let tee: ActiveTEE;
+    try {
+      ({ response, tee } = await this.sendOnce(
+        CHAT_ENDPOINT,
+        payload,
+        settlementMode,
+      ));
+    } catch (e) {
+      if (e instanceof OpenGradientError && e.statusCode !== undefined) {
+        // Server responded with a non-2xx — don't retry.
+        throw e;
+      }
+      // Connection-level failure during stream setup: re-resolve and retry once.
+      try {
+        await this.config.connection.reconnect();
+      } catch (reconnectErr) {
+        throw new OpenGradientError(
+          `TEE LLM stream failed and registry refresh failed: ${String(reconnectErr)}`,
+        );
+      }
+      ({ response, tee } = await this.sendOnce(
+        CHAT_ENDPOINT,
+        payload,
+        settlementMode,
+      ));
+    }
 
     if (!response.body) {
       throw new OpenGradientError("TEE LLM chat stream returned empty body");
     }
@@ -146,6 +228,7 @@
     const reader = response.body.getReader();
     const decoder = new TextDecoder();
     let buffer = "";
+    let pendingFinal: StreamChunk | null = null;
 
     try {
       while (true) {
@@ -159,22 +242,70 @@
           buffer = buffer.slice(newlineIdx + 1);
           if (!line || !line.startsWith("data: ")) continue;
 
-          const dataStr = line.slice(6);
-          if (dataStr === "[DONE]") return;
+          const dataStr = line.slice(6).trim();
+          if (dataStr === "[DONE]") {
+            if (pendingFinal) yield pendingFinal;
+            return;
+          }
 
+          let data: any;
           try {
-            const data = JSON.parse(dataStr);
-            yield parseStreamChunk(data);
+            data = JSON.parse(dataStr);
           } catch {
             // Skip malformed chunks
+            continue;
+          }
+          const chunk = parseStreamChunk(data);
+          if (chunk.isFinal) {
+            chunk.dataSettlementTransactionHash =
+              chunk.dataSettlementTransactionHash ??
+              dataSettlementTxHash(response);
+            chunk.dataSettlementBlobId =
+              chunk.dataSettlementBlobId ?? dataSettlementBlobId(response);
+            chunk.teeId = tee.teeId;
+            chunk.teeEndpoint = tee.endpoint;
+            chunk.teePaymentAddress = tee.paymentAddress;
+            pendingFinal = chunk;
+            continue;
           }
+          yield chunk;
         }
       }
+      if (pendingFinal) yield pendingFinal;
     } finally {
       reader.releaseLock();
     }
   }
 
+  private async *chatToolsAsStream(
+    params: ChatParams,
+  ): AsyncIterable<StreamChunk> {
+    const result = await this.chatNonStreaming(params);
+    const chatOutput = result.chatOutput ?? { role: "assistant" };
+    yield {
+      choices: [
+        {
+          delta: {
+            role: chatOutput.role,
+            content: chatOutput.content ?? undefined,
+            tool_calls: chatOutput.tool_calls,
+          },
+          index: 0,
+          finish_reason: result.finishReason ?? null,
+        },
+      ],
+      model: stripProvider(params.model),
+      isFinal: true,
+      teeSignature: result.teeSignature,
+      teeTimestamp: result.teeTimestamp,
+      teeId: result.teeId,
+      teeEndpoint: result.teeEndpoint,
+      teePaymentAddress: result.teePaymentAddress,
+      dataSettlementTransactionHash: result.dataSettlementTransactionHash,
+      dataSettlementBlobId: result.dataSettlementBlobId,
+    };
+  }
+
   private buildChatPayload(
     params: ChatParams,
     stream: boolean,
@@ -187,6 +318,7 @@
       temperature = 0.0,
       tools,
       toolChoice,
+      responseFormat,
     } = params;
 
     const payload: Record<string, unknown> = {
@@ -201,6 +333,9 @@
       payload.tools = tools;
       payload.tool_choice = toolChoice ?? "auto";
     }
+    if (responseFormat) {
+      payload.response_format = serializeResponseFormat(responseFormat);
+    }
 
     return payload;
   }
 
@@ -229,7 +364,8 @@
   /**
    * Send a request, lazily resolving the TEE endpoint. On a connection-level
    * failure the TEE is re-resolved from the registry and the request is
-   * retried once.
+   * retried once. Server-side HTTP errors (non-2xx) are not retried, matching
+   * the Python SDK's `_call_with_tee_retry` behavior.
    */
   private async requestWithRetry(
     path: string,
@@ -299,18 +435,42 @@ function trimSlash(url: string): string {
   return url.endsWith("/") ? url.slice(0, -1) : url;
 }
 
+function dataSettlementTxHash(response: Response): string | undefined {
+  return response.headers.get(X402_DATA_SETTLEMENT_TX_HASH_HEADER) ?? undefined;
+}
+
+function dataSettlementBlobId(response: Response): string | undefined {
+  return response.headers.get(X402_DATA_SETTLEMENT_BLOB_ID_HEADER) ?? undefined;
+}
+
+function serializeResponseFormat(format: ResponseFormat): Record<string, unknown> {
+  if (format.type === "json_schema" && !format.jsonSchema) {
+    throw new OpenGradientError(
+      "ResponseFormat.jsonSchema is required when type='json_schema'",
+    );
+  }
+  const out: Record<string, unknown> = { type: format.type };
+  if (format.jsonSchema) out.json_schema = format.jsonSchema;
+  return out;
+}
+
 function parseStreamChunk(data: any): StreamChunk {
-  const choices: StreamChoice[] = (data.choices ?? []).map((c: any) => ({
-    delta: {
-      content: c.delta?.content,
-      role: c.delta?.role,
-      tool_calls: c.delta?.tool_calls,
-    },
-    index: c.index ?? 0,
-    finish_reason: c.finish_reason ?? null,
-  }));
-
-  const usage = data.usage
+  const choices: StreamChoice[] = (data.choices ?? []).map((c: any) => {
+    // The TEE proxy sometimes sends SSE events using the non-streaming
+    // "message" key instead of the standard streaming "delta" key.
+    const deltaSrc = c.delta ?? c.message ?? {};
+    return {
+      delta: {
+        content: deltaSrc.content,
+        role: deltaSrc.role,
+        tool_calls: deltaSrc.tool_calls,
+      },
+      index: c.index ?? 0,
+      finish_reason: c.finish_reason ?? null,
+    };
+  });
+
+  const usage: TokenUsage | undefined = data.usage
     ? {
         prompt_tokens: data.usage.prompt_tokens ?? 0,
         completion_tokens: data.usage.completion_tokens ?? 0,
@@ -318,7 +478,7 @@
       }
     : undefined;
 
-  const is_final =
+  const isFinal =
     choices.some(
       (c) => c.finish_reason !== null && c.finish_reason !== undefined,
     ) || !!usage;
@@ -327,6 +487,10 @@
     choices,
     model: data.model ?? "unknown",
"unknown", usage, - is_final, + isFinal, + teeSignature: data.tee_signature, + teeTimestamp: data.tee_timestamp, + dataSettlementTransactionHash: data.data_settlement_transaction_hash, + dataSettlementBlobId: data.data_settlement_blob_id, }; } diff --git a/src/teeConnection.ts b/src/teeConnection.ts index 2cf93d4..fee9bb1 100644 --- a/src/teeConnection.ts +++ b/src/teeConnection.ts @@ -58,6 +58,7 @@ export interface TEEConnection { close(): Promise; } +/** Re-resolve TEE from the registry every 5 minutes. */ const REFRESH_INTERVAL_MS = 5 * 60 * 1000; /** @@ -114,8 +115,10 @@ export class StaticTEEConnection implements TEEConnection { */ export class RegistryTEEConnection implements TEEConnection { private active: ActiveTEE | null = null; + /** In-flight connect promise, used to dedupe concurrent resolves. */ private connecting: Promise | null = null; private refreshTimer: NodeJS.Timeout | null = null; + private closed = false; constructor(private readonly registry: TEERegistry) {} @@ -131,17 +134,26 @@ export class RegistryTEEConnection implements TEEConnection { } async reconnect(): Promise { + if (this.closed) return; + // Coalesce concurrent reconnect attempts onto a single resolution. + if (!this.connecting) this.connecting = this.connect(); const old = this.active?.dispatcher; - this.active = await this.connect(); try { - await old?.close(); - } catch { - /* ignore */ + this.active = await this.connecting; + } finally { + this.connecting = null; + } + if (old && old !== this.active.dispatcher) { + try { + await old.close(); + } catch { + /* ignore */ + } } } ensureRefreshLoop(): void { - if (this.refreshTimer) return; + if (this.refreshTimer || this.closed) return; this.refreshTimer = setInterval(() => { void this.runHealthCheck(); }, REFRESH_INTERVAL_MS); @@ -151,6 +163,7 @@ export class RegistryTEEConnection implements TEEConnection { } async close(): Promise { + this.closed = true; if (this.refreshTimer) { clearInterval(this.refreshTimer); this.refreshTimer = null; @@ -183,7 +196,7 @@ export class RegistryTEEConnection implements TEEConnection { } private async runHealthCheck(): Promise { - if (!this.active) return; + if (!this.active || this.closed) return; try { const tees = await this.registry.getActiveTEEsByType(TEE_TYPE_LLM_PROXY); if (tees.some((t) => t.teeId === this.active!.teeId)) return; diff --git a/src/types.ts b/src/types.ts index c5bbd12..b6d03f8 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,16 +1,25 @@ /** * Settlement modes for x402 payment protocol transactions. * - * Controls how inference data is recorded on-chain for payment settlement. + * These modes control how inference data is recorded on-chain for payment + * settlement and auditability. Each mode offers different trade-offs between + * data completeness, privacy, and transaction costs. * - * - SETTLE: Records input/output hashes only (most privacy-preserving). - * - SETTLE_METADATA: Records full model info, complete input/output data, and metadata. - * - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient). + * - PRIVATE: Payment-only settlement. Only the payment is settled on-chain — + * no input or output hashes are posted. Inference data remains completely + * off-chain, ensuring maximum privacy. + * - BATCH_HASHED: Batch settlement with hashes (default). Aggregates multiple + * inferences into a single settlement transaction using a Merkle tree + * containing input hashes, output hashes, and signatures. Most cost-efficient + * for high-volume applications. 
+ * - INDIVIDUAL_FULL: Individual settlement with full metadata. Records input + * data, output data, timestamp, and verification on-chain. Higher gas cost + * in exchange for maximum auditability. */ export enum X402SettlementMode { - SETTLE = "settle", - SETTLE_METADATA = "settle-metadata", - SETTLE_BATCH = "settle-batch", + PRIVATE = "private", + BATCH_HASHED = "batch", + INDIVIDUAL_FULL = "individual", } /** @@ -22,27 +31,50 @@ export enum X402SettlementMode { export enum TEE_LLM { // OpenAI models via TEE GPT_4_1_2025_04_14 = "openai/gpt-4.1-2025-04-14", - GPT_4O = "openai/gpt-4o", + GPT_4_1_MINI = "openai/gpt-4.1-mini", + GPT_4_1_NANO = "openai/gpt-4.1-nano", + O3 = "openai/o3", O4_MINI = "openai/o4-mini", + GPT_5 = "openai/gpt-5", + GPT_5_MINI = "openai/gpt-5-mini", + GPT_5_2 = "openai/gpt-5.2", + GPT_5_4 = "openai/gpt-5.4", + GPT_5_4_MINI = "openai/gpt-5.4-mini", + GPT_5_4_NANO = "openai/gpt-5.4-nano", + GPT_5_5 = "openai/gpt-5.5", // Anthropic models via TEE - CLAUDE_3_7_SONNET = "anthropic/claude-3.7-sonnet", - CLAUDE_3_5_HAIKU = "anthropic/claude-3.5-haiku", - CLAUDE_4_0_SONNET = "anthropic/claude-4.0-sonnet", + CLAUDE_SONNET_4_5 = "anthropic/claude-sonnet-4-5", + CLAUDE_SONNET_4_6 = "anthropic/claude-sonnet-4-6", + CLAUDE_HAIKU_4_5 = "anthropic/claude-haiku-4-5", + CLAUDE_OPUS_4_5 = "anthropic/claude-opus-4-5", + CLAUDE_OPUS_4_6 = "anthropic/claude-opus-4-6", + CLAUDE_OPUS_4_7 = "anthropic/claude-opus-4-7", // Google models via TEE + // Note: gemini-2.5-flash, gemini-2.5-pro, and gemini-2.5-flash-lite are + // scheduled for deprecation on June 17, 2026 (flash-lite: July 22, 2026). + // Use the Gemini 3 replacements below for new integrations. GEMINI_2_5_FLASH = "google/gemini-2.5-flash", GEMINI_2_5_PRO = "google/gemini-2.5-pro", - GEMINI_2_0_FLASH = "google/gemini-2.0-flash", GEMINI_2_5_FLASH_LITE = "google/gemini-2.5-flash-lite", + GEMINI_3_FLASH = "google/gemini-3-flash-preview", + GEMINI_3_1_PRO_PREVIEW = "google/gemini-3.1-pro-preview", + GEMINI_3_1_FLASH_LITE_PREVIEW = "google/gemini-3.1-flash-lite-preview", // xAI Grok models via TEE - GROK_3_MINI_BETA = "x-ai/grok-3-mini-beta", - GROK_3_BETA = "x-ai/grok-3-beta", - GROK_2_1212 = "x-ai/grok-2-1212", - GROK_2_VISION_LATEST = "x-ai/grok-2-vision-latest", - GROK_4_1_FAST = "x-ai/grok-4.1-fast", + GROK_4 = "x-ai/grok-4", + GROK_4_FAST = "x-ai/grok-4-fast", + GROK_4_1_FAST = "x-ai/grok-4-1-fast", GROK_4_1_FAST_NON_REASONING = "x-ai/grok-4-1-fast-non-reasoning", + GROK_4_20_REASONING = "x-ai/grok-4.20-reasoning", + GROK_4_20_NON_REASONING = "x-ai/grok-4.20-non-reasoning", + GROK_CODE_FAST_1 = "x-ai/grok-code-fast-1", + + // ByteDance Seed models via TEE (BytePlus ModelArk) + SEED_1_6 = "bytedance/seed-1.6", + SEED_1_8 = "bytedance/seed-1.8", + SEED_2_0_LITE = "bytedance/seed-2.0-lite", } export interface ChatMessage { @@ -50,6 +82,7 @@ export interface ChatMessage { content?: string | null; name?: string; + /** OpenAI-style tool calls. Snake-case to match the wire format. */ tool_calls?: any[]; tool_call_id?: string; } @@ -66,6 +99,24 @@ export interface Tool { function: ToolFunction; } +/** + * Controls the output format enforced by the TEE gateway. + * + * Use `type: "json_object"` to receive any valid JSON object (supported by + * OpenAI, Gemini, and Grok). Use `type: "json_schema"` with a `jsonSchema` + * definition to enforce a specific schema (supported by all providers, + * including Anthropic). 
+ */
+export interface ResponseFormat {
+  type: "text" | "json_object" | "json_schema";
+  /** Required when `type` is `"json_schema"`. Must contain `name` and `schema`. */
+  jsonSchema?: {
+    name: string;
+    schema: Record<string, unknown>;
+    strict?: boolean;
+  };
+}
+
 export interface CompletionParams {
   model: TEE_LLM;
   prompt: string;
@@ -83,29 +134,78 @@
   temperature?: number;
   tools?: Tool[];
   toolChoice?: string;
+  responseFormat?: ResponseFormat;
   x402SettlementMode?: X402SettlementMode;
 }
 
+/** Token usage for a single LLM response. */
+export interface TokenUsage {
+  prompt_tokens: number;
+  completion_tokens: number;
+  total_tokens: number;
+}
+
 /**
- * Output structure for non-streaming text generation requests.
+ * Output from a non-streaming `chat()` or `completion()` call.
+ *
+ * For chat requests the response is in `chatOutput`; for completion requests
+ * it is in `completionOutput`. Only the field matching the request type will
+ * be populated.
+ *
+ * Successful responses include a `teeSignature` and `teeTimestamp` that can
+ * be used to cryptographically verify the inference was performed inside a
+ * TEE enclave.
  */
 export interface TextGenerationOutput {
-  /** Blockchain transaction hash. "external" for TEE provider responses. */
-  transactionHash: string;
-  /** Reason for completion (e.g. 'stop', 'tool_calls'). */
+  /**
+   * Blockchain transaction hash for the data settlement transaction.
+   * `undefined` when the provider does not return data settlement metadata.
+   */
+  dataSettlementTransactionHash?: string;
+  /**
+   * Walrus blob ID for individual data settlement. `undefined` for
+   * private/batch settlement or when the provider does not return it.
+   */
+  dataSettlementBlobId?: string;
+  /**
+   * Reason the model stopped generating (e.g. `"stop"`, `"tool_calls"`,
+   * `"error"`). Only populated for chat requests.
+   */
   finishReason?: string;
-  /** Chat response message containing role, content, tool calls, etc. */
+  /**
+   * Assistant message returned by a chat request. Contains `role`, `content`,
+   * and optionally `tool_calls`.
+   */
   chatOutput?: ChatMessage;
-  /** Raw text output from completion-style generation. */
+  /** Raw text returned by a completion request. */
   completionOutput?: string;
-  /** x402 payment hash returned by the server. */
+  /**
+   * Token usage for the request. Contains `prompt_tokens`,
+   * `completion_tokens`, and `total_tokens` when reported by the server.
+   */
+  usage?: TokenUsage;
+  /** Payment hash for the x402 transaction. */
   paymentHash?: string;
+  /** RSA-PSS signature over the response produced by the TEE enclave. */
+  teeSignature?: string;
+  /** ISO-8601 timestamp from the TEE at signing time. */
+  teeTimestamp?: string;
+  /**
+   * On-chain TEE registry ID (keccak256 of the enclave's public key) of the
+   * TEE that served this request.
+   */
+  teeId?: string;
+  /** Endpoint URL of the TEE that served this request, as registered on-chain. */
+  teeEndpoint?: string;
+  /** Payment address registered for the TEE that served this request. */
+  teePaymentAddress?: string;
 }
 
 export interface StreamDelta {
   content?: string;
   role?: string;
+  /** OpenAI-style tool calls. Snake-case to match the wire format. */
   tool_calls?: any[];
 }
 
@@ -115,20 +215,32 @@ export interface StreamChoice {
   finish_reason?: string | null;
 }
 
-export interface StreamUsage {
-  prompt_tokens: number;
-  completion_tokens: number;
-  total_tokens: number;
-}
-
 /**
  * A single chunk in a streaming LLM response (OpenAI-style SSE format).
+ * + * The final chunk additionally carries TEE attestation fields and any data + * settlement metadata that arrived with the response. */ export interface StreamChunk { choices: StreamChoice[]; model: string; - usage?: StreamUsage; - is_final: boolean; + usage?: TokenUsage; + isFinal: boolean; + + /** RSA-PSS signature over the response, present on the final chunk. */ + teeSignature?: string; + /** ISO-8601 TEE timestamp at signing time, present on the final chunk. */ + teeTimestamp?: string; + /** On-chain TEE registry ID of the enclave serving the request (final chunk). */ + teeId?: string; + /** Endpoint URL of the TEE that served this request (final chunk). */ + teeEndpoint?: string; + /** Payment address registered for the TEE (final chunk). */ + teePaymentAddress?: string; + /** Transaction hash for the data settlement transaction, when available. */ + dataSettlementTransactionHash?: string; + /** Walrus blob ID for individual data settlement, when available. */ + dataSettlementBlobId?: string; } export interface ClientConfig {
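
Usage sketch (appended note, not part of the diff): the new `responseFormat`
field with `type: "json_schema"`, which works across providers, including
Anthropic (plain `json_object` is rejected for Anthropic models by the guard
in `chat()`). The import path, the `client` shape, and the schema below are
illustrative assumptions; mirror the setup used in examples/llm_chat.ts.

// Hypothetical sketch, not shipped in this patch.
import { TEE_LLM } from "../src"; // assumed import path, as in examples/
import type { ChatParams, TextGenerationOutput } from "../src";

async function structuredChat(client: {
  llm: { chat(p: ChatParams): Promise<TextGenerationOutput> };
}): Promise<unknown> {
  const result = await client.llm.chat({
    model: TEE_LLM.GPT_5,
    messages: [{ role: "user", content: "Name a city and its country." }],
    responseFormat: {
      type: "json_schema",
      jsonSchema: {
        name: "city_info", // `name` and `schema` are required by ResponseFormat
        schema: {
          type: "object",
          properties: {
            city: { type: "string" },
            country: { type: "string" },
          },
          required: ["city", "country"],
        },
        strict: true,
      },
    },
  });
  // With an enforced schema the content should parse cleanly.
  return JSON.parse(result.chatOutput?.content ?? "{}");
}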
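
A second sketch under the same assumptions: draining a stream and reading the
attestation and settlement fields that `chatStream` now holds back until the
final chunk, so they always arrive last.

// Hypothetical sketch, not shipped in this patch.
import { TEE_LLM, X402SettlementMode } from "../src"; // assumed import path
import type { ChatParams, StreamChunk } from "../src";

async function streamChat(client: {
  llm: { chat(p: ChatParams & { stream: true }): AsyncIterable<StreamChunk> };
}): Promise<void> {
  const stream = client.llm.chat({
    model: TEE_LLM.GPT_5,
    messages: [{ role: "user", content: "Write a short haiku." }],
    x402SettlementMode: X402SettlementMode.BATCH_HASHED,
    stream: true,
  });

  for await (const chunk of stream) {
    process.stdout.write(chunk.choices[0]?.delta?.content ?? "");
    if (chunk.isFinal) {
      // Attestation and settlement metadata ride on the final chunk.
      console.log(`\nTEE id: ${chunk.teeId ?? "(unknown)"}`);
      if (chunk.dataSettlementTransactionHash) {
        console.log(`Data settlement tx: ${chunk.dataSettlementTransactionHash}`);
      }
    }
  }
}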