From e08b40d442f270ad8eb7514c647998d4b2bc7b68 Mon Sep 17 00:00:00 2001 From: qduc Date: Thu, 4 Sep 2025 10:30:58 +0700 Subject: [PATCH 1/5] Enhance Markdown component with language detection and improve button positioning --- frontend/components/ChatV2.tsx | 2 +- frontend/components/Markdown.tsx | 51 +++++++++++++++++++------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/frontend/components/ChatV2.tsx b/frontend/components/ChatV2.tsx index 1ce8cab8..0c7e37ce 100644 --- a/frontend/components/ChatV2.tsx +++ b/frontend/components/ChatV2.tsx @@ -158,7 +158,7 @@ export function ChatV2() { onRetryLastAssistant={handleRetryLastAssistant} /> {/* Removed soft fade to keep a cleaner boundaryless look */} -
+
= ({ text, className }) => { code: function CodeRenderer(p) { const { inline, className: cls, children } = p as any; const hasLanguage = /\blanguage-/.test(cls || ""); + const match = /language-(\w+)/.exec(cls || ""); + const language = match ? match[1] : ""; const isInline = inline ?? !hasLanguage; const className = ["md-code", cls].filter(Boolean).join(" "); @@ -74,29 +76,36 @@ export const Markdown: React.FC = ({ text, className }) => { } return ( -
-
- +
+
+
+ +
-
+                
+                  {language && (
+                    
+                      {language}
+                    
+                  )}
                   {children}
                 
From c3f68dfd865f3a5eccc2c406a71fb541681fb2f9 Mon Sep 17 00:00:00 2001 From: qduc Date: Thu, 4 Sep 2025 11:31:00 +0700 Subject: [PATCH 2/5] Add tool management features to ChatV2 and MessageInput components --- frontend/components/ChatV2.tsx | 2 + frontend/components/MessageInput.tsx | 88 +++++++++++++++++++++++++--- frontend/hooks/useChatState.ts | 15 ++++- 3 files changed, 96 insertions(+), 9 deletions(-) diff --git a/frontend/components/ChatV2.tsx b/frontend/components/ChatV2.tsx index 0c7e37ce..ae97eec2 100644 --- a/frontend/components/ChatV2.tsx +++ b/frontend/components/ChatV2.tsx @@ -172,6 +172,8 @@ export function ChatV2() { useTools={state.useTools} shouldStream={state.shouldStream} onUseToolsChange={actions.setUseTools} + enabledTools={state.enabledTools} + onEnabledToolsChange={actions.setEnabledTools} onShouldStreamChange={actions.setShouldStream} model={state.model} qualityLevel={state.qualityLevel} diff --git a/frontend/components/MessageInput.tsx b/frontend/components/MessageInput.tsx index 5eff0b86..dafba6f1 100644 --- a/frontend/components/MessageInput.tsx +++ b/frontend/components/MessageInput.tsx @@ -1,4 +1,4 @@ -import { useEffect, useRef } from 'react'; +import { useEffect, useRef, useState } from 'react'; import { Send, Loader2, Gauge, Wrench, Zap } from 'lucide-react'; import type { PendingState } from '../hooks/useChatStream'; import Toggle from './ui/Toggle'; @@ -15,6 +15,8 @@ interface MessageInputProps { shouldStream: boolean; onUseToolsChange: (useTools: boolean) => void; onShouldStreamChange: (val: boolean) => void; + enabledTools?: string[]; + onEnabledToolsChange?: (list: string[]) => void; model: string; qualityLevel: QualityLevel; onQualityLevelChange: (level: QualityLevel) => void; @@ -29,12 +31,17 @@ export function MessageInput({ useTools, shouldStream, onUseToolsChange, + enabledTools = [], + onEnabledToolsChange, onShouldStreamChange, model, qualityLevel, onQualityLevelChange, }: MessageInputProps) { const inputRef = 
useRef(null); + const [toolsOpen, setToolsOpen] = useState(false); + const [availableTools, setAvailableTools] = useState<{ name: string; description?: string }[]>([]); + const [localSelected, setLocalSelected] = useState(enabledTools); // Auto-grow textarea up to ~200px useEffect(() => { @@ -45,6 +52,26 @@ export function MessageInput({ el.style.height = `${next}px`; }, [input]); + useEffect(() => { + setLocalSelected(enabledTools ?? []); + }, [enabledTools]); + + // Load tool specs for the selector UI + useEffect(() => { + let mounted = true; + import('../lib/chat').then(mod => { + const ToolsClient = (mod as any).ToolsClient; + if (!ToolsClient) return; + const client = new ToolsClient(); + client.getToolSpecs().then((res: any) => { + if (!mounted) return; + const tools = (res.tools || []).map((t: any) => ({ name: t.function?.name || t.name, description: t.function?.description || t.description })); + setAvailableTools(tools); + }).catch(() => setAvailableTools([])); + }).catch(() => setAvailableTools([])); + return () => { mounted = false; }; + }, []); + const handleKey = (e: React.KeyboardEvent) => { if (e.key === 'Enter' && !e.shiftKey) { @@ -89,13 +116,58 @@ export function MessageInput({ )}
- } - checked={useTools} - onChange={onUseToolsChange} - className="whitespace-nowrap" - /> +
+ + + {toolsOpen && ( +
+
Tools
+
+ {availableTools.length === 0 && ( +
No tools available
+ )} + {availableTools.map(t => { + const id = t.name; + const checked = localSelected.includes(id); + return ( + + ); + })} +
+
+ +
+
+ )} +
diff --git a/frontend/hooks/useChatState.ts b/frontend/hooks/useChatState.ts index 2d624c4f..3a609390 100644 --- a/frontend/hooks/useChatState.ts +++ b/frontend/hooks/useChatState.ts @@ -25,6 +25,8 @@ export interface ChatState { qualityLevel: QualityLevel; // System prompt for the current session systemPrompt: string; + // Per-tool enablement (list of tool names). Empty array means no explicit selection. + enabledTools: string[]; // Conversations conversations: ConversationMeta[]; @@ -54,6 +56,7 @@ export type ChatAction = | { type: 'SET_VERBOSITY'; payload: string } | { type: 'SET_QUALITY_LEVEL'; payload: QualityLevel } | { type: 'SET_SYSTEM_PROMPT'; payload: string } + | { type: 'SET_ENABLED_TOOLS'; payload: string[] } | { type: 'SET_CONVERSATION_ID'; payload: string | null } | { type: 'START_STREAMING'; payload: { abort: AbortController; userMessage: ChatMessage; assistantMessage: ChatMessage } } | { type: 'REGENERATE_START'; payload: { abort: AbortController; baseMessages: ChatMessage[]; assistantMessage: ChatMessage } } @@ -93,6 +96,7 @@ const initialState: ChatState = { verbosity: 'medium', qualityLevel: 'balanced', systemPrompt: '', + enabledTools: [], conversations: [], nextCursor: null, historyEnabled: true, @@ -145,6 +149,9 @@ function chatReducer(state: ChatState, action: ChatAction): ChatState { case 'SET_SYSTEM_PROMPT': return { ...state, systemPrompt: action.payload }; + case 'SET_ENABLED_TOOLS': + return { ...state, enabledTools: action.payload }; + case 'SET_CONVERSATION_ID': return { ...state, conversationId: action.payload }; @@ -517,7 +524,9 @@ export function useChatState() { qualityLevel: state.qualityLevel, ...(state.useTools ? { - tools: Object.values(availableTools), + tools: (state.enabledTools && state.enabledTools.length > 0) + ? Object.values(availableTools).filter(t => state.enabledTools!.includes(t.function?.name ?? 
'')) + : Object.values(availableTools), tool_choice: 'auto', } : {}), @@ -613,6 +622,10 @@ export function useChatState() { dispatch({ type: 'SET_SYSTEM_PROMPT', payload: prompt }); }, []), + setEnabledTools: useCallback((list: string[]) => { + dispatch({ type: 'SET_ENABLED_TOOLS', payload: list }); + }, []), + // Chat Actions sendMessage: useCallback(async () => { const input = state.input.trim(); From ae1215c939ae5a2b6e5374a52ae443a80ee40f48 Mon Sep 17 00:00:00 2001 From: qduc Date: Thu, 4 Sep 2025 11:49:14 +0700 Subject: [PATCH 3/5] Enhance tool handling by allowing simplified tool name strings from frontend and updating orchestration logic to prefer these names when available. --- backend/src/lib/iterativeOrchestrator.js | 13 ++++++++++--- backend/src/lib/openaiProxy.js | 13 +++++++++++++ frontend/hooks/useChatState.ts | 5 +++-- frontend/lib/chat/types.ts | 3 ++- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/backend/src/lib/iterativeOrchestrator.js b/backend/src/lib/iterativeOrchestrator.js index 6796b767..1f641906 100644 --- a/backend/src/lib/iterativeOrchestrator.js +++ b/backend/src/lib/iterativeOrchestrator.js @@ -71,11 +71,16 @@ function streamEvent(res, event, model) { * Make a request to the AI model */ async function callModel(messages, config, bodyParams, tools = null, providerId) { + // Determine tools to send: prefer bodyParams.tools (frontend-provided), then explicit tools arg, then full registry + const toolsToSend = (Array.isArray(bodyParams.tools) && bodyParams.tools.length) + ? bodyParams.tools + : (Array.isArray(tools) && tools.length) ? 
tools : null; + const requestBody = { model: bodyParams.model || config.defaultModel, messages, stream: false, - ...(tools && { tools, tool_choice: 'auto' }) + ...(toolsToSend && { tools: toolsToSend, tool_choice: 'auto' }) }; // Include reasoning controls only if supported by provider const allowReasoning = providerSupportsReasoning(config, requestBody.model); @@ -135,12 +140,14 @@ export async function handleIterativeOrchestration({ iteration++; // Stream the model response for this iteration, buffering only tool calls + // Prefer the frontend-provided tools (expanded by sanitizeIncomingBody) when present. + // Otherwise fall back to the server-side registry. + const toolsToSend = (Array.isArray(body.tools) && body.tools.length) ? body.tools : generateOpenAIToolSpecs(); const requestBody = { model: body.model || config.defaultModel, messages: conversationHistory, stream: true, - tools: generateOpenAIToolSpecs(), - tool_choice: 'auto', + ...(toolsToSend && { tools: toolsToSend, tool_choice: body.tool_choice || 'auto' }), }; // Include reasoning controls only if supported by provider if (providerSupportsReasoning(config, requestBody.model)) { diff --git a/backend/src/lib/openaiProxy.js b/backend/src/lib/openaiProxy.js index 1d8fd13a..370ea4bf 100644 --- a/backend/src/lib/openaiProxy.js +++ b/backend/src/lib/openaiProxy.js @@ -1,4 +1,5 @@ import { config } from '../env.js'; +import { generateOpenAIToolSpecs } from './tools.js'; import { handleUnifiedToolOrchestration } from './unifiedToolOrchestrator.js'; import { handleIterativeOrchestration } from './iterativeOrchestrator.js'; import { handleRegularStreaming } from './streamingHandler.js'; @@ -38,6 +39,18 @@ function sanitizeIncomingBody(bodyIn, _cfg) { delete body.system_prompt; // Default model // Default model is resolved later (may come from DB) + + // Allow a simplified tools representation from frontend: an array of tool names (strings). 
+ // Expand into full OpenAI-compatible tool specs using server-side registry. + try { + if (Array.isArray(bodyIn.tools) && bodyIn.tools.length > 0 && typeof bodyIn.tools[0] === 'string') { + const allSpecs = generateOpenAIToolSpecs(); + const selected = allSpecs.filter(s => bodyIn.tools.includes(s.function?.name)); + body.tools = selected; + } + } catch (e) { + // ignore expansion errors and let downstream validation handle unexpected shapes + } return body; } diff --git a/frontend/hooks/useChatState.ts b/frontend/hooks/useChatState.ts index 3a609390..ceaafe98 100644 --- a/frontend/hooks/useChatState.ts +++ b/frontend/hooks/useChatState.ts @@ -524,9 +524,10 @@ export function useChatState() { qualityLevel: state.qualityLevel, ...(state.useTools ? { + // Send a simplified list of tool names to the backend. Backend will map names -> specs. tools: (state.enabledTools && state.enabledTools.length > 0) - ? Object.values(availableTools).filter(t => state.enabledTools!.includes(t.function?.name ?? '')) - : Object.values(availableTools), + ? 
state.enabledTools + : Object.keys(availableTools), tool_choice: 'auto', } : {}), diff --git a/frontend/lib/chat/types.ts b/frontend/lib/chat/types.ts index f031212e..796db74c 100644 --- a/frontend/lib/chat/types.ts +++ b/frontend/lib/chat/types.ts @@ -95,7 +95,8 @@ export interface ChatOptions { // Extended options for advanced features export interface ChatOptionsExtended extends ChatOptions { conversationId?: string; - tools?: ToolSpec[]; + // Accept either full ToolSpec objects or simple tool name strings + tools?: Array; toolChoice?: any; reasoning?: { effort?: string; From d13b7679e0d2df372d6f32f0670b3b2eae8ca60d Mon Sep 17 00:00:00 2001 From: qduc Date: Thu, 4 Sep 2025 13:18:35 +0700 Subject: [PATCH 4/5] Refactor MessageInput component to manage tools dropdown visibility and enhance styling for better user interaction --- backend/src/lib/iterativeOrchestrator.js | 27 ------------------------ frontend/components/MessageInput.tsx | 25 +++++++++++++++++----- 2 files changed, 20 insertions(+), 32 deletions(-) diff --git a/backend/src/lib/iterativeOrchestrator.js b/backend/src/lib/iterativeOrchestrator.js index 1f641906..9c3ce0d3 100644 --- a/backend/src/lib/iterativeOrchestrator.js +++ b/backend/src/lib/iterativeOrchestrator.js @@ -67,33 +67,6 @@ function streamEvent(res, event, model) { if (typeof res.flush === 'function') res.flush(); } -/** - * Make a request to the AI model - */ -async function callModel(messages, config, bodyParams, tools = null, providerId) { - // Determine tools to send: prefer bodyParams.tools (frontend-provided), then explicit tools arg, then full registry - const toolsToSend = (Array.isArray(bodyParams.tools) && bodyParams.tools.length) - ? bodyParams.tools - : (Array.isArray(tools) && tools.length) ? 
tools : null; - - const requestBody = { - model: bodyParams.model || config.defaultModel, - messages, - stream: false, - ...(toolsToSend && { tools: toolsToSend, tool_choice: 'auto' }) - }; - // Include reasoning controls only if supported by provider - const allowReasoning = providerSupportsReasoning(config, requestBody.model); - if (allowReasoning) { - if (bodyParams.reasoning_effort) requestBody.reasoning_effort = bodyParams.reasoning_effort; - if (bodyParams.verbosity) requestBody.verbosity = bodyParams.verbosity; - } - - const response = await createOpenAIRequest(config, requestBody, { providerId }); - const result = await response.json(); - return result?.choices?.[0]?.message; -} - /** * Handle iterative tool orchestration with thinking support */ diff --git a/frontend/components/MessageInput.tsx b/frontend/components/MessageInput.tsx index dafba6f1..c1858191 100644 --- a/frontend/components/MessageInput.tsx +++ b/frontend/components/MessageInput.tsx @@ -39,6 +39,7 @@ export function MessageInput({ onQualityLevelChange, }: MessageInputProps) { const inputRef = useRef(null); + const toolsDropdownRef = useRef(null); const [toolsOpen, setToolsOpen] = useState(false); const [availableTools, setAvailableTools] = useState<{ name: string; description?: string }[]>([]); const [localSelected, setLocalSelected] = useState(enabledTools); @@ -56,6 +57,20 @@ export function MessageInput({ setLocalSelected(enabledTools ?? 
[]); }, [enabledTools]); + // Click outside to close tools dropdown + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if (toolsDropdownRef.current && !toolsDropdownRef.current.contains(event.target as Node)) { + setToolsOpen(false); + } + }; + + if (toolsOpen) { + document.addEventListener('mousedown', handleClickOutside); + return () => document.removeEventListener('mousedown', handleClickOutside); + } + }, [toolsOpen]); + // Load tool specs for the selector UI useEffect(() => { let mounted = true; @@ -116,12 +131,12 @@ export function MessageInput({ )}
-
+
From 326599fd3696f472315a3942c0ff9fa19ac2b747 Mon Sep 17 00:00:00 2001 From: qduc Date: Thu, 4 Sep 2025 15:37:12 +0700 Subject: [PATCH 5/5] update docs --- .github/copilot-instructions.md | 137 ++++++++++++++++++++++++++------ README.md | 17 ++-- docs/API-SPECS.md | 49 +++++++----- 3 files changed, 150 insertions(+), 53 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 23e7ef60..6e48e93d 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -1,34 +1,119 @@ -Copilot Quick Guide +# Copilot Instructions for This Repo -Purpose -- Minimal instructions to get a coding assistant productive in this repo: a Next.js frontend + Node backend that proxies OpenAI‑style chat completions and preserves streaming. +## Goal -Quick flow -- Browser → `NEXT_PUBLIC_API_BASE` (default `/api`) → Next.js rewrite → Backend `/v1/chat/completions` → Provider. Preserve `Content-Type: text/event-stream` and the final `data: [DONE]` chunk. +Help an AI coding agent make small, correct changes that keep OpenAI compatibility and streaming intact. -Must‑know files -- Frontend: `frontend/lib/chat.ts`, `frontend/components/Chat.tsx`, `frontend/next.config.ts`. -- Backend: `backend/src/lib/openaiProxy.js`, `backend/src/routes/chat.js`, `backend/src/index.js`, `backend/src/middleware/rateLimit.js`. +--- -Key envs -- Backend: `OPENAI_BASE_URL`, `OPENAI_API_KEY`, `DEFAULT_MODEL`, `PORT`, `ALLOWED_ORIGIN`. -- Frontend: `NEXT_PUBLIC_API_BASE` (default `/api`), `BACKEND_ORIGIN`. +## Architecture (Big Picture) -Invariants -- Do not expose provider keys to the browser. -- Keep OpenAI request/response schema compatible. -- Stream passthrough: forward SSE chunks unchanged; client expects `data: [DONE]` terminator. -- Apply `rateLimit` to new backend endpoints. +- **Frontend**: Next.js 15, React 19. Calls backend via `NEXT_PUBLIC_API_BASE` (Compose sets `http://localhost:4001`). +- **Backend**: Express 5, ESM. 
Exposes OpenAI‑compatible endpoints, proxies to provider, orchestrates server‑side tools, persists conversations (SQLite). +- **Streaming**: SSE end‑to‑end. Backend may stream extra `_conversation` metadata chunks before `data: [DONE]`. -Local run (short) -```bash -cp backend/.env.example backend/.env && npm --prefix backend install && npm --prefix backend run dev -cp frontend/.env.example frontend/.env.local && npm --prefix frontend install && npm --prefix frontend run dev -``` +--- -Gotchas -- Dev compose ports differ (frontend `:3003`, backend `:4001`). -- Changing `BACKEND_ORIGIN` requires a Next.js server restart. +## Key Paths to Read/Change First -More details -- Full docs live in `docs/` (see `docs/OVERVIEW.md`, `docs/API-SPECS.md`, `docs/SECURITY.md`). +- **Frontend client & SSE**: + - `frontend/lib/chat.ts` + - `frontend/lib/chat/client.ts` + - Hooks: `frontend/hooks/` (e.g. `useChatStream.ts`) +- **Frontend UI**: + - `frontend/components/ChatV2.tsx` + - `MessageList.tsx` + - `MessageInput.tsx` +- **Backend request flow**: + - `backend/src/lib/openaiProxy.js` (entry) + - `streamUtils.js` / `streamingHandler.js` (SSE) + - `iterativeOrchestrator.js` and `unifiedToolOrchestrator.js` (tools) + - `simplifiedPersistence.js` (DB) + - Routes: `backend/src/routes/*` +- **Tools registry**: + - `backend/src/lib/tools.js` (e.g., `get_time`, `web_search`) + +--- + +## APIs to Preserve (Shapes/Semantics) + +- `POST /v1/chat/completions` (primary): + - OpenAI chat schema + - Supports: `stream=true`, `tools`, optional `provider_id`, `conversation_id`, `tool_choice`, reasoning controls for supported models +- Streams include: + - Normal OpenAI chunks + - Optional `tool_calls` (buffered, consolidated) + - `tool_output` + - `_conversation` metadata + - Always terminate with `data: [DONE]` +- `GET /v1/tools`: Returns tool specs generated from server registry +- Conversations: `GET/POST/DELETE /v1/conversations*` (gated by `config.persistence.enabled`, uses SQLite via 
`backend/src/db/*`) + +--- + +## Conventions and Invariants (Project‑Specific) + +- Never expose provider API keys to the browser. Backend injects `Authorization` using env/provider DB rows. +- Keep OpenAI compatibility at the proxy boundary; don’t break request/response fields or streaming format. +- Frontend relies on `_conversation` events in stream; see `ChatClient.processStreamChunk`. +- Input sanitization: backend maps `systemPrompt` → leading system message (`sanitizeIncomingBody`), accepts `tools` as names or full specs. +- Rate limit all new backend endpoints via `rateLimit` and respect `ALLOWED_ORIGIN` CORS. +- Next.js disables compression for SSE (`frontend/next.config.ts`); don’t re‑enable. + +--- + +## Add a Server‑Side Tool (Example) + +1. Edit `backend/src/lib/tools.js`: + - Provide `validate(args)` and an async `handler`. + - The OpenAI spec is auto‑generated by `generateOpenAIToolSpecs()`. + - Example: `web_search` uses Tavily; requires `TAVILY_API_KEY`. + +--- + +## Run, Test, Lint (Use These Exact Commands) + +### Local Dev + +- **Backend**: + ```sh + cp backend/.env.example backend/.env && npm --prefix backend install && npm --prefix backend run dev + ``` +- **Frontend**: + ```sh + cp frontend/.env.example frontend/.env.local && npm --prefix frontend install && npm --prefix frontend run dev + ``` + +### Docker Dev (hot reload, different ports) + +- ```sh + docker compose -f docker-compose.dev.yml up --build + # frontend :3003, backend :4001 + ``` + +### Tests & Lint + +- ```sh + npm --prefix backend test + npm --prefix frontend test + npm --prefix <backend|frontend> run lint + ``` + +--- + +## Gotchas (Seen in Code/Tests) + +- Streaming must flush promptly; use `setupStreamingHeaders` and `writeAndFlush` paths. Never buffer entire responses. +- When persistence is enabled, only final assistant content is written (`SimplifiedPersistence`), but deltas still stream to clients. +- Provider selection can be passed via body `provider_id` or `x-provider-id` header. 
+ +--- + +## For Deeper Context + +See: + +- `docs/OVERVIEW.md` +- `docs/API-SPECS.md` +- `docs/SECURITY.md` +- `AI_ONBOARDING.md` (repo root) diff --git a/README.md b/README.md index 26368119..0e4b83d2 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,9 @@ A full-stack AI chat application with advanced tool orchestration and streaming - [x] **Enhanced UI components** (quality controls, floating dropdowns) - [x] **Advanced streaming** (tool events, thinking support) - [x] **Conversation persistence** (SQLite database with migrations) -- [ ] Conversation history UI integration -- [ ] System prompt / temperature controls -- [ ] Auth & per-user limits +- [x] Conversation history UI integration +- [x] System prompt / temperature controls +- [ ] Auth & per-user limits (planned) ## Key Features @@ -94,15 +94,20 @@ Frontend on http://localhost:3000 with hot reload enabled. ### Tool Development The application includes a server-side tool registry located in `backend/src/lib/tools.js`. To add new tools: -1. Define your tool in the registry with validation schema -2. Implement the handler function +1. Define your tool in the registry with a `validate` function for arguments +2. Implement the `handler` function 3. Tools are automatically available via the orchestration system Example: ```javascript export const tools = { get_weather: { - schema: z.object({ city: z.string().min(1) }).strict(), + validate: (args) => { + if (!args || typeof args.city !== 'string') { + throw new Error('get_weather requires a "city" argument of type string'); + } + return { city: args.city }; + }, handler: async ({ city }) => ({ tempC: 22, city }), } }; diff --git a/docs/API-SPECS.md b/docs/API-SPECS.md index e882e2cc..fa4f4e63 100644 --- a/docs/API-SPECS.md +++ b/docs/API-SPECS.md @@ -1,6 +1,7 @@ # API Specifications ## Chat APIs +> **Note:** All conversation history, including tool outputs and reasoning steps, is persisted in a SQLite database for continuity and auditability. 
### POST /v1/responses (Primary) The primary chat endpoint supporting conversation continuity. @@ -34,7 +35,7 @@ OpenAI-compatible endpoint for standard chat completions. "research_mode": true // Optional: enable research mode for multi-step tool usage } ``` -- **Response**: +- **Response**: - `stream=false`: Standard OpenAI JSON response - `stream=true`: Standard OpenAI SSE format with `data: [DONE]` termination @@ -42,16 +43,14 @@ OpenAI-compatible endpoint for standard chat completions. #### Tool Usage When `tools` array is provided, the system can execute server-side tools during the conversation: -- Available tools: `get_time`, `web_search` -- Tools are executed automatically when the AI determines they're needed -- Tool results are streamed back to the client in real-time + Available tools: `get_time`, `web_search`, and any additional tools defined in the server registry (see `backend/src/lib/tools.js`). Tools can be added by extending the registry with a `validate` function and a `handler` function. Tool inputs are validated server-side for safety and correctness. #### Research Mode When `research_mode: true` is set with tools, the system enables multi-step research capabilities: -- AI can use tools multiple times in sequence -- AI can analyze tool results and perform follow-up searches -- AI streams its reasoning process between tool calls -- Ideal for complex research queries requiring multiple information sources + +**Iterative Orchestration:** + When `research_mode: true` is enabled, the AI can perform up to 10 tool calls per request, streaming its reasoning and tool outputs between steps. The orchestration system adapts to both streaming and non-streaming requests. + Tool execution is server-side, with input validation and error handling. Tool results are persisted as part of the conversation history. 
**Research Mode Streaming**: Includes additional event types: ``` @@ -65,14 +64,19 @@ data: {"id":"iter_123","choices":[{"delta":{"content":"Based on the results, let ``` ### Streaming Format (Both Endpoints) + +Streaming events include: ``` data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"delta":{"content":"Hello"}}]} - data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"delta":{"content":" world"}}]} - +data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"delta":{"tool_calls":[{"id":"call_abc","function":{"name":"web_search","arguments":"{\"query\":\"AI developments 2024\"}"}}]}]}} +data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"delta":{"tool_output":{"tool_call_id":"call_abc","name":"web_search","output":"Search results..."}}}]} +data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"delta":{"content":"Based on the results, let me search for more specific information..."}}]} data: [DONE] ``` +Tool events (`tool_calls`, `tool_output`) and reasoning events are streamed in real-time. All tool outputs and reasoning steps are persisted as part of the conversation history. + ## Health & Monitoring ### GET /healthz @@ -88,10 +92,10 @@ Returns system health and configuration. ``` ## Rate Limiting -- **Current**: In-memory per-IP limiting -- **Headers**: Standard rate limit headers in responses -- **Limits**: Configurable per environment -- **Future**: Redis-based distributed limiting + - **Current**: In-memory per-IP sliding window limiting (configurable via `RATE_LIMIT_WINDOW_SEC` and `RATE_LIMIT_MAX`) + - **Headers**: Standard rate limit headers in responses + - **Limits**: Configurable per environment + - **Planned**: Redis-backed per-user and per-key limits for production scaling ## Server Features @@ -99,17 +103,20 @@ Returns system health and configuration. 
- **Proxy Mode**: Direct passthrough to OpenAI-compatible providers - **Header Injection**: Automatic `Authorization` header from server environment - **Format Conversion**: Automatic conversion between Responses API and Chat Completions formats -- **Error Handling**: Proper HTTP status codes and error responses + - **Error Handling**: Structured error responses with proper HTTP status codes. Tool failures and upstream errors are handled gracefully, with error details persisted in conversation history. Input validation and timeouts are enforced for all tool executions. ### Logging & Observability - **Access Logs**: Morgan middleware for HTTP request logging -- **Error Handling**: Structured error responses + - **Error Handling**: Structured error responses. Tool errors and upstream failures are logged and persisted for observability and debugging. - **Performance**: Request timing and basic metrics - **Privacy**: Input masking for sensitive data (planned) ## Planned Enhancements -- **Authentication**: JWT/API key support with per-user limits -- **Multi-Provider**: Dynamic routing between multiple LLM providers -- **Token Accounting**: Usage tracking and billing integration -- **Observability**: Prometheus metrics and structured logging -- **Conversation UI**: Frontend integration for conversation history browsing + - **Authentication**: JWT/API key support with per-user limits + - **Multi-Provider**: Dynamic routing between multiple LLM providers + - **Token Accounting**: Usage tracking and billing integration + - **Observability**: Prometheus metrics and structured logging + - **Conversation UI**: Frontend integration for conversation history browsing + - **System Prompt & Temperature Controls**: UI and backend support for system prompt and temperature settings + - **File Uploads & Attachments**: Support for file uploads and attachments in chat + - **Token Usage Display**: Show token usage and cost estimates in UI