diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 23e7ef60..6e48e93d 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -1,34 +1,119 @@ -Copilot Quick Guide +# Copilot Instructions for This Repo -Purpose -- Minimal instructions to get a coding assistant productive in this repo: a Next.js frontend + Node backend that proxies OpenAI‑style chat completions and preserves streaming. +## Goal -Quick flow -- Browser → `NEXT_PUBLIC_API_BASE` (default `/api`) → Next.js rewrite → Backend `/v1/chat/completions` → Provider. Preserve `Content-Type: text/event-stream` and the final `data: [DONE]` chunk. +Help an AI coding agent make small, correct changes that keep OpenAI compatibility and streaming intact. -Must‑know files -- Frontend: `frontend/lib/chat.ts`, `frontend/components/Chat.tsx`, `frontend/next.config.ts`. -- Backend: `backend/src/lib/openaiProxy.js`, `backend/src/routes/chat.js`, `backend/src/index.js`, `backend/src/middleware/rateLimit.js`. +--- -Key envs -- Backend: `OPENAI_BASE_URL`, `OPENAI_API_KEY`, `DEFAULT_MODEL`, `PORT`, `ALLOWED_ORIGIN`. -- Frontend: `NEXT_PUBLIC_API_BASE` (default `/api`), `BACKEND_ORIGIN`. +## Architecture (Big Picture) -Invariants -- Do not expose provider keys to the browser. -- Keep OpenAI request/response schema compatible. -- Stream passthrough: forward SSE chunks unchanged; client expects `data: [DONE]` terminator. -- Apply `rateLimit` to new backend endpoints. +- **Frontend**: Next.js 15, React 19. Calls backend via `NEXT_PUBLIC_API_BASE` (Compose sets `http://localhost:4001`). +- **Backend**: Express 5, ESM. Exposes OpenAI‑compatible endpoints, proxies to provider, orchestrates server‑side tools, persists conversations (SQLite). +- **Streaming**: SSE end‑to‑end. Backend may stream extra `_conversation` metadata chunks before `data: [DONE]`. 
-Local run (short) -```bash -cp backend/.env.example backend/.env && npm --prefix backend install && npm --prefix backend run dev -cp frontend/.env.example frontend/.env.local && npm --prefix frontend install && npm --prefix frontend run dev -``` +--- -Gotchas -- Dev compose ports differ (frontend `:3003`, backend `:4001`). -- Changing `BACKEND_ORIGIN` requires a Next.js server restart. +## Key Paths to Read/Change First -More details -- Full docs live in `docs/` (see `docs/OVERVIEW.md`, `docs/API-SPECS.md`, `docs/SECURITY.md`). +- **Frontend client & SSE**: + - `frontend/lib/chat.ts` + - `frontend/lib/chat/client.ts` + - Hooks: `frontend/hooks/` (e.g. `useChatStream.ts`) +- **Frontend UI**: + - `frontend/components/ChatV2.tsx` + - `MessageList.tsx` + - `MessageInput.tsx` +- **Backend request flow**: + - `backend/src/lib/openaiProxy.js` (entry) + - `streamUtils.js` / `streamingHandler.js` (SSE) + - `iterativeOrchestrator.js` and `unifiedToolOrchestrator.js` (tools) + - `simplifiedPersistence.js` (DB) + - Routes: `backend/src/routes/*` +- **Tools registry**: + - `backend/src/lib/tools.js` (e.g., `get_time`, `web_search`) + +--- + +## APIs to Preserve (Shapes/Semantics) + +- `POST /v1/chat/completions` (primary): + - OpenAI chat schema + - Supports: `stream=true`, `tools`, optional `provider_id`, `conversation_id`, `tool_choice`, reasoning controls for supported models +- Streams include: + - Normal OpenAI chunks + - Optional `tool_calls` (buffered, consolidated) + - `tool_output` + - `_conversation` metadata + - Always terminate with `data: [DONE]` +- `GET /v1/tools`: Returns tool specs generated from server registry +- Conversations: `GET/POST/DELETE /v1/conversations*` (gated by `config.persistence.enabled`, uses SQLite via `backend/src/db/*`) + +--- + +## Conventions and Invariants (Project‑Specific) + +- Never expose provider API keys to the browser. Backend injects `Authorization` using env/provider DB rows. 
+- Keep OpenAI compatibility at the proxy boundary; don’t break request/response fields or streaming format. +- Frontend relies on `_conversation` events in stream; see `ChatClient.processStreamChunk`. +- Input sanitization: backend maps `systemPrompt` → leading system message (`sanitizeIncomingBody`), accepts `tools` as names or full specs. +- Rate limit all new backend endpoints via `rateLimit` and respect `ALLOWED_ORIGIN` CORS. +- Next.js disables compression for SSE (`frontend/next.config.ts`); don’t re‑enable. + +--- + +## Add a Server‑Side Tool (Example) + +1. Edit `backend/src/lib/tools.js`: + - Provide `validate(args)` and an async `handler`. + - The OpenAI spec is auto‑generated by `generateOpenAIToolSpecs()`. + - Example: `web_search` uses Tavily; requires `TAVILY_API_KEY`. + +--- + +## Run, Test, Lint (Use These Exact Commands) + +### Local Dev + +- **Backend**: + ```sh + cp backend/.env.example backend/.env && npm --prefix backend install && npm --prefix backend run dev + ``` +- **Frontend**: + ```sh + cp frontend/.env.example frontend/.env.local && npm --prefix frontend install && npm --prefix frontend run dev + ``` + +### Docker Dev (hot reload, different ports) + +- ```sh + docker compose -f docker-compose.dev.yml up --build + # frontend :3003, backend :4001 + ``` + +### Tests & Lint + +- ```sh + npm --prefix backend test + npm --prefix frontend test + npm --prefix frontend run lint + ``` + +--- + +## Gotchas (Seen in Code/Tests) + +- Streaming must flush promptly; use `setupStreamingHeaders` and `writeAndFlush` paths. Never buffer entire responses. +- When persistence is enabled, only final assistant content is written (`SimplifiedPersistence`), but deltas still stream to clients. +- Provider selection can be passed via body `provider_id` or `x-provider-id` header. 
+ +--- + +## For Deeper Context + +See: + +- `docs/OVERVIEW.md` +- `docs/API-SPECS.md` +- `docs/SECURITY.md` +- `AI_ONBOARDING.md` (repo root) diff --git a/README.md b/README.md index 26368119..0e4b83d2 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,9 @@ A full-stack AI chat application with advanced tool orchestration and streaming - [x] **Enhanced UI components** (quality controls, floating dropdowns) - [x] **Advanced streaming** (tool events, thinking support) - [x] **Conversation persistence** (SQLite database with migrations) -- [ ] Conversation history UI integration -- [ ] System prompt / temperature controls -- [ ] Auth & per-user limits +- [x] Conversation history UI integration +- [x] System prompt / temperature controls +- [ ] Auth & per-user limits (planned) ## Key Features @@ -94,15 +94,20 @@ Frontend on http://localhost:3000 with hot reload enabled. ### Tool Development The application includes a server-side tool registry located in `backend/src/lib/tools.js`. To add new tools: -1. Define your tool in the registry with validation schema -2. Implement the handler function +1. Define your tool in the registry with a `validate` function for arguments +2. Implement the `handler` function 3. 
Tools are automatically available via the orchestration system Example: ```javascript export const tools = { get_weather: { - schema: z.object({ city: z.string().min(1) }).strict(), + validate: (args) => { + if (!args || typeof args.city !== 'string') { + throw new Error('get_weather requires a "city" argument of type string'); + } + return { city: args.city }; + }, handler: async ({ city }) => ({ tempC: 22, city }), } }; diff --git a/backend/src/lib/iterativeOrchestrator.js b/backend/src/lib/iterativeOrchestrator.js index 6796b767..9c3ce0d3 100644 --- a/backend/src/lib/iterativeOrchestrator.js +++ b/backend/src/lib/iterativeOrchestrator.js @@ -67,28 +67,6 @@ function streamEvent(res, event, model) { if (typeof res.flush === 'function') res.flush(); } -/** - * Make a request to the AI model - */ -async function callModel(messages, config, bodyParams, tools = null, providerId) { - const requestBody = { - model: bodyParams.model || config.defaultModel, - messages, - stream: false, - ...(tools && { tools, tool_choice: 'auto' }) - }; - // Include reasoning controls only if supported by provider - const allowReasoning = providerSupportsReasoning(config, requestBody.model); - if (allowReasoning) { - if (bodyParams.reasoning_effort) requestBody.reasoning_effort = bodyParams.reasoning_effort; - if (bodyParams.verbosity) requestBody.verbosity = bodyParams.verbosity; - } - - const response = await createOpenAIRequest(config, requestBody, { providerId }); - const result = await response.json(); - return result?.choices?.[0]?.message; -} - /** * Handle iterative tool orchestration with thinking support */ @@ -135,12 +113,14 @@ export async function handleIterativeOrchestration({ iteration++; // Stream the model response for this iteration, buffering only tool calls + // Prefer the frontend-provided tools (expanded by sanitizeIncomingBody) when present. + // Otherwise fall back to the server-side registry. + const toolsToSend = (Array.isArray(body.tools) && body.tools.length) ? 
body.tools : generateOpenAIToolSpecs(); const requestBody = { model: body.model || config.defaultModel, messages: conversationHistory, stream: true, - tools: generateOpenAIToolSpecs(), - tool_choice: 'auto', + ...(toolsToSend && { tools: toolsToSend, tool_choice: body.tool_choice || 'auto' }), }; // Include reasoning controls only if supported by provider if (providerSupportsReasoning(config, requestBody.model)) { diff --git a/backend/src/lib/openaiProxy.js b/backend/src/lib/openaiProxy.js index 1d8fd13a..370ea4bf 100644 --- a/backend/src/lib/openaiProxy.js +++ b/backend/src/lib/openaiProxy.js @@ -1,4 +1,5 @@ import { config } from '../env.js'; +import { generateOpenAIToolSpecs } from './tools.js'; import { handleUnifiedToolOrchestration } from './unifiedToolOrchestrator.js'; import { handleIterativeOrchestration } from './iterativeOrchestrator.js'; import { handleRegularStreaming } from './streamingHandler.js'; @@ -38,6 +39,18 @@ function sanitizeIncomingBody(bodyIn, _cfg) { delete body.system_prompt; // Default model // Default model is resolved later (may come from DB) + + // Allow a simplified tools representation from frontend: an array of tool names (strings). + // Expand into full OpenAI-compatible tool specs using server-side registry. + try { + if (Array.isArray(bodyIn.tools) && bodyIn.tools.length > 0 && typeof bodyIn.tools[0] === 'string') { + const allSpecs = generateOpenAIToolSpecs(); + const selected = allSpecs.filter(s => bodyIn.tools.includes(s.function?.name)); + body.tools = selected; + } + } catch (e) { + // ignore expansion errors and let downstream validation handle unexpected shapes + } return body; } diff --git a/docs/API-SPECS.md b/docs/API-SPECS.md index e882e2cc..fa4f4e63 100644 --- a/docs/API-SPECS.md +++ b/docs/API-SPECS.md @@ -1,6 +1,7 @@ # API Specifications ## Chat APIs +> **Note:** All conversation history, including tool outputs and reasoning steps, is persisted in a SQLite database for continuity and auditability. 
### POST /v1/responses (Primary) The primary chat endpoint supporting conversation continuity. @@ -34,7 +35,7 @@ OpenAI-compatible endpoint for standard chat completions. "research_mode": true // Optional: enable research mode for multi-step tool usage } ``` -- **Response**: +- **Response**: - `stream=false`: Standard OpenAI JSON response - `stream=true`: Standard OpenAI SSE format with `data: [DONE]` termination @@ -42,16 +43,14 @@ OpenAI-compatible endpoint for standard chat completions. #### Tool Usage When `tools` array is provided, the system can execute server-side tools during the conversation: -- Available tools: `get_time`, `web_search` -- Tools are executed automatically when the AI determines they're needed -- Tool results are streamed back to the client in real-time + Available tools: `get_time`, `web_search`, and any additional tools defined in the server registry (see `backend/src/lib/tools.js`). Tools can be added by extending the registry with validation schemas and handler functions. Tool inputs are validated server-side for safety and correctness. #### Research Mode When `research_mode: true` is set with tools, the system enables multi-step research capabilities: -- AI can use tools multiple times in sequence -- AI can analyze tool results and perform follow-up searches -- AI streams its reasoning process between tool calls -- Ideal for complex research queries requiring multiple information sources + +**Iterative Orchestration:** + When `research_mode: true` is enabled, the AI can perform up to 10 tool calls per request, streaming its reasoning and tool outputs between steps. The orchestration system adapts to both streaming and non-streaming requests. + Tool execution is server-side, with input validation and error handling. Tool results are persisted as part of the conversation history. 
**Research Mode Streaming**: Includes additional event types: ``` @@ -65,14 +64,19 @@ data: {"id":"iter_123","choices":[{"delta":{"content":"Based on the results, let ``` ### Streaming Format (Both Endpoints) + +Streaming events include: ``` data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"delta":{"content":"Hello"}}]} - data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"delta":{"content":" world"}}]} - +data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"delta":{"tool_calls":[{"id":"call_abc","function":{"name":"web_search","arguments":"{\"query\":\"AI developments 2024\"}"}}]}]}} +data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"delta":{"tool_output":{"tool_call_id":"call_abc","name":"web_search","output":"Search results..."}}}]} +data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"delta":{"content":"Based on the results, let me search for more specific information..."}}]} data: [DONE] ``` +Tool events (`tool_calls`, `tool_output`) and reasoning events are streamed in real-time. All tool outputs and reasoning steps are persisted as part of the conversation history. + ## Health & Monitoring ### GET /healthz @@ -88,10 +92,10 @@ Returns system health and configuration. ``` ## Rate Limiting -- **Current**: In-memory per-IP limiting -- **Headers**: Standard rate limit headers in responses -- **Limits**: Configurable per environment -- **Future**: Redis-based distributed limiting + - **Current**: In-memory per-IP sliding window limiting (configurable via `RATE_LIMIT_WINDOW_SEC` and `RATE_LIMIT_MAX`) + - **Headers**: Standard rate limit headers in responses + - **Limits**: Configurable per environment + - **Planned**: Redis-backed per-user and per-key limits for production scaling ## Server Features @@ -99,17 +103,20 @@ Returns system health and configuration. 
- **Proxy Mode**: Direct passthrough to OpenAI-compatible providers - **Header Injection**: Automatic `Authorization` header from server environment - **Format Conversion**: Automatic conversion between Responses API and Chat Completions formats -- **Error Handling**: Proper HTTP status codes and error responses + - **Error Handling**: Structured error responses with proper HTTP status codes. Tool failures and upstream errors are handled gracefully, with error details persisted in conversation history. Input validation and timeouts are enforced for all tool executions. ### Logging & Observability - **Access Logs**: Morgan middleware for HTTP request logging -- **Error Handling**: Structured error responses + - **Error Handling**: Structured error responses. Tool errors and upstream failures are logged and persisted for observability and debugging. - **Performance**: Request timing and basic metrics - **Privacy**: Input masking for sensitive data (planned) ## Planned Enhancements -- **Authentication**: JWT/API key support with per-user limits -- **Multi-Provider**: Dynamic routing between multiple LLM providers -- **Token Accounting**: Usage tracking and billing integration -- **Observability**: Prometheus metrics and structured logging -- **Conversation UI**: Frontend integration for conversation history browsing + - **Authentication**: JWT/API key support with per-user limits + - **Multi-Provider**: Dynamic routing between multiple LLM providers + - **Token Accounting**: Usage tracking and billing integration + - **Observability**: Prometheus metrics and structured logging + - **Conversation UI**: Frontend integration for conversation history browsing + - **System Prompt & Temperature Controls**: UI and backend support for system prompt and temperature settings + - **File Uploads & Attachments**: Support for file uploads and attachments in chat + - **Token Usage Display**: Show token usage and cost estimates in UI diff --git a/frontend/components/ChatV2.tsx 
b/frontend/components/ChatV2.tsx index 8724f0b3..8305cc80 100644 --- a/frontend/components/ChatV2.tsx +++ b/frontend/components/ChatV2.tsx @@ -178,7 +178,7 @@ export function ChatV2() { onRetryLastAssistant={handleRetryLastAssistant} /> {/* Removed soft fade to keep a cleaner boundaryless look */} -
+
void; onShouldStreamChange: (val: boolean) => void; + enabledTools?: string[]; + onEnabledToolsChange?: (list: string[]) => void; model: string; qualityLevel: QualityLevel; onQualityLevelChange: (level: QualityLevel) => void; @@ -29,12 +31,18 @@ export function MessageInput({ useTools, shouldStream, onUseToolsChange, + enabledTools = [], + onEnabledToolsChange, onShouldStreamChange, model, qualityLevel, onQualityLevelChange, }: MessageInputProps) { const inputRef = useRef(null); + const toolsDropdownRef = useRef(null); + const [toolsOpen, setToolsOpen] = useState(false); + const [availableTools, setAvailableTools] = useState<{ name: string; description?: string }[]>([]); + const [localSelected, setLocalSelected] = useState(enabledTools); // Auto-grow textarea up to ~200px useEffect(() => { @@ -45,6 +53,40 @@ export function MessageInput({ el.style.height = `${next}px`; }, [input]); + useEffect(() => { + setLocalSelected(enabledTools ?? []); + }, [enabledTools]); + + // Click outside to close tools dropdown + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if (toolsDropdownRef.current && !toolsDropdownRef.current.contains(event.target as Node)) { + setToolsOpen(false); + } + }; + + if (toolsOpen) { + document.addEventListener('mousedown', handleClickOutside); + return () => document.removeEventListener('mousedown', handleClickOutside); + } + }, [toolsOpen]); + + // Load tool specs for the selector UI + useEffect(() => { + let mounted = true; + import('../lib/chat').then(mod => { + const ToolsClient = (mod as any).ToolsClient; + if (!ToolsClient) return; + const client = new ToolsClient(); + client.getToolSpecs().then((res: any) => { + if (!mounted) return; + const tools = (res.tools || []).map((t: any) => ({ name: t.function?.name || t.name, description: t.function?.description || t.description })); + setAvailableTools(tools); + }).catch(() => setAvailableTools([])); + }).catch(() => setAvailableTools([])); + return () => { mounted = false; 
}; + }, []); + const handleKey = (e: React.KeyboardEvent) => { if (e.key === 'Enter' && !e.shiftKey) { @@ -89,13 +131,58 @@ export function MessageInput({ )}
- } - checked={useTools} - onChange={onUseToolsChange} - className="whitespace-nowrap" - /> +
+ + + {toolsOpen && ( +
+
Tools
+
+ {availableTools.length === 0 && ( +
No tools available
+ )} + {availableTools.map(t => { + const id = t.name; + const checked = localSelected.includes(id); + return ( + + ); + })} +
+
+ +
+
+ )} +
diff --git a/frontend/hooks/useChatState.ts b/frontend/hooks/useChatState.ts index 7c26ce37..3c76ee85 100644 --- a/frontend/hooks/useChatState.ts +++ b/frontend/hooks/useChatState.ts @@ -25,6 +25,8 @@ export interface ChatState { qualityLevel: QualityLevel; // System prompt for the current session systemPrompt: string; + // Per-tool enablement (list of tool names). Empty array means no explicit selection. + enabledTools: string[]; // Conversations conversations: ConversationMeta[]; @@ -56,6 +58,7 @@ export type ChatAction = | { type: 'SET_VERBOSITY'; payload: string } | { type: 'SET_QUALITY_LEVEL'; payload: QualityLevel } | { type: 'SET_SYSTEM_PROMPT'; payload: string } + | { type: 'SET_ENABLED_TOOLS'; payload: string[] } | { type: 'SET_CONVERSATION_ID'; payload: string | null } | { type: 'START_STREAMING'; payload: { abort: AbortController; userMessage: ChatMessage; assistantMessage: ChatMessage } } | { type: 'REGENERATE_START'; payload: { abort: AbortController; baseMessages: ChatMessage[]; assistantMessage: ChatMessage } } @@ -99,6 +102,7 @@ const initialState: ChatState = { verbosity: 'medium', qualityLevel: 'balanced', systemPrompt: '', + enabledTools: [], conversations: [], nextCursor: null, historyEnabled: true, @@ -153,6 +157,9 @@ function chatReducer(state: ChatState, action: ChatAction): ChatState { case 'SET_SYSTEM_PROMPT': return { ...state, systemPrompt: action.payload }; + case 'SET_ENABLED_TOOLS': + return { ...state, enabledTools: action.payload }; + case 'SET_CONVERSATION_ID': return { ...state, conversationId: action.payload }; @@ -573,7 +580,10 @@ export function useChatState() { qualityLevel: state.qualityLevel, ...(state.useTools ? { - tools: Object.values(availableTools), + // Send a simplified list of tool names to the backend. Backend will map names -> specs. + tools: (state.enabledTools && state.enabledTools.length > 0) + ? 
state.enabledTools + : Object.keys(availableTools), tool_choice: 'auto', } : {}), @@ -669,6 +679,10 @@ export function useChatState() { dispatch({ type: 'SET_SYSTEM_PROMPT', payload: prompt }); }, []), + setEnabledTools: useCallback((list: string[]) => { + dispatch({ type: 'SET_ENABLED_TOOLS', payload: list }); + }, []), + // Chat Actions sendMessage: useCallback(async () => { const input = state.input.trim(); diff --git a/frontend/lib/chat/types.ts b/frontend/lib/chat/types.ts index f031212e..796db74c 100644 --- a/frontend/lib/chat/types.ts +++ b/frontend/lib/chat/types.ts @@ -95,7 +95,8 @@ export interface ChatOptions { // Extended options for advanced features export interface ChatOptionsExtended extends ChatOptions { conversationId?: string; - tools?: ToolSpec[]; + // Accept either full ToolSpec objects or simple tool name strings + tools?: Array<ToolSpec | string>; toolChoice?: any; reasoning?: { effort?: string;