diff --git a/.feature-base/CREATED_AT.txt b/.feature-base/CREATED_AT.txt new file mode 100644 index 00000000..9eb469af --- /dev/null +++ b/.feature-base/CREATED_AT.txt @@ -0,0 +1 @@ +2026-06-15 12:22:14 CST diff --git a/packages/shared/src/types/llm.ts b/packages/shared/src/types/llm.ts index 3d1d4cc1..a999d42a 100644 --- a/packages/shared/src/types/llm.ts +++ b/packages/shared/src/types/llm.ts @@ -60,6 +60,8 @@ export interface ModelDefinition { reasoning?: boolean; inputTypes?: Array<'text' | 'image'>; description?: string; + /** Model quality tier for routing decisions (one of the `ModelTier` values declared in `model-catalog.ts`); optional. */ + tier?: import('./model-catalog.js').ModelTier; } export interface EnhancedProviderSettings { diff --git a/packages/shared/src/types/model-catalog.ts b/packages/shared/src/types/model-catalog.ts index 9a3afbe2..cb50bf41 100644 --- a/packages/shared/src/types/model-catalog.ts +++ b/packages/shared/src/types/model-catalog.ts @@ -1,3 +1,155 @@ +// --------------------------------------------------------------------------- +// Model Tier & Task Routing types +// --------------------------------------------------------------------------- + +/** Capability tier for a model — used for routing & UI grouping. */ +export type ModelTier = 'base' | 'pro' | 'max'; + +/** Cost bucket — UI-only, derived from pricing data. */ +export type CostTier = '$' | '$$' | '$$$' | '$$$$'; + +/** Task type used by the routing engine to select a model per use-case. */ +export type ModelTaskType = + | 'text_chat' + | 'text_reasoning' + | 'text_coding' + | 'text_translation' + | 'text_summary' + | 'image_recognition' + | 'image_generation' + | 'audio_tts' + | 'audio_stt' + | 'video_generation' + | 'embedding' + | 'web_search'; + +/** Routing strategy for `auto` mode in `TaskRoutingConfig`. */ +export type RoutingStrategy = + | 'always_max' + | 'always_cheapest' + | 'balanced' + | 'cache_optimized'; + +/** How multi-modal tasks (text+vision, text+image-gen, etc.) are dispatched. 
*/ +export type MultiModalStrategy = 'unified' | 'specialized'; + +/** Pricing unit for a model — token-based, request-based, per-image, etc. */ +export type PricingType = 'token' | 'request' | 'image' | 'audio' | 'video' | 'free' | 'local' | 'variable' | 'unknown'; + +/** Confidence level for a price quote. */ +export type PriceConfidence = 'exact' | 'estimated' | 'unknown'; + +/** Assignment of a (provider, model) pair — optionally with a fallback — for a single task type. */ +export interface TaskModelAssignment { + provider: string; + model: string; + fallback?: { provider: string; model: string }; +} + +/** Per-task-type routing configuration — managed by the routing UI. NOTE(review): `assignments` reads `Partial>` in this patch, which is not valid TypeScript; the type arguments appear to have been dropped (presumably `Partial<Record<ModelTaskType, TaskModelAssignment>>`) — confirm against the real file. */ +export interface TaskRoutingConfig { + mode: 'auto' | 'manual' | 'hybrid'; + assignments: Partial>; + autoStrategy: RoutingStrategy; + defaultTier: ModelTier; + multiModalStrategy?: MultiModalStrategy; +} + +/** Global routing configuration — controls tier-based model selection. NOTE(review): `tierOverrides` is a bare `Record` in this patch, which does not compile without its two type arguments — the key and value types need to be confirmed. `taskRouting` nests a `TaskRoutingConfig` that has its own `defaultTier`; how the two defaults interact is not visible here. */ +export interface RoutingConfig { + strategy: RoutingStrategy; + defaultTier: ModelTier; + tierOverrides?: Record; + budgetLimit?: number; + preferCacheHit: boolean; + taskRouting?: TaskRoutingConfig; +} + +// --------------------------------------------------------------------------- +// Normalized cost for multi-modal models (token, image, audio, video, etc.) +// --------------------------------------------------------------------------- + +/** Normalized cost representation for a model — supports token, per-request, per-image, per-minute, etc. NOTE(review): the currency of these amounts is not stated here — confirm. 
*/ +export interface NormalizedCost { + inputPer1MTokens?: number; + outputPer1MTokens?: number; + cachedReadPer1MTokens?: number; + cachedWritePer1MTokens?: number; + perRequest?: number; + perImage?: number; + perMinute?: number; + per1MChars?: number; + perSecond?: number; + pricingType: PricingType; + isFree: boolean; + isLocal: boolean; + priceConfidence: PriceConfidence; +} + +// --------------------------------------------------------------------------- +// Model quality scores (from benchmarks like Arena AI) +// --------------------------------------------------------------------------- + +/** Quality metrics for a model — combines benchmark Elo and derived tier. NOTE(review): the scale of `qualityScore` and the format of `lastUpdated` are not specified here — confirm. */ +export interface ModelQuality { + overallElo?: number; + codingElo?: number; + visionElo?: number; + qualityScore: number; + tier: ModelTier; + lastUpdated: string; + source?: 'arena' | 'heuristic' | 'user_override'; +} + +// --------------------------------------------------------------------------- +// Provider capability declaration +// --------------------------------------------------------------------------- + +/** Static capability declaration for an LLM provider (what modalities it supports). */ +export interface ProviderCapabilities { + chat: boolean; + vision: boolean; + imageGeneration: boolean; + tts: boolean; + stt: boolean; + videoGeneration: boolean; + embedding: boolean; + reasoning: boolean; + promptCaching: boolean; +} + +// --------------------------------------------------------------------------- +// Unified ModelProfile: merges catalog + benchmark + cost data +// --------------------------------------------------------------------------- + +/** + * Unified model profile: merges catalog data (from LiteLLM) + benchmark scores + * (from Arena AI) + normalized cost. This is the canonical shape consumed by + * the routing engine and the UI. 
+ */ +export interface ModelProfile { + id: string; + provider: string; + displayName: string; + family: string; + mode: string; + maxInputTokens: number; + maxOutputTokens: number; + cost: NormalizedCost; + capabilities: CatalogModelCapabilities; + quality: ModelQuality; + taskTypes: ModelTaskType[]; + derived: { + costEfficiency: number; + costTier: CostTier; + latencyClass: 'fast' | 'medium' | 'slow'; + }; +} + +// --------------------------------------------------------------------------- +// Catalog types (existing, enhanced) +// --------------------------------------------------------------------------- + export interface CatalogModelCapabilities { vision: boolean; functionCalling: boolean; diff --git a/packages/shared/src/utils/config.ts b/packages/shared/src/utils/config.ts index 0d340d03..b64bc20f 100644 --- a/packages/shared/src/utils/config.ts +++ b/packages/shared/src/utils/config.ts @@ -1,6 +1,7 @@ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs'; import { resolve, join } from 'node:path'; import { homedir } from 'node:os'; +import type { RoutingConfig, TaskRoutingConfig } from '../types/model-catalog.js'; export interface MarkusConfig { org: { @@ -31,6 +32,10 @@ export interface MarkusConfig { timeoutMs?: number; /** Allow automatic fallback to other providers/models when the primary fails (default: true) */ autoFallback?: boolean; + /** Global routing config (strategy, tier overrides, budget, task routing). */ + routing?: RoutingConfig; + /** Shortcut for the per-task routing block. NOTE(review): `routing.taskRouting` carries the same `TaskRoutingConfig` type; which of the two takes precedence when both are set is not visible in this patch — confirm where the config is consumed. */ + taskRouting?: TaskRoutingConfig; }; server: { apiPort: number;