From 56518b37c9e6ca5c472e5962578d5f242922e1f4 Mon Sep 17 00:00:00 2001 From: Backend Developer Date: Mon, 15 Jun 2026 12:22:14 +0800 Subject: [PATCH 1/2] chore: initialize feature/model-routing-enhancement base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave 0 / Subtask 1 — Create base branch for model-routing enhancement feature. This is the foundation branch for the 5-Wave implementation: - Wave 0: Shared types extension (current task) - Wave 1: Core services (catalog/scores/profile/router) - Wave 2: API endpoints + CLI - Wave 3: Frontend (ModelSelect + ModelRoutingSection + Settings) - Wave 4: Quality (e2e + review + docs) Reference: - Upstream: feat/model-routing @ 3e3d09d1 (33 files, 3700+ lines) - Research: delivery-reports/model-routing-research-step1.md - Decomposition: delivery-reports/model-routing-decomposition-step2.md - Task: tsk_82332b2bf0098cf4e80faa02 --- .feature-base/CREATED_AT.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 .feature-base/CREATED_AT.txt diff --git a/.feature-base/CREATED_AT.txt b/.feature-base/CREATED_AT.txt new file mode 100644 index 00000000..9eb469af --- /dev/null +++ b/.feature-base/CREATED_AT.txt @@ -0,0 +1 @@ +2026-06-15 12:22:14 CST From 907569cf7b0e5c20faeb05f7e3ce72cf7b09626d Mon Sep 17 00:00:00 2001 From: Backend Developer Date: Mon, 15 Jun 2026 12:27:07 +0800 Subject: [PATCH 2/2] [TASK-tsk_82332b2bf0098cf4e80faa02] feat(shared): add model-routing types (tier/cost/task/strategy) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave 0: extend @markus/shared foundation for the model-routing feature. 
- packages/shared/src/types/model-catalog.ts + Add ModelTier ('base'|'pro'|'max') + CostTier ('$'..'$$$$') + Add ModelTaskType (12 types: text_chat/reasoning/coding/.., image_*, audio_*, video_*, embedding, web_search) + Add RoutingStrategy (always_max/always_cheapest/balanced/cache_optimized) + Add MultiModalStrategy, PricingType, PriceConfidence enums + Add TaskModelAssignment, TaskRoutingConfig, RoutingConfig + Add NormalizedCost, ModelQuality, ProviderCapabilities, ModelProfile - packages/shared/src/types/llm.ts + Add optional `tier?: ModelTier` field to ModelDefinition - packages/shared/src/utils/config.ts + Add `routing?: RoutingConfig` and `taskRouting?: TaskRoutingConfig` to llm config block All new types are non-breaking additions — existing consumers (CLI/core/org-manager) compile clean. Verification: - pnpm --filter @markus/shared build: success - pnpm typecheck: success (tsc -b + web-ui) --- packages/shared/src/types/llm.ts | 2 + packages/shared/src/types/model-catalog.ts | 152 +++++++++++++++++++++ packages/shared/src/utils/config.ts | 5 + 3 files changed, 159 insertions(+) diff --git a/packages/shared/src/types/llm.ts b/packages/shared/src/types/llm.ts index 3d1d4cc1..a999d42a 100644 --- a/packages/shared/src/types/llm.ts +++ b/packages/shared/src/types/llm.ts @@ -60,6 +60,8 @@ export interface ModelDefinition { reasoning?: boolean; inputTypes?: Array<'text' | 'image'>; description?: string; + /** Model quality tier for routing decisions */ + tier?: import('./model-catalog.js').ModelTier; } export interface EnhancedProviderSettings { diff --git a/packages/shared/src/types/model-catalog.ts b/packages/shared/src/types/model-catalog.ts index 9a3afbe2..cb50bf41 100644 --- a/packages/shared/src/types/model-catalog.ts +++ b/packages/shared/src/types/model-catalog.ts @@ -1,3 +1,155 @@ +// --------------------------------------------------------------------------- +// Model Tier & Task Routing types +// 
--------------------------------------------------------------------------- + +/** Capability tier for a model — used for routing & UI grouping. */ +export type ModelTier = 'base' | 'pro' | 'max'; + +/** Cost bucket — UI-only, derived from pricing data. */ +export type CostTier = '$' | '$$' | '$$$' | '$$$$'; + +/** Task type used by the routing engine to select a model per use-case. */ +export type ModelTaskType = + | 'text_chat' + | 'text_reasoning' + | 'text_coding' + | 'text_translation' + | 'text_summary' + | 'image_recognition' + | 'image_generation' + | 'audio_tts' + | 'audio_stt' + | 'video_generation' + | 'embedding' + | 'web_search'; + +/** Routing strategy for `auto` mode in `TaskRoutingConfig`. */ +export type RoutingStrategy = + | 'always_max' + | 'always_cheapest' + | 'balanced' + | 'cache_optimized'; + +/** How multi-modal tasks (text+vision, text+image-gen, etc.) are dispatched. */ +export type MultiModalStrategy = 'unified' | 'specialized'; + +/** Pricing unit for a model — token-based, request-based, per-image, etc. */ +export type PricingType = 'token' | 'request' | 'image' | 'audio' | 'video' | 'free' | 'local' | 'variable' | 'unknown'; + +/** Confidence level for a price quote. */ +export type PriceConfidence = 'exact' | 'estimated' | 'unknown'; + +/** Assignment of a (provider, model) pair — optionally with a fallback — for a single task type. */ +export interface TaskModelAssignment { + provider: string; + model: string; + fallback?: { provider: string; model: string }; +} + +/** Per-task-type routing configuration — managed by the routing UI. */ +export interface TaskRoutingConfig { + mode: 'auto' | 'manual' | 'hybrid'; + assignments: Partial<Record<ModelTaskType, TaskModelAssignment>>; + autoStrategy: RoutingStrategy; + defaultTier: ModelTier; + multiModalStrategy?: MultiModalStrategy; +} + +/** Global routing configuration — controls tier-based model selection. 
*/ +export interface RoutingConfig { + strategy: RoutingStrategy; + defaultTier: ModelTier; + tierOverrides?: Record<string, ModelTier>; + budgetLimit?: number; + preferCacheHit: boolean; + taskRouting?: TaskRoutingConfig; +} + +// --------------------------------------------------------------------------- +// Normalized cost for multi-modal models (token, image, audio, video, etc.) +// --------------------------------------------------------------------------- + +/** Normalized cost representation for a model — supports token, per-request, per-image, per-minute, etc. */ +export interface NormalizedCost { + inputPer1MTokens?: number; + outputPer1MTokens?: number; + cachedReadPer1MTokens?: number; + cachedWritePer1MTokens?: number; + perRequest?: number; + perImage?: number; + perMinute?: number; + per1MChars?: number; + perSecond?: number; + pricingType: PricingType; + isFree: boolean; + isLocal: boolean; + priceConfidence: PriceConfidence; +} + +// --------------------------------------------------------------------------- +// Model quality scores (from benchmarks like Arena AI) +// --------------------------------------------------------------------------- + +/** Quality metrics for a model — combines benchmark Elo and derived tier. */ +export interface ModelQuality { + overallElo?: number; + codingElo?: number; + visionElo?: number; + qualityScore: number; + tier: ModelTier; + lastUpdated: string; + source?: 'arena' | 'heuristic' | 'user_override'; +} + +// --------------------------------------------------------------------------- +// Provider capability declaration +// --------------------------------------------------------------------------- + +/** Static capability declaration for an LLM provider (what modalities it supports). 
*/ +export interface ProviderCapabilities { + chat: boolean; + vision: boolean; + imageGeneration: boolean; + tts: boolean; + stt: boolean; + videoGeneration: boolean; + embedding: boolean; + reasoning: boolean; + promptCaching: boolean; +} + +// --------------------------------------------------------------------------- +// Unified ModelProfile: merges catalog + benchmark + cost data +// --------------------------------------------------------------------------- + +/** + * Unified model profile: merges catalog data (from LiteLLM) + benchmark scores + * (from Arena AI) + normalized cost. This is the canonical shape consumed by + * the routing engine and the UI. + */ +export interface ModelProfile { + id: string; + provider: string; + displayName: string; + family: string; + mode: string; + maxInputTokens: number; + maxOutputTokens: number; + cost: NormalizedCost; + capabilities: CatalogModelCapabilities; + quality: ModelQuality; + taskTypes: ModelTaskType[]; + derived: { + costEfficiency: number; + costTier: CostTier; + latencyClass: 'fast' | 'medium' | 'slow'; + }; +} + +// --------------------------------------------------------------------------- +// Catalog types (existing, enhanced) +// --------------------------------------------------------------------------- + export interface CatalogModelCapabilities { vision: boolean; functionCalling: boolean; diff --git a/packages/shared/src/utils/config.ts b/packages/shared/src/utils/config.ts index 0d340d03..b64bc20f 100644 --- a/packages/shared/src/utils/config.ts +++ b/packages/shared/src/utils/config.ts @@ -1,6 +1,7 @@ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs'; import { resolve, join } from 'node:path'; import { homedir } from 'node:os'; +import type { RoutingConfig, TaskRoutingConfig } from '../types/model-catalog.js'; export interface MarkusConfig { org: { @@ -31,6 +32,10 @@ export interface MarkusConfig { timeoutMs?: number; /** Allow automatic fallback to other providers/models 
when the primary fails (default: true) */ autoFallback?: boolean; + /** Global routing config (strategy, tier overrides, budget, task routing). */ + routing?: RoutingConfig; + /** Shortcut for the per-task routing block. */ + taskRouting?: TaskRoutingConfig; }; server: { apiPort: number;