Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .feature-base/CREATED_AT.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2026-06-15 12:22:14 CST
2 changes: 2 additions & 0 deletions packages/shared/src/types/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
reasoning?: boolean;
inputTypes?: Array<'text' | 'image'>;
description?: string;
/** Model quality tier for routing decisions */
tier?: import('./model-catalog.js').ModelTier;

Check failure on line 64 in packages/shared/src/types/llm.ts

View workflow job for this annotation

GitHub Actions / check

`import()` type annotations are forbidden
}

export interface EnhancedProviderSettings {
Expand Down
152 changes: 152 additions & 0 deletions packages/shared/src/types/model-catalog.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,155 @@
// ---------------------------------------------------------------------------
// Model Tier & Task Routing types
// ---------------------------------------------------------------------------

/**
 * Capability tier assigned to a model.
 *
 * Used for routing and for grouping models in the UI. The three string
 * literals below are the only permitted values.
 */
export type ModelTier =
  | 'base'
  | 'pro'
  | 'max';

/**
 * Cost bucket shown in the UI.
 *
 * UI-only; derived from pricing data rather than stored as a source value.
 * Each bucket is a string of one to four dollar signs.
 */
export type CostTier =
  | '$'
  | '$$'
  | '$$$'
  | '$$$$';

/**
 * Kind of task a request represents.
 *
 * The routing engine uses this value to pick a model per use-case. Members
 * are grouped below by their name prefix.
 */
export type ModelTaskType =
  // text_*
  | 'text_chat'
  | 'text_reasoning'
  | 'text_coding'
  | 'text_translation'
  | 'text_summary'
  // image_*
  | 'image_recognition'
  | 'image_generation'
  // audio_*
  | 'audio_tts'
  | 'audio_stt'
  // video_*
  | 'video_generation'
  // unprefixed
  | 'embedding'
  | 'web_search';

/**
 * Strategy applied when `TaskRoutingConfig` runs in `auto` mode.
 *
 * Only the four literals below are accepted.
 */
export type RoutingStrategy =
  | 'always_max'
  | 'always_cheapest'
  | 'balanced'
  | 'cache_optimized';

/**
 * Dispatch mode for multi-modal tasks (text+vision, text+image-gen, etc.).
 */
export type MultiModalStrategy =
  | 'unified'
  | 'specialized';

/**
 * Unit by which a model is priced: token-based, request-based, per-image,
 * and so on.
 */
export type PricingType =
  | 'token'
  | 'request'
  | 'image'
  | 'audio'
  | 'video'
  | 'free'
  | 'local'
  | 'variable'
  | 'unknown';

/**
 * How much a quoted price can be trusted.
 */
export type PriceConfidence =
  | 'exact'
  | 'estimated'
  | 'unknown';

/**
 * Binds one task type to a (provider, model) pair, with an optional fallback
 * pair.
 */
export interface TaskModelAssignment {
  /** Provider identifier of the assigned model. */
  provider: string;
  /** Model identifier of the assigned model. */
  model: string;
  /** Optional alternative (provider, model) pair. */
  fallback?: {
    provider: string;
    model: string;
  };
}

/**
 * Routing configuration keyed by task type; managed by the routing UI.
 */
export interface TaskRoutingConfig {
  /** Selection mode for this configuration. */
  mode: 'auto' | 'manual' | 'hybrid';
  /** Per-task assignments; any task type may be left unset. */
  assignments: Partial<Record<ModelTaskType, TaskModelAssignment>>;
  /** Strategy for `auto` mode. */
  autoStrategy: RoutingStrategy;
  /** Default model tier for this configuration. */
  defaultTier: ModelTier;
  /** Optional dispatch mode for multi-modal tasks. */
  multiModalStrategy?: MultiModalStrategy;
}

/**
 * Global routing configuration; controls tier-based model selection.
 */
export interface RoutingConfig {
  /** Routing strategy. */
  strategy: RoutingStrategy;
  /** Default model tier. */
  defaultTier: ModelTier;
  /**
   * Optional tier overrides keyed by string.
   * NOTE(review): the key's meaning (model id, provider, ...) is not stated
   * in this file; confirm against the consumer.
   */
  tierOverrides?: Record<string, ModelTier>;
  /** Optional budget limit. Units and period are not specified here. */
  budgetLimit?: number;
  /** Whether cache hits are preferred. */
  preferCacheHit: boolean;
  /** Optional per-task routing block. */
  taskRouting?: TaskRoutingConfig;
}

// ---------------------------------------------------------------------------
// Normalized cost for multi-modal models (token, image, audio, video, etc.)
// ---------------------------------------------------------------------------

/**
 * Normalized cost of a model. Covers token, per-request, per-image,
 * per-minute and similar pricing shapes in one record.
 *
 * Every rate field is optional. Currency is not specified in this file.
 */
export interface NormalizedCost {
  // Token-based rates (presumably per one million tokens, going by the names).
  inputPer1MTokens?: number;
  outputPer1MTokens?: number;
  cachedReadPer1MTokens?: number;
  cachedWritePer1MTokens?: number;

  // Non-token rates.
  perRequest?: number;
  perImage?: number;
  perMinute?: number;
  per1MChars?: number;
  perSecond?: number;

  /** Pricing unit for this model. */
  pricingType: PricingType;
  /** Flag marking the model as free. */
  isFree: boolean;
  /** Flag marking the model as local. */
  isLocal: boolean;
  /** Confidence level of the quoted price. */
  priceConfidence: PriceConfidence;
}

// ---------------------------------------------------------------------------
// Model quality scores (from benchmarks like Arena AI)
// ---------------------------------------------------------------------------

/**
 * Quality metrics for a model: benchmark Elo scores plus the derived tier.
 */
export interface ModelQuality {
  // Optional benchmark Elo scores.
  overallElo?: number;
  codingElo?: number;
  visionElo?: number;

  /** Quality score. The scale is not specified in this file. */
  qualityScore: number;
  /** Tier derived for the model. */
  tier: ModelTier;
  /** When the metrics were last updated. String format is not specified here. */
  lastUpdated: string;
  /** Optional origin of the metrics. */
  source?:
    | 'arena'
    | 'heuristic'
    | 'user_override';
}

// ---------------------------------------------------------------------------
// Provider capability declaration
// ---------------------------------------------------------------------------

/**
 * Static declaration of what an LLM provider supports.
 *
 * Every flag is required, so an unsupported capability must be spelled out as
 * `false` rather than omitted.
 */
export interface ProviderCapabilities {
  // Modalities.
  chat: boolean;
  vision: boolean;
  imageGeneration: boolean;
  tts: boolean;
  stt: boolean;
  videoGeneration: boolean;
  embedding: boolean;

  // Additional features.
  reasoning: boolean;
  promptCaching: boolean;
}

// ---------------------------------------------------------------------------
// Unified ModelProfile: merges catalog + benchmark + cost data
// ---------------------------------------------------------------------------

/**
 * Unified profile of a single model.
 *
 * Merges three sources into one record: catalog data (from LiteLLM),
 * benchmark scores (from Arena AI), and normalized cost. This is the
 * canonical shape consumed by the routing engine and the UI.
 */
export interface ModelProfile {
  // Identity.
  id: string;
  provider: string;
  displayName: string;
  family: string;
  /** Mode string. The set of allowed values is not constrained by this type. */
  mode: string;

  // Token limits.
  maxInputTokens: number;
  maxOutputTokens: number;

  /** Normalized cost of the model. */
  cost: NormalizedCost;
  /** Capabilities of the model as recorded in the catalog. */
  capabilities: CatalogModelCapabilities;
  /** Quality metrics of the model. */
  quality: ModelQuality;
  /** Task types associated with this model. */
  taskTypes: ModelTaskType[];

  /** Values grouped under `derived`. How they are computed is not shown here. */
  derived: {
    costEfficiency: number;
    costTier: CostTier;
    latencyClass:
      | 'fast'
      | 'medium'
      | 'slow';
  };
}

// ---------------------------------------------------------------------------
// Catalog types (existing, enhanced)
// ---------------------------------------------------------------------------

export interface CatalogModelCapabilities {
vision: boolean;
functionCalling: boolean;
Expand Down
5 changes: 5 additions & 0 deletions packages/shared/src/utils/config.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
import { resolve, join } from 'node:path';
import { homedir } from 'node:os';
import type { RoutingConfig, TaskRoutingConfig } from '../types/model-catalog.js';

export interface MarkusConfig {
org: {
Expand Down Expand Up @@ -31,6 +32,10 @@ export interface MarkusConfig {
timeoutMs?: number;
/** Allow automatic fallback to other providers/models when the primary fails (default: true) */
autoFallback?: boolean;
/** Global routing config (strategy, tier overrides, budget, task routing). */
routing?: RoutingConfig;
/** Shortcut for the per-task routing block. */
taskRouting?: TaskRoutingConfig;
};
server: {
apiPort: number;
Expand Down
Loading