raphaeltm · simple-agent-manager · May 6, 2026 · May 5, 2026 · May 6, 2026 · May 6, 2026
diff --git a/apps/api/tests/unit/routes/ai-proxy.test.ts b/apps/api/tests/unit/routes/ai-proxy.test.ts
@@ -295,15 +295,15 @@ describe('PLATFORM_AI_MODELS catalog', () => {
   it('has correct tier assignments', async () => {
     const { PLATFORM_AI_MODELS } = await import('@simple-agent-manager/shared');
 
-    const freeModels = PLATFORM_AI_MODELS.filter((m) => m.tier === 'free');
+    const lowCostModels = PLATFORM_AI_MODELS.filter((m) => m.tier === 'low-cost');
     const standardModels = PLATFORM_AI_MODELS.filter((m) => m.tier === 'standard');
     const premiumModels = PLATFORM_AI_MODELS.filter((m) => m.tier === 'premium');
 
-    // All Workers AI models are free tier
-    for (const m of freeModels) {
+    // Low-cost models route through Cloudflare-billed Workers AI.
+    for (const m of lowCostModels) {
       expect(m.provider).toBe('workers-ai');
-      expect(m.costPer1kInputTokens).toBe(0);
-      expect(m.costPer1kOutputTokens).toBe(0);
+      expect(m.costPer1kInputTokens).toBeGreaterThan(0);
+      expect(m.costPer1kOutputTokens).toBeGreaterThan(0);
     }
 
     // Standard tier has at least Haiku and GPT-4.1
@@ -328,13 +328,11 @@ describe('PLATFORM_AI_MODELS catalog', () => {
     expect(providers.has('openai')).toBe(true);
   });
 
-  it('has positive cost for non-free models', async () => {
+  it('has positive cost metadata for all catalog models', async () => {
     const { PLATFORM_AI_MODELS } = await import('@simple-agent-manager/shared');
     for (const m of PLATFORM_AI_MODELS) {
-      if (m.tier !== 'free') {
-        expect(m.costPer1kInputTokens).toBeGreaterThan(0);
-        expect(m.costPer1kOutputTokens).toBeGreaterThan(0);
-      }
+      expect(m.costPer1kInputTokens).toBeGreaterThan(0);
+      expect(m.costPer1kOutputTokens).toBeGreaterThan(0);
     }
   });
 
@@ -345,4 +343,3 @@ describe('PLATFORM_AI_MODELS catalog', () => {
     }
   });
 });
-
diff --git a/apps/web/src/lib/api/admin.ts b/apps/web/src/lib/api/admin.ts
@@ -347,7 +347,7 @@ export interface AIProxyConfigResponse {
     id: string;
     label: string;
     provider: 'workers-ai' | 'anthropic' | 'openai';
-    tier: 'free' | 'standard' | 'premium';
+    tier: 'low-cost' | 'standard' | 'premium';
     costPer1kInputTokens: number;
     costPer1kOutputTokens: number;
     isDefault?: boolean;

diff --git a/apps/web/src/pages/AdminAIProxy.tsx b/apps/web/src/pages/AdminAIProxy.tsx
@@ -30,13 +30,13 @@ const BILLING_MODE_OPTIONS: Array<{ value: BillingMode; label: string; descripti
 ];
 
 const TIER_LABELS: Record<string, string> = {
-  free: 'Free Tier',
+  'low-cost': 'Low Cost',
   standard: 'Standard',
   premium: 'Premium',
 };
 
 const TIER_ORDER: Record<string, number> = {
-  free: 0,
+  'low-cost': 0,
   standard: 1,
   premium: 2,
 };
@@ -48,15 +48,15 @@ const PROVIDER_LABELS: Record<string, string> = {
 };
 
 function formatCost(cost: number): string {
-  if (cost === 0) return 'Free';
+  if (cost === 0) return '$0.0000';
   if (cost < 0.001) return `$${cost.toFixed(4)}`;
   return `$${cost.toFixed(3)}`;
 }
 
 function tierBadgeClasses(tier: string): string {
   switch (tier) {
-    case 'free':
-      return 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400';
+    case 'low-cost':
+      return 'bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-400';
     case 'standard':
       return 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400';
     case 'premium':
@@ -166,8 +166,8 @@ export function AdminAIProxy() {
     <div className="space-y-6">
       <Body>
         Configure the default AI model and billing mode for the platform inference proxy. Models are routed
-        through Cloudflare AI Gateway. Workers AI models are free; Anthropic and OpenAI models
-        require credentials or Unified Billing.
+        through Cloudflare AI Gateway. Workers AI models are Cloudflare-billed; Anthropic and OpenAI
+        models require credentials or Unified Billing.
       </Body>
 
       {error && (
@@ -265,7 +265,7 @@ export function AdminAIProxy() {
               ))}
             </select>
             <p className="mt-1.5 text-xs text-[var(--sam-text-secondary)]">
-              Workers AI models are free. Anthropic and OpenAI models require credentials on the{' '}
+              Workers AI models bill through Cloudflare. Anthropic and OpenAI models require credentials on the{' '}
               <a href="/admin/credentials" className="text-[var(--sam-accent)] underline">
                 Credentials
               </a>{' '}
@@ -337,7 +337,7 @@ export function AdminAIProxy() {
                           {formatCost(model.costPer1kOutputTokens)}/1K out
                         </span>
                       ) : (
-                        <span className="text-xs text-green-600 dark:text-green-400">Free</span>
+                        <span className="text-xs text-[var(--sam-text-secondary)]">No metered cost</span>
                       )}
                       {!model.available && (
                         <span className="text-xs text-yellow-600 dark:text-yellow-400">

diff --git a/apps/web/tests/playwright/admin-ai-proxy-audit.spec.ts b/apps/web/tests/playwright/admin-ai-proxy-audit.spec.ts
@@ -33,14 +33,14 @@ function makeConfig(overrides: Record<string, unknown> = {}) {
     hasOpenAICredential: false,
     hasUnifiedBilling: false,
     models: [
-      { id: '@cf/meta/llama-4-scout-17b-16e-instruct', label: 'Llama 4 Scout 17B', provider: 'workers-ai', tier: 'free', costPer1kInputTokens: 0, costPer1kOutputTokens: 0, isDefault: true, available: true },
-      { id: '@cf/qwen/qwen3-30b-a3b-fp8', label: 'Qwen3 30B', provider: 'workers-ai', tier: 'free', costPer1kInputTokens: 0, costPer1kOutputTokens: 0, available: true },
-      { id: '@cf/google/gemma-3-12b-it', label: 'Gemma 3 12B', provider: 'workers-ai', tier: 'free', costPer1kInputTokens: 0, costPer1kOutputTokens: 0, available: true },
-      { id: 'claude-haiku-4-5-20251001', label: 'Claude Haiku 4.5', provider: 'anthropic', tier: 'standard', costPer1kInputTokens: 0.0008, costPer1kOutputTokens: 0.004, available: false },
+      { id: '@cf/meta/llama-4-scout-17b-16e-instruct', label: 'Llama 4 Scout 17B', provider: 'workers-ai', tier: 'low-cost', costPer1kInputTokens: 0.00027, costPer1kOutputTokens: 0.00085, isDefault: true, available: true },
+      { id: '@cf/qwen/qwen3-30b-a3b-fp8', label: 'Qwen3 30B', provider: 'workers-ai', tier: 'low-cost', costPer1kInputTokens: 0.000051, costPer1kOutputTokens: 0.000335, available: true },
+      { id: '@cf/google/gemma-3-12b-it', label: 'Gemma 3 12B', provider: 'workers-ai', tier: 'low-cost', costPer1kInputTokens: 0.00035, costPer1kOutputTokens: 0.00056, available: true },
+      { id: 'claude-haiku-4-5-20251001', label: 'Claude Haiku 4.5', provider: 'anthropic', tier: 'standard', costPer1kInputTokens: 0.001, costPer1kOutputTokens: 0.005, available: false },
       { id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', provider: 'anthropic', tier: 'standard', costPer1kInputTokens: 0.003, costPer1kOutputTokens: 0.015, available: false },
       { id: 'gpt-4.1-mini', label: 'GPT-4.1 Mini', provider: 'openai', tier: 'standard', costPer1kInputTokens: 0.0004, costPer1kOutputTokens: 0.0016, available: false },
       { id: 'gpt-4.1', label: 'GPT-4.1', provider: 'openai', tier: 'standard', costPer1kInputTokens: 0.002, costPer1kOutputTokens: 0.008, available: false },
-      { id: 'claude-opus-4-6', label: 'Claude Opus 4.6', provider: 'anthropic', tier: 'premium', costPer1kInputTokens: 0.015, costPer1kOutputTokens: 0.075, available: false },
+      { id: 'claude-opus-4-6', label: 'Claude Opus 4.6', provider: 'anthropic', tier: 'premium', costPer1kInputTokens: 0.005, costPer1kOutputTokens: 0.025, available: false },
       { id: 'gpt-5.2', label: 'GPT-5.2', provider: 'openai', tier: 'premium', costPer1kInputTokens: 0.01, costPer1kOutputTokens: 0.04, available: false },
     ],
     ...overrides,
@@ -83,7 +83,7 @@ async function screenshot(page: Page, name: string) {
 test.describe('AdminAIProxy — Mobile', () => {
   test.use({ viewport: { width: 375, height: 667 }, isMobile: true });
 
-  test('normal data — free tier default', async ({ page }) => {
+  test('normal data — low-cost Workers AI default', async ({ page }) => {
     await setupApiMocks(page);
     await page.goto('/admin/ai-proxy');
     await screenshot(page, 'admin-ai-proxy-normal-mobile');
@@ -148,7 +148,7 @@ test.describe('AdminAIProxy — Mobile', () => {
 test.describe('AdminAIProxy — Desktop', () => {
   test.use({ viewport: { width: 1280, height: 800 }, isMobile: false });
 
-  test('normal data — free tier default', async ({ page }) => {
+  test('normal data — low-cost Workers AI default', async ({ page }) => {
     await setupApiMocks(page);
     await page.goto('/admin/ai-proxy');
     await screenshot(page, 'admin-ai-proxy-normal-desktop');

diff --git a/docs/architecture/agent-harness-integration.md b/docs/architecture/agent-harness-integration.md
@@ -184,7 +184,7 @@ Gemma 4 26B is the current recommended Workers AI model for harness/orchestrator
 - Produces structured `tool_calls` with `tool_choice: "auto"` (no forcing required)
 - Handles OpenAI-format `content: null` without workarounds
 - Returns built-in `reasoning` field for observability
-- Runs on the Workers AI free tier
+- Runs through Cloudflare-billed Workers AI at low per-token rates
 - Has official `function_calling=true` in Cloudflare model metadata
 
 ### Fallback: Qwen 2.5 Coder 32B (`@cf/qwen/qwen2.5-coder-32b-instruct`)

diff --git a/experiments/ai-gateway-tool-call/FINDINGS-gemma.md b/experiments/ai-gateway-tool-call/FINDINGS-gemma.md
@@ -13,7 +13,7 @@ Gemma 4 26B (`@cf/google/gemma-4-26b-a4b-it`) is **strictly superior** to all te
 
 - **Gateway endpoint**: `https://gateway.ai.cloudflare.com/v1/{account_id}/sam/workers-ai/v1/chat/completions`
 - **Auth**: `Authorization: Bearer {CF_TOKEN}` (standard Cloudflare API token — no Unified Billing needed for Workers AI path)
-- **Cost**: $0 (Workers AI free tier)
+- **Cost**: Billed through Cloudflare Workers AI at $0.10 per 1M input tokens and $0.30 per 1M output tokens (rates as of 2026-05-06)
 - **Metadata**: `cf-aig-metadata` header with userId, workspaceId, projectId, source, modelId — same schema as existing SAM proxy
 
 ## Detailed Findings
@@ -60,7 +60,7 @@ Step 3: Call `calculate(expression='(F_value - 32) * 5 / 9')` where `F_value` is
 Step 4: Respond to the user with the weather condition and the temperature in Celsius.
 ```
 
-This provides free observability for harness traces without needing an explicit "think step by step" prompt.
+This provides built-in observability for harness traces without needing an explicit "think step by step" prompt.
 
 ### 4. Harness-Style Coding Tools: PASS
 
@@ -82,7 +82,7 @@ Tested with `grep`, `read_file`, `edit_file`, `bash` tools (the planned harness
 | **Workarounds needed** | **None** | 2 | None |
 | **CF function_calling flag** | `true` | N/A | N/A |
 | **Context window** | 32K | 32K | 32K |
-| **Cost** | $0 (Workers AI) | $0 (Workers AI) | $0 (Workers AI) |
+| **Cost** | Workers AI: $0.10/M input, $0.30/M output | Workers AI: $0.66/M input, $1.00/M output | Workers AI: $0.051/M input, $0.335/M output |
 
 ### 6. Workers AI Model Availability
 
@@ -190,7 +190,7 @@ These workarounds should remain in the generic proxy for backward compatibility
 
 1. **Use Gemma 4 26B as the default harness model.** It requires zero workarounds, produces reasoning traces, and has official function_calling support from Cloudflare. Qwen 2.5 Coder remains as a fallback but should not be the default.
 
-2. **Persist the `reasoning` field in harness traces.** It provides free observability — the model's decision-making process is visible without needing "chain of thought" prompting or separate logging.
+2. **Persist the `reasoning` field in harness traces.** It provides built-in observability — the model's decision-making process is visible without needing "chain of thought" prompting or separate logging.
 
 3. **Next experiment: OpenAI model through Unified Billing.** Per the knowledge graph, the priority after Gemma is a small OpenAI model (gpt-4.1-mini) through the Unified API path. This requires `CF_AIG_TOKEN` with Unified Billing scope, which was blocked in the previous experiment.