Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 8 additions & 11 deletions apps/api/tests/unit/routes/ai-proxy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -295,15 +295,15 @@ describe('PLATFORM_AI_MODELS catalog', () => {
it('has correct tier assignments', async () => {
const { PLATFORM_AI_MODELS } = await import('@simple-agent-manager/shared');

const freeModels = PLATFORM_AI_MODELS.filter((m) => m.tier === 'free');
const lowCostModels = PLATFORM_AI_MODELS.filter((m) => m.tier === 'low-cost');
const standardModels = PLATFORM_AI_MODELS.filter((m) => m.tier === 'standard');
const premiumModels = PLATFORM_AI_MODELS.filter((m) => m.tier === 'premium');

// All Workers AI models are free tier
for (const m of freeModels) {
// Low-cost models route through Cloudflare-billed Workers AI.
for (const m of lowCostModels) {
expect(m.provider).toBe('workers-ai');
expect(m.costPer1kInputTokens).toBe(0);
expect(m.costPer1kOutputTokens).toBe(0);
expect(m.costPer1kInputTokens).toBeGreaterThan(0);
expect(m.costPer1kOutputTokens).toBeGreaterThan(0);
}

// Standard tier has at least Haiku and GPT-4.1
Expand All @@ -328,13 +328,11 @@ describe('PLATFORM_AI_MODELS catalog', () => {
expect(providers.has('openai')).toBe(true);
});

it('has positive cost for non-free models', async () => {
it('has positive cost metadata for all catalog models', async () => {
const { PLATFORM_AI_MODELS } = await import('@simple-agent-manager/shared');
for (const m of PLATFORM_AI_MODELS) {
if (m.tier !== 'free') {
expect(m.costPer1kInputTokens).toBeGreaterThan(0);
expect(m.costPer1kOutputTokens).toBeGreaterThan(0);
}
expect(m.costPer1kInputTokens).toBeGreaterThan(0);
expect(m.costPer1kOutputTokens).toBeGreaterThan(0);
}
});

Expand All @@ -345,4 +343,3 @@ describe('PLATFORM_AI_MODELS catalog', () => {
}
});
});

2 changes: 1 addition & 1 deletion apps/web/src/lib/api/admin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ export interface AIProxyConfigResponse {
id: string;
label: string;
provider: 'workers-ai' | 'anthropic' | 'openai';
tier: 'free' | 'standard' | 'premium';
tier: 'low-cost' | 'standard' | 'premium';
costPer1kInputTokens: number;
costPer1kOutputTokens: number;
isDefault?: boolean;
Expand Down
18 changes: 9 additions & 9 deletions apps/web/src/pages/AdminAIProxy.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ const BILLING_MODE_OPTIONS: Array<{ value: BillingMode; label: string; descripti
];

/**
 * Label text for each model tier key.
 *
 * NOTE(review): both `free` and 'low-cost' keys are present here; confirm
 * whether the `free` entry is still needed. Presumably these labels are
 * shown next to models in the admin UI — verify against the call sites.
 */
const TIER_LABELS: Record<string, string> = {
free: 'Free Tier',
'low-cost': 'Low Cost',
standard: 'Standard',
premium: 'Premium',
};

/**
 * Numeric rank for each model tier key (lower number = earlier rank).
 *
 * `free` and 'low-cost' share rank 0, ahead of `standard` (1) and
 * `premium` (2). Presumably used as a sort key when listing models —
 * TODO confirm against the call sites.
 */
const TIER_ORDER: Record<string, number> = {
free: 0,
'low-cost': 0,
standard: 1,
premium: 2,
};
Expand All @@ -48,15 +48,15 @@ const PROVIDER_LABELS: Record<string, string> = {
};

/**
 * Formats a per-1K-token cost as a dollar string.
 *
 * Costs below $0.001 (including zero) are shown with four decimal places so
 * that small per-token rates do not collapse to `$0.000`; everything else is
 * shown with three decimal places.
 *
 * A zero cost is rendered as `$0.0000` rather than the word "Free": the
 * duplicated `cost === 0` guard previously made the `$0.0000` branch
 * unreachable, leaving a "Free" label in place.
 *
 * @param cost - Cost in dollars per 1K tokens.
 * @returns The cost prefixed with `$`, e.g. `$0.0003` or `$0.015`.
 */
function formatCost(cost: number): string {
  // Zero falls into this branch too: (0).toFixed(4) === '0.0000'.
  if (cost < 0.001) {
    return `$${cost.toFixed(4)}`;
  }
  return `$${cost.toFixed(3)}`;
}

function tierBadgeClasses(tier: string): string {
switch (tier) {
case 'free':
return 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400';
case 'low-cost':
return 'bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-400';
case 'standard':
return 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400';
case 'premium':
Expand Down Expand Up @@ -166,8 +166,8 @@ export function AdminAIProxy() {
<div className="space-y-6">
<Body>
Configure the default AI model and billing mode for the platform inference proxy. Models are routed
through Cloudflare AI Gateway. Workers AI models are free; Anthropic and OpenAI models
require credentials or Unified Billing.
through Cloudflare AI Gateway. Workers AI models are Cloudflare-billed; Anthropic and OpenAI
models require credentials or Unified Billing.
</Body>

{error && (
Expand Down Expand Up @@ -265,7 +265,7 @@ export function AdminAIProxy() {
))}
</select>
<p className="mt-1.5 text-xs text-[var(--sam-text-secondary)]">
Workers AI models are free. Anthropic and OpenAI models require credentials on the{' '}
Workers AI models bill through Cloudflare. Anthropic and OpenAI models require credentials on the{' '}
<a href="/admin/credentials" className="text-[var(--sam-accent)] underline">
Credentials
</a>{' '}
Expand Down Expand Up @@ -337,7 +337,7 @@ export function AdminAIProxy() {
{formatCost(model.costPer1kOutputTokens)}/1K out
</span>
) : (
<span className="text-xs text-green-600 dark:text-green-400">Free</span>
<span className="text-xs text-[var(--sam-text-secondary)]">No metered cost</span>
)}
{!model.available && (
<span className="text-xs text-yellow-600 dark:text-yellow-400">
Expand Down
14 changes: 7 additions & 7 deletions apps/web/tests/playwright/admin-ai-proxy-audit.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ function makeConfig(overrides: Record<string, unknown> = {}) {
hasOpenAICredential: false,
hasUnifiedBilling: false,
models: [
{ id: '@cf/meta/llama-4-scout-17b-16e-instruct', label: 'Llama 4 Scout 17B', provider: 'workers-ai', tier: 'free', costPer1kInputTokens: 0, costPer1kOutputTokens: 0, isDefault: true, available: true },
{ id: '@cf/qwen/qwen3-30b-a3b-fp8', label: 'Qwen3 30B', provider: 'workers-ai', tier: 'free', costPer1kInputTokens: 0, costPer1kOutputTokens: 0, available: true },
{ id: '@cf/google/gemma-3-12b-it', label: 'Gemma 3 12B', provider: 'workers-ai', tier: 'free', costPer1kInputTokens: 0, costPer1kOutputTokens: 0, available: true },
{ id: 'claude-haiku-4-5-20251001', label: 'Claude Haiku 4.5', provider: 'anthropic', tier: 'standard', costPer1kInputTokens: 0.0008, costPer1kOutputTokens: 0.004, available: false },
{ id: '@cf/meta/llama-4-scout-17b-16e-instruct', label: 'Llama 4 Scout 17B', provider: 'workers-ai', tier: 'low-cost', costPer1kInputTokens: 0.00027, costPer1kOutputTokens: 0.00085, isDefault: true, available: true },
{ id: '@cf/qwen/qwen3-30b-a3b-fp8', label: 'Qwen3 30B', provider: 'workers-ai', tier: 'low-cost', costPer1kInputTokens: 0.000051, costPer1kOutputTokens: 0.000335, available: true },
{ id: '@cf/google/gemma-3-12b-it', label: 'Gemma 3 12B', provider: 'workers-ai', tier: 'low-cost', costPer1kInputTokens: 0.00035, costPer1kOutputTokens: 0.00056, available: true },
{ id: 'claude-haiku-4-5-20251001', label: 'Claude Haiku 4.5', provider: 'anthropic', tier: 'standard', costPer1kInputTokens: 0.001, costPer1kOutputTokens: 0.005, available: false },
{ id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', provider: 'anthropic', tier: 'standard', costPer1kInputTokens: 0.003, costPer1kOutputTokens: 0.015, available: false },
{ id: 'gpt-4.1-mini', label: 'GPT-4.1 Mini', provider: 'openai', tier: 'standard', costPer1kInputTokens: 0.0004, costPer1kOutputTokens: 0.0016, available: false },
{ id: 'gpt-4.1', label: 'GPT-4.1', provider: 'openai', tier: 'standard', costPer1kInputTokens: 0.002, costPer1kOutputTokens: 0.008, available: false },
{ id: 'claude-opus-4-6', label: 'Claude Opus 4.6', provider: 'anthropic', tier: 'premium', costPer1kInputTokens: 0.015, costPer1kOutputTokens: 0.075, available: false },
{ id: 'claude-opus-4-6', label: 'Claude Opus 4.6', provider: 'anthropic', tier: 'premium', costPer1kInputTokens: 0.005, costPer1kOutputTokens: 0.025, available: false },
{ id: 'gpt-5.2', label: 'GPT-5.2', provider: 'openai', tier: 'premium', costPer1kInputTokens: 0.01, costPer1kOutputTokens: 0.04, available: false },
],
...overrides,
Expand Down Expand Up @@ -83,7 +83,7 @@ async function screenshot(page: Page, name: string) {
test.describe('AdminAIProxy — Mobile', () => {
test.use({ viewport: { width: 375, height: 667 }, isMobile: true });

test('normal data — free tier default', async ({ page }) => {
test('normal data — low-cost Workers AI default', async ({ page }) => {
await setupApiMocks(page);
await page.goto('/admin/ai-proxy');
await screenshot(page, 'admin-ai-proxy-normal-mobile');
Expand Down Expand Up @@ -148,7 +148,7 @@ test.describe('AdminAIProxy — Mobile', () => {
test.describe('AdminAIProxy — Desktop', () => {
test.use({ viewport: { width: 1280, height: 800 }, isMobile: false });

test('normal data — free tier default', async ({ page }) => {
test('normal data — low-cost Workers AI default', async ({ page }) => {
await setupApiMocks(page);
await page.goto('/admin/ai-proxy');
await screenshot(page, 'admin-ai-proxy-normal-desktop');
Expand Down
2 changes: 1 addition & 1 deletion docs/architecture/agent-harness-integration.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ Gemma 4 26B is the current recommended Workers AI model for harness/orchestrator
- Produces structured `tool_calls` with `tool_choice: "auto"` (no forcing required)
- Handles OpenAI-format `content: null` without workarounds
- Returns built-in `reasoning` field for observability
- Runs on the Workers AI free tier
- Runs through Cloudflare-billed Workers AI at low per-token rates
- Has official `function_calling=true` in Cloudflare model metadata

### Fallback: Qwen 2.5 Coder 32B (`@cf/qwen/qwen2.5-coder-32b-instruct`)
Expand Down
8 changes: 4 additions & 4 deletions experiments/ai-gateway-tool-call/FINDINGS-gemma.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Gemma 4 26B (`@cf/google/gemma-4-26b-a4b-it`) is **strictly superior** to all te

- **Gateway endpoint**: `https://gateway.ai.cloudflare.com/v1/{account_id}/sam/workers-ai/v1/chat/completions`
- **Auth**: `Authorization: Bearer {CF_TOKEN}` (standard Cloudflare API token — no Unified Billing needed for Workers AI path)
- **Cost**: $0 (Workers AI free tier)
- **Cost**: Cloudflare Workers AI billing ($0.10 per 1M input tokens, $0.30 per 1M output tokens as of 2026-05-06)
- **Metadata**: `cf-aig-metadata` header with userId, workspaceId, projectId, source, modelId — same schema as existing SAM proxy

## Detailed Findings
Expand Down Expand Up @@ -60,7 +60,7 @@ Step 3: Call `calculate(expression='(F_value - 32) * 5 / 9')` where `F_value` is
Step 4: Respond to the user with the weather condition and the temperature in Celsius.
```

This provides free observability for harness traces without needing an explicit "think step by step" prompt.
This provides built-in observability for harness traces without needing an explicit "think step by step" prompt.

### 4. Harness-Style Coding Tools: PASS

Expand All @@ -82,7 +82,7 @@ Tested with `grep`, `read_file`, `edit_file`, `bash` tools (the planned harness
| **Workarounds needed** | **None** | 2 | None |
| **CF function_calling flag** | `true` | N/A | N/A |
| **Context window** | 32K | 32K | 32K |
| **Cost** | $0 (Workers AI) | $0 (Workers AI) | $0 (Workers AI) |
| **Cost** | Workers AI: $0.10/M input, $0.30/M output | Workers AI: $0.660/M input, $1.000/M output | Workers AI: $0.051/M input, $0.335/M output |

### 6. Workers AI Model Availability

Expand Down Expand Up @@ -190,7 +190,7 @@ These workarounds should remain in the generic proxy for backward compatibility

1. **Use Gemma 4 26B as the default harness model.** It requires zero workarounds, produces reasoning traces, and has official function_calling support from Cloudflare. Qwen 2.5 Coder remains as a fallback but should not be the default.

2. **Persist the `reasoning` field in harness traces.** It provides free observability — the model's decision-making process is visible without needing "chain of thought" prompting or separate logging.
2. **Persist the `reasoning` field in harness traces.** It provides built-in observability — the model's decision-making process is visible without needing "chain of thought" prompting or separate logging.

3. **Next experiment: OpenAI model through Unified Billing.** Per the knowledge graph, the priority after Gemma is a small OpenAI model (gpt-4.1-mini) through the Unified API path. This requires `CF_AIG_TOKEN` with Unified Billing scope, which was blocked in the previous experiment.

Expand Down
Loading
Loading