diff --git a/.agentv/targets.yaml b/.agentv/targets.yaml index 91d3b1ebf..e2adfce58 100644 --- a/.agentv/targets.yaml +++ b/.agentv/targets.yaml @@ -98,11 +98,16 @@ targets: api_key: ${{ GH_MODELS_TOKEN }} model: ${{ GH_MODELS_MODEL }} + # Single Azure target. Control the endpoint shape with AZURE_OPENAI_API_FORMAT: + # - chat (default): uses /chat/completions and AZURE_OPENAI_API_VERSION + # If AZURE_OPENAI_API_VERSION is omitted, AgentV defaults chat targets to 2024-12-01-preview. + # - responses: uses /responses and AgentV auto-defaults the version to v1 - name: azure provider: azure endpoint: ${{ AZURE_OPENAI_ENDPOINT }} api_key: ${{ AZURE_OPENAI_API_KEY }} model: ${{ AZURE_DEPLOYMENT_NAME }} + api_format: ${{ AZURE_OPENAI_API_FORMAT }} version: ${{ AZURE_OPENAI_API_VERSION }} - name: gemini diff --git a/.env.example b/.env.example index 91e448936..ee5de1976 100644 --- a/.env.example +++ b/.env.example @@ -4,6 +4,10 @@ AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/ AZURE_OPENAI_API_KEY=your-openai-api-key-here AZURE_DEPLOYMENT_NAME=gpt-5-mini +AZURE_OPENAI_API_FORMAT=chat +# Chat/completions targets use this version. +# If omitted, AgentV defaults chat targets to 2024-12-01-preview. +# Azure responses targets default to `v1` automatically. 
AZURE_OPENAI_API_VERSION=2024-12-01-preview # OpenAI diff --git a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx index 71ce71a83..769b18641 100644 --- a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx +++ b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx @@ -44,7 +44,7 @@ The `duration_ms`, `started_at`, and `ended_at` fields are present on every grad Run against a different target than specified in the eval file: ```bash -agentv eval --target azure-base evals/**/*.yaml +agentv eval --target my-target evals/**/*.yaml ``` ### Experiment Label diff --git a/apps/web/src/content/docs/docs/targets/llm-providers.mdx b/apps/web/src/content/docs/docs/targets/llm-providers.mdx index 0bfcd8d11..c705fd33b 100644 --- a/apps/web/src/content/docs/docs/targets/llm-providers.mdx +++ b/apps/web/src/content/docs/docs/targets/llm-providers.mdx @@ -31,9 +31,9 @@ Controls which OpenAI API endpoint is used: | Value | Endpoint | When to use | |-------|----------|-------------| | `chat` (default) | `/chat/completions` | All OpenAI-compatible endpoints (GitHub Models, local proxies, etc.) | -| `responses` | `/responses` | Only `api.openai.com` — opt in to the Responses API | +| `responses` | `/responses` | `api.openai.com` and Azure OpenAI when the deployment supports the Responses API | -Most users should leave this unset. The default `chat` format is universally supported. Use `responses` only when you need Responses API features on `api.openai.com` directly. +Most users should leave this unset. The default `chat` format is universally supported. Use `responses` when you need Responses API features on OpenAI or Azure OpenAI deployments that support it. 
```yaml # OpenAI-compatible endpoint (default chat format works) @@ -69,6 +69,23 @@ targets: | `endpoint` | Yes | Azure OpenAI endpoint URL | | `api_key` | Yes | API key | | `model` | Yes | Deployment name | +| `api_format` | No | API format: `chat` (default) or `responses` | + +Azure OpenAI supports the same `api_format` switch: + +```yaml +targets: + - name: azure-responses + provider: azure + endpoint: ${{ AZURE_OPENAI_ENDPOINT }} + api_key: ${{ AZURE_OPENAI_API_KEY }} + model: ${{ AZURE_DEPLOYMENT_NAME }} + api_format: responses +``` + +When `api_format: responses` is used with Azure, AgentV defaults the API version to `v1` unless you explicitly override `version`. + +The repository's default `.agentv/targets.yaml` uses a single `azure` target and drives `api_format` from `AZURE_OPENAI_API_FORMAT`. ## Anthropic diff --git a/packages/core/src/evaluation/providers/ai-sdk.ts b/packages/core/src/evaluation/providers/ai-sdk.ts index 46bf3dd49..97bf3128a 100644 --- a/packages/core/src/evaluation/providers/ai-sdk.ts +++ b/packages/core/src/evaluation/providers/ai-sdk.ts @@ -93,7 +93,10 @@ export class AzureProvider implements Provider { this.retryConfig = config.retry; const azure = createAzure(buildAzureOptions(config)); - this.model = azure.chat(config.deploymentName); + this.model = + config.apiFormat === 'responses' + ? azure(config.deploymentName) + : azure.chat(config.deploymentName); } async invoke(request: ProviderRequest): Promise { @@ -241,7 +244,9 @@ function buildAzureOptions(config: AzureResolvedConfig): AzureOpenAIProviderSett const options: AzureOpenAIProviderSettings = { apiKey: config.apiKey, apiVersion: config.version, - useDeploymentBasedUrls: true, + // Chat completions still use deployment-scoped Azure URLs for compatibility + // with existing deployments. Responses API should use the SDK's v1 path. 
+ useDeploymentBasedUrls: config.apiFormat !== 'responses', }; const baseURL = normalizeAzureBaseUrl(config.resourceName); diff --git a/packages/core/src/evaluation/providers/targets.ts b/packages/core/src/evaluation/providers/targets.ts index 5c498c10b..d820d8ba0 100644 --- a/packages/core/src/evaluation/providers/targets.ts +++ b/packages/core/src/evaluation/providers/targets.ts @@ -406,6 +406,7 @@ export interface AzureResolvedConfig { readonly deploymentName: string; readonly apiKey: string; readonly version?: string; + readonly apiFormat?: ApiFormat; readonly temperature?: number; readonly maxOutputTokens?: number; readonly retry?: RetryConfig; @@ -775,20 +776,27 @@ const BASE_TARGET_SCHEMA = z .passthrough(); const DEFAULT_AZURE_API_VERSION = '2024-12-01-preview'; +const DEFAULT_AZURE_RESPONSES_API_VERSION = 'v1'; const DEFAULT_OPENAI_BASE_URL = 'https://api.openai.com/v1'; -function normalizeAzureApiVersion(value: string | undefined): string { +function normalizeAzureApiVersion( + value: string | undefined, + apiFormat: ApiFormat | undefined, +): string { + const defaultVersion = + apiFormat === 'responses' ? DEFAULT_AZURE_RESPONSES_API_VERSION : DEFAULT_AZURE_API_VERSION; + if (!value) { - return DEFAULT_AZURE_API_VERSION; + return defaultVersion; } const trimmed = value.trim(); if (trimmed.length === 0) { - return DEFAULT_AZURE_API_VERSION; + return defaultVersion; } const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, '').trim(); - return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION; + return withoutPrefix.length > 0 ? 
withoutPrefix : defaultVersion; } function resolveRetryConfig(target: z.infer): RetryConfig | undefined { @@ -1104,11 +1112,13 @@ function resolveAzureConfig( const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`); const apiKey = resolveString(apiKeySource, env, `${target.name} api key`); const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`); + const apiFormat = resolveApiFormat(target, env, target.name); const version = normalizeAzureApiVersion( resolveOptionalString(versionSource, env, `${target.name} api version`, { allowLiteral: true, optionalEnv: true, }), + apiFormat, ); const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`); const maxOutputTokens = resolveOptionalNumber( @@ -1122,6 +1132,7 @@ function resolveAzureConfig( deploymentName, apiKey, version, + apiFormat, temperature, maxOutputTokens, retry, @@ -1130,9 +1141,18 @@ function resolveAzureConfig( function resolveApiFormat( target: z.infer, + env: EnvLookup, targetName: string, ): ApiFormat | undefined { - const raw = target.api_format ?? target.apiFormat; + const raw = resolveOptionalString( + target.api_format ?? 
target.apiFormat, + env, + `${targetName} api format`, + { + allowLiteral: true, + optionalEnv: true, + }, + ); if (raw === undefined) return undefined; if (raw === 'chat' || raw === 'responses') return raw; throw new Error( @@ -1164,7 +1184,7 @@ function resolveOpenAIConfig( baseURL, apiKey, model, - apiFormat: resolveApiFormat(target, target.name), + apiFormat: resolveApiFormat(target, env, target.name), temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`), maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`), retry, diff --git a/packages/core/src/evaluation/validation/targets-validator.ts b/packages/core/src/evaluation/validation/targets-validator.ts index ac3857a0a..22e6a715c 100644 --- a/packages/core/src/evaluation/validation/targets-validator.ts +++ b/packages/core/src/evaluation/validation/targets-validator.ts @@ -48,6 +48,7 @@ const AZURE_SETTINGS = new Set([ 'model', 'version', 'api_version', + 'api_format', 'temperature', 'max_output_tokens', 'maxTokens', @@ -65,7 +66,6 @@ const OPENAI_SETTINGS = new Set([ 'deployment', 'variant', 'api_format', - 'apiFormat', 'temperature', 'max_output_tokens', 'maxTokens', diff --git a/packages/core/test/evaluation/providers/targets.test.ts b/packages/core/test/evaluation/providers/targets.test.ts index 81a9e4874..b7c2cf865 100644 --- a/packages/core/test/evaluation/providers/targets.test.ts +++ b/packages/core/test/evaluation/providers/targets.test.ts @@ -19,9 +19,12 @@ const generateTextMock = mock(async () => ({ providerMetadata: undefined, })); -const createAzureMock = mock((options: unknown) => ({ - chat: () => ({ provider: 'azure', options }), -})); +const createAzureMock = mock((options: unknown) => { + const fn = () => ({ provider: 'azure', options, apiFormat: 'responses' }); + fn.chat = () => ({ provider: 'azure', options, apiFormat: 'chat' }); + fn.responses = () => ({ provider: 'azure', options, apiFormat: 'responses' }); + return fn; +}); 
const createOpenAIMock = mock((options: unknown) => { const fn = () => ({ provider: 'openai', options }); fn.chat = () => ({ provider: 'openai', options }); @@ -252,6 +255,90 @@ describe('resolveTargetDefinition', () => { expect(target.config.version).toBe('2024-08-01-preview'); }); + it('resolves azure api_format when configured', () => { + const env = { + AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com', + AZURE_OPENAI_API_KEY: 'secret', + AZURE_DEPLOYMENT_NAME: 'gpt-4o', + } satisfies Record; + + const target = resolveTargetDefinition( + { + name: 'azure-responses', + provider: 'azure', + endpoint: '${{ AZURE_OPENAI_ENDPOINT }}', + api_key: '${{ AZURE_OPENAI_API_KEY }}', + model: '${{ AZURE_DEPLOYMENT_NAME }}', + api_format: 'responses', + }, + env, + ); + + expect(target.kind).toBe('azure'); + if (target.kind !== 'azure') { + throw new Error('expected azure target'); + } + + expect(target.config.apiFormat).toBe('responses'); + expect(target.config.version).toBe('v1'); + }); + + it('resolves azure api_format from env interpolation', () => { + const env = { + AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com', + AZURE_OPENAI_API_KEY: 'secret', + AZURE_DEPLOYMENT_NAME: 'gpt-4o', + AZURE_OPENAI_API_FORMAT: 'responses', + } satisfies Record; + + const target = resolveTargetDefinition( + { + name: 'azure-env-format', + provider: 'azure', + endpoint: '${{ AZURE_OPENAI_ENDPOINT }}', + api_key: '${{ AZURE_OPENAI_API_KEY }}', + model: '${{ AZURE_DEPLOYMENT_NAME }}', + api_format: '${{ AZURE_OPENAI_API_FORMAT }}', + }, + env, + ); + + expect(target.kind).toBe('azure'); + if (target.kind !== 'azure') { + throw new Error('expected azure target'); + } + + expect(target.config.apiFormat).toBe('responses'); + expect(target.config.version).toBe('v1'); + }); + + it('defaults azure responses targets to api version v1', () => { + const env = { + AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com', + AZURE_OPENAI_API_KEY: 'secret', + AZURE_DEPLOYMENT_NAME: 
'gpt-4o', + } satisfies Record; + + const target = resolveTargetDefinition( + { + name: 'azure-responses-default-version', + provider: 'azure', + endpoint: '${{ AZURE_OPENAI_ENDPOINT }}', + api_key: '${{ AZURE_OPENAI_API_KEY }}', + model: '${{ AZURE_DEPLOYMENT_NAME }}', + api_format: 'responses', + }, + env, + ); + + expect(target.kind).toBe('azure'); + if (target.kind !== 'azure') { + throw new Error('expected azure target'); + } + + expect(target.config.version).toBe('v1'); + }); + it('throws when required azure environment variables are missing', () => { const env = { AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com', @@ -787,6 +874,37 @@ describe('createProvider', () => { const response = await provider.invoke({ question: 'Hello' }); expect(createAzureMock).toHaveBeenCalledTimes(1); + expect(createAzureMock.mock.calls[0]?.[0]).toMatchObject({ useDeploymentBasedUrls: true }); + expect(provider.asLanguageModel()).toMatchObject({ apiFormat: 'chat' }); + expect(generateTextMock).toHaveBeenCalledTimes(1); + expect(extractLastAssistantContent(response.output)).toBe('ok'); + }); + + it('creates an azure provider using the responses api when requested', async () => { + const env = { + AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com', + AZURE_OPENAI_API_KEY: 'key', + AZURE_DEPLOYMENT_NAME: 'gpt-4o', + } satisfies Record; + + const resolved = resolveTargetDefinition( + { + name: 'azure-responses-target', + provider: 'azure', + endpoint: '${{ AZURE_OPENAI_ENDPOINT }}', + api_key: '${{ AZURE_OPENAI_API_KEY }}', + model: '${{ AZURE_DEPLOYMENT_NAME }}', + api_format: 'responses', + }, + env, + ); + + const provider = createProvider(resolved); + const response = await provider.invoke({ question: 'Hello' }); + + expect(createAzureMock).toHaveBeenCalledTimes(1); + expect(createAzureMock.mock.calls[0]?.[0]).toMatchObject({ useDeploymentBasedUrls: false }); + expect(provider.asLanguageModel()).toMatchObject({ apiFormat: 'responses' }); 
expect(generateTextMock).toHaveBeenCalledTimes(1); expect(extractLastAssistantContent(response.output)).toBe('ok'); }); diff --git a/packages/core/test/evaluation/validation/targets-validator.test.ts b/packages/core/test/evaluation/validation/targets-validator.test.ts index ecbf4f529..8733544b2 100644 --- a/packages/core/test/evaluation/validation/targets-validator.test.ts +++ b/packages/core/test/evaluation/validation/targets-validator.test.ts @@ -140,4 +140,29 @@ describe('validateTargetsFile', () => { ), ).toBe(true); }); + + it('accepts azure api_format as a known setting', async () => { + const filePath = path.join(tempDir, 'azure-api-format.yaml'); + await writeFile( + filePath, + `targets: + - name: azure-responses + provider: azure + endpoint: \${{ AZURE_OPENAI_ENDPOINT }} + api_key: \${{ AZURE_OPENAI_API_KEY }} + model: \${{ AZURE_DEPLOYMENT_NAME }} + api_format: responses +`, + ); + + const result = await validateTargetsFile(filePath); + + expect( + result.errors.some( + (error) => + error.location === 'targets[0].api_format' && + error.message.includes("Unknown setting 'api_format'"), + ), + ).toBe(false); + }); });