Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .agentv/targets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,16 @@ targets:
api_key: ${{ GH_MODELS_TOKEN }}
model: ${{ GH_MODELS_MODEL }}

# Single Azure target. Control the endpoint shape with AZURE_OPENAI_API_FORMAT:
# - chat (default): uses /chat/completions and AZURE_OPENAI_API_VERSION
# If AZURE_OPENAI_API_VERSION is omitted, AgentV defaults chat targets to 2024-12-01-preview.
# - responses: uses /responses. AgentV defaults the version to v1 only when AZURE_OPENAI_API_VERSION is unset or empty; a set value is passed through as-is.
- name: azure
provider: azure
endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
api_key: ${{ AZURE_OPENAI_API_KEY }}
model: ${{ AZURE_DEPLOYMENT_NAME }}
api_format: ${{ AZURE_OPENAI_API_FORMAT }}
version: ${{ AZURE_OPENAI_API_VERSION }}

- name: gemini
Expand Down
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
AZURE_OPENAI_API_KEY=your-openai-api-key-here
AZURE_DEPLOYMENT_NAME=gpt-5-mini
AZURE_OPENAI_API_FORMAT=chat
# Chat/completions targets use this version.
# If omitted, AgentV defaults chat targets to 2024-12-01-preview.
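# Azure responses targets default to `v1` only when this variable is unset or empty,
# so unset it (or set it to `v1`) when AZURE_OPENAI_API_FORMAT=responses.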
AZURE_OPENAI_API_VERSION=2024-12-01-preview

# OpenAI
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ The `duration_ms`, `started_at`, and `ended_at` fields are present on every grad
Run against a different target than specified in the eval file:

```bash
agentv eval --target azure-base evals/**/*.yaml
agentv eval --target my-target evals/**/*.yaml
```

### Experiment Label
Expand Down
21 changes: 19 additions & 2 deletions apps/web/src/content/docs/docs/targets/llm-providers.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ Controls which OpenAI API endpoint is used:
| Value | Endpoint | When to use |
|-------|----------|-------------|
| `chat` (default) | `/chat/completions` | All OpenAI-compatible endpoints (GitHub Models, local proxies, etc.) |
| `responses` | `/responses` | Only `api.openai.com` — opt in to the Responses API |
| `responses` | `/responses` | `api.openai.com` and Azure OpenAI when the deployment supports the Responses API |

Most users should leave this unset. The default `chat` format is universally supported. Use `responses` only when you need Responses API features on `api.openai.com` directly.
Most users should leave this unset. The default `chat` format is universally supported. Use `responses` when you need Responses API features on OpenAI or Azure OpenAI deployments that support it.

```yaml
# OpenAI-compatible endpoint (default chat format works)
Expand Down Expand Up @@ -69,6 +69,23 @@ targets:
| `endpoint` | Yes | Azure OpenAI endpoint URL |
| `api_key` | Yes | API key |
| `model` | Yes | Deployment name |
| `api_format` | No | API format: `chat` (default) or `responses` |

Azure OpenAI supports the same `api_format` switch:

```yaml
targets:
- name: azure-responses
provider: azure
endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
api_key: ${{ AZURE_OPENAI_API_KEY }}
model: ${{ AZURE_DEPLOYMENT_NAME }}
api_format: responses
```

When `api_format: responses` is used with Azure, AgentV defaults the API version to `v1` unless you explicitly override `version`.

The repository's default `.agentv/targets.yaml` uses a single `azure` target and drives `api_format` from `AZURE_OPENAI_API_FORMAT`.

## Anthropic

Expand Down
9 changes: 7 additions & 2 deletions packages/core/src/evaluation/providers/ai-sdk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,10 @@ export class AzureProvider implements Provider {
this.retryConfig = config.retry;

const azure = createAzure(buildAzureOptions(config));
this.model = azure.chat(config.deploymentName);
this.model =
config.apiFormat === 'responses'
? azure(config.deploymentName)
: azure.chat(config.deploymentName);
}

async invoke(request: ProviderRequest): Promise<ProviderResponse> {
Expand Down Expand Up @@ -241,7 +244,9 @@ function buildAzureOptions(config: AzureResolvedConfig): AzureOpenAIProviderSett
const options: AzureOpenAIProviderSettings = {
apiKey: config.apiKey,
apiVersion: config.version,
useDeploymentBasedUrls: true,
// Chat completions still use deployment-scoped Azure URLs for compatibility
// with existing deployments. Responses API should use the SDK's v1 path.
useDeploymentBasedUrls: config.apiFormat !== 'responses',
};

const baseURL = normalizeAzureBaseUrl(config.resourceName);
Expand Down
32 changes: 26 additions & 6 deletions packages/core/src/evaluation/providers/targets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ export interface AzureResolvedConfig {
readonly deploymentName: string;
readonly apiKey: string;
readonly version?: string;
readonly apiFormat?: ApiFormat;
readonly temperature?: number;
readonly maxOutputTokens?: number;
readonly retry?: RetryConfig;
Expand Down Expand Up @@ -775,20 +776,27 @@ const BASE_TARGET_SCHEMA = z
.passthrough();

const DEFAULT_AZURE_API_VERSION = '2024-12-01-preview';
const DEFAULT_AZURE_RESPONSES_API_VERSION = 'v1';
const DEFAULT_OPENAI_BASE_URL = 'https://api.openai.com/v1';

function normalizeAzureApiVersion(value: string | undefined): string {
function normalizeAzureApiVersion(
value: string | undefined,
apiFormat: ApiFormat | undefined,
): string {
const defaultVersion =
apiFormat === 'responses' ? DEFAULT_AZURE_RESPONSES_API_VERSION : DEFAULT_AZURE_API_VERSION;

if (!value) {
return DEFAULT_AZURE_API_VERSION;
return defaultVersion;
}

const trimmed = value.trim();
if (trimmed.length === 0) {
return DEFAULT_AZURE_API_VERSION;
return defaultVersion;
}

const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, '').trim();
return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
return withoutPrefix.length > 0 ? withoutPrefix : defaultVersion;
}

function resolveRetryConfig(target: z.infer<typeof BASE_TARGET_SCHEMA>): RetryConfig | undefined {
Expand Down Expand Up @@ -1104,11 +1112,13 @@ function resolveAzureConfig(
const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
const apiFormat = resolveApiFormat(target, env, target.name);
const version = normalizeAzureApiVersion(
resolveOptionalString(versionSource, env, `${target.name} api version`, {
allowLiteral: true,
optionalEnv: true,
}),
apiFormat,
);
const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
const maxOutputTokens = resolveOptionalNumber(
Expand All @@ -1122,6 +1132,7 @@ function resolveAzureConfig(
deploymentName,
apiKey,
version,
apiFormat,
temperature,
maxOutputTokens,
retry,
Expand All @@ -1130,9 +1141,18 @@ function resolveAzureConfig(

function resolveApiFormat(
target: z.infer<typeof BASE_TARGET_SCHEMA>,
env: EnvLookup,
targetName: string,
): ApiFormat | undefined {
const raw = target.api_format ?? target.apiFormat;
const raw = resolveOptionalString(
target.api_format ?? target.apiFormat,
env,
`${targetName} api format`,
{
allowLiteral: true,
optionalEnv: true,
},
);
if (raw === undefined) return undefined;
if (raw === 'chat' || raw === 'responses') return raw;
throw new Error(
Expand Down Expand Up @@ -1164,7 +1184,7 @@ function resolveOpenAIConfig(
baseURL,
apiKey,
model,
apiFormat: resolveApiFormat(target, target.name),
apiFormat: resolveApiFormat(target, env, target.name),
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
retry,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const AZURE_SETTINGS = new Set([
'model',
'version',
'api_version',
'api_format',
'temperature',
'max_output_tokens',
'maxTokens',
Expand All @@ -65,7 +66,6 @@ const OPENAI_SETTINGS = new Set([
'deployment',
'variant',
'api_format',
'apiFormat',
'temperature',
'max_output_tokens',
'maxTokens',
Expand Down
124 changes: 121 additions & 3 deletions packages/core/test/evaluation/providers/targets.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ const generateTextMock = mock(async () => ({
providerMetadata: undefined,
}));

const createAzureMock = mock((options: unknown) => ({
chat: () => ({ provider: 'azure', options }),
}));
const createAzureMock = mock((options: unknown) => {
const fn = () => ({ provider: 'azure', options, apiFormat: 'responses' });
fn.chat = () => ({ provider: 'azure', options, apiFormat: 'chat' });
fn.responses = () => ({ provider: 'azure', options, apiFormat: 'responses' });
return fn;
});
const createOpenAIMock = mock((options: unknown) => {
const fn = () => ({ provider: 'openai', options });
fn.chat = () => ({ provider: 'openai', options });
Expand Down Expand Up @@ -252,6 +255,90 @@ describe('resolveTargetDefinition', () => {
expect(target.config.version).toBe('2024-08-01-preview');
});

// A literal `api_format: 'responses'` on an azure target must be carried through
// to the resolved config. No version is configured here, so the resolved version
// is expected to be 'v1'.
it('resolves azure api_format when configured', () => {
// Env provides only endpoint, key, and deployment name; no API version variable.
const env = {
AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com',
AZURE_OPENAI_API_KEY: 'secret',
AZURE_DEPLOYMENT_NAME: 'gpt-4o',
} satisfies Record<string, string>;

const target = resolveTargetDefinition(
{
name: 'azure-responses',
provider: 'azure',
endpoint: '${{ AZURE_OPENAI_ENDPOINT }}',
api_key: '${{ AZURE_OPENAI_API_KEY }}',
model: '${{ AZURE_DEPLOYMENT_NAME }}',
api_format: 'responses',
},
env,
);

expect(target.kind).toBe('azure');
// The throw doubles as a type guard so `target.config` narrows to the azure config below.
if (target.kind !== 'azure') {
throw new Error('expected azure target');
}

expect(target.config.apiFormat).toBe('responses');
expect(target.config.version).toBe('v1');
});

// `api_format` given as a `${{ VAR }}` reference must be resolved through the env
// lookup rather than compared literally: the env value 'responses' should end up
// on the resolved config, with the version defaulting to 'v1'.
it('resolves azure api_format from env interpolation', () => {
const env = {
AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com',
AZURE_OPENAI_API_KEY: 'secret',
AZURE_DEPLOYMENT_NAME: 'gpt-4o',
// Supplies the value for the interpolated api_format setting below.
AZURE_OPENAI_API_FORMAT: 'responses',
} satisfies Record<string, string>;

const target = resolveTargetDefinition(
{
name: 'azure-env-format',
provider: 'azure',
endpoint: '${{ AZURE_OPENAI_ENDPOINT }}',
api_key: '${{ AZURE_OPENAI_API_KEY }}',
model: '${{ AZURE_DEPLOYMENT_NAME }}',
api_format: '${{ AZURE_OPENAI_API_FORMAT }}',
},
env,
);

expect(target.kind).toBe('azure');
// The throw doubles as a type guard so `target.config` narrows to the azure config below.
if (target.kind !== 'azure') {
throw new Error('expected azure target');
}

expect(target.config.apiFormat).toBe('responses');
expect(target.config.version).toBe('v1');
});

// With `api_format: 'responses'` and neither a `version` setting nor a version
// env variable, the resolved azure config must report version 'v1'.
// NOTE(review): the target shape and the version assertion match the
// 'resolves azure api_format when configured' case; this one checks the version only.
it('defaults azure responses targets to api version v1', () => {
const env = {
AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com',
AZURE_OPENAI_API_KEY: 'secret',
AZURE_DEPLOYMENT_NAME: 'gpt-4o',
} satisfies Record<string, string>;

const target = resolveTargetDefinition(
{
name: 'azure-responses-default-version',
provider: 'azure',
endpoint: '${{ AZURE_OPENAI_ENDPOINT }}',
api_key: '${{ AZURE_OPENAI_API_KEY }}',
model: '${{ AZURE_DEPLOYMENT_NAME }}',
api_format: 'responses',
},
env,
);

expect(target.kind).toBe('azure');
// The throw doubles as a type guard so `target.config` narrows to the azure config below.
if (target.kind !== 'azure') {
throw new Error('expected azure target');
}

expect(target.config.version).toBe('v1');
});

it('throws when required azure environment variables are missing', () => {
const env = {
AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com',
Expand Down Expand Up @@ -787,6 +874,37 @@ describe('createProvider', () => {
const response = await provider.invoke({ question: 'Hello' });

expect(createAzureMock).toHaveBeenCalledTimes(1);
expect(createAzureMock.mock.calls[0]?.[0]).toMatchObject({ useDeploymentBasedUrls: true });
expect(provider.asLanguageModel()).toMatchObject({ apiFormat: 'chat' });
expect(generateTextMock).toHaveBeenCalledTimes(1);
expect(extractLastAssistantContent(response.output)).toBe('ok');
});

// End-to-end check through createProvider: an azure target resolved with
// `api_format: 'responses'` must build the Azure client with deployment-based
// URLs disabled and select the model that the mock tags as 'responses'.
it('creates an azure provider using the responses api when requested', async () => {
const env = {
AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com',
AZURE_OPENAI_API_KEY: 'key',
AZURE_DEPLOYMENT_NAME: 'gpt-4o',
} satisfies Record<string, string>;

const resolved = resolveTargetDefinition(
{
name: 'azure-responses-target',
provider: 'azure',
endpoint: '${{ AZURE_OPENAI_ENDPOINT }}',
api_key: '${{ AZURE_OPENAI_API_KEY }}',
model: '${{ AZURE_DEPLOYMENT_NAME }}',
api_format: 'responses',
},
env,
);

const provider = createProvider(resolved);
const response = await provider.invoke({ question: 'Hello' });

expect(createAzureMock).toHaveBeenCalledTimes(1);
// First argument of the first createAzure call is the provider settings object.
expect(createAzureMock.mock.calls[0]?.[0]).toMatchObject({ useDeploymentBasedUrls: false });
// The mocked model objects carry an `apiFormat` tag identifying which factory produced them.
expect(provider.asLanguageModel()).toMatchObject({ apiFormat: 'responses' });
expect(generateTextMock).toHaveBeenCalledTimes(1);
expect(extractLastAssistantContent(response.output)).toBe('ok');
});
Expand Down
25 changes: 25 additions & 0 deletions packages/core/test/evaluation/validation/targets-validator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,29 @@ describe('validateTargetsFile', () => {
),
).toBe(true);
});

// Writes a targets file containing an azure target with `api_format` and checks
// that validation does not report it as an unknown setting.
it('accepts azure api_format as a known setting', async () => {
const filePath = path.join(tempDir, 'azure-api-format.yaml');
// `\${{` keeps the `${{ ... }}` placeholders literal inside the template string.
await writeFile(
filePath,
`targets:
- name: azure-responses
provider: azure
endpoint: \${{ AZURE_OPENAI_ENDPOINT }}
api_key: \${{ AZURE_OPENAI_API_KEY }}
model: \${{ AZURE_DEPLOYMENT_NAME }}
api_format: responses
`,
);

const result = await validateTargetsFile(filePath);

// Negative assertion: only the specific "Unknown setting 'api_format'" error at
// targets[0].api_format is ruled out; other validation errors are not inspected here.
expect(
result.errors.some(
(error) =>
error.location === 'targets[0].api_format' &&
error.message.includes("Unknown setting 'api_format'"),
),
).toBe(false);
});
});
Loading