-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathconfig.sample.yaml
More file actions
113 lines (103 loc) · 3.08 KB
/
config.sample.yaml
File metadata and controls
113 lines (103 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
server:
  # Changing the listen address or port requires a service restart to take effect.
  port: 8080
  host: 127.0.0.1
database:
  # Optional; defaults to data/requests.db
  path: data/requests.db
logging:
  # Optional; defaults to logs
  path: logs
  # Supported levels: DEBUG / INFO / WARNING / ERROR / CRITICAL
  level: INFO
  # When enabled, the downstream request, upstream request, upstream response,
  # and downstream response are written to logs/llm_request_trace.log,
  # with line breaks formatted in HTTP-message style.
  llm_request_debug_enabled: false
chat:
  # When enabled, chat requests and the /v1/models result are
  # gated by the IP whitelist.
  whitelist_enabled: false
admin:
  # Admin login is enabled only when both username and password are non-empty;
  # if either one is empty, admin login is disabled.
  username: admin
  password: admin
auth_groups:
  - name: openai-shared
    strategy: least_inflight
    # Group-level default cooldown after an upstream 429 (seconds).
    cooldown_seconds_on_429: 60
    entries:
      - id: openai-key-a
        enabled: true
        headers:
          Authorization: Bearer sk-your-openai-key-a
        max_concurrency: 3
        # Entry-level override of the group cooldown.
        cooldown_seconds_on_429: 120
        request_quota_per_minute: 60
        request_quota_per_day: 3000
        token_quota_per_minute: 120000
        token_quota_per_day: 5000000
      - id: openai-key-b
        enabled: true
        headers:
          Authorization: Bearer sk-your-openai-key-b
        max_concurrency: 2
providers:
  - name: openai-chat
    # enabled: true  # optional, defaults to true; disabled providers are hidden from /v1/models
    api: https://api.openai.com/v1/chat/completions
    transport: http
    source_format: openai_chat
    # Share credentials from the auth_groups section instead of a per-provider api_key.
    auth_group: openai-shared
    verify_ssl: true
    timeout_seconds: 1200
    max_retries: 3
    model_list:
      - gpt-4.1
      - gpt-4.1-mini
    hook: example_hook.py
  - name: responses-upstream
    api: https://api.openai.com/v1/responses
    transport: http
    source_format: openai_responses
    auth_group: openai-shared
    verify_ssl: true
    timeout_seconds: 1200
    max_retries: 3
    model_list:
      - gpt-4.1
  - name: claude-messages
    api: https://api.anthropic.com/v1/messages
    transport: http
    source_format: claude_chat
    # Per-provider key; used when no auth_group is set.
    api_key: sk-ant-your-anthropic-key
    verify_ssl: true
    timeout_seconds: 1200
    max_retries: 3
    model_list:
      - claude-sonnet-4-5
  - name: responses-codex
    api: https://api.openai.com/v1/responses
    transport: http
    source_format: openai_responses
    api_key: sk-your-openai-key
    verify_ssl: true
    timeout_seconds: 1200
    max_retries: 3
    model_list:
      - gpt-5-codex
# Common upstream choices:
# /v1/chat/completions -> source_format: openai_chat
# /v1/responses -> source_format: openai_responses
# /v1/messages -> source_format: claude_chat
#
# Downstream compatibility is built in:
# /v1/chat/completions
# /v1/responses
# /v1/messages
#
# There is no public stream_format field.
# The proxy auto-detects upstream responses internally:
# HTTP text/event-stream -> SSE JSON
# WebSocket -> JSON messages
# application/x-ndjson -> NDJSON
# other HTTP responses -> non-stream