diff --git a/main/config.h b/main/config.h index 44b2d05..5f4548a 100644 --- a/main/config.h +++ b/main/config.h @@ -43,22 +43,25 @@ // LLM Backend Configuration // ----------------------------------------------------------------------------- typedef enum { - LLM_BACKEND_ANTHROPIC = 0, - LLM_BACKEND_OPENAI = 1, + LLM_BACKEND_ANTHROPIC = 0, + LLM_BACKEND_OPENAI = 1, LLM_BACKEND_OPENROUTER = 2, - LLM_BACKEND_OLLAMA = 3, + LLM_BACKEND_OLLAMA = 3, + LLM_BACKEND_VOLCENGINE = 4, // Volcengine ARK (OpenAI-compatible) } llm_backend_t; -#define LLM_API_URL_ANTHROPIC "https://api.anthropic.com/v1/messages" -#define LLM_API_URL_OPENAI "https://api.openai.com/v1/chat/completions" -#define LLM_API_URL_OPENROUTER "https://openrouter.ai/api/v1/chat/completions" +#define LLM_API_URL_ANTHROPIC "https://api.anthropic.com/v1/messages" +#define LLM_API_URL_OPENAI "https://api.openai.com/v1/chat/completions" +#define LLM_API_URL_OPENROUTER "https://openrouter.ai/api/v1/chat/completions" // Loopback default is mainly a placeholder for provisioning/runtime override. -#define LLM_API_URL_OLLAMA "http://127.0.0.1:11434/v1/chat/completions" +#define LLM_API_URL_OLLAMA "http://127.0.0.1:11434/v1/chat/completions" +#define LLM_API_URL_VOLCENGINE "https://ark.cn-beijing.volces.com/api/v3/chat/completions" #define LLM_DEFAULT_MODEL_ANTHROPIC "claude-sonnet-4-6" #define LLM_DEFAULT_MODEL_OPENAI "gpt-5.4" #define LLM_DEFAULT_MODEL_OPENROUTER "openrouter/auto" #define LLM_DEFAULT_MODEL_OLLAMA "qwen3:8b" +#define LLM_DEFAULT_MODEL_VOLCENGINE "doubao-1-5-pro-32k-250115" #define LLM_API_KEY_MAX_LEN 511 #define LLM_API_KEY_BUF_SIZE (LLM_API_KEY_MAX_LEN + 1) diff --git a/main/llm.c b/main/llm.c index 6c81b9c..52214c2 100644 --- a/main/llm.c +++ b/main/llm.c @@ -48,6 +48,8 @@ static const char *llm_backend_name(llm_backend_t backend) return "OpenRouter"; case LLM_BACKEND_OLLAMA: return "Ollama"; + case LLM_BACKEND_VOLCENGINE: + return "Volcengine"; default: return "Unknown"; } @@ -457,6 +459,8 @@ esp_err_t llm_init(void) s_backend = LLM_BACKEND_OPENROUTER; } else if (strcmp(backend_str, "ollama") == 0) { s_backend = LLM_BACKEND_OLLAMA; + } else if (strcmp(backend_str, "volcengine") == 0) { + s_backend = LLM_BACKEND_VOLCENGINE; } else { ESP_LOGW(TAG, "Unknown llm_backend '%s', defaulting to OpenAI", backend_str); s_backend = LLM_BACKEND_OPENAI; @@ -539,6 +543,8 @@ const char *llm_get_api_url(void) return LLM_API_URL_OPENROUTER; case LLM_BACKEND_OLLAMA: return LLM_API_URL_OLLAMA; + case LLM_BACKEND_VOLCENGINE: + return LLM_API_URL_VOLCENGINE; default: return LLM_API_URL_ANTHROPIC; } @@ -553,6 +559,8 @@ const char *llm_get_default_model(void) return LLM_DEFAULT_MODEL_OPENROUTER; case LLM_BACKEND_OLLAMA: return LLM_DEFAULT_MODEL_OLLAMA; + case LLM_BACKEND_VOLCENGINE: + return LLM_DEFAULT_MODEL_VOLCENGINE; default: return LLM_DEFAULT_MODEL_ANTHROPIC; } @@ -574,7 +582,8 @@ bool llm_is_openai_format(void) { return s_backend == LLM_BACKEND_OPENAI || s_backend == LLM_BACKEND_OPENROUTER || - s_backend == LLM_BACKEND_OLLAMA; + s_backend == LLM_BACKEND_OLLAMA || + s_backend == LLM_BACKEND_VOLCENGINE; } #ifdef CONFIG_ZCLAW_STUB_LLM @@ -706,6 +715,7 @@ esp_err_t llm_request(const char *request_json, char *response_buf, size_t respo esp_http_client_set_header(client, "x-api-key", s_api_key); esp_http_client_set_header(client, "anthropic-version", "2023-06-01"); } else if (s_backend == LLM_BACKEND_OPENAI || s_backend == LLM_BACKEND_OPENROUTER || + s_backend == LLM_BACKEND_VOLCENGINE || (s_backend == LLM_BACKEND_OLLAMA && s_api_key[0] != '\0')) { // OpenAI/OpenRouter use Bearer token. For Ollama, Bearer is optional and only sent // when a key is explicitly provided (e.g. reverse proxy auth). diff --git a/scripts/provision-dev.sh b/scripts/provision-dev.sh index 54a1c42..f9d159d 100755 --- a/scripts/provision-dev.sh +++ b/scripts/provision-dev.sh @@ -38,7 +38,7 @@ Overrides: --port --ssid --pass - --backend anthropic | openai | openrouter | ollama + --backend anthropic | openai | openrouter | ollama | volcengine --model --api-key --api-url Custom API endpoint URL @@ -76,6 +76,7 @@ ZCLAW_API_KEY= # ANTHROPIC_API_KEY= # OPENROUTER_API_KEY= # OLLAMA_API_KEY= +# VOLCENGINE_API_KEY= # Optional Telegram credentials: ZCLAW_TG_TOKEN= @@ -186,6 +187,9 @@ resolve_api_key() { ollama) printf '%s\n' "${OLLAMA_API_KEY:-}" ;; + volcengine) + printf '%s\n' "${VOLCENGINE_API_KEY:-}" + ;; *) printf '%s\n' "" ;; diff --git a/scripts/provision.sh b/scripts/provision.sh index 63e7464..ecb9811 100755 --- a/scripts/provision.sh +++ b/scripts/provision.sh @@ -352,6 +352,7 @@ default_model_for_backend() { openai) echo "gpt-5.4" ;; openrouter) echo "openrouter/auto" ;; ollama) echo "qwen3:8b" ;; + volcengine) echo "doubao-1-5-pro-32k-250115" ;; *) echo "claude-sonnet-4-6" ;; esac } @@ -429,7 +430,7 @@ prompt_for_model() { validate_backend() { case "$1" in - anthropic|openai|openrouter|ollama) return 0 ;; + anthropic|openai|openrouter|ollama|volcengine) return 0 ;; *) return 1 ;; esac } @@ -703,6 +704,74 @@ PY return 1 } +verify_volcengine_api_key() { + local api_key="$1" + local model="$2" + local api_url_override="$3" + local api_url="${api_url_override:-https://ark.cn-beijing.volces.com/api/v3/chat/completions}" + local response_file + local http_code + local req_body + + if ! command -v curl >/dev/null 2>&1; then + echo "Warning: curl not found; skipping Volcengine API check." + return 2 + fi + + req_body=$(cat </dev/null 2>&1; then + python3 - "$response_file" <<'PY' +import json +import sys +from pathlib import Path + +p = Path(sys.argv[1]) +try: + data = json.loads(p.read_text(encoding="utf-8")) +except Exception: + print("Response preview: " + p.read_text(encoding="utf-8", errors="ignore")[:200]) + raise SystemExit(0) + +msg = "" +if isinstance(data, dict): + if isinstance(data.get("error"), dict): + msg = data["error"].get("message") or data["error"].get("type") or "" + elif isinstance(data.get("error"), str): + msg = data["error"] +if msg: + print("API said: " + msg) +PY + else + echo "Response preview: $(head -c 200 "$response_file")" + fi + + rm -f "$response_file" + return 1 +} + verify_openrouter_api_key() { local api_key="$1" local _model="$2" @@ -1010,13 +1079,13 @@ if [ -z "$BACKEND" ]; then if [ "$ASSUME_YES" = true ]; then BACKEND="openai" else - read -r -p "LLM provider [openai/anthropic/openrouter/ollama] (default: openai): " BACKEND + read -r -p "LLM provider [openai/anthropic/openrouter/ollama/volcengine] (default: openai): " BACKEND BACKEND="${BACKEND:-openai}" fi fi if ! validate_backend "$BACKEND"; then - echo "Error: invalid backend '$BACKEND' (expected anthropic|openai|openrouter|ollama)" + echo "Error: invalid backend '$BACKEND' (expected anthropic|openai|openrouter|ollama|volcengine)" exit 1 fi @@ -1077,6 +1146,10 @@ if [ "$VERIFY_API_KEY" = true ]; then VERIFY_LABEL="Ollama endpoint" VERIFY_FN="verify_ollama_endpoint" ;; + volcengine) + VERIFY_LABEL="Volcengine" + VERIFY_FN="verify_volcengine_api_key" + ;; esac if [ -n "$VERIFY_FN" ]; then diff --git a/scripts/qemu_live_llm_bridge.py b/scripts/qemu_live_llm_bridge.py index 3fa1ddb..27fd607 100644 --- a/scripts/qemu_live_llm_bridge.py +++ b/scripts/qemu_live_llm_bridge.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Run QEMU and proxy emulator LLM requests to Anthropic or OpenAI from host.""" +"""Run QEMU and proxy emulator LLM requests to Anthropic, OpenAI, or Volcengine from host.""" from __future__ import annotations @@ -20,6 +20,7 @@ RESP_PREFIX = "__zclaw_llm_resp__:" ANTHROPIC_API_URL = "https://api.anthropic.com/v1/messages" OPENAI_API_URL = "https://api.openai.com/v1/chat/completions" +VOLCENGINE_API_URL = "https://ark.cn-beijing.volces.com/api/v3/chat/completions" REQ_PREFIX_B = REQ_PREFIX.encode("utf-8") RESP_PREFIX_B = RESP_PREFIX.encode("utf-8") @@ -99,6 +100,35 @@ def call_openai(request_json: str, timeout_s: int) -> str: return build_error_payload(str(exc)) +def call_volcengine(request_json: str, timeout_s: int) -> str: + api_key = os.environ.get("VOLCENGINE_API_KEY", "") + if not api_key: + return build_error_payload("VOLCENGINE_API_KEY is not set") + + api_url = os.environ.get("VOLCENGINE_API_URL", VOLCENGINE_API_URL) + req = urllib.request.Request( + api_url, + data=request_json.encode("utf-8"), + headers={ + "authorization": f"Bearer {api_key}", + "content-type": "application/json", + }, + method="POST", + ) + + try: + with urllib.request.urlopen(req, timeout=timeout_s) as resp: + body = resp.read().decode("utf-8", errors="replace") + return compact_json_or_error(body) + except urllib.error.HTTPError as exc: + body = exc.read().decode("utf-8", errors="replace") + if body: + return compact_json_or_error(body) + return build_error_payload(f"HTTP {exc.code}") + except Exception as exc: # pragma: no cover - network/runtime dependent + return build_error_payload(str(exc)) + + def detect_provider_from_request(request_json: str) -> str: try: payload = json.loads(request_json) @@ -138,6 +168,8 @@ def resolve_provider(provider: str, request_json: str) -> str: def call_provider(provider: str, request_json: str, timeout_s: int) -> str: if provider == "openai": return call_openai(request_json, timeout_s) + if provider == "volcengine": + return call_volcengine(request_json, timeout_s) return call_anthropic(request_json, timeout_s) @@ -314,9 +346,9 @@ def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="QEMU live LLM bridge for zclaw emulator") parser.add_argument( "--provider", - choices=("auto", "anthropic", "openai"), + choices=("auto", "anthropic", "openai", "volcengine"), default="auto", - help="Host API provider: auto-detect from request format (default), anthropic, or openai", + help="Host API provider: auto-detect from request format (default), anthropic, openai, or volcengine", ) parser.add_argument( "--api-timeout", @@ -356,7 +388,7 @@ def parse_args() -> argparse.Namespace: def main() -> int: args = parse_args() if args.provider == "auto": - provider_note = "auto-detect (anthropic/openai)" + provider_note = "auto-detect (anthropic/openai/volcengine)" else: provider_note = args.provider write_host_line(sys.stdout, f"[qemu-live-llm] Bridge active (provider: {provider_note}).") diff --git a/test/api/provider_harness.py b/test/api/provider_harness.py index cc7acaf..cb18256 100644 --- a/test/api/provider_harness.py +++ b/test/api/provider_harness.py @@ -251,6 +251,14 @@ class ProviderConfig: api_key_env="OPENROUTER_API_KEY", wire_format="openai", ), + "volcengine": ProviderConfig( + name="volcengine", + api_url="https://ark.cn-beijing.volces.com/api/v3/chat/completions", + default_model="doubao-1-5-pro-32k-250115", + model_env="VOLCENGINE_MODEL", + api_key_env="VOLCENGINE_API_KEY", + wire_format="openai", + ), } diff --git a/test/host/mock_llm.c b/test/host/mock_llm.c index 83df6ad..f5b4775 100644 --- a/test/host/mock_llm.c +++ b/test/host/mock_llm.c @@ -130,5 +130,6 @@ bool llm_is_openai_format(void) { return s_backend == LLM_BACKEND_OPENAI || s_backend == LLM_BACKEND_OPENROUTER || - s_backend == LLM_BACKEND_OLLAMA; + s_backend == LLM_BACKEND_OLLAMA || + s_backend == LLM_BACKEND_VOLCENGINE; } diff --git a/test/host/test_api_provider_harness.py b/test/host/test_api_provider_harness.py index 7f81351..1eebac9 100644 --- a/test/host/test_api_provider_harness.py +++ b/test/host/test_api_provider_harness.py @@ -162,6 +162,48 @@ def fake_post(url: str, headers: dict[str, str], json: dict[str, Any], timeout: request_json = payload["json"] self.assertEqual(request_json["messages"], messages) + def test_providers_dict_contains_volcengine(self) -> None: + self.assertIn("volcengine", provider_harness.PROVIDERS) + ve = provider_harness.PROVIDERS["volcengine"] + self.assertEqual(ve.name, "volcengine") + self.assertEqual(ve.wire_format, "openai") + self.assertIn("volces.com", ve.api_url) + self.assertEqual(ve.default_model, "doubao-1-5-pro-32k-250115") + self.assertEqual(ve.api_key_env, "VOLCENGINE_API_KEY") + self.assertEqual(ve.model_env, "VOLCENGINE_MODEL") + + def test_volcengine_uses_max_tokens_not_max_completion_tokens(self) -> None: + # volcengine model name does not start with "gpt-5", so must use max_tokens + field, value = provider_harness._openai_like_max_tokens_field("doubao-1-5-pro-32k-250115") + self.assertEqual(field, "max_tokens") + self.assertEqual(value, 1024) + + def test_call_api_volcengine_inserts_system_message_and_sends_to_correct_url(self) -> None: + provider = provider_harness.PROVIDERS["volcengine"] + messages = [{"role": "user", "content": "Hello"}] + payload: dict[str, Any] = {} + + def fake_post(url: str, headers: dict[str, str], json: dict[str, Any], timeout: int) -> Mock: + payload["url"] = url + payload["headers"] = headers + payload["json"] = json + response = Mock() + response.raise_for_status.return_value = None + response.json.return_value = {"ok": True} + return response + + with patch.object(provider_harness, "httpx", SimpleNamespace(post=fake_post)): + result = provider_harness.call_api(provider, messages, "test-ark-key", "doubao-1-5-pro-32k-250115", user_tools=[]) + + self.assertEqual(result, {"ok": True}) + self.assertIn("volces.com", payload["url"]) + self.assertEqual(payload["headers"]["Authorization"], "Bearer test-ark-key") + request_json = payload["json"] + self.assertEqual(request_json["messages"][0]["role"], "system") + self.assertEqual(request_json["messages"][1], {"role": "user", "content": "Hello"}) + self.assertIn("max_tokens", request_json) + self.assertNotIn("max_completion_tokens", request_json) + if __name__ == "__main__": unittest.main() diff --git a/test/host/test_install_provision_scripts.py b/test/host/test_install_provision_scripts.py index 7dbb584..4a3b560 100644 --- a/test/host/test_install_provision_scripts.py +++ b/test/host/test_install_provision_scripts.py @@ -786,6 +786,13 @@ def test_provision_openrouter_api_check_runs_in_yes_mode(self) -> None: self.assertIn("Verifying OpenRouter API key", output) self.assertIn("Error: API check failed in --yes mode.", output) + def test_provision_volcengine_api_check_runs_in_yes_mode(self) -> None: + proc = self._run_provision_api_check_fail("volcengine") + output = f"{proc.stdout}\n{proc.stderr}" + self.assertNotEqual(proc.returncode, 0, msg=output) + self.assertIn("Verifying Volcengine API key", output) + self.assertIn("Error: API check failed in --yes mode.", output) + def test_provision_openai_api_check_uses_models_endpoint_for_chat_override(self) -> None: proc, called_url = self._run_provision_api_check_capture_url( backend="openai", diff --git a/test/host/test_llm_runtime.c b/test/host/test_llm_runtime.c index c0d573f..c4e007c 100644 --- a/test/host/test_llm_runtime.c +++ b/test/host/test_llm_runtime.c @@ -117,6 +117,17 @@ TEST(loads_ollama_backend_with_default_model) return 0; } +TEST(loads_volcengine_backend_and_default_model) +{ + configure_mock_store("volcengine", NULL, "test-ark-key", NULL); + ASSERT(llm_init() == ESP_OK); + ASSERT(llm_get_backend() == LLM_BACKEND_VOLCENGINE); + ASSERT(strcmp(llm_get_api_url(), LLM_API_URL_VOLCENGINE) == 0); + ASSERT(strcmp(llm_get_model(), LLM_DEFAULT_MODEL_VOLCENGINE) == 0); + ASSERT(llm_is_openai_format()); + return 0; +} + TEST(custom_api_url_override_applies_to_any_backend) { configure_mock_store("openai", NULL, "test-key", "http://192.168.1.50:11434/v1/chat/completions"); @@ -192,6 +203,13 @@ int test_llm_runtime_all(void) failures++; } + printf(" loads_volcengine_backend_and_default_model... "); + if (test_loads_volcengine_backend_and_default_model() == 0) { + printf("OK\n"); + } else { + failures++; + } + printf(" custom_api_url_override_applies_to_any_backend... "); if (test_custom_api_url_override_applies_to_any_backend() == 0) { printf("OK\n"); diff --git a/test/host/test_qemu_live_llm_bridge.py b/test/host/test_qemu_live_llm_bridge.py index 15a6195..eb4a541 100644 --- a/test/host/test_qemu_live_llm_bridge.py +++ b/test/host/test_qemu_live_llm_bridge.py @@ -98,6 +98,7 @@ def test_resolve_provider_auto_uses_detection(self) -> None: ) self.assertEqual(resolve_provider("auto", request), "openai") self.assertEqual(resolve_provider("anthropic", request), "anthropic") + self.assertEqual(resolve_provider("volcengine", request), "volcengine") if __name__ == "__main__":