From 84a7ea1353aee27aac313674675140a8e862457f Mon Sep 17 00:00:00 2001 From: Nico Duldhardt Date: Sat, 14 Feb 2026 18:11:43 +0100 Subject: [PATCH 1/4] fix(security): strip x-litellm-api-key from forwarded headers to upstream providers Prevent x-litellm-api-key (LiteLLM's virtual key) from being leaked to upstream providers when _forward_headers=True is used in passthrough endpoints. --- litellm/passthrough/utils.py | 1 + ...test_passthrough_endpoints_common_utils.py | 42 ++++++++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/litellm/passthrough/utils.py b/litellm/passthrough/utils.py index ef4357d1ca2..1f149313793 100644 --- a/litellm/passthrough/utils.py +++ b/litellm/passthrough/utils.py @@ -52,6 +52,7 @@ def forward_headers_from_request( # Header We Should NOT forward request_headers.pop("content-length", None) request_headers.pop("host", None) + request_headers.pop("x-litellm-api-key", None) # Combine request headers with custom headers headers = {**request_headers, **headers} diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py b/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py index 97ef05100de..b7e7359e9ff 100644 --- a/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py +++ b/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py @@ -10,7 +10,7 @@ from fastapi import Request, Response from fastapi.testclient import TestClient -from litellm.passthrough.utils import CommonUtils +from litellm.passthrough.utils import BasePassthroughUtils, CommonUtils sys.path.insert( 0, os.path.abspath("../../../..") @@ -95,4 +95,42 @@ def test_encode_bedrock_runtime_modelid_arn_edge_cases(): endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile.v1/invoke" expected = 
"model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile%2Ftest-profile.v1/invoke" result = CommonUtils.encode_bedrock_runtime_modelid_arn(endpoint) - assert result == expected \ No newline at end of file + assert result == expected + + +def test_forward_headers_strips_litellm_api_key(): + """x-litellm-api-key should not be forwarded to upstream providers.""" + request_headers = { + "x-litellm-api-key": "sk-litellm-secret-key", + "content-type": "application/json", + "x-api-key": "sk-ant-api-key", + } + + result = BasePassthroughUtils.forward_headers_from_request( + request_headers=request_headers.copy(), + headers={}, + forward_headers=True, + ) + + assert "x-litellm-api-key" not in result + assert result.get("content-type") == "application/json" + assert result.get("x-api-key") == "sk-ant-api-key" + + +def test_forward_headers_strips_host_and_content_length(): + """host and content-length should not be forwarded.""" + request_headers = { + "host": "api.anthropic.com", + "content-length": "1234", + "content-type": "application/json", + } + + result = BasePassthroughUtils.forward_headers_from_request( + request_headers=request_headers.copy(), + headers={}, + forward_headers=True, + ) + + assert "host" not in result + assert "content-length" not in result + assert result.get("content-type") == "application/json" \ No newline at end of file From 1e14e2521d93f2cbdf0b2014a7a060032fb62325 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Kieszczy=C5=84ski?= Date: Thu, 19 Mar 2026 09:19:04 +0100 Subject: [PATCH 2/4] fix(passthrough): implement credential priority for Anthropic endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Client-provided credentials now take precedence over server credentials in the /anthropic/ passthrough endpoint. This enables mixed mode where: 1. Client sends x-api-key → forwarded as-is (user pays via own API key) 2. 
Client sends Authorization → forwarded as-is (user pays via OAuth/Max) 3. No client credentials + server ANTHROPIC_API_KEY → server key used 4. No client credentials + no server key → no credentials forwarded Previously the server always sent x-api-key (even literal "None" when unconfigured), overwriting any client-provided credentials and breaking Claude Code Max (OAuth) and BYOK scenarios. Supersedes the simpler one-liner from d742c761af on v1.81.12-stable-patched. Based on the approach from PR #20429 (closed) and reverted PR #14821. --- .../llm_passthrough_endpoints.py | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py index 1ef866486ec..585122367df 100644 --- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py @@ -604,21 +604,34 @@ async def anthropic_proxy_route( base_url = httpx.URL(base_target_url) updated_url = base_url.copy_with(path=encoded_endpoint) - # Add or update query parameters - anthropic_api_key = passthrough_endpoint_router.get_credentials( - custom_llm_provider="anthropic", - region_name=None, - ) + # Credential priority: client-provided credentials take precedence over + # server credentials. This allows mixed mode where some users bring their + # own key (BYOK) or OAuth token (Claude Code Max) while others use the + # server's API key. 
+ x_api_key_header = request.headers.get("x-api-key", "") + client_authorization_header = request.headers.get("authorization", "") + + custom_headers: dict + if x_api_key_header or client_authorization_header: + custom_headers = {} + else: + anthropic_api_key = passthrough_endpoint_router.get_credentials( + custom_llm_provider="anthropic", + region_name=None, + ) + server_auth_header = AnthropicModelInfo.get_auth_header( + anthropic_api_key or None + ) + custom_headers = server_auth_header if server_auth_header is not None else {} ## check for streaming is_streaming_request = await is_streaming_request_fn(request) ## CREATE PASS-THROUGH - auth_header = AnthropicModelInfo.get_auth_header(anthropic_api_key or None) endpoint_func = create_pass_through_route( endpoint=endpoint, target=str(updated_url), - custom_headers=auth_header if auth_header is not None else {}, + custom_headers=custom_headers, _forward_headers=True, is_streaming_request=is_streaming_request, ) # dynamically construct pass-through endpoint based on incoming path From ccb3e055f6c10be8657ce1d395b8c3860bf1c3e0 Mon Sep 17 00:00:00 2001 From: Mateusz Idziejczak Date: Thu, 30 Apr 2026 11:25:03 +0200 Subject: [PATCH 3/4] fix(BIT-455): convert raw dict to LitellmParams before in-memory update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defect 2 root cause: update_in_memory_guardrail passed Prisma's raw dict directly to update_in_memory_litellm_params, which calls vars() on it, raising TypeError and silently swallowing every DB update. Hot-reload of any guardrail param (presidio_language, score thresholds, URL bases, pii_entities_config, ...) was therefore broken — pod restart was the only way to pick up DB changes. Fix: isinstance(dict) -> LitellmParams(**data) conversion before the call, matching the existing pattern in initialize_guardrail. 
After this, the base class blanket setattr in update_in_memory_litellm_params propagates all Pydantic fields without any per-field copy in subclasses. Linear: https://linear.app/bitropy/issue/BIT-455 --- litellm/proxy/guardrails/guardrail_registry.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/litellm/proxy/guardrails/guardrail_registry.py b/litellm/proxy/guardrails/guardrail_registry.py index 96175877d65..e08dd3b6ebe 100644 --- a/litellm/proxy/guardrails/guardrail_registry.py +++ b/litellm/proxy/guardrails/guardrail_registry.py @@ -562,9 +562,11 @@ def update_in_memory_guardrail( guardrail_id ) if custom_guardrail_callback: - updated_litellm_params = cast( - LitellmParams, guardrail.get("litellm_params", {}) - ) + litellm_params_data = guardrail.get("litellm_params", {}) + if isinstance(litellm_params_data, dict): + updated_litellm_params = LitellmParams(**litellm_params_data) + else: + updated_litellm_params = cast(LitellmParams, litellm_params_data) custom_guardrail_callback.update_in_memory_litellm_params( litellm_params=updated_litellm_params ) From b33dab2d5d9d368b07abb1ac87c689629f5ff6c5 Mon Sep 17 00:00:00 2001 From: Mateusz Idziejczak Date: Thu, 30 Apr 2026 11:25:17 +0200 Subject: [PATCH 4/4] fix(BIT-455): use Presidio's redacted text verbatim in anonymize_text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defect 3: anonymize_text iterated redacted_text["items"] and applied new_text = new_text[:start] + replacement + new_text[end:] using item coordinates. Presidio's /anonymize returns item start/end as positions in the OUTPUT text (where each mask token sits after redaction), not in the original. Applying them to the original drifts proportional to len(replacement) - len(original_span), corrupting masked output on any non-trivial input. Fix: - output_parse_pii=False: return redacted_text["text"] verbatim — no re-stitching needed, Presidio already produced the correct output. 
- output_parse_pii=True: iterate analyze_results (pre-anonymize, with original-text coordinates) for both stitching and pii_tokens construction. Eliminates the secondary bug where pii_tokens stored text[start:end] using already-mutated coordinates. Fail-closed on /anonymize backend error preserved. Linear: https://linear.app/bitropy/issue/BIT-455 --- .../guardrails/guardrail_hooks/presidio.py | 97 ++++++++++--------- 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/litellm/proxy/guardrails/guardrail_hooks/presidio.py b/litellm/proxy/guardrails/guardrail_hooks/presidio.py index 0f4ebbd4880..a201db99609 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/presidio.py +++ b/litellm/proxy/guardrails/guardrail_hooks/presidio.py @@ -488,58 +488,59 @@ async def anonymize_text( new_text = text if redacted_text is not None: verbose_proxy_logger.debug("redacted_text: %s", redacted_text) - # Process items in reverse order by start position so that - # replacing later spans first does not shift earlier coordinates. - for item in sorted( - redacted_text["items"], key=lambda x: x["start"], reverse=True - ): - start = item["start"] - end = item["end"] - replacement = item["text"] # replacement token - if item["operator"] == "replace" and output_parse_pii is True: - if request_data is None: - verbose_proxy_logger.warning( - "Presidio anonymize_text called without request_data — " - "PII tokens cannot be stored per-request. " - "This may indicate a missing caller update." + if not output_parse_pii: + # Defect 3 primary fix: return Presidio's redacted output verbatim. + # redacted_text["items"] carry OUTPUT coordinates (positions in the + # already-redacted string), not original-text coordinates. Any + # attempt to stitch them back into the original text drifts whenever + # a replacement token has a different length than the original span. + # Presidio already produced the correct output — use it as-is. 
+ new_text = redacted_text["text"] + for item in redacted_text.get("items", []): + entity_type = item.get("entity_type", None) + if entity_type is not None: + masked_entity_count[entity_type] = ( + masked_entity_count.get(entity_type, 0) + 1 ) - request_data = {} - # Store pii_tokens in metadata to avoid leaking to LLM providers. - # Providers like Anthropic reject unknown top-level fields. - if not request_data.get("metadata"): - request_data["metadata"] = {} - if "pii_tokens" not in request_data["metadata"]: - request_data["metadata"]["pii_tokens"] = {} - pii_tokens = request_data["metadata"]["pii_tokens"] - - # Append a sequential number to make each token unique - # per request, so unmasking maps back to the correct - # original value. Format: , - # This is LLM-friendly and degrades gracefully if the - # LLM doesn't echo the token verbatim. - seq = len(pii_tokens) + 1 - if replacement.endswith(">"): - replacement = f"{replacement[:-1]}_{seq}>" - else: - replacement = f"{replacement}_{seq}" + else: + # Defect 3 secondary fix (output_parse_pii=True): use analyze_results + # (pre-anonymize, original-text coordinates) for both stitching and + # pii_tokens construction. redacted_text["items"] coords are wrong + # here for the same reason as above. + if request_data is None: + verbose_proxy_logger.warning( + "Presidio anonymize_text called without request_data — " + "PII tokens cannot be stored per-request. " + "This may indicate a missing caller update." 
+ ) + request_data = {} + if not request_data.get("metadata"): + request_data["metadata"] = {} + if "pii_tokens" not in request_data["metadata"]: + request_data["metadata"]["pii_tokens"] = {} + pii_tokens = request_data["metadata"]["pii_tokens"] + + for result_item in sorted( + analyze_results, + key=lambda x: x.get("start", 0), + reverse=True, + ): + start = result_item.get("start") + end = result_item.get("end") + entity_type = result_item.get("entity_type", "PII") + if start is None or end is None: + continue - # Use ORIGINAL text (not new_text) since start/end - # reference the original text's coordinates. + # Unique numbered token per detection so unmasking maps back + # to the correct original value. Format: <{entity_type}_{seq}>. + seq = len(pii_tokens) + 1 + replacement = f"<{entity_type}_{seq}>" + # Original-text coordinates — correct original substring. pii_tokens[replacement] = text[start:end] - - new_text = new_text[:start] + replacement + new_text[end:] - entity_type = item.get("entity_type", None) - if entity_type is not None: - masked_entity_count[entity_type] = ( - masked_entity_count.get(entity_type, 0) + 1 + new_text = new_text[:start] + replacement + new_text[end:] + masked_entity_count[str(entity_type)] = ( + masked_entity_count.get(str(entity_type), 0) + 1 ) - # When output_parse_pii is True, new_text contains sequentially - # numbered tokens (e.g. ) that match the keys - # in pii_tokens. Returning redacted_text["text"] (Presidio's - # original output) would send un-numbered tokens to the LLM, - # making unmasking impossible. - # When output_parse_pii is False, new_text == redacted_text["text"] - # because no suffix is appended. return new_text else: raise Exception("Invalid anonymizer response: received None")