From 84a7ea1353aee27aac313674675140a8e862457f Mon Sep 17 00:00:00 2001
From: Nico Duldhardt <nico@duldhardt.com>
Date: Sat, 14 Feb 2026 18:11:43 +0100
Subject: [PATCH 1/4] fix(security): strip x-litellm-api-key from forwarded
 headers to upstream providers

Prevent x-litellm-api-key (LiteLLM's virtual key) from being leaked
to upstream providers when _forward_headers=True is used in passthrough
endpoints.
---
 litellm/passthrough/utils.py                  |  1 +
 ...test_passthrough_endpoints_common_utils.py | 42 ++++++++++++++++++-
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/litellm/passthrough/utils.py b/litellm/passthrough/utils.py
index ef4357d1ca2..1f149313793 100644
--- a/litellm/passthrough/utils.py
+++ b/litellm/passthrough/utils.py
@@ -52,6 +52,7 @@ def forward_headers_from_request(
             # Header We Should NOT forward
             request_headers.pop("content-length", None)
             request_headers.pop("host", None)
+            request_headers.pop("x-litellm-api-key", None)
 
             # Combine request headers with custom headers
             headers = {**request_headers, **headers}
diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py b/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py
index 97ef05100de..b7e7359e9ff 100644
--- a/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py
+++ b/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py
@@ -10,7 +10,7 @@
 from fastapi import Request, Response
 from fastapi.testclient import TestClient
 
-from litellm.passthrough.utils import CommonUtils
+from litellm.passthrough.utils import BasePassthroughUtils, CommonUtils
 
 sys.path.insert(
     0, os.path.abspath("../../../..")
@@ -95,4 +95,42 @@ def test_encode_bedrock_runtime_modelid_arn_edge_cases():
     endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile.v1/invoke"
     expected = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile%2Ftest-profile.v1/invoke"
     result = CommonUtils.encode_bedrock_runtime_modelid_arn(endpoint)
-    assert result == expected
\ No newline at end of file
+    assert result == expected
+
+
+def test_forward_headers_strips_litellm_api_key():
+    """x-litellm-api-key should not be forwarded to upstream providers."""
+    request_headers = {
+        "x-litellm-api-key": "sk-litellm-secret-key",
+        "content-type": "application/json",
+        "x-api-key": "sk-ant-api-key",
+    }
+
+    result = BasePassthroughUtils.forward_headers_from_request(
+        request_headers=request_headers.copy(),
+        headers={},
+        forward_headers=True,
+    )
+
+    assert "x-litellm-api-key" not in result
+    assert result.get("content-type") == "application/json"
+    assert result.get("x-api-key") == "sk-ant-api-key"
+
+
+def test_forward_headers_strips_host_and_content_length():
+    """host and content-length should not be forwarded."""
+    request_headers = {
+        "host": "api.anthropic.com",
+        "content-length": "1234",
+        "content-type": "application/json",
+    }
+
+    result = BasePassthroughUtils.forward_headers_from_request(
+        request_headers=request_headers.copy(),
+        headers={},
+        forward_headers=True,
+    )
+
+    assert "host" not in result
+    assert "content-length" not in result
+    assert result.get("content-type") == "application/json"
\ No newline at end of file

From 1e14e2521d93f2cbdf0b2014a7a060032fb62325 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Kieszczy=C5=84ski?= <piotr.kieszczynski@gmail.com>
Date: Thu, 19 Mar 2026 09:19:04 +0100
Subject: [PATCH 2/4] fix(passthrough): implement credential priority for
 Anthropic endpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Client-provided credentials now take precedence over server credentials
in the /anthropic/ passthrough endpoint. This enables mixed mode where:

1. Client sends x-api-key → forwarded as-is (user pays via own API key)
2. Client sends Authorization → forwarded as-is (user pays via OAuth/Max)
3. No client credentials + server ANTHROPIC_API_KEY → server key used
4. No client credentials + no server key → no credentials forwarded

Previously the server always sent x-api-key (even literal "None" when
unconfigured), overwriting any client-provided credentials and breaking
Claude Code Max (OAuth) and BYOK scenarios.

Supersedes the simpler one-liner from d742c761af on v1.81.12-stable-patched.
Based on the approach from PR #20429 (closed) and reverted PR #14821.
---
 .../llm_passthrough_endpoints.py              | 27 ++++++++++++++-----
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
index 1ef866486ec..585122367df 100644
--- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
+++ b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
@@ -604,21 +604,34 @@ async def anthropic_proxy_route(
     base_url = httpx.URL(base_target_url)
     updated_url = base_url.copy_with(path=encoded_endpoint)
 
-    # Add or update query parameters
-    anthropic_api_key = passthrough_endpoint_router.get_credentials(
-        custom_llm_provider="anthropic",
-        region_name=None,
-    )
+    # Credential priority: client-provided credentials take precedence over
+    # server credentials. This allows mixed mode where some users bring their
+    # own key (BYOK) or OAuth token (Claude Code Max) while others use the
+    # server's API key.
+    x_api_key_header = request.headers.get("x-api-key", "")
+    client_authorization_header = request.headers.get("authorization", "")
+
+    custom_headers: dict
+    if x_api_key_header or client_authorization_header:
+        custom_headers = {}
+    else:
+        anthropic_api_key = passthrough_endpoint_router.get_credentials(
+            custom_llm_provider="anthropic",
+            region_name=None,
+        )
+        server_auth_header = AnthropicModelInfo.get_auth_header(
+            anthropic_api_key or None
+        )
+        custom_headers = server_auth_header if server_auth_header is not None else {}
 
     ## check for streaming
     is_streaming_request = await is_streaming_request_fn(request)
 
     ## CREATE PASS-THROUGH
-    auth_header = AnthropicModelInfo.get_auth_header(anthropic_api_key or None)
     endpoint_func = create_pass_through_route(
         endpoint=endpoint,
         target=str(updated_url),
-        custom_headers=auth_header if auth_header is not None else {},
+        custom_headers=custom_headers,
         _forward_headers=True,
         is_streaming_request=is_streaming_request,
     )  # dynamically construct pass-through endpoint based on incoming path

From ccb3e055f6c10be8657ce1d395b8c3860bf1c3e0 Mon Sep 17 00:00:00 2001
From: Mateusz Idziejczak <mateusz.idziejczak@bitropy.io>
Date: Thu, 30 Apr 2026 11:25:03 +0200
Subject: [PATCH 3/4] fix(BIT-455): convert raw dict to LitellmParams before
 in-memory update
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Defect 2 root cause: update_in_memory_guardrail passed Prisma's raw dict
directly to update_in_memory_litellm_params, which calls vars() on it.
vars() raises TypeError for a plain dict, and that error was swallowed
silently, so every DB update was dropped.  Hot-reload of any guardrail
param (presidio_language, score thresholds, URL bases,
pii_entities_config, ...) was therefore broken — a pod restart was the
only way to pick up DB changes.

Fix: when litellm_params is a dict, convert it with LitellmParams(**data)
before the call, matching the existing pattern in initialize_guardrail.
After this, the base class's blanket setattr in
update_in_memory_litellm_params propagates all Pydantic fields without
any per-field copy in subclasses.

Linear: https://linear.app/bitropy/issue/BIT-455
---
 litellm/proxy/guardrails/guardrail_registry.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/litellm/proxy/guardrails/guardrail_registry.py b/litellm/proxy/guardrails/guardrail_registry.py
index 96175877d65..e08dd3b6ebe 100644
--- a/litellm/proxy/guardrails/guardrail_registry.py
+++ b/litellm/proxy/guardrails/guardrail_registry.py
@@ -562,9 +562,11 @@ def update_in_memory_guardrail(
             guardrail_id
         )
         if custom_guardrail_callback:
-            updated_litellm_params = cast(
-                LitellmParams, guardrail.get("litellm_params", {})
-            )
+            litellm_params_data = guardrail.get("litellm_params", {})
+            if isinstance(litellm_params_data, dict):
+                updated_litellm_params = LitellmParams(**litellm_params_data)
+            else:
+                updated_litellm_params = cast(LitellmParams, litellm_params_data)
             custom_guardrail_callback.update_in_memory_litellm_params(
                 litellm_params=updated_litellm_params
             )

From b33dab2d5d9d368b07abb1ac87c689629f5ff6c5 Mon Sep 17 00:00:00 2001
From: Mateusz Idziejczak <mateusz.idziejczak@bitropy.io>
Date: Thu, 30 Apr 2026 11:25:17 +0200
Subject: [PATCH 4/4] fix(BIT-455): use Presidio's redacted text verbatim in
 anonymize_text
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Defect 3: anonymize_text iterated redacted_text["items"] and applied
new_text = new_text[:start] + replacement + new_text[end:] using item
coordinates.  Presidio's /anonymize returns item start/end as positions
in the OUTPUT text (where each mask token sits after redaction), not
in the original.  Applying them to the original text misplaces each
replacement by an amount proportional to
len(replacement) - len(original_span), corrupting masked output on any
non-trivial input.

Fix:
- output_parse_pii=False: return redacted_text["text"] verbatim — no
  re-stitching needed, Presidio already produced the correct output.
- output_parse_pii=True: iterate analyze_results (pre-anonymize, with
  original-text coordinates) for both stitching and pii_tokens
  construction.  Eliminates the secondary bug where pii_tokens stored
  text[start:end] using already-mutated coordinates.

Fail-closed on /anonymize backend error preserved.

Linear: https://linear.app/bitropy/issue/BIT-455
---
 .../guardrails/guardrail_hooks/presidio.py    | 97 ++++++++++---------
 1 file changed, 49 insertions(+), 48 deletions(-)

diff --git a/litellm/proxy/guardrails/guardrail_hooks/presidio.py b/litellm/proxy/guardrails/guardrail_hooks/presidio.py
index 0f4ebbd4880..a201db99609 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/presidio.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/presidio.py
@@ -488,58 +488,59 @@ async def anonymize_text(
             new_text = text
             if redacted_text is not None:
                 verbose_proxy_logger.debug("redacted_text: %s", redacted_text)
-                # Process items in reverse order by start position so that
-                # replacing later spans first does not shift earlier coordinates.
-                for item in sorted(
-                    redacted_text["items"], key=lambda x: x["start"], reverse=True
-                ):
-                    start = item["start"]
-                    end = item["end"]
-                    replacement = item["text"]  # replacement token
-                    if item["operator"] == "replace" and output_parse_pii is True:
-                        if request_data is None:
-                            verbose_proxy_logger.warning(
-                                "Presidio anonymize_text called without request_data — "
-                                "PII tokens cannot be stored per-request. "
-                                "This may indicate a missing caller update."
+                if not output_parse_pii:
+                    # Defect 3 primary fix: return Presidio's redacted output verbatim.
+                    # redacted_text["items"] carry OUTPUT coordinates (positions in the
+                    # already-redacted string), not original-text coordinates.  Any
+                    # attempt to stitch them back into the original text drifts whenever
+                    # a replacement token has a different length than the original span.
+                    # Presidio already produced the correct output — use it as-is.
+                    new_text = redacted_text["text"]
+                    for item in redacted_text.get("items", []):
+                        entity_type = item.get("entity_type", None)
+                        if entity_type is not None:
+                            masked_entity_count[entity_type] = (
+                                masked_entity_count.get(entity_type, 0) + 1
                             )
-                            request_data = {}
-                        # Store pii_tokens in metadata to avoid leaking to LLM providers.
-                        # Providers like Anthropic reject unknown top-level fields.
-                        if not request_data.get("metadata"):
-                            request_data["metadata"] = {}
-                        if "pii_tokens" not in request_data["metadata"]:
-                            request_data["metadata"]["pii_tokens"] = {}
-                        pii_tokens = request_data["metadata"]["pii_tokens"]
-
-                        # Append a sequential number to make each token unique
-                        # per request, so unmasking maps back to the correct
-                        # original value.  Format: <PHONE_NUMBER_1>, <PHONE_NUMBER_2>
-                        # This is LLM-friendly and degrades gracefully if the
-                        # LLM doesn't echo the token verbatim.
-                        seq = len(pii_tokens) + 1
-                        if replacement.endswith(">"):
-                            replacement = f"{replacement[:-1]}_{seq}>"
-                        else:
-                            replacement = f"{replacement}_{seq}"
+                else:
+                    # Defect 3 secondary fix (output_parse_pii=True): use analyze_results
+                    # (pre-anonymize, original-text coordinates) for both stitching and
+                    # pii_tokens construction.  redacted_text["items"] coords are wrong
+                    # here for the same reason as above.
+                    if request_data is None:
+                        verbose_proxy_logger.warning(
+                            "Presidio anonymize_text called without request_data — "
+                            "PII tokens cannot be stored per-request. "
+                            "This may indicate a missing caller update."
+                        )
+                        request_data = {}
+                    if not request_data.get("metadata"):
+                        request_data["metadata"] = {}
+                    if "pii_tokens" not in request_data["metadata"]:
+                        request_data["metadata"]["pii_tokens"] = {}
+                    pii_tokens = request_data["metadata"]["pii_tokens"]
+
+                    for result_item in sorted(
+                        analyze_results,
+                        key=lambda x: x.get("start", 0),
+                        reverse=True,
+                    ):
+                        start = result_item.get("start")
+                        end = result_item.get("end")
+                        entity_type = result_item.get("entity_type", "PII")
+                        if start is None or end is None:
+                            continue
 
-                        # Use ORIGINAL text (not new_text) since start/end
-                        # reference the original text's coordinates.
+                        # Unique numbered token per detection so unmasking maps back
+                        # to the correct original value.  Format: <PHONE_NUMBER_1>.
+                        seq = len(pii_tokens) + 1
+                        replacement = f"<{entity_type}_{seq}>"
+                        # Original-text coordinates — correct original substring.
                         pii_tokens[replacement] = text[start:end]
-
-                    new_text = new_text[:start] + replacement + new_text[end:]
-                    entity_type = item.get("entity_type", None)
-                    if entity_type is not None:
-                        masked_entity_count[entity_type] = (
-                            masked_entity_count.get(entity_type, 0) + 1
+                        new_text = new_text[:start] + replacement + new_text[end:]
+                        masked_entity_count[str(entity_type)] = (
+                            masked_entity_count.get(str(entity_type), 0) + 1
                         )
-                # When output_parse_pii is True, new_text contains sequentially
-                # numbered tokens (e.g. <PHONE_NUMBER_1>) that match the keys
-                # in pii_tokens.  Returning redacted_text["text"] (Presidio's
-                # original output) would send un-numbered tokens to the LLM,
-                # making unmasking impossible.
-                # When output_parse_pii is False, new_text == redacted_text["text"]
-                # because no suffix is appended.
                 return new_text
             else:
                 raise Exception("Invalid anonymizer response: received None")