Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions litellm/passthrough/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def forward_headers_from_request(
# Header We Should NOT forward
request_headers.pop("content-length", None)
request_headers.pop("host", None)
request_headers.pop("x-litellm-api-key", None)

# Combine request headers with custom headers
headers = {**request_headers, **headers}
Expand Down
97 changes: 49 additions & 48 deletions litellm/proxy/guardrails/guardrail_hooks/presidio.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,58 +488,59 @@ async def anonymize_text(
new_text = text
if redacted_text is not None:
verbose_proxy_logger.debug("redacted_text: %s", redacted_text)
# Process items in reverse order by start position so that
# replacing later spans first does not shift earlier coordinates.
for item in sorted(
redacted_text["items"], key=lambda x: x["start"], reverse=True
):
start = item["start"]
end = item["end"]
replacement = item["text"] # replacement token
if item["operator"] == "replace" and output_parse_pii is True:
if request_data is None:
verbose_proxy_logger.warning(
"Presidio anonymize_text called without request_data — "
"PII tokens cannot be stored per-request. "
"This may indicate a missing caller update."
if not output_parse_pii:
# Defect 3 primary fix: return Presidio's redacted output verbatim.
# redacted_text["items"] carry OUTPUT coordinates (positions in the
# already-redacted string), not original-text coordinates. Any
# attempt to stitch them back into the original text drifts whenever
# a replacement token has a different length than the original span.
# Presidio already produced the correct output — use it as-is.
new_text = redacted_text["text"]
for item in redacted_text.get("items", []):
entity_type = item.get("entity_type", None)
if entity_type is not None:
masked_entity_count[entity_type] = (
masked_entity_count.get(entity_type, 0) + 1
)
request_data = {}
# Store pii_tokens in metadata to avoid leaking to LLM providers.
# Providers like Anthropic reject unknown top-level fields.
if not request_data.get("metadata"):
request_data["metadata"] = {}
if "pii_tokens" not in request_data["metadata"]:
request_data["metadata"]["pii_tokens"] = {}
pii_tokens = request_data["metadata"]["pii_tokens"]

# Append a sequential number to make each token unique
# per request, so unmasking maps back to the correct
# original value. Format: <PHONE_NUMBER_1>, <PHONE_NUMBER_2>
# This is LLM-friendly and degrades gracefully if the
# LLM doesn't echo the token verbatim.
seq = len(pii_tokens) + 1
if replacement.endswith(">"):
replacement = f"{replacement[:-1]}_{seq}>"
else:
replacement = f"{replacement}_{seq}"
else:
# Defect 3 secondary fix (output_parse_pii=True): use analyze_results
# (pre-anonymize, original-text coordinates) for both stitching and
# pii_tokens construction. redacted_text["items"] coords are wrong
# here for the same reason as above.
if request_data is None:
verbose_proxy_logger.warning(
"Presidio anonymize_text called without request_data — "
"PII tokens cannot be stored per-request. "
"This may indicate a missing caller update."
)
request_data = {}
if not request_data.get("metadata"):
request_data["metadata"] = {}
if "pii_tokens" not in request_data["metadata"]:
request_data["metadata"]["pii_tokens"] = {}
pii_tokens = request_data["metadata"]["pii_tokens"]

for result_item in sorted(
analyze_results,
key=lambda x: x.get("start", 0),
reverse=True,
):
start = result_item.get("start")
end = result_item.get("end")
entity_type = result_item.get("entity_type", "PII")
if start is None or end is None:
continue

# Use ORIGINAL text (not new_text) since start/end
# reference the original text's coordinates.
# Unique numbered token per detection so unmasking maps back
# to the correct original value. Format: <PHONE_NUMBER_1>.
seq = len(pii_tokens) + 1
replacement = f"<{entity_type}_{seq}>"
# Original-text coordinates — correct original substring.
pii_tokens[replacement] = text[start:end]

new_text = new_text[:start] + replacement + new_text[end:]
entity_type = item.get("entity_type", None)
if entity_type is not None:
masked_entity_count[entity_type] = (
masked_entity_count.get(entity_type, 0) + 1
new_text = new_text[:start] + replacement + new_text[end:]
masked_entity_count[str(entity_type)] = (
masked_entity_count.get(str(entity_type), 0) + 1
)
# When output_parse_pii is True, new_text contains sequentially
# numbered tokens (e.g. <PHONE_NUMBER_1>) that match the keys
# in pii_tokens. Returning redacted_text["text"] (Presidio's
# original output) would send un-numbered tokens to the LLM,
# making unmasking impossible.
# When output_parse_pii is False, new_text == redacted_text["text"]
# because no suffix is appended.
return new_text
else:
raise Exception("Invalid anonymizer response: received None")
Expand Down
8 changes: 5 additions & 3 deletions litellm/proxy/guardrails/guardrail_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,9 +562,11 @@ def update_in_memory_guardrail(
guardrail_id
)
if custom_guardrail_callback:
updated_litellm_params = cast(
LitellmParams, guardrail.get("litellm_params", {})
)
litellm_params_data = guardrail.get("litellm_params", {})
if isinstance(litellm_params_data, dict):
updated_litellm_params = LitellmParams(**litellm_params_data)
else:
updated_litellm_params = cast(LitellmParams, litellm_params_data)
custom_guardrail_callback.update_in_memory_litellm_params(
litellm_params=updated_litellm_params
)
Expand Down
27 changes: 20 additions & 7 deletions litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,21 +604,34 @@ async def anthropic_proxy_route(
base_url = httpx.URL(base_target_url)
updated_url = base_url.copy_with(path=encoded_endpoint)

# Add or update query parameters
anthropic_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider="anthropic",
region_name=None,
)
# Credential priority: client-provided credentials take precedence over
# server credentials. This allows mixed mode where some users bring their
# own key (BYOK) or OAuth token (Claude Code Max) while others use the
# server's API key.
x_api_key_header = request.headers.get("x-api-key", "")
client_authorization_header = request.headers.get("authorization", "")

custom_headers: dict
if x_api_key_header or client_authorization_header:
custom_headers = {}
else:
anthropic_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider="anthropic",
region_name=None,
)
server_auth_header = AnthropicModelInfo.get_auth_header(
anthropic_api_key or None
)
custom_headers = server_auth_header if server_auth_header is not None else {}

## check for streaming
is_streaming_request = await is_streaming_request_fn(request)

## CREATE PASS-THROUGH
auth_header = AnthropicModelInfo.get_auth_header(anthropic_api_key or None)
endpoint_func = create_pass_through_route(
endpoint=endpoint,
target=str(updated_url),
custom_headers=auth_header if auth_header is not None else {},
custom_headers=custom_headers,
_forward_headers=True,
is_streaming_request=is_streaming_request,
) # dynamically construct pass-through endpoint based on incoming path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from fastapi import Request, Response
from fastapi.testclient import TestClient

from litellm.passthrough.utils import CommonUtils
from litellm.passthrough.utils import BasePassthroughUtils, CommonUtils

sys.path.insert(
0, os.path.abspath("../../../..")
Expand Down Expand Up @@ -95,4 +95,42 @@ def test_encode_bedrock_runtime_modelid_arn_edge_cases():
endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile.v1/invoke"
expected = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile%2Ftest-profile.v1/invoke"
result = CommonUtils.encode_bedrock_runtime_modelid_arn(endpoint)
assert result == expected
assert result == expected


def test_forward_headers_strips_litellm_api_key():
    """The internal x-litellm-api-key header must never reach upstream providers."""
    incoming = {
        "x-litellm-api-key": "sk-litellm-secret-key",
        "content-type": "application/json",
        "x-api-key": "sk-ant-api-key",
    }

    forwarded = BasePassthroughUtils.forward_headers_from_request(
        request_headers=dict(incoming),
        headers={},
        forward_headers=True,
    )

    # The litellm proxy key is stripped; everything else passes through untouched.
    assert "x-litellm-api-key" not in forwarded
    assert forwarded.get("content-type") == "application/json"
    assert forwarded.get("x-api-key") == "sk-ant-api-key"


def test_forward_headers_strips_host_and_content_length():
    """host and content-length should not be forwarded."""
    incoming = {
        "host": "api.anthropic.com",
        "content-length": "1234",
        "content-type": "application/json",
    }

    forwarded = BasePassthroughUtils.forward_headers_from_request(
        request_headers=dict(incoming),
        headers={},
        forward_headers=True,
    )

    # Connection-specific headers are dropped; content headers survive.
    for blocked in ("host", "content-length"):
        assert blocked not in forwarded
    assert forwarded.get("content-type") == "application/json"
Loading