diff --git a/litellm/passthrough/utils.py b/litellm/passthrough/utils.py index ef4357d1ca2..1f149313793 100644 --- a/litellm/passthrough/utils.py +++ b/litellm/passthrough/utils.py @@ -52,6 +52,7 @@ def forward_headers_from_request( # Header We Should NOT forward request_headers.pop("content-length", None) request_headers.pop("host", None) + request_headers.pop("x-litellm-api-key", None) # Combine request headers with custom headers headers = {**request_headers, **headers} diff --git a/litellm/proxy/guardrails/guardrail_hooks/presidio.py b/litellm/proxy/guardrails/guardrail_hooks/presidio.py index 0f4ebbd4880..a201db99609 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/presidio.py +++ b/litellm/proxy/guardrails/guardrail_hooks/presidio.py @@ -488,58 +488,59 @@ async def anonymize_text( new_text = text if redacted_text is not None: verbose_proxy_logger.debug("redacted_text: %s", redacted_text) - # Process items in reverse order by start position so that - # replacing later spans first does not shift earlier coordinates. - for item in sorted( - redacted_text["items"], key=lambda x: x["start"], reverse=True - ): - start = item["start"] - end = item["end"] - replacement = item["text"] # replacement token - if item["operator"] == "replace" and output_parse_pii is True: - if request_data is None: - verbose_proxy_logger.warning( - "Presidio anonymize_text called without request_data — " - "PII tokens cannot be stored per-request. " - "This may indicate a missing caller update." + if not output_parse_pii: + # Defect 3 primary fix: return Presidio's redacted output verbatim. + # redacted_text["items"] carry OUTPUT coordinates (positions in the + # already-redacted string), not original-text coordinates. Any + # attempt to stitch them back into the original text drifts whenever + # a replacement token has a different length than the original span. + # Presidio already produced the correct output — use it as-is. 
+ new_text = redacted_text["text"] + for item in redacted_text.get("items", []): + entity_type = item.get("entity_type", None) + if entity_type is not None: + masked_entity_count[entity_type] = ( + masked_entity_count.get(entity_type, 0) + 1 ) - request_data = {} - # Store pii_tokens in metadata to avoid leaking to LLM providers. - # Providers like Anthropic reject unknown top-level fields. - if not request_data.get("metadata"): - request_data["metadata"] = {} - if "pii_tokens" not in request_data["metadata"]: - request_data["metadata"]["pii_tokens"] = {} - pii_tokens = request_data["metadata"]["pii_tokens"] - - # Append a sequential number to make each token unique - # per request, so unmasking maps back to the correct - # original value. Format: , - # This is LLM-friendly and degrades gracefully if the - # LLM doesn't echo the token verbatim. - seq = len(pii_tokens) + 1 - if replacement.endswith(">"): - replacement = f"{replacement[:-1]}_{seq}>" - else: - replacement = f"{replacement}_{seq}" + else: + # Defect 3 secondary fix (output_parse_pii=True): use analyze_results + # (pre-anonymize, original-text coordinates) for both stitching and + # pii_tokens construction. redacted_text["items"] coords are wrong + # here for the same reason as above. + if request_data is None: + verbose_proxy_logger.warning( + "Presidio anonymize_text called without request_data — " + "PII tokens cannot be stored per-request. " + "This may indicate a missing caller update." 
+ ) + request_data = {} + if not request_data.get("metadata"): + request_data["metadata"] = {} + if "pii_tokens" not in request_data["metadata"]: + request_data["metadata"]["pii_tokens"] = {} + pii_tokens = request_data["metadata"]["pii_tokens"] + + for result_item in sorted( + analyze_results, + key=lambda x: x.get("start", 0), + reverse=True, + ): + start = result_item.get("start") + end = result_item.get("end") + entity_type = result_item.get("entity_type", "PII") + if start is None or end is None: + continue - # Use ORIGINAL text (not new_text) since start/end - # reference the original text's coordinates. + # Unique numbered token per detection so unmasking maps back + # to the correct original value. Format: <ENTITY_TYPE_N>, e.g. <PERSON_1>. + seq = len(pii_tokens) + 1 + replacement = f"<{entity_type}_{seq}>" + # Original-text coordinates — correct original substring. pii_tokens[replacement] = text[start:end] - - new_text = new_text[:start] + replacement + new_text[end:] - entity_type = item.get("entity_type", None) - if entity_type is not None: - masked_entity_count[entity_type] = ( - masked_entity_count.get(entity_type, 0) + 1 + new_text = new_text[:start] + replacement + new_text[end:] + masked_entity_count[str(entity_type)] = ( + masked_entity_count.get(str(entity_type), 0) + 1 ) - # When output_parse_pii is True, new_text contains sequentially - # numbered tokens (e.g. ) that match the keys - # in pii_tokens. Returning redacted_text["text"] (Presidio's - # original output) would send un-numbered tokens to the LLM, - # making unmasking impossible. - # When output_parse_pii is False, new_text == redacted_text["text"] - # because no suffix is appended. 
return new_text else: raise Exception("Invalid anonymizer response: received None") diff --git a/litellm/proxy/guardrails/guardrail_registry.py b/litellm/proxy/guardrails/guardrail_registry.py index 96175877d65..e08dd3b6ebe 100644 --- a/litellm/proxy/guardrails/guardrail_registry.py +++ b/litellm/proxy/guardrails/guardrail_registry.py @@ -562,9 +562,11 @@ def update_in_memory_guardrail( guardrail_id ) if custom_guardrail_callback: - updated_litellm_params = cast( - LitellmParams, guardrail.get("litellm_params", {}) - ) + litellm_params_data = guardrail.get("litellm_params", {}) + if isinstance(litellm_params_data, dict): + updated_litellm_params = LitellmParams(**litellm_params_data) + else: + updated_litellm_params = cast(LitellmParams, litellm_params_data) custom_guardrail_callback.update_in_memory_litellm_params( litellm_params=updated_litellm_params ) diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py index 1ef866486ec..585122367df 100644 --- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py @@ -604,21 +604,34 @@ async def anthropic_proxy_route( base_url = httpx.URL(base_target_url) updated_url = base_url.copy_with(path=encoded_endpoint) - # Add or update query parameters - anthropic_api_key = passthrough_endpoint_router.get_credentials( - custom_llm_provider="anthropic", - region_name=None, - ) + # Credential priority: client-provided credentials take precedence over + # server credentials. This allows mixed mode where some users bring their + # own key (BYOK) or OAuth token (Claude Code Max) while others use the + # server's API key. 
+ x_api_key_header = request.headers.get("x-api-key", "") + client_authorization_header = request.headers.get("authorization", "") + + custom_headers: dict + if x_api_key_header or client_authorization_header: + custom_headers = {} + else: + anthropic_api_key = passthrough_endpoint_router.get_credentials( + custom_llm_provider="anthropic", + region_name=None, + ) + server_auth_header = AnthropicModelInfo.get_auth_header( + anthropic_api_key or None + ) + custom_headers = server_auth_header if server_auth_header is not None else {} ## check for streaming is_streaming_request = await is_streaming_request_fn(request) ## CREATE PASS-THROUGH - auth_header = AnthropicModelInfo.get_auth_header(anthropic_api_key or None) endpoint_func = create_pass_through_route( endpoint=endpoint, target=str(updated_url), - custom_headers=auth_header if auth_header is not None else {}, + custom_headers=custom_headers, _forward_headers=True, is_streaming_request=is_streaming_request, ) # dynamically construct pass-through endpoint based on incoming path diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py b/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py index 97ef05100de..b7e7359e9ff 100644 --- a/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py +++ b/tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py @@ -10,7 +10,7 @@ from fastapi import Request, Response from fastapi.testclient import TestClient -from litellm.passthrough.utils import CommonUtils +from litellm.passthrough.utils import BasePassthroughUtils, CommonUtils sys.path.insert( 0, os.path.abspath("../../../..") @@ -95,4 +95,42 @@ def test_encode_bedrock_runtime_modelid_arn_edge_cases(): endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile.v1/invoke" expected = 
"model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile%2Ftest-profile.v1/invoke" result = CommonUtils.encode_bedrock_runtime_modelid_arn(endpoint) - assert result == expected \ No newline at end of file + assert result == expected + + +def test_forward_headers_strips_litellm_api_key(): + """x-litellm-api-key should not be forwarded to upstream providers.""" + request_headers = { + "x-litellm-api-key": "sk-litellm-secret-key", + "content-type": "application/json", + "x-api-key": "sk-ant-api-key", + } + + result = BasePassthroughUtils.forward_headers_from_request( + request_headers=request_headers.copy(), + headers={}, + forward_headers=True, + ) + + assert "x-litellm-api-key" not in result + assert result.get("content-type") == "application/json" + assert result.get("x-api-key") == "sk-ant-api-key" + + +def test_forward_headers_strips_host_and_content_length(): + """host and content-length should not be forwarded.""" + request_headers = { + "host": "api.anthropic.com", + "content-length": "1234", + "content-type": "application/json", + } + + result = BasePassthroughUtils.forward_headers_from_request( + request_headers=request_headers.copy(), + headers={}, + forward_headers=True, + ) + + assert "host" not in result + assert "content-length" not in result + assert result.get("content-type") == "application/json" \ No newline at end of file