Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions litellm/passthrough/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def forward_headers_from_request(
# Header We Should NOT forward
request_headers.pop("content-length", None)
request_headers.pop("host", None)
request_headers.pop("x-litellm-api-key", None)

# Combine request headers with custom headers
headers = {**request_headers, **headers}
Expand Down
97 changes: 49 additions & 48 deletions litellm/proxy/guardrails/guardrail_hooks/presidio.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,58 +488,59 @@ async def anonymize_text(
new_text = text
if redacted_text is not None:
verbose_proxy_logger.debug("redacted_text: %s", redacted_text)
# Process items in reverse order by start position so that
# replacing later spans first does not shift earlier coordinates.
for item in sorted(
redacted_text["items"], key=lambda x: x["start"], reverse=True
):
start = item["start"]
end = item["end"]
replacement = item["text"] # replacement token
if item["operator"] == "replace" and output_parse_pii is True:
if request_data is None:
verbose_proxy_logger.warning(
"Presidio anonymize_text called without request_data — "
"PII tokens cannot be stored per-request. "
"This may indicate a missing caller update."
if not output_parse_pii:
# Defect 3 primary fix: return Presidio's redacted output verbatim.
# redacted_text["items"] carry OUTPUT coordinates (positions in the
# already-redacted string), not original-text coordinates. Any
# attempt to stitch them back into the original text drifts whenever
# a replacement token has a different length than the original span.
# Presidio already produced the correct output — use it as-is.
new_text = redacted_text["text"]
for item in redacted_text.get("items", []):
entity_type = item.get("entity_type", None)
if entity_type is not None:
masked_entity_count[entity_type] = (
masked_entity_count.get(entity_type, 0) + 1
)
request_data = {}
# Store pii_tokens in metadata to avoid leaking to LLM providers.
# Providers like Anthropic reject unknown top-level fields.
if not request_data.get("metadata"):
request_data["metadata"] = {}
if "pii_tokens" not in request_data["metadata"]:
request_data["metadata"]["pii_tokens"] = {}
pii_tokens = request_data["metadata"]["pii_tokens"]

# Append a sequential number to make each token unique
# per request, so unmasking maps back to the correct
# original value. Format: <PHONE_NUMBER_1>, <PHONE_NUMBER_2>
# This is LLM-friendly and degrades gracefully if the
# LLM doesn't echo the token verbatim.
seq = len(pii_tokens) + 1
if replacement.endswith(">"):
replacement = f"{replacement[:-1]}_{seq}>"
else:
replacement = f"{replacement}_{seq}"
else:
# Defect 3 secondary fix (output_parse_pii=True): use analyze_results
# (pre-anonymize, original-text coordinates) for both stitching and
# pii_tokens construction. redacted_text["items"] coords are wrong
# here for the same reason as above.
if request_data is None:
verbose_proxy_logger.warning(
"Presidio anonymize_text called without request_data — "
"PII tokens cannot be stored per-request. "
"This may indicate a missing caller update."
)
request_data = {}
if not request_data.get("metadata"):
request_data["metadata"] = {}
if "pii_tokens" not in request_data["metadata"]:
request_data["metadata"]["pii_tokens"] = {}
pii_tokens = request_data["metadata"]["pii_tokens"]

for result_item in sorted(
analyze_results,
key=lambda x: x.get("start", 0),
reverse=True,
):
start = result_item.get("start")
end = result_item.get("end")
entity_type = result_item.get("entity_type", "PII")
if start is None or end is None:
continue

# Use ORIGINAL text (not new_text) since start/end
# reference the original text's coordinates.
# Unique numbered token per detection so unmasking maps back
# to the correct original value. Format: <PHONE_NUMBER_1>.
seq = len(pii_tokens) + 1
replacement = f"<{entity_type}_{seq}>"
# Original-text coordinates — correct original substring.
pii_tokens[replacement] = text[start:end]

new_text = new_text[:start] + replacement + new_text[end:]
entity_type = item.get("entity_type", None)
if entity_type is not None:
masked_entity_count[entity_type] = (
masked_entity_count.get(entity_type, 0) + 1
new_text = new_text[:start] + replacement + new_text[end:]
masked_entity_count[str(entity_type)] = (
masked_entity_count.get(str(entity_type), 0) + 1
)
# When output_parse_pii is True, new_text contains sequentially
# numbered tokens (e.g. <PHONE_NUMBER_1>) that match the keys
# in pii_tokens. Returning redacted_text["text"] (Presidio's
# original output) would send un-numbered tokens to the LLM,
# making unmasking impossible.
# When output_parse_pii is False, new_text == redacted_text["text"]
# because no suffix is appended.
return new_text
else:
raise Exception("Invalid anonymizer response: received None")
Expand Down
8 changes: 5 additions & 3 deletions litellm/proxy/guardrails/guardrail_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,9 +562,11 @@ def update_in_memory_guardrail(
guardrail_id
)
if custom_guardrail_callback:
updated_litellm_params = cast(
LitellmParams, guardrail.get("litellm_params", {})
)
litellm_params_data = guardrail.get("litellm_params", {})
if isinstance(litellm_params_data, dict):
updated_litellm_params = LitellmParams(**litellm_params_data)
else:
updated_litellm_params = cast(LitellmParams, litellm_params_data)
custom_guardrail_callback.update_in_memory_litellm_params(
litellm_params=updated_litellm_params
)
Expand Down
27 changes: 20 additions & 7 deletions litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,21 +604,34 @@ async def anthropic_proxy_route(
base_url = httpx.URL(base_target_url)
updated_url = base_url.copy_with(path=encoded_endpoint)

# Add or update query parameters
anthropic_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider="anthropic",
region_name=None,
)
# Credential priority: client-provided credentials take precedence over
# server credentials. This allows mixed mode where some users bring their
# own key (BYOK) or OAuth token (Claude Code Max) while others use the
# server's API key.
x_api_key_header = request.headers.get("x-api-key", "")
client_authorization_header = request.headers.get("authorization", "")

custom_headers: dict
if x_api_key_header or client_authorization_header:
custom_headers = {}
else:
anthropic_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider="anthropic",
region_name=None,
)
server_auth_header = AnthropicModelInfo.get_auth_header(
anthropic_api_key or None
)
custom_headers = server_auth_header if server_auth_header is not None else {}

## check for streaming
is_streaming_request = await is_streaming_request_fn(request)

## CREATE PASS-THROUGH
auth_header = AnthropicModelInfo.get_auth_header(anthropic_api_key or None)
endpoint_func = create_pass_through_route(
endpoint=endpoint,
target=str(updated_url),
custom_headers=auth_header if auth_header is not None else {},
custom_headers=custom_headers,
_forward_headers=True,
is_streaming_request=is_streaming_request,
) # dynamically construct pass-through endpoint based on incoming path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from fastapi import Request, Response
from fastapi.testclient import TestClient

from litellm.passthrough.utils import CommonUtils
from litellm.passthrough.utils import BasePassthroughUtils, CommonUtils

sys.path.insert(
0, os.path.abspath("../../../..")
Expand Down Expand Up @@ -95,4 +95,42 @@ def test_encode_bedrock_runtime_modelid_arn_edge_cases():
endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile.v1/invoke"
expected = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile%2Ftest-profile.v1/invoke"
result = CommonUtils.encode_bedrock_runtime_modelid_arn(endpoint)
assert result == expected
assert result == expected


def test_forward_headers_strips_litellm_api_key():
    """The internal x-litellm-api-key header must never reach upstream providers."""
    incoming = {
        "x-litellm-api-key": "sk-litellm-secret-key",
        "content-type": "application/json",
        "x-api-key": "sk-ant-api-key",
    }

    forwarded = BasePassthroughUtils.forward_headers_from_request(
        request_headers=dict(incoming),
        headers={},
        forward_headers=True,
    )

    # The litellm proxy key is stripped; everything else passes through untouched.
    assert "x-litellm-api-key" not in forwarded
    assert forwarded.get("content-type") == "application/json"
    assert forwarded.get("x-api-key") == "sk-ant-api-key"


def test_forward_headers_strips_host_and_content_length():
    """host and content-length should not be forwarded."""
    incoming = {
        "host": "api.anthropic.com",
        "content-length": "1234",
        "content-type": "application/json",
    }

    forwarded = BasePassthroughUtils.forward_headers_from_request(
        request_headers=dict(incoming),
        headers={},
        forward_headers=True,
    )

    # Connection-specific headers are dropped; content headers survive.
    for blocked in ("host", "content-length"):
        assert blocked not in forwarded
    assert forwarded.get("content-type") == "application/json"
Loading