Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion litellm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,20 @@
)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 request_timeout default change widens scope beyond completion()

Changing the default from 6000 to 600 fixes the completion() regression flagged in the previous thread, but litellm.request_timeout is also the default for Router.__init__ (self.timeout = timeout or litellm.request_timeout, router.py:530), speech() (main.py:6760), and the Anthropic / Azure-Anthropic / OpenAI count-token handlers. All of these would silently drop from a 6000 s ceiling to 600 s for users who have not set an explicit timeout. Long-running router calls or TTS jobs that complete in 600–6000 s will now time out.

Per the "avoid backwards-incompatible changes without user-controlled flags" rule, consider keeping the constant at 6000 (or introducing a separate COMPLETION_REQUEST_TIMEOUT constant) and only using the explicit-600 fallback inside _resolve_completion_timeout() itself, where you control the scope.

Rule Used: What: avoid backwards-incompatible changes without... (source)

DEFAULT_MAX_TOKENS_FOR_TRITON = int(os.getenv("DEFAULT_MAX_TOKENS_FOR_TRITON", 2000))
#### Networking settings ####
request_timeout: float = float(os.getenv("REQUEST_TIMEOUT", 6000)) # time in seconds
# Sentinel used when `REQUEST_TIMEOUT` is unset: `litellm.request_timeout` keeps this
# value so longer-running surfaces (Router `timeout or litellm.request_timeout`,
# speech/TTS, responses, vector stores, etc.) get a long HTTP deadline. Chat
# `completion()` maps this sentinel down to 600s when the caller did not set a
# per-request/model timeout—see ``CompletionTimeout.resolve`` in completion_timeout.py. MCP uses
# dedicated timeouts (e.g. `MCP_CLIENT_TIMEOUT`), not `request_timeout`.
DEFAULT_REQUEST_TIMEOUT_SECONDS: float = 6000.0
# Pair used for default httpx clients when no custom timeout is passed: read/write
# deadline and connect handshake (see ``http_handler`` cached handler paths).
COMPLETION_HTTP_FALLBACK_SECONDS: float = 600.0
HTTP_HANDLER_CONNECT_TIMEOUT_SECONDS: float = 5.0
request_timeout: float = float(
os.getenv("REQUEST_TIMEOUT", str(int(DEFAULT_REQUEST_TIMEOUT_SECONDS)))
)
DEFAULT_A2A_AGENT_TIMEOUT: float = float(
os.getenv("DEFAULT_A2A_AGENT_TIMEOUT", 6000)
) # seconds; NOTE(review): the 6000 s default is 100 minutes, not 10 - confirm intended value
Expand Down
83 changes: 83 additions & 0 deletions litellm/litellm_core_utils/completion_timeout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Completion HTTP timeout resolution (kept out of ``main.py`` to limit import cycles)."""

from __future__ import annotations

from typing import Callable, Optional, Union

import httpx

from litellm.constants import (
COMPLETION_HTTP_FALLBACK_SECONDS,
DEFAULT_REQUEST_TIMEOUT_SECONDS,
)


class CompletionTimeout:
    """Pick the HTTP timeout that ``completion()`` should use.

    Explicit per-call / per-model values take priority; the global
    :attr:`litellm.request_timeout` is only consulted as a last resort.
    """

    @staticmethod
    def _fallback_when_no_explicit_timeout(
        global_timeout: Optional[Union[float, str]],
    ) -> float:
        """
        Derive the timeout when neither the model nor kwargs supplied one.

        ``global_timeout`` is the numeric (or numeric-string) value of
        :attr:`litellm.request_timeout`; it is not expected to be an
        :class:`httpx.Timeout`.

        Returns :data:`~litellm.constants.COMPLETION_HTTP_FALLBACK_SECONDS` when
        ``global_timeout`` is ``None`` or equal to
        :data:`~litellm.constants.DEFAULT_REQUEST_TIMEOUT_SECONDS`; any other
        value is returned as a ``float``.
        """
        if global_timeout is None:
            return COMPLETION_HTTP_FALLBACK_SECONDS

        configured = float(global_timeout)
        # The untouched package default is treated as "nothing configured".
        if configured == float(DEFAULT_REQUEST_TIMEOUT_SECONDS):
            return COMPLETION_HTTP_FALLBACK_SECONDS
        return configured

    @staticmethod
    def resolve(
        model_timeout: Optional[Union[float, str, httpx.Timeout]],
        kwargs: dict,
        custom_llm_provider: str,
        *,
        global_timeout: Optional[Union[float, str]],
        supports_httpx_timeout: Callable[[str], bool],
    ) -> Union[float, httpx.Timeout]:
        """
        Choose the timeout for one ``completion()`` call.

        Candidates are tried in this order and the first non-``None`` one wins:

        1. ``model_timeout`` (call argument / merged ``litellm_params``)
        2. ``kwargs["timeout"]``
        3. ``kwargs["request_timeout"]``
        4. The fallback derived from ``global_timeout``
           (:attr:`litellm.request_timeout`), where the package default is
           swapped for the shorter completion fallback.

        An explicit value on the model or in kwargs is never rewritten, even if
        it happens to equal the package default.

        The winner is then normalised: non-:class:`httpx.Timeout` values become
        ``float``; an :class:`httpx.Timeout` is returned untouched when
        ``supports_httpx_timeout(custom_llm_provider)`` is true, otherwise it is
        reduced to its read timeout (or the completion fallback if that is unset).
        """
        chosen: Optional[Union[float, str, httpx.Timeout]] = model_timeout
        if chosen is None:
            for key in ("timeout", "request_timeout"):
                chosen = kwargs.get(key)
                if chosen is not None:
                    break
            else:
                # Nothing explicit anywhere: derive from the global setting.
                chosen = CompletionTimeout._fallback_when_no_explicit_timeout(
                    global_timeout
                )

        if not isinstance(chosen, httpx.Timeout):
            return float(chosen)  # type: ignore

        if supports_httpx_timeout(custom_llm_provider):
            return chosen

        # Provider cannot take an httpx.Timeout: collapse it to a single number.
        read_deadline = chosen.read
        if read_deadline is None:
            return COMPLETION_HTTP_FALLBACK_SECONDS  # default 10 min timeout
        return float(read_deadline)
11 changes: 8 additions & 3 deletions litellm/llms/custom_httpx/http_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
AIOHTTP_KEEPALIVE_TIMEOUT,
AIOHTTP_NEEDS_CLEANUP_CLOSED,
AIOHTTP_TTL_DNS_CACHE,
COMPLETION_HTTP_FALLBACK_SECONDS,
DEFAULT_SSL_CIPHERS,
HTTP_HANDLER_CONNECT_TIMEOUT_SECONDS,
)
from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
from litellm.types.llms.custom_http import *
Expand Down Expand Up @@ -70,7 +72,10 @@ def get_default_headers() -> dict:
headers = get_default_headers()

# https://www.python-httpx.org/advanced/timeouts
_DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
_DEFAULT_TIMEOUT = httpx.Timeout(
timeout=COMPLETION_HTTP_FALLBACK_SECONDS,
connect=HTTP_HANDLER_CONNECT_TIMEOUT_SECONDS,
)


def _prepare_request_data_and_content(
Expand Down Expand Up @@ -1244,7 +1249,7 @@ def get_async_httpx_client(
_new_client = AsyncHTTPHandler(**handler_params)
else:
_new_client = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=5.0),
timeout=_DEFAULT_TIMEOUT,
shared_session=shared_session,
)

Expand Down Expand Up @@ -1293,7 +1298,7 @@ def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
}
_new_client = HTTPHandler(**handler_params)
else:
_new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
_new_client = HTTPHandler(timeout=_DEFAULT_TIMEOUT)

cache.set_cache(
key=_cache_key_name,
Expand Down
16 changes: 8 additions & 8 deletions litellm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
calculate_request_duration,
get_audio_file_for_health_check,
)
from litellm.litellm_core_utils.completion_timeout import CompletionTimeout
from litellm.litellm_core_utils.dd_tracing import tracer
from litellm.litellm_core_utils.get_provider_specific_headers import (
ProviderSpecificHeaderUtils,
Expand Down Expand Up @@ -1400,14 +1401,13 @@ def completion( # type: ignore # noqa: PLR0915
) # support region-based pricing for bedrock

### TIMEOUT LOGIC ###
timeout = timeout or kwargs.get("request_timeout", 600) or 600
# set timeout for 10 minutes by default
if isinstance(timeout, httpx.Timeout) and not supports_httpx_timeout(
custom_llm_provider
):
timeout = timeout.read or 600 # default 10 min timeout
elif not isinstance(timeout, httpx.Timeout):
timeout = float(timeout) # type: ignore
timeout = CompletionTimeout.resolve(
timeout,
kwargs,
custom_llm_provider,
global_timeout=getattr(litellm, "request_timeout", None),
supports_httpx_timeout=supports_httpx_timeout,
)

### REGISTER CUSTOM MODEL PRICING -- IF GIVEN ###
if (
Expand Down
1 change: 1 addition & 0 deletions tests/llm_translation/test_azure_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
0, os.path.abspath("../../")
) # Adds the parent directory to the system path

import httpx
import pytest
from litellm.llms.azure.common_utils import process_azure_headers
from httpx import Headers
Expand Down
46 changes: 46 additions & 0 deletions tests/local_testing/test_azure_anthropic_sync_post.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
``_get_httpx_client`` + ``HTTPHandler.post`` (same pattern as Azure Anthropic sync path:
``_get_httpx_client(params={"timeout": ...})`` then ``post(..., timeout=...)``).

Uses https://httpbin.org/delay/10 with ``timeout=5`` — the handler must raise :class:`~litellm.exceptions.Timeout`
before the 10s delay completes. Skips if httpbin is unreachable.

Lives under ``local_testing`` (not ``make test-unit``).
"""

import json
import os
import sys

import httpx
import pytest

sys.path.insert(
0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
)

from litellm.exceptions import Timeout as LitellmTimeout
from litellm.llms.custom_httpx.http_handler import _get_httpx_client

_HTTPBIN_DELAY_S = 10
_PER_REQUEST_TIMEOUT_S = 5.0
_CLIENT_DEFAULT_TIMEOUT_S = 60.0


def test_post_delay_exceeds_per_request_timeout_raises():
    """POST to a slow httpbin endpoint and expect the per-request timeout to fire.

    The client is built with ``_CLIENT_DEFAULT_TIMEOUT_S`` but the call itself
    passes ``_PER_REQUEST_TIMEOUT_S``; the test expects ``LitellmTimeout``.
    """
    # Probe httpbin first so an outage skips the test instead of failing it.
    try:
        httpx.get("https://httpbin.org/get", timeout=5.0)
    except Exception as probe_error:
        pytest.skip(f"httpbin.org unreachable: {probe_error}")

    slow_url = f"https://httpbin.org/delay/{_HTTPBIN_DELAY_S}"
    json_headers = {"content-type": "application/json"}
    request_body = json.dumps({"model": "claude", "messages": []})

    handler = _get_httpx_client(params={"timeout": _CLIENT_DEFAULT_TIMEOUT_S})
    try:
        with pytest.raises(LitellmTimeout):
            handler.post(
                slow_url,
                headers=json_headers,
                data=request_body,
                timeout=_PER_REQUEST_TIMEOUT_S,
            )
    finally:
        handler.close()
Original file line number Diff line number Diff line change
Expand Up @@ -222,5 +222,7 @@ def test_completion_non_streaming(self, mock_azure_config, mock_provider_manager

# Verify non-streaming was handled
mock_client.post.assert_called_once()
mock_get_client.assert_called_once_with(params={"timeout": timeout})
assert mock_client.post.call_args.kwargs["timeout"] == timeout
assert result is not None

Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""
Ensure litellm.completion() forwards timeout to Azure Anthropic handler (main.py dispatch).
"""

import os
import sys
from unittest.mock import MagicMock, patch

sys.path.insert(
0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../../.."))
)

from litellm import completion
from litellm.types.utils import ModelResponse


def test_main_azure_ai_claude_completion_passes_timeout_to_azure_anthropic_handler():
    """``completion(timeout=...)`` must reach the patched Azure Anthropic handler unchanged."""
    seen_kwargs: dict = {}

    def record_call_and_respond(**call_kwargs):
        # Keep whatever main.py forwarded so it can be inspected afterwards.
        seen_kwargs.update(call_kwargs)
        return ModelResponse()

    with patch("litellm.main.azure_anthropic_chat_completions") as handler_mock:
        handler_mock.completion = MagicMock(side_effect=record_call_and_respond)

        request_args = dict(
            model="azure_ai/claude-sonnet-4-5",
            messages=[{"role": "user", "content": "hi"}],
            api_base="https://example.services.ai.azure.com/anthropic",
            api_key="test-key",
            timeout=42.5,
        )
        completion(**request_args)

    handler_mock.completion.assert_called_once()
    assert seen_kwargs["timeout"] == 42.5
    assert seen_kwargs["model"] == "claude-sonnet-4-5"
    assert seen_kwargs["custom_llm_provider"] == "azure_ai"
30 changes: 29 additions & 1 deletion tests/test_litellm/llms/custom_httpx/test_http_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
) # Adds the parent directory to the system path
import litellm
from litellm.llms.custom_httpx.aiohttp_transport import LiteLLMAiohttpTransport
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, get_ssl_configuration
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
_get_httpx_client,
get_ssl_configuration,
)


@pytest.mark.asyncio
Expand Down Expand Up @@ -658,3 +663,26 @@ async def test_httpx_handler_uses_env_user_agent(monkeypatch):
assert req.headers.get("User-Agent") == "Claude Code"
finally:
await handler.close()


def test_get_httpx_client_applies_float_timeout_without_mocking_handler():
    """
    Run the real ``_get_httpx_client`` + ``HTTPHandler`` path: a float given as
    ``params={'timeout': x}`` must end up as the timeout of the underlying client.
    The value is deliberately unusual so it does not collide with other clients
    cached in the same process.
    """
    unusual_timeout = 3847.291
    expected = httpx.Timeout(unusual_timeout)
    client_handler = _get_httpx_client(params={"timeout": unusual_timeout})
    try:
        assert isinstance(client_handler, HTTPHandler)
        assert client_handler.client.timeout == expected
    finally:
        client_handler.close()


def test_get_httpx_client_applies_httpx_timeout_object_without_mocking_handler():
    """An ``httpx.Timeout`` passed via ``params`` must be the client's timeout as-is."""
    requested_timeout = httpx.Timeout(40.0, connect=5.0)
    client_handler = _get_httpx_client(params={"timeout": requested_timeout})
    try:
        assert client_handler.client.timeout == requested_timeout
    finally:
        client_handler.close()
Loading
Loading