BerriAI · ishaan-berri · Apr 16, 2026 · Apr 12, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/litellm/constants.py b/litellm/constants.py
@@ -413,7 +413,20 @@
 )
 DEFAULT_MAX_TOKENS_FOR_TRITON = int(os.getenv("DEFAULT_MAX_TOKENS_FOR_TRITON", 2000))
 #### Networking settings ####
-request_timeout: float = float(os.getenv("REQUEST_TIMEOUT", 6000))  # time in seconds
+# Sentinel used when `REQUEST_TIMEOUT` is unset: `litellm.request_timeout` keeps this
+# value so longer-running surfaces (Router `timeout or litellm.request_timeout`,
+# speech/TTS, responses, vector stores, etc.) get a long HTTP deadline. Chat `completion()`
+# maps this sentinel down to 600s when no per-request/model timeout is set (an explicit
+# `REQUEST_TIMEOUT=6000` compares equal and is mapped too); see ``CompletionTimeout.resolve``
+# in completion_timeout.py. MCP uses dedicated timeouts (e.g. `MCP_CLIENT_TIMEOUT`) instead.
+DEFAULT_REQUEST_TIMEOUT_SECONDS: float = 6000.0
+# Pair used for default httpx clients when no custom timeout is passed: overall
+# (read/write/pool) deadline and connect handshake (see ``http_handler`` cached handler paths).
+COMPLETION_HTTP_FALLBACK_SECONDS: float = 600.0
+HTTP_HANDLER_CONNECT_TIMEOUT_SECONDS: float = 5.0
+request_timeout: float = float(
+    os.getenv("REQUEST_TIMEOUT", str(int(DEFAULT_REQUEST_TIMEOUT_SECONDS)))
+)
 DEFAULT_A2A_AGENT_TIMEOUT: float = float(
     os.getenv("DEFAULT_A2A_AGENT_TIMEOUT", 6000)
 )  # 10 minutes

diff --git a/litellm/litellm_core_utils/completion_timeout.py b/litellm/litellm_core_utils/completion_timeout.py
@@ -0,0 +1,83 @@
+"""Completion HTTP timeout resolution (kept out of ``main.py`` to limit import cycles)."""
+
+from __future__ import annotations
+
+from typing import Callable, Optional, Union
+
+import httpx
+
+from litellm.constants import (
+    COMPLETION_HTTP_FALLBACK_SECONDS,
+    DEFAULT_REQUEST_TIMEOUT_SECONDS,
+)
+
+
+class CompletionTimeout:
+    """Resolves HTTP timeout for ``completion()`` from model vs global settings."""
+
+    @staticmethod
+    def _fallback_when_no_explicit_timeout(
+        global_timeout: Optional[Union[float, str]],
+    ) -> float:
+        """
+        Used when ``model_timeout`` and kwargs timeouts are all unset.
+
+        ``global_timeout`` is :attr:`litellm.request_timeout` (numeric / string), not
+        :class:`httpx.Timeout`.
+
+        If it equals :data:`~litellm.constants.DEFAULT_REQUEST_TIMEOUT_SECONDS` (6000),
+        return :data:`~litellm.constants.COMPLETION_HTTP_FALLBACK_SECONDS`. Same if
+        ``None``. Otherwise return ``float(global_timeout)``.
+        """
+        if global_timeout is None:
+            return COMPLETION_HTTP_FALLBACK_SECONDS
+        if float(global_timeout) == float(DEFAULT_REQUEST_TIMEOUT_SECONDS):
+            return COMPLETION_HTTP_FALLBACK_SECONDS
+        return float(global_timeout)
+
+    @staticmethod
+    def resolve(
+        model_timeout: Optional[Union[float, str, httpx.Timeout]],
+        kwargs: dict,
+        custom_llm_provider: str,
+        *,
+        global_timeout: Optional[Union[float, str]],
+        supports_httpx_timeout: Callable[[str], bool],
+    ) -> Union[float, httpx.Timeout]:
+        """
+        Resolution order (first non-None wins):
+
+        1. ``model_timeout`` (call argument / merged ``litellm_params``)
+        2. ``kwargs["timeout"]``
+        3. ``kwargs["request_timeout"]``
+        4. Fallback from ``global_timeout`` (:attr:`litellm.request_timeout`) — if it is
+           the package default (6000), use 600 instead.
+
+        An :class:`httpx.Timeout` is coerced to its ``read`` value (600 if ``read`` is unset)
+        when the provider does not support it. Explicit ``6000`` on the model or in kwargs is kept.
+        """
+        resolved: Union[float, str, httpx.Timeout]
+        if model_timeout is not None:
+            resolved = model_timeout
+        elif kwargs.get("timeout") is not None:
+            resolved = kwargs["timeout"]
+        elif kwargs.get("request_timeout") is not None:
+            resolved = kwargs["request_timeout"]
+        else:
+            resolved = CompletionTimeout._fallback_when_no_explicit_timeout(
+                global_timeout
+            )
+
+        if isinstance(resolved, httpx.Timeout) and not supports_httpx_timeout(
+            custom_llm_provider
+        ):
+            read_timeout = resolved.read
+            resolved = (
+                float(read_timeout)
+                if read_timeout is not None
+                else COMPLETION_HTTP_FALLBACK_SECONDS
+            )  # default 10 min timeout
+        elif not isinstance(resolved, httpx.Timeout):
+            resolved = float(resolved)  # type: ignore
+
+        return resolved
diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py
@@ -30,7 +30,9 @@
     AIOHTTP_KEEPALIVE_TIMEOUT,
     AIOHTTP_NEEDS_CLEANUP_CLOSED,
     AIOHTTP_TTL_DNS_CACHE,
+    COMPLETION_HTTP_FALLBACK_SECONDS,
     DEFAULT_SSL_CIPHERS,
+    HTTP_HANDLER_CONNECT_TIMEOUT_SECONDS,
 )
 from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
 from litellm.types.llms.custom_http import *
@@ -70,7 +72,10 @@ def get_default_headers() -> dict:
 headers = get_default_headers()
 
 # https://www.python-httpx.org/advanced/timeouts
-_DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
+_DEFAULT_TIMEOUT = httpx.Timeout(
+    timeout=COMPLETION_HTTP_FALLBACK_SECONDS,
+    connect=HTTP_HANDLER_CONNECT_TIMEOUT_SECONDS,
+)
 
 
 def _prepare_request_data_and_content(
@@ -1244,7 +1249,7 @@ def get_async_httpx_client(
         _new_client = AsyncHTTPHandler(**handler_params)
     else:
         _new_client = AsyncHTTPHandler(
-            timeout=httpx.Timeout(timeout=600.0, connect=5.0),
+            timeout=_DEFAULT_TIMEOUT,
             shared_session=shared_session,
         )
 
@@ -1293,7 +1298,7 @@ def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
         }
         _new_client = HTTPHandler(**handler_params)
     else:
-        _new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
+        _new_client = HTTPHandler(timeout=_DEFAULT_TIMEOUT)
 
     cache.set_cache(
         key=_cache_key_name,

diff --git a/litellm/main.py b/litellm/main.py
@@ -76,6 +76,7 @@
     calculate_request_duration,
     get_audio_file_for_health_check,
 )
+from litellm.litellm_core_utils.completion_timeout import CompletionTimeout
 from litellm.litellm_core_utils.dd_tracing import tracer
 from litellm.litellm_core_utils.get_provider_specific_headers import (
     ProviderSpecificHeaderUtils,
@@ -1400,14 +1401,13 @@ def completion(  # type: ignore # noqa: PLR0915
             )  # support region-based pricing for bedrock
 
         ### TIMEOUT LOGIC ###
-        timeout = timeout or kwargs.get("request_timeout", 600) or 600
-        # set timeout for 10 minutes by default
-        if isinstance(timeout, httpx.Timeout) and not supports_httpx_timeout(
-            custom_llm_provider
-        ):
-            timeout = timeout.read or 600  # default 10 min timeout
-        elif not isinstance(timeout, httpx.Timeout):
-            timeout = float(timeout)  # type: ignore
+        timeout = CompletionTimeout.resolve(
+            timeout,
+            kwargs,
+            custom_llm_provider,
+            global_timeout=getattr(litellm, "request_timeout", None),
+            supports_httpx_timeout=supports_httpx_timeout,
+        )
 
         ### REGISTER CUSTOM MODEL PRICING -- IF GIVEN ###
         if (

diff --git a/tests/llm_translation/test_azure_openai.py b/tests/llm_translation/test_azure_openai.py
@@ -5,6 +5,7 @@
     0, os.path.abspath("../../")
 )  # Adds the parent directory to the system path
 
+import httpx
 import pytest
 from litellm.llms.azure.common_utils import process_azure_headers
 from httpx import Headers

diff --git a/tests/local_testing/test_azure_anthropic_sync_post.py b/tests/local_testing/test_azure_anthropic_sync_post.py
@@ -0,0 +1,46 @@
+"""
+``_get_httpx_client`` + ``HTTPHandler.post`` (same pattern as Azure Anthropic sync path:
+``_get_httpx_client(params={"timeout": ...})`` then ``post(..., timeout=...)``).
+
+Uses https://httpbin.org/delay/10 with ``timeout=5`` — the handler must raise :class:`~litellm.exceptions.Timeout`
+before the 10s delay completes. Skips if httpbin is unreachable.
+
+Lives under ``local_testing`` (not ``make test-unit``).
+"""
+
+import json
+import os
+import sys
+
+import httpx
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
+)
+
+from litellm.exceptions import Timeout as LitellmTimeout
+from litellm.llms.custom_httpx.http_handler import _get_httpx_client
+
+_HTTPBIN_DELAY_S = 10
+_PER_REQUEST_TIMEOUT_S = 5.0
+_CLIENT_DEFAULT_TIMEOUT_S = 60.0
+
+
+def test_post_delay_exceeds_per_request_timeout_raises():
+    try:
+        httpx.get("https://httpbin.org/get", timeout=5.0)
+    except Exception as e:
+        pytest.skip(f"httpbin.org unreachable: {e}")
+
+    handler = _get_httpx_client(params={"timeout": _CLIENT_DEFAULT_TIMEOUT_S})
+    try:
+        with pytest.raises(LitellmTimeout):
+            handler.post(
+                f"https://httpbin.org/delay/{_HTTPBIN_DELAY_S}",
+                headers={"content-type": "application/json"},
+                data=json.dumps({"model": "claude", "messages": []}),
+                timeout=_PER_REQUEST_TIMEOUT_S,
+            )
+    finally:
+        handler.close()
diff --git a/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_handler.py b/tests/test_litellm/llms/azure_ai/claude/test_azure_anthropic_handler.py
@@ -222,5 +222,7 @@ def test_completion_non_streaming(self, mock_azure_config, mock_provider_manager
 
         # Verify non-streaming was handled
         mock_client.post.assert_called_once()
+        mock_get_client.assert_called_once_with(params={"timeout": timeout})
+        assert mock_client.post.call_args.kwargs["timeout"] == timeout
         assert result is not None
 
diff --git a/tests/test_litellm/llms/azure_ai/claude/test_main_azure_anthropic_timeout.py b/tests/test_litellm/llms/azure_ai/claude/test_main_azure_anthropic_timeout.py
@@ -0,0 +1,42 @@
+"""
+Ensure litellm.completion() forwards timeout to Azure Anthropic handler (main.py dispatch).
+"""
+
+import os
+import sys
+from unittest.mock import MagicMock, patch
+
+sys.path.insert(
+    0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../../.."))
+)
+
+from litellm import completion
+from litellm.types.utils import ModelResponse
+
+
+def test_main_azure_ai_claude_completion_passes_timeout_to_azure_anthropic_handler():
+    captured: dict = {}
+
+    def fake_azure_anthropic_completion(**kwargs):
+        captured.update(kwargs)
+        return ModelResponse()
+
+    with patch(
+        "litellm.main.azure_anthropic_chat_completions"
+    ) as mock_azure_anthropic:
+        mock_azure_anthropic.completion = MagicMock(
+            side_effect=fake_azure_anthropic_completion
+        )
+
+        completion(
+            model="azure_ai/claude-sonnet-4-5",
+            messages=[{"role": "user", "content": "hi"}],
+            api_base="https://example.services.ai.azure.com/anthropic",
+            api_key="test-key",
+            timeout=42.5,
+        )
+
+    mock_azure_anthropic.completion.assert_called_once()
+    assert captured["timeout"] == 42.5
+    assert captured["model"] == "claude-sonnet-4-5"
+    assert captured["custom_llm_provider"] == "azure_ai"
diff --git a/tests/test_litellm/llms/custom_httpx/test_http_handler.py b/tests/test_litellm/llms/custom_httpx/test_http_handler.py
@@ -15,7 +15,12 @@
 )  # Adds the parent directory to the system path
 import litellm
 from litellm.llms.custom_httpx.aiohttp_transport import LiteLLMAiohttpTransport
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, get_ssl_configuration
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    _get_httpx_client,
+    get_ssl_configuration,
+)
 
 
 @pytest.mark.asyncio
@@ -658,3 +663,26 @@ async def test_httpx_handler_uses_env_user_agent(monkeypatch):
         assert req.headers.get("User-Agent") == "Claude Code"
     finally:
         await handler.close()
+
+
+def test_get_httpx_client_applies_float_timeout_without_mocking_handler():
+    """
+    Exercise real _get_httpx_client + HTTPHandler: params={'timeout': x} must reach httpx.Client(timeout=...).
+    Uses an uncommon timeout value to avoid colliding with other cached clients in-process.
+    """
+    timeout = 3847.291
+    handler = _get_httpx_client(params={"timeout": timeout})
+    try:
+        assert isinstance(handler, HTTPHandler)
+        assert handler.client.timeout == httpx.Timeout(timeout)
+    finally:
+        handler.close()
+
+
+def test_get_httpx_client_applies_httpx_timeout_object_without_mocking_handler():
+    t = httpx.Timeout(40.0, connect=5.0)
+    handler = _get_httpx_client(params={"timeout": t})
+    try:
+        assert handler.client.timeout == t
+    finally:
+        handler.close()