@@ -683,7 +683,7 @@ async def _try_stream_with_content_timeout(
683683 if use_converse :
684684 converse_params = self ._build_converse_params (request , model_id )
685685 else :
686- body = self ._build_anthropic_body (request )
686+ body = self ._build_anthropic_body (request , model_id = model_id )
687687 invoke_kwargs = self ._build_invoke_kwargs (request , model_id )
688688
689689 content_received = False
@@ -806,12 +806,34 @@ async def _try_stream_with_content_timeout(
806806
807807 # ------------------------------------------------------------------
808808
809+ # Models that support the extended 1-hour cache TTL.
810+ # Only Claude 4.5 family models support ``ttl`` in ``cache_control``;
811+ # older/newer families (Claude 4, etc.) reject it with
812+ # ``Extra inputs are not permitted``.
813+ _EXTENDED_TTL_MODEL_PATTERNS = (
814+ "claude-opus-4-5" ,
815+ "claude-sonnet-4-5" ,
816+ "claude-haiku-4-5" ,
817+ )
818+
819+ @classmethod
820+ def _model_supports_cache_ttl (cls , model_id : str | None ) -> bool :
821+ """Check if a model supports the extended ``ttl`` field in cache_control."""
822+ if not model_id :
823+ return False
824+ return any (pat in model_id for pat in cls ._EXTENDED_TTL_MODEL_PATTERNS )
825+
809826 @staticmethod
810- def _new_cache_marker (ttl : str | None = None ) -> dict :
811- """Create a cache_control marker with configured TTL."""
827+ def _new_cache_marker (ttl : str | None = None , model_id : str | None = None ) -> dict :
828+ """Create a cache_control marker with configured TTL.
829+
830+ The ``ttl`` field is only supported by Claude 4.5 family models.
831+ For unsupported models the field must be omitted, otherwise Bedrock
832+ returns ``Extra inputs are not permitted``.
833+ """
812834 cache_ttl = ttl or get_settings ().PROMPT_CACHE_TTL
813835 marker : dict = {"type" : "ephemeral" }
814- if cache_ttl != "5m" :
836+ if cache_ttl != "5m" and BedrockClient . _model_supports_cache_ttl ( model_id ) :
815837 marker ["ttl" ] = cache_ttl
816838 return marker
817839
@@ -846,7 +868,9 @@ def _body_has_cache_control(body: dict) -> bool:
846868 return len (BedrockClient ._collect_cache_blocks (body )) > 0
847869
848870 @staticmethod
849- def _inject_prompt_cache_breakpoints (body : dict , ttl : str | None = None ) -> None :
871+ def _inject_prompt_cache_breakpoints (
872+ body : dict , ttl : str | None = None , model_id : str | None = None
873+ ) -> None :
850874 """Inject up to 4 cache_control breakpoints into the request body.
851875
852876 Strategy aligned with claudecode-bedrock-proxy:
@@ -860,17 +884,24 @@ def _inject_prompt_cache_breakpoints(body: dict, ttl: str | None = None) -> None
860884 count against this budget.
861885 """
862886 cache_ttl = ttl or get_settings ().PROMPT_CACHE_TTL
863- marker = BedrockClient ._new_cache_marker (ttl = cache_ttl )
887+ supports_ttl = BedrockClient ._model_supports_cache_ttl (model_id )
888+ marker = BedrockClient ._new_cache_marker (ttl = cache_ttl , model_id = model_id )
864889
865890 # --- Step 1: Upgrade TTL on pre-existing breakpoints ---
866891 existing_blocks = BedrockClient ._collect_cache_blocks (body )
867892 upgraded = 0
868- if cache_ttl != "5m" :
893+ if cache_ttl != "5m" and supports_ttl :
869894 for block in existing_blocks :
870895 cc = block .get ("cache_control" )
871896 if isinstance (cc , dict ):
872897 cc ["ttl" ] = cache_ttl
873898 upgraded += 1
899+ elif not supports_ttl :
900+ # Strip ttl from pre-existing breakpoints for unsupported models
901+ for block in existing_blocks :
902+ cc = block .get ("cache_control" )
903+ if isinstance (cc , dict ) and "ttl" in cc :
904+ del cc ["ttl" ]
874905
875906 existing = len (existing_blocks )
876907 budget = BedrockClient .MAX_CACHE_BREAKPOINTS - existing
@@ -951,7 +982,9 @@ def _inject_prompt_cache_breakpoints(body: dict, ttl: str | None = None) -> None
951982 )
952983
953984 @staticmethod
954- def _build_anthropic_body (request : BedrockRequest ) -> dict :
985+ def _build_anthropic_body (
986+ request : BedrockRequest , model_id : str | None = None
987+ ) -> dict :
955988 """
956989 Build an Anthropic Messages API request body from a BedrockRequest.
957990
@@ -1019,7 +1052,9 @@ def _build_anthropic_body(request: BedrockRequest) -> dict:
10191052 BedrockClient ._body_has_cache_control (body ) if should_inject else False
10201053 )
10211054 if should_inject and not has_cache :
1022- BedrockClient ._inject_prompt_cache_breakpoints (body , ttl = request .cache_ttl )
1055+ BedrockClient ._inject_prompt_cache_breakpoints (
1056+ body , ttl = request .cache_ttl , model_id = model_id
1057+ )
10231058
10241059 # --- effort parameter: requires beta flag + output_config wrapper ---
10251060 # Users may pass "effort" as a top-level field (via additional_model_request_fields).
@@ -1425,7 +1460,7 @@ async def _invoke_inner(
14251460 },
14261461 )
14271462 else :
1428- body = self ._build_anthropic_body (request )
1463+ body = self ._build_anthropic_body (request , model_id = model_id )
14291464 invoke_kwargs = self ._build_invoke_kwargs (request , model_id )
14301465
14311466 max_retries = 3
@@ -1745,7 +1780,7 @@ async def _invoke_stream_inner(
17451780 },
17461781 )
17471782 else :
1748- body = self ._build_anthropic_body (request )
1783+ body = self ._build_anthropic_body (request , model_id = model_id )
17491784 invoke_kwargs = self ._build_invoke_kwargs (request , model_id )
17501785
17511786 max_retries = 4
0 commit comments