aws-samples
diff --git a/backend/app/api/admin/endpoints/tokens.py b/backend/app/api/admin/endpoints/tokens.py
Lines changed: 29 additions & 8 deletions
diff --git a/backend/app/services/bedrock.py b/backend/app/services/bedrock.py
Lines changed: 46 additions & 11 deletions
diff --git a/backend/app/services/token.py b/backend/app/services/token.py
Lines changed: 4 additions & 5 deletions
@@ -7,7 +7,7 @@
 from typing import List
 
 from fastapi import APIRouter, Depends, HTTPException, status
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession
 
@@ -58,16 +58,27 @@ class CreateTokenRequest(BaseModel):
 
 
 class BatchCreateTokenRequest(BaseModel):
-    """Batch create tokens request."""
+    """Batch create tokens request.
 
-    count: int = Field(ge=1, le=100)
-    name_prefix: str
+    ``names`` is a string of token names separated by commas, semicolons (ASCII or full-width) or newlines (e.g. "alice, bob, charlie").
+    Whitespace around each name is automatically trimmed and empty entries are ignored.
+    """
+
+    names: str
     expires_at: datetime | None = None
     quota_usd: Decimal | None = None
     allowed_ips: List[str] | None = None
     token_metadata: dict | None = None
     model_names: List[str] | None = None
 
+    def parsed_names(self) -> List[str]:
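+        """Split ``names`` into a list of names. Separators are ASCII and full-width commas, ASCII and full-width semicolons, and newlines; surrounding whitespace is stripped and empty entries are dropped."""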
+        import re
+
+        return [
+            n for n in (s.strip() for s in re.split(r"[,，;；\n]+", self.names)) if n
+        ]
+
 
 class UpdateTokenRequest(BaseModel):
     """Update token request."""
@@ -217,19 +228,29 @@ async def batch_create_tokens(
     """
     Batch create API tokens with optional shared model list.
 
-    - **count**: Number of tokens to create (1-100)
-    - **name_prefix**: Name prefix, tokens named {prefix}-001, {prefix}-002, ...
+    - **names**: Comma-separated token names (e.g. "alice, bob, charlie"); at least 1 and at most 100 names per request
     - **model_names**: Optional list of model names to assign to all tokens
     """
+    names = request.parsed_names()
+    if not names:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="No valid names provided",
+        )
+    if len(names) > 100:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Too many names ({len(names)}), maximum is 100",
+        )
+
     try:
         validated_meta = validate_token_metadata(request.token_metadata)
     except ValueError as e:
         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
 
     results = await token_service.create_tokens_batch(
         user_id=current_user.id,
-        count=request.count,
-        name_prefix=request.name_prefix,
+        names=names,
         expires_at=request.expires_at,
         quota_usd=request.quota_usd,
         allowed_ips=request.allowed_ips,
 
@@ -683,7 +683,7 @@ async def _try_stream_with_content_timeout(
         if use_converse:
             converse_params = self._build_converse_params(request, model_id)
         else:
-            body = self._build_anthropic_body(request)
+            body = self._build_anthropic_body(request, model_id=model_id)
             invoke_kwargs = self._build_invoke_kwargs(request, model_id)
 
         content_received = False
@@ -806,12 +806,34 @@ async def _try_stream_with_content_timeout(
 
     # ------------------------------------------------------------------
 
+    # Model-ID substrings identifying models that support the extended 1-hour cache TTL.
+    # Only Claude 4.5 family models support ``ttl`` in ``cache_control``;
+    # other model families (e.g. Claude 4) reject it with
+    # ``Extra inputs are not permitted``.
+    _EXTENDED_TTL_MODEL_PATTERNS = (
+        "claude-opus-4-5",
+        "claude-sonnet-4-5",
+        "claude-haiku-4-5",
+    )
+
+    @classmethod
+    def _model_supports_cache_ttl(cls, model_id: str | None) -> bool:
+        """Check if a model supports the extended ``ttl`` field in cache_control."""
+        if not model_id:
+            return False
+        return any(pat in model_id for pat in cls._EXTENDED_TTL_MODEL_PATTERNS)
+
     @staticmethod
-    def _new_cache_marker(ttl: str | None = None) -> dict:
-        """Create a cache_control marker with configured TTL."""
+    def _new_cache_marker(ttl: str | None = None, model_id: str | None = None) -> dict:
+        """Create a cache_control marker with configured TTL.
+
+        The ``ttl`` field is only supported by Claude 4.5 family models.
+        For unsupported models the field must be omitted, otherwise Bedrock
+        returns ``Extra inputs are not permitted``.
+        """
         cache_ttl = ttl or get_settings().PROMPT_CACHE_TTL
         marker: dict = {"type": "ephemeral"}
-        if cache_ttl != "5m":
+        if cache_ttl != "5m" and BedrockClient._model_supports_cache_ttl(model_id):
             marker["ttl"] = cache_ttl
         return marker
 
@@ -846,7 +868,9 @@ def _body_has_cache_control(body: dict) -> bool:
         return len(BedrockClient._collect_cache_blocks(body)) > 0
 
     @staticmethod
-    def _inject_prompt_cache_breakpoints(body: dict, ttl: str | None = None) -> None:
+    def _inject_prompt_cache_breakpoints(
+        body: dict, ttl: str | None = None, model_id: str | None = None
+    ) -> None:
         """Inject up to 4 cache_control breakpoints into the request body.
 
         Strategy aligned with claudecode-bedrock-proxy:
@@ -860,17 +884,24 @@ def _inject_prompt_cache_breakpoints(body: dict, ttl: str | None = None) -> None
         count against this budget.
         """
         cache_ttl = ttl or get_settings().PROMPT_CACHE_TTL
-        marker = BedrockClient._new_cache_marker(ttl=cache_ttl)
+        supports_ttl = BedrockClient._model_supports_cache_ttl(model_id)
+        marker = BedrockClient._new_cache_marker(ttl=cache_ttl, model_id=model_id)
 
         # --- Step 1: Upgrade TTL on pre-existing breakpoints ---
         existing_blocks = BedrockClient._collect_cache_blocks(body)
         upgraded = 0
-        if cache_ttl != "5m":
+        if cache_ttl != "5m" and supports_ttl:
             for block in existing_blocks:
                 cc = block.get("cache_control")
                 if isinstance(cc, dict):
                     cc["ttl"] = cache_ttl
                     upgraded += 1
+        elif not supports_ttl:
+            # Strip ttl from pre-existing breakpoints for unsupported models
+            for block in existing_blocks:
+                cc = block.get("cache_control")
+                if isinstance(cc, dict) and "ttl" in cc:
+                    del cc["ttl"]
 
         existing = len(existing_blocks)
         budget = BedrockClient.MAX_CACHE_BREAKPOINTS - existing
@@ -951,7 +982,9 @@ def _inject_prompt_cache_breakpoints(body: dict, ttl: str | None = None) -> None
             )
 
     @staticmethod
-    def _build_anthropic_body(request: BedrockRequest) -> dict:
+    def _build_anthropic_body(
+        request: BedrockRequest, model_id: str | None = None
+    ) -> dict:
         """
         Build an Anthropic Messages API request body from a BedrockRequest.
 
@@ -1019,7 +1052,9 @@ def _build_anthropic_body(request: BedrockRequest) -> dict:
             BedrockClient._body_has_cache_control(body) if should_inject else False
         )
         if should_inject and not has_cache:
-            BedrockClient._inject_prompt_cache_breakpoints(body, ttl=request.cache_ttl)
+            BedrockClient._inject_prompt_cache_breakpoints(
+                body, ttl=request.cache_ttl, model_id=model_id
+            )
 
         # --- effort parameter: requires beta flag + output_config wrapper ---
         # Users may pass "effort" as a top-level field (via additional_model_request_fields).
@@ -1425,7 +1460,7 @@ async def _invoke_inner(
                 },
             )
         else:
-            body = self._build_anthropic_body(request)
+            body = self._build_anthropic_body(request, model_id=model_id)
             invoke_kwargs = self._build_invoke_kwargs(request, model_id)
 
         max_retries = 3
@@ -1745,7 +1780,7 @@ async def _invoke_stream_inner(
                 },
             )
         else:
-            body = self._build_anthropic_body(request)
+            body = self._build_anthropic_body(request, model_id=model_id)
             invoke_kwargs = self._build_invoke_kwargs(request, model_id)
 
         max_retries = 4
 
@@ -87,16 +87,15 @@ async def create_token(
     async def create_tokens_batch(
         self,
         user_id: UUID,
-        count: int,
-        name_prefix: str,
+        names: List[str],
         expires_at: Optional[datetime] = None,
         quota_usd: Optional[Decimal] = None,
         allowed_ips: Optional[List[str]] = None,
         token_metadata: Optional[dict] = None,
         model_names: Optional[List[str]] = None,
     ) -> List[tuple[APIToken, str]]:
         """
-        Batch create API tokens with optional shared model list.
+        Batch create API tokens with explicit names and optional shared model list.
 
         All tokens are inserted in a single transaction (atomic).
 
@@ -105,14 +104,14 @@ async def create_tokens_batch(
         """
         tokens_and_keys: List[tuple[APIToken, str]] = []
 
-        for i in range(1, count + 1):
+        for name in names:
             plain_token = generate_api_token()
             token_hash = hash_token(plain_token)
             encrypted = encrypt_token(plain_token)
 
             token = APIToken(
                 user_id=user_id,
-                name=f"{name_prefix}-{i:03d}",
+                name=name,
                 token_hash=token_hash,
                 encrypted_token=encrypted,
                 expires_at=expires_at,