dstackai
diff --git a/src/dstack/_internal/cli/utils/run.py b/src/dstack/_internal/cli/utils/run.py
Lines changed: 19 additions & 19 deletions
diff --git a/src/dstack/_internal/core/models/configurations.py b/src/dstack/_internal/core/models/configurations.py
Lines changed: 7 additions & 7 deletions
diff --git a/src/dstack/_internal/server/background/tasks/process_runs.py b/src/dstack/_internal/server/background/tasks/process_runs.py
Lines changed: 2 additions & 0 deletions
diff --git a/src/dstack/_internal/server/services/runs.py b/src/dstack/_internal/server/services/runs.py
Lines changed: 67 additions & 27 deletions
diff --git a/src/dstack/_internal/server/services/services/autoscalers.py b/src/dstack/_internal/server/services/services/autoscalers.py
Lines changed: 1 addition & 1 deletion
@@ -119,9 +119,9 @@ def th(s: str) -> str:
     if include_run_properties:
         props.add_row(th("Configuration"), run_spec.configuration_path)
         props.add_row(th("Type"), run_spec.configuration.type)
-    
+
     from dstack._internal.core.models.configurations import ServiceConfiguration
-    
+
     if (
         include_run_properties
         and isinstance(run_spec.configuration, ServiceConfiguration)
@@ -130,21 +130,21 @@ def th(s: str) -> str:
         groups_info = []
         for group in run_spec.configuration.replica_groups:
             group_parts = [f"[cyan]{group.name}[/cyan]"]
-            
+
             if group.replicas.min == group.replicas.max:
                 group_parts.append(f"×{group.replicas.max}")
             else:
                 group_parts.append(f"×{group.replicas.min}..{group.replicas.max}")
                 group_parts.append("[dim](autoscalable)[/dim]")
-            
+
             group_parts.append(f"[dim]({group.resources.pretty_format()})[/dim]")
-            
+
             groups_info.append(" ".join(group_parts))
-        
+
         props.add_row(th("Replica groups"), "\n".join(groups_info))
     else:
         props.add_row(th("Resources"), pretty_req)
-    
+
     props.add_row(th("Spot policy"), spot_policy)
     props.add_row(th("Max price"), max_price)
     if include_run_properties:
@@ -163,27 +163,27 @@ def th(s: str) -> str:
     offers.add_column("INSTANCE TYPE", style="grey58", no_wrap=True, ratio=2)
     offers.add_column("PRICE", style="grey58", ratio=1)
     offers.add_column()
-    
+
     # For replica groups, show offers from all job plans
     if len(run_plan.job_plans) > 1:
         # Multiple jobs - aggregate offers from all groups
         all_offers = []
         groups_with_no_offers = []
         total_offers_count = 0
-        
+
         for jp in run_plan.job_plans:
             group_name = jp.job_spec.replica_group_name or "default"
             if jp.total_offers == 0:
                 groups_with_no_offers.append(group_name)
             for offer in jp.offers[:max_offers] if max_offers else jp.offers:
                 all_offers.append((group_name, offer))
             total_offers_count += jp.total_offers
-        
+
         # Sort by price
         all_offers.sort(key=lambda x: x[1].price)
         if max_offers:
             all_offers = all_offers[:max_offers]
-        
+
         # Show groups with no offers FIRST
         for group_name in groups_with_no_offers:
             offers.add_row(
@@ -196,7 +196,7 @@ def th(s: str) -> str:
                 "",
                 style="secondary",
             )
-        
+
         # Then show groups with offers
         for i, (group_name, offer) in enumerate(all_offers, start=1):
             r = offer.instance.resources
@@ -212,10 +212,10 @@ def th(s: str) -> str:
             instance = offer.instance.name
             if offer.total_blocks > 1:
                 instance += f" ({offer.blocks}/{offer.total_blocks})"
-            
+
             # Add group name prefix for multi-group display
             backend_display = f"[cyan]{group_name}[/cyan]: {offer.backend.replace('remote', 'ssh')} ({offer.region})"
-            
+
             offers.add_row(
                 f"{i}",
                 backend_display,
@@ -225,7 +225,7 @@ def th(s: str) -> str:
                 availability,
                 style=None if i == 1 or not include_run_properties else "secondary",
             )
-        
+
         if total_offers_count > len(all_offers):
             offers.add_row("", "...", style="secondary")
     else:
@@ -260,14 +260,14 @@ def th(s: str) -> str:
 
     console.print(props)
     console.print()
-    
+
     # Check if we have offers to display
     has_offers = False
     if len(run_plan.job_plans) > 1:
         has_offers = any(len(jp.offers) > 0 for jp in run_plan.job_plans)
     else:
         has_offers = len(job_plan.offers) > 0
-    
+
     if has_offers:
         console.print(offers)
         # Show summary for multi-job plans
@@ -343,12 +343,12 @@ def get_runs_table(
             if verbose and latest_job_submission.inactivity_secs:
                 inactive_for = format_duration_multiunit(latest_job_submission.inactivity_secs)
                 status += f" (inactive for {inactive_for})"
-            
+
             job_name_parts = [f"  replica={job.job_spec.replica_num}"]
             if job.job_spec.replica_group_name:
                 job_name_parts.append(f"[cyan]group={job.job_spec.replica_group_name}[/cyan]")
             job_name_parts.append(f"job={job.job_spec.job_num}")
-            
+
             job_row: Dict[Union[str, int], Any] = {
                 "NAME": " ".join(job_name_parts)
                 + (
 
@@ -846,32 +846,32 @@ def validate_gateway(
     def validate_replica_groups_xor_replicas(cls, values):
         replica_groups = values.get("replica_groups")
         replicas = values.get("replicas")
-        
+
         # Check if user specified both
         has_groups = replica_groups is not None
         has_replicas = replicas != Range[int](min=1, max=1)
-        
+
         if has_groups and has_replicas:
             raise ValueError("Cannot specify both 'replicas' and 'replica_groups'")
-        
+
         if has_groups:
             # Validate unique names
             names = [g.name for g in replica_groups]
             if len(names) != len(set(names)):
                 raise ValueError("Replica group names must be unique")
-            
+
             # Validate at least one group
             if not replica_groups:
                 raise ValueError("replica_groups cannot be empty")
-        
+
         return values
 
     @root_validator()
     def validate_scaling(cls, values):
         scaling = values.get("scaling")
         replicas = values.get("replicas")
         replica_groups = values.get("replica_groups")
-        
+
         if replica_groups:
             # Check if any group has a range
             has_range = any(g.replicas.min != g.replicas.max for g in replica_groups)
@@ -883,7 +883,7 @@ def validate_scaling(cls, values):
             raise ValueError("When you set `replicas` to a range, ensure to specify `scaling`.")
         elif replicas and replicas.min == replicas.max and scaling:
             raise ValueError("To use `scaling`, `replicas` must be set to a range.")
-        
+
         return values
 
     @validator("rate_limits")
 
@@ -484,6 +484,7 @@ async def _handle_run_replicas(
                 session,
                 run_model,
                 replicas_diff=max_replica_count - non_terminated_replica_count,
+                allow_exceeding_max=True,  # Allow exceeding max for rolling deployments
             )
 
         replicas_to_stop_count = 0
@@ -510,6 +511,7 @@ async def _handle_run_replicas(
                 session,
                 run_model,
                 replicas_diff=-replicas_to_stop_count,
+                allow_exceeding_max=True,  # Allow terminating out-of-date replicas during rolling deployment
             )
 
 
 
@@ -340,7 +340,7 @@ async def get_plan(
                 action = ApplyAction.UPDATE
 
     secrets = await get_project_secrets_mapping(session=session, project=project)
-    
+
     # For services with replica groups, create jobs for all groups during planning
     jobs = []
     if (
@@ -407,12 +407,12 @@ async def get_plan(
     job_plans = []
     for job in jobs:
         job_offers: List[InstanceOfferWithAvailability] = []
-        
+
         # Filter pool offers to match this job's GPU requirements
         gpu_req = None
         if job.job_spec.requirements.resources and job.job_spec.requirements.resources.gpu:
             gpu_req = job.job_spec.requirements.resources.gpu.name
-        
+
         matching_pool_offers = []
         for pool_offer in pool_offers:
             offer_gpus = pool_offer.instance.resources.gpus
@@ -424,9 +424,9 @@ async def get_plan(
             elif not gpu_req:
                 # No GPU requirement, include all pool offers
                 matching_pool_offers.append(pool_offer)
-        
+
         job_offers.extend(matching_pool_offers)
-        
+
         # Use shared offers if all jobs are identical, otherwise fetch per-job
         if shared_offers:
             job_offers.extend(offer for _, offer in shared_offers)
@@ -443,7 +443,7 @@ async def get_plan(
                 instance_mounts=check_run_spec_requires_instance_mounts(effective_run_spec),
             )
             job_offers.extend(offer for _, offer in job_specific_offers)
-        
+
         job_offers.sort(key=lambda offer: not offer.availability.is_available())
 
         job_spec = job.job_spec
@@ -1287,7 +1287,21 @@ async def process_terminating_run(session: AsyncSession, run_model: RunModel):
         )
 
 
-async def scale_run_replicas(session: AsyncSession, run_model: RunModel, replicas_diff: int):
+async def scale_run_replicas(
+    session: AsyncSession,
+    run_model: RunModel,
+    replicas_diff: int,
+    allow_exceeding_max: bool = False,
+):
+    """
+    Scale run replicas up or down.
+
+    Args:
+        session: Database session
+        run_model: The run to scale
+        replicas_diff: Number of replicas to add (positive) or remove (negative)
+        allow_exceeding_max: If True (rolling deployments), scale-up may exceed a group's configured max and scale-down may terminate out-of-date replicas even in non-autoscalable groups
+    """
     if replicas_diff == 0:
         # nothing to do
         return
@@ -1349,9 +1363,10 @@ async def scale_run_replicas(session: AsyncSession, run_model: RunModel, replica
         # Get group minimums
         group_mins = {g.name: g.replicas.min for g in normalized_groups}
 
-        # Terminate from end (reversed), but skip if group not autoscalable or at minimum
+        # Terminate from end (reversed)
+        # For rolling deployments (allow_exceeding_max), prioritize terminating out-of-date replicas
         terminated_count = 0
-        for _, _, _, replica_jobs in reversed(active_replicas):
+        for _, is_out_of_date, _, replica_jobs in reversed(active_replicas):
             if terminated_count >= abs(replicas_diff):
                 break
 
@@ -1360,7 +1375,19 @@ async def scale_run_replicas(session: AsyncSession, run_model: RunModel, replica
 
             group_name = replica_jobs[0].replica_group_name or "default"
 
-            # Skip if not autoscalable
+            # For rolling deployment, allow terminating any out-of-date replica
+            if allow_exceeding_max and is_out_of_date:
+                # Terminate this replica (out-of-date during rolling deployment)
+                for job in replica_jobs:
+                    if not job.status.is_finished() and job.status != JobStatus.TERMINATING:
+                        job.status = JobStatus.TERMINATING
+                        job.termination_reason = JobTerminationReason.SCALED_DOWN
+
+                group_counts[group_name] -= 1
+                terminated_count += 1
+                continue
+
+            # For normal scaling, skip if not autoscalable
             if normalized_groups and group_name not in autoscalable_groups:
                 continue
 
@@ -1379,29 +1406,42 @@ async def scale_run_replicas(session: AsyncSession, run_model: RunModel, replica
             group_counts[group_name] -= 1
             terminated_count += 1
     else:
-        # SCALE UP: Choose from autoscalable groups
-        autoscalable_groups = [g for g in normalized_groups if g.replicas.min != g.replicas.max]
-
-        if normalized_groups and not autoscalable_groups:
-            # No autoscalable groups, cannot scale
-            logger.info("%s: no autoscalable groups available for scaling up", fmt(run_model))
-            return
-
-        # Count current replicas per group to respect maximums
+        # SCALE UP
+        # Count current replicas per group
         group_counts = {}
         for _, _, _, replica_jobs in active_replicas:
             if replica_jobs:
                 group_name = replica_jobs[0].replica_group_name or "default"
                 group_counts[group_name] = group_counts.get(group_name, 0) + 1
 
-        # Filter groups that haven't reached maximum
-        eligible_groups = [
-            g for g in autoscalable_groups if group_counts.get(g.name, 0) < (g.replicas.max or float("inf"))
-        ] if normalized_groups else normalized_groups
+        # First, identify groups below minimum (need to scale regardless of autoscalability)
+        below_min_groups = [
+            g for g in normalized_groups
+            if group_counts.get(g.name, 0) < (g.replicas.min or 0)
+        ]
+
+        # Then, identify autoscalable groups that can scale beyond minimum
+        autoscalable_groups = [
+            g for g in normalized_groups
+            if g.replicas.min != g.replicas.max and (
+                allow_exceeding_max or group_counts.get(g.name, 0) < (g.replicas.max or float("inf"))
+            )
+        ]
+
+        # Pick eligible groups by priority: below-min groups first, otherwise autoscalable groups
+        eligible_groups = []
+        if below_min_groups:
+            eligible_groups.extend(below_min_groups)
+        elif autoscalable_groups:
+            # Only use autoscalable groups if no groups are below minimum
+            eligible_groups.extend(autoscalable_groups)
+        elif allow_exceeding_max and normalized_groups:
+            # For rolling deployments, allow exceeding max even for fixed groups
+            eligible_groups.extend(normalized_groups)
 
         if normalized_groups and not eligible_groups:
-            # All groups at maximum
-            logger.info("%s: all autoscalable groups at maximum capacity", fmt(run_model))
+            # All groups at their limits
+            logger.info("%s: all replica groups at their limits (min/max)", fmt(run_model))
             return
 
         scheduled_replicas = 0
@@ -1410,10 +1450,10 @@ async def scale_run_replicas(session: AsyncSession, run_model: RunModel, replica
         for _, _, _, replica_jobs in inactive_replicas:
             if scheduled_replicas == replicas_diff:
                 break
-            # Only reuse if from autoscalable group
+            # Only reuse if from eligible group
             if replica_jobs:
                 group_name = replica_jobs[0].replica_group_name or "default"
-                if not normalized_groups or group_name in {g.name for g in autoscalable_groups}:
+                if not normalized_groups or group_name in {g.name for g in eligible_groups}:
                     await retry_run_replica_jobs(session, run_model, replica_jobs, only_failed=False)
                     scheduled_replicas += 1
 
 
@@ -131,7 +131,7 @@ def get_service_scaler(conf: ServiceConfiguration) -> BaseServiceScaler:
         assert conf.replicas.max is not None
         min_replicas = conf.replicas.min
         max_replicas = conf.replicas.max
-    
+
     if conf.scaling is None:
         return ManualScaler(
             min_replicas=min_replicas,
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -484,6 +484,7 @@ async def _handle_run_replicas(` |
| `484` | `484` | `session,` |
| `485` | `485` | `run_model,` |
| `486` | `486` | `replicas_diff=max_replica_count - non_terminated_replica_count,` |
| | `487` | `+ allow_exceeding_max=True, # Allow exceeding max for rolling deployments` |
| `487` | `488` | `)` |
| `488` | `489` | |
| `489` | `490` | `replicas_to_stop_count = 0` |
| | | `@@ -510,6 +511,7 @@ async def _handle_run_replicas(` |
| `510` | `511` | `session,` |
| `511` | `512` | `run_model,` |
| `512` | `513` | `replicas_diff=-replicas_to_stop_count,` |
| | `514` | `+ allow_exceeding_max=True, # Allow terminating out-of-date replicas during rolling deployment` |
| `513` | `515` | `)` |
| `514` | `516` | |
| `515` | `517` | |