Skip to content

Commit d513c79

Browse files
committed
Add status-specific processing interval
1 parent f3b8eaf commit d513c79

3 files changed

Lines changed: 36 additions & 5 deletions

File tree

src/dstack/_internal/server/background/pipeline_tasks/base.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ async def heartbeat(self):
255255

256256
class Fetcher(Generic[ItemT], ABC):
257257
_DEFAULT_FETCH_DELAYS = [0.5, 1, 2, 5]
258+
"""Increasing fetch delays on empty fetches to avoid frequent selects on low-activity/low-resource servers."""
258259

259260
def __init__(
260261
self,
@@ -319,7 +320,15 @@ async def fetch(self, limit: int) -> list[ItemT]:
319320
pass
320321

321322
def _next_fetch_delay(self, empty_fetch_count: int) -> float:
322-
next_delay = self._fetch_delays[min(empty_fetch_count, len(self._fetch_delays) - 1)]
323+
effective_empty_fetch_count = empty_fetch_count
324+
if random.random() < 0.1:
325+
# Empty fetch count can be 0 not because there are no items in the DB,
326+
# but for other reasons such as waiting for parent resource processing.
327+
# From time to time, force the minimal next delay to avoid empty results due to rare fetches.
328+
effective_empty_fetch_count = 0
329+
next_delay = self._fetch_delays[
330+
min(effective_empty_fetch_count, len(self._fetch_delays) - 1)
331+
]
323332
jitter = random.random() * 0.4 - 0.2
324333
return next_delay * (1 + jitter)
325334

src/dstack/_internal/server/background/pipeline_tasks/jobs_running.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def __init__(
117117
workers_num: int = 20,
118118
queue_lower_limit_factor: float = 0.5,
119119
queue_upper_limit_factor: float = 2.0,
120-
min_processing_interval: timedelta = timedelta(seconds=10),
120+
min_processing_interval: timedelta = timedelta(seconds=5),
121121
lock_timeout: timedelta = timedelta(seconds=30),
122122
heartbeat_trigger: timedelta = timedelta(seconds=15),
123123
) -> None:
@@ -196,7 +196,19 @@ async def fetch(self, limit: int) -> list[JobRunningPipelineItem]:
196196
[JobStatus.PROVISIONING, JobStatus.PULLING, JobStatus.RUNNING]
197197
),
198198
RunModel.status.not_in([RunStatus.TERMINATING]),
199-
JobModel.last_processed_at <= now - self._min_processing_interval,
199+
or_(
200+
# Process provisioning and pulling jobs quicker for low-latency provisioning.
201+
# Active jobs processing can be less frequent to minimize contention with `RunPipeline`.
202+
and_(
203+
JobModel.status.in_([JobStatus.PROVISIONING, JobStatus.PULLING]),
204+
JobModel.last_processed_at <= now - self._min_processing_interval,
205+
),
206+
and_(
207+
JobModel.status.in_([JobStatus.RUNNING]),
208+
JobModel.last_processed_at
209+
<= now - self._min_processing_interval * 2,
210+
),
211+
),
200212
or_(
201213
and_(
202214
# Do not try to lock jobs if the run is waiting for the lock,

src/dstack/_internal/server/background/pipeline_tasks/runs/__init__.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def __init__(
5555
workers_num: int = 10,
5656
queue_lower_limit_factor: float = 0.5,
5757
queue_upper_limit_factor: float = 2.0,
58-
min_processing_interval: timedelta = timedelta(seconds=10),
58+
min_processing_interval: timedelta = timedelta(seconds=5),
5959
lock_timeout: timedelta = timedelta(seconds=30),
6060
heartbeat_trigger: timedelta = timedelta(seconds=15),
6161
) -> None:
@@ -164,7 +164,17 @@ async def fetch(self, limit: int) -> list[RunPipelineItem]:
164164
),
165165
),
166166
or_(
167-
RunModel.last_processed_at <= now - self._min_processing_interval,
167+
# Process submitted runs quicker for low-latency provisioning.
168+
# Active run processing can be less frequent to minimize contention with `JobRunningPipeline`.
169+
and_(
170+
RunModel.status == RunStatus.SUBMITTED,
171+
RunModel.last_processed_at <= now - self._min_processing_interval,
172+
),
173+
and_(
174+
RunModel.status != RunStatus.SUBMITTED,
175+
RunModel.last_processed_at
176+
<= now - self._min_processing_interval * 2,
177+
),
168178
RunModel.last_processed_at == RunModel.submitted_at,
169179
),
170180
or_(

0 commit comments

Comments
 (0)