-
Notifications
You must be signed in to change notification settings - Fork 221
Health metrics (Part 2) #2796
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
un-def
merged 8 commits into
dstackai:master
from
Nadine-H:nadine/2736_add-custom-health-metrics
Jun 26, 2025
Merged
Health metrics (Part 2) #2796
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
6983f2f
Add basic http metrics
Nadine-H 97aac8f
Implement custom http metrics
Nadine-H ecc2ad7
Update docs
Nadine-H 36300d4
Add custom health metrics
Nadine-H f197327
Add prometheus wrapper class
Nadine-H 7227265
Apply PR comments
Nadine-H 1583387
Rename modules
un-def 2ff253d
Merge remote-tracking branch 'origin/master' into nadine/2736_add-custom-health-metrics
un-def File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
52 changes: 52 additions & 0 deletions
52
src/dstack/_internal/server/services/prometheus/client_metrics.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| from prometheus_client import Counter, Histogram | ||
|
|
||
|
|
||
| class RunMetrics: | ||
| """Wrapper class for run-related Prometheus metrics.""" | ||
|
|
||
| def __init__(self): | ||
| self._submit_to_provision_duration = Histogram( | ||
| "dstack_submit_to_provision_duration_seconds", | ||
| "Time from when a run has been submitted and first job provisioning", | ||
| # Buckets optimized for percentile calculation | ||
| buckets=[ | ||
| 15, | ||
| 30, | ||
| 45, | ||
| 60, | ||
| 90, | ||
| 120, | ||
| 180, | ||
| 240, | ||
| 300, | ||
| 360, | ||
| 420, | ||
| 480, | ||
| 540, | ||
| 600, | ||
| 900, | ||
| 1200, | ||
| 1800, | ||
| float("inf"), | ||
| ], | ||
| labelnames=["project_name", "run_type"], | ||
| ) | ||
|
|
||
| self._pending_runs_total = Counter( | ||
| "dstack_pending_runs_total", | ||
| "Number of pending runs", | ||
| labelnames=["project_name", "run_type"], | ||
| ) | ||
|
|
||
| def log_submit_to_provision_duration( | ||
| self, duration_seconds: float, project_name: str, run_type: str | ||
| ): | ||
| self._submit_to_provision_duration.labels( | ||
| project_name=project_name, run_type=run_type | ||
| ).observe(duration_seconds) | ||
|
|
||
| def increment_pending_runs(self, project_name: str, run_type: str): | ||
| self._pending_runs_total.labels(project_name=project_name, run_type=run_type).inc() | ||
|
|
||
|
|
||
| run_metrics = RunMetrics() |
File renamed without changes.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
45 changes: 45 additions & 0 deletions
45
src/tests/_internal/server/services/prometheus/test_client_metrics.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| from unittest.mock import MagicMock | ||
|
|
||
| from dstack._internal.server.services.prometheus.client_metrics import run_metrics | ||
|
|
||
|
|
||
| class TestRunMetrics: | ||
| def test_log_submit_to_provision_duration(self, monkeypatch): | ||
| mock_histogram = MagicMock() | ||
| mock_labels = MagicMock() | ||
| mock_histogram.labels.return_value = mock_labels | ||
| monkeypatch.setattr(run_metrics, "_submit_to_provision_duration", mock_histogram) | ||
|
|
||
| duration = 120.5 | ||
| project_name = "test-project" | ||
| run_type = "dev" | ||
|
|
||
| run_metrics.log_submit_to_provision_duration(duration, project_name, run_type) | ||
|
|
||
| mock_histogram.labels.assert_called_once_with(project_name=project_name, run_type=run_type) | ||
| mock_labels.observe.assert_called_once_with(duration) | ||
|
|
||
| def test_increment_pending_runs(self, monkeypatch): | ||
| mock_counter = MagicMock() | ||
| mock_labels = MagicMock() | ||
| mock_counter.labels.return_value = mock_labels | ||
|
|
||
| monkeypatch.setattr(run_metrics, "_pending_runs_total", mock_counter) | ||
|
|
||
| project_name = "test-project" | ||
| run_type = "train" | ||
|
|
||
| run_metrics.increment_pending_runs(project_name, run_type) | ||
| mock_counter.labels.assert_called_once_with(project_name=project_name, run_type=run_type) | ||
| mock_labels.inc.assert_called_once() | ||
|
|
||
| def test_multiple_calls_to_log_submit_to_provision_duration(self): | ||
| run_metrics.log_submit_to_provision_duration(60.0, "project1", "dev") | ||
| run_metrics.log_submit_to_provision_duration(120.0, "project1", "prod") | ||
| run_metrics.log_submit_to_provision_duration(30.0, "project2", "dev") | ||
|
|
||
| def test_multiple_calls_to_increment_pending_runs(self): | ||
| run_metrics.increment_pending_runs("project1", "dev") | ||
| run_metrics.increment_pending_runs("project1", "prod") | ||
| run_metrics.increment_pending_runs("project2", "dev") | ||
| run_metrics.increment_pending_runs("project1", "dev") |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.