From be236db2a950aa55470da5aafafa0491d37d239d Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Wed, 25 Feb 2026 13:57:53 -0500 Subject: [PATCH] Bug 2036384 - Implement v2 Taskcluster route format for Github projects Adds `trust_domain` and `project` fields to the Repository model. The v1 route format only has a top-level project in the route. This is fine for hg.mozilla.org where there's a 1:1 mapping of project to branch, but in Github it makes things awkward. E.g `mozilla/enterprise-firefox` is a project that has many branches all hooked up to Treeherder, as well as pull requests. To differentiate them we need to use routes like `<project>-<branch>`. This requires extra JSON-e logic in the .taskcluster.yml file and means we need to explicitly grant the Treeherder route scope on a case by case basis. Instead, this invents a v2 route format that handles our Github use cases better. It keys by trust_domain, project and branch rather than just project. --- tests/etl/taskcluster_pulse/test_handler.py | 250 +++++++++++++++++- .../etl/taskcluster_pulse/test_parse_route.py | 68 +++++ treeherder/etl/taskcluster_pulse/handler.py | 36 ++- .../etl/taskcluster_pulse/parse_route.py | 54 +++- .../0049_repository_trust_domain_project.py | 39 +++ treeherder/model/models.py | 3 + 6 files changed, 433 insertions(+), 17 deletions(-) create mode 100644 tests/etl/taskcluster_pulse/test_parse_route.py create mode 100644 treeherder/model/migrations/0049_repository_trust_domain_project.py diff --git a/tests/etl/taskcluster_pulse/test_handler.py b/tests/etl/taskcluster_pulse/test_handler.py index ecfe4e0ce17..d62ccda429b 100644 --- a/tests/etl/taskcluster_pulse/test_handler.py +++ b/tests/etl/taskcluster_pulse/test_handler.py @@ -1,6 +1,15 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + import pytest -from treeherder.etl.taskcluster_pulse.handler import handle_message, handle_task_defined +from treeherder.etl.taskcluster_pulse.handler import ( + 
PulseHandlerError, + handle_message, + handle_task_defined, + parse_route_info, +) +from treeherder.model.models import Repository, RepositoryGroup @pytest.mark.asyncio @@ -14,7 +23,7 @@ async def test_handle_message_routes_task_defined(): "created": "2025-01-01T00:00:00.000Z", "workerType": "test-worker", "tags": {}, - "routes": ["tc-treeherder.v2.autoland.abc123"], + "routes": ["tc-treeherder.v1.autoland.abc123"], "extra": { "treeherder": { "symbol": "T", @@ -36,19 +45,97 @@ async def test_handle_message_routes_task_defined(): }, } - result = await handle_message(message, task) + mock_repo = MagicMock() + mock_repo.name = "autoland" + mock_push_info = { + "version": "v1", + "project": "autoland", + "revision": "abc123", + "id": 0, + "repository": mock_repo, + } + + with patch( + "treeherder.etl.taskcluster_pulse.handler.parse_route_info", + return_value=mock_push_info, + ): + result = await handle_message(message, task) assert len(result) == 1 assert result[0]["state"] == "unscheduled" assert result[0]["result"] == "unknown" +@pytest.mark.asyncio +async def test_handle_message_routes_task_defined_v2(): + task = { + "metadata": { + "name": "test-task", + "description": "Test task", + "owner": "test@example.com", + }, + "created": "2025-01-01T00:00:00.000Z", + "workerType": "test-worker", + "tags": {}, + "routes": ["tc-treeherder.v2.firefox-ci.enterprise-firefox.enterprise-main.abc123.999"], + "extra": { + "treeherder": { + "symbol": "T", + "tier": 1, + } + }, + } + + message = { + "exchange": "exchange/taskcluster-queue/v1/task-defined", + "root_url": "https://firefox-ci-tc.services.mozilla.com", + "payload": { + "runId": 0, + "status": { + "taskId": "AJBb7wqZT6K9kz4niYAatg", + "state": "unscheduled", + "runs": [], + }, + }, + } + + mock_repo = MagicMock() + mock_repo.name = "enterprise-main" + mock_push_info = { + "version": "v2", + "trust_domain": "firefox-ci", + "project": "enterprise-firefox", + "branch": "enterprise-main", + "revision": "abc123", + "id": 999, 
+ "repository": mock_repo, + } + + with ( + patch( + "treeherder.etl.taskcluster_pulse.handler.parse_route_info", + return_value=mock_push_info, + ), + patch( + "treeherder.etl.taskcluster_pulse.handler.ignore_task", + return_value=False, + ), + ): + result = await handle_message(message, task) + + assert len(result) == 1 + assert result[0]["state"] == "unscheduled" + assert result[0]["result"] == "unknown" + assert result[0]["origin"]["project"] == "enterprise-main" + + def test_handle_task_defined(): push_info = { "project": "autoland", "revision": "abc123", "origin": "hg.mozilla.org", "id": "12345", + "repository": SimpleNamespace(name="autoland"), } task = { @@ -85,3 +172,160 @@ def test_handle_task_defined(): assert isinstance(result, dict) assert result["buildMachine"]["name"] == "unknown" assert result["origin"]["project"] == "autoland" + + +@pytest.fixture +def autoland_repository(transactional_db): + rg = RepositoryGroup.objects.create(name="test-group", description="") + return Repository.objects.create( + name="autoland", + repository_group=rg, + dvcs_type="hg", + url="https://hg.mozilla.org/integration/autoland", + branch="default", + tc_root_url="https://firefox-ci-tc.services.mozilla.com", + ) + + +@pytest.mark.asyncio +async def test_handle_message_real_db_path(autoland_repository): + task = { + "metadata": { + "name": "test-task", + "description": "Test task", + "owner": "test@example.com", + }, + "created": "2025-01-01T00:00:00.000Z", + "workerType": "test-worker", + "tags": {}, + "routes": ["tc-treeherder.v1.autoland.abc123"], + "extra": { + "treeherder": { + "symbol": "T", + "tier": 1, + } + }, + } + + message = { + "exchange": "exchange/taskcluster-queue/v1/task-defined", + "root_url": "https://firefox-ci-tc.services.mozilla.com", + "payload": { + "runId": 0, + "status": { + "taskId": "AJBb7wqZT6K9kz4niYAatg", + "state": "unscheduled", + "runs": [], + }, + }, + } + + result = await handle_message(message, task) + + assert len(result) == 1 + 
assert result[0]["state"] == "unscheduled" + + +@pytest.fixture +def repository_group(db): + return RepositoryGroup.objects.create(name="test-group", description="") + + +@pytest.mark.parametrize( + "repo_name,route_suffix", + [ + ("mozilla-central", "mozilla-central.abc123def456.789"), + ("fenix", "fenix.abc123def456.456"), + ], +) +def test_parse_route_info_v1(db, repository_group, repo_name, route_suffix): + repository = Repository.objects.create( + name=repo_name, + repository_group=repository_group, + dvcs_type="hg" if repo_name == "mozilla-central" else "git", + url=f"https://example.com/{repo_name}", + branch="default" if repo_name == "mozilla-central" else "main", + tc_root_url="https://firefox-ci-tc.services.mozilla.com", + ) + + result = parse_route_info( + "tc-treeherder", "test-task", [f"tc-treeherder.v1.{route_suffix}"], {} + ) + + assert result["version"] == "v1" + assert result["project"] == repo_name + assert result["repository"] == repository + + +def test_parse_route_info_v1_with_owner(db, repository_group): + repository = Repository.objects.create( + name="fenix", + repository_group=repository_group, + dvcs_type="git", + url="https://github.com/mozilla-mobile/fenix", + branch="main", + tc_root_url="https://firefox-ci-tc.services.mozilla.com", + ) + + result = parse_route_info( + "tc-treeherder", "test-task", ["tc-treeherder.v1.mozilla-mobile/fenix.abc123def456.123"], {} + ) + + assert result["version"] == "v1" + assert result["project"] == "fenix" + assert result["repository"] == repository + + +def test_parse_route_info_v2(db, repository_group): + repository = Repository.objects.create( + name="enterprise-main", + repository_group=repository_group, + dvcs_type="git", + url="https://github.com/mozilla/enterprise-firefox", + branch="enterprise-main", + trust_domain="firefox-ci", + project="enterprise-firefox", + tc_root_url="https://firefox-ci-tc.services.mozilla.com", + ) + + result = parse_route_info( + "tc-treeherder", + "test-task", + 
["tc-treeherder.v2.firefox-ci.enterprise-firefox.enterprise-main.abc123def456.999"], + {}, + ) + + assert result["version"] == "v2" + assert result["trust_domain"] == "firefox-ci" + assert result["project"] == "enterprise-firefox" + assert result["branch"] == "enterprise-main" + assert result["revision"] == "abc123def456" + assert result["id"] == 999 + assert result["repository"] == repository + + +def test_parse_route_info_repository_not_found(db): + with pytest.raises(PulseHandlerError, match="Could not find repository"): + parse_route_info( + "tc-treeherder", "test-task", ["tc-treeherder.v1.nonexistent.abc123.456"], {} + ) + + +@pytest.mark.parametrize( + "routes,expected_error", + [ + (["some.other.route", "another.route"], "Could not determine Treeherder route"), + ( + ["tc-treeherder.v1.mozilla-central.abc123.456", "tc-treeherder.v1.try.def456.789"], + "Could not determine Treeherder route", + ), + (["tc-treeherder.v1.too-short"], "Could not parse route"), + ], +) +def test_parse_route_info_invalid_routes(routes, expected_error): + task = {"taskId": "test-task-invalid"} + + with pytest.raises(PulseHandlerError) as exc_info: + parse_route_info("tc-treeherder", "test-task-invalid", routes, task) + + assert expected_error in str(exc_info.value) diff --git a/tests/etl/taskcluster_pulse/test_parse_route.py b/tests/etl/taskcluster_pulse/test_parse_route.py new file mode 100644 index 00000000000..f3c18c1eed4 --- /dev/null +++ b/tests/etl/taskcluster_pulse/test_parse_route.py @@ -0,0 +1,68 @@ +import pytest + +from treeherder.etl.taskcluster_pulse.parse_route import parse_route + + +@pytest.mark.parametrize( + "route,project,revision,id", + [ + ( + "tc-treeherder.v1.mozilla-central.abc123def456.789", + "mozilla-central", + "abc123def456", + 789, + ), + ("tc-treeherder.v1.mozilla-mobile/fenix.abc123def456.789", "fenix", "abc123def456", 789), + ("tc-treeherder.v1.try.abc123def456", "try", "abc123def456", 0), + ], +) +def test_parse_route_v1(route, project, revision, 
id): + result = parse_route(route) + assert result["version"] == "v1" + assert result["destination"] == "tc-treeherder" + assert result["project"] == project + assert result["revision"] == revision + assert result["id"] == id + assert "origin" not in result + + +@pytest.mark.parametrize( + "route,branch,revision,id", + [ + ( + "tc-treeherder.v2.firefox-ci.enterprise-firefox.main.abc123def456.789", + "main", + "abc123def456", + 789, + ), + ( + "tc-treeherder.v2.firefox-ci.enterprise-firefox.release.abc123def456", + "release", + "abc123def456", + 0, + ), + ], +) +def test_parse_route_v2(route, branch, revision, id): + result = parse_route(route) + assert result["version"] == "v2" + assert result["destination"] == "tc-treeherder" + assert result["trust_domain"] == "firefox-ci" + assert result["project"] == "enterprise-firefox" + assert result["branch"] == branch + assert result["revision"] == revision + assert result["id"] == id + assert "origin" not in result + + +@pytest.mark.parametrize( + "route", + [ + "tc-treeherder.v3.mozilla-central.abc123.789", + "tc-treeherder.v1.mozilla-central", + "tc-treeherder.v2.firefox-ci.enterprise-firefox.main", + ], +) +def test_parse_route_invalid(route): + with pytest.raises(ValueError): + parse_route(route) diff --git a/treeherder/etl/taskcluster_pulse/handler.py b/treeherder/etl/taskcluster_pulse/handler.py index 1bbc13da36c..dfd270cfbc5 100644 --- a/treeherder/etl/taskcluster_pulse/handler.py +++ b/treeherder/etl/taskcluster_pulse/handler.py @@ -8,10 +8,13 @@ import taskcluster import taskcluster.aio import taskcluster_urls +from asgiref.sync import sync_to_async from django.conf import settings +from django.db import connections from treeherder.etl.schema import get_json_schema from treeherder.etl.taskcluster_pulse.parse_route import parse_route +from treeherder.model.models import Repository env = environ.Env() logger = logging.getLogger(__name__) @@ -82,8 +85,28 @@ def parse_route_info(prefix, task_id, routes, task): + 
f"Task ID: {task_id} Routes: {routes}" ) - parsed_route = parse_route(matching_routes[0]) + try: + parsed_route = parse_route(matching_routes[0]) + except ValueError as e: + raise PulseHandlerError( + f"Could not parse route. Task ID: {task_id} Route: {matching_routes[0]}" + ) from e + + try: + if parsed_route["version"] == "v1": + repository = Repository.objects.get(name=parsed_route["project"]) + else: + repository = Repository.objects.get( + trust_domain=parsed_route["trust_domain"], + project=parsed_route["project"], + branch=parsed_route["branch"], + ) + except Repository.DoesNotExist: + raise PulseHandlerError( + f"Could not find repository for route '{matching_routes[0]}'. Task ID: {task_id}" + ) + parsed_route["repository"] = repository return parsed_route @@ -184,12 +207,15 @@ async def handle_message(message, task_definition=None): task = await async_queue.task(task_id) try: - parsed_route = parse_route_info("tc-treeherder", task_id, task["routes"], task) + parsed_route = await sync_to_async(parse_route_info)( + "tc-treeherder", task_id, task["routes"], task + ) + await sync_to_async(connections.close_all)() except PulseHandlerError as e: logger.debug("%s", str(e)) return jobs - if ignore_task(task, task_id, message["root_url"], parsed_route["project"]): + if ignore_task(task, task_id, message["root_url"], parsed_route["repository"].name): return jobs logger.debug("Message received for task %s", task_id) @@ -254,7 +280,7 @@ def build_message(push_info, task, run_id, payload): } job["origin"] = { - "project": push_info["project"], + "project": push_info["repository"].name, "revision": push_info["revision"], "id": push_info["id"], } @@ -321,7 +347,7 @@ def handle_task_defined(push_info, task, message): } job["origin"] = { - "project": push_info["project"], + "project": push_info["repository"].name, "revision": push_info["revision"], "id": push_info["id"], } diff --git a/treeherder/etl/taskcluster_pulse/parse_route.py 
b/treeherder/etl/taskcluster_pulse/parse_route.py index 57cfdfba4fd..7b277de1888 100644 --- a/treeherder/etl/taskcluster_pulse/parse_route.py +++ b/treeherder/etl/taskcluster_pulse/parse_route.py @@ -1,20 +1,35 @@ # Code imported from https://github.com/taskcluster/taskcluster/blob/32629c562f8d6f5a6b608a3141a8ee2e0984619f/services/treeherder/src/util/route_parser.js -# A Taskcluster routing key will be in the form: -# treeherder.<version>.<user/project>|<project>.<revision>.<pushLogId/pull request id> -# [0] Routing key prefix used for listening to only treeherder relevant messages -# [1] Routing key version -# [2] In the form of user/project for github repos and just project for hg.mozilla.org -# [3] Top level revision for the push -# [4] Pull Request ID (github) or Push Log ID (hg.mozilla.org) of the push -# Note: pushes on a branch on Github would not have a PR ID +# A Taskcluster routing key can be in two formats: +# +# v1: tc-treeherder.v1.<project>.<revision>.<id> +# v2: tc-treeherder.v2.<trust_domain>.<project>.<branch>.<revision>.<id> +# # Function extracted from # https://github.com/taskcluster/taskcluster/blob/32629c562f8d6f5a6b608a3141a8ee2e0984619f/services/treeherder/src/util/route_parser.js def parse_route(route): + parsed_route = route.split(".") + + if len(parsed_route) < 4: + raise ValueError(f"Route has too few segments: {route}") + + version = parsed_route[1] + + if version == "v1": + return parse_route_v1(parsed_route) + elif version == "v2": + return parse_route_v2(parsed_route) + else: + raise ValueError(f"Unrecognized route version '{version}': {route}") + + +def parse_route_v1(parsed_route): + """ + Parse v1 format: tc-treeherder.v1.<project>.<revision>.<id> + """ id = None parsed_project = None - parsed_route = route.split(".") project = parsed_route[2] if len(project.split("/")) == 2: @@ -27,9 +42,30 @@ def parse_route(route): push_info = { "destination": parsed_route[0], + "version": "v1", "id": int(id) if id else 0, "project": parsed_project, "revision": parsed_route[3], } return push_info + + +def parse_route_v2(parsed_route): + """ + Parse v2 format: tc-treeherder.v2.<trust_domain>.<project>.<branch>.<revision>.<id> 
+ """ + if len(parsed_route) < 6: + raise ValueError(f"v2 route has too few segments: {'.'.join(parsed_route)}") + + push_info = { + "destination": parsed_route[0], + "version": "v2", + "trust_domain": parsed_route[2], + "project": parsed_route[3], + "branch": parsed_route[4], + "revision": parsed_route[5], + "id": int(parsed_route[6]) if len(parsed_route) > 6 else 0, + } + + return push_info diff --git a/treeherder/model/migrations/0049_repository_trust_domain_project.py b/treeherder/model/migrations/0049_repository_trust_domain_project.py new file mode 100644 index 00000000000..6a6d5855d9a --- /dev/null +++ b/treeherder/model/migrations/0049_repository_trust_domain_project.py @@ -0,0 +1,39 @@ +# Generated migration for adding trust_domain and project fields to Repository + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('model', '0048_alter_failureline_action'), + ] + + operations = [ + migrations.AddField( + model_name='repository', + name='trust_domain', + field=models.CharField( + blank=True, + db_index=True, + max_length=100, + null=True, + ), + ), + migrations.AddField( + model_name='repository', + name='project', + field=models.CharField( + blank=True, + db_index=True, + max_length=100, + null=True, + ), + ), + migrations.AddIndex( + model_name='repository', + index=models.Index( + fields=['trust_domain', 'project', 'branch'], + name='repo_v2_route_idx', + ), + ), + ] diff --git a/treeherder/model/models.py b/treeherder/model/models.py index 3b2d04fc134..c98ca9b3987 100644 --- a/treeherder/model/models.py +++ b/treeherder/model/models.py @@ -109,12 +109,15 @@ class Repository(models.Model): expire_performance_data = models.BooleanField(default=True) is_try_repo = models.BooleanField(default=False) tc_root_url = models.CharField(max_length=255, null=False, db_index=True) + trust_domain = models.CharField(max_length=100, null=True, blank=True, db_index=True) + project = 
models.CharField(max_length=100, null=True, blank=True, db_index=True) class Meta: db_table = "repository" verbose_name_plural = "repositories" indexes = [ models.Index(fields=["url", "active_status"], name="repo_url_active_idx"), + models.Index(fields=["trust_domain", "project", "branch"], name="repo_v2_route_idx"), ] @classmethod