From 51a8ce064f1323dd38dd7ca32465288cbd195877 Mon Sep 17 00:00:00 2001 From: Xavier-Do Date: Thu, 16 Apr 2026 12:27:51 +0200 Subject: [PATCH] [REF] runbot: use message queue for kill requests This will be a first step to test the message queue before switching state recomputation inside it. --- runbot/__manifest__.py | 2 +- runbot/controllers/badge.py | 8 +-- runbot/controllers/frontend.py | 1 + runbot/migrations/19.0.5.17/pre-migration.py | 3 + runbot/models/batch.py | 2 +- runbot/models/build.py | 68 +++++++++++--------- runbot/models/build_config.py | 2 +- runbot/models/host.py | 12 +++- runbot/models/runbot.py | 8 +-- runbot/security/ir.model.access.csv | 4 +- runbot/templates/utils.xml | 10 +-- runbot/tests/test_build.py | 21 +++--- runbot/views/build_views.xml | 2 +- 13 files changed, 78 insertions(+), 65 deletions(-) create mode 100644 runbot/migrations/19.0.5.17/pre-migration.py diff --git a/runbot/__manifest__.py b/runbot/__manifest__.py index 38db70949..f166684a5 100644 --- a/runbot/__manifest__.py +++ b/runbot/__manifest__.py @@ -6,7 +6,7 @@ 'author': "Odoo SA", 'website': "http://runbot.odoo.com", 'category': 'Website', - 'version': '5.16', + 'version': '5.17', 'application': True, 'depends': ['base', 'base_automation', 'website', 'auth_oauth'], 'data': [ diff --git a/runbot/controllers/badge.py b/runbot/controllers/badge.py index fa6e031ee..d0246cb21 100644 --- a/runbot/controllers/badge.py +++ b/runbot/controllers/badge.py @@ -44,13 +44,9 @@ def badge(self, name, repo_id=False, trigger_id=False, theme='default'): if not builds: state = 'testing' else: - result = builds._result_multi() - if result == 'ok': + state = 'failed' + if all(build.global_result == 'ok' for build in self): state = 'success' - elif result == 'warn': - state = 'warning' - else: - state = 'failed' etag = request.httprequest.headers.get('If-None-Match') retag = hashlib.md5(state.encode()).hexdigest() diff --git a/runbot/controllers/frontend.py b/runbot/controllers/frontend.py index a8f15e44a..fb55664ed 100644 --- a/runbot/controllers/frontend.py +++ b/runbot/controllers/frontend.py @@ -276,6 +276,7 @@ def resend_status(self, status_id=None, **kwargs): ], type='http', auth="user", methods=['POST'], csrf=False) def build_operations(self, build_id, operation, **post): build = request.env['runbot.build'].sudo().browse(build_id) + build.check_access('read') if operation == 'rebuild': build = build._rebuild() elif operation == 'kill': diff --git a/runbot/migrations/19.0.5.17/pre-migration.py b/runbot/migrations/19.0.5.17/pre-migration.py new file mode 100644 index 000000000..c21faba28 --- /dev/null +++ b/runbot/migrations/19.0.5.17/pre-migration.py @@ -0,0 +1,3 @@ +def migrate(cr, version): + cr.execute("""UPDATE runbot_build set local_result = 'killed' where local_result = 'manually_killed'""") + cr.execute("""UPDATE runbot_build set global_result = 'killed' where global_result = 'manually_killed'""") diff --git a/runbot/models/batch.py b/runbot/models/batch.py index 6cd395827..cad114cb7 100644 --- a/runbot/models/batch.py +++ b/runbot/models/batch.py @@ -154,7 +154,7 @@ def _create_build(self, params, slot): build = self.env['runbot.build'].search(domain, limit=1, order='id desc') link_type = 'matched' - killed_states = ('skipped', 'killed', 'manually_killed') + killed_states = ('skipped', 'killed') if build and build.local_result not in killed_states and build.global_result not in killed_states: if build.killable: build.killable = False diff --git a/runbot/models/build.py b/runbot/models/build.py index 340aec464..690533389 100644 --- a/runbot/models/build.py +++ b/runbot/models/build.py @@ -43,7 +43,7 @@ _logger = logging.getLogger(__name__) -result_order = ['ok', 'warn', 'ko', 'skipped', 'killed', 'manually_killed'] +result_order = ['ok', 'warn', 'ko', 'skipped', 'killed', 'manually_killed'] # TODO remove manually_killed state_order = ['pending', 'testing', 'waiting', 'running', 'done'] COPY_WHITELIST = [ @@ -297,6 +297,8 @@ class BuildResult(models.Model): local_result = fields.Selection(make_selection(result_order), string='Build Result', default='ok') requested_action = fields.Selection([('wake_up', 'To wake up'), ('deathrow', 'To kill')], string='Action requested', index=True) + to_kill = fields.Boolean('To kill', compute='_compute_to_kill') + message_ids = fields.One2many('runbot.host.message', 'build_id', string='Messages') # web infos host = fields.Char('Host name') host_id = fields.Many2one('runbot.host', string="Host", compute='_compute_host_id') @@ -394,6 +396,11 @@ def _compute_global_state(self): else: record.global_state = record.local_state + @api.depends('message_ids') + def _compute_to_kill(self): + for record in self: + record.to_kill = any(message.message == 'kill' for message in record.message_ids) + @api.depends('gc_delay', 'job_end') def _compute_gc_date(self): icp = self.env['ir.config_parameter'].sudo() @@ -543,17 +550,6 @@ def _add_child(self, param_values, orphan=False, description=False, additionnal_ **build_values, }) - def _result_multi(self): - if all(build.global_result == 'ok' or not build.global_result for build in self): - return 'ok' - if any(build.global_result in ('skipped', 'killed', 'manually_killed') for build in self): - return 'killed' - if any(build.global_result == 'ko' for build in self): - return 'ko' - if any(build.global_result == 'warning' for build in self): - return 'warning' - return 'ko' # ? - @api.depends('params_id.version_id.name') def _compute_dest(self): for build in self: @@ -826,11 +822,13 @@ def _init_pendings(self): def _process_requested_actions(self): self.ensure_one() build = self + # TODO remove, replaced by queue if build.requested_action == 'deathrow': result = None if build.local_state != 'running' and build.global_result not in ('warn', 'ko'): - result = 'manually_killed' + result = 'killed' build._kill(result=result) + build.requested_action = False return if build.requested_action == 'wake_up': @@ -1232,7 +1230,7 @@ def truncate(message, maxlenght=300000): 'line': '0', }) - def _kill(self, result=None): + def _kill(self, result='killed'): host_name = self.env['runbot.host']._get_current_name() self.ensure_one() build = self @@ -1240,30 +1238,38 @@ def _kill(self, result=None): return build._log('kill', 'Kill build %s' % build.dest) docker_stop(build._get_docker_name(), build._path()) - v = {'local_state': 'done', 'requested_action': False, 'active_step': False, 'job_end': now()} + build.local_state = 'done' + build.active_step = False + build.job_end = now() if not build.build_end: - v['build_end'] = now() + build.build_end = now() if result: - v['local_result'] = result - build.write(v) - - def _ask_kill(self, lock=True, message=None): - # if build remains in same bundle, it's ok like that - # if build can be cross bundle, need to check number of ref to build - if lock: - self.env.cr.execute("""SELECT id FROM runbot_build WHERE parent_path like %s FOR UPDATE""", ['%s%%' % self.parent_path]) + build.local_result = result + + def _ask_kill(self, message=None): self.ensure_one() user = self.env.user uid = user.id build = self message = message or 'Killing build %s, requested by %s (user #%s)' % (build.dest, user.name, uid) build._log('_ask_kill', message) - if build.local_state == 'pending': - build._skip() - elif build.local_state in ['testing', 'running']: - build.requested_action = 'deathrow' - for child in build.children_ids: - child._ask_kill(lock=False) + + self.env.cr.execute("""SELECT id, local_state FROM runbot_build WHERE parent_path like %s""", ['%s%%' % self.parent_path]) + builds = self.browse([b[0] for b in self.env.cr.fetchall()]) + pending = builds.filtered(lambda b: b.local_state == 'pending') + killable = builds.filtered(lambda b: b.local_state in ('running', 'testing')) + if pending: + pending.local_state = 'done' + pending.local_result = 'killed' + pending.flush_recordset() # faster concurrent error or lock row + + values = [{ + 'host_id': build.host_id.id, + 'build_id': build_id, + 'message': 'kill', + } for build_id in killable.ids] + + self.env['runbot.host.message'].sudo().create(values) def _wake_up(self): user = self.env.user @@ -1518,7 +1524,7 @@ def _get_color_class(self): if self.global_result == 'ok': return 'success' - if self.global_result in ('skipped', 'killed', 'manually_killed'): + if self.global_result in ('skipped', 'killed'): return 'secondary' return 'default' diff --git a/runbot/models/build_config.py b/runbot/models/build_config.py index 0c86e5eb8..9c3a5ccee 100644 --- a/runbot/models/build_config.py +++ b/runbot/models/build_config.py @@ -1027,7 +1027,7 @@ def get_reference_builds_for_versions(versions): ) if self.allow_similar_build_quick_result: - existing_done_build = next((build for build in child.params_id.build_ids.sorted('id') if build.global_state == 'done' and build.local_result not in ('skipped', 'killed', 'manually_killed')), None) + existing_done_build = next((build for build in child.params_id.build_ids.sorted('id') if build.global_state == 'done' and build.local_result not in ('skipped', 'killed')), None) if existing_done_build: child._log('', 'A similar [build](%s) has been found, marking as done directly', existing_done_build.build_url, log_type='markdown') child.local_state = 'done' diff --git a/runbot/models/host.py b/runbot/models/host.py index 7dea8fba1..72d59c555 100644 --- a/runbot/models/host.py +++ b/runbot/models/host.py @@ -341,7 +341,7 @@ def _get_builds(self, domain, order=None): return self.env['runbot.build'].search(self._get_build_domain(domain), order=order) def _process_messages(self): - self.host_message_ids._process() + return self.host_message_ids._process() class MessageQueue(models.Model): @@ -351,14 +351,20 @@ class MessageQueue(models.Model): _log_access = False create_date = fields.Datetime('Create date', default=fields.Datetime.now) - host_id = fields.Many2one('runbot.host', required=True, ondelete='cascade') - build_id = fields.Many2one('runbot.build') + host_id = fields.Many2one('runbot.host', required=True, ondelete='cascade', index=True) + build_id = fields.Many2one('runbot.build', index=True) message = fields.Char('Message') def _process(self): records = self + processed = False # todo consume messages here if records: + processed = True for record in records: + if record.message == 'kill': + if record.build_id: + record.build_id._kill('killed') self.env['runbot.runbot']._warning(f'Host {record.host_id.name} got an unexpected message {record.message}') self.unlink() + return processed diff --git a/runbot/models/runbot.py b/runbot/models/runbot.py index 047e3090b..b470e1df9 100644 --- a/runbot/models/runbot.py +++ b/runbot/models/runbot.py @@ -53,10 +53,10 @@ def _scheduler(self, host): processed += 1 build._process_requested_actions() self._commit() + if host._process_messages(): + self._commit() host._process_logs() self._commit() - host._process_messages() - self._commit() for build in host._get_builds([('local_state', 'in', ['testing', 'running'])]) | self._get_builds_to_init(host): build = build.browse(build.id) # remove preftech ids, manage build one by one result = build._schedule() @@ -113,14 +113,14 @@ def _gc_running(self, host): ][:running_max] build_ids = host._get_builds([('local_state', '=', 'running'), ('id', 'not in', cannot_be_killed_ids)], order='job_start desc').ids for build in Build.browse(build_ids)[running_max:]: - build._kill() + build._kill(None) def _gc_testing(self, host): """garbage collect builds that could be killed""" # decide if we need room Build = self.env['runbot.build'] domain_host = host._get_build_domain() - testing_builds = Build.search(domain_host + [('local_state', 'in', ['testing', 'pending']), ('requested_action', '!=', 'deathrow')]) + testing_builds = Build.search(domain_host + [('local_state', 'in', ['testing', 'pending']), ('message_ids', '=', False)]) used_slots = len(testing_builds) available_slots = host.nb_worker - used_slots nb_pending = Build.search_count([('local_state', '=', 'pending'), ('host', '=', False)]) diff --git a/runbot/security/ir.model.access.csv b/runbot/security/ir.model.access.csv index 6c233c654..f0ca84182 100644 --- a/runbot/security/ir.model.access.csv +++ b/runbot/security/ir.model.access.csv @@ -68,6 +68,9 @@ access_runbot_error_regex_manager,runbot_error_regex_manager,runbot.model_runbot access_runbot_host_public,runbot_host_public,runbot.model_runbot_host,runbot.base_runbot_model_access,1,0,0,0 access_runbot_host_manager,runbot_host_manager,runbot.model_runbot_host,runbot.group_runbot_admin,1,1,1,1 +access_runbot_host_message_public,runbot_host_message_public,runbot.model_runbot_host_message,runbot.base_runbot_model_access,1,0,0,0 +access_runbot_host_message_admin,runbot_host_message_admin,runbot.model_runbot_host_message,runbot.group_runbot_admin,1,1,1,1 + access_runbot_repo_hooktime,runbot_repo_hooktime,runbot.model_runbot_repo_hooktime,group_user,1,0,0,0 access_runbot_repo_referencetime,runbot_repo_referencetime,runbot.model_runbot_repo_reftime,group_user,1,0,0,0 access_runbot_build_stat_admin,runbot_build_stat_admin,runbot.model_runbot_build_stat,runbot.group_runbot_admin,1,1,1,1 @@ -106,7 +109,6 @@ access_runbot_bundle_public,access_runbot_bundle_public,runbot.model_runbot_bund access_runbot_bundle_runbot_bundle_manager,access_runbot_bundle_runbot_manager,runbot.model_runbot_bundle,runbot.group_runbot_bundle_manager,1,1,0,0 access_runbot_bundle_runbot_admin,access_runbot_bundle_runbot_admin,runbot.model_runbot_bundle,runbot.group_runbot_admin,1,1,1,1 - access_runbot_batch_public,access_runbot_batch_public,runbot.model_runbot_batch,runbot.base_runbot_model_access,1,0,0,0 access_runbot_batch_runbot_admin,access_runbot_batch_runbot_admin,runbot.model_runbot_batch,runbot.group_runbot_admin,1,1,1,1 diff --git a/runbot/templates/utils.xml b/runbot/templates/utils.xml index bf89535f2..ccc2eaa01 100644 --- a/runbot/templates/utils.xml +++ b/runbot/templates/utils.xml @@ -15,7 +15,7 @@ - + @@ -31,7 +31,7 @@ - + @@ -330,7 +330,7 @@ default - + killed @@ -365,12 +365,12 @@ Database selector - + Rebuild - + Kill diff --git a/runbot/tests/test_build.py b/runbot/tests/test_build.py index e55aab30a..3fc8cef24 100644 --- a/runbot/tests/test_build.py +++ b/runbot/tests/test_build.py @@ -744,7 +744,6 @@ def test_repo_gc_testing(self): bundle_b = self.env['runbot.bundle'].search([('name', '=', branch_b_name)]) bundle_b.last_batch._process() - build_b = bundle_b.last_batch.slot_ids[0].build_id # the two builds are starting tests on two different hosts @@ -754,8 +753,8 @@ def test_repo_gc_testing(self): # no room needed, verify that nobody got killed self.Runbot._gc_testing(host) - self.assertFalse(build_a.requested_action) - self.assertFalse(build_b.requested_action) + self.assertFalse(build_a.to_kill) + self.assertFalse(build_b.to_kill) # a new commit is pushed on branch_a self.push_commit(self.remote_odoo_dev, branch_a_name, 'new subject', sha='d0cad0ca') @@ -775,10 +774,10 @@ def test_repo_gc_testing(self): # no room needed, verify that nobody got killed self.Runbot._gc_testing(host) - self.assertFalse(build_a.requested_action) - self.assertFalse(build_b.requested_action) - self.assertFalse(build_a_last.requested_action) - self.assertFalse(children_b.requested_action) + self.assertFalse(build_a.to_kill) + self.assertFalse(build_b.to_kill) + self.assertFalse(build_a_last.to_kill) + self.assertFalse(children_b.to_kill) # now children_b starts on runbot_xxx children_b.write({'local_state': 'testing', 'host': host.name}) @@ -789,10 +788,10 @@ def test_repo_gc_testing(self): self.Runbot._gc_testing(host) # the killable build should have been marked to be killed - self.assertEqual(build_a.requested_action, 'deathrow') - self.assertFalse(build_b.requested_action) - self.assertFalse(build_a_last.requested_action) - self.assertFalse(children_b.requested_action) + self.assertTrue(build_a.to_kill) + self.assertFalse(build_b.to_kill) + self.assertFalse(build_a_last.to_kill) + self.assertFalse(children_b.to_kill) class TestGithubStatus(RunbotCase): diff --git a/runbot/views/build_views.xml b/runbot/views/build_views.xml index 25532541f..ce996e5b4 100644 --- a/runbot/views/build_views.xml +++ b/runbot/views/build_views.xml @@ -65,7 +65,7 @@ - +