From fcbbac9830802e3a361590a2654b4dc6ba0cecd1 Mon Sep 17 00:00:00 2001 From: Ben Grande Date: Tue, 22 Jul 2025 15:32:08 +0200 Subject: [PATCH] Refresh preloaded disposables on outdated volumes Reduce load when viable with delays, which can help on the current run or overall health of the system to not refreshing (cleanup+preload) in a short time. Fixes: https://github.com/QubesOS/qubes-issues/issues/10026 For: https://github.com/QubesOS/qubes-issues/issues/1512 --- doc/qubes-vm/index.rst | 4 ++ qubes/tests/integ/dispvm.py | 26 ++++++++++ qubes/tests/vm/dispvm.py | 5 +- qubes/vm/dispvm.py | 96 +++++++++++++++++++++++----------- qubes/vm/mix/dvmtemplate.py | 100 ++++++++++++++++++++++++++---------- qubes/vm/qubesvm.py | 11 ++++ qubes/vm/templatevm.py | 13 ++++- 7 files changed, 194 insertions(+), 61 deletions(-) diff --git a/doc/qubes-vm/index.rst b/doc/qubes-vm/index.rst index 755d6c0a7..046d6e327 100644 --- a/doc/qubes-vm/index.rst +++ b/doc/qubes-vm/index.rst @@ -63,6 +63,10 @@ Helper classes and functions :members: :show-inheritance: +.. autoclass:: qubes.vm.mix.dvmtemplate.DVMTemplateMixin + :members: + :show-inheritance: + Particular VM classes ^^^^^^^^^^^^^^^^^^^^^ diff --git a/qubes/tests/integ/dispvm.py b/qubes/tests/integ/dispvm.py index 23e282e82..f9cfcde23 100644 --- a/qubes/tests/integ/dispvm.py +++ b/qubes/tests/integ/dispvm.py @@ -692,6 +692,32 @@ async def _test_018_preload_global(self): self.log_preload() logger.info("end") + def test_019_preload_refresh(self): + """Refresh preload on volume change.""" + self.loop.run_until_complete(self._test_019_preload_refresh()) + + async def _test_019_preload_refresh(self): + logger.info("start") + self.log_preload() + preload_max = 1 + + self.disp_base.features["preload-dispvm-max"] = str(preload_max) + for qube in [self.disp_base, self.disp_base.template]: + await self.wait_preload(preload_max) + old_preload = self.disp_base.get_feat_preload() + await qube.start() + logger.info("shutdown '%s'", qube.name) + await qube.shutdown(wait=True) + await self.wait_preload(preload_max) + preload_dispvm = self.disp_base.get_feat_preload() + self.assertTrue( + set(old_preload).isdisjoint(preload_dispvm), + f"old_preload={old_preload} preload_dispvm={preload_dispvm}", + ) + + self.log_preload() + logger.info("end") + @unittest.skipUnless( spawn.find_executable("xdotool"), "xdotool not installed" ) diff --git a/qubes/tests/vm/dispvm.py b/qubes/tests/vm/dispvm.py index 28847aec3..49b016433 100644 --- a/qubes/tests/vm/dispvm.py +++ b/qubes/tests/vm/dispvm.py @@ -206,6 +206,7 @@ def test_000_from_appvm_preload_use( mock_qube.name = dispvm.name mock_qube.features = dispvm.features mock_qube.unpause = self.mock_coro + mock_qube.volumes = {} fresh_dispvm = self.loop.run_until_complete( qubes.vm.dispvm.DispVM.from_appvm(self.appvm) ) @@ -265,7 +266,9 @@ def test_000_from_appvm_preload_fill_gap( ) assert mock_events.mock_calls == [ mock.call( - "domain-preload-dispvm-start", reason=unittest.mock.ANY + "domain-preload-dispvm-start", + reason=unittest.mock.ANY, + delay=unittest.mock.ANY, ), mock.call("domain-create-on-disk"), ] diff --git a/qubes/vm/dispvm.py b/qubes/vm/dispvm.py index d7978e9bd..c057e9922 100644 --- a/qubes/vm/dispvm.py +++ b/qubes/vm/dispvm.py @@ -471,46 +471,80 @@ async def from_appvm(cls, appvm, preload=False, **kwargs): if not preload and appvm.can_preload(): # Not necessary to await for this event as its intent is to fill - # gaps and not relevant for this run. + # gaps and not relevant for this run. Delay to not affect this run. asyncio.ensure_future( appvm.fire_event_async( - "domain-preload-dispvm-start", reason="there is a gap" + "domain-preload-dispvm-start", + reason="there is a gap", + delay=5, ) ) if not preload and (preload_dispvm := appvm.get_feat_preload()): - dispvm = app.domains[preload_dispvm[0]] - dispvm.log.info("Requesting preloaded qube") - # The property "preload_requested" offloads "preload-dispvm" and - # thus avoids various race condition: - # - Decreasing maximum feature will not remove the qube; - # - Another request to this function will not return the same qube. - dispvm.features["preload-dispvm-in-progress"] = True - appvm.remove_preload_from_list([dispvm.name]) - dispvm.preload_requested = True - app.save() - timeout = int(dispvm.qrexec_timeout * 1.2) - try: - if not dispvm.features.get("preload-dispvm-completed", False): - dispvm.log.info( - "Waiting preload completion with '%s' seconds timeout", - timeout, + dispvm = None + for item in preload_dispvm: + qube = app.domains[item] + if any(vol.is_outdated() for vol in qube.volumes.values()): + qube.log.warning( + "Requested preloaded qube but it is outdated, trying " + "another one if available" ) - async with asyncio.timeout(timeout): - await dispvm.preload_complete.wait() - if dispvm.is_paused(): - await dispvm.unpause() - else: - dispvm.use_preload() + # The gap is filled after the delay set by the + # 'domain-shutdown' of its ancestors. Not refilling now to + # deliver a disposable faster. + appvm.remove_preload_from_list([qube.name]) + # Delay to not affect this run. + asyncio.ensure_future( + qube.delay(delay=2, coros=[qube.cleanup()]) + ) + continue + dispvm = qube + break + if dispvm: + dispvm.log.info("Requesting preloaded qube") + # The property "preload_requested" offloads "preload-dispvm" + # and thus avoids various race condition: + # - Decreasing maximum feature will not remove the qube; + # - Another request to this function will not return the same + # qube. + dispvm.features["preload-dispvm-in-progress"] = True + appvm.remove_preload_from_list([dispvm.name]) + dispvm.preload_requested = True app.save() - return dispvm - except asyncio.TimeoutError: - dispvm.log.warning( - "Requested preloaded qube but failed to finish preloading " - "after '%d' seconds, falling back to normal disposable", - int(timeout), + timeout = int(dispvm.qrexec_timeout * 1.2) + try: + if not dispvm.features.get( + "preload-dispvm-completed", False + ): + dispvm.log.info( + "Waiting preload completion with '%s' seconds " + "timeout", + timeout, + ) + async with asyncio.timeout(timeout): + await dispvm.preload_complete.wait() + if dispvm.is_paused(): + await dispvm.unpause() + else: + dispvm.use_preload() + app.save() + return dispvm + except asyncio.TimeoutError: + dispvm.log.warning( + "Requested preloaded qube but failed to finish " + "preloading after '%d' seconds, falling back to normal " + "disposable", + int(timeout), + ) + # Delay to not affect this run. + asyncio.ensure_future( + dispvm.delay(delay=2, coros=[dispvm.cleanup()]) + ) + else: + appvm.log.warning( + "Found only outdated preloaded qube(s), falling back to " + "normal disposable" ) - asyncio.ensure_future(dispvm.cleanup()) dispvm = app.add_new_vm( cls, template=appvm, auto_cleanup=True, **kwargs diff --git a/qubes/vm/mix/dvmtemplate.py b/qubes/vm/mix/dvmtemplate.py index 6ae95b606..9bb502107 100644 --- a/qubes/vm/mix/dvmtemplate.py +++ b/qubes/vm/mix/dvmtemplate.py @@ -19,7 +19,7 @@ # with this program; if not, see . import asyncio -from typing import Optional +from typing import Optional, Union import qubes.config import qubes.events @@ -87,6 +87,27 @@ def on_domain_loaded(self, event): # pylint: disable=unused-argument if changes: self.app.save() + @qubes.events.handler("domain-pre-start") + def __on_domain_pre_start(self, event, **kwargs): + """Prevents startup for domain having a volume with disabled snapshots + and a DispVM based on this volume started + """ + # pylint: disable=unused-argument + volume_with_disabled_snapshots = False + for vol in self.volumes.values(): + volume_with_disabled_snapshots |= vol.snapshots_disabled + + if not volume_with_disabled_snapshots: + return + + for vm in self.dispvms: + if vm.is_running(): + raise qubes.exc.QubesVMNotHaltedError(vm) + + @qubes.events.handler("domain-shutdown") + async def on_dvmtemplate_domain_shutdown(self, _event, **_kwargs): + await self.refresh_preload() + @qubes.events.handler("domain-feature-delete:preload-dispvm-max") def on_feature_delete_preload_dispvm_max( self, event, feature @@ -115,23 +136,6 @@ def on_feature_pre_set_preload_dispvm_max( "Invalid preload-dispvm-max value: not a digit" ) - @qubes.events.handler("domain-pre-start") - def __on_domain_pre_start(self, event, **kwargs): - """Prevents startup for domain having a volume with disabled snapshots - and a DispVM based on this volume started - """ - # pylint: disable=unused-argument - volume_with_disabled_snapshots = False - for vol in self.volumes.values(): - volume_with_disabled_snapshots |= vol.snapshots_disabled - - if not volume_with_disabled_snapshots: - return - - for vm in self.dispvms: - if vm.is_running(): - raise qubes.exc.QubesVMNotHaltedError(vm) - @qubes.events.handler("domain-feature-set:preload-dispvm-max") def on_feature_set_preload_dispvm_max( self, event, feature, value, oldvalue=None @@ -249,22 +253,40 @@ def __on_property_set_template(self, event, name, newvalue, oldvalue=None): "domain-preload-dispvm-start", ) async def on_domain_preload_dispvm_used( - self, event, **kwargs - ): # pylint: disable=unused-argument + self, + event: str, + dispvm: Optional[qubes.vm.BaseVM] = None, + reason: Optional[str] = None, + delay: Union[int, float] = 0, + **kwargs, # pylint: disable=unused-argument + ) -> None: """ - Preloads on vacancy and offloads on excess. If the event suffix is + Offloads on excess and preload on vacancy. ``autostart``, the preloaded list is emptied before preloading. - :param event: event which was fired + :param str event: Event which was fired. Events have the prefix \ + ``domain-preload-dispvm-``. If the suffix is ``autostart``, the \ + preload list is emptied before attempting to preload. If the \ + suffix is ``used`` or ``start``, tries to preload until it fills \ + gaps. + :param qubes.vm.dispvm.DispVM dispvm: Disposable that was used + :param str reason: Why the event was fired + :param float delay: Proceed only after sleeping that many seconds """ + assert isinstance(self, qubes.vm.BaseVM) event = event.removeprefix("domain-preload-dispvm-") event_log = "Received preload event '%s'" % str(event) - if event == "used": - event_log += " for dispvm '%s'" % str(kwargs.get("dispvm")) - if "reason" in kwargs: - event_log += " because %s" % str(kwargs.get("reason")) + if event == "used" and dispvm: + event_log += " for dispvm '%s'" % str(dispvm) + if reason: + event_log += " because %s" % str(reason) + if delay: + event_log += " with a delay of %s second(s)" % f"{delay:.1f}" self.log.info(event_log) + if delay: + await asyncio.sleep(delay) + if event == "autostart": self.remove_preload_excess(0) elif not self.can_preload(): @@ -329,7 +351,7 @@ def get_feat_global_preload_max(self) -> Optional[int]: def get_feat_preload_max(self, force_local=False) -> int: """Get the ``preload-dispvm-max`` feature as an integer. - :param force_local: ignore global setting. + :param bool force_local: ignore global setting. """ assert isinstance(self, qubes.vm.BaseVM) feature = "preload-dispvm-max" @@ -370,10 +392,32 @@ def can_preload(self) -> bool: return True return False + async def refresh_preload(self) -> None: + assert isinstance(self, qubes.vm.BaseVM) + outdated = [] + for qube in self.dispvms: + if not qube.is_preload or not any( + vol.is_outdated() for vol in qube.volumes.values() + ): + continue + outdated.append(qube) + self.remove_preload_from_list([qube.name]) + if outdated: + tasks = [self.app.domains[qube].cleanup() for qube in outdated] + asyncio.ensure_future(asyncio.gather(*tasks)) + # Delay to not overload the system with cleanup+preload. + asyncio.ensure_future( + self.fire_event_async( + "domain-preload-dispvm-start", + reason="of outdated volume(s)", + delay=4, + ) + ) + def remove_preload_from_list(self, disposables: list[str]) -> None: """Removes list of preload qubes from the list. - :param disposables: disposable names to remove from the preloaded list. + :param list[str] disposables: disposable names to remove from list. """ assert isinstance(self, qubes.vm.BaseVM) old_preload = self.get_feat_preload() diff --git a/qubes/vm/qubesvm.py b/qubes/vm/qubesvm.py index 432111331..841348f98 100644 --- a/qubes/vm/qubesvm.py +++ b/qubes/vm/qubesvm.py @@ -31,6 +31,8 @@ import string import subprocess +from typing import Awaitable + import libvirt # pylint: disable=import-error import lxml.etree @@ -2900,6 +2902,15 @@ def get_pref_mem(self): return qubes.qmemman.algo.pref_mem(domain) / 1024 + async def delay(self, delay: float | int, coros: list[Awaitable]) -> None: + self.log.debug( + "Scheduled awaitables to run after '%s' seconds: %s", + f"{delay:.1f}", + repr(coros), + ) + await asyncio.sleep(delay) + await asyncio.gather(*coros) + def _clean_volume_config(config): common_attributes = [ diff --git a/qubes/vm/templatevm.py b/qubes/vm/templatevm.py index 472683377..53b994e1f 100644 --- a/qubes/vm/templatevm.py +++ b/qubes/vm/templatevm.py @@ -19,7 +19,7 @@ # License along with this library; if not, see . # -""" This module contains the TemplateVM implementation """ +"""This module contains the TemplateVM implementation""" import qubes import qubes.config @@ -108,6 +108,17 @@ def __init__(self, *args, **kwargs): } super().__init__(*args, **kwargs) + @qubes.events.handler("domain-shutdown") + async def on_template_domain_shutdown(self, _event, **_kwargs): + appvms = [ + qube + for qube in self.app.domains + if getattr(qube, "template", None) == self + and getattr(qube, "template_for_dispvms", False) + ] + for qube in appvms: + await qube.refresh_preload() + @qubes.events.handler("domain-feature-set:boot-mode.appvm-default") def on_feature_bootmode_appvm_set( self, event, feature, value, oldvalue=None