-
-
Notifications
You must be signed in to change notification settings - Fork 136
Refresh preloaded disposables on outdated volumes #707
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -471,46 +471,80 @@ async def from_appvm(cls, appvm, preload=False, **kwargs): | |
|
|
||
| if not preload and appvm.can_preload(): | ||
| # Not necessary to await for this event as its intent is to fill | ||
| # gaps and not relevant for this run. | ||
| # gaps and not relevant for this run. Delay to not affect this run. | ||
| asyncio.ensure_future( | ||
| appvm.fire_event_async( | ||
| "domain-preload-dispvm-start", reason="there is a gap" | ||
| "domain-preload-dispvm-start", | ||
| reason="there is a gap", | ||
| delay=5, | ||
| ) | ||
| ) | ||
|
|
||
| if not preload and (preload_dispvm := appvm.get_feat_preload()): | ||
| dispvm = app.domains[preload_dispvm[0]] | ||
| dispvm.log.info("Requesting preloaded qube") | ||
| # The property "preload_requested" offloads "preload-dispvm" and | ||
| # thus avoids various race condition: | ||
| # - Decreasing maximum feature will not remove the qube; | ||
| # - Another request to this function will not return the same qube. | ||
| dispvm.features["preload-dispvm-in-progress"] = True | ||
| appvm.remove_preload_from_list([dispvm.name]) | ||
| dispvm.preload_requested = True | ||
| app.save() | ||
| timeout = int(dispvm.qrexec_timeout * 1.2) | ||
| try: | ||
| if not dispvm.features.get("preload-dispvm-completed", False): | ||
| dispvm.log.info( | ||
| "Waiting preload completion with '%s' seconds timeout", | ||
| timeout, | ||
| dispvm = None | ||
| for item in preload_dispvm: | ||
| qube = app.domains[item] | ||
| if any(vol.is_outdated() for vol in qube.volumes.values()): | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to be clear of why this is needed, receiving the event |
||
| qube.log.warning( | ||
| "Requested preloaded qube but it is outdated, trying " | ||
| "another one if available" | ||
| ) | ||
| async with asyncio.timeout(timeout): | ||
| await dispvm.preload_complete.wait() | ||
| if dispvm.is_paused(): | ||
| await dispvm.unpause() | ||
| else: | ||
| dispvm.use_preload() | ||
| # The gap is filled after the delay set by the | ||
| # 'domain-shutdown' of its ancestors. Not refilling now to | ||
| # deliver a disposable faster. | ||
| appvm.remove_preload_from_list([qube.name]) | ||
| # Delay to not affect this run. | ||
| asyncio.ensure_future( | ||
| qube.delay(delay=2, coros=[qube.cleanup()]) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 2 seconds seems to be enough according to qubesd logs. |
||
| ) | ||
| continue | ||
| dispvm = qube | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This loop doesn't stop checking. There should be some break/continue to stop at first non-outdated.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
| break | ||
|
Comment on lines
+501
to
+502
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should it be under
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't test the latest version since the TODOs yet, focusing on the benchmark PR to test delays on different stages (somehow put a delay in those places).
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Anyway. fixed. |
||
| if dispvm: | ||
| dispvm.log.info("Requesting preloaded qube") | ||
| # The property "preload_requested" offloads "preload-dispvm" | ||
| # and thus avoids various race condition: | ||
| # - Decreasing maximum feature will not remove the qube; | ||
| # - Another request to this function will not return the same | ||
| # qube. | ||
| dispvm.features["preload-dispvm-in-progress"] = True | ||
| appvm.remove_preload_from_list([dispvm.name]) | ||
| dispvm.preload_requested = True | ||
| app.save() | ||
| return dispvm | ||
| except asyncio.TimeoutError: | ||
| dispvm.log.warning( | ||
| "Requested preloaded qube but failed to finish preloading " | ||
| "after '%d' seconds, falling back to normal disposable", | ||
| int(timeout), | ||
| timeout = int(dispvm.qrexec_timeout * 1.2) | ||
| try: | ||
| if not dispvm.features.get( | ||
| "preload-dispvm-completed", False | ||
| ): | ||
| dispvm.log.info( | ||
| "Waiting preload completion with '%s' seconds " | ||
| "timeout", | ||
| timeout, | ||
| ) | ||
| async with asyncio.timeout(timeout): | ||
| await dispvm.preload_complete.wait() | ||
| if dispvm.is_paused(): | ||
| await dispvm.unpause() | ||
| else: | ||
| dispvm.use_preload() | ||
| app.save() | ||
| return dispvm | ||
| except asyncio.TimeoutError: | ||
| dispvm.log.warning( | ||
| "Requested preloaded qube but failed to finish " | ||
| "preloading after '%d' seconds, falling back to normal " | ||
| "disposable", | ||
| int(timeout), | ||
| ) | ||
| # Delay to not affect this run. | ||
| asyncio.ensure_future( | ||
| dispvm.delay(delay=2, coros=[dispvm.cleanup()]) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 2 seconds seems to be enough according to qubesd logs. |
||
| ) | ||
| else: | ||
| appvm.log.warning( | ||
| "Found only outdated preloaded qube(s), falling back to " | ||
| "normal disposable" | ||
| ) | ||
| asyncio.ensure_future(dispvm.cleanup()) | ||
|
|
||
| dispvm = app.add_new_vm( | ||
| cls, template=appvm, auto_cleanup=True, **kwargs | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,7 +19,7 @@ | |
| # with this program; if not, see <http://www.gnu.org/licenses/>. | ||
|
|
||
| import asyncio | ||
| from typing import Optional | ||
| from typing import Optional, Union | ||
|
|
||
| import qubes.config | ||
| import qubes.events | ||
|
|
@@ -87,6 +87,27 @@ def on_domain_loaded(self, event): # pylint: disable=unused-argument | |
| if changes: | ||
| self.app.save() | ||
|
|
||
| @qubes.events.handler("domain-pre-start") | ||
| def __on_domain_pre_start(self, event, **kwargs): | ||
| """Prevents startup for domain having a volume with disabled snapshots | ||
| and a DispVM based on this volume started | ||
| """ | ||
| # pylint: disable=unused-argument | ||
| volume_with_disabled_snapshots = False | ||
| for vol in self.volumes.values(): | ||
| volume_with_disabled_snapshots |= vol.snapshots_disabled | ||
|
|
||
| if not volume_with_disabled_snapshots: | ||
| return | ||
|
|
||
| for vm in self.dispvms: | ||
| if vm.is_running(): | ||
| raise qubes.exc.QubesVMNotHaltedError(vm) | ||
|
|
||
| @qubes.events.handler("domain-shutdown") | ||
| async def on_dvmtemplate_domain_shutdown(self, _event, **_kwargs): | ||
| await self.refresh_preload() | ||
|
|
||
| @qubes.events.handler("domain-feature-delete:preload-dispvm-max") | ||
| def on_feature_delete_preload_dispvm_max( | ||
| self, event, feature | ||
|
|
@@ -115,23 +136,6 @@ def on_feature_pre_set_preload_dispvm_max( | |
| "Invalid preload-dispvm-max value: not a digit" | ||
| ) | ||
|
|
||
| @qubes.events.handler("domain-pre-start") | ||
| def __on_domain_pre_start(self, event, **kwargs): | ||
| """Prevents startup for domain having a volume with disabled snapshots | ||
| and a DispVM based on this volume started | ||
| """ | ||
| # pylint: disable=unused-argument | ||
| volume_with_disabled_snapshots = False | ||
| for vol in self.volumes.values(): | ||
| volume_with_disabled_snapshots |= vol.snapshots_disabled | ||
|
|
||
| if not volume_with_disabled_snapshots: | ||
| return | ||
|
|
||
| for vm in self.dispvms: | ||
| if vm.is_running(): | ||
| raise qubes.exc.QubesVMNotHaltedError(vm) | ||
|
|
||
| @qubes.events.handler("domain-feature-set:preload-dispvm-max") | ||
| def on_feature_set_preload_dispvm_max( | ||
| self, event, feature, value, oldvalue=None | ||
|
|
@@ -249,22 +253,40 @@ def __on_property_set_template(self, event, name, newvalue, oldvalue=None): | |
| "domain-preload-dispvm-start", | ||
| ) | ||
| async def on_domain_preload_dispvm_used( | ||
| self, event, **kwargs | ||
| ): # pylint: disable=unused-argument | ||
| self, | ||
| event: str, | ||
| dispvm: Optional[qubes.vm.BaseVM] = None, | ||
| reason: Optional[str] = None, | ||
| delay: Union[int, float] = 0, | ||
| **kwargs, # pylint: disable=unused-argument | ||
| ) -> None: | ||
| """ | ||
| Preloads on vacancy and offloads on excess. If the event suffix is | ||
| Offloads on excess and preload on vacancy. | ||
| ``autostart``, the preloaded list is emptied before preloading. | ||
|
|
||
| :param event: event which was fired | ||
| :param str event: Event which was fired. Events have the prefix \ | ||
| ``domain-preload-dispvm-``. If the suffix is ``autostart``, the \ | ||
| preload list is emptied before attempting to preload. If the \ | ||
| suffix is ``used`` or ``start``, tries to preload until it fills \ | ||
| gaps. | ||
| :param qubes.vm.dispvm.DispVM dispvm: Disposable that was used | ||
| :param str reason: Why the event was fired | ||
| :param float delay: Proceed only after sleeping that many seconds | ||
| """ | ||
| assert isinstance(self, qubes.vm.BaseVM) | ||
| event = event.removeprefix("domain-preload-dispvm-") | ||
| event_log = "Received preload event '%s'" % str(event) | ||
| if event == "used": | ||
| event_log += " for dispvm '%s'" % str(kwargs.get("dispvm")) | ||
| if "reason" in kwargs: | ||
| event_log += " because %s" % str(kwargs.get("reason")) | ||
| if event == "used" and dispvm: | ||
| event_log += " for dispvm '%s'" % str(dispvm) | ||
| if reason: | ||
| event_log += " because %s" % str(reason) | ||
| if delay: | ||
| event_log += " with a delay of %s second(s)" % f"{delay:.1f}" | ||
| self.log.info(event_log) | ||
|
|
||
| if delay: | ||
| await asyncio.sleep(delay) | ||
|
|
||
| if event == "autostart": | ||
| self.remove_preload_excess(0) | ||
| elif not self.can_preload(): | ||
|
|
@@ -329,7 +351,7 @@ def get_feat_global_preload_max(self) -> Optional[int]: | |
| def get_feat_preload_max(self, force_local=False) -> int: | ||
| """Get the ``preload-dispvm-max`` feature as an integer. | ||
|
|
||
| :param force_local: ignore global setting. | ||
| :param bool force_local: ignore global setting. | ||
| """ | ||
| assert isinstance(self, qubes.vm.BaseVM) | ||
| feature = "preload-dispvm-max" | ||
|
|
@@ -370,10 +392,32 @@ def can_preload(self) -> bool: | |
| return True | ||
| return False | ||
|
|
||
| async def refresh_preload(self) -> None: | ||
| assert isinstance(self, qubes.vm.BaseVM) | ||
| outdated = [] | ||
| for qube in self.dispvms: | ||
| if not qube.is_preload or not any( | ||
| vol.is_outdated() for vol in qube.volumes.values() | ||
| ): | ||
| continue | ||
| outdated.append(qube) | ||
| self.remove_preload_from_list([qube.name]) | ||
| if outdated: | ||
| tasks = [self.app.domains[qube].cleanup() for qube in outdated] | ||
| asyncio.ensure_future(asyncio.gather(*tasks)) | ||
| # Delay to not overload the system with cleanup+preload. | ||
| asyncio.ensure_future( | ||
| self.fire_event_async( | ||
| "domain-preload-dispvm-start", | ||
| reason="of outdated volume(s)", | ||
| delay=4, | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This delay gives enough time for the old preloads to be cleaned up before trying to preload. It is intended to avoid overload of the system because cleanup and preload can be a bit intensive when combined with many preloaded disposables. There is no perfect delay here, basing of what my tests told were enough, but my hardware is not the same as any other user's hardware. |
||
| ) | ||
| ) | ||
|
|
||
| def remove_preload_from_list(self, disposables: list[str]) -> None: | ||
| """Removes list of preload qubes from the list. | ||
|
|
||
| :param disposables: disposable names to remove from the preloaded list. | ||
| :param list[str] disposables: disposable names to remove from list. | ||
| """ | ||
| assert isinstance(self, qubes.vm.BaseVM) | ||
| old_preload = self.get_feat_preload() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is also relevant on refresh because the old preloads might be removed already and there is a gap, but because of the delay in
refresh_preload, there is a higher chance of this event being called. Therefore, add a delay to not attempt preload.