From 2d5853f5b85a79eb655de558ba683fade2256a25 Mon Sep 17 00:00:00 2001 From: Nihal <121309701+nihalxkumar@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:22:35 +0530 Subject: [PATCH 1/3] qvm-template-upgrade: add template clone upgrade workflow Add qvm-template-upgrade as an initial safe upgrade workflow for TemplateVMs and StandaloneVMs. The command validates the source qube, derives the next distro-version clone name, clones the source, updates template metadata, and cleans up failed clones unless explicitly asked to keep them. The version-upgrade agent hook remains a stub for now, so the command can land the orchestration, rollback behavior, and tests without pretending to perform in-VM distro upgrades yet. Fixes: https://github.com/QubesOS/qubes-issues/issues/8605 --- setup.py | 6 +- vmupdate/template_upgrade.py | 399 ++++++++++++++++++++++++ vmupdate/tests/test_template_upgrade.py | 347 +++++++++++++++++++++ 3 files changed, 751 insertions(+), 1 deletion(-) create mode 100644 vmupdate/template_upgrade.py create mode 100644 vmupdate/tests/test_template_upgrade.py diff --git a/setup.py b/setup.py index 576b639..dbc6de2 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,10 @@ url="https://www.qubes-os.org/", packages=setuptools.find_packages(include=("vmupdate", "vmupdate*")), entry_points={ - "console_scripts": "qubes-vm-update = vmupdate.vmupdate:main", + "console_scripts": [ + "qubes-vm-update = vmupdate.vmupdate:main", + "qvm-template-upgrade = " + "vmupdate.template_upgrade:main", + ], }, ) diff --git a/vmupdate/template_upgrade.py b/vmupdate/template_upgrade.py new file mode 100644 index 0000000..8466499 --- /dev/null +++ b/vmupdate/template_upgrade.py @@ -0,0 +1,399 @@ +#!/usr/bin/python3 +""" +qvm-template-upgrade — perform an N -> N+1 distro version upgrade of a qube + +Workflow: + 1. Validate that --template names an existing TemplateVM or StandaloneVM. + 2. Read os-distribution / os-version from qvm-features. + 3. Compute the target version as os-version + 1 (N -> N+1 is the + only supported scope; multi-hop is rejected by construction). + 4. Clone the qube to a new name derived from the target version. + 5. Run the in-VM version-upgrade agent inside the clone + (reuses the vmupdate qrexec transport — currently stubbed). + 6. On success: update template metadata features on the clone. + 7. On failure: remove the half-upgraded clone unless + --keep-new-on-failure. + +The original qube is never touched by this tool. AppVMs based on a source +template continue to use it until the user manually switches them and +uninstalls the old template. +""" + +import argparse +import logging +import sys +from datetime import datetime, timezone +from typing import Optional, Sequence, Tuple + +import qubesadmin +import qubesadmin.app +import qubesadmin.exc +import qubesadmin.tools +import qubesadmin.vm + +from vmupdate.agent.source.common.exit_codes import EXIT + +LOG_PATH = "/var/log/qubes/qvm-template-upgrade.log" +LOG_FORMAT = "%(asctime)s %(levelname)s %(message)s" + +SUPPORTED_DISTROS = {"fedora", "debian"} +SUPPORTED_CLASSES = {"TemplateVM", "StandaloneVM"} + +DATE_FMT = "%Y-%m-%d %H:%M:%S" + + +class UpgradeError(Exception): + """Failure during the upgrade run itself.""" + + +class ValidationError(Exception): + """Invalid user input or unsupported source qube.""" + + +def compute_target_version(current: str) -> str: + """Return current + 1 as the target distro version. + + Non-integer versions are rejected here. + """ + try: + current_n = int(current) + except ValueError as exc: + raise ValidationError( + f"Non-numeric distro version {current!r}; multi-component " + f"versions (e.g. Debian point releases) are not yet supported " + f"by this tool." + ) from exc + return str(current_n + 1) + + +def derive_clone_name( + source_name: str, + current_version: str, + target_version: str, + override: Optional[str], +) -> str: + """Replace the version in the source name with the target version. + + Examples: + fedora-41, 41 -> 42 => fedora-42 + + fedora-41-minimal, 41 -> 42 => fedora-42-minimal + + custom, 41 -> 42 => custom-42 + """ + if override: + return override + if current_version not in source_name: + return f"{source_name}-{target_version}" + # Replace only the last occurrence (e.g. fedora-41-extras-41 stays sane). + head, _, tail = source_name.rpartition(current_version) + return f"{head}{target_version}{tail}" + + +# Argument parsing / logging + + +def get_parser() -> qubesadmin.tools.QubesArgumentParser: + parser = qubesadmin.tools.QubesArgumentParser( + prog="qvm-template-upgrade", + description="Upgrade a TemplateVM or StandaloneVM to the next distro " + "version.", + # Disable --version: this tool has no standalone version, and the + # default path reads qubesadmin's package metadata, which is absent + # when qubesadmin is only on PYTHONPATH (e.g. in tests/CI). + version="", + ) + parser.add_argument( + "--template", + required=True, + help="Name of the source TemplateVM or StandaloneVM to upgrade.", + ) + parser.add_argument( + "--new-name", + help="Name for the upgraded clone. Defaults to replacing the version " + "suffix in the source name (e.g. fedora-41 -> fedora-42).", + ) + parser.add_argument( + "--keep-new-on-failure", + action="store_true", + help="Preserve the half-upgraded clone if the upgrade fails. " + "By default the clone is removed and the original remains.", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Validate inputs and print the planned actions; do not clone " + "or upgrade anything.", + ) + parser.add_argument( + "--log", default="INFO", help="Log level (default: INFO)." + ) + return parser + + +def parse_args( + argv: Optional[Sequence[str]] = None, + app: Optional[qubesadmin.app.QubesBase] = None, +) -> Tuple[qubesadmin.tools.QubesArgumentParser, argparse.Namespace]: + parser = get_parser() + return parser, parser.parse_args(argv, app=app) + + +def setup_logging(level: str) -> logging.Logger: + log = logging.getLogger("vm-template-upgrade") + log.setLevel(level) + # Don't let our messages also flow through the root logger. + log.propagate = False + # Idempotent: if main() is called more than once in the same process + # (embedded use, repeated CLI invocations in tests), skip re-adding + # handlers so output isn't duplicated. + if log.handlers: + return log + # Always log to stderr: so user sees progress even when the log file + # is unavailable (dev machine without /var/log/qubes, perms issues, etc.). + stderr = logging.StreamHandler(sys.stderr) + stderr.setFormatter(logging.Formatter("%(message)s")) + log.addHandler(stderr) + try: + handler = logging.FileHandler(LOG_PATH, encoding="utf-8") + handler.setFormatter(logging.Formatter(LOG_FORMAT)) + log.addHandler(handler) + except OSError as err: + log.warning("Could not open log file %s: %s", LOG_PATH, err) + return log + + +# Orchestrator + + +class TemplateUpgrader: + """Stateful orchestrator for one source qube upgrade.""" + + def __init__( + self, + app: qubesadmin.app.QubesBase, + args: argparse.Namespace, + log: logging.Logger, + ) -> None: + self.app = app + self.args = args + self.log = log + # Populated by validate(). qubesadmin ships no py.typed, so QubesVM + # resolves to Any for the type checker; the None default is fine and + # these are always set before any method that reads them runs. + self.source_vm: qubesadmin.vm.QubesVM = None + self.distro = "" + self.current_version = "" + self.target_version = "" + self.new_name = "" + # Populated by clone(); stays None until then so rollback() can tell + # whether there is anything to remove. + self.cloned_qube: qubesadmin.vm.QubesVM = None + + # validation + + def validate(self) -> None: + """Run all pre-flight checks. Populates planning attributes. + + Raises ValidationError on any input/setup problem. After this call, + self.source_vm / distro / current_version / target_version / new_name + are all set and the upgrade can proceed (or be reported via + describe_plan() for --dry-run). + """ + self.source_vm = self._resolve_source_qube() + self.distro, self.current_version = self._detect_distro() + self.target_version = compute_target_version(self.current_version) + self.new_name = derive_clone_name( + self.source_vm.name, + self.current_version, + self.target_version, + self.args.new_name, + ) + if self.new_name in self.app.domains: + raise ValidationError( + f"Target name {self.new_name!r} already exists. Remove it " + "first or pass a different --new-name." + ) + + def _resolve_source_qube(self) -> qubesadmin.vm.QubesVM: + try: + vm = self.app.domains[self.args.template] + except KeyError as exc: + raise ValidationError( + f"No such qube: {self.args.template}" + ) from exc + if vm.klass not in SUPPORTED_CLASSES: + raise ValidationError( + f"{vm.name} is a {vm.klass}; only TemplateVMs and " + f"StandaloneVMs can be upgraded with this tool." + ) + return vm + + def _detect_distro(self) -> Tuple[str, str]: + distro = self.source_vm.features.get("os-distribution") + distro_like = self.source_vm.features.get("os-distribution-like", "") + version = self.source_vm.features.get("os-version") + if not distro or not version: + raise ValidationError( + f"{self.source_vm.name} is missing os-distribution / " + f"os-version features. Start the qube once so the in-VM " + f"agent can report them, then retry." + ) + candidates = {distro.lower(), *distro_like.lower().split()} + supported = SUPPORTED_DISTROS & candidates + if not supported: + raise ValidationError( + f"Unsupported distro {distro!r}; only Fedora- and " + f"Debian-based qubes are supported for now." + ) + return sorted(supported)[0], version + + def describe_plan(self) -> str: + return ( + f"upgrade {self.source_vm.name} " + f"({self.distro} {self.current_version}) -> " + f"clone {self.new_name} " + f"({self.distro} {self.target_version})" + ) + + # execution + + def clone(self) -> None: + """Clone the source qube. Populates self.cloned_qube.""" + self.log.info("Cloning %s -> %s", self.source_vm.name, self.new_name) + self.cloned_qube = self.app.clone_vm(self.source_vm, self.new_name) + + def run_agent(self) -> None: + """Run the in-VM upgrade agent inside the clone. + + STUB: replaced in a follow-up commit by a dispatch into a new + `version_upgrade(target_version)` method on the existing + vmupdate agent (vmupdate/agent/source/{dnf,apt}/), reused via the + qrexec transport in qube_connection.py. The VM-side agent must + re-detect or verify the distro from inside the qube before running + distro-specific upgrade commands. + """ + raise NotImplementedError( + f"version-upgrade agent is not implemented yet for " + f"{self.cloned_qube.name} -> {self.target_version}" + ) + + def finalize(self) -> None: + """Write post-upgrade qvm-features on the clone. + + TemplateVM: always set template-name (required so qvm-template + recognises the upgraded clone as managed) and refresh + template-installtime. + + StandaloneVM: rewrite an existing template-name from the old to + the new release (e.g. fedora-41 -> fedora-42), keeping the value + compatible with qui.utils.check_support()'s EOL_DATES lookup + (which strips only -minimal / -xfce, not -standalone or the qube + name). We do not invent a template-name for standalones that + never had one, and we leave one in place that doesn't carry the + current version (can't safely transform). + """ + self.log.info("Updating metadata on %s", self.cloned_qube.name) + if self.cloned_qube.klass == "TemplateVM": + self.cloned_qube.features["template-name"] = self.cloned_qube.name + self.cloned_qube.features["template-installtime"] = datetime.now( + tz=timezone.utc + ).strftime(DATE_FMT) + return + old = self.cloned_qube.features.get("template-name") + if not old: + return + if self.current_version not in old: + # template-name doesn't carry the current version (custom + # value, manual edit) it is safer to leave it alone than to + # guess what the user intended. + self.log.info( + "Leaving standalone template-name=%r untouched " + "(no version substring to rewrite)", + old, + ) + return + new = derive_clone_name( + old, self.current_version, self.target_version, None + ) + self.cloned_qube.features["template-name"] = new + + def rollback(self) -> None: + """Remove the half-upgraded clone, if any. Safe to call repeatedly.""" + if self.cloned_qube is None: + return + self.log.warning("Removing failed clone %s", self.cloned_qube.name) + try: + del self.app.domains[self.cloned_qube.name] + except qubesadmin.exc.QubesException as err: + self.log.error( + "Could not remove failed clone %s: %s", + self.cloned_qube.name, + err, + ) + + +# CLI entry point + + +def main( + argv: Optional[Sequence[str]] = None, + app: Optional[qubesadmin.app.QubesBase] = None, +) -> int: + parser, args = parse_args(argv, app) + log = setup_logging(args.log) + upgrader = TemplateUpgrader(args.app, args, log) + + try: + upgrader.validate() + except ValidationError as err: + parser.print_error(str(err)) + return EXIT.ERR_USAGE + + log.info("Plan: %s", upgrader.describe_plan()) + + if args.dry_run: + print( + f"[dry-run] would clone {upgrader.source_vm.name} -> " + f"{upgrader.new_name} and upgrade {upgrader.distro} " + f"{upgrader.current_version} -> {upgrader.target_version}" + ) + return EXIT.OK + + try: + upgrader.clone() + except qubesadmin.exc.QubesException as err: + print(f"error: clone failed: {err}", file=sys.stderr) + return EXIT.ERR + + try: + upgrader.run_agent() + upgrader.finalize() + except ( + UpgradeError, + NotImplementedError, + qubesadmin.exc.QubesException, + ) as err: + log.error("Upgrade failed: %s", err) + if not args.keep_new_on_failure: + upgrader.rollback() + else: + log.info( + "Leaving clone %s in place (--keep-new-on-failure).", + upgrader.cloned_qube.name, + ) + print(f"error: {err}", file=sys.stderr) + return EXIT.ERR + + label = ( + "template" + if upgrader.cloned_qube.klass == "TemplateVM" + else "standalone" + ) + print(f"Upgrade complete. New {label}: {upgrader.cloned_qube.name}") + print(f"Original qube {upgrader.source_vm.name} is untouched.") + return EXIT.OK + + +if __name__ == "__main__": # pragma: no cover + sys.exit(main()) diff --git a/vmupdate/tests/test_template_upgrade.py b/vmupdate/tests/test_template_upgrade.py new file mode 100644 index 0000000..a8b030e --- /dev/null +++ b/vmupdate/tests/test_template_upgrade.py @@ -0,0 +1,347 @@ +#!/usr/bin/python3 +# coding=utf-8 +import logging +from unittest.mock import MagicMock, Mock + +import pytest + +import qubesadmin.exc + +from vmupdate import template_upgrade +from vmupdate.agent.source.common.exit_codes import EXIT +from vmupdate.tests.conftest import TestApp as _TestApp +from vmupdate.tests.conftest import TestVM as _TestVM + +# Captured at import time, before the quiet_logging autouse fixture can +# replace it. Tests that need to exercise the real setup_logging restore +# this reference explicitly. +_REAL_SETUP_LOGGING = template_upgrade.setup_logging + + +class CloneApp(_TestApp): + def __init__(self): + super().__init__() + self.clone_calls = [] + + def clone_vm(self, source_vm, new_name): + self.clone_calls.append((source_vm.name, new_name)) + clone = _TestVM(new_name, self, klass=source_vm.klass) + clone.features.update(source_vm.features) + return clone + + +def add_template(app, name="fedora-41", **features): + vm = _TestVM(name, app, klass="TemplateVM") + vm.features.update( + { + "os-distribution": "fedora", + "os-version": "41", + "template-name": name, + "template-epoch": "0", + "template-version": "41", + "template-release": "20250101", + "template-buildtime": "2025-01-01 00:00:00", + } + ) + vm.features.update(features) + return vm + + +def add_standalone(app, name="fedora-41-standalone", **features): + vm = _TestVM(name, app, klass="StandaloneVM") + vm.features.update( + { + "os-distribution": "fedora", + "os-version": "41", + } + ) + vm.features.update(features) + return vm + + +@pytest.fixture(autouse=True) +def quiet_logging(monkeypatch): + monkeypatch.setattr(template_upgrade, "setup_logging", lambda *_: Mock()) + + +@pytest.mark.parametrize( + "scenario, expected", + [ + ("missing-qube", "No such qube"), + ("non-template", "only TemplateVMs and StandaloneVMs"), + ("missing-os-version", "missing os-distribution / os-version"), + ("non-numeric-os-version", "Non-numeric distro version"), + ("unsupported-distro", "Unsupported distro"), + ], +) +def test_validation_errors(scenario, expected, capsys): + app = CloneApp() + template_name = "fedora-41" + if scenario == "non-template": + _TestVM(template_name, app, klass="AppVM", template=add_template(app)) + elif scenario == "missing-os-version": + add_template(app) + del app.domains[template_name].features["os-version"] + elif scenario == "non-numeric-os-version": + add_template(app, **{"os-version": "rawhide"}) + elif scenario == "unsupported-distro": + add_template(app, **{"os-distribution": "arch"}) + + retcode = template_upgrade.main(["--template", template_name], app) + + assert retcode == EXIT.ERR_USAGE + assert expected in capsys.readouterr().err + + +@pytest.mark.parametrize( + "source, current, target, override, expected", + [ + ("fedora-41", "41", "42", None, "fedora-42"), + ("debian-12", "12", "13", None, "debian-13"), + ("fedora-41-minimal", "41", "42", None, "fedora-42-minimal"), + ("custom", "41", "42", None, "custom-42"), + ("custom", "41", "42", "my-template", "my-template"), + ], +) +def test_clone_name_derivation(source, current, target, override, expected): + assert ( + template_upgrade.derive_clone_name(source, current, target, override) + == expected + ) + + +def test_dry_run_does_not_mutate(capsys): + app = CloneApp() + vm = add_template( + app, + "ubuntu-22", + **{ + "os-distribution": "ubuntu", + "os-distribution-like": "debian", + "os-version": "22", + } + ) + before = dict(vm.features) + + retcode = template_upgrade.main( + ["--template", "ubuntu-22", "--dry-run"], app + ) + + assert retcode == EXIT.OK + assert app.clone_calls == [] + assert vm.features == before + assert "would clone ubuntu-22 -> ubuntu-23" in capsys.readouterr().out + + +def test_success_applies_metadata(monkeypatch): + app = CloneApp() + add_template(app) + monkeypatch.setattr( + template_upgrade.TemplateUpgrader, "run_agent", lambda self: None + ) + + retcode = template_upgrade.main(["--template", "fedora-41"], app) + + assert retcode == EXIT.OK + clone = app.domains["fedora-42"] + assert clone.features["template-name"] == "fedora-42" + assert clone.features["template-installtime"] != app.domains[ + "fedora-41" + ].features.get("template-installtime") + assert clone.features["template-epoch"] == "0" + assert clone.features["template-version"] == "41" + assert clone.features["template-release"] == "20250101" + assert clone.features["template-buildtime"] == "2025-01-01 00:00:00" + assert clone.features["os-distribution"] == "fedora" + assert clone.features["os-version"] == "41" + + +def test_standalone_without_template_name_left_alone(monkeypatch): + """A standalone that never had template-name doesn't get one invented.""" + app = CloneApp() + add_standalone(app) + monkeypatch.setattr( + template_upgrade.TemplateUpgrader, "run_agent", lambda self: None + ) + + retcode = template_upgrade.main(["--template", "fedora-41-standalone"], app) + + assert retcode == EXIT.OK + clone = app.domains["fedora-42-standalone"] + assert clone.klass == "StandaloneVM" + assert "template-name" not in clone.features + assert "template-installtime" not in clone.features + + +def test_standalone_with_template_name_refreshed(monkeypatch): + """Refresh stale standalone template-name for updater EOL checks.""" + app = CloneApp() + add_standalone(app, **{"template-name": "fedora-41"}) + monkeypatch.setattr( + template_upgrade.TemplateUpgrader, "run_agent", lambda self: None + ) + + retcode = template_upgrade.main(["--template", "fedora-41-standalone"], app) + + assert retcode == EXIT.OK + clone = app.domains["fedora-42-standalone"] + # check_support() resolves this through EOL_DATES. + assert clone.features["template-name"] == "fedora-42" + # template-installtime is template-only; standalones don't get one. + assert "template-installtime" not in clone.features + + +def test_default_stub_fails_and_cleans_clone(capsys): + app = CloneApp() + add_template(app) + + retcode = template_upgrade.main(["--template", "fedora-41"], app) + + assert retcode == EXIT.ERR + assert "fedora-42" not in app.domains + assert "not implemented yet" in capsys.readouterr().err + + +@pytest.mark.parametrize( + "keep_on_failure, expect_clone_removed", + [ + (False, True), + (True, False), + ], +) +def test_failure_cleanup(monkeypatch, keep_on_failure, expect_clone_removed): + app = CloneApp() + add_template(app) + + def fail_agent(self): + raise template_upgrade.UpgradeError("agent failed") + + monkeypatch.setattr( + template_upgrade.TemplateUpgrader, "run_agent", fail_agent + ) + args = ["--template", "fedora-41"] + if keep_on_failure: + args.append("--keep-new-on-failure") + + retcode = template_upgrade.main(args, app) + + assert retcode == EXIT.ERR + assert ("fedora-42" not in app.domains) is expect_clone_removed + + +def test_rejects_existing_clone_name(capsys): + """If the target clone name already exists, validation fails before + anything is mutated.""" + app = CloneApp() + add_template(app) + add_template(app, name="fedora-42", **{"os-version": "42"}) + + retcode = template_upgrade.main(["--template", "fedora-41"], app) + + assert retcode == EXIT.ERR_USAGE + assert "already exists" in capsys.readouterr().err + assert app.clone_calls == [] + + +def test_standalone_template_name_without_version_is_left_alone(monkeypatch): + """Standalone whose template-name doesn't carry the current version + (custom string, manual edit) is left untouched.""" + app = CloneApp() + add_standalone(app, **{"template-name": "my-custom-base"}) + monkeypatch.setattr( + template_upgrade.TemplateUpgrader, "run_agent", lambda self: None + ) + + retcode = template_upgrade.main(["--template", "fedora-41-standalone"], app) + + assert retcode == EXIT.OK + clone = app.domains["fedora-42-standalone"] + assert clone.features["template-name"] == "my-custom-base" + + +def test_main_clone_failure(monkeypatch, capsys): + """If the Admin-API clone call raises, main() reports it as a runtime + error (EXIT.ERR), not a usage error.""" + app = CloneApp() + add_template(app) + + def boom(*_a, **_kw): + raise qubesadmin.exc.QubesException("storage pool full") + + monkeypatch.setattr(app, "clone_vm", boom) + + retcode = template_upgrade.main(["--template", "fedora-41"], app) + + assert retcode == EXIT.ERR + assert "clone failed: storage pool full" in capsys.readouterr().err + + +def test_rollback_noop_when_no_clone(): + """rollback() before clone() ran is a safe no-op.""" + upgrader = template_upgrade.TemplateUpgrader(CloneApp(), Mock(), Mock()) + upgrader.rollback() # must not raise + + +def test_rollback_handles_delete_failure(): + """If the Admin-API delete raises, rollback logs and swallows; the + caller has already decided the upgrade has failed, so re-raising would + just mask the original error. + """ + # dict's __delitem__ is looked up on the type, not the instance, so we + # use a MagicMock for app.domains (which supports __delitem__ as a side + # effect) instead of trying to patch the test-helper Domains dict. + app = MagicMock() + app.domains.__delitem__.side_effect = qubesadmin.exc.QubesException( + "VM is running" + ) + upgrader = template_upgrade.TemplateUpgrader(app, Mock(), Mock()) + upgrader.cloned_qube = Mock(name="fedora-42") + upgrader.cloned_qube.name = "fedora-42" + + upgrader.rollback() # must not raise + + upgrader.log.error.assert_called_once() + + +def _reset_template_upgrade_logger(): + logger = logging.getLogger("vm-template-upgrade") + logger.handlers.clear() + logger.propagate = True + + +def test_setup_logging_is_idempotent(tmp_path, monkeypatch): + """Calling setup_logging twice must not duplicate handlers.""" + monkeypatch.setattr(template_upgrade, "setup_logging", _REAL_SETUP_LOGGING) + monkeypatch.setattr( + template_upgrade, "LOG_PATH", str(tmp_path / "qvm-template-upgrade.log") + ) + _reset_template_upgrade_logger() + + log1 = template_upgrade.setup_logging("INFO") + handler_count = len(log1.handlers) + log2 = template_upgrade.setup_logging("INFO") + + assert log1 is log2 + assert len(log2.handlers) == handler_count + assert log2.propagate is False + + +def test_setup_logging_tolerates_missing_log_dir(tmp_path, monkeypatch): + """A missing log directory degrades to stderr-only, not a crash.""" + monkeypatch.setattr(template_upgrade, "setup_logging", _REAL_SETUP_LOGGING) + monkeypatch.setattr( + template_upgrade, + "LOG_PATH", + str(tmp_path / "nope" / "qvm-template-upgrade.log"), + ) + _reset_template_upgrade_logger() + + log = template_upgrade.setup_logging("INFO") + + # The file handler should have been skipped; stderr stays. + assert not any(isinstance(h, logging.FileHandler) for h in log.handlers) + assert any( + isinstance(h, logging.StreamHandler) + and not isinstance(h, logging.FileHandler) + for h in log.handlers + ) From d3ab85244555a8ecb7a325515dd331efca109f4c Mon Sep 17 00:00:00 2001 From: Nihal <121309701+nihalxkumar@users.noreply.github.com> Date: Sun, 14 Jun 2026 16:12:09 +0530 Subject: [PATCH 2/3] qvm-template-upgrade: add the dnf version-upgrade agent This adds the in-qube side of the distro version upgrade and connects it to the dom0 orchestrator. The package manager grows a version_upgrade entry point whose default fails loud, so families without a real path return an error. The dnf path re-reads the distribution from inside the qube, refuses anything that isn't a single-step move to a RedHat-family release, and runs distro-sync onto the target releasever. A --version-upgrade flag selects it, and value-bearing agent args are skipped when unset so a normal update never injects a bare "None". On dom0, run_agent drives the clone through the existing vmupdate qrexec transport, forwarding the agent's streamed output to the log so the user sees progress. On failure the clone is shut down before deletion. StandaloneVM clones keep their inherited template-* features untouched. --- vmupdate/agent/entrypoint.py | 36 +- vmupdate/agent/source/args.py | 14 +- .../agent/source/common/package_manager.py | 44 +++ vmupdate/agent/source/dnf/dnf_cli.py | 113 +++++++ vmupdate/template_upgrade.py | 151 +++++---- vmupdate/tests/test_template_upgrade.py | 99 +++++- vmupdate/tests/test_version_upgrade_agent.py | 312 ++++++++++++++++++ 7 files changed, 693 insertions(+), 76 deletions(-) create mode 100644 vmupdate/tests/test_version_upgrade_agent.py diff --git a/vmupdate/agent/entrypoint.py b/vmupdate/agent/entrypoint.py index 157e887..08c78b3 100755 --- a/vmupdate/agent/entrypoint.py +++ b/vmupdate/agent/entrypoint.py @@ -32,15 +32,35 @@ def main(args=None): os_data, log, log_handler, log_level, agent_type, args.no_progress ) - log.debug("Running upgrades.") - return_code = pkg_mng.upgrade( - refresh=not args.no_refresh, - hard_fail=not args.force_upgrade, - remove_obsolete=not args.leave_obsolete, - print_streams=args.show_output, - ) + if args.version_upgrade: + log.debug( + "Running distribution version upgrade to %s.", + args.version_upgrade, + ) + try: + return_code = pkg_mng.version_upgrade( + args.version_upgrade, print_streams=args.show_output + ) + except NotImplementedError as err: + log.error("Distribution version upgrade failed: %s", err) + print(str(err), file=sys.stderr) + return_code = EXIT.ERR_VM_UPDATE + else: + log.debug("Running upgrades.") + return_code = pkg_mng.upgrade( + refresh=not args.no_refresh, + hard_fail=not args.force_upgrade, + remove_obsolete=not args.leave_obsolete, + print_streams=args.show_output, + ) - if not pkg_mng.PROGRESS_REPORTING and not args.no_progress: + # A version upgrade emits its own 0/100 milestones, so only the normal + # update path needs the fallback "finished" tick for CLI managers. + if ( + not args.version_upgrade + and not pkg_mng.PROGRESS_REPORTING + and not args.no_progress + ): # even if progress reporting is unavailable we want info that update finished if agent_type is AgentType.UPDATE_VM: print(f"{55:.2f}", flush=True, file=sys.stderr) diff --git a/vmupdate/agent/source/args.py b/vmupdate/agent/source/args.py index 1090bb3..bf0f872 100644 --- a/vmupdate/agent/source/args.py +++ b/vmupdate/agent/source/args.py @@ -51,6 +51,13 @@ class AgentArgs: "action": "store_true", "help": "Only download packages", }, + ("--version-upgrade",): { + "action": "store", + "default": None, + "help": "Upgrade the distribution to the given next major " + "release, e.g. --version-upgrade 42. Without this flag a " + "normal same-release package update is performed.", + }, } EXCLUSIVE_OPTIONS_1 = { ("--show-output", "--verbose", "-v"): { @@ -104,5 +111,10 @@ def to_cli_args(args): if args_dict[param_name]: cli_args.append(keys[0]) else: - cli_args.extend((keys[0], args_dict[param_name])) + # Value-bearing options default to None when unset (e.g. + # --version-upgrade on a normal update). Skip those so we + # never inject a bare "None" into the agent command line. + arg_value = args_dict[param_name] + if arg_value is not None: + cli_args.extend((keys[0], str(arg_value))) return cli_args diff --git a/vmupdate/agent/source/common/package_manager.py b/vmupdate/agent/source/common/package_manager.py index 5c1ca9f..e57119c 100644 --- a/vmupdate/agent/source/common/package_manager.py +++ b/vmupdate/agent/source/common/package_manager.py @@ -78,6 +78,33 @@ def upgrade( print(result.err, file=sys.stderr, flush=True) return result.code + def version_upgrade( + self, + target_version: str, + print_streams: bool = False, + ) -> int: + """ + Upgrade the distribution to the next major release. + + Separate from `upgrade`, which only refreshes packages within the + current release. The family-specific work lives in + `_release_upgrade`. + + :param target_version: the major release to move to, e.g. "42" + :param print_streams: dump captured output to std streams + :return: return code (0 on success) + """ + result = self._release_upgrade(target_version) + self._log_output("version-upgrade", result) + # Logs are for the agent log; print_streams is only CLI passthrough. + # Avoid printing again when the subprocess already streamed live. + if print_streams and not result.posted: + if result.out: + print(result.out, flush=True) + if result.err: + print(result.err, file=sys.stderr, flush=True) + return result.code + def _upgrade( self, refresh: bool, @@ -312,6 +339,23 @@ def upgrade_internal(self, remove_obsolete: bool) -> ProcessResult: return self.run_cmd(cmd) + def _release_upgrade(self, target_version: str) -> ProcessResult: + """ + Perform a distribution release upgrade. + + Overridden per package-manager family. The default raises + NotImplementedError; callers (e.g. entrypoint.main) catch it and + decide how to report the unsupported path. + """ + raise self._missing_release_upgrade_error() + + def _missing_release_upgrade_error(self) -> NotImplementedError: + """Build the error used when a family has no release upgrade.""" + return NotImplementedError( + "Distribution version upgrade is not implemented for this " + f"package manager ({self.package_manager})." + ) + def clean(self) -> int: """ Clean cache files of package manager. diff --git a/vmupdate/agent/source/dnf/dnf_cli.py b/vmupdate/agent/source/dnf/dnf_cli.py index afa8d1d..9ef58f7 100644 --- a/vmupdate/agent/source/dnf/dnf_cli.py +++ b/vmupdate/agent/source/dnf/dnf_cli.py @@ -19,9 +19,11 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # USA. +import sys import shutil from typing import List +from source.utils import get_os_data from source.common.package_manager import PackageManager, AgentType from source.common.process_result import ProcessResult from source.common.exit_codes import EXIT @@ -142,6 +144,117 @@ def get_action(self, remove_obsolete) -> List[str]: result.append("update") return result + def _release_upgrade(self, target_version: str) -> ProcessResult: + """ + Move the qube to a new Fedora/RHEL release with `distro-sync`. + + Crossing a release boundary is not the same as a normal update: + we point dnf/yum at the *target* `--releasever` and let + `distro-sync` converge the whole package set onto that release + (installing, upgrading, and erasing as needed). A plain `upgrade` + would leave the system on a mix of old and new release packages. + + dom0 derives `target_version` from `os-*` qvm-features that this + agent wrote on an earlier boot, and those can drift, so we re-read + the distribution from inside the qube and refuse on any mismatch + before doing anything irreversible. + + Progress is reported as bare floats on stderr (one per line), + which dom0's QubeConnection parses to drive the progress bar. + 0 at the start and 100 once distro-sync succeeds. A whole-release + distro-sync has no usable fine-grained progress, so the streamed + package output is what gives the user liveliness. + """ + target = str(target_version).strip() + + # Re-verify from in-qube data before we touch anything. + guard = self._verify_release_upgrade(target) + if guard.code: + return guard + + self._report_progress(0.0) + + # Wipe metadata and packages cached for the *old* release; otherwise + # dnf may resolve the transaction against the previous releasever + # and land on an inconsistent set. + result = self.run_cmd( + [self.package_manager, "clean", "all"], realtime=False + ) + if result.code: + result.code = EXIT.ERR_VM_UPDATE + return result + + # The actual release bump. + upgrade = self.run_cmd( + [ + self.package_manager, + f"--releasever={target}", + "distro-sync", + "--best", + "--allowerasing", + "--assumeyes", + ] + ) + if upgrade.code: + upgrade.code = EXIT.ERR_VM_UPDATE + result += upgrade + return result + + result += upgrade + self._report_progress(100.0) + return result + + def _verify_release_upgrade(self, target: str) -> ProcessResult: + """ + Confirm, from inside the qube, that a release upgrade to `target` + is sane. Returns an errored ProcessResult to abort, or an empty + (code 0) result to proceed. + + Enforces a single-step jump: the target must be a plain release + number, the qube must be a RedHat-family system, and the target must + be exactly one greater than the current in-qube major release. This + mirrors dom0's `compute_target_version`, ruling out downgrades, + no-ops, and multi-step jumps. + """ + if not target.isdigit(): + return self._refuse(f"invalid target release {target!r}.") + + os_data = get_os_data(self.log) + family = os_data.get("os_family") + if family != "RedHat": + return self._refuse(f"in-qube os-family is {family!r}, not RedHat.") + + current_major = os_data.get("release", "").split(".")[0] + if not current_major.isdigit(): + return self._refuse( + f"cannot read a numeric in-qube release from " + f"{os_data.get('release')!r}." + ) + if int(target) != int(current_major) + 1: + return self._refuse( + f"in-qube release {os_data.get('release')!r} can only move " + f"to {int(current_major) + 1} (single step), not {target!r}." + ) + + return ProcessResult() + + def _refuse(self, reason: str) -> ProcessResult: + """Log and build the standard "refusing version upgrade" error.""" + msg = f"Refusing version upgrade: {reason}" + self.log.error(msg) + return ProcessResult(EXIT.ERR_VM_UPDATE, out="", err=msg) + + @staticmethod + def _report_progress(percent: float) -> None: + """ + Emit a progress milestone for dom0's progress bar. + + The agent-to-dom0 progress protocol writes a bare float value + (one per line) to stderr; dom0's QubeConnection reads these + to drive the progress bar. 100.0 signals completion. + """ + print(f"{percent:.2f}", flush=True, file=sys.stderr) + def clean(self) -> int: """ Performs cleanup of temporary files kept for repositories. diff --git a/vmupdate/template_upgrade.py b/vmupdate/template_upgrade.py index 8466499..504c8ca 100644 --- a/vmupdate/template_upgrade.py +++ b/vmupdate/template_upgrade.py @@ -1,28 +1,23 @@ #!/usr/bin/python3 """ -qvm-template-upgrade — perform an N -> N+1 distro version upgrade of a qube +qvm-template-upgrade — upgrade a qube to the next distro release. Workflow: 1. Validate that --template names an existing TemplateVM or StandaloneVM. - 2. Read os-distribution / os-version from qvm-features. - 3. Compute the target version as os-version + 1 (N -> N+1 is the - only supported scope; multi-hop is rejected by construction). - 4. Clone the qube to a new name derived from the target version. - 5. Run the in-VM version-upgrade agent inside the clone - (reuses the vmupdate qrexec transport — currently stubbed). - 6. On success: update template metadata features on the clone. - 7. On failure: remove the half-upgraded clone unless - --keep-new-on-failure. - -The original qube is never touched by this tool. AppVMs based on a source -template continue to use it until the user manually switches them and -uninstalls the old template. + 2. Compute the target as os-version + 1 (only consecutive upgrades). + 3. Clone the qube to a new name derived from the target version. + 4. Run the in-VM version-upgrade agent inside the clone. + 5. On success, update template metadata on the clone. + 6. On failure, remove the clone unless --keep-new-on-failure. + +The original qube is never touched, so it stays available as the fallback. """ import argparse import logging import sys from datetime import datetime, timezone +from types import SimpleNamespace from typing import Optional, Sequence, Tuple import qubesadmin @@ -31,7 +26,11 @@ import qubesadmin.tools import qubesadmin.vm +from vmupdate.agent.source.args import AgentArgs from vmupdate.agent.source.common.exit_codes import EXIT +from vmupdate.agent.source.status import FormatedLine +from vmupdate.update_manager import update_qube +from vmupdate.utils import shutdown_domains LOG_PATH = "/var/log/qubes/qvm-template-upgrade.log" LOG_FORMAT = "%(asctime)s %(levelname)s %(message)s" @@ -50,6 +49,24 @@ class ValidationError(Exception): """Invalid user input or unsupported source qube.""" +class _AgentOutput: + """Minimal status sink that logs the agent's output for a single qube. + + Replaces the bulk updater's multiprocessing progress queue, which we + don't need when upgrading exactly one qube synchronously. + """ + + def __init__(self, log: logging.Logger) -> None: + self.log = log + + def put(self, item) -> None: + # The transport streams agent output as FormatedLine objects; forward + # those to the log and drop the StatusInfo progress ticks, which need + # the bulk updater's progress bar to mean anything. + if isinstance(item, (str, FormatedLine)): + self.log.info("%s", item) + + def compute_target_version(current: str) -> str: """Return current + 1 as the target distro version. @@ -264,59 +281,70 @@ def clone(self) -> None: self.cloned_qube = self.app.clone_vm(self.source_vm, self.new_name) def run_agent(self) -> None: - """Run the in-VM upgrade agent inside the clone. - - STUB: replaced in a follow-up commit by a dispatch into a new - `version_upgrade(target_version)` method on the existing - vmupdate agent (vmupdate/agent/source/{dnf,apt}/), reused via the - qrexec transport in qube_connection.py. The VM-side agent must - re-detect or verify the distro from inside the qube before running - distro-specific upgrade commands. + """Run the in-VM version-upgrade agent inside the clone. + + Reuses the vmupdate qrexec transport (``update_qube``) with a minimal + status sink instead of the bulk updater's progress bar. A non-zero + agent result becomes an UpgradeError so main() rolls the clone back. """ - raise NotImplementedError( - f"version-upgrade agent is not implemented yet for " - f"{self.cloned_qube.name} -> {self.target_version}" + agent_args = self._build_agent_args() + status_notifier = _AgentOutput(self.log) + termination = SimpleNamespace(value=False) + + self.log.info( + "Running version-upgrade agent in %s (-> %s)", + self.cloned_qube.name, + self.target_version, + ) + _name, result = update_qube( + self.cloned_qube, + agent_args, + show_progress=True, + status_notifier=status_notifier, + termination=termination, + dom0=False, ) + if result.code != EXIT.OK: + raise UpgradeError( + f"in-VM version-upgrade agent failed for " + f"{self.cloned_qube.name} (exit code {result.code}); " + f"see /var/log/qubes/update-{self.cloned_qube.name}.log" + ) + + def _build_agent_args(self) -> argparse.Namespace: + """Build the entrypoint args for a version-upgrade agent run. + + Reuse the agent's parser for proper defaults and set only the target + version; display_name is unused with our private sink. + """ + parser = argparse.ArgumentParser() + AgentArgs.add_arguments(parser) + agent_args = parser.parse_args( + [ + "--version-upgrade", + self.target_version, + "--log", + self.args.log, + ] + ) + agent_args.display_name = None + return agent_args def finalize(self) -> None: """Write post-upgrade qvm-features on the clone. - TemplateVM: always set template-name (required so qvm-template - recognises the upgraded clone as managed) and refresh - template-installtime. - - StandaloneVM: rewrite an existing template-name from the old to - the new release (e.g. fedora-41 -> fedora-42), keeping the value - compatible with qui.utils.check_support()'s EOL_DATES lookup - (which strips only -minimal / -xfce, not -standalone or the qube - name). We do not invent a template-name for standalones that - never had one, and we leave one in place that doesn't carry the - current version (can't safely transform). + Only TemplateVMs are touched: template-name marks the clone as + managed by qvm-template and template-installtime is refreshed. + StandaloneVMs are outside qvm-template's management model, so their + template-* features are left as inherited. """ - self.log.info("Updating metadata on %s", self.cloned_qube.name) - if self.cloned_qube.klass == "TemplateVM": - self.cloned_qube.features["template-name"] = self.cloned_qube.name - self.cloned_qube.features["template-installtime"] = datetime.now( - tz=timezone.utc - ).strftime(DATE_FMT) + if self.cloned_qube.klass != "TemplateVM": return - old = self.cloned_qube.features.get("template-name") - if not old: - return - if self.current_version not in old: - # template-name doesn't carry the current version (custom - # value, manual edit) it is safer to leave it alone than to - # guess what the user intended. - self.log.info( - "Leaving standalone template-name=%r untouched " - "(no version substring to rewrite)", - old, - ) - return - new = derive_clone_name( - old, self.current_version, self.target_version, None - ) - self.cloned_qube.features["template-name"] = new + self.log.info("Updating metadata on %s", self.cloned_qube.name) + self.cloned_qube.features["template-name"] = self.cloned_qube.name + self.cloned_qube.features["template-installtime"] = datetime.now( + tz=timezone.utc + ).strftime(DATE_FMT) def rollback(self) -> None: """Remove the half-upgraded clone, if any. Safe to call repeatedly.""" @@ -324,6 +352,11 @@ def rollback(self) -> None: return self.log.warning("Removing failed clone %s", self.cloned_qube.name) try: + # The transport only requests an async shutdown, so the clone may + # still be running; Qubes refuses to delete a running VM, so force + # it down and wait before removing it. + if self.cloned_qube.is_running(): + shutdown_domains([self.cloned_qube], self.log) del self.app.domains[self.cloned_qube.name] except qubesadmin.exc.QubesException as err: self.log.error( diff --git a/vmupdate/tests/test_template_upgrade.py b/vmupdate/tests/test_template_upgrade.py index a8b030e..1544835 100644 --- a/vmupdate/tests/test_template_upgrade.py +++ b/vmupdate/tests/test_template_upgrade.py @@ -9,6 +9,7 @@ from vmupdate import template_upgrade from vmupdate.agent.source.common.exit_codes import EXIT +from vmupdate.agent.source.common.process_result import ProcessResult from vmupdate.tests.conftest import TestApp as _TestApp from vmupdate.tests.conftest import TestVM as _TestVM @@ -27,6 +28,9 @@ def clone_vm(self, source_vm, new_name): self.clone_calls.append((source_vm.name, new_name)) clone = _TestVM(new_name, self, klass=source_vm.klass) clone.features.update(source_vm.features) + # A freshly cloned qube hasn't been started, so rollback() shouldn't + # try to force it down. (TestVM defaults running=True.) + clone.running = False return clone @@ -173,8 +177,9 @@ def test_standalone_without_template_name_left_alone(monkeypatch): assert "template-installtime" not in clone.features -def test_standalone_with_template_name_refreshed(monkeypatch): - """Refresh stale standalone template-name for updater EOL checks.""" +def test_standalone_template_name_left_untouched(monkeypatch): + """A standalone's template-* features are never rewritten; the clone + keeps whatever it inherited from the source.""" app = CloneApp() add_standalone(app, **{"template-name": "fedora-41"}) monkeypatch.setattr( @@ -185,21 +190,54 @@ def test_standalone_with_template_name_refreshed(monkeypatch): assert retcode == EXIT.OK clone = app.domains["fedora-42-standalone"] - # check_support() resolves this through EOL_DATES. - assert clone.features["template-name"] == "fedora-42" - # template-installtime is template-only; standalones don't get one. + # The clone inherits the source value; the tool does not touch it. + assert clone.features["template-name"] == "fedora-41" assert "template-installtime" not in clone.features -def test_default_stub_fails_and_cleans_clone(capsys): +def test_run_agent_success_invokes_transport(monkeypatch): + """A successful agent run upgrades the clone (not the source) in + single-qube VM mode and tells the agent the exact target release.""" app = CloneApp() add_template(app) + captured = {} + + def fake_update_qube(qube, agent_args, **kwargs): + captured["qube"] = qube + captured["agent_args"] = agent_args + captured["kwargs"] = kwargs + return qube.name, ProcessResult(EXIT.OK) + + monkeypatch.setattr(template_upgrade, "update_qube", fake_update_qube) + + retcode = template_upgrade.main(["--template", "fedora-41"], app) + + assert retcode == EXIT.OK + assert captured["qube"].name == "fedora-42" + assert captured["kwargs"]["dom0"] is False + assert captured["kwargs"]["show_progress"] is True + assert captured["agent_args"].version_upgrade == "42" + assert captured["agent_args"].display_name is None + # success path still applies post-upgrade metadata + assert app.domains["fedora-42"].features["template-name"] == "fedora-42" + + +def test_run_agent_failure_rolls_back_clone(monkeypatch, capsys): + """A non-zero agent exit becomes an UpgradeError and the clone is + removed (the wired replacement for the old NotImplementedError stub).""" + app = CloneApp() + add_template(app) + + def fake_update_qube(qube, agent_args, **kwargs): + return qube.name, ProcessResult(EXIT.ERR_VM_UPDATE) + + monkeypatch.setattr(template_upgrade, "update_qube", fake_update_qube) retcode = template_upgrade.main(["--template", "fedora-41"], app) assert retcode == EXIT.ERR assert "fedora-42" not in app.domains - assert "not implemented yet" in capsys.readouterr().err + assert "version-upgrade agent failed" in capsys.readouterr().err @pytest.mark.parametrize( @@ -276,6 +314,26 @@ def boom(*_a, **_kw): assert "clone failed: storage pool full" in capsys.readouterr().err +def test_agent_output_forwards_lines_and_drops_status(): + """FormatedLine output reaches the log; StatusInfo ticks are dropped.""" + from vmupdate.agent.source.status import ( + FinalStatus, + FormatedLine, + StatusInfo, + ) + + log = Mock() + sink = template_upgrade._AgentOutput(log) + + sink.put(FormatedLine("fedora-42", "out", "Downloading packages")) + sink.put("fedora-42:err: a plain string line") + sink.put(StatusInfo.updating(Mock(name="fedora-42"), 42.0)) + sink.put(StatusInfo.done(Mock(name="fedora-42"), FinalStatus.SUCCESS)) + + # Only the two human-readable lines are logged. + assert log.info.call_count == 2 + + def test_rollback_noop_when_no_clone(): """rollback() before clone() ran is a safe no-op.""" upgrader = template_upgrade.TemplateUpgrader(CloneApp(), Mock(), Mock()) @@ -297,12 +355,35 @@ def test_rollback_handles_delete_failure(): upgrader = template_upgrade.TemplateUpgrader(app, Mock(), Mock()) upgrader.cloned_qube = Mock(name="fedora-42") upgrader.cloned_qube.name = "fedora-42" + upgrader.cloned_qube.is_running.return_value = False upgrader.rollback() # must not raise upgrader.log.error.assert_called_once() +def test_rollback_powers_off_running_clone_before_delete(monkeypatch): + """A clone still running is forced down before deletion (Qubes refuses + to remove a running VM).""" + shutdown_calls = [] + monkeypatch.setattr( + template_upgrade, + "shutdown_domains", + lambda vms, log: shutdown_calls.append(vms), + ) + app = MagicMock() + upgrader = template_upgrade.TemplateUpgrader(app, Mock(), Mock()) + upgrader.cloned_qube = Mock() + upgrader.cloned_qube.name = "fedora-42" + upgrader.cloned_qube.is_running.return_value = True + + upgrader.rollback() + + # Forced down first, then removed. + assert shutdown_calls == [[upgrader.cloned_qube]] + app.domains.__delitem__.assert_called_once_with("fedora-42") + + def _reset_template_upgrade_logger(): logger = logging.getLogger("vm-template-upgrade") logger.handlers.clear() @@ -313,7 +394,9 @@ def test_setup_logging_is_idempotent(tmp_path, monkeypatch): """Calling setup_logging twice must not duplicate handlers.""" monkeypatch.setattr(template_upgrade, "setup_logging", _REAL_SETUP_LOGGING) monkeypatch.setattr( - template_upgrade, "LOG_PATH", str(tmp_path / "qvm-template-upgrade.log") + template_upgrade, + "LOG_PATH", + str(tmp_path / "qvm-template-upgrade.log"), ) _reset_template_upgrade_logger() diff --git a/vmupdate/tests/test_version_upgrade_agent.py b/vmupdate/tests/test_version_upgrade_agent.py new file mode 100644 index 0000000..14750ae --- /dev/null +++ b/vmupdate/tests/test_version_upgrade_agent.py @@ -0,0 +1,312 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# The Qubes OS Project, https://www.qubes-os.org +# +# Copyright (C) 2026 Qubes OS contributors +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. +""" +Unit tests for the in-VM distribution version-upgrade agent path. + +The agent modules use absolute ``source.*`` imports because inside a qube +they run with the agent directory as the top-level package root (see +``entrypoint.py``). We mirror that here by putting the agent directory on +``sys.path`` so the agent can be imported and unit-tested in isolation, with +``subprocess`` fully mocked -- no real dnf is ever invoked. +""" + +import os +import sys +import logging +import argparse + +from unittest.mock import patch, MagicMock + +import pytest + +_AGENT_DIR = os.path.abspath( + os.path.join(os.path.dirname(__file__), os.pardir, "agent") +) +if _AGENT_DIR not in sys.path: + sys.path.insert(0, _AGENT_DIR) + +# pylint: disable=wrong-import-position +import entrypoint +from source.dnf.dnf_cli import DNFCLI +from source.common.package_manager import PackageManager, AgentType +from source.common.process_result import ProcessResult +from source.common.exit_codes import EXIT +from source.args import AgentArgs + + +def _expected_sync_cmd(target): + return ( + "dnf", + f"--releasever={target}", + "distro-sync", + "--best", + "--allowerasing", + "--assumeyes", + ) + + +def make_dnf_cli(): + """Build a DNFCLI without requiring a real dnf binary on the host.""" + with patch("source.dnf.dnf_cli.shutil.which", return_value="/usr/bin/dnf"): + return DNFCLI(logging.NullHandler(), logging.DEBUG, AgentType.VM) + + +def fedora_os_data(release="41"): + return {"id": "fedora", "os_family": "RedHat", "release": release} + + +# DNFCLI._release_upgrade -- happy path + + +def test_version_upgrade_runs_clean_then_distro_sync(): + mgr = make_dnf_cli() + calls = [] + + def fake_run_cmd(cmd, realtime=True): + calls.append((tuple(cmd), realtime)) + return ProcessResult(EXIT.OK) + + with patch.object(mgr, "run_cmd", side_effect=fake_run_cmd), patch( + "source.dnf.dnf_cli.get_os_data", return_value=fedora_os_data("41") + ): + code = mgr.version_upgrade("42") + + assert code == EXIT.OK + # Old-release cache is wiped (captured, not streamed) before the bump. + assert calls[0] == (("dnf", "clean", "all"), False) + sync_cmd, sync_realtime = calls[1] + assert sync_cmd == _expected_sync_cmd("42") + # distro-sync streams in real time so dom0 sees live output. + assert sync_realtime is True + + +def test_version_upgrade_emits_progress_milestones(capsys): + mgr = make_dnf_cli() + with patch.object( + mgr, "run_cmd", return_value=ProcessResult(EXIT.OK) + ), patch( + "source.dnf.dnf_cli.get_os_data", return_value=fedora_os_data("41") + ): + mgr.version_upgrade("42") + + # The progress contract QubeConnection._collect_stderr parses: bare floats + # terminated by 100.00. + assert capsys.readouterr().err.split() == ["0.00", "100.00"] + + +# DNFCLI._release_upgrade -- in-qube re-verification (single-step only) + + +def test_version_upgrade_refuses_non_redhat_qube(): + mgr = make_dnf_cli() + with patch.object(mgr, "run_cmd") as run_cmd, patch( + "source.dnf.dnf_cli.get_os_data", + return_value={"id": "debian", "os_family": "Debian", "release": "12"}, + ): + code = mgr.version_upgrade("42") + + assert code == EXIT.ERR_VM_UPDATE + run_cmd.assert_not_called() # bail before anything irreversible + + +def test_version_upgrade_refuses_non_numeric_target(): + mgr = make_dnf_cli() + with patch.object(mgr, "run_cmd") as run_cmd, patch( + "source.dnf.dnf_cli.get_os_data", return_value=fedora_os_data("41") + ): + code = mgr.version_upgrade("bookworm") + + assert code == EXIT.ERR_VM_UPDATE + run_cmd.assert_not_called() + + +@pytest.mark.parametrize( + "target,current", + [ + ("41", "41"), # no-op + ("40", "41"), # downgrade + ("43", "41"), # two-step jump + ], +) +def test_version_upgrade_enforces_single_step(target, current): + mgr = make_dnf_cli() + with patch.object(mgr, "run_cmd") as run_cmd, patch( + "source.dnf.dnf_cli.get_os_data", + return_value=fedora_os_data(current), + ): + code = mgr.version_upgrade(target) + + assert code == EXIT.ERR_VM_UPDATE + run_cmd.assert_not_called() + + +def test_version_upgrade_allows_dotted_current_release(): + # VERSION_ID like "41.20240101" should compare on the major component. + mgr = make_dnf_cli() + with patch.object( + mgr, "run_cmd", return_value=ProcessResult(EXIT.OK) + ) as run_cmd, patch( + "source.dnf.dnf_cli.get_os_data", + return_value=fedora_os_data("41.20240101"), + ): + code = mgr.version_upgrade("42") + + assert code == EXIT.OK + assert run_cmd.call_count == 2 + + +# DNFCLI._release_upgrade -- failure mapping + + +def test_version_upgrade_bails_when_clean_fails(): + mgr = make_dnf_cli() + calls = [] + + def fake_run_cmd(cmd, realtime=True): + calls.append(tuple(cmd)) + return ProcessResult(3) # clean all fails + + with patch.object(mgr, "run_cmd", side_effect=fake_run_cmd), patch( + "source.dnf.dnf_cli.get_os_data", return_value=fedora_os_data("41") + ): + code = mgr.version_upgrade("42") + + assert code == EXIT.ERR_VM_UPDATE + # distro-sync is never attempted once the cache wipe fails. + assert calls == [("dnf", "clean", "all")] + + +def test_version_upgrade_maps_distro_sync_failure(): + mgr = make_dnf_cli() + + def fake_run_cmd(cmd, realtime=True): + if "distro-sync" in cmd: + return ProcessResult(7) # arbitrary non-zero dnf failure + return ProcessResult(EXIT.OK) + + with patch.object(mgr, "run_cmd", side_effect=fake_run_cmd), patch( + "source.dnf.dnf_cli.get_os_data", return_value=fedora_os_data("41") + ): + code = mgr.version_upgrade("42") + + # Any non-zero is normalised to a dom0-handled VM error code. + assert code == EXIT.ERR_VM_UPDATE + + +# Base class -- loud fail-closed default, covers Debian/apt + Arch + + +def test_base_version_upgrade_fails_loud(): + # Families without a real implementation must raise NotImplementedError + # so callers can decide how to log and map the unsupported path. + mgr = PackageManager(logging.NullHandler(), logging.DEBUG, AgentType.VM) + with pytest.raises(NotImplementedError, match="not implemented"): + mgr._release_upgrade("42") + with pytest.raises(NotImplementedError, match="not implemented"): + mgr.version_upgrade("42") + + +# Agent CLI surface -- args round-trip + + +def _parse_agent_args(argv): + parser = argparse.ArgumentParser() + AgentArgs.add_arguments(parser) + return parser.parse_args(argv) + + +def test_version_upgrade_flag_round_trips_through_cli_args(): + args = _parse_agent_args(["--version-upgrade", "42"]) + assert args.version_upgrade == "42" + + cli = AgentArgs.to_cli_args(args) + assert "--version-upgrade" in cli + assert cli[cli.index("--version-upgrade") + 1] == "42" + + +def test_version_upgrade_flag_absent_by_default(): + args = _parse_agent_args([]) + assert args.version_upgrade is None + + cli = AgentArgs.to_cli_args(args) + assert "--version-upgrade" not in cli + # Regression guard: a None-valued option must never leak a bare token. + assert None not in cli + + +# Entrypoint dispatch + + +def _patched_entrypoint(pkg_mng): + """Common patches so entrypoint.main runs without a real qube/logs.""" + fake_logs = (MagicMock(), MagicMock(), logging.DEBUG, "", "") + return ( + patch("entrypoint.init_logs", return_value=fake_logs), + patch("entrypoint.get_os_data", return_value=fedora_os_data("41")), + patch("entrypoint.get_package_manager", return_value=pkg_mng), + patch("entrypoint.os.system"), + ) + + +def test_entrypoint_dispatches_to_version_upgrade(): + pkg_mng = MagicMock() + pkg_mng.version_upgrade.return_value = EXIT.OK + pkg_mng.clean.return_value = EXIT.OK + + patches = _patched_entrypoint(pkg_mng) + with patches[0], patches[1], patches[2], patches[3]: + code = entrypoint.main(["--version-upgrade", "42"]) + + pkg_mng.version_upgrade.assert_called_once_with("42", print_streams=False) + pkg_mng.upgrade.assert_not_called() + assert code == EXIT.OK + + +def test_entrypoint_maps_missing_version_upgrade_to_handled_error(capsys): + pkg_mng = MagicMock() + pkg_mng.version_upgrade.side_effect = NotImplementedError( + "Distribution version upgrade is not implemented for this package manager." + ) + pkg_mng.clean.return_value = EXIT.OK + + patches = _patched_entrypoint(pkg_mng) + with patches[0], patches[1], patches[2], patches[3]: + code = entrypoint.main(["--version-upgrade", "42"]) + + pkg_mng.version_upgrade.assert_called_once_with("42", print_streams=False) + pkg_mng.upgrade.assert_not_called() + assert code == EXIT.ERR_VM_UPDATE + assert "not implemented" in capsys.readouterr().err + + +def test_entrypoint_runs_normal_update_without_flag(): + pkg_mng = MagicMock() + pkg_mng.upgrade.return_value = EXIT.OK + pkg_mng.clean.return_value = EXIT.OK + + patches = _patched_entrypoint(pkg_mng) + with patches[0], patches[1], patches[2], patches[3]: + code = entrypoint.main([]) + + pkg_mng.upgrade.assert_called_once() + pkg_mng.version_upgrade.assert_not_called() + assert code == EXIT.OK From bd9a7236a6e8c2ca0b4da9e5294cc069ac2eb4fa Mon Sep 17 00:00:00 2001 From: Nihal <121309701+nihalxkumar@users.noreply.github.com> Date: Fri, 26 Jun 2026 01:15:53 -0400 Subject: [PATCH 3/3] qvm-template-upgrade: tolerate clone shutdown failure during teardown When QubeConnection.__exit__ shut the upgraded clone down it called is_running()/shutdown() outside any error handling. An exception raised there propagated out of _run_agent, was caught by update_qube's broad except, and surfaced as ERR_VM_UNHANDLED (exit code 26), which rolled back and deleted an otherwise successfully upgraded clone. The detail explaining the real cause was buried in result.out, which run_agent discarded, so it never reached the tool log. Wrap the shutdown block in a broad try/except that logs the error instead of raising, mirroring the rm cleanup immediately above it, so a teardown problem can no longer mask a successful upgrade. Also append result.out/result.err to the UpgradeError so the underlying cause is visible in the tool log when the agent genuinely fails. --- vmupdate/qube_connection.py | 27 ++++++++++++++--------- vmupdate/template_upgrade.py | 2 ++ vmupdate/tests/test_qube_connection.py | 30 ++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/vmupdate/qube_connection.py b/vmupdate/qube_connection.py index 56aadd8..20b6000 100644 --- a/vmupdate/qube_connection.py +++ b/vmupdate/qube_connection.py @@ -92,16 +92,23 @@ def __exit__(self, exc_type, exc_val, exc_tb): str(err), ) - if self.qube.is_running() and not self._initially_running: - if self._has_assigned_pci_devices(self.qube): - self.logger.info( - "Waiting for full shutdown %s (PCI devices assigned)", - self.qube.name, - ) - shutdown_domains([self.qube], self.logger) - else: - self.logger.info("Shutdown %s", self.qube.name) - self.qube.shutdown() + try: + if self.qube.is_running() and not self._initially_running: + if self._has_assigned_pci_devices(self.qube): + self.logger.info( + "Waiting for full shutdown %s (PCI devices assigned)", + self.qube.name, + ) + shutdown_domains([self.qube], self.logger) + else: + self.logger.info("Shutdown %s", self.qube.name) + self.qube.shutdown() + except Exception as err: # pylint: disable=broad-except + self.logger.error( + "Cannot shutdown %s, because of error: %s", + self.qube.name, + str(err), + ) self.__connected = False diff --git a/vmupdate/template_upgrade.py b/vmupdate/template_upgrade.py index 504c8ca..91ca358 100644 --- a/vmupdate/template_upgrade.py +++ b/vmupdate/template_upgrade.py @@ -305,10 +305,12 @@ def run_agent(self) -> None: dom0=False, ) if result.code != EXIT.OK: + detail = (result.out or "").strip() or (result.err or "").strip() raise UpgradeError( f"in-VM version-upgrade agent failed for " f"{self.cloned_qube.name} (exit code {result.code}); " f"see /var/log/qubes/update-{self.cloned_qube.name}.log" + + (f" -- {detail}" if detail else "") ) def _build_agent_args(self) -> argparse.Namespace: diff --git a/vmupdate/tests/test_qube_connection.py b/vmupdate/tests/test_qube_connection.py index b707a85..4551570 100644 --- a/vmupdate/tests/test_qube_connection.py +++ b/vmupdate/tests/test_qube_connection.py @@ -20,6 +20,8 @@ # USA. from unittest.mock import Mock, patch +import qubesadmin.exc + from vmupdate.qube_connection import QubeConnection @@ -93,3 +95,31 @@ def test_do_not_shutdown_if_vm_was_already_running(shutdown_domains): vm.shutdown.assert_not_called() shutdown_domains.assert_not_called() + + +@patch("vmupdate.qube_connection.shutdown_domains") +def test_shutdown_error_does_not_propagate(shutdown_domains): + # A QubesException raised while shutting the clone down during teardown + # must not propagate (and mask an otherwise successful run). + vm = Mock() + vm.name = "hvm4" + vm.is_running.side_effect = [False, True] + vm.devices = {"pci": Mock()} + vm.devices["pci"].get_assigned_devices.return_value = [] + vm.shutdown.side_effect = qubesadmin.exc.QubesException("boom") + status_notifier = Mock() + logger = Mock() + + with QubeConnection( + vm, + "/tmp/qubes-update", + cleanup=False, + logger=logger, + show_progress=False, + status_notifier=status_notifier, + ): + pass + + vm.shutdown.assert_called_once_with() + shutdown_domains.assert_not_called() + logger.error.assert_called_once()