Skip to content
This repository was archived by the owner on Jan 23, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/client.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from contextlib import contextmanager

import click
from jumpstarter_driver_composite.client import CompositeClient
from jumpstarter_driver_network.adapters import FabricAdapter, NovncAdapter

from jumpstarter.client.decorators import driver_click_group


class QemuClient(CompositeClient):
@property
Expand All @@ -17,6 +20,14 @@ def username(self) -> str:
def password(self) -> str:
return self.call("get_password")

def set_disk_size(self, size: str) -> None:
    """Ask the driver to resize the root disk to ``size`` before boot.

    The string is forwarded unchanged to the driver's ``set_disk_size`` call.
    """
    remote_method = "set_disk_size"
    self.call(remote_method, size)

def set_memory_size(self, size: str) -> None:
    """Ask the driver to use ``size`` as the memory size for the next boot.

    The string is forwarded unchanged to the driver's ``set_memory_size`` call.
    """
    remote_method = "set_memory_size"
    self.call(remote_method, size)

@contextmanager
def novnc(self):
with NovncAdapter(client=self.vnc) as url:
Expand All @@ -30,3 +41,30 @@ def shell(self):
connect_kwargs={"password": self.password},
) as conn:
yield conn

def cli(self):
    """Build the click command tree for this client.

    Returns the ``base`` group, which carries a ``resize`` sub-group with two
    commands: ``disk`` (forwards to ``set_disk_size``) and ``memory``
    (forwards to ``set_memory_size``).
    """

    # The docstrings of the click callbacks below double as CLI help text.
    @driver_click_group(self)
    def base():
        """QEMU virtual machine operations"""

    @base.group()
    def resize():
        """Resize QEMU resources"""

    @resize.command(name="disk")
    @click.argument("size")
    def disk_command(size):
        """Resize the root disk (e.g., 20G). Run before power on."""
        self.set_disk_size(size)
        message = f"Disk will be resized to {size} on next power on"
        click.echo(message)

    @resize.command(name="memory")
    @click.argument("size")
    def memory_command(size):
        """Set memory size (e.g., 2G, 4G). Takes effect on next boot."""
        self.set_memory_size(size)
        message = f"Memory will be set to {size} on next power on"
        click.echo(message)

    return base
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import os
import platform
import shutil
from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
from functools import cached_property
Expand All @@ -20,7 +21,7 @@
from jumpstarter_driver_opendal.driver import FlasherInterface
from jumpstarter_driver_power.driver import PowerInterface, PowerReading
from jumpstarter_driver_pyserial.driver import PySerial
from pydantic import BaseModel, Field, validate_call
from pydantic import BaseModel, ByteSize, Field, TypeAdapter, ValidationError, validate_call
from qemu.qmp import QMPClient
from qemu.qmp.protocol import ConnectError, Runstate

Expand Down Expand Up @@ -169,6 +170,7 @@ async def on(self) -> None: # noqa: C901
proc.check_returncode()
info = json.loads(proc.stdout)
image_format = info.get("format", "raw")
current_virtual_size = info.get("virtual-size") or root.stat().st_size
match image_format:
case "raw" | "qcow2" | "qcow" | "vmdk":
image_driver = image_format
Expand All @@ -177,6 +179,34 @@ async def on(self) -> None: # noqa: C901
except CalledProcessError:
self.logger.warning("unable to detect image format, assuming raw")
image_driver = "raw"
current_virtual_size = root.stat().st_size

# Resize disk if configured
if self.parent.disk_size:
requested = self.parent._parse_size(self.parent.disk_size)

if requested < current_virtual_size:
raise RuntimeError(
f"Shrinking disk is not supported: current {ByteSize(current_virtual_size).human_readable()}, "
f"requested {self.parent.disk_size}"
)

available = shutil.disk_usage(root.parent).free
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this could exhaust all the resources on the host, but i think the issue here goes deeper, since if we consider multi tenancy a single client can overcommit and make the host unusable for others. perhaps it is not for this PR, but maybe we need to set hard limits, perhaps as some percentage of what is available

Copy link
Contributor Author

@evakhoni evakhoni Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah that's pretty easy to implement, we just need a consensus on the maximum percentage, or the host reserved amount, or both. I don't mind including it in this PR as well if we all agree on the amounts.

speaking about potential issues, it goes deeper than that, because iiuc our free space calculation will not take into account the virtual size of other thin-provisioned guests on that host, and assuming they're all stored in the same partition, each of them is theoretically allowed to grow without taking into account the growth of the others. however, in order to address that properly, we'll probably have to either iterate over all other images stored on the host at runtime, or alternatively maintain a data structure storing this and update it at each resize, both of which seem to me like overkill for such a niche feature. maybe I'm shooting myself in the foot writing this one 😆 but it had to be said.

lastly, ideally we should not be implementation-specific on the user-facing side, but rather let the user make a lease request with his needs, and figure out on our side what we can do in order to satisfy it, if the resources and user permissions allow it (something similar to https://github.com/jumpstarter-dev/jumpstarter/issues/724)

that being said, it's a slippery slope and a deep rabbit hole, not sure if we have the resources to dive into that one 😉

anyway, let me know what you think about implementing a percentage or something, in this (or a follow up) PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well, some is left for the lab admin to decide the configuration of the host, and how many exporters they set up on each host, and how much each gets, and whether they set a separate partition for jumpstarter storage

another problem however is that it would be easy to violate the quota constraints without trying too hard when the image is a compressed qcow2 with a large virtual size

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well, this one at least has "some" constraints :) we probably need to allow less than (root.parent).free for a safe margin, but iiuc without this PR just flashing a qemu image has no limits at all, so this one's a step in a good direction in a sense 😉
so what do you suggest?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'm not saying it's bad, i'm just thinking out loud :) let's merge for now and circle back
so please squash the commits and i'll merge

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do I need to squash manually via rebase and then force-push, or are there any facilities available for it in the repo?
I thought "Allow squash merging" is enabled in this repo, so you can choose it from your side when you merge?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

force push...

yeah we just use merge commits too so i don't want to make it inconsistent

Copy link
Contributor Author

@evakhoni evakhoni Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok done 👌 squashed into 3 commits separated by functionality. the first adds the resize, the second one is the j command and the third has the tests. is it fine like that, or does it have to be one?

if requested > available:
raise RuntimeError(
f"Not enough disk space: need {ByteSize(requested).human_readable()}, "
f"only {ByteSize(available).human_readable()} available"
)

if requested > current_virtual_size:
self.logger.info(f"Resizing disk to {ByteSize(requested).human_readable()}")
proc = await run_process(
["qemu-img", "resize", str(root), str(requested)],
stdout=PIPE,
stderr=PIPE,
)
if proc.returncode != 0:
raise RuntimeError(f"Failed to resize disk: {proc.stderr.decode()}")

cmdline += [
"-blockdev",
Expand Down Expand Up @@ -254,6 +284,7 @@ class Qemu(Driver):

smp: int = 2
mem: str = "512M"
disk_size: str | None = None # e.g., "20G" (resize disk before boot)

hostname: str = "demo"
username: str = "jumpstarter"
Expand Down Expand Up @@ -372,3 +403,24 @@ def get_username(self) -> str:
@validate_call(validate_return=True)
def get_password(self) -> str:
return self.password

def _parse_size(self, size: str) -> int:
    """Parse a size string (e.g., '20G') into a number of bytes.

    A trailing single-letter suffix (k, M, G or T, either case) is rewritten
    to its "iB" form before validation so that it is read as a binary
    multiple ("20G" is 20 * 1024**3). Surrounding whitespace is stripped
    first, so "20G " yields the same result as "20G" instead of skipping the
    rewrite.

    Raises:
        ValueError: if the string cannot be parsed as a byte size.
    """
    text = size.strip()
    # NOTE(review): per pydantic's ByteSize docs a bare "G" is a decimal unit,
    # which is why the suffix is expanded here; confirm against the pinned version.
    if text.endswith(tuple("kmgtKMGT")):
        text += "iB"
    try:
        return int(TypeAdapter(ByteSize).validate_python(text))
    except ValidationError:
        # Report the caller's original input, not the rewritten form.
        raise ValueError(f"Invalid size: '{size}'. Use e.g. '20G', '512M', '2T'") from None

@export
@validate_call(validate_return=True)
def set_disk_size(self, size: str) -> None:
    """Record the root disk size to apply before boot.

    ``size`` is checked with ``_parse_size`` first, so a malformed value
    raises ``ValueError`` and leaves ``disk_size`` unchanged. The original
    string, not the parsed byte count, is what gets stored.
    """
    # Parsed only for validation; the numeric result is discarded.
    _ = self._parse_size(size)
    self.disk_size = size

@export
@validate_call(validate_return=True)
def set_memory_size(self, size: str) -> None:
    """Record the memory size to use for the next boot.

    ``size`` is checked with ``_parse_size`` first, so a malformed value
    raises ``ValueError`` and leaves ``mem`` unchanged. The original string,
    not the parsed byte count, is what gets stored.
    """
    # Parsed only for validation; the numeric result is discarded.
    _ = self._parse_size(size)
    self.mem = size
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import json
import os
import platform
import sys
import tarfile
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import AsyncMock, patch

import pytest
import requests
Expand All @@ -13,6 +16,12 @@
from jumpstarter.common.utils import serve


@pytest.fixture
def anyio_backend():
    """Restrict anyio-marked tests to the asyncio backend."""
    backend = "asyncio"
    return backend


@pytest.fixture(scope="session")
def ovmf(tmpdir_factory):
tmp_path = tmpdir_factory.mktemp("ovmf")
Expand Down Expand Up @@ -91,3 +100,104 @@ def test_driver_qemu(tmp_path, ovmf):
assert s.run("uname -r").stdout.strip() == f"6.11.4-301.fc41.{arch}"

qemu.power.off()


@pytest.fixture
def resize_test():
    """Provide a factory for Qemu drivers backed by a sparse root disk.

    The factory takes the driver's ``disk_size`` setting and the current
    image size in GiB, and returns ``(driver, current_size_in_bytes)``.
    Every driver created through the factory has its temporary directory
    cleaned up after the test, not only the most recent one.
    """
    created = []

    def _create(disk_size, current_size_gb):
        size_bytes = current_size_gb * 1024**3
        driver = Qemu(disk_size=disk_size)
        created.append(driver)
        root = Path(driver._tmp_dir.name) / "root"
        root.write_bytes(b"")
        # Extend the empty file to the target size without writing any data.
        os.truncate(root, size_bytes)
        return driver, size_bytes

    yield _create

    for driver in created:
        driver._tmp_dir.cleanup()


def _mock_qemu_img_info(virtual_size):
"""Return a mock for run_process that simulates qemu-img info."""
async def mock(cmd, **kwargs):
result = AsyncMock()
result.returncode = 0
result.stdout = json.dumps({"format": "raw", "virtual-size": virtual_size}).encode()
result.check_returncode = lambda: None
return result
return mock


@pytest.mark.anyio
async def test_resize_shrink_blocked(resize_test):
    """Powering on fails when the requested size is below the current size."""
    # Request 10G against an image that is currently 20G.
    driver, current = resize_test("10G", 20)

    fake_run = patch("jumpstarter_driver_qemu.driver.run_process", side_effect=_mock_qemu_img_info(current))
    expect_error = pytest.raises(RuntimeError, match="Shrinking disk is not supported")
    with fake_run, expect_error:
        await driver.children["power"].on()


@pytest.mark.anyio
async def test_resize_insufficient_space_blocked(resize_test):
    """Powering on fails when the requested size exceeds the host's free space."""
    # Request 100G against a 10G image while the host reports only 5G free.
    driver, current = resize_test("100G", 10)
    five_gib_free = SimpleNamespace(free=5 * 1024**3)

    fake_run = patch("jumpstarter_driver_qemu.driver.run_process", side_effect=_mock_qemu_img_info(current))
    fake_usage = patch("jumpstarter_driver_qemu.driver.shutil.disk_usage", return_value=five_gib_free)
    expect_error = pytest.raises(RuntimeError, match="Not enough disk space")
    with fake_run, fake_usage, expect_error:
        await driver.children["power"].on()


@pytest.mark.anyio
async def test_resize_succeeds(resize_test):
    """Growing the disk invokes ``qemu-img resize`` with the size in bytes."""
    # Request 20G against a 10G image with plenty of free host space.
    driver, current = resize_test("20G", 10)
    plenty_free = SimpleNamespace(free=50 * 1024**3)

    fake_run = patch("jumpstarter_driver_qemu.driver.run_process", side_effect=_mock_qemu_img_info(current))
    fake_usage = patch("jumpstarter_driver_qemu.driver.shutil.disk_usage", return_value=plenty_free)
    # Popen is made to fail so the test stops before a real QEMU VM is started.
    fake_popen = patch("jumpstarter_driver_qemu.driver.Popen", side_effect=RuntimeError("mock popen"))
    expect_stop = pytest.raises(RuntimeError, match="mock popen")

    with fake_run as mock_run, fake_usage, fake_popen, expect_stop:
        await driver.children["power"].on()

    # Each recorded command looks like ['qemu-img', 'resize', path, size].
    resize_cmds = [call.args[0] for call in mock_run.call_args_list if "resize" in call.args[0]]
    assert resize_cmds, "qemu-img resize should be called"
    first_cmd = resize_cmds[0]
    assert first_cmd[-1] == str(20 * 1024**3)


def test_set_disk_size_valid():
    """A well-formed size string is stored on the driver exactly as given."""
    requested = "20G"
    driver = Qemu()
    driver.set_disk_size(requested)
    assert driver.disk_size == requested


def test_set_disk_size_invalid():
    """A malformed disk size is rejected with ValueError."""
    driver = Qemu()
    expect_error = pytest.raises(ValueError, match="Invalid size")
    with expect_error:
        driver.set_disk_size("invalid")


def test_set_memory_size_valid():
    """A well-formed memory size is stored on the driver exactly as given."""
    requested = "2G"
    driver = Qemu()
    driver.set_memory_size(requested)
    assert driver.mem == requested


def test_set_memory_size_invalid():
    """A malformed memory size is rejected with ValueError."""
    driver = Qemu()
    expect_error = pytest.raises(ValueError, match="Invalid size")
    with expect_error:
        driver.set_memory_size("invalid")
Loading