From 13ef6eb27cb9371f602dd66d91441767f04dfd15 Mon Sep 17 00:00:00 2001 From: Asia Khromov Date: Mon, 11 May 2026 11:55:08 +0300 Subject: [PATCH 1/4] net: Add mixed_os_nodes marker to pytest.ini New markers must be registered in pytest.ini to be recognized during collection. Gates test collection to clusters with both RHCOS 9 and RHCOS 10 worker nodes, preventing false failures on homogeneous clusters. Signed-off-by: Asia Khromov Co-Authored-By: Claude Sonnet 4.6 (1M context) --- pytest.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/pytest.ini b/pytest.ini index c19714291c..f991324bad 100644 --- a/pytest.ini +++ b/pytest.ini @@ -72,6 +72,7 @@ markers = rwx_default_storage: Tests that require RWX storage descheduler: Tests that require kube-descheduler on nodes remote_cluster: Tests that require a remote cluster + mixed_os_nodes: Tests that require a dual-stream cluster with both RHCOS 9 and RHCOS 10 worker nodes ## Required operators mtv: Tests that require the MTV operator to be installed From a3268fb3f600fad18d31abe4e7a7f864f23091fd Mon Sep 17 00:00:00 2001 From: Asia Khromov Date: Mon, 11 May 2026 11:55:08 +0300 Subject: [PATCH 2/4] net: Add label-based node scheduling Dual-stream clusters label RHCOS 9 workers with: node-role.kubernetes.io/worker-rhcos9 RHCOS 10 workers carry only the generic worker role label: node-role.kubernetes.io/worker Add set_vm_node_selector and set_vm_node_affinity helpers that target nodes by label presence/absence, scheduling VMs on a group of nodes matching an OS version rather than a specific host. 
Signed-off-by: Asia Khromov Co-Authored-By: Claude Sonnet 4.6 (1M context) --- libs/vm/spec.py | 1 + tests/network/libs/vm_scheduling.py | 62 +++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 tests/network/libs/vm_scheduling.py diff --git a/libs/vm/spec.py b/libs/vm/spec.py index 24fd13fdbe..e20b5f9ff4 100644 --- a/libs/vm/spec.py +++ b/libs/vm/spec.py @@ -31,6 +31,7 @@ class VMISpec: volumes: list[Volume] | None = None terminationGracePeriodSeconds: int | None = None # noqa: N815 affinity: Affinity | None = None + nodeSelector: dict[str, str] | None = None # noqa: N815 @dataclass diff --git a/tests/network/libs/vm_scheduling.py b/tests/network/libs/vm_scheduling.py new file mode 100644 index 0000000000..905f3e8439 --- /dev/null +++ b/tests/network/libs/vm_scheduling.py @@ -0,0 +1,62 @@ +from typing import Final + +from ocp_resources.resource import ResourceEditor + +from libs.vm.vm import BaseVirtualMachine + +RHCOS9_WORKER_LABEL: Final[str] = "node-role.kubernetes.io/worker-rhcos9" +WORKER_LABEL: Final[str] = "node-role.kubernetes.io/worker" + +RHCOS9_NODE_SELECTOR: Final[dict[str, str]] = {RHCOS9_WORKER_LABEL: ""} + + +def set_vm_node_selector(vm: BaseVirtualMachine, label: str) -> None: + """Set a nodeSelector on the VM to schedule it on nodes carrying the given label. + + Clears any existing nodeAffinity so only the nodeSelector is active. + + Args: + vm: VirtualMachine to update. + label: Node role label key (e.g. "node-role.kubernetes.io/worker-rhcos9"). + """ + ResourceEditor( + patches={vm: {"spec": {"template": {"spec": {"nodeSelector": {label: ""}, "affinity": None}}}}} + ).update() + + +def set_vm_node_affinity(vm: BaseVirtualMachine, excluded_label: str) -> None: + """Set a nodeAffinity on the VM to schedule it on worker nodes NOT carrying the given label. + + Clears any existing nodeSelector so only the nodeAffinity is active. + + Args: + vm: VirtualMachine to update. 
+ excluded_label: Node role label key that target nodes must NOT have. + """ + ResourceEditor( + patches={ + vm: { + "spec": { + "template": { + "spec": { + "nodeSelector": None, + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + {"key": excluded_label, "operator": "DoesNotExist"}, + {"key": WORKER_LABEL, "operator": "Exists"}, + ] + } + ] + } + } + }, + } + } + } + } + ).update() From ae7040ff2904124e110db9522907a167e661c0b2 Mon Sep 17 00:00:00 2001 From: Asia Khromov Date: Mon, 11 May 2026 11:55:08 +0300 Subject: [PATCH 3/4] net, ip: Add random_cidr_addresses_by_family helper Returns CIDR-formatted addresses for each IP family supported by the cluster, suitable for cloud-init configuration of any VM interface. Signed-off-by: Asia Khromov Co-Authored-By: Claude Sonnet 4.6 (1M context) --- libs/net/ip.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/libs/net/ip.py b/libs/net/ip.py index 6daac2d0d6..ec3742bdeb 100644 --- a/libs/net/ip.py +++ b/libs/net/ip.py @@ -128,3 +128,24 @@ def have_same_ip_families( expected_ips: list[ipaddress.IPv4Address | ipaddress.IPv6Address], ) -> bool: return {ip.version for ip in actual_ips} == {ip.version for ip in expected_ips} + + +def random_cidr_addresses_by_family(net_seed: int, host_address: int) -> list[str]: + """Return CIDR-formatted addresses for each IP family supported by the cluster. + + IPv4 addresses use a /24 prefix; IPv6 addresses use /64, matching the + subnet definitions in this module. VMs with the same net_seed share the + same subnet, allowing direct L2 communication without routing. Only + families supported by the cluster are included. + + Args: + net_seed: Index into the cached pool of random network prefixes. + host_address: Host portion of the address — must be unique per VM in the test. + + Returns: + List of CIDR strings (e.g. ["192.168.1.1/24", "fd00::1/64"]). 
+ """ + return [ + f"{ip}/64" if ipaddress.ip_address(ip).version == 6 else f"{ip}/24" + for ip in random_ip_addresses_by_family(net_seed=net_seed, host_address=host_address) + ] From 1c20dce7e85eefdcd87cf56c818329a4401e06cd Mon Sep 17 00:00:00 2001 From: Asia Khromov Date: Mon, 11 May 2026 11:55:08 +0300 Subject: [PATCH 4/4] net: Add Linux bridge connectivity tests Live migration between RHCOS 9 and RHCOS 10 nodes can expose kernel bridge driver incompatibilities that break active TCP connections. These tests verify that secondary Linux bridge network connectivity is preserved during cross-OS migration in both directions. Signed-off-by: Asia Khromov Co-Authored-By: Claude Sonnet 4.6 (1M context) --- .../rhel9_rhel10_cluster/conftest.py | 100 ++++++++++++++++++ .../rhel9_rhel10_cluster/lib_helpers.py | 62 +++++++++++ .../rhel9_rhel10_cluster/test_connectivity.py | 37 ++++++- 3 files changed, 196 insertions(+), 3 deletions(-) create mode 100644 tests/network/l2_bridge/rhel9_rhel10_cluster/conftest.py create mode 100644 tests/network/l2_bridge/rhel9_rhel10_cluster/lib_helpers.py diff --git a/tests/network/l2_bridge/rhel9_rhel10_cluster/conftest.py b/tests/network/l2_bridge/rhel9_rhel10_cluster/conftest.py new file mode 100644 index 0000000000..12eaaec228 --- /dev/null +++ b/tests/network/l2_bridge/rhel9_rhel10_cluster/conftest.py @@ -0,0 +1,100 @@ +import ipaddress +from collections.abc import Generator +from typing import Final + +import pytest +from kubernetes.dynamic import DynamicClient +from ocp_resources.namespace import Namespace + +import tests.network.libs.nodenetworkconfigurationpolicy as libnncp +from libs.net.ip import random_cidr_addresses_by_family +from libs.net.netattachdef import CNIPluginBridgeConfig, NetConfig, NetworkAttachmentDefinition +from libs.net.traffic_generator import TcpServer, VMTcpClient, active_tcp_connections +from libs.net.vmspec import wait_for_ifaces_status +from libs.vm.vm import BaseVirtualMachine +from 
tests.network.l2_bridge.rhel9_rhel10_cluster.lib_helpers import LINUX_BRIDGE_IFACE_NAME, bridge_vm + +_SERVER_HOST_ADDRESS: Final[int] = 1 +_CLIENT_HOST_ADDRESS: Final[int] = 2 + + +@pytest.fixture(scope="module") +def dual_stream_bridge_nad( + admin_client: DynamicClient, + namespace: Namespace, + bridge_nncp: libnncp.NodeNetworkConfigurationPolicy, +) -> Generator[NetworkAttachmentDefinition]: + config = NetConfig( + name="rhel9-rhel10-bridge-nad", + plugins=[CNIPluginBridgeConfig(bridge=bridge_nncp.desired_state_spec.interfaces[0].name)], # type: ignore + ) + with NetworkAttachmentDefinition( + name="rhel9-rhel10-bridge-nad", + namespace=namespace.name, + config=config, + client=admin_client, + ) as nad: + yield nad + + +@pytest.fixture(scope="module") +def bridge_server_vm( + unprivileged_client: DynamicClient, + namespace: Namespace, + dual_stream_bridge_nad: NetworkAttachmentDefinition, +) -> Generator[BaseVirtualMachine]: + addresses = random_cidr_addresses_by_family(net_seed=0, host_address=_SERVER_HOST_ADDRESS) + with bridge_vm( + namespace=namespace.name, + name="server-vm", + client=unprivileged_client, + bridge_network_name=dual_stream_bridge_nad.name, + addresses=addresses, + ) as vm: + vm.start(wait=True) + vm.wait_for_agent_connected() + wait_for_ifaces_status( + vm=vm, + ip_addresses_by_spec_net_name={ + LINUX_BRIDGE_IFACE_NAME: [str(ipaddress.ip_interface(addr).ip) for addr in addresses] + }, + ) + yield vm + + +@pytest.fixture(scope="module") +def bridge_client_vm( + unprivileged_client: DynamicClient, + namespace: Namespace, + dual_stream_bridge_nad: NetworkAttachmentDefinition, +) -> Generator[BaseVirtualMachine]: + addresses = random_cidr_addresses_by_family(net_seed=0, host_address=_CLIENT_HOST_ADDRESS) + with bridge_vm( + namespace=namespace.name, + name="client-vm", + client=unprivileged_client, + bridge_network_name=dual_stream_bridge_nad.name, + addresses=addresses, + ) as vm: + vm.start(wait=True) + vm.wait_for_agent_connected() + 
wait_for_ifaces_status( + vm=vm, + ip_addresses_by_spec_net_name={ + LINUX_BRIDGE_IFACE_NAME: [str(ipaddress.ip_interface(addr).ip) for addr in addresses] + }, + ) + yield vm + + +@pytest.fixture(scope="module") +def bridge_active_tcp_connection( + bridge_client_vm: BaseVirtualMachine, + bridge_server_vm: BaseVirtualMachine, +) -> Generator[list[tuple[VMTcpClient, TcpServer]]]: + with active_tcp_connections( + client_vm=bridge_client_vm, + server_vm=bridge_server_vm, + iface_name=LINUX_BRIDGE_IFACE_NAME, + ) as connections: + yield connections diff --git a/tests/network/l2_bridge/rhel9_rhel10_cluster/lib_helpers.py b/tests/network/l2_bridge/rhel9_rhel10_cluster/lib_helpers.py new file mode 100644 index 0000000000..432f99f5d5 --- /dev/null +++ b/tests/network/l2_bridge/rhel9_rhel10_cluster/lib_helpers.py @@ -0,0 +1,62 @@ +from typing import Final + +from kubernetes.dynamic import DynamicClient + +from libs.vm.factory import base_vmspec, fedora_vm +from libs.vm.spec import CloudInitNoCloud, Devices, Interface, Multus, Network +from libs.vm.vm import BaseVirtualMachine, add_volume_disk, cloudinitdisk_storage +from tests.network.libs import cloudinit +from tests.network.libs.cloudinit import EthernetDevice +from tests.network.libs.vm_scheduling import RHCOS9_NODE_SELECTOR + +LINUX_BRIDGE_IFACE_NAME: Final[str] = "linux-bridge-1" + + +def bridge_vm( + namespace: str, + name: str, + client: DynamicClient, + bridge_network_name: str, + addresses: list[str], +) -> BaseVirtualMachine: + """Create a Fedora VM with a primary masquerade and a secondary Linux bridge interface. + + The VM is scheduled on RHCOS 9 worker nodes via the worker-rhcos9 role label. + Pass the same addresses list to wait_for_ifaces_status so that expected IPs + are derived from the same configuration used to create the VM. + + Args: + namespace: Namespace in which the VM will be created. + name: Name of the VM. + client: Kubernetes dynamic client. 
+ bridge_network_name: Name of the NetworkAttachmentDefinition for the bridge. + addresses: CIDR addresses for the secondary interface (e.g. ["192.168.1.1/24"]). + + Returns: + Configured BaseVirtualMachine object (not yet started). + """ + spec = base_vmspec() + spec.template.spec.domain.devices = Devices( + interfaces=[ + Interface(name="default", masquerade={}), + Interface(name=LINUX_BRIDGE_IFACE_NAME, bridge={}), + ] + ) + spec.template.spec.networks = [ + Network(name="default", pod={}), + Network(name=LINUX_BRIDGE_IFACE_NAME, multus=Multus(networkName=bridge_network_name)), + ] + spec.template.spec.nodeSelector = RHCOS9_NODE_SELECTOR + + userdata = cloudinit.UserData(users=[]) + disk, volume = cloudinitdisk_storage( + data=CloudInitNoCloud( + networkData=cloudinit.asyaml( + no_cloud=cloudinit.NetworkData(ethernets={"eth1": EthernetDevice(addresses=addresses)}) + ), + userData=cloudinit.format_cloud_config(userdata=userdata), + ) + ) + spec.template.spec = add_volume_disk(vmi_spec=spec.template.spec, volume=volume, disk=disk) + + return fedora_vm(namespace=namespace, name=name, client=client, spec=spec) diff --git a/tests/network/l2_bridge/rhel9_rhel10_cluster/test_connectivity.py b/tests/network/l2_bridge/rhel9_rhel10_cluster/test_connectivity.py index 7e40dd5845..1796c988f3 100644 --- a/tests/network/l2_bridge/rhel9_rhel10_cluster/test_connectivity.py +++ b/tests/network/l2_bridge/rhel9_rhel10_cluster/test_connectivity.py @@ -8,11 +8,16 @@ - mixed_os_nodes """ +import ipaddress + import pytest -__test__ = False +from libs.net.traffic_generator import is_tcp_connection +from tests.network.libs.vm_scheduling import RHCOS9_WORKER_LABEL, set_vm_node_affinity, set_vm_node_selector +from utilities.virt import migrate_vm_and_verify +@pytest.mark.mixed_os_nodes @pytest.mark.incremental class TestConnectivity: """ @@ -23,7 +28,12 @@ class TestConnectivity: """ @pytest.mark.polarion("CNV-15949") - def 
test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos10(self): + def test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos10( + self, + subtests, + bridge_server_vm, + bridge_active_tcp_connection, + ): """ Test that an active TCP connection over a secondary Linux bridge network is preserved when the server VM migrates from an RHCOS 9 node to an RHCOS 10 node. @@ -39,9 +49,21 @@ def test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos10( Expected: - The active TCP connection from the client VM to the server VM is preserved during the migration """ + set_vm_node_affinity(vm=bridge_server_vm, excluded_label=RHCOS9_WORKER_LABEL) + migrate_vm_and_verify(vm=bridge_server_vm) + for client, server in bridge_active_tcp_connection: + with subtests.test(msg=f"IPv{ipaddress.ip_address(client.server_ip).version} after migration to RHCOS 10"): + assert is_tcp_connection(server=server, client=client), ( + f"TCP connection lost after migrating {bridge_server_vm.name} to RHCOS 10 node" + ) @pytest.mark.polarion("CNV-15964") - def test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos9(self): + def test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos9( + self, + subtests, + bridge_server_vm, + bridge_active_tcp_connection, + ): """ Test that an active TCP connection over a secondary Linux bridge network is preserved when the server VM migrates from an RHCOS 10 node to an RHCOS 9 node. 
@@ -57,3 +79,12 @@ def test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos9(s Expected: - The active TCP connection from the client VM to the server VM is preserved during the migration """ + set_vm_node_selector(vm=bridge_server_vm, label=RHCOS9_WORKER_LABEL) + migrate_vm_and_verify(vm=bridge_server_vm) + for client, server in bridge_active_tcp_connection: + with subtests.test( + msg=f"IPv{ipaddress.ip_address(client.server_ip).version} after migration back to RHCOS 9" + ): + assert is_tcp_connection(server=server, client=client), ( + f"TCP connection lost after migrating {bridge_server_vm.name} back to RHCOS 9 node" + )