21 changes: 21 additions & 0 deletions libs/net/ip.py
@@ -128,3 +128,24 @@ def have_same_ip_families(
expected_ips: list[ipaddress.IPv4Address | ipaddress.IPv6Address],
) -> bool:
return {ip.version for ip in actual_ips} == {ip.version for ip in expected_ips}


def random_cidr_addresses_by_family(net_seed: int, host_address: int) -> list[str]:
"""Return CIDR-formatted addresses for each IP family supported by the cluster.

IPv4 addresses use a /24 prefix; IPv6 addresses use /64, matching the
subnet definitions in this module. VMs with the same net_seed share the
same subnet, allowing direct L2 communication without routing. Only
families supported by the cluster are included.

Args:
net_seed: Index into the cached pool of random network prefixes.
host_address: Host portion of the address — must be unique per VM in the test.

Returns:
List of CIDR strings (e.g. ["192.168.1.1/24", "fd00::1/64"]).
"""
return [
f"{ip}/64" if ipaddress.ip_address(ip).version == 6 else f"{ip}/24"
for ip in random_ip_addresses_by_family(net_seed=net_seed, host_address=host_address)
]
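
A minimal usage sketch (not part of the diff; the concrete prefixes come from the cached random pool, so the values below are only illustrative):

server_addrs = random_cidr_addresses_by_family(net_seed=0, host_address=1)
client_addrs = random_cidr_addresses_by_family(net_seed=0, host_address=2)
# On a dual-stack cluster both calls could return something like:
#   server_addrs -> ["192.168.1.1/24", "fd00:1234::1/64"]
#   client_addrs -> ["192.168.1.2/24", "fd00:1234::2/64"]
# Same net_seed, same /24 and /64 subnets: the two VMs can reach each other
# over L2 without routing; a different host_address keeps their IPs unique.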
1 change: 1 addition & 0 deletions libs/vm/spec.py
@@ -31,6 +31,7 @@ class VMISpec:
volumes: list[Volume] | None = None
terminationGracePeriodSeconds: int | None = None # noqa: N815
affinity: Affinity | None = None
nodeSelector: dict[str, str] | None = None # noqa: N815
Comment thread
azhivovk marked this conversation as resolved.
Contributor

Commit message:

Dual-stream clusters label RHCOS 9 workers with:
  node-role.kubernetes.io/worker-rhcos9

RHCOS 10 workers carry only the generic worker role label:
  node-role.kubernetes.io/worker

Can you please reference some kind of proof from OpenShift? It's far from intuitive, and I need to believe this; otherwise I can't trust that you're migrating to/from the right nodes.

In addition: "Co-Authored-By" -> "Assisted by"

Contributor Author

This shows each worker node alongside its OS image and whether it has the worker-rhcos9 label:

oc get nodes -l node-role.kubernetes.io/worker -o custom-columns='NAME:.metadata.name,OS:.status.nodeInfo.osImage,ROLES:.metadata.labels.node-role\.kubernetes\.io/worker-rhcos9'
NAME                                            OS                                                           ROLES
net-asiazk-ds-422-hrzt5-worker-0-2f2j9          Red Hat Enterprise Linux CoreOS 10.2.20260423-0 (Coughlan)   <none>
net-asiazk-ds-422-hrzt5-worker-0-rhcos9-b6gdp   Red Hat Enterprise Linux CoreOS 9.8.20260504-0 (Plow)        
net-asiazk-ds-422-hrzt5-worker-0-rhcos9-xxbpm   Red Hat Enterprise Linux CoreOS 9.8.20260504-0 (Plow)       

Co-Authored-By is how Claude automatically signs off the commit - without it we won't see the Claude icon in the commit and we'll only see my icon.

Contributor

Why is it in VMI spec and not in VM spec?

Contributor Author

nodeSelector applies to the VMI (the running pod), so it belongs in VMISpec
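
A minimal sketch (not from this PR) of how the field is meant to be consumed, assuming the VMISpec dataclass fields are serialized verbatim into the VirtualMachine manifest under spec.template.spec, which is the VMI part that becomes the virt-launcher pod:

spec = base_vmspec()
spec.template.spec.nodeSelector = {"node-role.kubernetes.io/worker-rhcos9": ""}
# Rendered manifest, schematically:
#   spec:
#     template:
#       spec:                 # VMI spec -> virt-launcher pod spec
#         nodeSelector:
#           node-role.kubernetes.io/worker-rhcos9: ""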



@dataclass
1 change: 1 addition & 0 deletions pytest.ini
@@ -72,6 +72,7 @@ markers =
rwx_default_storage: Tests that require RWX storage
descheduler: Tests that require kube-descheduler on nodes
remote_cluster: Tests that require a remote cluster
mixed_os_nodes: Tests that require a dual-stream cluster with both RHCOS 9 and RHCOS 10 worker nodes

## Required operators
mtv: Tests that require the MTV operator to be installed
100 changes: 100 additions & 0 deletions tests/network/l2_bridge/rhel9_rhel10_cluster/conftest.py
@@ -0,0 +1,100 @@
import ipaddress
from collections.abc import Generator
from typing import Final

import pytest
from kubernetes.dynamic import DynamicClient
from ocp_resources.namespace import Namespace

import tests.network.libs.nodenetworkconfigurationpolicy as libnncp
from libs.net.ip import random_cidr_addresses_by_family
from libs.net.netattachdef import CNIPluginBridgeConfig, NetConfig, NetworkAttachmentDefinition
from libs.net.traffic_generator import TcpServer, VMTcpClient, active_tcp_connections
from libs.net.vmspec import wait_for_ifaces_status
from libs.vm.vm import BaseVirtualMachine
from tests.network.l2_bridge.rhel9_rhel10_cluster.lib_helpers import LINUX_BRIDGE_IFACE_NAME, bridge_vm

_SERVER_HOST_ADDRESS: Final[int] = 1
_CLIENT_HOST_ADDRESS: Final[int] = 2


@pytest.fixture(scope="module")
def dual_stream_bridge_nad(
admin_client: DynamicClient,
namespace: Namespace,
bridge_nncp: libnncp.NodeNetworkConfigurationPolicy,
) -> Generator[NetworkAttachmentDefinition]:
config = NetConfig(
name="rhel9-rhel10-bridge-nad",
plugins=[CNIPluginBridgeConfig(bridge=bridge_nncp.desired_state_spec.interfaces[0].name)], # type: ignore
)
with NetworkAttachmentDefinition(
name="rhel9-rhel10-bridge-nad",
namespace=namespace.name,
config=config,
client=admin_client,
) as nad:
yield nad


@pytest.fixture(scope="module")
def bridge_server_vm(
unprivileged_client: DynamicClient,
namespace: Namespace,
dual_stream_bridge_nad: NetworkAttachmentDefinition,
) -> Generator[BaseVirtualMachine]:
addresses = random_cidr_addresses_by_family(net_seed=0, host_address=_SERVER_HOST_ADDRESS)
with bridge_vm(
namespace=namespace.name,
name="server-vm",
client=unprivileged_client,
bridge_network_name=dual_stream_bridge_nad.name,
addresses=addresses,
) as vm:
vm.start(wait=True)
vm.wait_for_agent_connected()
wait_for_ifaces_status(
vm=vm,
ip_addresses_by_spec_net_name={
LINUX_BRIDGE_IFACE_NAME: [str(ipaddress.ip_interface(addr).ip) for addr in addresses]
},
)
yield vm


@pytest.fixture(scope="module")
def bridge_client_vm(
unprivileged_client: DynamicClient,
namespace: Namespace,
dual_stream_bridge_nad: NetworkAttachmentDefinition,
) -> Generator[BaseVirtualMachine]:
addresses = random_cidr_addresses_by_family(net_seed=0, host_address=_CLIENT_HOST_ADDRESS)
with bridge_vm(
namespace=namespace.name,
name="client-vm",
client=unprivileged_client,
bridge_network_name=dual_stream_bridge_nad.name,
addresses=addresses,
) as vm:
vm.start(wait=True)
vm.wait_for_agent_connected()
wait_for_ifaces_status(
vm=vm,
ip_addresses_by_spec_net_name={
LINUX_BRIDGE_IFACE_NAME: [str(ipaddress.ip_interface(addr).ip) for addr in addresses]
},
)
yield vm


@pytest.fixture(scope="module")
def bridge_active_tcp_connection(
bridge_client_vm: BaseVirtualMachine,
bridge_server_vm: BaseVirtualMachine,
) -> Generator[list[tuple[VMTcpClient, TcpServer]]]:
with active_tcp_connections(
client_vm=bridge_client_vm,
server_vm=bridge_server_vm,
iface_name=LINUX_BRIDGE_IFACE_NAME,
) as connections:
yield connections
62 changes: 62 additions & 0 deletions tests/network/l2_bridge/rhel9_rhel10_cluster/lib_helpers.py
@@ -0,0 +1,62 @@
from typing import Final

from kubernetes.dynamic import DynamicClient

from libs.vm.factory import base_vmspec, fedora_vm
from libs.vm.spec import CloudInitNoCloud, Devices, Interface, Multus, Network
from libs.vm.vm import BaseVirtualMachine, add_volume_disk, cloudinitdisk_storage
from tests.network.libs import cloudinit
from tests.network.libs.cloudinit import EthernetDevice
from tests.network.libs.vm_scheduling import RHCOS9_NODE_SELECTOR

LINUX_BRIDGE_IFACE_NAME: Final[str] = "linux-bridge-1"


def bridge_vm(
namespace: str,
name: str,
client: DynamicClient,
bridge_network_name: str,
addresses: list[str],
) -> BaseVirtualMachine:
"""Create a Fedora VM with a primary masquerade and a secondary Linux bridge interface.

The VM is scheduled on RHCOS 9 worker nodes via the worker-rhcos9 role label.
Pass the same addresses list to wait_for_ifaces_status so that expected IPs
are derived from the same configuration used to create the VM.

Args:
namespace: Namespace in which the VM will be created.
name: Name of the VM.
client: Kubernetes dynamic client.
bridge_network_name: Name of the NetworkAttachmentDefinition for the bridge.
addresses: CIDR addresses for the secondary interface (e.g. ["192.168.1.1/24"]).

Returns:
Configured BaseVirtualMachine object (not yet started).
"""
spec = base_vmspec()
spec.template.spec.domain.devices = Devices(
interfaces=[
Interface(name="default", masquerade={}),
Interface(name=LINUX_BRIDGE_IFACE_NAME, bridge={}),
]
)
spec.template.spec.networks = [
Network(name="default", pod={}),
Network(name=LINUX_BRIDGE_IFACE_NAME, multus=Multus(networkName=bridge_network_name)),
]
spec.template.spec.nodeSelector = RHCOS9_NODE_SELECTOR

userdata = cloudinit.UserData(users=[])
disk, volume = cloudinitdisk_storage(
data=CloudInitNoCloud(
networkData=cloudinit.asyaml(
no_cloud=cloudinit.NetworkData(ethernets={"eth1": EthernetDevice(addresses=addresses)})
),
userData=cloudinit.format_cloud_config(userdata=userdata),
)
)
spec.template.spec = add_volume_disk(vmi_spec=spec.template.spec, volume=volume, disk=disk)

return fedora_vm(namespace=namespace, name=name, client=client, spec=spec)
37 changes: 34 additions & 3 deletions tests/network/l2_bridge/rhel9_rhel10_cluster/test_connectivity.py
@@ -8,11 +8,16 @@
- mixed_os_nodes
"""

import ipaddress

import pytest

__test__ = False
from libs.net.traffic_generator import is_tcp_connection
from tests.network.libs.vm_scheduling import RHCOS9_WORKER_LABEL, set_vm_node_affinity, set_vm_node_selector
from utilities.virt import migrate_vm_and_verify


@pytest.mark.mixed_os_nodes
@pytest.mark.incremental
class TestConnectivity:
"""
@@ -23,7 +28,12 @@ class TestConnectivity:
"""

@pytest.mark.polarion("CNV-15949")
def test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos10(self):
def test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos10(
self,
subtests,
bridge_server_vm,
bridge_active_tcp_connection,
):
"""
Test that an active TCP connection over a secondary Linux bridge network
is preserved when the server VM migrates from an RHCOS 9 node to an RHCOS 10 node.
@@ -39,9 +49,21 @@ def test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos10(
Expected:
- The active TCP connection from the client VM to the server VM is preserved during the migration
"""
set_vm_node_affinity(vm=bridge_server_vm, excluded_label=RHCOS9_WORKER_LABEL)
migrate_vm_and_verify(vm=bridge_server_vm)
for client, server in bridge_active_tcp_connection:
with subtests.test(msg=f"IPv{ipaddress.ip_address(client.server_ip).version} after migration to RHCOS 10"):
assert is_tcp_connection(server=server, client=client), (
f"TCP connection lost after migrating {bridge_server_vm.name} to RHCOS 10 node"
)

@pytest.mark.polarion("CNV-15964")
def test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos9(self):
def test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos9(
self,
subtests,
bridge_server_vm,
bridge_active_tcp_connection,
):
"""
Test that an active TCP connection over a secondary Linux bridge network
is preserved when the server VM migrates from an RHCOS 10 node to an RHCOS 9 node.
@@ -57,3 +79,12 @@ def test_linux_bridge_connectivity_preserved_during_server_migration_to_rhcos9(s
Expected:
- The active TCP connection from the client VM to the server VM is preserved during the migration
"""
set_vm_node_selector(vm=bridge_server_vm, label=RHCOS9_WORKER_LABEL)
migrate_vm_and_verify(vm=bridge_server_vm)
for client, server in bridge_active_tcp_connection:
with subtests.test(
msg=f"IPv{ipaddress.ip_address(client.server_ip).version} after migration back to RHCOS 9"
):
assert is_tcp_connection(server=server, client=client), (
f"TCP connection lost after migrating {bridge_server_vm.name} back to RHCOS 9 node"
)
62 changes: 62 additions & 0 deletions tests/network/libs/vm_scheduling.py
@@ -0,0 +1,62 @@
from typing import Final

from ocp_resources.resource import ResourceEditor

from libs.vm.vm import BaseVirtualMachine

RHCOS9_WORKER_LABEL: Final[str] = "node-role.kubernetes.io/worker-rhcos9"
WORKER_LABEL: Final[str] = "node-role.kubernetes.io/worker"

RHCOS9_NODE_SELECTOR: Final[dict[str, str]] = {RHCOS9_WORKER_LABEL: ""}


def set_vm_node_selector(vm: BaseVirtualMachine, label: str) -> None:
"""Set a nodeSelector on the VM to schedule it on nodes carrying the given label.

Clears any existing nodeAffinity so only the nodeSelector is active.

Args:
vm: VirtualMachine to update.
label: Node role label key (e.g. "node-role.kubernetes.io/worker-rhcos9").
"""
ResourceEditor(
patches={vm: {"spec": {"template": {"spec": {"nodeSelector": {label: ""}, "affinity": None}}}}}
).update()


def set_vm_node_affinity(vm: BaseVirtualMachine, excluded_label: str) -> None:
"""Set a nodeAffinity on the VM to schedule it on worker nodes NOT carrying the given label.

Clears any existing nodeSelector so only the nodeAffinity is active.

Args:
vm: VirtualMachine to update.
excluded_label: Node role label key that target nodes must NOT have.
"""
ResourceEditor(
patches={
vm: {
"spec": {
"template": {
"spec": {
"nodeSelector": None,
"affinity": {
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{"key": excluded_label, "operator": "DoesNotExist"},
{"key": WORKER_LABEL, "operator": "Exists"},
]
}
]
}
}
},
}
}
}
}
}
).update()
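
A hypothetical way to sanity-check which nodes the DoesNotExist/Exists affinity above targets (not part of this PR; it uses the standard kubernetes Python client and assumes a reachable kubeconfig):

from kubernetes import client, config

config.load_kube_config()
# Equivalent label-selector query: worker nodes that do NOT carry the rhcos9 role.
nodes = client.CoreV1Api().list_node(
    label_selector=f"{WORKER_LABEL},!{RHCOS9_WORKER_LABEL}"
)
for node in nodes.items:
    print(node.metadata.name, node.status.node_info.os_image)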