57 changes: 7 additions & 50 deletions tests/virt/node/descheduler/conftest.py
@@ -20,7 +20,6 @@
deploy_vms,
vm_nodes,
vms_per_nodes,
wait_vmi_failover,
)
from tests.virt.utils import (
build_node_affinity_dict,
@@ -29,11 +28,7 @@
)
from utilities.constants import TIMEOUT_5MIN, TIMEOUT_5SEC
from utilities.infra import wait_for_pods_deletion
from utilities.virt import (
node_mgmt_console,
wait_for_migration_finished,
wait_for_node_schedulable_status,
)
from utilities.virt import wait_for_migration_finished

LOGGER = logging.getLogger(__name__)

@@ -123,46 +118,6 @@ def deployed_vms_for_descheduler_test(
)


@pytest.fixture(scope="class")
def vms_orig_nodes_before_node_drain(deployed_vms_for_descheduler_test):
return vm_nodes(vms=deployed_vms_for_descheduler_test)


@pytest.fixture(scope="class")
def vms_boot_time_before_node_drain(
deployed_vms_for_descheduler_test,
):
yield get_boot_time_for_multiple_vms(vm_list=deployed_vms_for_descheduler_test)


@pytest.fixture(scope="class")
def node_to_drain(
schedulable_nodes,
vms_orig_nodes_before_node_drain,
):
vm_per_node_counters = vms_per_nodes(vms=vms_orig_nodes_before_node_drain)
for node in schedulable_nodes:
if vm_per_node_counters[node.name] > 0:
return node

raise ValueError("No suitable node to drain")


@pytest.fixture()
def drain_uncordon_node(
admin_client,
deployed_vms_for_descheduler_test,
vms_orig_nodes_before_node_drain,
node_to_drain,
):
"""Return when node is schedulable again after uncordon"""
with node_mgmt_console(admin_client=admin_client, node=node_to_drain, node_mgmt="drain"):
wait_for_node_schedulable_status(node=node_to_drain, status=False)
for vm in deployed_vms_for_descheduler_test:
if vms_orig_nodes_before_node_drain[vm.name].name == node_to_drain.name:
wait_vmi_failover(vm=vm, orig_node=vms_orig_nodes_before_node_drain[vm.name])


@pytest.fixture()
def all_existing_migrations_completed(admin_client, namespace):
# Descheduler may trigger multiple migrations, need to wait when all succeeded
@@ -320,10 +275,12 @@ def utilization_imbalance(
@pytest.fixture(scope="class")
def node_to_run_stress(schedulable_nodes, deployed_vms_for_descheduler_test):
vm_per_node_counters = vms_per_nodes(vms=vm_nodes(vms=deployed_vms_for_descheduler_test))
for node in schedulable_nodes:
if vm_per_node_counters[node.name] > 0:
LOGGER.info(f"Node to run stress: {node.name}")
return node
node_with_most_vms = max(schedulable_nodes, key=lambda node: vm_per_node_counters.get(node.name, 0))
if vm_per_node_counters[node_with_most_vms.name] > 0:
LOGGER.info(
f"Node to run stress: {node_with_most_vms.name} with {vm_per_node_counters[node_with_most_vms.name]} VMs"
)
return node_with_most_vms

raise ValueError("No suitable node to run stress")

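The body of the all_existing_migrations_completed fixture is collapsed in this diff. As a rough, hypothetical sketch of the "wait until all descheduler-triggered migrations have succeeded" idea behind it (the function name and the get_unfinished_migrations callable are assumptions, not the fixture's actual code; the import path assumes the timeout_sampler package already used elsewhere in these tests):

import logging

from timeout_sampler import TimeoutExpiredError, TimeoutSampler

LOGGER = logging.getLogger(__name__)


def wait_until_migrations_finished(get_unfinished_migrations, wait_timeout, sleep):
    # Hypothetical helper: poll until the supplied callable reports no in-flight migrations.
    sampler = TimeoutSampler(wait_timeout=wait_timeout, sleep=sleep, func=get_unfinished_migrations)
    unfinished = None
    try:
        for unfinished in sampler:
            if not unfinished:
                return
    except TimeoutExpiredError:
        LOGGER.error(f"Migrations still not completed: {unfinished}")
        raise
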
76 changes: 1 addition & 75 deletions tests/virt/node/descheduler/test_descheduler.py
@@ -1,18 +1,13 @@
import logging

import pytest
from ocp_resources.resource import ResourceEditor

from tests.virt.node.descheduler.constants import DESCHEDULER_TEST_LABEL
from tests.virt.node.descheduler.utils import (
assert_vms_consistent_virt_launcher_pods,
assert_vms_distribution_after_failover,
verify_at_least_one_vm_migrated,
)
from tests.virt.utils import verify_guest_boot_time

LOGGER = logging.getLogger(__name__)

pytestmark = [
pytest.mark.tier3,
pytest.mark.descheduler,
@@ -23,59 +18,6 @@
),
]

NO_MIGRATION_STORM_ASSERT_MESSAGE = "Verify there is no migration storm after migrations triggered by the descheduler."


@pytest.mark.parametrize(
"calculated_vm_deployment_for_descheduler_test",
[pytest.param(0.50)],
indirect=True,
)
class TestDeschedulerEvictsVMAfterDrainUncordon:
TESTS_CLASS_NAME = "TestDeschedulerEvictsVMAfterDrainUncordon"

@pytest.mark.dependency(name=f"{TESTS_CLASS_NAME}::test_descheduler_evicts_vm_after_drain_uncordon")
@pytest.mark.polarion("CNV-5922")
def test_descheduler_evicts_vm_after_drain_uncordon(
self,
schedulable_nodes,
deployed_vms_for_descheduler_test,
vms_boot_time_before_node_drain,
drain_uncordon_node,
):
assert_vms_distribution_after_failover(
vms=deployed_vms_for_descheduler_test,
nodes=schedulable_nodes,
)

@pytest.mark.dependency(
name=f"{TESTS_CLASS_NAME}::test_no_migrations_storm",
depends=[f"{TESTS_CLASS_NAME}::test_descheduler_evicts_vm_after_drain_uncordon"],
)
@pytest.mark.polarion("CNV-7316")
def test_no_migrations_storm(
self,
deployed_vms_for_descheduler_test,
all_existing_migrations_completed,
admin_client,
):
LOGGER.info(NO_MIGRATION_STORM_ASSERT_MESSAGE)
assert_vms_consistent_virt_launcher_pods(
running_vms=deployed_vms_for_descheduler_test, admin_client=admin_client
)

@pytest.mark.dependency(depends=[f"{TESTS_CLASS_NAME}::test_no_migrations_storm"])
@pytest.mark.polarion("CNV-8288")
def test_boot_time_after_migrations_complete(
self,
deployed_vms_for_descheduler_test,
vms_boot_time_before_node_drain,
):
verify_guest_boot_time(
vm_list=deployed_vms_for_descheduler_test,
initial_boot_time=vms_boot_time_before_node_drain,
)


@pytest.mark.parametrize(
"calculated_vm_deployment_for_node_with_least_available_memory, deployed_vms_for_utilization_imbalance",
@@ -108,23 +50,7 @@ def test_descheduler_evicts_vm_from_utilization_imbalance(
vms=deployed_vms_for_utilization_imbalance, node_before=node_with_least_available_memory
)

@pytest.mark.dependency(
name=f"{TESTS_CLASS_NAME}::test_no_migrations_storm",
depends=[f"{TESTS_CLASS_NAME}::test_descheduler_evicts_vm_from_utilization_imbalance"],
)
@pytest.mark.polarion("CNV-8918")
def test_no_migrations_storm(
self,
deployed_vms_for_utilization_imbalance,
all_existing_migrations_completed,
admin_client,
):
LOGGER.info(NO_MIGRATION_STORM_ASSERT_MESSAGE)
assert_vms_consistent_virt_launcher_pods(
running_vms=deployed_vms_for_utilization_imbalance, admin_client=admin_client
)

@pytest.mark.dependency(depends=[f"{TESTS_CLASS_NAME}::test_no_migrations_storm"])
@pytest.mark.dependency(depends=[f"{TESTS_CLASS_NAME}::test_descheduler_evicts_vm_from_utilization_imbalance"])
@pytest.mark.polarion("CNV-8919")
def test_boot_time_after_migrations_complete(
self,
47 changes: 0 additions & 47 deletions tests/virt/node/descheduler/utils.py
@@ -19,7 +19,6 @@
TIMEOUT_5MIN,
TIMEOUT_5SEC,
TIMEOUT_10MIN,
TIMEOUT_15MIN,
TIMEOUT_20SEC,
NamespacesNames,
)
@@ -95,52 +94,6 @@ def calculate_vm_deployment(
return vm_deployment


def wait_vmi_failover(vm, orig_node):
samples = TimeoutSampler(wait_timeout=TIMEOUT_15MIN, sleep=TIMEOUT_5SEC, func=lambda: vm.vmi.node.name)
LOGGER.info(f"Waiting for {vm.name} to be moved from node {orig_node.name}")
try:
for sample in samples:
if sample and sample != orig_node.name:
return
except TimeoutExpiredError:
LOGGER.error(f"VM {vm.name} failed to deploy on new node")
raise


def assert_vms_distribution_after_failover(vms, nodes, all_nodes=True):
def _get_vms_per_nodes():
return vms_per_nodes(vms=vm_nodes(vms=vms))

# Allow the descheduler to cycle multiple times before returning.
# The value can be affected by high pod counts or load within
# the cluster which increases the descheduler runtime.
descheduling_failover_timeout = DESCHEDULING_INTERVAL_120SEC * 3

if all_nodes:
LOGGER.info("Verify all nodes have at least one VM running")
else:
LOGGER.info("Verify at least one node has a VM running")

samples = TimeoutSampler(
wait_timeout=descheduling_failover_timeout,
sleep=TIMEOUT_5SEC,
func=_get_vms_per_nodes,
)
vms_per_nodes_dict = None
try:
for vms_per_nodes_dict in samples:
vm_counts = [vm_count for vm_count in vms_per_nodes_dict.values() if vm_count]
if all_nodes and len(vm_counts) == len(nodes):
LOGGER.info(f"Every node has at least one VM running on it: {vms_per_nodes_dict}")
return
elif vm_counts and not all_nodes:
LOGGER.info(f"There is at least one node with a VM running on it: {vms_per_nodes_dict}")
return
except TimeoutExpiredError:
LOGGER.error(f"Running VMs missing from nodes: {vms_per_nodes_dict}")
raise


def vms_per_nodes(vms):
"""
Args:
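For context when reading the counter-based checks above, a hypothetical reconstruction of how the vm_nodes() and vms_per_nodes() helpers behave, inferred only from their call sites in this diff (the real implementations are collapsed here and may differ):

from collections import Counter


def vm_nodes(vms):
    # Inferred shape: map each VM name to the node object its VMI currently runs on.
    return {vm.name: vm.vmi.node for vm in vms}


def vms_per_nodes(vms):
    # Inferred shape: `vms` is the vm_nodes() mapping; count running VMs per node name.
    # A Counter returns 0 for nodes with no VMs, which matches how callers index it.
    return Counter(node.name for node in vms.values())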