From 2a94db8f083aa76a191ade89cdddba28dc596db3 Mon Sep 17 00:00:00 2001 From: Peter Grzybowski Date: Fri, 29 May 2026 15:43:30 +0200 Subject: [PATCH 1/8] chore: Revert "chore: move test_in_poor to ent only" This reverts commit 99187c9bccc8ebd75279b3041a08ae57ed47a65e. Signed-off-by: Peter Grzybowski --- tests/tests/test_mender_connect.py | 120 ++++++++++++++--------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/tests/tests/test_mender_connect.py b/tests/tests/test_mender_connect.py index 001eea88d..329afb8dc 100644 --- a/tests/tests/test_mender_connect.py +++ b/tests/tests/test_mender_connect.py @@ -188,6 +188,66 @@ def test_bogus_shell_message(self, docker_env): assert prot.protoType == proto_shell.PROTO_TYPE_SHELL assert prot.typ == "bogusmessage" + def test_in_poor_network_environment(self, docker_env): + self.assert_env(docker_env) + + receive_timeout_s = 16 + + def is_shell_working(shell): + # Test if a simple command works. + shell.sendInput("ls /\n".encode()) + output = shell.recvOutput(receive_timeout_s) + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + output = output.decode() + assert "usr" in output + assert "etc" in output + + def detect_shell_prompt(shell): + # Drain any initial output from the prompt. It should end in either "# " + # (root) or "$ " (user). + output = shell.recvOutput(receive_timeout_s) + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + assert output[-2:].decode() in [ + "# ", + "$ ", + ], "Could not detect shell prompt." + + with docker_env.devconnect.get_websocket() as ws: + shell = proto_shell.ProtoShell(ws) + body = shell.startShell() + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + assert body == proto_shell.MSG_BODY_SHELL_STARTED + + detect_shell_prompt(shell) + is_shell_working(shell) + + docker_env.device.run("apt-get update") + docker_env.device.run("apt-get install -y iptables") + docker_env.device.run( + "iptables -A OUTPUT -j DROP --destination docker.mender.io" + ) + + # Plenty of time for the session to mess up + # see also QA-1591: the DROP will not cause ICMP response so we rely on the + # TCP RTO which means sometimes we need additional time to sleep. + # this was exposed by the move to docker client in those tests, as the + # network stack acts differently + time.sleep(128) + + # Re-enable a good connection + docker_env.device.run("iptables -D OUTPUT 1") + time.sleep(128) + + # mender-connect should have "healed" now and be able to start a new shell + with docker_env.devconnect.get_websocket() as ws: + shell = proto_shell.ProtoShell(ws) + body = shell.startShell() + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + assert body == proto_shell.MSG_BODY_SHELL_STARTED + + detect_shell_prompt(shell) + is_shell_working(shell) + @flaky(max_runs=3) def test_session_recording(self, docker_env): self.assert_env(docker_env) @@ -403,63 +463,3 @@ def docker_env_flaky_test(self, enterprise_one_docker_client_bootstrapped): env.devconnect = devconn yield env - - def test_in_poor_network_environment(self, docker_env): - self.assert_env(docker_env) - - receive_timeout_s = 16 - - def is_shell_working(shell): - # Test if a simple command works. - shell.sendInput("ls /\n".encode()) - output = shell.recvOutput(receive_timeout_s) - assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL - output = output.decode() - assert "usr" in output - assert "etc" in output - - def detect_shell_prompt(shell): - # Drain any initial output from the prompt. It should end in either "# " - # (root) or "$ " (user). - output = shell.recvOutput(receive_timeout_s) - assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL - assert output[-2:].decode() in [ - "# ", - "$ ", - ], "Could not detect shell prompt." - - with docker_env.devconnect.get_websocket() as ws: - shell = proto_shell.ProtoShell(ws) - body = shell.startShell() - assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL - assert body == proto_shell.MSG_BODY_SHELL_STARTED - - detect_shell_prompt(shell) - is_shell_working(shell) - - docker_env.device.run("apt-get update") - docker_env.device.run("apt-get install -y iptables") - docker_env.device.run( - "iptables -A OUTPUT -j DROP --destination docker.mender.io" - ) - - # Plenty of time for the session to mess up - # see also QA-1591: the DROP will not cause ICMP response so we rely on the - # TCP RTO which means sometimes we need additional time to sleep. - # this was exposed by the move to docker client in those tests, as the - # network stack acts differently - time.sleep(128) - - # Re-enable a good connection - docker_env.device.run("iptables -D OUTPUT 1") - time.sleep(128) - - # mender-connect should have "healed" now and be able to start a new shell - with docker_env.devconnect.get_websocket() as ws: - shell = proto_shell.ProtoShell(ws) - body = shell.startShell() - assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL - assert body == proto_shell.MSG_BODY_SHELL_STARTED - - detect_shell_prompt(shell) - is_shell_working(shell) From b71d2e9984223fe56f99f7cd553db433fab43ce6 Mon Sep 17 00:00:00 2001 From: Peter Grzybowski Date: Fri, 29 May 2026 14:58:45 +0200 Subject: [PATCH 2/8] chore: install pytest-order Ticket: QA-1625 Signed-off-by: Peter Grzybowski --- tests/requirements-python/python-requirements.in | 1 + tests/requirements-python/python-requirements.txt | 2 ++ 2 files changed, 3 insertions(+) diff --git a/tests/requirements-python/python-requirements.in b/tests/requirements-python/python-requirements.in index 8885e8c92..994e41110 100644 --- a/tests/requirements-python/python-requirements.in +++ b/tests/requirements-python/python-requirements.in @@ -9,6 +9,7 @@ pytest-html==4.2.0 pytest-metadata==3.1.1 pytest-timeout==2.4.0 pytest-xdist==3.8.0 +pytest-order==1.1.0 redo==3.0.0 requests==2.33.1 urllib3==2.6.3 diff --git a/tests/requirements-python/python-requirements.txt b/tests/requirements-python/python-requirements.txt index 6fb459f9c..2997597db 100644 --- a/tests/requirements-python/python-requirements.txt +++ b/tests/requirements-python/python-requirements.txt @@ -69,6 +69,8 @@ pytest-timeout==2.4.0 # via -r python-requirements.in pytest-xdist==3.8.0 # via -r python-requirements.in +pytest-order==1.1.0 + # via -r python-requirements.in redo==3.0.0 # via -r python-requirements.in requests==2.33.1 From 0d41adcbb350d6d3f8624e1f5cf802e88cdab8eb Mon Sep 17 00:00:00 2001 From: Peter Grzybowski Date: Fri, 29 May 2026 14:59:23 +0200 Subject: [PATCH 3/8] chore: send ent mender_connect tests to other runner via pytest-order Ticket: QA-1625 Signed-off-by: Peter Grzybowski --- ...t.py => test_enterprise_mender_connect.py} | 21 +- tests/tests/test_opensource_mender_connect.py | 433 ++++++++++++++++++ 2 files changed, 434 insertions(+), 20 deletions(-) rename tests/tests/{test_mender_connect.py => test_enterprise_mender_connect.py} (95%) create mode 100644 tests/tests/test_opensource_mender_connect.py diff --git a/tests/tests/test_mender_connect.py b/tests/tests/test_enterprise_mender_connect.py similarity index 95% rename from tests/tests/test_mender_connect.py rename to tests/tests/test_enterprise_mender_connect.py index 329afb8dc..2aa6d2f4c 100644 --- a/tests/tests/test_mender_connect.py +++ b/tests/tests/test_enterprise_mender_connect.py @@ -188,6 +188,7 @@ def test_bogus_shell_message(self, docker_env): assert prot.protoType == proto_shell.PROTO_TYPE_SHELL assert prot.typ == "bogusmessage" + @pytest.mark.order(-1) def test_in_poor_network_environment(self, docker_env): self.assert_env(docker_env) @@ -377,26 +378,6 @@ def test_bogus_proto_message(self, docker_env): assert isinstance(body.get("err"), str) and len(body.get("err")) > 0 -class TestRemoteTerminalOpenSource( - _TestRemoteTerminalBase, _TestRemoteTerminalBaseBogusProtoMessage -): - @pytest.fixture(scope="function") - def docker_env(self, standard_setup_one_docker_client_bootstrapped): - env = standard_setup_one_docker_client_bootstrapped - auth = Authentication() - env.devconnect = DeviceConnect(auth, DeviceAuthV2(auth)) - yield env - - @pytest.fixture(scope="function") - def docker_env_flaky_test( - self, request, standard_setup_one_docker_client_bootstrapped - ): - env = standard_setup_one_docker_client_bootstrapped - auth = Authentication() - env.devconnect = DeviceConnect(auth, DeviceAuthV2(auth)) - yield env - - def connected_device(env): uuidv4 = str(uuid.uuid4()) tname = "test.mender.io-{}".format(uuidv4) diff --git a/tests/tests/test_opensource_mender_connect.py b/tests/tests/test_opensource_mender_connect.py new file mode 100644 index 000000000..f8a8ff4d4 --- /dev/null +++ b/tests/tests/test_opensource_mender_connect.py @@ -0,0 +1,433 @@ +# Copyright 2023 Northern.tech AS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import pytest +import time +import uuid + +from flaky import flaky + +from testutils.api import proto_shell, protomsg +from testutils.infra.cli import CliTenantadm +from testutils.infra.container_manager import factory +from testutils.infra.device import MenderDevice +from ..common_setup import ( + standard_setup_one_docker_client_bootstrapped, + enterprise_one_docker_client_bootstrapped, +) +from ..MenderAPI import ( + DeviceAuthV2, + Authentication, + DeviceConnect, + get_container_manager, + set_container_manager, + logger, +) +from testutils.common import User, update_tenant +from .common_connect import wait_for_connect + +container_factory = factory.get_factory() + + +class _TestRemoteTerminalBase: + @flaky(max_runs=3) + def test_regular_protocol_commands(self, docker_env_flaky_test): + """ + Ticket: QA-504 + Reason: The test fails due to the fact that the websocket connection is broken, + and the mender-connect can't recover from situation when shell could not + be stopped, and the session is left as empty with non-existent process + (see MEN-6137) while many other things timeout. + """ + + self.assert_env(docker_env_flaky_test) + + with docker_env_flaky_test.devconnect.get_websocket() as ws: + # Start shell. + receive_timeout_s = 16 + shell = proto_shell.ProtoShell(ws) + body = shell.startShell() + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + assert body == proto_shell.MSG_BODY_SHELL_STARTED + + # Drain any initial output from the prompt. It should end in either "# " + # (root) or "$ " (user). + output = shell.recvOutput(receive_timeout_s) + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + assert output[-2:].decode() in [ + "# ", + "$ ", + ], "Could not detect shell prompt." + + # Starting the shell again should be a no-op. It should return that + # it is already started, as long as the shell limit is 1. MEN-4240. + body = shell.startShell() + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_ERROR + assert body == b"failed to start shell: shell is already running" + + # Make sure we do not get any new output, it should be the same shell as before. + output = shell.recvOutput(receive_timeout_s) + assert ( + output == b"" + ), "Unexpected output received when relauncing already launched shell." + + # Test if a simple command works. + shell.sendInput("ls /\n".encode()) + output = shell.recvOutput(receive_timeout_s) + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + output = output.decode() + assert "usr" in output + assert "etc" in output + + # Try to stop shell. + body = shell.stopShell() + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + assert body is None + + # Repeat stopping and verify the error + body = shell.stopShell() + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_ERROR + assert b"session not found" in body, body + + # Make sure we can not send anything to the shell. + shell.sendInput("ls /\n".encode()) + output = shell.recvOutput(receive_timeout_s) + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_ERROR + output = output.decode() + assert "usr" not in output + assert "etc" not in output + assert "session not found" in output, output + + # Start it again. + shell.startShell() + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + + # Drain any initial output from the prompt. It should end in either "# " + # (root) or "$ " (user). + output = shell.recvOutput(receive_timeout_s) + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + assert output[-2:].decode() in [ + "# ", + "$ ", + ], "Could not detect shell prompt." + + @pytest.mark.skip( + reason="this test has been broken, and is disabled after the move to mender-docker-client see QA-1563" + ) + def test_dbus_reconnect(self, docker_env): + self.assert_env(docker_env) + + with docker_env.devconnect.get_websocket(): + # Nothing to do, just connecting successfully is enough. + pass + + # Test that mender-connect recovers if it initially has no DBus + # connection. This is important because we don't have DBus activation + # enabled in the systemd service file, so it's a race condition who gets + # to the DBus service first. + docker_env.device.run( + f"systemctl --job-mode=ignore-dependencies stop mender-updated" + ) + docker_env.device.run( + "systemctl --job-mode=ignore-dependencies restart mender-connect" + ) + + time.sleep(10) + + # At this point, mender-connect will already have queried DBus. + docker_env.device.run( + f"systemctl --job-mode=ignore-dependencies start mender-updated" + ) + + with docker_env.devconnect.get_websocket(): + # Nothing to do, just connecting successfully is enough. + pass + + def test_websocket_reconnect(self, docker_env): + self.assert_env(docker_env) + + with docker_env.devconnect.get_websocket(): + # Nothing to do, just connecting successfully is enough. + pass + + # Test that mender-connect recovers if it loses the connection to deviceconnect. + docker_env.restart_service("mender-deviceconnect") + + time.sleep(10) + + with docker_env.devconnect.get_websocket(): + # Nothing to do, just connecting successfully is enough. + pass + + def test_bogus_shell_message(self, docker_env): + self.assert_env(docker_env) + + with docker_env.devconnect.get_websocket() as ws: + prot = protomsg.ProtoMsg(proto_shell.PROTO_TYPE_SHELL) + + prot.clear() + prot.setTyp("bogusmessage") + msg = prot.encode(b"") + ws.send(msg) + + msg = ws.recv() + prot.decode(msg) + assert prot.props["status"] == protomsg.PROP_STATUS_ERROR + assert prot.protoType == proto_shell.PROTO_TYPE_SHELL + assert prot.typ == "bogusmessage" + + @pytest.mark.order(0) + def test_in_poor_network_environment(self, docker_env): + self.assert_env(docker_env) + + receive_timeout_s = 16 + + def is_shell_working(shell): + # Test if a simple command works. + shell.sendInput("ls /\n".encode()) + output = shell.recvOutput(receive_timeout_s) + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + output = output.decode() + assert "usr" in output + assert "etc" in output + + def detect_shell_prompt(shell): + # Drain any initial output from the prompt. It should end in either "# " + # (root) or "$ " (user). + output = shell.recvOutput(receive_timeout_s) + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + assert output[-2:].decode() in [ + "# ", + "$ ", + ], "Could not detect shell prompt." + + with docker_env.devconnect.get_websocket() as ws: + shell = proto_shell.ProtoShell(ws) + body = shell.startShell() + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + assert body == proto_shell.MSG_BODY_SHELL_STARTED + + detect_shell_prompt(shell) + is_shell_working(shell) + + docker_env.device.run("apt-get update") + docker_env.device.run("apt-get install -y iptables") + docker_env.device.run( + "iptables -A OUTPUT -j DROP --destination docker.mender.io" + ) + + # Plenty of time for the session to mess up + # see also QA-1591: the DROP will not cause ICMP response so we rely on the + # TCP RTO which means sometimes we need additional time to sleep. + # this was exposed by the move to docker client in those tests, as the + # network stack acts differently + time.sleep(128) + + # Re-enable a good connection + docker_env.device.run("iptables -D OUTPUT 1") + time.sleep(128) + + # mender-connect should have "healed" now and be able to start a new shell + with docker_env.devconnect.get_websocket() as ws: + shell = proto_shell.ProtoShell(ws) + body = shell.startShell() + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + assert body == proto_shell.MSG_BODY_SHELL_STARTED + + detect_shell_prompt(shell) + is_shell_working(shell) + + @flaky(max_runs=3) + def test_session_recording(self, docker_env): + self.assert_env(docker_env) + + def get_cmd(ws, timeout=1): + pmsg = protomsg.ProtoMsg(proto_shell.PROTO_TYPE_SHELL) + body = b"" + try: + while True: + msg = ws.recv(timeout) + b = pmsg.decode(msg) + if pmsg.typ == proto_shell.MSG_TYPE_SHELL_COMMAND: + body += b + except TimeoutError: + return body + + session_id = "" + session_bytes = b"" + with docker_env.devconnect.get_websocket() as ws: + # Start shell. + shell = proto_shell.ProtoShell(ws) + body = shell.startShell() + assert shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + assert body == proto_shell.MSG_BODY_SHELL_STARTED + + assert shell.sid is not None + session_id = shell.sid + + """ Record a series of commands """ + shell.sendInput("echo 'now you see me'\n".encode()) + session_bytes += get_cmd(ws) + # Disable echo + shell.sendInput("stty -echo\n".encode()) + shell.sendInput('echo "echo disabled $?"\n'.encode()) + for i in range(10): + session_bytes += get_cmd(ws) + if b"echo disabled 0" in session_bytes: + break + time.sleep(1) + shell.sendInput('echo "now you don\'t" > /dev/null\n'.encode()) + session_bytes += get_cmd(ws) + shell.sendInput("# Invisible comment\n".encode()) + session_bytes += get_cmd(ws) + # Turn echo back on + shell.sendInput("stty echo\n".encode()) + shell.sendInput('echo "echo enabled $?"\n'.encode()) + for i in range(10): + session_bytes += get_cmd(ws) + if b"echo enabled 0" in session_bytes: + break + time.sleep(1) + session_bytes += get_cmd(ws) + shell.sendInput("echo 'and now echo is back on'\n".encode()) + session_bytes += get_cmd(ws) + + body = shell.stopShell() + assert ( + shell.protomsg.props["status"] == protomsg.PROP_STATUS_NORMAL + ), f"Body is: {body}" + assert body is None + + # Sleep for a second to make sure the session log propagate to the DB. + time.sleep(1) + + playback_bytes = b"" + with docker_env.devconnect.get_playback_websocket(session_id, sleep_ms=0) as ws: + playback_bytes = get_cmd(ws) + + assert playback_bytes == session_bytes + + assert b"now you see me" in playback_bytes + assert b"echo 'now you see me'" in playback_bytes + + # Check that the commands after echo was disabled is not present in the log + assert b"# Invisible comment" not in playback_bytes + assert b'echo "now you don\'t" > /dev/null' not in playback_bytes + + # ... and after echo is enabled + assert b"echo 'and now echo is back on'" in playback_bytes + + def assert_env(self, docker_env): + """Check extra env vars used by base test funcs - make sure they're set. + Mostly important for custom setups. + """ + assert ( + docker_env.device is not None + ), "docker_env must have a designated 'device'" + assert ( + docker_env.devconnect is not None + ), "docker_env must have a set up 'devconnect' instance" + proxy_connected_timeout_s = 15 * 60 + docker_env.device.run( + """dbus-send --print-reply --system \\ + --dest=io.mender.AuthenticationManager \\ + /io/mender/AuthenticationManager \\ + io.mender.Authentication1.FetchJwtToken""", + wait=proxy_connected_timeout_s, + ) + output = docker_env.device.run( + "dbus-send --system --dest=io.mender.AuthenticationManager --print-reply /io/mender/AuthenticationManager io.mender.Authentication1.GetJwtToken" + ) + logger.info("assert_env: GetJWT: returns: '%s'" % (output)) + + # MenderAPI is a (partially) global object, which does not play well with these tests that + # combine class and function scoped fixtures. Set always the container manager so that each + # test correctly access its own environment from MenderAPI code. + set_container_manager(docker_env) + + +class _TestRemoteTerminalBaseBogusProtoMessage: + def test_bogus_proto_message(self, docker_env): + self.assert_env(docker_env) + + with docker_env.devconnect.get_websocket() as ws: + prot = protomsg.ProtoMsg(12345) + + prot.clear() + prot.setTyp(proto_shell.MSG_TYPE_SPAWN_SHELL) + msg = prot.encode(b"") + ws.send(msg) + + data = ws.recv() + rsp = protomsg.ProtoMsg(0xFFFF) + rsp.decode(data) + assert rsp.typ == "error" + body = rsp.body + assert isinstance(body.get("err"), str) and len(body.get("err")) > 0 + + +class TestRemoteTerminalOpenSource( + _TestRemoteTerminalBase, _TestRemoteTerminalBaseBogusProtoMessage +): + @pytest.fixture(scope="function") + def docker_env(self, standard_setup_one_docker_client_bootstrapped): + env = standard_setup_one_docker_client_bootstrapped + auth = Authentication() + env.devconnect = DeviceConnect(auth, DeviceAuthV2(auth)) + yield env + + @pytest.fixture(scope="function") + def docker_env_flaky_test( + self, request, standard_setup_one_docker_client_bootstrapped + ): + env = standard_setup_one_docker_client_bootstrapped + auth = Authentication() + env.devconnect = DeviceConnect(auth, DeviceAuthV2(auth)) + yield env + + +def connected_device(env): + uuidv4 = str(uuid.uuid4()) + tname = "test.mender.io-{}".format(uuidv4) + email = "some.user+{}@example.com".format(uuidv4) + u = User("", email, "whatsupdoc") + cli = CliTenantadm(containers_namespace=env.name) + tid = cli.create_org(tname, u.name, u.pwd, plan="enterprise") + update_tenant( + tid, + addons=["troubleshoot"], + container_manager=get_container_manager(), + ) + tenant = cli.get_tenant(tid) + tenant = json.loads(tenant) + ttoken = tenant["tenant_token"] + + auth = Authentication(name="enterprise-tenant", username=u.name, password=u.pwd) + auth.create_org = False + auth.reset_auth_token() + devauth = DeviceAuthV2(auth) + + env.new_tenant_docker_client("mender-client", ttoken) + device = MenderDevice(env.get_mender_clients()[0]) + devauth.accept_devices(1) + + devices = devauth.get_devices_status("accepted") + assert 1 == len(devices) + + wait_for_connect(auth, devices[0]["id"]) + + devconn = DeviceConnect(auth, devauth) + + return device, devconn From 123b1e7c25dca8262db30df38aecc830d4a2851e Mon Sep 17 00:00:00 2001 From: Peter Grzybowski Date: Fri, 29 May 2026 17:33:39 +0200 Subject: [PATCH 4/8] chore: xdist run tests in one file on one worker Ticket: QA-1625 Signed-off-by: Peter Grzybowski --- tests/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/run.sh b/tests/run.sh index 1a44f3dc9..a74ec3a78 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -145,6 +145,7 @@ if test ${CI_NODE_TOTAL:-1} -gt 1; then fi python3 -m pytest \ $EXTRA_TEST_ARGS \ + --dist=loadfile \ --verbose \ --junitxml=results.xml \ $HTML_REPORT \ From 4abda557c9aa97015d19cb2ce12a1a9c3a2966f5 Mon Sep 17 00:00:00 2001 From: Peter Grzybowski Date: Fri, 29 May 2026 19:51:22 +0200 Subject: [PATCH 5/8] chore: be sure that connectivity is restored in test_in_poor Ticket: QA-1625 Signed-off-by: Peter Grzybowski --- tests/tests/test_enterprise_mender_connect.py | 26 ++++++++++++++++++- tests/tests/test_opensource_mender_connect.py | 26 ++++++++++++++++++- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/tests/tests/test_enterprise_mender_connect.py b/tests/tests/test_enterprise_mender_connect.py index 2aa6d2f4c..7eb5f275d 100644 --- a/tests/tests/test_enterprise_mender_connect.py +++ b/tests/tests/test_enterprise_mender_connect.py @@ -189,6 +189,7 @@ def test_bogus_shell_message(self, docker_env): assert prot.typ == "bogusmessage" @pytest.mark.order(-1) + @flaky(max_runs=3) def test_in_poor_network_environment(self, docker_env): self.assert_env(docker_env) @@ -223,7 +224,7 @@ def detect_shell_prompt(shell): is_shell_working(shell) docker_env.device.run("apt-get update") - docker_env.device.run("apt-get install -y iptables") + docker_env.device.run("apt-get install -y iptables curl") docker_env.device.run( "iptables -A OUTPUT -j DROP --destination docker.mender.io" ) @@ -237,6 +238,29 @@ def detect_shell_prompt(shell): # Re-enable a good connection docker_env.device.run("iptables -D OUTPUT 1") + docker_env.device.run("iptables -F OUTPUT") + docker_env._docker_compose_cmd("exec mender-client iptables -F OUTPUT") + connectivity_reestablished = False + sleep_time_s = 8 + iteration = 0 + max_iteration = 64 + while not connectivity_reestablished: + rc = docker_env._docker_compose_cmd( + 'exec mender-client curl -s -o /dev/null -w "%{http_code}" -k --connect-timeout 240 --keepalive-time 30 --max-time 300 --retry-delay 8 https://docker.mender.io' + ) + if rc.startswith("20"): + connectivity_reestablished = True + break + if rc.startswith("30"): + connectivity_reestablished = True + break + time.sleep(sleep_time_s) + iteration = iteration + 1 + if iteration > max_iteration: + break + assert ( + connectivity_reestablished + ), "connectivity was not reestablished; this is a test env failure" time.sleep(128) # mender-connect should have "healed" now and be able to start a new shell diff --git a/tests/tests/test_opensource_mender_connect.py b/tests/tests/test_opensource_mender_connect.py index f8a8ff4d4..e9eca4188 100644 --- a/tests/tests/test_opensource_mender_connect.py +++ b/tests/tests/test_opensource_mender_connect.py @@ -189,6 +189,7 @@ def test_bogus_shell_message(self, docker_env): assert prot.typ == "bogusmessage" @pytest.mark.order(0) + @flaky(max_runs=3) def test_in_poor_network_environment(self, docker_env): self.assert_env(docker_env) @@ -223,7 +224,7 @@ def detect_shell_prompt(shell): is_shell_working(shell) docker_env.device.run("apt-get update") - docker_env.device.run("apt-get install -y iptables") + docker_env.device.run("apt-get install -y iptables curl") docker_env.device.run( "iptables -A OUTPUT -j DROP --destination docker.mender.io" ) @@ -237,6 +238,29 @@ def detect_shell_prompt(shell): # Re-enable a good connection docker_env.device.run("iptables -D OUTPUT 1") + docker_env.device.run("iptables -F OUTPUT") + docker_env._docker_compose_cmd("exec mender-client iptables -F OUTPUT") + connectivity_reestablished = False + sleep_time_s = 8 + iteration = 0 + max_iteration = 64 + while not connectivity_reestablished: + rc = docker_env._docker_compose_cmd( + 'exec mender-client curl -s -o /dev/null -w "%{http_code}" -k --connect-timeout 240 --keepalive-time 30 --max-time 300 --retry-delay 8 https://docker.mender.io' + ) + if rc.startswith("20"): + connectivity_reestablished = True + break + if rc.startswith("30"): + connectivity_reestablished = True + break + time.sleep(sleep_time_s) + iteration = iteration + 1 + if iteration > max_iteration: + break + assert ( + connectivity_reestablished + ), "connectivity was not reestablished; this is a test env failure" time.sleep(128) # mender-connect should have "healed" now and be able to start a new shell From cbff029276e4ee9996d1edccfdd590edc1221fcc Mon Sep 17 00:00:00 2001 From: Peter Grzybowski Date: Fri, 29 May 2026 20:11:40 +0200 Subject: [PATCH 6/8] chore: readjust the kernel tcp retries2 Ticket: QA-1625 Signed-off-by: Peter Grzybowski --- .gitlab-ci-full-integration-template.yml | 1 + tests/run.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/.gitlab-ci-full-integration-template.yml b/.gitlab-ci-full-integration-template.yml index 51a1d76d7..ded39a83e 100644 --- a/.gitlab-ci-full-integration-template.yml +++ b/.gitlab-ci-full-integration-template.yml @@ -51,6 +51,7 @@ test:integration:$CI_NODE_INDEX: # running with high parallelism on a single VM - sysctl -w fs.inotify.max_user_instances=1024 - sysctl -w fs.file-max=600000 + - sysctl -w net.ipv4.tcp_retries2=5 - ulimit -n 524288 script: diff --git a/tests/run.sh b/tests/run.sh index a74ec3a78..b5caffb9d 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -143,6 +143,7 @@ if test ${CI_NODE_TOTAL:-1} -gt 1; then fi export PYTEST_ADDOPTS="$PYTEST_ADDOPTS $PYTEST_NODES" fi +sysctl -w net.ipv4.tcp_retries2=5 python3 -m pytest \ $EXTRA_TEST_ARGS \ --dist=loadfile \ From cb88958c67462310021784d1f3ae510eddf695b9 Mon Sep 17 00:00:00 2001 From: Peter Grzybowski Date: Fri, 29 May 2026 21:28:36 +0200 Subject: [PATCH 7/8] chore: tcp retries2/1 Ticket: QA-1625 Signed-off-by: Peter Grzybowski --- .gitlab-ci-full-integration-template.yml | 4 +++- tests/run.sh | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci-full-integration-template.yml b/.gitlab-ci-full-integration-template.yml index ded39a83e..553c57fbe 100644 --- a/.gitlab-ci-full-integration-template.yml +++ b/.gitlab-ci-full-integration-template.yml @@ -49,9 +49,11 @@ test:integration:$CI_NODE_INDEX: fi # Increase system limits to make sure the tests are not limited while # running with high parallelism on a single VM + - sysctl net.ipv4.tcp_retries1 + - sysctl net.ipv4.tcp_retries2 - sysctl -w fs.inotify.max_user_instances=1024 - sysctl -w fs.file-max=600000 - - sysctl -w net.ipv4.tcp_retries2=5 + - sysctl -w net.ipv4.tcp_retries2=8 - ulimit -n 524288 script: diff --git a/tests/run.sh b/tests/run.sh index b5caffb9d..a74ec3a78 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -143,7 +143,6 @@ if test ${CI_NODE_TOTAL:-1} -gt 1; then fi export PYTEST_ADDOPTS="$PYTEST_ADDOPTS $PYTEST_NODES" fi -sysctl -w net.ipv4.tcp_retries2=5 python3 -m pytest \ $EXTRA_TEST_ARGS \ --dist=loadfile \ From 1d3f95e2897cb1d4a9723eb70240913fe204971f Mon Sep 17 00:00:00 2001 From: Peter Grzybowski Date: Fri, 29 May 2026 22:13:18 +0200 Subject: [PATCH 8/8] chore: tcp retries2/1 f Ticket: QA-1625 Signed-off-by: Peter Grzybowski --- .gitlab-ci-full-integration-template.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci-full-integration-template.yml b/.gitlab-ci-full-integration-template.yml index 553c57fbe..303ba91ef 100644 --- a/.gitlab-ci-full-integration-template.yml +++ b/.gitlab-ci-full-integration-template.yml @@ -53,7 +53,7 @@ test:integration:$CI_NODE_INDEX: - sysctl net.ipv4.tcp_retries2 - sysctl -w fs.inotify.max_user_instances=1024 - sysctl -w fs.file-max=600000 - - sysctl -w net.ipv4.tcp_retries2=8 + - sysctl -w net.ipv4.tcp_retries2=3 - ulimit -n 524288 script: