diff --git a/contrib/Dockerfile.test.d/cri-in-userns/docker-entrypoint.sh b/contrib/Dockerfile.test.d/cri-in-userns/docker-entrypoint.sh index d793a4956de6c..cf6b4302b46ae 100755 --- a/contrib/Dockerfile.test.d/cri-in-userns/docker-entrypoint.sh +++ b/contrib/Dockerfile.test.d/cri-in-userns/docker-entrypoint.sh @@ -56,7 +56,7 @@ echo >&2 "Waiting for containerd" until ctr plugins list; do sleep 3; done if [ ! -z "$IS_SYSTEMD_CGROUP" ] && [ "$IS_SYSTEMD_CGROUP" = true ];then - critest "--ginkgo.skip=should prefer new apparmor field|should support apparmor field|should support deprecated apparmor_profile field|should support unsafe sysctls|should support safe sysctls|should allow privilege escalation when false" + critest "--ginkgo.skip=should prefer new apparmor field|should support apparmor field|should support deprecated apparmor_profile field|should support unsafe sysctls|should support safe sysctls|should allow privilege escalation when false|should terminate with exitCode 137 and reason OOMKilled" /bin/bash /critest.sh exit else exec "$@" diff --git a/script/critest.sh b/script/critest.sh index 3d02971ddc9fe..86866c9e1a2b9 100755 --- a/script/critest.sh +++ b/script/critest.sh @@ -74,40 +74,41 @@ GINKGO_SKIP_TEST=() if [ -n "${SKIP_TEST:-}" ]; then GINKGO_SKIP_TEST+=("--ginkgo.skip" "$SKIP_TEST") - # With the systemd cgroup driver, the container runtime uses a scope unit to - # manage the cgroup path. According to the scope unit documentation: - # - # Unlike service units, scope units have no “main” process: all processes in - # the scope are equivalent. The lifecycle of a scope unit is therefore not - # bound to a specific process, but to the existence of at least one process in - # the scope. As a result, individual process exit statuses are not relevant to - # the scope unit’s failure state. - # - # We cannot rely on CollectMode=inactive-or-failed to preserve the cgroup path. - # So there is a race condition between containerd and systemd garbage collection. 
- # If systemd GC removes the scope unit’s cgroup before containerd reads it, - # containerd loses the opportunity to inspect the cgroup and determine the OOM status. - # - # So we disable the OOMKilled testcase. - # - # FIXME(fuweid): - # - # In theory, this could be mitigated by inspecting the unit logs (e.g. - # `journalctl -u XXX.scope`) and searching for the "OOMKilled" keyword. - # However, this approach depends on journalctl and systemd logging behavior, - # so it should be avoided. - # - # Example journal output: - # - # Dec 22 01:24:58 devbox systemd[1]: Started /usr/bin/bash -c dd if=/dev/zero of=/dev/null bs=20M. - # Dec 22 01:24:58 devbox systemd[1]: XXX.service: A process of this unit has been killed by the OOM killer. - # Dec 22 01:24:58 devbox systemd[1]: XXX.service: Main process exited, code=killed, status=9/KILL - # Dec 22 01:24:58 devbox systemd[1]: XXX.service: Failed with result 'oom-kill'. - # - # Ref: https://www.freedesktop.org/software/systemd/man/latest/systemd.scope.html - if [ ! -z "$CGROUP_DRIVER" ] && [ "$CGROUP_DRIVER" = "systemd" ];then - GINKGO_SKIP_TEST+=("--ginkgo.skip" "should terminate with exitCode 137 and reason OOMKilled") - fi +fi + +# With the systemd cgroup driver, the container runtime uses a scope unit to +# manage the cgroup path. According to the scope unit documentation: +# +# Unlike service units, scope units have no “main” process: all processes in +# the scope are equivalent. The lifecycle of a scope unit is therefore not +# bound to a specific process, but to the existence of at least one process in +# the scope. As a result, individual process exit statuses are not relevant to +# the scope unit’s failure state. +# +# We cannot rely on CollectMode=inactive-or-failed to preserve the cgroup path. +# So there is a race condition between containerd and systemd garbage collection. 
+# If systemd GC removes the scope unit’s cgroup before containerd reads it, +# containerd loses the opportunity to inspect the cgroup and determine the OOM status. +# +# So we skip the OOMKilled testcase when the systemd cgroup driver is used. +# +# FIXME(fuweid): +# +# In theory, this could be mitigated by inspecting the unit logs (e.g. +# `journalctl -u XXX.scope`) and searching for an "oom-kill" result. +# However, this approach depends on journalctl and systemd logging behavior, +# so it should be avoided. +# +# Example journal output (captured from a service unit): +# +# Dec 22 01:24:58 devbox systemd[1]: Started /usr/bin/bash -c dd if=/dev/zero of=/dev/null bs=20M. +# Dec 22 01:24:58 devbox systemd[1]: XXX.service: A process of this unit has been killed by the OOM killer. +# Dec 22 01:24:58 devbox systemd[1]: XXX.service: Main process exited, code=killed, status=9/KILL +# Dec 22 01:24:58 devbox systemd[1]: XXX.service: Failed with result 'oom-kill'. +# +# Ref: https://www.freedesktop.org/software/systemd/man/latest/systemd.scope.html +if [ ! -z "$CGROUP_DRIVER" ] && [ "$CGROUP_DRIVER" = "systemd" ];then + GINKGO_SKIP_TEST+=("--ginkgo.skip" "should terminate with exitCode 137 and reason OOMKilled") fi GINKGO_FOCUS_TEST=()