From de609c6d253f73e99d066e790d541f0f18cfb72a Mon Sep 17 00:00:00 2001
From: Predrag Knezevic
Date: Mon, 13 Apr 2026 13:04:22 +0200
Subject: [PATCH 1/2] Fix race condition in e2e coverage collection

kubectl scale --replicas=0 is non-blocking and returns as soon as the
API server accepts the change, not when pods have terminated. The
existing wait on the copy pod was a no-op since it was already running.
This meant kubectl cp could run before manager pods had terminated and
flushed coverage data to the PVC.

Wait for each deployment's .status.replicas to reach 0 before copying,
ensuring the Go coverage runtime has written its data.

Co-Authored-By: Claude Opus 4.6
---
 hack/test/e2e-coverage.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hack/test/e2e-coverage.sh b/hack/test/e2e-coverage.sh
index 49c8db3d7..c6a759621 100755
--- a/hack/test/e2e-coverage.sh
+++ b/hack/test/e2e-coverage.sh
@@ -22,8 +22,9 @@ rm -rf ${COVERAGE_DIR} && mkdir -p ${COVERAGE_DIR}
 kubectl -n "$OPERATOR_CONTROLLER_NAMESPACE" scale deployment/"$OPERATOR_CONTROLLER_MANAGER_DEPLOYMENT_NAME" --replicas=0
 kubectl -n "$CATALOGD_NAMESPACE" scale deployment/"$CATALOGD_MANAGER_DEPLOYMENT_NAME" --replicas=0
 
-# Wait for the copy pod to be ready
-kubectl -n "$OPERATOR_CONTROLLER_NAMESPACE" wait --for=condition=ready pod "$COPY_POD_NAME"
+# Wait for deployments to scale down so coverage data is flushed to the PVC
+kubectl -n "$OPERATOR_CONTROLLER_NAMESPACE" wait --for=jsonpath='{.status.replicas}'=0 deployment/"$OPERATOR_CONTROLLER_MANAGER_DEPLOYMENT_NAME" --timeout=60s
+kubectl -n "$CATALOGD_NAMESPACE" wait --for=jsonpath='{.status.replicas}'=0 deployment/"$CATALOGD_MANAGER_DEPLOYMENT_NAME" --timeout=60s
 
 # Copy the coverage data from the temporary pod
 kubectl -n "$OPERATOR_CONTROLLER_NAMESPACE" cp "$COPY_POD_NAME":/e2e-coverage/ "$COVERAGE_DIR"

From df9cdbed9d174b6dfd9b3fa119320fcc9a5ab72f Mon Sep 17 00:00:00 2001
From: Predrag Knezevic
Date: Mon, 13 Apr 2026 13:29:47 +0200
Subject: [PATCH 2/2] Fix race condition in e2e coverage collection

kubectl scale --replicas=0 is non-blocking and returns as soon as the
API server accepts the change, not when pods have terminated. The
existing wait on the copy pod was a no-op since it was already running.
This meant kubectl cp could run before manager pods had terminated and
flushed coverage data to the PVC.

Wait for manager pods to be deleted before copying, ensuring the Go
coverage runtime has written its data on process exit.

Co-Authored-By: Claude Opus 4.6
---
 hack/test/e2e-coverage.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hack/test/e2e-coverage.sh b/hack/test/e2e-coverage.sh
index c6a759621..e4b4610d4 100755
--- a/hack/test/e2e-coverage.sh
+++ b/hack/test/e2e-coverage.sh
@@ -22,9 +22,9 @@ rm -rf ${COVERAGE_DIR} && mkdir -p ${COVERAGE_DIR}
 kubectl -n "$OPERATOR_CONTROLLER_NAMESPACE" scale deployment/"$OPERATOR_CONTROLLER_MANAGER_DEPLOYMENT_NAME" --replicas=0
 kubectl -n "$CATALOGD_NAMESPACE" scale deployment/"$CATALOGD_MANAGER_DEPLOYMENT_NAME" --replicas=0
 
-# Wait for deployments to scale down so coverage data is flushed to the PVC
-kubectl -n "$OPERATOR_CONTROLLER_NAMESPACE" wait --for=jsonpath='{.status.replicas}'=0 deployment/"$OPERATOR_CONTROLLER_MANAGER_DEPLOYMENT_NAME" --timeout=60s
-kubectl -n "$CATALOGD_NAMESPACE" wait --for=jsonpath='{.status.replicas}'=0 deployment/"$CATALOGD_MANAGER_DEPLOYMENT_NAME" --timeout=60s
+# Wait for manager pods to terminate so coverage data is flushed to the PVC
+kubectl -n "$OPERATOR_CONTROLLER_NAMESPACE" wait --for=delete pods -l control-plane="$OPERATOR_CONTROLLER_MANAGER_DEPLOYMENT_NAME" --timeout=60s
+kubectl -n "$CATALOGD_NAMESPACE" wait --for=delete pods -l control-plane="$CATALOGD_MANAGER_DEPLOYMENT_NAME" --timeout=60s
 
 # Copy the coverage data from the temporary pod
 kubectl -n "$OPERATOR_CONTROLLER_NAMESPACE" cp "$COPY_POD_NAME":/e2e-coverage/ "$COVERAGE_DIR"
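
Note (not part of the patches above): the race exists because Go's coverage
runtime writes its covmeta/covcounters files to GOCOVERDIR only when the
instrumented process exits, so copying from the PVC is safe only once the
manager pods are gone. A minimal sketch of that mechanism, assuming a
hypothetical ./cmd/manager package and the standard go build -cover /
GOCOVERDIR / go tool covdata workflow:

    # Build with coverage instrumentation (Go 1.20+).
    go build -cover -o manager ./cmd/manager

    # Coverage files appear in GOCOVERDIR only after the process exits;
    # while the process runs, the directory holds no usable counter data.
    GOCOVERDIR=/e2e-coverage ./manager

    # Summarize the emitted coverage data once the process has terminated.
    go tool covdata percent -i=/e2e-coverage

This is why patch 2 waits with --for=delete: pod deletion guarantees the
container process has exited and flushed its counters, whereas the scale
command alone guarantees nothing about process termination.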