Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions integration/sandbox_clean_remove_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"os"
"path/filepath"
"strings"
"sync"
"syscall"
"testing"
"time"

Expand Down Expand Up @@ -123,3 +125,67 @@ func TestSandboxRemoveWithoutIPLeakage(t *testing.T) {
t.Logf("Should not be able to find the pod ip in host-local checkpoint")
assert.False(t, checkIP(ip))
}

// TestSandboxStopWithNilCNIResult verifies that StopPodSandbox succeeds when
// CNIResult is nil (network setup never completed) even if the network teardown
// itself fails. This exercises the condition in sandbox_stop.go where a nil
// CNIResult causes the teardown error to be logged as a warning instead of
// returned as a hard error.
func TestSandboxStopWithNilCNIResult(t *testing.T) {
t.Log("Init PodSandboxConfig with specific label")
sbName := t.Name()
labels := map[string]string{
sbName: "true",
}
sbConfig := PodSandboxConfig(sbName, "failpoint", WithPodLabels(labels))

t.Log("Inject CNI failpoint: delay Add (so CNIResult is never set) and fail Del")
conf := &failpointConf{
// Delay CNI Add for 1 day so network setup never completes and CNIResult stays nil
Add: "1*delay(86400000)",
// Make CNI Del fail so teardownPodNetwork returns an error during stop
Del: "1*error(network-teardown-injected-error)",
}
injectCNIFailpoint(t, sbConfig, conf)

var wg sync.WaitGroup
wg.Add(1)

go func() {
defer wg.Done()
t.Log("Create a sandbox (will hang on CNI Add)")
_, err := runtimeService.RunPodSandbox(sbConfig, failpointRuntimeHandler)
assert.Error(t, err)
}()

t.Log("Wait for CNI Add to start running")
assert.NoError(t, ensureCNIAddRunning(t, sbName), "CNI Add should be running")

// Kill containerd while CNI Add is in progress so CNIResult is never set
// and no deferred cleanup runs.
t.Log("Kill containerd with SIGKILL to leave sandbox with nil CNIResult")
RestartContainerd(t, syscall.SIGKILL)

wg.Wait()

t.Log("ListPodSandbox with the specific label")
l, err := runtimeService.ListPodSandbox(&runtime.PodSandboxFilter{LabelSelector: labels})
require.NoError(t, err)
require.Len(t, l, 1)

sb := l[0]
require.Equal(t, runtime.PodSandboxState_SANDBOX_NOTREADY, sb.State)

t.Log("Get sandbox info and verify CNIResult is nil")
_, info, err := SandboxInfo(sb.Id)
require.NoError(t, err)
require.Nil(t, info.CNIResult, "CNIResult should be nil because CNI setup never completed")

t.Log("StopPodSandbox should succeed even though CNI Del fails, because CNIResult is nil")
err = runtimeService.StopPodSandbox(sb.Id)
assert.NoError(t, err, "StopPodSandbox should not return error when CNIResult is nil")

t.Log("RemovePodSandbox should succeed")
err = runtimeService.RemovePodSandbox(sb.Id)
assert.NoError(t, err)
}
6 changes: 4 additions & 2 deletions internal/cri/server/sandbox_stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,11 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
} else if closed {
sandbox.NetNSPath = ""
}
if sandbox.CNIResult != nil {
if err := c.teardownPodNetwork(ctx, sandbox); err != nil {
if err := c.teardownPodNetwork(ctx, sandbox); err != nil {
if sandbox.CNIResult != nil {
return fmt.Errorf("failed to destroy network for sandbox %q: %w", id, err)
} else {
log.G(ctx).WithError(err).Warnf("failed to destroy network for sandbox %q; and ignoring because the sandbox network setup result is nil indicating the network setup never completed", id)
}
}
if err := sandbox.NetNS.Remove(); err != nil {
Expand Down
Loading