Skip to content
203 changes: 6 additions & 197 deletions pkg/provisioner/templates/containerd.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,187 +212,6 @@ holodeck_mark_installed "$COMPONENT" "$FINAL_VERSION"
holodeck_log "INFO" "$COMPONENT" "Successfully installed containerd ${FINAL_VERSION}"
`

// containerdV2Template is the shell script template that installs
// containerd 2.x from the official release tarballs published on GitHub,
// together with a pinned runc binary and the CNI plugins.
//
// Template fields:
//   - .Version: containerd version without a leading "v" (e.g. "2.0.0");
//     it is interpolated into the release tarball name and download URL.
//
// The script calls the holodeck_* helpers and install_packages_with_retry,
// which are not defined in this template and must be provided by the
// surrounding provisioning script.
//
// The script runs in six reported steps: check the existing installation,
// load kernel modules and sysctl settings, install apt dependencies,
// install the binaries, write the containerd config and systemd unit, and
// finally wait for the daemon and verify it.
//
// Version strings read back from "containerd --version" are normalized by
// stripping a leading "v": upstream release binaries report "v2.0.0" while
// DESIRED_VERSION is rendered without the prefix, so without normalization
// the "already installed" fast path could never match and the recorded
// marker version would differ between the fast path and a fresh install.
const containerdV2Template = `
COMPONENT="containerd"
SOURCE="package"
DESIRED_VERSION="{{.Version}}"

holodeck_progress "$COMPONENT" 1 6 "Checking existing installation"

# Check if containerd is already installed and functional
if systemctl is-active --quiet containerd 2>/dev/null; then
    INSTALLED_VERSION=$(containerd --version 2>/dev/null | awk '{print $3}' || true)
    # Upstream release binaries print the version as "vX.Y.Z"; drop the
    # leading "v" so it is comparable with DESIRED_VERSION.
    INSTALLED_VERSION="${INSTALLED_VERSION#v}"
    if [[ -n "$INSTALLED_VERSION" ]]; then
        if [[ -z "$DESIRED_VERSION" ]] || \
           [[ "$INSTALLED_VERSION" == "$DESIRED_VERSION" ]] || \
           [[ "$INSTALLED_VERSION" == "$DESIRED_VERSION."* ]]; then
            holodeck_log "INFO" "$COMPONENT" "Already installed: ${INSTALLED_VERSION}"

            if holodeck_verify_containerd; then
                holodeck_log "INFO" "$COMPONENT" "Containerd verified functional"
                holodeck_mark_installed "$COMPONENT" "$INSTALLED_VERSION"
                exit 0
            else
                holodeck_log "WARN" "$COMPONENT" \
                    "Containerd installed but not functional, attempting repair"
            fi
        else
            holodeck_log "INFO" "$COMPONENT" \
                "Version mismatch: installed=${INSTALLED_VERSION}, desired=${DESIRED_VERSION}"
        fi
    fi
fi

holodeck_progress "$COMPONENT" 2 6 "Setting up prerequisites"

# Set up kernel modules (idempotent)
sudo modprobe overlay
sudo modprobe br_netfilter

# Setup required sysctl params (idempotent)
if [[ ! -f /etc/sysctl.d/99-kubernetes-cri.conf ]]; then
    cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
    sudo sysctl --system
else
    holodeck_log "INFO" "$COMPONENT" "Sysctl params already configured"
fi

holodeck_progress "$COMPONENT" 3 6 "Installing dependencies"

holodeck_retry 3 "$COMPONENT" sudo apt-get update
holodeck_retry 3 "$COMPONENT" install_packages_with_retry ca-certificates curl

# Detect architecture and map it to the names used in release artifacts
ARCH=$(uname -m)
if [[ "$ARCH" == "x86_64" ]]; then
    ARCH="amd64"
elif [[ "$ARCH" == "aarch64" ]]; then
    ARCH="arm64"
fi

holodeck_progress "$COMPONENT" 4 6 "Installing containerd {{.Version}} from official binaries"

# Download and install containerd (check if already installed)
if [[ ! -f /usr/local/bin/containerd ]] || \
   ! containerd --version 2>/dev/null | grep -q "{{.Version}}"; then
    CONTAINERD_TAR="containerd-{{.Version}}-linux-${ARCH}.tar.gz"
    CONTAINERD_URL="https://github.com/containerd/containerd/releases/download/v{{.Version}}/${CONTAINERD_TAR}"

    holodeck_log "INFO" "$COMPONENT" "Downloading containerd from $CONTAINERD_URL"
    holodeck_retry 3 "$COMPONENT" curl -fsSL -o "${CONTAINERD_TAR}" "${CONTAINERD_URL}"
    sudo tar Cxzvf /usr/local "${CONTAINERD_TAR}"
    rm -f "${CONTAINERD_TAR}"
else
    holodeck_log "INFO" "$COMPONENT" "Containerd binary already at correct version"
fi

# Download and install runc (idempotent)
RUNC_VERSION="1.2.3"
if [[ ! -f /usr/local/sbin/runc ]]; then
    holodeck_log "INFO" "$COMPONENT" "Installing runc ${RUNC_VERSION}"
    holodeck_retry 3 "$COMPONENT" curl -fsSL -o "runc.${ARCH}" \
        "https://github.com/opencontainers/runc/releases/download/v${RUNC_VERSION}/runc.${ARCH}"
    sudo install -m 755 "runc.${ARCH}" /usr/local/sbin/runc
    rm -f "runc.${ARCH}"
else
    holodeck_log "INFO" "$COMPONENT" "runc already installed"
fi

# Install CNI plugins (idempotent)
CNI_VERSION="v1.6.2"
if [[ ! -f /opt/cni/bin/bridge ]]; then
    CNI_TAR="cni-plugins-linux-${ARCH}-${CNI_VERSION}.tgz"
    holodeck_log "INFO" "$COMPONENT" "Installing CNI plugins ${CNI_VERSION}"
    holodeck_retry 3 "$COMPONENT" curl -fsSL -o "${CNI_TAR}" \
        "https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/${CNI_TAR}"
    sudo mkdir -p /opt/cni/bin
    sudo tar Cxzvf /opt/cni/bin "${CNI_TAR}"
    rm -f "${CNI_TAR}"
else
    holodeck_log "INFO" "$COMPONENT" "CNI plugins already installed"
fi

holodeck_progress "$COMPONENT" 5 6 "Configuring containerd"

# Configure containerd
sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml > /dev/null

# Update config for systemd cgroup
sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml

# Ensure CNI paths are configured correctly in the CRI plugin only
sudo sed -i '/\[plugins.*cri.*\.cni\]/,/^\[/{s|conf_dir = .*|conf_dir = "/etc/cni/net.d"|; s|bin_dir = .*|bin_dir = "/opt/cni/bin"|}' /etc/containerd/config.toml

# Disable the image-verifier bindir plugin if present (containerd 2.x feature)
if grep -q 'io.containerd.image-verifier.v1.bindir' /etc/containerd/config.toml 2>/dev/null; then
    holodeck_log "INFO" "$COMPONENT" "Disabling containerd image-verifier bindir plugin"
    sudo sed -i 's/disabled_plugins = \[\]/disabled_plugins = ["io.containerd.image-verifier.v1.bindir"]/' /etc/containerd/config.toml
fi

# Create containerd service (idempotent)
if [[ ! -f /etc/systemd/system/containerd.service ]]; then
    cat <<EOF | sudo tee /etc/systemd/system/containerd.service
[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target local-fs.target

[Service]
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/containerd
Type=notify
Delegate=yes
KillMode=process
Restart=always
RestartSec=5
LimitNPROC=infinity
LimitCORE=infinity

[Install]
WantedBy=multi-user.target
EOF
fi

# Start containerd
sudo systemctl daemon-reload
sudo systemctl enable --now containerd

holodeck_progress "$COMPONENT" 6 6 "Verifying installation"

# Wait for containerd to be ready with timeout (120s for slow VMs)
timeout=120
while ! sudo ctr version &>/dev/null; do
    if [[ $timeout -le 0 ]]; then
        holodeck_error 11 "$COMPONENT" \
            "Timeout waiting for containerd to become ready" \
            "Check 'systemctl status containerd' and 'journalctl -u containerd'"
    fi
    if (( timeout % 15 == 0 )); then
        holodeck_log "INFO" "$COMPONENT" \
            "Waiting for containerd to become ready (${timeout}s remaining)"
    fi
    sleep 1
    ((timeout--))
done

if ! holodeck_verify_containerd; then
    holodeck_error 5 "$COMPONENT" \
        "Containerd installation verification failed" \
        "Run 'systemctl status containerd' to diagnose"
fi

FINAL_VERSION=$(containerd --version | awk '{print $3}')
# Record the version without the leading "v" so the marker matches what the
# "already installed" fast path records.
FINAL_VERSION="${FINAL_VERSION#v}"
holodeck_mark_installed "$COMPONENT" "$FINAL_VERSION"
holodeck_log "INFO" "$COMPONENT" "Successfully installed containerd ${FINAL_VERSION} (v2.x)"
`

// containerdGitTemplate builds and installs containerd from source.
const containerdGitTemplate = `
COMPONENT="containerd"
Expand Down Expand Up @@ -722,7 +541,6 @@ holodeck_log "INFO" "$COMPONENT" "Successfully installed containerd from ${TRACK
// Pre-compiled templates for containerd installation.
//
// Each template string is parsed once, when the package-level variables are
// initialized. template.Must panics if Parse returns an error, so a
// malformed template string surfaces at program start rather than when a
// template is later executed.
var (
containerdV1Tmpl = template.Must(template.New("containerd-v1").Parse(containerdV1Template))
containerdV2Tmpl = template.Must(template.New("containerd-v2").Parse(containerdV2Template))
containerdGitTmpl = template.Must(template.New("containerd-git").Parse(containerdGitTemplate))
containerdLatestTmpl = template.Must(template.New("containerd-latest").Parse(containerdLatestTemplate))
)
Expand All @@ -733,8 +551,7 @@ type Containerd struct {
Source string // "package", "git", "latest"

// Package source fields
Version string
MajorVersion int
Version string

// Git source fields
GitRepo string
Expand Down Expand Up @@ -772,14 +589,10 @@ func NewContainerd(env v1alpha1.Environment) (*Containerd, error) {
}
version = strings.TrimPrefix(version, "v")

// Parse major version
c.MajorVersion = 1
parts := strings.Split(version, ".")
if len(parts) > 0 && parts[0] == "2" {
c.MajorVersion = 2
if version == "2" {
version = "2.0.0"
}
// Coerce bare-major "2" to "2.0.0" so apt/dnf gets a concrete
// version string. Preserved from the prior major-version dispatch.
if version == "2" {
version = "2.0.0"
}
c.Version = version

Expand Down Expand Up @@ -820,11 +633,7 @@ func (t *Containerd) Execute(tpl *bytes.Buffer, env v1alpha1.Environment) error

switch t.Source {
case "package", "":
if t.MajorVersion == 2 {
tmpl = containerdV2Tmpl
} else {
tmpl = containerdV1Tmpl
}
tmpl = containerdV1Tmpl
case "git":
tmpl = containerdGitTmpl
case "latest":
Expand Down
64 changes: 25 additions & 39 deletions pkg/provisioner/templates/containerd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ package templates

import (
"bytes"
"strings"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -32,7 +31,6 @@ func TestNewContainerd_Defaults(t *testing.T) {
require.NoError(t, err)
assert.Equal(t, "package", c.Source)
assert.Equal(t, "1.7.27", c.Version)
assert.Equal(t, 1, c.MajorVersion)
}

func TestNewContainerd_CustomVersion(t *testing.T) {
Expand All @@ -46,7 +44,6 @@ func TestNewContainerd_CustomVersion(t *testing.T) {
c, err := NewContainerd(env)
require.NoError(t, err)
assert.Equal(t, "1.7.0", c.Version)
assert.Equal(t, 1, c.MajorVersion)
}

func TestNewContainerd_EmptyVersion(t *testing.T) {
Expand All @@ -60,7 +57,6 @@ func TestNewContainerd_EmptyVersion(t *testing.T) {
c, err := NewContainerd(env)
require.NoError(t, err)
assert.Equal(t, "1.7.27", c.Version)
assert.Equal(t, 1, c.MajorVersion)
}

func TestNewContainerd_Version2(t *testing.T) {
Expand All @@ -74,7 +70,6 @@ func TestNewContainerd_Version2(t *testing.T) {
c, err := NewContainerd(env)
require.NoError(t, err)
assert.Equal(t, "2.0.0", c.Version)
assert.Equal(t, 2, c.MajorVersion)
}

func TestNewContainerd_PackageSpec(t *testing.T) {
Expand Down Expand Up @@ -186,6 +181,12 @@ func TestContainerd_Execute_Version1(t *testing.T) {
assert.Contains(t, out, "1.7.26")
assert.Contains(t, out, "download.docker.com")
assert.Contains(t, out, "Amazon Linux repository")
assert.Contains(t, out, "/etc/apt/keyrings/docker.gpg",
"debian branch must add the Docker apt keyring")
assert.Contains(t, out, "docker-ce.repo",
"rhel branch must install the Docker dnf repo")
assert.Contains(t, out, "Unsupported OS family",
"unknown OS families must be rejected with a diagnostic")
assert.Contains(t, out, `SystemdCgroup \= true`)
assert.Contains(t, out, "containerd config default")
assert.Contains(t, out, `conf_dir = "/etc/cni/net.d"`)
Expand All @@ -198,11 +199,14 @@ func TestContainerd_Execute_Version1(t *testing.T) {
assert.Contains(t, out, "holodeck_mark_installed")
}

func TestContainerd_Execute_Version2(t *testing.T) {
// TestContainerd_Execute_Version2x asserts that requesting a v2.x version
// renders the unified containerd.io package template (same path as v1.x),
// not the binary-download v2 template.
func TestContainerd_Execute_Version2x(t *testing.T) {
env := v1alpha1.Environment{
Spec: v1alpha1.EnvironmentSpec{
ContainerRuntime: v1alpha1.ContainerRuntime{
Version: "2.0.0",
Version: "2.2.3",
},
},
}
Expand All @@ -213,19 +217,19 @@ func TestContainerd_Execute_Version2(t *testing.T) {
require.NoError(t, err)
out := buf.String()

assert.Contains(t, out, `COMPONENT="containerd"`)
assert.Contains(t, out, "holodeck_progress")
assert.Contains(t, out, "Installing containerd 2.0.0 from official binaries")
assert.Contains(t, out, "containerd-2.0.0-linux-${ARCH}.tar.gz")
assert.Contains(t, out, "https://github.com/containerd/containerd/releases/download/v2.0.0/")
assert.Contains(t, out, "SystemdCgroup = true")
assert.Contains(t, out, "containerd config default")
assert.Contains(t, out, `RUNC_VERSION="1.2.3"`)
assert.Contains(t, out, `CNI_VERSION="v1.6.2"`)
assert.Contains(t, out, `conf_dir = "/etc/cni/net.d"`)
assert.Contains(t, out, `bin_dir = "/opt/cni/bin"`)
assert.Contains(t, out, "holodeck_verify_containerd")
assert.Contains(t, out, "holodeck_mark_installed")
// Discriminators that exist ONLY in the unified package (V1) template:
assert.Contains(t, out, "download.docker.com",
"v2.x must use the Docker apt/dnf repo path")
assert.Contains(t, out, "Installing containerd 2.2.3 using package repository",
"v2.x must take the package-repo install path, not the binary-download path")
assert.Contains(t, out, "HOLODECK_OS_FAMILY",
"v2.x must go through the OS-family switch (debian/amazon/rhel)")

// Discriminators that exist ONLY in the V2 binary template — must be ABSENT:
assert.NotContains(t, out, "github.com/containerd/containerd/releases/download",
"template should not download containerd binaries from GitHub")
assert.NotContains(t, out, "RUNC_VERSION",
"template should not pin runc version (containerd.io bundles it)")
}

func TestContainerd_Execute_GitSource(t *testing.T) {
Expand Down Expand Up @@ -294,26 +298,8 @@ func TestContainerd_Execute_CommonElements(t *testing.T) {
require.NoError(t, err)
out := buf.String()

if tt.version == "2.0.0" {
assert.True(t, strings.Contains(out, "sudo modprobe overlay"))
assert.True(t, strings.Contains(out, "sudo modprobe br_netfilter"))
assert.True(t, strings.Contains(out, "net.bridge.bridge-nf-call-iptables"))
assert.True(t, strings.Contains(out, "net.ipv4.ip_forward"))
assert.True(t, strings.Contains(out, "sudo sysctl --system"))
assert.True(t, strings.Contains(out, `if [[ "$ARCH" == "x86_64" ]]`))
assert.True(t, strings.Contains(out, `ARCH="amd64"`))
assert.True(t, strings.Contains(out, `elif [[ "$ARCH" == "aarch64" ]]`))
assert.True(t, strings.Contains(out, `ARCH="arm64"`))
assert.True(t, strings.Contains(out, `CNI_VERSION="v1.6.2"`))
assert.True(t, strings.Contains(out, "/opt/cni/bin"))
}

assert.Contains(t, out, "sudo mkdir -p /etc/containerd")
if tt.version == "1.7.26" {
assert.Contains(t, out, "sudo systemctl restart containerd")
} else {
assert.Contains(t, out, "sudo systemctl enable --now containerd")
}
assert.Contains(t, out, "sudo systemctl restart containerd")
assert.Contains(t, out, "sudo systemctl enable")
})
}
Expand Down
25 changes: 25 additions & 0 deletions tests/data/test_aws_containerd_v1_al2023.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Holodeck Environment manifest used as end-to-end test data: an AWS
# instance (described below as Amazon Linux 2023) with containerd 1.7.27
# installed and Kubernetes installation disabled.
apiVersion: holodeck.nvidia.com/v1alpha1
kind: Environment
metadata:
name: containerd-cell-3-amzn-1727
description: "containerd consolidation E2E — Amazon Linux 2023 + 1.7.27"
spec:
provider: aws
auth:
keyName: cnt-ci
# NOTE(review): absolute path inside one developer's home directory; this
# presumably will not resolve on CI runners or other machines — confirm.
privateKey: /Users/eduardoa/.ssh/cnt-ci.pem
username: ec2-user
instance:
type: g4dn.xlarge
region: us-west-1
# NOTE(review): ingress is limited to a single /32 address; verify it is the
# address the test runner will actually connect from.
ingressIpRanges:
- 12.161.61.162/32
image:
architecture: amd64
# NOTE(review): pinned AMI ID; AMI IDs are typically region-specific, so
# confirm it exists in the region configured above.
imageId: ami-0a21b93c10617c1a5
containerRuntime:
install: true
name: containerd
version: 1.7.27
kubernetes:
install: false
Loading