Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions demo/clusters/kind/create-cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env bash

# Copyright 2023 The Kubernetes Authors.
# Copyright 2023 NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Absolute path of the directory that contains this script, resolved
# independently of the caller's working directory.
CURRENT_DIR="$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)"

# Abort on the first failing command (also inside pipelines) and trace every
# command that is executed.
set -ex
set -o pipefail

# Shared settings; SCRIPTS_DIR and KIND_CLUSTER_NAME used below come from here.
source "${CURRENT_DIR}/scripts/common.sh"

# Build the kind image and create a test cluster.
# The helper paths are quoted so that a checkout located in a directory whose
# name contains spaces or glob characters is not word-split.
"${SCRIPTS_DIR}/build-kind-image.sh"
"${SCRIPTS_DIR}/create-kind-cluster.sh"

# Stop tracing, then print the completion message in green and reset the color.
set +x
printf '\033[0;32m'
echo "Cluster creation complete: ${KIND_CLUSTER_NAME}"
printf '\033[0m'
32 changes: 32 additions & 0 deletions demo/clusters/kind/delete-cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash

# Copyright 2023 The Kubernetes Authors.
# Copyright 2023 NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Absolute path of the directory that contains this script, resolved
# independently of the caller's working directory.
CURRENT_DIR="$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)"

# Abort on the first failing command (also inside pipelines) and trace every
# command that is executed.
set -ex
set -o pipefail

# Shared settings; SCRIPTS_DIR and KIND_CLUSTER_NAME used below come from here.
source "${CURRENT_DIR}/scripts/common.sh"

# Delete the test cluster.
# The helper path is quoted so that a checkout located in a directory whose
# name contains spaces or glob characters is not word-split.
"${SCRIPTS_DIR}/delete-kind-cluster.sh"

# Stop tracing, then print the completion message in green and reset the color.
set +x
printf '\033[0;32m'
echo "Cluster deletion complete: ${KIND_CLUSTER_NAME}"
printf '\033[0m'
108 changes: 108 additions & 0 deletions demo/clusters/kind/demo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#!/usr/bin/env bash

# Copyright 2023 The Kubernetes Authors.
# Copyright 2023 NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Absolute directory of this script, independent of the caller's working directory
CURRENT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"

# Stop at the first failing command, including failures inside a pipeline
set -eo pipefail

# Load the shared settings from scripts/common.sh
. "${CURRENT_DIR}/scripts/common.sh"

#######################################
# Register the "nvidia" Helm repository unless it is already configured.
# Arguments: none
# Outputs:   whatever `helm repo add` / `helm repo update` print
# Returns:   non-zero if adding or updating the repository fails
#######################################
add_repo () {
  local repo_count

  # `helm repo list` exits non-zero when no repository is configured at all.
  # With `set -e` and `pipefail` active that would abort the script before the
  # repository could be added, so the failure is deliberately tolerated here
  # and simply results in a count of 0.
  # awk skips the header row (NR > 1) and counts rows whose name is "nvidia".
  repo_count=$( { helm repo list || true; } \
    | awk 'NR > 1 && $1 == "nvidia" {count++} END {print count+0}')

  # Arithmetic comparison: `<` inside [[ ]] would compare strings
  # lexicographically rather than numerically.
  if (( repo_count < 1 )); then
    helm repo add nvidia https://helm.ngc.nvidia.com/nvidia
    helm repo update
  fi
}

#######################################
# Delete a previously created demo cluster, if one exists.
# Globals:   KIND_CLUSTER_NAME (read), CURRENT_DIR (read)
# Arguments: none
# Outputs:   a status message on stdout
# Returns:   exits the script with status 1 if the cluster name is listed
#            more than once
#######################################
clear_old_cluster () {
  local matches

  # Count the lines of `kind get clusters` that equal the cluster name exactly
  # (-F fixed string, -x whole line). grep -c still prints 0 but exits 1 when
  # nothing matches; `|| true` keeps that from aborting under `set -e`.
  matches=$(kind get clusters | grep -Fxc "${KIND_CLUSTER_NAME}" || true)

  if [[ "${matches}" -eq 1 ]]; then
    # Resolve the helper relative to this script rather than the caller's
    # working directory, so the demo can be started from anywhere.
    "${CURRENT_DIR}/delete-cluster.sh"
  elif [[ "${matches}" -gt 1 ]]; then
    echo 'too many clusters debug'
    kind get clusters
    exit 1
  else
    echo 'no clusters to clear'
  fi
}

#######################################
# Recreate the demo cluster from scratch: remove any old cluster, make sure
# the nvidia Helm repository is registered, then create the new cluster.
# Globals:   CURRENT_DIR (read)
# Arguments: none
#######################################
create_cluster () {
  clear_old_cluster
  add_repo
  # Resolve the helper relative to this script rather than the caller's
  # working directory, so the demo can be started from anywhere.
  "${CURRENT_DIR}/create-cluster.sh"
}

#######################################
# Create the cluster and install the operator in "local" mode.
# Globals:   CURRENT_DIR (read)
# Arguments: none
#######################################
exec_local () {
  create_cluster
  # Resolve the installer relative to this script rather than the caller's
  # working directory, so the demo can be started from anywhere.
  "${CURRENT_DIR}/install-operator.sh" local
}

#######################################
# Create the cluster and install the operator in "gdrcopy" mode.
# Globals:   CURRENT_DIR (read)
# Arguments: none
#######################################
exec_gdrcopy () {
  create_cluster
  # Resolve the installer relative to this script rather than the caller's
  # working directory, so the demo can be started from anywhere.
  "${CURRENT_DIR}/install-operator.sh" gdrcopy
}

#######################################
# Create the cluster and install the operator in "release" mode.
# Globals:   CURRENT_DIR (read)
# Arguments: none
#######################################
exec_release () {
  create_cluster
  # Resolve the installer relative to this script rather than the caller's
  # working directory, so the demo can be started from anywhere.
  "${CURRENT_DIR}/install-operator.sh" release
}

# Bring up the cluster only; nothing is installed into it afterwards.
# Terminates the script with status 0 once the cluster exists.
exec_bare () {
  create_cluster
  printf '%s\n' \
    'As this is a bare-cluster we will end here instead of installing the operator and the gpu-pod'
  exit 0
}

#######################################
# Block until the rollout of a daemonset has completed.
# Arguments: $1 - namespace of the daemonset
#            $2 - name of the daemonset
#            $3 - optional timeout passed to kubectl (default: 180s)
# Returns:   the exit status of `kubectl rollout status`
#######################################
wait_for_daemonset () {
  local namespace=$1
  local daemonset=$2
  # Callers that pass only two arguments keep the previous fixed 180s timeout.
  local timeout=${3:-180s}

  kubectl rollout status --timeout="${timeout}" -n "${namespace}" "daemonset/${daemonset}"
}

# Print the accepted invocation on stdout and terminate with status 1.
usage () {
  printf '%s\n' \
    './demo.sh [CHOICE]' \
    'where [CHOICE] is one of "bare", "release", "local", or "gdrcopy"'
  exit 1
}

#######################################
# Entry point of the demo: set up the cluster for the requested CHOICE, wait
# for the GPU daemonsets to roll out and start the sample GPU pod.
# Globals:   CURRENT_DIR (read)
# Arguments: $1 - one of "release", "local", "gdrcopy" or "bare"
# Outputs:   progress of the invoked helpers and the final pod listing
# Returns:   via usage (exit 1) when the choice is missing or unknown
#######################################
demo () {
  case "${1:-}" in
    '')
      usage
      ;;
    release)
      exec_release
      ;;
    local)
      exec_local
      ;;
    gdrcopy)
      exec_gdrcopy
      ;;
    bare)
      # exec_bare ends the script itself, so the steps below are not reached.
      exec_bare
      ;;
    *)
      echo 'unrecognized option'
      usage
      ;;
  esac

  wait_for_daemonset gpu-operator nvidia-container-toolkit-daemonset
  wait_for_daemonset gpu-operator nvidia-device-plugin-daemonset

  # Reference the manifest next to this script rather than relative to the
  # caller's working directory, so the demo can be started from anywhere.
  kubectl apply -f "${CURRENT_DIR}/gpu-pod.yml"
  # Short pause before querying the pod that was just submitted.
  sleep 3
  kubectl get pod gpu-pod
}

# Run the demo with the script's command-line arguments and report how long it took.
time demo "$@"
16 changes: 16 additions & 0 deletions demo/clusters/kind/gpu-pod.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Sample pod used by the demo: runs a single CUDA sample container that
# requests one GPU.
apiVersion: v1
kind: Pod
metadata:
  name: gpu-pod
spec:
  # Never restart the container once it has exited.
  restartPolicy: Never
  containers:
    - name: cuda-container
      image: nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda10.2
      resources:
        limits:
          nvidia.com/gpu: 1 # requesting 1 GPU
  # Allow scheduling onto nodes that carry a nvidia.com/gpu NoSchedule taint.
  tolerations:
    - key: nvidia.com/gpu
      operator: Exists
      effect: NoSchedule
80 changes: 80 additions & 0 deletions demo/clusters/kind/install-operator.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env bash

# Copyright 2023 The Kubernetes Authors.
# Copyright 2023 NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Print the supported options on stdout and terminate with status 1.
usage () {
  printf '%s\n' \
    'USAGE:' \
    './install-operator.sh [option]' \
    'where [option] is one of local, gdrcopy, release, template, template-release'
  exit 1
}

# Absolute directory of this script; the shared settings are loaded from it
CURRENT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
. "${CURRENT_DIR}/scripts/common.sh"

# Per-option defaults for the helm call.
# Each value is assigned only when the variable is unset or empty, so any of
# them can be overridden through the environment.
case "$1" in
  '')
    usage
    ;;
  local)
    : "${TARGET_CHART:=${PROJECT_DIR}/deployments/gpu-operator}"
    : "${TARGET_ACTION:=upgrade -i}"
    : "${XTRA_OPTS:=--wait}"
    ;;
  gdrcopy)
    : "${TARGET_CHART:=${PROJECT_DIR}/deployments/gpu-operator}"
    : "${TARGET_ACTION:=upgrade -i}"
    : "${XTRA_OPTS:=--wait --set gdrcopy.enabled=true}"
    ;;
  release)
    : "${TARGET_CHART:=nvidia/gpu-operator}"
    : "${TARGET_ACTION:=upgrade -i}"
    : "${XTRA_OPTS:=--wait}"
    ;;
  template)
    : "${TARGET_CHART:=${PROJECT_DIR}/deployments/gpu-operator}"
    : "${TARGET_ACTION:=template}"
    : "${XTRA_OPTS:=--output-dir /tmp/gpu-operator}"
    ;;
  template-release)
    : "${TARGET_CHART:=nvidia/gpu-operator}"
    : "${TARGET_ACTION:=template}"
    : "${XTRA_OPTS:=--output-dir /tmp/gpu-operator-release}"
    ;;
  *)
    # Echo the rejected command line back before showing the usage text
    printf '%s\n' "unknown usage $0 $*"
    usage
    ;;
esac

# From here on: abort on the first failing command (also inside pipelines) and
# trace every command that is executed.
set -ex
set -o pipefail

#kubectl label node "${KIND_CLUSTER_NAME}-worker" --overwrite nvidia.com/gpu.present=true

# TARGET_ACTION and XTRA_OPTS hold whitespace-separated argument lists (for
# example "upgrade -i"). Split them into arrays explicitly instead of relying
# on unquoted expansion, which would additionally glob-expand the words.
# `read -d ''` consumes the whole value and reports EOF with a non-zero
# status, hence the `|| true`.
read -r -d '' -a helm_action <<< "${TARGET_ACTION}" || true
read -r -d '' -a helm_extra_opts <<< "${XTRA_OPTS}" || true

# The validator.driver.env[0] keys are single-quoted because `[0]` is a glob
# pattern when left unquoted. The chart reference is quoted because it may be
# a filesystem path containing spaces.
helm "${helm_action[@]}" \
  --set cdi.enabled=true \
  --set driver.enabled=false \
  --set operator.runtimeClass=nvidia \
  --set toolkit.enabled=true \
  --set 'validator.driver.env[0].name=DISABLE_DEV_CHAR_SYMLINK_CREATION' \
  --set-string 'validator.driver.env[0].value=true' \
  --namespace gpu-operator --create-namespace \
  "${helm_extra_opts[@]}" \
  nvidia-gpu-operator \
  "${TARGET_CHART}"

#--set runtimeClassName=nvidia \

# Stop tracing, then print the result in green: the action that ran followed
# by the pods in the gpu-operator namespace.
set +x
printf '\033[0;32m'
echo "$TARGET_ACTION complete:"
kubectl get pod -n gpu-operator
printf '\033[0m'
47 changes: 47 additions & 0 deletions demo/clusters/kind/scripts/build-kind-image.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env bash

# Copyright 2023 The Kubernetes Authors.
# Copyright 2023 NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Absolute directory of this script, independent of the caller's working directory
CURRENT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"

set -ex
set -o pipefail

. "${CURRENT_DIR}/common.sh"

# Nothing to do when docker already lists an image ID for the target image.
EXISTING_IMAGE_ID="$(docker images --filter "reference=${KIND_IMAGE}" -q)"
[[ -z "${EXISTING_IMAGE_ID}" ]] || exit 0

# Scratch directory for the build artifacts; it is removed again whenever the
# script exits.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "${TMP_DIR}"' EXIT

# Where the kubernetes sources come from and where they are checked out
KIND_K8S_REPO="https://github.com/kubernetes/kubernetes.git"
KIND_K8S_DIR="${TMP_DIR}/kubernetes-${KIND_K8S_TAG}"

# Shallow clone of exactly the kubernetes tag the image is built from
git clone \
  --depth 1 \
  --branch "${KIND_K8S_TAG}" \
  "${KIND_K8S_REPO}" \
  "${KIND_K8S_DIR}"

# Build the kind node image on top of the configured base image
kind build node-image \
  --base-image "${KIND_IMAGE_BASE}" \
  --image "${KIND_IMAGE}" \
  "${KIND_K8S_DIR}"
40 changes: 40 additions & 0 deletions demo/clusters/kind/scripts/common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env bash

# Copyright 2023 The Kubernetes Authors.
# Copyright 2023 NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Absolute path of the directory that holds this file
SCRIPTS_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
# Four directory levels above SCRIPTS_DIR
PROJECT_DIR="$(cd -- "${SCRIPTS_DIR}/../../../.." &> /dev/null && pwd)"

CLUSTER_NAME=gpu-operator-demo

# Every setting below keeps a non-empty value that is already present in the
# environment and otherwise falls back to the default shown.

# Kubernetes tag the kind cluster is built from
# (see https://github.com/kubernetes/kubernetes/tags)
KIND_K8S_TAG="${KIND_K8S_TAG:-v1.27.1}"

# Name of the kind cluster to create
KIND_CLUSTER_NAME="${KIND_CLUSTER_NAME:-${CLUSTER_NAME}-cluster}"

# Worker node used for toolkit and GPU configuration
KIND_WORKER_NODE="${KIND_WORKER_NODE:-${KIND_CLUSTER_NAME}-worker}"

# Location of kind's cluster configuration file
KIND_CLUSTER_CONFIG_PATH="${KIND_CLUSTER_CONFIG_PATH:-${SCRIPTS_DIR}/kind-cluster-config.yaml}"

# Base image and the derived name of the kind image to build
KIND_IMAGE_BASE_TAG="${KIND_IMAGE_BASE_TAG:-v20230515-01914134-containerd_v1.7.1}"
KIND_IMAGE_BASE="${KIND_IMAGE_BASE:-gcr.io/k8s-staging-kind/base:${KIND_IMAGE_BASE_TAG}}"
KIND_IMAGE="${KIND_IMAGE:-kindest/node:${KIND_K8S_TAG}-${KIND_IMAGE_BASE_TAG}}"
Loading
Loading