Patch overview (free-form text; everything before the first file header
below is commentary and is not part of any hunk).

What this patch does, file by file:
  * nvidia-drivers.sh (new): installs the NVIDIA container toolkit/runtime
    and the 580-series driver packages, then deploys the upstream
    k8s-device-plugin static manifest with kubectl.
  * overlays/dev/dev-game-stream/service.yaml: deleted (NodePort Service for
    ports 8083 and 31982).
  * overlays/dev/dev-game-stream/stateful-set.yaml: the "steam-headless"
    StatefulSet becomes a single-replica "egl" Deployment in namespace
    5stack, running the Selkies nvidia-egl-desktop image on the host
    network with hostPath volumes under /opt/5stack/game-streamer.
  * overlays/dev/secrets/kustomization.yaml: adds ../../../nvidia as a
    resource.
  * overlays/nvidia/nvidia-plugin.yaml: device plugin image v0.17.0 to
    v0.18.1, node affinity key 5stack-dev-server to nvidia-gpu, explicit
    RollingUpdate strategy, renamed volume, hostPath type Directory.
  * utils/output_redirect.sh (new), utils/setup-env.sh (renamed from
    setup-env.sh), utils/utils.sh: output_redirect() moves into its own
    file and utils.sh sources both it and setup-env.sh.

Adjustments made while restoring this patch:
  * nvidia-drivers.sh uses "kubectl apply" instead of "kubectl create" so a
    re-run does not abort under "set -e" once the DaemonSet exists, uses
    apt-get rather than apt, and fetches the v0.18.1 manifest to match the
    image tag set in overlays/nvidia/nvidia-plugin.yaml.
  * The egl Deployment declares "strategy: type: Recreate" (three added
    lines, so that hunk header now reads +1,203).
  * Files touched at their last line now end with a newline.
  * Because added content changed, the blob ids on the "index" lines of the
    affected files describe the original submission, not this text.

Review notes:
  * Leading whitespace was not available in the copy under review, so
    indentation is reconstructed (2 spaces for YAML, 4 for shell). If
    context lines do not match the working tree, apply with
    "git apply --ignore-whitespace".
  * NOTE(review): from overlays/dev/secrets, "../../../nvidia" resolves to a
    top-level nvidia/ directory, while the plugin manifest changed here
    lives in overlays/nvidia/. Confirm the intended path.
  * NOTE(review): SELKIES_BASIC_AUTH_USER and PASSWD are the literal "dev"
    and the container is on the host network; consider the secretKeyRef
    form that is already sketched in the comments.
  * NOTE(review): the egl image is pinned to ":latest"; consider a fixed tag.
  * NOTE(review): stateful-set.yaml now holds a Deployment; consider
    renaming the file.
  * NOTE(review): utils.sh now sources setup-env.sh; confirm that file only
    defines functions and runs nothing at source time.

diff --git a/nvidia-drivers.sh b/nvidia-drivers.sh
new file mode 100644
index 0000000..c8cfddc
--- /dev/null
+++ b/nvidia-drivers.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+set -e
+apt-get install -y nvidia-container-toolkit nvidia-container-runtime cuda-drivers-fabricmanager-580 nvidia-headless-580-server-open nvidia-utils-580-server
+
+kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.18.1/deployments/static/nvidia-device-plugin.yml
diff --git a/overlays/dev/dev-game-stream/service.yaml b/overlays/dev/dev-game-stream/service.yaml
deleted file mode 100644
index 309ffa6..0000000
--- a/overlays/dev/dev-game-stream/service.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: dev-game-stream
-  namespace: 5stack
-spec:
-  ports:
-    - name: vnc
-      port: 8083
-      protocol: TCP
-      targetPort: 8083
-    - name: hud
-      port: 31982
-      protocol: TCP
-      targetPort: 31982
-
-  selector:
-    app: dev-game-stream
-  type: NodePort
diff --git a/overlays/dev/dev-game-stream/stateful-set.yaml b/overlays/dev/dev-game-stream/stateful-set.yaml
index fcf5c43..f1d5578 100644
--- a/overlays/dev/dev-game-stream/stateful-set.yaml
+++ b/overlays/dev/dev-game-stream/stateful-set.yaml
@@ -1,110 +1,203 @@
 apiVersion: apps/v1
-kind: StatefulSet
+kind: Deployment
 metadata:
-  name: steam-headless
+  name: egl
+  namespace: 5stack
 spec:
-  serviceName: "steam-headless"
   replicas: 1
+  # Recreate: a rolling update would start the new pod while the old one still holds host port 8084 (hostNetwork) and its nvidia.com/gpu allocation
+  strategy:
+    type: Recreate
   selector:
     matchLabels:
-      app: steam-headless
+      app: egl
   template:
     metadata:
       labels:
-        app: steam-headless
+        app: egl
     spec:
       dnsConfig:
         options:
           - name: ndots
             value: "1"
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-              - matchExpressions:
-                  - key: 5stack-dev-server
-                    operator: In
-                    values:
-                      - "true"
       runtimeClassName: nvidia
-      securityContext:
-        fsGroup: 1000
+      hostname: egl
+      hostNetwork: true
       containers:
-        - name: steam-headless
-          securityContext:
-            privileged: true
-          image: josh5/steam-headless:latest
-          resources:
-            requests:
-              memory: "4"
-              cpu: "4"
-            limits:
-              nvidia.com/gpu: 1
-          ports:
-            - containerPort: 8083
-              protocol: TCP
-            - containerPort: 31982
-              protocol: TCP
-          volumeMounts:
-            - name: home-dir
-              mountPath: /home/default/
-            - name: games-dir
-              mountPath: /mnt/games/
-            - name: input-devices
-              mountPath: /dev/input/
-            - name: dshm
-              mountPath: /dev/shm
+        - name: egl
+          image: ghcr.io/selkies-project/nvidia-egl-desktop:latest
           env:
-            - name: NAME
-              value: 'SteamHeadless'
             - name: TZ
-              value: 'America/New_York'
-            - name: USER_LOCALES
-              value: 'en_US.UTF-8 UTF-8'
-            - name: DISPLAY
-              value: ':55'
-            - name: SHM_SIZE
-              value: '2G'
-            - name: DOCKER_RUNTIME
-              value: 'nvidia'
-            - name: PUID
-              value: '1000'
-            - name: PGID
-              value: '1000'
-            - name: UMASK
-              value: '000'
-            - name: USER_PASSWORD
-              value: 'password'
-            - name: MODE
-              value: 'primary'
-            - name: WEB_UI_MODE
-              value: 'vnc'
-            - name: ENABLE_VNC_AUDIO
-              value: 'false'
-            - name: PORT_NOVNC_WEB
-              value: '8083'
-            - name: NEKO_NAT1TO1
-              value: ''
-            - name: ENABLE_SUNSHINE
-              value: 'false'
-            - name: ENABLE_EVDEV_INPUTS
-              value: 'true'
+              value: "UTC"
+            - name: DISPLAY_SIZEW
+              value: "1920"
+            - name: DISPLAY_SIZEH
+              value: "1080"
+            - name: DISPLAY_REFRESH
+              value: "60"
+            - name: DISPLAY_DPI
+              value: "96"
+            - name: DISPLAY_CDEPTH
+              value: "24"
+            # Keep to default unless you know what you are doing with VirtualGL, `VGL_DISPLAY` should be set to either `egl[n]` or `/dev/dri/card[n]` only when the device was passed to the container
+#            - name: VGL_DISPLAY
+#              value: "egl"
+            # Choose either `value:` or `secretKeyRef:` but not both at the same time
+            - name: SELKIES_BASIC_AUTH_USER
+              value: "dev"
+            - name: PASSWD
+              value: "dev"
+#              valueFrom:
+#                secretKeyRef:
+#                  name: my-pass
+#                  key: my-pass
+            # Uncomment to enable KasmVNC instead of Selkies, `SELKIES_BASIC_AUTH_PASSWORD` is used for authentication with KasmVNC, defaulting to `PASSWD` if not provided
+            # Uses: `SELKIES_ENABLE_BASIC_AUTH`, `SELKIES_BASIC_AUTH_USER`, `SELKIES_BASIC_AUTH_PASSWORD`, `SELKIES_ENABLE_RESIZE`, `SELKIES_ENABLE_HTTPS`, `SELKIES_HTTPS_CERT`, `SELKIES_HTTPS_KEY`
+#            - name: KASMVNC_ENABLE
+#              value: "true"
+            # Number of threads for encoding frames with KasmVNC, default value is all threads
+#            - name: KASMVNC_THREADS
+#              value: "0"
+            ###
+            # Selkies parameters, for additional configurations see `selkies-gstreamer --help`
+            ###
+            # Change `SELKIES_ENCODER` to `x264enc`, `vp8enc`, or `vp9enc` if using software fallback without allocated GPUs or your GPU does not support `H.264 (AVCHD)` under the `NVENC - Encoding` section in https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new
+            - name: SELKIES_ENCODER
+              value: "nvh264enc"
+            - name: SELKIES_ENABLE_RESIZE
+              value: "false"
+            # Initial video bitrate in kilobits per second, may be changed later within web interface
+            - name: SELKIES_VIDEO_BITRATE
+              value: "8000"
+            # Initial frames per second, may be changed later within web interface
+            - name: SELKIES_FRAMERATE
+              value: "60"
+            # Initial audio bitrate in bits per second, may be changed later within web interface
+            - name: SELKIES_AUDIO_BITRATE
+              value: "128000"
+            # Uncomment if network conditions rapidly fluctuate
+#            - name: SELKIES_CONGESTION_CONTROL
+#              value: "true"
+            # Enable Basic Authentication from the web interface
+            - name: SELKIES_ENABLE_BASIC_AUTH
+              value: "true"
+            # Defaults to `PASSWD` if unspecified, choose either `value:` or `secretKeyRef:` but not both at the same time
+#            - name: SELKIES_BASIC_AUTH_PASSWORD
+#              value: "mypasswd"
+#              valueFrom:
+#                secretKeyRef:
+#                  name: my-pass
+#                  key: my-pass
+            # Enable HTTPS web interface from inside the container
+            - name: SELKIES_ENABLE_HTTPS
+              value: "false"
+            # Volume mount trusted HTTPS certificate to new path for no web browser warnings
+#            - name: SELKIES_HTTPS_CERT
+#              value: /etc/ssl/certs/ssl-cert-snakeoil.pem
+#            - name: SELKIES_HTTPS_KEY
+#              value: /etc/ssl/private/ssl-cert-snakeoil.key
+            ###
+            # Uncomment below to use a TURN server for improved network compatibility
+            ###
+#            - name: SELKIES_TURN_HOST
+#              value: "turn.example.com"
+#            - name: SELKIES_TURN_PORT
+#              value: "3478"
+            # Change to `tcp` if the UDP protocol is throttled or blocked in your client network, or when the TURN server does not support UDP
+#            - name: SELKIES_TURN_PROTOCOL
+#              value: "udp"
+            # You need a valid hostname and a certificate from authorities such as ZeroSSL or Let's Encrypt with your TURN server to enable TURN over TLS
+#            - name: SELKIES_TURN_TLS
+#              value: "false"
+            # Internal TURN server settings, do not uncomment other TURN server settings below this when using an internal TURN server
+#            - name: TURN_MIN_PORT
+#              value: "65532"
+#            - name: TURN_MAX_PORT
+#              value: "65535"
+            # Provide only `SELKIES_TURN_SHARED_SECRET` for time-limited shared secret authentication or both `SELKIES_TURN_USERNAME` and `SELKIES_TURN_PASSWORD` for legacy long-term authentication, but do not provide both authentication methods at the same time
+#            - name: SELKIES_TURN_SHARED_SECRET
+#              valueFrom:
+#                secretKeyRef:
+#                  name: turn-shared-secret
+#                  key: turn-shared-secret
+#            - name: SELKIES_TURN_USERNAME
+#              value: "username"
+            # Choose either `value:` or `secretKeyRef:` but not both at the same time
+#            - name: SELKIES_TURN_PASSWORD
+#              value: "mypasswd"
+#              valueFrom:
+#                secretKeyRef:
+#                  name: turn-password
+#                  key: turn-password
+            # TURN REST URI authentication, all TURN server settings above are ignored if enabled
+#            - name: SELKIES_TURN_REST_URI
+#              value: "http://localhost:8008"
             - name: NVIDIA_DRIVER_CAPABILITIES
               value: 'all'
             - name: NVIDIA_VISIBLE_DEVICES
               value: 'all'
+            - name: NGINX_PORT
+              value: "8084"
+            - name: SELKIES_PORT
+              value: "8085"
+          stdin: true
+          tty: true
+          ports:
+            - name: http
+              containerPort: 8084
+              protocol: TCP
+            # Internal TURN server settings
+#            - containerPort: 3478
+#              protocol: TCP
+#            - containerPort: 65532
+#              protocol: TCP
+#            - containerPort: 65533
+#              protocol: TCP
+#            - containerPort: 65534
+#              protocol: TCP
+#            - containerPort: 65535
+#              protocol: TCP
+#            - containerPort: 3478
+#              protocol: UDP
+#            - containerPort: 65532
+#              protocol: UDP
+#            - containerPort: 65533
+#              protocol: UDP
+#            - containerPort: 65534
+#              protocol: UDP
+#            - containerPort: 65535
+#              protocol: UDP
+          resources:
+            limits:
+              memory: 64Gi
+              cpu: "16"
+              nvidia.com/gpu: 1
+            requests:
+              memory: 100Mi
+              cpu: 100m
+          volumeMounts:
+            - mountPath: /dev/shm
+              name: dshm
+            - mountPath: /cache
+              name: egl-cache-vol
+            - mountPath: /home/ubuntu
+              name: egl-root-vol
+            - mountPath: /home/ubuntu/.steam/
+              name: steam-data
       volumes:
-        - name: home-dir
-          hostPath:
-            path: /opt/5stack/steam/home
-            type: Directory
-        - name: games-dir
-          hostPath:
-            path: /opt/5stack/steam/games
-            type: Directory
-        - name: input-devices
-          hostPath:
-            path: /dev/input/
-        - name: dshm
-          emptyDir:
-            medium: Memory
\ No newline at end of file
+        - name: dshm
+          emptyDir:
+            medium: Memory
+            sizeLimit: 2Gi
+        - name: egl-cache-vol
+          hostPath:
+            path: /opt/5stack/game-streamer/cache
+            type: DirectoryOrCreate
+        - name: egl-root-vol
+          hostPath:
+            path: /opt/5stack/game-streamer/home
+            type: DirectoryOrCreate
+        - name: steam-data
+          hostPath:
+            path: /opt/5stack/game-streamer/steam-data
+            type: DirectoryOrCreate
diff --git a/overlays/dev/secrets/kustomization.yaml b/overlays/dev/secrets/kustomization.yaml
index f69ab3d..de1fb6e 100644
--- a/overlays/dev/secrets/kustomization.yaml
+++ b/overlays/dev/secrets/kustomization.yaml
@@ -3,6 +3,7 @@ kind: Kustomization
 
 resources:
   - ../../../base
+  - ../../../nvidia
 
 secretGenerator:
   - name: api-secrets
diff --git a/overlays/nvidia/nvidia-plugin.yaml b/overlays/nvidia/nvidia-plugin.yaml
index 1a2343e..6233b96 100644
--- a/overlays/nvidia/nvidia-plugin.yaml
+++ b/overlays/nvidia/nvidia-plugin.yaml
@@ -7,6 +7,8 @@ spec:
   selector:
     matchLabels:
       name: nvidia-device-plugin-ds
+  updateStrategy:
+    type: RollingUpdate
   template:
     metadata:
       labels:
@@ -17,7 +19,7 @@ spec:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
               - matchExpressions:
-                  - key: 5stack-dev-server
+                  - key: nvidia-gpu
                     operator: In
                     values:
                       - "true"
@@ -25,22 +27,25 @@ spec:
         - key: nvidia.com/gpu
           operator: Exists
           effect: NoSchedule
+      # Mark this pod as a critical add-on; when enabled, the critical add-on
+      # scheduler reserves resources for critical add-on pods so that they can
+      # be rescheduled after a failure.
+      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
       priorityClassName: "system-node-critical"
       runtimeClassName: nvidia
       containers:
-        - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.0
+        - image: nvcr.io/nvidia/k8s-device-plugin:v0.18.1
           name: nvidia-device-plugin-ctr
-          env:
-            - name: FAIL_ON_INIT_ERROR
-              value: "false"
+          env: []
           securityContext:
             allowPrivilegeEscalation: false
             capabilities:
               drop: ["ALL"]
           volumeMounts:
-            - name: device-plugin
+            - name: kubelet-device-plugins-dir
               mountPath: /var/lib/kubelet/device-plugins
       volumes:
-        - name: device-plugin
+        - name: kubelet-device-plugins-dir
           hostPath:
-            path: /var/lib/kubelet/device-plugins
\ No newline at end of file
+            path: /var/lib/kubelet/device-plugins
+            type: Directory
diff --git a/utils/output_redirect.sh b/utils/output_redirect.sh
new file mode 100644
index 0000000..c9d2eb8
--- /dev/null
+++ b/utils/output_redirect.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+output_redirect() {
+    if [ "$DEBUG" = true ]; then
+        "$@"
+    else
+        "$@" >/dev/null
+    fi
+}
diff --git a/setup-env.sh b/utils/setup-env.sh
similarity index 98%
rename from setup-env.sh
rename to utils/setup-env.sh
index 1062d60..97b73fe 100755
--- a/setup-env.sh
+++ b/utils/setup-env.sh
@@ -67,13 +67,6 @@ ask_reverse_proxy() {
     fi
 }
 
-output_redirect() {
-    if [ "$DEBUG" = true ]; then
-        "$@"
-    else
-        "$@" >/dev/null
-    fi
-}
 
 migrate_secrets_to_vault() {
     local secret_file=$1
diff --git a/utils/utils.sh b/utils/utils.sh
index 366436f..c77b1ff 100755
--- a/utils/utils.sh
+++ b/utils/utils.sh
@@ -3,6 +3,8 @@
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 
 # Source all utility functions
+source "$SCRIPT_DIR/output_redirect.sh"
+source "$SCRIPT_DIR/setup-env.sh"
 source "$SCRIPT_DIR/update_env_var.sh"
 source "$SCRIPT_DIR/copy_config_or_secrets.sh"
 source "$SCRIPT_DIR/replace_rand32_in_env_files.sh"