diff --git a/docker/runpod/Dockerfile b/docker/runpod/Dockerfile
new file mode 100644
index 0000000..01fdf01
--- /dev/null
+++ b/docker/runpod/Dockerfile
@@ -0,0 +1,48 @@
+# Optional Dockerfile for DiffSplat: GPU dev + services for RunPod
+# Non-invasive: lives at repo root under docker/runpod/
+# Build from the repository root (COPY paths below are relative to it):
+#   docker build -f docker/runpod/Dockerfile .
+
+FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
+
+# Build-time only: keeps apt non-interactive without leaking into the runtime env
+ARG DEBIAN_FRONTEND=noninteractive
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+# Core system deps
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    openssh-server \
+    nginx \
+    curl wget ca-certificates \
+    git \
+    build-essential \
+    python3 python3-pip python3-dev \
+    # cuDNN 9 for CUDA 12 provider paths (ONNX/Torch)
+    libcudnn9-cuda-12 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Python: Torch cu121 + tools (pin to known-good)
+RUN pip3 install --no-cache-dir --upgrade pip wheel setuptools && \
+    pip3 install --no-cache-dir \
+    torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 \
+    --index-url https://download.pytorch.org/whl/cu121 && \
+    pip3 install --no-cache-dir xformers==0.0.27 jupyterlab filebrowser-cli
+
+# App deps (requirements.txt must exist in the build context, or COPY fails)
+COPY requirements.txt /tmp/requirements.txt
+RUN if [ -f /tmp/requirements.txt ]; then pip3 install --no-cache-dir -r /tmp/requirements.txt; fi
+
+# SSH daemon setup
+RUN mkdir -p /var/run/sshd /root/.ssh && \
+    chmod 700 /root/.ssh
+
+# Expose standard service ports
+EXPOSE 22 80 9090 9999
+
+# Add startup script
+COPY docker/runpod/start.sh /start.sh
+RUN chmod +x /start.sh
+
+CMD ["/start.sh"]
diff --git a/docker/runpod/start.sh b/docker/runpod/start.sh
new file mode 100644
index 0000000..570f8a1
--- /dev/null
+++ b/docker/runpod/start.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+# Container entrypoint: starts nginx, sshd, JupyterLab and FileBrowser (each
+# service other than sshd only if installed), then sleeps forever to keep the
+# container alive.
+#
+# Optional environment:
+#   PUBLIC_KEY        SSH public key added to root's authorized_keys
+#   JUPYTER_PASSWORD  used as the JupyterLab access token
+set -euo pipefail
+
+# Print a message with the script's log prefix.
+log() { echo "[start] $*"; }
+
+# Start nginx, falling back to the bare binary if the service wrapper fails.
+start_nginx() {
+  if command -v nginx >/dev/null 2>&1; then
+    log "Starting nginx on :80"
+    service nginx start || nginx
+  else
+    log "nginx not installed; skipping"
+  fi
+}
+
+# Prepare host keys and root's authorized_keys, then launch sshd.
+setup_ssh() {
+  log "Configuring SSH"
+  mkdir -p /var/run/sshd /root/.ssh
+  chmod 700 /root/.ssh
+  # Host keys
+  if [ ! -f /etc/ssh/ssh_host_rsa_key ]; then
+    ssh-keygen -A
+  fi
+  # Authorized keys from env; skip a key that is already present so container
+  # restarts do not keep appending duplicates.
+  if [ -n "${PUBLIC_KEY:-}" ]; then
+    local auth_keys=/root/.ssh/authorized_keys
+    if ! grep -qxF -- "$PUBLIC_KEY" "$auth_keys" 2>/dev/null; then
+      echo "$PUBLIC_KEY" >> "$auth_keys"
+    fi
+    chmod 600 "$auth_keys"
+  fi
+  # Start SSHD
+  /usr/sbin/sshd
+  log "SSH running. Example: ssh -i ~/.ssh/id_rsa -p 22 root@"
+}
+
+# Launch JupyterLab in the background; output goes to /var/log/jupyter.log.
+start_jupyter() {
+  if command -v jupyter >/dev/null 2>&1; then
+    # --allow-root: the image sets no USER, so this runs as root, and Jupyter
+    # refuses to start as root without the flag.
+    local jupyter_args=(--no-browser --allow-root --ip=0.0.0.0 --port=9999)
+    if [ -n "${JUPYTER_PASSWORD:-}" ]; then
+      # An array keeps each option a single word: the token survives spaces
+      # and the password is truly empty instead of a literal pair of quotes.
+      jupyter_args+=("--ServerApp.token=${JUPYTER_PASSWORD}" "--ServerApp.password=")
+    fi
+    log "Starting JupyterLab on :9999"
+    nohup jupyter lab "${jupyter_args[@]}" >/var/log/jupyter.log 2>&1 &
+  else
+    log "Jupyter not installed; skipping"
+  fi
+}
+
+# Launch FileBrowser in the background; output goes to /var/log/filebrowser.log.
+# NOTE(review): no listen address is passed; confirm the default is reachable
+# from outside the container.
+start_filebrowser() {
+  if command -v filebrowser >/dev/null 2>&1; then
+    log "Starting FileBrowser on :9090"
+    nohup filebrowser -r / -p 9090 >/var/log/filebrowser.log 2>&1 &
+  else
+    log "FileBrowser not installed; skipping"
+  fi
+}
+
+# Log the CUDA_VISIBLE_DEVICES value (nothing is actually exported).
+export_env() {
+  log "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-not-set}"
+}
+
+start_nginx
+setup_ssh
+start_jupyter
+start_filebrowser
+export_env
+
+log "Container started. Services: SSH:22, Nginx:80, FileBrowser:9090, Jupyter:9999"
+log "Tail logs: tail -f /var/log/jupyter.log /var/log/filebrowser.log"
+
+sleep infinity