diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c055b09
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,94 @@
FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu20.04

# Prevent timezone questions during package installations.
# ARG (not ENV) so the setting is build-time only and does not leak into the
# runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# Install basic OS dependencies in a single layer; the apt lists are removed
# in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y \
    build-essential \
    cmake \
    ffmpeg \
    git \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libopenblas-dev \
    libsm6 \
    libsndfile1 \
    libxext6 \
    portaudio19-dev \
    python3-pip \
    python3.9 \
    python3.9-dev \
    wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set Python 3.9 as default
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1 \
    && python -m pip install --upgrade pip

# Install Miniconda
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh \
    && bash /tmp/miniconda.sh -b -p /opt/conda \
    && rm /tmp/miniconda.sh

# Add conda to path
ENV PATH="/opt/conda/bin:${PATH}"

# Create a working directory
WORKDIR /app

# First, copy only the environment file so the (slow) conda-env layer stays
# cached until the environment definition itself changes
COPY environment_cu117.yml /app/

# Create conda environment
RUN conda env create -f environment_cu117.yml

# Make the conda environment the default for all following RUN instructions
SHELL ["conda", "run", "-n", "instag", "/bin/bash", "-c"]

# Install OpenFace for facial action unit extraction.
# FIX: binaries are placed under /app/OpenFace/bin so they match the
# ENV PATH="/app/OpenFace/bin:..." set below. The previous
# `cp -r .../build/bin /app/OpenFace` made /app/OpenFace itself the bin
# directory, leaving FeatureExtraction off the PATH.
RUN git clone https://github.com/TadasBaltrusaitis/OpenFace.git /tmp/OpenFace \
    && cd /tmp/OpenFace \
    && bash ./download_models.sh \
    && mkdir -p build \
    && cd build \
    && cmake -D CMAKE_BUILD_TYPE=RELEASE .. \
    && make -j4 \
    && make install \
    && mkdir -p /app/OpenFace \
    && cp -r /tmp/OpenFace/build/bin /app/OpenFace/ \
    && cp -r /tmp/OpenFace/lib /app/OpenFace/ \
    && cp -r /tmp/OpenFace/build/lib /app/OpenFace/ \
    && rm -rf /tmp/OpenFace

# Install additional required dependencies.
# PyTorch3D is deliberately best-effort; the braces group the fallback echo so
# it cannot be confused with the rest of the && chain, which must succeed.
RUN { pip install "git+https://github.com/facebookresearch/pytorch3d.git" \
      || echo "PyTorch3D installation failed, please check compatibility with PyTorch version"; } \
    && pip install tensorflow-gpu==2.10.0 \
    && pip install openmim \
    && mim install mmcv-full==1.7.1 prettytable

# Copy the repository (except for large data files).
# NOTE(review): the submodule step below needs .git in the build context —
# confirm .dockerignore does not exclude it, or run
# `git submodule update --init --recursive` on the host before building.
COPY . /app/

# Properly initialize and install submodules in one step to avoid race conditions
RUN git submodule update --init --recursive \
    && cd /app/submodules/diff-gaussian-rasterization && pip install -e . \
    && cd /app/submodules/simple-knn && pip install -e . \
    && cd /app/gridencoder && pip install -e .

# Create directories for data and output
RUN mkdir -p /app/data /app/output

# Entrypoint script that activates the conda environment and then execs the
# requested command (exec makes it PID 1 so it receives docker stop signals).
# FIX: printf is used instead of `echo '...\n...'` — under the bash SHELL
# selected above, echo does not expand \n, which produced a one-line broken
# script with a literal "\n" in the shebang.
RUN printf '%s\n' \
      '#!/bin/bash' \
      'eval "$(conda shell.bash hook)"' \
      'conda activate instag' \
      'exec "$@"' \
      > /app/entrypoint.sh \
    && chmod +x /app/entrypoint.sh

# Add OpenFace binaries to PATH
ENV PATH="/app/OpenFace/bin:${PATH}"

ENTRYPOINT ["/app/entrypoint.sh"]

# Default command keeps the container running
CMD ["bash"]
\ No newline at end of file
diff --git a/Dockerfile.sapiens b/Dockerfile.sapiens
new file mode 100644
index 0000000..ac4122e
--- /dev/null
+++ b/Dockerfile.sapiens
@@ -0,0 +1,60 @@
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04

# Prevent timezone questions during package installations (build-time only)
ARG DEBIAN_FRONTEND=noninteractive

# Install basic dependencies
RUN apt-get update && apt-get install -y \
    ffmpeg \
    git \
    git-lfs \
    libgl1-mesa-glx \
    libsm6 \
    libxext6 \
    python3-pip \
    python3.10 \
    python3.10-dev \
    wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set Python 3.10 as default
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 \
    && python -m pip install --upgrade pip

# Install Miniconda
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh \
    && bash /tmp/miniconda.sh -b -p /opt/conda \
    && rm /tmp/miniconda.sh

# Add conda to path
ENV PATH="/opt/conda/bin:${PATH}"

# Create a working directory
WORKDIR /app

# Create sapiens_lite environment with required dependencies.
# FIX: `-y` added to the conda install — without it conda prompts for
# confirmation and aborts in a non-interactive docker build.
RUN conda create -n sapiens_lite python=3.10 -y \
    && conda install -y -n sapiens_lite pytorch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 pytorch-cuda=12.1 -c pytorch -c nvidia \
    && conda run -n sapiens_lite pip install opencv-python tqdm json-tricks

# Create directories for data and outputs
RUN mkdir -p /app/data /app/output /app/data_utils/sapiens/checkpoint

# Initialize git-lfs for Sapiens models
RUN git lfs install

# Copy only necessary scripts
COPY data_utils/sapiens/lite /app/data_utils/sapiens/lite
COPY data_utils/sapiens/run.sh /app/data_utils/sapiens/run.sh
COPY scripts/prepare_sapiens.sh /app/scripts/prepare_sapiens.sh

# Entrypoint script that activates the conda environment and execs the
# command as PID 1. printf is used so the script is written correctly no
# matter which /bin/sh echo variant handles \n escapes.
RUN printf '%s\n' \
      '#!/bin/bash' \
      'eval "$(conda shell.bash hook)"' \
      'conda activate sapiens_lite' \
      'exec "$@"' \
      > /app/entrypoint.sh \
    && chmod +x /app/entrypoint.sh

ENTRYPOINT ["/app/entrypoint.sh"]

# Default command keeps the container running
CMD ["bash"]
\ No newline at end of file
diff --git a/README_docker.md b/README_docker.md
new file mode 100644
index 0000000..e2b9f35
--- /dev/null
+++ b/README_docker.md
@@ -0,0 +1,320 @@
# InsTaG Docker Setup

This document provides instructions for running InsTaG using Docker and Docker Compose for containerized training and inference.
+ +## Prerequisites + +- [Docker](https://docs.docker.com/get-docker/) installed on your system +- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed (for GPU support) +- An NVIDIA GPU with sufficient VRAM (12+ GB recommended) +- NVIDIA drivers compatible with CUDA 11.7 +- The [Basel Face Model (BFM2009)](https://faces.dmi.unibas.ch/bfm/main.php?nav=1-2&id=downloads) (requires registration) + +## Quick Start + +1. Clone the repository (if you haven't already): + ```bash + git clone https://github.com/Fictionarry/InsTaG.git + cd InsTaG + git submodule update --init --recursive + ``` + +2. Run the setup script to build containers and download required resources: + ```bash + chmod +x setup-docker.sh + ./setup-docker.sh + ``` + +3. Download the Basel Face Model: + - Register at [Basel Face Model website](https://faces.dmi.unibas.ch/bfm/main.php?nav=1-2&id=downloads) + - Download the 01_MorphableModel.mat file + - Place it at `data_utils/face_tracking/3DMM/01_MorphableModel.mat` + - Convert the model: + ```bash + ./docker-run.sh convert-bfm + ``` + +## Container Architecture + +The Docker setup consists of two separate containers: + +1. **Main InsTaG Container (`instag`):** + - Based on CUDA 11.7 with Python 3.9 + - Contains PyTorch 1.13.1, TensorFlow 2.10.0, OpenFace + - Used for all training, processing, and inference tasks + +2. **Sapiens Container (`sapiens`):** + - Based on CUDA 12.1 with Python 3.10 + - Contains PyTorch 2.2.1 + - Used specifically for generating geometry priors for short videos + - Only needed if you want to use Sapiens for improved fine-tuning on very short videos + +This dual-container approach is necessary because Sapiens requires a different Python and PyTorch version than the main InsTaG framework. + +## Complete Training Workflow + +### Pre-Training (Identity-Free Stage) + +1. 
Place pre-training videos in the data directory: + ```bash + mkdir -p data/pretrain/person1 + cp /path/to/video.mp4 data/pretrain/person1/person1.mp4 + ``` + +2. Process each video to extract frames and audio: + ```bash + ./docker-run.sh process data/pretrain/person1/person1.mp4 + ``` + +3. Generate teeth masks: + ```bash + ./docker-run.sh teeth-mask data/pretrain/person1 + ``` + +4. Extract facial Action Units: + ```bash + ./docker-run.sh extract-au data/pretrain/person1 + ``` + +5. Run pre-training: + ```bash + ./docker-run.sh pretrain data/pretrain output/pretrain_model 0 + ``` + This will train the universal motion field on all videos in data/pretrain. + +### Adaptation (Person-Specific Stage) + +1. Place a video of the target person: + ```bash + mkdir -p data/alice + cp /path/to/alice_video.mp4 data/alice/alice.mp4 + ``` + +2. Process the video: + ```bash + ./docker-run.sh process data/alice/alice.mp4 + ``` + +3. Generate teeth masks: + ```bash + ./docker-run.sh teeth-mask data/alice + ``` + +4. Extract facial Action Units: + ```bash + ./docker-run.sh extract-au data/alice + ``` + +5. For short videos (< 10 seconds), generate geometry priors: + ```bash + ./docker-run.sh run-sapiens data/alice + ``` + +6. Fine-tune the model: + ```bash + ./docker-run.sh train data/alice output/alice_model 0 + ``` + +7. Synthesize with new audio: + ```bash + ./docker-run.sh synthesize -S data/alice -M output/alice_model --audio path_to_audio.wav --audio_extractor deepspeech + ``` + +## Audio Feature Options + +InsTaG supports multiple audio feature extractors, each with different characteristics: + +1. **DeepSpeech** (default): + - Basic speech features + - Example: + ```bash + ./docker-run.sh extract-audio-features data/alice/audio.wav deepspeech + ./docker-run.sh synthesize -S data/alice -M output/alice_model --audio_extractor deepspeech + ``` + +2. 
**Wav2Vec**: + - Better lip synchronization + - Example: + ```bash + ./docker-run.sh extract-audio-features data/alice/audio.wav wav2vec + ./docker-run.sh synthesize -S data/alice -M output/alice_model --audio_extractor esperanto + ``` + +3. **AVE** (Audio-Visual Entangler): + - Best lip-sync quality for English + - Example: + ```bash + # AVE features are processed on-the-fly + ./docker-run.sh synthesize -S data/alice -M output/alice_model --audio audio.wav --audio_extractor ave + ``` + +4. **HuBERT**: + - Good for non-English languages + - Example: + ```bash + ./docker-run.sh extract-audio-features data/alice/audio.wav hubert + ./docker-run.sh synthesize -S data/alice -M output/alice_model --audio_extractor hubert + ``` + +## Available Commands + +Run `./docker-run.sh` without arguments to see the complete list of available commands: + +``` +Usage: ./docker-run.sh COMMAND [ARGS] + +Available commands: + build - Build the Docker image + build-sapiens - Build the Sapiens Docker image + shell - Open a shell in the container + sapiens-shell - Open a shell in the Sapiens container + prepare - Run the prepare.sh script inside the container + prepare-sapiens - Run the prepare_sapiens.sh script + download-easyportrait-model - Download the EasyPortrait model + convert-bfm - Convert Basel Face Model (requires manual download first) + pretrain ARGS - Run pretrain_con.sh with arguments (data source, output dir, gpu) + train ARGS - Run train_df_few.sh with arguments (data source, output dir, gpu) + process VIDEO_PATH - Process a video using data_utils/process.py + teeth-mask PATH - Generate teeth masks for a given person directory + extract-au PATH - Extract facial Action Units for a person using OpenFace + extract-audio-features PATH TYPE - Extract audio features (types: deepspeech, wav2vec, hubert, ave) + run-sapiens PATH - Generate Sapiens geometry priors for a given person + synthesize ARGS - Run synthesize_fuse.py with arguments +``` + +## Different Training Scenarios + 
+### Training on Very Short Videos (5-10 seconds) + +For very short videos, Sapiens geometry priors are essential: + +```bash +# Process the short video +./docker-run.sh process data/john/john.mp4 + +# Generate teeth masks and extract AUs +./docker-run.sh teeth-mask data/john +./docker-run.sh extract-au data/john + +# Generate geometry priors with Sapiens +./docker-run.sh run-sapiens data/john + +# Fine-tune with default settings +./docker-run.sh train data/john output/john_model 0 +``` + +### Training on Longer Videos (>30 seconds) + +For longer videos, you can skip geometry priors and use the "--long" flag: + +```bash +# Process the video +./docker-run.sh process data/mary/mary.mp4 + +# Generate teeth masks and extract AUs +./docker-run.sh teeth-mask data/mary +./docker-run.sh extract-au data/mary + +# Open a shell to edit the training script +./docker-run.sh shell + +# Inside the container: +# Edit scripts/train_xx_few.sh to add --long flag to the python commands +# Then exit and run: +./docker-run.sh train data/mary output/mary_model 0 +``` + +## Customization + +### Modifying the Dockerfile + +If you need to customize the Docker environment: + +1. Edit the `Dockerfile` (for main container) or `Dockerfile.sapiens` (for Sapiens container) with your changes +2. Rebuild the image with `./docker-run.sh build` or `./docker-run.sh build-sapiens` respectively + +### Using a Different CUDA Version + +The default configuration uses CUDA 11.7 for the main container and CUDA 12.1 for the Sapiens container. To use a different CUDA version: + +1. Edit the `Dockerfile` to change the base image (e.g., to `nvidia/cuda:11.3.1-cudnn8-devel-ubuntu20.04`) +2. Update the environment file reference to use the appropriate file (e.g., `environment.yml` for CUDA 11.3) +3. 
Rebuild the image + +## Troubleshooting + +### Common Issues + +- **"Unable to find teeth mask" error**: + - Make sure you've downloaded the EasyPortrait model: + ```bash + ./docker-run.sh download-easyportrait-model + ``` + - Verify the model exists at `data_utils/easyportrait/fpn-fp-512.pth` + +- **OpenFace FeatureExtraction failures**: + - Make sure your video frames have clear faces visible + - Try with fewer frames initially (use a shorter video) + - Run in the shell for detailed output: + ```bash + ./docker-run.sh shell + # Inside container: + FeatureExtraction -fdir data/person/frames -out_dir data/person/au -aus + ``` + +- **PyTorch3D installation failures**: + - PyTorch3D may fail to install depending on the PyTorch version + - The container will still work for most use cases without PyTorch3D + - If needed, install it manually in the container following their installation guide + +- **GPU not visible in container**: + - Ensure the NVIDIA Container Toolkit is properly installed + - Verify your drivers are compatible with CUDA 11.7 + - Test with `nvidia-smi` on the host + - Inside the container, run: + ```bash + ./docker-run.sh shell + # Inside container: + python -c "import torch; print(torch.cuda.is_available())" + ``` + +- **Out of memory errors during training**: + - Reduce batch size in training scripts + - Use a smaller value for `--init_num` in training scripts + - Free up space by removing cached files: + ```bash + ./docker-run.sh shell + # Inside container: + rm -rf ~/.cache/torch + ``` + +### Handling Submodule Compilation Errors + +If you encounter issues with the CUDA submodules: + +1. Enter the container shell: + ```bash + ./docker-run.sh shell + ``` + +2. Manually install the problematic module: + ```bash + cd /app/submodules/diff-gaussian-rasterization + pip uninstall -y diff_gaussian_rasterization + pip install -e . + + cd /app/submodules/simple-knn + pip uninstall -y simple-knn + pip install -e . 

   cd /app/gridencoder
   pip uninstall -y gridencoder
   pip install -e .
   ```

## Notes

- The containers mount `./data`, `./output`, and `./scripts` directories from your host machine, ensuring that your data and results persist outside the container
- All model weights and training results will be saved to the `./output` directory
- To download the Basel Face Model (BFM2009), you'll need to register on their website and follow the instructions in the training document
- For multi-GPU training, use `CUDA_VISIBLE_DEVICES` in the training scripts or specify a different GPU index in the training commands
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..8ba5c57
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,64 @@
# NOTE(review): the `version` key is obsolete under Compose v2 (the compose
# spec is used instead); it is kept here for the standalone docker-compose v1.
version: '3.8'

services:
  # Main InsTaG container: CUDA 11.7 / Python 3.9, used for training,
  # processing and inference (see README_docker.md).
  instag:
    build:
      context: .
      dockerfile: Dockerfile
    image: instag-training
    container_name: instag-training
    volumes:
      # Mount the data directory
      - ./data:/app/data
      # Mount the output directory
      - ./output:/app/output
      # Optional: Mount a local directory for scripts
      - ./scripts:/app/scripts
      # Optional: Mount custom user code
      # - ./custom:/app/custom
    environment:
      # Ensures NVIDIA GPU is visible
      - NVIDIA_VISIBLE_DEVICES=all
    # Use the NVIDIA Container Runtime (requires the NVIDIA Container Toolkit)
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Start with an interactive shell
    stdin_open: true
    tty: true
    # Keeps the container running
    command: bash

  # Sapiens container: CUDA 12.1 / Python 3.10, only used to generate
  # geometry priors for short videos.
  sapiens:
    build:
      context: .
      dockerfile: Dockerfile.sapiens
    image: instag-sapiens
    container_name: instag-sapiens
    volumes:
      # Mount the data directory
      - ./data:/app/data
      # Mount the output directory
      - ./output:/app/output
      # Optional: Mount a local directory for scripts
      - ./scripts:/app/scripts
    environment:
      # Ensures NVIDIA GPU is visible
      - NVIDIA_VISIBLE_DEVICES=all
    # Use the NVIDIA Container Runtime
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Start with an interactive shell
    stdin_open: true
    tty: true
    # Keeps the container running
    command: bash
\ No newline at end of file
diff --git a/docker-run.sh b/docker-run.sh
new file mode 100755
index 0000000..50006c2
--- /dev/null
+++ b/docker-run.sh
@@ -0,0 +1,233 @@
#!/bin/bash

# Script to help run InsTaG commands inside the Docker container

# Ensure script exits on error
set -e

# Function to print usage information
print_usage() {
    echo "Usage: ./docker-run.sh COMMAND [ARGS]"
    echo ""
    echo "Available commands:"
    echo "  build                - Build the Docker image"
    echo "  build-sapiens        - Build the Sapiens Docker image"
    echo "  shell                - Open a shell in the container"
    echo "  sapiens-shell        - Open a shell in the Sapiens container"
    echo "  prepare              - Run the prepare.sh script inside the container"
    echo "  prepare-sapiens      - Run the prepare_sapiens.sh script"
    echo "  download-easyportrait-model - Download the EasyPortrait model"
    echo "  convert-bfm          - Convert Basel Face Model (requires manual download first)"
    echo "  pretrain ARGS        - Run pretrain_con.sh with arguments (data source, output dir, gpu)"
    echo "  train ARGS           - Run train_df_few.sh with arguments (data source, output dir, gpu)"
    echo "  process VIDEO_PATH   - Process a video using data_utils/process.py"
    echo "  teeth-mask PATH      - Generate teeth masks for a given person directory"
    echo "  extract-au PATH      - Extract facial Action Units for a person using OpenFace"
    echo "  extract-audio-features PATH TYPE - Extract audio features (types: deepspeech, wav2vec, hubert, ave)"
    echo "  run-sapiens PATH     - Generate Sapiens geometry priors for a given person"
    echo "  synthesize ARGS      - Run synthesize_fuse.py with arguments"
    echo ""
    echo "Examples:"
    echo "  ./docker-run.sh build"
    echo "  ./docker-run.sh shell"
    echo "  ./docker-run.sh pretrain data/pretrain output/pretrain_model 0"
    echo "  ./docker-run.sh train data/alice output/alice_model 0"
    echo "  ./docker-run.sh run-sapiens data/alice"
    echo "  ./docker-run.sh extract-audio-features data/alice/audio.wav wav2vec"
}

# Check if there are any arguments
if [ $# -eq 0 ]; then
    print_usage
    exit 1
fi

# Parse command: first argument selects the subcommand, the rest are its args
COMMAND=$1
shift

case $COMMAND in
    build)
        echo "Building Docker image..."
        docker-compose build instag
        ;;

    build-sapiens)
        echo "Building Sapiens Docker image..."
        docker-compose build sapiens
        ;;

    shell)
        echo "Opening shell in container..."
        docker-compose run --rm instag bash
        ;;

    sapiens-shell)
        echo "Opening shell in Sapiens container..."
        docker-compose run --rm sapiens bash
        ;;

    prepare)
        echo "Running prepare.sh in container..."
        docker-compose run --rm instag bash scripts/prepare.sh
        ;;

    download-easyportrait-model)
        # Best-effort download: the trailing || echo keeps the script alive
        # (set -e) so the user gets the manual-download hint instead of a crash
        echo "Downloading EasyPortrait model..."
        docker-compose run --rm instag bash -c "mkdir -p data_utils/easyportrait && \
            wget -O data_utils/easyportrait/fpn-fp-512.pth \
            https://rndml-team-cv.obs.ru-moscow-1.hc.sbercloud.ru/ep_models_v2/fpn-fp-512.pth || \
            echo 'Failed to download. Please check URL in training document and download manually.'"
        ;;

    convert-bfm)
        echo "Converting Basel Face Model..."
        if [ ! -f "data_utils/face_tracking/3DMM/01_MorphableModel.mat" ]; then
            echo "Error: Basel Face Model file not found."
+ echo "Please download it from https://faces.dmi.unibas.ch/bfm/main.php?nav=1-2&id=downloads" + echo "and place it at data_utils/face_tracking/3DMM/01_MorphableModel.mat" + exit 1 + fi + docker-compose run --rm instag bash -c "cd data_utils/face_tracking && python convert_BFM.py" + ;; + + prepare-sapiens) + echo "Running prepare_sapiens.sh in Sapiens container..." + docker-compose run --rm sapiens bash scripts/prepare_sapiens.sh + ;; + + pretrain) + if [ $# -lt 3 ]; then + echo "Error: pretrain requires at least 3 arguments: data source, output dir, gpu" + print_usage + exit 1 + fi + + DATA_SOURCE=$1 + OUTPUT_DIR=$2 + GPU=$3 + shift 3 + + echo "Running pretrain_con.sh with data from $DATA_SOURCE, output to $OUTPUT_DIR, gpu $GPU" + docker-compose run --rm instag bash scripts/pretrain_con.sh $DATA_SOURCE $OUTPUT_DIR $GPU $@ + ;; + + train) + if [ $# -lt 3 ]; then + echo "Error: train requires at least 3 arguments: data source, output dir, gpu" + print_usage + exit 1 + fi + + DATA_SOURCE=$1 + OUTPUT_DIR=$2 + GPU=$3 + shift 3 + + echo "Running train_df_few.sh with data from $DATA_SOURCE, output to $OUTPUT_DIR, gpu $GPU" + docker-compose run --rm instag bash scripts/train_df_few.sh $DATA_SOURCE $OUTPUT_DIR $GPU $@ + ;; + + process) + if [ $# -lt 1 ]; then + echo "Error: process requires a video path" + print_usage + exit 1 + fi + + VIDEO_PATH=$1 + shift + + echo "Processing video at $VIDEO_PATH" + docker-compose run --rm instag python data_utils/process.py $VIDEO_PATH $@ + ;; + + teeth-mask) + if [ $# -lt 1 ]; then + echo "Error: teeth-mask requires a path" + print_usage + exit 1 + fi + + PERSON_PATH=$1 + + echo "Generating teeth masks for $PERSON_PATH" + docker-compose run --rm instag bash -c "export PYTHONPATH=./data_utils/easyportrait && python data_utils/easyportrait/create_teeth_mask.py $PERSON_PATH" + ;; + + extract-au) + if [ $# -lt 1 ]; then + echo "Error: extract-au requires a person path" + print_usage + exit 1 + fi + + PERSON_PATH=$1 + + echo "Extracting 
facial Action Units for $PERSON_PATH using OpenFace..." + docker-compose run --rm instag bash -c "mkdir -p $PERSON_PATH/au && \ + FeatureExtraction -fdir $PERSON_PATH/frames -out_dir $PERSON_PATH/au -aus && \ + cp $PERSON_PATH/au/*.csv $PERSON_PATH/au.csv" + ;; + + extract-audio-features) + if [ $# -lt 2 ]; then + echo "Error: extract-audio-features requires an audio path and feature type" + print_usage + exit 1 + fi + + AUDIO_PATH=$1 + FEATURE_TYPE=$2 + + echo "Extracting $FEATURE_TYPE audio features from $AUDIO_PATH" + case $FEATURE_TYPE in + deepspeech) + docker-compose run --rm instag python data_utils/deepspeech_features/extract_ds_features.py --input $AUDIO_PATH + ;; + wav2vec) + docker-compose run --rm instag python data_utils/wav2vec.py $AUDIO_PATH + ;; + hubert) + docker-compose run --rm instag python data_utils/hubert.py $AUDIO_PATH + ;; + ave) + echo "AVE features are processed on-the-fly during training/inference with --audio_extractor ave" + ;; + *) + echo "Unknown feature type. 
Supported types: deepspeech, wav2vec, hubert, ave" + exit 1 + ;; + esac + ;; + + run-sapiens) + if [ $# -lt 1 ]; then + echo "Error: run-sapiens requires a path" + print_usage + exit 1 + fi + + PERSON_PATH=$1 + + echo "Generating Sapiens geometry priors for $PERSON_PATH using the Sapiens container" + docker-compose run --rm sapiens bash data_utils/sapiens/run.sh $PERSON_PATH + ;; + + synthesize) + if [ $# -lt 2 ]; then + echo "Error: synthesize requires at least -S and -M arguments" + print_usage + exit 1 + fi + + echo "Running synthesize_fuse.py with arguments: $@" + docker-compose run --rm instag python synthesize_fuse.py $@ + ;; + + *) + echo "Unknown command: $COMMAND" + print_usage + exit 1 + ;; +esac \ No newline at end of file diff --git a/setup-docker.sh b/setup-docker.sh new file mode 100755 index 0000000..7611288 --- /dev/null +++ b/setup-docker.sh @@ -0,0 +1,116 @@ +#!/bin/bash +# Script to set up the Docker environment for InsTaG + +# Ensure script exits on error +set -e + +# Print header +echo "==================================" +echo "InsTaG Docker Environment Setup" +echo "==================================" +echo "" + +# Check Docker is installed +if ! command -v docker &> /dev/null; then + echo "Error: Docker is not installed or not in PATH." + echo "Please install Docker first: https://docs.docker.com/get-docker/" + exit 1 +fi + +# Check Docker Compose is installed +if ! docker compose version &> /dev/null; then + echo "Error: Docker Compose is not installed or not in PATH." + echo "Please install Docker Compose first: https://docs.docker.com/compose/install/" + exit 1 +fi + +# Check NVIDIA Docker is installed +if ! command -v nvidia-smi &> /dev/null; then + echo "Warning: NVIDIA drivers may not be installed." + echo "GPU support requires NVIDIA drivers and NVIDIA Container Toolkit." + echo "For more information: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html" + read -p "Continue anyway? 
(y/n) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + exit 1 + fi +fi + +# Make docker-run.sh executable +chmod +x docker-run.sh + +# Create necessary directories if they don't exist +echo "Creating data and output directories..." +mkdir -p data/pretrain +mkdir -p output +mkdir -p data_utils/face_tracking/3DMM +mkdir -p data_utils/easyportrait +mkdir -p submodules + +# Check for submodules +if [ ! -d "submodules/diff-gaussian-rasterization" ] || [ ! -d "submodules/simple-knn" ]; then + echo "Initializing git submodules..." + git submodule update --init --recursive +fi + +# Build the main container +echo "Building main InsTaG container (this may take a while)..." +./docker-run.sh build + +# Ask if user wants to build Sapiens container +read -p "Do you want to build the Sapiens container for geometry priors? (y/n) " -n 1 -r +echo +if [[ $REPLY =~ ^[Yy]$ ]]; then + echo "Building Sapiens container (this may take a while)..." + ./docker-run.sh build-sapiens +fi + +# Ask if user wants to download required models +read -p "Do you want to download required models and resources? (y/n) " -n 1 -r +echo +if [[ $REPLY =~ ^[Yy]$ ]]; then + echo "Downloading models and resources..." + ./docker-run.sh prepare + + # Download EasyPortrait model + read -p "Do you want to download the EasyPortrait model for teeth masking? (y/n) " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + echo "Downloading EasyPortrait model..." + ./docker-run.sh download-easyportrait-model + fi + + # Ask about Sapiens models + read -p "Do you want to download Sapiens models (required for geometry priors)? (y/n) " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + echo "Downloading Sapiens models..." + ./docker-run.sh prepare-sapiens + fi +fi + +# Prompt about Basel Face Model +echo "" +echo "NOTE: The Basel Face Model (BFM2009) is required for face tracking." 
echo "You need to manually download it from https://faces.dmi.unibas.ch/bfm/main.php?nav=1-2&id=downloads"
echo "After downloading, place the file at: data_utils/face_tracking/3DMM/01_MorphableModel.mat"
echo "Then run: ./docker-run.sh convert-bfm"
echo ""

# Print completion message
echo "=================================="
echo "Setup Complete!"
echo "=================================="
echo ""
echo "You can now use the docker-run.sh script to interact with the InsTaG environment."
echo "For a list of available commands, run:"
echo "./docker-run.sh"
echo ""
echo "Next steps:"
echo "1. Download the Basel Face Model (BFM2009) if you haven't already"
# FIX: the placeholder names had been lost (printed as "data/pretrain//.mp4");
# restore explicit <name> placeholders so the instructions are usable
echo "2. Place a pretrain video in data/pretrain/<name>/<name>.mp4"
echo "3. Process the video with: ./docker-run.sh process data/pretrain/<name>/<name>.mp4"
echo "4. Generate teeth masks: ./docker-run.sh teeth-mask data/pretrain/<name>"
echo "5. Extract facial Action Units: ./docker-run.sh extract-au data/pretrain/<name>"
echo ""
echo "For more information, refer to README_docker.md"