diff --git a/README.md b/README.md
index a4cfc84..dfeef1c 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,9 @@ The collection includes roles for user initialization, server hardening, Postgre
 9. **Update Web App** 🔄
    - Updates and manages a Django web application by fetching the latest changes from the Git repository, installing dependencies, collecting static files, and applying database migrations.
 
+10. **CUDA Support** 🔄
+    - Sets up CUDA on a GPU-enabled Ubuntu instance and optionally installs PyTorch and/or TensorFlow, as well as DeepLabCut.
+
 ## Usage
 
 To use this collection, include the desired roles in your playbook. Below is an example of how to use multiple roles from this collection:
diff --git a/roles/cudaSupport/README.md b/roles/cudaSupport/README.md
new file mode 100644
index 0000000..71999f9
--- /dev/null
+++ b/roles/cudaSupport/README.md
@@ -0,0 +1,35 @@
+# Ansible Role: cudaSupport
+
+Prepares a GPU-enabled Ubuntu machine for machine-learning work: it installs the NVIDIA driver, Python and a Miniconda environment, optionally TensorFlow and/or PyTorch, then DeepLabCut, the project scripts, and a TigerVNC server for remote desktop access.
+
+## Role Variables
+
+- `scripts_repo_url`: URL of the Git repository with the scripts to check out (no default; must be set).
+- `scripts_repo_branch`: Branch of the scripts repository to check out (no default; must be set).
+- `scripts_repo_dest`: Destination path on the target machine for the checked-out repository (no default; must be set).
+
+...
+
+## Role: TigerVNC Server Setup
+
+### Overview
+
+This Ansible role automates the installation and configuration of the TigerVNC server on an Ubuntu machine. It is designed to set up a graphical desktop environment that allows users to connect remotely via VNC (Virtual Network Computing). The role ensures that the specified user can access the machine's desktop environment securely and efficiently.
+
+### Features
+
+- **Install TigerVNC Server**: Installs the `tigervnc-standalone-server` package, enabling VNC functionality on the machine.
+- **Install Desktop Environment**: Installs the XFCE desktop environment along with necessary components to provide a lightweight and user-friendly interface.
+- **User Configuration**: Creates a VNC password for the specified user, ensuring secure access to the VNC session.
+- **Startup Script**: Configures a startup script that initializes the XFCE desktop environment when the VNC server starts.
+- **Systemd Service**: Sets up a systemd service to manage the VNC server, ensuring it starts automatically on boot and can be easily controlled.
+- **Customizable**: Allows customization of the VNC password and the username through variables, making it adaptable to different environments.
+
+### Usage
+
+To use this role, define the following variables in your playbook or inventory:
+
+- `new_username`: The username of the user who will connect via VNC.
+- `vault_vnc_password`: The password for the VNC session (should be stored in a vault).
+
+Run the role against your target Ubuntu machine to set up the TigerVNC server and enable remote desktop access for the specified user.
diff --git a/roles/cudaSupport/defaults/main.yml b/roles/cudaSupport/defaults/main.yml
new file mode 100644
index 0000000..dac0688
--- /dev/null
+++ b/roles/cudaSupport/defaults/main.yml
@@ -0,0 +1,13 @@
+---
+# Default variables of the cudaSupport role; override them in the playbook or inventory.
+# Version numbers are quoted so YAML keeps them as strings ("3.10" must not become 3.1).
+python_version: "3.10"  # Is required for DeepLabCut 2.x
+cuda_version: "12.4"  # Passed to conda as pytorch-cuda=<cuda_version>
+nvidia_driver_version: ""  # Leave empty to use the recommended version
+pytorch_version: "2.5.1"  # Must be a real release that has pytorch-cuda=<cuda_version> builds
+tensorflow_version: "2.10"  # Last version supported for DLC 2.x
+new_username: "ml_user"  # User created by the role; owns Miniconda and the VNC session
+conda_env_name: "ml_env"  # Name of the Conda environment
+additional_packages: []  # Additional packages to install into the Conda environment
+ml_backend: ['tensorflow', 'pytorch']  # Backends to install; any subset of these two
+dlc_branch: "v2.3.10"  # DeepLabCut git ref to install (alternative: "pytorch_dlc")
diff --git a/roles/cudaSupport/handlers/main.yml b/roles/cudaSupport/handlers/main.yml
new file mode 100644
index 0000000..b3dfc70
--- /dev/null
+++ b/roles/cudaSupport/handlers/main.yml
@@ -0,0 +1,5 @@
+---
+# Makes systemd re-read its unit files; runs only when a task notifies "Reload systemd".
+- name: Reload systemd
+  systemd:
+    daemon_reload: true
diff --git a/roles/cudaSupport/tasks/checkout_scripts.yml b/roles/cudaSupport/tasks/checkout_scripts.yml
new file mode 100644
index 0000000..c7e7d5c
--- /dev/null
+++ b/roles/cudaSupport/tasks/checkout_scripts.yml
@@ -0,0 +1,19 @@
+---
+# Fetches the project scripts and installs their Python requirements into the Conda environment.
+- name: Checkout the Git repository
+  git:
+    repo: "{{ scripts_repo_url }}"  # URL of the Git repository
+    dest: "{{ scripts_repo_dest }}"  # Destination path for the repository
+    version: "{{ scripts_repo_branch }}"  # Branch to check out
+    force: true  # Discards local modifications in the working copy
+  become: true
+  become_user: "{{ new_username }}"
+
+# pip runs inside the Conda environment, from the root of the checked-out repository.
+- name: Install additional dependencies from requirements.txt
+  command: >-
+    /home/{{ new_username }}/miniconda/bin/conda run -n {{ conda_env_name }} pip install -r requirements.txt
+  args:
+    chdir: "{{ scripts_repo_dest }}"
+  become: true
+  become_user: "{{ new_username }}"
diff --git a/roles/cudaSupport/tasks/install_DeepLabCut.yml b/roles/cudaSupport/tasks/install_DeepLabCut.yml
new file mode 100644
index 0000000..c42a46e
--- /dev/null
+++ b/roles/cudaSupport/tasks/install_DeepLabCut.yml
@@ -0,0 +1,48 @@
+---
+# Installs DeepLabCut from its Git repository into the Conda environment, plus Qt/CMake system packages.
+- name: Install Qt Creator and development packages
+  apt:
+    name:
+      - qtcreator
+      - qtbase5-dev
+      - qt5-qmake
+      - cmake
+    state: present
+    update_cache: true
+  become: true
+
+- name: Install dependencies DeepLabCut
+  command: >-
+    /home/{{ new_username }}/miniconda/bin/conda install -n {{ conda_env_name }} -c conda-forge pytables==3.8.0 -y
+  args:
+    chdir: /home/{{ new_username }}/miniconda
+  become: true
+  become_user: "{{ new_username }}"
+
+# -n targets the role's environment; without it conda installs pip into "base" instead.
+- name: Activate Conda environment and install pip
+  command: >-
+    /home/{{ new_username }}/miniconda/bin/conda install -n {{ conda_env_name }} -y pip
+  args:
+    chdir: /home/{{ new_username }}/miniconda
+  become: true
+  become_user: "{{ new_username }}"
+
+# Variant without TensorFlow: installs the gui, modelzoo and wandb extras.
+- name: "Install DeepLabCut {{ dlc_branch }}"
+  command: >-
+    /home/{{ new_username }}/miniconda/bin/conda run -n {{ conda_env_name }} pip install "deeplabcut[gui,modelzoo,wandb] @ git+https://github.com/DeepLabCut/DeepLabCut.git@{{ dlc_branch }}"
+  args:
+    chdir: /home/{{ new_username }}/miniconda
+  become: true
+  become_user: "{{ new_username }}"
+  when: "'tensorflow' not in ml_backend"
+
+- name: "Install DeepLabCut {{ dlc_branch }} with TensorFlow support"
+  command: |
+
/home/{{ new_username }}/miniconda/bin/conda run -n {{ conda_env_name }} pip install "deeplabcut[gui,modelzoo,tf] @ git+https://github.com/DeepLabCut/DeepLabCut.git@{{ dlc_branch }}"
+  args:
+    chdir: /home/{{ new_username }}/miniconda
+  become: true
+  become_user: "{{ new_username }}"
+  when: "'tensorflow' in ml_backend"
diff --git a/roles/cudaSupport/tasks/install_cuda.yml b/roles/cudaSupport/tasks/install_cuda.yml
new file mode 100644
index 0000000..96615d5
--- /dev/null
+++ b/roles/cudaSupport/tasks/install_cuda.yml
@@ -0,0 +1,48 @@
+---
+# Installs the NVIDIA driver. A configured nvidia_driver_version wins; otherwise the
+# version that `ubuntu-drivers devices` marks as "recommended" is used.
+- name: Add NVIDIA PPA
+  apt_repository:
+    repo: "ppa:graphics-drivers/ppa"
+    state: present
+  become: true
+
+- name: Update apt cache
+  apt:
+    update_cache: true
+  become: true
+
+- name: Install ubuntu-drivers tool
+  apt:
+    name: "ubuntu-drivers-common"
+    state: present
+  become: true
+
+- name: Detect recommended NVIDIA driver version
+  command: ubuntu-drivers devices
+  register: nvidia_driver_info
+  changed_when: false  # Read-only query, never a change
+  become: true
+
+# Fills in the version only when none is set yet, so a user-supplied value is kept.
+- name: Set NVIDIA driver version variable
+  set_fact:
+    nvidia_driver_version: "{{ item | regex_search('nvidia-driver-(\\d+)') | default('', true) | regex_replace('nvidia-driver-', '') }}"
+  loop: "{{ nvidia_driver_info.stdout_lines }}"
+  when:
+    - nvidia_driver_version | default('', true) | string | length == 0
+    - "'recommended' in item"
+
+# Skipped when no version was configured and none could be detected.
+- name: Install NVIDIA driver
+  apt:
+    name: "nvidia-driver-{{ nvidia_driver_version }}"
+    state: present
+  when: nvidia_driver_version | default('', true) | string | length > 0
+  become: true
+
+- name: Reboot the server if required
+  reboot:
+    msg: "Rebooting after NVIDIA driver installation."
+  when: nvidia_driver_version | default('', true) | string | length > 0  # Reboot only if a driver version was resolved
+  become: true
diff --git a/roles/cudaSupport/tasks/install_python.yml b/roles/cudaSupport/tasks/install_python.yml
new file mode 100644
index 0000000..1f933be
--- /dev/null
+++ b/roles/cudaSupport/tasks/install_python.yml
@@ -0,0 +1,21 @@
+---
+# Installs the requested Python interpreter (plus -dev and -venv) from the deadsnakes PPA.
+- name: Add deadsnakes PPA for recent Python versions
+  apt_repository:
+    repo: "ppa:deadsnakes/ppa"
+    state: present
+  become: true
+
+- name: Update apt cache
+  apt:
+    update_cache: true
+  become: true
+
+- name: "Install Python {{ python_version }} and the venv package"
+  apt:
+    name:
+      - "python{{ python_version }}"
+      - "python{{ python_version }}-dev"
+      - "python{{ python_version }}-venv"
+    state: present
+  become: true
diff --git a/roles/cudaSupport/tasks/install_pytorch.yml b/roles/cudaSupport/tasks/install_pytorch.yml
new file mode 100644
index 0000000..d1eecb2
--- /dev/null
+++ b/roles/cudaSupport/tasks/install_pytorch.yml
@@ -0,0 +1,41 @@
+---
+# Installs PyTorch into the Conda environment and verifies that it can be imported there.
+- name: Install PyTorch in the Conda environment for the new user
+  command: >-
+    /home/{{ new_username }}/miniconda/bin/conda install -n {{ conda_env_name }}
+    pytorch=={{ pytorch_version }} torchvision torchaudio pytorch-cuda={{ cuda_version }}
+    -c pytorch -c nvidia -y
+  args:
+    chdir: /home/{{ new_username }}/miniconda
+  become: true
+  become_user: "{{ new_username }}"
+
+# Importing torch inside the environment yields a non-zero rc when the package is missing.
+# `conda list torch` looked at the base environment and exits 0 even without a match.
+- name: Check for PyTorch installation with Conda
+  command: >-
+    /home/{{ new_username }}/miniconda/bin/conda run -n {{ conda_env_name }}
+    python -c "import torch; print(torch.__version__)"
+  register: conda_check
+  changed_when: false
+  become: true
+  become_user: "{{ new_username }}"
+  ignore_errors: true  # The fail task below reports a missing installation
+
+- name: Display PyTorch version
+  debug:
+    msg: "PyTorch is installed. Version: {{ conda_check.stdout }}"
+  when: conda_check.rc == 0  # Only run if the command was successful
+
+- name: Raise an error if PyTorch is not installed
+  fail:
+    msg: "PyTorch is not installed in the Conda environment."
+  when: conda_check.rc != 0  # Only run if the command failed
+
+- name: Install additional packages in the Conda environment for the new user
+  command: >-
+    /home/{{ new_username }}/miniconda/bin/conda install -n {{ conda_env_name }} {{ item }} -y
+  loop: "{{ additional_packages }}"
+  when: additional_packages | length > 0
+  become: true
+  become_user: "{{ new_username }}"
diff --git a/roles/cudaSupport/tasks/install_tensorflow.yml b/roles/cudaSupport/tasks/install_tensorflow.yml
new file mode 100644
index 0000000..a2a8bef
--- /dev/null
+++ b/roles/cudaSupport/tasks/install_tensorflow.yml
@@ -0,0 +1,51 @@
+---
+# Installs TensorFlow (GPU build when nvidia-smi works) and verifies that it can be imported.
+- name: Check for NVIDIA GPU
+  command: nvidia-smi
+  register: nvidia_smi_check
+  changed_when: false
+  ignore_errors: true  # A failing or missing nvidia-smi selects the CPU package
+
+# The conda package is named with a hyphen: tensorflow-gpu.
+- name: Set TensorFlow package based on GPU presence
+  set_fact:
+    tensorflow_package: "{{ 'tensorflow-gpu' if nvidia_smi_check.rc == 0 else 'tensorflow' }}"
+
+- name: "Install TensorFlow {{ tensorflow_version }} in the Conda environment for the new user"
+  command: >-
+    /home/{{ new_username }}/miniconda/bin/conda install -n {{ conda_env_name }}
+    {{ tensorflow_package }}=={{ tensorflow_version }} -y
+  args:
+    chdir: /home/{{ new_username }}/miniconda
+  become: true
+  become_user: "{{ new_username }}"
+
+# Importing tensorflow inside the environment yields a non-zero rc when it is missing.
+# `conda list <name>` looked at the base environment and exits 0 even without a match.
+- name: Check for TensorFlow installation with Conda
+  command: >-
+    /home/{{ new_username }}/miniconda/bin/conda run -n {{ conda_env_name }}
+    python -c "import tensorflow as tf; print(tf.__version__)"
+  register: conda_check
+  changed_when: false
+  become: true
+  become_user: "{{ new_username }}"
+  ignore_errors: true  # The fail task below reports a missing installation
+
+- name: Display TensorFlow version
+  debug:
+    msg: "{{ tensorflow_package | capitalize }} is installed. Version: {{ conda_check.stdout }}"
+  when: conda_check.rc == 0  # Only run if the command was successful
+
+- name: Raise an error if TensorFlow is not installed
+  fail:
+    msg: "{{ tensorflow_package | capitalize }} is not installed in the Conda environment."
+  when: conda_check.rc != 0  # Only run if the command failed
+
+- name: Install additional packages in the Conda environment for the new user
+  command: >-
+    /home/{{ new_username }}/miniconda/bin/conda install -n {{ conda_env_name }} {{ item }} -y
+  loop: "{{ additional_packages }}"
+  when: additional_packages | length > 0
+  become: true
+  become_user: "{{ new_username }}"
diff --git a/roles/cudaSupport/tasks/install_tigervnc.yml b/roles/cudaSupport/tasks/install_tigervnc.yml
new file mode 100644
index 0000000..1e518b9
--- /dev/null
+++ b/roles/cudaSupport/tasks/install_tigervnc.yml
@@ -0,0 +1,116 @@
+---
+# Installs XFCE and TigerVNC, sets the VNC password and registers a systemd unit for display :1.
+- name: Update apt cache
+  apt:
+    update_cache: true
+  become: true
+
+- name: Install required packages
+  apt:
+    name:
+      - xfce4
+      - xfce4-goodies
+      - tigervnc-standalone-server
+      - tigervnc-xorg-extension
+    state: present
+  become: true
+
+- name: Ensure the .vnc directory exists
+  file:
+    path: /home/{{ new_username }}/.vnc
+    state: directory
+    mode: '0700'  # Holds the VNC password file, so keep it private to the user
+  become: true
+  become_user: "{{ new_username }}"
+
+# The password goes in via stdin, so it is never part of a shell command line and cannot
+# break quoting; no_log keeps it out of the Ansible output; umask makes the file private at once.
+- name: Create VNC password file
+  shell: umask 077 && vncpasswd -f > /home/{{ new_username }}/.vnc/passwd
+  args:
+    stdin: "{{ vault_vnc_password }}"
+    creates: /home/{{ new_username }}/.vnc/passwd
+  no_log: true
+  become: true
+  become_user: "{{ new_username }}"
+
+- name: Set permissions for VNC password file
+  file:
+    path: /home/{{ new_username }}/.vnc/passwd
+    owner: "{{ new_username }}"
+    group: "{{ new_username }}"
+    mode: '0600'
+  become: true
+
+- name: Create VNC startup script
+  copy:
+    dest: /home/{{ new_username }}/.vnc/xstartup
+    content: |
+      #!/bin/sh
+      unset SESSION_MANAGER
+      unset DBUS_SESSION_BUS_ADDRESS
+      startxfce4
+    mode: '0755'
+  become: true
+  become_user: "{{ new_username }}"
+
+- name: Create VNC service file
+  copy:
+    dest: /etc/systemd/system/vncserver@.service
+    content: |
+      [Unit]
+      Description=Start TigerVNC server at startup
+      After=syslog.target network.target
+
+      [Service]
+      Type=simple
+      User={{ new_username }}
+      Group={{
new_username }}
+      WorkingDirectory=/home/{{ new_username }}
+
+      PIDFile=/home/{{ new_username }}/.vnc/%H:590%i.pid
+      ExecStartPre=-/bin/sh -c "/usr/bin/vncserver -kill :%i > /dev/null 2>&1"
+      ExecStart=/usr/bin/vncserver -fg -depth 24 -geometry 1920x1080 -localhost no :%i
+      ExecStop=/usr/bin/vncserver -kill :%i
+
+      [Install]
+      WantedBy=multi-user.target
+  become: true
+
+- name: Reload systemd daemons
+  command: systemctl daemon-reload  # Lets systemd pick up the unit file
+  become: true
+
+- name: Enable VNC server service
+  systemd:
+    name: vncserver@1.service
+    enabled: true
+    state: started
+  become: true
+
+- name: Kill any existing VNC servers
+  command: vncserver -kill :*
+  become: true  # NOTE(review): no become_user, so this runs as root, not as the VNC user - confirm intent
+  ignore_errors: true
+
+- name: Start VNC server service
+  systemd:
+    name: vncserver@1.service
+    state: started
+  become: true
+
+- name: Check the status of the VNC server service
+  command: systemctl status vncserver@1
+  register: vnc_status
+  become: true
+
+- name: Display VNC server status
+  debug:
+    var: vnc_status.stdout_lines
+
+- name: Open VNC port in UFW
+  ufw:
+    rule: allow
+    port: '5901'  # Adjust this if using a different display number
+    proto: 'tcp'
+  become: true
diff --git a/roles/cudaSupport/tasks/main.yml b/roles/cudaSupport/tasks/main.yml
new file mode 100644
index 0000000..309deb7
--- /dev/null
+++ b/roles/cudaSupport/tasks/main.yml
@@ -0,0 +1,30 @@
+---
+# Entry point of the cudaSupport role; the task files are included in execution order.
+- name: Update and Reboot if Kernel is Updated
+  include_tasks: update_and_reboot.yml
+
+- name: Install NVIDIA Driver and CUDA Toolkit
+  include_tasks: install_cuda.yml
+
+- name: Install Python
+  include_tasks: install_python.yml
+
+- name: Prepare Environment
+  include_tasks: prepare_environment.yml
+
+- name: Install TensorFlow if specified
+  include_tasks: install_tensorflow.yml
+  when: "'tensorflow' in ml_backend"
+
+- name: Install PyTorch if specified
+  include_tasks: install_pytorch.yml
+  when: "'pytorch' in ml_backend"
+
+- name: Install DeepLabCut
+  include_tasks: install_DeepLabCut.yml
+
+- name: Fetching Scripts
+  include_tasks: checkout_scripts.yml
+
+- name: Installing TigerVNC for remote access
+  include_tasks: install_tigervnc.yml
diff --git a/roles/cudaSupport/tasks/prepare_environment.yml b/roles/cudaSupport/tasks/prepare_environment.yml
new file mode 100644
index 0000000..98724a2
--- /dev/null
+++ b/roles/cudaSupport/tasks/prepare_environment.yml
@@ -0,0 +1,46 @@
+---
+# Creates the ML user, installs Miniconda into its home directory and creates the Conda environment.
+- name: Install ACL package
+  apt:
+    name: acl  # presumably needed so Ansible can become the unprivileged user - confirm
+    state: present
+  become: true
+
+- name: Create a new user
+  user:
+    name: "{{ new_username }}"
+    shell: /bin/bash
+    create_home: true
+    state: present
+  become: true
+
+- name: Check if Miniconda is already installed
+  command: /home/{{ new_username }}/miniconda/bin/conda --version
+  register: miniconda_installed
+  changed_when: false
+  ignore_errors: true  # A non-zero rc only means Miniconda still has to be installed
+  become: true
+  become_user: "{{ new_username }}"
+
+# "set -euo pipefail" stops at the first failing step, so a failed download or install fails
+# the task. The former trailing "source ~/.bashrc" is gone: it had no lasting effect and hid errors.
+- name: Install Miniconda for the new user
+  shell: |
+    set -euo pipefail
+    wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /home/{{ new_username }}/miniconda.sh
+    bash /home/{{ new_username }}/miniconda.sh -b -p /home/{{ new_username }}/miniconda
+    echo 'export PATH="/home/{{ new_username }}/miniconda/bin:$PATH"' >> /home/{{ new_username }}/.bashrc
+  args:
+    executable: /bin/bash
+  when: miniconda_installed.rc != 0
+  become: true
+  become_user: "{{ new_username }}"
+
+- name: "Create Conda environment for user {{ new_username }}"
+  command: >-
+    /home/{{ new_username }}/miniconda/bin/conda create -n {{ conda_env_name }}
+    python={{ python_version }} -y
+  args:
+    creates: "/home/{{ new_username }}/miniconda/envs/{{ conda_env_name }}"
+  become: true
+  become_user: "{{ new_username }}"
diff --git a/roles/cudaSupport/tasks/update_and_reboot.yml b/roles/cudaSupport/tasks/update_and_reboot.yml
new file mode 100644
index 0000000..8e5bea0
--- /dev/null
+++ b/roles/cudaSupport/tasks/update_and_reboot.yml
@@ -0,0
+1,37 @@
+---
+# Upgrades all packages and reboots when the system asks for it.
+- name: Update apt packages
+  apt:
+    update_cache: true
+    upgrade: dist
+  become: true
+
+- name: Check if kernel was updated
+  command: uname -r
+  register: current_kernel
+  changed_when: false  # Read-only query, never a change
+  become: true
+
+- name: Check if a restart is required
+  stat:
+    path: /var/run/reboot-required
+  register: reboot_required
+  become: true
+
+# ansible_kernel exists only when facts were gathered; default() keeps the condition
+# evaluable without them instead of failing on an undefined variable.
+- name: Reboot if kernel was updated
+  reboot:
+    msg: "Rebooting after kernel update"
+    pre_reboot_delay: 0
+    post_reboot_delay: 50
+  when: reboot_required.stat.exists or current_kernel.stdout != (ansible_kernel | default(current_kernel.stdout))
+  become: true
+
+# wait_for_connection checks the real Ansible connection instead of assuming SSH on port 22.
+- name: Wait for the server to come back online
+  wait_for_connection:
+    delay: 10  # Initial delay before checking
+    timeout: 300  # Total timeout for waiting (in seconds)
+  when: reboot_required.stat.exists or current_kernel.stdout != (ansible_kernel | default(current_kernel.stdout))
+  become: true