diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 726df50..4440d00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: run: | sed -i "s?main?${{ github.event.pull_request.head.ref }}?g" pixi-setup.sh sed -i "s?init.sh | bash?init.sh | sed 's,main,${{ github.event.pull_request.head.ref }},g' | bash?" pixi-setup.sh - ./pixi-setup.sh + printf "\n1\n" | ./pixi-setup.sh osx-64: runs-on: macos-13 @@ -48,7 +48,7 @@ jobs: run: | sed -i "s?main?${{ github.event.pull_request.head.ref }}?g" pixi-setup.sh sed -i "s?init.sh | bash?init.sh | sed 's,main,${{ github.event.pull_request.head.ref }},g' | bash?" pixi-setup.sh - ./pixi-setup.sh + printf "\n1\n" | ./pixi-setup.sh osx-arm64: runs-on: macos-14 @@ -71,4 +71,4 @@ jobs: run: | sed -i "s?main?${{ github.event.pull_request.head.ref }}?g" pixi-setup.sh sed -i "s?init.sh | bash?init.sh | sed 's,main,${{ github.event.pull_request.head.ref }},g' | bash?" pixi-setup.sh - ./pixi-setup.sh + printf "\n1\n" | ./pixi-setup.sh \ No newline at end of file diff --git a/.github/workflows/dispatch_ci.yml b/.github/workflows/dispatch_ci.yml index 6f8db5a..eeb3d6e 100644 --- a/.github/workflows/dispatch_ci.yml +++ b/.github/workflows/dispatch_ci.yml @@ -23,9 +23,9 @@ jobs: - name: Run setup script shell: pixi exec --spec sed -- bash -e {0} run: | - sed -i "s?main?${{ github.event.pull_request.head.ref }}?g" pixi-setup.sh - sed -i "s?init.sh | bash?init.sh | sed 's,main,${{ github.event.pull_request.head.ref }},g' | bash?" pixi-setup.sh - ./pixi-setup.sh + sed -i "s?StatFunGen/pixi-setup/main?${{ github.repository }}/${{ github.ref_name }}?g" pixi-setup.sh + sed -i "s?init.sh | bash?init.sh | sed 's,StatFunGen/pixi-setup/main,${{ github.repository }}/${{ github.ref_name }},g' | bash?" pixi-setup.sh + printf "\n1\n" | ./pixi-setup.sh osx-64_pixi: runs-on: macos-latest @@ -46,9 +46,9 @@ jobs: - name: Run setup script shell: pixi exec --spec sed -- bash -e {0} run: | - sed -i "s?main?${{ github.event.pull_request.head.ref }}?g" pixi-setup.sh - sed -i "s?init.sh | bash?init.sh | sed 's,main,${{ github.event.pull_request.head.ref }},g' | bash?" pixi-setup.sh - ./pixi-setup.sh + sed -i "s?StatFunGen/pixi-setup/main?${{ github.repository }}/${{ github.ref_name }}?g" pixi-setup.sh + sed -i "s?init.sh | bash?init.sh | sed 's,StatFunGen/pixi-setup/main,${{ github.repository }}/${{ github.ref_name }},g' | bash?" pixi-setup.sh + printf "\n1\n" | ./pixi-setup.sh osx-arm64_pixi: runs-on: macos-14 @@ -69,6 +69,6 @@ jobs: - name: Run setup script shell: pixi exec --spec sed -- bash -e {0} run: | - sed -i "s?main?${{ github.event.pull_request.head.ref }}?g" pixi-setup.sh - sed -i "s?init.sh | bash?init.sh | sed 's,main,${{ github.event.pull_request.head.ref }},g' | bash?" pixi-setup.sh - ./pixi-setup.sh + sed -i "s?StatFunGen/pixi-setup/main?${{ github.repository }}/${{ github.ref_name }}?g" pixi-setup.sh + sed -i "s?init.sh | bash?init.sh | sed 's,StatFunGen/pixi-setup/main,${{ github.repository }}/${{ github.ref_name }},g' | bash?" pixi-setup.sh + printf "\n1\n" | ./pixi-setup.sh \ No newline at end of file diff --git a/envs/full_packages.txt b/envs/full_packages.txt new file mode 100644 index 0000000..5f91dec --- /dev/null +++ b/envs/full_packages.txt @@ -0,0 +1,191 @@ +# Full package list - complete bioinformatics environment +# Sections: [global], [global_linux], [python], [r] + +# [global] +ark +awscli +bamtools +bcftools +bedtools +curl +deeptools +diffutils +ensembl-vep +fastqc +fastp +findutils +gatk4 +gawk +gffread +git +go-ghq +grep +gtex-pipeline-gene_model +gtex-pipeline-qtl +gtex-pipeline-rnaseq +htslib +king +kent-tools +less +metal +multiqc +pandoc +parallel +perl +picard +plink +plink2 +python=3.12 +quantas +r-base=4.4 +regtools +ripgrep +rnaseqc +rsem +ruby +samtools +sed +snpeff +snpsift +star +tree +trimmomatic +unzip +util-linux +vim +wget + +# [global_linux] +code-server +fastlmmc +gcta +ldstore +procps-ng +regenie + +# [python] +altair +bash_kernel +bitarray +cython +dill +feather-format +imageio +ipykernel +ipywidgets +ipympl +ipyparallel +ipython +jedi-language-server +jupyterlab +jupyter_client +jupyter_console +jupyter_core +jupyter_server +jupyter-lsp +jupyterlab-lsp +jupyterlab-sos +jupyterhub +markdown +markdown-kernel +matplotlib-base +mofapy2 +multipy +nb_conda_kernels +nbconvert-core +nbdime +nbformat +notebook +numba +numexpr +numpy +openpyxl +pandas +pgenlib +pillow +ptpython +pyarrow +pybigwig +python-ldstore +python-lsp-server +qvalue +scikit-learn +scipy +seaborn +statsmodels +sympy +tqdm +tensorqtl +wand +widgetsnbextension +sos +sos-bash +sos-notebook +sos-python +sos-r + +# [r] +bioconductor-annotationdbi +bioconductor-clusterprofiler +bioconductor-edger +bioconductor-illuminahumanmethylation450kmanifest +bioconductor-illuminahumanmethylationepicmanifest +bioconductor-impute +bioconductor-limma +bioconductor-minfi +bioconductor-mofa2 +bioconductor-org.hs.eg.db +bioconductor-pcatools +bioconductor-preprocesscore +bioconductor-psichomics +bioconductor-qvalue +bioconductor-sesame +bioconductor-sesamedata +bioconductor-variantannotation +r-abind +r-arrow +r-ape +r-bedtoolsr +r-bigsnpr +r-biocmanager +r-bitops +r-catools +r-colocboost +r-complexupset +r-corrplot +r-devtools +r-flashpcar +r-fsusier +r-gbj +r-genio +r-glmnet +r-gplots +r-gtools +r-gwasrapidd +r-harmonicmeanp +r-httpgd +r-hudson +r-igraph +r-irkernel +r-languageserver +r-mediation +r-missforest +r-mr.ash.alpha +r-mr.mash.alpha +r-mvsusier +r-pacman +r-pecotmr +r-pgenlibr +r-plinkqc +r-profvis +r-qgg +r-qqman +r-qrank +r-quadprog +r-quantreg +r-rfast +r-rgl +r-rmtstat +r-seurat +r-tidyverse +r-watershedr diff --git a/envs/minimal_packages.txt b/envs/minimal_packages.txt new file mode 100644 index 0000000..def5f96 --- /dev/null +++ b/envs/minimal_packages.txt @@ -0,0 +1,74 @@ +# Minimal package list - general purpose only, no bioinformatics tools +# Sections: [global], [python], [r] + +# [global] +curl +diffutils +findutils +gawk +git +grep +less +parallel +pandoc +perl +python=3.12 +r-base=4.4 +ripgrep +ruby +sed +tree +unzip +vim +wget + +# [python] +cython +dill +ipykernel +ipywidgets +ipympl +ipyparallel +ipython +jedi-language-server +jupyterlab +jupyter_client +jupyter_console +jupyter_core +jupyter_server +jupyter-lsp +jupyterlab-lsp +matplotlib-base +nb_conda_kernels +nbconvert-core +nbdime +nbformat +notebook +numba +numexpr +numpy +openpyxl +pandas +pillow +ptpython +pyarrow +python-lsp-server +scikit-learn +scipy +seaborn +statsmodels +sympy +tqdm +widgetsnbextension + +# [r] +r-biocmanager +r-corrplot +r-devtools +r-glmnet +r-httpgd +r-igraph +r-irkernel +r-languageserver +r-pacman +r-tidyverse diff --git a/init.sh b/init.sh index 596ce60..be91659 100755 --- a/init.sh +++ b/init.sh @@ -1,10 +1,13 @@ #!/usr/bin/env bash -set -o errexit -o nounset -o xtrace -o pipefail +set -o errexit -o nounset -o pipefail + +# Use PIXI_HOME if set (e.g. custom HPC path), otherwise fall back to default +PIXI_HOME="${PIXI_HOME:-${HOME}/.pixi}" # Use Rprofile.site so that only pixi-installed R can see r-base packages -mkdir -p ${HOME}/.pixi/envs/python/lib/R/etc -echo ".libPaths('${HOME}/.pixi/envs/r-base/lib/R/library')" > ${HOME}/.pixi/envs/python/lib/R/etc/Rprofile.site +mkdir -p ${PIXI_HOME}/envs/python/lib/R/etc +echo ".libPaths('${PIXI_HOME}/envs/r-base/lib/R/library')" > ${PIXI_HOME}/envs/python/lib/R/etc/Rprofile.site # Create config files for rstudio mkdir -p ${HOME}/.config/rstudio @@ -13,7 +16,7 @@ directory=${HOME}/.local/var/lib/rstudio-server EOF tee ${HOME}/.config/rstudio/rserver.conf << EOF -rsession-which-r=${HOME}/.pixi/envs/r-base/bin/R +rsession-which-r=${PIXI_HOME}/envs/r-base/bin/R auth-none=1 database-config-file=${HOME}/.config/rstudio/database.conf server-daemonize=0 @@ -21,10 +24,10 @@ server-data-dir=${HOME}/.local/var/run/rstudio-server server-user=${USER} EOF -# Register Juypter kernels -find ${HOME}/.pixi/envs/python/share/jupyter/kernels/ -maxdepth 1 -mindepth 1 -type d | \ +# Register Jupyter kernels +find ${PIXI_HOME}/envs/python/share/jupyter/kernels/ -maxdepth 1 -mindepth 1 -type d | \ xargs -I % jupyter-kernelspec install --log-level=50 --user % -find ${HOME}/.pixi/envs/r-base/share/jupyter/kernels/ -maxdepth 1 -mindepth 1 -type d | \ +find ${PIXI_HOME}/envs/r-base/share/jupyter/kernels/ -maxdepth 1 -mindepth 1 -type d | \ xargs -I % jupyter-kernelspec install --log-level=50 --user % # ark --install @@ -49,7 +52,9 @@ else code-server --install-extension usernamehw.errorlens fi -# Temporary fix to run post-link scripts -bash -c "PREFIX=${HOME}/.pixi/envs/r-base PATH=${HOME}/.pixi/envs/r-base/bin:${PATH} .bioconductor-genomeinfodbdata-post-link.sh" -find ${HOME}/.pixi/envs/r-base/bin -name '*bioconductor-*-post-link.sh' | \ -xargs -I % bash -c "PREFIX=${HOME}/.pixi/envs/r-base PATH=${HOME}/.pixi/envs/r-base/bin:${PATH} %" +# Temporary fix to run post-link scripts (only present in full install with bioconductor packages) +if [ -f "${PIXI_HOME}/envs/r-base/bin/.bioconductor-genomeinfodbdata-post-link.sh" ]; then + bash -c "PREFIX=${PIXI_HOME}/envs/r-base PATH=${PIXI_HOME}/envs/r-base/bin:${PATH} .bioconductor-genomeinfodbdata-post-link.sh" +fi +find ${PIXI_HOME}/envs/r-base/bin -name '*bioconductor-*-post-link.sh' | \ +xargs -I % bash -c "PREFIX=${PIXI_HOME}/envs/r-base PATH=${PIXI_HOME}/envs/r-base/bin:${PATH} %" diff --git a/pixi-install.sh b/pixi-install.sh index aa9db9e..8a87e16 100755 --- a/pixi-install.sh +++ b/pixi-install.sh @@ -27,8 +27,9 @@ else fi # Configure shell -if ! grep -q 'export PATH=${HOME}/.pixi/bin:${PATH}' "${CONFIG_FILE}"; then - echo 'export PATH=${HOME}/.pixi/bin:${PATH}' >> "${CONFIG_FILE}" +_pixi_bin="${PIXI_HOME:-${HOME}/.pixi}/bin" +if ! grep -q "${_pixi_bin}" "${CONFIG_FILE}"; then + echo "export PATH=${_pixi_bin}:\${PATH}" >> "${CONFIG_FILE}" fi if ! grep -q 'unset PYTHONPATH' "${CONFIG_FILE}"; then echo "unset PYTHONPATH" >> "${CONFIG_FILE}" @@ -38,4 +39,6 @@ if ! grep -q 'export PYDEVD_DISABLE_FILE_VALIDATION=1' "${CONFIG_FILE}"; then fi # set default channels -mkdir -p ${HOME}/.pixi && echo 'default_channels = ["dnachun", "conda-forge", "bioconda"]' > ${HOME}/.pixi/config.toml +_pixi_dir="${PIXI_HOME:-${HOME}/.pixi}" +mkdir -p "${_pixi_dir}" +echo 'default_channels = ["dnachun", "conda-forge", "bioconda"]' > "${_pixi_dir}/config.toml" diff --git a/pixi-setup.sh b/pixi-setup.sh index a1837a2..68d7c9c 100755 --- a/pixi-setup.sh +++ b/pixi-setup.sh @@ -5,7 +5,7 @@ set -o nounset -o errexit -o pipefail safe_expose_remove() { environment=$1 executable=$2 - if [ -d ${HOME}/.pixi/envs/${environment} ]; then + if [ -d ${PIXI_HOME}/envs/${environment} ]; then exposed_exes=$(pixi global list --environment ${environment} | tail -n 3 | head -n 1 | tr ',' '\n') if [[ " ${exposed_exes[*]} " =~ [[:space:]]${executable}[[:space:]] ]]; then pixi global expose remove ${executable} @@ -17,18 +17,18 @@ export -f safe_expose_remove install_global_packages() { package_list=$1 - + # Check if the directory exists, if not, create an empty list of packages - if [ ! -d ${HOME}/.pixi/envs ]; then - mkdir -p ${HOME}/.pixi/envs + if [ ! -d ${PIXI_HOME}/envs ]; then + mkdir -p ${PIXI_HOME}/envs existing_pkgs="" else - existing_pkgs=$(ls ${HOME}/.pixi/envs 2>/dev/null | sort -u || echo "") + existing_pkgs=$(ls ${PIXI_HOME}/envs 2>/dev/null | sort -u || echo "") fi - + # Use the existing packages or empty string to compare with desired packages missing_pkgs=$(comm -13 <(echo "$existing_pkgs" | sort -u) <(sort -u ${package_list})) - + if (($(echo ${missing_pkgs} | wc -w) > 0 )); then pixi global install $(echo ${missing_pkgs} | tr '\n' ' ') fi @@ -39,14 +39,14 @@ export -f install_global_packages inject_packages() { environment=$1 package_list=$2 - + # Check if the environment exists before trying to list packages - if [ ! -d ${HOME}/.pixi/envs/${environment} ]; then + if [ ! -d ${PIXI_HOME}/envs/${environment} ]; then missing_pkgs=$(cat ${package_list}) else missing_pkgs=$(comm -13 <(pixi global list --environment ${environment} | cut -f 1 -d ' ' | head -n -6 | tail -n +3 | sort -u) <(sort -u ${package_list})) fi - + if (( $(echo ${missing_pkgs} | wc -w) > 0 )); then pixi global install --environment ${environment} $(echo ${missing_pkgs} | tr '\n' ' ') fi @@ -54,47 +54,114 @@ inject_packages() { export -f inject_packages -# If PIXI_HOME is not set already, set it to ${HOME}/.pixi -if [[ -z ${PIXI_HOME:-} ]]; then - export PIXI_HOME="${HOME}/.pixi" -fi +extract_section() { + local file=$1 section=$2 + awk "/^# \[${section}\]/{found=1; next} /^# \[/{found=0} found && !/^#/ && NF" "$file" +} + +export -f extract_section + +# --- Prompt: installation path --- +_default_pixi_home="${HOME}/.pixi" +echo "" +echo "Where should pixi store its environments and packages?" +echo " Default: ${_default_pixi_home}" +echo " NOTE: Home directories have storage quotas. On HPC, prefer VAST over" +echo " GPFS/Lustre for better small-file I/O. e.g. /lab/\$USER/.pixi" +echo "" +read -r -p "Installation path [${_default_pixi_home}]: " _user_pixi_home +export PIXI_HOME="${_user_pixi_home:-${_default_pixi_home}}" +export RATTLER_CACHE_DIR="${PIXI_HOME}/cache" +echo "Using PIXI_HOME=${PIXI_HOME}" + +# --- Prompt: install type --- +echo "" +echo "Choose installation type:" +echo " 1) minimal - Essential CLI tools + Python data science + base R" +echo " ~5 GB, ~100k files" +echo " 2) full - Complete bioinformatics environment (samtools, GATK, plink," +echo " STAR, Seurat, bioconductor packages, etc.)" +echo " ~35 GB, ~350k files" +echo "" +read -r -p "Install type [1=minimal, 2=full, default=1]: " _install_type_input +case "${_install_type_input:-1}" in + 2|full) INSTALL_TYPE="full" ;; + *) INSTALL_TYPE="minimal" ;; +esac +echo "Installation type: ${INSTALL_TYPE}" # Ensure PIXI_HOME exists -mkdir -p ${PIXI_HOME} +mkdir -p "${PIXI_HOME}" # Install pixi and source it right after installation to move forward curl -fsSL https://raw.githubusercontent.com/StatFunGen/pixi-setup/main/pixi-install.sh | bash -export PATH="${HOME}/.pixi/bin:${PATH}" +export PATH="${PIXI_HOME}/bin:${PATH}" -# Install global packages -install_global_packages <(curl -fsSL https://raw.githubusercontent.com/StatFunGen/pixi-setup/main/envs/global_packages.txt | grep -v "#") +if [[ "${INSTALL_TYPE}" == "minimal" ]]; then + # --- Minimal install --- + _minimal_url="https://raw.githubusercontent.com/StatFunGen/pixi-setup/main/envs/minimal_packages.txt" + _minimal_file=$(mktemp) + curl -fsSL "${_minimal_url}" -o "${_minimal_file}" -if [[ "$OSTYPE" == "linux-gnu"* ]]; then - safe_expose_remove util-linux kill -fi + install_global_packages <(extract_section "${_minimal_file}" "global") + install_global_packages <(echo "coreutils") -install_global_packages <(echo "coreutils") + echo "Installing minimal R packages ..." + inject_packages r-base <(extract_section "${_minimal_file}" "r") -if [[ "$OSTYPE" == "linux-gnu"* ]]; then - safe_expose_remove coreutils kill - safe_expose_remove coreutils uptime - install_global_packages <(curl -fsSL https://raw.githubusercontent.com/StatFunGen/pixi-setup/main/envs/global_packages_linux.txt | grep -v "#") -fi + echo "Installing minimal Python packages ..." + inject_packages python <(extract_section "${_minimal_file}" "python") -echo "Installing recommended R libraries ..." -inject_packages r-base <(curl -fsSL https://raw.githubusercontent.com/StatFunGen/pixi-setup/main/envs/r_packages.txt | grep -v "#") + rm -f "${_minimal_file}" + pixi clean cache -y -echo "Installing recommended Python packages ..." -inject_packages python <(curl -fsSL https://raw.githubusercontent.com/StatFunGen/pixi-setup/main/envs/python_packages.txt | grep -v "#") + # Install config files (init.sh handles missing bioconductor packages gracefully) + curl -fsSL https://raw.githubusercontent.com/StatFunGen/pixi-setup/main/init.sh | bash -pixi clean cache -y +else + # --- Full install --- + _full_url="https://raw.githubusercontent.com/StatFunGen/pixi-setup/main/envs/full_packages.txt" + _full_file=$(mktemp) + curl -fsSL "${_full_url}" -o "${_full_file}" -# Install config files -curl -fsSL https://raw.githubusercontent.com/StatFunGen/pixi-setup/main/init.sh | bash + install_global_packages <(extract_section "${_full_file}" "global") + + if [[ "$OSTYPE" == "linux-gnu"* ]]; then + safe_expose_remove util-linux kill + fi + + install_global_packages <(echo "coreutils") + + if [[ "$OSTYPE" == "linux-gnu"* ]]; then + safe_expose_remove coreutils kill + safe_expose_remove coreutils uptime + install_global_packages <(extract_section "${_full_file}" "global_linux") + fi + + echo "Installing recommended R libraries ..." + inject_packages r-base <(extract_section "${_full_file}" "r") + + echo "Installing recommended Python packages ..." + inject_packages python <(extract_section "${_full_file}" "python") + + rm -f "${_full_file}" + pixi clean cache -y + + # Install config files + curl -fsSL https://raw.githubusercontent.com/StatFunGen/pixi-setup/main/init.sh | bash +fi # print messages BB='\033[1;34m' NC='\033[0m' -echo -e "${BB}Installation completed.${NC}" +if [[ "$OSTYPE" == "darwin"* ]]; then + _shell_config="${HOME}/.zshrc" +else + _shell_config="${HOME}/.bashrc" +fi +echo -e "${BB}Installation completed. Pixi is installed at: ${PIXI_HOME}${NC}" echo -e "${BB}Note: From now on you can install other R packages as needed with 'pixi global install --environment r-base ...'${NC}" echo -e "${BB}and Python with 'pixi global install --environment python ...'${NC}" +echo -e "${BB}To keep the package cache in ${PIXI_HOME}/cache across all future sessions,${NC}" +echo -e "${BB}add this line to ${_shell_config}:${NC}" +echo -e "${BB} export RATTLER_CACHE_DIR=\"${PIXI_HOME}/cache\"${NC}"