AnantharamanLab · biociao · Jul 8, 2025 · Jul 8, 2025
diff --git a/README.md b/README.md
@@ -319,7 +319,7 @@ Choose one:
    1. `/path/to/ViWrap/yamls/ViWrap.yml` indicates the ViWrap.yml file address. This file was placed within the yamls folder of ViWrap directory.
 
    2. `/path/to/ViWrap_conda_environments` indicates the directory that you will need to use to store all conda environments for ViWrap.
-   3. For the first step, one can also use one-line conda environment setting up string as: `conda create -c bioconda -c conda-forge -p /path/to/ViWrap_conda_environments/ViWrap python=3.8 biopython=1.80 mamba=1.3.0 numpy=1.24.2 pandas=1.5.3 pyfastx=0.8.4 matplotlib=3.6.3 seaborn=0.12.2 diamond=2.0.15 hmmer=3.3.2 pyparsing=2.4.7` 
+   3. For the first step, you can alternatively create the conda environment with a single command: `conda create -c bioconda -c conda-forge -p /path/to/ViWrap_conda_environments/ViWrap python=3.8 biopython=1.80 mamba=1.5.1 numpy=1.24.2 pandas=1.5.3 pyfastx=0.8.4 matplotlib=3.6.3 seaborn=0.12.2 diamond=2.0.15 hmmer=3.3.2 pyparsing=2.4.7`
 
 2. Install in normal conda folder
 

diff --git a/scripts/master_downloader.py b/scripts/master_downloader.py
@@ -55,7 +55,8 @@ def main(args):
         sys.exit(f"Could not find conda env dirs within {args['conda_env_dir']}") 
 
     if os.path.exists(args['db_dir']):
-        sys.exit(f"The db dir of {args['db_dir']} has also ready been set up")
+        #sys.exit(f"The db dir of {args['db_dir']} has also ready been set up")
+        logger.info(f"The db dir of {args['db_dir']} has also ready been set up")
     else:
         os.mkdir(args['db_dir'])
 
@@ -66,123 +67,160 @@ def main(args):
 
     # Step 2  Make VIBRANT db
     vibrant_db_dir_absolute_path = os.path.abspath(args['VIBRANT_db'])
-    os.system(f"conda run -p {os.path.join(args['conda_env_dir'], 'ViWrap-VIBRANT')} bash {os.path.join(args['conda_env_dir'], 'ViWrap-VIBRANT/bin/download-db.sh')} {vibrant_db_dir_absolute_path}")
-
+    if os.path.exists(f"{os.path.join(args['db_dir'], 'VIBRANT_db')}"):
+        logger.info(f"{time_current} | Found VIBRANT_db. Skip download.")
+    else:  
+        os.system(f"conda run -p {os.path.join(args['conda_env_dir'], 'ViWrap-VIBRANT')} bash {os.path.join(args['conda_env_dir'], 'ViWrap-VIBRANT/bin/download-db.sh')} {vibrant_db_dir_absolute_path}")
+
     time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
     logger.info(f"{time_current} | VIBRANT db has been set up")  
 
     # Step 3  Make geNomad db
-    os.system(f"conda run -p {os.path.join(args['conda_env_dir'], 'ViWrap-geNomad')} genomad download-database {args['db_dir']}")
+    if os.path.exists(f"{os.path.join(args['db_dir'], 'genomad_db')}"):
+        logger.info(f"{time_current} | Found genomad_db. Skip download.")
+    else:  
+        os.system(f"conda run -p {os.path.join(args['conda_env_dir'], 'ViWrap-geNomad')} genomad download-database {args['db_dir']}")
 
     time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
     logger.info(f"{time_current} | geNomad db has been set up")      
 
     # Step 4  Make Tax classification db
-    os.mkdir(args['Tax_classification_db'])
+    if os.path.exists(f"{os.path.join(args['db_dir'], args['Tax_classification_db'])}"):
+        logger.info(f"{time_current} | Found {args['Tax_classification_db']}. Tax_classification_db db seems already set up.")
+    else:
+        os.mkdir(args['Tax_classification_db'])
 
-    ###############################################
-    # Part I Download NCBI RefSeq viral protein db#
-    ###############################################
+        ###############################################
+        # Part I Download NCBI RefSeq viral protein db#
+        ###############################################
 
-    ## Step 4.1 Download NCBI RefSeq viral protein and protein gpff
-    scripts.downloadDB.dl_refseq_viral_protein(args['Tax_classification_db'])
-    scripts.downloadDB.dl_refseq_viral_protein_gpff(args['Tax_classification_db'])
+        ## Step 4.1 Download NCBI RefSeq viral protein and protein gpff
+        scripts.downloadDB.dl_refseq_viral_protein(args['Tax_classification_db'])
+        scripts.downloadDB.dl_refseq_viral_protein_gpff(args['Tax_classification_db'])
 
-    ## Step 4.2 Parse to get protein to NCBI taxonomy info
-    scripts.downloadDB.parse_gpff(args['Tax_classification_db'])
+        ## Step 4.2 Parse to get protein to NCBI taxonomy info
+        scripts.downloadDB.parse_gpff(args['Tax_classification_db'])
 
-    ## Step 4.3 Grep NCBI RefSeq viral proteins with taxonomy info
-    scripts.downloadDB.grep_NCBI_RefSeq_viral_proteins_w_tax(args['Tax_classification_db'])
+        ## Step 4.3 Grep NCBI RefSeq viral proteins with taxonomy info
+        scripts.downloadDB.grep_NCBI_RefSeq_viral_proteins_w_tax(args['Tax_classification_db'])
 
-    ## Step 4.4 Reformat NCBI tax to ICTV 8-rank tax
-    ictv_tax_info = os.path.join(args['root_dir'], 'database/ICTV_Master_Species_List.txt')
-    pro2ictv_8_rank_tax = os.path.join(args['Tax_classification_db'], 'pro2ictv_8_rank_tax.txt')
-    scripts.downloadDB.reformat_NCBI_tax_to_ICTV_8_rank_tax(args['Tax_classification_db'], ictv_tax_info, pro2ictv_8_rank_tax)
+        ## Step 4.4 Reformat NCBI tax to ICTV 8-rank tax
+        ictv_tax_info = os.path.join(args['root_dir'], 'database/ICTV_Master_Species_List.txt')
+        pro2ictv_8_rank_tax = os.path.join(args['Tax_classification_db'], 'pro2ictv_8_rank_tax.txt')
+        scripts.downloadDB.reformat_NCBI_tax_to_ICTV_8_rank_tax(args['Tax_classification_db'], ictv_tax_info, pro2ictv_8_rank_tax)
 
-    ## Step 4.5 Make diamond blastp db
-    scripts.downloadDB.make_diamond_db(args['Tax_classification_db'])
+        ## Step 4.5 Make diamond blastp db
+        scripts.downloadDB.make_diamond_db(args['Tax_classification_db'])
 
-    ## Step 4.6 Remove useless files
-    scripts.downloadDB.remove(args['Tax_classification_db'])
-
-    ##########################
-    # Part II Download VOG db#
-    ##########################
-    ## Step 4.7 Parse to get VOG marker list
-    vog_marker_table = os.path.join(args['root_dir'], 'database/VOG_marker_table.txt')
-    os.system(f"cp {os.path.join(args['root_dir'], 'database/VOG_marker_table.txt')} {os.path.join(args['Tax_classification_db'], 'VOG_marker_table.txt')}")
-    vog_marker_list = scripts.downloadDB.get_vog_marker_table(vog_marker_table)
-
-    ## Step 4.8 Download the latest VOG db and pick VOG markers
-    scripts.downloadDB.get_marker_vog_hmm(vog_marker_list, args['Tax_classification_db'])
-
-    #############################
-    # Part III Download IMGVR db#
-    #############################
-    ## Step 4.9 cp and degzip IMGVR db
-    os.system(f"cat {os.path.join(args['root_dir'], 'database/IMGVR_high-quality_phage_vOTU_representatives.tar.gz*')} > {os.path.join(args['Tax_classification_db'], 'IMGVR_high-quality_phage_vOTU_representatives.tar.gz')}")
-    os.system(f"tar xzf {os.path.join(args['Tax_classification_db'], 'IMGVR_high-quality_phage_vOTU_representatives.tar.gz')} --directory {args['Tax_classification_db']}")
-    os.system(f"mv {os.path.join(args['Tax_classification_db'], 'IMGVR_high-quality_phage_vOTU_representatives/*')} {args['Tax_classification_db']}")
-    os.system(f"rm {os.path.join(args['Tax_classification_db'], 'IMGVR_high-quality_phage_vOTU_representatives.tar.gz')}")
-    os.system(f"rm -r {os.path.join(args['Tax_classification_db'], 'IMGVR_high-quality_phage_vOTU_representatives')}")
-
-    time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
-    logger.info(f"{time_current} | Tax classification db has been set up")    
+        ## Step 4.6 Remove useless files
+        scripts.downloadDB.remove(args['Tax_classification_db'])
+        
+        ##########################
+        # Part II Download VOG db#
+        ##########################
+        ## Step 4.7 Parse to get VOG marker list
+        vog_marker_table = os.path.join(args['root_dir'], 'database/VOG_marker_table.txt')
+        os.system(f"cp {os.path.join(args['root_dir'], 'database/VOG_marker_table.txt')} {os.path.join(args['Tax_classification_db'], 'VOG_marker_table.txt')}")
+        vog_marker_list = scripts.downloadDB.get_vog_marker_table(vog_marker_table)
+
+        ## Step 4.8 Download the latest VOG db and pick VOG markers
+        scripts.downloadDB.get_marker_vog_hmm(vog_marker_list, args['Tax_classification_db'])
+        
+        #############################
+        # Part III Download IMGVR db#
+        #############################
+        ## Step 4.9 cp and degzip IMGVR db
+        os.system(f"cat {os.path.join(args['root_dir'], 'database/IMGVR_high-quality_phage_vOTU_representatives.tar.gz*')} > {os.path.join(args['Tax_classification_db'], 'IMGVR_high-quality_phage_vOTU_representatives.tar.gz')}")
+        os.system(f"tar xzf {os.path.join(args['Tax_classification_db'], 'IMGVR_high-quality_phage_vOTU_representatives.tar.gz')} --directory {args['Tax_classification_db']}")
+        os.system(f"mv {os.path.join(args['Tax_classification_db'], 'IMGVR_high-quality_phage_vOTU_representatives/*')} {args['Tax_classification_db']}")
+        os.system(f"rm {os.path.join(args['Tax_classification_db'], 'IMGVR_high-quality_phage_vOTU_representatives.tar.gz')}")
+        os.system(f"rm -r {os.path.join(args['Tax_classification_db'], 'IMGVR_high-quality_phage_vOTU_representatives')}")
+        
+        time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
+        logger.info(f"{time_current} | Tax classification db has been set up")    
 
 
     # Step 5 Make CheckV db
-    os.system(f"conda run -p {os.path.join(args['conda_env_dir'], 'ViWrap-CheckV')} checkv download_database {args['db_dir']} >/dev/null 2>&1")
-    os.system(f"mv {os.path.join(args['db_dir'], 'checkv-db-v*')} {args['CheckV_db']}")
-    os.system(f"conda run -p {os.path.join(args['conda_env_dir'], 'ViWrap-CheckV')} export CHECKVDB={args['CheckV_db']}")
-
-    time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
-    logger.info(f"{time_current} | CheckV db has been set up")  
+    if os.path.exists(f"{os.path.join(args['db_dir'], args['CheckV_db'])}"):
+        logger.info(f"{time_current} | Found CheckV_db. CheckV db seems already set up.")
+    else:
+        # os.path.exists() never expands wildcards, so match the versioned 'checkv-db-v*' entry by name instead.
+        if any(entry.startswith('checkv-db-v') for entry in os.listdir(args['db_dir'])):
+            logger.info(f"{time_current} | Found checkv-db-v*. Skip download.")
+        else:
+            os.system(f"conda run -p {os.path.join(args['conda_env_dir'], 'ViWrap-CheckV')} checkv download_database {args['db_dir']} >/dev/null 2>&1")
+        os.system(f"mv {os.path.join(args['db_dir'], 'checkv-db-v*')} {args['CheckV_db']}")  # braces restored so the configured target path is interpolated
+        os.system(f"conda run -p {os.path.join(args['conda_env_dir'], 'ViWrap-CheckV')} export CHECKVDB={args['CheckV_db']}")
+        time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
+        logger.info(f"{time_current} | CheckV db has been set up")
 
 
     # Step 6 Make iPHoP db
-    os.system(f"wget -c https://portal.nersc.gov/cfs/m342/iphop/db/iPHoP.latest_rw.tar.gz --no-check-certificate -O {os.path.join(args['db_dir'], 'iPHoP.latest_rw.tar.gz')}") 
-    os.mkdir(args['iPHoP_db'])
-    os.system(f"tar xzf {os.path.join(args['db_dir'], 'iPHoP.latest_rw.tar.gz')} --directory {args['iPHoP_db']}")
-    os.system(f"mv {os.path.join(args['db_dir'], 'iPHoP_db/*_pub_rw/*')} {args['iPHoP_db']}")
-    os.system(f"rm {os.path.join(args['db_dir'], 'iPHoP.latest_rw.tar.gz')}")
-    os.system(f"rmdir {os.path.join(args['db_dir'], 'iPHoP_db/*_pub_rw/')}")
-
-    time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
-    logger.info(f"{time_current} | iPHoP db has been set up")     
-
+    if os.path.exists(f"{os.path.join(args['db_dir'], args['iPHoP_db'])}"):
+        logger.info(f"{time_current} | Found iPHoP_db. iPHoP db seems already set up.")
+    else:
+        if os.path.exists(f"{os.path.join(args['db_dir'], 'iPHoP.latest_rw.tar.gz')}"):
+            logger.info(f"{time_current} | Found iPHoP.latest_rw.tar.gz. Skip download.")
+        else:
+            os.system(f"wget -c https://portal.nersc.gov/cfs/m342/iphop/db/iPHoP.latest_rw.tar.gz --no-check-certificate -O {os.path.join(args['db_dir'], 'iPHoP.latest_rw.tar.gz')}")
+        # Unpack the archive into iPHoP_db, flatten the *_pub_rw sub-folder, then delete the archive.
+        os.mkdir(args['iPHoP_db'])
+        os.system(f"tar xzf {os.path.join(args['db_dir'], 'iPHoP.latest_rw.tar.gz')} --directory {args['iPHoP_db']}")
+        os.system(f"mv {os.path.join(args['db_dir'], 'iPHoP_db/*_pub_rw/*')} {args['iPHoP_db']}")
+        os.system(f"rm {os.path.join(args['db_dir'], 'iPHoP.latest_rw.tar.gz')}")
+        os.system(f"rmdir {os.path.join(args['db_dir'], 'iPHoP_db/*_pub_rw/')}")
+
+        time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
+        logger.info(f"{time_current} | iPHoP db has been set up")
+
 
     # Step 7 Make GTDB-Tk db release 214
-    os.system(f"wget -c ftp://download.nmdc.cn/tools/meta/gtdb/gtdbtk_r214_data.tar.gz -O {os.path.join(args['db_dir'], 'gtdbtk_r214_data.tar.gz')} --no-check-certificate")   
-    os.mkdir(args['GTDB_db'])     
-    os.system(f"tar xzf {os.path.join(args['db_dir'], 'gtdbtk_r214_data.tar.gz')} --directory {args['GTDB_db']}")
-    os.system(f"mv {os.path.join(args['GTDB_db'], 'release214')} {os.path.join(args['GTDB_db'], 'GTDB_db')}")  
-    os.system(f"rm {os.path.join(args['db_dir'], 'gtdbtk_r214_data.tar.gz')}")
-    os.system(f"conda env config vars set GTDBTK_DATA_PATH={os.path.join(args['GTDB_db'], 'GTDB_db')} -p {os.path.join(args['conda_env_dir'], 'ViWrap-GTDBTk')}")
-
-    time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
-    logger.info(f"{time_current} | GTDB-Tk db has been set up") 
+    if os.path.exists(f"{os.path.join(args['db_dir'], args['GTDB_db'])}"):
+        logger.info(f"{time_current} | Found {args['GTDB_db']}. GTDB_db db seems already set up.")
+    else:
+        if os.path.exists(f"{os.path.join(args['db_dir'], 'gtdbtk_r214_data.tar.gz')}"):
+            logger.info(f"{time_current} | Found gtdbtk_r214_data.tar.gz. Skip download.")  
+        else:
+            os.system(f"wget -c ftp://download.nmdc.cn/tools/meta/gtdb/gtdbtk_r214_data.tar.gz -O {os.path.join(args['db_dir'], 'gtdbtk_r214_data.tar.gz')} --no-check-certificate")   
+        os.mkdir(args['GTDB_db'])     
+        os.system(f"tar xzf {os.path.join(args['db_dir'], 'gtdbtk_r214_data.tar.gz')} --directory {args['GTDB_db']}")
+        os.system(f"mv {os.path.join(args['GTDB_db'], 'release214')} {os.path.join(args['GTDB_db'], 'GTDB_db')}")  
+        os.system(f"rm {os.path.join(args['db_dir'], 'gtdbtk_r214_data.tar.gz')}")
+        os.system(f"conda env config vars set GTDBTK_DATA_PATH={os.path.join(args['GTDB_db'], 'GTDB_db')} -p {os.path.join(args['conda_env_dir'], 'ViWrap-GTDBTk')}")
+
+        time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
+        logger.info(f"{time_current} | GTDB-Tk db has been set up") 
 
 
     # Step 8 Download VirSorter2 db
-    os.system(f"wget -c https://osf.io/v46sc/download -O {os.path.join(args['db_dir'], 'db.tgz')} --no-check-certificate")
-    os.system(f"tar -xzf {os.path.join(args['db_dir'], 'db.tgz')} -C {args['db_dir']}")
-    os.system(f"mv {os.path.join(args['db_dir'], 'db')} {args['VirSorter2_db']}")
-    os.system(f"rm {os.path.join(args['db_dir'], 'db.tgz')}")
-    os.system(f"conda run -p {os.path.join(args['conda_env_dir'], 'ViWrap-vs2')} virsorter config --init-source --db-dir={args['VirSorter2_db']} >/dev/null 2>&1")
-
-    time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
-    logger.info(f"{time_current} | VirSorter2 db has been set up")     
+    if os.path.exists(f"{os.path.join(args['db_dir'], args['VirSorter2_db'])}"):
+        logger.info(f"{time_current} | Found {args['VirSorter2_db']}. VirSorter2_db db seems already set up.")
+    else:
+        if os.path.exists(f"{os.path.join(args['db_dir'], 'db.tgz')}"):
+            logger.info(f"{time_current} | Found db.tgz. Skip download.")  
+        else:
+            os.system(f"wget -c https://osf.io/v46sc/download -O {os.path.join(args['db_dir'], 'db.tgz')} --no-check-certificate")
+        os.system(f"tar -xzf {os.path.join(args['db_dir'], 'db.tgz')} -C {args['db_dir']}")
+        os.system(f"mv {os.path.join(args['db_dir'], 'db')} {args['VirSorter2_db']}")
+        os.system(f"rm {os.path.join(args['db_dir'], 'db.tgz')}")
+        os.system(f"conda run -p {os.path.join(args['conda_env_dir'], 'ViWrap-vs2')} virsorter config --init-source --db-dir={args['VirSorter2_db']} >/dev/null 2>&1")
+
+        time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
+        logger.info(f"{time_current} | VirSorter2 db has been set up")     
 
     # Step 9 Download DVF db
-    os.system(f"git clone https://github.com/jessieren/DeepVirFinder.git {os.path.join(args['db_dir'], 'DVF_db_tmp')}")
-    os.system(f"mv {os.path.join(args['db_dir'], 'DVF_db_tmp/models')} {args['DVF_db']}")
-    os.system(f"rm -rf {os.path.join(args['db_dir'], 'DVF_db_tmp')}")
-
-    time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
-    logger.info(f"{time_current} | DVF db has been set up")     
+    if os.path.exists(f"{os.path.join(args['db_dir'], args['DVF_db'])}"):
+        logger.info(f"{time_current} | Found {args['DVF_db']}. DVF_db db seems already set up.")
+    else:
+        os.system(f"git clone https://github.com/jessieren/DeepVirFinder.git {os.path.join(args['db_dir'], 'DVF_db_tmp')}")
+        os.system(f"mv {os.path.join(args['db_dir'], 'DVF_db_tmp/models')} {args['DVF_db']}")
+        os.system(f"rm -rf {os.path.join(args['db_dir'], 'DVF_db_tmp')}")
 
+        time_current = f"[{str(datetime.now().replace(microsecond=0))}]"
+        logger.info(f"{time_current} | DVF db has been set up")     
+
 
-    end_time = datetime.now().replace(microsecond=0)
-    duration = end_time - start_time
-    logger.info(f"The total running time is {duration} (in \"hr:min:sec\" format)")  
-
+    end_time = datetime.now().replace(microsecond=0)  # dedented out of the DVF else-branch so the run time is reported on every path
+    duration = end_time - start_time
+    logger.info(f"The total running time is {duration} (in \"hr:min:sec\" format)")
+