From ccd415664d6fd848ea964e82a561f1fd8c2ba48b Mon Sep 17 00:00:00 2001 From: biocyberman Date: Mon, 30 Oct 2017 15:16:37 +0100 Subject: [PATCH 1/5] Fixed missing download types, and gave more informative error messages --- tasks/common.py | 7 +++++-- tasks/download/download_tools.py | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tasks/common.py b/tasks/common.py index cf2f9b81..d37b2b07 100644 --- a/tasks/common.py +++ b/tasks/common.py @@ -163,7 +163,7 @@ def unzip_todir(input, directory, type): tar = tarfile.open(fileobj=input, mode='r:bz2') tar.extractall(tempdir) else: - raise ValueError('Can only download .tar.gz, .tar.bz2, or .zip file') + raise ValueError('Type: {0} given. Can only download .tar.gz, .tar.bz2, or .zip file'.format(type)) # If there is only one subdirectory, take the files inside that files = [os.path.join(tempdir, f) for f in os.listdir(tempdir)] @@ -207,7 +207,7 @@ def download_zip(url_str, directory, type=None): input = BytesIO(url.read()) # Try to deduce the type from the URL - if not type: + if type is None: [name, ext2] = os.path.splitext(url_str) [name, ext1] = os.path.splitext(name) @@ -217,6 +217,9 @@ def download_zip(url_str, directory, type=None): type = 'tgz' elif (ext1 == '.tar' and ext2 == '.bz2') or ext2 == '.tbz2': type = 'tbz2' + else: + raise ValueError ("Unknow filetype: ext1: {}, ext2: {}\nurl_str: {}".format(ext1, ext2, + url_str)) unzip_todir(input, directory, type) diff --git a/tasks/download/download_tools.py b/tasks/download/download_tools.py index 7deff6ec..146dccdd 100644 --- a/tasks/download/download_tools.py +++ b/tasks/download/download_tools.py @@ -99,7 +99,7 @@ def task_download_bwa(): if swift_install(): return nectar_download('bwa') else: - return download_task("https://codeload.github.com/lh3/bwa/tar.gz/v{0}".format(BWA_VERSION)) + return download_task("https://codeload.github.com/lh3/bwa/tar.gz/v{0}".format(BWA_VERSION), type='tgz') def task_download_htslib(): @@ -130,7 +130,7 @@ 
def task_download_bedtools(): if swift_install(): return nectar_download('bedtools') else: - return download_task("https://codeload.github.com/arq5x/bedtools2/tar.gz/v{0}".format(BEDTOOLS_VERSION)) + return download_task("https://codeload.github.com/arq5x/bedtools2/tar.gz/v{0}".format(BEDTOOLS_VERSION), type='tgz') def task_download_vep(): if swift_install(): @@ -331,7 +331,7 @@ def task_download_zlib(): return nectar_download('zlib') else: return download_task( - 'https://codeload.github.com/madler/zlib/tar.gz/v{}'.format(ZLIB_VERSION)) + 'https://codeload.github.com/madler/zlib/tar.gz/v{}'.format(ZLIB_VERSION), type='tgz') def task_download_vcfanno(): if has_swift_auth(): From ab9f0f9a98a00f9bbdecfe83b247af0c601f13d6 Mon Sep 17 00:00:00 2001 From: biocyberman Date: Mon, 30 Oct 2017 15:18:54 +0100 Subject: [PATCH 2/5] Avoided error with accidental deletion of `tmpdata` --- install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/install.sh b/install.sh index 1b06765e..82c83e3d 100755 --- a/install.sh +++ b/install.sh @@ -7,6 +7,7 @@ set -e PYTHON_VERSION='3.6.0' PYTHON_INTERPRETER='python3.6' ROOT=$(readlink -f $(dirname ${BASH_SOURCE})) +mkdir -p ${ROOT}/tmpdata export TMPDIR=${ROOT}/tmpdata # Write temporary files to tmpdata export CPIPE_ROOT=$ROOT TEMP_SUBDIR=`mktemp -d` From ea8ef24dde666bb8e1a158291ab9fd04b6a02e25 Mon Sep 17 00:00:00 2001 From: biocyberman Date: Mon, 30 Oct 2017 15:20:18 +0100 Subject: [PATCH 3/5] Fixed missing field of action's return in `task_download_cpanm` --- tasks/download/download_tools.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tasks/download/download_tools.py b/tasks/download/download_tools.py index 146dccdd..9f2cb6cb 100644 --- a/tasks/download/download_tools.py +++ b/tasks/download/download_tools.py @@ -67,6 +67,9 @@ def action(): curl -L https://cpanmin.us/ -o cpanm chmod +x cpanm ''', cwd=temp_dir) + return { + 'dir': temp_dir + } return { 'actions': [action], From 9ce89811bf56e09cefb5c43ac07858d78b19cf8f Mon Sep 
17 00:00:00 2001 From: biocyberman Date: Mon, 30 Oct 2017 15:21:55 +0100 Subject: [PATCH 4/5] Fixed a typo for index command in `task_bwa_index_ucsc_reference`. --- tasks/download/download_reference_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/download/download_reference_files.py b/tasks/download/download_reference_files.py index 2e3a175f..e80521bb 100644 --- a/tasks/download/download_reference_files.py +++ b/tasks/download/download_reference_files.py @@ -311,7 +311,7 @@ def task_bwa_index_ucsc_reference(): return { 'targets': [UCSC_BWA_INDEX], 'actions': [ - '{tools}/bwa/bwa index -a bwtsw {data}/ucsc/ucsc.hg19.fasta'.format(tools=TOOLS_ROOT, data=DATA_ROOT) + '{tools}/bin/bwa index -a bwtsw {data}/ucsc/ucsc.hg19.fasta'.format(tools=TOOLS_ROOT, data=DATA_ROOT) ], 'task_dep': [ 'install_bwa', From ba286e9d1dc0bb6c202f5ac93fa7bfc4906d2b05 Mon Sep 17 00:00:00 2001 From: biocyberman Date: Mon, 30 Oct 2017 15:23:31 +0100 Subject: [PATCH 5/5] Unzipped ucsc hg19 genome so tools that expect `.fasta` can run --- tasks/download/download_reference_files.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tasks/download/download_reference_files.py b/tasks/download/download_reference_files.py index e80521bb..8c7f0fac 100644 --- a/tasks/download/download_reference_files.py +++ b/tasks/download/download_reference_files.py @@ -130,7 +130,15 @@ def task_download_ucsc(): ["ucsc.hg19.dict.gz", "ucsc.hg19.fasta.gz", "ucsc.hg19.fasta.fai.gz"], UCSC_ROOT, 'bundle/hg19/' - ) + ), + cmd(''' + mkdir -p ucsc\ + && pushd {data_dir}/ucsc\ + && gunzip -c ucsc.hg19.dict.gz >ucsc.hg19.dict\ + && gunzip -c ucsc.hg19.fasta.fai.gz >ucsc.hg19.fasta.fai\ + && gunzip -c ucsc.hg19.fasta.gz >ucsc.hg19.fasta + '''.format(data_dir=DATA_ROOT), cwd=DATA_ROOT, executable='bash') + ], 'uptodate': [run_once], }