diff --git a/.travis.yml b/.travis.yml index f647f52..8a91330 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,11 +19,10 @@ install: - conda config --add channels defaults - conda config --add channels conda-forge - conda config --add channels bioconda - - conda install -c bioconda -c conda-forge snakemake - - conda create -q -n snakemake snakemake>=5.3.1 python=3.6 + - conda create -q -n snakemake 'snakemake>=5.3.1' 'python>=3.10' script: - - python3.6 setup.py install + - pip install . - pytest after_success: diff --git a/cite_seq_count/__main__.py b/cite_seq_count/__main__.py index 0e36e13..6684880 100755 --- a/cite_seq_count/__main__.py +++ b/cite_seq_count/__main__.py @@ -94,6 +94,8 @@ def main(): print("Skipping cell barcode correction") bcs_corrected = 0 + ###### HERE IT STOPS WORKING ########## + # Create sparse matrices for reads results read_results_matrix = processing.generate_sparse_matrices( final_results=final_results, diff --git a/cite_seq_count/preprocessing.py b/cite_seq_count/preprocessing.py index baff2d1..4462608 100644 --- a/cite_seq_count/preprocessing.py +++ b/cite_seq_count/preprocessing.py @@ -57,7 +57,7 @@ def parse_barcode_reference( barcode_pattern = rf"^[ATGC]{{{barcode_length}}}" header = barcodes_pl.columns - set_dif = set(required_header) - set(header) + set_dif = set([required_header]) - set(header) if len(set_dif) != 0: set_diff_string = ",".join(list(set_dif)) raise SystemExit(f"The header is missing {set_diff_string}. Exiting") @@ -74,7 +74,7 @@ def parse_barcode_reference( else: barcodes_pl = barcodes_pl.with_columns( - reference=pl.col(REFERENCE_COLUMN).str.strip_chars(STRIP_CHARS), + reference=pl.col(required_header).str.strip_chars(STRIP_CHARS), ) check_sequence_pattern( @@ -110,7 +110,7 @@ def parse_tags_csv(file_name: str) -> pl.DataFrame: TTCCGCCTCTCTTTG,Hashtag_3 Args: - file_name (str): file path as a tring + file_name (str): file path as a string Returns: pl.DataFrame: polars dataframe with the csv content @@ -381,16 +381,17 @@ def get_barcode_subset( enable_barcode_correction = True if barcode_whitelist: barcode_subset = parse_barcode_reference( - filename=expected_barcodes, + filename=barcode_whitelist, barcode_length=(chemistry.cell_barcode_end - chemistry.cell_barcode_start), required_header=WHITELIST_COLUMN, ) + n_barcodes = len(barcode_subset) # ??? else: - n_barcodes = barcode_whitelist + n_barcodes = expected_barcodes if barcode_reference is not None: barcode_subset = ( barcodes_df.filter( - pl.col(BARCODE_COLUMN).str.is_in( + pl.col(BARCODE_COLUMN).is_in( barcode_reference[REFERENCE_COLUMN] ) ) @@ -399,7 +400,7 @@ def get_barcode_subset( .sort("count", descending=True) .head(n_barcodes * 1.2) .drop("count") - .rename({SEQUENCE_COLUMN: WHITELIST_COLUMN}) + .rename({BARCODE_COLUMN: WHITELIST_COLUMN}) ) else: raw_barcodes_dict = ( diff --git a/setup.py b/setup.py index 58f6cec..49c715d 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,8 @@ "pyyaml==6.0", "pooch==1.6.0", "six==1.16.0", + "polars" ], - python_requires=">=3.8", + python_requires=">=3.10", package_data={"report_template": ["templates/*.json"]}, ) diff --git a/tests/test_data/fastq/correct_R1_with_cell_barcode_mm.fastq.gz b/tests/test_data/fastq/correct_R1_with_cell_barcode_mm.fastq.gz new file mode 100644 index 0000000..4dc0f1d Binary files /dev/null and b/tests/test_data/fastq/correct_R1_with_cell_barcode_mm.fastq.gz differ diff --git a/tests/test_data/matrix/.~lock.test_matrix.csv# b/tests/test_data/matrix/.~lock.test_matrix.csv# deleted file mode 100644 index 6f2f611..0000000 --- a/tests/test_data/matrix/.~lock.test_matrix.csv# +++ /dev/null @@ -1 +0,0 @@ -,proelli,proelli-ThinkPad-T470s,23.01.2019 16:02,file:///home/proelli/.config/libreoffice/4; \ No newline at end of file diff --git a/tests/test_data/tags/pass/correct_3.csv b/tests/test_data/tags/pass/correct_3.csv new file mode 100644 index 0000000..dcb4b31 --- /dev/null +++ b/tests/test_data/tags/pass/correct_3.csv @@ -0,0 +1,5 @@ +sequence,feature_name +CGTACGTAGCCTAGC,test1 +CGTAGCTCGAAAAAA,test2 +CGTCGAAGCTGAACG,test3 +CGTCGTAGCTGATCG,test4 diff --git a/tests/test_data/whitelist.csv b/tests/test_data/whitelist.csv new file mode 100644 index 0000000..2f7cd0f --- /dev/null +++ b/tests/test_data/whitelist.csv @@ -0,0 +1,3 @@ +whitelist +TACATATTCTTTACTG +TAGAGGGAAGTCAAGC