BuysDB
diff --git a/‎README.md‎
Lines changed: 3 additions & 4 deletions b/‎README.md‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎setup.py‎
Lines changed: 3 additions & 28 deletions b/‎setup.py‎
Lines changed: 3 additions & 28 deletions
diff --git a/‎singlecellmultiomics/libraryDetection/archivestats.py‎
Lines changed: 0 additions & 148 deletions b/‎singlecellmultiomics/libraryDetection/archivestats.py‎
Lines changed: 0 additions & 148 deletions
diff --git a/‎singlecellmultiomics/libraryProcessing/libraryStatistics.py‎
Lines changed: 25 additions & 7 deletions b/‎singlecellmultiomics/libraryProcessing/libraryStatistics.py‎
Lines changed: 25 additions & 7 deletions
diff --git a/‎singlecellmultiomics/libraryProcessing/scsortchicqc.py‎
Lines changed: 8 additions & 4 deletions b/‎singlecellmultiomics/libraryProcessing/scsortchicqc.py‎
Lines changed: 8 additions & 4 deletions
@@ -1,7 +1,6 @@
-[![Documentation Status](https://readthedocs.org/projects/singlecellmultiomics/badge/?version=latest)](https://singlecellmultiomics.readthedocs.io/en/latest/?badge=latest) [![PyPI version](https://badge.fury.io/py/singlecellmultiomics.svg)](https://badge.fury.io/py/singlecellmultiomics) [![DOI](https://zenodo.org/badge/187592829.svg)](https://zenodo.org/badge/latestdoi/187592829) [![Anaconda-Server Badge](https://anaconda.org/buysdb/singlecellmultiomics/badges/installer/conda.svg)](https://anaconda.org/buysdb/singlecellmultiomics)
-
+[![Documentation Status](https://readthedocs.org/projects/singlecellmultiomics/badge/?version=latest)](https://singlecellmultiomics.readthedocs.io/en/latest/?badge=latest) [![PyPI version](https://badge.fury.io/py/singlecellmultiomics.svg)](https://badge.fury.io/py/singlecellmultiomics) [![DOI](https://zenodo.org/badge/187592829.svg)](https://zenodo.org/badge/latestdoi/187592829)
 ## Single cell multi omics
-Single cell multi omics is a set of tools to deal with multiple measurements from the same cell. This package has been developed by the [van Oudenaarden group](https://www.hubrecht.eu/research-groups/van-oudenaarden-group/).
+Single cell multi omics is a set of tools to deal with multiple measurements from the same cell. This package is maintained by [Barbanson Biotech](https://barbansonbiotech.com/).
 
 # Installation
 ```
@@ -32,7 +31,7 @@ The mapped reads are encoded in a BAM file. This BAM file still contains the enc
  methylation digest sequencing:SC MSPJI ,  
  lineage tracing:SCARTRACE,
  DNA digest sequencing: NLAIII,
- histone modification sequencing: scCHIC,
+ Epigenetic modification sequencing: scCHIC, scCHIC+Transcriptome, DamID, DamID+T,
  Single cell methylation : TAPs (in combination with any other supported protocol).
 
 4) Assigns reads to molecules to allow for deduplication, adds duplication BAM flag
 
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-from setuptools import setup
+from setuptools import setup, find_namespace_packages
 import os
 import sys
 
@@ -24,36 +24,12 @@
     long_description=long_description,
     long_description_content_type='text/markdown',
     author='Buys de Barbanson',
-    author_email='b.barbanson@hubrecht.eu',
+    author_email='github@barbansonbiotech.com',
     url='https://github.com/BuysDB/SingleCellMultiOmics',
     download_url = 'https://github.com/BuysDB/SingleCellMultiOmics/archive/v0.1.9.tar.gz',
 
     license='MIT',
-    packages=['singlecellmultiomics',
-
-        'singlecellmultiomics.alleleTools',
-        'singlecellmultiomics.bamProcessing',
-        'singlecellmultiomics.barcodeFileParser',
-        'singlecellmultiomics.countTableProcessing',
-        'singlecellmultiomics.features',
-        'singlecellmultiomics.fragment',
-        'singlecellmultiomics.fastqProcessing',
-        'singlecellmultiomics.fastaProcessing',
-        'singlecellmultiomics.libraryDetection',
-        'singlecellmultiomics.libraryProcessing',
-        'singlecellmultiomics.modularDemultiplexer',
-        'singlecellmultiomics.molecule',
-        'singlecellmultiomics.methylation',
-        'singlecellmultiomics.pyutils',
-        'singlecellmultiomics.variants',
-        'singlecellmultiomics.tags',
-        'singlecellmultiomics.statistic',
-        'singlecellmultiomics.tagtools',
-        'singlecellmultiomics.universalBamTagger',
-        'singlecellmultiomics.utils',
-        'singlecellmultiomics.modularDemultiplexer.demultiplexModules'
-        ],
-
+    packages=find_namespace_packages(),
 
     scripts=[
         # Demultiplexing
@@ -121,7 +97,6 @@
         # Library processing:
         'singlecellmultiomics/libraryProcessing/libraryStatistics.py',
         'singlecellmultiomics/libraryProcessing/scsortchicstats.py',
-        'singlecellmultiomics/libraryDetection/archivestats.py',
         'singlecellmultiomics/alleleTools/heterozygousSNPedit.py',
         'singlecellmultiomics/libraryProcessing/scsortchicfeaturedensitytable.py',
         'singlecellmultiomics/libraryProcessing/scsortchicqc.py',
 
@@ -87,6 +87,10 @@ def select_fastq_file(lookup):
     argparser.add_argument('--v', action='store_true')
     argparser.add_argument('--nort', action='store_true')
     argparser.add_argument('--nolorenz', action='store_true')
+
+    argparser.add_argument('-demux_R1_path', type=str)
+    argparser.add_argument('-demux_R2_path', type=str)
+
     args = argparser.parse_args()
 
     for library in args.libraries:
@@ -127,6 +131,13 @@ def select_fastq_file(lookup):
         if(args.t in ['chic-stats', 'all-stats']):
             statistics.extend([ScCHICLigation(args)])
 
+        if args.t=='non-scmo-stats':
+            statistics.extend([
+                ScCHICLigation(args)
+
+            ])
+
+
         if(args.t in ['demult-stats', 'all-stats']):
             statistics.extend([
                 TrimmingStats(args),
@@ -137,13 +148,20 @@ def select_fastq_file(lookup):
                 PlateStatistic2(args)
             ])
 
-        demuxFastqFilesLookup = [
-            (f'{library}/demultiplexedR1.fastq.gz',
-             f'{library}/demultiplexedR2.fastq.gz'),
-            (f'{library}/demultiplexedR1_val_1.fq.gz',
-             f'{library}/demultiplexedR2_val_2.fq.gz'),
-            (f'{library}/demultiplexedR1_val_1.fq',
-             f'{library}/demultiplexedR2_val_2.fq')]
+        if args.demux_R1_path is not None:
+            assert  args.demux_R2_path is not None
+            demuxFastqFilesLookup = [
+                (args.demux_R1_path,args.demux_R2_path),
+            ]
+        else:
+            demuxFastqFilesLookup = [
+                (f'{library}/demultiplexedR1.fastq.gz',
+                 f'{library}/demultiplexedR2.fastq.gz'),
+                (f'{library}/demultiplexedR1_val_1.fq.gz',
+                 f'{library}/demultiplexedR2_val_2.fq.gz'),
+                (f'{library}/demultiplexedR1_val_1.fq',
+                 f'{library}/demultiplexedR2_val_2.fq')
+            ]
 
         rejectFilesLookup = [
             (f'{library}/rejectsR1.fastq.gz', f'{library}/rejectsR2.fastq.gz'),
 
@@ -95,7 +95,6 @@ def read_contaminant_info(sortchicstats_paths):
     statistics_paths = []
     count_table_paths = []
     for path in args.count_tables_sortchicstats_statistics:
-
         if path.endswith('statistics.pickle.gz'):
             statistics_paths.append(path)
         elif path.endswith('sortchicstats.json'):
@@ -111,7 +110,10 @@ def read_contaminant_info(sortchicstats_paths):
     # Read the count tables
     df = pd.concat([read_count_table(path) for path in count_table_paths])
     # Add mark as first level of df, library second, cell third
-    df.index = pd.MultiIndex.from_tuples([(sample_sheet['marks'][cell.split('_')[0]], cell.split('_')[0], int(cell.split('_')[1])) for cell in df.index])
+    df.index = pd.MultiIndex.from_tuples([(
+        sample_sheet['marks'][cell.split('_')[0]],
+        cell.split('_')[0], int(cell.split('_')[1]))
+        for cell in df.index])
 
     avail_marks = df.index.get_level_values(0).unique()
     print('Target marks:')
@@ -135,15 +137,17 @@ def read_contaminant_info(sortchicstats_paths):
     y = cell_labels=='empty'
     rf = RandomForestClassifier(class_weight='balanced')
 
-    X = plate_stats.loc[y.index]
+    y=y.loc[[idx for idx in y.index if idx in plate_stats.index]]
+    X = plate_stats.loc[[idx for idx in y.index if idx in plate_stats.index]]
     X[('AA', 'ligated molecules')]/=X[('total mapped',       '# molecules')]
     X[('TA', 'fraction ligated molecules')]= X[('TA', 'ligated molecules')] / X[('total mapped',       '# molecules')]
     X[('TT', 'ligated molecules')]/=X[('total mapped',       '# molecules')]
     X[('qcfail', '# reads')]/=X[('total mapped',       '# molecules')]
     X[('duprate', 'pct')] =X[('total mapped',       '# molecules')]/X[('total mapped', '# reads')]
 
     y[X[('total mapped','# reads')]<500] = True
-    X = X.join(contaminant_info)
+    X = X.join(contaminant_info).fillna(0)
+    X = X.replace([np.inf,], 0)
 
     predictions = []
     for train_index, test_index in KFold(n_splits=8, shuffle=True, random_state=None).split(X):