Skip to content

Commit aaa5ee9

Browse files
authored
Merge pull request #281 from BuysDB/resource-patch
Fixes issues with multiprocessing, removes the dependency on pkg_resources, and fixes minor issues.
2 parents a27c2b9 + a8b0e51 commit aaa5ee9

16 files changed

Lines changed: 205 additions & 291 deletions

File tree

README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
[![Documentation Status](https://readthedocs.org/projects/singlecellmultiomics/badge/?version=latest)](https://singlecellmultiomics.readthedocs.io/en/latest/?badge=latest) [![PyPI version](https://badge.fury.io/py/singlecellmultiomics.svg)](https://badge.fury.io/py/singlecellmultiomics) [![DOI](https://zenodo.org/badge/187592829.svg)](https://zenodo.org/badge/latestdoi/187592829) [![Anaconda-Server Badge](https://anaconda.org/buysdb/singlecellmultiomics/badges/installer/conda.svg)](https://anaconda.org/buysdb/singlecellmultiomics)
2-
1+
[![Documentation Status](https://readthedocs.org/projects/singlecellmultiomics/badge/?version=latest)](https://singlecellmultiomics.readthedocs.io/en/latest/?badge=latest) [![PyPI version](https://badge.fury.io/py/singlecellmultiomics.svg)](https://badge.fury.io/py/singlecellmultiomics) [![DOI](https://zenodo.org/badge/187592829.svg)](https://zenodo.org/badge/latestdoi/187592829)
32
## Single cell multi omics
4-
Single cell multi omics is a set of tools to deal with multiple measurements from the same cell. This package has been developed by the [van Oudenaarden group](https://www.hubrecht.eu/research-groups/van-oudenaarden-group/).
3+
Single cell multi omics is a set of tools to deal with multiple measurements from the same cell. This package is maintained by [Barbanson Biotech](https://barbansonbiotech.com/).
54

65
# Installation
76
```
@@ -32,7 +31,7 @@ The mapped reads are encoded in a BAM file. This BAM file still contains the enc
3231
methylation digest sequencing: SC MSPJI,
3332
lineage tracing:SCARTRACE,
3433
DNA digest sequencing: NLAIII,
35-
histone modification sequencing: scCHIC,
34+
Epigenetic modification sequencing: scCHIC, scCHIC+Transcriptome, DamID, DamID+T,
3635
Single cell methylation : TAPs (in combination with any other supported protocol).
3736

3837
4) Assigns reads to molecules to allow for deduplication, adds duplication BAM flag

setup.py

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
3-
from setuptools import setup
3+
from setuptools import setup, find_namespace_packages
44
import os
55
import sys
66

@@ -24,36 +24,12 @@
2424
long_description=long_description,
2525
long_description_content_type='text/markdown',
2626
author='Buys de Barbanson',
27-
author_email='b.barbanson@hubrecht.eu',
27+
author_email='github@barbansonbiotech.com',
2828
url='https://github.com/BuysDB/SingleCellMultiOmics',
2929
download_url = 'https://github.com/BuysDB/SingleCellMultiOmics/archive/v0.1.9.tar.gz',
3030

3131
license='MIT',
32-
packages=['singlecellmultiomics',
33-
34-
'singlecellmultiomics.alleleTools',
35-
'singlecellmultiomics.bamProcessing',
36-
'singlecellmultiomics.barcodeFileParser',
37-
'singlecellmultiomics.countTableProcessing',
38-
'singlecellmultiomics.features',
39-
'singlecellmultiomics.fragment',
40-
'singlecellmultiomics.fastqProcessing',
41-
'singlecellmultiomics.fastaProcessing',
42-
'singlecellmultiomics.libraryDetection',
43-
'singlecellmultiomics.libraryProcessing',
44-
'singlecellmultiomics.modularDemultiplexer',
45-
'singlecellmultiomics.molecule',
46-
'singlecellmultiomics.methylation',
47-
'singlecellmultiomics.pyutils',
48-
'singlecellmultiomics.variants',
49-
'singlecellmultiomics.tags',
50-
'singlecellmultiomics.statistic',
51-
'singlecellmultiomics.tagtools',
52-
'singlecellmultiomics.universalBamTagger',
53-
'singlecellmultiomics.utils',
54-
'singlecellmultiomics.modularDemultiplexer.demultiplexModules'
55-
],
56-
32+
packages=find_namespace_packages(),
5733

5834
scripts=[
5935
# Demultiplexing
@@ -121,7 +97,6 @@
12197
# Library processing:
12298
'singlecellmultiomics/libraryProcessing/libraryStatistics.py',
12399
'singlecellmultiomics/libraryProcessing/scsortchicstats.py',
124-
'singlecellmultiomics/libraryDetection/archivestats.py',
125100
'singlecellmultiomics/alleleTools/heterozygousSNPedit.py',
126101
'singlecellmultiomics/libraryProcessing/scsortchicfeaturedensitytable.py',
127102
'singlecellmultiomics/libraryProcessing/scsortchicqc.py',

singlecellmultiomics/libraryDetection/archivestats.py

Lines changed: 0 additions & 148 deletions
This file was deleted.

singlecellmultiomics/libraryProcessing/libraryStatistics.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ def select_fastq_file(lookup):
8787
argparser.add_argument('--v', action='store_true')
8888
argparser.add_argument('--nort', action='store_true')
8989
argparser.add_argument('--nolorenz', action='store_true')
90+
91+
argparser.add_argument('-demux_R1_path', type=str)
92+
argparser.add_argument('-demux_R2_path', type=str)
93+
9094
args = argparser.parse_args()
9195

9296
for library in args.libraries:
@@ -127,6 +131,13 @@ def select_fastq_file(lookup):
127131
if(args.t in ['chic-stats', 'all-stats']):
128132
statistics.extend([ScCHICLigation(args)])
129133

134+
if args.t=='non-scmo-stats':
135+
statistics.extend([
136+
ScCHICLigation(args)
137+
138+
])
139+
140+
130141
if(args.t in ['demult-stats', 'all-stats']):
131142
statistics.extend([
132143
TrimmingStats(args),
@@ -137,13 +148,20 @@ def select_fastq_file(lookup):
137148
PlateStatistic2(args)
138149
])
139150

140-
demuxFastqFilesLookup = [
141-
(f'{library}/demultiplexedR1.fastq.gz',
142-
f'{library}/demultiplexedR2.fastq.gz'),
143-
(f'{library}/demultiplexedR1_val_1.fq.gz',
144-
f'{library}/demultiplexedR2_val_2.fq.gz'),
145-
(f'{library}/demultiplexedR1_val_1.fq',
146-
f'{library}/demultiplexedR2_val_2.fq')]
151+
if args.demux_R1_path is not None:
152+
assert args.demux_R2_path is not None
153+
demuxFastqFilesLookup = [
154+
(args.demux_R1_path,args.demux_R2_path),
155+
]
156+
else:
157+
demuxFastqFilesLookup = [
158+
(f'{library}/demultiplexedR1.fastq.gz',
159+
f'{library}/demultiplexedR2.fastq.gz'),
160+
(f'{library}/demultiplexedR1_val_1.fq.gz',
161+
f'{library}/demultiplexedR2_val_2.fq.gz'),
162+
(f'{library}/demultiplexedR1_val_1.fq',
163+
f'{library}/demultiplexedR2_val_2.fq')
164+
]
147165

148166
rejectFilesLookup = [
149167
(f'{library}/rejectsR1.fastq.gz', f'{library}/rejectsR2.fastq.gz'),

singlecellmultiomics/libraryProcessing/scsortchicqc.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ def read_contaminant_info(sortchicstats_paths):
9595
statistics_paths = []
9696
count_table_paths = []
9797
for path in args.count_tables_sortchicstats_statistics:
98-
9998
if path.endswith('statistics.pickle.gz'):
10099
statistics_paths.append(path)
101100
elif path.endswith('sortchicstats.json'):
@@ -111,7 +110,10 @@ def read_contaminant_info(sortchicstats_paths):
111110
# Read the count tables
112111
df = pd.concat([read_count_table(path) for path in count_table_paths])
113112
# Add mark as first level of df, library second, cell third
114-
df.index = pd.MultiIndex.from_tuples([(sample_sheet['marks'][cell.split('_')[0]], cell.split('_')[0], int(cell.split('_')[1])) for cell in df.index])
113+
df.index = pd.MultiIndex.from_tuples([(
114+
sample_sheet['marks'][cell.split('_')[0]],
115+
cell.split('_')[0], int(cell.split('_')[1]))
116+
for cell in df.index])
115117

116118
avail_marks = df.index.get_level_values(0).unique()
117119
print('Target marks:')
@@ -135,15 +137,17 @@ def read_contaminant_info(sortchicstats_paths):
135137
y = cell_labels=='empty'
136138
rf = RandomForestClassifier(class_weight='balanced')
137139

138-
X = plate_stats.loc[y.index]
140+
y=y.loc[[idx for idx in y.index if idx in plate_stats.index]]
141+
X = plate_stats.loc[[idx for idx in y.index if idx in plate_stats.index]]
139142
X[('AA', 'ligated molecules')]/=X[('total mapped', '# molecules')]
140143
X[('TA', 'fraction ligated molecules')]= X[('TA', 'ligated molecules')] / X[('total mapped', '# molecules')]
141144
X[('TT', 'ligated molecules')]/=X[('total mapped', '# molecules')]
142145
X[('qcfail', '# reads')]/=X[('total mapped', '# molecules')]
143146
X[('duprate', 'pct')] =X[('total mapped', '# molecules')]/X[('total mapped', '# reads')]
144147

145148
y[X[('total mapped','# reads')]<500] = True
146-
X = X.join(contaminant_info)
149+
X = X.join(contaminant_info).fillna(0)
150+
X = X.replace([np.inf,], 0)
147151

148152
predictions = []
149153
for train_index, test_index in KFold(n_splits=8, shuffle=True, random_state=None).split(X):

0 commit comments

Comments
 (0)