Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
[![Documentation Status](https://readthedocs.org/projects/singlecellmultiomics/badge/?version=latest)](https://singlecellmultiomics.readthedocs.io/en/latest/?badge=latest) [![PyPI version](https://badge.fury.io/py/singlecellmultiomics.svg)](https://badge.fury.io/py/singlecellmultiomics) [![DOI](https://zenodo.org/badge/187592829.svg)](https://zenodo.org/badge/latestdoi/187592829) [![Anaconda-Server Badge](https://anaconda.org/buysdb/singlecellmultiomics/badges/installer/conda.svg)](https://anaconda.org/buysdb/singlecellmultiomics)

[![Documentation Status](https://readthedocs.org/projects/singlecellmultiomics/badge/?version=latest)](https://singlecellmultiomics.readthedocs.io/en/latest/?badge=latest) [![PyPI version](https://badge.fury.io/py/singlecellmultiomics.svg)](https://badge.fury.io/py/singlecellmultiomics) [![DOI](https://zenodo.org/badge/187592829.svg)](https://zenodo.org/badge/latestdoi/187592829)
## Single cell multi omics
Single cell multi omics is a set of tools to deal with multiple measurements from the same cell. This package has been developed by the [van Oudenaarden group](https://www.hubrecht.eu/research-groups/van-oudenaarden-group/).
Single cell multi omics is a set of tools to deal with multiple measurements from the same cell. This package is maintained by [Barbanson Biotech](https://barbansonbiotech.com/).

# Installation
```
Expand Down Expand Up @@ -32,7 +31,7 @@ The mapped reads are encoded in a BAM file. This BAM file still contains the enc
methylation digest sequencing: SC MSPJI,
lineage tracing:SCARTRACE,
DNA digest sequencing: NLAIII,
histone modification sequencing: scCHIC,
Epigenetic modification sequencing: scCHIC, scCHIC+Transcriptome, DamID, DamID+T,
Single cell methylation: TAPs (in combination with any other supported protocol).

4) Assigns reads to molecules to allow for deduplication, adds duplication BAM flag
Expand Down
31 changes: 3 additions & 28 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from setuptools import setup
from setuptools import setup, find_namespace_packages
import os
import sys

Expand All @@ -24,36 +24,12 @@
long_description=long_description,
long_description_content_type='text/markdown',
author='Buys de Barbanson',
author_email='b.barbanson@hubrecht.eu',
author_email='github@barbansonbiotech.com',
url='https://github.com/BuysDB/SingleCellMultiOmics',
download_url = 'https://github.com/BuysDB/SingleCellMultiOmics/archive/v0.1.9.tar.gz',

license='MIT',
packages=['singlecellmultiomics',

'singlecellmultiomics.alleleTools',
'singlecellmultiomics.bamProcessing',
'singlecellmultiomics.barcodeFileParser',
'singlecellmultiomics.countTableProcessing',
'singlecellmultiomics.features',
'singlecellmultiomics.fragment',
'singlecellmultiomics.fastqProcessing',
'singlecellmultiomics.fastaProcessing',
'singlecellmultiomics.libraryDetection',
'singlecellmultiomics.libraryProcessing',
'singlecellmultiomics.modularDemultiplexer',
'singlecellmultiomics.molecule',
'singlecellmultiomics.methylation',
'singlecellmultiomics.pyutils',
'singlecellmultiomics.variants',
'singlecellmultiomics.tags',
'singlecellmultiomics.statistic',
'singlecellmultiomics.tagtools',
'singlecellmultiomics.universalBamTagger',
'singlecellmultiomics.utils',
'singlecellmultiomics.modularDemultiplexer.demultiplexModules'
],

packages=find_namespace_packages(),

scripts=[
# Demultiplexing
Expand Down Expand Up @@ -121,7 +97,6 @@
# Library processing:
'singlecellmultiomics/libraryProcessing/libraryStatistics.py',
'singlecellmultiomics/libraryProcessing/scsortchicstats.py',
'singlecellmultiomics/libraryDetection/archivestats.py',
'singlecellmultiomics/alleleTools/heterozygousSNPedit.py',
'singlecellmultiomics/libraryProcessing/scsortchicfeaturedensitytable.py',
'singlecellmultiomics/libraryProcessing/scsortchicqc.py',
Expand Down
148 changes: 0 additions & 148 deletions singlecellmultiomics/libraryDetection/archivestats.py

This file was deleted.

32 changes: 25 additions & 7 deletions singlecellmultiomics/libraryProcessing/libraryStatistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ def select_fastq_file(lookup):
argparser.add_argument('--v', action='store_true')
argparser.add_argument('--nort', action='store_true')
argparser.add_argument('--nolorenz', action='store_true')

argparser.add_argument('-demux_R1_path', type=str)
argparser.add_argument('-demux_R2_path', type=str)

args = argparser.parse_args()

for library in args.libraries:
Expand Down Expand Up @@ -127,6 +131,13 @@ def select_fastq_file(lookup):
if(args.t in ['chic-stats', 'all-stats']):
statistics.extend([ScCHICLigation(args)])

if args.t=='non-scmo-stats':
statistics.extend([
ScCHICLigation(args)

])


if(args.t in ['demult-stats', 'all-stats']):
statistics.extend([
TrimmingStats(args),
Expand All @@ -137,13 +148,20 @@ def select_fastq_file(lookup):
PlateStatistic2(args)
])

demuxFastqFilesLookup = [
(f'{library}/demultiplexedR1.fastq.gz',
f'{library}/demultiplexedR2.fastq.gz'),
(f'{library}/demultiplexedR1_val_1.fq.gz',
f'{library}/demultiplexedR2_val_2.fq.gz'),
(f'{library}/demultiplexedR1_val_1.fq',
f'{library}/demultiplexedR2_val_2.fq')]
if args.demux_R1_path is not None:
assert args.demux_R2_path is not None
demuxFastqFilesLookup = [
(args.demux_R1_path,args.demux_R2_path),
]
else:
demuxFastqFilesLookup = [
(f'{library}/demultiplexedR1.fastq.gz',
f'{library}/demultiplexedR2.fastq.gz'),
(f'{library}/demultiplexedR1_val_1.fq.gz',
f'{library}/demultiplexedR2_val_2.fq.gz'),
(f'{library}/demultiplexedR1_val_1.fq',
f'{library}/demultiplexedR2_val_2.fq')
]

rejectFilesLookup = [
(f'{library}/rejectsR1.fastq.gz', f'{library}/rejectsR2.fastq.gz'),
Expand Down
12 changes: 8 additions & 4 deletions singlecellmultiomics/libraryProcessing/scsortchicqc.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ def read_contaminant_info(sortchicstats_paths):
statistics_paths = []
count_table_paths = []
for path in args.count_tables_sortchicstats_statistics:

if path.endswith('statistics.pickle.gz'):
statistics_paths.append(path)
elif path.endswith('sortchicstats.json'):
Expand All @@ -111,7 +110,10 @@ def read_contaminant_info(sortchicstats_paths):
# Read the count tables
df = pd.concat([read_count_table(path) for path in count_table_paths])
# Add mark as first level of df, library second, cell third
df.index = pd.MultiIndex.from_tuples([(sample_sheet['marks'][cell.split('_')[0]], cell.split('_')[0], int(cell.split('_')[1])) for cell in df.index])
df.index = pd.MultiIndex.from_tuples([(
sample_sheet['marks'][cell.split('_')[0]],
cell.split('_')[0], int(cell.split('_')[1]))
for cell in df.index])

avail_marks = df.index.get_level_values(0).unique()
print('Target marks:')
Expand All @@ -135,15 +137,17 @@ def read_contaminant_info(sortchicstats_paths):
y = cell_labels=='empty'
rf = RandomForestClassifier(class_weight='balanced')

X = plate_stats.loc[y.index]
y=y.loc[[idx for idx in y.index if idx in plate_stats.index]]
X = plate_stats.loc[[idx for idx in y.index if idx in plate_stats.index]]
X[('AA', 'ligated molecules')]/=X[('total mapped', '# molecules')]
X[('TA', 'fraction ligated molecules')]= X[('TA', 'ligated molecules')] / X[('total mapped', '# molecules')]
X[('TT', 'ligated molecules')]/=X[('total mapped', '# molecules')]
X[('qcfail', '# reads')]/=X[('total mapped', '# molecules')]
X[('duprate', 'pct')] =X[('total mapped', '# molecules')]/X[('total mapped', '# reads')]

y[X[('total mapped','# reads')]<500] = True
X = X.join(contaminant_info)
X = X.join(contaminant_info).fillna(0)
X = X.replace([np.inf,], 0)

predictions = []
for train_index, test_index in KFold(n_splits=8, shuffle=True, random_state=None).split(X):
Expand Down
Loading