Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def metaFromRead(read, tag):


# Matches every character that has to be cleaned out of a string before it can
# be used in a fastq file header. The characters left alone are ASCII letters,
# digits, '-', '_' and '.'. The '-' that follows the 0-9 range is a literal
# hyphen, not the start of another range.
fastqCleanerRegex = re.compile(r'[^a-zA-Z0-9-_.]', re.UNICODE)

def fqSafe(string) -> str:
"""
Expand Down Expand Up @@ -147,7 +147,13 @@ def has_tag(self, tag):
return tag in self.tags

def asIlluminaHeader(self):
    """Return the read name for this record.

    The name is the colon-separated string built from the ``Is``, ``RN``,
    ``Fc``, ``La``, ``Ti``, ``CX`` and ``CY`` entries of ``self.tags``.
    When any of those entries is absent, the value stored under ``oh``
    (the original header) is returned instead.

    Raises:
        KeyError: if one of the seven tags is missing and ``self.tags``
            has no ``oh`` entry to fall back on.
    """
    try:
        return '{Is}:{RN}:{Fc}:{La}:{Ti}:{CX}:{CY}'.format(**self.tags)
    except KeyError:
        if 'oh' in self.tags:
            return self.tags['oh']
        # Bare raise re-raises the KeyError with its original traceback.
        raise




Expand Down Expand Up @@ -188,6 +194,11 @@ def parse_3dec_header(self,fastqRecord, indexFileParser, indexFileAlias):
'CN': controlNumber
})

def parse_other_header(self, fastqRecord, indexFileParser, indexFileAlias):
    """Store the read name of ``fastqRecord`` under the ``oh`` tag.

    The first character of ``fastqRecord.header`` is dropped (presumably
    the FASTQ ``@`` marker; verify against callers), every ``;`` is
    removed, and only the text before the first whitespace is kept.

    ``indexFileParser`` and ``indexFileAlias`` are accepted but not used
    by this method.

    Raises:
        IndexError: if nothing but whitespace remains after the first
            character of the header.
    """
    read_name = fastqRecord.header[1:].replace(';', '').split()[0]
    self.tags.update({
        'oh': read_name  # original header
    })


def _parse_illumina_header(self,header, indexFileParser = None, indexFileAlias = None):

Expand Down Expand Up @@ -270,7 +281,10 @@ def fromRawFastq(
if fastqRecord.header.startswith('@Is'):
self.parse_scmo_header(fastqRecord, indexFileParser, indexFileAlias)
else:
self.parse_3dec_header(fastqRecord, indexFileParser, indexFileAlias)
if fastqRecord.header.startswith('@Cluster'):
self.parse_3dec_header(fastqRecord, indexFileParser, indexFileAlias)
else:
self.parse_other_header(fastqRecord, indexFileParser, indexFileAlias)


# NS500413:32:H14TKBGXX:2:11101:16448:1664 1:N:0::
Expand Down Expand Up @@ -379,10 +393,11 @@ def fromTaggedFastq(self, fastqRecord):

def fromTaggedBamRecord(self, pysamRecord):
try:

for keyValue in pysamRecord.query_name.strip().split(';'):
key, value = keyValue.split(':')
self.addTagByTag(key, value, isPhred=False)
except ValueError:
except ValueError as e:
# Try to parse "Single Cell Discoveries" header
# These have the following header:
#NBXXXXXX:530:HXXXXXX:2:2:17:6;SS:GTCATTAG;CB:GTCATTAG;QT:eeeeeeee;RX:CTGAAC;RQ:aaaaae;SM:SAMPLE_NAME
Expand Down
1 change: 1 addition & 0 deletions singlecellmultiomics/tags/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def __repr__(self):
SamTag('QX', 'barcodeQual', isPhred=True),
SamTag('bc', 'rawBarcode'),
SamTag('hd', 'hammingDistanceRawBcAssignedBc'),
SamTag('oh', 'originalHeader'),

SamTag('bi', 'barcodeIndex'),

Expand Down
2 changes: 1 addition & 1 deletion singlecellmultiomics/universalBamTagger/bamtagmultiome.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ def tag_multiome_multi_processing(
# Remove the temp dir:
sleep(5)
try:
shutil.rmtree(temp_folder, ignore_errors=False, onerror=None)
shutil.rmtree(temp_folder, ignore_errors=False)
except Exception as e:
sys.stderr.write(f'Failed to remove {temp_folder}\n')
sys.stderr.write(f'{e}\n')
Expand Down
36 changes: 36 additions & 0 deletions tests/test_demultiplexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,42 @@ def test_3DEC_UmiBarcodeDemuxMethod_matching_barcode(self):
self.assertEqual( demultiplexed_record[0].tags['BC'], 'ACACACTA')
self.assertEqual( demultiplexed_record[0].tags['bi'], 1)

def test_sra_header(self):
    """Demultiplex a read pair whose headers are plain ``@SRR...`` names.

    The headers start with neither ``@Is`` nor ``@Cluster``. The test
    checks that such a pair still demultiplexes and that the barcode
    and barcode index are assigned.
    """
    barcode_folder = str(
        importlib.resources.files('singlecellmultiomics').joinpath(
            'modularDemultiplexer/barcodes/'))

    barcode_parser = BarcodeParser(barcode_folder, lazyLoad='*')

    r1 = FastqRecord(
        '@SRR21016692.1 1/1',
        'ATCACACACTATAGTCATTCAGGAGCAGGTTCTTCAGGTTCCCTGTAGTTGTGTGGTTTTGAGTGAGTTTTTTAAT',
        '+',
        'AAAAA#EEEEEEEEEEEAEEEEEEEAEEEEEEEEEEEEEEEEEE/EEEEEEEEEEEE/EEEEEEEEEEEEEEEEEE'
    )
    r2 = FastqRecord(
        '@SRR21016692.1 1/2',
        'ACCCCAGATCAACGTTGGACNTCNNCNTTNTNCTCNGCACCNNNNCNNNCTTATNCNNNANNNNNNNNNNTNNGN',
        '+',
        '6AAAAEEAEE/AEEEEEEEE#EE##<#6E#A#EEE#EAEEA####A###EE6EE#E###E##########E##A#'
    )
    # UMI: first 3 bases of read 1; barcode: the 8 bases that follow it.
    demux = UmiBarcodeDemuxMethod(umiRead=0,
                                  umiStart=0,
                                  umiLength=3,
                                  barcodeRead=0,
                                  barcodeStart=3,
                                  barcodeLength=8,
                                  barcodeFileParser=barcode_parser,
                                  barcodeFileAlias='maya_384NLA',
                                  indexFileParser=None,
                                  indexFileAlias='illumina_merged_ThruPlex48S_RP',
                                  random_primer_read=None,
                                  random_primer_length=6)

    demultiplexed_record = demux.demultiplex([r1, r2])
    # The barcode sequence is ACACACTA (first barcode)
    self.assertEqual(demultiplexed_record[0].tags['BC'], 'ACACACTA')
    self.assertEqual(demultiplexed_record[0].tags['bi'], 1)


if __name__ == '__main__':
unittest.main()
Loading