From 59bac92ec6a486286bec3dd571bbdad1bc14767d Mon Sep 17 00:00:00 2001 From: BuysDB Date: Sat, 25 Apr 2026 13:52:25 +0200 Subject: [PATCH] Generate job queue correctly when short contigs are not at the end of the reference list #279 --- .../universalBamTagger/bamtagmultiome.py | 16 +++++++--------- .../universalBamTagger/tagging.py | 16 ++++++++++++---- singlecellmultiomics/version.py | 2 +- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/singlecellmultiomics/universalBamTagger/bamtagmultiome.py b/singlecellmultiomics/universalBamTagger/bamtagmultiome.py index eebd912d..32e5808b 100755 --- a/singlecellmultiomics/universalBamTagger/bamtagmultiome.py +++ b/singlecellmultiomics/universalBamTagger/bamtagmultiome.py @@ -333,18 +333,15 @@ def tag_multiome_multi_processing( small_contig_threshold = 100000 job_gen = [[('*',None,None,None,None),],] - current = [] + small_contigs_jobs = [] for contig,contig_len in get_contigs_with_reads(input_bam_path, True): if contig_len1: - job_gen.append(current) - current=[] - else: - job_gen.append([ (contig,None,None,None,None), ]) - if len(current)>1: - job_gen.append(current) + job_gen.append([ (contig,None,None,None,None), ]) + if len(small_contigs_jobs)>0: + job_gen.append(small_contigs_jobs) + #job_gen = [[('*',None,None,None,None),],] + [ [(contig,None,None,None,None),] for contig,contig_len in get_contigs_with_reads(input_bam_path, True) if contig!='*' ] @@ -375,6 +372,7 @@ def tag_multiome_multi_processing( additional_args= additional_args, max_time_per_segment=max_time_per_segment) + # Create header bam: temp_header_bam_path = f'{temp_folder}/{uuid.uuid4()}_header.bam' with pysam.AlignmentFile(input_bam_path) as input_bam: diff --git a/singlecellmultiomics/universalBamTagger/tagging.py b/singlecellmultiomics/universalBamTagger/tagging.py index 2e16d7d5..2302dc66 100644 --- a/singlecellmultiomics/universalBamTagger/tagging.py +++ b/singlecellmultiomics/universalBamTagger/tagging.py @@ -158,6 +158,15 @@ def timeout_check_function(iteration, mol_iter, reads ): 'time_start': time_start} +def task_to_alias(kwargs): + if kwargs.get('contig') is None: + return 'all' + contig = kwargs['contig'].replace('*', 'star') + if kwargs.get('start') is None: + return contig + if kwargs.get('end') is None: + return contig + '-' + kwargs['start'] + return contig + '-' + kwargs['start'] + '-' + kwargs['end'] def run_tagging_tasks(args: tuple): """ Run tagging for one or more tasks @@ -169,11 +178,10 @@ def run_tagging_tasks(args: tuple): (alignments_path, temp_dir, timeout_time), arglist = args - target_file = f"{temp_dir}/{uuid4()}.bam" - while os.path.exists(target_file): - print(f'Collision at {target_file}') - target_file = f"{temp_dir}/{uuid4()}.bam" + target_file_prefix = '_'.join( [task_to_alias(kwargs) for kwargs in arglist[:2]]) + (f'_{len(arglist)-2}more' if len(arglist) > 2 else '' ) + target_file = f"{temp_dir}/{target_file_prefix}.bam" + assert not os.path.exists(target_file), f'File {target_file} already exists' timeout_tasks = [] total_molecules = 0 diff --git a/singlecellmultiomics/version.py b/singlecellmultiomics/version.py index 847695f1..c663ce3c 100644 --- a/singlecellmultiomics/version.py +++ b/singlecellmultiomics/version.py @@ -1,2 +1,2 @@ -__version__ = '0.1.44' +__version__ = '0.1.45'