From e0930457b2a7b97e4d1c965a9fa3e183651cefce Mon Sep 17 00:00:00 2001 From: BuysDB Date: Tue, 12 Aug 2025 10:33:56 +0200 Subject: [PATCH 1/3] Prune contigs smaller than bin size --- singlecellmultiomics/bamProcessing/bamCopyNumber.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/singlecellmultiomics/bamProcessing/bamCopyNumber.py b/singlecellmultiomics/bamProcessing/bamCopyNumber.py index 03a0f057..96914bf4 100644 --- a/singlecellmultiomics/bamProcessing/bamCopyNumber.py +++ b/singlecellmultiomics/bamProcessing/bamCopyNumber.py @@ -644,6 +644,15 @@ def bulk_trace(pdf_path, copy_mat, cell_cluster_names, cell_order,segmented_matr ignore_contigs = None if args.ignore_contigs is None else args.ignore_contigs.split(',') reference = pysam.FastaFile(args.ref) + # Determine contig lengths and skip contigs which are smaller than the provided bin size: + for contig, clen in zip(reference.references, reference.lengths): + if clen < bin_size: + if ignore_contigs is None: + ignore_contigs = [contig] + elif contig not in ignore_contigs: + ignore_contigs.append(contig) + + h=GenomicPlot(reference, ignore_contigs=ignore_contigs) contigs = GenomicPlot(reference).contigs From bd80799240a5a1f270eaa6ea8bce1e67f0a88f53 Mon Sep 17 00:00:00 2001 From: BuysDB Date: Tue, 12 Aug 2025 10:34:30 +0200 Subject: [PATCH 2/3] Format progress, sort indices --- singlecellmultiomics/bamProcessing/bamToCountTable.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/singlecellmultiomics/bamProcessing/bamToCountTable.py b/singlecellmultiomics/bamProcessing/bamToCountTable.py index d99fc418..7f1ea48c 100644 --- a/singlecellmultiomics/bamProcessing/bamToCountTable.py +++ b/singlecellmultiomics/bamProcessing/bamToCountTable.py @@ -502,7 +502,7 @@ def create_count_table(args, return_df=False): for i, read in enumerate(pysam_iterator): if i % 1_000_000 == 0: print( - f"{bamFile} Processed {i} reads, assigned {assigned}, completion:{100*(i/(0.001+f.mapped+f.unmapped+f.nocoordinate))}%") + f"{bamFile} Processed {i} reads, assigned {assigned}, completion:{100*(i/(0.001+f.mapped+f.unmapped+f.nocoordinate)):.2f}%") if args.head is not None and i > args.head: break @@ -547,7 +547,9 @@ def create_count_table(args, return_df=False): f"Finished: {bamFile} Processed {i} reads, assigned {assigned}") print(f"Finished counting, now exporting to {args.o}") df = pd.DataFrame.from_dict(countTable) - + # Sort indices: + df = df.sort_index(axis=1).sort_index(axis=0) + # Set names of indices if not args.noNames: df.columns.set_names([tagToHumanName(t, TagDefinitions) From c559a3aa4e57ce9327ac3c259fa83cb455c8fd57 Mon Sep 17 00:00:00 2001 From: BuysDB Date: Tue, 12 Aug 2025 10:35:46 +0200 Subject: [PATCH 3/3] version bump --- singlecellmultiomics/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecellmultiomics/version.py b/singlecellmultiomics/version.py index b155c640..8e2638b5 100644 --- a/singlecellmultiomics/version.py +++ b/singlecellmultiomics/version.py @@ -1,2 +1,2 @@ -__version__ = '0.1.41' +__version__ = '0.1.43'