12 changes: 9 additions & 3 deletions .github/workflows/makefile.yml
@@ -44,9 +44,9 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get update
mamba install -c conda-forge -c bioconda 'python=3.11' 'pulp<=2.7.0' 'snakemake-minimal>7,<8' GraphAligner mashmap winnowmap
sudo apt-get install gcc-11 g++-11 libcurl4-openssl-dev liblzma-dev libbz2-dev
pip install networkx
mamba install -c conda-forge -c bioconda 'python=3.11' 'pulp<=2.7.0' 'snakemake-minimal>7,<8' GraphAligner mashmap winnowmap samtools seqtk
sudo apt-get install gcc-11 g++-11 libcurl4-openssl-dev liblzma-dev libbz2-dev bamtools libbamtools-dev
pip install networkx pysam
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100

@@ -74,6 +74,10 @@ jobs:
echo >> x.py "print(G)"
python x.py

echo ""
echo "BAMTOOLS TEST"
echo " "`command -v bamtools`

- name: Compile
run: |
cd src
@@ -120,10 +124,12 @@ jobs:
echo > lib/verkko/bin/utgcns '#!/bin/sh'
echo >> lib/verkko/bin/utgcns 'if [ ! -e part001.fasta ]; then'
echo >> lib/verkko/bin/utgcns 'cp ../../cache/part001.fasta packages/part001.fasta.WORKING'
echo >> lib/verkko/bin/utgcns 'samtools dict ../../cache/part001.fasta | samtools view -b -S > packages/part001.bam00000002.bam'
echo >> lib/verkko/bin/utgcns 'fi'
echo >> lib/verkko/bin/utgcns ''
echo >> lib/verkko/bin/utgcns 'if [ ! -e part002.fasta ]; then'
echo >> lib/verkko/bin/utgcns 'cp ../../cache/part002.fasta packages/part002.fasta.WORKING'
echo >> lib/verkko/bin/utgcns 'samtools dict ../../cache/part002.fasta | samtools view -b -S > packages/part002.bam00000001.bam'
echo >> lib/verkko/bin/utgcns 'fi'

chmod 755 mbg.sh
2 changes: 1 addition & 1 deletion .gitmodules
@@ -6,7 +6,7 @@
url = https://github.com/marbl/rukki
[submodule "src/MBG"]
path = src/MBG
url = https://github.com/maickrau/MBG
url = https://github.com/skoren/MBG
[submodule "src/hifioverlapper"]
path = src/hifioverlapper
url = https://github.com/maickrau/hifioverlapper
2 changes: 1 addition & 1 deletion src/MBG
Submodule MBG updated from f312a8 to 8fc6ca
11 changes: 7 additions & 4 deletions src/Snakefiles/1-buildGraph.sm
@@ -111,10 +111,13 @@ awk 'BEGIN \\\\
}} \\\\
\$1 == "S" \\\\
{{ \\\\
if (\$6 != "") {{
\$4 = \$6;
}}
print \$2, substr(\$4, 6), length(\$3); \\\\
if (\$6 != "" && match(\$6, /^kl:f:/)) {{ \\\\
\$4 = \$6; \\\\
}} \\\\
if (!match(\$4, /^kl:f:/) && !match(\$4, /^ll:f:/)) {{ \\\\
print "ERROR: line "\$0" does not match expected format for coverages"; exit 1 \\\\
}} \\\\
print \$2, substr(\$4, 6), length(\$3); \\\\
}}' \\\\
< ../{output.graph} \\\\
> ../{output.hifi_node_cov}
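For context on the added awk check above: a minimal, self-contained sketch of the kind of S-line it is meant to accept. The node name, sequence, and tag values below are made up, and fields are split on whitespace (awk's default) rather than real GFA tabs; the point is only that the coverage field must carry a kl:f: or ll:f: tag, with a kl:f: tag in field 6 taking precedence.

#!/bin/sh
# Hypothetical MBG-style GFA S-line: name, sequence, then coverage tags.
# The trimmed-down awk mirrors the new check from 1-buildGraph.sm.
echo 'S node1 ACGTACGT ll:f:25.5 FC:i:200 kl:f:30.2' | \
awk '$1 == "S" {
       if ($6 != "" && match($6, /^kl:f:/)) { $4 = $6 }
       if (!match($4, /^kl:f:/) && !match($4, /^ll:f:/)) {
         print "ERROR: line "$0" does not match expected format for coverages"; exit 1
       }
       print $2, substr($4, 6), length($3)   # node, coverage, sequence length
     }'
# expected output: node1 30.2 8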
6 changes: 3 additions & 3 deletions src/Snakefiles/2-processGraph.sm
@@ -47,17 +47,17 @@ cat > ./processGraph.sh <<EOF
set -e

echo Gap insertion pass 1.
{PYTHON} {VERKKO}/scripts/insert_aln_gaps.py ../{input.gfa} ../{input.paths} 2 50 paths-nogap-1.gaf gaps-hifi-1.gaf gapone n \\\\
{PYTHON} {VERKKO}/scripts/insert_aln_gaps.py ../{input.gfa} ../{input.paths} 2 5 paths-nogap-1.gaf gaps-hifi-1.gaf gapone n \\\\
> gapped-once-hifi-resolved.gfa

echo ""
echo Gap insertion pass 2.
{PYTHON} {VERKKO}/scripts/insert_aln_gaps.py gapped-once-hifi-resolved.gfa ../{input.paths} 2 300 paths-nogap-2.gaf gaps-hifi-2.gaf gaptwo n \\\\
{PYTHON} {VERKKO}/scripts/insert_aln_gaps.py gapped-once-hifi-resolved.gfa ../{input.paths} 2 50 paths-nogap-2.gaf gaps-hifi-2.gaf gaptwo n \\\\
> gapped-twice-hifi-resolved.gfa

echo ""
echo Gap insertion pass 3.
{PYTHON} {VERKKO}/scripts/insert_aln_gaps.py gapped-twice-hifi-resolved.gfa ../{input.paths} 1 5 paths-nogap-3.gaf gaps-hifi-3.gaf gapthree n \\\\
{PYTHON} {VERKKO}/scripts/insert_aln_gaps.py gapped-twice-hifi-resolved.gfa ../{input.paths} 2 300 paths-nogap-3.gaf gaps-hifi-3.gaf gapthree n \\\\
> gapped-hifi-resolved.gfa

echo ""
15 changes: 10 additions & 5 deletions src/Snakefiles/6-layoutContigs.sm
@@ -53,6 +53,7 @@ rule generateLayoutContigsInputs:
edges = '6-layoutContigs/combined-edges.gfa',
hifialns = '6-layoutContigs/hifi.alignments.gaf',
ontalns = '6-layoutContigs/ont.alignments.gaf',
ontgapaln = '6-layoutContigs/ont-gapfill.txt',
paths = '6-layoutContigs/consensus_paths.txt',
lengths = '6-layoutContigs/nodelens.txt'
log:
@@ -91,8 +92,8 @@ cat {params.aligns} ../{input.paths} | sed 's/^/hifi_/' \\\\
cat ../{input.ontgaps} | sed 's/^/ont_/' >> ../{output.hifialns}

# now get the ONT alignments, again prepending ont_ but ignoring any reads we've already included due to it being used as a gap fill
cat ../{input.ontgaps} |cut -f 1 > ../6-layoutContigs/ont-gapfill.txt
{PYTHON} {VERKKO}/scripts/replace_path_nodes.py ../{input.ontalns} ../{output.nodemap} |grep -F -v -w -f ../6-layoutContigs/ont-gapfill.txt | sed 's/^/ont_/' > ../{output.ontalns} || true
cat ../{input.ontgaps} |cut -f 1 > ../{output.ontgapaln}
{PYTHON} {VERKKO}/scripts/replace_path_nodes.py ../{input.ontalns} ../{output.nodemap} |grep -F -v -w -f ../{output.ontgapaln} | sed 's/^/ont_/' > ../{output.ontalns} || true

cat ../{input.graph} \\\\
| awk 'BEGIN \\\\
Expand Down Expand Up @@ -144,6 +145,7 @@ rule layoutContigs:
output:
layout = rules.verkko.input.layout,
scfmap = rules.verkko.input.scfmap,
dropped = '6-layoutContigs/unitig-popped.layout.scfmap.dropped',
gaps = '6-layoutContigs/gaps.txt'
params:
haveONT = config['withONT'],
@@ -180,15 +182,18 @@ fi
../{input.paths} \\\\
../{input.lengths} \\\\
../6-layoutContigs/unitig_initial.layout \\\\
../{output.scfmap}
../6-layoutContigs/unitig_initial.layout.scfmap

if [ "{params.haveONT}" = "True" ] && [ "{params.haveBAM}" = "True" ]; then
cp ../6-layoutContigs/unitig_initial.layout.scfmap.dropped ../{output.dropped}
{PYTHON} {VERKKO}/scripts/merge_layouts.py \\\\
../6-layoutContigs/unitig_initial.layout \\\\
../{input.ontalns} \\\\
> ../{output.layout}
../{output.layout}
else
cp ../6-layoutContigs/unitig_initial.layout ../{output.layout}
cp ../6-layoutContigs/unitig_initial.layout.scfmap.dropped ../{output.dropped}
cp ../6-layoutContigs/unitig_initial.layout.scfmap ../{output.scfmap}
cp ../6-layoutContigs/unitig_initial.layout ../{output.layout}
fi

{PYTHON} {VERKKO}/scripts/check_layout_gaps.py < ../{output.layout} > ../{output.gaps}
31 changes: 22 additions & 9 deletions src/Snakefiles/7-combineConsensus.sm
@@ -28,15 +28,15 @@ def combineConsensusI(wildcards):
def combineConsensusP(wildcards):
return expand( "packages/part{nnnn}.fasta", nnnn = glob_wildcards("7-consensus/packages/part{xxxx}.cnspack").xxxx)
def combineConsensusB(wildcards):
return expand( "packages/part{nnnn}.bam*.bam", nnnn = glob_wildcards("7-consensus/packages/part{xxxx}.cnspack").xxxx)
return expand( "packages/part{nnnn}.bam*.bam", nnnn = glob_wildcards("7-consensus/packages/part{xxxx}.cnspack").xxxx)

rule combineConsensus:
input:
packcns = combineConsensusI,
tigmap = rules.buildPackages.output.tigmap,
scfmap = rules.layoutContigs.output.scfmap,
layout = rules.layoutContigs.output.layout,
ontalns = rules.generateLayoutContigsInputs.output.ontalns,
ontalns = rules.generateLayoutContigsInputs.output.ontgapaln if config['withONT'] == "True" else {rules.emptyfile.output},
hificov = rules.verkko.input.hifi5cov,
finished = rules.buildPackages.output.finished,
pathstsv = rules.rukki.output.pathstsv if config['ruk_enable'] == "True" else rules.emptyfile.output,
@@ -59,14 +59,15 @@ rule combineConsensus:
short_len = config['short_contig_length'],
screens = config['screen'],
packbam = combineConsensusB,
haveBAM = config['withBAM']
haveBAM = config['withBAM'],
quick = config['cns_quick']
threads:
8
resources:
job_id = 1,
n_cpus = 8,
mem_gb = lambda wildcards, input, attempt: getAlignMemoryRequest(attempt, 2, input.packcns),
time_h = 4
mem_gb = lambda wildcards, input, attempt: getAlignMemoryRequest(attempt, 5, input.packcns),
time_h = 48
shell:
'''
cd 7-consensus
@@ -82,8 +83,8 @@ if [ -e ../../label2 ] ; then label2=`cat ../../label2` ; fi


cat > ./combineConsensus.sh <<EOF
#!/bin/sh
set -e
#!/bin/bash
set -e -o pipefail


echo "--------------------"
@@ -117,11 +118,23 @@ fi
# Copy the screened assembly to the output.
cp assembly.fasta ../{output.cns}

if [ "{params.haveBAM}" = "True" ]; then
if [ "{params.haveBAM}" = "True" ] && [ '{params.quick}' != 'True' ]; then
echo "--------------------"
echo "Combining consensus bam results."
echo ""
mem_per_core=\`expr {resources.mem_gb} \/ {threads} | awk '{{if (\$1 < 1) print "1G"; else print \$1"G"}}'\`
# increase ulimit for files in case we have lots of bam files to merge
max=\`ulimit -Hn\`
bef=\`ulimit -Sn\`
if [ \$bef -lt \$max ] ; then
ulimit -Sn \$max
aft=\`ulimit -Sn\`
echo " Changed max open files from \$bef to \$aft (max \$max)."
else
echo " Max open files limited to \$bef, no increase possible."
fi

mem_per_core=\`(expr \( {resources.mem_gb} \* 70 \) / \( 100 \* {threads} \) | awk '{{if (\$1 < 1) print "1G"; else print \$1"G"}}') || true\`
Copilot AI Nov 21, 2025

[nitpick] The memory calculation uses magic number 70 (70% of memory). Consider defining this as a variable with a descriptive name like MEM_USAGE_PERCENT=70 to improve clarity and maintainability.
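If that suggestion were taken up, the relevant line of combineConsensus.sh could look roughly like the sketch below. This is only an illustration of the reviewer's idea, written as plain shell outside the Snakemake heredoc (so without the escaping); MEM_GB and THREADS stand in for the {resources.mem_gb} and {threads} substitutions, and MEM_USAGE_PERCENT is the reviewer's proposed name, not part of the merged change.

#!/bin/sh
# Name the 70% memory headroom instead of hard-coding it (reviewer's nitpick).
# MEM_GB and THREADS are placeholders for the Snakemake substitutions.
MEM_USAGE_PERCENT=70
MEM_GB=32
THREADS=8

# ~MEM_USAGE_PERCENT% of the job's memory, split across sort threads, floored at 1G.
mem_per_core=`(expr \( $MEM_GB \* $MEM_USAGE_PERCENT \) / \( 100 \* $THREADS \) | awk '{if ($1 < 1) print "1G"; else print $1"G"}') || true`
echo $mem_per_core   # prints 2G for 32 GB and 8 threads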
rm -f ../{output.bam}.tmp.*

{params.SAMTOOLS} merge -f -O bam --reference combined.fasta -@{threads} -u - {params.packbam} | {PYTHON} {VERKKO}/scripts/bam_rename.py ../{input.layout} ../{input.ontalns} ../{input.scfmap} ../{input.tigmap} {params.packcns} | {params.SAMTOOLS} sort -m \$mem_per_core -@{threads} -o ../{output.bam}
else
20 changes: 15 additions & 5 deletions src/Snakefiles/7-generateConsensus.sm
@@ -34,7 +34,10 @@ rule generateConsensus:
params:
ont_ids = lambda wildcards: getNumLines({rules.extractONT.output.ont_subset_id}) if config['withONT'] == "True" else 0,
bam = '7-consensus/packages/part{nnnn}.bam' if config['withBAM'] == "True" else rules.emptyfile.output,
minread = config['cor_min_read']
minread = config['cor_min_read'],
iterations = config['cns_num_iter'],
coverage = config['cns_max_cov'],
quick = config['cns_quick']
threads:
int(config['cns_n_cpus']),
resources:
@@ -63,18 +66,25 @@ if [ "\$(expr {params.minread} - 500)" -gt 0 ]; then
minread=500
fi

# when generating initial consensus skip bam generation and only 1 round
if [ '{params.quick}' = 'True' ]; then
align="-norealign"
iter="-I 1"
else
iter="-I {params.iterations}"
fi

{VERKKO}/bin/utgcns \\\\
-threads {threads} \\\\
-import ../{input.package} \\\\
-A ../{output.consensus}.WORKING \\\\
-C 2 \\\\
\$iter -C 2 \\\\
\$align \\\\
-maxcoverage 50 \\\\
-maxcoverage {params.coverage} \\\\
-e 0.05 \\\\
-em 0.20 \\\\
-em 0.10 \\\\
-EM {params.ont_ids} \\\\
-l \$minread \\\\
-edlib \\\\
&& \\\\
mv ../{output.consensus}.WORKING ../{output.consensus} \\\\
&& \\\\
2 changes: 1 addition & 1 deletion src/Snakefiles/8-hicPipeline.sm
@@ -419,7 +419,6 @@ chmod +x ./scaffold_mergeBWA.sh
rule mergeBWA:
input:
split_finished = rules.splitHIC.output.split_finished,
index = '8-hicPipeline/unitigs.fasta.bwt',
alignments = lambda wildcards: expand("8-hicPipeline/mapped{nnnn}.bam", nnnn = splitHICoutputs(wildcards))

output:
@@ -489,6 +488,7 @@ chmod +x ./transform_bwa.sh
rule hicPhasing:
input:
mashmap_matches = rules.runMashMap.output.unitigs_hpc50,
mashmap_nohpcmatches = rules.runMashMap.output.unitigs_nohpc50,
hicmapping_byread = '8-hicPipeline/hic_mapping.byread.output',
graph='8-hicPipeline/unitigs.hpc.noseq.gfa'
output:
8 changes: 6 additions & 2 deletions src/Snakefiles/c2-findOverlaps.sm
@@ -61,8 +61,12 @@ done
-n 1 -w {params.merwindow} \\\\
--max-coverage {params.mercount} \\\\
--tmp-file-count {params.nindex} \\\\
-o matchchains \\\\
{input.reads}
-o matchchains.WORKING \\\\
{input.reads} \\\\
&& \\\\
mv matchchains.WORKING.metadata matchchains.metadata \\\\
&& \\\\
mv matchchains.WORKING.positions matchchains.positions

if [ ! -s matchchains.positions ]; then
echo "Error: indexing failed, check above for any errors"
10 changes: 6 additions & 4 deletions src/Snakefiles/functions.sm
@@ -14,6 +14,7 @@

import math
import re
import glob

def getNumLines(input):
infile = input.pop()
@@ -77,9 +78,10 @@ def getAlignMemoryRequest(attempt, factor, files):
mem = float(config['ali_mem_gb'])

sum = 0
for filename in files:
if os.path.exists(filename):
sum += os.path.getsize(filename)
for wildcard in files:
for filename in glob.glob(wildcard):
if os.path.exists(filename):
sum += os.path.getsize(filename)
if sum > 0:
mem = factor * sum / 1024 / 1024 / 1024

@@ -141,7 +143,7 @@ def getConsensusMemoryRequest(attempt, jobid): # Figure out what partitionin
for l in f:
words = l.strip().split();
if jobid == words[5]:
mem = max(mem, 5.0 + 0.80 * float(words[4]))
mem = max(mem, 5.0 + 0.90 * float(words[4]))

return int(math.ceil(math.ceil(mem)*scl))

2 changes: 1 addition & 1 deletion src/main.mk
@@ -290,7 +290,7 @@ FILES += \
scripts/hic_prefilter.py -> ../lib/verkko/scripts/hic_prefilter.py \
scripts/launch_scaffolding.py -> ../lib/verkko/scripts/launch_scaffolding.py \
scripts/remove_nodes_add_telomere.py -> ../lib/verkko/scripts/remove_nodes_add_telomere.py \
scripts/scaffolding/logger_wrap.py -> ../lib/verkko/scripts/scaffolding/logger_wrap.py \
scripts/logging_utils.py -> ../lib/verkko/scripts/logging_utils.py \
scripts/scaffolding/match_graph.py -> ../lib/verkko/scripts/scaffolding/match_graph.py \
scripts/scaffolding/path_storage.py -> ../lib/verkko/scripts/scaffolding/path_storage.py \
scripts/scaffolding/scaff_prefilter.py -> ../lib/verkko/scripts/scaffolding/scaff_prefilter.py \
2 changes: 1 addition & 1 deletion src/scripts/bam_rename.py
@@ -11,7 +11,7 @@
bamfile = pysam.AlignmentFile("-", "rb")
layout = sys.argv[1]
gap_info = sys.argv[2]
scfmap = seq.readScfMap(sys.argv[3])
scfmap,_ = seq.readScfMap(sys.argv[3])
namedict = seq.readNameMap(sys.argv[4])
lens = dict()
offsets = dict()
2 changes: 1 addition & 1 deletion src/scripts/check_layout_gaps.py
@@ -16,7 +16,7 @@ def check_alns(current_tig, current_len, current_alns):
if current_start > last_end:
print(current_tig + "\t" + str(last_end) + "\t" + str(current_start) + " (len " + str(current_len) + ")")
if current_start < 0 or current_end > current_len:
print("layout outside contig: " + current_tig + "\t" + current_start + "\t" + current_end + "\t" + " (len " + str(current_len) + ")")
print("layout outside contig: " + current_tig + "\t" + str(current_start) + "\t" + str(current_end) + "\t" + " (len " + str(current_len) + ")")
last_end = max(last_end, current_end)
if last_end < current_len:
print(current_tig + "\t" + str(last_end) + "\t" + str(current_len) + " (len " + str(current_len) + ")")